From 0b936c71d8a1d5effa6c29d5ee9c227fff18a070 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Mon, 23 May 2011 14:03:02 +0000
Subject: owl-dl fixed for model and prediction datasets
---
lib/dataset.rb | 7 ++-
lib/model.rb | 136 ++++++++++++++++++++++++++++++------------------------
lib/serializer.rb | 27 +++++++----
3 files changed, 95 insertions(+), 75 deletions(-)
(limited to 'lib')
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 4005c1c..4dc4296 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -179,7 +179,6 @@ module OpenTox
end
end
-=begin
# Detect feature type(s) in the dataset
# @return [String] `classification", "regression", "mixed" or unknown`
def feature_type(subjectid=nil)
@@ -193,6 +192,7 @@ module OpenTox
"unknown"
end
end
+=begin
=end
# Get Spreadsheet representation
@@ -369,12 +369,11 @@ module OpenTox
end
def value(compound)
- @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first
end
def confidence(compound)
- feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first
- @features[feature_uri][OT.confidence]
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first
end
def descriptors(compound)
diff --git a/lib/model.rb b/lib/model.rb
index 998d2dc..d46152d 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -164,8 +164,6 @@ module OpenTox
features = {}
unless @prediction_dataset
- #@prediction_dataset = cached_prediction
- #return @prediction_dataset if cached_prediction
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
@prediction_dataset.add_metadata( {
OT.hasSource => @uri,
@@ -237,38 +235,90 @@ module OpenTox
prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
end
- prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
- # TODO: fix dependentVariable
- @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
+ # TODO: reasonable feature name
+ #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
+ value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value")
+ confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence")
+ prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]}
+ prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil?
+
+
+ #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
+ @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables]
+
+=begin
if @neighbors.size == 0
- @prediction_dataset.add_feature(prediction_feature_uri, {
- RDF.type => [OT.MeasuredFeature],
- OT.hasSource => @uri,
- DC.creator => @uri,
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.error => "No similar compounds in training dataset.",
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
- })
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+ prediction_feature_uris.each do |prediction_feature_uri,value|
+ @prediction_dataset.add_feature(prediction_feature_uri, {
+ RDF.type => [OT.MeasuredFeature],
+ OT.hasSource => @uri,
+ DC.creator => @uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
+ OT.error => "No similar compounds in training dataset.",
+ #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ })
+ @prediction_dataset.add @compound.uri, prediction_feature_uri, value
+ end
else
+=end
+ prediction_feature_uris.each do |prediction_feature_uri,value|
+ @prediction_dataset.metadata[OT.predictedVariables] = [] unless @prediction_dataset.metadata[OT.predictedVariables]
+ @prediction_dataset.metadata[OT.predictedVariables] << prediction_feature_uri
@prediction_dataset.add_feature(prediction_feature_uri, {
RDF.type => [OT.ModelPrediction],
OT.hasSource => @uri,
DC.creator => @uri,
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
- OT.prediction => prediction[:prediction],
- OT.confidence => prediction[:confidence],
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ # TODO: factor information to value
})
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
+ #OT.prediction => prediction[:prediction],
+ #OT.confidence => prediction[:confidence],
+ #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
+ @prediction_dataset.add @compound.uri, prediction_feature_uri, value
+ end
- if verbose
- if @feature_calculation_algorithm == "Substructure.match"
- f = 0
- @compound_features.each do |feature|
- feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
+ if verbose
+ if @feature_calculation_algorithm == "Substructure.match"
+ f = 0
+ @compound_features.each do |feature|
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s)
+ features[feature] = feature_uri
+ @prediction_dataset.add_feature(feature_uri, {
+ RDF.type => [OT.Substructure],
+ OT.smarts => feature,
+ OT.pValue => @p_values[feature],
+ OT.effect => @effects[feature]
+ })
+ @prediction_dataset.add @compound.uri, feature_uri, true
+ f+=1
+ end
+ else
+ @compound_features.each do |feature|
+ features[feature] = feature
+ @prediction_dataset.add @compound.uri, feature, true
+ end
+ end
+ n = 0
+ @neighbors.each do |neighbor|
+ neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
+ @prediction_dataset.add_feature(neighbor_uri, {
+ OT.compound => neighbor[:compound],
+ OT.similarity => neighbor[:similarity],
+ OT.measuredActivity => neighbor[:activity],
+ RDF.type => [OT.Neighbor]
+ })
+ @prediction_dataset.add @compound.uri, neighbor_uri, true
+ f = 0 unless f
+ neighbor[:features].each do |feature|
+ if @feature_calculation_algorithm == "Substructure.match"
+ feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
+ else
+ feature_uri = feature
+ end
+ @prediction_dataset.add neighbor[:compound], feature_uri, true
+ unless features.has_key? feature
features[feature] = feature_uri
@prediction_dataset.add_feature(feature_uri, {
RDF.type => [OT.Substructure],
@@ -276,49 +326,13 @@ module OpenTox
OT.pValue => @p_values[feature],
OT.effect => @effects[feature]
})
- @prediction_dataset.add @compound.uri, feature_uri, true
f+=1
end
- else
- @compound_features.each do |feature|
- features[feature] = feature
- @prediction_dataset.add @compound.uri, feature, true
- end
- end
- n = 0
- @neighbors.each do |neighbor|
- neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s )
- @prediction_dataset.add_feature(neighbor_uri, {
- OT.compound => neighbor[:compound],
- OT.similarity => neighbor[:similarity],
- OT.measuredActivity => neighbor[:activity],
- RDF.type => [OT.Neighbor]
- })
- @prediction_dataset.add @compound.uri, neighbor_uri, true
- f = 0 unless f
- neighbor[:features].each do |feature|
- if @feature_calculation_algorithm == "Substructure.match"
- feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature]
- else
- feature_uri = feature
- end
- @prediction_dataset.add neighbor[:compound], feature_uri, true
- unless features.has_key? feature
- features[feature] = feature_uri
- @prediction_dataset.add_feature(feature_uri, {
- RDF.type => [OT.Substructure],
- OT.smarts => feature,
- OT.pValue => @p_values[feature],
- OT.effect => @effects[feature]
- })
- f+=1
- end
- end
- n+=1
end
- # what happens with dataset predictions?
+ n+=1
end
end
+ #end
@prediction_dataset.save(subjectid)
@prediction_dataset
diff --git a/lib/serializer.rb b/lib/serializer.rb
index e4cb541..78e7709 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -17,6 +17,7 @@ module OpenTox
# this should come from opentox.owl
OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.Model => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
@@ -27,6 +28,8 @@ module OpenTox
OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
OTA.PatternMiningSupervised => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OTA.ClassificationLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OTA.RegressionLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
#classes for validation
OT.Validation => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
@@ -45,6 +48,9 @@ module OpenTox
OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.featureDataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.dependentVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
#object props for validation#
OT.model => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
@@ -126,7 +132,7 @@ module OpenTox
OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
- OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+ #OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
}
@data_entries = {}
@@ -157,23 +163,16 @@ module OpenTox
# Add a dataset
# @param [String] uri Dataset URI
def add_dataset(dataset)
-
@dataset = dataset.uri
-
@object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
-
add_metadata dataset.uri, dataset.metadata
-
dataset.compounds.each { |compound| add_compound compound }
-
dataset.features.each { |feature,metadata| add_feature feature,metadata }
-
dataset.data_entries.each do |compound,entry|
entry.each do |feature,values|
values.each { |value| add_data_entry compound,feature,value }
end
end
-
end
# Add a algorithm
@@ -188,6 +187,13 @@ module OpenTox
def add_model(uri,metadata)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] }
add_metadata uri, metadata
+ @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
+ @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
+ @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }
+ # TODO: add algorithms from parameters
+ @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+ @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+ @object["http://ot-dev.in-silico.ch/algorithm/lazar"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
end
# Add a task
@@ -272,7 +278,7 @@ module OpenTox
@object[genid][name] = [{"type" => type(entry), "value" => entry }]
end
end
- elsif v.is_a? Array and u == RDF.type
+ elsif v.is_a? Array #and u == RDF.type
@object[uri] = {} unless @object[uri]
v.each do |value|
@object[uri][u] = [] unless @object[uri][u]
@@ -354,7 +360,8 @@ module OpenTox
# @return [text/plain] Object OWL-DL in RDF/XML format
def to_rdfxml
Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path}
- `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
+ # TODO: add base uri for ist services
+ `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:ota="#{OTA.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null`
end
# Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification
--
cgit v1.2.3
From 87eb7cc1e079821c2f7c5e101e7e392e9bd10f00 Mon Sep 17 00:00:00 2001
From: davor
Date: Tue, 24 May 2011 09:35:11 +0200
Subject: Fixing regression detection
---
lib/parser.rb | 66 +++++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 51 insertions(+), 15 deletions(-)
(limited to 'lib')
diff --git a/lib/parser.rb b/lib/parser.rb
index 7bdee95..8deaa91 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -277,7 +277,23 @@ module OpenTox
def load_spreadsheet(book)
book.default_sheet = 0
add_features book.row(1)
- 2.upto(book.last_row) { |i| add_values book.row(i) }
+
+ # AM: fix mixed read in
+ regression_features=false
+ 2.upto(book.last_row) { |i|
+ row = book.row(i)
+ smiles = row.shift
+ row.each_index do |i|
+ value = row[i]
+ type = feature_type(value)
+ if type == OT.NumericFeature
+ regression_features=true
+ break
+ end
+ end
+ }
+
+ 2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
warnings
@dataset
end
@@ -289,7 +305,23 @@ module OpenTox
row = 0
input = csv.split("\n")
add_features split_row(input.shift)
- input.each { |row| add_values split_row(row) }
+
+
+ # AM: fix mixed read in
+ regression_features=false
+ input.each { |row|
+ row = split_row(row)
+ smiles = row.shift
+ row.each_index do |i|
+ value = row[i]
+ type = feature_type(value)
+ if type == OT.NumericFeature
+ regression_features=true
+ break
+ end
+ end
+ }
+ input.each { |row| add_values split_row(row),regression_features }
warnings
@dataset
end
@@ -335,7 +367,7 @@ module OpenTox
end
end
- def add_values(row)
+ def add_values(row, regression_features=false)
smiles = row.shift
compound = Compound.from_smiles(smiles)
@@ -353,19 +385,23 @@ module OpenTox
@feature_types[feature] << type
- case type
- when OT.NominalFeature
- case value.to_s
- when TRUE_REGEXP
- val = true
- when FALSE_REGEXP
- val = false
- end
- when OT.NumericFeature
+ if (regression_features)
val = value.to_f
- when OT.StringFeature
- val = value.to_s
- @activity_errors << smiles+", "+row.join(", ")
+ else
+ case type
+ when OT.NominalFeature
+ case value.to_s
+ when TRUE_REGEXP
+ val = true
+ when FALSE_REGEXP
+ val = false
+ end
+ when OT.NumericFeature
+ val = value.to_f
+ when OT.StringFeature
+ val = value.to_s
+ @activity_errors << smiles+", "+row.join(", ")
+ end
end
if val!=nil
@dataset.add(compound.uri, feature, val)
--
cgit v1.2.3
From 4a7ba2adb0743cd225ad5c2cf9f71c896d87b157 Mon Sep 17 00:00:00 2001
From: davor
Date: Tue, 24 May 2011 10:45:53 +0200
Subject: Created dedicated function for value sweeping
---
lib/parser.rb | 35 +++++++++++++++++------------------
1 file changed, 17 insertions(+), 18 deletions(-)
(limited to 'lib')
diff --git a/lib/parser.rb b/lib/parser.rb
index 8deaa91..4984292 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -282,15 +282,8 @@ module OpenTox
regression_features=false
2.upto(book.last_row) { |i|
row = book.row(i)
- smiles = row.shift
- row.each_index do |i|
- value = row[i]
- type = feature_type(value)
- if type == OT.NumericFeature
- regression_features=true
- break
- end
- end
+ regression_features = detect_regression_features row
+ break if regression_features=true
}
2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
@@ -311,21 +304,15 @@ module OpenTox
regression_features=false
input.each { |row|
row = split_row(row)
- smiles = row.shift
- row.each_index do |i|
- value = row[i]
- type = feature_type(value)
- if type == OT.NumericFeature
- regression_features=true
- break
- end
- end
+ regression_features = detect_regression_features row
+ break if regression_features=true
}
input.each { |row| add_values split_row(row),regression_features }
warnings
@dataset
end
+
private
def warnings
@@ -367,6 +354,18 @@ module OpenTox
end
end
+ def detect_regression_features row
+ regression_features=false
+ row.each_index do |i|
+ value = row[i]
+ type = feature_type(value)
+ if type == OT.NumericFeature
+ regression_features=true
+ end
+ end
+ regression_features
+ end
+
def add_values(row, regression_features=false)
smiles = row.shift
--
cgit v1.2.3
From 8a20cf940c346fd04649d3c3c8f7ad4c1fcb20cb Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Tue, 24 May 2011 14:00:16 +0200
Subject: Fix: break was too early
---
lib/parser.rb | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
(limited to 'lib')
diff --git a/lib/parser.rb b/lib/parser.rb
index 4984292..5f847c3 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -283,7 +283,7 @@ module OpenTox
2.upto(book.last_row) { |i|
row = book.row(i)
regression_features = detect_regression_features row
- break if regression_features=true
+ break if regression_features==true
}
2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
@@ -305,7 +305,7 @@ module OpenTox
input.each { |row|
row = split_row(row)
regression_features = detect_regression_features row
- break if regression_features=true
+ break if regression_features==true
}
input.each { |row| add_values split_row(row),regression_features }
warnings
@@ -355,6 +355,7 @@ module OpenTox
end
def detect_regression_features row
+ row.shift
regression_features=false
row.each_index do |i|
value = row[i]
--
cgit v1.2.3
From 0d87789eec37f7ae09d01937dbfc72af1ef17252 Mon Sep 17 00:00:00 2001
From: mguetlein
Date: Tue, 24 May 2011 16:06:05 +0200
Subject: fix small errors in to-html method
---
lib/to-html.rb | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
(limited to 'lib')
diff --git a/lib/to-html.rb b/lib/to-html.rb
index 66a3e74..51602d7 100644
--- a/lib/to-html.rb
+++ b/lib/to-html.rb
@@ -6,7 +6,7 @@ class String
# encloses URI in text with with link tag
# @return [String] new text with marked links
def link_urls
- self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '\0')
+ self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '\0')
end
end
@@ -30,7 +30,7 @@ module OpenTox
title = nil #$sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra
html = ""
html += ""+title+"" if title
- html += ""
+ html += "<\/img>"
if AA_SERVER
user = OpenTox::Authorization.get_user(subjectid) if subjectid
@@ -63,7 +63,7 @@ module OpenTox
html += "Content
" if description || related_links
html += ""
html += text.link_urls
- html += "
"
+ html += "
"
html
end
@@ -78,7 +78,7 @@ module OpenTox
"password: | |
"+
#""+
" |
"
- html += ""
+ html += "