diff options
author | mr <mr@mrautenberg.de> | 2011-05-25 13:37:06 +0200 |
---|---|---|
committer | mr <mr@mrautenberg.de> | 2011-05-25 13:37:06 +0200 |
commit | d5bf9fd2b7f4401c36d00ab6afb1a003fdb07d2f (patch) | |
tree | db88d9b1a7f01f7a24a597a05b293d9307dc1d3f /lib | |
parent | 796ef9b86fcdcf5f9520b958c09cb37ae1374bf4 (diff) | |
parent | 52e73a3da8e99da9a0a973b6ef9934297bc6511e (diff) |
Merge branch 'development' of github.com:opentox/opentox-ruby into development
Diffstat (limited to 'lib')
-rw-r--r-- | lib/algorithm.rb | 2 | ||||
-rw-r--r-- | lib/dataset.rb | 7 | ||||
-rw-r--r-- | lib/model.rb | 136 | ||||
-rw-r--r-- | lib/parser.rb | 8 | ||||
-rw-r--r-- | lib/rest_client_wrapper.rb | 2 | ||||
-rw-r--r-- | lib/serializer.rb | 28 | ||||
-rw-r--r-- | lib/task.rb | 5 | ||||
-rw-r--r-- | lib/to-html.rb | 8 |
8 files changed, 112 insertions, 84 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 5b41cbf..96b9df1 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -202,7 +202,7 @@ module OpenTox acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = local_svm (neighbors, acts_f, sims, "C-bsvc", params) + prediction = local_svm(neighbors, acts_f, sims, "C-bsvc", params) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" diff --git a/lib/dataset.rb b/lib/dataset.rb index 4005c1c..4dc4296 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -179,7 +179,6 @@ module OpenTox end end -=begin # Detect feature type(s) in the dataset # @return [String] `classification", "regression", "mixed" or unknown` def feature_type(subjectid=nil) @@ -193,6 +192,7 @@ module OpenTox "unknown" end end +=begin =end # Get Spreadsheet representation @@ -369,12 +369,11 @@ module OpenTox end def value(compound) - @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first end def confidence(compound) - feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first - @features[feature_uri][OT.confidence] + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first end def descriptors(compound) diff --git a/lib/model.rb b/lib/model.rb index 998d2dc..139aed8 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -164,8 +164,6 @@ module OpenTox features = {} unless @prediction_dataset - #@prediction_dataset = cached_prediction - #return @prediction_dataset if cached_prediction @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) @prediction_dataset.add_metadata( { OT.hasSource => @uri, @@ -237,38 +235,90 @@ module OpenTox prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end - prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) - # TODO: fix dependentVariable - @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri + # TODO: reasonable feature name + #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) + value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value") + confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence") + + prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} + #prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? + prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? + + #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri + @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] +=begin if @neighbors.size == 0 - @prediction_dataset.add_feature(prediction_feature_uri, { - RDF.type => [OT.MeasuredFeature], - OT.hasSource => @uri, - DC.creator => @uri, - DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.error => "No similar compounds in training dataset.", - OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - }) - @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] + prediction_feature_uris.each do |prediction_feature_uri,value| + @prediction_dataset.add_feature(prediction_feature_uri, { + RDF.type => [OT.MeasuredFeature], + OT.hasSource => @uri, + DC.creator => @uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), + OT.error => "No similar compounds in training dataset.", + #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + }) + @prediction_dataset.add @compound.uri, prediction_feature_uri, value + end else +=end + prediction_feature_uris.each do |prediction_feature_uri,value| + @prediction_dataset.metadata[OT.predictedVariables] = [] unless @prediction_dataset.metadata[OT.predictedVariables] + @prediction_dataset.metadata[OT.predictedVariables] << prediction_feature_uri @prediction_dataset.add_feature(prediction_feature_uri, { RDF.type => [OT.ModelPrediction], OT.hasSource => @uri, DC.creator => @uri, DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), - OT.prediction => prediction[:prediction], - OT.confidence => prediction[:confidence], - OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + # TODO: factor information to value }) - @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction] + #OT.prediction => prediction[:prediction], + #OT.confidence => prediction[:confidence], + #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + @prediction_dataset.add @compound.uri, prediction_feature_uri, value + end - if verbose - if @feature_calculation_algorithm == "Substructure.match" - f = 0 - @compound_features.each do |feature| - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) + if verbose + if @feature_calculation_algorithm == "Substructure.match" + f = 0 + @compound_features.each do |feature| + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) + features[feature] = feature_uri + @prediction_dataset.add_feature(feature_uri, { + RDF.type => [OT.Substructure], + OT.smarts => feature, + OT.pValue => @p_values[feature], + OT.effect => @effects[feature] + }) + @prediction_dataset.add @compound.uri, feature_uri, true + f+=1 + end + else + @compound_features.each do |feature| + features[feature] = feature + @prediction_dataset.add @compound.uri, feature, true + end + end + n = 0 + @neighbors.each do |neighbor| + neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) + @prediction_dataset.add_feature(neighbor_uri, { + OT.compound => neighbor[:compound], + OT.similarity => neighbor[:similarity], + OT.measuredActivity => neighbor[:activity], + RDF.type => [OT.Neighbor] + }) + @prediction_dataset.add @compound.uri, neighbor_uri, true + f = 0 unless f + neighbor[:features].each do |feature| + if @feature_calculation_algorithm == "Substructure.match" + feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] + else + feature_uri = feature + end + @prediction_dataset.add neighbor[:compound], feature_uri, true + unless features.has_key? feature features[feature] = feature_uri @prediction_dataset.add_feature(feature_uri, { RDF.type => [OT.Substructure], @@ -276,49 +326,13 @@ module OpenTox OT.pValue => @p_values[feature], OT.effect => @effects[feature] }) - @prediction_dataset.add @compound.uri, feature_uri, true f+=1 end - else - @compound_features.each do |feature| - features[feature] = feature - @prediction_dataset.add @compound.uri, feature, true - end - end - n = 0 - @neighbors.each do |neighbor| - neighbor_uri = File.join( @prediction_dataset.uri, "feature", "neighbor", n.to_s ) - @prediction_dataset.add_feature(neighbor_uri, { - OT.compound => neighbor[:compound], - OT.similarity => neighbor[:similarity], - OT.measuredActivity => neighbor[:activity], - RDF.type => [OT.Neighbor] - }) - @prediction_dataset.add @compound.uri, neighbor_uri, true - f = 0 unless f - neighbor[:features].each do |feature| - if @feature_calculation_algorithm == "Substructure.match" - feature_uri = File.join( @prediction_dataset.uri, "feature", "descriptor", f.to_s) unless feature_uri = features[feature] - else - feature_uri = feature - end - @prediction_dataset.add neighbor[:compound], feature_uri, true - unless features.has_key? feature - features[feature] = feature_uri - @prediction_dataset.add_feature(feature_uri, { - RDF.type => [OT.Substructure], - OT.smarts => feature, - OT.pValue => @p_values[feature], - OT.effect => @effects[feature] - }) - f+=1 - end - end - n+=1 end - # what happens with dataset predictions? + n+=1 end end + #end @prediction_dataset.save(subjectid) @prediction_dataset diff --git a/lib/parser.rb b/lib/parser.rb index 5f847c3..a6878a2 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -56,7 +56,7 @@ module OpenTox `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line| triple = line.to_triple if triple[0] == @uri - if triple[1] == RDF.type # allow multiple types + if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types @metadata[triple[1]] = [] unless @metadata[triple[1]] @metadata[triple[1]] << triple[2].split('^^').first else @@ -228,7 +228,11 @@ module OpenTox file = Tempfile.new("ot-rdfxml") # do not concat /features to uri string, this would not work for dataset/R401577?max=3 uri = URI::parse(@uri) - uri.path = File.join(uri.path,"features") + # PENDING + # ambit models return http://host/dataset/id?feature_uris[]=sth but + # amibt dataset services does not support http://host/dataset/id/features?feature_uris[]=sth + # -> load features from complete dataset + uri.path = File.join(uri.path,"features") unless @uri=~/\?feature_uris\[\]/ uri = uri.to_s file.puts OpenTox::RestClientWrapper.get uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false file.close diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb index 747a353..53887a2 100644 --- a/lib/rest_client_wrapper.rb +++ b/lib/rest_client_wrapper.rb @@ -131,7 +131,7 @@ module OpenTox raise "unknown content-type for task : '"+res.content_type.to_s+"'"+" base-uri: "+base_uri.to_s+" content: "+res[0..200].to_s end - LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion" + #LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion" task.wait_for_completion waiting_task unless task.completed? # maybe task was cancelled / error if task.errorReport diff --git a/lib/serializer.rb b/lib/serializer.rb index e4cb541..62c1159 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -17,6 +17,7 @@ module OpenTox # this should come from opentox.owl OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Model => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , @@ -27,6 +28,8 @@ module OpenTox OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OT.Task => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , OTA.PatternMiningSupervised => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OTA.ClassificationLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OTA.RegressionLazySingleTarget => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , #classes for validation OT.Validation => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , @@ -45,6 +48,9 @@ module OpenTox OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.featureDataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.dependentVariables => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , #object props for validation# OT.model => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , @@ -103,6 +109,7 @@ module OpenTox OT.precision => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.areaUnderRoc => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.weightedAreaUnderRoc => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + OT.weightedAccuracy => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.fMeasure => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.percentIncorrect => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.validationType => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , @@ -126,7 +133,7 @@ module OpenTox OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , - OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + #OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , } @data_entries = {} @@ -157,23 +164,16 @@ module OpenTox # Add a dataset # @param [String] uri Dataset URI def add_dataset(dataset) - @dataset = dataset.uri - @object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } - add_metadata dataset.uri, dataset.metadata - dataset.compounds.each { |compound| add_compound compound } - dataset.features.each { |feature,metadata| add_feature feature,metadata } - dataset.data_entries.each do |compound,entry| entry.each do |feature,values| values.each { |value| add_data_entry compound,feature,value } end end - end # Add a algorithm @@ -188,6 +188,13 @@ module OpenTox def add_model(uri,metadata) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] } add_metadata uri, metadata + @object[metadata[OT.featureDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } + @object[metadata[OT.trainingDataset]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } + @object[metadata[OT.dependentVariables]] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } + # TODO: add algorithms from parameters + @object["http://ot-dev.in-silico.ch/algorithm/fminer/bbrc"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + @object["http://ot-dev.in-silico.ch/algorithm/fminer/last"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + @object["http://ot-dev.in-silico.ch/algorithm/lazar"] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } end # Add a task @@ -272,7 +279,7 @@ module OpenTox @object[genid][name] = [{"type" => type(entry), "value" => entry }] end end - elsif v.is_a? Array and u == RDF.type + elsif v.is_a? Array #and u == RDF.type @object[uri] = {} unless @object[uri] v.each do |value| @object[uri][u] = [] unless @object[uri][u] @@ -354,7 +361,8 @@ module OpenTox # @return [text/plain] Object OWL-DL in RDF/XML format def to_rdfxml Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path} - `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null` + # TODO: add base uri for ist services + `rapper -i ntriples -f 'xmlns:ot="#{OT.uri}"' -f 'xmlns:ota="#{OTA.uri}"' -f 'xmlns:dc="#{DC.uri}"' -f 'xmlns:rdf="#{RDF.uri}"' -f 'xmlns:owl="#{OWL.uri}"' -o rdfxml #{@path} 2>/dev/null` end # Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification diff --git a/lib/task.rb b/lib/task.rb index 19f42d6..146a756 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -38,6 +38,7 @@ module OpenTox task = Task.new(task_uri.chomp) # measure current memory consumption +=begin memory = `free -m|sed -n '2p'`.split free_memory = memory[3].to_i + memory[6].to_i # include cache if free_memory < 20 # require at least 200 M free memory @@ -56,6 +57,7 @@ module OpenTox # return task # #raise "Server too busy to start a new task" #end +=end task_pid = Spork.spork(:logger => LOGGER) do LOGGER.debug "Task #{task.uri} started #{Time.now}" @@ -286,7 +288,8 @@ module OpenTox if @http_code == 202 raise "#{@uri}: illegal task state, code is 202, but hasStatus is not Running: '"+@metadata[OT.hasStatus]+"'" unless running? elsif @http_code == 201 - raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed? + # ignore hasStatus + # raise "#{@uri}: illegal task state, code is 201, but hasStatus is not Completed: '"+@metadata[OT.hasStatus]+"'" unless completed? raise "#{@uri}: illegal task state, code is 201, resultURI is no task-URI: '"+@metadata[OT.resultURI].to_s+ "'" unless @metadata[OT.resultURI] and @metadata[OT.resultURI].to_s.uri? end diff --git a/lib/to-html.rb b/lib/to-html.rb index 66a3e74..51602d7 100644 --- a/lib/to-html.rb +++ b/lib/to-html.rb @@ -6,7 +6,7 @@ class String # encloses URI in text with with link tag # @return [String] new text with marked links def link_urls - self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '<a href=\0>\0</a>') + self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '<a href="\0">\0</a>') end end @@ -30,7 +30,7 @@ module OpenTox title = nil #$sinatra.url_for($sinatra.request.env['PATH_INFO'], :full) if $sinatra html = "<html>" html += "<title>"+title+"</title>" if title - html += "<img src="+OT_LOGO+"><body>" + html += "<img src=\""+OT_LOGO+"\"><\/img><body>" if AA_SERVER user = OpenTox::Authorization.get_user(subjectid) if subjectid @@ -63,7 +63,7 @@ module OpenTox html += "<h3>Content</h3>" if description || related_links html += "<pre><p style=\"padding:15px; border:10px solid \#5D308A\">" html += text.link_urls - html += "</p></pre></body><html>" + html += "</p></pre></body></html>" html end @@ -78,7 +78,7 @@ module OpenTox "<tr><td>password:</td><td><input type='password' name='password' size='15' /></td></tr>"+ #"<input type=hidden name=back_to value="+back_to.to_s+">"+ "<tr><td><input type='submit' value='Login' /></td></tr>" - html += "</table></p></pre></form></body><html>" + html += "</table></p></pre></form></body></html>" html end end |