diff options
author | Christoph Helma <helma@in-silico.ch> | 2013-03-26 10:43:43 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2013-03-26 10:43:43 +0100 |
commit | 84d44cc32d31b6cf1030c2ef271a902ca9800129 (patch) | |
tree | e7ca04ec9a90cff87661a7da0f7fc04ab738aab8 | |
parent | b586920b9baf38d6d178f345cebac93771bcf04f (diff) |
descriptor calculation moved to algorithm service. code cleanup and refactoring.
-rw-r--r-- | Gemfile | 8 | ||||
-rw-r--r-- | algorithm.gemspec | 3 | ||||
-rw-r--r-- | application.rb | 18 | ||||
-rw-r--r-- | descriptor.rb | 285 | ||||
-rw-r--r-- | lib/algorithm.rb | 3 | ||||
-rw-r--r-- | lib/algorithm/feature_values.rb | 9 | ||||
-rw-r--r-- | lib/algorithm/fminer.rb | 25 | ||||
-rw-r--r-- | lib/algorithm/similarity.rb | 2 | ||||
-rw-r--r-- | lib/algorithm/transform.rb | 6 | ||||
-rw-r--r-- | lib/dataset.rb | 6 | ||||
-rw-r--r-- | lib/lazar.rb | 222 | ||||
-rw-r--r-- | lib/model.rb | 151 | ||||
-rw-r--r-- | webapp/fminer.rb | 241 | ||||
-rw-r--r-- | webapp/fs.rb | 14 | ||||
-rw-r--r-- | webapp/lazar.rb | 233 | ||||
-rw-r--r-- | webapp/sinatra.rb | 13 | ||||
-rw-r--r-- | webapp/test.rb | 2 |
17 files changed, 709 insertions, 532 deletions
@@ -1,10 +1,4 @@ source :gemcutter - +gemspec gem 'opentox-server', :path => "../opentox-server" gem "opentox-client", :path => "../opentox-client" -gem "openbabel", "~>2.3.1.5" -gem "rinruby", "~>2.0.2" -gem "nokogiri", "~>1.4.4" -gem "emk-sinatra-url-for", "~>0.2.1" -gem "statsample", "~>1.1" -gem "gsl", "~>1.14" diff --git a/algorithm.gemspec b/algorithm.gemspec index 925c397..dc334b9 100644 --- a/algorithm.gemspec +++ b/algorithm.gemspec @@ -23,8 +23,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'nokogiri', "~>1.4.4" s.add_runtime_dependency 'statsample', "~>1.1" s.add_runtime_dependency 'gsl', "~>1.14" - s.add_runtime_dependency 'emk-sinatra-url-for', "~>0.2.1" -# s.add_runtime_dependency '', "~>" s.add_runtime_dependency "openbabel", "~>2.3.1.5" + s.add_runtime_dependency "rjb" ,"1.4.3" # error in 1.4.5 s.post_install_message = "Please configure your service in ~/.opentox/config/algorithm.rb" end diff --git a/application.rb b/application.rb index 1b9c776..f15aa48 100644 --- a/application.rb +++ b/application.rb @@ -1,17 +1,21 @@ # application.rb # Loads sub-repositories, library code, and webapps. # Author: Andreas Maunz +require 'statsample' # Require sub-Repositories -require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/libbbrc/bbrc') # include before openbabel -require File.join(File.expand_path(File.dirname(__FILE__)), 'libfminer/liblast/last') # -require File.join(File.expand_path(File.dirname(__FILE__)), 'last-utils/lu.rb') +require_relative 'libfminer/libbbrc/bbrc' # include before openbabel +require_relative 'libfminer/liblast/last' # +require_relative 'last-utils/lu.rb' # Library Code $logger.debug "Algorithm booting: #{$algorithm.collect{ |k,v| "#{k}: '#{v}'"} }" -Dir['./lib/algorithm/*.rb'].each { |f| require f } # Libs -Dir['./lib/*.rb'].each { |f| require f } # Libs -Dir['./webapp/*.rb'].each { |f| require f } # Webapps +Dir['./lib/algorithm/*.rb'].each { |f| require f; also_reload f } # Libs +Dir['./lib/*.rb'].each { |f| require f; also_reload f } # Libs +Dir['./webapp/*.rb'].each { |f| require f; also_reload f } # Webapps +require_relative "descriptor.rb" +also_reload "descriptor.rb" +#Dir['./webapp/pc-descriptors.rb'].each { |f| require f; also_reload f } # Webapps # Entry point module OpenTox @@ -24,7 +28,7 @@ module OpenTox to('/fminer/bbrc/match', :full), to('/fminer/last/match', :full), to('/fs/rfe', :full), - to('/pc', :full) ].join("\n") + "\n" + to('/descriptor') ].join("\n") + "\n" format_output (list) end end diff --git a/descriptor.rb b/descriptor.rb new file mode 100644 index 0000000..18b25a5 --- /dev/null +++ b/descriptor.rb @@ -0,0 +1,285 @@ +# descriptors.rb +# Calculation of physico-chemical descriptors +# Author: Andreas Maunz, Christoph Helma +require 'rjb' +require 'openbabel' + +module OpenTox + + class Application < Service + + ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk" + java_dir = File.join(File.dirname(__FILE__),"java") + jars = Dir[File.join(ENV["JAVA_HOME"],"lib","*.jar")] + jars += Dir[File.join(java_dir,"*jar")] + ENV["CLASSPATH"] = ([java_dir]+jars).join(":") + jars.each { |jar| Rjb::load jar } + + StringReader ||= Rjb::import "java.io.StringReader" + CDKMdlReader ||= Rjb::import "org.openscience.cdk.io.MDLReader" + CDKMolecule ||= Rjb::import "org.openscience.cdk.Molecule" + CDKDescriptorEngine ||= Rjb::import "org.openscience.cdk.qsar.DescriptorEngine" + #AromaticityDetector = Rjb::import 'org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector' + JOELIBHelper ||= Rjb::import 'joelib2.feature.FeatureHelper' + JOELIBFactory ||= Rjb::import 'joelib2.feature.FeatureFactory' + JOELIBSmilesParser ||= Rjb::import "joelib2.smiles.SMILESParser" + JOELIBTypeHolder ||= Rjb::import "joelib2.io.BasicIOTypeHolder" + JOELIBMolecule ||= Rjb::import "joelib2.molecule.BasicConformerMolecule" + + unless defined? DESCRIPTORS + + # initialize descriptors and features at startup to avoid duplication + descriptors = { :cdk => [], :openbabel => [], :joelib => [] } # use arrays to keep the sequence intact + + @@obmol = OpenBabel::OBMol.new + @@obconversion = OpenBabel::OBConversion.new + @@obconversion.set_in_format 'inchi' + @@cdk_engine = CDKDescriptorEngine.new(CDKDescriptorEngine.MOLECULAR) + + # OpenBabel + OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").each do |d| + title,description = d.split(/\s+/,2) + unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s" + title = "OpenBabel "+title + feature = OpenTox::Feature.find_or_create({ + RDF::DC.title => title, + RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature], + RDF::DC.description => description, + }, @subjectid) + descriptors[:openbabel] << { + :title => title, + :description => description, + :calculator => OpenBabel::OBDescriptor.find_type(title.split(" ").last), + :feature => feature + } + end + end + + # CDK + @@cdk_engine.getDescriptorClassNames.toArray.each do |d| + cdk_class = d.toString + title = "CDK "+cdk_class.split('.').last + description = @@cdk_engine.getDictionaryDefinition(cdk_class).gsub(/\s+/,' ').strip + " (Class: " + @@cdk_engine.getDictionaryClass(cdk_class).join(", ") + ")" + descriptor = { + :title => title, + :description => description, + :calculator => Rjb::import(cdk_class).new, + :features => [] + } + # CDK Descriptors may return more than one value + descriptor[:features] = descriptor[:calculator].getDescriptorNames.collect do |name| + feature = OpenTox::Feature.find_or_create({ + RDF::DC.title => "#{title} #{name}", + RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature], + RDF::DC.description => description + }, @subjectid) + end + descriptors[:cdk] << descriptor + end + + # JOELIB + factory = JOELIBFactory.instance + JOELIBHelper.instance.getNativeFeatures.toArray.each do |f| + joelib_class = f.toString + unless joelib_class == "joelib2.feature.types.GlobalTopologicalChargeIndex" + # CH: returns "joelib2.feature.types.atomlabel.AtomValence\n#{numeric value}" + # unsure if numeric_value is GlobalTopologicalChargeIndex or AtomValence + # excluded from descriptor list + title = "JOELib "+joelib_class.split('.').last + description = title # feature.getDescription.hasText returns false, feature.getDescription.getHtml returns unparsable content + feature = OpenTox::Feature.find_or_create({ + RDF::DC.title => title, + RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature], + RDF::DC.description => description, + }, @subjectid) + descriptors[:joelib] << { + :title => title, + :description => description, + :calculator => Rjb::import(joelib_class).new, + :feature => feature + } + end + end + DESCRIPTORS = descriptors + + end + + helpers do + + def openbabel compounds, descriptors + compounds.each do |compound| + @@obconversion.read_string @@obmol, compound.inchi + descriptors.each do |descriptor| + puts descriptor[:title] + @feature_dataset.add_data_entry compound, descriptor[:feature], fix_value(descriptor[:calculator].predict(@@obmol)) + end + end + end + + def cdk compounds, descriptors + @@obconversion.set_out_format 'sdf' + compounds.each do |compound| + @@obconversion.read_string @@obmol, compound.inchi + sdf = @@obconversion.write_string(@@obmol) + OpenBabel::OBOp.find_type("Gen3D").do(@@obmol) + sdf_3D = @@obconversion.write_string(@@obmol) + if sdf_3D.match(/.nan/) + warning = "3D generation failed for compound #{compound.uri} (using 2D structure)." + $logger.warn warning + @feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning + else + sdf = sdf_3D + end + reader = CDKMdlReader.new(StringReader.new(sdf)) + cdk_compound = reader.read(CDKMolecule.new) + #AromaticityDetector.detectAromaticity(cdk_compound) + values = [] + descriptors.each do |descriptor| + puts descriptor[:title] + begin + result = descriptor[:calculator].calculate cdk_compound + result.getValue.toString.split(",").each_with_index do |value,i| + @feature_dataset.add_data_entry compound, descriptor[:features][i], fix_value(value) + end + rescue + $logger.error "#{descriptor[:title]} calculation failed with #{$!.message} for compound #{compound.uri}." + end + end + end + end + + def joelib compounds, descriptors + @@obconversion.set_out_format 'smi' + compounds.each do |compound| + mol = JOELIBMolecule.new(JOELIBTypeHolder.instance.getIOType("SMILES"), JOELIBTypeHolder.instance.getIOType("SMILES")) + @@obconversion.read_string @@obmol, compound.inchi + JOELIBSmilesParser.smiles2molecule mol, @@obconversion.write_string(@@obmol).strip, "Smiles: #{@@obconversion.write_string(@@obmol).strip}" + mol.addHydrogens + descriptors.each do |descriptor| + puts descriptor[:title] + puts descriptor[:calculator].toString#java_methods.inspect + puts descriptor[:calculator].calculate(mol).toString + @feature_dataset.add_data_entry compound, descriptor[:feature], fix_value(descriptor[:calculator].calculate(mol).toString) + end + end + end + + def fix_value val + #unless val.numeric? + if val.numeric? + val = Float(val) + val = nil if val.nan? or val.infinite? + end + val + end + end + + before '/descriptor/?*' do + if request.get? + @algorithm = OpenTox::Algorithm.new @uri + @algorithm.parameters = [ + { RDF::DC.description => "Dataset URI", + RDF::OT.paramScope => "optional", + RDF::DC.title => "dataset_uri" } , + { RDF::DC.description => "Compound URI", + RDF::OT.paramScope => "optional", + RDF::DC.title => "compound_uri" } + ] + @algorithm.metadata = { + RDF.type => [RDF::OTA.DescriptorCalculation], + } + elsif request.post? + @feature_dataset = Dataset.new nil, @subjectid + @feature_dataset.metadata = { + RDF::DC.title => "Physico-chemical descriptors", + RDF::DC.creator => @uri, + RDF::OT.hasSource => @uri, + } + if params[:compound_uri] + @feature_dataset.parameters = [ { RDF::DC.title => "compound_uri", RDF::OT.paramValue => params[:compound_uri] }] + elsif params[:dataset_uri] + @feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }] + else + bad_request_error "Please provide a dataset_uri or compound_uri paramaeter", @uri + end + end + end + + before '/descriptor/:lib/:descriptor/?' do + @descriptors = DESCRIPTORS[params[:lib].to_sym].select{|d| d[:title].split(" ").last == params[:descriptor]} + bad_request_error "Unknown descriptor #{@uri}. See #{uri('descriptors')} for a complete list of supported descriptors.", @uri if @descriptors.empty? + @descriptor = @descriptors.first + end + + # Get a list of descriptor calculation + # @return [text/uri-list] URIs + get '/descriptor/?' do + DESCRIPTORS.collect{|lib,d| d.collect{|n| uri("/descriptors/#{lib}/#{n[:title].split(" ").last}")}}.flatten.sort.join("\n") + end + + get '/descriptor/:lib/?' do + DESCRIPTORS[params[:lib].to_sym].collect{|n| uri("/descriptors/#{params[:lib].to_sym}/#{n[:title].split(" ").last}")}.sort.join("\n") + end + + # Get representation of descriptor calculation + # @return [String] Representation + get '/descriptor/:lib/:descriptor/?' do + @algorithm[RDF::DC.title] = @descriptor[:title] + @algorithm[RDF::DC.description] = @descriptor[:description] + format_output(@algorithm) + end + + post '/descriptor/?' do + #task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task| + puts "Task created" + if params[:descriptors] + descriptors = {} + params[:descriptors].each do |descriptor| + #lib, title = descriptor.split('/') + descriptors[lib.to_sym] ||= [] + descriptors[lib.to_sym] << DESCRIPTORS[lib.to_sym].select{|d| d[:title] == descriptor} + end + else + descriptors = DESCRIPTORS + end + if params[:compound_uri] + compounds = [ Compound.new(params[:compound_uri], @subjectid) ] + elsif params[:dataset_uri] + compounds = Dataset.new(params[:dataset_uri]).compounds + end + puts "Calculating" + [:openbabel, :cdk, :joelib].each{ |lib| puts lib; send lib, compounds, descriptors[lib]; puts lib.to_s+" finished" } + #[:joelib].each{ |lib| send lib, compounds, descriptors[lib]; puts lib.to_s+" finished" } + puts "saving file" + File.open("/home/ch/tmp.nt","w+"){|f| f.puts @feature_dataset.to_ntriples} + puts "saving "+@feature_dataset.uri + @feature_dataset.put + puts "finished" + @feature_dataset.uri + #end + #response['Content-Type'] = 'text/uri-list' + #halt 202, task.uri + end + + post '/descriptor/:lib/:descriptor/?' do + if params[:compound_uri] + compounds = [ Compound.new(params[:compound_uri], @subjectid) ] + send params[:lib].to_sym, compounds, @descriptors + @feature_dataset.put + @feature_dataset.uri + elsif params[:dataset_uri] + task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task| + compounds = Dataset.new(params[:dataset_uri]).compounds + send params[:lib].to_sym, compounds, @descriptors + @feature_dataset.put + @feature_dataset.uri + end + response['Content-Type'] = 'text/uri-list' + halt 202, task.uri + end + end + + end + +end + diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 1a26322..fb47385 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -13,8 +13,7 @@ module OpenTox # return [Integer] min-frequency def self.min_frequency(training_dataset,prediction_feature,per_mil) nr_labeled_cmpds=0 - training_dataset.build_feature_positions - f_idx=training_dataset.feature_positions[prediction_feature.uri] + f_idx=training_dataset.features.collect{|f| f.uri}.index prediction_feature.uri training_dataset.compounds.each_with_index { |cmpd, c_idx| if ( training_dataset.data_entries[c_idx] ) unless training_dataset.data_entries[c_idx][f_idx].nil? diff --git a/lib/algorithm/feature_values.rb b/lib/algorithm/feature_values.rb index f08dea8..62373e7 100644 --- a/lib/algorithm/feature_values.rb +++ b/lib/algorithm/feature_values.rb @@ -13,7 +13,8 @@ module OpenTox # @param [Hash] keys: compound, feature_dataset, values: OpenTox::Compound, Array of SMARTS strings # @return [Array] Array with matching Smarts def self.match(params, subjectid) - features = params[:feature_dataset].features.collect{ |f| f[DC.title] } + features = params[:feature_dataset].features.collect{ |f| f[RDF::DC.title] } + puts features.inspect params[:compound].match(features) end @@ -21,7 +22,7 @@ module OpenTox # @param [Hash] keys: compound, feature_dataset, values: OpenTox::Compound, Array of SMARTS strings # @return [Hash] Hash with matching Smarts and number of hits def self.match_hits(params, subjectid) - features = params[:feature_dataset].features.collect{ |f| f[DC.title] }, + features = params[:feature_dataset].features.collect{ |f| f[RDF::DC.title] }, params[:compound].match_hits(features) end @@ -37,7 +38,7 @@ module OpenTox }.compact if cmpd_idxs.size > 0 # We have entries cmpd_numeric_f = ds.features.collect { |f| - f if f[RDF.type].include? OT.NumericFeature + f if f[RDF.type].include? RDF::OT.NumericFeature }.compact cmpd_data_entries = cmpd_idxs.collect { |idx| ds.data_entries[idx] @@ -61,7 +62,7 @@ module OpenTox params[:subjectid] = subjectid [:compound, :feature_dataset].each { |p| params.delete(p) }; [:pc_type, :lib].each { |p| params.delete(p) if params[p] == "" } single_cmpd_ds = OpenTox::Dataset.new(nil,subjectid) - single_cmpd_ds.parse_rdfxml(RestClient.post("#{$compound[:uri]}/#{cmpd_inchi}/pc", params, {:accept => "application/rdf+xml"})) + single_cmpd_ds.parse_rdfxml(RestClientWrapper.post(File.join($compound[:uri],cmpd_inchi,"pc"), params, {:accept => "application/rdf+xml"})) single_cmpd_ds.get(true) single_cmpd_ds.build_feature_positions cmpd_fingerprints = single_cmpd_ds.features.inject({}) { |h,f| diff --git a/lib/algorithm/fminer.rb b/lib/algorithm/fminer.rb index d969bef..5b0527a 100644 --- a/lib/algorithm/fminer.rb +++ b/lib/algorithm/fminer.rb @@ -22,16 +22,16 @@ module OpenTox # @param[Hash] parameters of the REST call # @param[Integer] per-mil value for min frequency - def check_params(params,per_mil,subjectid=nil) + def check_params(params,per_mil) bad_request_error "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil? - @training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid # AM: find is a shim + @training_dataset = OpenTox::Dataset.new "#{params[:dataset_uri]}", @subjectid unless params[:prediction_feature] # try to read prediction_feature from dataset resource_not_found_error "Please provide a prediction_feature parameter" unless @training_dataset.features.size == 1 params[:prediction_feature] = @training_dataset.features.first.uri end - @prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid # AM: find is a shim + @prediction_feature = OpenTox::Feature.find params[:prediction_feature], @subjectid resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" unless - @training_dataset.find_feature( params[:prediction_feature] ) # AM: find_feature is a shim + @training_dataset.find_feature_uri( params[:prediction_feature] ) unless params[:min_frequency].nil? # check for percentage if params[:min_frequency].include? "pc" @@ -101,7 +101,7 @@ module OpenTox def add_fminer_data(fminer_instance, value_map) id=1 @training_dataset.compounds.each do |compound| - compound_activities = @training_dataset.find_data_entry(compound.uri, @prediction_feature.uri) + compound_activities = @training_dataset.values(compound, @prediction_feature) begin if @prediction_feature.feature_type == "classification" compound_activities = compound_activities.to_scale.mode @@ -164,21 +164,20 @@ module OpenTox end metadata = { - RDF.type => [OT.Feature, OT.Substructure, OT.NumericFeature], - OT.smarts => smarts.dup, - OT.pValue => p_value.abs.round(5), - OT.effect => effect + RDF.type => [RDF::OT.Feature, RDF::OT.Substructure, RDF::OT.NumericFeature], + RDF::OT.smarts => smarts.dup, + RDF::OT.pValue => p_value.abs.round(5), + RDF::OT.effect => effect } parameters = [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] } + { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }, + { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] } ] - metadata[OT.hasSource]=feature_dataset_uri if feature_dataset_uri + metadata[RDF::OT.hasSource]=feature_dataset_uri if feature_dataset_uri [ metadata, parameters ] end end - end end diff --git a/lib/algorithm/similarity.rb b/lib/algorithm/similarity.rb index 22b4c28..88eed35 100644 --- a/lib/algorithm/similarity.rb +++ b/lib/algorithm/similarity.rb @@ -15,6 +15,8 @@ module OpenTox # @param [Array] b fingerprints of second compound # @return [Float] Tanimoto similarity def self.tanimoto(a,b) + puts a.inspect + puts b.inspect common_p_sum = 0.0 all_p_sum = 0.0 size = [ a.size, b.size ].min diff --git a/lib/algorithm/transform.rb b/lib/algorithm/transform.rb index ec25526..c851f73 100644 --- a/lib/algorithm/transform.rb +++ b/lib/algorithm/transform.rb @@ -334,8 +334,13 @@ module OpenTox # @param[Array] training_props Propositionalized data for this neighbor # @param[Integer] Index of neighbor def add_neighbor(training_props, idx) + puts idx + #puts training_props.inspect + #puts @q_prop.inspect + #puts @model.acts[idx].to_s unless @model.acts[idx].nil? sim = similarity(training_props) + puts sim if sim > @model.min_sim.to_f @model.neighbors << { :compound => @cmpds[idx], @@ -381,7 +386,6 @@ module OpenTox end end - # Replaces nils by zeroes in n_prop and q_prop # Enables the use of Tanimoto similarities with arrays (rows of n_prop and q_prop) def convert_nils diff --git a/lib/dataset.rb b/lib/dataset.rb index e7588ae..a42356b 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -12,9 +12,9 @@ module OpenTox # @param [Hash] uri Compound URI, Feature URI # @return [Object] activity Database activity, or nil def database_activity(params) - f=Feature.find(params[:prediction_feature_uri],params[:subjectid]) - db_act = find_data_entry(params[:compound_uri], params[:prediction_feature_uri]) - if db_act + f=Feature.new params[:prediction_feature_uri], @subjectid + db_act = values(Compound.new(params[:compound_uri]), f) + if !db_act.empty? if f.feature_type == "classification" db_act = db_act.to_scale.mode.dup else diff --git a/lib/lazar.rb b/lib/lazar.rb new file mode 100644 index 0000000..f731544 --- /dev/null +++ b/lib/lazar.rb @@ -0,0 +1,222 @@ +=begin +* Name: lazar.rb +* Description: Lazar model representation +* Author: Andreas Maunz <andreas@maunz.de>, Christoph Helma +* Date: 10/2012 +=end + +module OpenTox + + class LazarPrediction < Model + + attr_accessor :prediction_dataset + + def initialize(params) + @prediction_dataset = OpenTox::Dataset.new(nil, @subjectid) + # set instance variables and prediction dataset parameters from parameters + params.each {|k,v| + self.class.class_eval { attr_accessor k.to_sym } + instance_variable_set "@#{k}", v + @prediction_dataset.parameters << {RDF::DC.title => k, RDF::OT.paramValue => v} + } + ["cmpds", "fps", "acts", "n_prop", "q_prop", "neighbors"].each {|k| + self.class.class_eval { attr_accessor k.to_sym } + instance_variable_set("@#{k}", []) + } + + @prediction_feature = OpenTox::Feature.new(@prediction_feature_uri,@subjectid) + # TODO: set feature type + @predicted_variable = OpenTox::Feature.find_or_create({RDF::DC.title => "#{@prediction_feature.title} prediction", RDF.type => @prediction_feature[RDF.type]}, @subjectid) + @predicted_confidence = OpenTox::Feature.find_or_create({RDF::DC.title => "#{@prediction_feature.title} confidence", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid) + + @prediction_dataset.metadata = { + RDF::DC.title => "Lazar prediction for #{@prediction_feature.title}", + RDF::DC.creator => @model_uri, + RDF::OT.hasSource => @model_uri, + RDF::OT.dependentVariables => @prediction_feature_uri, + RDF::OT.predictedVariables => [@predicted_variable.uri,@predicted_confidence.uri] + } + + @training_dataset = OpenTox::Dataset.new(@training_dataset_uri,@subjectid) + + @feature_dataset = OpenTox::Dataset.new(@feature_dataset_uri, @subjectid) + bad_request_error "No features found in feature dataset #{@feature_dataset.uri}." if @feature_dataset.features.empty? + + @similarity_feature = OpenTox::Feature.find_or_create({RDF::DC.title => "#{@similarity_algorithm.capitalize} similarity", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid) + + @prediction_dataset.features = [ @predicted_variable, @predicted_confidence, @prediction_feature, @similarity_feature ] + + prediction_feature_pos = @training_dataset.features.collect{|f| f.uri}.index @prediction_feature.uri + + if @dataset_uri + compounds = OpenTox::Dataset.find(@dataset_uri).compounds + else + compounds = [ OpenTox::Compound.new(@compound_uri) ] + end + compounds.each do |compound| + + puts compound.smiles + database_activity = @training_dataset.database_activity(params) + if database_activity + @prediction_dataset.add_data_entry compound, @prediction_feature, database_activity + next + else + #pc_type = @feature_dataset.parameters["pc_type"] + #@model.pc_type = pc_type unless pc_type.nil? + #lib = @feature_dataset.parameters["lib"] + #@model.lib = lib unless lib.nil? + + # AM: transform to cosine space + @min_sim = (@min_sim.to_f*2.0-1.0).to_s if @similarity_algorithm =~ /cosine/ + + compound_params = { + :compound => compound, + :feature_dataset => @feature_dataset, + # TODO: fix in algorithm/lib/algorithm/feature_values.rb + #:pc_type => @model.pc_type, + #:lib => @model.lib + } + compound_fingerprints = OpenTox::Algorithm::FeatureValues.send( @feature_calculation_algorithm, compound_params, @subjectid ) + puts compound_fingerprints.inspect + @training_dataset.compounds.each_with_index { |cmpd, idx| + act = @training_dataset.data_entries[idx][prediction_feature_pos] + @acts << (@prediction_feature.feature_type=="classification" ? @prediction_feature.value_map.invert[act] : nil) + @n_prop << @feature_dataset.data_entries[idx]#.collect.to_a + @cmpds << cmpd.uri + } + #puts "COMPOUNDS" + #puts @n_prop.inspect + puts @cmpds.inspect + puts @fps.inspect + puts @acts.inspect + puts @n_prop.inspect + puts @q_prop.inspect + + @q_prop = @feature_dataset.features.collect { |f| + val = compound_fingerprints[f.title] + bad_request_error "Can not parse value '#{val}' to numeric" if val and !val.numeric? + val ? val.to_f : 0.0 + } # query structure + + mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self) + mtf.transform + + prediction = OpenTox::Algorithm::Neighbors.send(@prediction_algorithm, + { :props => mtf.props, + :acts => mtf.acts, + :sims => mtf.sims, + :value_map => @prediction_feature.feature_type=="classification" ? @prediction_feature.value_map : nil, + :min_train_performance => @min_train_performance + } ) + + puts prediction.inspect + predicted_value = prediction[:prediction].to_f + confidence_value = prediction[:confidence].to_f + + # AM: transform to original space + confidence_value = ((confidence_value+1.0)/2.0).abs if @similarity_algorithm =~ /cosine/ + predicted_value = @prediction_feature.value_map[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification" + + end + + @prediction_dataset.add_data_entry compound, @predicted_variable, predicted_value + @prediction_dataset.add_data_entry compound, @predicted_confidence, confidence_value + + if @compound_uri # add neighbors only for compound predictions + @neighbors.each do |neighbor| + n = OpenTox::Compound.new(neighbor[:compound]) + @prediction_dataset.add_data_entry n, @prediction_feature, @prediction_feature.value_map[neighbor[:activity]] + @prediction_dataset.add_data_entry n, @similarity_feature, neighbor[:similarity] + #@prediction_dataset << [ n, @prediction_feature.value_map[neighbor[:activity]], nil, nil, neighbor[:similarity] ] + end + end + + end # iteration over compounds + @prediction_dataset.put + + end + + end + + class Model + + # Check parameters for plausibility + # Prepare lazar object (includes graph mining) + # @param[Array] lazar parameters as strings + # @param[Hash] REST parameters, as input by user + def create_model(params) + + training_dataset = OpenTox::Dataset.new(params[:dataset_uri], @subjectid) + @parameters << {RDF::DC.title => "training_dataset_uri", RDF::OT.paramValue => training_dataset.uri} + + # TODO: This is inconsistent, it would be better to have prediction_feature_uri in the API + if params[:prediction_feature] + resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" unless training_dataset.find_feature_uri( params[:prediction_feature] ) + else # try to read prediction_feature from dataset + resource_not_found_error "Please provide a prediction_feature parameter" unless training_dataset.features.size == 1 + params[:prediction_feature] = training_dataset.features.first.uri + end + self[RDF::OT.trainingDataset] = training_dataset.uri + prediction_feature = OpenTox::Feature.new(params[:prediction_feature], @subjectid) + self[RDF::DC.title] = prediction_feature.title + @parameters << {RDF::DC.title => "prediction_feature_uri", RDF::OT.paramValue => prediction_feature.uri} + self[RDF::OT.dependentVariables] = prediction_feature.uri + + bad_request_error "Unknown prediction_algorithm #{params[:prediction_algorithm]}" if params[:prediction_algorithm] and !OpenTox::Algorithm::Neighbors.respond_to?(params[:prediction_algorithm]) + @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => params[:prediction_algorithm]} if params[:prediction_algorithm] + + confidence_feature = OpenTox::Feature.find_or_create({RDF::DC.title => "predicted_confidence", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid) + self[RDF::OT.predictedVariables] = [ prediction_feature.uri, confidence_feature.uri ] + case prediction_feature.feature_type + when "classification" + @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => "weighted_majority_vote"} unless parameter_value "prediction_algorithm" + self[RDF.type] = [RDF::OT.Model, RDF::OTA.ClassificationLazySingleTarget] + when "regression" + @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => "local_svm_regression"} unless parameter_value "prediction_algorithm" + self[RDF.type] = [RDF::OT.Model, RDF::OTA.RegressionLazySingleTarget] + end + parameter_value("prediction_algorithm") =~ /majority_vote/ ? @parameters << {RDF::DC.title => "propositionalized", RDF::OT.paramValue => false} : @parameters << {RDF::DC.title => "propositionalized", RDF::OT.paramValue => true} + + @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => params[:min_sim].to_f} if params[:min_sim] and params[:min_sim].numeric? + @parameters << {RDF::DC.title => "feature_generation_uri", RDF::OT.paramValue => params[:feature_generation_uri]} + #@parameters["nr_hits"] = params[:nr_hits] + case params["feature_generation_uri"] + when /fminer/ + if (params[:nr_hits] == "true") + @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "match_hits"} + else + @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "match"} + end + @parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "tanimoto"} + @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.3} unless parameter_value("min_sim") + when /descriptors/ + @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "lookup"} + @parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "cosine"} + @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.7} unless parameter_value("min_sim") + end + + #TODO: check if these parameters are necessary with new version + #set_parameter("pc_type", params[:pc_type] if params[:pc_type] + #set_parameter("lib", params[:lib] if params[:lib] + + bad_request_error "Parameter min_train_performance is not numeric." if params[:min_train_performance] and !params[:min_train_performance].numeric? + @parameters << {RDF::DC.title => "min_train_performance", RDF::OT.paramValue => params[:min_train_performance].to_f} if params[:min_train_performance] and params[:min_train_performance].numeric? + @parameters << {RDF::DC.title => "min_train_performance", RDF::OT.paramValue => 0.1} unless parameter_value("min_train_performance") + + if params[:feature_dataset_uri] + bad_request_error "Feature dataset #{params[:feature_dataset_uri]} does not exist." unless URI.accessible? params[:feature_dataset_uri] + @parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => params[:feature_dataset_uri]} + self[RDF::OT.featureDataset] = params["feature_dataset_uri"] + else + # run feature generation algorithm + feature_dataset_uri = OpenTox::Algorithm.new(params[:feature_generation_uri]).run(params) + @parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => feature_dataset_uri} + self[RDF::OT.featureDataset] = feature_dataset_uri + end + + end + + end + +end + diff --git a/lib/model.rb b/lib/model.rb deleted file mode 100644 index a808aa7..0000000 --- a/lib/model.rb +++ /dev/null @@ -1,151 +0,0 @@ -=begin -* Name: lazar.rb -* Description: Lazar model representation -* Author: Andreas Maunz <andreas@maunz.de> -* Date: 10/2012 -=end - -module OpenTox - - class Model - - def initialize(*args) - if args.size == 2 - super(*args)# We have uri and subjectid - end - if args.size == 1 - prepare_prediction_model(args[0]) # We have a hash (prediction time) - end - end - - # Internal use only - def prepare_prediction_model(params) - puts params.inspect - params.each {|k,v| - self.class.class_eval { attr_accessor k.to_sym } - instance_variable_set(eval(":@"+k), v) - } - ["cmpds", "fps", "acts", "n_prop", "q_prop", "neighbors"].each {|k| - self.class.class_eval { attr_accessor k.to_sym } - instance_variable_set(eval(":@"+k), []) - } - end - private :prepare_prediction_model - - # Fills model in with data for prediction - # Avoids associative lookups, since canonization to InChI takes time - # @param [OpenTox::Dataset] training dataset - # @param [OpenTox::Dataset] feature dataset - # @param [OpenTox::Feature] prediction feature - # @param [Hash] compound fingerprints - # @param [String] subjectid - def add_data(training_dataset, feature_dataset, prediction_feature, compound_fingerprints, subjectid) - training_dataset.build_feature_positions - prediction_feature_pos = training_dataset.feature_positions[prediction_feature.uri] - training_dataset.compounds.each_with_index { |cmpd, idx| - act = training_dataset.data_entries[idx][prediction_feature_pos] - @acts << (prediction_feature.feature_type=="classification" ? - training_dataset.value_map(prediction_feature).invert[act] : nil) - @n_prop << feature_dataset.data_entries[idx].collect.to_a - @cmpds << cmpd.uri - } - @q_prop = feature_dataset.features.collect { |f| - val = compound_fingerprints[f.title] - bad_request_error "Can not parse value '#{val}' to numeric" if val and !val.numeric? - val ? val.to_f : 0.0 - } # query structure - end - - - # Check parameters for plausibility - # Prepare lazar object (includes graph mining) - # @param[Array] lazar parameters as strings - # @param[Hash] REST parameters, as input by user - def check_params(lazar_params, params) - - unless params[:feature_generation_uri] - bad_request_error "Please provide a feature generation uri" - end - feature_generation_uri = params[:feature_generation_uri] - - unless training_dataset = OpenTox::Dataset.find(params[:dataset_uri], @subjectid) # AM: find is a shim - resource_not_found_error "Dataset '#{params[:dataset_uri]}' not found." - end - training_dataset_uri = training_dataset.uri - - unless params[:prediction_feature] # try to read prediction_feature from dataset - resource_not_found_error "Please provide a prediction_feature parameter" unless training_dataset.features.size == 1 - params[:prediction_feature] = training_dataset.features.first.uri - end - - unless training_dataset.find_feature( params[:prediction_feature] ) # AM: find_feature is a shim - resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" - end - prediction_feature = OpenTox::Feature.find(params[:prediction_feature], @subjectid) # AM: find is a shim - prediction_feature_uri = prediction_feature.uri - - if params[:feature_dataset_uri] - feature_dataset_uri = params[:feature_dataset_uri] - else - feature_dataset_uri = OpenTox::Algorithm.new(feature_generation_uri).run(params) - end - - if (feature_generation_uri =~ /fminer/) - feature_calculation_algorithm = "match" - if (params[:nr_hits] == "true") - feature_calculation_algorithm = "match_hits" - end - elsif feature_generation_uri =~ /dataset.*\/pc/ - feature_calculation_algorithm = "lookup" - end - - if feature_calculation_algorithm == "lookup" - similarity_algorithm = "cosine" - min_sim = 0.7 - elsif feature_calculation_algorithm =~ /match/ - similarity_algorithm = "tanimoto" - min_sim = 0.3 - end - if params[:min_sim] and params[:min_sim].numeric? - min_sim = params[:min_sim].to_f # AM: frequent manual option - end - - if prediction_feature.feature_type == "classification" - prediction_algorithm = "weighted_majority_vote" - elsif prediction_feature.feature_type == "regression" - prediction_algorithm = "local_svm_regression" - end - if params[:prediction_algorithm] and OpenTox::Algorithm::Neighbors.respond_to? params[:prediction_algorithm] - prediction_algorithm = params[:prediction_algorithm] # AM: frequent manual option - end - - propositionalized = true - if prediction_algorithm =~ /majority_vote/ - propositionalized = false - end - - if params[:pc_type] - pc_type = params[:pc_type] - end - - if params[:lib] - lib = params[:lib] - end - - min_train_performance = $lazar_min_train_performance_default - if params[:min_train_performance] and params[:min_train_performance].numeric? - min_train_performance = params[:min_train_performance].to_f # AM: frequent manual option - end - - - lazar_params.collect { |p| - val = eval(p) - { DC.title => p, OT.paramValue => (val.nil? ? "" : val) } - }.compact - end - - - end - -end - diff --git a/webapp/fminer.rb b/webapp/fminer.rb index 379a863..2d11dcb 100644 --- a/webapp/fminer.rb +++ b/webapp/fminer.rb @@ -14,7 +14,6 @@ ENV['FMINER_NR_HITS'] = 'true' @@bbrc = Bbrc::Bbrc.new @@last = Last::Last.new - module OpenTox class Application < Service @@ -22,11 +21,11 @@ module OpenTox # Get list of fminer algorithms # @return [text/uri-list] URIs get '/fminer/?' do - list = [ to('/fminer/bbrc', :full), - to('/fminer/bbrc/sample', :full), - to('/fminer/last', :full), - to('/fminer/bbrc/match', :full), - to('/fminer/last/match', :full) + list = [ uri('/fminer/bbrc'), + uri('/fminer/bbrc/sample'), + uri('/fminer/last'), + uri('/fminer/bbrc/match'), + uri('/fminer/last/match') ].join("\n") + "\n" format_output(list) end @@ -36,19 +35,19 @@ module OpenTox get "/fminer/bbrc/?" do algorithm = OpenTox::Algorithm.new(to('/fminer/bbrc',:full)) algorithm.metadata = { - DC.title => 'Backbone Refinement Class Representatives', - DC.creator => "andreas@maunz.de", - RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised] + RDF::DC.title => 'Backbone Refinement Class Representatives', + RDF::DC.creator => "andreas@maunz.de", + RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised] } algorithm.parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, - { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, - { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, - { DC.description => "Significance threshold (between 0 and 1)", OT.paramScope => "optional", DC.title => "min_chisq_significance" }, - { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, - { DC.description => "Set to 'true' to obtain target variables as a feature", OT.paramScope => "optional", DC.title => "get_target" } + { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" }, + { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }, + { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" }, + { RDF::DC.description => "Feature type, can be 'paths' or 'trees'", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_type" }, + { RDF::DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", RDF::OT.paramScope => "optional", RDF::DC.title => "backbone" }, + { RDF::DC.description => "Significance threshold (between 0 and 1)", RDF::OT.paramScope => "optional", RDF::DC.title => "min_chisq_significance" }, + { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" }, + { RDF::DC.description => "Set to 'true' to obtain target variables as a feature", RDF::OT.paramScope => "optional", RDF::DC.title => "get_target" } ] format_output(algorithm) end @@ -58,19 +57,19 @@ module OpenTox get "/fminer/bbrc/sample/?" do algorithm = OpenTox::Algorithm.new(to('/fminer/bbrc/sample',:full)) algorithm.metadata = { - DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset', - DC.creator => "andreas@maunz.de", - RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised] + RDF::DC.title => 'Backbone Refinement Class Representatives, obtained from samples of a dataset', + RDF::DC.creator => "andreas@maunz.de", + RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised] } algorithm.parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" }, - { DC.description => "Number of bootstrap samples", OT.paramScope => "optional", DC.title => "num_boots" }, - { DC.description => "Minimum sampling support", OT.paramScope => "optional", DC.title => "min_sampling_support" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, - { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, - { DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", OT.paramScope => "optional", DC.title => "backbone" }, - { DC.description => "Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", OT.paramScope => "optional", DC.title => "method" } + { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" }, + { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }, + { RDF::DC.description => "Number of bootstrap samples", RDF::OT.paramScope => "optional", RDF::DC.title => "num_boots" }, + { RDF::DC.description => "Minimum sampling support", RDF::OT.paramScope => "optional", RDF::DC.title => "min_sampling_support" }, + { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" }, + { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" }, + { RDF::DC.description => "BBRC classes, pass 'false' to switch off mining for BBRC representatives.", RDF::OT.paramScope => "optional", RDF::DC.title => "backbone" }, + { RDF::DC.description => "Chisq estimation method, pass 'mean' to use simple mean estimate for chisq test.", RDF::OT.paramScope => "optional", RDF::DC.title => "method" } ] format_output(algorithm) end @@ -80,17 +79,17 @@ module OpenTox get "/fminer/last/?" do algorithm = OpenTox::Algorithm.new(to('/fminer/last',:full)) algorithm.metadata = { - DC.title => 'Latent Structure Pattern Mining descriptors', - DC.creator => "andreas@maunz.de", - RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised] + RDF::DC.title => 'Latent Structure Pattern Mining descriptors', + RDF::DC.creator => "andreas@maunz.de", + RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised] } algorithm.parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" }, - { DC.description => "Minimum frequency", OT.paramScope => "optional", DC.title => "min_frequency" }, - { DC.description => "Feature type, can be 'paths' or 'trees'", OT.paramScope => "optional", DC.title => "feature_type" }, - { DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", OT.paramScope => "optional", DC.title => "nr_hits" }, - { DC.description => "Set to 'true' to obtain target variables as a feature", OT.paramScope => "optional", DC.title => "get_target" } + { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" }, + { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }, + { RDF::DC.description => "Minimum frequency", RDF::OT.paramScope => "optional", RDF::DC.title => "min_frequency" }, + { RDF::DC.description => "Feature type, can be 'paths' or 'trees'", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_type" }, + { RDF::DC.description => "Whether subgraphs should be weighted with their occurrence counts in the instances (frequency)", RDF::OT.paramScope => "optional", RDF::DC.title => "nr_hits" }, + { RDF::DC.description => "Set to 'true' to obtain target variables as a feature", RDF::OT.paramScope => "optional", RDF::DC.title => "get_target" } ] format_output(algorithm) end @@ -101,14 +100,14 @@ module OpenTox get "/fminer/:method/match?" do algorithm = OpenTox::Algorithm.new(to("/fminer/#{params[:method]}/match",:full)) algorithm.metadata = { - DC.title => 'fminer feature matching', - DC.creator => "mguetlein@gmail.com, andreas@maunz.de", - RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised] + RDF::DC.title => 'fminer feature matching', + RDF::DC.creator => "mguetlein@gmail.com, andreas@maunz.de", + RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised] } algorithm.parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" } + { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" }, + { RDF::DC.description => "Feature Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "feature_dataset_uri" }, + { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" } ] format_output(algorithm) end @@ -128,18 +127,10 @@ module OpenTox # @return [text/uri-list] Task URI post '/fminer/bbrc/?' do - @@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/bbrc',:full)) - @@fminer.check_params(params,5,@subjectid) + @@fminer.check_params(params,5) - task = OpenTox::Task.create( - $task[:uri], - @subjectid, - { RDF::DC.description => "Mining BBRC features", - RDF::DC.creator => to('/fminer/bbrc',:full) - } - ) do |task| - + task = OpenTox::Task.run("Mining BBRC features", uri('/fminer/bbrc'), @subjectid) do |task| @@bbrc.Reset if @@fminer.prediction_feature.feature_type == "regression" @@ -149,7 +140,7 @@ module OpenTox "dataset '#{@@fminer.training_dataset.uri}' and "\ "feature '#{@@fminer.prediction_feature.uri}'" unless @@fminer.prediction_feature.accept_values - value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature) + value_map=@@fminer.prediction_feature.value_map end @@bbrc.SetMinfreq(@@fminer.minfreq) @@bbrc.SetType(1) if params[:feature_type] == "paths" @@ -157,21 +148,20 @@ module OpenTox @@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] @@bbrc.SetConsoleOut(false) - feature_dataset = OpenTox::Dataset.new(nil, @subjectid) feature_dataset.metadata = { - DC.title => "BBRC representatives", - DC.creator => to('/fminer/bbrc',:full), - OT.hasSource => to('/fminer/bbrc', :full), + RDF::DC.title => "BBRC representatives", + RDF::DC.creator => to('/fminer/bbrc',:full), + RDF::OT.hasSource => to('/fminer/bbrc', :full), } feature_dataset.parameters = [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, - { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq }, - { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }, - { DC.title => "backbone", OT.paramValue => (params[:backbone] == "false" ? "false" : "true") } + { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }, + { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] }, + { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq }, + { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") }, + { RDF::DC.title => "backbone", RDF::OT.paramValue => (params[:backbone] == "false" ? "false" : "true") } ] - + @@fminer.compounds = [] @@fminer.db_class_sizes = Array.new # AM: effect @@fminer.all_activities = Hash.new # DV: for effect calculation in regression part @@ -219,17 +209,16 @@ module OpenTox end end - #feature_uri = File.join feature_dataset.uri,"feature","bbrc", features.size.to_s unless features_smarts.include? smarts features_smarts << smarts - metadata = { - OT.hasSource => to('/fminer/bbrc', :full), - RDF.type => [OT.Feature, OT.Substructure, OT.NumericFeature], - OT.smarts => smarts.dup, - OT.pValue => p_value.to_f.abs.round(5), - OT.effect => effect - } - feature = OpenTox::Feature.find_by_title(smarts.dup,metadata) + feature = OpenTox::Feature.find_or_create({ + RDF::DC.title => smarts.dup, + RDF::OT.hasSource => to('/fminer/bbrc', :full), + RDF.type => [RDF::OT.Feature, RDF::OT.Substructure, RDF::OT.NumericFeature], + RDF::OT.smarts => smarts.dup, + RDF::OT.pValue => p_value.to_f.abs.round(5), + RDF::OT.effect => effect + }, @subjectid) features << feature end @@ -247,9 +236,8 @@ module OpenTox end # end of end # feature parsing - fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a - @@fminer.training_dataset.build_feature_positions - prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri] + fminer_compounds = @@fminer.training_dataset.compounds + prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| @@fminer.training_dataset.data_entries[idx][prediction_feature_idx] } @@ -257,29 +245,65 @@ module OpenTox feature_dataset.features = features if (params[:get_target] == "true") + puts "get_target TRUE" feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features + else + puts "get_target FALSE" end + feature_dataset.compounds = fminer_compounds fminer_compounds.each_with_index { |c,idx| - row = [ c ] - if (params[:get_target] == "true") - row = row + [ prediction_feature_all_acts[idx] ] - end + #puts c.smiles + # TODO: fix here, insert with add_data_entry + #row = [ c ] + # TODO: reenable option + #if (params[:get_target] == "true") + #row = row + [ prediction_feature_all_acts[idx] ] + #end features.each { |f| - row << (fminer_results[c] ? fminer_results[c][f.uri] : nil) + #m = c.match([f.title])[f.title] + #m = 0 unless m + v = fminer_results[c][f.uri] if fminer_results[c] + unless fminer_noact_compounds.include? c + v = 0 if v.nil? + end + #unless m == v + #puts f.title + #puts m + #puts v + #end + feature_dataset.add_data_entry c, f, v.to_i + #row << (fminer_results[c] ? fminer_results[c][f.uri] : nil) } - row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c - feature_dataset << row + #row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c + #feature_dataset << row } + +=begin +CH: Ordering seems to be ok here + feature_dataset.compounds.each_with_index do |c,i| + feature_dataset.features.each_with_index do |f,j| + m = c.match([f.title])[f.title] + #puts c.smiles + #puts f.title + #puts m.inspect + v = feature_dataset.data_entries[i][j] + #puts v.inspect + unless m.to_i == v.to_i + puts f.title + puts m.to_i + puts v.to_i + end + end + end +=end - $logger.debug "fminer found #{feature_dataset.features.size} features for #{feature_dataset.compounds.size} compounds" - feature_dataset.put @subjectid - $logger.debug feature_dataset.uri + #puts feature_dataset.to_csv + feature_dataset.put feature_dataset.uri end response['Content-Type'] = 'text/uri-list' - service_unavailable_error "Service unavailable" if task.cancelled? - halt 202,task.uri.to_s+"\n" + halt 202,task.uri end @@ -298,15 +322,9 @@ module OpenTox post '/fminer/last/?' do @@fminer=OpenTox::Algorithm::Fminer.new(to('/fminer/last',:full)) - @@fminer.check_params(params,80,@subjectid) + @@fminer.check_params(params,80) - task = OpenTox::Task.create( - $task[:uri], - @subjectid, - { RDF::DC.description => "Mining LAST features", - RDF::DC.creator => to('/fminer/last',:full) - } - ) do |task| + task = OpenTox::Task.run("Mining LAST features", uri('/fminer/last'), @subjectid) do |task| @@last.Reset if @@fminer.prediction_feature.feature_type == "regression" @@ -316,7 +334,7 @@ module OpenTox "dataset '#{fminer.training_dataset.uri}' and "\ "feature '#{fminer.prediction_feature.uri}'" unless @@fminer.prediction_feature.accept_values - value_map=@@fminer.training_dataset.value_map(@@fminer.prediction_feature) + value_map=@@fminer.prediction_feature.value_map end @@last.SetMinfreq(@@fminer.minfreq) @@last.SetType(1) if params[:feature_type] == "paths" @@ -325,15 +343,15 @@ module OpenTox feature_dataset = OpenTox::Dataset.new(nil, @subjectid) feature_dataset.metadata = { - DC.title => "LAST representatives for " + @@fminer.training_dataset.metadata[DC.title].to_s, - DC.creator => to('/fminer/last',:full), - OT.hasSource => to('/fminer/last', :full) + RDF::DC.title => "LAST representatives for " + @@fminer.training_dataset.metadata[RDF::DC.title].to_s, + RDF::DC.creator => to('/fminer/last'), + RDF::OT.hasSource => to('/fminer/last') } feature_dataset.parameters = [ - { DC.title => "dataset_uri", OT.paramValue => params[:dataset_uri] }, - { DC.title => "prediction_feature", OT.paramValue => params[:prediction_feature] }, - { DC.title => "min_frequency", OT.paramValue => @@fminer.minfreq }, - { DC.title => "nr_hits", OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") } + { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }, + { RDF::DC.title => "prediction_feature", RDF::OT.paramValue => params[:prediction_feature] }, + { RDF::DC.title => "min_frequency", RDF::OT.paramValue => @@fminer.minfreq }, + { RDF::DC.title => "nr_hits", RDF::OT.paramValue => (params[:nr_hits] == "true" ? "true" : "false") } ] @@fminer.compounds = [] @@ -366,7 +384,8 @@ module OpenTox fminer_results = {} matches.each do |smarts, ids| metadata, parameters = @@fminer.calc_metadata(smarts, ids, counts[smarts], @@last, nil, value_map, params) - feature = OpenTox::Feature.find_by_title(smarts.dup,metadata) + metadata[RDF::DC.title] = smarts.dup + feature = OpenTox::Feature.find_or_create(metadata, @subjectid) features << feature ids.each_with_index { |id,idx| fminer_results[@@fminer.compounds[id]] || fminer_results[@@fminer.compounds[id]] = {} @@ -374,9 +393,8 @@ module OpenTox } end - fminer_compounds = @@fminer.training_dataset.compounds.collect.to_a - @@fminer.training_dataset.build_feature_positions - prediction_feature_idx = @@fminer.training_dataset.feature_positions[@@fminer.prediction_feature.uri] + fminer_compounds = @@fminer.training_dataset.compounds + prediction_feature_idx = @@fminer.training_dataset.features.collect{|f| f.uri}.index @@fminer.prediction_feature.uri prediction_feature_all_acts = fminer_compounds.each_with_index.collect { |c,idx| @@fminer.training_dataset.data_entries[idx][prediction_feature_idx] } @@ -387,6 +405,7 @@ module OpenTox feature_dataset.features = [ @@fminer.prediction_feature ] + feature_dataset.features end fminer_compounds.each_with_index { |c,idx| + # TODO: fix value insertion row = [ c ] if (params[:get_target] == "true") row = row + [ prediction_feature_all_acts[idx] ] @@ -397,14 +416,12 @@ module OpenTox row.collect! { |v| v ? v : 0 } unless fminer_noact_compounds.include? c feature_dataset << row } - feature_dataset.put @subjectid - $logger.debug feature_dataset.uri + feature_dataset.put feature_dataset.uri end response['Content-Type'] = 'text/uri-list' - service_unavailable_error "Service unavailable" if task.cancelled? - halt 202,task.uri.to_s+"\n" + halt 202,task.uri end end diff --git a/webapp/fs.rb b/webapp/fs.rb index 6286629..e570860 100644 --- a/webapp/fs.rb +++ b/webapp/fs.rb @@ -20,15 +20,15 @@ module OpenTox get "/fs/rfe/?" do algorithm = OpenTox::Algorithm.new(to('/fs/rfe',:full)) algorithm.metadata = { - DC.title => 'Recursive Feature Elimination', - DC.creator => "andreas@maunz.de", - RDF.type => [OT.Algorithm,OTA.PatternMiningSupervised] + RDF::DC.title => 'Recursive Feature Elimination', + RDF::DC.creator => "andreas@maunz.de", + RDF.type => [RDF::OT.Algorithm,RDF::OTA.PatternMiningSupervised] } algorithm.parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Prediction Feature URI", OT.paramScope => "mandatory", DC.title => "prediction_feature" }, - { DC.description => "Feature Dataset URI", OT.paramScope => "mandatory", DC.title => "feature_dataset_uri" }, - { DC.description => "Delete Instances with missing values", OT.paramScope => "optional", DC.title => "del_missing" } + { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" }, + { RDF::DC.description => "Prediction Feature URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "prediction_feature" }, + { RDF::DC.description => "Feature Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "feature_dataset_uri" }, + { RDF::DC.description => "Delete Instances with missing values", RDF::OT.paramScope => "optional", RDF::DC.title => "del_missing" } ] format_output(algorithm) end diff --git a/webapp/lazar.rb b/webapp/lazar.rb index 9eec397..c40fc98 100644 --- a/webapp/lazar.rb +++ b/webapp/lazar.rb @@ -1,58 +1,25 @@ -=begin -* Name: lazar.rb -* Description: Lazar -* Author: Andreas Maunz <andreas@maunz.de> -* Date: 10/2012 -=end - -$lazar_params = [ - "training_dataset_uri", - "prediction_feature_uri", - "feature_dataset_uri", - "feature_generation_uri", - "feature_calculation_algorithm", - "similarity_algorithm", - "min_sim", - "prediction_algorithm", - "propositionalized", - "pc_type", - "lib", - "min_train_performance" -] -$lazar_min_train_performance_default = 0.1 - - module OpenTox class Application < Service - # Get representation of lazar algorithm # @return [String] Representation get '/lazar/?' do algorithm = OpenTox::Algorithm.new(to('/lazar',:full)) algorithm.metadata = { - DC.title => 'lazar', - DC.creator => 'helma@in-silico.ch, andreas@maunz.de', - RDF.Type => [OT.Algorithm] + RDF::DC.title => 'lazar', + RDF::DC.creator => 'helma@in-silico.ch, andreas@maunz.de', + RDF.Type => [RDF::OT.Algorithm] } algorithm.parameters = [ - { DC.description => "Dataset URI", OT.paramScope => "mandatory", DC.title => "dataset_uri" }, - { DC.description => "Feature URI for dependent variable", OT.paramScope => "optional", DC.title => "prediction_feature" }, - { DC.description => "Feature generation service URI", OT.paramScope => "optional", DC.title => "feature_generation_uri" }, - { DC.description => "Feature dataset URI", OT.paramScope => "optional", DC.title => "feature_dataset_uri" }, - { DC.description => "Further parameters for the feature generation service", OT.paramScope => "optional" } + { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" }, + { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }, + { RDF::DC.description => "Feature generation service URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_generation_uri" }, + { RDF::DC.description => "Feature dataset URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_dataset_uri" }, + { RDF::DC.description => "Further parameters for the feature generation service", RDF::OT.paramScope => "optional" } ] format_output(algorithm) end - def predicted_variable(prediction_feature) - OpenTox::Feature.find_by_title("predicted_variable", {RDF.type => prediction_feature[RDF.type]}) - end - - def predicted_confidence - OpenTox::Feature.find_by_title("predicted_confidence", {RDF.type => [RDF::OT.NumericFeature]}) - end - # Create a lazar prediction model # @param [String] dataset_uri Training dataset URI @@ -61,37 +28,17 @@ module OpenTox # @param [optional,String] - further parameters for the feature generation service # @return [text/uri-list] Task URI post '/lazar/?' do - params[:subjectid] = @subjectid - resource_not_found_error "No dataset_uri parameter." unless params[:dataset_uri] - task = OpenTox::Task.create( - $task[:uri], - @subjectid, - { RDF::DC.description => "Create lazar model", - RDF::DC.creator => to('/lazar',:full) - } - ) do |task| - + resource_not_found_error "Please provide a dataset_uri parameter." unless params[:dataset_uri] + resource_not_found_error "Dataset '#{params[:dataset_uri]}' not found." unless URI.accessible? params[:dataset_uri] + bad_request_error "Please provide a feature_generation_uri parameter." unless params[:feature_generation_uri] + task = OpenTox::Task.run("Create lazar model", uri('/lazar'), @subjectid) do |task| lazar = OpenTox::Model.new(nil, @subjectid) - lazar.parameters = lazar.check_params($lazar_params, params) - prediction_feature = OpenTox::Feature.find(lazar.find_parameter_value("prediction_feature_uri")) - lazar.metadata = { - DC.title => "lazar model", - OT.dependentVariables => lazar.find_parameter_value("prediction_feature_uri"), - OT.predictedVariables => [ predicted_variable(prediction_feature).uri, predicted_confidence.uri ], - OT.trainingDataset => lazar.find_parameter_value("training_dataset_uri"), - OT.featureDataset => lazar.find_parameter_value("feature_dataset_uri"), - RDF.type => ( prediction_feature.feature_type == "classification" ? - [OT.Model, OTA.ClassificationLazySingleTarget] : - [OT.Model, OTA.RegressionLazySingleTarget] - ) - } - # task.progress 10 - lazar.put @subjectid + lazar.create_model(params) + lazar.put lazar.uri end response['Content-Type'] = 'text/uri-list' - #service_unavailable_error "Service unavailable" if task.cancelled? - halt 202,task.uri.to_s+"\n" + halt 202,task.uri end @@ -109,154 +56,18 @@ module OpenTox # @param [optional,String] Further parameters for the feature generation service # @return [text/uri-list] Task URI post '/lazar/predict/?' do + # pass parameters instead of model_uri, because model service is blocked by incoming call - params[:subjectid] = @subjectid - if ( (params[:compound_uri] and params[:dataset_uri]) or - (!params[:compound_uri] and !params[:dataset_uri]) - ) - bad_request_error "Submit either compound uri or dataset uri" - end - - task = OpenTox::Task.create( - $task[:uri], - @subjectid, - { - RDF::DC.description => "Apply lazar model", - RDF::DC.creator => to('/lazar/predict',:full) - } - ) do |task| - - - if params[:dataset_uri] - compounds = OpenTox::Dataset.find(params[:dataset_uri]).compounds - else - compounds = [ OpenTox::Compound.new(params[:compound_uri]) ] - end - - compounds.each { |query_compound| - params[:compound_uri] = query_compound.uri # AM: store compound in params hash - unless @prediction_dataset # AM: only once for dataset predictions - @prediction_dataset = OpenTox::Dataset.new(nil, @subjectid) - - @model_params_hash = $lazar_params.inject({}){ |h,p| - h[p] = params[p].to_s unless params[p].nil? - h - } - @model = OpenTox::Model.new(@model_params_hash) - - $logger.debug "Loading training dataset" - @training_dataset = OpenTox::Dataset.find(params[:training_dataset_uri], @subjectid) - @prediction_feature = OpenTox::Feature.find(params[:prediction_feature_uri],@subjectid) - @predicted_variable = predicted_variable(@prediction_feature) - @predicted_confidence = predicted_confidence - @similarity_feature = OpenTox::Feature.find_by_title("similarity", {RDF.type => [RDF::OT.NumericFeature]}) - @prediction_dataset.features = [ @prediction_feature, @predicted_variable, @predicted_confidence, @similarity_feature ] - - @prediction_dataset.metadata = { - DC.title => "Lazar prediction", - DC.creator => @uri.to_s, - OT.hasSource => @uri.to_s, - OT.dependentVariables => @model_params_hash["prediction_feature_uri"], - OT.predictedVariables => [@predicted_variable.uri,@predicted_confidence.uri] - } - end - - database_activity = @training_dataset.database_activity(params) - if database_activity - - orig_value = database_activity.to_f - predicted_value = orig_value - confidence_value = 1.0 - - else - @model = OpenTox::Model.new(@model_params_hash) - - unless @feature_dataset - $logger.debug "Loading f dataset" - @feature_dataset = OpenTox::Dataset.find(params[:feature_dataset_uri], @subjectid) - end - - case @feature_dataset.find_parameter_value("nr_hits") - when "true" then @model.feature_calculation_algorithm = "match_hits" - when "false" then @model.feature_calculation_algorithm = "match" - end - pc_type = @feature_dataset.find_parameter_value("pc_type") - @model.pc_type = pc_type unless pc_type.nil? - lib = @feature_dataset.find_parameter_value("lib") - @model.lib = lib unless lib.nil? - - # AM: transform to cosine space - @model.min_sim = (@model.min_sim.to_f*2.0-1.0).to_s if @model.similarity_algorithm =~ /cosine/ - - if @feature_dataset.features.size > 0 - compound_params = { - :compound => query_compound, - :feature_dataset => @feature_dataset, - :pc_type => @model.pc_type, - :lib => @model.lib - } - # use send, not eval, for calling the method (good backtrace) - $logger.debug "Calculating q fps" - compound_fingerprints = OpenTox::Algorithm::FeatureValues.send( @model.feature_calculation_algorithm, compound_params, @subjectid ) - else - bad_request_error "No features found" - end - - @model.add_data(@training_dataset, @feature_dataset, @prediction_feature, compound_fingerprints, @subjectid) - mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(@model) - mtf.transform - $logger.debug "Predicting q" - prediction = OpenTox::Algorithm::Neighbors.send(@model.prediction_algorithm, - { :props => mtf.props, - :acts => mtf.acts, - :sims => mtf.sims, - :value_map => @prediction_feature.feature_type=="classification" ? - @training_dataset.value_map(@prediction_feature) : nil, - :min_train_performance => @model.min_train_performance - } ) - orig_value = nil - predicted_value = prediction[:prediction].to_f - confidence_value = prediction[:confidence].to_f - - # AM: transform to original space - confidence_value = ((confidence_value+1.0)/2.0).abs if @model.similarity_algorithm =~ /cosine/ - predicted_value = @training_dataset.value_map(@prediction_feature)[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification" - - $logger.debug "Prediction: '#{predicted_value}'" - $logger.debug "Confidence: '#{confidence_value}'" - end - - @prediction_dataset << [ - query_compound, - orig_value, - predicted_value, - confidence_value, - nil - ] - @model.neighbors.each { |neighbor| - @prediction_dataset << [ - OpenTox::Compound.new(neighbor[:compound]), - @training_dataset.value_map(@prediction_feature)[neighbor[:activity]], - nil, - nil, - neighbor[:similarity] - ] - } - - } - - @prediction_dataset.parameters = $lazar_params.collect { |p| - {DC.title => p, OT.paramValue => @model.instance_variable_get("@#{p}")} unless @model.instance_variable_get("@#{p}").nil? - } + task = OpenTox::Task.run("Apply lazar model",uri('/lazar/predict'), @subjectid) do |task| - @prediction_dataset.put - $logger.debug @prediction_dataset.uri - @prediction_dataset.uri + lazar = OpenTox::LazarPrediction.new params + puts "RESULT" + puts lazar.prediction_dataset.uri + lazar.prediction_dataset.uri end response['Content-Type'] = 'text/uri-list' - #service_unavailable_error "Service unavailable" if task.cancelled? - halt 202,task.uri.to_s+"\n" + halt 202,task.uri end diff --git a/webapp/sinatra.rb b/webapp/sinatra.rb index cf0e7c5..b61b0a3 100644 --- a/webapp/sinatra.rb +++ b/webapp/sinatra.rb @@ -8,15 +8,6 @@ module OpenTox class Application < Service - # Put any code here that should be executed immediately before - # request is processed - before { - $logger.debug "Request: " + request.path - # fix IE - request.env['HTTP_ACCEPT'] += ";text/html" if request.env["HTTP_USER_AGENT"]=~/MSIE/ - request.env['HTTP_ACCEPT']=request.params["media"] if request.params["media"] - } - # Conveniently accessible from anywhere within the Application class, # it negotiates the appropriate output format based on object class # and requested MIME type. @@ -29,7 +20,7 @@ module OpenTox case @accept when /text\/html/ content_type "text/html" - OpenTox.text_to_html obj + obj.to_html else content_type 'text/uri-list' obj @@ -43,7 +34,7 @@ module OpenTox obj.to_rdfxml when /text\/html/ content_type "text/html" - OpenTox.text_to_html obj.to_turtle + obj.to_html else content_type "text/turtle" obj.to_turtle diff --git a/webapp/test.rb b/webapp/test.rb index 0cb85e5..75044da 100644 --- a/webapp/test.rb +++ b/webapp/test.rb @@ -4,7 +4,7 @@ module OpenTox class Application < Service post '/test/wait_for_error_in_task/?' do - task = OpenTox::Task.create($task[:uri],@subjectid,{ RDF::DC.description => "wait_for_error_in_task"}) do |task| + task = OpenTox::Task.run("wait_for_error_in_task",@uri,@subjectid) do |task| sleep 1 uri = OpenTox::Dataset.new(File.join($dataset[:uri],'test/error_in_task')).post end |