diff options
author | Christoph Helma <helma@in-silico.ch> | 2013-06-19 16:33:25 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2013-06-19 16:33:25 +0200 |
commit | 0be40680cb30d26fa961d0c47f3e0569db6c4e03 (patch) | |
tree | 75eae696c41270fb6b23c9b6c75e3ed65c3f7218 | |
parent | f901ca7ca635e522f124e97d274bee8ddc7698b8 (diff) |
descriptor classes refactored
-rw-r--r-- | application.rb | 2 | ||||
-rw-r--r-- | descriptor.rb | 165 | ||||
-rw-r--r-- | lazar.rb | 79 | ||||
-rw-r--r-- | lib/descriptor.rb | 122 | ||||
-rw-r--r-- | lib/lazar.rb | 7 |
5 files changed, 279 insertions, 96 deletions
diff --git a/application.rb b/application.rb index d40e70d..198d3a8 100644 --- a/application.rb +++ b/application.rb @@ -10,8 +10,8 @@ require_relative 'last-utils/lu.rb' # Library Code $logger.debug "Algorithm booting: #{$algorithm.collect{ |k,v| "#{k}: '#{v}'"} }" -Dir['./lib/algorithm/*.rb'].each { |f| require f; also_reload f } # Libs Dir['./lib/*.rb'].each { |f| require f; also_reload f } # Libs +#Dir['./lib/descriptor.rb'].each { |f| require f; also_reload f } # Libs Dir['./*.rb'].each { |f| require_relative f; also_reload f } # Webapps # Entry point diff --git a/descriptor.rb b/descriptor.rb index 03e32cf..f4597e0 100644 --- a/descriptor.rb +++ b/descriptor.rb @@ -7,6 +7,77 @@ module OpenTox class Application < Service + before '/descriptor/:lib/:descriptor/?' do + #if request.get? + lib = @uri.split("/")[-2].capitalize + klass = OpenTox::Descriptor.const_get params[:lib].capitalize + @algorithm = klass.new @uri, @subjectid unless params[:lib] == "smarts" +=begin + elsif request.post? + @feature_dataset = Dataset.new nil, @subjectid + @feature_dataset.metadata = { + RDF::DC.title => "Physico-chemical descriptors", + RDF::DC.creator => @uri, + RDF::OT.hasSource => @uri, + } + if params[:compound_uri] + @feature_dataset.parameters = [ { RDF::DC.title => "compound_uri", RDF::OT.paramValue => params[:compound_uri] }] + elsif params[:dataset_uri] + @feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }] + else + bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri + end + end +=end + end + + # Get a list of descriptor calculation + # @return [text/uri-list] URIs + get '/descriptor/?' do + #uris = ["Openbabel","Cdk","Joelib"].collect do |lib| + uris = ["Openbabel"].collect do |lib| + klass = OpenTox::Descriptor.const_get lib + klass.all + end.flatten + render uris + end + + get '/descriptor/:lib/?' do + klass = OpenTox::Descriptor.const_get params[:lib].capitalize + render klass.all + end + + # Get representation of descriptor calculation + # @return [String] Representation + get '/descriptor/:lib/:descriptor/?' do + render @algorithm + end + + post '/descriptor/smarts/:method/?' do + method = params[:method].to_sym + bad_request_error "Please provide a compound_uri or dataset_uri parameter and a smarts parameter. The count parameter is optional and defaults to false." unless (params[:compound_uri] or params[:dataset_uri]) and params[:smarts] + params[:count] ? params[:count] = params[:count].to_boolean : params[:count] = false + if params[:compound_uri] + compounds = OpenTox::Compound.new params[:compound_uri] + response['Content-Type'] = "application/json" + OpenTox::Descriptor::Smarts.send(method, compounds, params[:smarts], params[:count]).to_json + elsif params[:dataset_uri] + compounds = OpenTox::Dataset.new params[:dataset_uri] + # TODO: create and return dataset + end + end + + # use /descriptor with dataset_uri and descriptor_uri parameters for efficient calculation of multiple compounds/descriptors + post '/descriptor/:lib/:descriptor/?' do + bad_request_error "Please provide a compound_uri parameter", @uri unless params[:compound_uri] + params[:descriptor_uris] = [@uri] + @algorithm.calculate params + #compounds = [ Compound.new(params[:compound_uri], @subjectid) ] + #send params[:lib].to_sym, compounds, @descriptors + #@feature_dataset.put + #@feature_dataset.uri + end +=begin ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk" JAVA_DIR = File.join(File.dirname(__FILE__),"java") CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last @@ -23,26 +94,6 @@ module OpenTox @@obconversion = OpenBabel::OBConversion.new @@obconversion.set_in_format 'inchi' - # OpenBabel - OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").each do |d| - title,description = d.split(/\s+/,2) - unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s" - uri = File.join $algorithm[:uri], "descriptor/openbabel" ,title - title = "OpenBabel "+title - feature = OpenTox::Feature.find_or_create({ - RDF::DC.title => title, - RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature], - RDF::DC.description => description, - }, @subjectid) - descriptors[:openbabel] << { - :title => title, - :uri => uri, - :description => description, - :calculator => OpenBabel::OBDescriptor.find_type(title.split(" ").last), - :feature => feature - } - end - end # CDK cdk_descriptors = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`) @@ -83,15 +134,6 @@ module OpenTox helpers do - def openbabel compounds, descriptors - compounds.each do |compound| - @@obconversion.read_string @@obmol, compound.inchi - descriptors.each do |descriptor| - @feature_dataset.add_data_entry compound, descriptor[:feature], fix_value(descriptor[:calculator].predict(@@obmol)) - end - end - end - def cdk compounds, descriptors sdf_3d compounds # use java system call (rjb blocks within tasks) @@ -142,47 +184,6 @@ module OpenTox @sdf_file.close end end - - def fix_value val - if val.numeric? - val = Float(val) - val = nil if val.nan? or val.infinite? - else - val = nil if val == "NaN" - end - val - end - end - - before '/descriptor/?*' do - if request.get? - @algorithm = OpenTox::Algorithm.new @uri - @algorithm.parameters = [ - { RDF::DC.description => "Dataset URI", - RDF::OT.paramScope => "optional", - RDF::DC.title => "dataset_uri" } , - { RDF::DC.description => "Compound URI", - RDF::OT.paramScope => "optional", - RDF::DC.title => "compound_uri" } - ] - @algorithm.metadata = { - RDF.type => [RDF::OTA.DescriptorCalculation], - } - elsif request.post? - @feature_dataset = Dataset.new nil, @subjectid - @feature_dataset.metadata = { - RDF::DC.title => "Physico-chemical descriptors", - RDF::DC.creator => @uri, - RDF::OT.hasSource => @uri, - } - if params[:compound_uri] - @feature_dataset.parameters = [ { RDF::DC.title => "compound_uri", RDF::OT.paramValue => params[:compound_uri] }] - elsif params[:dataset_uri] - @feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }] - else - bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri - end - end end before '/descriptor/:lib/:descriptor/?' do @@ -199,22 +200,10 @@ module OpenTox @sdf_file = nil end - # Get a list of descriptor calculation - # @return [text/uri-list] URIs - get '/descriptor/?' do - DESCRIPTORS.collect{|lib,d| d.collect{|n| uri("/descriptor/#{lib}/#{n[:title].split(" ").last}")}}.flatten.sort.join("\n") - end - - get '/descriptor/:lib/?' do - DESCRIPTORS[params[:lib].to_sym].collect{|n| uri("/descriptor/#{params[:lib].to_sym}/#{n[:title].split(" ").last}")}.sort.join("\n") - end - # Get representation of descriptor calculation # @return [String] Representation get '/descriptor/:lib/:descriptor/?' do - @algorithm[RDF::DC.title] = @descriptor[:title] - @algorithm[RDF::DC.description] = @descriptor[:description] if @descriptor[:description] - render(@algorithm) + render @algorithm end post '/descriptor/?' do @@ -241,15 +230,7 @@ module OpenTox response['Content-Type'] = 'text/uri-list' halt 202, task.uri end - - # use /descriptor with dataset_uri and descriptor_uri parameters for efficient calculation of multiple compounds/descriptors - post '/descriptor/:lib/:descriptor/?' do - bad_request_error "Please provide a compound_uri parameter", @uri unless params[:compound_uri] - compounds = [ Compound.new(params[:compound_uri], @subjectid) ] - send params[:lib].to_sym, compounds, @descriptors - @feature_dataset.put - @feature_dataset.uri - end +=end end diff --git a/lazar.rb b/lazar.rb new file mode 100644 index 0000000..de3f753 --- /dev/null +++ b/lazar.rb @@ -0,0 +1,79 @@ +module OpenTox + class Application < Service + + # Get representation of lazar algorithm + # @return [String] Representation + get '/lazar/?' do + algorithm = OpenTox::Algorithm.new(to('/lazar',:full)) + algorithm.metadata = { + RDF::DC.title => 'lazar', + RDF::DC.creator => 'helma@in-silico.ch, andreas@maunz.de', + RDF.Type => [RDF::OT.Algorithm] + } + algorithm.parameters = [ + { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" }, + { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" }, + { RDF::DC.description => "Feature generation service URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_generation_uri" }, + { RDF::DC.description => "Feature dataset URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_dataset_uri" }, + { RDF::DC.description => "Further parameters for the feature generation service", RDF::OT.paramScope => "optional" } + ] + #format_output(algorithm) + render algorithm + end + + + # Create a lazar prediction model + # @param [String] dataset_uri Training dataset URI + # @param [optional,String] prediction_feature URI of the feature to be predicted + # @param [optional,String] feature_generation_uri URI of the feature generation algorithm + # @param [optional,String] - further parameters for the feature generation service + # @return [text/uri-list] Task URI + post '/lazar/?' do + bad_request_error "Please provide a dataset_uri parameter." unless params[:dataset_uri] + #TODO: URI namespace clashes + #resource_not_found_error "Dataset '#{params[:dataset_uri]}' not found." unless URI.accessible? params[:dataset_uri] + bad_request_error "Please provide a feature_generation_uri parameter." unless params[:feature_generation_uri] + task = OpenTox::Task.run("Create lazar model", uri('/lazar'), @subjectid) do |task| + #lazar = OpenTox::Model::Lazar.new(nil, @subjectid) + lazar = OpenTox::Model::Lazar.new(File.join($model[:uri],SecureRandom.uuid), @subjectid) + lazar.create(params) + #lazar.put + #lazar.uri + end + response['Content-Type'] = 'text/uri-list' + halt 202,task.uri + end + + + # Make a lazar prediction -- not to be called directly + # @param [String] compound_uri URI of compound to be predicted + # @param [String] training_dataset_uri URI of training dataset + # @param [String] prediction_feature_uri URI of prediction feature + # @param [String] feature_dataset_uri URI of feature dataset + # @param [String] feature_calculation_algorithm Name of feature calculation algorithm + # @param [String] min_sim Numeric value for minimum similarity + # @param [String] prediction_algorithm Name of prediction algorithm + # @param [String] propositionalized Whether propositionalization should be used + # @param [optional,String] pc_type Physico-chemical descriptor type + # @param [optional,String] pc_lib Physico-chemical descriptor library + # @param [optional,String] Further parameters for the feature generation service + # @return [text/uri-list] Task URI + post '/lazar/predict/?' do + # pass parameters instead of model_uri, because model service is blocked by incoming call + + puts "LAZAR" + puts params.inspect + task = OpenTox::Task.run("Apply lazar model",uri('/lazar/predict'), @subjectid) do |task| + + lazar = OpenTox::LazarPrediction.new params + puts lazar.inspect + lazar.prediction_dataset.uri + + end + response['Content-Type'] = 'text/uri-list' + halt 202,task.uri + end + + + end +end diff --git a/lib/descriptor.rb b/lib/descriptor.rb new file mode 100644 index 0000000..b6b7cd4 --- /dev/null +++ b/lib/descriptor.rb @@ -0,0 +1,122 @@ +module OpenTox + + module Descriptor + include OpenTox + + def initialize uri, subjectid + super uri, subjectid + @parameters = [ + { RDF::DC.description => "Dataset URI", + RDF::OT.paramScope => "optional", + RDF::DC.title => "dataset_uri" } , + { RDF::DC.description => "Compound URI", + RDF::OT.paramScope => "optional", + RDF::DC.title => "compound_uri" } + ] + tokens = uri.split %r{/} + @metadata = { + RDF::DC.title => "#{tokens[-2].capitalize} #{tokens[-1]}", + RDF.type => [RDF::OT.Algorithm, RDF::OTA.DescriptorCalculation], + } + end + + def fix_value val + if val.numeric? + val = Float(val) + val = nil if val.nan? or val.infinite? + else + val = nil if val == "NaN" + end + val + end + + class Openbabel + include Descriptor + + def initialize uri, subjectid=nil + descriptor = OpenBabel::OBDescriptor.find_type(uri.split("/").last) + bad_request_error "Unknown descriptor #{uri}. See #{File.join $algorithm[:uri], "descriptor"} for a list of supported descriptors.", uri unless descriptor + super uri, subjectid + @metadata[RDF::DC.description] = descriptor.description.split("\n").first + @obmol = OpenBabel::OBMol.new + @obconversion = OpenBabel::OBConversion.new + @obconversion.set_in_format 'inchi' + end + + def self.all + puts OpenBabel::OBDescriptor.list_as_string("descriptors") + OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d| + title = d.split(/\s+/).first + puts title + unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s" + File.join $algorithm[:uri], "descriptor/openbabel" ,title + end + end.compact.sort{|a,b| a.upcase <=> b.upcase} + end + + # TODO: add to feature dataset + # find feature + # generic method for all libs + def calculate params + if params[:compound_uri] + compounds = [ Compound.new(params[:compound_uri], @subjectid) ] + elsif params[:dataset_uri] + compounds = Dataset.new(params[:dataset_uri], @subjectid).compounds + end + compounds.collect do |compound| + @obconversion.read_string @obmol, compound.inchi + params[:descriptor_uris].each do |descriptor_uri| + method = descriptor_uri.split('/').last + calculator = OpenBabel::OBDescriptor.find_type method + value = fix_value calculator.predict(@obmol) + feature = OpenTox::Feature.find_or_create({ + RDF::DC.title => "OpenBabel "+method, + RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature], + RDF::DC.description => calculator.description, + }, @subjectid) + [compound, feature, value] + end + end + end + end + + class Smarts + + def self.fingerprint compounds, smarts, count=false + if compounds.is_a? OpenTox::Compound + compounds = [compounds] + elsif compounds.is_a? OpenTox::Dataset + # TODO: create and return dataset + compounds = compounds.compounds + else + bad_request_error "Cannot match smarts on #{compounds.class} objects." + end + smarts = [smarts] unless smarts.is_a? Array + obconversion = OpenBabel::OBConversion.new + obmol = OpenBabel::OBMol.new + obconversion.set_in_format('inchi') + smarts_pattern = OpenBabel::OBSmartsPattern.new + matches = [] + compounds.each do |compound| + obconversion.read_string(obmol,compound.inchi) + matches << [] + smarts.each do |smart| + smarts_pattern.init(smart) + if smarts_pattern.match(obmol) + count ? value = smarts_pattern.get_map_list.to_a.size : value = 1 + else + value = 0 + end + matches.last << value + end + end + matches + end + + def self.smarts_count compounds, smarts + smarts_fingerprint compounds,smarts,true + end + end + end + +end diff --git a/lib/lazar.rb b/lib/lazar.rb index 6c8145d..34d868c 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -80,7 +80,8 @@ module OpenTox puts compound_params #compound_fingerprints = OpenTox::Algorithm::FeatureValues.send( @feature_calculation_algorithm, compound_params, @subjectid ) # TODO: fix for pc descriptors - compound_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compound, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } ) + #compound_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compound, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } ) + compound_fingerprints = eval("#{@feature_calculation_algorithm}(compound, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } )") puts "Fingerprints" puts compound_fingerprints.inspect @training_dataset.compounds.each_with_index { |cmpd, idx| @@ -188,9 +189,9 @@ module OpenTox case params["feature_generation_uri"] when /fminer/ if (params[:nr_hits] == "true") - @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "match_hits"} + @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "OpenTox::Descriptor::Smarts.count"} else - @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "match"} + @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "OpenTox::Descriptor::Smarts.fingerprint"} end @parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "tanimoto"} @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.3} unless parameter_value("min_sim") |