summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2013-06-19 16:33:25 +0200
committerChristoph Helma <helma@in-silico.ch>2013-06-19 16:33:25 +0200
commit0be40680cb30d26fa961d0c47f3e0569db6c4e03 (patch)
tree75eae696c41270fb6b23c9b6c75e3ed65c3f7218
parentf901ca7ca635e522f124e97d274bee8ddc7698b8 (diff)
descriptor classes refactored
-rw-r--r--application.rb2
-rw-r--r--descriptor.rb165
-rw-r--r--lazar.rb79
-rw-r--r--lib/descriptor.rb122
-rw-r--r--lib/lazar.rb7
5 files changed, 279 insertions, 96 deletions
diff --git a/application.rb b/application.rb
index d40e70d..198d3a8 100644
--- a/application.rb
+++ b/application.rb
@@ -10,8 +10,8 @@ require_relative 'last-utils/lu.rb'
# Library Code
$logger.debug "Algorithm booting: #{$algorithm.collect{ |k,v| "#{k}: '#{v}'"} }"
-Dir['./lib/algorithm/*.rb'].each { |f| require f; also_reload f } # Libs
Dir['./lib/*.rb'].each { |f| require f; also_reload f } # Libs
+#Dir['./lib/descriptor.rb'].each { |f| require f; also_reload f } # Libs
Dir['./*.rb'].each { |f| require_relative f; also_reload f } # Webapps
# Entry point
diff --git a/descriptor.rb b/descriptor.rb
index 03e32cf..f4597e0 100644
--- a/descriptor.rb
+++ b/descriptor.rb
@@ -7,6 +7,77 @@ module OpenTox
class Application < Service
+ before '/descriptor/:lib/:descriptor/?' do
+ #if request.get?
+ lib = @uri.split("/")[-2].capitalize
+ klass = OpenTox::Descriptor.const_get params[:lib].capitalize
+ @algorithm = klass.new @uri, @subjectid unless params[:lib] == "smarts"
+=begin
+ elsif request.post?
+ @feature_dataset = Dataset.new nil, @subjectid
+ @feature_dataset.metadata = {
+ RDF::DC.title => "Physico-chemical descriptors",
+ RDF::DC.creator => @uri,
+ RDF::OT.hasSource => @uri,
+ }
+ if params[:compound_uri]
+ @feature_dataset.parameters = [ { RDF::DC.title => "compound_uri", RDF::OT.paramValue => params[:compound_uri] }]
+ elsif params[:dataset_uri]
+ @feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }]
+ else
+ bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri
+ end
+ end
+=end
+ end
+
+ # Get a list of descriptor calculation
+ # @return [text/uri-list] URIs
+ get '/descriptor/?' do
+ #uris = ["Openbabel","Cdk","Joelib"].collect do |lib|
+ uris = ["Openbabel"].collect do |lib|
+ klass = OpenTox::Descriptor.const_get lib
+ klass.all
+ end.flatten
+ render uris
+ end
+
+ get '/descriptor/:lib/?' do
+ klass = OpenTox::Descriptor.const_get params[:lib].capitalize
+ render klass.all
+ end
+
+ # Get representation of descriptor calculation
+ # @return [String] Representation
+ get '/descriptor/:lib/:descriptor/?' do
+ render @algorithm
+ end
+
+ post '/descriptor/smarts/:method/?' do
+ method = params[:method].to_sym
+ bad_request_error "Please provide a compound_uri or dataset_uri parameter and a smarts parameter. The count parameter is optional and defaults to false." unless (params[:compound_uri] or params[:dataset_uri]) and params[:smarts]
+ params[:count] ? params[:count] = params[:count].to_boolean : params[:count] = false
+ if params[:compound_uri]
+ compounds = OpenTox::Compound.new params[:compound_uri]
+ response['Content-Type'] = "application/json"
+ OpenTox::Descriptor::Smarts.send(method, compounds, params[:smarts], params[:count]).to_json
+ elsif params[:dataset_uri]
+ compounds = OpenTox::Dataset.new params[:dataset_uri]
+ # TODO: create and return dataset
+ end
+ end
+
+ # use /descriptor with dataset_uri and descriptor_uri parameters for efficient calculation of multiple compounds/descriptors
+ post '/descriptor/:lib/:descriptor/?' do
+ bad_request_error "Please provide a compound_uri parameter", @uri unless params[:compound_uri]
+ params[:descriptor_uris] = [@uri]
+ @algorithm.calculate params
+ #compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
+ #send params[:lib].to_sym, compounds, @descriptors
+ #@feature_dataset.put
+ #@feature_dataset.uri
+ end
+=begin
ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk"
JAVA_DIR = File.join(File.dirname(__FILE__),"java")
CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
@@ -23,26 +94,6 @@ module OpenTox
@@obconversion = OpenBabel::OBConversion.new
@@obconversion.set_in_format 'inchi'
- # OpenBabel
- OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").each do |d|
- title,description = d.split(/\s+/,2)
- unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s"
- uri = File.join $algorithm[:uri], "descriptor/openbabel" ,title
- title = "OpenBabel "+title
- feature = OpenTox::Feature.find_or_create({
- RDF::DC.title => title,
- RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
- RDF::DC.description => description,
- }, @subjectid)
- descriptors[:openbabel] << {
- :title => title,
- :uri => uri,
- :description => description,
- :calculator => OpenBabel::OBDescriptor.find_type(title.split(" ").last),
- :feature => feature
- }
- end
- end
# CDK
cdk_descriptors = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`)
@@ -83,15 +134,6 @@ module OpenTox
helpers do
- def openbabel compounds, descriptors
- compounds.each do |compound|
- @@obconversion.read_string @@obmol, compound.inchi
- descriptors.each do |descriptor|
- @feature_dataset.add_data_entry compound, descriptor[:feature], fix_value(descriptor[:calculator].predict(@@obmol))
- end
- end
- end
-
def cdk compounds, descriptors
sdf_3d compounds
# use java system call (rjb blocks within tasks)
@@ -142,47 +184,6 @@ module OpenTox
@sdf_file.close
end
end
-
- def fix_value val
- if val.numeric?
- val = Float(val)
- val = nil if val.nan? or val.infinite?
- else
- val = nil if val == "NaN"
- end
- val
- end
- end
-
- before '/descriptor/?*' do
- if request.get?
- @algorithm = OpenTox::Algorithm.new @uri
- @algorithm.parameters = [
- { RDF::DC.description => "Dataset URI",
- RDF::OT.paramScope => "optional",
- RDF::DC.title => "dataset_uri" } ,
- { RDF::DC.description => "Compound URI",
- RDF::OT.paramScope => "optional",
- RDF::DC.title => "compound_uri" }
- ]
- @algorithm.metadata = {
- RDF.type => [RDF::OTA.DescriptorCalculation],
- }
- elsif request.post?
- @feature_dataset = Dataset.new nil, @subjectid
- @feature_dataset.metadata = {
- RDF::DC.title => "Physico-chemical descriptors",
- RDF::DC.creator => @uri,
- RDF::OT.hasSource => @uri,
- }
- if params[:compound_uri]
- @feature_dataset.parameters = [ { RDF::DC.title => "compound_uri", RDF::OT.paramValue => params[:compound_uri] }]
- elsif params[:dataset_uri]
- @feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }]
- else
- bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri
- end
- end
end
before '/descriptor/:lib/:descriptor/?' do
@@ -199,22 +200,10 @@ module OpenTox
@sdf_file = nil
end
- # Get a list of descriptor calculation
- # @return [text/uri-list] URIs
- get '/descriptor/?' do
- DESCRIPTORS.collect{|lib,d| d.collect{|n| uri("/descriptor/#{lib}/#{n[:title].split(" ").last}")}}.flatten.sort.join("\n")
- end
-
- get '/descriptor/:lib/?' do
- DESCRIPTORS[params[:lib].to_sym].collect{|n| uri("/descriptor/#{params[:lib].to_sym}/#{n[:title].split(" ").last}")}.sort.join("\n")
- end
-
# Get representation of descriptor calculation
# @return [String] Representation
get '/descriptor/:lib/:descriptor/?' do
- @algorithm[RDF::DC.title] = @descriptor[:title]
- @algorithm[RDF::DC.description] = @descriptor[:description] if @descriptor[:description]
- render(@algorithm)
+ render @algorithm
end
post '/descriptor/?' do
@@ -241,15 +230,7 @@ module OpenTox
response['Content-Type'] = 'text/uri-list'
halt 202, task.uri
end
-
- # use /descriptor with dataset_uri and descriptor_uri parameters for efficient calculation of multiple compounds/descriptors
- post '/descriptor/:lib/:descriptor/?' do
- bad_request_error "Please provide a compound_uri parameter", @uri unless params[:compound_uri]
- compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
- send params[:lib].to_sym, compounds, @descriptors
- @feature_dataset.put
- @feature_dataset.uri
- end
+=end
end
diff --git a/lazar.rb b/lazar.rb
new file mode 100644
index 0000000..de3f753
--- /dev/null
+++ b/lazar.rb
@@ -0,0 +1,79 @@
+module OpenTox
+ class Application < Service
+
+ # Get representation of lazar algorithm
+ # @return [String] Representation
+ get '/lazar/?' do
+ algorithm = OpenTox::Algorithm.new(to('/lazar',:full))
+ algorithm.metadata = {
+ RDF::DC.title => 'lazar',
+ RDF::DC.creator => 'helma@in-silico.ch, andreas@maunz.de',
+ RDF.Type => [RDF::OT.Algorithm]
+ }
+ algorithm.parameters = [
+ { RDF::DC.description => "Dataset URI", RDF::OT.paramScope => "mandatory", RDF::DC.title => "dataset_uri" },
+ { RDF::DC.description => "Feature URI for dependent variable", RDF::OT.paramScope => "optional", RDF::DC.title => "prediction_feature" },
+ { RDF::DC.description => "Feature generation service URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_generation_uri" },
+ { RDF::DC.description => "Feature dataset URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_dataset_uri" },
+ { RDF::DC.description => "Further parameters for the feature generation service", RDF::OT.paramScope => "optional" }
+ ]
+ #format_output(algorithm)
+ render algorithm
+ end
+
+
+ # Create a lazar prediction model
+ # @param [String] dataset_uri Training dataset URI
+ # @param [optional,String] prediction_feature URI of the feature to be predicted
+ # @param [optional,String] feature_generation_uri URI of the feature generation algorithm
+ # @param [optional,String] - further parameters for the feature generation service
+ # @return [text/uri-list] Task URI
+ post '/lazar/?' do
+ bad_request_error "Please provide a dataset_uri parameter." unless params[:dataset_uri]
+ #TODO: URI namespace clashes
+ #resource_not_found_error "Dataset '#{params[:dataset_uri]}' not found." unless URI.accessible? params[:dataset_uri]
+ bad_request_error "Please provide a feature_generation_uri parameter." unless params[:feature_generation_uri]
+ task = OpenTox::Task.run("Create lazar model", uri('/lazar'), @subjectid) do |task|
+ #lazar = OpenTox::Model::Lazar.new(nil, @subjectid)
+ lazar = OpenTox::Model::Lazar.new(File.join($model[:uri],SecureRandom.uuid), @subjectid)
+ lazar.create(params)
+ #lazar.put
+ #lazar.uri
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 202,task.uri
+ end
+
+
+ # Make a lazar prediction -- not to be called directly
+ # @param [String] compound_uri URI of compound to be predicted
+ # @param [String] training_dataset_uri URI of training dataset
+ # @param [String] prediction_feature_uri URI of prediction feature
+ # @param [String] feature_dataset_uri URI of feature dataset
+ # @param [String] feature_calculation_algorithm Name of feature calculation algorithm
+ # @param [String] min_sim Numeric value for minimum similarity
+ # @param [String] prediction_algorithm Name of prediction algorithm
+ # @param [String] propositionalized Whether propositionalization should be used
+ # @param [optional,String] pc_type Physico-chemical descriptor type
+ # @param [optional,String] pc_lib Physico-chemical descriptor library
+ # @param [optional,String] Further parameters for the feature generation service
+ # @return [text/uri-list] Task URI
+ post '/lazar/predict/?' do
+ # pass parameters instead of model_uri, because model service is blocked by incoming call
+
+ puts "LAZAR"
+ puts params.inspect
+ task = OpenTox::Task.run("Apply lazar model",uri('/lazar/predict'), @subjectid) do |task|
+
+ lazar = OpenTox::LazarPrediction.new params
+ puts lazar.inspect
+ lazar.prediction_dataset.uri
+
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 202,task.uri
+ end
+
+
+ end
+end
diff --git a/lib/descriptor.rb b/lib/descriptor.rb
new file mode 100644
index 0000000..b6b7cd4
--- /dev/null
+++ b/lib/descriptor.rb
@@ -0,0 +1,122 @@
+module OpenTox
+
+ module Descriptor
+ include OpenTox
+
+ def initialize uri, subjectid
+ super uri, subjectid
+ @parameters = [
+ { RDF::DC.description => "Dataset URI",
+ RDF::OT.paramScope => "optional",
+ RDF::DC.title => "dataset_uri" } ,
+ { RDF::DC.description => "Compound URI",
+ RDF::OT.paramScope => "optional",
+ RDF::DC.title => "compound_uri" }
+ ]
+ tokens = uri.split %r{/}
+ @metadata = {
+ RDF::DC.title => "#{tokens[-2].capitalize} #{tokens[-1]}",
+ RDF.type => [RDF::OT.Algorithm, RDF::OTA.DescriptorCalculation],
+ }
+ end
+
+ def fix_value val
+ if val.numeric?
+ val = Float(val)
+ val = nil if val.nan? or val.infinite?
+ else
+ val = nil if val == "NaN"
+ end
+ val
+ end
+
+ class Openbabel
+ include Descriptor
+
+ def initialize uri, subjectid=nil
+ descriptor = OpenBabel::OBDescriptor.find_type(uri.split("/").last)
+ bad_request_error "Unknown descriptor #{uri}. See #{File.join $algorithm[:uri], "descriptor"} for a list of supported descriptors.", uri unless descriptor
+ super uri, subjectid
+ @metadata[RDF::DC.description] = descriptor.description.split("\n").first
+ @obmol = OpenBabel::OBMol.new
+ @obconversion = OpenBabel::OBConversion.new
+ @obconversion.set_in_format 'inchi'
+ end
+
+ def self.all
+ puts OpenBabel::OBDescriptor.list_as_string("descriptors")
+ OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
+ title = d.split(/\s+/).first
+ puts title
+ unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s"
+ File.join $algorithm[:uri], "descriptor/openbabel" ,title
+ end
+ end.compact.sort{|a,b| a.upcase <=> b.upcase}
+ end
+
+ # TODO: add to feature dataset
+ # find feature
+ # generic method for all libs
+ def calculate params
+ if params[:compound_uri]
+ compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
+ elsif params[:dataset_uri]
+ compounds = Dataset.new(params[:dataset_uri], @subjectid).compounds
+ end
+ compounds.collect do |compound|
+ @obconversion.read_string @obmol, compound.inchi
+ params[:descriptor_uris].each do |descriptor_uri|
+ method = descriptor_uri.split('/').last
+ calculator = OpenBabel::OBDescriptor.find_type method
+ value = fix_value calculator.predict(@obmol)
+ feature = OpenTox::Feature.find_or_create({
+ RDF::DC.title => "OpenBabel "+method,
+ RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
+ RDF::DC.description => calculator.description,
+ }, @subjectid)
+ [compound, feature, value]
+ end
+ end
+ end
+ end
+
+ class Smarts
+
+ def self.fingerprint compounds, smarts, count=false
+ if compounds.is_a? OpenTox::Compound
+ compounds = [compounds]
+ elsif compounds.is_a? OpenTox::Dataset
+ # TODO: create and return dataset
+ compounds = compounds.compounds
+ else
+ bad_request_error "Cannot match smarts on #{compounds.class} objects."
+ end
+ smarts = [smarts] unless smarts.is_a? Array
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_format('inchi')
+ smarts_pattern = OpenBabel::OBSmartsPattern.new
+ matches = []
+ compounds.each do |compound|
+ obconversion.read_string(obmol,compound.inchi)
+ matches << []
+ smarts.each do |smart|
+ smarts_pattern.init(smart)
+ if smarts_pattern.match(obmol)
+ count ? value = smarts_pattern.get_map_list.to_a.size : value = 1
+ else
+ value = 0
+ end
+ matches.last << value
+ end
+ end
+ matches
+ end
+
+ def self.smarts_count compounds, smarts
+ smarts_fingerprint compounds,smarts,true
+ end
+ end
+ end
+
+end
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 6c8145d..34d868c 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -80,7 +80,8 @@ module OpenTox
puts compound_params
#compound_fingerprints = OpenTox::Algorithm::FeatureValues.send( @feature_calculation_algorithm, compound_params, @subjectid )
# TODO: fix for pc descriptors
- compound_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compound, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } )
+ #compound_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compound, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } )
+ compound_fingerprints = eval("#{@feature_calculation_algorithm}(compound, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } )")
puts "Fingerprints"
puts compound_fingerprints.inspect
@training_dataset.compounds.each_with_index { |cmpd, idx|
@@ -188,9 +189,9 @@ module OpenTox
case params["feature_generation_uri"]
when /fminer/
if (params[:nr_hits] == "true")
- @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "match_hits"}
+ @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "OpenTox::Descriptor::Smarts.count"}
else
- @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "match"}
+ @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "OpenTox::Descriptor::Smarts.fingerprint"}
end
@parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "tanimoto"}
@parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.3} unless parameter_value("min_sim")