summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2013-07-02 19:20:23 +0200
committerChristoph Helma <helma@in-silico.ch>2013-07-02 19:20:23 +0200
commit404c644ee52bac97ff737fcb2057df3f1ec18b76 (patch)
treedfcb83af10fc48015818a4e0ed3f21232e548f4f
parentc0f75827c36405c18a9108fa98106de2706eae8d (diff)
algorithm service restructured. descriptor calculation and initial quantitative models working.
-rw-r--r--application.rb8
-rw-r--r--descriptor.rb147
-rw-r--r--fminer.rb16
-rw-r--r--java/CdkDescriptors.classbin3576 -> 3401 bytes
-rw-r--r--java/JoelibDescriptors.classbin2774 -> 2831 bytes
-rw-r--r--java/JoelibDescriptors.java3
-rw-r--r--lazar.rb18
-rw-r--r--lib/descriptor.rb289
-rw-r--r--lib/feature_values.rb81
-rw-r--r--lib/fminer.rb9
-rw-r--r--lib/lazar.rb314
-rw-r--r--lib/neighbors.rb90
-rw-r--r--lib/similarity.rb2
-rw-r--r--lib/transform.rb33
14 files changed, 557 insertions, 453 deletions
diff --git a/application.rb b/application.rb
index 6eb49e4..547da08 100644
--- a/application.rb
+++ b/application.rb
@@ -11,14 +11,6 @@ require_relative 'last-utils/lu.rb'
# Library Code
$logger.debug "Algorithm booting: #{$algorithm.collect{ |k,v| "#{k}: '#{v}'"} }"
Dir['./lib/*.rb'].each { |f| require f; also_reload f } # Libs
-=begin
-Dir['./*.rb'].each do |f|
- unless f == "unicorn.rb"
- require_relative f
- also_reload f # Webapps
- end
-end
-=end
[
"descriptor.rb",
diff --git a/descriptor.rb b/descriptor.rb
index 23f4174..13a6fa3 100644
--- a/descriptor.rb
+++ b/descriptor.rb
@@ -7,12 +7,36 @@ module OpenTox
class Application < Service
+ before '/descriptor/:method' do
+ if params[:compound_uri]
+ @compounds = [params[:compound_uri]].flatten.collect{|u| OpenTox::Compound.new u}
+ elsif params[:dataset_uri]
+ @compounds = OpenTox::Dataset.new(params[:dataset_uri], @subjectid).compounds
+ else
+ bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri
+ end
+=begin
+ new_params = {}
+ delete = []
+ params.each do |k,v|
+ if k.match(/_uri$/)
+ klass = k.sub(/_uri$/,'')
+ v = [v] if v.is_a? String
+ new_params[klass] = v.collect{|u| OpenTox.const_get(klass.capitalize).new(u)}
+ delete << k
+ end
+ end
+ delete.each{|k| params.delete k}
+ params.merge! new_params
+=end
+ end
+=begin
before '/descriptor/:lib/:descriptor/?' do
#if request.get?
- lib = @uri.split("/")[-2].capitalize
- klass = OpenTox::Descriptor.const_get params[:lib].capitalize
- @algorithm = klass.new @uri, @subjectid unless params[:lib] == "smarts"
-=begin
+ #lib = @uri.split("/")[-2].capitalize
+ @klass = OpenTox::Descriptor.const_get params[:lib].capitalize
+ #@algorithm = klass.new @uri, @subjectid unless params[:lib] == "smarts"
+ @method = params[:descriptor].to_sym
elsif request.post?
@feature_dataset = Dataset.new nil, @subjectid
@feature_dataset.metadata = {
@@ -28,9 +52,88 @@ module OpenTox
bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri
end
end
+ end
=end
+ get '/descriptor/?' do
+ #OpenTox::Algorithm::Descriptor.list.collect{|d| uri d}.join "\n"
+ OpenTox::Algorithm::Descriptor.list.join "\n"
+ #OpenTox::Algorithm::Descriptor.list.inspect
end
+ post '/descriptor/:method' do
+ puts params.inspect
+ bad_request_error "Please provide 'descriptors' parameters.", @uri unless params["descriptors"]
+ if params[:compound_uri]
+ result = OpenTox::Algorithm::Descriptor.send(params[:method].to_sym, @compounds, params["descriptors"])
+ Hash[result.map {|compound, v| [compound.uri, v] }].to_json
+ elsif params[:dataset_uri]
+ puts "starting task"
+ task = OpenTox::Task.run("Calculating #{params[:method]} descriptors for dataset #{params[:dataset_uri]}.", @uri, @subjectid) do |task|
+ puts "start calculation"
+ result = OpenTox::Algorithm::Descriptor.send(params[:method].to_sym, @compounds, params["descriptors"])
+ puts "create dataset"
+ puts result.inspect
+ dataset = OpenTox::Dataset.new nil, @subjectid
+ @compounds.each do |compound|
+ @features ||= result[compound].keys.collect{|name|
+ # TODO set other metadata
+ OpenTox::Feature.find_or_create({RDF::DC.title => name}, @subjectid)
+ }
+ @features.each do |feature|
+ value = result[compound][feature.title]
+ puts compound, feature, value if value
+ dataset.add_data_entry compound, feature, value if value
+ end
+ end
+ puts "put dataset"
+ dataset.put
+ puts "dataset stored"
+ dataset.uri
+ end
+ puts "Task"
+ puts task.uri
+ response['Content-Type'] = 'text/uri-list'
+ halt 202,task.uri
+ end
+ end
+
+=begin
+ post '/descriptor/smarts_match/?' do
+ bad_request_error "Please provide a compound_uri or dataset_uri parameter and a smarts parameter. The count parameter is optional and defaults to false." unless (params[:compound_uri] or params[:dataset_uri]) and params[:smarts]
+ params[:count] ? params[:count] = params[:count].to_boolean : params[:count] = false
+ if params[:compound_uri]
+ params[:compound_uri] = [ params[:compound_uri] ] unless params[:compound_uri].is_a? Array
+ response['Content-Type'] = "application/json"
+ OpenTox::Algorithm::Descriptor.smarts_match(params[:compound_uri].collect{|c| OpenTox::Compound.new c}, params[:smarts], params[:count]).to_json
+ elsif params[:dataset_uri]
+ task = OpenTox::Task.run("Calculating Smarts #{method} for dataset #{params[:dataset_uri]}.", @uri, @subjectid) do |task|
+ compounds = OpenTox::Dataset.new params[:dataset_uri]
+ matches = OpenTox::Descriptor::Smarts.fingerprint(compounds, params[:smarts], params[:count])
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 202,task.uri
+ end
+ end
+
+ post '/descriptor/smarts_count/?' do
+ bad_request_error "Please provide a compound_uri or dataset_uri parameter and a smarts parameter. The count parameter is optional and defaults to false." unless (params[:compound_uri] or params[:dataset_uri]) and params[:smarts]
+ params[:count] ? params[:count] = params[:count].to_boolean : params[:count] = false
+ if params[:compound_uri]
+ params[:compound_uri] = [ params[:compound_uri] ] unless params[:compound_uri].is_a? Array
+ response['Content-Type'] = "application/json"
+ OpenTox::Algorithm::Descriptor.smarts_count(params[:compound_uri].collect{|c| OpenTox::Compound.new c}, params[:smarts]).to_json
+ elsif params[:dataset_uri]
+ task = OpenTox::Task.run("Calculating Smarts #{method} for dataset #{params[:dataset_uri]}.", @uri, @subjectid) do |task|
+ compounds = OpenTox::Dataset.new params[:dataset_uri]
+ matches = OpenTox::Descriptor::Smarts.fingerprint(compounds, params[:smarts], params[:count])
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 202,task.uri
+ end
+ end
+=end
+
+=begin
# Get a list of descriptor calculation
# @return [text/uri-list] URIs
get '/descriptor/?' do
@@ -43,8 +146,12 @@ module OpenTox
end
get '/descriptor/:lib/?' do
- klass = OpenTox::Descriptor.const_get params[:lib].capitalize
- render klass.all
+ begin
+ klass = OpenTox::Descriptor.const_get params[:lib].capitalize
+ render klass.all
+ rescue
+ bad_request_error "Descriptor library '#{params[:lib]}' not found.", @uri
+ end
end
# Get representation of descriptor calculation
@@ -53,30 +160,38 @@ module OpenTox
render @algorithm
end
- post '/descriptor/smarts/:method/?' do
- method = params[:method].to_sym
- bad_request_error "Please provide a compound_uri or dataset_uri parameter and a smarts parameter. The count parameter is optional and defaults to false." unless (params[:compound_uri] or params[:dataset_uri]) and params[:smarts]
- params[:count] ? params[:count] = params[:count].to_boolean : params[:count] = false
+ post '/descriptor/?' do
+ descriptors = OpenTox::Descriptor::Set.new params
if params[:compound_uri]
- compounds = OpenTox::Compound.new params[:compound_uri]
- response['Content-Type'] = "application/json"
- OpenTox::Descriptor::Smarts.send(method, compounds, params[:smarts], params[:count]).to_json
+ compound = OpenTox::Compound.new params[:compound_uri]
+ descriptors.calculate compound
elsif params[:dataset_uri]
- compounds = OpenTox::Dataset.new params[:dataset_uri]
- # TODO: create and return dataset
+ task = OpenTox::Task.run("Calculating Smarts #{method} for dataset #{params[:dataset_uri]}.", @uri, @subjectid) do |task|
+ dataset = OpenTox::Dataset.new params[:dataset_uri]
+ descriptors.calculate dataset
+ end
+ response['Content-Type'] = 'text/uri-list'
+ halt 202,task.uri
+ else
+
end
end
+ #post '/descriptor/physchem/?' do
+ #post '/descriptor/lookup/?' do
# use /descriptor with dataset_uri and descriptor_uri parameters for efficient calculation of multiple compounds/descriptors
post '/descriptor/:lib/:descriptor/?' do
bad_request_error "Please provide a compound_uri parameter", @uri unless params[:compound_uri]
params[:descriptor_uris] = [@uri]
- @algorithm.calculate params
+ result = @algorithm.calculate(params)
+ puts result.inspect
+ result.to_json
#compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
#send params[:lib].to_sym, compounds, @descriptors
#@feature_dataset.put
#@feature_dataset.uri
end
+=end
=begin
ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk"
JAVA_DIR = File.join(File.dirname(__FILE__),"java")
diff --git a/fminer.rb b/fminer.rb
index 049b4e5..696a9a5 100644
--- a/fminer.rb
+++ b/fminer.rb
@@ -21,21 +21,7 @@ module OpenTox
# Get list of fminer algorithms
# @return [text/uri-list] URIs
get '/fminer/?' do
-=begin
- list = [ uri('/fminer/bbrc'),
- #uri('/fminer/bbrc/sample'),
- uri('/fminer/last'),
- #uri('/fminer/bbrc/match'),
- #uri('/fminer/last/match')
- ].join("\n") + "\n"
- render(list)
-=end
- render [ uri('/fminer/bbrc'),
- #uri('/fminer/bbrc/sample'),
- uri('/fminer/last'),
- #uri('/fminer/bbrc/match'),
- #uri('/fminer/last/match')
- ]
+ render [ uri('/fminer/bbrc'), uri('/fminer/last') ]
end
# Get representation of BBRC algorithm
diff --git a/java/CdkDescriptors.class b/java/CdkDescriptors.class
index 9373bc7..c2dc069 100644
--- a/java/CdkDescriptors.class
+++ b/java/CdkDescriptors.class
Binary files differ
diff --git a/java/JoelibDescriptors.class b/java/JoelibDescriptors.class
index 1426e7d..7f3eced 100644
--- a/java/JoelibDescriptors.class
+++ b/java/JoelibDescriptors.class
Binary files differ
diff --git a/java/JoelibDescriptors.java b/java/JoelibDescriptors.java
index ecd1b3f..64c099e 100644
--- a/java/JoelibDescriptors.java
+++ b/java/JoelibDescriptors.java
@@ -41,7 +41,8 @@ class JoelibDescriptors {
Boolean success = loader.read(mol);
if (!success) { break; } // last molecule
for (int i =0; i < features.length; i++) {
- Feature feature = factory.getFeature(features[i]);
+ String name = "joelib2.feature.types." + features[i];
+ Feature feature = factory.getFeature(name);
FeatureResult result = feature.calculate(mol);
if (i == 0) { yaml.print("- "); }
else { yaml.print(" "); }
diff --git a/lazar.rb b/lazar.rb
index 36b910a..4cb57de 100644
--- a/lazar.rb
+++ b/lazar.rb
@@ -4,7 +4,7 @@ module OpenTox
# Get representation of lazar algorithm
# @return [String] Representation
get '/lazar/?' do
- algorithm = OpenTox::Algorithm.new(to('/lazar',:full))
+ algorithm = OpenTox::Algorithm::Generic.new(to('/lazar',:full))
algorithm.metadata = {
RDF::DC.title => 'lazar',
RDF::DC.creator => 'helma@in-silico.ch, andreas@maunz.de',
@@ -17,7 +17,6 @@ module OpenTox
{ RDF::DC.description => "Feature dataset URI", RDF::OT.paramScope => "optional", RDF::DC.title => "feature_dataset_uri" },
{ RDF::DC.description => "Further parameters for the feature generation service", RDF::OT.paramScope => "optional" }
]
- #format_output(algorithm)
render algorithm
end
@@ -33,11 +32,7 @@ module OpenTox
#resource_not_found_error "Dataset '#{params[:dataset_uri]}' not found." unless URI.accessible? params[:dataset_uri], @subjectid # wrong URI class
bad_request_error "Please provide a feature_generation_uri parameter." unless params[:feature_generation_uri]
task = OpenTox::Task.run("Create lazar model", uri('/lazar'), @subjectid) do |task|
- #lazar = OpenTox::Model::Lazar.new(nil, @subjectid)
- lazar = OpenTox::Model::Lazar.new(File.join($model[:uri],SecureRandom.uuid), @subjectid)
- lazar.create(params)
- #lazar.put
- #lazar.uri
+ OpenTox::Model::Lazar.create(params)
end
response['Content-Type'] = 'text/uri-list'
halt 202,task.uri
@@ -60,19 +55,12 @@ module OpenTox
post '/lazar/predict/?' do
# pass parameters instead of model_uri, because model service is blocked by incoming call
- puts "LAZAR"
- puts params.inspect
task = OpenTox::Task.run("Apply lazar model",uri('/lazar/predict'), @subjectid) do |task|
-
- lazar = OpenTox::LazarPrediction.new params
- puts lazar.inspect
- lazar.prediction_dataset.uri
-
+ OpenTox::Model::Lazar.new(params[:model_uri]).predict(params).uri
end
response['Content-Type'] = 'text/uri-list'
halt 202,task.uri
end
-
end
end
diff --git a/lib/descriptor.rb b/lib/descriptor.rb
index b6b7cd4..8c8129c 100644
--- a/lib/descriptor.rb
+++ b/lib/descriptor.rb
@@ -1,105 +1,56 @@
+require 'digest/md5'
+ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk"
module OpenTox
- module Descriptor
- include OpenTox
-
- def initialize uri, subjectid
- super uri, subjectid
- @parameters = [
- { RDF::DC.description => "Dataset URI",
- RDF::OT.paramScope => "optional",
- RDF::DC.title => "dataset_uri" } ,
- { RDF::DC.description => "Compound URI",
- RDF::OT.paramScope => "optional",
- RDF::DC.title => "compound_uri" }
- ]
- tokens = uri.split %r{/}
- @metadata = {
- RDF::DC.title => "#{tokens[-2].capitalize} #{tokens[-1]}",
- RDF.type => [RDF::OT.Algorithm, RDF::OTA.DescriptorCalculation],
- }
- end
+ module Algorithm
+ class Descriptor
+ include OpenTox
- def fix_value val
- if val.numeric?
- val = Float(val)
- val = nil if val.nan? or val.infinite?
- else
- val = nil if val == "NaN"
- end
- val
- end
+ JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
+ CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
+ JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
+ LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
+ JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")
- class Openbabel
- include Descriptor
-
- def initialize uri, subjectid=nil
- descriptor = OpenBabel::OBDescriptor.find_type(uri.split("/").last)
- bad_request_error "Unknown descriptor #{uri}. See #{File.join $algorithm[:uri], "descriptor"} for a list of supported descriptors.", uri unless descriptor
+=begin
+ def initialize uri, subjectid
super uri, subjectid
- @metadata[RDF::DC.description] = descriptor.description.split("\n").first
- @obmol = OpenBabel::OBMol.new
- @obconversion = OpenBabel::OBConversion.new
- @obconversion.set_in_format 'inchi'
+ @parameters = [
+ { RDF::DC.description => "Dataset URI",
+ RDF::OT.paramScope => "optional",
+ RDF::DC.title => "dataset_uri" } ,
+ { RDF::DC.description => "Compound URI",
+ RDF::OT.paramScope => "optional",
+ RDF::DC.title => "compound_uri" }
+ ]
+ tokens = uri.split %r{/}
+ @metadata = {
+ RDF::DC.title => "#{tokens[-2].capitalize} #{tokens[-1]}",
+ RDF.type => [RDF::OT.Algorithm, RDF::OTA.DescriptorCalculation],
+ }
end
+=end
- def self.all
- puts OpenBabel::OBDescriptor.list_as_string("descriptors")
- OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
- title = d.split(/\s+/).first
- puts title
- unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s"
- File.join $algorithm[:uri], "descriptor/openbabel" ,title
- end
- end.compact.sort{|a,b| a.upcase <=> b.upcase}
- end
-
- # TODO: add to feature dataset
- # find feature
- # generic method for all libs
- def calculate params
- if params[:compound_uri]
- compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
- elsif params[:dataset_uri]
- compounds = Dataset.new(params[:dataset_uri], @subjectid).compounds
- end
- compounds.collect do |compound|
- @obconversion.read_string @obmol, compound.inchi
- params[:descriptor_uris].each do |descriptor_uri|
- method = descriptor_uri.split('/').last
- calculator = OpenBabel::OBDescriptor.find_type method
- value = fix_value calculator.predict(@obmol)
- feature = OpenTox::Feature.find_or_create({
- RDF::DC.title => "OpenBabel "+method,
- RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
- RDF::DC.description => calculator.description,
- }, @subjectid)
- [compound, feature, value]
- end
- end
+ def self.list
+ list = OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect{|line| "/openbabel/#{line.split(/\s+/).first}" }
+ list += YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`).collect{|d| "cdk/#{d[:java_class].split('.').last.sub(/Descriptor/,'')}" }
+ joelib = YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptorInfo | sed '0,/---/d'`) # strip Joelib messages at stdout
+ # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
+ list += joelib.collect{|d| "joelib/#{d[:java_class].split('.').last}" unless d[:java_class] == "joelib2.feature.types.MoleculeHashcode" or d[:java_class] == "joelib2.feature.types.GlobalTopologicalChargeIndex"}.compact
+ list.collect{|item| File.join "descriptor",item}
end
- end
-
- class Smarts
- def self.fingerprint compounds, smarts, count=false
- if compounds.is_a? OpenTox::Compound
- compounds = [compounds]
- elsif compounds.is_a? OpenTox::Dataset
- # TODO: create and return dataset
- compounds = compounds.compounds
- else
- bad_request_error "Cannot match smarts on #{compounds.class} objects."
- end
- smarts = [smarts] unless smarts.is_a? Array
+ def self.smarts_match compounds, smarts, count=false
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
obconversion.set_in_format('inchi')
smarts_pattern = OpenBabel::OBSmartsPattern.new
- matches = []
+ fingerprint = {}
+ compounds = [compounds] unless compounds.is_a? Array
+ smarts = [smarts] unless smarts.is_a? Array
compounds.each do |compound|
obconversion.read_string(obmol,compound.inchi)
- matches << []
+ fingerprint[compound] = {}
smarts.each do |smart|
smarts_pattern.init(smart)
if smarts_pattern.match(obmol)
@@ -107,16 +58,172 @@ module OpenTox
else
value = 0
end
- matches.last << value
+ fingerprint[compound][smart] = value
end
end
- matches
+ fingerprint
end
def self.smarts_count compounds, smarts
- smarts_fingerprint compounds,smarts,true
+ smarts_match compounds,smarts,true
+ end
+
+ def self.physchem compounds, descriptors
+ des = {}
+ descriptors.each do |d|
+ lib, descriptor = d.split(".")
+ des[lib.to_sym] ||= []
+ des[lib.to_sym] << descriptor
+ end
+ result = {}
+ des.each do |lib,d|
+ send(lib, compounds, d).each do |compound,values|
+ result[compound] ||= {}
+ result[compound].merge! values
+ end
+ end
+ result
+ end
+
+ def self.openbabel compounds, descriptors
+ obdescriptors = descriptors.collect{|d| OpenBabel::OBDescriptor.find_type d}
+ obmol = OpenBabel::OBMol.new
+ obconversion = OpenBabel::OBConversion.new
+ obconversion.set_in_format 'inchi'
+ fingerprint = {}
+ compounds.each do |compound|
+ obconversion.read_string obmol, compound.inchi
+ fingerprint[compound] = {}
+ obdescriptors.each_with_index do |descriptor,i|
+ fingerprint[compound][descriptors[i]] = fix_value(descriptor.predict(obmol))
+ end
+ end
+ fingerprint
+ end
+
+ def self.cdk compounds, descriptors
+ sdf = sdf_3d compounds
+ # use java system call (rjb blocks within tasks)
+ # use Tempfiles to avoid "Argument list too long" error
+ `java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{sdf} #{descriptors.join(" ")}`
+ fingerprint = {}
+ YAML.load_file(sdf+"cdk.yaml").each_with_index do |calculation,i|
+ $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
+ descriptors.each_with_index do |descriptor,j|
+ fingerprint[compounds[i]] = calculation
+ end
+ end
+ FileUtils.rm sdf+"cdk.yaml"
+ fingerprint
+ end
+
+ def self.joelib compounds, descriptors
+ # use java system call (rjb blocks within tasks)
+ # use Tempfiles to avoid "Argument list too long" error
+ sdf = sdf_3d compounds
+ `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{sdf} #{descriptors.join(' ')}`
+ fingerprint = {}
+ YAML.load_file(sdf+"joelib.yaml").each_with_index do |calculation,i|
+ $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
+ descriptors.each_with_index do |descriptor,j|
+ fingerprint[compounds[i]] = calculation
+ end
+ end
+ FileUtils.rm sdf+"joelib.yaml"
+ fingerprint
end
+
+ def self.lookup compounds, features, dataset
+ fingerprint = []
+ compounds.each do |compound|
+ fingerprint << []
+ features.each do |feature|
+ end
+ end
+ end
+
+ def self.sdf_3d compounds
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_format 'inchi'
+ obconversion.set_out_format 'sdf'
+ digest = Digest::MD5.hexdigest compounds.inspect
+ sdf_file = "/tmp/#{digest}.sdf"
+ unless File.exists? sdf_file # do not recreate existing 3d sdfs
+ sdf = File.open sdf_file,"w+"
+ # create 3d sdf file (faster in Openbabel than in CDK)
+ compounds.each do |compound|
+ obconversion.read_string obmol, compound.inchi
+ sdf_2d = obconversion.write_string(obmol)
+ OpenBabel::OBOp.find_type("Gen3D").do(obmol)
+ sdf_3d = obconversion.write_string(obmol)
+ if sdf_3d.match(/.nan/)
+ warning = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure."
+ $logger.warn warning
+ # TODO
+ #@feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning
+ sdf.puts sdf_2d
+ else
+ sdf.puts sdf_3d
+ end
+ end
+ sdf.close
+ end
+ sdf_file
+ end
+
+ def self.fix_value val
+ val = val.first if val.is_a? Array and val.size == 1
+ if val.numeric?
+ val = Float(val)
+ val = nil if val.nan? or val.infinite?
+ else
+ val = nil if val == "NaN"
+ end
+ val
+ end
+ private_class_method :sdf_3d, :fix_value
end
end
-
end
+=begin
+ class Set
+
+ def initialize params
+ bad_request_error "Please provide a compound_uri or dataset_uri parameter." unless params[:compound_uri] or params[:dataset_uri]
+ @dataset = OpenTox::Dataset.new params[:dataset_uri]
+ @compound = OpenTox::Compound.new params[:compound_uri]
+ @descriptors = []
+
+ end
+
+ def calculate
+ end
+
+ end
+
+ class Openbabel
+ include Descriptor
+
+ def initialize uri, subjectid=nil
+ descriptor = OpenBabel::OBDescriptor.find_type(uri.split("/").last)
+ bad_request_error "Unknown descriptor #{uri}. See #{File.join $algorithm[:uri], "descriptor"} for a list of supported descriptors.", uri unless descriptor
+ super uri, subjectid
+ @metadata[RDF::DC.description] = descriptor.description.split("\n").first
+ @obmol = OpenBabel::OBMol.new
+ @obconversion = OpenBabel::OBConversion.new
+ @obconversion.set_in_format 'inchi'
+ end
+
+ def self.all
+ OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
+ title = d.split(/\s+/).first
+ unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s"
+ File.join $algorithm[:uri], "descriptor/openbabel" ,title
+ end
+ end.compact.sort{|a,b| a.upcase <=> b.upcase}
+ end
+
+
+ end
+=end
diff --git a/lib/feature_values.rb b/lib/feature_values.rb
deleted file mode 100644
index b441c23..0000000
--- a/lib/feature_values.rb
+++ /dev/null
@@ -1,81 +0,0 @@
-=begin
-* Name: feature_values.rb
-* Description: Feature value calculation
-* Author: Andreas Maunz <andreas@maunz.de>
-* Date: 10/2012
-=end
-
-module OpenTox
- class Algorithm
-
- class FeatureValues
- # Substructure matching
- # @param [Hash] keys: compound, feature_dataset, values: OpenTox::Compound, Array of SMARTS strings
- # @return [Array] Array with matching Smarts
- def self.match(params, subjectid)
- features = params[:feature_dataset].features.collect{ |f| f[RDF::DC.title] }
- params[:compound].match(features)
- end
-
- # Substructure matching with number of non-unique hits
- # @param [Hash] keys: compound, feature_dataset, values: OpenTox::Compound, Array of SMARTS strings
- # @return [Hash] Hash with matching Smarts and number of hits
- def self.match_hits(params, subjectid)
- features = params[:feature_dataset].features.collect{ |f| f[RDF::DC.title] },
- params[:compound].match_hits(features)
- end
-
- # PC descriptor calculation
- # @param [Hash] keys: compound, feature_dataset, pc_type, lib, values: OpenTox::Compound, String, String
- # @return [Hash] Hash with feature name as key and value as value
- def self.lookup(params, subjectid)
- puts "lookup started"
- ds = params[:feature_dataset]
- #ds.build_feature_positions
- cmpd_inchi = params[:compound].inchi
- cmpd_idxs = ds.compounds.each_with_index.collect{ |cmpd,idx|
- idx if cmpd.inchi == cmpd_inchi
- }.compact
- if cmpd_idxs.size > 0 # We have entries
- puts "entries"
- cmpd_numeric_f = ds.features.collect { |f|
- f if f[RDF.type].include? RDF::OT.NumericFeature
- }.compact
- cmpd_data_entries = cmpd_idxs.collect { |idx|
- ds.data_entries[idx]
- }
- cmpd_fingerprints = cmpd_numeric_f.inject({}) { |h,f|
- values = cmpd_data_entries.collect { |entry|
- val = entry[ds.feature_positions[f.uri]]
- val.nil? ? nil : val.to_f
- }.compact
- h[f.title] = (values.size > 0) ? values.to_scale.median : nil # AM: median for numeric features
- h
- }
- (ds.features - cmpd_numeric_f).each { |f|
- values = cmpd_data_entries.collect { |entry|
- val = entry[ds.feature_positions[f.uri]]
- val.nil? ? nil : val
- }.compact
- cmpd_fingerprints[f.title] = values.to_scale.mode # AM: mode for the others
- }
- else # We need lookup
- puts "no entries"
- params[:subjectid] = subjectid
- [:compound, :feature_dataset].each { |p| params.delete(p) }; [:pc_type, :lib].each { |p| params.delete(p) if params[p] == "" }
- single_cmpd_ds = OpenTox::Dataset.new(nil,subjectid)
- # TODO: ntriples !!!
- single_cmpd_ds.parse_rdfxml(RestClientWrapper.post(File.join($compound[:uri],cmpd_inchi,"pc"), params, {:accept => "application/rdf+xml"}))
- single_cmpd_ds.get(true)
- #single_cmpd_ds.build_feature_positions
- cmpd_fingerprints = single_cmpd_ds.features.inject({}) { |h,f|
- h[f.title] = single_cmpd_ds.data_entries[0][single_cmpd_ds.feature_positions[f.uri]]
- h
- }
- end
- cmpd_fingerprints
- end
- end
-
- end
-end
diff --git a/lib/fminer.rb b/lib/fminer.rb
index 7f88c8b..6b21ce8 100644
--- a/lib/fminer.rb
+++ b/lib/fminer.rb
@@ -6,14 +6,17 @@
=end
module OpenTox
- class Algorithm
+ module Algorithm
# Fminer algorithms (https://github.com/amaunz/fminer2)
- class Fminer < Algorithm
+ class Fminer #< Algorithm
+
attr_accessor :prediction_feature, :training_dataset, :minfreq, :compounds, :db_class_sizes, :all_activities, :smi
def initialize(uri, subjectid=nil)
- super(uri, subjectid)
+ @uri = uri
+ @subjectid = subjectid
+ #super(uri, subjectid)
end
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 98293d5..d2eba5c 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -7,214 +7,198 @@
module OpenTox
- class LazarPrediction < Model
-
- attr_accessor :prediction_dataset
-
- def initialize(params)
- @prediction_dataset = OpenTox::Dataset.new(nil, @subjectid)
- # set instance variables and prediction dataset parameters from parameters
- params.each {|k,v|
- self.class.class_eval { attr_accessor k.to_sym }
- instance_variable_set "@#{k}", v
- @prediction_dataset.parameters << {RDF::DC.title => k, RDF::OT.paramValue => v}
- }
- ["cmpds", "fps", "acts", "n_prop", "q_prop", "neighbors"].each {|k|
- self.class.class_eval { attr_accessor k.to_sym }
- instance_variable_set("@#{k}", [])
- }
-
- @prediction_feature = OpenTox::Feature.new @prediction_feature_uri, @subjectid
- @predicted_variable = OpenTox::Feature.new @predicted_variable_uri, @subjectid
- @predicted_confidence = OpenTox::Feature.new @predicted_confidence_uri, @subjectid
- @prediction_dataset.metadata = {
- RDF::DC.title => "Lazar prediction for #{@prediction_feature.title}",
- RDF::DC.creator => @model_uri,
- RDF::OT.hasSource => @model_uri,
- RDF::OT.dependentVariables => @prediction_feature_uri,
- RDF::OT.predictedVariables => [@predicted_variable_uri,@predicted_confidence_uri]
- }
-
- @training_dataset = OpenTox::Dataset.new(@training_dataset_uri,@subjectid)
-
- @feature_dataset = OpenTox::Dataset.new(@feature_dataset_uri, @subjectid)
- bad_request_error "No features found in feature dataset #{@feature_dataset.uri}." if @feature_dataset.features.empty?
-
- @similarity_feature = OpenTox::Feature.find_or_create({RDF::DC.title => "#{@similarity_algorithm.capitalize} similarity", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid)
-
- @prediction_dataset.features = [ @predicted_variable, @predicted_confidence, @prediction_feature, @similarity_feature ]
-
- prediction_feature_pos = @training_dataset.features.collect{|f| f.uri}.index @prediction_feature.uri
-
- if @dataset_uri
- compounds = OpenTox::Dataset.new(@dataset_uri, @subjectid).compounds
- else
- compounds = [ OpenTox::Compound.new(@compound_uri, @subjectid) ]
- end
-
- compounds.each do |compound|
-
-
- #database_activity = @training_dataset.database_activity(params)
- database_activities = @training_dataset.values(compound,@prediction_feature)
- if database_activities and !database_activities.empty?
- database_activities.each do |database_activity|
- @prediction_dataset.add_data_entry compound, @prediction_feature, database_activity
- end
- next
- else
- # AM: transform to cosine space
- @min_sim = (@min_sim.to_f*2.0-1.0).to_s if @similarity_algorithm =~ /cosine/
-
- compound_params = {
- :compound => compound,
- :feature_dataset => @feature_dataset,
- }
- #compound_fingerprints = OpenTox::Algorithm::FeatureValues.send( @feature_calculation_algorithm, compound_params, @subjectid )
- # TODO: fix for pc descriptors
- #compound_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compound, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } )
- compound_fingerprints = eval("#{@feature_calculation_algorithm}(compound, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } )")
- @training_dataset.compounds.each_with_index { |cmpd, idx|
- act = @training_dataset.data_entries[idx][prediction_feature_pos]
- @acts << (@prediction_feature.feature_type=="classification" ? @prediction_feature.value_map.invert[act] : nil)
- @n_prop << @feature_dataset.data_entries[idx]#.collect.to_a
- @cmpds << cmpd.uri
- }
-
-=begin
- @q_prop = @feature_dataset.features.collect { |f|
- val = compound_fingerprints[f.title]
- bad_request_error "Can not parse value '#{val}' to numeric" if val and !val.numeric?
- val ? val.to_f : 0.0
- } # query structure
-=end
- @q_prop = compound_fingerprints.first.collect{|v| v.to_f}
-
- mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
- mtf.transform
-
- prediction = OpenTox::Algorithm::Neighbors.send(@prediction_algorithm,
- { :props => mtf.props,
- :acts => mtf.acts,
- :sims => mtf.sims,
- :value_map => @prediction_feature.feature_type=="classification" ? @prediction_feature.value_map : nil,
- :min_train_performance => @min_train_performance
- } )
-
- predicted_value = prediction[:prediction].to_f
- confidence_value = prediction[:confidence].to_f
-
- # AM: transform to original space
- confidence_value = ((confidence_value+1.0)/2.0).abs if @similarity_algorithm =~ /cosine/
- predicted_value = @prediction_feature.value_map[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification"
-
- end
-
- @prediction_dataset.add_data_entry compound, predicted_variable, predicted_value
- @prediction_dataset.add_data_entry compound, predicted_confidence, confidence_value
-
- if @compound_uri # add neighbors only for compound predictions
- @neighbors.each do |neighbor|
- n = OpenTox::Compound.new(neighbor[:compound], @subjectid)
- @prediction_dataset.add_data_entry n, @prediction_feature, @prediction_feature.value_map[neighbor[:activity]]
- @prediction_dataset.add_data_entry n, @similarity_feature, neighbor[:similarity]
- #@prediction_dataset << [ n, @prediction_feature.value_map[neighbor[:activity]], nil, nil, neighbor[:similarity] ]
- end
- end
-
- end # iteration over compounds
- @prediction_dataset.put
-
- end
+ module Model
- end
-
- class Model
+ class Lazar
+ include OpenTox
- class Lazar < Model
+ attr_accessor :prediction_dataset
# Check parameters for plausibility
# Prepare lazar object (includes graph mining)
# @param[Array] lazar parameters as strings
# @param[Hash] REST parameters, as input by user
- def create(params)
+ def self.create params
+
+ lazar = OpenTox::Model::Lazar.new(File.join($model[:uri],SecureRandom.uuid), @subjectid)
training_dataset = OpenTox::Dataset.new(params[:dataset_uri], @subjectid)
- @parameters << {RDF::DC.title => "training_dataset_uri", RDF::OT.paramValue => training_dataset.uri}
+ lazar.parameters << {RDF::DC.title => "training_dataset_uri", RDF::OT.paramValue => training_dataset.uri}
- # TODO: This is inconsistent, it would be better to have prediction_feature_uri in the API
if params[:prediction_feature]
resource_not_found_error "No feature '#{params[:prediction_feature]}' in dataset '#{params[:dataset_uri]}'" unless training_dataset.find_feature_uri( params[:prediction_feature] )
else # try to read prediction_feature from dataset
resource_not_found_error "Please provide a prediction_feature parameter" unless training_dataset.features.size == 1
params[:prediction_feature] = training_dataset.features.first.uri
end
- self[RDF::OT.trainingDataset] = training_dataset.uri
+ lazar[RDF::OT.trainingDataset] = training_dataset.uri
prediction_feature = OpenTox::Feature.new(params[:prediction_feature], @subjectid)
predicted_variable = OpenTox::Feature.find_or_create({RDF::DC.title => "#{prediction_feature.title} prediction", RDF.type => [RDF::OT.Feature, prediction_feature[RDF.type]]}, @subjectid)
- self[RDF::DC.title] = prediction_feature.title
- @parameters << {RDF::DC.title => "prediction_feature_uri", RDF::OT.paramValue => prediction_feature.uri}
- self[RDF::OT.dependentVariables] = prediction_feature.uri
+ lazar[RDF::DC.title] = prediction_feature.title
+ lazar.parameters << {RDF::DC.title => "prediction_feature_uri", RDF::OT.paramValue => prediction_feature.uri}
+ lazar[RDF::OT.dependentVariables] = prediction_feature.uri
bad_request_error "Unknown prediction_algorithm #{params[:prediction_algorithm]}" if params[:prediction_algorithm] and !OpenTox::Algorithm::Neighbors.respond_to?(params[:prediction_algorithm])
- @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => params[:prediction_algorithm]} if params[:prediction_algorithm]
+ lazar.parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => params[:prediction_algorithm]} if params[:prediction_algorithm]
confidence_feature = OpenTox::Feature.find_or_create({RDF::DC.title => "predicted_confidence", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid)
- self[RDF::OT.predictedVariables] = [ predicted_variable.uri, confidence_feature.uri ]
+ lazar[RDF::OT.predictedVariables] = [ predicted_variable.uri, confidence_feature.uri ]
case prediction_feature.feature_type
when "classification"
- @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => "weighted_majority_vote"} unless parameter_value "prediction_algorithm"
- self[RDF.type] = [RDF::OT.Model, RDF::OTA.ClassificationLazySingleTarget]
+ lazar.parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => "weighted_majority_vote"} unless lazar.parameter_value "prediction_algorithm"
+ lazar[RDF.type] = [RDF::OT.Model, RDF::OTA.ClassificationLazySingleTarget]
when "regression"
- @parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => "local_svm_regression"} unless parameter_value "prediction_algorithm"
- self[RDF.type] = [RDF::OT.Model, RDF::OTA.RegressionLazySingleTarget]
+ lazar.parameters << {RDF::DC.title => "prediction_algorithm", RDF::OT.paramValue => "local_svm_regression"} unless lazar.parameter_value "prediction_algorithm"
+ lazar[RDF.type] = [RDF::OT.Model, RDF::OTA.RegressionLazySingleTarget]
end
- parameter_value("prediction_algorithm") =~ /majority_vote/ ? @parameters << {RDF::DC.title => "propositionalized", RDF::OT.paramValue => false} : @parameters << {RDF::DC.title => "propositionalized", RDF::OT.paramValue => true}
+ lazar.parameter_value("prediction_algorithm") =~ /majority_vote/ ? lazar.parameters << {RDF::DC.title => "propositionalized", RDF::OT.paramValue => false} : lazar.parameters << {RDF::DC.title => "propositionalized", RDF::OT.paramValue => true}
- @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => params[:min_sim].to_f} if params[:min_sim] and params[:min_sim].numeric?
- @parameters << {RDF::DC.title => "feature_generation_uri", RDF::OT.paramValue => params[:feature_generation_uri]}
- #@parameters["nr_hits"] = params[:nr_hits]
+ lazar.parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => params[:min_sim].to_f} if params[:min_sim] and params[:min_sim].numeric?
+ lazar.parameters << {RDF::DC.title => "feature_generation_uri", RDF::OT.paramValue => params[:feature_generation_uri]}
+ #lazar.parameters["nr_hits"] = params[:nr_hits]
case params["feature_generation_uri"]
when /fminer/
if (params[:nr_hits] == "true")
- @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "OpenTox::Descriptor::Smarts.count"}
+ lazar.parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "smarts_count"}
else
- @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "OpenTox::Descriptor::Smarts.fingerprint"}
+ lazar.parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "smarts_match"}
end
- @parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "tanimoto"}
- @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.3} unless parameter_value("min_sim")
+ lazar.parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "tanimoto"}
+ lazar.parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.3} unless lazar.parameter_value("min_sim")
when /descriptor/
- @parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => "lookup"}
- @parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "cosine"}
- @parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.7} unless parameter_value("min_sim")
+ method = params["feature_generation_uri"].split(%r{/}).last.chomp
+ lazar.parameters << {RDF::DC.title => "feature_calculation_algorithm", RDF::OT.paramValue => method}
+ lazar.parameters << {RDF::DC.title => "similarity_algorithm", RDF::OT.paramValue => "cosine"}
+ lazar.parameters << {RDF::DC.title => "min_sim", RDF::OT.paramValue => 0.7} unless lazar.parameter_value("min_sim")
end
bad_request_error "Parameter min_train_performance is not numeric." if params[:min_train_performance] and !params[:min_train_performance].numeric?
- @parameters << {RDF::DC.title => "min_train_performance", RDF::OT.paramValue => params[:min_train_performance].to_f} if params[:min_train_performance] and params[:min_train_performance].numeric?
- @parameters << {RDF::DC.title => "min_train_performance", RDF::OT.paramValue => 0.1} unless parameter_value("min_train_performance")
+ lazar.parameters << {RDF::DC.title => "min_train_performance", RDF::OT.paramValue => params[:min_train_performance].to_f} if params[:min_train_performance] and params[:min_train_performance].numeric?
+ lazar.parameters << {RDF::DC.title => "min_train_performance", RDF::OT.paramValue => 0.1} unless lazar.parameter_value("min_train_performance")
if params[:feature_dataset_uri]
- bad_request_error "Feature dataset #{params[:feature_dataset_uri]} does not exist." unless URI.accessible? params[:feature_dataset_uri]
- @parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => params[:feature_dataset_uri]}
- self[RDF::OT.featureDataset] = params["feature_dataset_uri"]
+ bad_request_error "Feature dataset #{params[:feature_dataset_uri]} does not exist." unless URI.accessible? params[:feature_dataset_uri], @subjectid
+ lazar.parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => params[:feature_dataset_uri]}
+ lazar[RDF::OT.featureDataset] = params["feature_dataset_uri"]
else
# run feature generation algorithm
- feature_dataset_uri = OpenTox::Algorithm.new(params[:feature_generation_uri]).run(params)
- @parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => feature_dataset_uri}
- self[RDF::OT.featureDataset] = feature_dataset_uri
+ feature_dataset_uri = OpenTox::Algorithm::Generic.new(params[:feature_generation_uri], @subjectid).run(params)
+ lazar.parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => feature_dataset_uri}
+ lazar[RDF::OT.featureDataset] = feature_dataset_uri
end
- if params[:feature_dataset_uri]
- bad_request_error "Feature dataset #{params[:feature_dataset_uri]} does not exist." unless URI.accessible? params[:feature_dataset_uri], @subjectid
- @parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => params[:feature_dataset_uri]}
- self[RDF::OT.featureDataset] = params["feature_dataset_uri"]
+ lazar.put
+ lazar.uri
+ end
+
+ def predict(params)
+ @prediction_dataset = OpenTox::Dataset.new(nil, @subjectid)
+ # set instance variables and prediction dataset parameters from parameters
+ params.each {|k,v|
+ self.class.class_eval { attr_accessor k.to_sym }
+ instance_variable_set "@#{k}", v
+ @prediction_dataset.parameters << {RDF::DC.title => k, RDF::OT.paramValue => v}
+ }
+ #["training_compounds", "fingerprints", "training_activities", "training_fingerprints", "query_fingerprint", "neighbors"].each {|k|
+ ["training_compounds", "training_activities", "training_fingerprints", "query_fingerprint", "neighbors"].each {|k|
+ self.class.class_eval { attr_accessor k.to_sym }
+ instance_variable_set("@#{k}", [])
+ }
+
+ @prediction_feature = OpenTox::Feature.new @prediction_feature_uri, @subjectid
+ @predicted_variable = OpenTox::Feature.new @predicted_variable_uri, @subjectid
+ @predicted_confidence = OpenTox::Feature.new @predicted_confidence_uri, @subjectid
+ @prediction_dataset.metadata = {
+ RDF::DC.title => "Lazar prediction for #{@prediction_feature.title}",
+ RDF::DC.creator => @model_uri,
+ RDF::OT.hasSource => @model_uri,
+ RDF::OT.dependentVariables => @prediction_feature_uri,
+ RDF::OT.predictedVariables => [@predicted_variable_uri,@predicted_confidence_uri]
+ }
+
+ @training_dataset = OpenTox::Dataset.new(@training_dataset_uri,@subjectid)
+
+ @feature_dataset = OpenTox::Dataset.new(@feature_dataset_uri, @subjectid)
+ bad_request_error "No features found in feature dataset #{@feature_dataset.uri}." if @feature_dataset.features.empty?
+
+ @similarity_feature = OpenTox::Feature.find_or_create({RDF::DC.title => "#{@similarity_algorithm.capitalize} similarity", RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature]}, @subjectid)
+
+ @prediction_dataset.features = [ @predicted_variable, @predicted_confidence, @prediction_feature, @similarity_feature ]
+
+ prediction_feature_pos = @training_dataset.features.collect{|f| f.uri}.index @prediction_feature.uri
+
+ if @dataset_uri
+ compounds = OpenTox::Dataset.new(@dataset_uri, @subjectid).compounds
else
- # run feature generation algorithm
- feature_dataset_uri = OpenTox::Algorithm.new(params[:feature_generation_uri], @subjectid).run(params)
- @parameters << {RDF::DC.title => "feature_dataset_uri", RDF::OT.paramValue => feature_dataset_uri}
- self[RDF::OT.featureDataset] = feature_dataset_uri
+ compounds = [ OpenTox::Compound.new(@compound_uri, @subjectid) ]
end
- put
- @uri
+
+ @training_fingerprints = @feature_dataset.data_entries
+ @training_compounds = @training_dataset.compounds
+
+ query_fingerprints = OpenTox::Algorithm::Descriptor.send( @feature_calculation_algorithm, compounds, @feature_dataset.features.collect{ |f| f[RDF::DC.title] } )#.collect{|row| row.collect{|val| val ? val.to_f : 0.0 } }
+
+ compounds.each do |compound|
+
+ database_activities = @training_dataset.values(compound,@prediction_feature)
+ if database_activities and !database_activities.empty?
+ database_activities.each do |database_activity|
+ @prediction_dataset.add_data_entry compound, @prediction_feature, database_activity
+ end
+ next
+ else
+ # AM: transform to cosine space
+ @min_sim = (@min_sim.to_f*2.0-1.0).to_s if @similarity_algorithm =~ /cosine/
+ @training_activities = @training_dataset.data_entries.collect{|entry|
+ act = entry[prediction_feature_pos]
+ @prediction_feature.feature_type=="classification" ? @prediction_feature.value_map.invert[act] : act
+ }
+
+ @query_fingerprint = @feature_dataset.features.collect { |f|
+ val = query_fingerprints[compound][f.title]
+ bad_request_error "Can not parse value '#{val}' to numeric" if val and !val.numeric?
+ val ? val.to_f : 0.0
+ } # query structure
+
+ mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
+ mtf.transform
+
+ prediction = OpenTox::Algorithm::Neighbors.send(@prediction_algorithm,
+ { :props => mtf.props,
+ :activities => mtf.activities,
+ :sims => mtf.sims,
+ :value_map => @prediction_feature.feature_type=="classification" ? @prediction_feature.value_map : nil,
+ :min_train_performance => @min_train_performance
+ } )
+
+ predicted_value = prediction[:prediction]#.to_f
+ confidence_value = prediction[:confidence]#.to_f
+
+ # AM: transform to original space
+ confidence_value = ((confidence_value+1.0)/2.0).abs if @similarity_algorithm =~ /cosine/
+ predicted_value = @prediction_feature.value_map[prediction[:prediction].to_i] if @prediction_feature.feature_type == "classification"
+
+ end
+
+ @prediction_dataset.add_data_entry compound, @predicted_variable, predicted_value
+ @prediction_dataset.add_data_entry compound, @predicted_confidence, confidence_value
+
+ if @compound_uri # add neighbors only for compound predictions
+ @neighbors.each do |neighbor|
+ puts "Neighbor"
+ puts neighbor.inspect
+ n = neighbor[:compound]
+ @prediction_feature.feature_type == "classification" ? a = @prediction_feature.value_map[neighbor[:activity]] : a = neighbor[:activity]
+ @prediction_dataset.add_data_entry n, @prediction_feature, a
+ @prediction_dataset.add_data_entry n, @similarity_feature, neighbor[:similarity]
+ #@prediction_dataset << [ n, @prediction_feature.value_map[neighbor[:activity]], nil, nil, neighbor[:similarity] ]
+ end
+ end
+
+ end # iteration over compounds
+ puts prediction_dataset.to_turtle
+ @prediction_dataset.put
+ @prediction_dataset
+
end
end
diff --git a/lib/neighbors.rb b/lib/neighbors.rb
index b255f18..285afa8 100644
--- a/lib/neighbors.rb
+++ b/lib/neighbors.rb
@@ -4,14 +4,15 @@
* Author: Andreas Maunz <andreas@maunz.de>
* Date: 10/2012
=end
+require 'rinruby'
module OpenTox
- class Algorithm
+ module Algorithm
class Neighbors
# Get confidence.
- # @param[Hash] Required keys: :sims, :acts
+ # @param[Hash] Required keys: :sims, :activities
# @return[Float] Confidence
def self.get_confidence(params)
conf = params[:sims].inject{|sum,x| sum + x }
@@ -21,7 +22,7 @@ module OpenTox
end
# Classification with majority vote from neighbors weighted by similarity
- # @param [Hash] params Keys `:acts, :sims, :value_map` are required
+ # @param [Hash] params Keys `:activities, :sims, :value_map` are required
# @return [Numeric] A prediction value.
def self.weighted_majority_vote(params)
@@ -32,11 +33,11 @@ module OpenTox
$logger.debug "Weighted Majority Vote Classification."
- params[:acts].each_index do |idx|
+ params[:activities].each_index do |idx|
neighbor_weight = params[:sims][1][idx]
- neighbor_contribution += params[:acts][idx] * neighbor_weight
+ neighbor_contribution += params[:activities][idx] * neighbor_weight
if params[:value_map].size == 2 # AM: provide compat to binary classification: 1=>false 2=>true
- case params[:acts][idx]
+ case params[:activities][idx]
when 1
confidence_sum -= neighbor_weight
when 2
@@ -48,16 +49,16 @@ module OpenTox
end
if params[:value_map].size == 2
if confidence_sum >= 0.0
- prediction = 2 unless params[:acts].size==0
+ prediction = 2 unless params[:activities].size==0
elsif confidence_sum < 0.0
- prediction = 1 unless params[:acts].size==0
+ prediction = 1 unless params[:activities].size==0
end
else
- prediction = (neighbor_contribution/confidence_sum).round unless params[:acts].size==0 # AM: new multinomial prediction
+ prediction = (neighbor_contribution/confidence_sum).round unless params[:activities].size==0 # AM: new multinomial prediction
end
#$logger.debug "Prediction: '" + prediction.to_s + "'." unless prediction.nil?
- confidence = (confidence_sum/params[:acts].size).abs if params[:acts].size > 0
+ confidence = (confidence_sum/params[:activities].size).abs if params[:activities].size > 0
#$logger.debug "Confidence: '" + confidence.to_s + "'." unless prediction.nil?
return {:prediction => prediction, :confidence => confidence.abs}
end
@@ -65,7 +66,7 @@ module OpenTox
# Local support vector regression from neighbors
- # @param [Hash] params Keys `:props, :acts, :sims, :min_train_performance` are required
+ # @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
# @return [Numeric] A prediction value.
def self.local_svm_regression(params)
puts "SVM"
@@ -74,17 +75,17 @@ module OpenTox
prediction = nil
$logger.debug "Local SVM."
- if params[:acts].size>0
+ if params[:activities].size>0
if params[:props]
n_prop = params[:props][0].collect.to_a
q_prop = params[:props][1].collect.to_a
props = [ n_prop, q_prop ]
end
- acts = params[:acts].collect.to_a
- prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
+ activities = params[:activities].collect.to_a
+ prediction = local_svm_prop( props, activities, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
prediction = nil if (!prediction.nil? && prediction.infinite?)
#$logger.debug "Prediction: '" + prediction.to_s + "' ('#{prediction.class}')."
- confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]})
+ confidence = get_confidence({:sims => params[:sims][1], :activities => params[:activities]})
confidence = 0.0 if prediction.nil?
end
{:prediction => prediction, :confidence => confidence}
@@ -93,7 +94,7 @@ module OpenTox
# Local support vector regression from neighbors
- # @param [Hash] params Keys `:props, :acts, :sims, :min_train_performance` are required
+ # @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
# @return [Numeric] A prediction value.
def self.local_svm_classification(params)
@@ -101,19 +102,19 @@ module OpenTox
prediction = nil
$logger.debug "Local SVM."
- if params[:acts].size>0
+ if params[:activities].size>0
if params[:props]
n_prop = params[:props][0].collect.to_a
q_prop = params[:props][1].collect.to_a
props = [ n_prop, q_prop ]
end
- acts = params[:acts].collect.to_a
- acts = acts.collect{|v| "Val" + v.to_s} # Convert to string for R to recognize classification
- prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
+ activities = params[:activities].collect.to_a
+ activities = activities.collect{|v| "Val" + v.to_s} # Convert to string for R to recognize classification
+ prediction = local_svm_prop( props, activities, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
prediction = prediction.sub(/Val/,"") if prediction # Convert back
confidence = 0.0 if prediction.nil?
#$logger.debug "Prediction: '" + prediction.to_s + "' ('#{prediction.class}')."
- confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]})
+ confidence = get_confidence({:sims => params[:sims][1], :activities => params[:activities]})
end
{:prediction => prediction, :confidence => confidence}
@@ -125,18 +126,18 @@ module OpenTox
# Uses propositionalized setting.
# Not to be called directly (use local_svm_regression or local_svm_classification).
# @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
- # @param [Array] acts, activities for neighbors.
+ # @param [Array] activities, activities for neighbors.
# @param [Float] min_train_performance, parameter to control censoring
# @return [Numeric] A prediction value.
- def self.local_svm_prop(props, acts, min_train_performance)
+ def self.local_svm_prop(props, activities, min_train_performance)
$logger.debug "Local SVM (Propositionalization / Kernlab Kernel)."
n_prop = props[0] # is a matrix, i.e. two nested Arrays.
q_prop = props[1] # is an Array.
prediction = nil
- if acts.uniq.size == 1
- prediction = acts[0]
+ if activities.uniq.size == 1
+ prediction = activities[0]
else
#$logger.debug gram_matrix.to_yaml
@r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
@@ -151,7 +152,7 @@ module OpenTox
@r.n_prop = n_prop.flatten
@r.n_prop_x_size = n_prop.size
@r.n_prop_y_size = n_prop[0].size
- @r.y = acts
+ @r.y = activities
@r.q_prop = q_prop
#@r.eval "y = matrix(y)"
@r.eval "prop_matrix = matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=T)"
@@ -196,22 +197,29 @@ module OpenTox
EOR
- # prediction
- $logger.debug "Predicting ..."
- @r.eval "predict(model,q_prop); p = predict(model,q_prop)" # kernlab bug: predict twice
- @r.eval "if (class(y)!='numeric') p = as.character(p)"
- prediction = @r.p
-
- # censoring
- prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance.to_f )
- prediction = nil if prediction =~ /NA/
- prediction = nil unless train_success
- $logger.debug "Performance: '#{sprintf("%.2f", @r.perf)}'"
- #rescue Exception => e
- #$logger.debug "#{e.class}: #{e.message}"
- #$logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+ if train_success
+ # prediction
+ $logger.debug "Predicting ..."
+ @r.eval "predict(model,q_prop); p = predict(model,q_prop)" # kernlab bug: predict twice
+ #@r.eval "p = predict(model,q_prop)" # kernlab bug: predict twice
+ @r.eval "if (class(y)!='numeric') p = as.character(p)"
+ prediction = @r.p
+
+ # censoring
+ prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance.to_f )
+ prediction = nil if prediction =~ /NA/
+ $logger.debug "Performance: '#{sprintf("%.2f", @r.perf)}'"
+ else
+ $logger.debug "Model creation failed."
+ prediction = nil
+ end
+ rescue Exception => e
+ $logger.debug "#{e.class}: #{e.message}"
+ $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
ensure
- @r.quit # free R
+ #puts @r.inspect
+ #TODO: broken pipe
+ #@r.quit # free R
end
end
prediction
diff --git a/lib/similarity.rb b/lib/similarity.rb
index 22b4c28..cdac4b8 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -6,7 +6,7 @@
=end
module OpenTox
- class Algorithm
+ module Algorithm
class Similarity
diff --git a/lib/transform.rb b/lib/transform.rb
index b2f7e7e..7b92df5 100644
--- a/lib/transform.rb
+++ b/lib/transform.rb
@@ -6,7 +6,7 @@
=end
module OpenTox
- class Algorithm
+ module Algorithm
class Transform
# Uses Statsample Library (http://ruby-statsample.rubyforge.org/) by C. Bustos
@@ -231,7 +231,7 @@ module OpenTox
# Attaches transformations to an OpenTox::Model
# Stores props, sims, performs similarity calculations
class ModelTransformer
- attr_accessor :model, :similarity_algorithm, :acts, :sims
+ attr_accessor :model, :similarity_algorithm, :activities, :sims
# @params[OpenTox::Model] model Model to transform
def initialize model
@@ -241,7 +241,7 @@ module OpenTox
# Transforms the model
def transform
- get_matrices # creates @n_prop, @q_prop, @acts from ordered fps
+ get_matrices # creates @n_prop, @q_prop, @activities from ordered fingerprints
@ids = (0..((@n_prop.length)-1)).to_a # surviving compounds; become neighbors
if (@model.similarity_algorithm =~ /cosine/)
@@ -258,9 +258,9 @@ module OpenTox
$logger.debug "M: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
# adjust rest
- fps_tmp = []; @ids.each { |idx| fps_tmp << @fps[idx] }; @fps = fps_tmp
- cmpds_tmp = []; @ids.each { |idx| cmpds_tmp << @cmpds[idx] }; @cmpds = cmpds_tmp
- acts_tmp = []; @ids.each { |idx| acts_tmp << @acts[idx] }; @acts = acts_tmp
+ #fingerprints_tmp = []; @ids.each { |idx| fingerprints_tmp << @fingerprints[idx] }; @fingerprints = fingerprints_tmp
+ compounds_tmp = []; @ids.each { |idx| compounds_tmp << @compounds[idx] }; @compounds = compounds_tmp
+ acts_tmp = []; @ids.each { |idx| acts_tmp << @activities[idx] }; @activities = acts_tmp
# scale and svd
nr_cases, nr_features = @n_prop.size, @n_prop[0].size
@@ -284,7 +284,7 @@ module OpenTox
@sims = [] # calculated by neighbor routine
neighbors
n_prop_tmp = []; @ids.each { |idx| n_prop_tmp << @n_prop[idx] }; @n_prop = n_prop_tmp # select neighbors from matrix
- acts_tmp = []; @ids.each { |idx| acts_tmp << @acts[idx] }; @acts = acts_tmp
+ acts_tmp = []; @ids.each { |idx| acts_tmp << @activities[idx] }; @activities = acts_tmp
# Sims between neighbors, if necessary
@@ -312,7 +312,7 @@ module OpenTox
end
$logger.debug "F: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}" if (@n_prop && @n_prop[0] && @q_prop)
- $logger.debug "Sims: #{@sims.size}, Acts: #{@acts.size}"
+ $logger.debug "Sims: #{@sims.size}, Acts: #{@activities.size}"
@sims = [ gram_matrix, @sims ]
@@ -334,13 +334,13 @@ module OpenTox
# @param[Array] training_props Propositionalized data for this neighbor
# @param[Integer] Index of neighbor
def add_neighbor(training_props, idx)
- unless @model.acts[idx].nil?
+ unless @model.training_activities[idx].nil?
sim = similarity(training_props)
if sim > @model.min_sim.to_f
@model.neighbors << {
- :compound => @cmpds[idx],
+ :compound => @compounds[idx],
:similarity => sim,
- :activity => acts[idx]
+ :activity => activities[idx]
}
@sims << sim
@ids << idx
@@ -400,11 +400,12 @@ module OpenTox
# Converts fingerprints to matrix, order of rows by fingerprints. nil values allowed.
# Same for compound fingerprints.
def get_matrices
- @cmpds = @model.cmpds
- @fps = @model.fps
- @acts = @model.acts
- @n_prop = @model.n_prop
- @q_prop = @model.q_prop
+ @compounds = @model.training_compounds
+ puts @compounds.inspect
+ #@fingerprints = @model.fingerprints
+ @activities = @model.training_activities
+ @n_prop = @model.training_fingerprints
+ @q_prop = @model.query_fingerprint
end
# Returns propositionalized data, if appropriate, or nil