summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2015-07-31 19:24:45 +0200
committer Christoph Helma <helma@in-silico.ch> 2015-07-31 19:24:45 +0200
commit 28c41fc27bea4668ee1dc3c8d1f086e64d271b5a (patch)
tree 806fdf0e4ef8fefc81ed8109908a0f0f81564e9a
parent c5cbcd9b617047c0933e465d4cb247618920ec6d (diff)
intermediary commit
-rw-r--r-- algorithm.gemspec 3
-rw-r--r-- lib/classification.rb 37
-rw-r--r-- lib/descriptor.rb 10
-rw-r--r-- lib/lazar.rb 117
-rw-r--r-- lib/opentox-algorithm.rb 3
-rw-r--r-- lib/transform.rb 7
6 files changed, 118 insertions, 59 deletions
diff --git a/algorithm.gemspec b/algorithm.gemspec
index c3119e6..1a94225 100644
--- a/algorithm.gemspec
+++ b/algorithm.gemspec
@@ -20,7 +20,8 @@ Gem::Specification.new do |s|
# specify any dependencies here; for example:
#s.add_runtime_dependency "opentox-server"
s.add_runtime_dependency "opentox-client"
- s.add_runtime_dependency 'rinruby'#, "~>2.0.2"
+ s.add_runtime_dependency 'rserve-client'#, "~>2.0.2"
+ #s.add_runtime_dependency 'rinruby'#, "~>2.0.2"
s.add_runtime_dependency 'nokogiri'#, "~>1.4.4"
s.add_runtime_dependency 'statsample'#, "~>1.1"
s.add_runtime_dependency 'gsl'#, "~>1.14"
diff --git a/lib/classification.rb b/lib/classification.rb
index f6c9b11..127fa28 100644
--- a/lib/classification.rb
+++ b/lib/classification.rb
@@ -17,10 +17,10 @@ module OpenTox
$logger.debug "Weighted Majority Vote Classification."
- values = neighbors.collect{|n| n[1]}.uniq
+ values = neighbors.collect{|n| n[2]}.uniq
neighbors.each do |neighbor|
- neighbor_weight = neighbor[2]
- activity = values.index(neighbor[1]) + 1 # map values to integers > 1
+ neighbor_weight = neighbor[1]
+ activity = values.index(neighbor[2]) + 1 # map values to integers > 1
neighbor_contribution += activity * neighbor_weight
if values.size == 2 # AM: provide compat to binary classification: 1=>false 2=>true
case activity
@@ -46,9 +46,38 @@ module OpenTox
$logger.debug "Prediction: '" + prediction.to_s + "'." unless prediction.nil?
confidence = (confidence_sum/neighbors.size).abs
$logger.debug "Confidence: '" + confidence.to_s + "'." unless prediction.nil?
- return {:prediction => prediction, :confidence => confidence.abs}
+ [prediction, confidence.abs]
end
+ # Local support vector classification from neighbors
+ # @param [Hash] params Keys `:activities` and `:sims` are read; `:props` is optional (nil signals non-prop setting); `:min_train_performance` is passed on to local_svm_prop
+ # @return [Hash] `{:prediction => ..., :confidence => ...}`; prediction is nil and confidence 0.0 when `:activities` is empty
+ def self.local_svm_classification(params)
+
+ confidence = 0.0
+ prediction = nil
+
+ $logger.debug "Local SVM."
+ if params[:activities].size>0
+ if params[:props]
+ n_prop = params[:props][0].collect.to_a
+ q_prop = params[:props][1].collect.to_a
+ props = [ n_prop, q_prop ]
+ end
+ activities = params[:activities].collect.to_a
+ activities = activities.collect{|v| "Val" + v.to_s} # Prefix each value with "Val" so it is a string for R to recognize classification
+ prediction = local_svm_prop( props, activities, params[:min_train_performance]) # props stays nil when params[:props] is nil, which signals the non-prop setting
+ prediction = prediction.sub(/Val/,"") if prediction # Strip the "Val" prefix again to convert back
+ confidence = 0.0 if prediction.nil?
+ #$logger.debug "Prediction: '" + prediction.to_s + "' ('#{prediction.class}')."
+ confidence = get_confidence({:sims => params[:sims][1], :activities => params[:activities]}) # NOTE(review): runs unconditionally, so it overwrites the 0.0 assigned above for a nil prediction - confirm this is intended
+ end
+ {:prediction => prediction, :confidence => confidence}
+
+ end
+
+
+
end
end
diff --git a/lib/descriptor.rb b/lib/descriptor.rb
index 1d43d7d..8ec7480 100644
--- a/lib/descriptor.rb
+++ b/lib/descriptor.rb
@@ -1,7 +1,6 @@
require 'digest/md5'
ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk"
BABEL_3D_CACHE_DIR = File.join(File.dirname(__FILE__),"..",'/babel_3d_cache')
-# TODO store 3D structures in mongodb
# TODO store descriptors in mongodb
module OpenTox
@@ -59,6 +58,7 @@ module OpenTox
bad_request_error "Compounds for smarts_match are empty" unless compounds
bad_request_error "Smarts for smarts_match are empty" unless smarts
parse compounds
+ @count = count
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
obconversion.set_in_format('inchi')
@@ -100,13 +100,19 @@ module OpenTox
@data_entries
end
when "OpenTox::Dataset"
- dataset = OpenTox::Dataset.new(:compound_ids => @compounds.collect{|c| c.id})
+ dataset = OpenTox::DescriptorDataset.new(:compound_ids => @compounds.collect{|c| c.id})
if @smarts
dataset.feature_ids = @smarts.collect{|smart| Smarts.find_or_create_by(:smarts => smart).id}
+ @count ? algo = "count" : algo = "match"
+ dataset.feature_calculation_algorithm = "#{self}.smarts_#{algo}"
+
elsif @physchem_descriptors
dataset.feature_ids = @physchem_descriptors.collect{|d| PhysChemDescriptor.find_or_create_by(:name => d, :creator => __FILE__).id}
dataset.data_entries = @data_entries
+ dataset.feature_calculation_algorithm = "#{self}.physchem"
+ #TODO params?
end
+ dataset.save_all
dataset
end
end
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 19f8cdd..399f5c1 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -23,8 +23,8 @@ module OpenTox
field :training_dataset_id, type: BSON::ObjectId
field :feature_dataset_id, type: BSON::ObjectId
# algorithms
- field :feature_generation, type: String
- field :feature_calculation_algorithm, type: String
+ #field :feature_generation, type: String
+ #field :feature_calculation_algorithm, type: String
field :prediction_algorithm, type: String
field :similarity_algorithm, type: String
# prediction features
@@ -34,7 +34,7 @@ module OpenTox
# parameters
field :nr_hits, type: Boolean
field :min_sim, type: Float
- field :propositionalized, type:Boolean
+ #field :propositionalized, type:Boolean
field :min_train_performance, type: Float
attr_accessor :prediction_dataset
@@ -54,7 +54,6 @@ module OpenTox
bad_request_error "No features found in feature dataset #{feature_dataset.id}." if feature_dataset.features.empty?
lazar.feature_dataset_id = feature_dataset.id
@training_dataset = training_dataset
- #@training_dataset = OpenTox::Dataset.find(feature_dataset.parameters.select{|p| p["title"] == "dataset_id"}.first["paramValue"])
bad_request_error "Training dataset compounds do not match feature dataset compounds. Please ensure that they are in the same order." unless @training_dataset.compounds == feature_dataset.compounds
lazar.training_dataset_id = @training_dataset.id
@@ -73,31 +72,26 @@ module OpenTox
lazar.prediction_algorithm = params[:prediction_algorithm]
end
- unless lazar.prediction_algorithm
- lazar.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote" if prediction_feature.nominal
- lazar.prediction_algorithm = "OpenTox::Algorithm::Regression.local_svm_regression" if prediction_feature.numeric
+ unless lazar.prediction_algorithm # set defaults
+ # TODO consider params
+ if prediction_feature.nominal
+ lazar.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote"
+ lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.tanimoto"
+ lazar.min_sim = 0.3 unless lazar.min_sim
+ elsif prediction_feature.numeric
+ lazar.prediction_algorithm = "OpenTox::Algorithm::Regression.local_svm_regression"
+ lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.cosine"
+ # cosine similarity is default
+ lazar.min_sim = 0.7 unless lazar.min_sim
+ end
end
- lazar.prediction_algorithm =~ /majority_vote/ ? lazar.propositionalized = false : lazar.propositionalized = true
+ #lazar.prediction_algorithm =~ /majority_vote/ ? lazar.propositionalized = false : lazar.propositionalized = true
lazar.min_sim = params[:min_sim].to_f if params[:min_sim] and params[:min_sim].numeric?
+ # TODO: get info from training_dataset
lazar.nr_hits = nr_hits
- lazar.feature_generation = feature_dataset.training_algorithm
+ #lazar.feature_generation = feature_dataset.training_algorithm
#lazar.parameters << {"title" => "feature_generation_uri", "paramValue" => params[:feature_generation_uri]}
- if lazar.feature_generation =~ /fminer|bbrc|last/
- if lazar[:nr_hits]
- lazar.feature_calculation_algorithm = "OpenTox::Algorithm::Descriptor.smarts_count"
- else
- lazar.feature_calculation_algorithm = "OpenTox::Algorithm::Descriptor.smarts_match"
- end
- lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.tanimoto"
- lazar.min_sim = 0.3 unless lazar.min_sim
- elsif lazar.feature_generation =~/descriptor/ or lazar.feature_generation.nil?
- # cosine similartiy is default (e.g. used when no fetature_generation_uri is given and a feature_dataset_uri is provided instead)
- lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.cosine"
- lazar.min_sim = 0.7 unless lazar.min_sim
- else
- bad_request_error "unkown feature generation method #{lazar.feature_generation}"
- end
bad_request_error "Parameter min_train_performance is not numeric." if params[:min_train_performance] and !params[:min_train_performance].numeric?
lazar.min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance] and params[:min_train_performance].numeric?
@@ -107,7 +101,7 @@ module OpenTox
lazar
end
- def predict params
+ def predict object
# tailored for performance
# all consistency checks should be done during model creation
@@ -131,20 +125,21 @@ module OpenTox
@feature_dataset = OpenTox::Dataset.find(feature_dataset_id)
compounds = []
- if params[:compound]
- compounds = [ params[:compound]]
- elsif params[:compounds]
- compounds = params[:compounds]
- elsif params[:dataset]
- compounds = params[:dataset].compounds
+ case object.class.to_s
+ when "OpenTox::Compound"
+ compounds = [object]
+ when "Array"
+ compounds = object
+ when "OpenTox::Dataset"
+ compounds = object.compounds
else
- bad_request_error "Please provide one of the parameters: :compound, :compounds, :dataset"
+ bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter."
end
$logger.debug "Setup: #{Time.now-time}"
time = Time.now
- @query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.smarts} )
+ @query_fingerprint = Algorithm.run(feature_dataset.feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.name} )
$logger.debug "Fingerprint calculation: #{Time.now-time}"
time = Time.now
@@ -166,35 +161,59 @@ module OpenTox
end
next
else
-
- # TODO reintroduce for regression
- #mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
- #mtf.transform
- #
+ t = Time.new
+
+ if prediction_algorithm =~ /Regression/
+ mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
+ mtf.transform
+ training_fingerprints = mtf.n_prop
+ training_activities = mtf.activities
+ p training_activities
+ query_fingerprint = mtf.q_prop
+ neighbors = [[nil,nil,nil,query_fingerprint]]
+ else
+ training_fingerprints = @feature_dataset.data_entries
+ # TODO fix for multi feature datasets
+ training_activities = @training_dataset.data_entries[i].first
+ query_fingerprint = @query_fingerprint[c]
+ neighbors = []
+ end
+ $logger.debug "Transform: #{Time.now-t}"
+ t = Time.new
+
# find neighbors
- neighbors = []
- @feature_dataset.data_entries.each_with_index do |fingerprint, i|
-
- sim = Algorithm.run(similarity_algorithm,fingerprint, @query_fingerprint[c])
- # TODO fix for multi feature datasets
- neighbors << [@feature_dataset.compounds[i],@training_dataset.data_entries[i].first,sim] if sim > self.min_sim
+ training_fingerprints.each_with_index do |fingerprint, i|
+
+ sim = Algorithm.run(similarity_algorithm,fingerprint, query_fingerprint)
+ if sim > self.min_sim
+ if prediction_algorithm =~ /Regression/
+ neighbors << [@feature_dataset.compounds[i],sim,training_activities[i], fingerprint]
+ else
+ neighbors << [@feature_dataset.compounds[i],sim,training_activities[i]]
+ end
+ end
end
- prediction = Algorithm.run(prediction_algorithm, neighbors)
+ if prediction_algorithm =~ /Regression/
+ prediction = Algorithm.run(prediction_algorithm, neighbors, :min_train_performance => self.min_train_performance)
+ else
+ prediction = Algorithm.run(prediction_algorithm, neighbors)
+ end
$logger.debug "Prediction time: #{Time.now-time}"
time = Time.now
+ p prediction
# AM: transform to original space (TODO)
- confidence_value = ((confidence_value+1.0)/2.0).abs if similarity_algorithm =~ /cosine/
+ confidence_value = ((confidence_value+1.0)/2.0).abs if prediction.first and similarity_algorithm =~ /cosine/
- $logger.debug "predicted value: #{prediction[:prediction]}, confidence: #{prediction[:confidence]}"
+ $logger.debug "predicted value: #{prediction[0]}, confidence: #{prediction[1]}"
end
prediction_dataset.compound_ids << compound
- prediction_dataset[c,0] = prediction[:prediction]
- prediction_dataset[c,1] = prediction[:confidence]
+ prediction_dataset[c,0] = prediction[0]
+ prediction_dataset[c,1] = prediction[1]
end
prediction_dataset
diff --git a/lib/opentox-algorithm.rb b/lib/opentox-algorithm.rb
index d768cfd..7743247 100644
--- a/lib/opentox-algorithm.rb
+++ b/lib/opentox-algorithm.rb
@@ -19,5 +19,6 @@ require_relative "fminer.rb"
require_relative "lazar.rb"
require_relative "transform.rb"
require_relative "similarity.rb"
-require_relative "neighbors.rb"
+#require_relative "neighbors.rb"
require_relative "classification.rb"
+require_relative "regression.rb"
diff --git a/lib/transform.rb b/lib/transform.rb
index fad9517..15b7b60 100644
--- a/lib/transform.rb
+++ b/lib/transform.rb
@@ -231,7 +231,7 @@ module OpenTox
# Attaches transformations to an OpenTox::Model
# Stores props, sims, performs similarity calculations
class ModelTransformer
- attr_accessor :model, :similarity_algorithm, :activities, :sims
+ attr_accessor :model, :similarity_algorithm, :activities, :sims, :n_prop, :q_prop
# @params[OpenTox::Model] model Model to transform
def initialize model
@@ -282,6 +282,7 @@ module OpenTox
@ids = [] # surviving compounds become neighbors
@sims = [] # calculated by neighbor routine
+=begin
neighbors
n_prop_tmp = []; @ids.each { |idx| n_prop_tmp << @n_prop[idx] }; @n_prop = n_prop_tmp # select neighbors from matrix
acts_tmp = []; @ids.each { |idx| acts_tmp << @activities[idx] }; @activities = acts_tmp
@@ -315,6 +316,7 @@ module OpenTox
$logger.debug "Sims: #{@sims.size}, Acts: #{@activities.size}"
@sims = [ gram_matrix, @sims ]
+=end
end
@@ -393,7 +395,8 @@ module OpenTox
# @param[Array] A propositionalized data entry
# @return[Float] Similarity to query structure
def similarity(training_props)
- OpenTox::Algorithm::Similarity.send(@model.similarity_algorithm,training_props, @q_prop)
+ eval("#{@model.similarity_algorithm}(#{training_props}, #{@q_prop})")
+ #OpenTox::Algorithm::Similarity.send(@model.similarity_algorithm,training_props, @q_prop)
end