summary refs log tree commit diff
path: root/lib/lazar.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/lazar.rb')
-rw-r--r-- lib/lazar.rb 117
1 files changed, 68 insertions, 49 deletions
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 19f8cdd..399f5c1 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -23,8 +23,8 @@ module OpenTox
field :training_dataset_id, type: BSON::ObjectId
field :feature_dataset_id, type: BSON::ObjectId
# algorithms
- field :feature_generation, type: String
- field :feature_calculation_algorithm, type: String
+ #field :feature_generation, type: String
+ #field :feature_calculation_algorithm, type: String
field :prediction_algorithm, type: String
field :similarity_algorithm, type: String
# prediction features
@@ -34,7 +34,7 @@ module OpenTox
# parameters
field :nr_hits, type: Boolean
field :min_sim, type: Float
- field :propositionalized, type:Boolean
+ #field :propositionalized, type:Boolean
field :min_train_performance, type: Float
attr_accessor :prediction_dataset
@@ -54,7 +54,6 @@ module OpenTox
bad_request_error "No features found in feature dataset #{feature_dataset.id}." if feature_dataset.features.empty?
lazar.feature_dataset_id = feature_dataset.id
@training_dataset = training_dataset
- #@training_dataset = OpenTox::Dataset.find(feature_dataset.parameters.select{|p| p["title"] == "dataset_id"}.first["paramValue"])
bad_request_error "Training dataset compounds do not match feature dataset compounds. Please ensure that they are in the same order." unless @training_dataset.compounds == feature_dataset.compounds
lazar.training_dataset_id = @training_dataset.id
@@ -73,31 +72,26 @@ module OpenTox
lazar.prediction_algorithm = params[:prediction_algorithm]
end
- unless lazar.prediction_algorithm
- lazar.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote" if prediction_feature.nominal
- lazar.prediction_algorithm = "OpenTox::Algorithm::Regression.local_svm_regression" if prediction_feature.numeric
+ unless lazar.prediction_algorithm # set defaults
+ # TODO consider params
+ if prediction_feature.nominal
+ lazar.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote"
+ lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.tanimoto"
+ lazar.min_sim = 0.3 unless lazar.min_sim
+ elsif prediction_feature.numeric
+ lazar.prediction_algorithm = "OpenTox::Algorithm::Regression.local_svm_regression"
+ lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.cosine"
+ # cosine similarity is default
+ lazar.min_sim = 0.7 unless lazar.min_sim
+ end
end
- lazar.prediction_algorithm =~ /majority_vote/ ? lazar.propositionalized = false : lazar.propositionalized = true
+ #lazar.prediction_algorithm =~ /majority_vote/ ? lazar.propositionalized = false : lazar.propositionalized = true
lazar.min_sim = params[:min_sim].to_f if params[:min_sim] and params[:min_sim].numeric?
+ # TODO: get info from training_dataset
lazar.nr_hits = nr_hits
- lazar.feature_generation = feature_dataset.training_algorithm
+ #lazar.feature_generation = feature_dataset.training_algorithm
#lazar.parameters << {"title" => "feature_generation_uri", "paramValue" => params[:feature_generation_uri]}
- if lazar.feature_generation =~ /fminer|bbrc|last/
- if lazar[:nr_hits]
- lazar.feature_calculation_algorithm = "OpenTox::Algorithm::Descriptor.smarts_count"
- else
- lazar.feature_calculation_algorithm = "OpenTox::Algorithm::Descriptor.smarts_match"
- end
- lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.tanimoto"
- lazar.min_sim = 0.3 unless lazar.min_sim
- elsif lazar.feature_generation =~/descriptor/ or lazar.feature_generation.nil?
- # cosine similarity is default (e.g. used when no feature_generation_uri is given and a feature_dataset_uri is provided instead)
- lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.cosine"
- lazar.min_sim = 0.7 unless lazar.min_sim
- else
- bad_request_error "unkown feature generation method #{lazar.feature_generation}"
- end
bad_request_error "Parameter min_train_performance is not numeric." if params[:min_train_performance] and !params[:min_train_performance].numeric?
lazar.min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance] and params[:min_train_performance].numeric?
@@ -107,7 +101,7 @@ module OpenTox
lazar
end
- def predict params
+ def predict object
# tailored for performance
# all consistency checks should be done during model creation
@@ -131,20 +125,21 @@ module OpenTox
@feature_dataset = OpenTox::Dataset.find(feature_dataset_id)
compounds = []
- if params[:compound]
- compounds = [ params[:compound]]
- elsif params[:compounds]
- compounds = params[:compounds]
- elsif params[:dataset]
- compounds = params[:dataset].compounds
+ case object.class.to_s
+ when "OpenTox::Compound"
+ compounds = [object]
+ when "Array"
+ compounds = object
+ when "OpenTox::Dataset"
+ compounds = object.compounds
else
- bad_request_error "Please provide one of the parameters: :compound, :compounds, :dataset"
+ bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter."
end
$logger.debug "Setup: #{Time.now-time}"
time = Time.now
- @query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.smarts} )
+ @query_fingerprint = Algorithm.run(feature_dataset.feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.name} )
$logger.debug "Fingerprint calculation: #{Time.now-time}"
time = Time.now
@@ -166,35 +161,59 @@ module OpenTox
end
next
else
-
- # TODO reintroduce for regression
- #mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
- #mtf.transform
- #
+ t = Time.new
+
+ if prediction_algorithm =~ /Regression/
+ mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
+ mtf.transform
+ training_fingerprints = mtf.n_prop
+ training_activities = mtf.activities
+ p training_activities
+ query_fingerprint = mtf.q_prop
+ neighbors = [[nil,nil,nil,query_fingerprint]]
+ else
+ training_fingerprints = @feature_dataset.data_entries
+ # TODO fix for multi feature datasets
+ training_activities = @training_dataset.data_entries[i].first
+ query_fingerprint = @query_fingerprint[c]
+ neighbors = []
+ end
+ $logger.debug "Transform: #{Time.now-t}"
+ t = Time.new
+
# find neighbors
- neighbors = []
- @feature_dataset.data_entries.each_with_index do |fingerprint, i|
-
- sim = Algorithm.run(similarity_algorithm,fingerprint, @query_fingerprint[c])
- # TODO fix for multi feature datasets
- neighbors << [@feature_dataset.compounds[i],@training_dataset.data_entries[i].first,sim] if sim > self.min_sim
+ training_fingerprints.each_with_index do |fingerprint, i|
+
+ sim = Algorithm.run(similarity_algorithm,fingerprint, query_fingerprint)
+ if sim > self.min_sim
+ if prediction_algorithm =~ /Regression/
+ neighbors << [@feature_dataset.compounds[i],sim,training_activities[i], fingerprint]
+ else
+ neighbors << [@feature_dataset.compounds[i],sim,training_activities[i]]
+ end
+ end
end
- prediction = Algorithm.run(prediction_algorithm, neighbors)
+ if prediction_algorithm =~ /Regression/
+ prediction = Algorithm.run(prediction_algorithm, neighbors, :min_train_performance => self.min_train_performance)
+ else
+ prediction = Algorithm.run(prediction_algorithm, neighbors)
+ end
$logger.debug "Prediction time: #{Time.now-time}"
time = Time.now
+ p prediction
# AM: transform to original space (TODO)
- confidence_value = ((confidence_value+1.0)/2.0).abs if similarity_algorithm =~ /cosine/
+ confidence_value = ((confidence_value+1.0)/2.0).abs if prediction.first and similarity_algorithm =~ /cosine/
- $logger.debug "predicted value: #{prediction[:prediction]}, confidence: #{prediction[:confidence]}"
+ $logger.debug "predicted value: #{prediction[0]}, confidence: #{prediction[1]}"
end
prediction_dataset.compound_ids << compound
- prediction_dataset[c,0] = prediction[:prediction]
- prediction_dataset[c,1] = prediction[:confidence]
+ prediction_dataset[c,0] = prediction[0]
+ prediction_dataset[c,1] = prediction[1]
end
prediction_dataset