diff options
Diffstat (limited to 'lib/lazar.rb')
-rw-r--r-- | lib/lazar.rb | 117 |
1 files changed, 68 insertions, 49 deletions
diff --git a/lib/lazar.rb b/lib/lazar.rb index 19f8cdd..399f5c1 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -23,8 +23,8 @@ module OpenTox field :training_dataset_id, type: BSON::ObjectId field :feature_dataset_id, type: BSON::ObjectId # algorithms - field :feature_generation, type: String - field :feature_calculation_algorithm, type: String + #field :feature_generation, type: String + #field :feature_calculation_algorithm, type: String field :prediction_algorithm, type: String field :similarity_algorithm, type: String # prediction features @@ -34,7 +34,7 @@ module OpenTox # parameters field :nr_hits, type: Boolean field :min_sim, type: Float - field :propositionalized, type:Boolean + #field :propositionalized, type:Boolean field :min_train_performance, type: Float attr_accessor :prediction_dataset @@ -54,7 +54,6 @@ module OpenTox bad_request_error "No features found in feature dataset #{feature_dataset.id}." if feature_dataset.features.empty? lazar.feature_dataset_id = feature_dataset.id @training_dataset = training_dataset - #@training_dataset = OpenTox::Dataset.find(feature_dataset.parameters.select{|p| p["title"] == "dataset_id"}.first["paramValue"]) bad_request_error "Training dataset compounds do not match feature dataset compounds. Please ensure that they are in the same order." unless @training_dataset.compounds == feature_dataset.compounds lazar.training_dataset_id = @training_dataset.id @@ -73,31 +72,26 @@ module OpenTox lazar.prediction_algorithm = params[:prediction_algorithm] end - unless lazar.prediction_algorithm - lazar.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote" if prediction_feature.nominal - lazar.prediction_algorithm = "OpenTox::Algorithm::Regression.local_svm_regression" if prediction_feature.numeric + unless lazar.prediction_algorithm # set defaults + # TODO consider params + if prediction_feature.nominal + lazar.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote" + lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.tanimoto" + lazar.min_sim = 0.3 unless lazar.min_sim + elsif prediction_feature.numeric + lazar.prediction_algorithm = "OpenTox::Algorithm::Regression.local_svm_regression" + lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.cosine" + # cosine similartiy is default + lazar.min_sim = 0.7 unless lazar.min_sim + end end - lazar.prediction_algorithm =~ /majority_vote/ ? lazar.propositionalized = false : lazar.propositionalized = true + #lazar.prediction_algorithm =~ /majority_vote/ ? lazar.propositionalized = false : lazar.propositionalized = true lazar.min_sim = params[:min_sim].to_f if params[:min_sim] and params[:min_sim].numeric? + # TODO: get info from training_dataset lazar.nr_hits = nr_hits - lazar.feature_generation = feature_dataset.training_algorithm + #lazar.feature_generation = feature_dataset.training_algorithm #lazar.parameters << {"title" => "feature_generation_uri", "paramValue" => params[:feature_generation_uri]} - if lazar.feature_generation =~ /fminer|bbrc|last/ - if lazar[:nr_hits] - lazar.feature_calculation_algorithm = "OpenTox::Algorithm::Descriptor.smarts_count" - else - lazar.feature_calculation_algorithm = "OpenTox::Algorithm::Descriptor.smarts_match" - end - lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.tanimoto" - lazar.min_sim = 0.3 unless lazar.min_sim - elsif lazar.feature_generation =~/descriptor/ or lazar.feature_generation.nil? - # cosine similartiy is default (e.g. used when no fetature_generation_uri is given and a feature_dataset_uri is provided instead) - lazar.similarity_algorithm = "OpenTox::Algorithm::Similarity.cosine" - lazar.min_sim = 0.7 unless lazar.min_sim - else - bad_request_error "unkown feature generation method #{lazar.feature_generation}" - end bad_request_error "Parameter min_train_performance is not numeric." if params[:min_train_performance] and !params[:min_train_performance].numeric? lazar.min_train_performance = params[:min_train_performance].to_f if params[:min_train_performance] and params[:min_train_performance].numeric? @@ -107,7 +101,7 @@ module OpenTox lazar end - def predict params + def predict object # tailored for performance # all consistency checks should be done during model creation @@ -131,20 +125,21 @@ module OpenTox @feature_dataset = OpenTox::Dataset.find(feature_dataset_id) compounds = [] - if params[:compound] - compounds = [ params[:compound]] - elsif params[:compounds] - compounds = params[:compounds] - elsif params[:dataset] - compounds = params[:dataset].compounds + case object.class.to_s + when "OpenTox::Compound" + compounds = [object] + when "Array" + compounds = object + when "OpenTox::Dataset" + compounds = object.compounds else - bad_request_error "Please provide one of the parameters: :compound, :compounds, :dataset" + bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter." end $logger.debug "Setup: #{Time.now-time}" time = Time.now - @query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.smarts} ) + @query_fingerprint = Algorithm.run(feature_dataset.feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.name} ) $logger.debug "Fingerprint calculation: #{Time.now-time}" time = Time.now @@ -166,35 +161,59 @@ module OpenTox end next else - - # TODO reintroduce for regression - #mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self) - #mtf.transform - # + t = Time.new + + if prediction_algorithm =~ /Regression/ + mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self) + mtf.transform + training_fingerprints = mtf.n_prop + training_activities = mtf.activities + p training_activities + query_fingerprint = mtf.q_prop + neighbors = [[nil,nil,nil,query_fingerprint]] + else + training_fingerprints = @feature_dataset.data_entries + # TODO fix for multi feature datasets + training_activities = @training_dataset.data_entries[i].first + query_fingerprint = @query_fingerprint[c] + neighbors = [] + end + $logger.debug "Transform: #{Time.now-t}" + t = Time.new + # find neighbors - neighbors = [] - @feature_dataset.data_entries.each_with_index do |fingerprint, i| - - sim = Algorithm.run(similarity_algorithm,fingerprint, @query_fingerprint[c]) - # TODO fix for multi feature datasets - neighbors << [@feature_dataset.compounds[i],@training_dataset.data_entries[i].first,sim] if sim > self.min_sim + training_fingerprints.each_with_index do |fingerprint, i| + + sim = Algorithm.run(similarity_algorithm,fingerprint, query_fingerprint) + if sim > self.min_sim + if prediction_algorithm =~ /Regression/ + neighbors << [@feature_dataset.compounds[i],sim,training_activities[i], fingerprint] + else + neighbors << [@feature_dataset.compounds[i],sim,training_activities[i]] + end + end end - prediction = Algorithm.run(prediction_algorithm, neighbors) + if prediction_algorithm =~ /Regression/ + prediction = Algorithm.run(prediction_algorithm, neighbors, :min_train_performance => self.min_train_performance) + else + prediction = Algorithm.run(prediction_algorithm, neighbors) + end $logger.debug "Prediction time: #{Time.now-time}" time = Time.now + p prediction # AM: transform to original space (TODO) - confidence_value = ((confidence_value+1.0)/2.0).abs if similarity_algorithm =~ /cosine/ + confidence_value = ((confidence_value+1.0)/2.0).abs if prediction.first and similarity_algorithm =~ /cosine/ - $logger.debug "predicted value: #{prediction[:prediction]}, confidence: #{prediction[:confidence]}" + $logger.debug "predicted value: #{prediction[0]}, confidence: #{prediction[1]}" end prediction_dataset.compound_ids << compound - prediction_dataset[c,0] = prediction[:prediction] - prediction_dataset[c,1] = prediction[:confidence] + prediction_dataset[c,0] = prediction[0] + prediction_dataset[c,1] = prediction[1] end prediction_dataset |