From e55ed6f22e07ff36e74a8a5838ddb7e51524c89c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 29 Jul 2015 17:10:49 +0200 Subject: kazius lazar predictions working --- lib/algorithm.rb | 4 ++-- lib/bbrc.rb | 8 +++++--- lib/lazar.rb | 33 +++++++++++++-------------------- lib/similarity.rb | 6 +++--- 4 files changed, 23 insertions(+), 28 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 1b97584..eda7588 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -2,9 +2,9 @@ module OpenTox module Algorithm - def self.run algorithm, arg1, arg2 #parameters + def self.run algorithm, object, parameters={} klass,method = algorithm.split('.') - Object.const_get(klass).send(method, arg1,arg2) + parameters.empty? ? Object.const_get(klass).send(method,object) : Object.const_get(klass).send(method,object, parameters) end end diff --git a/lib/bbrc.rb b/lib/bbrc.rb index cf6fa6c..595d712 100644 --- a/lib/bbrc.rb +++ b/lib/bbrc.rb @@ -40,14 +40,15 @@ module OpenTox @bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance] @bbrc.SetConsoleOut(false) + params[:nr_hits] ? nr_hits = params[:nr_hits] : nr_hits = false feature_dataset = FminerDataset.new( :training_dataset_id => dataset.id, :training_algorithm => "#{self.to_s}.bbrc", :training_feature_id => params[:prediction_feature].id , :training_parameters => { :min_frequency => @fminer.minfreq, - :nr_hits => (params[:nr_hits] == "true" ? "true" : "false"), - :backbone => (params[:backbone] == "false" ? "false" : "true") + :nr_hits => nr_hits, + :backbone => (params[:backbone] == false ? false : true) } ) @@ -117,7 +118,8 @@ module OpenTox it = Time.now f.each do |id_count_hash| id_count_hash.each do |id,count| - feature_dataset[id-1, feature_dataset.feature_ids.size-1] = count.to_i + nr_hits ? count = count.to_i : count = 1 + feature_dataset[id-1, feature_dataset.feature_ids.size-1] = count end end itime += Time.now - it diff --git a/lib/lazar.rb b/lib/lazar.rb index 2c83f38..19f8cdd 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -13,7 +13,7 @@ module OpenTox include OpenTox include Mongoid::Document include Mongoid::Timestamps - store_in collection: "model" + store_in collection: "models" field :title, type: String field :description, type: String @@ -74,8 +74,8 @@ module OpenTox end unless lazar.prediction_algorithm - lazar.prediction_algorithm = :weighted_majority_vote if prediction_feature.nominal - lazar.prediction_algorithm = :local_svm_regression if prediction_feature.numeric + lazar.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote" if prediction_feature.nominal + lazar.prediction_algorithm = "OpenTox::Algorithm::Regression.local_svm_regression" if prediction_feature.numeric end lazar.prediction_algorithm =~ /majority_vote/ ? lazar.propositionalized = false : lazar.propositionalized = true @@ -144,11 +144,7 @@ module OpenTox $logger.debug "Setup: #{Time.now-time}" time = Time.now - # TODO: remove eval - #p ("#{feature_calculation_algorithm}(#{compounds}, #{@feature_dataset.features.collect{|f| f.smarts}})") - #@query_fingerprint = eval("#{feature_calculation_algorithm}(#{compounds}, #{@feature_dataset.features.collect{|f| f.smarts}})") @query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.smarts} ) - #p @query_fingerprint $logger.debug "Fingerprint calculation: #{Time.now-time}" time = Time.now @@ -164,7 +160,9 @@ module OpenTox if database_activities and !database_activities.empty? database_activities.each do |database_activity| $logger.debug "do not predict compound, it occurs in dataset with activity #{database_activity}" - prediction_dataset << [compound, database_activity, nil] + prediction_dataset.compound_ids << compound.id + prediction_dataset[c,0] = database_activity + prediction_dataset[c,1] = nil end next else @@ -176,23 +174,16 @@ module OpenTox # find neighbors neighbors = [] - #@feature_dataset.data_entries.each_with_index do |fingerprint, i| - @feature_dataset.compounds.each_with_index do |compound, i| - #p compound - #p @feature_dataset.features.size - fingerprint = @feature_dataset.feature_values(compound) - #fingerprint = @feature_dataset.features(compound) - #p fingerprint - - sim = Algorithm.run(similarity_algorithm,[fingerprint, @query_fingerprint[c]]) + @feature_dataset.data_entries.each_with_index do |fingerprint, i| + + sim = Algorithm.run(similarity_algorithm,fingerprint, @query_fingerprint[c]) # TODO fix for multi feature datasets neighbors << [@feature_dataset.compounds[i],@training_dataset.data_entries[i].first,sim] if sim > self.min_sim end - #p neighbors prediction = Algorithm.run(prediction_algorithm, neighbors) - $logger.debug "Prediction: #{Time.now-time}" + $logger.debug "Prediction time: #{Time.now-time}" time = Time.now # AM: transform to original space (TODO) @@ -201,7 +192,9 @@ module OpenTox $logger.debug "predicted value: #{prediction[:prediction]}, confidence: #{prediction[:confidence]}" end - prediction_dataset << [ compound, prediction[:prediction], prediction[:confidence] ] + prediction_dataset.compound_ids << compound + prediction_dataset[c,0] = prediction[:prediction] + prediction_dataset[c,1] = prediction[:confidence] end prediction_dataset diff --git a/lib/similarity.rb b/lib/similarity.rb index 59c86ff..78783d5 100644 --- a/lib/similarity.rb +++ b/lib/similarity.rb @@ -14,9 +14,9 @@ module OpenTox # @param [Array] a fingerprints of first compound # @param [Array] b fingerprints of second compound # @return [Float] Tanimoto similarity - def self.tanimoto(fingerprints) - a = fingerprints.first - b = fingerprints.last + def self.tanimoto(a,b) + #a = fingerprints.first + #b = fingerprints.last common_p_sum = 0.0 all_p_sum = 0.0 size = [ a.size, b.size ].min -- cgit v1.2.3