From 5648e31134953703429e38dba0c22392b50092a9 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sat, 1 Aug 2015 19:37:10 +0200 Subject: more timer details --- lib/bbrc.rb | 15 +++------------ lib/lazar.rb | 33 +++++++++++++++++++++++---------- lib/opentox-algorithm.rb | 1 + lib/similarity.rb | 21 +++++++++++++-------- 4 files changed, 40 insertions(+), 30 deletions(-) diff --git a/lib/bbrc.rb b/lib/bbrc.rb index 6b0eb26..6e3af3a 100644 --- a/lib/bbrc.rb +++ b/lib/bbrc.rb @@ -34,11 +34,6 @@ module OpenTox minfreq = minfreq.round end - #@fminer=OpenTox::Algorithm::Fminer.new - #@fminer.check_params(dataset,params,5) - #p @fminer.instance_variables - - @bbrc = Bbrc::Bbrc.new @bbrc.Reset if prediction_feature.numeric @@ -47,8 +42,7 @@ module OpenTox bad_request_error "No accept values for "\ "dataset '#{training_dataset.id}' and "\ "feature '#{prediction_feature.id}'" unless prediction_feature.accept_values - act2value = prediction_feature.accept_values.each_index.inject({}) { |h,idx| h[idx+1]=prediction_feature.accept_values[idx]; h } - value2act = act2value.invert + value2act = Hash[[*prediction_feature.accept_values.map.with_index]] end @bbrc.SetMinfreq(minfreq) @bbrc.SetType(1) if params[:feature_type] == "paths" @@ -70,10 +64,7 @@ module OpenTox ) feature_dataset.compounds = training_dataset.compounds - $logger.debug "Setup: #{Time.now-time}" - time = Time.now - # Add data to fminer - #@fminer.add_fminer_data(@bbrc, value_map) + # add data training_dataset.compounds.each_with_index do |compound,i| @bbrc.AddCompound(compound.smiles,i+1) act = value2act[training_dataset.data_entries[i].first] @@ -84,7 +75,7 @@ module OpenTox #task.progress 10 #step_width = 80 / @bbrc.GetNoRootNodes().to_f - $logger.debug "Setup: #{Time.now-time}" + $logger.debug "BBRC setup: #{Time.now-time}" time = Time.now ftime = 0 itime = 0 diff --git a/lib/lazar.rb b/lib/lazar.rb index d9195ad..1e123d7 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -51,7 +51,8 @@ module OpenTox def predict object - time = Time.now + t = Time.now + at = Time.now @training_dataset = OpenTox::Dataset.find(training_dataset_id) @feature_dataset = OpenTox::Dataset.find(feature_dataset_id) @@ -68,17 +69,22 @@ module OpenTox bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter." end - $logger.debug "Setup: #{Time.now-time}" - time = Time.now + $logger.debug "Setup: #{Time.now-t}" + t = Time.now @query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.name} ) - $logger.debug "Query fingerprint calculation: #{Time.now-time}" + $logger.debug "Query fingerprint calculation: #{Time.now-t}" + t = Time.now predictions = [] prediction_feature = OpenTox::Feature.find prediction_feature_id tt = 0 pt = 0 + nt = 0 + st = 0 + nit = 0 + @training_fingerprints ||= @feature_dataset.data_entries compounds.each_with_index do |compound,c| t = Time.new @@ -95,11 +101,11 @@ module OpenTox if prediction_algorithm =~ /Regression/ mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self) mtf.transform - training_fingerprints = mtf.n_prop + @training_fingerprints = mtf.n_prop query_fingerprint = mtf.q_prop neighbors = [[nil,nil,nil,query_fingerprint]] else - training_fingerprints = @feature_dataset.data_entries + #training_fingerprints = @feature_dataset.data_entries query_fingerprint = @query_fingerprint[c] neighbors = [] end @@ -108,22 +114,27 @@ module OpenTox # find neighbors - training_fingerprints.each_with_index do |fingerprint, i| + @training_fingerprints.each_with_index do |fingerprint, i| + ts = Time.new sim = Algorithm.run(similarity_algorithm,fingerprint, query_fingerprint) + st += Time.now-ts + ts = Time.new if sim > self.min_sim if prediction_algorithm =~ /Regression/ - neighbors << [@feature_dataset.compounds[i],sim,training_activities[i], fingerprint] + neighbors << [@feature_dataset.compound_ids[i],sim,training_activities[i], fingerprint] else - neighbors << [@feature_dataset.compounds[i],sim,training_activities[i]] + neighbors << [@feature_dataset.compound_ids[i],sim,training_activities[i]] # use compound_ids, instantiation of Compounds is too time consuming end end + nit += Time.now-ts end if neighbors.empty? predictions << {:compound => compound, :value => nil, :confidence => nil, :warning => "No neighbors with similarity > #{min_sim} in dataset #{training_dataset.id}"} - #$logger.warn "No neighbors found for compound #{compound}." next end + nt += Time.now-t + t = Time.new if prediction_algorithm =~ /Regression/ prediction = Algorithm.run(prediction_algorithm, neighbors, :min_train_performance => self.min_train_performance) @@ -145,7 +156,9 @@ module OpenTox end $logger.debug "Transform time: #{tt}" + $logger.debug "Neighbor search time: #{nt} (Similarity calculation: #{st}, Neighbor insert: #{nit})" $logger.debug "Prediction time: #{pt}" + $logger.debug "Total prediction time: #{Time.now-at}" # serialize result case object.class.to_s diff --git a/lib/opentox-algorithm.rb b/lib/opentox-algorithm.rb index 1764b47..790803b 100644 --- a/lib/opentox-algorithm.rb +++ b/lib/opentox-algorithm.rb @@ -15,6 +15,7 @@ require_relative '../last-utils/lu.rb' #Dir[File.join(File.dirname(__FILE__),"*.rb")].each{ |f| require_relative f} require_relative "algorithm.rb" require_relative "descriptor.rb" +require_relative "bbrc.rb" #require_relative "fminer.rb" require_relative "lazar.rb" require_relative "transform.rb" diff --git a/lib/similarity.rb b/lib/similarity.rb index 78783d5..934c4b0 100644 --- a/lib/similarity.rb +++ b/lib/similarity.rb @@ -10,22 +10,27 @@ module OpenTox class Similarity + #TODO weighted tanimoto + # Tanimoto similarity # @param [Array] a fingerprints of first compound # @param [Array] b fingerprints of second compound # @return [Float] Tanimoto similarity def self.tanimoto(a,b) - #a = fingerprints.first - #b = fingerprints.last + bad_request_error "fingerprints #{a} and #{b} don't have equal size" unless a.size == b.size + #common = 0.0 + #a.each_with_index do |n,i| + #common += 1 if n == b[i] + #end + #common/a.size + # TODO check if calculation is correct common_p_sum = 0.0 all_p_sum = 0.0 - size = [ a.size, b.size ].min - $logger.warn "fingerprints don't have equal size" if a.size != b.size - (0...size).each { |idx| - common_p_sum += [ a[idx].to_f, b[idx].to_f ].min - all_p_sum += [ a[idx].to_f, b[idx].to_f ].max + (0...a.size).each { |idx| + common_p_sum += [ a[idx], b[idx] ].min + all_p_sum += [ a[idx], b[idx] ].max } - (all_p_sum > 0.0) ? (common_p_sum/all_p_sum) : 0.0 + common_p_sum/all_p_sum end -- cgit v1.2.3