summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2015-08-01 19:37:10 +0200
committerChristoph Helma <helma@in-silico.ch>2015-08-01 19:37:10 +0200
commit5648e31134953703429e38dba0c22392b50092a9 (patch)
tree956ebb94b6471878b80221a1c144338c81519a29
parentcdc0e4dd01c4b8da3a43e7d6b49a7e09a9881d63 (diff)
more timer details
-rw-r--r--lib/bbrc.rb15
-rw-r--r--lib/lazar.rb33
-rw-r--r--lib/opentox-algorithm.rb1
-rw-r--r--lib/similarity.rb21
4 files changed, 40 insertions, 30 deletions
diff --git a/lib/bbrc.rb b/lib/bbrc.rb
index 6b0eb26..6e3af3a 100644
--- a/lib/bbrc.rb
+++ b/lib/bbrc.rb
@@ -34,11 +34,6 @@ module OpenTox
minfreq = minfreq.round
end
- #@fminer=OpenTox::Algorithm::Fminer.new
- #@fminer.check_params(dataset,params,5)
- #p @fminer.instance_variables
-
-
@bbrc = Bbrc::Bbrc.new
@bbrc.Reset
if prediction_feature.numeric
@@ -47,8 +42,7 @@ module OpenTox
bad_request_error "No accept values for "\
"dataset '#{training_dataset.id}' and "\
"feature '#{prediction_feature.id}'" unless prediction_feature.accept_values
- act2value = prediction_feature.accept_values.each_index.inject({}) { |h,idx| h[idx+1]=prediction_feature.accept_values[idx]; h }
- value2act = act2value.invert
+ value2act = Hash[[*prediction_feature.accept_values.map.with_index]]
end
@bbrc.SetMinfreq(minfreq)
@bbrc.SetType(1) if params[:feature_type] == "paths"
@@ -70,10 +64,7 @@ module OpenTox
)
feature_dataset.compounds = training_dataset.compounds
- $logger.debug "Setup: #{Time.now-time}"
- time = Time.now
- # Add data to fminer
- #@fminer.add_fminer_data(@bbrc, value_map)
+ # add data
training_dataset.compounds.each_with_index do |compound,i|
@bbrc.AddCompound(compound.smiles,i+1)
act = value2act[training_dataset.data_entries[i].first]
@@ -84,7 +75,7 @@ module OpenTox
#task.progress 10
#step_width = 80 / @bbrc.GetNoRootNodes().to_f
- $logger.debug "Setup: #{Time.now-time}"
+ $logger.debug "BBRC setup: #{Time.now-time}"
time = Time.now
ftime = 0
itime = 0
diff --git a/lib/lazar.rb b/lib/lazar.rb
index d9195ad..1e123d7 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -51,7 +51,8 @@ module OpenTox
def predict object
- time = Time.now
+ t = Time.now
+ at = Time.now
@training_dataset = OpenTox::Dataset.find(training_dataset_id)
@feature_dataset = OpenTox::Dataset.find(feature_dataset_id)
@@ -68,17 +69,22 @@ module OpenTox
bad_request_error "Please provide a OpenTox::Compound an Array of OpenTox::Compounds or an OpenTox::Dataset as parameter."
end
- $logger.debug "Setup: #{Time.now-time}"
- time = Time.now
+ $logger.debug "Setup: #{Time.now-t}"
+ t = Time.now
@query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.name} )
- $logger.debug "Query fingerprint calculation: #{Time.now-time}"
+ $logger.debug "Query fingerprint calculation: #{Time.now-t}"
+ t = Time.now
predictions = []
prediction_feature = OpenTox::Feature.find prediction_feature_id
tt = 0
pt = 0
+ nt = 0
+ st = 0
+ nit = 0
+ @training_fingerprints ||= @feature_dataset.data_entries
compounds.each_with_index do |compound,c|
t = Time.new
@@ -95,11 +101,11 @@ module OpenTox
if prediction_algorithm =~ /Regression/
mtf = OpenTox::Algorithm::Transform::ModelTransformer.new(self)
mtf.transform
- training_fingerprints = mtf.n_prop
+ @training_fingerprints = mtf.n_prop
query_fingerprint = mtf.q_prop
neighbors = [[nil,nil,nil,query_fingerprint]]
else
- training_fingerprints = @feature_dataset.data_entries
+ #training_fingerprints = @feature_dataset.data_entries
query_fingerprint = @query_fingerprint[c]
neighbors = []
end
@@ -108,22 +114,27 @@ module OpenTox
# find neighbors
- training_fingerprints.each_with_index do |fingerprint, i|
+ @training_fingerprints.each_with_index do |fingerprint, i|
+ ts = Time.new
sim = Algorithm.run(similarity_algorithm,fingerprint, query_fingerprint)
+ st += Time.now-ts
+ ts = Time.new
if sim > self.min_sim
if prediction_algorithm =~ /Regression/
- neighbors << [@feature_dataset.compounds[i],sim,training_activities[i], fingerprint]
+ neighbors << [@feature_dataset.compound_ids[i],sim,training_activities[i], fingerprint]
else
- neighbors << [@feature_dataset.compounds[i],sim,training_activities[i]]
+ neighbors << [@feature_dataset.compound_ids[i],sim,training_activities[i]] # use compound_ids, instantiation of Compounds is too time consuming
end
end
+ nit += Time.now-ts
end
if neighbors.empty?
predictions << {:compound => compound, :value => nil, :confidence => nil, :warning => "No neighbors with similarity > #{min_sim} in dataset #{training_dataset.id}"}
- #$logger.warn "No neighbors found for compound #{compound}."
next
end
+ nt += Time.now-t
+ t = Time.new
if prediction_algorithm =~ /Regression/
prediction = Algorithm.run(prediction_algorithm, neighbors, :min_train_performance => self.min_train_performance)
@@ -145,7 +156,9 @@ module OpenTox
end
$logger.debug "Transform time: #{tt}"
+ $logger.debug "Neighbor search time: #{nt} (Similarity calculation: #{st}, Neighbor insert: #{nit})"
$logger.debug "Prediction time: #{pt}"
+ $logger.debug "Total prediction time: #{Time.now-at}"
# serialize result
case object.class.to_s
diff --git a/lib/opentox-algorithm.rb b/lib/opentox-algorithm.rb
index 1764b47..790803b 100644
--- a/lib/opentox-algorithm.rb
+++ b/lib/opentox-algorithm.rb
@@ -15,6 +15,7 @@ require_relative '../last-utils/lu.rb'
#Dir[File.join(File.dirname(__FILE__),"*.rb")].each{ |f| require_relative f}
require_relative "algorithm.rb"
require_relative "descriptor.rb"
+require_relative "bbrc.rb"
#require_relative "fminer.rb"
require_relative "lazar.rb"
require_relative "transform.rb"
diff --git a/lib/similarity.rb b/lib/similarity.rb
index 78783d5..934c4b0 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -10,22 +10,27 @@ module OpenTox
class Similarity
+ #TODO weighted tanimoto
+
# Tanimoto similarity
# @param [Array] a fingerprints of first compound
# @param [Array] b fingerprints of second compound
# @return [Float] Tanimoto similarity
def self.tanimoto(a,b)
- #a = fingerprints.first
- #b = fingerprints.last
+ bad_request_error "fingerprints #{a} and #{b} don't have equal size" unless a.size == b.size
+ #common = 0.0
+ #a.each_with_index do |n,i|
+ #common += 1 if n == b[i]
+ #end
+ #common/a.size
+ # TODO check if calculation is correct
common_p_sum = 0.0
all_p_sum = 0.0
- size = [ a.size, b.size ].min
- $logger.warn "fingerprints don't have equal size" if a.size != b.size
- (0...size).each { |idx|
- common_p_sum += [ a[idx].to_f, b[idx].to_f ].min
- all_p_sum += [ a[idx].to_f, b[idx].to_f ].max
+ (0...a.size).each { |idx|
+ common_p_sum += [ a[idx], b[idx] ].min
+ all_p_sum += [ a[idx], b[idx] ].max
}
- (all_p_sum > 0.0) ? (common_p_sum/all_p_sum) : 0.0
+ common_p_sum/all_p_sum
end