summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2015-07-29 17:10:49 +0200
committer Christoph Helma <helma@in-silico.ch> 2015-07-29 17:10:49 +0200
commit e55ed6f22e07ff36e74a8a5838ddb7e51524c89c (patch)
tree a79d5e6c619ec64bf3f77ff050d272af92a7788b
parent 81b30bad9f45d621014b18168f2ba1366c8e4912 (diff)
kazius lazar predictions working
-rw-r--r-- lib/algorithm.rb 4
-rw-r--r-- lib/bbrc.rb 8
-rw-r--r-- lib/lazar.rb 33
-rw-r--r-- lib/similarity.rb 6
4 files changed, 23 insertions, 28 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 1b97584..eda7588 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -2,9 +2,9 @@ module OpenTox
module Algorithm
- def self.run algorithm, arg1, arg2 #parameters
+ def self.run algorithm, object, parameters={}
klass,method = algorithm.split('.')
- Object.const_get(klass).send(method, arg1,arg2)
+ parameters.empty? ? Object.const_get(klass).send(method,object) : Object.const_get(klass).send(method,object, parameters)
end
end
diff --git a/lib/bbrc.rb b/lib/bbrc.rb
index cf6fa6c..595d712 100644
--- a/lib/bbrc.rb
+++ b/lib/bbrc.rb
@@ -40,14 +40,15 @@ module OpenTox
@bbrc.SetChisqSig(params[:min_chisq_significance].to_f) if params[:min_chisq_significance]
@bbrc.SetConsoleOut(false)
+ params[:nr_hits] ? nr_hits = params[:nr_hits] : nr_hits = false
feature_dataset = FminerDataset.new(
:training_dataset_id => dataset.id,
:training_algorithm => "#{self.to_s}.bbrc",
:training_feature_id => params[:prediction_feature].id ,
:training_parameters => {
:min_frequency => @fminer.minfreq,
- :nr_hits => (params[:nr_hits] == "true" ? "true" : "false"),
- :backbone => (params[:backbone] == "false" ? "false" : "true")
+ :nr_hits => nr_hits,
+ :backbone => (params[:backbone] == false ? false : true)
}
)
@@ -117,7 +118,8 @@ module OpenTox
it = Time.now
f.each do |id_count_hash|
id_count_hash.each do |id,count|
- feature_dataset[id-1, feature_dataset.feature_ids.size-1] = count.to_i
+ nr_hits ? count = count.to_i : count = 1
+ feature_dataset[id-1, feature_dataset.feature_ids.size-1] = count
end
end
itime += Time.now - it
diff --git a/lib/lazar.rb b/lib/lazar.rb
index 2c83f38..19f8cdd 100644
--- a/lib/lazar.rb
+++ b/lib/lazar.rb
@@ -13,7 +13,7 @@ module OpenTox
include OpenTox
include Mongoid::Document
include Mongoid::Timestamps
- store_in collection: "model"
+ store_in collection: "models"
field :title, type: String
field :description, type: String
@@ -74,8 +74,8 @@ module OpenTox
end
unless lazar.prediction_algorithm
- lazar.prediction_algorithm = :weighted_majority_vote if prediction_feature.nominal
- lazar.prediction_algorithm = :local_svm_regression if prediction_feature.numeric
+ lazar.prediction_algorithm = "OpenTox::Algorithm::Classification.weighted_majority_vote" if prediction_feature.nominal
+ lazar.prediction_algorithm = "OpenTox::Algorithm::Regression.local_svm_regression" if prediction_feature.numeric
end
lazar.prediction_algorithm =~ /majority_vote/ ? lazar.propositionalized = false : lazar.propositionalized = true
@@ -144,11 +144,7 @@ module OpenTox
$logger.debug "Setup: #{Time.now-time}"
time = Time.now
- # TODO: remove eval
- #p ("#{feature_calculation_algorithm}(#{compounds}, #{@feature_dataset.features.collect{|f| f.smarts}})")
- #@query_fingerprint = eval("#{feature_calculation_algorithm}(#{compounds}, #{@feature_dataset.features.collect{|f| f.smarts}})")
@query_fingerprint = Algorithm.run(feature_calculation_algorithm, compounds, @feature_dataset.features.collect{|f| f.smarts} )
- #p @query_fingerprint
$logger.debug "Fingerprint calculation: #{Time.now-time}"
time = Time.now
@@ -164,7 +160,9 @@ module OpenTox
if database_activities and !database_activities.empty?
database_activities.each do |database_activity|
$logger.debug "do not predict compound, it occurs in dataset with activity #{database_activity}"
- prediction_dataset << [compound, database_activity, nil]
+ prediction_dataset.compound_ids << compound.id
+ prediction_dataset[c,0] = database_activity
+ prediction_dataset[c,1] = nil
end
next
else
@@ -176,23 +174,16 @@ module OpenTox
# find neighbors
neighbors = []
- #@feature_dataset.data_entries.each_with_index do |fingerprint, i|
- @feature_dataset.compounds.each_with_index do |compound, i|
- #p compound
- #p @feature_dataset.features.size
- fingerprint = @feature_dataset.feature_values(compound)
- #fingerprint = @feature_dataset.features(compound)
- #p fingerprint
-
- sim = Algorithm.run(similarity_algorithm,[fingerprint, @query_fingerprint[c]])
+ @feature_dataset.data_entries.each_with_index do |fingerprint, i|
+
+ sim = Algorithm.run(similarity_algorithm,fingerprint, @query_fingerprint[c])
# TODO fix for multi feature datasets
neighbors << [@feature_dataset.compounds[i],@training_dataset.data_entries[i].first,sim] if sim > self.min_sim
end
- #p neighbors
prediction = Algorithm.run(prediction_algorithm, neighbors)
- $logger.debug "Prediction: #{Time.now-time}"
+ $logger.debug "Prediction time: #{Time.now-time}"
time = Time.now
# AM: transform to original space (TODO)
@@ -201,7 +192,9 @@ module OpenTox
$logger.debug "predicted value: #{prediction[:prediction]}, confidence: #{prediction[:confidence]}"
end
- prediction_dataset << [ compound, prediction[:prediction], prediction[:confidence] ]
+ prediction_dataset.compound_ids << compound
+ prediction_dataset[c,0] = prediction[:prediction]
+ prediction_dataset[c,1] = prediction[:confidence]
end
prediction_dataset
diff --git a/lib/similarity.rb b/lib/similarity.rb
index 59c86ff..78783d5 100644
--- a/lib/similarity.rb
+++ b/lib/similarity.rb
@@ -14,9 +14,9 @@ module OpenTox
# @param [Array] a fingerprints of first compound
# @param [Array] b fingerprints of second compound
# @return [Float] Tanimoto similarity
- def self.tanimoto(fingerprints)
- a = fingerprints.first
- b = fingerprints.last
+ def self.tanimoto(a,b)
+ #a = fingerprints.first
+ #b = fingerprints.last
common_p_sum = 0.0
all_p_sum = 0.0
size = [ a.size, b.size ].min