summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andreas Maunz <andreas@maunz.de> 2012-10-08 10:55:50 +0200
committer: Andreas Maunz <andreas@maunz.de> 2012-10-08 10:55:50 +0200
commit: 48211c3bc5dd2f3604ef08b7bb672b822525ef94 (patch)
tree: ffe8b1e7be36ab6f8ddac5a237a9be0429346230
parent: bc6118dae334171cbaff4a61d4ae124052f74a59 (diff)
Fixed min_frequency calculation
-rw-r--r-- lib/algorithm.rb | 2
-rw-r--r-- lib/utils.rb | 15
2 files changed, 14 insertions, 3 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 72a87cf..ca57c50 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -96,7 +96,7 @@ module OpenTox
raise OpenTox::BadRequestError.new "Minimum frequency must be integer [n], or a percentage [n]pc, or a per-mil [n]pm , with n greater 0" if bad_request
end
if @minfreq.nil?
- @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil)
+ @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,@prediction_feature,per_mil)
LOGGER.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
end
end
diff --git a/lib/utils.rb b/lib/utils.rb
index d34081b..324fc58 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -410,8 +410,19 @@ module OpenTox
# Minimum Frequency
# @param [Integer] per-mil value
# return [Integer] min-frequency
- def self.min_frequency(training_dataset,per_mil)
- minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
+ def self.min_frequency(training_dataset,prediction_feature,per_mil)
+ which_row=training_dataset.compounds.inject({}) {|h,c| h[c]=0; h}
+ nr_labeled_cmpds=0
+ training_dataset.compounds.each_with_index { |cmpd, idx|
+ if ( training_dataset.data_entries[cmpd] and
+ training_dataset.data_entries[cmpd][prediction_feature.uri] )
+ unless training_dataset.data_entries[cmpd][prediction_feature.uri][which_row[cmpd]].nil?
+ nr_labeled_cmpds += 1
+ end
+ end
+ which_row[cmpd] += 1
+ }
+ minfreq = per_mil * nr_labeled_cmpds.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
minfreq = 2 unless minfreq > 2
Integer (minfreq)
end