diff options
author | Andreas Maunz <andreas@maunz.de> | 2012-10-08 10:55:50 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2012-10-08 10:55:50 +0200 |
commit | 48211c3bc5dd2f3604ef08b7bb672b822525ef94 (patch) | |
tree | ffe8b1e7be36ab6f8ddac5a237a9be0429346230 | |
parent | bc6118dae334171cbaff4a61d4ae124052f74a59 (diff) |
Fixed min_frequency calculation
-rw-r--r-- | lib/algorithm.rb | 2 | ||||
-rw-r--r-- | lib/utils.rb | 15 |
2 files changed, 14 insertions, 3 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 72a87cf..ca57c50 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -96,7 +96,7 @@ module OpenTox
           raise OpenTox::BadRequestError.new "Minimum frequency must be integer [n], or a percentage [n]pc, or a per-mil [n]pm , with n greater 0" if bad_request
         end
         if @minfreq.nil?
-          @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil)
+          @minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,@prediction_feature,per_mil)
           LOGGER.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
         end
       end
diff --git a/lib/utils.rb b/lib/utils.rb
index d34081b..324fc58 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -410,8 +410,19 @@ module OpenTox
     # Minimum Frequency
     # @param [Integer] per-mil value
     # return [Integer] min-frequency
-    def self.min_frequency(training_dataset,per_mil)
-      minfreq = per_mil * training_dataset.compounds.size.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
+    def self.min_frequency(training_dataset,prediction_feature,per_mil)
+      which_row=training_dataset.compounds.inject({}) {|h,c| h[c]=0; h}
+      nr_labeled_cmpds=0
+      training_dataset.compounds.each_with_index { |cmpd, idx|
+        if ( training_dataset.data_entries[cmpd] and
+             training_dataset.data_entries[cmpd][prediction_feature.uri] )
+          unless training_dataset.data_entries[cmpd][prediction_feature.uri][which_row[cmpd]].nil?
+            nr_labeled_cmpds += 1
+          end
+        end
+        which_row[cmpd] += 1
+      }
+      minfreq = per_mil * nr_labeled_cmpds.to_f / 1000.0 # AM sugg. 8-10 per mil for BBRC, 50 per mil for LAST
       minfreq = 2 unless minfreq > 2
       Integer (minfreq)
     end