diff options
author | Christoph Helma <helma@in-silico.ch> | 2016-04-21 15:06:10 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2016-04-21 15:06:10 +0200 |
commit | 6890687df6de0a6eaa3d35c6be66639614ea2ef2 (patch) | |
tree | c863c5857d8716d35eb3a22efd3da24c5f27b968 /lib | |
parent | 75b70425ae8699464a18529eb7bf35a216c06243 (diff) | |
parent | 4ebd80fee52c04bd36781f846eae60019918345d (diff) |
probabilities branch merged
Diffstat (limited to 'lib')
-rw-r--r-- | lib/classification.rb | 38 | ||||
-rw-r--r-- | lib/crossvalidation.rb | 2 | ||||
-rw-r--r-- | lib/validation-statistics.rb | 13 |
3 files changed, 26 insertions, 27 deletions
```diff
diff --git a/lib/classification.rb b/lib/classification.rb
index 0de8726..93b4f0f 100644
--- a/lib/classification.rb
+++ b/lib/classification.rb
@@ -5,29 +5,27 @@ module OpenTox
 
       def self.weighted_majority_vote compound, params
         neighbors = params[:neighbors]
-        weighted_sum = {}
-        sim_sum = 0.0
-        confidence = 0.0
-        # see ~/src/pubchem-read-across/application.rb:353
-        neighbors.each do |row|
-          sim = row["tanimoto"]
-          row["toxicities"][params[:prediction_feature_id].to_s].each do |act|
-            weighted_sum[act] ||= 0
-            weighted_sum[act] += sim
+        feature_id = params[:prediction_feature_id].to_s
+        sims = {}
+        neighbors.each do |n|
+          sim = n["tanimoto"]
+          n["toxicities"][feature_id].each do |act|
+            sims[act] ||= []
+            sims[act] << sim
           end
         end
-        case weighted_sum.size
-        when 1
-          return {:value => weighted_sum.keys.first, :confidence => weighted_sum.values.first/neighbors.size.abs}
-        when 2
-          sim_sum = weighted_sum[weighted_sum.keys[0]]
-          sim_sum -= weighted_sum[weighted_sum.keys[1]]
-          sim_sum > 0 ? prediction = weighted_sum.keys[0] : prediction = weighted_sum.keys[1]
-          confidence = (sim_sum/neighbors.size).abs
-          return {:value => prediction,:confidence => confidence}
-        else
-          bad_request_error "Cannot predict more than 2 classes, multinomial classifications is not yet implemented. Received classes were: '#{weighted.sum.keys}'"
+        sim_all = sims.collect{|a,s| s}.flatten
+        sim_sum = sim_all.sum
+        sim_max = sim_all.max
+        probabilities = {}
+        sims.each do |a,s|
+          probabilities[a] = s.sum/sim_sum
         end
+        probabilities = probabilities.collect{|a,p| [a,sim_max*p]}.to_h
+        p_max = probabilities.collect{|a,p| p}.max
+        prediction = probabilities.key(p_max)
+        {:value => prediction,:probabilities => probabilities}
+
       end
     end
   end
diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb
index 752d393..50afb6f 100644
--- a/lib/crossvalidation.rb
+++ b/lib/crossvalidation.rb
@@ -54,7 +54,7 @@ module OpenTox
       cv.update_attributes(
         nr_instances: nr_instances,
         nr_unpredicted: nr_unpredicted,
-        predictions: predictions#.sort{|a,b| b[3] <=> a[3]} # sort according to confidence
+        predictions: predictions
       )
       $logger.debug "Nr unpredicted: #{nr_unpredicted}"
       cv.statistics
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 570b2d4..c6b2a07 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -15,21 +15,21 @@ module OpenTox
           if pred[:value] == m
             if pred[:value] == accept_values[0]
               confusion_matrix[0][0] += 1
-              weighted_confusion_matrix[0][0] += pred[:confidence]
+              weighted_confusion_matrix[0][0] += pred[:probabilities][pred[:value]]
               nr_instances += 1
             elsif pred[:value] == accept_values[1]
               confusion_matrix[1][1] += 1
-              weighted_confusion_matrix[1][1] += pred[:confidence]
+              weighted_confusion_matrix[1][1] += pred[:probabilities][pred[:value]]
               nr_instances += 1
             end
           elsif pred[:value] != m
             if pred[:value] == accept_values[0]
               confusion_matrix[0][1] += 1
-              weighted_confusion_matrix[0][1] += pred[:confidence]
+              weighted_confusion_matrix[0][1] += pred[:probabilities][pred[:value]]
               nr_instances += 1
             elsif pred[:value] == accept_values[1]
               confusion_matrix[1][0] += 1
-              weighted_confusion_matrix[1][0] += pred[:confidence]
+              weighted_confusion_matrix[1][0] += pred[:probabilities][pred[:value]]
               nr_instances += 1
             end
           end
@@ -47,14 +47,15 @@ module OpenTox
           confidence_sum += c
         end
       end
-      accuracy = (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f
+      accuracy = (confusion_matrix[0][0]+confusion_matrix[1][1])/nr_instances.to_f
+      weighted_accuracy = (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f
       $logger.debug "Accuracy #{accuracy}"
       {
         :accept_values => accept_values,
         :confusion_matrix => confusion_matrix,
         :weighted_confusion_matrix => weighted_confusion_matrix,
         :accuracy => accuracy,
-        :weighted_accuracy => (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f,
+        :weighted_accuracy => weighted_accuracy,
         :true_rate => true_rate,
         :predictivity => predictivity,
         :finished_at => Time.now
```