From 4ebd80fee52c04bd36781f846eae60019918345d Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 21 Apr 2016 14:29:23 +0200 Subject: initial classification probabilities --- lib/classification.rb | 38 +++++++++++++++++++------------------- lib/crossvalidation.rb | 2 +- lib/leave-one-out-validation.rb | 22 +++++++++++----------- test/setup.rb | 4 ++-- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/lib/classification.rb b/lib/classification.rb index 0202940..b9b66f0 100644 --- a/lib/classification.rb +++ b/lib/classification.rb @@ -5,28 +5,28 @@ module OpenTox def self.weighted_majority_vote compound, params neighbors = params[:neighbors] - weighted_sum = {} - sim_sum = 0.0 - confidence = 0.0 - neighbors.each do |row| - sim = row["tanimoto"] - row["features"][params[:prediction_feature_id].to_s].each do |act| - weighted_sum[act] ||= 0 - weighted_sum[act] += sim + feature_id = params[:prediction_feature_id].to_s + sims = {} + neighbors.each do |n| + sim = n["tanimoto"] + n["features"][feature_id].each do |act| + sims[act] ||= [] + sims[act] << sim + #sims[act] << 0.5*sim+0.5 # scale to 1-0.5 end end - case weighted_sum.size - when 1 - return {:value => weighted_sum.keys.first, :confidence => weighted_sum.values.first/neighbors.size.abs} - when 2 - sim_sum = weighted_sum[weighted_sum.keys[0]] - sim_sum -= weighted_sum[weighted_sum.keys[1]] - sim_sum > 0 ? prediction = weighted_sum.keys[0] : prediction = weighted_sum.keys[1] - confidence = (sim_sum/neighbors.size).abs - return {:value => prediction,:confidence => confidence} - else - bad_request_error "Cannot predict more than 2 classes, multinomial classifications is not yet implemented. Received classes were: '#{weighted.sum.keys}'" + sim_all = sims.collect{|a,s| s}.flatten + sim_sum = sim_all.sum + sim_max = sim_all.max + probabilities = {} + sims.each do |a,s| + probabilities[a] = s.sum/sim_sum end + probabilities = probabilities.collect{|a,p| [a,sim_max*p]}.to_h + p_max = probabilities.collect{|a,p| p}.max + prediction = probabilities.key(p_max) + {:value => prediction,:probabilities => probabilities} + end end end diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 15dfb21..6ffeb25 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -52,7 +52,7 @@ module OpenTox cv.update_attributes( nr_instances: nr_instances, nr_unpredicted: nr_unpredicted, - predictions: predictions#.sort{|a,b| b[3] <=> a[3]} # sort according to confidence + predictions: predictions ) $logger.debug "Nr unpredicted: #{nr_unpredicted}" cv.statistics diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 2cd13db..0a131a4 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -51,18 +51,18 @@ module OpenTox if pred[:value] == db_act if pred[:value] == accept_values[0] confusion_matrix[0][0] += 1 - weighted_confusion_matrix[0][0] += pred[:confidence] + #weighted_confusion_matrix[0][0] += pred[:confidence] elsif pred[:value] == accept_values[1] confusion_matrix[1][1] += 1 - weighted_confusion_matrix[1][1] += pred[:confidence] + #weighted_confusion_matrix[1][1] += pred[:confidence] end else if pred[:value] == accept_values[0] confusion_matrix[0][1] += 1 - weighted_confusion_matrix[0][1] += pred[:confidence] + #weighted_confusion_matrix[0][1] += pred[:confidence] elsif pred[:value] == accept_values[1] confusion_matrix[1][0] += 1 - weighted_confusion_matrix[1][0] += pred[:confidence] + #weighted_confusion_matrix[1][0] += pred[:confidence] end end end @@ -73,17 +73,17 @@ module OpenTox predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f end confidence_sum = 0 - weighted_confusion_matrix.each do |r| - r.each do |c| - confidence_sum += c - end - end +# weighted_confusion_matrix.each do |r| +# r.each do |c| +# confidence_sum += c +# end +# end update_attributes( accept_values: accept_values, confusion_matrix: confusion_matrix, - weighted_confusion_matrix: weighted_confusion_matrix, +# weighted_confusion_matrix: weighted_confusion_matrix, accuracy: (confusion_matrix[0][0]+confusion_matrix[1][1])/(nr_instances-nr_unpredicted).to_f, - weighted_accuracy: (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f, +# weighted_accuracy: (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f, true_rate: true_rate, predictivity: predictivity, finished_at: Time.now diff --git a/test/setup.rb b/test/setup.rb index be3140a..e7c32b4 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -1,7 +1,7 @@ ENV["LAZAR_ENV"] = "development" require 'minitest/autorun' -#require_relative '../lib/lazar.rb' -require 'lazar' +require_relative '../lib/lazar.rb' +#require 'lazar' include OpenTox TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") -- cgit v1.2.3