# Origin: lib/statistics.rb, added in commit
# 1f789133d961c29d3babfaf69cdde3d675288537 by Christoph Helma (2019-08-24),
# "initial refactored version for mutagenicity paper".

require 'fileutils'

# Aggregates binary classification results over the fold directories found
# below a base directory and writes confusion matrices and summary statistics
# as small CSV files.
#
# Files read per fold directory (any entry of the base directory whose name
# starts with a digit):
#   <fold>/test/classification          comma-separated rows; field 1 is the
#                                       predicted class (may be empty), field 4
#                                       is compared against the threshold
#                                       (presumably the maximum similarity)
#   <fold>/test/dependent_variables     one measured class (0/1) per line,
#                                       aligned by line number with the
#                                       classification file
#   <fold>/train/similarity_thresholds  one float per line; the second line is
#                                       the high-confidence cutoff
#
# Files written:
#   <dir>/confusion_matrices/{all,high_confidence,low_confidence}
#   <dir>/summaries/{all,high_confidence,low_confidence}
class ClassificationStatistics
  # Result categories, in the order in which they are written.
  CATEGORIES = %i[all high_confidence low_confidence].freeze

  # Maps [prediction, measurement] to the confusion matrix cell it counts for.
  OUTCOMES = {
    [1, 1] => :tp,
    [0, 0] => :tn,
    [1, 0] => :fp,
    [0, 1] => :fn
  }.freeze

  # @param dir [String] base directory that contains the fold directories
  def initialize(dir)
    @dir = dir
    @folds = Dir[File.join(@dir, "[0-9]*")]
    @confusion_matrix_dir = File.join(@dir, "confusion_matrices")
    @summaries_dir = File.join(@dir, "summaries")
  end

  # Counts true/false positives/negatives over all folds and writes one file
  # per category to the confusion matrix directory. Each file has two lines:
  # "tp,fp" and "fn,tn".
  #
  # Rows without a prediction, and rows whose prediction/measurement pair is
  # not one of the four 0/1 combinations, are not counted.
  #
  # @return [Array<String>] the fold directories (same value the method has
  #   always returned)
  def confusion_matrix
    confusion_matrices = CATEGORIES.map { |category| [category, { tp: 0, fp: 0, tn: 0, fn: 0 }] }.to_h

    @folds.each { |fold| tally_fold(fold, confusion_matrices) }

    # Written once after all folds have been counted; this also produces
    # (all-zero) matrices when there are no folds.
    FileUtils.mkdir_p @confusion_matrix_dir
    confusion_matrices.each do |category, m|
      File.open(File.join(@confusion_matrix_dir, category.to_s), "w+") do |f|
        f.puts "#{m[:tp]},#{m[:fp]}\n#{m[:fn]},#{m[:tn]}"
      end
    end

    @folds
  end

  # Writes accuracy, true positive/negative rate and positive/negative
  # predictive value for every category to the summaries directory, one
  # "name,value" pair per line.
  #
  # An existing confusion matrix file is reused; #confusion_matrix is only
  # run when the file for a category is missing. Rates with a zero
  # denominator are written as NaN because the counts are read as floats.
  #
  # @return [Array<Symbol>] the category names
  def summary
    FileUtils.mkdir_p @summaries_dir
    CATEGORIES.each do |category|
      confusion_matrix_file = File.join(@confusion_matrix_dir, category.to_s)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      confusion_matrix unless File.exist?(confusion_matrix_file)

      matrix = File.readlines(confusion_matrix_file).map { |row| row.chomp.split(",").map(&:to_f) }
      (tp, fp), (fn, tn) = matrix

      File.open(File.join(@summaries_dir, category.to_s), "w+") do |f|
        f.puts "accuracy,#{(tp + tn) / (tp + fp + tn + fn)}"
        f.puts "true_positive_rate,#{tp / (tp + fn)}"
        f.puts "true_negative_rate,#{tn / (tn + fp)}"
        f.puts "positive_predictive_value,#{tp / (tp + fp)}"
        f.puts "negative_predictive_value,#{tn / (tn + fn)}"
      end
    end
  end

  private

  # Adds the outcomes of a single fold directory to the running counts.
  def tally_fold(fold, confusion_matrices)
    test_dir = File.join(fold, "test")
    classifications = File.readlines(File.join(test_dir, "classification")).map { |row| row.chomp.split(",") }
    measurements = File.readlines(File.join(test_dir, "dependent_variables")).map(&:to_i)
    similarity_thresholds = File.readlines(File.join(fold, "train", "similarity_thresholds")).map { |v| v.chomp.to_f }

    classifications.each_with_index do |fields, i|
      prediction = fields[1]
      # split(",") drops trailing empty fields, so a row without a
      # prediction can yield nil here as well as "".
      next if prediction.nil? || prediction.empty?

      outcome = OUTCOMES[[prediction.to_i, measurements[i]]]
      next unless outcome

      confidence = fields[4].to_f > similarity_thresholds[1] ? :high_confidence : :low_confidence
      confusion_matrices[:all][outcome] += 1
      confusion_matrices[confidence][outcome] += 1
    end
  end
end