summaryrefslogtreecommitdiff
path: root/lib/statistics.rb
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2019-08-24 14:44:52 +0200
committerChristoph Helma <helma@in-silico.ch>2019-08-24 14:44:52 +0200
commit1f789133d961c29d3babfaf69cdde3d675288537 (patch)
tree25fcae7c3aa4c86a8f69708393d720afd78ce711 /lib/statistics.rb
parentfaccda14c0f98333bf7623d4caef00eea7bb1933 (diff)
initial refactored version for mutagenicity paper
Diffstat (limited to 'lib/statistics.rb')
-rw-r--r--lib/statistics.rb71
1 files changed, 71 insertions, 0 deletions
diff --git a/lib/statistics.rb b/lib/statistics.rb
new file mode 100644
index 0000000..15ea416
--- /dev/null
+++ b/lib/statistics.rb
@@ -0,0 +1,71 @@
require "fileutils"

# Aggregates the per-fold classification results found below a results
# directory into confusion matrices and summary statistics.
#
# Files read, per fold (a fold is any entry of +dir+ whose name starts with a digit):
#   <fold>/test/classification           CSV rows; column 1 is read as the
#                                        prediction, column 4 as the maximum
#                                        similarity (zero-based indices)
#   <fold>/test/dependent_variables      one measured value per row, parsed with to_i
#   <fold>/train/similarity_thresholds   one float per row; the second row is the
#                                        threshold separating high from low confidence
#
# Files written:
#   <dir>/confusion_matrices/<category>  two CSV rows: "tp,fp" and "fn,tn"
#   <dir>/summaries/<category>           one "name,value" row per statistic
class ClassificationStatistics
  # Categories for which a confusion matrix and a summary are produced.
  CATEGORIES = %i[all high_confidence low_confidence].freeze

  # Maps [prediction, measurement] to the confusion matrix cell it belongs to.
  # Any other combination of values is not counted.
  OUTCOMES = {
    [1, 1] => :tp,
    [0, 0] => :tn,
    [1, 0] => :fp,
    [0, 1] => :fn
  }.freeze
  private_constant :OUTCOMES

  # @param dir [String] directory that contains the numbered fold directories
  def initialize(dir)
    @dir = dir
    @folds = Dir[File.join(@dir, "[0-9]*")]
    @confusion_matrix_dir = File.join(@dir, "confusion_matrices")
    @summaries_dir = File.join(@dir, "summaries")
  end

  # Tallies true/false positives/negatives over all folds and writes one
  # confusion matrix file per category.
  #
  # Nothing is written when no fold directories were found.
  #
  # @return [Hash{Symbol => Hash{Symbol => Integer}}] tallies per category
  def confusion_matrix
    matrices = CATEGORIES.each_with_object({}) do |category, result|
      result[category] = { tp: 0, fp: 0, tn: 0, fn: 0 }
    end

    @folds.each { |fold| tally_fold(fold, matrices) }
    write_confusion_matrices(matrices) unless @folds.empty?
    matrices
  end

  # Writes accuracy, true positive/negative rate and positive/negative
  # predictive value for every category, derived from the confusion matrix
  # files. Missing confusion matrix files are created first.
  #
  # Values are computed with floats, so a statistic with an empty denominator
  # is written as NaN instead of raising.
  def summary
    FileUtils.mkdir_p @summaries_dir
    CATEGORIES.each do |category|
      matrix_file = File.join(@confusion_matrix_dir, category.to_s)
      confusion_matrix unless File.exist?(matrix_file)
      # File layout: first row "tp,fp", second row "fn,tn".
      (tp, fp), (fn, tn) = File.readlines(matrix_file).map { |row| row.chomp.split(",").map(&:to_f) }
      File.open(File.join(@summaries_dir, category.to_s), "w") do |f|
        f.puts "accuracy,#{(tp + tn) / (tp + fp + tn + fn)}"
        f.puts "true_positive_rate,#{tp / (tp + fn)}"
        f.puts "true_negative_rate,#{tn / (tn + fp)}"
        f.puts "positive_predictive_value,#{tp / (tp + fp)}"
        f.puts "negative_predictive_value,#{tn / (tn + fn)}"
      end
    end
  end

  private

  # Adds the outcomes of a single fold directory to +matrices+.
  def tally_fold(fold, matrices)
    test_dir = File.join(fold, "test")
    classifications = File.readlines(File.join(test_dir, "classification")).map { |row| row.chomp.split(",") }
    measurements = File.readlines(File.join(test_dir, "dependent_variables")).map(&:to_i)
    thresholds = File.readlines(File.join(fold, "train", "similarity_thresholds")).map { |v| v.chomp.to_f }
    threshold = thresholds[1]

    classifications.each_with_index do |row, i|
      prediction = row[1]
      # Rows without a prediction (empty column, short or blank row) are not counted.
      next if prediction.nil? || prediction.empty?

      outcome = OUTCOMES[[prediction.to_i, measurements[i]]]
      next unless outcome

      confidence = row[4].to_f > threshold ? :high_confidence : :low_confidence
      matrices[:all][outcome] += 1
      matrices[confidence][outcome] += 1
    end
  end

  # Writes every matrix as two CSV rows ("tp,fp" and "fn,tn") to a file named
  # after its category.
  def write_confusion_matrices(matrices)
    FileUtils.mkdir_p @confusion_matrix_dir
    matrices.each do |category, m|
      File.open(File.join(@confusion_matrix_dir, category.to_s), "w") do |f|
        f.puts "#{m[:tp]},#{m[:fp]}\n#{m[:fn]},#{m[:tn]}"
      end
    end
  end
end
+