require "fileutils"

# Computes confusion matrices and summary statistics for a binary classifier
# from per-fold result files stored under a single directory.
#
# Files read (paths relative to the directory given to +new+; <fold> is every
# entry whose name starts with a digit):
#
#   <fold>/test/classification          comma-separated rows; field 1 is the
#                                       prediction (blank when there is none),
#                                       field 4 is compared to the threshold
#   <fold>/test/dependent_variables     one integer per line, matched to the
#                                       classification rows by position
#   <fold>/train/similarity_thresholds  one float per line; the value on the
#                                       second line splits high from low
#                                       confidence
#
# Files written:
#
#   confusion_matrices/<category>       two lines: "tp,fp" and "fn,tn"
#   summaries/<category>                one "name,value" line per statistic
#
# with <category> being all, high_confidence or low_confidence.
class ClassificationStatistics
  # Result categories, in the order they are written.
  CATEGORIES = [:all, :high_confidence, :low_confidence].freeze

  # Maps [prediction, measurement] to the confusion-matrix cell it counts
  # towards. Any other combination (including a missing measurement) is not
  # counted at all.
  OUTCOMES = {
    [1, 1] => :tp,
    [0, 0] => :tn,
    [1, 0] => :fp,
    [0, 1] => :fn
  }.freeze

  private_constant :CATEGORIES, :OUTCOMES

  # @param dir [String] directory containing the fold directories. The list of
  #   folds is captured here, once, via a glob.
  def initialize(dir)
    @dir = dir
    @folds = Dir[File.join(@dir, "[0-9]*")]
    @confusion_matrix_dir = File.join(@dir, "confusion_matrices")
    @summaries_dir = File.join(@dir, "summaries")
  end

  # Tallies the confusion matrices over all folds and writes one file per
  # category into the confusion_matrices directory.
  #
  # The files are written only after every fold has been read successfully, so
  # a failure while reading a fold never leaves partial counts on disk (which
  # +summary+ would otherwise pick up as if they were complete).
  #
  # @return [Array<String>] the fold directories that were processed
  # @raise [SystemCallError] if a fold is missing one of its input files
  def confusion_matrix
    matrices = {}
    CATEGORIES.each do |category|
      matrices[category] = { :tp => 0, :fp => 0, :tn => 0, :fn => 0 }
    end

    @folds.each { |fold| tally_fold(fold, matrices) }

    # Without folds there is nothing to report: write no files, so that
    # +summary+ fails on the missing matrix instead of reporting NaN rates.
    write_confusion_matrices(matrices) unless @folds.empty?
    @folds
  end

  # Writes accuracy, true positive/negative rate and positive/negative
  # predictive value for every category into the summaries directory.
  #
  # Confusion matrices are computed first when a category's matrix file does
  # not exist yet; existing matrix files are reused as they are. A statistic
  # whose denominator is zero is written as NaN (float division).
  #
  # @return [Array<Symbol>] the categories that were summarised
  def summary
    CATEGORIES.each do |category|
      matrix_file = File.join(@confusion_matrix_dir, category.to_s)
      # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
      confusion_matrix unless File.exist?(matrix_file)

      # Counts are parsed as floats so the ratios below use float division.
      matrix = File.readlines(matrix_file).map do |row|
        row.chomp.split(",").map { |value| value.to_f }
      end
      tp = matrix[0][0]
      fp = matrix[0][1]
      fn = matrix[1][0]
      tn = matrix[1][1]

      FileUtils.mkdir_p(@summaries_dir)
      File.open(File.join(@summaries_dir, category.to_s), "w") do |f|
        f.puts "accuracy,#{(tp + tn) / (tp + fp + tn + fn)}"
        f.puts "true_positive_rate,#{tp / (tp + fn)}"
        f.puts "true_negative_rate,#{tn / (tn + fp)}"
        f.puts "positive_predictive_value,#{tp / (tp + fp)}"
        f.puts "negative_predictive_value,#{tn / (tn + fn)}"
      end
    end
  end

  private

  # Adds the results of one fold directory to +matrices+ (modified in place).
  def tally_fold(fold, matrices)
    test_dir = File.join(fold, "test")
    rows = File.readlines(File.join(test_dir, "classification")).map do |row|
      row.chomp.split(",")
    end
    measurements = File.readlines(File.join(test_dir, "dependent_variables")).map do |value|
      value.to_i
    end
    thresholds = File.readlines(File.join(fold, "train", "similarity_thresholds")).map do |value|
      value.chomp.to_f
    end
    threshold = thresholds[1]

    rows.each_with_index do |row, index|
      # to_s also covers rows that have no prediction field at all (e.g. a
      # blank line); they are skipped like rows with a blank prediction.
      prediction = row[1].to_s
      next if prediction.empty?

      outcome = OUTCOMES[[prediction.to_i, measurements[index]]]
      next unless outcome

      confidence = row[4].to_f > threshold ? :high_confidence : :low_confidence
      matrices[:all][outcome] += 1
      matrices[confidence][outcome] += 1
    end
  end

  # Writes each matrix as two lines: "tp,fp" followed by "fn,tn".
  def write_confusion_matrices(matrices)
    FileUtils.mkdir_p(@confusion_matrix_dir)
    matrices.each do |category, m|
      File.open(File.join(@confusion_matrix_dir, category.to_s), "w") do |f|
        f.puts "#{m[:tp]},#{m[:fp]}\n#{m[:fn]},#{m[:tn]}"
      end
    end
  end
end