From 1dcd741a5bff8dc41abf0840f59031eb557ff230 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 8 Mar 2021 21:25:45 +0100 Subject: neighbor selection adjusted, summary for high-confidence predictions --- bin/classification-summary.rb | 34 ++++++++++++++++++++++++++++------ bin/crossvalidation-folds.rb | 9 +-------- 2 files changed, 29 insertions(+), 14 deletions(-) (limited to 'bin') diff --git a/bin/classification-summary.rb b/bin/classification-summary.rb index c6755a1..45ffb29 100755 --- a/bin/classification-summary.rb +++ b/bin/classification-summary.rb @@ -3,6 +3,7 @@ require_relative "../lib/lazar" #stat = ClassificationStatistics.new ARGV[0] #stat.summary dir = File.join(File.dirname(ARGV[0]),"crossvalidation") +thresh = ARGV[1].to_f folds = Dir[File.join(dir,"[0-9]*")] predictions = [] @@ -10,7 +11,11 @@ tp=0 tn=0 fp=0 fn=0 -n=0 +hc_tp=0 +hc_tn=0 +hc_fp=0 +hc_fn=0 +#n=0 experimental = {} lines = File.readlines(File.join(ARGV[0])) @@ -28,39 +33,48 @@ File.open(File.join(dir,"predictions.csv"),"w+") do |f| pred.each do |prediction| smi = prediction[0] exp = experimental[smi] + maxsim = prediction[5].to_f + v = "NA" unless exp.nil? or prediction[2].empty? or exp.empty? 
p = prediction[2].to_i - n+=1 - v = "NA" + #n+=1 exp.each do |e| if p and e if p == 1 and e == 1 v = "TP" tp+=1 + hc_tp+=1 if maxsim > thresh elsif p == 0 and e == 0 v = "TN" tn+=1 + hc_tn+=1 if maxsim > thresh elsif p == 1 and e == 0 v = "FP" fp+=1 + hc_fp+=1 if maxsim > thresh elsif p == 0 and e == 1 v = "FN" fn+=1 + hc_fn+=1 if maxsim > thresh end end predictions << v end - f.puts([smi,v].join(",")) end + f.puts([smi,v,maxsim].join(",")) end end end -File.open(File.join(dir,"confusion-matrix.csv"),"w+") do |f| +File.open(File.join(dir,"confusion-matrix-all.csv"),"w+") do |f| f.puts "#{tp},#{fp}\n#{fn},#{tn}" end -File.open(File.join(dir,"summary.csv"),"w+") do |f| +File.open(File.join(dir,"confusion-matrix-high-confidence.csv"),"w+") do |f| + f.puts "#{hc_tp},#{hc_fp}\n#{hc_fn},#{hc_tn}" +end + +File.open(File.join(dir,"summary-all.csv"),"w+") do |f| f.puts "accuracy,#{(tp+tn)/(tp+fp+tn+fn).to_f}" f.puts "true_positive_rate,#{tp/(tp+fn).to_f}" f.puts "true_negative_rate,#{tn/(tn+fp).to_f}" @@ -68,3 +82,11 @@ File.open(File.join(dir,"summary.csv"),"w+") do |f| f.puts "negative_predictive_value,#{tn/(tn+fn).to_f}" end +File.open(File.join(dir,"summary-high-confidence.csv"),"w+") do |f| + f.puts "accuracy,#{(hc_tp+hc_tn)/(hc_tp+hc_fp+hc_tn+hc_fn).to_f}" + f.puts "true_positive_rate,#{hc_tp/(hc_tp+hc_fn).to_f}" + f.puts "true_negative_rate,#{hc_tn/(hc_tn+hc_fp).to_f}" + f.puts "positive_predictive_value,#{hc_tp/(hc_tp+hc_fp).to_f}" + f.puts "negative_predictive_value,#{hc_tn/(hc_tn+hc_fn).to_f}" +end + diff --git a/bin/crossvalidation-folds.rb b/bin/crossvalidation-folds.rb index 0c765f7..16a4103 100755 --- a/bin/crossvalidation-folds.rb +++ b/bin/crossvalidation-folds.rb @@ -17,6 +17,7 @@ start = 0 :train => indices-test_idxs, :test => test_idxs } + p idxs start = last+1 # write training/test data cv_dir = File.join(File.dirname(ARGV[0]),"crossvalidation",i.to_s) @@ -39,14 +40,6 @@ start = 0 f.puts t.join(",") end end - file = 
File.join(cv_dir,t.to_s+"-experimental.csv") - File.open(file,"w+") do |f| - f.puts (["Canonical SMILES", model.dependent_variable_name]).join(",") - idx.collect{|i| model.train[i]}.each do |t| - # TODO fix - f.puts t[0..1].join(",") - end - end end end Process.waitall -- cgit v1.2.3