summary refs log tree commit diff
path: root/bin/classification-summary.rb
blob: 45ffb29fb222010340cd3c7e8bf5d45ad4c1fbe9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env ruby
require_relative "../lib/lazar"
#stat = ClassificationStatistics.new ARGV[0]
#stat.summary
# Summarises crossvalidation results of a binary (0/1) classification.
#
# Usage: <script> EXPERIMENTAL_CSV [THRESHOLD]
#
#   EXPERIMENTAL_CSV  Comma-separated file whose first line is a header.
#                     Column 0 is the compound identifier (presumably a
#                     SMILES string, judging by the `smi` variable -- confirm),
#                     column 1 the measured class, parsed with to_i. An
#                     identifier may occur on several lines; every measured
#                     value is kept.
#   THRESHOLD         Float. Predictions whose column 5 value is strictly
#                     greater than it are also counted as "high confidence".
#                     Defaults to 0.0 when omitted (nil.to_f).
#
# Input, per fold directory "<dirname of EXPERIMENTAL_CSV>/crossvalidation/[0-9]*":
#   test-prediction.csv  Header line, then one row per prediction with the
#                        identifier in column 0, the predicted class in
#                        column 2 and a float in column 5 (named maxsim here;
#                        presumably the maximum neighbour similarity -- confirm).
#
# Output, written into "<dirname of EXPERIMENTAL_CSV>/crossvalidation":
#   predictions.csv                              "id,outcome,maxsim" per row
#   confusion-matrix-{all,high-confidence}.csv   "tp,fp" / "fn,tn"
#   summary-{all,high-confidence}.csv            accuracy, TPR, TNR, PPV, NPV
#
# Rates whose denominator is zero are written as NaN (Integer / 0.0).

abort "Usage: #{File.basename($PROGRAM_NAME)} EXPERIMENTAL_CSV [THRESHOLD]" if ARGV.empty?

experimental_file = ARGV[0]
dir = File.join(File.dirname(experimental_file), "crossvalidation")
thresh = ARGV[1].to_f

# Only directories are folds; sorting keeps the row order of predictions.csv
# independent of the filesystem's directory listing order.
folds = Dir[File.join(dir, "[0-9]*")].select { |path| File.directory?(path) }.sort

# Maps [predicted, experimental] to the confusion-matrix cell it belongs to.
# Any other combination of values is not counted.
outcomes = { [1, 1] => "TP", [0, 0] => "TN", [1, 0] => "FP", [0, 1] => "FN" }
all = Hash.new(0)
high_confidence = Hash.new(0)

# identifier => list of measured classes (one entry per line of the file)
experimental = {}
File.readlines(experimental_file).drop(1).each do |line|
  id, value = line.chomp.split(",")
  (experimental[id] ||= []) << value.to_i
end

File.open(File.join(dir, "predictions.csv"), "w") do |out|
  folds.each do |fold|
    File.readlines(File.join(fold, "test-prediction.csv")).drop(1).each do |row|
      prediction = row.chomp.split(",")
      smi = prediction[0]
      exp = experimental[smi]
      maxsim = prediction[5].to_f
      v = "NA"
      # to_s guards against rows with fewer than three columns.
      unless exp.nil? || exp.empty? || prediction[2].to_s.empty?
        predicted = prediction[2].to_i
        # Each measured value is counted separately, so one prediction can
        # contribute to several cells. predictions.csv only records the
        # outcome of the last measured value that matched a cell.
        exp.each do |e|
          label = outcomes[[predicted, e]]
          next unless label
          v = label
          all[label] += 1
          high_confidence[label] += 1 if maxsim > thresh
        end
      end
      out.puts([smi, v, maxsim].join(","))
    end
  end
end

# Writes confusion-matrix-<name>.csv and summary-<name>.csv for one tally.
write_results = lambda do |name, counts|
  tp, tn, fp, fn = counts.values_at("TP", "TN", "FP", "FN")

  File.open(File.join(dir, "confusion-matrix-#{name}.csv"), "w") do |f|
    f.puts "#{tp},#{fp}\n#{fn},#{tn}"
  end

  File.open(File.join(dir, "summary-#{name}.csv"), "w") do |f|
    f.puts "accuracy,#{(tp + tn) / (tp + fp + tn + fn).to_f}"
    f.puts "true_positive_rate,#{tp / (tp + fn).to_f}"
    f.puts "true_negative_rate,#{tn / (tn + fp).to_f}"
    f.puts "positive_predictive_value,#{tp / (tp + fp).to_f}"
    f.puts "negative_predictive_value,#{tn / (tn + fn).to_f}"
  end
end

write_results.call("all", all)
write_results.call("high-confidence", high_confidence)