1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
#!/usr/bin/env ruby
require_relative "../lib/lazar"
#stat = ClassificationStatistics.new ARGV[0]
#stat.summary
# Compares crossvalidation predictions with experimental values and writes
# per-compound verdicts, confusion matrices and summary statistics.
#
# Usage: <script> DATA_CSV [SIMILARITY_THRESHOLD]
#
# Input files:
#   DATA_CSV                                     header line, then "id,value" rows
#                                                (id is presumably a SMILES string;
#                                                value is read with to_i)
#   <dir of DATA_CSV>/crossvalidation/<N>/test-prediction.csv
#                                                header line, then rows with the id in
#                                                column 0, the prediction in column 2
#                                                and the maximum similarity in column 5
#
# Output files (all written to <dir of DATA_CSV>/crossvalidation):
#   predictions.csv                              "id,verdict,max_similarity" per row
#   confusion-matrix-{all,high-confidence}.csv   "tp,fp" / "fn,tn"
#   summary-{all,high-confidence}.csv            "metric,value" per line
#
# "High confidence" means the row's maximum similarity is strictly greater than
# the threshold (a missing threshold becomes 0.0 via nil.to_f).
module CrossvalidationSummary
  module_function

  # Maps [predicted, experimental] to a confusion-matrix cell.
  # Pairs containing any value other than 0/1 have no entry and are not counted.
  OUTCOMES = {
    [1, 1] => :tp,
    [0, 0] => :tn,
    [1, 0] => :fp,
    [0, 1] => :fn
  }.freeze

  # Returns a fresh, zeroed set of confusion-matrix counters.
  def empty_counts
    { tp: 0, tn: 0, fp: 0, fn: 0 }
  end

  # Reads the experimental data file, skipping the header line.
  # Returns a Hash of id => Array of Integer values; an id occurring on several
  # lines collects one value per line.
  def read_experimental(path)
    File.readlines(path).drop(1).each_with_object({}) do |line, values|
      id, value = line.chomp.split(",")
      (values[id] ||= []) << value.to_i
    end
  end

  # Builds the five "metric,value" lines of a summary file.
  # A zero denominator yields Float NaN, which is printed as "NaN".
  def summary_lines(counts)
    tp, tn, fp, fn = counts.values_at(:tp, :tn, :fp, :fn)
    [
      "accuracy,#{(tp + tn) / (tp + fp + tn + fn).to_f}",
      "true_positive_rate,#{tp / (tp + fn).to_f}",
      "true_negative_rate,#{tn / (tn + fp).to_f}",
      "positive_predictive_value,#{tp / (tp + fp).to_f}",
      "negative_predictive_value,#{tn / (tn + fn).to_f}"
    ]
  end

  # Writes confusion-matrix-<suffix>.csv and summary-<suffix>.csv into dir.
  def write_report(dir, suffix, counts)
    File.open(File.join(dir, "confusion-matrix-#{suffix}.csv"), "w") do |f|
      f.puts "#{counts[:tp]},#{counts[:fp]}\n#{counts[:fn]},#{counts[:tn]}"
    end
    File.open(File.join(dir, "summary-#{suffix}.csv"), "w") do |f|
      f.puts summary_lines(counts)
    end
  end

  # Evaluates every fold below <dir of data_file>/crossvalidation and writes all
  # output files.
  #
  # data_file - path of the experimental data CSV
  # thresh    - Float; rows with max similarity > thresh also count as high confidence
  def run(data_file, thresh)
    dir = File.join(File.dirname(data_file), "crossvalidation")
    experimental = read_experimental(data_file)
    all = empty_counts
    high_confidence = empty_counts

    File.open(File.join(dir, "predictions.csv"), "w") do |out|
      # Sorted so the row order does not depend on the filesystem
      # (Dir[] only sorts by default from Ruby 3.0 on).
      Dir[File.join(dir, "[0-9]*")].sort.each do |fold|
        rows = File.readlines(File.join(fold, "test-prediction.csv")).map { |row| row.chomp.split(",") }
        rows.drop(1).each do |row|
          id = row[0]
          max_sim = row[5].to_f
          verdict = "NA"
          measured = experimental[id]
          # String#split drops trailing empty fields, so a row without a
          # prediction may have no column 2 at all; to_s covers that nil.
          unless measured.nil? || row[2].to_s.empty?
            predicted = row[2].to_i
            # Every experimental value is counted separately; the verdict
            # written for the row is the one from the last matching value.
            measured.each do |actual|
              outcome = OUTCOMES[[predicted, actual]]
              next unless outcome

              verdict = outcome.to_s.upcase
              all[outcome] += 1
              high_confidence[outcome] += 1 if max_sim > thresh
            end
          end
          out.puts([id, verdict, max_sim].join(","))
        end
      end
    end

    write_report(dir, "all", all)
    write_report(dir, "high-confidence", high_confidence)
  end
end

if ARGV.empty?
  warn "usage: #{$PROGRAM_NAME} DATA_CSV [SIMILARITY_THRESHOLD]"
else
  CrossvalidationSummary.run(ARGV[0], ARGV[1].to_f)
end
|