1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
#!/usr/bin/env ruby
require_relative '../../lazar/lib/lazar'
include OpenTox
class Range
  # Overlap of this range with +other+.
  #
  # @param other [Range] range to intersect with; its bounds must be
  #   comparable with this range's bounds
  # @return [Range, nil] inclusive range from the larger of the two begins to
  #   the smaller of the two maxima, or nil when the ranges do not overlap
  #   (including when either range is empty)
  def intersection(other)
    own_max = max
    other_max = other.max
    # Range#max is nil for an empty range such as 3..1; an empty range
    # overlaps nothing, and comparing nil below would raise NoMethodError.
    return nil if own_max.nil? || other_max.nil?
    return nil if own_max < other.begin || other_max < self.begin

    [self.begin, other.begin].max..[own_max, other_max].min
  end
  alias_method :&, :intersection
end
# Experimental values, keyed by the first CSV column. A key that appears on
# several rows accumulates every value from the second column (as Float).
experimental = {}
experimental_file = File.join("data", "test_log10.csv")
CSV.foreach(experimental_file) do |fields|
  key = fields[0]
  (experimental[key] ||= []) << fields[1].to_f
end

# Predictions, keyed by the first CSV column (the file has a header row).
# Each entry is [column 2 as Float, absolute value of column 3 as Float];
# a later row with the same key overwrites the earlier one.
predictions = {}
predictions_file = File.join("data", "training-test-predictions.csv")
CSV.foreach(predictions_file, headers: true) do |record|
  predictions[record[0]] = [record[2].to_f, record[3].to_f.abs]
end
outside_experimental_values = 0
within_experimental_values = 0
out = []
# Compare every prediction with the experimental values stored under the same
# key: count how many prediction intervals overlap the experimental range and
# collect details for those that miss it.
predictions.each do |smi, (prediction, ci)|
  # fetch raises a KeyError naming the key when no experimental values were
  # loaded for it, instead of a NoMethodError on nil further down.
  exp = experimental.fetch(smi).uniq
  # https://en.wikipedia.org/wiki/Prediction_interval
  # NOTE(review): the 1.96 factor presumably treats ci as a standard
  # deviation (95% interval) — confirm against the predictions file.
  min = prediction - 1.96 * ci
  max = prediction + 1.96 * ci

  if (min..max) & (exp.min..exp.max)
    within_experimental_values += 1
    next
  end

  outside_experimental_values += 1
  # Signed distance: negative when the lowest experimental value lies below
  # the interval, positive when the highest one lies above it.
  err =
    if exp.min < min
      exp.min - min
    elsif exp.max > max
      exp.max - max
    end
  next unless err

  out << {
    :smi => smi,
    :experimental => exp,
    :min => min,
    :max => max,
    :prediction => prediction,
    :ci => ci,
    :error => err
  }
end
# Order the collected entries by absolute error, largest first.
out.sort! { |left, right| right[:error].abs <=> left[:error].abs }

# One header row followed by one [key, signed error] row per entry.
header = [["SMILES","Distance"]]
body = out.map { |entry| [entry[:smi], entry[:error]] }
csv = header + body

# Fields are joined with ", " and rows with newlines; puts appends the final newline.
File.open("data/misclassifications.csv","w+") do |f|
  lines = csv.map { |fields| fields.join ", " }
  f.puts lines.join("\n")
end
#File.open("correct-predictions.R","w+"){|f| f.puts "correct_predictions = #{within_experimental_values}"}
|