summaryrefslogtreecommitdiff
path: root/scripts/misclassifications.rb
blob: d285868dc0888809d51a434afaa4d66eaecda798 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env ruby
require_relative '../../lazar/lib/lazar'
include OpenTox

# Adds interval intersection to Ruby's built-in Range.
class Range
  # Returns the overlap of this range with +other+.
  #
  # Bounds are compared via Range#max, so an integer range that excludes its
  # end is treated as ending on its last member.
  #
  # @param other [Range] the range to intersect with
  # @return [Range, nil] the inclusive range shared by both operands, or nil
  #   when they do not overlap or when either operand is empty (begin > end)
  def intersection(other)
    own_max = max
    other_max = other.max
    # Range#max is nil for an empty range; an empty range overlaps nothing.
    return nil if own_max.nil? || other_max.nil?
    return nil if own_max < other.begin || other_max < self.begin

    [self.begin, other.begin].max..[own_max, other_max].min
  end
  alias_method :&, :intersection
end

# Gather the measured values per compound: the first CSV column is used as the
# key and the second column is parsed as a float. A key that appears on
# several rows accumulates all of its values in one array.
experimental = {}
measurements_file = File.join("data","test_log10.csv")
CSV.foreach(measurements_file) do |row|
  key = row[0]
  (experimental[key] ||= []) << row[1].to_f
end

# Read the predictions file (first row is a header). Each compound, keyed by
# the first column, maps to a pair: the third column as a float and the
# absolute value of the fourth column as a float. A later row for the same key
# replaces an earlier one.
predictions = {}
predictions_file = File.join("data","training-test-predictions.csv")
CSV.foreach(predictions_file, :headers => true) do |row|
  value = row[2].to_f
  spread = row[3].to_f.abs
  predictions[row[0]] = [value, spread]
end

# Compare the interval around every prediction with the span of measured
# values for the same compound. Overlapping compounds are only counted; the
# others are counted and recorded in `out` together with their distance.
outside_experimental_values = 0
within_experimental_values = 0
out = []
# Multiplier for a two-sided 95% interval under a normal distribution.
# https://en.wikipedia.org/wiki/Prediction_interval
# NOTE(review): this treats the second stored value as a standard-deviation-like
# spread — confirm against the producer of training-test-predictions.csv.
z = 1.96
predictions.each do |smi, (prediction, spread)|
  measured = experimental[smi]
  unless measured
    # A predicted compound without any measurement cannot be compared; report
    # it and move on instead of failing with NoMethodError on nil.
    warn "No experimental values for #{smi}, skipping"
    next
  end
  exp = measured.uniq
  min = prediction - z * spread
  max = prediction + z * spread
  if (min..max) & (exp.min..exp.max)
    within_experimental_values += 1
    next
  end
  outside_experimental_values += 1
  # Signed distance from the interval bound to the experimental value lying
  # furthest beyond it: negative when the measurements are below the interval,
  # positive when they are above it.
  err =
    if exp.min < min
      exp.min - min
    elsif exp.max > max
      exp.max - max
    end
  next unless err
  out << {
    :smi => smi,
    :experimental => exp,
    :min => min,
    :max => max,
    :prediction => prediction,
    :ci => spread,
    :error => err
  }
end


# Order the recorded misses by descending absolute distance (in place), then
# write them as "SMILES, Distance" lines below a header row.
out.sort! do |left, right|
  right[:error].abs <=> left[:error].abs
end
rows = [["SMILES","Distance"]]
out.each { |entry| rows << [entry[:smi], entry[:error]] }
File.open("data/misclassifications.csv","w+") do |file|
  lines = rows.map { |fields| fields.join(", ") }
  file.puts(lines.join("\n"))
end

#File.open("correct-predictions.R","w+"){|f| f.puts "correct_predictions = #{within_experimental_values}"}