summary refs log tree commit diff
path: root/scripts/misclassifications.rb
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/misclassifications.rb')
-rwxr-xr-x scripts/misclassifications.rb 61
1 files changed, 61 insertions, 0 deletions
diff --git a/scripts/misclassifications.rb b/scripts/misclassifications.rb
new file mode 100755
index 0000000..171077c
--- /dev/null
+++ b/scripts/misclassifications.rb
@@ -0,0 +1,61 @@
+require_relative 'include.rb'
+
+# Adds interval intersection to Ruby's core Range.
+#
+# NOTE(review): this reopens a core class for the whole process; the rest of
+# this script relies on the global `&` operator defined here.
+class Range
+  # Returns the overlap of this range and +other+ as an inclusive range.
+  #
+  # Upper bounds come from Range#max, so for an integer range with an excluded
+  # end (1...5) the last contained element (4) is used.
+  #
+  # @param other [Range] the range to intersect with
+  # @return [Range, nil] the span from the larger begin to the smaller max,
+  #   or nil when the ranges do not overlap or either range is empty
+  def intersection(other)
+    own_max = max
+    other_max = other.max
+    # Range#max is nil for an empty range (begin > end). Comparing nil with `<`
+    # would raise NoMethodError, and an empty range overlaps nothing.
+    return nil if own_max.nil? || other_max.nil?
+    return nil if own_max < other.begin || other_max < self.begin
+
+    [self.begin, other.begin].max..[own_max, other_max].min
+  end
+  alias_method :&, :intersection
+end
+
+# Collect the measured values from test.csv, keyed by the first column.
+# Each key maps to an array holding the second column of every row that shares
+# that key, converted with to_f.
+# NOTE(review): keys are presumably SMILES strings and DATA presumably comes
+# from include.rb - confirm.
+experimental = {}
+CSV.foreach(File.join(DATA,"test.csv")) do |key, value|
+  (experimental[key] ||= []) << value.to_f
+end
+
+# Load one prediction per row of training-test-predictions.csv (the first line
+# is treated as a header). Keyed by column 0, each entry is a two-element array:
+#   [0] -log10 of column 2
+#   [1] absolute value of log10 of column 3
+# A later row with the same key replaces the earlier one.
+predictions = {}
+CSV.foreach(File.join(DATA,"training-test-predictions.csv"),:headers => true) do |row|
+  center = -Math.log10(row[2].to_f)
+  spread = Math.log10(row[3].to_f).abs
+  predictions[row[0]] = [center, spread]
+end
+
+# Compare every prediction with the measured values stored under the same key.
+#
+# For each entry the interval prediction +/- 1.96 * ci is built and tested
+# against the span of the -log10 transformed experimental values. Entries whose
+# interval overlaps that span are only counted; the others are counted and
+# recorded in `out` together with a signed distance.
+outside_experimental_values = 0
+within_experimental_values = 0
+out = []
+predictions.each do |smi,(prediction,ci)|
+  # fetch raises a KeyError naming the compound when test.csv has no row for
+  # it, instead of failing on nil with an unrelated NoMethodError.
+  exp = experimental.fetch(smi).collect{|e| -Math.log10(e)}.uniq
+  # https://en.wikipedia.org/wiki/Prediction_interval
+  min = prediction-1.96*ci
+  max = prediction+1.96*ci
+  if (min..max) & (exp.min..exp.max)
+    within_experimental_values += 1
+    next
+  end
+
+  outside_experimental_values += 1
+  # Signed distance from the interval to the experimental extreme on the side
+  # where the measurements lie: negative when they are below the interval
+  # (exp.min - min), positive when they are above it (exp.max - max).
+  err =
+    if exp.min < min
+      exp.min - min
+    elsif exp.max > max
+      exp.max - max
+    end
+  next unless err
+
+  out << {
+    :smi => smi,
+    :experimental => exp,
+    :min => min,
+    :max => max,
+    :prediction => prediction,
+    :ci => ci,
+    :error => err
+  }
+end
+
+
+# Order the recorded misses by absolute distance, largest first, and write them
+# as "SMILES, Distance" lines to data/misclassifications.csv.
+# NOTE(review): the output path is relative to the working directory, whereas
+# the inputs are read via DATA - confirm this is intended.
+out.sort! { |left, right| right[:error].abs <=> left[:error].abs }
+header = ["SMILES","Distance"]
+records = out.map { |entry| [entry[:smi], entry[:error]] }
+csv = [header] + records
+File.open("data/misclassifications.csv","w+") do |f|
+  lines = csv.map { |fields| fields.join(", ") }
+  f.puts lines.join("\n")
+end
+
+#File.open("correct-predictions.R","w+"){|f| f.puts "correct_predictions = #{within_experimental_values}"}