diff options
Diffstat (limited to 'scripts/misclassifications.rb')
-rwxr-xr-x | scripts/misclassifications.rb | 61 |
1 files changed, 61 insertions, 0 deletions
# scripts/misclassifications.rb
#
# Finds compounds whose experimental values lie completely outside the
# interval built around the model prediction, and writes them, largest
# deviation first, to data/misclassifications.csv.
#
# NOTE(review): CSV and the DATA constant are not defined in this script;
# presumably include.rb provides them -- confirm.
require_relative 'include.rb'

# Extends Range with an interval intersection so that two ranges can be
# tested for overlap with `a & b`.
class Range
  # Returns the range shared by self and +other+, or nil when they do not
  # overlap or when either range is empty.
  def intersection(other)
    upper = max
    other_upper = other.max
    # Range#max is nil for an empty range (begin > end); nothing overlaps it.
    return nil if upper.nil? || other_upper.nil?
    return nil if upper < other.begin || other_upper < self.begin

    [self.begin, other.begin].max..[upper, other_upper].min
  end
  alias_method :&, :intersection
end

# Key (column 0) => list of all values found in column 1 of test.csv.
# The file is read without header handling, so every row is treated as data.
experimental = {}
CSV.foreach(File.join(DATA, "test.csv")) do |row|
  (experimental[row[0]] ||= []) << row[1].to_f
end

# Key (column 0) => [prediction, ci], both derived from log10 of columns 2 and 3.
predictions = {}
CSV.foreach(File.join(DATA, "training-test-predictions.csv"), headers: true) do |row|
  predictions[row[0]] = [-Math.log10(row[2].to_f), Math.log10(row[3].to_f).abs]
end

outside_experimental_values = 0
within_experimental_values = 0
out = []
predictions.each do |smi, (prediction, ci)|
  # fetch raises a KeyError naming the compound when it has no experimental
  # row, instead of failing later with a NoMethodError on nil.
  exp = experimental.fetch(smi).map { |e| -Math.log10(e) }.uniq

  # https://en.wikipedia.org/wiki/Prediction_interval
  # 1.96 is presumably the normal-distribution factor for a 95 % interval.
  min = prediction - 1.96 * ci
  max = prediction + 1.96 * ci

  if (min..max) & (exp.min..exp.max)
    within_experimental_values += 1
    next
  end

  outside_experimental_values += 1
  # Signed distance from the violated interval bound to the experimental
  # extreme on that side (negative below the interval, positive above it).
  err = if exp.min < min
          exp.min - min
        elsif exp.max > max
          exp.max - max
        end
  next unless err

  out << {
    smi: smi,
    experimental: exp,
    min: min,
    max: max,
    prediction: prediction,
    ci: ci,
    error: err
  }
end

# Largest absolute deviation first.
out.sort! { |a, b| b[:error].abs <=> a[:error].abs }
csv = [["SMILES", "Distance"]] + out.map { |o| [o[:smi], o[:error]] }
File.open("data/misclassifications.csv", "w+") do |f|
  f.puts csv.map { |r| r.join(", ") }.join("\n")
end

#File.open("correct-predictions.R","w+"){|f| f.puts "correct_predictions = #{within_experimental_values}"}