new files added

author: Christoph Helma <helma@in-silico.ch> 2016-04-13 15:35:01 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-04-13 15:35:01 +0200
commit: 815cf6ba1543fc323eb7cbd1202fadbf03bcfbca (patch)
tree: 23a69a2dc885649df7995180a5fcbb9c8c5cb470 /lib/validation-statistics.rb
parent: a8368dda776c05331474adf7eaf9a6e413a3b1eb (diff)
1 files changed, 100 insertions, 0 deletions
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
new file mode 100644
index 0000000..570b2d4
--- /dev/null
+++ b/lib/validation-statistics.rb
@@ -0,0 +1,100 @@
+module OpenTox
+  # Computes summary statistics for classification and regression validations.
+  # Both methods are class methods that log through the global $logger.
+  class ValidationStatistics
+    include OpenTox
+
+    # Builds confusion matrices and derived rates for a two-class validation.
+    # Rows are indexed by the predicted class, columns by the measured class.
+    # Predictions whose measurements disagree with each other are skipped.
+    # true_rate[v] is the diagonal over its row sum, predictivity[v] over its column sum.
+    # @param predictions [Hash] id => {:value, :measured (Array), :confidence}
+    # @param accept_values [Array] class labels; only the first two are counted
+    # @return [Hash] matrices, accuracies, per-class rates and a timestamp
+    # @note the accuracies are NaN when no prediction could be counted
+    def self.classification predictions, accept_values
+      confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
+      weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
+      nr_instances = 0
+      predictions.each do |cid,pred|
+        # TODO use measured majority class
+        next unless pred[:measured].uniq.size == 1
+        row = accept_values[0,2].index(pred[:value])
+        next unless row # predicted value is not one of the two counted classes
+        # Binary assumption: a wrong prediction belongs to the other class column.
+        col = pred[:value] == pred[:measured].first ? row : 1-row
+        confusion_matrix[row][col] += 1
+        weighted_confusion_matrix[row][col] += pred[:confidence]
+        nr_instances += 1
+      end
+      true_rate = {}
+      predictivity = {}
+      accept_values.each_with_index do |v,i|
+        true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
+        predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f
+      end
+      confidence_sum = weighted_confusion_matrix.flatten.reduce(0, :+)
+      # Plain accuracy counts instances; the weighted variant sums confidences.
+      accuracy = (confusion_matrix[0][0]+confusion_matrix[1][1])/nr_instances.to_f
+      weighted_accuracy = (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f
+      $logger.debug "Accuracy #{accuracy}"
+      {
+        :accept_values => accept_values,
+        :confusion_matrix => confusion_matrix,
+        :weighted_confusion_matrix => weighted_confusion_matrix,
+        :accuracy => accuracy,
+        :weighted_accuracy => weighted_accuracy,
+        :true_rate => true_rate,
+        :predictivity => predictivity,
+        :finished_at => Time.now
+      }
+    end
+
+    # Computes MAE, RMSE and R^2 for regression predictions on a -log10 scale.
+    # Entries without a prediction or without measurements are logged and skipped.
+    # @param predictions [Hash] id => {:value, :measured}; :measured must respond to median
+    # @return [Hash] :mae, :rmse, :r_squared and :finished_at
+    # @note :mae and :rmse are NaN when no entry is usable
+    def self.regression predictions
+      # TODO: prediction intervals
+      rmse = 0
+      mae = 0
+      x = []
+      y = []
+      predictions.each do |cid,pred|
+        if pred[:value] and pred[:measured] #and pred[:measured] != [nil]
+          measured = -Math.log10(pred[:measured].median)
+          predicted = -Math.log10(pred[:value])
+          x << measured
+          y << predicted
+          error = measured-predicted
+          rmse += error**2
+          mae += error.abs
+        else
+          # No model or warnings collection is in scope in this class method,
+          # so the skipped entry is reported through the logger only.
+          $logger.debug "No prediction or measured activities for #{cid}."
+        end
+      end
+      # The correlation coefficient is computed by R's cor via the R constant.
+      R.assign "measurement", x
+      R.assign "prediction", y
+      R.eval "r <- cor(measurement,prediction,use='complete')"
+      r = R.eval("r").to_ruby
+
+      # Average over the entries that contributed an error, not over skipped ones.
+      n = x.size.to_f
+      mae = mae/n
+      rmse = Math.sqrt(rmse/n)
+      $logger.debug "R^2 #{r**2}"
+      $logger.debug "RMSE #{rmse}"
+      $logger.debug "MAE #{mae}"
+      {
+        :mae => mae,
+        :rmse => rmse,
+        :r_squared => r**2,
+        :finished_at => Time.now
+      }
+    end
+  end
+end
author	Christoph Helma <helma@in-silico.ch>	2016-04-13 15:35:01 +0200
committer	Christoph Helma <helma@in-silico.ch>	2016-04-13 15:35:01 +0200
commit	815cf6ba1543fc323eb7cbd1202fadbf03bcfbca (patch)
tree	23a69a2dc885649df7995180a5fcbb9c8c5cb470 /lib/validation-statistics.rb
parent	a8368dda776c05331474adf7eaf9a6e413a3b1eb (diff)