From 9d17895ab9e8cd31e0f32e8e622e13612ea5ff77 Mon Sep 17 00:00:00 2001
From: "helma@in-silico.ch" <helma@in-silico.ch>
Date: Fri, 12 Oct 2018 21:58:36 +0200
Subject: validation statistic fixes

---
 lib/validation-statistics.rb | 128 ++++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 62 deletions(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index a69ede3..e440731 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -9,8 +9,7 @@ module OpenTox
         self.accept_values = model.prediction_feature.accept_values
         self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
         self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
-        #self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
-	self.nr_predictions = {:all => 0,:without_warnings => 0}
+        self.nr_predictions = {:all => 0,:without_warnings => 0}
         predictions.each do |cid,pred|
           # TODO
           # use predictions without probabilities (single neighbor)??
@@ -21,41 +20,41 @@ module OpenTox
               if pred[:value] == accept_values[0]
                 confusion_matrix[:all][0][0] += 1
                 weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]]
-		self.nr_predictions[:all] += 1
-		if pred[:warnings].empty?
+                self.nr_predictions[:all] += 1
+                if pred[:warnings].empty?
                   confusion_matrix[:without_warnings][0][0] += 1
                   weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]]
-		  self.nr_predictions[:without_warnings] += 1
-		end
+                  self.nr_predictions[:without_warnings] += 1
+                end
               elsif pred[:value] == accept_values[1]
                 confusion_matrix[:all][1][1] += 1
                 weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]]
-		self.nr_predictions[:all] += 1
-		if pred[:warnings].empty?
+                self.nr_predictions[:all] += 1
+                if pred[:warnings].empty?
                   confusion_matrix[:without_warnings][1][1] += 1
                   weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]]
-		  self.nr_predictions[:without_warnings] += 1
-		end
+                  self.nr_predictions[:without_warnings] += 1
+                end
               end
             elsif pred[:value] != m
               if pred[:value] == accept_values[0]
                 confusion_matrix[:all][0][1] += 1
                 weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]]
-		self.nr_predictions[:all] += 1
-		if pred[:warnings].empty?
+                self.nr_predictions[:all] += 1
+                if pred[:warnings].empty?
                   confusion_matrix[:without_warnings][0][1] += 1
                   weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]]
-		  self.nr_predictions[:without_warnings] += 1
-		end
+                  self.nr_predictions[:without_warnings] += 1
+                end
               elsif pred[:value] == accept_values[1]
                 confusion_matrix[:all][1][0] += 1
                 weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]]
-		self.nr_predictions[:all] += 1
-		if pred[:warnings].empty?
+                self.nr_predictions[:all] += 1
+                if pred[:warnings].empty?
                   confusion_matrix[:without_warnings][1][0] += 1
                   weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]]
-		  self.nr_predictions[:without_warnings] += 1
-		end
+                  self.nr_predictions[:without_warnings] += 1
+                end
               end
             end
           end
@@ -63,25 +62,25 @@ module OpenTox
         self.true_rate = {:all => {}, :without_warnings => {}}
         self.predictivity = {:all => {}, :without_warnings => {}}
         accept_values.each_with_index do |v,i|
-	  [:all,:without_warnings].each do |a|
-		  self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
-		  self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
-	  end
+          [:all,:without_warnings].each do |a|
+            self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
+            self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
+          end
         end
         confidence_sum = {:all => 0, :without_warnings => 0}
         [:all,:without_warnings].each do |a|
           weighted_confusion_matrix[a].each do |r|
             r.each do |c|
               confidence_sum[a] += c
-	    end
+            end
           end
         end
-	self.accuracy = {}
-	self.weighted_accuracy = {}
+        self.accuracy = {}
+        self.weighted_accuracy = {}
         [:all,:without_warnings].each do |a|
           self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f
           self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f
-	end
+        end
         $logger.debug "Accuracy #{accuracy}"
         save
         {
@@ -92,7 +91,7 @@ module OpenTox
           :weighted_accuracy => weighted_accuracy,
           :true_rate => self.true_rate,
           :predictivity => self.predictivity,
-	  :nr_predictions => nr_predictions,
+          :nr_predictions => nr_predictions,
         }
       end
 
@@ -143,19 +142,20 @@ module OpenTox
       def statistics
         self.warnings = []
         self.rmse = {:all =>0,:without_warnings => 0}
+        self.r_squared  = {:all =>0,:without_warnings => 0}
         self.mae = {:all =>0,:without_warnings => 0}
         self.within_prediction_interval = {:all =>0,:without_warnings => 0}
         self.out_of_prediction_interval = {:all =>0,:without_warnings => 0}
         x = {:all => [],:without_warnings => []}
         y = {:all => [],:without_warnings => []}
         self.nr_predictions = {:all =>0,:without_warnings => 0}
-	error = {}
         predictions.each do |cid,pred|
+          p pred
           if pred[:value] and pred[:measurements] 
-     	    self.nr_predictions[:all] +=1
+            self.nr_predictions[:all] +=1
             x[:all] << pred[:measurements].median
             y[:all] << pred[:value]
-            error[:all] = pred[:value]-pred[:measurements].median
+            error = pred[:value]-pred[:measurements].median
             self.rmse[:all] += error**2
             self.mae[:all] += error.abs
             if pred[:prediction_interval]
@@ -165,21 +165,21 @@ module OpenTox
                 self.out_of_prediction_interval[:all] += 1
               end
             end
-	    if pred[:warnings].empty?
-     	      self.nr_predictions[:without_warnings] +=1
-       	      x[:without_warnings] << pred[:measurements].median
-	      y[:without_warnings] << pred[:value]
-	      error[:without_warnings] = pred[:value]-pred[:measurements].median
-	      self.rmse[:without_warnings] += error**2
-	      self.mae[:without_warnings] += error.abs
-	      if pred[:prediction_interval]
-	        if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
-		  self.within_prediction_interval[:without_warnings] += 1
-	        else
-		  self.out_of_prediction_interval[:without_warnings] += 1
-	        end
-	      end
-	    end
+            if pred[:warnings].empty?
+              self.nr_predictions[:without_warnings] +=1
+              x[:without_warnings] << pred[:measurements].median
+              y[:without_warnings] << pred[:value]
+              error = pred[:value]-pred[:measurements].median
+              self.rmse[:without_warnings] += error**2
+              self.mae[:without_warnings] += error.abs
+              if pred[:prediction_interval]
+                if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
+                  self.within_prediction_interval[:without_warnings] += 1
+                else
+                  self.out_of_prediction_interval[:without_warnings] += 1
+                end
+              end
+            end
           else
             trd_id = model.training_dataset_id
             smiles = Compound.find(cid).smiles
@@ -187,36 +187,40 @@ module OpenTox
             $logger.debug "No training activities for #{smiles} in training dataset #{trd_id}."
           end
         end
-	[:all,:without_warnings].each do |a|
-          R.assign "measurement", x[a]
-          R.assign "prediction", y[a]
-          R.eval "r <- cor(measurement,prediction,use='pairwise')"
-          self.r_squared[a] = R.eval("r").to_ruby**2
-	  self.mae[a] = self.mae[a]/self.nr_predictions[a]
-	  self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a])
-	end
+        [:all,:without_warnings].each do |a|
+          if x[a].size > 2
+            R.assign "measurement", x[a]
+            R.assign "prediction", y[a]
+            R.eval "r <- cor(measurement,prediction,use='pairwise')"
+            self.r_squared[a] = R.eval("r").to_ruby**2
+          else
+            self.r_squared[a] = 0
+          end
+          if self.nr_predictions[a] > 0
+            self.mae[a] = self.mae[a]/self.nr_predictions[a]
+            self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a])
+          else
+            self.mae[a] = nil
+            self.rmse[a] = nil
+          end
+        end
         $logger.debug "R^2 #{r_squared}"
         $logger.debug "RMSE #{rmse}"
         $logger.debug "MAE #{mae}"
-        $logger.debug "#{percent_within_prediction_interval.round(2)}% of measurements within prediction interval"
+        $logger.debug "Nr predictions #{nr_predictions}"
+        $logger.debug "#{within_prediction_interval} measurements within prediction interval"
         $logger.debug "#{warnings}"
         save
         {
           :mae => mae,
           :rmse => rmse,
           :r_squared => r_squared,
-          :within_prediction_interval => within_prediction_interval,
+          :within_prediction_interval => self.within_prediction_interval,
           :out_of_prediction_interval => out_of_prediction_interval,
-	  :nr_predictions => nr_predictions,
+          :nr_predictions => nr_predictions,
         }
       end
 
-      # Get percentage of measurements within the prediction interval
-      # @return [Float]
-      def percent_within_prediction_interval
-        100*within_prediction_interval.to_f/(within_prediction_interval+out_of_prediction_interval)
-      end
-
       # Plot predicted vs measured values
       # @param [String,nil] format
       # @return [Blob]
-- 
cgit v1.2.3