From 6d6be53a110e71d0d56ae5ea9a2675f76f7c84ec Mon Sep 17 00:00:00 2001
From: "helma@in-silico.ch" <helma@in-silico.ch>
Date: Thu, 6 Sep 2018 17:24:25 +0200
Subject: adjusted classification similarities, dataset sdf export

---
 lib/validation-statistics.rb | 139 +++++++++++++++++++++++++++++--------------
 1 file changed, 95 insertions(+), 44 deletions(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 69e7992..a69ede3 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -7,9 +7,10 @@ module OpenTox
       # @return [Hash]
       def statistics 
         self.accept_values = model.prediction_feature.accept_values
-        self.confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
-        self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
-        nr_instances = 0
+        self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
+        self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
+        #self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
+	self.nr_predictions = {:all => 0,:without_warnings => 0}
         predictions.each do |cid,pred|
           # TODO
           # use predictions without probabilities (single neighbor)??
@@ -18,41 +19,69 @@ module OpenTox
             m = pred[:measurements].first
             if pred[:value] == m
               if pred[:value] == accept_values[0]
-                confusion_matrix[0][0] += 1
-                weighted_confusion_matrix[0][0] += pred[:probabilities][pred[:value]]
-                nr_instances += 1
+                confusion_matrix[:all][0][0] += 1
+                weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]]
+		self.nr_predictions[:all] += 1
+		if pred[:warnings].empty?
+                  confusion_matrix[:without_warnings][0][0] += 1
+                  weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]]
+		  self.nr_predictions[:without_warnings] += 1
+		end
               elsif pred[:value] == accept_values[1]
-                confusion_matrix[1][1] += 1
-                weighted_confusion_matrix[1][1] += pred[:probabilities][pred[:value]]
-                nr_instances += 1
+                confusion_matrix[:all][1][1] += 1
+                weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]]
+		self.nr_predictions[:all] += 1
+		if pred[:warnings].empty?
+                  confusion_matrix[:without_warnings][1][1] += 1
+                  weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]]
+		  self.nr_predictions[:without_warnings] += 1
+		end
               end
             elsif pred[:value] != m
               if pred[:value] == accept_values[0]
-                confusion_matrix[0][1] += 1
-                weighted_confusion_matrix[0][1] += pred[:probabilities][pred[:value]]
-                nr_instances += 1
+                confusion_matrix[:all][0][1] += 1
+                weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]]
+		self.nr_predictions[:all] += 1
+		if pred[:warnings].empty?
+                  confusion_matrix[:without_warnings][0][1] += 1
+                  weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]]
+		  self.nr_predictions[:without_warnings] += 1
+		end
               elsif pred[:value] == accept_values[1]
-                confusion_matrix[1][0] += 1
-                weighted_confusion_matrix[1][0] += pred[:probabilities][pred[:value]]
-                nr_instances += 1
+                confusion_matrix[:all][1][0] += 1
+                weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]]
+		self.nr_predictions[:all] += 1
+		if pred[:warnings].empty?
+                  confusion_matrix[:without_warnings][1][0] += 1
+                  weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]]
+		  self.nr_predictions[:without_warnings] += 1
+		end
               end
             end
           end
         end
-        self.true_rate = {}
-        self.predictivity = {}
+        self.true_rate = {:all => {}, :without_warnings => {}}
+        self.predictivity = {:all => {}, :without_warnings => {}}
         accept_values.each_with_index do |v,i|
-          self.true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
-          self.predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f
+	  [:all,:without_warnings].each do |a|
+		  self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
+		  self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
+	  end
         end
-        confidence_sum = 0
-        weighted_confusion_matrix.each do |r|
-          r.each do |c|
-            confidence_sum += c
+        confidence_sum = {:all => 0, :without_warnings => 0}
+        [:all,:without_warnings].each do |a|
+          weighted_confusion_matrix[a].each do |r|
+            r.each do |c|
+              confidence_sum[a] += c
+	    end
           end
         end
-        self.accuracy = (confusion_matrix[0][0]+confusion_matrix[1][1])/nr_instances.to_f
-        self.weighted_accuracy = (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f
+	self.accuracy = {}
+	self.weighted_accuracy = {}
+        [:all,:without_warnings].each do |a|
+          self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f
+          self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f
+	end
         $logger.debug "Accuracy #{accuracy}"
         save
         {
@@ -63,6 +92,7 @@ module OpenTox
           :weighted_accuracy => weighted_accuracy,
           :true_rate => self.true_rate,
           :predictivity => self.predictivity,
+	  :nr_predictions => nr_predictions,
         }
       end
 
@@ -112,26 +142,44 @@ module OpenTox
       # @return [Hash]
       def statistics
         self.warnings = []
-        self.rmse = 0
-        self.mae = 0
-        self.within_prediction_interval = 0
-        self.out_of_prediction_interval = 0
-        x = []
-        y = []
+        self.rmse = {:all =>0,:without_warnings => 0}
+        self.mae = {:all =>0,:without_warnings => 0}
+        self.within_prediction_interval = {:all =>0,:without_warnings => 0}
+        self.out_of_prediction_interval = {:all =>0,:without_warnings => 0}
+        x = {:all => [],:without_warnings => []}
+        y = {:all => [],:without_warnings => []}
+        self.nr_predictions = {:all =>0,:without_warnings => 0}
+	error = {}
         predictions.each do |cid,pred|
           if pred[:value] and pred[:measurements] 
-            x << pred[:measurements].median
-            y << pred[:value]
-            error = pred[:value]-pred[:measurements].median
-            self.rmse += error**2
-            self.mae += error.abs
+     	    self.nr_predictions[:all] +=1
+            x[:all] << pred[:measurements].median
+            y[:all] << pred[:value]
+            error[:all] = pred[:value]-pred[:measurements].median
+            self.rmse[:all] += error**2
+            self.mae[:all] += error.abs
             if pred[:prediction_interval]
               if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
-                self.within_prediction_interval += 1
+                self.within_prediction_interval[:all] += 1
               else
-                self.out_of_prediction_interval += 1
+                self.out_of_prediction_interval[:all] += 1
               end
             end
+	    if pred[:warnings].empty?
+     	      self.nr_predictions[:without_warnings] +=1
+       	      x[:without_warnings] << pred[:measurements].median
+	      y[:without_warnings] << pred[:value]
+	      error[:without_warnings] = pred[:value]-pred[:measurements].median
+	      self.rmse[:without_warnings] += error**2
+	      self.mae[:without_warnings] += error.abs
+	      if pred[:prediction_interval]
+	        if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
+		  self.within_prediction_interval[:without_warnings] += 1
+	        else
+		  self.out_of_prediction_interval[:without_warnings] += 1
+	        end
+	      end
+	    end
           else
             trd_id = model.training_dataset_id
             smiles = Compound.find(cid).smiles
@@ -139,12 +187,14 @@ module OpenTox
             $logger.debug "No training activities for #{smiles} in training dataset #{trd_id}."
           end
         end
-        R.assign "measurement", x
-        R.assign "prediction", y
-        R.eval "r <- cor(measurement,prediction,use='pairwise')"
-        self.r_squared = R.eval("r").to_ruby**2
-        self.mae = self.mae/predictions.size
-        self.rmse = Math.sqrt(self.rmse/predictions.size)
+	[:all,:without_warnings].each do |a|
+          R.assign "measurement", x[a]
+          R.assign "prediction", y[a]
+          R.eval "r <- cor(measurement,prediction,use='pairwise')"
+          self.r_squared[a] = R.eval("r").to_ruby**2
+	  self.mae[a] = self.mae[a]/self.nr_predictions[a]
+	  self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a])
+	end
         $logger.debug "R^2 #{r_squared}"
         $logger.debug "RMSE #{rmse}"
         $logger.debug "MAE #{mae}"
@@ -157,6 +207,7 @@ module OpenTox
           :r_squared => r_squared,
           :within_prediction_interval => within_prediction_interval,
           :out_of_prediction_interval => out_of_prediction_interval,
+	  :nr_predictions => nr_predictions,
         }
       end
 
-- 
cgit v1.2.3


From 9d17895ab9e8cd31e0f32e8e622e13612ea5ff77 Mon Sep 17 00:00:00 2001
From: "helma@in-silico.ch" <helma@in-silico.ch>
Date: Fri, 12 Oct 2018 21:58:36 +0200
Subject: validation statistic fixes

---
 lib/validation-statistics.rb | 128 ++++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 62 deletions(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index a69ede3..e440731 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -9,8 +9,7 @@ module OpenTox
         self.accept_values = model.prediction_feature.accept_values
         self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
         self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
-        #self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
-	self.nr_predictions = {:all => 0,:without_warnings => 0}
+        self.nr_predictions = {:all => 0,:without_warnings => 0}
         predictions.each do |cid,pred|
           # TODO
           # use predictions without probabilities (single neighbor)??
@@ -21,41 +20,41 @@ module OpenTox
               if pred[:value] == accept_values[0]
                 confusion_matrix[:all][0][0] += 1
                 weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]]
-		self.nr_predictions[:all] += 1
-		if pred[:warnings].empty?
+                self.nr_predictions[:all] += 1
+                if pred[:warnings].empty?
                   confusion_matrix[:without_warnings][0][0] += 1
                   weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]]
-		  self.nr_predictions[:without_warnings] += 1
-		end
+                  self.nr_predictions[:without_warnings] += 1
+                end
               elsif pred[:value] == accept_values[1]
                 confusion_matrix[:all][1][1] += 1
                 weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]]
-		self.nr_predictions[:all] += 1
-		if pred[:warnings].empty?
+                self.nr_predictions[:all] += 1
+                if pred[:warnings].empty?
                   confusion_matrix[:without_warnings][1][1] += 1
                   weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]]
-		  self.nr_predictions[:without_warnings] += 1
-		end
+                  self.nr_predictions[:without_warnings] += 1
+                end
               end
             elsif pred[:value] != m
               if pred[:value] == accept_values[0]
                 confusion_matrix[:all][0][1] += 1
                 weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]]
-		self.nr_predictions[:all] += 1
-		if pred[:warnings].empty?
+                self.nr_predictions[:all] += 1
+                if pred[:warnings].empty?
                   confusion_matrix[:without_warnings][0][1] += 1
                   weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]]
-		  self.nr_predictions[:without_warnings] += 1
-		end
+                  self.nr_predictions[:without_warnings] += 1
+                end
               elsif pred[:value] == accept_values[1]
                 confusion_matrix[:all][1][0] += 1
                 weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]]
-		self.nr_predictions[:all] += 1
-		if pred[:warnings].empty?
+                self.nr_predictions[:all] += 1
+                if pred[:warnings].empty?
                   confusion_matrix[:without_warnings][1][0] += 1
                   weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]]
-		  self.nr_predictions[:without_warnings] += 1
-		end
+                  self.nr_predictions[:without_warnings] += 1
+                end
               end
             end
           end
@@ -63,25 +62,25 @@ module OpenTox
         self.true_rate = {:all => {}, :without_warnings => {}}
         self.predictivity = {:all => {}, :without_warnings => {}}
         accept_values.each_with_index do |v,i|
-	  [:all,:without_warnings].each do |a|
-		  self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
-		  self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
-	  end
+          [:all,:without_warnings].each do |a|
+            self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
+            self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
+          end
         end
         confidence_sum = {:all => 0, :without_warnings => 0}
         [:all,:without_warnings].each do |a|
           weighted_confusion_matrix[a].each do |r|
             r.each do |c|
               confidence_sum[a] += c
-	    end
+            end
           end
         end
-	self.accuracy = {}
-	self.weighted_accuracy = {}
+        self.accuracy = {}
+        self.weighted_accuracy = {}
         [:all,:without_warnings].each do |a|
           self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f
           self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f
-	end
+        end
         $logger.debug "Accuracy #{accuracy}"
         save
         {
@@ -92,7 +91,7 @@ module OpenTox
           :weighted_accuracy => weighted_accuracy,
           :true_rate => self.true_rate,
           :predictivity => self.predictivity,
-	  :nr_predictions => nr_predictions,
+          :nr_predictions => nr_predictions,
         }
       end
 
@@ -143,19 +142,20 @@ module OpenTox
       def statistics
         self.warnings = []
         self.rmse = {:all =>0,:without_warnings => 0}
+        self.r_squared  = {:all =>0,:without_warnings => 0}
         self.mae = {:all =>0,:without_warnings => 0}
         self.within_prediction_interval = {:all =>0,:without_warnings => 0}
         self.out_of_prediction_interval = {:all =>0,:without_warnings => 0}
         x = {:all => [],:without_warnings => []}
         y = {:all => [],:without_warnings => []}
         self.nr_predictions = {:all =>0,:without_warnings => 0}
-	error = {}
         predictions.each do |cid,pred|
+          p pred
           if pred[:value] and pred[:measurements] 
-     	    self.nr_predictions[:all] +=1
+            self.nr_predictions[:all] +=1
             x[:all] << pred[:measurements].median
             y[:all] << pred[:value]
-            error[:all] = pred[:value]-pred[:measurements].median
+            error = pred[:value]-pred[:measurements].median
             self.rmse[:all] += error**2
             self.mae[:all] += error.abs
             if pred[:prediction_interval]
@@ -165,21 +165,21 @@ module OpenTox
                 self.out_of_prediction_interval[:all] += 1
               end
             end
-	    if pred[:warnings].empty?
-     	      self.nr_predictions[:without_warnings] +=1
-       	      x[:without_warnings] << pred[:measurements].median
-	      y[:without_warnings] << pred[:value]
-	      error[:without_warnings] = pred[:value]-pred[:measurements].median
-	      self.rmse[:without_warnings] += error**2
-	      self.mae[:without_warnings] += error.abs
-	      if pred[:prediction_interval]
-	        if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
-		  self.within_prediction_interval[:without_warnings] += 1
-	        else
-		  self.out_of_prediction_interval[:without_warnings] += 1
-	        end
-	      end
-	    end
+            if pred[:warnings].empty?
+              self.nr_predictions[:without_warnings] +=1
+              x[:without_warnings] << pred[:measurements].median
+              y[:without_warnings] << pred[:value]
+              error = pred[:value]-pred[:measurements].median
+              self.rmse[:without_warnings] += error**2
+              self.mae[:without_warnings] += error.abs
+              if pred[:prediction_interval]
+                if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
+                  self.within_prediction_interval[:without_warnings] += 1
+                else
+                  self.out_of_prediction_interval[:without_warnings] += 1
+                end
+              end
+            end
           else
             trd_id = model.training_dataset_id
             smiles = Compound.find(cid).smiles
@@ -187,36 +187,40 @@ module OpenTox
             $logger.debug "No training activities for #{smiles} in training dataset #{trd_id}."
           end
         end
-	[:all,:without_warnings].each do |a|
-          R.assign "measurement", x[a]
-          R.assign "prediction", y[a]
-          R.eval "r <- cor(measurement,prediction,use='pairwise')"
-          self.r_squared[a] = R.eval("r").to_ruby**2
-	  self.mae[a] = self.mae[a]/self.nr_predictions[a]
-	  self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a])
-	end
+        [:all,:without_warnings].each do |a|
+          if x[a].size > 2
+            R.assign "measurement", x[a]
+            R.assign "prediction", y[a]
+            R.eval "r <- cor(measurement,prediction,use='pairwise')"
+            self.r_squared[a] = R.eval("r").to_ruby**2
+          else
+            self.r_squared[a] = 0
+          end
+          if self.nr_predictions[a] > 0
+            self.mae[a] = self.mae[a]/self.nr_predictions[a]
+            self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a])
+          else
+            self.mae[a] = nil
+            self.rmse[a] = nil
+          end
+        end
         $logger.debug "R^2 #{r_squared}"
         $logger.debug "RMSE #{rmse}"
         $logger.debug "MAE #{mae}"
-        $logger.debug "#{percent_within_prediction_interval.round(2)}% of measurements within prediction interval"
+        $logger.debug "Nr predictions #{nr_predictions}"
+        $logger.debug "#{within_prediction_interval} measurements within prediction interval"
         $logger.debug "#{warnings}"
         save
         {
           :mae => mae,
           :rmse => rmse,
           :r_squared => r_squared,
-          :within_prediction_interval => within_prediction_interval,
+          :within_prediction_interval => self.within_prediction_interval,
           :out_of_prediction_interval => out_of_prediction_interval,
-	  :nr_predictions => nr_predictions,
+          :nr_predictions => nr_predictions,
         }
       end
 
-      # Get percentage of measurements within the prediction interval
-      # @return [Float]
-      def percent_within_prediction_interval
-        100*within_prediction_interval.to_f/(within_prediction_interval+out_of_prediction_interval)
-      end
-
       # Plot predicted vs measured values
       # @param [String,nil] format
       # @return [Blob]
-- 
cgit v1.2.3


From d9c9d78e49d886ea91386adbbd2b523347df226e Mon Sep 17 00:00:00 2001
From: "helma@in-silico.ch" <helma@in-silico.ch>
Date: Mon, 29 Oct 2018 20:34:39 +0100
Subject: dataset predictions fixed

---
 lib/validation-statistics.rb | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index e440731..7bae891 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -150,8 +150,7 @@ module OpenTox
         y = {:all => [],:without_warnings => []}
         self.nr_predictions = {:all =>0,:without_warnings => 0}
         predictions.each do |cid,pred|
-          p pred
-          if pred[:value] and pred[:measurements] 
+          !if pred[:value] and pred[:measurements] and !pred[:measurements].empty?
             self.nr_predictions[:all] +=1
             x[:all] << pred[:measurements].median
             y[:all] << pred[:value]
-- 
cgit v1.2.3


From 3a9c9332b660d35720ad4fa1f55ee0883e53aecd Mon Sep 17 00:00:00 2001
From: "helma@in-silico.ch" <helma@in-silico.ch>
Date: Fri, 2 Nov 2018 20:34:44 +0100
Subject: warnings fixed, cleanup

---
 lib/validation-statistics.rb | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 7bae891..ad4c14d 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -82,6 +82,7 @@ module OpenTox
           self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f
         end
         $logger.debug "Accuracy #{accuracy}"
+        $logger.debug "Nr Predictions #{nr_predictions}"
         save
         {
           :accept_values => accept_values,
-- 
cgit v1.2.3


From 7e547fd4a296f497615a7805d565b378cb1bd7cd Mon Sep 17 00:00:00 2001
From: "helma@in-silico.ch" <helma@in-silico.ch>
Date: Wed, 14 Nov 2018 17:33:44 +0100
Subject: bad_request_error substituted with ArgumentError

---
 lib/validation-statistics.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index ad4c14d..f3e3af8 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -242,7 +242,7 @@ module OpenTox
             title = "log2(Net cell association [mL/ug(Mg)])"
           else
             title = feature.name
-            title += " [#{feature.unit}]" if feature.unit and !feature.unit.blank?
+            title += "-log10(#{feature.unit})" if feature.unit and !feature.unit.blank?
           end
           R.eval "image = qplot(prediction,measurement,main='#{title}',xlab='Prediction',ylab='Measurement',asp=1,xlim=range, ylim=range)"
           R.eval "image = image + geom_abline(intercept=0, slope=1)"
-- 
cgit v1.2.3


From 0882c2cd0de934d7377fc9d08c306be98612c88a Mon Sep 17 00:00:00 2001
From: "helma@in-silico.ch" <helma@in-silico.ch>
Date: Fri, 16 Nov 2018 18:42:42 +0100
Subject: real datasets for testing, test data cleanup, Daphnia import, upper
 and lower similarity thresholds

---
 lib/validation-statistics.rb | 163 ++++++++++++++++++-------------------------
 1 file changed, 67 insertions(+), 96 deletions(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index f3e3af8..8a8970e 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -7,79 +7,55 @@ module OpenTox
       # @return [Hash]
       def statistics 
         self.accept_values = model.prediction_feature.accept_values
-        self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
-        self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
-        self.nr_predictions = {:all => 0,:without_warnings => 0}
+        self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_high => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_low => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
+        self.nr_predictions = {:all => 0,:confidence_high => 0,:confidence_low => 0}
         predictions.each do |cid,pred|
-          # TODO
-          # use predictions without probabilities (single neighbor)??
-          # use measured majority class??
+          # TODO: use measured majority class or all measurements??
           if pred[:measurements].uniq.size == 1 and pred[:probabilities]
             m = pred[:measurements].first
             if pred[:value] == m
-              if pred[:value] == accept_values[0]
-                confusion_matrix[:all][0][0] += 1
-                weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]]
-                self.nr_predictions[:all] += 1
-                if pred[:warnings].empty?
-                  confusion_matrix[:without_warnings][0][0] += 1
-                  weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]]
-                  self.nr_predictions[:without_warnings] += 1
-                end
-              elsif pred[:value] == accept_values[1]
-                confusion_matrix[:all][1][1] += 1
-                weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]]
-                self.nr_predictions[:all] += 1
-                if pred[:warnings].empty?
-                  confusion_matrix[:without_warnings][1][1] += 1
-                  weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]]
-                  self.nr_predictions[:without_warnings] += 1
+              accept_values.each_with_index do |v,i|
+                if pred[:value] == v
+                  confusion_matrix[:all][i][i] += 1
+                  self.nr_predictions[:all] += 1
+                  if pred[:confidence].match(/High/i)
+                    confusion_matrix[:confidence_high][i][i] += 1
+                    self.nr_predictions[:confidence_high] += 1
+                  elsif pred[:confidence].match(/Low/i)
+                    confusion_matrix[:confidence_low][i][i] += 1
+                    self.nr_predictions[:confidence_low] += 1
+                  end
                 end
               end
             elsif pred[:value] != m
-              if pred[:value] == accept_values[0]
-                confusion_matrix[:all][0][1] += 1
-                weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]]
-                self.nr_predictions[:all] += 1
-                if pred[:warnings].empty?
-                  confusion_matrix[:without_warnings][0][1] += 1
-                  weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]]
-                  self.nr_predictions[:without_warnings] += 1
-                end
-              elsif pred[:value] == accept_values[1]
-                confusion_matrix[:all][1][0] += 1
-                weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]]
-                self.nr_predictions[:all] += 1
-                if pred[:warnings].empty?
-                  confusion_matrix[:without_warnings][1][0] += 1
-                  weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]]
-                  self.nr_predictions[:without_warnings] += 1
+              accept_values.each_with_index do |v,i|
+                if pred[:value] == v
+                  confusion_matrix[:all][i][(i+1)%2] += 1
+                  self.nr_predictions[:all] += 1
+                  if pred[:confidence].match(/High/i)
+                    confusion_matrix[:confidence_high][i][(i+1)%2] += 1
+                    self.nr_predictions[:confidence_high] += 1
+                  elsif pred[:confidence].match(/Low/i)
+                    confusion_matrix[:confidence_low][i][(i+1)%2] += 1
+                    self.nr_predictions[:confidence_low] += 1
+                  end
                 end
               end
             end
           end
         end
-        self.true_rate = {:all => {}, :without_warnings => {}}
-        self.predictivity = {:all => {}, :without_warnings => {}}
+
+        self.true_rate = {:all => {}, :confidence_high => {}, :confidence_low => {}}
+        self.predictivity = {:all => {}, :confidence_high => {}, :confidence_low => {}}
         accept_values.each_with_index do |v,i|
-          [:all,:without_warnings].each do |a|
+          [:all,:confidence_high,:confidence_low].each do |a|
             self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
             self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
           end
         end
-        confidence_sum = {:all => 0, :without_warnings => 0}
-        [:all,:without_warnings].each do |a|
-          weighted_confusion_matrix[a].each do |r|
-            r.each do |c|
-              confidence_sum[a] += c
-            end
-          end
-        end
         self.accuracy = {}
-        self.weighted_accuracy = {}
-        [:all,:without_warnings].each do |a|
+        [:all,:confidence_high,:confidence_low].each do |a|
           self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f
-          self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f
         end
         $logger.debug "Accuracy #{accuracy}"
         $logger.debug "Nr Predictions #{nr_predictions}"
@@ -87,9 +63,7 @@ module OpenTox
         {
           :accept_values => accept_values,
           :confusion_matrix => confusion_matrix,
-          :weighted_confusion_matrix => weighted_confusion_matrix,
           :accuracy => accuracy,
-          :weighted_accuracy => weighted_accuracy,
           :true_rate => self.true_rate,
           :predictivity => self.predictivity,
           :nr_predictions => nr_predictions,
@@ -138,47 +112,27 @@ module OpenTox
     # Statistical evaluation of regression validations
     module RegressionStatistics
 
+      attr_accessor :x, :y
+
       # Get statistics
       # @return [Hash]
       def statistics
         self.warnings = []
-        self.rmse = {:all =>0,:without_warnings => 0}
-        self.r_squared  = {:all =>0,:without_warnings => 0}
-        self.mae = {:all =>0,:without_warnings => 0}
-        self.within_prediction_interval = {:all =>0,:without_warnings => 0}
-        self.out_of_prediction_interval = {:all =>0,:without_warnings => 0}
-        x = {:all => [],:without_warnings => []}
-        y = {:all => [],:without_warnings => []}
-        self.nr_predictions = {:all =>0,:without_warnings => 0}
+        self.rmse = {:all =>0,:confidence_high => 0,:confidence_low => 0}
+        self.r_squared  = {:all =>0,:confidence_high => 0,:confidence_low => 0}
+        self.mae = {:all =>0,:confidence_high => 0,:confidence_low => 0}
+        self.within_prediction_interval = {:all =>0,:confidence_high => 0,:confidence_low => 0}
+        self.out_of_prediction_interval = {:all =>0,:confidence_high => 0,:confidence_low => 0}
+        @x = {:all => [],:confidence_high => [],:confidence_low => []}
+        @y = {:all => [],:confidence_high => [],:confidence_low => []}
+        self.nr_predictions = {:all =>0,:confidence_high => 0,:confidence_low => 0}
         predictions.each do |cid,pred|
           !if pred[:value] and pred[:measurements] and !pred[:measurements].empty?
-            self.nr_predictions[:all] +=1
-            x[:all] << pred[:measurements].median
-            y[:all] << pred[:value]
-            error = pred[:value]-pred[:measurements].median
-            self.rmse[:all] += error**2
-            self.mae[:all] += error.abs
-            if pred[:prediction_interval]
-              if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
-                self.within_prediction_interval[:all] += 1
-              else
-                self.out_of_prediction_interval[:all] += 1
-              end
-            end
-            if pred[:warnings].empty?
-              self.nr_predictions[:without_warnings] +=1
-              x[:without_warnings] << pred[:measurements].median
-              y[:without_warnings] << pred[:value]
-              error = pred[:value]-pred[:measurements].median
-              self.rmse[:without_warnings] += error**2
-              self.mae[:without_warnings] += error.abs
-              if pred[:prediction_interval]
-                if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
-                  self.within_prediction_interval[:without_warnings] += 1
-                else
-                  self.out_of_prediction_interval[:without_warnings] += 1
-                end
-              end
+            insert_prediction pred, :all
+            if pred[:confidence].match(/High/i)
+              insert_prediction pred, :confidence_high
+            elsif pred[:confidence].match(/Low/i)
+              insert_prediction pred, :confidence_low
             end
           else
             trd_id = model.training_dataset_id
@@ -187,10 +141,10 @@ module OpenTox
             $logger.debug "No training activities for #{smiles} in training dataset #{trd_id}."
           end
         end
-        [:all,:without_warnings].each do |a|
-          if x[a].size > 2
-            R.assign "measurement", x[a]
-            R.assign "prediction", y[a]
+        [:all,:confidence_high,:confidence_low].each do |a|
+          if @x[a].size > 2
+            R.assign "measurement", @x[a]
+            R.assign "prediction", @y[a]
             R.eval "r <- cor(measurement,prediction,use='pairwise')"
             self.r_squared[a] = R.eval("r").to_ruby**2
           else
@@ -209,7 +163,6 @@ module OpenTox
         $logger.debug "MAE #{mae}"
         $logger.debug "Nr predictions #{nr_predictions}"
         $logger.debug "#{within_prediction_interval} measurements within prediction interval"
-        $logger.debug "#{warnings}"
         save
         {
           :mae => mae,
@@ -270,6 +223,24 @@ module OpenTox
         end
         worst_predictions.sort_by{|sid,p| p["distance_prediction_interval"] }.to_h
       end
+
+      private
+
+      def insert_prediction prediction, type
+        self.nr_predictions[type] +=1
+        @x[type] << prediction[:measurements].median
+        @y[type] << prediction[:value]
+        error = prediction[:value]-prediction[:measurements].median
+        self.rmse[type] += error**2
+        self.mae[type] += error.abs
+        if prediction[:prediction_interval]
+          if prediction[:measurements].median >= prediction[:prediction_interval][0] and prediction[:measurements].median <= prediction[:prediction_interval][1]
+            self.within_prediction_interval[type] += 1
+          else
+            self.out_of_prediction_interval[type] += 1
+          end
+        end
+      end
     end
   end
 end
-- 
cgit v1.2.3


From 455da06aa6459da0d25b286ca6cb866ff64c4c34 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Thu, 20 Jun 2019 22:01:50 +0200
Subject: separate csv serialisations for batch predictions and training data,
 repeated measurements in mutagenicity dataset fixed, daphnia import fixed,
 CENTRAL_MONGO_IP removed

---
 lib/validation-statistics.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 8a8970e..d603294 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -18,7 +18,7 @@ module OpenTox
                 if pred[:value] == v
                   confusion_matrix[:all][i][i] += 1
                   self.nr_predictions[:all] += 1
-                  if pred[:confidence].match(/High/i)
+                  if pred[:confidence].match(/Similar/i)
                     confusion_matrix[:confidence_high][i][i] += 1
                     self.nr_predictions[:confidence_high] += 1
                   elsif pred[:confidence].match(/Low/i)
@@ -32,7 +32,7 @@ module OpenTox
                 if pred[:value] == v
                   confusion_matrix[:all][i][(i+1)%2] += 1
                   self.nr_predictions[:all] += 1
-                  if pred[:confidence].match(/High/i)
+                  if pred[:confidence].match(/Similar/i)
                     confusion_matrix[:confidence_high][i][(i+1)%2] += 1
                     self.nr_predictions[:confidence_high] += 1
                   elsif pred[:confidence].match(/Low/i)
-- 
cgit v1.2.3


From b536a45cf18b070cec3f9cb8a44fdac0bfa3c58e Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Thu, 27 Jun 2019 14:08:57 +0000
Subject: fixed confidence value for cv stats; added tests

---
 lib/validation-statistics.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index d603294..2dd9c7a 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -129,7 +129,7 @@ module OpenTox
         predictions.each do |cid,pred|
           !if pred[:value] and pred[:measurements] and !pred[:measurements].empty?
             insert_prediction pred, :all
-            if pred[:confidence].match(/High/i)
+            if pred[:confidence].match(/Similar/i)
               insert_prediction pred, :confidence_high
             elsif pred[:confidence].match(/Low/i)
               insert_prediction pred, :confidence_low
-- 
cgit v1.2.3


From 29c3cb2e8a8bbfb12178785f81d1cb324dc328e7 Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Fri, 12 Jul 2019 12:20:20 +0000
Subject: fixed, probability plot format was not taken from params for filename

---
 lib/validation-statistics.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 2dd9c7a..4910573 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -101,7 +101,7 @@ module OpenTox
           R.assign "probability", probabilities
           R.eval "image = qplot(probability,accuracy)+ylab('Accumulated accuracy')+xlab('Prediction probability')+ylim(c(0,1))+scale_x_reverse()+geom_line()"
           R.eval "ggsave(file='#{tmpfile}', plot=image)"
-          file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_probability_plot.svg")
+          file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_probability_plot.#{format}")
           plot_id = $gridfs.insert_one(file)
           update(:probability_plot_id => plot_id)
         #end
-- 
cgit v1.2.3


From 581707afa48711cfd2f929a91a96e4f5041b9ba2 Mon Sep 17 00:00:00 2001
From: gebele <gebele@in-silico.ch>
Date: Tue, 16 Jul 2019 14:03:03 +0000
Subject: always render new correlation plot; keep same handling as for
 probability plot

---
 lib/validation-statistics.rb | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'lib/validation-statistics.rb')

diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index 4910573..5fd9985 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -178,8 +178,12 @@ module OpenTox
       # @param [String,nil] format
       # @return [Blob]
       def correlation_plot format: "png"
-        unless correlation_plot_id
-          tmpfile = "/tmp/#{id.to_s}_correlation.#{format}"
+        #unless correlation_plot_id
+          #tmpfile = "/tmp/#{id.to_s}_correlation.#{format}"
+          tmpdir = "/tmp"
+          #p tmpdir
+          FileUtils.mkdir_p tmpdir
+          tmpfile = File.join(tmpdir,"#{id.to_s}_correlation.#{format}")
           x = []
           y = []
           feature = Feature.find(predictions.first.last["prediction_feature_id"])
@@ -203,7 +207,7 @@ module OpenTox
           file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id.to_s}_correlation_plot.#{format}")
           plot_id = $gridfs.insert_one(file)
           update(:correlation_plot_id => plot_id)
-        end
+        #end
         $gridfs.find_one(_id: correlation_plot_id).data
       end
 
-- 
cgit v1.2.3