From 9d17895ab9e8cd31e0f32e8e622e13612ea5ff77 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 12 Oct 2018 21:58:36 +0200 Subject: validation statistic fixes --- lib/validation-statistics.rb | 128 ++++++++++++++++++++++--------------------- 1 file changed, 66 insertions(+), 62 deletions(-) (limited to 'lib/validation-statistics.rb') diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb index a69ede3..e440731 100644 --- a/lib/validation-statistics.rb +++ b/lib/validation-statistics.rb @@ -9,8 +9,7 @@ module OpenTox self.accept_values = model.prediction_feature.accept_values self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}} self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}} - #self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)} - self.nr_predictions = {:all => 0,:without_warnings => 0} + self.nr_predictions = {:all => 0,:without_warnings => 0} predictions.each do |cid,pred| # TODO # use predictions without probabilities (single neighbor)?? @@ -21,41 +20,41 @@ module OpenTox if pred[:value] == accept_values[0] confusion_matrix[:all][0][0] += 1 weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? confusion_matrix[:without_warnings][0][0] += 1 weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end + self.nr_predictions[:without_warnings] += 1 + end elsif pred[:value] == accept_values[1] confusion_matrix[:all][1][1] += 1 weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? confusion_matrix[:without_warnings][1][1] += 1 weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end + self.nr_predictions[:without_warnings] += 1 + end end elsif pred[:value] != m if pred[:value] == accept_values[0] confusion_matrix[:all][0][1] += 1 weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? confusion_matrix[:without_warnings][0][1] += 1 weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end + self.nr_predictions[:without_warnings] += 1 + end elsif pred[:value] == accept_values[1] confusion_matrix[:all][1][0] += 1 weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:all] += 1 - if pred[:warnings].empty? + self.nr_predictions[:all] += 1 + if pred[:warnings].empty? confusion_matrix[:without_warnings][1][0] += 1 weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]] - self.nr_predictions[:without_warnings] += 1 - end + self.nr_predictions[:without_warnings] += 1 + end end end end @@ -63,25 +62,25 @@ module OpenTox self.true_rate = {:all => {}, :without_warnings => {}} self.predictivity = {:all => {}, :without_warnings => {}} accept_values.each_with_index do |v,i| - [:all,:without_warnings].each do |a| - self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f - self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f - end + [:all,:without_warnings].each do |a| + self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f + self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f + end end confidence_sum = {:all => 0, :without_warnings => 0} [:all,:without_warnings].each do |a| weighted_confusion_matrix[a].each do |r| r.each do |c| confidence_sum[a] += c - end + end end end - self.accuracy = {} - self.weighted_accuracy = {} + self.accuracy = {} + self.weighted_accuracy = {} [:all,:without_warnings].each do |a| self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f - end + end $logger.debug "Accuracy #{accuracy}" save { @@ -92,7 +91,7 @@ module OpenTox :weighted_accuracy => weighted_accuracy, :true_rate => self.true_rate, :predictivity => self.predictivity, - :nr_predictions => nr_predictions, + :nr_predictions => nr_predictions, } end @@ -143,19 +142,20 @@ module OpenTox def statistics self.warnings = [] self.rmse = {:all =>0,:without_warnings => 0} + self.r_squared = {:all =>0,:without_warnings => 0} self.mae = {:all =>0,:without_warnings => 0} self.within_prediction_interval = {:all =>0,:without_warnings => 0} self.out_of_prediction_interval = {:all =>0,:without_warnings => 0} x = {:all => [],:without_warnings => []} y = {:all => [],:without_warnings => []} self.nr_predictions = {:all =>0,:without_warnings => 0} - error = {} predictions.each do |cid,pred| + p pred if pred[:value] and pred[:measurements] - self.nr_predictions[:all] +=1 + self.nr_predictions[:all] +=1 x[:all] << pred[:measurements].median y[:all] << pred[:value] - error[:all] = pred[:value]-pred[:measurements].median + error = pred[:value]-pred[:measurements].median self.rmse[:all] += error**2 self.mae[:all] += error.abs if pred[:prediction_interval] @@ -165,21 +165,21 @@ module OpenTox self.out_of_prediction_interval[:all] += 1 end end - if pred[:warnings].empty? - self.nr_predictions[:without_warnings] +=1 - x[:without_warnings] << pred[:measurements].median - y[:without_warnings] << pred[:value] - error[:without_warnings] = pred[:value]-pred[:measurements].median - self.rmse[:without_warnings] += error**2 - self.mae[:without_warnings] += error.abs - if pred[:prediction_interval] - if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1] - self.within_prediction_interval[:without_warnings] += 1 - else - self.out_of_prediction_interval[:without_warnings] += 1 - end - end - end + if pred[:warnings].empty? + self.nr_predictions[:without_warnings] +=1 + x[:without_warnings] << pred[:measurements].median + y[:without_warnings] << pred[:value] + error = pred[:value]-pred[:measurements].median + self.rmse[:without_warnings] += error**2 + self.mae[:without_warnings] += error.abs + if pred[:prediction_interval] + if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1] + self.within_prediction_interval[:without_warnings] += 1 + else + self.out_of_prediction_interval[:without_warnings] += 1 + end + end + end else trd_id = model.training_dataset_id smiles = Compound.find(cid).smiles @@ -187,36 +187,40 @@ module OpenTox $logger.debug "No training activities for #{smiles} in training dataset #{trd_id}." end end - [:all,:without_warnings].each do |a| - R.assign "measurement", x[a] - R.assign "prediction", y[a] - R.eval "r <- cor(measurement,prediction,use='pairwise')" - self.r_squared[a] = R.eval("r").to_ruby**2 - self.mae[a] = self.mae[a]/self.nr_predictions[a] - self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a]) - end + [:all,:without_warnings].each do |a| + if x[a].size > 2 + R.assign "measurement", x[a] + R.assign "prediction", y[a] + R.eval "r <- cor(measurement,prediction,use='pairwise')" + self.r_squared[a] = R.eval("r").to_ruby**2 + else + self.r_squared[a] = 0 + end + if self.nr_predictions[a] > 0 + self.mae[a] = self.mae[a]/self.nr_predictions[a] + self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a]) + else + self.mae[a] = nil + self.rmse[a] = nil + end + end $logger.debug "R^2 #{r_squared}" $logger.debug "RMSE #{rmse}" $logger.debug "MAE #{mae}" - $logger.debug "#{percent_within_prediction_interval.round(2)}% of measurements within prediction interval" + $logger.debug "Nr predictions #{nr_predictions}" + $logger.debug "#{within_prediction_interval} measurements within prediction interval" $logger.debug "#{warnings}" save { :mae => mae, :rmse => rmse, :r_squared => r_squared, - :within_prediction_interval => within_prediction_interval, + :within_prediction_interval => self.within_prediction_interval, :out_of_prediction_interval => out_of_prediction_interval, - :nr_predictions => nr_predictions, + :nr_predictions => nr_predictions, } end - # Get percentage of measurements within the prediction interval - # @return [Float] - def percent_within_prediction_interval - 100*within_prediction_interval.to_f/(within_prediction_interval+out_of_prediction_interval) - end - # Plot predicted vs measured values # @param [String,nil] format # @return [Blob] -- cgit v1.2.3