summary | refs | log | tree | commit | diff
path: root/lib/validation-statistics.rb
diff options
context:
space:
mode:
author: helma@in-silico.ch <helma@in-silico.ch>, 2018-10-12 21:58:36 +0200
committer: helma@in-silico.ch <helma@in-silico.ch>, 2018-10-12 21:58:36 +0200
commit: 9d17895ab9e8cd31e0f32e8e622e13612ea5ff77 (patch)
tree: d6984f0bd81679228d0dfd903aad09c7005f1c4c /lib/validation-statistics.rb
parent: de763211bd2b6451e3a8dc20eb95a3ecf72bef17 (diff)
validation statistic fixes
Diffstat (limited to 'lib/validation-statistics.rb')
-rw-r--r-- lib/validation-statistics.rb 128
1 files changed, 66 insertions, 62 deletions
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
index a69ede3..e440731 100644
--- a/lib/validation-statistics.rb
+++ b/lib/validation-statistics.rb
@@ -9,8 +9,7 @@ module OpenTox
self.accept_values = model.prediction_feature.accept_values
self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
self.weighted_confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :without_warnings => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
- #self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
- self.nr_predictions = {:all => 0,:without_warnings => 0}
+ self.nr_predictions = {:all => 0,:without_warnings => 0}
predictions.each do |cid,pred|
# TODO
# use predictions without probabilities (single neighbor)??
@@ -21,41 +20,41 @@ module OpenTox
if pred[:value] == accept_values[0]
confusion_matrix[:all][0][0] += 1
weighted_confusion_matrix[:all][0][0] += pred[:probabilities][pred[:value]]
- self.nr_predictions[:all] += 1
- if pred[:warnings].empty?
+ self.nr_predictions[:all] += 1
+ if pred[:warnings].empty?
confusion_matrix[:without_warnings][0][0] += 1
weighted_confusion_matrix[:without_warnings][0][0] += pred[:probabilities][pred[:value]]
- self.nr_predictions[:without_warnings] += 1
- end
+ self.nr_predictions[:without_warnings] += 1
+ end
elsif pred[:value] == accept_values[1]
confusion_matrix[:all][1][1] += 1
weighted_confusion_matrix[:all][1][1] += pred[:probabilities][pred[:value]]
- self.nr_predictions[:all] += 1
- if pred[:warnings].empty?
+ self.nr_predictions[:all] += 1
+ if pred[:warnings].empty?
confusion_matrix[:without_warnings][1][1] += 1
weighted_confusion_matrix[:without_warnings][1][1] += pred[:probabilities][pred[:value]]
- self.nr_predictions[:without_warnings] += 1
- end
+ self.nr_predictions[:without_warnings] += 1
+ end
end
elsif pred[:value] != m
if pred[:value] == accept_values[0]
confusion_matrix[:all][0][1] += 1
weighted_confusion_matrix[:all][0][1] += pred[:probabilities][pred[:value]]
- self.nr_predictions[:all] += 1
- if pred[:warnings].empty?
+ self.nr_predictions[:all] += 1
+ if pred[:warnings].empty?
confusion_matrix[:without_warnings][0][1] += 1
weighted_confusion_matrix[:without_warnings][0][1] += pred[:probabilities][pred[:value]]
- self.nr_predictions[:without_warnings] += 1
- end
+ self.nr_predictions[:without_warnings] += 1
+ end
elsif pred[:value] == accept_values[1]
confusion_matrix[:all][1][0] += 1
weighted_confusion_matrix[:all][1][0] += pred[:probabilities][pred[:value]]
- self.nr_predictions[:all] += 1
- if pred[:warnings].empty?
+ self.nr_predictions[:all] += 1
+ if pred[:warnings].empty?
confusion_matrix[:without_warnings][1][0] += 1
weighted_confusion_matrix[:without_warnings][1][0] += pred[:probabilities][pred[:value]]
- self.nr_predictions[:without_warnings] += 1
- end
+ self.nr_predictions[:without_warnings] += 1
+ end
end
end
end
@@ -63,25 +62,25 @@ module OpenTox
self.true_rate = {:all => {}, :without_warnings => {}}
self.predictivity = {:all => {}, :without_warnings => {}}
accept_values.each_with_index do |v,i|
- [:all,:without_warnings].each do |a|
- self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
- self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
- end
+ [:all,:without_warnings].each do |a|
+ self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
+ self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
+ end
end
confidence_sum = {:all => 0, :without_warnings => 0}
[:all,:without_warnings].each do |a|
weighted_confusion_matrix[a].each do |r|
r.each do |c|
confidence_sum[a] += c
- end
+ end
end
end
- self.accuracy = {}
- self.weighted_accuracy = {}
+ self.accuracy = {}
+ self.weighted_accuracy = {}
[:all,:without_warnings].each do |a|
self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f
self.weighted_accuracy[a] = (weighted_confusion_matrix[a][0][0]+weighted_confusion_matrix[a][1][1])/confidence_sum[a].to_f
- end
+ end
$logger.debug "Accuracy #{accuracy}"
save
{
@@ -92,7 +91,7 @@ module OpenTox
:weighted_accuracy => weighted_accuracy,
:true_rate => self.true_rate,
:predictivity => self.predictivity,
- :nr_predictions => nr_predictions,
+ :nr_predictions => nr_predictions,
}
end
@@ -143,19 +142,20 @@ module OpenTox
def statistics
self.warnings = []
self.rmse = {:all =>0,:without_warnings => 0}
+ self.r_squared = {:all =>0,:without_warnings => 0}
self.mae = {:all =>0,:without_warnings => 0}
self.within_prediction_interval = {:all =>0,:without_warnings => 0}
self.out_of_prediction_interval = {:all =>0,:without_warnings => 0}
x = {:all => [],:without_warnings => []}
y = {:all => [],:without_warnings => []}
self.nr_predictions = {:all =>0,:without_warnings => 0}
- error = {}
predictions.each do |cid,pred|
+ p pred
if pred[:value] and pred[:measurements]
- self.nr_predictions[:all] +=1
+ self.nr_predictions[:all] +=1
x[:all] << pred[:measurements].median
y[:all] << pred[:value]
- error[:all] = pred[:value]-pred[:measurements].median
+ error = pred[:value]-pred[:measurements].median
self.rmse[:all] += error**2
self.mae[:all] += error.abs
if pred[:prediction_interval]
@@ -165,21 +165,21 @@ module OpenTox
self.out_of_prediction_interval[:all] += 1
end
end
- if pred[:warnings].empty?
- self.nr_predictions[:without_warnings] +=1
- x[:without_warnings] << pred[:measurements].median
- y[:without_warnings] << pred[:value]
- error[:without_warnings] = pred[:value]-pred[:measurements].median
- self.rmse[:without_warnings] += error**2
- self.mae[:without_warnings] += error.abs
- if pred[:prediction_interval]
- if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
- self.within_prediction_interval[:without_warnings] += 1
- else
- self.out_of_prediction_interval[:without_warnings] += 1
- end
- end
- end
+ if pred[:warnings].empty?
+ self.nr_predictions[:without_warnings] +=1
+ x[:without_warnings] << pred[:measurements].median
+ y[:without_warnings] << pred[:value]
+ error = pred[:value]-pred[:measurements].median
+ self.rmse[:without_warnings] += error**2
+ self.mae[:without_warnings] += error.abs
+ if pred[:prediction_interval]
+ if pred[:measurements].median >= pred[:prediction_interval][0] and pred[:measurements].median <= pred[:prediction_interval][1]
+ self.within_prediction_interval[:without_warnings] += 1
+ else
+ self.out_of_prediction_interval[:without_warnings] += 1
+ end
+ end
+ end
else
trd_id = model.training_dataset_id
smiles = Compound.find(cid).smiles
@@ -187,36 +187,40 @@ module OpenTox
$logger.debug "No training activities for #{smiles} in training dataset #{trd_id}."
end
end
- [:all,:without_warnings].each do |a|
- R.assign "measurement", x[a]
- R.assign "prediction", y[a]
- R.eval "r <- cor(measurement,prediction,use='pairwise')"
- self.r_squared[a] = R.eval("r").to_ruby**2
- self.mae[a] = self.mae[a]/self.nr_predictions[a]
- self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a])
- end
+ [:all,:without_warnings].each do |a|
+ if x[a].size > 2
+ R.assign "measurement", x[a]
+ R.assign "prediction", y[a]
+ R.eval "r <- cor(measurement,prediction,use='pairwise')"
+ self.r_squared[a] = R.eval("r").to_ruby**2
+ else
+ self.r_squared[a] = 0
+ end
+ if self.nr_predictions[a] > 0
+ self.mae[a] = self.mae[a]/self.nr_predictions[a]
+ self.rmse[a] = Math.sqrt(self.rmse[a]/self.nr_predictions[a])
+ else
+ self.mae[a] = nil
+ self.rmse[a] = nil
+ end
+ end
$logger.debug "R^2 #{r_squared}"
$logger.debug "RMSE #{rmse}"
$logger.debug "MAE #{mae}"
- $logger.debug "#{percent_within_prediction_interval.round(2)}% of measurements within prediction interval"
+ $logger.debug "Nr predictions #{nr_predictions}"
+ $logger.debug "#{within_prediction_interval} measurements within prediction interval"
$logger.debug "#{warnings}"
save
{
:mae => mae,
:rmse => rmse,
:r_squared => r_squared,
- :within_prediction_interval => within_prediction_interval,
+ :within_prediction_interval => self.within_prediction_interval,
:out_of_prediction_interval => out_of_prediction_interval,
- :nr_predictions => nr_predictions,
+ :nr_predictions => nr_predictions,
}
end
- # Get percentage of measurements within the prediction interval
- # @return [Float]
- def percent_within_prediction_interval
- 100*within_prediction_interval.to_f/(within_prediction_interval+out_of_prediction_interval)
- end
-
# Plot predicted vs measured values
# @param [String,nil] format
# @return [Blob]