summaryrefslogtreecommitdiff
path: root/lib/validation-statistics.rb
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-04-13 15:35:01 +0200
committerChristoph Helma <helma@in-silico.ch>2016-04-13 15:35:01 +0200
commit815cf6ba1543fc323eb7cbd1202fadbf03bcfbca (patch)
tree23a69a2dc885649df7995180a5fcbb9c8c5cb470 /lib/validation-statistics.rb
parenta8368dda776c05331474adf7eaf9a6e413a3b1eb (diff)
new files added
Diffstat (limited to 'lib/validation-statistics.rb')
-rw-r--r--lib/validation-statistics.rb100
1 files changed, 100 insertions, 0 deletions
diff --git a/lib/validation-statistics.rb b/lib/validation-statistics.rb
new file mode 100644
index 0000000..570b2d4
--- /dev/null
+++ b/lib/validation-statistics.rb
@@ -0,0 +1,100 @@
module OpenTox

  # Summary statistics for validation results (classification and regression).
  #
  # Both entry points take a +predictions+ hash that maps a compound id to a
  # prediction hash and return a plain hash of statistics.
  class ValidationStatistics
    include OpenTox

    # Computes confusion matrices and derived statistics for a binary
    # classification endpoint.
    #
    # Only the first two entries of +accept_values+ are evaluated. Predictions
    # whose measurements disagree with each other, or whose predicted value is
    # not one of those two classes, are ignored.
    #
    # Matrix layout: the row is the predicted class; a correct prediction is
    # counted on the diagonal, an incorrect one in the other column of the row.
    #
    # @param predictions [Hash] compound id => hash with :measured (Array),
    #   :value (predicted class) and :confidence (Numeric)
    # @param accept_values [Array] the two class labels
    # @return [Hash] :accept_values, :confusion_matrix,
    #   :weighted_confusion_matrix, :accuracy, :weighted_accuracy, :true_rate,
    #   :predictivity, :finished_at
    def self.classification(predictions, accept_values)
      size = accept_values.size
      confusion_matrix = Array.new(size) { Array.new(size, 0) }
      weighted_confusion_matrix = Array.new(size) { Array.new(size, 0) }
      nr_instances = 0
      classes = accept_values.first(2)

      predictions.each do |_cid, pred|
        # TODO use measured majority class
        next unless pred[:measured].uniq.size == 1

        measured = pred[:measured].first
        row = classes.index(pred[:value])
        next unless row

        # A wrong prediction goes to the other column of the predicted row.
        col = pred[:value] == measured ? row : 1 - row
        confusion_matrix[row][col] += 1
        weighted_confusion_matrix[row][col] += pred[:confidence]
        nr_instances += 1
      end

      # NOTE(review): with rows holding the predicted class, the row-based
      # ratio looks like a predictivity and the column-based one like a true
      # rate - confirm the intended naming before relying on these two keys.
      true_rate = {}
      predictivity = {}
      accept_values.each_with_index do |v, i|
        true_rate[v] = confusion_matrix[i][i] / confusion_matrix[i].reduce(:+).to_f
        predictivity[v] = confusion_matrix[i][i] / confusion_matrix.collect { |r| r[i] }.reduce(:+).to_f
      end

      # Plain accuracy counts instances; weighted accuracy sums confidences.
      accuracy = (confusion_matrix[0][0] + confusion_matrix[1][1]) / nr_instances.to_f
      confidence_sum = weighted_confusion_matrix.flatten.reduce(0, :+)
      weighted_accuracy = (weighted_confusion_matrix[0][0] + weighted_confusion_matrix[1][1]) / confidence_sum.to_f
      $logger.debug "Accuracy #{accuracy}"
      {
        :accept_values => accept_values,
        :confusion_matrix => confusion_matrix,
        :weighted_confusion_matrix => weighted_confusion_matrix,
        :accuracy => accuracy,
        :weighted_accuracy => weighted_accuracy,
        :true_rate => true_rate,
        :predictivity => predictivity,
        :finished_at => Time.now
      }
    end

    # Computes MAE, RMSE and R^2 between predicted values and the median of
    # the measured values, all on a log10 scale.
    #
    # Predictions without a value or without any non-nil measurement are
    # logged and left out of every statistic. When nothing can be evaluated
    # the three statistics are nil.
    #
    # @param predictions [Hash] compound id => hash with :measured (Array of
    #   Numeric) and :value (Numeric)
    # @return [Hash] :mae, :rmse, :r_squared, :finished_at
    def self.regression(predictions)
      # TODO: prediction intervals
      squared_error_sum = 0.0
      absolute_error_sum = 0.0
      x = []
      y = []
      predictions.each do |cid, pred|
        measured = Array(pred[:measured]).compact
        if pred[:value] && !measured.empty?
          log_measured = Math.log10(measured.median)
          log_predicted = Math.log10(pred[:value])
          x << -log_measured
          y << -log_predicted
          error = log_predicted - log_measured
          squared_error_sum += error**2
          absolute_error_sum += error.abs
        else
          $logger.debug "No measured or predicted activity for compound #{cid}, prediction skipped."
        end
      end

      evaluated = x.size
      if evaluated.zero?
        return {
          :mae => nil,
          :rmse => nil,
          :r_squared => nil,
          :finished_at => Time.now
        }
      end

      R.assign "measurement", x
      R.assign "prediction", y
      R.eval "r <- cor(measurement,prediction,use='complete')"
      r = R.eval("r").to_ruby

      # Average over the evaluated pairs only, not over skipped predictions.
      mae = absolute_error_sum / evaluated
      rmse = Math.sqrt(squared_error_sum / evaluated)
      $logger.debug "R^2 #{r**2}"
      $logger.debug "RMSE #{rmse}"
      $logger.debug "MAE #{mae}"
      {
        :mae => mae,
        :rmse => rmse,
        :r_squared => r**2,
        :finished_at => Time.now
      }
    end
  end
end