From b515a0cfedb887a2af753db6e4a08ae1af430cad Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 31 May 2016 18:08:08 +0200 Subject: cleanup of validation modules/classes --- lib/leave-one-out-validation.rb | 141 +++++++++++++--------------------------- 1 file changed, 44 insertions(+), 97 deletions(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 9698e05..7ff65ff 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -1,110 +1,57 @@ module OpenTox - class LeaveOneOutValidation - - field :model_id, type: BSON::ObjectId - field :nr_instances, type: Integer - field :nr_unpredicted, type: Integer - field :predictions, type: Hash - field :finished_at, type: Time - - def self.create model - $logger.debug "#{model.name}: LOO validation started" - t = Time.now - model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation - loo = klass.new :model_id => model.id - predictions = model.predict model.training_dataset.substances - predictions.each{|cid,p| p.delete(:neighbors)} - nr_unpredicted = 0 - predictions.each do |cid,prediction| - if prediction[:value] - prediction[:measured] = model.training_dataset.values(cid, prediction[:prediction_feature_id]) - else - nr_unpredicted += 1 + module Validation + + class LeaveOneOut < Validation + + def self.create model + $logger.debug "#{model.name}: LOO validation started" + t = Time.now + model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOut : klass = RegressionLeaveOneOut + loo = klass.new :model_id => model.id + predictions = model.predict model.training_dataset.substances + predictions.each{|cid,p| p.delete(:neighbors)} + nr_unpredicted = 0 + predictions.each do |cid,prediction| + if prediction[:value] + prediction[:measurements] = model.training_dataset.values(cid, prediction[:prediction_feature_id]) + else + nr_unpredicted += 1 + end + predictions.delete(cid) unless prediction[:value] and prediction[:measurements] end - predictions.delete(cid) unless prediction[:value] and prediction[:measured] + predictions.select!{|cid,p| p[:value] and p[:measurements]} + loo.nr_instances = predictions.size + loo.nr_unpredicted = nr_unpredicted + loo.predictions = predictions + loo.statistics + $logger.debug "#{model.name}, LOO validation: #{Time.now-t} seconds" + loo end - predictions.select!{|cid,p| p[:value] and p[:measured]} - loo.nr_instances = predictions.size - loo.nr_unpredicted = nr_unpredicted - loo.predictions = predictions - loo.statistics - loo.save - $logger.debug "#{model.name}, LOO validation: #{Time.now-t} seconds" - loo - end - def model - Model::Lazar.find model_id end - end - class ClassificationLeaveOneOutValidation < LeaveOneOutValidation - - field :accept_values, type: Array - field :confusion_matrix, type: Array, default: [] - field :weighted_confusion_matrix, type: Array, default: [] - field :accuracy, type: Float - field :weighted_accuracy, type: Float - field :true_rate, type: Hash, default: {} - field :predictivity, type: Hash, default: {} - field :confidence_plot_id, type: BSON::ObjectId - - def statistics - stat = ValidationStatistics.classification(predictions, Feature.find(model.prediction_feature_id).accept_values) - update_attributes(stat) + class ClassificationLeaveOneOut < LeaveOneOut + include ClassificationStatistics + field :accept_values, type: Array + field :confusion_matrix, type: Array, default: [] + field :weighted_confusion_matrix, type: Array, default: [] + field :accuracy, type: Float + field :weighted_accuracy, type: Float + field :true_rate, type: Hash, default: {} + field :predictivity, type: Hash, default: {} + field :confidence_plot_id, type: BSON::ObjectId end - - def confidence_plot - unless confidence_plot_id - tmpfile = "/tmp/#{id.to_s}_confidence.svg" - accuracies = [] - confidences = [] - correct_predictions = 0 - incorrect_predictions = 0 - predictions.each do |p| - p[:database_activities].each do |db_act| - if p[:value] - p[:value] == db_act ? correct_predictions += 1 : incorrect_predictions += 1 - accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f - confidences << p[:confidence] - - end - end - end - R.assign "accuracy", accuracies - R.assign "confidence", confidences - R.eval "image = qplot(confidence,accuracy)+ylab('accumulated accuracy')+scale_x_reverse()" - R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg") - plot_id = $gridfs.insert_one(file) - update(:confidence_plot_id => plot_id) - end - $gridfs.find_one(_id: confidence_plot_id).data + + class RegressionLeaveOneOut < LeaveOneOut + include RegressionStatistics + field :rmse, type: Float, default: 0 + field :mae, type: Float, default: 0 + field :r_squared, type: Float + field :correlation_plot_id, type: BSON::ObjectId + field :confidence_plot_id, type: BSON::ObjectId end - end - - - class RegressionLeaveOneOutValidation < LeaveOneOutValidation - - field :rmse, type: Float, default: 0 - field :mae, type: Float, default: 0 - field :r_squared, type: Float - field :correlation_plot_id, type: BSON::ObjectId - field :confidence_plot_id, type: BSON::ObjectId - def statistics - stat = ValidationStatistics.regression predictions - update_attributes(stat) - end - - def correlation_plot - unless correlation_plot_id - plot_id = ValidationStatistics.correlation_plot id, predictions - update(:correlation_plot_id => plot_id) - end - $gridfs.find_one(_id: correlation_plot_id).data - end end end -- cgit v1.2.3