summary refs log tree commit diff
path: root/lib/leave-one-out-validation.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-05-31 18:08:08 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-05-31 18:08:08 +0200
commit b515a0cfedb887a2af753db6e4a08ae1af430cad (patch)
tree 5d69d89d0031d581e932272aeb741ee38a0106d6 /lib/leave-one-out-validation.rb
parent f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 (diff)
cleanup of validation modules/classes
Diffstat (limited to 'lib/leave-one-out-validation.rb')
-rw-r--r-- lib/leave-one-out-validation.rb 141
1 files changed, 44 insertions, 97 deletions
diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb
index 9698e05..7ff65ff 100644
--- a/lib/leave-one-out-validation.rb
+++ b/lib/leave-one-out-validation.rb
@@ -1,110 +1,57 @@
module OpenTox
- class LeaveOneOutValidation
-
- field :model_id, type: BSON::ObjectId
- field :nr_instances, type: Integer
- field :nr_unpredicted, type: Integer
- field :predictions, type: Hash
- field :finished_at, type: Time
-
- def self.create model
- $logger.debug "#{model.name}: LOO validation started"
- t = Time.now
- model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation
- loo = klass.new :model_id => model.id
- predictions = model.predict model.training_dataset.substances
- predictions.each{|cid,p| p.delete(:neighbors)}
- nr_unpredicted = 0
- predictions.each do |cid,prediction|
- if prediction[:value]
- prediction[:measured] = model.training_dataset.values(cid, prediction[:prediction_feature_id])
- else
- nr_unpredicted += 1
+ module Validation
+
+ class LeaveOneOut < Validation
+
+ def self.create model
+ $logger.debug "#{model.name}: LOO validation started"
+ t = Time.now
+ model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOut : klass = RegressionLeaveOneOut
+ loo = klass.new :model_id => model.id
+ predictions = model.predict model.training_dataset.substances
+ predictions.each{|cid,p| p.delete(:neighbors)}
+ nr_unpredicted = 0
+ predictions.each do |cid,prediction|
+ if prediction[:value]
+ prediction[:measurements] = model.training_dataset.values(cid, prediction[:prediction_feature_id])
+ else
+ nr_unpredicted += 1
+ end
+ predictions.delete(cid) unless prediction[:value] and prediction[:measurements]
end
- predictions.delete(cid) unless prediction[:value] and prediction[:measured]
+ predictions.select!{|cid,p| p[:value] and p[:measurements]}
+ loo.nr_instances = predictions.size
+ loo.nr_unpredicted = nr_unpredicted
+ loo.predictions = predictions
+ loo.statistics
+ $logger.debug "#{model.name}, LOO validation: #{Time.now-t} seconds"
+ loo
end
- predictions.select!{|cid,p| p[:value] and p[:measured]}
- loo.nr_instances = predictions.size
- loo.nr_unpredicted = nr_unpredicted
- loo.predictions = predictions
- loo.statistics
- loo.save
- $logger.debug "#{model.name}, LOO validation: #{Time.now-t} seconds"
- loo
- end
- def model
- Model::Lazar.find model_id
end
- end
- class ClassificationLeaveOneOutValidation < LeaveOneOutValidation
-
- field :accept_values, type: Array
- field :confusion_matrix, type: Array, default: []
- field :weighted_confusion_matrix, type: Array, default: []
- field :accuracy, type: Float
- field :weighted_accuracy, type: Float
- field :true_rate, type: Hash, default: {}
- field :predictivity, type: Hash, default: {}
- field :confidence_plot_id, type: BSON::ObjectId
-
- def statistics
- stat = ValidationStatistics.classification(predictions, Feature.find(model.prediction_feature_id).accept_values)
- update_attributes(stat)
+ class ClassificationLeaveOneOut < LeaveOneOut
+ include ClassificationStatistics
+ field :accept_values, type: Array
+ field :confusion_matrix, type: Array, default: []
+ field :weighted_confusion_matrix, type: Array, default: []
+ field :accuracy, type: Float
+ field :weighted_accuracy, type: Float
+ field :true_rate, type: Hash, default: {}
+ field :predictivity, type: Hash, default: {}
+ field :confidence_plot_id, type: BSON::ObjectId
end
-
- def confidence_plot
- unless confidence_plot_id
- tmpfile = "/tmp/#{id.to_s}_confidence.svg"
- accuracies = []
- confidences = []
- correct_predictions = 0
- incorrect_predictions = 0
- predictions.each do |p|
- p[:database_activities].each do |db_act|
- if p[:value]
- p[:value] == db_act ? correct_predictions += 1 : incorrect_predictions += 1
- accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
- confidences << p[:confidence]
-
- end
- end
- end
- R.assign "accuracy", accuracies
- R.assign "confidence", confidences
- R.eval "image = qplot(confidence,accuracy)+ylab('accumulated accuracy')+scale_x_reverse()"
- R.eval "ggsave(file='#{tmpfile}', plot=image)"
- file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg")
- plot_id = $gridfs.insert_one(file)
- update(:confidence_plot_id => plot_id)
- end
- $gridfs.find_one(_id: confidence_plot_id).data
+
+ class RegressionLeaveOneOut < LeaveOneOut
+ include RegressionStatistics
+ field :rmse, type: Float, default: 0
+ field :mae, type: Float, default: 0
+ field :r_squared, type: Float
+ field :correlation_plot_id, type: BSON::ObjectId
+ field :confidence_plot_id, type: BSON::ObjectId
end
- end
-
-
- class RegressionLeaveOneOutValidation < LeaveOneOutValidation
-
- field :rmse, type: Float, default: 0
- field :mae, type: Float, default: 0
- field :r_squared, type: Float
- field :correlation_plot_id, type: BSON::ObjectId
- field :confidence_plot_id, type: BSON::ObjectId
- def statistics
- stat = ValidationStatistics.regression predictions
- update_attributes(stat)
- end
-
- def correlation_plot
- unless correlation_plot_id
- plot_id = ValidationStatistics.correlation_plot id, predictions
- update(:correlation_plot_id => plot_id)
- end
- $gridfs.find_one(_id: correlation_plot_id).data
- end
end
end