From a8368dda776c05331474adf7eaf9a6e413a3b1eb Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Apr 2016 15:15:51 +0200 Subject: validation tests pass --- lib/leave-one-out-validation.rb | 108 +++++++--------------------------------- 1 file changed, 18 insertions(+), 90 deletions(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 2cd13db..10fbe85 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -6,20 +6,26 @@ module OpenTox field :dataset_id, type: BSON::ObjectId field :nr_instances, type: Integer field :nr_unpredicted, type: Integer - field :predictions, type: Array + field :predictions, type: Hash field :finished_at, type: Time def self.create model model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation loo = klass.new :model_id => model.id, :dataset_id => model.training_dataset_id - compound_ids = model.training_dataset.compound_ids predictions = model.predict model.training_dataset.compounds - predictions = predictions.each_with_index {|p,i| p[:compound_id] = compound_ids[i]} - predictions.select!{|p| p[:database_activities] and !p[:database_activities].empty?} + predictions.each{|cid,p| p.delete(:neighbors)} + nr_unpredicted = 0 + predictions.each do |cid,prediction| + if prediction[:value] + prediction[:measured] = model.training_dataset.data_entries[cid][prediction[:prediction_feature_id].to_s] + else + nr_unpredicted += 1 + end + predictions.delete(cid) unless prediction[:value] and prediction[:measured] + end loo.nr_instances = predictions.size - predictions.select!{|p| p[:value]} # remove unpredicted - loo.predictions = predictions#.sort{|a,b| b[:confidence] <=> a[:confidence]} - loo.nr_unpredicted = loo.nr_instances - loo.predictions.size + loo.nr_unpredicted = nr_unpredicted + loo.predictions = predictions loo.statistics loo.save loo @@ -42,53 +48,8 @@ module OpenTox field :confidence_plot_id, type: BSON::ObjectId def statistics - accept_values = Feature.find(model.prediction_feature_id).accept_values - confusion_matrix = Array.new(accept_values.size,0){Array.new(accept_values.size,0)} - weighted_confusion_matrix = Array.new(accept_values.size,0){Array.new(accept_values.size,0)} - predictions.each do |pred| - pred[:database_activities].each do |db_act| - if pred[:value] - if pred[:value] == db_act - if pred[:value] == accept_values[0] - confusion_matrix[0][0] += 1 - weighted_confusion_matrix[0][0] += pred[:confidence] - elsif pred[:value] == accept_values[1] - confusion_matrix[1][1] += 1 - weighted_confusion_matrix[1][1] += pred[:confidence] - end - else - if pred[:value] == accept_values[0] - confusion_matrix[0][1] += 1 - weighted_confusion_matrix[0][1] += pred[:confidence] - elsif pred[:value] == accept_values[1] - confusion_matrix[1][0] += 1 - weighted_confusion_matrix[1][0] += pred[:confidence] - end - end - end - end - end - accept_values.each_with_index do |v,i| - true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f - predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f - end - confidence_sum = 0 - weighted_confusion_matrix.each do |r| - r.each do |c| - confidence_sum += c - end - end - update_attributes( - accept_values: accept_values, - confusion_matrix: confusion_matrix, - weighted_confusion_matrix: weighted_confusion_matrix, - accuracy: (confusion_matrix[0][0]+confusion_matrix[1][1])/(nr_instances-nr_unpredicted).to_f, - weighted_accuracy: (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f, - true_rate: true_rate, - predictivity: predictivity, - finished_at: Time.now - ) - $logger.debug "Accuracy #{accuracy}" + stat = ValidationStatistics.classification(predictions, Feature.find(model.prediction_feature_id).accept_values) + update_attributes(stat) end def confidence_plot @@ -132,43 +93,10 @@ module OpenTox field :correlation_plot_id, type: BSON::ObjectId field :confidence_plot_id, type: BSON::ObjectId + def statistics - confidence_sum = 0 - predicted_values = [] - measured_values = [] - predictions.each do |pred| - pred[:database_activities].each do |activity| - if pred[:value] - predicted_values << pred[:value] - measured_values << activity - error = Math.log10(pred[:value])-Math.log10(activity) - self.rmse += error**2 - #self.weighted_rmse += pred[:confidence]*error**2 - self.mae += error.abs - #self.weighted_mae += pred[:confidence]*error.abs - #confidence_sum += pred[:confidence] - end - end - if pred[:database_activities].empty? - warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}." - $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}." - end - end - R.assign "measurement", measured_values - R.assign "prediction", predicted_values - R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')" - r = R.eval("r").to_ruby - - self.mae = self.mae/predictions.size - #self.weighted_mae = self.weighted_mae/confidence_sum - self.rmse = Math.sqrt(self.rmse/predictions.size) - #self.weighted_rmse = Math.sqrt(self.weighted_rmse/confidence_sum) - self.r_squared = r**2 - self.finished_at = Time.now - save - $logger.debug "R^2 #{r**2}" - $logger.debug "RMSE #{rmse}" - $logger.debug "MAE #{mae}" + stat = ValidationStatistics.regression predictions + update_attributes(stat) end def correlation_plot -- cgit v1.2.3 From 8aab046eb1ad39aaf10c5a8596102c35c7b2ee0b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 15 Apr 2016 11:01:16 +0200 Subject: data_entries removed from datasets. datasets are now just containers for compounds and features, feature values have to be retrieved from substances. --- lib/leave-one-out-validation.rb | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 10fbe85..ed917eb 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -10,6 +10,8 @@ module OpenTox field :finished_at, type: Time def self.create model + $logger.debug "#{model.name}: LOO validation started" + t = Time.now model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation loo = klass.new :model_id => model.id, :dataset_id => model.training_dataset_id predictions = model.predict model.training_dataset.compounds @@ -17,7 +19,7 @@ module OpenTox nr_unpredicted = 0 predictions.each do |cid,prediction| if prediction[:value] - prediction[:measured] = model.training_dataset.data_entries[cid][prediction[:prediction_feature_id].to_s] + prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] else nr_unpredicted += 1 end @@ -28,6 +30,7 @@ module OpenTox loo.predictions = predictions loo.statistics loo.save + $logger.debug "#{model.name}, LOO validation: #{Time.now-t} seconds" loo end @@ -84,16 +87,12 @@ module OpenTox class RegressionLeaveOneOutValidation < LeaveOneOutValidation - - field :rmse, type: Float, default: 0.0 + field :rmse, type: Float, default: 0 field :mae, type: Float, default: 0 - #field :weighted_rmse, type: Float, default: 0 - #field :weighted_mae, type: Float, default: 0 field :r_squared, type: Float field :correlation_plot_id, type: BSON::ObjectId field :confidence_plot_id, type: BSON::ObjectId - def statistics stat = ValidationStatistics.regression predictions update_attributes(stat) -- cgit v1.2.3 From 06fc914653face2c58fd4e6c47161cb03e217582 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 8 May 2016 12:22:58 +0200 Subject: default validations fixed --- lib/leave-one-out-validation.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index ed917eb..2306041 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -19,7 +19,7 @@ module OpenTox nr_unpredicted = 0 predictions.each do |cid,prediction| if prediction[:value] - prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] + prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][dataset_id.to_s] else nr_unpredicted += 1 end -- cgit v1.2.3 From ab652ac85036c5b372e7f1a08cdb75a19db5b19a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 8 May 2016 12:57:10 +0200 Subject: regression crossvalidation fixed --- lib/leave-one-out-validation.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 2306041..7189617 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -3,7 +3,6 @@ module OpenTox class LeaveOneOutValidation field :model_id, type: BSON::ObjectId - field :dataset_id, type: BSON::ObjectId field :nr_instances, type: Integer field :nr_unpredicted, type: Integer field :predictions, type: Hash @@ -13,13 +12,14 @@ module OpenTox $logger.debug "#{model.name}: LOO validation started" t = Time.now model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation - loo = klass.new :model_id => model.id, :dataset_id => model.training_dataset_id + loo = klass.new :model_id => model.id predictions = model.predict model.training_dataset.compounds predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 predictions.each do |cid,prediction| if prediction[:value] - prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][dataset_id.to_s] + tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] + prediction[:measured] = tox[model.training_dataset_id.to_s] if tox else nr_unpredicted += 1 end -- cgit v1.2.3 From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 13 May 2016 13:38:24 +0200 Subject: compound tests fixed --- lib/leave-one-out-validation.rb | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 7189617..b8deae9 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -13,18 +13,18 @@ module OpenTox t = Time.now model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation loo = klass.new :model_id => model.id - predictions = model.predict model.training_dataset.compounds + predictions = model.predict model.training_dataset.substances predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 predictions.each do |cid,prediction| if prediction[:value] - tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] - prediction[:measured] = tox[model.training_dataset_id.to_s] if tox + prediction[:measured] = model.training_dataset.values(cid, prediction[:prediction_feature_id]) else nr_unpredicted += 1 end predictions.delete(cid) unless prediction[:value] and prediction[:measured] end + predictions.select!{|cid,p| p[:value] and p[:measured]} loo.nr_instances = predictions.size loo.nr_unpredicted = nr_unpredicted loo.predictions = predictions @@ -86,6 +86,7 @@ module OpenTox class RegressionLeaveOneOutValidation < LeaveOneOutValidation + include Plot field :rmse, type: Float, default: 0 field :mae, type: Float, default: 0 @@ -100,29 +101,7 @@ module OpenTox def correlation_plot unless correlation_plot_id - tmpfile = "/tmp/#{id.to_s}_correlation.svg" - predicted_values = [] - measured_values = [] - predictions.each do |pred| - pred[:database_activities].each do |activity| - if pred[:value] - predicted_values << pred[:value] - measured_values << activity - end - end - end - attributes = Model::Lazar.find(self.model_id).attributes - attributes.delete_if{|key,_| key.match(/_id|_at/) or ["_id","creator","name"].include? key} - attributes = attributes.values.collect{|v| v.is_a?(String) ? v.sub(/OpenTox::/,'') : v}.join("\n") - R.assign "measurement", measured_values - R.assign "prediction", predicted_values - R.eval "all = c(-log(measurement),-log(prediction))" - R.eval "range = c(min(all), max(all))" - R.eval "image = qplot(-log(prediction),-log(measurement),main='#{self.name}',asp=1,xlim=range, ylim=range)" - R.eval "image = image + geom_abline(intercept=0, slope=1)" - R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.svg") - plot_id = $gridfs.insert_one(file) + #plot_id = correlation_plot update(:correlation_plot_id => plot_id) end $gridfs.find_one(_id: correlation_plot_id).data -- cgit v1.2.3 From f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 27 May 2016 19:16:16 +0200 Subject: first correlation of nanoparticle predictions --- lib/leave-one-out-validation.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index b8deae9..9698e05 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -86,7 +86,6 @@ module OpenTox class RegressionLeaveOneOutValidation < LeaveOneOutValidation - include Plot field :rmse, type: Float, default: 0 field :mae, type: Float, default: 0 @@ -101,7 +100,7 @@ module OpenTox def correlation_plot unless correlation_plot_id - #plot_id = correlation_plot + plot_id = ValidationStatistics.correlation_plot id, predictions update(:correlation_plot_id => plot_id) end $gridfs.find_one(_id: correlation_plot_id).data -- cgit v1.2.3 From b515a0cfedb887a2af753db6e4a08ae1af430cad Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 31 May 2016 18:08:08 +0200 Subject: cleanup of validation modules/classes --- lib/leave-one-out-validation.rb | 141 +++++++++++++--------------------------- 1 file changed, 44 insertions(+), 97 deletions(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 9698e05..7ff65ff 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -1,110 +1,57 @@ module OpenTox - class LeaveOneOutValidation - - field :model_id, type: BSON::ObjectId - field :nr_instances, type: Integer - field :nr_unpredicted, type: Integer - field :predictions, type: Hash - field :finished_at, type: Time - - def self.create model - $logger.debug "#{model.name}: LOO validation started" - t = Time.now - model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOutValidation : klass = RegressionLeaveOneOutValidation - loo = klass.new :model_id => model.id - predictions = model.predict model.training_dataset.substances - predictions.each{|cid,p| p.delete(:neighbors)} - nr_unpredicted = 0 - predictions.each do |cid,prediction| - if prediction[:value] - prediction[:measured] = model.training_dataset.values(cid, prediction[:prediction_feature_id]) - else - nr_unpredicted += 1 + module Validation + + class LeaveOneOut < Validation + + def self.create model + $logger.debug "#{model.name}: LOO validation started" + t = Time.now + model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOut : klass = RegressionLeaveOneOut + loo = klass.new :model_id => model.id + predictions = model.predict model.training_dataset.substances + predictions.each{|cid,p| p.delete(:neighbors)} + nr_unpredicted = 0 + predictions.each do |cid,prediction| + if prediction[:value] + prediction[:measurements] = model.training_dataset.values(cid, prediction[:prediction_feature_id]) + else + nr_unpredicted += 1 + end + predictions.delete(cid) unless prediction[:value] and prediction[:measurements] end - predictions.delete(cid) unless prediction[:value] and prediction[:measured] + predictions.select!{|cid,p| p[:value] and p[:measurements]} + loo.nr_instances = predictions.size + loo.nr_unpredicted = nr_unpredicted + loo.predictions = predictions + loo.statistics + $logger.debug "#{model.name}, LOO validation: #{Time.now-t} seconds" + loo end - predictions.select!{|cid,p| p[:value] and p[:measured]} - loo.nr_instances = predictions.size - loo.nr_unpredicted = nr_unpredicted - loo.predictions = predictions - loo.statistics - loo.save - $logger.debug "#{model.name}, LOO validation: #{Time.now-t} seconds" - loo - end - def model - Model::Lazar.find model_id end - end - class ClassificationLeaveOneOutValidation < LeaveOneOutValidation - - field :accept_values, type: Array - field :confusion_matrix, type: Array, default: [] - field :weighted_confusion_matrix, type: Array, default: [] - field :accuracy, type: Float - field :weighted_accuracy, type: Float - field :true_rate, type: Hash, default: {} - field :predictivity, type: Hash, default: {} - field :confidence_plot_id, type: BSON::ObjectId - - def statistics - stat = ValidationStatistics.classification(predictions, Feature.find(model.prediction_feature_id).accept_values) - update_attributes(stat) + class ClassificationLeaveOneOut < LeaveOneOut + include ClassificationStatistics + field :accept_values, type: Array + field :confusion_matrix, type: Array, default: [] + field :weighted_confusion_matrix, type: Array, default: [] + field :accuracy, type: Float + field :weighted_accuracy, type: Float + field :true_rate, type: Hash, default: {} + field :predictivity, type: Hash, default: {} + field :confidence_plot_id, type: BSON::ObjectId end - - def confidence_plot - unless confidence_plot_id - tmpfile = "/tmp/#{id.to_s}_confidence.svg" - accuracies = [] - confidences = [] - correct_predictions = 0 - incorrect_predictions = 0 - predictions.each do |p| - p[:database_activities].each do |db_act| - if p[:value] - p[:value] == db_act ? correct_predictions += 1 : incorrect_predictions += 1 - accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f - confidences << p[:confidence] - - end - end - end - R.assign "accuracy", accuracies - R.assign "confidence", confidences - R.eval "image = qplot(confidence,accuracy)+ylab('accumulated accuracy')+scale_x_reverse()" - R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg") - plot_id = $gridfs.insert_one(file) - update(:confidence_plot_id => plot_id) - end - $gridfs.find_one(_id: confidence_plot_id).data + + class RegressionLeaveOneOut < LeaveOneOut + include RegressionStatistics + field :rmse, type: Float, default: 0 + field :mae, type: Float, default: 0 + field :r_squared, type: Float + field :correlation_plot_id, type: BSON::ObjectId + field :confidence_plot_id, type: BSON::ObjectId end - end - - - class RegressionLeaveOneOutValidation < LeaveOneOutValidation - - field :rmse, type: Float, default: 0 - field :mae, type: Float, default: 0 - field :r_squared, type: Float - field :correlation_plot_id, type: BSON::ObjectId - field :confidence_plot_id, type: BSON::ObjectId - def statistics - stat = ValidationStatistics.regression predictions - update_attributes(stat) - end - - def correlation_plot - unless correlation_plot_id - plot_id = ValidationStatistics.correlation_plot id, predictions - update(:correlation_plot_id => plot_id) - end - $gridfs.find_one(_id: correlation_plot_id).data - end end end -- cgit v1.2.3 From 65b69d4c35890a7a2d2992108f0cf4eb5202dd1b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 1 Jun 2016 10:37:00 +0200 Subject: validation tests fixed --- lib/leave-one-out-validation.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 7ff65ff..59f43c5 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -49,7 +49,6 @@ module OpenTox field :mae, type: Float, default: 0 field :r_squared, type: Float field :correlation_plot_id, type: BSON::ObjectId - field :confidence_plot_id, type: BSON::ObjectId end end -- cgit v1.2.3 From 8519274487166d75b3b9ae28e61f7a7be9f7e83c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 27 Oct 2016 11:58:07 +0200 Subject: probability plot for classification validations --- lib/leave-one-out-validation.rb | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/leave-one-out-validation.rb') diff --git a/lib/leave-one-out-validation.rb b/lib/leave-one-out-validation.rb index 59f43c5..538b7b3 100644 --- a/lib/leave-one-out-validation.rb +++ b/lib/leave-one-out-validation.rb @@ -5,6 +5,7 @@ module OpenTox class LeaveOneOut < Validation def self.create model + bad_request_error "Cannot create leave one out validation for models with supervised feature selection. Please use crossvalidation instead." if model.algorithms[:feature_selection] $logger.debug "#{model.name}: LOO validation started" t = Time.now model.training_dataset.features.first.nominal? ? klass = ClassificationLeaveOneOut : klass = RegressionLeaveOneOut @@ -48,6 +49,8 @@ module OpenTox field :rmse, type: Float, default: 0 field :mae, type: Float, default: 0 field :r_squared, type: Float + field :within_prediction_interval, type: Integer, default:0 + field :out_of_prediction_interval, type: Integer, default:0 field :correlation_plot_id, type: BSON::ObjectId end -- cgit v1.2.3