module OpenTox

  # Leave-one-out validation of a lazar model on its own training dataset.
  #
  # Instances are built through LeaveOneOutValidation.create, which picks the
  # classification or regression subclass, stores the usable predictions and
  # asks the subclass to compute its statistics.
  class LeaveOneOutValidation

    field :model_id, type: BSON::ObjectId
    field :dataset_id, type: BSON::ObjectId
    field :nr_instances, type: Integer
    field :nr_unpredicted, type: Integer
    field :predictions, type: Array
    field :finished_at, type: Time

    # Validate +model+ by predicting every compound of its training dataset.
    #
    # @param model the model to validate; it must respond to +training_dataset+,
    #   +training_dataset_id+, +id+ and +predict+
    # @return [ClassificationLeaveOneOutValidation, RegressionLeaveOneOutValidation]
    #   the saved validation; the classification subclass is used when the
    #   first feature of the training dataset is nominal
    def self.create model
      klass = model.training_dataset.features.first.nominal? ? ClassificationLeaveOneOutValidation : RegressionLeaveOneOutValidation
      loo = klass.new :model_id => model.id, :dataset_id => model.training_dataset_id
      compound_ids = model.training_dataset.compound_ids
      predictions = model.predict model.training_dataset.compounds
      # Tag each prediction with the compound it belongs to (same order as the dataset).
      predictions.each_with_index { |prediction, i| prediction[:compound_id] = compound_ids[i] }
      # Only predictions with measured activities can be validated.
      predictions.select! { |prediction| prediction[:database_activities] && !prediction[:database_activities].empty? }
      loo.nr_instances = predictions.size
      predictions.select! { |prediction| prediction[:value] } # remove unpredicted
      loo.predictions = predictions#.sort{|a,b| b[:confidence] <=> a[:confidence]}
      loo.nr_unpredicted = loo.nr_instances - loo.predictions.size
      loo.statistics
      loo.save
      loo
    end

    # @return the Model::Lazar document referenced by +model_id+
    def model
      Model::Lazar.find model_id
    end
  end

  # Leave-one-out validation for models with a nominal prediction feature.
  class ClassificationLeaveOneOutValidation < LeaveOneOutValidation

    field :accept_values, type: Array
    field :confusion_matrix, type: Array, default: []
    field :weighted_confusion_matrix, type: Array, default: []
    field :accuracy, type: Float
    field :weighted_accuracy, type: Float
    field :true_rate, type: Hash, default: {}
    field :predictivity, type: Hash, default: {}
    field :confidence_plot_id, type: BSON::ObjectId

    # Compute the confusion matrices, accuracies, true rates and predictivities
    # from +predictions+ and persist them with +update_attributes+.
    #
    # Matrix rows are indexed by the predicted class and columns by the
    # measured class. Only the first two accept values are counted, i.e. the
    # statistics assume a binary classification.
    def statistics
      accept_values = Feature.find(model.prediction_feature_id).accept_values
      size = accept_values.size
      # Block form only: combining a default value with a block makes Ruby warn
      # that the block supersedes the default.
      confusion_matrix = Array.new(size) { Array.new(size, 0) }
      weighted_confusion_matrix = Array.new(size) { Array.new(size, 0) }

      predictions.each do |pred|
        next unless pred[:value]
        # Index (0 or 1) of the predicted class; other values are not counted.
        predicted = accept_values[0..1].index(pred[:value])
        next if predicted.nil?
        pred[:database_activities].each do |db_act|
          # A mismatch is booked in the column of the other class.
          actual = pred[:value] == db_act ? predicted : 1 - predicted
          confusion_matrix[predicted][actual] += 1
          weighted_confusion_matrix[predicted][actual] += pred[:confidence]
        end
      end

      true_rates = {}
      predictivities = {}
      accept_values.each_with_index do |v, i|
        true_rates[v] = confusion_matrix[i][i] / confusion_matrix[i].reduce(:+).to_f
        predictivities[v] = confusion_matrix[i][i] / confusion_matrix.map { |row| row[i] }.reduce(:+).to_f
      end

      confidence_sum = weighted_confusion_matrix.flatten.reduce(0, :+)

      update_attributes(
        accept_values: accept_values,
        confusion_matrix: confusion_matrix,
        weighted_confusion_matrix: weighted_confusion_matrix,
        accuracy: (confusion_matrix[0][0] + confusion_matrix[1][1]) / (nr_instances - nr_unpredicted).to_f,
        weighted_accuracy: (weighted_confusion_matrix[0][0] + weighted_confusion_matrix[1][1]) / confidence_sum.to_f,
        true_rate: true_rates,
        predictivity: predictivities,
        finished_at: Time.now
      )
      $logger.debug "Accuracy #{accuracy}"
    end

    # Return the SVG data of the confidence plot (accumulated accuracy against
    # confidence), rendering and storing it in GridFS on first use.
    #
    # NOTE(review): accuracy is accumulated in the stored order of
    # +predictions+; the sort by confidence in +create+ is commented out, so
    # the curve may not be ordered by confidence - confirm this is intended.
    #
    # @return the +data+ of the GridFS file referenced by +confidence_plot_id+
    def confidence_plot
      unless confidence_plot_id
        tmpfile = "/tmp/#{id.to_s}_confidence.svg"
        accuracies = []
        confidences = []
        correct_predictions = 0
        incorrect_predictions = 0
        predictions.each do |pred|
          next unless pred[:value]
          pred[:database_activities].each do |db_act|
            if pred[:value] == db_act
              correct_predictions += 1
            else
              incorrect_predictions += 1
            end
            accuracies << correct_predictions / (correct_predictions + incorrect_predictions).to_f
            confidences << pred[:confidence]
          end
        end
        R.assign "accuracy", accuracies
        R.assign "confidence", confidences
        R.eval "image = qplot(confidence,accuracy)+ylab('accumulated accuracy')+scale_x_reverse()"
        R.eval "ggsave(file='#{tmpfile}', plot=image)"
        file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg")
        plot_id = $gridfs.insert_one(file)
        update(:confidence_plot_id => plot_id)
      end
      $gridfs.find_one(_id: confidence_plot_id).data
    end
  end

  # Leave-one-out validation for models with a numeric prediction feature.
  class RegressionLeaveOneOutValidation < LeaveOneOutValidation

    field :rmse, type: Float, default: 0.0
    field :mae, type: Float, default: 0
    #field :weighted_rmse, type: Float, default: 0
    #field :weighted_mae, type: Float, default: 0
    field :r_squared, type: Float
    field :correlation_plot_id, type: BSON::ObjectId
    field :confidence_plot_id, type: BSON::ObjectId

    # Compute RMSE, MAE (both on log10 values) and R^2 from +predictions+,
    # then save the document.
    #
    # Errors are summed over every (prediction, measured activity) pair and
    # divided by the number of predictions. Sums are kept in local variables so
    # that calling this method again does not build on previously stored results.
    # Confidence-weighted RMSE/MAE are currently disabled (see the commented
    # fields above).
    def statistics
      predicted_values = []
      measured_values = []
      squared_error_sum = 0.0
      absolute_error_sum = 0.0

      predictions.each do |pred|
        activities = pred[:database_activities]
        if activities.empty?
          # The compound id is stored on the prediction by LeaveOneOutValidation.create.
          # `warnings` is not declared in this file - presumably provided by the
          # shared OpenTox document setup; verify.
          message = "No training activities for #{Compound.find(pred[:compound_id]).smiles} in training dataset #{model.training_dataset_id}."
          warnings << message
          $logger.debug message
          next
        end
        next unless pred[:value]
        activities.each do |activity|
          predicted_values << pred[:value]
          measured_values << activity
          error = Math.log10(pred[:value]) - Math.log10(activity)
          squared_error_sum += error**2
          absolute_error_sum += error.abs
        end
      end

      R.assign "measurement", measured_values
      R.assign "prediction", predicted_values
      R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')"
      r = R.eval("r").to_ruby

      self.mae = absolute_error_sum / predictions.size
      self.rmse = Math.sqrt(squared_error_sum / predictions.size)
      self.r_squared = r**2
      self.finished_at = Time.now
      save
      $logger.debug "R^2 #{r**2}"
      $logger.debug "RMSE #{rmse}"
      $logger.debug "MAE #{mae}"
    end

    # Return the SVG data of the correlation plot (-log prediction against
    # -log measurement), rendering and storing it in GridFS on first use.
    #
    # @return the +data+ of the GridFS file referenced by +correlation_plot_id+
    def correlation_plot
      unless correlation_plot_id
        tmpfile = "/tmp/#{id.to_s}_correlation.svg"
        predicted_values = []
        measured_values = []
        predictions.each do |pred|
          next unless pred[:value]
          pred[:database_activities].each do |activity|
            predicted_values << pred[:value]
            measured_values << activity
          end
        end
        R.assign "measurement", measured_values
        R.assign "prediction", predicted_values
        R.eval "all = c(-log(measurement),-log(prediction))"
        R.eval "range = c(min(all), max(all))"
        # NOTE(review): the name is interpolated into R source unescaped; a
        # quote character in it would break the R statement.
        R.eval "image = qplot(-log(prediction),-log(measurement),main='#{self.name}',asp=1,xlim=range, ylim=range)"
        R.eval "image = image + geom_abline(intercept=0, slope=1)"
        R.eval "ggsave(file='#{tmpfile}', plot=image)"
        file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.svg")
        plot_id = $gridfs.insert_one(file)
        update(:correlation_plot_id => plot_id)
      end
      $gridfs.find_one(_id: correlation_plot_id).data
    end
  end

end