From ca2bb0f90335b1f2c4ecc28ee423e85b281ffcf0 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 4 Nov 2015 17:50:17 +0100 Subject: neighbor search delegated to database backend --- lib/crossvalidation.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 2e6dabb..3127351 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -136,7 +136,7 @@ module OpenTox incorrect_predictions = 0 predictions.each do |p| if p[1] and p[2] - p[1] == p [2] ? correct_predictions += 1 : incorrect_predictions += 1 + p[1] == p[2] ? correct_predictions += 1 : incorrect_predictions += 1 accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f confidences << p[3] @@ -243,7 +243,7 @@ module OpenTox :neighbors => neighbors } end - end.compact.sort{|a,b| p a; b[:relative_error] <=> a[:relative_error]}[0..n-1] + end.compact.sort{|a,b| b[:relative_error] <=> a[:relative_error]}[0..n-1] end def confidence_plot -- cgit v1.2.3 From d6eced29e104b9bc1923b2ac89b2700a48adf07a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 8 Jan 2016 11:00:20 +0100 Subject: mg-mmol conversion fixed --- lib/crossvalidation.rb | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 3127351..9b5c4e2 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -175,8 +175,6 @@ module OpenTox weighted_rse = 0 mae = 0 weighted_mae = 0 - rae = 0 - weighted_rae = 0 confidence_sum = 0 predictions.each do |pred| compound_id,activity,prediction,confidence = pred -- cgit v1.2.3 From f61b7d3c65d084747dc1bf87214e5ec0c57326be Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 9 Feb 2016 11:04:00 +0100 Subject: pls regression --- lib/crossvalidation.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 9b5c4e2..9789882 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -35,14 +35,14 @@ module OpenTox predictions = [] training_dataset = Dataset.find model.training_dataset_id training_dataset.folds(n).each_with_index do |fold,fold_nr| - fork do # parallel execution of validations + #fork do # parallel execution of validations $logger.debug "Dataset #{training_dataset.name}: Fold #{fold_nr} started" t = Time.now validation = Validation.create(model, fold[0], fold[1],cv) $logger.debug "Dataset #{training_dataset.name}, Fold #{fold_nr}: #{Time.now-t} seconds" - end + #end end - Process.waitall + #Process.waitall cv.validation_ids = Validation.where(:crossvalidation_id => cv.id).distinct(:_id) cv.validations.each do |validation| nr_instances += validation.nr_instances @@ -176,6 +176,7 @@ module OpenTox mae = 0 weighted_mae = 0 confidence_sum = 0 + p predictions predictions.each do |pred| compound_id,activity,prediction,confidence = pred if activity and prediction @@ -194,6 +195,8 @@ module OpenTox y = predictions.collect{|p| p[2]} R.assign "measurement", x R.assign "prediction", y + p x + p y R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')" r = R.eval("r").to_ruby -- cgit v1.2.3 From e778475c578f13f30af4437845716d7e781c2609 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sat, 13 Feb 2016 13:15:29 +0100 Subject: improved handling of duplicates in validations --- lib/crossvalidation.rb | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 9789882..0c5f0be 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -176,7 +176,6 @@ module OpenTox mae = 0 weighted_mae = 0 confidence_sum = 0 - p predictions predictions.each do |pred| compound_id,activity,prediction,confidence = pred if activity and prediction @@ -195,8 +194,6 @@ module OpenTox y = predictions.collect{|p| p[2]} R.assign "measurement", x R.assign "prediction", y - p x - p y R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')" r = R.eval("r").to_ruby -- cgit v1.2.3 From b90720cc26d789a96fa6f7a054fe06fc8b4ef33d Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sat, 27 Feb 2016 16:47:48 +0100 Subject: local pls regression as default regression algorithm --- lib/crossvalidation.rb | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 0c5f0be..362842e 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -55,7 +55,7 @@ module OpenTox predictions: predictions.sort{|a,b| b[3] <=> a[3]} # sort according to confidence ) $logger.debug "Nr unpredicted: #{nr_unpredicted}" - cv.statistics + #cv.statistics cv end end @@ -179,12 +179,14 @@ module OpenTox predictions.each do |pred| compound_id,activity,prediction,confidence = pred if activity and prediction - error = Math.log10(prediction)-Math.log10(activity) - rmse += error**2 - weighted_rmse += confidence*error**2 - mae += error.abs - weighted_mae += confidence*error.abs - confidence_sum += confidence + activity.each do |act| + error = Math.log10(prediction)-Math.log10(act) + rmse += error**2 + weighted_rmse += confidence*error**2 + mae += error.abs + weighted_mae += confidence*error.abs + confidence_sum += confidence + end else warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}." $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}." -- cgit v1.2.3 From 72f6cd966a249859e009a0db5f7b089aad1d6511 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 29 Feb 2016 08:59:43 +0100 Subject: regression crossvalidation fixed --- lib/crossvalidation.rb | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 362842e..ea32a2b 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -176,11 +176,15 @@ module OpenTox mae = 0 weighted_mae = 0 confidence_sum = 0 + x = [] + y = [] predictions.each do |pred| compound_id,activity,prediction,confidence = pred - if activity and prediction - activity.each do |act| - error = Math.log10(prediction)-Math.log10(act) + if activity and prediction + unless activity == [nil] + x << -Math.log10(activity.median) + y << -Math.log10(prediction) + error = Math.log10(prediction)-Math.log10(activity.median) rmse += error**2 weighted_rmse += confidence*error**2 mae += error.abs @@ -192,22 +196,20 @@ module OpenTox $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}." end end - x = predictions.collect{|p| p[1]} - y = predictions.collect{|p| p[2]} R.assign "measurement", x R.assign "prediction", y R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')" r = R.eval("r").to_ruby mae = mae/predictions.size - weighted_mae = weighted_mae/confidence_sum + #weighted_mae = weighted_mae/confidence_sum rmse = Math.sqrt(rmse/predictions.size) - weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum) + #weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum) update_attributes( mae: mae, rmse: rmse, - weighted_mae: weighted_mae, - weighted_rmse: weighted_rmse, + #weighted_mae: weighted_mae, + #weighted_rmse: weighted_rmse, r_squared: r**2, finished_at: Time.now ) -- cgit v1.2.3 From 0c5d2e678908a2d4aea43efbedbedc2c0439be30 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 14 Mar 2016 15:25:50 +0100 Subject: descriptor tests --- lib/crossvalidation.rb | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index ea32a2b..cd94e33 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -55,7 +55,6 @@ module OpenTox predictions: predictions.sort{|a,b| b[3] <=> a[3]} # sort according to confidence ) $logger.debug "Nr unpredicted: #{nr_unpredicted}" - #cv.statistics cv end end -- cgit v1.2.3 From 7c3bd90c26dfeea2db3cf74a1cefc23d8dece7c0 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 15 Mar 2016 17:40:40 +0100 Subject: validation tests pass --- lib/crossvalidation.rb | 68 ++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 36 deletions(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index cd94e33..08a5ad3 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -52,9 +52,10 @@ module OpenTox cv.update_attributes( nr_instances: nr_instances, nr_unpredicted: nr_unpredicted, - predictions: predictions.sort{|a,b| b[3] <=> a[3]} # sort according to confidence + predictions: predictions#.sort{|a,b| b[3] <=> a[3]} # sort according to confidence ) $logger.debug "Nr unpredicted: #{nr_unpredicted}" + cv.statistics cv end end @@ -78,23 +79,26 @@ module OpenTox true_rate = {} predictivity = {} predictions.each do |pred| - compound_id,activity,prediction,confidence = pred - if activity and prediction and confidence.numeric? - if prediction == activity - if prediction == accept_values[0] - confusion_matrix[0][0] += 1 - weighted_confusion_matrix[0][0] += confidence - elsif prediction == accept_values[1] - confusion_matrix[1][1] += 1 - weighted_confusion_matrix[1][1] += confidence - end - elsif prediction != activity - if prediction == accept_values[0] - confusion_matrix[0][1] += 1 - weighted_confusion_matrix[0][1] += confidence - elsif prediction == accept_values[1] - confusion_matrix[1][0] += 1 - weighted_confusion_matrix[1][0] += confidence + compound_id,activities,prediction,confidence = pred + if activities and prediction #and confidence.numeric? + if activities.uniq.size == 1 + activity = activities.uniq.first + if prediction == activity + if prediction == accept_values[0] + confusion_matrix[0][0] += 1 + #weighted_confusion_matrix[0][0] += confidence + elsif prediction == accept_values[1] + confusion_matrix[1][1] += 1 + #weighted_confusion_matrix[1][1] += confidence + end + elsif prediction != activity + if prediction == accept_values[0] + confusion_matrix[0][1] += 1 + #weighted_confusion_matrix[0][1] += confidence + elsif prediction == accept_values[1] + confusion_matrix[1][0] += 1 + #weighted_confusion_matrix[1][0] += confidence + end end end else @@ -108,17 +112,17 @@ module OpenTox predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f end confidence_sum = 0 - weighted_confusion_matrix.each do |r| - r.each do |c| - confidence_sum += c - end - end + #weighted_confusion_matrix.each do |r| + #r.each do |c| + #confidence_sum += c + #end + #end update_attributes( accept_values: accept_values, confusion_matrix: confusion_matrix, - weighted_confusion_matrix: weighted_confusion_matrix, + #weighted_confusion_matrix: weighted_confusion_matrix, accuracy: (confusion_matrix[0][0]+confusion_matrix[1][1])/(nr_instances-nr_unpredicted).to_f, - weighted_accuracy: (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f, + #weighted_accuracy: (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f, true_rate: true_rate, predictivity: predictivity, finished_at: Time.now @@ -161,20 +165,12 @@ module OpenTox field :rmse, type: Float field :mae, type: Float - field :weighted_rmse, type: Float - field :weighted_mae, type: Float field :r_squared, type: Float field :correlation_plot_id, type: BSON::ObjectId - field :confidence_plot_id, type: BSON::ObjectId def statistics rmse = 0 - weighted_rmse = 0 - rse = 0 - weighted_rse = 0 mae = 0 - weighted_mae = 0 - confidence_sum = 0 x = [] y = [] predictions.each do |pred| @@ -185,10 +181,10 @@ module OpenTox y << -Math.log10(prediction) error = Math.log10(prediction)-Math.log10(activity.median) rmse += error**2 - weighted_rmse += confidence*error**2 + #weighted_rmse += confidence*error**2 mae += error.abs - weighted_mae += confidence*error.abs - confidence_sum += confidence + #weighted_mae += confidence*error.abs + #confidence_sum += confidence end else warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}." -- cgit v1.2.3 From 6117375fdc800fd071fc4983896c26700bf2acd7 Mon Sep 17 00:00:00 2001 From: gebele Date: Mon, 21 Mar 2016 09:50:12 +0000 Subject: added install script for R packages, updated README with install instructions; changed plot format from svg to png --- lib/crossvalidation.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 08a5ad3..29e208c 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -132,7 +132,7 @@ module OpenTox def confidence_plot unless confidence_plot_id - tmpfile = "/tmp/#{id.to_s}_confidence.svg" + tmpfile = "/tmp/#{id.to_s}_confidence.png" accuracies = [] confidences = [] correct_predictions = 0 @@ -149,7 +149,7 @@ module OpenTox R.assign "confidence", confidences R.eval "image = qplot(confidence,accuracy)+ylab('accumulated accuracy')+scale_x_reverse()" R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg") + file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.png") plot_id = $gridfs.insert_one(file) update(:confidence_plot_id => plot_id) end @@ -244,7 +244,7 @@ module OpenTox end def confidence_plot - tmpfile = "/tmp/#{id.to_s}_confidence.svg" + tmpfile = "/tmp/#{id.to_s}_confidence.png" sorted_predictions = predictions.collect{|p| [(Math.log10(p[1])-Math.log10(p[2])).abs,p[3]] if p[1] and p[2]}.compact R.assign "error", sorted_predictions.collect{|p| p[0]} R.assign "confidence", sorted_predictions.collect{|p| p[1]} @@ -252,7 +252,7 @@ module OpenTox R.eval "image = qplot(confidence,error)" R.eval "image = image + stat_smooth(method='lm', se=FALSE)" R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.svg") + file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.png") plot_id = $gridfs.insert_one(file) update(:confidence_plot_id => plot_id) $gridfs.find_one(_id: confidence_plot_id).data @@ -260,7 +260,7 @@ module OpenTox def correlation_plot unless correlation_plot_id - tmpfile = "/tmp/#{id.to_s}_correlation.svg" + tmpfile = "/tmp/#{id.to_s}_correlation.png" x = predictions.collect{|p| p[1]} y = predictions.collect{|p| p[2]} attributes = Model::Lazar.find(self.model_id).attributes @@ -273,7 +273,7 @@ module OpenTox R.eval "image = qplot(-log(prediction),-log(measurement),main='#{self.name}',asp=1,xlim=range, ylim=range)" R.eval "image = image + geom_abline(intercept=0, slope=1)" R.eval "ggsave(file='#{tmpfile}', plot=image)" - file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.svg") + file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.png") plot_id = $gridfs.insert_one(file) update(:correlation_plot_id => plot_id) end -- cgit v1.2.3 From 8751c33ed42e358a1d67837e2002c8edb91e06a0 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 1 Apr 2016 16:07:55 +0200 Subject: regression r^2 fixed --- lib/crossvalidation.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/crossvalidation.rb') diff --git a/lib/crossvalidation.rb b/lib/crossvalidation.rb index 29e208c..15dfb21 100644 --- a/lib/crossvalidation.rb +++ b/lib/crossvalidation.rb @@ -193,7 +193,7 @@ module OpenTox end R.assign "measurement", x R.assign "prediction", y - R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')" + R.eval "r <- cor(measurement,prediction,use='complete')" r = R.eval("r").to_ruby mae = mae/predictions.size -- cgit v1.2.3