From a8368dda776c05331474adf7eaf9a6e413a3b1eb Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Apr 2016 15:15:51 +0200 Subject: validation tests pass --- lib/validation.rb | 62 ++++++++----------------------------------------------- 1 file changed, 9 insertions(+), 53 deletions(-) (limited to 'lib/validation.rb') diff --git a/lib/validation.rb b/lib/validation.rb index b72d273..484e22e 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -8,7 +8,7 @@ module OpenTox field :test_dataset_id, type: BSON::ObjectId field :nr_instances, type: Integer field :nr_unpredicted, type: Integer - field :predictions, type: Array + field :predictions, type: Hash def prediction_dataset Dataset.find prediction_dataset_id @@ -29,30 +29,22 @@ module OpenTox atts[:training_dataset_id] = training_set.id validation_model = model.class.create training_set, atts validation_model.save - cids = test_set.compound_ids - - test_set_without_activities = Dataset.new(:compound_ids => cids.uniq) # remove duplicates and make sure that activities cannot be used - prediction_dataset = validation_model.predict test_set_without_activities - predictions = [] + predictions = validation_model.predict test_set.compounds + predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 - activities = test_set.data_entries.collect{|de| de.first} - prediction_dataset.data_entries.each_with_index do |de,i| - if de[0] #and de[1] - cid = prediction_dataset.compound_ids[i] - rows = cids.each_index.select{|r| cids[r] == cid } - activities = rows.collect{|r| test_set.data_entries[r][0]} - prediction = de.first - confidence = de[1] - predictions << [prediction_dataset.compound_ids[i], activities, prediction, de[1]] + predictions.each do |cid,prediction| + if prediction[:value] + prediction[:measured] = test_set.data_entries[cid][prediction[:prediction_feature_id].to_s] else nr_unpredicted += 1 end + predictions.delete(cid) unless prediction[:value] and prediction[:measured] end validation = self.new( :model_id => validation_model.id, - :prediction_dataset_id => prediction_dataset.id, + #:prediction_dataset_id => prediction_dataset.id, :test_dataset_id => test_set.id, - :nr_instances => test_set.compound_ids.size, + :nr_instances => test_set.compounds.size, :nr_unpredicted => nr_unpredicted, :predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence ) @@ -67,42 +59,6 @@ module OpenTox end class RegressionValidation < Validation - - def statistics - rmse = 0 - weighted_rmse = 0 - rse = 0 - weighted_rse = 0 - mae = 0 - weighted_mae = 0 - confidence_sum = 0 - predictions.each do |pred| - compound_id,activity,prediction,confidence = pred - if activity and prediction - error = Math.log10(prediction)-Math.log10(activity.median) - rmse += error**2 - weighted_rmse += confidence*error**2 - mae += error.abs - weighted_mae += confidence*error.abs - confidence_sum += confidence - else - warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}." - $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}." - end - end - x = predictions.collect{|p| p[1].median} - y = predictions.collect{|p| p[2]} - R.assign "measurement", x - R.assign "prediction", y - R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')" - r = R.eval("r").to_ruby - - mae = mae/predictions.size - weighted_mae = weighted_mae/confidence_sum - rmse = Math.sqrt(rmse/predictions.size) - weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum) - { "R^2" => r**2, "RMSE" => rmse, "MAE" => mae } - end end end -- cgit v1.2.3 From 8aab046eb1ad39aaf10c5a8596102c35c7b2ee0b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 15 Apr 2016 11:01:16 +0200 Subject: data_entries removed from datasets. datasets are now just containers for compounds and features, feature values have to be retrieved from substances. --- lib/validation.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib/validation.rb') diff --git a/lib/validation.rb b/lib/validation.rb index 484e22e..6b515e4 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -27,14 +27,14 @@ module OpenTox atts = model.attributes.dup # do not modify attributes from original model atts["_id"] = BSON::ObjectId.new atts[:training_dataset_id] = training_set.id - validation_model = model.class.create training_set, atts + validation_model = model.class.create model.prediction_feature, training_set, atts validation_model.save predictions = validation_model.predict test_set.compounds predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 predictions.each do |cid,prediction| if prediction[:value] - prediction[:measured] = test_set.data_entries[cid][prediction[:prediction_feature_id].to_s] + prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] else nr_unpredicted += 1 end @@ -42,7 +42,6 @@ module OpenTox end validation = self.new( :model_id => validation_model.id, - #:prediction_dataset_id => prediction_dataset.id, :test_dataset_id => test_set.id, :nr_instances => test_set.compounds.size, :nr_unpredicted => nr_unpredicted, -- cgit v1.2.3 From 32d767ee7cfcc19337892551906950621f348174 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 28 Apr 2016 08:11:12 +0200 Subject: nanoparticle crossvalidation technically working --- lib/validation.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/validation.rb') diff --git a/lib/validation.rb b/lib/validation.rb index 6b515e4..68cb1a1 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -24,12 +24,12 @@ module OpenTox def self.create model, training_set, test_set, crossvalidation=nil - atts = model.attributes.dup # do not modify attributes from original model + atts = model.attributes.dup # do not modify attributes of the original model atts["_id"] = BSON::ObjectId.new atts[:training_dataset_id] = training_set.id validation_model = model.class.create model.prediction_feature, training_set, atts validation_model.save - predictions = validation_model.predict test_set.compounds + predictions = validation_model.predict test_set.substances predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 predictions.each do |cid,prediction| @@ -43,7 +43,7 @@ module OpenTox validation = self.new( :model_id => validation_model.id, :test_dataset_id => test_set.id, - :nr_instances => test_set.compounds.size, + :nr_instances => test_set.substances.size, :nr_unpredicted => nr_unpredicted, :predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence ) -- cgit v1.2.3 From 06fc914653face2c58fd4e6c47161cb03e217582 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 8 May 2016 12:22:58 +0200 Subject: default validations fixed --- lib/validation.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/validation.rb') diff --git a/lib/validation.rb b/lib/validation.rb index 68cb1a1..334efd7 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -34,7 +34,9 @@ module OpenTox nr_unpredicted = 0 predictions.each do |cid,prediction| if prediction[:value] - prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] + tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] + #prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][test_set.id.to_s] + prediction[:measured] = tox[test_set.id.to_s] if tox else nr_unpredicted += 1 end -- cgit v1.2.3 From b8bb12c8a163c238d7d4387c1914e2100bb660df Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 12 May 2016 15:23:01 +0200 Subject: enm study import fixed --- lib/validation.rb | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/validation.rb') diff --git a/lib/validation.rb b/lib/validation.rb index 334efd7..015e718 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -32,9 +32,12 @@ module OpenTox predictions = validation_model.predict test_set.substances predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 + p predictions.size predictions.each do |cid,prediction| + p prediction if prediction[:value] tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] + p tox #prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][test_set.id.to_s] prediction[:measured] = tox[test_set.id.to_s] if tox else @@ -42,6 +45,7 @@ module OpenTox end predictions.delete(cid) unless prediction[:value] and prediction[:measured] end + p predictions.size validation = self.new( :model_id => validation_model.id, :test_dataset_id => test_set.id, -- cgit v1.2.3 From c90644211e214a50f6fdb3a936bf247f45f1f4be Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 13 May 2016 13:38:24 +0200 Subject: compound tests fixed --- lib/validation.rb | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'lib/validation.rb') diff --git a/lib/validation.rb b/lib/validation.rb index 015e718..9122df1 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -32,20 +32,14 @@ module OpenTox predictions = validation_model.predict test_set.substances predictions.each{|cid,p| p.delete(:neighbors)} nr_unpredicted = 0 - p predictions.size predictions.each do |cid,prediction| - p prediction if prediction[:value] - tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s] - p tox - #prediction[:measured] = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s][test_set.id.to_s] - prediction[:measured] = tox[test_set.id.to_s] if tox + prediction[:measured] = test_set.values(cid, prediction[:prediction_feature_id]) else nr_unpredicted += 1 end - predictions.delete(cid) unless prediction[:value] and prediction[:measured] end - p predictions.size + predictions.select!{|cid,p| p[:value] and p[:measured]} validation = self.new( :model_id => validation_model.id, :test_dataset_id => test_set.id, -- cgit v1.2.3 From b515a0cfedb887a2af753db6e4a08ae1af430cad Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 31 May 2016 18:08:08 +0200 Subject: cleanup of validation modules/classes --- lib/validation.rb | 72 +++++++++++++------------------------------------------ 1 file changed, 17 insertions(+), 55 deletions(-) (limited to 'lib/validation.rb') diff --git a/lib/validation.rb b/lib/validation.rb index 9122df1..ff9a971 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -1,63 +1,25 @@ module OpenTox - class Validation - - field :model_id, type: BSON::ObjectId - field :prediction_dataset_id, type: BSON::ObjectId - field :crossvalidation_id, type: BSON::ObjectId - field :test_dataset_id, type: BSON::ObjectId - field :nr_instances, type: Integer - field :nr_unpredicted, type: Integer - field :predictions, type: Hash - - def prediction_dataset - Dataset.find prediction_dataset_id - end - - def test_dataset - Dataset.find test_dataset_id - end - - def model - Model::Lazar.find model_id - end - - def self.create model, training_set, test_set, crossvalidation=nil - - atts = model.attributes.dup # do not modify attributes of the original model - atts["_id"] = BSON::ObjectId.new - atts[:training_dataset_id] = training_set.id - validation_model = model.class.create model.prediction_feature, training_set, atts - validation_model.save - predictions = validation_model.predict test_set.substances - predictions.each{|cid,p| p.delete(:neighbors)} - nr_unpredicted = 0 - predictions.each do |cid,prediction| - if prediction[:value] - prediction[:measured] = test_set.values(cid, prediction[:prediction_feature_id]) - else - nr_unpredicted += 1 - end + module Validation + + class Validation + include OpenTox + include Mongoid::Document + include Mongoid::Timestamps + store_in collection: "validations" + field :name, type: String + field :model_id, type: BSON::ObjectId + field :nr_instances, type: Integer + field :nr_unpredicted, type: Integer + field :predictions, type: Hash + field :finished_at, type: Time + + def model + Model::Lazar.find model_id end - predictions.select!{|cid,p| p[:value] and p[:measured]} - validation = self.new( - :model_id => validation_model.id, - :test_dataset_id => test_set.id, - :nr_instances => test_set.substances.size, - :nr_unpredicted => nr_unpredicted, - :predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence - ) - validation.crossvalidation_id = crossvalidation.id if crossvalidation - validation.save - validation - end - - end - class ClassificationValidation < Validation - end + end - class RegressionValidation < Validation end end -- cgit v1.2.3 From 65b69d4c35890a7a2d2992108f0cf4eb5202dd1b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 1 Jun 2016 10:37:00 +0200 Subject: validation tests fixed --- lib/validation.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/validation.rb') diff --git a/lib/validation.rb b/lib/validation.rb index ff9a971..ced9596 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -9,9 +9,9 @@ module OpenTox store_in collection: "validations" field :name, type: String field :model_id, type: BSON::ObjectId - field :nr_instances, type: Integer - field :nr_unpredicted, type: Integer - field :predictions, type: Hash + field :nr_instances, type: Integer, default: 0 + field :nr_unpredicted, type: Integer, default: 0 + field :predictions, type: Hash, default: {} field :finished_at, type: Time def model -- cgit v1.2.3