From 9d17895ab9e8cd31e0f32e8e622e13612ea5ff77 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 12 Oct 2018 21:58:36 +0200 Subject: validation statistic fixes --- test/regression-validation.rb | 91 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 test/regression-validation.rb (limited to 'test/regression-validation.rb') diff --git a/test/regression-validation.rb b/test/regression-validation.rb new file mode 100644 index 0000000..44162c0 --- /dev/null +++ b/test/regression-validation.rb @@ -0,0 +1,91 @@ +require_relative "setup.rb" + +class ValidationRegressionTest < MiniTest::Test + include OpenTox::Validation + + # defaults + + def test_default_regression_crossvalidation + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM_log10.csv" + model = Model::Lazar.create training_dataset: dataset + cv = RegressionCrossValidation.create model + assert cv.rmse[:all] < 1.5, "RMSE #{cv.rmse[:all]} should be smaller than 1.5, this may occur due to unfavorable training/test set splits" + assert cv.mae[:all] < 1.1, "MAE #{cv.mae[:all]} should be smaller than 1.1, this may occur due to unfavorable training/test set splits" + assert cv.within_prediction_interval[:all]/cv.nr_predictions[:all] > 0.8, "Only #{(100*cv.within_prediction_interval[:all]/cv.nr_predictions[:all]).round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits" + end + + # parameters + + def test_regression_crossvalidation_params + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" + algorithms = { + :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" }, + :descriptors => { :type => "MACCS", }, + :similarity => {:min => 0.7} + } + model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms + assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] + cv = RegressionCrossValidation.create model + cv.validation_ids.each do |vid| + model = Model::Lazar.find(Validation.find(vid).model_id) + assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] + assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min] + refute_nil model.training_dataset_id + refute_equal dataset.id, model.training_dataset_id + end + + refute_nil cv.rmse[:all] + refute_nil cv.mae[:all] + end + + def test_physchem_regression_crossvalidation + training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset:training_dataset + cv = RegressionCrossValidation.create model + refute_nil cv.rmse[:all] + refute_nil cv.mae[:all] + end + + # LOO + + def test_regression_loo_validation + dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset: dataset + loo = RegressionLeaveOneOut.create model + assert loo.r_squared[:all] > 0.34, "R^2 (#{loo.r_squared[:all]}) should be larger than 0.034" + end + + def test_regression_loo_validation_with_feature_selection + dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + algorithms = { + :descriptors => { + :method => "calculate_properties", + :features => PhysChem.openbabel_descriptors, + }, + :similarity => { + :method => "Algorithm::Similarity.weighted_cosine", + :min => 0.5 + }, + :feature_selection => { + :method => "Algorithm::FeatureSelection.correlation_filter", + }, + } + model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms + assert_raises OpenTox::BadRequestError do + loo = RegressionLeaveOneOut.create model + end + end + + # repeated CV + + def test_repeated_crossvalidation + dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset: dataset + repeated_cv = RepeatedCrossValidation.create model + repeated_cv.crossvalidations.each do |cv| + assert cv.r_squared[:all] > 0.34, "R^2 (#{cv.r_squared[:all]}) should be larger than 0.034" + assert cv.rmse[:all] < 1.5, "RMSE (#{cv.rmse[:all]}) should be smaller than 0.5" + end + end + +end -- cgit v1.2.3 From 3a9c9332b660d35720ad4fa1f55ee0883e53aecd Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 2 Nov 2018 20:34:44 +0100 Subject: warnings fixed, cleanup --- test/regression-validation.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/regression-validation.rb') diff --git a/test/regression-validation.rb b/test/regression-validation.rb index 44162c0..9418df4 100644 --- a/test/regression-validation.rb +++ b/test/regression-validation.rb @@ -1,6 +1,6 @@ require_relative "setup.rb" -class ValidationRegressionTest < MiniTest::Test +class RegressionValidationTest < MiniTest::Test include OpenTox::Validation # defaults -- cgit v1.2.3 From 7e547fd4a296f497615a7805d565b378cb1bd7cd Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Wed, 14 Nov 2018 17:33:44 +0100 Subject: bad_request_error substituted with ArgumentError --- test/regression-validation.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/regression-validation.rb') diff --git a/test/regression-validation.rb b/test/regression-validation.rb index 9418df4..afa4278 100644 --- a/test/regression-validation.rb +++ b/test/regression-validation.rb @@ -71,7 +71,7 @@ class RegressionValidationTest < MiniTest::Test }, } model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms - assert_raises OpenTox::BadRequestError do + assert_raises ArgumentError do loo = RegressionLeaveOneOut.create model end end -- cgit v1.2.3 From 0882c2cd0de934d7377fc9d08c306be98612c88a Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 16 Nov 2018 18:42:42 +0100 Subject: real datasets for testing, test data cleanup, Daphnia import, upper and lower similarity thresholds --- test/regression-validation.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'test/regression-validation.rb') diff --git a/test/regression-validation.rb b/test/regression-validation.rb index afa4278..7dbe354 100644 --- a/test/regression-validation.rb +++ b/test/regression-validation.rb @@ -6,7 +6,8 @@ class RegressionValidationTest < MiniTest::Test # defaults def test_default_regression_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM_log10.csv" + training_dataset = Dataset.from_csv_file File.join(Download::DATA, "Acute_toxicity-Fathead_minnow.csv") + dataset = Dataset.from_csv_file File.join(Download::DATA, "Acute_toxicity-Fathead_minnow.csv") model = Model::Lazar.create training_dataset: dataset cv = RegressionCrossValidation.create model assert cv.rmse[:all] < 1.5, "RMSE #{cv.rmse[:all]} should be smaller than 1.5, this may occur due to unfavorable training/test set splits" @@ -21,7 +22,7 @@ class RegressionValidationTest < MiniTest::Test algorithms = { :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" }, :descriptors => { :type => "MACCS", }, - :similarity => {:min => 0.7} + :similarity => {:min => [0.9,0.1]} } model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] @@ -64,7 +65,7 @@ class RegressionValidationTest < MiniTest::Test }, :similarity => { :method => "Algorithm::Similarity.weighted_cosine", - :min => 0.5 + :min => [0.5,0.1] }, :feature_selection => { :method => "Algorithm::FeatureSelection.correlation_filter", @@ -83,7 +84,7 @@ class RegressionValidationTest < MiniTest::Test model = Model::Lazar.create training_dataset: dataset repeated_cv = RepeatedCrossValidation.create model repeated_cv.crossvalidations.each do |cv| - assert cv.r_squared[:all] > 0.34, "R^2 (#{cv.r_squared[:all]}) should be larger than 0.034" + assert cv.r_squared[:all] > 0.34, "R^2 (#{cv.r_squared[:all]}) should be larger than 0.34" assert cv.rmse[:all] < 1.5, "RMSE (#{cv.rmse[:all]}) should be smaller than 0.5" end end -- cgit v1.2.3 From c12d5bb40ab2a0783f755c3238a20448b9a5a42e Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 16 Nov 2018 22:17:55 +0100 Subject: minor test fixes --- test/regression-validation.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/regression-validation.rb') diff --git a/test/regression-validation.rb b/test/regression-validation.rb index 7dbe354..65bec63 100644 --- a/test/regression-validation.rb +++ b/test/regression-validation.rb @@ -12,7 +12,7 @@ class RegressionValidationTest < MiniTest::Test cv = RegressionCrossValidation.create model assert cv.rmse[:all] < 1.5, "RMSE #{cv.rmse[:all]} should be smaller than 1.5, this may occur due to unfavorable training/test set splits" assert cv.mae[:all] < 1.1, "MAE #{cv.mae[:all]} should be smaller than 1.1, this may occur due to unfavorable training/test set splits" - assert cv.within_prediction_interval[:all]/cv.nr_predictions[:all] > 0.8, "Only #{(100*cv.within_prediction_interval[:all]/cv.nr_predictions[:all]).round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits" + assert cv.within_prediction_interval[:all]/cv.nr_predictions[:all].to_f > 0.8, "Only #{(100.0*cv.within_prediction_interval[:all]/cv.nr_predictions[:all]).round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits" end # parameters -- cgit v1.2.3 From b536a45cf18b070cec3f9cb8a44fdac0bfa3c58e Mon Sep 17 00:00:00 2001 From: gebele Date: Thu, 27 Jun 2019 14:08:57 +0000 Subject: fixed confidence value for cv stats; added tests --- test/regression-validation.rb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'test/regression-validation.rb') diff --git a/test/regression-validation.rb b/test/regression-validation.rb index 65bec63..94ef7b5 100644 --- a/test/regression-validation.rb +++ b/test/regression-validation.rb @@ -86,6 +86,20 @@ class RegressionValidationTest < MiniTest::Test repeated_cv.crossvalidations.each do |cv| assert cv.r_squared[:all] > 0.34, "R^2 (#{cv.r_squared[:all]}) should be larger than 0.34" assert cv.rmse[:all] < 1.5, "RMSE (#{cv.rmse[:all]}) should be smaller than 0.5" + keys = cv.rmse.keys + types = ["rmse", \ + "r_squared", \ + "mae", \ + "nr_predictions", \ + "within_prediction_interval", \ + "out_of_prediction_interval" + ] + types.each do |type| + keys.each do |key| + refute_nil cv[type][key] + assert cv[type][key] > 0 + end + end end end -- cgit v1.2.3 From 4a8cdac587ae464244e9ae4bffc2597b6bd07101 Mon Sep 17 00:00:00 2001 From: gebele Date: Tue, 2 Jul 2019 09:04:47 +0000 Subject: adding text message for assertions; remove value check for regression test, see comment --- test/regression-validation.rb | 3 +++ 1 file changed, 3 insertions(+) (limited to 'test/regression-validation.rb') diff --git a/test/regression-validation.rb b/test/regression-validation.rb index 94ef7b5..9a2da8f 100644 --- a/test/regression-validation.rb +++ b/test/regression-validation.rb @@ -86,6 +86,8 @@ class RegressionValidationTest < MiniTest::Test repeated_cv.crossvalidations.each do |cv| assert cv.r_squared[:all] > 0.34, "R^2 (#{cv.r_squared[:all]}) should be larger than 0.34" assert cv.rmse[:all] < 1.5, "RMSE (#{cv.rmse[:all]}) should be smaller than 0.5" +=begin + #actually some values can be 0 or nil depending on the random folds in this small dataset keys = cv.rmse.keys types = ["rmse", \ "r_squared", \ @@ -100,6 +102,7 @@ class RegressionValidationTest < MiniTest::Test assert cv[type][key] > 0 end end +=end end end -- cgit v1.2.3