From 09452bba5c407c27721223d126e3f45c12b20a0c Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Thu, 13 Oct 2016 22:59:45 +0200
Subject: tests pass

---
 test/validation-regression.rb | 57 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 test/validation-regression.rb

(limited to 'test/validation-regression.rb')

diff --git a/test/validation-regression.rb b/test/validation-regression.rb
new file mode 100644
index 0000000..efce849
--- /dev/null
+++ b/test/validation-regression.rb
@@ -0,0 +1,57 @@
+require_relative "setup.rb"
+
+class ValidationRegressionTest < MiniTest::Test
+  include OpenTox::Validation
+
+  # defaults
+
+  def test_default_regression_crossvalidation
+    dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
+    model = Model::Lazar.create training_dataset: dataset
+    cv = RegressionCrossValidation.create model
+    assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split"
+    assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split"
+  end
+
+  # parameters
+
+  def test_regression_crossvalidation_params
+    dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
+    algorithms = {
+      :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
+      :descriptors => { :type => "MACCS", },
+      :similarity => {:min => 0.7}
+    }
+    model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
+    assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
+    cv = RegressionCrossValidation.create model
+    cv.validation_ids.each do |vid|
+      model = Model::Lazar.find(Validation.find(vid).model_id)
+      assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
+      assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+      refute_nil model.training_dataset_id
+      refute_equal dataset.id, model.training_dataset_id
+    end
+
+    refute_nil cv.rmse
+    refute_nil cv.mae
+  end
+
+  def test_physchem_regression_crossvalidation
+    training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+    model = Model::Lazar.create training_dataset:training_dataset
+    cv = RegressionCrossValidation.create model
+    refute_nil cv.rmse
+    refute_nil cv.mae
+  end
+
+  # LOO
+
+  def test_regression_loo_validation
+    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+    model = Model::Lazar.create training_dataset: dataset
+    loo = RegressionLeaveOneOut.create model
+    assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
+  end
+
+end
-- 
cgit v1.2.3


From 8519274487166d75b3b9ae28e61f7a7be9f7e83c Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Thu, 27 Oct 2016 11:58:07 +0200
Subject: probability plot for classification validations

---
 test/validation-regression.rb | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

(limited to 'test/validation-regression.rb')

diff --git a/test/validation-regression.rb b/test/validation-regression.rb
index efce849..a0895f9 100644
--- a/test/validation-regression.rb
+++ b/test/validation-regression.rb
@@ -9,8 +9,9 @@ class ValidationRegressionTest < MiniTest::Test
     dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
     model = Model::Lazar.create training_dataset: dataset
     cv = RegressionCrossValidation.create model
-    assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split"
-    assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split"
+    assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
+    assert cv.mae < 1.1, "MAE #{cv.mae} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
+    assert cv.percent_within_prediction_interval > 80, "Only #{cv.percent_within_prediction_interval.round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
   end
 
   # parameters
@@ -54,4 +55,39 @@ class ValidationRegressionTest < MiniTest::Test
     assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
   end
 
+  def test_regression_loo_validation_with_feature_selection
+    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+    algorithms = {
+      :descriptors => {
+        :method => "calculate_properties",
+        :features => PhysChem.openbabel_descriptors,
+      },
+      :similarity => {
+        :method => "Algorithm::Similarity.weighted_cosine",
+        :min => 0.5
+      },
+      :feature_selection => {
+        :method => "Algorithm::FeatureSelection.correlation_filter",
+      },
+    }
+    model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
+    assert_raises OpenTox::BadRequestError do
+      loo = RegressionLeaveOneOut.create model
+    end
+  end
+
+  # repeated CV
+
+  def test_repeated_crossvalidation
+    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+    model = Model::Lazar.create training_dataset: dataset
+    repeated_cv = RepeatedCrossValidation.create model
+    repeated_cv.crossvalidations.each do |cv|
+      #assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
+      #assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
+    end
+    p repeated_cv
+    File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
+  end
+
 end
-- 
cgit v1.2.3


From b6116bc4705066da30668ff3370f3b1c307e44e7 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Fri, 11 Nov 2016 13:07:53 +0100
Subject: enm import fixed

---
 test/validation-regression.rb | 1 -
 1 file changed, 1 deletion(-)

(limited to 'test/validation-regression.rb')

diff --git a/test/validation-regression.rb b/test/validation-regression.rb
index a0895f9..7630521 100644
--- a/test/validation-regression.rb
+++ b/test/validation-regression.rb
@@ -86,7 +86,6 @@ class ValidationRegressionTest < MiniTest::Test
       #assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
       #assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
     end
-    p repeated_cv
     File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
   end
 
-- 
cgit v1.2.3