From a8368dda776c05331474adf7eaf9a6e413a3b1eb Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 13 Apr 2016 15:15:51 +0200 Subject: validation tests pass --- test/validation.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index d8eea59..e702278 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -8,15 +8,15 @@ class ValidationTest < MiniTest::Test dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::LazarClassification.create dataset cv = ClassificationCrossValidation.create model - assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7" + assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split" end def test_default_regression_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" model = Model::LazarRegression.create dataset cv = RegressionCrossValidation.create model - assert cv.rmse < 1.5, "RMSE > 1.5" - assert cv.mae < 1 + assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be larger than 1.5, this may occur due to an unfavorable training/test set split" + assert cv.mae < 1, "MAE #{cv.mae} should be larger than 1, this may occur due to an unfavorable training/test set split" end # parameters -- cgit v1.2.3 From 8aab046eb1ad39aaf10c5a8596102c35c7b2ee0b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 15 Apr 2016 11:01:16 +0200 Subject: data_entries removed from datasets. datasets are now just containers for compounds and features, feature values have to be retrieved from substances. --- test/validation.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index e702278..baee2d1 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -6,14 +6,14 @@ class ValidationTest < MiniTest::Test def test_default_classification_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset + model = Model::LazarClassification.create dataset.features.first, dataset cv = ClassificationCrossValidation.create model assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split" end def test_default_regression_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" - model = Model::LazarRegression.create dataset + model = Model::LazarRegression.create dataset.features.first, dataset cv = RegressionCrossValidation.create model assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be larger than 1.5, this may occur due to an unfavorable training/test set split" assert cv.mae < 1, "MAE #{cv.mae} should be larger than 1, this may occur due to an unfavorable training/test set split" @@ -30,7 +30,7 @@ class ValidationTest < MiniTest::Test :type => "FP3" } } - model = Model::LazarClassification.create dataset, params + model = Model::LazarClassification.create dataset.features.first, dataset, params model.save cv = ClassificationCrossValidation.create model params = model.neighbor_algorithm_parameters @@ -54,7 +54,7 @@ class ValidationTest < MiniTest::Test :min_sim => 0.7, } } - model = Model::LazarRegression.create dataset, params + model = Model::LazarRegression.create dataset.features.first, dataset, params cv = RegressionCrossValidation.create model cv.validation_ids.each do |vid| model = Model::Lazar.find(Validation.find(vid).model_id) @@ -70,7 +70,7 @@ class ValidationTest < MiniTest::Test def test_physchem_regression_crossvalidation training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") - model = Model::LazarRegression.create(training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") + model = Model::LazarRegression.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") cv = RegressionCrossValidation.create model refute_nil cv.rmse refute_nil cv.mae @@ -80,7 +80,7 @@ class ValidationTest < MiniTest::Test def test_classification_loo_validation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset + model = Model::LazarClassification.create dataset.features.first, dataset loo = ClassificationLeaveOneOutValidation.create model assert_equal 14, loo.nr_unpredicted refute_empty loo.confusion_matrix @@ -89,7 +89,7 @@ class ValidationTest < MiniTest::Test def test_regression_loo_validation dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") - model = Model::LazarRegression.create dataset + model = Model::LazarRegression.create dataset.features.first, dataset loo = RegressionLeaveOneOutValidation.create model assert loo.r_squared > 0.34 end @@ -98,7 +98,7 @@ class ValidationTest < MiniTest::Test def test_repeated_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset + model = Model::LazarClassification.create dataset.features.first, dataset repeated_cv = RepeatedCrossValidation.create model repeated_cv.crossvalidations.each do |cv| assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split" -- cgit v1.2.3 From cfc64a2966ab38698e499f0b44f41208ee77a07f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 26 Apr 2016 17:38:15 +0200 Subject: first nanomaterial prediction --- test/validation.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index baee2d1..cbc7d09 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -9,6 +9,7 @@ class ValidationTest < MiniTest::Test model = Model::LazarClassification.create dataset.features.first, dataset cv = ClassificationCrossValidation.create model assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split" + assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) should be larger than accuracy (#{cv.accuracy})." end def test_default_regression_crossvalidation @@ -85,6 +86,7 @@ class ValidationTest < MiniTest::Test assert_equal 14, loo.nr_unpredicted refute_empty loo.confusion_matrix assert loo.accuracy > 0.77 + assert loo.weighted_accuracy > loo.accuracy, "Weighted accuracy (#{loo.weighted_accuracy}) should be larger than accuracy (#{loo.accuracy})." end def test_regression_loo_validation -- cgit v1.2.3 From 48234554ea99b972a01718ac36c4e8332dd9159b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sat, 7 May 2016 10:34:03 +0200 Subject: -log10 for regression datasets, test cleanups --- test/validation.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index cbc7d09..021fac5 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -13,7 +13,7 @@ class ValidationTest < MiniTest::Test end def test_default_regression_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" model = Model::LazarRegression.create dataset.features.first, dataset cv = RegressionCrossValidation.create model assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be larger than 1.5, this may occur due to an unfavorable training/test set split" @@ -46,7 +46,7 @@ class ValidationTest < MiniTest::Test end def test_regression_crossvalidation_params - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" params = { :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "fingerprint_neighbors", @@ -70,7 +70,7 @@ class ValidationTest < MiniTest::Test def test_physchem_regression_crossvalidation - training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") + training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") model = Model::LazarRegression.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") cv = RegressionCrossValidation.create model refute_nil cv.rmse @@ -90,10 +90,10 @@ class ValidationTest < MiniTest::Test end def test_regression_loo_validation - dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") + dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") model = Model::LazarRegression.create dataset.features.first, dataset loo = RegressionLeaveOneOutValidation.create model - assert loo.r_squared > 0.34 + assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034" end # repeated CV -- cgit v1.2.3 From ab652ac85036c5b372e7f1a08cdb75a19db5b19a Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 8 May 2016 12:57:10 +0200 Subject: regression crossvalidation fixed --- test/validation.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index 021fac5..8ebb52c 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -25,7 +25,6 @@ class ValidationTest < MiniTest::Test def test_classification_crossvalidation_parameters dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" params = { - :training_dataset_id => dataset.id, :neighbor_algorithm_parameters => { :min_sim => 0.3, :type => "FP3" @@ -56,6 +55,7 @@ class ValidationTest < MiniTest::Test } } model = Model::LazarRegression.create dataset.features.first, dataset, params + p model cv = RegressionCrossValidation.create model cv.validation_ids.each do |vid| model = Model::Lazar.find(Validation.find(vid).model_id) -- cgit v1.2.3 From c1be8fe66f640d44dbbc9bfe5212733994bfb9c5 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 9 May 2016 15:44:29 +0200 Subject: physchem crossvalidation fixed, test_compound_descriptor_parameters assertions fixed --- test/validation.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index 8ebb52c..ed19fee 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -17,7 +17,7 @@ class ValidationTest < MiniTest::Test model = Model::LazarRegression.create dataset.features.first, dataset cv = RegressionCrossValidation.create model assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be larger than 1.5, this may occur due to an unfavorable training/test set split" - assert cv.mae < 1, "MAE #{cv.mae} should be larger than 1, this may occur due to an unfavorable training/test set split" + assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split" end # parameters -- cgit v1.2.3 From cc08e6beda7f7d70ebf6c6929a22d1a0cd7c1a20 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 24 May 2016 15:41:24 +0200 Subject: tests fixed. DescriptorTest#test_compound_all may fail within all.rb --- test/validation.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index ed19fee..39314da 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -34,13 +34,16 @@ class ValidationTest < MiniTest::Test model.save cv = ClassificationCrossValidation.create model params = model.neighbor_algorithm_parameters - params.delete :training_dataset_id params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string cv.validations.each do |validation| validation_params = validation.model.neighbor_algorithm_parameters - validation_params.delete "training_dataset_id" - assert_equal params, validation_params + refute_nil params["dataset_id"] + refute_nil validation_params[:dataset_id] + refute_equal params["dataset_id"], validation_params[:dataset_id] + ["min_sim","type","prediction_feature_id"].each do |k| + assert_equal params[k], validation_params[k] + end end end @@ -55,13 +58,14 @@ class ValidationTest < MiniTest::Test } } model = Model::LazarRegression.create dataset.features.first, dataset, params - p model cv = RegressionCrossValidation.create model cv.validation_ids.each do |vid| model = Model::Lazar.find(Validation.find(vid).model_id) assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type] assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim] - refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id] + refute_nil model[:neighbor_algorithm_parameters][:dataset_id] + refute_equal dataset.id, model[:neighbor_algorithm_parameters][:dataset_id] + assert_equal model.training_dataset_id, model[:neighbor_algorithm_parameters][:dataset_id] end refute_nil cv.rmse -- cgit v1.2.3 From b515a0cfedb887a2af753db6e4a08ae1af430cad Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 31 May 2016 18:08:08 +0200 Subject: cleanup of validation modules/classes --- test/validation.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index 39314da..a259472 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -1,6 +1,7 @@ require_relative "setup.rb" class ValidationTest < MiniTest::Test + include OpenTox::Validation # defaults @@ -86,7 +87,7 @@ class ValidationTest < MiniTest::Test def test_classification_loo_validation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::LazarClassification.create dataset.features.first, dataset - loo = ClassificationLeaveOneOutValidation.create model + loo = ClassificationLeaveOneOut.create model assert_equal 14, loo.nr_unpredicted refute_empty loo.confusion_matrix assert loo.accuracy > 0.77 @@ -96,7 +97,7 @@ class ValidationTest < MiniTest::Test def test_regression_loo_validation dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") model = Model::LazarRegression.create dataset.features.first, dataset - loo = RegressionLeaveOneOutValidation.create model + loo = RegressionLeaveOneOut.create model assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034" end -- cgit v1.2.3 From 65b69d4c35890a7a2d2992108f0cf4eb5202dd1b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 1 Jun 2016 10:37:00 +0200 Subject: validation tests fixed --- test/validation.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index a259472..4d0c372 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -59,6 +59,7 @@ class ValidationTest < MiniTest::Test } } model = Model::LazarRegression.create dataset.features.first, dataset, params + assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type] cv = RegressionCrossValidation.create model cv.validation_ids.each do |vid| model = Model::Lazar.find(Validation.find(vid).model_id) @@ -74,7 +75,6 @@ class ValidationTest < MiniTest::Test end def test_physchem_regression_crossvalidation - training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") model = Model::LazarRegression.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") cv = RegressionCrossValidation.create model -- cgit v1.2.3 From 0f31c884d1bcfa448a1bf43a41d8fd6cf88bfc52 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 8 Jun 2016 18:26:07 +0200 Subject: compound tests fixed --- test/validation.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index 4d0c372..b4f5a92 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -75,6 +75,7 @@ class ValidationTest < MiniTest::Test end def test_physchem_regression_crossvalidation + skip # TODO: fix training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") model = Model::LazarRegression.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") cv = RegressionCrossValidation.create model -- cgit v1.2.3 From 4348eec89033e6677c9f628646fc67bd03c73fe6 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 6 Oct 2016 19:14:10 +0200 Subject: nano caret regression fixed --- test/validation.rb | 61 ++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 34 deletions(-) (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb index b4f5a92..03adf69 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -7,7 +7,7 @@ class ValidationTest < MiniTest::Test def test_default_classification_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset.features.first, dataset + model = Model::Lazar.create training_dataset: dataset cv = ClassificationCrossValidation.create model assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split" assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) should be larger than accuracy (#{cv.accuracy})." @@ -15,9 +15,9 @@ class ValidationTest < MiniTest::Test def test_default_regression_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - model = Model::LazarRegression.create dataset.features.first, dataset + model = Model::Lazar.create training_dataset: dataset cv = RegressionCrossValidation.create model - assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be larger than 1.5, this may occur due to an unfavorable training/test set split" + assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split" assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split" end @@ -25,23 +25,20 @@ class ValidationTest < MiniTest::Test def test_classification_crossvalidation_parameters dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - params = { - :neighbor_algorithm_parameters => { - :min_sim => 0.3, - :type => "FP3" - } + algorithms = { + :similarity => { :min => 0.3, }, + :descriptors => { :type => "FP3" } } - model = Model::LazarClassification.create dataset.features.first, dataset, params - model.save + model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms cv = ClassificationCrossValidation.create model - params = model.neighbor_algorithm_parameters + params = model.algorithms params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string - + cv.validations.each do |validation| - validation_params = validation.model.neighbor_algorithm_parameters - refute_nil params["dataset_id"] - refute_nil validation_params[:dataset_id] - refute_equal params["dataset_id"], validation_params[:dataset_id] + validation_params = validation.model.algorithms + refute_nil model.training_dataset_id + refute_nil validation.model.training_dataset_id + refute_equal model.training_dataset_id, validation.model.training_dataset_id ["min_sim","type","prediction_feature_id"].each do |k| assert_equal params[k], validation_params[k] end @@ -50,24 +47,20 @@ class ValidationTest < MiniTest::Test def test_regression_crossvalidation_params dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - params = { - :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", - :neighbor_algorithm => "fingerprint_neighbors", - :neighbor_algorithm_parameters => { - :type => "MACCS", - :min_sim => 0.7, - } + algorithms = { + :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" }, + :descriptors => { :type => "MACCS", }, + :similarity => {:min => 0.7} } - model = Model::LazarRegression.create dataset.features.first, dataset, params - assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type] + model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms + assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] cv = RegressionCrossValidation.create model cv.validation_ids.each do |vid| model = Model::Lazar.find(Validation.find(vid).model_id) - assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type] - assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim] - refute_nil model[:neighbor_algorithm_parameters][:dataset_id] - refute_equal dataset.id, model[:neighbor_algorithm_parameters][:dataset_id] - assert_equal model.training_dataset_id, model[:neighbor_algorithm_parameters][:dataset_id] + assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] + assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min] + refute_nil model.training_dataset_id + refute_equal dataset.id, model.training_dataset_id end refute_nil cv.rmse @@ -77,7 +70,7 @@ class ValidationTest < MiniTest::Test def test_physchem_regression_crossvalidation skip # TODO: fix training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") - model = Model::LazarRegression.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") + model = Model::Lazar.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") cv = RegressionCrossValidation.create model refute_nil cv.rmse refute_nil cv.mae @@ -87,7 +80,7 @@ class ValidationTest < MiniTest::Test def test_classification_loo_validation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset.features.first, dataset + model = Model::Lazar.create training_dataset: dataset loo = ClassificationLeaveOneOut.create model assert_equal 14, loo.nr_unpredicted refute_empty loo.confusion_matrix @@ -97,7 +90,7 @@ class ValidationTest < MiniTest::Test def test_regression_loo_validation dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") - model = Model::LazarRegression.create dataset.features.first, dataset + model = Model::Lazar.create training_dataset: dataset loo = RegressionLeaveOneOut.create model assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034" end @@ -106,7 +99,7 @@ class ValidationTest < MiniTest::Test def test_repeated_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset.features.first, dataset + model = Model::Lazar.create training_dataset: dataset repeated_cv = RepeatedCrossValidation.create model repeated_cv.crossvalidations.each do |cv| assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split" -- cgit v1.2.3 From 2dc66aef3b7932105868ee8c7d32ad975e142d1b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 13 Oct 2016 19:48:21 +0200 Subject: compound tests fixed --- test/validation.rb | 109 ----------------------------------------------------- 1 file changed, 109 deletions(-) delete mode 100644 test/validation.rb (limited to 'test/validation.rb') diff --git a/test/validation.rb b/test/validation.rb deleted file mode 100644 index 03adf69..0000000 --- a/test/validation.rb +++ /dev/null @@ -1,109 +0,0 @@ -require_relative "setup.rb" - -class ValidationTest < MiniTest::Test - include OpenTox::Validation - - # defaults - - def test_default_classification_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::Lazar.create training_dataset: dataset - cv = ClassificationCrossValidation.create model - assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split" - assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) should be larger than accuracy (#{cv.accuracy})." - end - - def test_default_regression_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - model = Model::Lazar.create training_dataset: dataset - cv = RegressionCrossValidation.create model - assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split" - assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split" - end - - # parameters - - def test_classification_crossvalidation_parameters - dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - algorithms = { - :similarity => { :min => 0.3, }, - :descriptors => { :type => "FP3" } - } - model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms - cv = ClassificationCrossValidation.create model - params = model.algorithms - params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string - - cv.validations.each do |validation| - validation_params = validation.model.algorithms - refute_nil model.training_dataset_id - refute_nil validation.model.training_dataset_id - refute_equal model.training_dataset_id, validation.model.training_dataset_id - ["min_sim","type","prediction_feature_id"].each do |k| - assert_equal params[k], validation_params[k] - end - end - end - - def test_regression_crossvalidation_params - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - algorithms = { - :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" }, - :descriptors => { :type => "MACCS", }, - :similarity => {:min => 0.7} - } - model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms - assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] - cv = RegressionCrossValidation.create model - cv.validation_ids.each do |vid| - model = Model::Lazar.find(Validation.find(vid).model_id) - assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] - assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min] - refute_nil model.training_dataset_id - refute_equal dataset.id, model.training_dataset_id - end - - refute_nil cv.rmse - refute_nil cv.mae - end - - def test_physchem_regression_crossvalidation - skip # TODO: fix - training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") - model = Model::Lazar.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") - cv = RegressionCrossValidation.create model - refute_nil cv.rmse - refute_nil cv.mae - end - - # LOO - - def test_classification_loo_validation - dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::Lazar.create training_dataset: dataset - loo = ClassificationLeaveOneOut.create model - assert_equal 14, loo.nr_unpredicted - refute_empty loo.confusion_matrix - assert loo.accuracy > 0.77 - assert loo.weighted_accuracy > loo.accuracy, "Weighted accuracy (#{loo.weighted_accuracy}) should be larger than accuracy (#{loo.accuracy})." - end - - def test_regression_loo_validation - dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") - model = Model::Lazar.create training_dataset: dataset - loo = RegressionLeaveOneOut.create model - assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034" - end - - # repeated CV - - def test_repeated_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::Lazar.create training_dataset: dataset - repeated_cv = RepeatedCrossValidation.create model - repeated_cv.crossvalidations.each do |cv| - assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split" - end - end - -end -- cgit v1.2.3