From 2b0a7c725b23d8ef3f525b25fc7105de57ee3897 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 15 Mar 2016 18:53:12 +0100 Subject: validation test cleanup --- test/validation.rb | 81 +++++++++++++++++++++++++++--------------------------- 1 file changed, 40 insertions(+), 41 deletions(-) (limited to 'test') diff --git a/test/validation.rb b/test/validation.rb index c803c92..d8eea59 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -2,6 +2,8 @@ require_relative "setup.rb" class ValidationTest < MiniTest::Test + # defaults + def test_default_classification_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::LazarClassification.create dataset @@ -17,48 +19,9 @@ class ValidationTest < MiniTest::Test assert cv.mae < 1 end - def test_regression_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" - params = { - :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", - :neighbor_algorithm => "fingerprint_neighbors", - :neighbor_algorithm_parameters => { - :type => "MACCS", - :min_sim => 0.7, - } - } - model = Model::LazarRegression.create dataset, params - cv = RegressionCrossValidation.create model - cv.validation_ids.each do |vid| - model = Model::Lazar.find(Validation.find(vid).model_id) - assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type] - assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim] - refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id] - end + # parameters - refute_nil cv.rmse - refute_nil cv.mae - end - - def test_pls_regression_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" - params = { :prediction_algorithm => "OpenTox::Algorithm::Regression.local_fingerprint_regression", } - model = Model::LazarRegression.create dataset, params - cv = RegressionCrossValidation.create model - assert cv.rmse < 1.5, "RMSE > 1.5" - assert cv.mae < 1 - end - - def test_repeated_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset - repeated_cv = RepeatedCrossValidation.create model - repeated_cv.crossvalidations.each do |cv| - assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split" - end - end - - def test_crossvalidation_parameters + def test_classification_crossvalidation_parameters dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" params = { :training_dataset_id => dataset.id, @@ -80,6 +43,29 @@ class ValidationTest < MiniTest::Test assert_equal params, validation_params end end + + def test_regression_crossvalidation_params + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" + params = { + :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", + :neighbor_algorithm => "fingerprint_neighbors", + :neighbor_algorithm_parameters => { + :type => "MACCS", + :min_sim => 0.7, + } + } + model = Model::LazarRegression.create dataset, params + cv = RegressionCrossValidation.create model + cv.validation_ids.each do |vid| + model = Model::Lazar.find(Validation.find(vid).model_id) + assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type] + assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim] + refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id] + end + + refute_nil cv.rmse + refute_nil cv.mae + end def test_physchem_regression_crossvalidation @@ -90,6 +76,8 @@ class ValidationTest < MiniTest::Test refute_nil cv.mae end + # LOO + def test_classification_loo_validation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::LazarClassification.create dataset @@ -106,4 +94,15 @@ class ValidationTest < MiniTest::Test assert loo.r_squared > 0.34 end + # repeated CV + + def test_repeated_crossvalidation + dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + model = Model::LazarClassification.create dataset + repeated_cv = RepeatedCrossValidation.create model + repeated_cv.crossvalidations.each do |cv| + assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split" + end + end + end -- cgit v1.2.3