diff options
Diffstat (limited to 'test/validation.rb')
-rw-r--r-- | test/validation.rb | 143 |
1 files changed, 63 insertions, 80 deletions
diff --git a/test/validation.rb b/test/validation.rb index 6764a32..d8eea59 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -2,54 +2,52 @@ require_relative "setup.rb" class ValidationTest < MiniTest::Test - def test_fminer_crossvalidation + # defaults + + def test_default_classification_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarFminerClassification.create dataset - cv = ClassificationCrossValidation.create model - refute_empty cv.validation_ids - assert cv.accuracy > 0.8, "Crossvalidation accuracy lower than 0.8" - assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) " - end - - def test_classification_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset#, features + model = Model::LazarClassification.create dataset cv = ClassificationCrossValidation.create model - assert cv.accuracy > 0.7 - File.open("tmp.svg","w+"){|f| f.puts cv.confidence_plot} - `inkview tmp.svg` - p cv.nr_unpredicted - p cv.accuracy - #assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy should be larger than unweighted accuracy." + assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7" end def test_default_regression_crossvalidation dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" model = Model::LazarRegression.create dataset cv = RegressionCrossValidation.create model - #cv = RegressionCrossValidation.find '561503262b72ed54fd000001' - p cv.id - File.open("tmp.svg","w+"){|f| f.puts cv.correlation_plot} - `inkview tmp.svg` - File.open("tmp.svg","w+"){|f| f.puts cv.confidence_plot} - `inkview tmp.svg` - - #puts cv.misclassifications.to_yaml - p cv.rmse - p cv.weighted_rmse assert cv.rmse < 1.5, "RMSE > 1.5" - #assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) " - p cv.mae - p cv.weighted_mae assert cv.mae < 1 - #assert cv.weighted_mae < cv.mae end - def test_regression_crossvalidation + # parameters + + def test_classification_crossvalidation_parameters + dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + params = { + :training_dataset_id => dataset.id, + :neighbor_algorithm_parameters => { + :min_sim => 0.3, + :type => "FP3" + } + } + model = Model::LazarClassification.create dataset, params + model.save + cv = ClassificationCrossValidation.create model + params = model.neighbor_algorithm_parameters + params.delete :training_dataset_id + params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string + + cv.validations.each do |validation| + validation_params = validation.model.neighbor_algorithm_parameters + validation_params.delete "training_dataset_id" + assert_equal params, validation_params + end + end + + def test_regression_crossvalidation_params dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" - #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv" params = { - :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average", + :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "fingerprint_neighbors", :neighbor_algorithm_parameters => { :type => "MACCS", @@ -65,61 +63,46 @@ class ValidationTest < MiniTest::Test refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id] end - assert cv.rmse < 1.5, "RMSE > 30" - assert cv.mae < 1 + refute_nil cv.rmse + refute_nil cv.mae end - def test_repeated_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - model = Model::LazarClassification.create dataset - repeated_cv = RepeatedCrossValidation.create model - repeated_cv.crossvalidations.each do |cv| - assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split" - assert_operator cv.weighted_accuracy, :>, cv.accuracy - end + def test_physchem_regression_crossvalidation + + training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") + model = Model::LazarRegression.create(training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression") + cv = RegressionCrossValidation.create model + refute_nil cv.rmse + refute_nil cv.mae end - def test_crossvalidation_parameters + # LOO + + def test_classification_loo_validation dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" - params = { - :neighbor_algorithm_parameters => { - :min_sim => 0.3, - :type => "FP3" - } - } - model = Model::LazarClassification.create dataset, params - model.save - cv = ClassificationCrossValidation.create model - params = model.neighbor_algorithm_parameters - params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string - cv.validations.each do |validation| - assert_equal params, validation.model.neighbor_algorithm_parameters - end + model = Model::LazarClassification.create dataset + loo = ClassificationLeaveOneOutValidation.create model + assert_equal 14, loo.nr_unpredicted + refute_empty loo.confusion_matrix + assert loo.accuracy > 0.77 end - def test_physchem_regression_crossvalidation - skip + def test_regression_loo_validation + dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") + model = Model::LazarRegression.create dataset + loo = RegressionLeaveOneOutValidation.create model + assert loo.r_squared > 0.34 + end - @descriptors = OpenTox::Algorithm::Descriptor::OBDESCRIPTORS.keys - refute_empty @descriptors + # repeated CV - # UPLOAD DATA - training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv") - feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors - feature_dataset.save - scaled_feature_dataset = feature_dataset.scale - scaled_feature_dataset.save - model = Model::LazarRegression.create training_dataset - model.neighbor_algorithm = "physchem_neighbors" - model.neighbor_algorithm_parameters = { - :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.physchem", - :descriptors => @descriptors, - :feature_dataset_id => scaled_feature_dataset.id, - :min_sim => 0.3 - } - model.save - cv = RegressionCrossValidation.create model - p cv + def test_repeated_crossvalidation + dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + model = Model::LazarClassification.create dataset + repeated_cv = RepeatedCrossValidation.create model + repeated_cv.crossvalidations.each do |cv| + assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split" + end end end |