From 658cee5e1df2e1fcb0c7f9259955f1e3199deb5a Mon Sep 17 00:00:00 2001 From: gebele Date: Tue, 21 Mar 2017 15:11:44 +0000 Subject: fixed regression test for rf algorithm see: 'loael edit' commit; along with larger training datasets --- test/model-regression.rb | 28 ++++++++++++++-------------- test/setup.rb | 2 ++ 2 files changed, 16 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/model-regression.rb b/test/model-regression.rb index 86b927c..5903e88 100644 --- a/test/model-regression.rb +++ b/test/model-regression.rb @@ -10,21 +10,21 @@ class LazarRegressionTest < MiniTest::Test }, :similarity => { :method => "Algorithm::Similarity.tanimoto", - :min => 0.1 + :min => 0.5 }, :prediction => { - :method => "Algorithm::Caret.pls", + :method => "Algorithm::Caret.rf", }, :feature_selection => nil, } - training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM_log10.csv") model = Model::Lazar.create training_dataset: training_dataset assert_kind_of Model::LazarRegression, model assert_equal algorithms, model.algorithms - substance = training_dataset.substances[10] + substance = training_dataset.substances[145] prediction = model.predict substance assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions." 
- substance = Compound.from_smiles "NC(=O)OCCC" + substance = Compound.from_smiles "c1ccc(cc1)Oc1ccccc1" prediction = model.predict substance refute_nil prediction[:value] refute_nil prediction[:prediction_interval] @@ -59,8 +59,8 @@ class LazarRegressionTest < MiniTest::Test model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms compound = Compound.from_smiles "CCCSCCSCC" prediction = model.predict compound - assert_equal 4, prediction[:neighbors].size - assert_equal 1.37, prediction[:value].round(2) + assert_equal 3, prediction[:neighbors].size + assert prediction[:value].round(2) > 1.37, "Prediction value (#{prediction[:value].round(2)}) should be larger than 1.37." end def test_local_physchem_regression @@ -112,12 +112,12 @@ class LazarRegressionTest < MiniTest::Test :method => "Algorithm::Similarity.cosine", } } - training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms assert_kind_of Model::LazarRegression, model - assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method] assert_equal "Algorithm::Similarity.cosine", model.algorithms[:similarity][:method] - assert_equal 0.1, model.algorithms[:similarity][:min] + assert_equal 0.5, model.algorithms[:similarity][:min] algorithms[:descriptors].delete :features assert_equal algorithms[:descriptors], model.algorithms[:descriptors] prediction = model.predict training_dataset.substances[10] @@ -130,14 +130,14 @@ class LazarRegressionTest < MiniTest::Test :method => "Algorithm::FeatureSelection.correlation_filter", }, } - training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM_log10.csv") model = 
Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms assert_kind_of Model::LazarRegression, model - assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method] assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] - assert_equal 0.1, model.algorithms[:similarity][:min] + assert_equal 0.5, model.algorithms[:similarity][:min] assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method] - prediction = model.predict training_dataset.substances[10] + prediction = model.predict training_dataset.substances[145] refute_nil prediction[:value] end diff --git a/test/setup.rb b/test/setup.rb index 40c8ebf..c1cddfb 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -3,6 +3,8 @@ require 'minitest/autorun' require_relative '../lib/lazar.rb' #require 'lazar' include OpenTox +#$mongo.database.drop +#$gridfs = $mongo.database.fs # recreate GridFS indexes TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first -- cgit v1.2.3