From f8faf510b4574df1a00fa61a9f0a1681fc2f4857 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 25 Aug 2015 17:20:55 +0200 Subject: Experiments added --- test/dataset.rb | 2 +- test/experiment.rb | 31 +++++++++++++++++++++++++++++++ test/lazar-long.rb | 2 +- test/lazar-regression.rb | 7 ++++--- test/setup.rb | 4 ++-- test/validation.rb | 7 +++++-- 6 files changed, 44 insertions(+), 9 deletions(-) create mode 100644 test/experiment.rb (limited to 'test') diff --git a/test/dataset.rb b/test/dataset.rb index 27dba61..b5275d4 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -78,7 +78,7 @@ class DatasetTest < MiniTest::Test new_dataset = Dataset.find d.id # get metadata assert_match "multicolumn.csv", new_dataset.source - assert_equal "multicolumn.csv", new_dataset.title + assert_equal "multicolumn.csv", new_dataset.name # get features assert_equal 6, new_dataset.features.size assert_equal 7, new_dataset.compounds.size diff --git a/test/experiment.rb b/test/experiment.rb new file mode 100644 index 0000000..eae7fa0 --- /dev/null +++ b/test/experiment.rb @@ -0,0 +1,31 @@ +require_relative "setup.rb" + +class ExperimentTest < MiniTest::Test + + def test_regression_experiment + datasets = [ + "EPAFHM.csv", + "FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv", + "LOAEL_log_mmol_corrected_smiles.csv" + ] + model_algorithms = ["OpenTox::Model::LazarRegression"] + neighbor_algorithms = ["OpenTox::Algorithm::Neighbor.fingerprint_similarity"] + prediction_algorithms = ["OpenTox::Algorithm::Regression.weighted_average"] + neighbor_algorithm_parameters = [{:min_sim => 0.7}] + experiment = Experiment.create( + :name => "Regression for datasets #{datasets}.", + :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id}, + :model_algorithms => model_algorithms, + :neighbor_algorithms => neighbor_algorithms, + :neighbor_algorithm_parameters => neighbor_algorithm_parameters, + :prediction_algorithms => prediction_algorithms, + ) + experiment.run +=begin + experiment = Experiment.find "55dc58b32b72ed14a8000008" +=end + p experiment.id + experiment.report + refute_empty experiment.crossvalidation_ids + end +end diff --git a/test/lazar-long.rb b/test/lazar-long.rb index c0deaa2..1b58319 100644 --- a/test/lazar-long.rb +++ b/test/lazar-long.rb @@ -29,7 +29,7 @@ class LazarExtendedTest < MiniTest::Test feature_dataset = OpenTox::CalculatedDataset.find model.feature_dataset_id assert_equal dataset.compounds.size, feature_dataset.compounds.size assert_equal 52, feature_dataset.features.size - assert_equal '[#17&A]-[#6&A]', feature_dataset.features.first.title + assert_equal '[#17&A]-[#6&A]', feature_dataset.features.first.name compound = OpenTox::Compound.from_inchi("InChI=1S/C10H9NO2S/c1-8-2-4-9(5-3-8)13-6-10(12)11-7-14/h2-5H,6H2,1H3") prediction_dataset = model.predict compound prediction = prediction_dataset.data_entries.first diff --git a/test/lazar-regression.rb b/test/lazar-regression.rb index c36f521..4062cfd 100644 --- a/test/lazar-regression.rb +++ b/test/lazar-regression.rb @@ -12,11 +12,12 @@ class LazarRegressionTest < MiniTest::Test assert_equal 1, prediction[:neighbors].size end - def test_weighted_average_with_relevant_fingerprints + def test_local_linear_regression + skip training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" model = Model::LazarRegression.create training_dataset - model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average_with_relevant_fingerprints") - compound = Compound.from_smiles "CC(C)(C)CN" + model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.local_linear_regression") + compound = Compound.from_smiles "NC(=O)OCCC" prediction = model.predict compound p prediction #assert_equal 13.6, prediction[:value].round(1) diff --git a/test/setup.rb b/test/setup.rb index 538853d..3dad683 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -3,5 +3,5 @@ require_relative '../lib/lazar.rb' include OpenTox TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") -$mongo.database.drop -$gridfs = $mongo.database.fs # recreate GridFS indexes +#$mongo.database.drop +#$gridfs = $mongo.database.fs # recreate GridFS indexes diff --git a/test/validation.rb b/test/validation.rb index 485769c..009c337 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -24,8 +24,8 @@ class ValidationTest < MiniTest::Test end def test_regression_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" - #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv" + #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv" model = Model::LazarRegression.create dataset cv = RegressionCrossValidation.create model p cv.rmse @@ -33,6 +33,9 @@ class ValidationTest < MiniTest::Test p cv.mae p cv.weighted_mae #`inkview #{cv.plot}` + #puts JSON.pretty_generate(cv.misclassifications)#.collect{|l| l.join ", "}.join "\n" + p cv.misclassifications.collect{|l| l[:neighbors].size} + `inkview #{cv.plot}` assert cv.rmse < 30, "RMSE > 30" assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) " assert cv.mae < 12 -- cgit v1.2.3