From d3a4c309d48b794f2f60f44bb9a3d94f402cc82f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 16 Sep 2015 13:11:45 +0200 Subject: repeated crossvalidations, improved experiment reports --- test/experiment.rb | 62 ++++++++++++++++++++++++++++++++++++++++++------------ test/validation.rb | 12 +++++++++++ 2 files changed, 60 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/test/experiment.rb b/test/experiment.rb index c465d7b..cad4fa7 100644 --- a/test/experiment.rb +++ b/test/experiment.rb @@ -4,27 +4,61 @@ class ExperimentTest < MiniTest::Test def test_regression_experiment datasets = [ - "EPAFHM.csv", - "FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv", + "EPAFHM.medi.csv", + #"EPAFHM.csv", + #"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv", "LOAEL_mmol_corrected_smiles.csv" + ] + experiment = Experiment.create( + :name => "Default regression for datasets #{datasets}.", + :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id}, + :model_settings => [ + { + :algorithm => "OpenTox::Model::LazarRegression", + } ] - model_algorithms = ["OpenTox::Model::LazarRegression"] - neighbor_algorithms = ["OpenTox::Algorithm::Neighbor.fingerprint_similarity"] - prediction_algorithms = ["OpenTox::Algorithm::Regression.weighted_average"] - neighbor_algorithm_parameters = [{:min_sim => 0.7}] + ) + experiment.run + puts experiment.report.to_yaml + assert_equal datasets.size, experiment.results.size + experiment.results.each do |dataset_id, result| + assert_equal 1, result.size + result.each do |r| + assert_kind_of BSON::ObjectId, r[:model_id] + assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id] + end + end + end + + def test_classification_experiment + + datasets = [ "hamster_carcinogenicity.csv" ] experiment = Experiment.create( - :name => "Regression for datasets #{datasets}.", + :name => "Fminer vs fingerprint classification for datasets #{datasets}.", :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id}, - :model_algorithms => model_algorithms, - :neighbor_algorithms => neighbor_algorithms, - :neighbor_algorithm_parameters => neighbor_algorithm_parameters, - :prediction_algorithms => prediction_algorithms, + :model_settings => [ + { + :algorithm => "OpenTox::Model::LazarClassification", + },{ + :algorithm => "OpenTox::Model::LazarClassification", + :neighbor_algorithm_parameter => {:min_sim => 0.3} + }, + #{ + #:algorithm => "OpenTox::Model::LazarFminerClassification", + #} + ] ) experiment.run =begin - p experiment - experiment.report + experiment = Experiment.find "55f944a22b72ed7de2000000" =end - refute_empty experiment.crossvalidation_ids + puts experiment.report.to_yaml + experiment.results.each do |dataset_id, result| + assert_equal 2, result.size + result.each do |r| + assert_kind_of BSON::ObjectId, r[:model_id] + assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id] + end + end end end diff --git a/test/validation.rb b/test/validation.rb index a4c3d80..dfa2c81 100644 --- a/test/validation.rb +++ b/test/validation.rb @@ -33,4 +33,16 @@ class ValidationTest < MiniTest::Test #assert cv.weighted_mae < cv.mae end + def test_repeated_crossvalidation + dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + model = Model::LazarClassification.create dataset + repeated_cv = RepeatedCrossValidation.create model + p repeated_cv + repeated_cv.crossvalidations.each do |cv| + p cv + p cv.accuracy + assert cv.accuracy > 0.7 + end + end + end -- cgit v1.2.3