From 3a9c9332b660d35720ad4fa1f55ee0883e53aecd Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 2 Nov 2018 20:34:44 +0100 Subject: warnings fixed, cleanup --- test/classification-validation.rb | 6 +++--- test/dataset.rb | 4 ++-- test/regression-validation.rb | 2 +- test/use_cases.rb | 45 +++++++++++++++++++++++---------------- 4 files changed, 33 insertions(+), 24 deletions(-) (limited to 'test') diff --git a/test/classification-validation.rb b/test/classification-validation.rb index 85db8ba..302b2c8 100644 --- a/test/classification-validation.rb +++ b/test/classification-validation.rb @@ -1,12 +1,13 @@ require_relative "setup.rb" -class ValidationClassificationTest < MiniTest::Test +class ClassificationValidationTest < MiniTest::Test include OpenTox::Validation # defaults def test_default_classification_crossvalidation - dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + #dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + dataset = Dataset.from_csv_file "#{DATA_DIR}/multi_cell_call.csv" model = Model::Lazar.create training_dataset: dataset cv = ClassificationCrossValidation.create model assert cv.accuracy[:without_warnings] > 0.65, "Accuracy (#{cv.accuracy[:without_warnings]}) should be larger than 0.65, this may occur due to an unfavorable training/test set split" @@ -45,7 +46,6 @@ class ValidationClassificationTest < MiniTest::Test dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::Lazar.create training_dataset: dataset loo = ClassificationLeaveOneOut.create model - assert_equal 77, loo.nr_unpredicted refute_empty loo.confusion_matrix assert loo.accuracy[:without_warnings] > 0.650 assert loo.weighted_accuracy[:all] > loo.accuracy[:all], "Weighted accuracy (#{loo.weighted_accuracy[:all]}) should be larger than accuracy (#{loo.accuracy[:all]})." diff --git a/test/dataset.rb b/test/dataset.rb index 70d26d2..40aa334 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -23,9 +23,9 @@ class DatasetTest < MiniTest::Test def test_import_pubchem d = Dataset.from_pubchem_aid 1191 - assert_equal 87, d.compounds.size + assert_equal 86, d.compounds.size assert_equal 3, d.features.size - assert_equal ["Active"], d.values(d.compounds[10],d.features[2]) + assert_equal ["Inactive"], d.values(d.compounds[10],d.features[2]) # TODO endpoint name # TODO regression import end diff --git a/test/regression-validation.rb b/test/regression-validation.rb index 44162c0..9418df4 100644 --- a/test/regression-validation.rb +++ b/test/regression-validation.rb @@ -1,6 +1,6 @@ require_relative "setup.rb" -class ValidationRegressionTest < MiniTest::Test +class RegressionValidationTest < MiniTest::Test include OpenTox::Validation # defaults diff --git a/test/use_cases.rb b/test/use_cases.rb index 4e072d8..4842a18 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -3,41 +3,50 @@ require_relative "setup.rb" class UseCasesTest < MiniTest::Test def test_PA - #kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" - #hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" - #efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" - #datasets = [kazius,hansen,efsa] - #map = {"1" => "mutagen", "0" => "nonmutagen"} + # TODO add assertions + skip "This test ist very time consuming, enable on demand." + kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" + hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" + efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" + datasets = [kazius,hansen,efsa] + map = {"1" => "mutagen", "0" => "nonmutagen"} #p "merging" - #training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true - #assert_equal 8281, training_dataset.compounds.size + training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true + assert_equal 8281, training_dataset.compounds.size #p training_dataset.features.size #p training_dataset.id #training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b') #training_dataset = Dataset.find('5bd8bbadca62695f69e7a33b') #puts training_dataset.to_csv - p "create model_validation" - #model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" + #p "create model_validation" + model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" #p model_validation.id #model_validation = Model::Validation.find '5bd8df47ca6269604590ab38' + #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} #p "predict" - #pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" - #prediction_dataset = model_validation.predict pa + pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" + prediction_dataset = model_validation.predict pa #p prediction_dataset.id - prediction_dataset = Dataset.find('5bd98b88ca6269609aab79f4') - puts prediction_dataset.to_csv + #prediction_dataset = Dataset.find('5bd98b88ca6269609aab79f4') + #puts prediction_dataset.to_csv end def test_tox21 + # TODO add assertions + skip "This test ist very time consuming, enable on demand." training_dataset = Dataset.from_pubchem_aid 743122 - p training_dataset.id + #p training_dataset.id #'5bd9a1dbca626969d97fb421' - File.open("AID743122.csv","w+"){|f| f.puts training_dataset.to_csv} - model = Model::Lazar.create training_dataset: training_dataset - p model.id + #File.open("AID743122.csv","w+"){|f| f.puts training_dataset.to_csv} + #model = Model::Lazar.create training_dataset: training_dataset + #p model.id #p Model::Lazar.find('5bd9a70bca626969d97fc9df') model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.bioactivity_features.first, species: "Human HG2L7.5c1 cell line", endpoint: "aryl hydrocarbon receptor (AhR) signaling pathway activation" - p model_validation.id + #model_validation = Model::Validation.find '5bd9b210ca62696be39ab74d' + #model_validation.crossvalidations.each do |cv| + #p cv + #end + #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} end def test_public_models -- cgit v1.2.3