From ae78e8216909ebfa708b8da3c55248a68abc291c Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Wed, 14 Nov 2018 13:35:17 +0100 Subject: public model validation, updated documentation --- test/use_cases.rb | 55 +++++++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 38 deletions(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index f39b3e2..51c3fef 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -3,59 +3,38 @@ require_relative "setup.rb" class UseCasesTest < MiniTest::Test def test_PA - # TODO add assertions skip "This test ist very time consuming, enable on demand." - kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" - hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" - efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" - datasets = [kazius,hansen,efsa] - map = {"1" => "mutagen", "0" => "nonmutagen"} - #p "merging" - training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true + Download.mutagenicity + training_dataset = Dataset.from_csv_file File.join(Download::DATA,"Mutagenicity-Salmonella_typhimurium.csv") assert_equal 8281, training_dataset.compounds.size - #p training_dataset.features.size - #p training_dataset.id - #training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b') - #training_dataset = Dataset.find('5bd8bbadca62695f69e7a33b') - #puts training_dataset.to_csv - #p "create model_validation" - model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" - #p model_validation.id - #model_validation = Model::Validation.find '5bd8df47ca6269604590ab38' - #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} - #p "predict" + # TODO use Model::Validation.from_csv_file + model_validation = Model::Validation.from_csv_file File.join(Download::DATA,"Mutagenicity-Salmonella_typhimurium.csv") pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" prediction_dataset = model_validation.predict pa - #p prediction_dataset.id - #prediction_dataset = Dataset.find('5bd98b88ca6269609aab79f4') - #puts prediction_dataset.to_csv + # TODO add assertions end def test_tox21 - # TODO add assertions skip "This test ist very time consuming, enable on demand." - training_dataset = Dataset.from_pubchem_aid 743122 - #p training_dataset.id - #'5bd9a1dbca626969d97fb421' - #File.open("AID743122.csv","w+"){|f| f.puts training_dataset.to_csv} - #model = Model::Lazar.create training_dataset: training_dataset - #p model.id - #p Model::Lazar.find('5bd9a70bca626969d97fc9df') - model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.bioactivity_features.first, species: "Human HG2L7.5c1 cell line", endpoint: "aryl hydrocarbon receptor (AhR) signaling pathway activation" - #model_validation = Model::Validation.find '5bd9b210ca62696be39ab74d' - #model_validation.crossvalidations.each do |cv| - #p cv - #end - #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} + csv_file = Download.pubchem_classification aid: 743122, species: "Human HG2L7.5c1 cell line", endpoint: "aryl hydrocarbon receptor (AhR) signaling pathway activation" + model_validation = Model::Validation.from_csv_file csv_file + assert_equal 5, model_validation.crossvalidations.size end def test_download_public_models + skip "This test will overwrite public data." Download.public_data + assert_equal 11, Dir[File.join(File.dirname(__FILE__),"..","data","*csv")].size + assert_equal 11, Dir[File.join(File.dirname(__FILE__),"..","data","*json")].size + # TODO: check values end def test_import_public_models - skip - Import.public_data + skip "This test is very time consuming, enable on demand." + #$mongo.database.drop + #$gridfs = $mongo.database.fs # recreate GridFS indexes + validated_models = Import.public_data + assert_equal Dir[File.join(File.dirname(__FILE__),"..","data/*csv")].size, validated_models.size end end -- cgit v1.2.3