From 15f4ad23eb918a91d52779887ccfb51bc6547f1b Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Thu, 25 Oct 2018 18:58:19 +0200 Subject: dataset merge --- test/use_cases.rb | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 test/use_cases.rb (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb new file mode 100644 index 0000000..d9ae78b --- /dev/null +++ b/test/use_cases.rb @@ -0,0 +1,50 @@ +require_relative "setup.rb" + +class UseCasesTest < MiniTest::Test + + def test_PA + kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" + hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" + efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" + datasets = [kazius,hansen,efsa] + training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true + model = Model::Validation.create training_dataset: training_dataset, species: "Salmonella typhimurium", endpoint: "Mutagenicity" + pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" + prediction_dataset = model.predict pa + puts prediction_dataset.to_csv + assert_equal 8281, d.compounds.size + end + + def test_public_models + skip +=begin + #classification + aids = [ + 1205, #Rodents (multiple species/sites) + 1208, # rat carc + 1199 # mouse + # Mutagenicity + + + 1195 #MRDD + 1188 #FHM + 1208, # rat carc td50 + 1199 # mouse td50 + + # daphnia + # Blood Brain Barrier Penetration + # Lowest observed adverse effect level (LOAEL) + + # 1204 estrogen receptor + # 1259408, # GENE-TOX + # 1159563 HepG2 cytotoxicity assay + # 588209 hepatotoxicity + # 1259333 cytotoxicity + # 1159569 HepG2 cytotoxicity counterscreen Measured in Cell-Based System Using Plate Reader - 2153-03_Inhibitor_Dose_DryPowder_Activity + # 2122 HTS Counterscreen for Detection of Compound Cytotoxicity in MIN6 Cells + # 116724 Acute toxicity determined after intravenal administration in mice + # 1148549 Toxicity in po dosed mouse assessed as mortality after 7 days +=end + + end +end -- cgit v1.2.3 From d61f78093f4ddf03c27a2c8ae0bab9c1f10c80f5 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Tue, 30 Oct 2018 17:26:59 +0100 Subject: tests fixed --- test/use_cases.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index d9ae78b..15e65a3 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -3,10 +3,12 @@ require_relative "setup.rb" class UseCasesTest < MiniTest::Test def test_PA + skip kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" datasets = [kazius,hansen,efsa] + map = {"true" => "carcinogen", "false" => "non-carcinogen"} training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true model = Model::Validation.create training_dataset: training_dataset, species: "Salmonella typhimurium", endpoint: "Mutagenicity" pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" -- cgit v1.2.3 From 2d4ce39cb1b489e26b0d6d96026054566a4f77b9 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Tue, 30 Oct 2018 21:11:04 +0100 Subject: dataset merge --- test/use_cases.rb | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index 15e65a3..4959f16 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -3,18 +3,25 @@ require_relative "setup.rb" class UseCasesTest < MiniTest::Test def test_PA - skip kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" datasets = [kazius,hansen,efsa] - map = {"true" => "carcinogen", "false" => "non-carcinogen"} + map = {"1" => "mutagen", "0" => "nonmutagen"} + p "merging" training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true - model = Model::Validation.create training_dataset: training_dataset, species: "Salmonella typhimurium", endpoint: "Mutagenicity" + assert_equal 8281, training_dataset.compounds.size + p training_dataset.features.size + p training_dataset.id + training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b') + p "create model_validation" + model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" + p model_validation.id + p "predict" pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" - prediction_dataset = model.predict pa + prediction_dataset = model_dataset.predict pa + p prediction_dataset.id puts prediction_dataset.to_csv - assert_equal 8281, d.compounds.size end def test_public_models -- cgit v1.2.3 From 5b08a8c6d8e5567d253bec92d5bf5d18fd040cdc Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Wed, 31 Oct 2018 14:50:42 +0100 Subject: pubchem import for openrisknet --- test/use_cases.rb | 50 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 17 deletions(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index 4959f16..4e072d8 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -3,27 +3,43 @@ require_relative "setup.rb" class UseCasesTest < MiniTest::Test def test_PA - kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" - hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" - efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" - datasets = [kazius,hansen,efsa] - map = {"1" => "mutagen", "0" => "nonmutagen"} - p "merging" - training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true - assert_equal 8281, training_dataset.compounds.size - p training_dataset.features.size - p training_dataset.id - training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b') + #kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" + #hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" + #efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" + #datasets = [kazius,hansen,efsa] + #map = {"1" => "mutagen", "0" => "nonmutagen"} + #p "merging" + #training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true + #assert_equal 8281, training_dataset.compounds.size + #p training_dataset.features.size + #p training_dataset.id + #training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b') + #training_dataset = Dataset.find('5bd8bbadca62695f69e7a33b') + #puts training_dataset.to_csv p "create model_validation" - model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" - p model_validation.id - p "predict" - pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" - prediction_dataset = model_dataset.predict pa - p prediction_dataset.id + #model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" + #p model_validation.id + #model_validation = Model::Validation.find '5bd8df47ca6269604590ab38' + #p "predict" + #pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" + #prediction_dataset = model_validation.predict pa + #p prediction_dataset.id + prediction_dataset = Dataset.find('5bd98b88ca6269609aab79f4') puts prediction_dataset.to_csv end + def test_tox21 + training_dataset = Dataset.from_pubchem_aid 743122 + p training_dataset.id + #'5bd9a1dbca626969d97fb421' + File.open("AID743122.csv","w+"){|f| f.puts training_dataset.to_csv} + model = Model::Lazar.create training_dataset: training_dataset + p model.id + #p Model::Lazar.find('5bd9a70bca626969d97fc9df') + model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.bioactivity_features.first, species: "Human HG2L7.5c1 cell line", endpoint: "aryl hydrocarbon receptor (AhR) signaling pathway activation" + p model_validation.id + end + def test_public_models skip =begin -- cgit v1.2.3 From 3a9c9332b660d35720ad4fa1f55ee0883e53aecd Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 2 Nov 2018 20:34:44 +0100 Subject: warnings fixed, cleanup --- test/use_cases.rb | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index 4e072d8..4842a18 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -3,41 +3,50 @@ require_relative "setup.rb" class UseCasesTest < MiniTest::Test def test_PA - #kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" - #hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" - #efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" - #datasets = [kazius,hansen,efsa] - #map = {"1" => "mutagen", "0" => "nonmutagen"} + # TODO add assertions + skip "This test ist very time consuming, enable on demand." + kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" + hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" + efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" + datasets = [kazius,hansen,efsa] + map = {"1" => "mutagen", "0" => "nonmutagen"} #p "merging" - #training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true - #assert_equal 8281, training_dataset.compounds.size + training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true + assert_equal 8281, training_dataset.compounds.size #p training_dataset.features.size #p training_dataset.id #training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b') #training_dataset = Dataset.find('5bd8bbadca62695f69e7a33b') #puts training_dataset.to_csv - p "create model_validation" - #model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" + #p "create model_validation" + model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" #p model_validation.id #model_validation = Model::Validation.find '5bd8df47ca6269604590ab38' + #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} #p "predict" - #pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" - #prediction_dataset = model_validation.predict pa + pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" + prediction_dataset = model_validation.predict pa #p prediction_dataset.id - prediction_dataset = Dataset.find('5bd98b88ca6269609aab79f4') - puts prediction_dataset.to_csv + #prediction_dataset = Dataset.find('5bd98b88ca6269609aab79f4') + #puts prediction_dataset.to_csv end def test_tox21 + # TODO add assertions + skip "This test ist very time consuming, enable on demand." training_dataset = Dataset.from_pubchem_aid 743122 - p training_dataset.id + #p training_dataset.id #'5bd9a1dbca626969d97fb421' - File.open("AID743122.csv","w+"){|f| f.puts training_dataset.to_csv} - model = Model::Lazar.create training_dataset: training_dataset - p model.id + #File.open("AID743122.csv","w+"){|f| f.puts training_dataset.to_csv} + #model = Model::Lazar.create training_dataset: training_dataset + #p model.id #p Model::Lazar.find('5bd9a70bca626969d97fc9df') model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.bioactivity_features.first, species: "Human HG2L7.5c1 cell line", endpoint: "aryl hydrocarbon receptor (AhR) signaling pathway activation" - p model_validation.id + #model_validation = Model::Validation.find '5bd9b210ca62696be39ab74d' + #model_validation.crossvalidations.each do |cv| + #p cv + #end + #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} end def test_public_models -- cgit v1.2.3 From 6d68a1ca94937a0553f61ebbbbd317dae54ce4e6 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Mon, 12 Nov 2018 17:08:51 +0100 Subject: PubChem classification download --- test/use_cases.rb | 84 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 14 deletions(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index 4842a18..59677cb 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -50,25 +50,81 @@ class UseCasesTest < MiniTest::Test end def test_public_models - skip + #skip + # TODO clean mongo + # PubChem Classification + [ + { + :aid => 1205, + :species => "Rodents", + :endpoint => "Carcinogenicity", + :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} + },{ + :aid => 1208, + :species => "Rat", + :endpoint => "Carcinogenicity", + :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} + },{ + :aid => 1199, + :species => "Mouse", + :endpoint => "Carcinogenicity", + :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} + } + ].each do |assay| + Download.pubchem_classification aid: assay[:aid], species: assay[:species], endpoint: assay[:endpoint], active: "carcinogen", inactive: "non-carcinogen", qmrf: qmrf + end + =begin - #classification - aids = [ - 1205, #Rodents (multiple species/sites) - 1208, # rat carc - 1199 # mouse - # Mutagenicity + # Mutagenicity + kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" + hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" + efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" + datasets = [kazius,hansen,efsa] + map = {"1" => "mutagen", "0" => "nonmutagen"} + training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true + # rename merged feature + training_dataset.merged_features.first.name = "Mutagenicity" + training_dataset.merged_features.first.save + Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" + + # Blood Brain Barrier Penetration + bbb = Dataset.from_csv_file "#{DATA_DIR}/bbb.csv" + Model::Validation.from_dataset training_dataset: bbb, prediction_feature: bbb.bioactivity_features.first, species: "Human", endpoint: "Blood Brain Barrier Penetration" + # PubChem Regression + # TODO transformations + [ + { + :aid => 1195, + :species => "Human", + :endpoint => "Maximum Recommended Daily Dose" + },{ + :aid => 1208, + :species => "Rat (TD50)", + :endpoint => "Carcinogenicity" + },{ + :aid => 1199, + :species => "Mouse (TD50)", + :endpoint => "Carcinogenicity" + },{ + :aid => 1188, + :species => "Fathead minnow", + :endpoint => "Acute Toxicity" + } + ].each do |assay| + Model::Validation.from_pubchem_aid aid: assay[:aid], species: assay[;species], endpoint: assay[:endpoint], regression:true + end - 1195 #MRDD - 1188 #FHM - 1208, # rat carc td50 - 1199 # mouse td50 - # daphnia - # Blood Brain Barrier Penetration - # Lowest observed adverse effect level (LOAEL) + daphnia = Dataset.from_csv_file "#{DATA_DIR}/daphnia.csv" + Model::Validation.from_dataset training_dataset: daphnia, prediction_feature: daphnia.bioactivity_features.first, species: "Daphnia magna", endpoint: "Acute toxicity" + # LOAEL + loael = Dataset.from_csv_file "#{DATA_DIR}/loael.csv" + Model::Validation.from_dataset training_dataset: loael, prediction_feature: loael.bioactivity_features.first, species: "Rat", endpoint: "Lowest observed adverse effect level (LOAEL)" +=end + +=begin # 1204 estrogen receptor # 1259408, # GENE-TOX # 1159563 HepG2 cytotoxicity assay -- cgit v1.2.3 From 8649795b3d5d63f227eed030286270b91ec39c68 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Mon, 12 Nov 2018 20:43:46 +0100 Subject: Mutagenicity download --- test/use_cases.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index 59677cb..ab01269 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -50,6 +50,8 @@ class UseCasesTest < MiniTest::Test end def test_public_models + Download.mutagenicity + exit #skip # TODO clean mongo # PubChem Classification @@ -71,8 +73,9 @@ class UseCasesTest < MiniTest::Test :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} } ].each do |assay| - Download.pubchem_classification aid: assay[:aid], species: assay[:species], endpoint: assay[:endpoint], active: "carcinogen", inactive: "non-carcinogen", qmrf: qmrf + Download.pubchem_classification aid: assay[:aid], species: assay[:species], endpoint: assay[:endpoint], active: "carcinogen", inactive: "non-carcinogen", qmrf: assay[:qmrf] end + =begin # Mutagenicity -- cgit v1.2.3 From 6e23be652ad90c747aaccf15258bdaa4458185a4 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Tue, 13 Nov 2018 14:32:09 +0100 Subject: public dataset download --- test/use_cases.rb | 96 ++++--------------------------------------------------- 1 file changed, 7 insertions(+), 89 deletions(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index ab01269..f39b3e2 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -49,95 +49,13 @@ class UseCasesTest < MiniTest::Test #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} end - def test_public_models - Download.mutagenicity - exit - #skip - # TODO clean mongo - # PubChem Classification - [ - { - :aid => 1205, - :species => "Rodents", - :endpoint => "Carcinogenicity", - :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} - },{ - :aid => 1208, - :species => "Rat", - :endpoint => "Carcinogenicity", - :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} - },{ - :aid => 1199, - :species => "Mouse", - :endpoint => "Carcinogenicity", - :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} - } - ].each do |assay| - Download.pubchem_classification aid: assay[:aid], species: assay[:species], endpoint: assay[:endpoint], active: "carcinogen", inactive: "non-carcinogen", qmrf: assay[:qmrf] - end - - -=begin - # Mutagenicity - kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" - hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" - efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" - datasets = [kazius,hansen,efsa] - map = {"1" => "mutagen", "0" => "nonmutagen"} - training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true - # rename merged feature - training_dataset.merged_features.first.name = "Mutagenicity" - training_dataset.merged_features.first.save - Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" - - # Blood Brain Barrier Penetration - bbb = Dataset.from_csv_file "#{DATA_DIR}/bbb.csv" - Model::Validation.from_dataset training_dataset: bbb, prediction_feature: bbb.bioactivity_features.first, species: "Human", endpoint: "Blood Brain Barrier Penetration" - - # PubChem Regression - # TODO transformations - [ - { - :aid => 1195, - :species => "Human", - :endpoint => "Maximum Recommended Daily Dose" - },{ - :aid => 1208, - :species => "Rat (TD50)", - :endpoint => "Carcinogenicity" - },{ - :aid => 1199, - :species => "Mouse (TD50)", - :endpoint => "Carcinogenicity" - },{ - :aid => 1188, - :species => "Fathead minnow", - :endpoint => "Acute Toxicity" - } - ].each do |assay| - Model::Validation.from_pubchem_aid aid: assay[:aid], species: assay[;species], endpoint: assay[:endpoint], regression:true - end - - # daphnia - daphnia = Dataset.from_csv_file "#{DATA_DIR}/daphnia.csv" - Model::Validation.from_dataset training_dataset: daphnia, prediction_feature: daphnia.bioactivity_features.first, species: "Daphnia magna", endpoint: "Acute toxicity" - - # LOAEL - loael = Dataset.from_csv_file "#{DATA_DIR}/loael.csv" - Model::Validation.from_dataset training_dataset: loael, prediction_feature: loael.bioactivity_features.first, species: "Rat", endpoint: "Lowest observed adverse effect level (LOAEL)" -=end - -=begin - # 1204 estrogen receptor - # 1259408, # GENE-TOX - # 1159563 HepG2 cytotoxicity assay - # 588209 hepatotoxicity - # 1259333 cytotoxicity - # 1159569 HepG2 cytotoxicity counterscreen Measured in Cell-Based System Using Plate Reader - 2153-03_Inhibitor_Dose_DryPowder_Activity - # 2122 HTS Counterscreen for Detection of Compound Cytotoxicity in MIN6 Cells - # 116724 Acute toxicity determined after intravenal administration in mice - # 1148549 Toxicity in po dosed mouse assessed as mortality after 7 days -=end + def test_download_public_models + Download.public_data + end + def test_import_public_models + skip + Import.public_data end + end -- cgit v1.2.3 From ae78e8216909ebfa708b8da3c55248a68abc291c Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Wed, 14 Nov 2018 13:35:17 +0100 Subject: public model validation, updated documentation --- test/use_cases.rb | 55 +++++++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 38 deletions(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index f39b3e2..51c3fef 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -3,59 +3,38 @@ require_relative "setup.rb" class UseCasesTest < MiniTest::Test def test_PA - # TODO add assertions skip "This test ist very time consuming, enable on demand." - kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" - hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" - efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" - datasets = [kazius,hansen,efsa] - map = {"1" => "mutagen", "0" => "nonmutagen"} - #p "merging" - training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true + Download.mutagenicity + training_dataset = Dataset.from_csv_file File.join(Download::DATA,"Mutagenicity-Salmonella_typhimurium.csv") assert_equal 8281, training_dataset.compounds.size - #p training_dataset.features.size - #p training_dataset.id - #training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b') - #training_dataset = Dataset.find('5bd8bbadca62695f69e7a33b') - #puts training_dataset.to_csv - #p "create model_validation" - model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" - #p model_validation.id - #model_validation = Model::Validation.find '5bd8df47ca6269604590ab38' - #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} - #p "predict" + # TODO use Model::Validation.from_csv_file + model_validation = Model::Validation.from_csv_file File.join(Download::DATA,"Mutagenicity-Salmonella_typhimurium.csv") pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" prediction_dataset = model_validation.predict pa - #p prediction_dataset.id - #prediction_dataset = Dataset.find('5bd98b88ca6269609aab79f4') - #puts prediction_dataset.to_csv + # TODO add assertions end def test_tox21 - # TODO add assertions skip "This test ist very time consuming, enable on demand." - training_dataset = Dataset.from_pubchem_aid 743122 - #p training_dataset.id - #'5bd9a1dbca626969d97fb421' - #File.open("AID743122.csv","w+"){|f| f.puts training_dataset.to_csv} - #model = Model::Lazar.create training_dataset: training_dataset - #p model.id - #p Model::Lazar.find('5bd9a70bca626969d97fc9df') - model_validation = Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.bioactivity_features.first, species: "Human HG2L7.5c1 cell line", endpoint: "aryl hydrocarbon receptor (AhR) signaling pathway activation" - #model_validation = Model::Validation.find '5bd9b210ca62696be39ab74d' - #model_validation.crossvalidations.each do |cv| - #p cv - #end - #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?} + csv_file = Download.pubchem_classification aid: 743122, species: "Human HG2L7.5c1 cell line", endpoint: "aryl hydrocarbon receptor (AhR) signaling pathway activation" + model_validation = Model::Validation.from_csv_file csv_file + assert_equal 5, model_validation.crossvalidations.size end def test_download_public_models + skip "This test will overwrite public data." Download.public_data + assert_equal 11, Dir[File.join(File.dirname(__FILE__),"..","data","*csv")].size + assert_equal 11, Dir[File.join(File.dirname(__FILE__),"..","data","*json")].size + # TODO: check values end def test_import_public_models - skip - Import.public_data + skip "This test is very time consuming, enable on demand." + #$mongo.database.drop + #$gridfs = $mongo.database.fs # recreate GridFS indexes + validated_models = Import.public_data + assert_equal Dir[File.join(File.dirname(__FILE__),"..","data/*csv")].size, validated_models.size end end -- cgit v1.2.3