From 6d68a1ca94937a0553f61ebbbbd317dae54ce4e6 Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Mon, 12 Nov 2018 17:08:51 +0100 Subject: PubChem classification download --- test/use_cases.rb | 84 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 14 deletions(-) (limited to 'test/use_cases.rb') diff --git a/test/use_cases.rb b/test/use_cases.rb index 4842a18..59677cb 100644 --- a/test/use_cases.rb +++ b/test/use_cases.rb @@ -50,25 +50,81 @@ class UseCasesTest < MiniTest::Test end def test_public_models - skip + #skip + # TODO clean mongo + # PubChem Classification + [ + { + :aid => 1205, + :species => "Rodents", + :endpoint => "Carcinogenicity", + :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} + },{ + :aid => 1208, + :species => "Rat", + :endpoint => "Carcinogenicity", + :qmrf => {:group => "QMRF 4.12. Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} + },{ + :aid => 1199, + :species => "Mouse", + :endpoint => "Carcinogenicity", + :qmrf => {:group => "QMRF 4.12. 
Carcinogenicity", :name => "OECD 451 Carcinogenicity Studies"} + } + ].each do |assay| + Download.pubchem_classification aid: assay[:aid], species: assay[:species], endpoint: assay[:endpoint], active: "carcinogen", inactive: "non-carcinogen", qmrf: assay[:qmrf] + end + =begin - #classification - aids = [ - 1205, #Rodents (multiple species/sites) - 1208, # rat carc - 1199 # mouse - # Mutagenicity + # Mutagenicity + kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf" + hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv" + efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv" + datasets = [kazius,hansen,efsa] + map = {"1" => "mutagen", "0" => "nonmutagen"} + training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true + # rename merged feature + training_dataset.merged_features.first.name = "Mutagenicity" + training_dataset.merged_features.first.save + Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity" + + # Blood Brain Barrier Penetration + bbb = Dataset.from_csv_file "#{DATA_DIR}/bbb.csv" + Model::Validation.from_dataset training_dataset: bbb, prediction_feature: bbb.bioactivity_features.first, species: "Human", endpoint: "Blood Brain Barrier Penetration" + # PubChem Regression + # TODO transformations + [ + { + :aid => 1195, + :species => "Human", + :endpoint => "Maximum Recommended Daily Dose" + },{ + :aid => 1208, + :species => "Rat (TD50)", + :endpoint => "Carcinogenicity" + },{ + :aid => 1199, + :species => "Mouse (TD50)", + :endpoint => "Carcinogenicity" + },{ + :aid => 1188, + :species => "Fathead minnow", + :endpoint => "Acute Toxicity" + } + ].each do |assay| + Model::Validation.from_pubchem_aid aid: assay[:aid], species: assay[:species], endpoint: assay[:endpoint], regression:true + end - 
1195 #MRDD - 1188 #FHM - 1208, # rat carc td50 - 1199 # mouse td50 - # daphnia - # Blood Brain Barrier Penetration - # Lowest observed adverse effect level (LOAEL) + daphnia = Dataset.from_csv_file "#{DATA_DIR}/daphnia.csv" + Model::Validation.from_dataset training_dataset: daphnia, prediction_feature: daphnia.bioactivity_features.first, species: "Daphnia magna", endpoint: "Acute toxicity" + # LOAEL + loael = Dataset.from_csv_file "#{DATA_DIR}/loael.csv" + Model::Validation.from_dataset training_dataset: loael, prediction_feature: loael.bioactivity_features.first, species: "Rat", endpoint: "Lowest observed adverse effect level (LOAEL)" +=end + +=begin # 1204 estrogen receptor # 1259408, # GENE-TOX # 1159563 HepG2 cytotoxicity assay -- cgit v1.2.3