PubChem classification download
[lazar] / test / use_cases.rb
1 require_relative "setup.rb"
2
# Long-running, end-to-end use cases: merging datasets, building validated
# models, predicting a dataset and downloading PubChem classification assays.
#
# Dataset, Model::Validation, Download and DATA_DIR are not defined in this
# file; presumably they are made available by setup.rb (verify there).
class UseCasesTest < MiniTest::Test

  # Merges three mutagenicity datasets into one training dataset, builds a
  # validated model from it and predicts the compounds of PA.sdf.
  #
  # The test is skipped unconditionally; as long as the `skip` call is in
  # place the statements below it are not executed.
  def test_PA
    # TODO add assertions
    skip "This test ist very time consuming, enable on demand."
    kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
    hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
    efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
    datasets = [kazius, hansen, efsa]
    # Applied to hansen and efsa only; kazius gets no value map (nil below).
    map = {"1" => "mutagen", "0" => "nonmutagen"}
    #p "merging"
    training_dataset = Dataset.merge(
      datasets: datasets,
      features: datasets.collect { |d| d.bioactivity_features.first },
      value_maps: [nil, map, map],
      keep_original_features: false,
      remove_duplicates: true
    )
    assert_equal 8281, training_dataset.compounds.size
    #p training_dataset.features.size
    #p training_dataset.id
    #training_dataset = Dataset.find('5bd8ac8fca62695d767fca6b')
    #training_dataset = Dataset.find('5bd8bbadca62695f69e7a33b')
    #puts training_dataset.to_csv
    #p "create model_validation"
    model_validation = Model::Validation.from_dataset(
      training_dataset: training_dataset,
      prediction_feature: training_dataset.merged_features.first,
      species: "Salmonella typhimurium",
      endpoint: "Mutagenicity"
    )
    #p model_validation.id
    #model_validation = Model::Validation.find '5bd8df47ca6269604590ab38'
    #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?}
    #p "predict"
    pa = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf"
    # Kept in a local so the commented-out inspection lines below stay usable.
    prediction_dataset = model_validation.predict pa
    #p prediction_dataset.id
    #prediction_dataset = Dataset.find('5bd98b88ca6269609aab79f4')
    #puts prediction_dataset.to_csv
  end

  # Builds a validated model from PubChem assay 743122.
  #
  # The test is skipped unconditionally; as long as the `skip` call is in
  # place the statements below it are not executed.
  def test_tox21
    # TODO add assertions
    skip "This test ist very time consuming, enable on demand."
    training_dataset = Dataset.from_pubchem_aid 743122
    #p training_dataset.id
    #'5bd9a1dbca626969d97fb421'
    #File.open("AID743122.csv","w+"){|f| f.puts training_dataset.to_csv}
    #model = Model::Lazar.create training_dataset: training_dataset
    #p model.id
    #p Model::Lazar.find('5bd9a70bca626969d97fc9df')
    # Kept in a local so the commented-out inspection lines below stay usable.
    model_validation = Model::Validation.from_dataset(
      training_dataset: training_dataset,
      prediction_feature: training_dataset.bioactivity_features.first,
      species: "Human HG2L7.5c1 cell line",
      endpoint: "aryl hydrocarbon receptor (AhR) signaling pathway activation"
    )
    #model_validation = Model::Validation.find '5bd9b210ca62696be39ab74d'
    #model_validation.crossvalidations.each do |cv|
      #p cv
    #end
    #p model_validation.crossvalidations.first.predictions.select{|cid,p| !p["warnings"].empty?}
  end

  # Runs the PubChem classification download for three carcinogenicity assays.
  # The remaining public models (mutagenicity, blood brain barrier, PubChem
  # regression, daphnia, LOAEL) are kept below in disabled =begin/=end blocks.
  def test_public_models
    #skip
    # TODO clean mongo
    # PubChem Classification
    [
      {
        aid: 1205,
        species: "Rodents",
        endpoint: "Carcinogenicity",
        qmrf: {group: "QMRF 4.12. Carcinogenicity", name: "OECD 451 Carcinogenicity Studies"}
      }, {
        aid: 1208,
        species: "Rat",
        endpoint: "Carcinogenicity",
        qmrf: {group: "QMRF 4.12. Carcinogenicity", name: "OECD 451 Carcinogenicity Studies"}
      }, {
        aid: 1199,
        species: "Mouse",
        endpoint: "Carcinogenicity",
        qmrf: {group: "QMRF 4.12. Carcinogenicity", name: "OECD 451 Carcinogenicity Studies"}
      }
    ].each do |assay|
      Download.pubchem_classification(
        aid: assay[:aid],
        species: assay[:species],
        endpoint: assay[:endpoint],
        active: "carcinogen",
        inactive: "non-carcinogen",
        # Take the QMRF entry from the assay itself; a bare `qmrf` is not
        # defined anywhere in this method and raised NameError.
        qmrf: assay[:qmrf]
      )
    end

# Disabled use cases. =begin/=end must start in the first column.
=begin
    # Mutagenicity
    kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
    hansen = Dataset.from_csv_file "#{DATA_DIR}/hansen.csv"
    efsa = Dataset.from_csv_file "#{DATA_DIR}/efsa.csv"
    datasets = [kazius,hansen,efsa]
    map = {"1" => "mutagen", "0" => "nonmutagen"}
    training_dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,map,map], keep_original_features: false, remove_duplicates: true
    # rename merged feature
    training_dataset.merged_features.first.name = "Mutagenicity"
    training_dataset.merged_features.first.save
    Model::Validation.from_dataset training_dataset: training_dataset, prediction_feature: training_dataset.merged_features.first, species: "Salmonella typhimurium", endpoint: "Mutagenicity"

    # Blood Brain Barrier Penetration
    bbb = Dataset.from_csv_file "#{DATA_DIR}/bbb.csv"
    Model::Validation.from_dataset training_dataset: bbb, prediction_feature: bbb.bioactivity_features.first, species: "Human", endpoint: "Blood Brain Barrier Penetration"

    # PubChem Regression
    # TODO transformations
    [
      {
        :aid => 1195,
        :species => "Human",
        :endpoint => "Maximum Recommended Daily Dose"
      },{
        :aid => 1208,
        :species => "Rat (TD50)",
        :endpoint => "Carcinogenicity"
      },{
        :aid => 1199,
        :species => "Mouse (TD50)",
        :endpoint => "Carcinogenicity"
      },{
        :aid => 1188,
        :species => "Fathead minnow",
        :endpoint => "Acute Toxicity"
      }
    ].each do |assay|
      Model::Validation.from_pubchem_aid aid: assay[:aid], species: assay[:species], endpoint: assay[:endpoint], regression:true
    end

    # daphnia
    daphnia = Dataset.from_csv_file "#{DATA_DIR}/daphnia.csv"
    Model::Validation.from_dataset training_dataset: daphnia, prediction_feature: daphnia.bioactivity_features.first, species: "Daphnia magna", endpoint: "Acute toxicity"

    # LOAEL
    loael = Dataset.from_csv_file "#{DATA_DIR}/loael.csv"
    Model::Validation.from_dataset training_dataset: loael, prediction_feature: loael.bioactivity_features.first, species: "Rat", endpoint: "Lowest observed adverse effect level (LOAEL)"
=end

# Further PubChem assay ids noted by the author; not used by any code here.
=begin
      # 1204  estrogen receptor
      # 1259408, # GENE-TOX
      # 1159563 HepG2 cytotoxicity assay
      # 588209 hepatotoxicity
      # 1259333 cytotoxicity
      # 1159569 HepG2 cytotoxicity counterscreen Measured in Cell-Based System Using Plate Reader - 2153-03_Inhibitor_Dose_DryPowder_Activity
      # 2122 HTS Counterscreen for Detection of Compound Cytotoxicity in MIN6 Cells
      # 116724 Acute toxicity determined after intravenal administration in mice
      # 1148549 Toxicity in po dosed mouse assessed as mortality after 7 days
=end

  end
end