comments removed
[lazar] / test / regression-model.rb
1 require_relative "setup.rb"
2
# Regression-model tests: each test builds a model with Model::Lazar.create
# from a CSV training dataset, optionally overriding parts of the algorithm
# configuration, and checks the resulting configuration and/or predictions.
class LazarRegressionTest < Minitest::Test

  # A model created without an explicit algorithm configuration must be a
  # Model::LazarRegression carrying exactly the configuration listed below,
  # and must produce predictions for a training substance and a new compound.
  def test_default_regression
    algorithms = {
      :descriptors => {
        :method => "fingerprint",
        :type => "MP2D"
      },
      :similarity => {
        :method => "Algorithm::Similarity.tanimoto",
        :min => [0.5,0.2]
      },
      :prediction => {
        :method => "Algorithm::Caret.rf",
      },
      :feature_selection => nil,
    }
    training_dataset = load_fathead_minnow
    model = Model::Lazar.create training_dataset: training_dataset
    assert_kind_of Model::LazarRegression, model
    assert_equal algorithms, model.algorithms

    # Prediction for a substance taken from the training data.
    substance = training_dataset.substances[145]
    prediction = model.predict substance
    interval = prediction[:prediction_interval]
    assert_includes interval[0]..interval[1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."

    # Prediction for a compound built from a SMILES string.
    substance = Compound.from_smiles "c1ccc(cc1)Oc1ccccc1"
    prediction = model.predict substance
    refute_nil prediction[:value]
    refute_nil prediction[:prediction_interval]
    refute_empty prediction[:neighbors]
  end

  # With similarity thresholds of [0,0] and the weighted_average prediction
  # method, the neighbor count must equal the number of model substances.
  def test_weighted_average
    training_dataset = load_epafhm
    algorithms = {
      :similarity => {
        :min => [0,0]
      },
      :prediction => {
        :method => "Algorithm::Regression.weighted_average",
      },
    }
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    compound = Compound.from_smiles "CC(C)(C)CN"
    prediction = model.predict compound
    # Parentheses avoid Ruby's "ambiguous first argument" warning for the
    # negative literal.
    assert_equal(-0.86, prediction[:value].round(2))
    assert_equal model.substance_ids.size, prediction[:neighbors].size
  end

  # Explicitly requests "MP2D" fingerprint descriptors and checks the neighbor
  # count and a lower bound on the predicted value.
  def test_mpd_fingerprints
    training_dataset = load_epafhm
    algorithms = {
      :descriptors => {
        :method => "fingerprint",
        :type => "MP2D"
      },
    }
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    compound = Compound.from_smiles "CCCSCCSCC"
    prediction = model.predict compound
    assert_equal 3, prediction[:neighbors].size
    assert prediction[:value].round(2) > 1.37, "Prediction value (#{prediction[:value].round(2)}) should be larger than 1.37."
  end

  # Uses calculated PhysChem.openbabel_descriptors with weighted_cosine
  # similarity; only requires that a prediction value is produced.
  def test_local_physchem_regression
    training_dataset = load_epafhm
    algorithms = {
      :descriptors => {
        :method => "calculate_properties",
        :features => PhysChem.openbabel_descriptors,
      },
      :similarity => {
        :method => "Algorithm::Similarity.weighted_cosine",
        :min => [0.5,0.1]
      },
    }
    model = Model::Lazar.create(training_dataset: training_dataset, algorithms: algorithms)
    compound = Compound.from_smiles "NC(=O)OCCC"
    prediction = model.predict compound
    refute_nil prediction[:value]
  end

  # Same configuration as test_local_physchem_regression, plus the
  # correlation_filter feature selection.
  def test_local_physchem_regression_with_feature_selection
    training_dataset = load_epafhm
    algorithms = {
      :descriptors => {
        :method => "calculate_properties",
        :features => PhysChem.openbabel_descriptors,
      },
      :similarity => {
        :method => "Algorithm::Similarity.weighted_cosine",
        :min => [0.5,0.1]
      },
      :feature_selection => {
        :method => "Algorithm::FeatureSelection.correlation_filter",
      },
    }
    model = Model::Lazar.create(training_dataset: training_dataset, algorithms: algorithms)
    compound = Compound.from_smiles "NC(=O)OCCC"
    prediction = model.predict compound
    refute_nil prediction[:value]
  end

  # Overrides only the descriptors and the similarity method; the prediction
  # method and the first similarity threshold must keep the values asserted
  # below.
  def test_unweighted_cosine_physchem_regression
    algorithms = {
      :descriptors => {
        :method => "calculate_properties",
        :features => PhysChem.openbabel_descriptors,
      },
      :similarity => {
        :method => "Algorithm::Similarity.cosine",
      }
    }
    training_dataset = load_epafhm
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    assert_kind_of Model::LazarRegression, model
    assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.cosine", model.algorithms[:similarity][:method]
    assert_equal 0.5, model.algorithms[:similarity][:min].first
    # The descriptor settings are compared without the :features entry.
    algorithms[:descriptors].delete :features
    assert_equal algorithms[:descriptors], model.algorithms[:descriptors]
    prediction = model.predict training_dataset.substances[10]
    refute_nil prediction[:value]
  end

  # Overrides only feature selection; prediction and similarity settings must
  # keep the values asserted below.
  def test_regression_with_feature_selection
    algorithms = {
      :feature_selection => {
        :method => "Algorithm::FeatureSelection.correlation_filter",
      },
    }
    training_dataset = load_fathead_minnow
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    assert_kind_of Model::LazarRegression, model
    assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
    assert_equal 0.5, model.algorithms[:similarity][:min].first
    assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method]
    prediction = model.predict training_dataset.substances[145]
    refute_nil prediction[:value]
  end

  # A fully specified configuration must be reflected by the model.
  def test_regression_parameters
    algorithms = {
      :descriptors => {
        :method => "fingerprint",
        :type => "MP2D"
      },
      :similarity => {
        :method => "Algorithm::Similarity.tanimoto",
        :min => [0.3,0.1]
      },
      :prediction => {
        :method => "Algorithm::Regression.weighted_average",
      },
      :feature_selection => nil,
    }
    training_dataset = load_epafhm
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    assert_kind_of Model::LazarRegression, model
    assert_equal "Algorithm::Regression.weighted_average", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
    assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
    # The requested :prediction settings carry no :parameters entry, so the
    # expected value is nil. assert_nil is used because assert_equal with a
    # nil expectation is deprecated in Minitest 5.
    # NOTE(review): assumes Model::Lazar.create does not add :parameters to
    # the hash passed in - confirm.
    assert_nil model.algorithms[:prediction][:parameters]
    substance = training_dataset.substances[10]
    prediction = model.predict substance
    assert_equal 0.83, prediction[:value].round(2)
  end

  # Predicting a whole dataset must return a Dataset; checks its size, a few
  # stored values and the layout of the prediction CSV export.
  def test_dataset_prediction
    training_dataset = load_epafhm
    model = Model::Lazar.create training_dataset: training_dataset
    result = model.predict training_dataset
    assert_kind_of Dataset, result
    assert_equal 8, result.features.size
    assert_equal 88, result.compounds.size
    assert_equal [1.95], result.values(result.compounds.first, result.bioactivity_features[0]).collect{|v| v.round(2)}
    assert_equal [1.37], result.values(result.compounds[6], result.bioactivity_features[0]).collect{|v| v.round(2)}
    assert_equal [1.79], result.values(result.compounds[6], result.prediction_feature).collect{|v| v.round(2)}
    assert_equal [1.84,1.73], result.values(result.compounds[7], result.bioactivity_features[0]).collect{|v| v.round(2)}
    # Parentheses avoid Ruby's "ambiguous first argument" warning for the
    # regexp literal.
    assert_match(/Low/i, result.predictions[result.compounds[6]][:confidence])
    csv = result.to_prediction_csv
    rows = csv.split("\n")
    assert_equal "ID,Original SMILES,Canonical SMILES,Prediction,Confidence,Lower prediction interval,Upper prediction interval,Measurements", rows[0]
    # Column positions follow the header asserted above.
    items = rows[3].split(",")
    # prediction and measurement within prediction interval
    prediction = items[3].to_f
    pi_low = items[5].to_f
    pi_hi = items[6].to_f
    measurement = items[7].to_f
    [prediction,measurement].each do |v|
      # assert_operator reports both operands on failure.
      assert_operator v, :>, pi_low
      assert_operator v, :<, pi_hi
    end
  end

  # Checks the measurement of the first neighbor for a single compound.
  def test_fhm_prediction
    training_dataset = Dataset.from_csv_file File.join(File.dirname(__FILE__),"..","data","Acute_toxicity-Fathead_minnow.csv")
    model = Model::Lazar.create training_dataset: training_dataset
    prediction = model.predict Compound.from_smiles("N=Nc1ccccc1")
    assert_equal 0.65, prediction[:neighbors][0][:measurement].round(2)
  end

  private

  # Loads EPAFHM.medi_log10.csv from DATA_DIR as a training dataset.
  def load_epafhm
    Dataset.from_csv_file File.join(DATA_DIR, "EPAFHM.medi_log10.csv")
  end

  # Loads Acute_toxicity-Fathead_minnow.csv from Download::DATA as a training
  # dataset.
  def load_fathead_minnow
    Dataset.from_csv_file File.join(Download::DATA, "Acute_toxicity-Fathead_minnow.csv")
  end

end