real datasets for testing, test data cleanup, Daphnia import, upper and lower similar...
[lazar] / test / classification-model.rb
1 require_relative "setup.rb"
2
# Tests for creating lazar classification models, exporting/importing them
# and predicting single compounds as well as whole datasets.
#
# Inherits from Minitest::Test: recent minitest releases only load the legacy
# MiniTest alias when MT_COMPAT is set, so the canonical constant is used.
class ClassificationModelTest < Minitest::Test

  # A model created without explicit algorithms must expose the default
  # algorithm settings and predict the two reference compounds correctly.
  def test_classification_default
    algorithms = {
      descriptors: {
        method: "fingerprint",
        type: "MP2D"
      },
      similarity: {
        method: "Algorithm::Similarity.tanimoto",
        min: [0.5, 0.2]
      },
      prediction: {
        method: "Algorithm::Classification.weighted_majority_vote"
      },
      feature_selection: nil
    }
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR, "hamster_carcinogenicity.csv")
    model = Model::Lazar.create training_dataset: training_dataset
    assert_kind_of Model::LazarClassification, model
    assert_equal algorithms, model.algorithms
    assert_example_predictions model
  end

  # A model serialized to JSON and rebuilt from it must carry the same
  # algorithm settings and give the same predictions as the original.
  def test_export_import
    path = "tmp.csv"
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR, "hamster_carcinogenicity.csv")
    export = Model::Lazar.create training_dataset: training_dataset
    File.open(path, "w+") { |f| f.puts export.to_json }
    import = Model::LazarClassification.new JSON.parse(File.read(path))
    assert_kind_of Model::LazarClassification, import
    # JSON.parse yields string keys; convert the nested and the top-level keys
    # back to symbols so the hashes can be compared with the exported model.
    import.algorithms.each_value { |v| v.transform_keys!(&:to_sym) if v.is_a? Hash }
    import.algorithms.transform_keys!(&:to_sym)
    assert_equal export.algorithms, import.algorithms
    assert_example_predictions import
  ensure
    # Do not leave the scratch file behind in the working directory.
    File.delete(path) if path && File.exist?(path)
  end

  # Explicitly passed algorithm parameters are applied, while parameters that
  # were not passed keep their default values.
  def test_classification_parameters
    algorithms = {
      descriptors: {
        method: "fingerprint",
        type: "MACCS"
      },
      similarity: {
        min: [0.4, 0.1]
      }
    }
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR, "hamster_carcinogenicity.csv")
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    assert_kind_of Model::LazarClassification, model
    assert_equal "Algorithm::Classification.weighted_majority_vote", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
    assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
    substance = training_dataset.substances[10]
    prediction = model.predict substance
    assert_equal "false", prediction[:value]
    assert_equal 4, prediction[:neighbors].size
  end

  # Predicting a whole dataset returns a Dataset holding the predictions.
  def test_dataset_prediction
    training_dataset = Dataset.from_csv_file File.join(Download::DATA, "Carcinogenicity-Rodents.csv")
    test_dataset = Dataset.from_csv_file File.join(DATA_DIR, "hamster_carcinogenicity.csv")
    model = Model::Lazar.create training_dataset: training_dataset
    result = model.predict test_dataset
    assert_kind_of Dataset, result
    assert_equal 7, result.features.size
    assert_equal 85, result.compounds.size
    prediction_feature = result.prediction_features.first
    assert_equal ["carcinogenic"], result.values(result.compounds[1], prediction_feature)
    assert_equal ["non-carcinogenic"], result.values(result.compounds[5], prediction_feature)
    assert_nil result.predictions[result.compounds.first][:value]
    assert_equal "carcinogenic", result.predictions[result.compounds[1]][:value]
    # NOTE(review): the class labels asserted above are "carcinogenic" and
    # "non-carcinogenic", but the probability is looked up under "no" - confirm
    # that this key matches the labels of the training data.
    assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["no"].round(2)
  end

  # Currently skipped. Contains no assertions yet; it only prints the
  # prediction for manual inspection.
  def test_carcinogenicity_rf_classification
    skip "Caret rf may run into a (endless?) loop for some compounds."
    dataset = Dataset.from_csv_file File.join(Download::DATA, "Carcinogenicity-Rodents.csv")
    algorithms = {
      prediction: {
        method: "Algorithm::Caret.rf"
      }
    }
    model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
    substance = Compound.from_smiles "[O-]S(=O)(=O)[O-].[Mn+2].O"
    prediction = model.predict substance
    p prediction
  end

  # Currently skipped. Random forest classification on the cas_4337 SDF data.
  def test_rf_classification
    skip "Caret rf may run into a (endless?) loop for some compounds."
    algorithms = {
      prediction: {
        method: "Algorithm::Caret.rf"
      }
    }
    training_dataset = Dataset.from_sdf_file File.join(DATA_DIR, "cas_4337.sdf")
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    #p model.id.to_s
    #model = Model::Lazar.find "5bbb4c0cca626909f6c8a924"
    assert_kind_of Model::LazarClassification, model
    # Symbol keys, consistent with how the other tests in this class read the
    # algorithms of a freshly created model.
    assert_equal algorithms[:prediction][:method], model.algorithms[:prediction][:method]
    substance = Compound.from_smiles "Clc1ccc(cc1)C(=O)c1ccc(cc1)OC(C(=O)O)(C)C"
    prediction = model.predict substance
    assert_equal 51, prediction[:neighbors].size
    assert_equal "nonmutagen", prediction[:value]
    assert_equal 0.1, prediction[:probabilities]["mutagen"].round(1)
    assert_equal 0.9, prediction[:probabilities]["nonmutagen"].round(1)
  end

  private

  # Predicts the two reference compounds with +model+ and asserts the expected
  # class for each of them.
  #
  # @param model the model under test; must respond to +predict+
  def assert_example_predictions(model)
    examples = [
      {
        compound: OpenTox::Compound.from_smiles("OCC(CN(CC(O)C)N=O)O"),
        prediction: "false"
      },
      {
        compound: OpenTox::Compound.from_smiles("O=CNc1scc(n1)c1ccc(o1)[N+](=O)[O-]"),
        prediction: "true"
      }
    ]
    examples.each do |example|
      prediction = model.predict example[:compound]
      assert_equal example[:prediction], prediction[:value]
    end
  end

end