1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
require_relative "setup.rb"
# Tests for building Lazar classification models and predicting with them.
#
# DATA_DIR, Dataset, Model, Compound and OpenTox are not defined in this file;
# presumably they are provided by setup.rb -- confirm there.
class ClassificationModelTest < Minitest::Test
  # A model created without an explicit :algorithms argument must report the
  # algorithm settings listed below and predict the two example compounds.
  def test_classification_default
    expected_algorithms = {
      descriptors: {
        method: "fingerprint",
        type: "MP2D"
      },
      similarity: {
        method: "Algorithm::Similarity.tanimoto",
        min: 0.5
      },
      prediction: {
        method: "Algorithm::Classification.weighted_majority_vote"
      },
      feature_selection: nil
    }
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR, "hamster_carcinogenicity.csv")
    model = Model::Lazar.create training_dataset: training_dataset

    assert_kind_of Model::LazarClassification, model
    assert_equal expected_algorithms, model.algorithms

    # SMILES string => expected predicted value
    expected_predictions = {
      "OCC(CN(CC(O)C)N=O)O" => "false",
      "O=CNc1scc(n1)c1ccc(o1)[N+](=O)[O-]" => "true"
    }
    expected_predictions.each do |smiles, expected_value|
      compound = OpenTox::Compound.from_smiles(smiles)
      prediction = model.predict compound
      assert_equal expected_value, prediction[:value]
    end
  end

  # Only the descriptor type and the similarity threshold are overridden; the
  # prediction and similarity methods must still be the ones asserted below.
  def test_classification_parameters
    algorithms = {
      descriptors: {
        method: "fingerprint",
        type: "MACCS"
      },
      similarity: {
        min: 0.4
      }
    }
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR, "hamster_carcinogenicity.csv")
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms

    assert_kind_of Model::LazarClassification, model
    assert_equal "Algorithm::Classification.weighted_majority_vote", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
    assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]

    substance = training_dataset.substances[10]
    prediction = model.predict substance
    assert_equal "false", prediction[:value]
    assert_equal 4, prediction[:neighbors].size
  end

  # Predicting a whole dataset returns a Dataset carrying the predictions.
  def test_dataset_prediction
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR, "multi_cell_call.csv")
    test_dataset = Dataset.from_csv_file File.join(DATA_DIR, "hamster_carcinogenicity.csv")
    model = Model::Lazar.create training_dataset: training_dataset
    result = model.predict test_dataset

    assert_kind_of Dataset, result
    assert_equal 7, result.features.size
    assert_equal 85, result.compounds.size

    prediction_feature = result.prediction_features.first
    assert_equal ["yes"], result.values(result.compounds[1], prediction_feature)
    assert_equal ["no"], result.values(result.compounds[5], prediction_feature)

    # The first compound is expected to have no predicted value.
    assert_nil result.predictions[result.compounds.first][:value]
    assert_equal "yes", result.predictions[result.compounds[1]][:value]
    assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["no"].round(2)
  end

  # Currently skipped (see the skip message). The body only prints the
  # prediction; it contains no assertion yet.
  def test_carcinogenicity_rf_classification
    skip "Caret rf may run into a (endless?) loop for some compounds."
    dataset = Dataset.from_csv_file "#{DATA_DIR}/multi_cell_call.csv"
    algorithms = {
      prediction: {
        method: "Algorithm::Caret.rf"
      }
    }
    model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
    substance = Compound.from_smiles "[O-]S(=O)(=O)[O-].[Mn+2].O"
    prediction = model.predict substance
    p prediction
  end

  # Currently skipped (see the skip message).
  def test_rf_classification
    skip "Caret rf may run into a (endless?) loop for some compounds."
    algorithms = {
      prediction: {
        method: "Algorithm::Caret.rf"
      }
    }
    training_dataset = Dataset.from_sdf_file File.join(DATA_DIR, "cas_4337.sdf")
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    # Disabled debugging lines kept from the original:
    # p model.id.to_s
    # model = Model::Lazar.find "5bbb4c0cca626909f6c8a924"
    assert_kind_of Model::LazarClassification, model
    # Symbol keys, matching how the other tests in this class read
    # model.algorithms.
    assert_equal algorithms[:prediction][:method], model.algorithms[:prediction][:method]

    substance = Compound.from_smiles "Clc1ccc(cc1)C(=O)c1ccc(cc1)OC(C(=O)O)(C)C"
    prediction = model.predict substance
    assert_equal 51, prediction[:neighbors].size
    assert_equal "nonmutagen", prediction[:value]
    assert_equal 0.1, prediction[:probabilities]["mutagen"].round(1)
    assert_equal 0.9, prediction[:probabilities]["nonmutagen"].round(1)
  end
end
|