summaryrefslogtreecommitdiff
path: root/test/classification-model.rb
diff options
context:
space:
mode:
Diffstat (limited to 'test/classification-model.rb')
-rw-r--r--test/classification-model.rb138
1 files changed, 138 insertions, 0 deletions
diff --git a/test/classification-model.rb b/test/classification-model.rb
new file mode 100644
index 0000000..c41b211
--- /dev/null
+++ b/test/classification-model.rb
@@ -0,0 +1,138 @@
+require_relative "setup.rb"
+
+class ClassificationModelTest < MiniTest::Test
+
+ def test_classification_default
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D"
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => [0.5,0.2]
+ },
+ :prediction => {
+ :method => "Algorithm::Classification.weighted_majority_vote",
+ },
+ :feature_selection => nil,
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = Model::Lazar.create training_dataset: training_dataset
+ assert_kind_of Model::LazarClassification, model
+ assert_equal algorithms, model.algorithms
+ [ {
+ :compound => OpenTox::Compound.from_smiles("OCC(CN(CC(O)C)N=O)O"),
+ :prediction => "false",
+ },{
+ :compound => OpenTox::Compound.from_smiles("O=CNc1scc(n1)c1ccc(o1)[N+](=O)[O-]"),
+ :prediction => "true",
+ } ].each do |example|
+ prediction = model.predict example[:compound]
+ assert_equal example[:prediction], prediction[:value]
+ end
+ end
+
+ def test_export_import
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ export = Model::Lazar.create training_dataset: training_dataset
+ File.open("tmp.csv","w+"){|f| f.puts export.to_json }
+ import = Model::LazarClassification.new JSON.parse(File.read "tmp.csv")
+ assert_kind_of Model::LazarClassification, import
+ import.algorithms.each{|k,v| v.transform_keys!(&:to_sym) if v.is_a? Hash}
+ import.algorithms.transform_keys!(&:to_sym)
+ assert_equal export.algorithms, import.algorithms
+ [ {
+ :compound => OpenTox::Compound.from_smiles("OCC(CN(CC(O)C)N=O)O"),
+ :prediction => "false",
+ },{
+ :compound => OpenTox::Compound.from_smiles("O=CNc1scc(n1)c1ccc(o1)[N+](=O)[O-]"),
+ :prediction => "true",
+ } ].each do |example|
+ prediction = import.predict example[:compound]
+ assert_equal example[:prediction], prediction[:value]
+ end
+ end
+
+ def test_classification_parameters
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MACCS"
+ },
+ :similarity => {
+ :min => [0.4,0.1]
+ },
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarClassification, model
+ assert_equal "Algorithm::Classification.weighted_majority_vote", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_equal "false", prediction[:value]
+ assert_equal 4, prediction[:neighbors].size
+ end
+
+ def test_dataset_prediction
+ training_dataset = Dataset.from_csv_file File.join(Download::DATA,"Carcinogenicity-Rodents.csv")
+ test_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = Model::Lazar.create training_dataset: training_dataset
+ result = model.predict test_dataset
+ assert_kind_of Dataset, result
+ assert_equal 7, result.features.size
+ assert_equal 85, result.compounds.size
+ prediction_feature = result.prediction_feature
+ assert_equal ["carcinogenic"], result.values(result.compounds[1], prediction_feature)
+ assert_equal ["non-carcinogenic"], result.values(result.compounds[5], prediction_feature)
+ assert_nil result.predictions[result.compounds.first][:value]
+ assert_equal "carcinogenic", result.predictions[result.compounds[1]][:value]
+ assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["non-carcinogenic"].round(2)
+ assert_match /Similar/i, result.predictions[result.compounds[1]][:confidence]
+ csv = result.to_prediction_csv
+ rows = csv.split("\n")
+ assert_equal "ID,Original SMILES,Canonical SMILES,Prediction,Confidence,Probability: carcinogenic,Probability: non-carcinogenic,Measurements", rows[0]
+ items = rows[2].split(",")
+ assert_equal "carcinogenic", items[3]
+ assert_equal 0.27, items[6].to_f.round(2) # probabilities
+ end
+
+ def test_carcinogenicity_rf_classification
+ skip "Caret rf may run into a (endless?) loop for some compounds."
+ dataset = Dataset.from_csv_file File.join(Download::DATA,"Carcinogenicity-Rodents.csv")
+ algorithms = {
+ :prediction => {
+ :method => "Algorithm::Caret.rf",
+ },
+ }
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
+ substance = Compound.from_smiles "[O-]S(=O)(=O)[O-].[Mn+2].O"
+ prediction = model.predict substance
+ p prediction
+
+ end
+
+ def test_rf_classification
+ skip "Caret rf may run into a (endless?) loop for some compounds."
+ algorithms = {
+ :prediction => {
+ :method => "Algorithm::Caret.rf",
+ },
+ }
+ training_dataset = Dataset.from_sdf_file File.join(DATA_DIR,"cas_4337.sdf")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ #p model.id.to_s
+ #model = Model::Lazar.find "5bbb4c0cca626909f6c8a924"
+ assert_kind_of Model::LazarClassification, model
+ assert_equal algorithms[:prediction][:method], model.algorithms["prediction"]["method"]
+ substance = Compound.from_smiles "Clc1ccc(cc1)C(=O)c1ccc(cc1)OC(C(=O)O)(C)C"
+ prediction = model.predict substance
+ assert_equal 51, prediction[:neighbors].size
+ assert_equal "nonmutagen", prediction[:value]
+ assert_equal 0.1, prediction[:probabilities]["mutagen"].round(1)
+ assert_equal 0.9, prediction[:probabilities]["nonmutagen"].round(1)
+ end
+
+end