summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author gebele <gebele@in-silico.ch> 2017-03-31 15:07:28 +0000
committer gebele <gebele@in-silico.ch> 2017-03-31 15:07:28 +0000
commit db38c345fdc119edd8a892a5b0ba2c2a4b1cbe1f (patch)
tree d03f73e581dceecf6b044e6055876dbf903486ed
parent eef5d89a92dd7dde9acf9fc063a54e1fe729a89b (diff)
set default min sim to 0.1 for classification and 0.5 for regression
-rw-r--r-- lib/model.rb 12
-rw-r--r-- test/model-classification.rb 22
-rw-r--r-- test/validation-classification.rb 4
3 files changed, 21 insertions, 17 deletions
diff --git a/lib/model.rb b/lib/model.rb
index 7cc6765..80affd5 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -68,10 +68,6 @@ module OpenTox
:method => "fingerprint",
:type => "MP2D",
},
- :similarity => {
- :method => "Algorithm::Similarity.tanimoto",
- :min => 0.5,
- },
:feature_selection => nil
}
@@ -79,10 +75,18 @@ module OpenTox
model.algorithms[:prediction] = {
:method => "Algorithm::Classification.weighted_majority_vote",
}
+ model.algorithms[:similarity] = {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.1,
+ }
elsif model.class == LazarRegression
model.algorithms[:prediction] = {
:method => "Algorithm::Caret.rf",
}
+ model.algorithms[:similarity] = {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.5,
+ }
end
elsif substance_classes.first == "OpenTox::Nanoparticle"
diff --git a/test/model-classification.rb b/test/model-classification.rb
index 0bb3e83..1424f6a 100644
--- a/test/model-classification.rb
+++ b/test/model-classification.rb
@@ -10,35 +10,35 @@ class LazarClassificationTest < MiniTest::Test
},
:similarity => {
:method => "Algorithm::Similarity.tanimoto",
- :min => 0.5
+ :min => 0.1
},
- :feature_selection => nil,
:prediction => {
:method => "Algorithm::Classification.weighted_majority_vote",
},
+ :feature_selection => nil,
}
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
model = Model::Lazar.create training_dataset: training_dataset
assert_kind_of Model::LazarClassification, model
assert_equal algorithms, model.algorithms
- substance = training_dataset.substances[49]
+ substance = training_dataset.substances[10]
prediction = model.predict substance
assert_equal "false", prediction[:value]
[ {
- :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H14N2O4/c1-5(10)2-8(7-12)3-6(11)4-9/h5-6,9-11H,2-4H2,1H3"),
+ :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
:prediction => "false",
},{
- :compound => OpenTox::Compound.from_smiles("OCC(CN(CC(O)C)N=O)O"),
+ :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
:prediction => "false",
} ].each do |example|
prediction = model.predict example[:compound]
assert_equal example[:prediction], prediction[:value]
end
- compound = Compound.from_smiles "O=NN1CCC1"
+ compound = Compound.from_smiles "CCO"
prediction = model.predict compound
assert_equal "true", prediction[:value]
- #assert_equal ["false"], prediction[:measurements]
+ assert_equal ["false"], prediction[:measurements]
# make a dataset prediction
compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
@@ -46,12 +46,12 @@ class LazarClassificationTest < MiniTest::Test
assert_equal compound_dataset.compounds, prediction_dataset.compounds
cid = prediction_dataset.compounds[7].id.to_s
- assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warnings][0]
+ assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warning]
prediction_dataset.predictions.each do |cid,pred|
- assert_equal "Could not find similar substances with experimental data in the training dataset.", pred[:warnings][0] if pred[:value].nil?
+ assert_equal "Could not find similar substances with experimental data in the training dataset.", pred[:warning] if pred[:value].nil?
end
cid = Compound.from_smiles("CCOC(=O)N").id.to_s
- assert_match "excluded", prediction_dataset.predictions[cid][:info]
+ assert_match "excluded", prediction_dataset.predictions[cid][:warning]
# cleanup
[training_dataset,model,compound_dataset,prediction_dataset].each{|o| o.delete}
end
@@ -85,7 +85,7 @@ class LazarClassificationTest < MiniTest::Test
model = Model::Lazar.create training_dataset: training_dataset
t = Time.now
2.times do
- compound = Compound.from_smiles("OCC(CN(CC(O)C)N=O)O")
+ compound = Compound.from_smiles("Clc1ccccc1NN")
prediction = model.predict compound
assert_equal "1", prediction[:value]
end
diff --git a/test/validation-classification.rb b/test/validation-classification.rb
index ac25b29..fb4c3e7 100644
--- a/test/validation-classification.rb
+++ b/test/validation-classification.rb
@@ -47,9 +47,9 @@ class ValidationClassificationTest < MiniTest::Test
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
model = Model::Lazar.create training_dataset: dataset
loo = ClassificationLeaveOneOut.create model
- assert_equal 77, loo.nr_unpredicted
+ assert_equal 14, loo.nr_unpredicted
refute_empty loo.confusion_matrix
- assert loo.accuracy > 0.74
+ assert loo.accuracy > 0.77
assert loo.weighted_accuracy > loo.accuracy, "Weighted accuracy (#{loo.weighted_accuracy}) should be larger than accuracy (#{loo.accuracy})."
end