From db38c345fdc119edd8a892a5b0ba2c2a4b1cbe1f Mon Sep 17 00:00:00 2001 From: gebele Date: Fri, 31 Mar 2017 15:07:28 +0000 Subject: set default min sim to 0.1 for classification and 0.5 for regression --- lib/model.rb | 12 ++++++++---- test/model-classification.rb | 22 +++++++++++----------- test/validation-classification.rb | 4 ++-- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/lib/model.rb b/lib/model.rb index 7cc6765..80affd5 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -68,10 +68,6 @@ module OpenTox :method => "fingerprint", :type => "MP2D", }, - :similarity => { - :method => "Algorithm::Similarity.tanimoto", - :min => 0.5, - }, :feature_selection => nil } @@ -79,10 +75,18 @@ module OpenTox model.algorithms[:prediction] = { :method => "Algorithm::Classification.weighted_majority_vote", } + model.algorithms[:similarity] = { + :method => "Algorithm::Similarity.tanimoto", + :min => 0.1, + } elsif model.class == LazarRegression model.algorithms[:prediction] = { :method => "Algorithm::Caret.rf", } + model.algorithms[:similarity] = { + :method => "Algorithm::Similarity.tanimoto", + :min => 0.5, + } end elsif substance_classes.first == "OpenTox::Nanoparticle" diff --git a/test/model-classification.rb b/test/model-classification.rb index 0bb3e83..1424f6a 100644 --- a/test/model-classification.rb +++ b/test/model-classification.rb @@ -10,35 +10,35 @@ class LazarClassificationTest < MiniTest::Test }, :similarity => { :method => "Algorithm::Similarity.tanimoto", - :min => 0.5 + :min => 0.1 }, - :feature_selection => nil, :prediction => { :method => "Algorithm::Classification.weighted_majority_vote", }, + :feature_selection => nil, } training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") model = Model::Lazar.create training_dataset: training_dataset assert_kind_of Model::LazarClassification, model assert_equal algorithms, model.algorithms - substance = training_dataset.substances[49] + substance = training_dataset.substances[10] prediction = model.predict substance assert_equal "false", prediction[:value] [ { - :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H14N2O4/c1-5(10)2-8(7-12)3-6(11)4-9/h5-6,9-11H,2-4H2,1H3"), + :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"), :prediction => "false", },{ - :compound => OpenTox::Compound.from_smiles("OCC(CN(CC(O)C)N=O)O"), + :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"), :prediction => "false", } ].each do |example| prediction = model.predict example[:compound] assert_equal example[:prediction], prediction[:value] end - compound = Compound.from_smiles "O=NN1CCC1" + compound = Compound.from_smiles "CCO" prediction = model.predict compound assert_equal "true", prediction[:value] - #assert_equal ["false"], prediction[:measurements] + assert_equal ["false"], prediction[:measurements] # make a dataset prediction compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") @@ -46,12 +46,12 @@ class LazarClassificationTest < MiniTest::Test assert_equal compound_dataset.compounds, prediction_dataset.compounds cid = prediction_dataset.compounds[7].id.to_s - assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warnings][0] + assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warning] prediction_dataset.predictions.each do |cid,pred| - assert_equal "Could not find similar substances with experimental data in the training dataset.", pred[:warnings][0] if pred[:value].nil? + assert_equal "Could not find similar substances with experimental data in the training dataset.", pred[:warning] if pred[:value].nil? end cid = Compound.from_smiles("CCOC(=O)N").id.to_s - assert_match "excluded", prediction_dataset.predictions[cid][:info] + assert_match "excluded", prediction_dataset.predictions[cid][:warning] # cleanup [training_dataset,model,compound_dataset,prediction_dataset].each{|o| o.delete} end @@ -85,7 +85,7 @@ class LazarClassificationTest < MiniTest::Test model = Model::Lazar.create training_dataset: training_dataset t = Time.now 2.times do - compound = Compound.from_smiles("OCC(CN(CC(O)C)N=O)O") + compound = Compound.from_smiles("Clc1ccccc1NN") prediction = model.predict compound assert_equal "1", prediction[:value] end diff --git a/test/validation-classification.rb b/test/validation-classification.rb index ac25b29..fb4c3e7 100644 --- a/test/validation-classification.rb +++ b/test/validation-classification.rb @@ -47,9 +47,9 @@ class ValidationClassificationTest < MiniTest::Test dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" model = Model::Lazar.create training_dataset: dataset loo = ClassificationLeaveOneOut.create model - assert_equal 77, loo.nr_unpredicted + assert_equal 14, loo.nr_unpredicted refute_empty loo.confusion_matrix - assert loo.accuracy > 0.74 + assert loo.accuracy > 0.77 assert loo.weighted_accuracy > loo.accuracy, "Weighted accuracy (#{loo.weighted_accuracy}) should be larger than accuracy (#{loo.accuracy})." end -- cgit v1.2.3