From 6ab86c253ba0eb79b9e6a20effa2d18626accf2b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 13 Aug 2015 11:56:40 +0200 Subject: OpenBabel can (canonical smiles) instead of inchi as internal identifier to avoid OpenBabel InChi bug. --- test/compound.rb | 18 +++++++++++++----- test/lazar-fminer.rb | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ test/validation.rb | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 test/lazar-fminer.rb create mode 100644 test/validation.rb (limited to 'test') diff --git a/test/compound.rb b/test/compound.rb index 7bbba58..b45e3d0 100644 --- a/test/compound.rb +++ b/test/compound.rb @@ -4,20 +4,20 @@ class CompoundTest < MiniTest::Test def test_0_compound_from_smiles c = OpenTox::Compound.from_smiles "F[B-](F)(F)F.[Na+]" - assert_equal "InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1", c.inchi - assert_equal "[B-](F)(F)(F)F.[Na+]", c.smiles, "A failure here might be caused by a compound webservice running on 64bit architectures using an outdated version of OpenBabel. Please install OpenBabel version 2.3.2 or higher." # seems to be fixed in 2.3.2 + assert_equal "InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1", c.inchi.chomp + assert_equal "F[B-](F)(F)F.[Na+]", c.smiles, "A failure here might be caused by a compound webservice running on 64bit architectures using an outdated version of OpenBabel. Please install OpenBabel version 2.3.2 or higher." # seems to be fixed in 2.3.2 end def test_1_compound_from_smiles c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N" assert_equal "InChI=1S/C6H9NO/c1-5(4-7)3-6(2)8/h5H,3H2,1-2H3", c.inchi - assert_equal "CC(CC(=O)C)C#N", c.smiles + assert_equal "CC(C#N)CC(=O)C", c.smiles end def test_2_compound_from_smiles c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F" assert_equal "InChI=1S/C6H5N2.BF4/c7-8-6-4-2-1-3-5-6;2-1(3,4)5/h1-5H;/q+1;-1", c.inchi - assert_equal "c1ccc(cc1)[N+]#N.[B-](F)(F)(F)F", c.smiles + assert_equal "F[B-](F)(F)F.N#[N+]c1ccccc1", c.smiles end def test_compound_from_name @@ -54,6 +54,7 @@ class CompoundTest < MiniTest::Test # OpenBabel segfaults randomly during inchikey calculation def test_inchikey c = OpenTox::Compound.from_inchi "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H" + p c assert_equal "UHOVQNZJYSORNB-UHFFFAOYSA-N", c.inchikey end @@ -87,7 +88,14 @@ class CompoundTest < MiniTest::Test refute_nil c.fp4 end c = d.compounds[371] - assert_equal 19, c.neighbors.size + assert c.neighbors.size >= 19 end + def test_openbabel_segfault + inchi = "InChI=1S/C19H27NO7/c1-11-9-19(12(2)27-19)17(23)26-14-6-8-20(4)7-5-13(15(14)21)10-25-16(22)18(11,3)24/h5,11-12,14,24H,6-10H2,1-4H3/b13-5-/t11-,12-,14-,18-,19?/m1/s1" + + #r = `echo "#{inchi}" | babel -iinchi - -oinchi` + c = Compound.from_inchi(inchi) + assert_nil c + end end diff --git a/test/lazar-fminer.rb b/test/lazar-fminer.rb new file mode 100644 index 0000000..fbfa3d2 --- /dev/null +++ b/test/lazar-fminer.rb @@ -0,0 +1,51 @@ +require_relative "setup.rb" + +class LazarFminerTest < MiniTest::Test + + def test_lazar_fminer + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") + model = Model::LazarFminerClassification.create training_dataset#, feature_dataset + feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id] + assert_equal training_dataset.compounds.size, feature_dataset.compounds.size + p feature_dataset.features.size + #assert_equal 54, feature_dataset.features.size + feature_dataset.data_entries.each do |e| + assert_equal e.size, feature_dataset.features.size + end + #assert_equal 'C-C-C=C', feature_dataset.features.first.smarts + + [ { + :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"), + :prediction => "false", + :confidence => 0.25281385281385277, + :nr_neighbors => 11 + },{ + :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"), + :prediction => "false", + :confidence => 0.3639589577089577, + :nr_neighbors => 14 + }, { + :compound => Compound.from_smiles('OCCCCCCCC\C=C/CCCCCCCC'), + :prediction => "false", + :confidence => 0.5555555555555556, + :nr_neighbors => 1 + }].each do |example| + prediction = model.predict example[:compound] + + p prediction + #assert_equal example[:prediction], prediction[:value] + #assert_equal example[:confidence], prediction[:confidence] + #assert_equal example[:nr_neighbors], prediction[:neighbors].size + end + + # make a dataset prediction + compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv") + prediction = model.predict compound_dataset + assert_equal compound_dataset.compounds, prediction.compounds + + assert_match /No neighbors/, prediction.data_entries[7][2] + assert_equal "measured", prediction.data_entries[14][1] + # cleanup + [training_dataset,model,feature_dataset,compound_dataset].each{|o| o.delete} + end +end diff --git a/test/validation.rb b/test/validation.rb new file mode 100644 index 0000000..d98feb5 --- /dev/null +++ b/test/validation.rb @@ -0,0 +1,41 @@ +require_relative "setup.rb" + +class ValidationTest < MiniTest::Test + + def test_fminer_crossvalidation + dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + model = Model::LazarFminerClassification.create dataset#, features + cv = ClassificationCrossValidation.create model + p cv.accuracy + p cv.weighted_accuracy + assert cv.accuracy > 0.8 + assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) " + end + + def test_classification_crossvalidation + dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv" + model = Model::LazarClassification.create dataset#, features + cv = ClassificationCrossValidation.create model + p cv.accuracy + p cv.weighted_accuracy + assert cv.accuracy > 0.7 + assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy should be larger than unweighted accuracy." + end + + def test_regression_crossvalidation + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv" + #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv" + model = Model::LazarRegression.create dataset + cv = RegressionCrossValidation.create model + p cv.rmse + p cv.weighted_rmse + p cv.mae + p cv.weighted_mae + `inkview #{cv.plot}` + assert cv.rmse < 30, "RMSE > 30" + assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) " + assert cv.mae < 12 + assert cv.weighted_mae < cv.mae + end + +end -- cgit v1.2.3