summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2015-08-13 11:56:40 +0200
committerChristoph Helma <helma@in-silico.ch>2015-08-13 12:04:52 +0200
commit6ab86c253ba0eb79b9e6a20effa2d18626accf2b (patch)
tree508eef99b34eb495493444663af2dd72e138bba6 /test
parentb7cd3ebbb858a8891c35c45896f1bdd525f3534e (diff)
OpenBabel can (canonical smiles) instead of inchi as internal identifier to avoid OpenBabel InChi bug.
Diffstat (limited to 'test')
-rw-r--r--test/compound.rb18
-rw-r--r--test/lazar-fminer.rb51
-rw-r--r--test/validation.rb41
3 files changed, 105 insertions, 5 deletions
diff --git a/test/compound.rb b/test/compound.rb
index 7bbba58..b45e3d0 100644
--- a/test/compound.rb
+++ b/test/compound.rb
@@ -4,20 +4,20 @@ class CompoundTest < MiniTest::Test
def test_0_compound_from_smiles
c = OpenTox::Compound.from_smiles "F[B-](F)(F)F.[Na+]"
- assert_equal "InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1", c.inchi
- assert_equal "[B-](F)(F)(F)F.[Na+]", c.smiles, "A failure here might be caused by a compound webservice running on 64bit architectures using an outdated version of OpenBabel. Please install OpenBabel version 2.3.2 or higher." # seems to be fixed in 2.3.2
+ assert_equal "InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1", c.inchi.chomp
+ assert_equal "F[B-](F)(F)F.[Na+]", c.smiles, "A failure here might be caused by a compound webservice running on 64bit architectures using an outdated version of OpenBabel. Please install OpenBabel version 2.3.2 or higher." # seems to be fixed in 2.3.2
end
def test_1_compound_from_smiles
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
assert_equal "InChI=1S/C6H9NO/c1-5(4-7)3-6(2)8/h5H,3H2,1-2H3", c.inchi
- assert_equal "CC(CC(=O)C)C#N", c.smiles
+ assert_equal "CC(C#N)CC(=O)C", c.smiles
end
def test_2_compound_from_smiles
c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
assert_equal "InChI=1S/C6H5N2.BF4/c7-8-6-4-2-1-3-5-6;2-1(3,4)5/h1-5H;/q+1;-1", c.inchi
- assert_equal "c1ccc(cc1)[N+]#N.[B-](F)(F)(F)F", c.smiles
+ assert_equal "F[B-](F)(F)F.N#[N+]c1ccccc1", c.smiles
end
def test_compound_from_name
@@ -54,6 +54,7 @@ class CompoundTest < MiniTest::Test
# OpenBabel segfaults randomly during inchikey calculation
def test_inchikey
c = OpenTox::Compound.from_inchi "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"
+ p c
assert_equal "UHOVQNZJYSORNB-UHFFFAOYSA-N", c.inchikey
end
@@ -87,7 +88,14 @@ class CompoundTest < MiniTest::Test
refute_nil c.fp4
end
c = d.compounds[371]
- assert_equal 19, c.neighbors.size
+ assert c.neighbors.size >= 19
end
+ def test_openbabel_segfault
+ inchi = "InChI=1S/C19H27NO7/c1-11-9-19(12(2)27-19)17(23)26-14-6-8-20(4)7-5-13(15(14)21)10-25-16(22)18(11,3)24/h5,11-12,14,24H,6-10H2,1-4H3/b13-5-/t11-,12-,14-,18-,19?/m1/s1"
+
+ #r = `echo "#{inchi}" | babel -iinchi - -oinchi`
+ c = Compound.from_inchi(inchi)
+ assert_nil c
+ end
end
diff --git a/test/lazar-fminer.rb b/test/lazar-fminer.rb
new file mode 100644
index 0000000..fbfa3d2
--- /dev/null
+++ b/test/lazar-fminer.rb
@@ -0,0 +1,51 @@
+require_relative "setup.rb"
+
+class LazarFminerTest < MiniTest::Test
+
+ def test_lazar_fminer
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = Model::LazarFminerClassification.create training_dataset#, feature_dataset
+ feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id]
+ assert_equal training_dataset.compounds.size, feature_dataset.compounds.size
+ p feature_dataset.features.size
+ #assert_equal 54, feature_dataset.features.size
+ feature_dataset.data_entries.each do |e|
+ assert_equal e.size, feature_dataset.features.size
+ end
+ #assert_equal 'C-C-C=C', feature_dataset.features.first.smarts
+
+ [ {
+ :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
+ :prediction => "false",
+ :confidence => 0.25281385281385277,
+ :nr_neighbors => 11
+ },{
+ :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
+ :prediction => "false",
+ :confidence => 0.3639589577089577,
+ :nr_neighbors => 14
+ }, {
+ :compound => Compound.from_smiles('OCCCCCCCC\C=C/CCCCCCCC'),
+ :prediction => "false",
+ :confidence => 0.5555555555555556,
+ :nr_neighbors => 1
+ }].each do |example|
+ prediction = model.predict example[:compound]
+
+ p prediction
+ #assert_equal example[:prediction], prediction[:value]
+ #assert_equal example[:confidence], prediction[:confidence]
+ #assert_equal example[:nr_neighbors], prediction[:neighbors].size
+ end
+
+ # make a dataset prediction
+ compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
+ prediction = model.predict compound_dataset
+ assert_equal compound_dataset.compounds, prediction.compounds
+
+ assert_match /No neighbors/, prediction.data_entries[7][2]
+ assert_equal "measured", prediction.data_entries[14][1]
+ # cleanup
+ [training_dataset,model,feature_dataset,compound_dataset].each{|o| o.delete}
+ end
+end
diff --git a/test/validation.rb b/test/validation.rb
new file mode 100644
index 0000000..d98feb5
--- /dev/null
+++ b/test/validation.rb
@@ -0,0 +1,41 @@
+require_relative "setup.rb"
+
+class ValidationTest < MiniTest::Test
+
+ def test_fminer_crossvalidation
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ model = Model::LazarFminerClassification.create dataset#, features
+ cv = ClassificationCrossValidation.create model
+ p cv.accuracy
+ p cv.weighted_accuracy
+ assert cv.accuracy > 0.8
+ assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) "
+ end
+
+ def test_classification_crossvalidation
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ model = Model::LazarClassification.create dataset#, features
+ cv = ClassificationCrossValidation.create model
+ p cv.accuracy
+ p cv.weighted_accuracy
+ assert cv.accuracy > 0.7
+ assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy should be larger than unweighted accuracy."
+ end
+
+ def test_regression_crossvalidation
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
+ #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv"
+ model = Model::LazarRegression.create dataset
+ cv = RegressionCrossValidation.create model
+ p cv.rmse
+ p cv.weighted_rmse
+ p cv.mae
+ p cv.weighted_mae
+ `inkview #{cv.plot}`
+ assert cv.rmse < 30, "RMSE > 30"
+ assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) "
+ assert cv.mae < 12
+ assert cv.weighted_mae < cv.mae
+ end
+
+end