From c12d5bb40ab2a0783f755c3238a20448b9a5a42e Mon Sep 17 00:00:00 2001 From: "helma@in-silico.ch" Date: Fri, 16 Nov 2018 22:17:55 +0100 Subject: minor test fixes --- lib/lazar.rb | 1 - test/classification-model.rb | 2 +- test/data/input_53.tsv | 2 +- test/dataset.rb | 26 +++++--------------------- test/regression-validation.rb | 2 +- 5 files changed, 8 insertions(+), 25 deletions(-) diff --git a/lib/lazar.rb b/lib/lazar.rb index a077626..2a3f749 100644 --- a/lib/lazar.rb +++ b/lib/lazar.rb @@ -77,7 +77,6 @@ CLASSES = ["Feature","Substance","Dataset","CrossValidation","LeaveOneOutValidat [ # be aware of the require sequence as it affects class/method overwrites "overwrite.rb", "rest-client-wrapper.rb", - #"error.rb", "opentox.rb", "feature.rb", "physchem.rb", diff --git a/test/classification-model.rb b/test/classification-model.rb index 8cbd4bb..2032bf8 100644 --- a/test/classification-model.rb +++ b/test/classification-model.rb @@ -89,7 +89,7 @@ class ClassificationModelTest < MiniTest::Test assert_equal ["non-carcinogenic"], result.values(result.compounds[5], prediction_feature) assert_nil result.predictions[result.compounds.first][:value] assert_equal "carcinogenic", result.predictions[result.compounds[1]][:value] - assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["no"].round(2) + assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["non-carcinogenic"].round(2) end def test_carcinogenicity_rf_classification diff --git a/test/data/input_53.tsv b/test/data/input_53.tsv index c46fdd4..0d7594b 100644 --- a/test/data/input_53.tsv +++ b/test/data/input_53.tsv @@ -1,4 +1,4 @@ -Id Smiles +ID Smiles 123-30-8 Oc1ccc(N)cc1 68391-25-3 OC(COc1ccccc1)CNc2ccc(cc2)Cc3ccc(N)cc3 62-53-3 Nc1ccccc1 diff --git a/test/dataset.rb b/test/dataset.rb index 543a359..8e230e0 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -29,35 +29,24 @@ class DatasetTest < MiniTest::Test # TODO regression import end - def test_import_csv_with_id + def test_import_csv_tsv_with_id ["csv","tsv"].each do |ext| d = Dataset.from_csv_file "#{DATA_DIR}/input_53.#{ext}" assert_equal 53, d.compounds.size assert_equal 2, d.features.size f = d.features[1] - assert_equal "Id", f.name + assert_equal "ID", f.name assert_equal OriginalId, f.class assert_equal ["123-30-8"], d.values(d.compounds.first,f) end end - def test_import_tsv_with_id - d = Dataset.from_csv_file "#{DATA_DIR}/input_53.tsv" - assert_equal 53, d.compounds.size - assert_equal 2, d.features.size - f = d.features[1] - assert_equal "Id", f.name - assert_equal OriginalId, f.class - assert_equal ["123-30-8"], d.values(d.compounds.first,f) - end - def test_import_sdf d = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf" assert_equal 36, d.features.size assert_kind_of NumericSubstanceProperty, d.substance_property_features[1] assert_equal NominalSubstanceProperty, d.substance_property_features.last.class assert_equal 602, d.compounds.size - #p d.warnings assert_match "PUBCHEM_XLOGP3_AA", d.warnings.compact.last end @@ -95,16 +84,12 @@ class DatasetTest < MiniTest::Test "InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3", "InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3", ] - errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ] f = File.join Download::DATA, "Carcinogenicity-Rodents.csv" d = OpenTox::Dataset.from_csv_file f csv = CSV.read f assert_equal NominalBioActivity, d.bioactivity_features.first.class assert_equal 1100, d.compounds.size - assert_equal csv.first.size-1, d.bioactivity_features.size - errors.each do |smi| - assert_match smi, d.warnings.join - end + assert_equal csv.first.size-2, d.bioactivity_features.size duplicates.each do |inchi| refute_empty d.values(Compound.from_inchi(inchi),d.warnings_features.first) end @@ -189,12 +174,11 @@ class DatasetTest < MiniTest::Test efsa = Dataset.from_csv_file "#{Download::DATA}/parts/efsa.csv" datasets = [hansen,efsa,kazius] map = {"mutagen" => "mutagenic", "nonmutagen" => "non-mutagenic"} - dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: false, remove_duplicates: true + dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: true, remove_duplicates: true assert_equal 8281, dataset.compounds.size assert_equal 9, dataset.features.size c = Compound.from_smiles("C/C=C/C=O") - assert_equal ["mutagen"], dataset.values(c,dataset.merged_features.first) - #File.open("tmp.csv","w+"){|f| f.puts d.to_csv} + assert_equal ["mutagenic"], dataset.values(c,dataset.merged_features.first) end # serialisation diff --git a/test/regression-validation.rb b/test/regression-validation.rb index 7dbe354..65bec63 100644 --- a/test/regression-validation.rb +++ b/test/regression-validation.rb @@ -12,7 +12,7 @@ class RegressionValidationTest < MiniTest::Test cv = RegressionCrossValidation.create model assert cv.rmse[:all] < 1.5, "RMSE #{cv.rmse[:all]} should be smaller than 1.5, this may occur due to unfavorable training/test set splits" assert cv.mae[:all] < 1.1, "MAE #{cv.mae[:all]} should be smaller than 1.1, this may occur due to unfavorable training/test set splits" - assert cv.within_prediction_interval[:all]/cv.nr_predictions[:all] > 0.8, "Only #{(100*cv.within_prediction_interval[:all]/cv.nr_predictions[:all]).round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits" + assert cv.within_prediction_interval[:all]/cv.nr_predictions[:all].to_f > 0.8, "Only #{(100.0*cv.within_prediction_interval[:all]/cv.nr_predictions[:all]).round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits" end # parameters -- cgit v1.2.3