minor test fixes
author helma@in-silico.ch <helma@in-silico.ch>
Fri, 16 Nov 2018 21:17:55 +0000 (22:17 +0100)
committer helma@in-silico.ch <helma@in-silico.ch>
Fri, 16 Nov 2018 21:17:55 +0000 (22:17 +0100)
lib/lazar.rb
test/classification-model.rb
test/data/input_53.tsv
test/dataset.rb
test/regression-validation.rb

index a077626..2a3f749 100644 (file)
@@ -77,7 +77,6 @@ CLASSES = ["Feature","Substance","Dataset","CrossValidation","LeaveOneOutValidat
 [ # be aware of the require sequence as it affects class/method overwrites
   "overwrite.rb",
   "rest-client-wrapper.rb", 
-  #"error.rb",
   "opentox.rb",
   "feature.rb",
   "physchem.rb",
index 8cbd4bb..2032bf8 100644 (file)
@@ -89,7 +89,7 @@ class ClassificationModelTest < MiniTest::Test
     assert_equal ["non-carcinogenic"], result.values(result.compounds[5], prediction_feature)
     assert_nil result.predictions[result.compounds.first][:value]
     assert_equal "carcinogenic", result.predictions[result.compounds[1]][:value]
-    assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["no"].round(2)
+    assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["non-carcinogenic"].round(2)
   end
 
   def test_carcinogenicity_rf_classification
index c46fdd4..0d7594b 100644 (file)
@@ -1,4 +1,4 @@
-Id     Smiles\r
+ID     Smiles\r
 123-30-8       Oc1ccc(N)cc1\r
 68391-25-3     OC(COc1ccccc1)CNc2ccc(cc2)Cc3ccc(N)cc3\r
 62-53-3        Nc1ccccc1\r
index 543a359..8e230e0 100644 (file)
@@ -29,35 +29,24 @@ class DatasetTest < MiniTest::Test
     # TODO regression import
   end
 
-  def test_import_csv_with_id
+  def test_import_csv_tsv_with_id
     ["csv","tsv"].each do |ext|
       d = Dataset.from_csv_file "#{DATA_DIR}/input_53.#{ext}"
       assert_equal 53, d.compounds.size
       assert_equal 2, d.features.size
       f = d.features[1]
-      assert_equal "Id", f.name
+      assert_equal "ID", f.name
       assert_equal OriginalId, f.class
       assert_equal ["123-30-8"], d.values(d.compounds.first,f)
     end
   end
 
-  def test_import_tsv_with_id
-    d = Dataset.from_csv_file "#{DATA_DIR}/input_53.tsv"
-    assert_equal 53, d.compounds.size
-    assert_equal 2, d.features.size
-    f = d.features[1]
-    assert_equal "Id", f.name
-    assert_equal OriginalId, f.class
-    assert_equal ["123-30-8"], d.values(d.compounds.first,f)
-  end
-
   def test_import_sdf
     d = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf"
     assert_equal 36, d.features.size
     assert_kind_of NumericSubstanceProperty, d.substance_property_features[1]
     assert_equal NominalSubstanceProperty, d.substance_property_features.last.class
     assert_equal 602, d.compounds.size
-    #p d.warnings
     assert_match "PUBCHEM_XLOGP3_AA", d.warnings.compact.last
   end
 
@@ -95,16 +84,12 @@ class DatasetTest < MiniTest::Test
       "InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3",
       "InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3",
     ]
-    errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ]
     f = File.join Download::DATA, "Carcinogenicity-Rodents.csv"
     d = OpenTox::Dataset.from_csv_file f 
     csv = CSV.read f
     assert_equal NominalBioActivity, d.bioactivity_features.first.class
     assert_equal 1100, d.compounds.size
-    assert_equal csv.first.size-1, d.bioactivity_features.size
-    errors.each do |smi|
-      assert_match smi, d.warnings.join
-    end
+    assert_equal csv.first.size-2, d.bioactivity_features.size
     duplicates.each do |inchi|
       refute_empty d.values(Compound.from_inchi(inchi),d.warnings_features.first)
     end
@@ -189,12 +174,11 @@ class DatasetTest < MiniTest::Test
     efsa = Dataset.from_csv_file "#{Download::DATA}/parts/efsa.csv"
     datasets = [hansen,efsa,kazius]
     map = {"mutagen" => "mutagenic", "nonmutagen" => "non-mutagenic"}
-    dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: false, remove_duplicates: true
+    dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: true, remove_duplicates: true
     assert_equal 8281, dataset.compounds.size
     assert_equal 9, dataset.features.size
     c = Compound.from_smiles("C/C=C/C=O")
-    assert_equal ["mutagen"], dataset.values(c,dataset.merged_features.first)
-    #File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
+    assert_equal ["mutagenic"], dataset.values(c,dataset.merged_features.first)
   end
 
   # serialisation
index 7dbe354..65bec63 100644 (file)
@@ -12,7 +12,7 @@ class RegressionValidationTest < MiniTest::Test
     cv = RegressionCrossValidation.create model
     assert cv.rmse[:all] < 1.5, "RMSE #{cv.rmse[:all]} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
     assert cv.mae[:all] < 1.1, "MAE #{cv.mae[:all]} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
-    assert cv.within_prediction_interval[:all]/cv.nr_predictions[:all] > 0.8, "Only #{(100*cv.within_prediction_interval[:all]/cv.nr_predictions[:all]).round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
+    assert cv.within_prediction_interval[:all]/cv.nr_predictions[:all].to_f > 0.8, "Only #{(100.0*cv.within_prediction_interval[:all]/cv.nr_predictions[:all]).round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
   end
 
   # parameters