From 455da06aa6459da0d25b286ca6cb866ff64c4c34 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Thu, 20 Jun 2019 22:01:50 +0200
Subject: separate csv serialisations for batch predictions and training data, repeated measurements in mutagenicity dataset fixed, daphnia import fixed, CENTRAL_MONGO_IP removed

---
 test/classification-model.rb | 10 ++++++++--
 test/dataset.rb              | 18 +++++++++++++++---
 test/regression-model.rb     | 17 +++++++++++++++--
 3 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'test')

diff --git a/test/classification-model.rb b/test/classification-model.rb
index 79ccb98..c41b211 100644
--- a/test/classification-model.rb
+++ b/test/classification-model.rb
@@ -84,13 +84,19 @@ class ClassificationModelTest < MiniTest::Test
     assert_kind_of Dataset, result
     assert_equal 7, result.features.size
     assert_equal 85, result.compounds.size
-    prediction_feature = result.prediction_features.first
+    prediction_feature = result.prediction_feature
     assert_equal ["carcinogenic"], result.values(result.compounds[1], prediction_feature)
     assert_equal ["non-carcinogenic"], result.values(result.compounds[5], prediction_feature)
     assert_nil result.predictions[result.compounds.first][:value]
     assert_equal "carcinogenic", result.predictions[result.compounds[1]][:value]
     assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["non-carcinogenic"].round(2)
-    assert_match /High/i, result.predictions[result.compounds[1]][:confidence]
+    assert_match /Similar/i, result.predictions[result.compounds[1]][:confidence]
+    csv = result.to_prediction_csv
+    rows = csv.split("\n")
+    assert_equal "ID,Original SMILES,Canonical SMILES,Prediction,Confidence,Probability: carcinogenic,Probability: non-carcinogenic,Measurements", rows[0]
+    items = rows[2].split(",")
+    assert_equal "carcinogenic", items[3]
+    assert_equal 0.27, items[6].to_f.round(2) # probabilities
   end
 
   def test_carcinogenicity_rf_classification
diff --git a/test/dataset.rb b/test/dataset.rb
index 8e230e0..b978512 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -137,7 +137,6 @@ class DatasetTest < MiniTest::Test
       d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
       assert_equal Dataset, d.class
       refute_nil d.id
-      dataset = Dataset.find d.id
       assert_equal 3, d.compounds.size
     end
   end
@@ -175,10 +174,16 @@ class DatasetTest < MiniTest::Test
     datasets = [hansen,efsa,kazius]
     map = {"mutagen" => "mutagenic", "nonmutagen" => "non-mutagenic"}
     dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: true, remove_duplicates: true
-    assert_equal 8281, dataset.compounds.size
-    assert_equal 9, dataset.features.size
+    csv = dataset.to_training_csv
+    rows = csv.split("\n")
+    header = rows.shift
+    assert_equal "Canonical SMILES,Mutagenicity",header
+    values = rows.collect{|r| r.split(",")[1]}.uniq
+    assert_equal 2, values.size
+    assert_equal 8290, dataset.compounds.size
     c = Compound.from_smiles("C/C=C/C=O")
     assert_equal ["mutagenic"], dataset.values(c,dataset.merged_features.first)
+    assert_equal 9, dataset.features.size
   end
 
   # serialisation
@@ -203,6 +208,13 @@ class DatasetTest < MiniTest::Test
   end
 
   # special cases/details
+
+  def test_daphnia_import
+    d = Dataset.from_csv_file File.join(File.dirname(__FILE__),"..","data", "Acute_toxicity-Daphnia_magna.csv")
+    assert_equal 3, d.features.size # assert(3, msg) is always truthy; compare the counts explicitly
+    assert_equal 546, d.compounds.size # same: a bare assert(546, msg) could never fail
+    puts d.to_training_csv
+  end
 
   def test_dataset_accessors
     d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
diff --git a/test/regression-model.rb b/test/regression-model.rb
index 7f667dc..3b41171 100644
--- a/test/regression-model.rb
+++ b/test/regression-model.rb
@@ -173,13 +173,26 @@ class LazarRegressionTest < MiniTest::Test
     model = Model::Lazar.create training_dataset: training_dataset
     result = model.predict training_dataset
     assert_kind_of Dataset, result
-    assert_equal 6, result.features.size
+    assert_equal 8, result.features.size
     assert_equal 88, result.compounds.size
     assert_equal [1.95], result.values(result.compounds.first, result.bioactivity_features[0]).collect{|v| v.round(2)}
     assert_equal [1.37], result.values(result.compounds[6], result.bioactivity_features[0]).collect{|v| v.round(2)}
-    assert_equal [1.79], result.values(result.compounds[6], result.prediction_features[0]).collect{|v| v.round(2)}
+    assert_equal [1.79], result.values(result.compounds[6], result.prediction_feature).collect{|v| v.round(2)}
     assert_equal [1.84,1.73], result.values(result.compounds[7], result.bioactivity_features[0]).collect{|v| v.round(2)}
     assert_match /Low/i, result.predictions[result.compounds[6]][:confidence]
+    csv = result.to_prediction_csv
+    rows = csv.split("\n")
+    assert_equal "ID,Original SMILES,Canonical SMILES,Prediction,Confidence,Lower prediction interval,Upper prediction interval,Measurements", rows[0]
+    items = rows[3].split(",")
+    # prediction and measurement within prediction interval
+    prediction = items[3].to_f
+    pi_low = items[5].to_f
+    pi_hi = items[6].to_f
+    measurement = items[7].to_f
+    [prediction,measurement].each do |v|
+      assert(v > pi_low)
+      assert(v < pi_hi)
+    end
   end
 
 end
-- 
cgit v1.2.3