summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2019-06-20 22:01:50 +0200
committerChristoph Helma <helma@in-silico.ch>2019-06-20 22:01:50 +0200
commit455da06aa6459da0d25b286ca6cb866ff64c4c34 (patch)
treed0ed8fcf720a02742da781669251f379b8fd07f0 /test
parent1b44e0cd76f2ead93b8b3fa0f970c85ef32a4b14 (diff)
separate csv serialisations for batch predictions and training data, repeated measurements in mutagenicity dataset fixed, daphnia import fixed, CENTRAL_MONGO_IP removed
Diffstat (limited to 'test')
-rw-r--r--test/classification-model.rb10
-rw-r--r--test/dataset.rb18
-rw-r--r--test/regression-model.rb17
3 files changed, 38 insertions, 7 deletions
diff --git a/test/classification-model.rb b/test/classification-model.rb
index 79ccb98..c41b211 100644
--- a/test/classification-model.rb
+++ b/test/classification-model.rb
@@ -84,13 +84,19 @@ class ClassificationModelTest < MiniTest::Test
assert_kind_of Dataset, result
assert_equal 7, result.features.size
assert_equal 85, result.compounds.size
- prediction_feature = result.prediction_features.first
+ prediction_feature = result.prediction_feature
assert_equal ["carcinogenic"], result.values(result.compounds[1], prediction_feature)
assert_equal ["non-carcinogenic"], result.values(result.compounds[5], prediction_feature)
assert_nil result.predictions[result.compounds.first][:value]
assert_equal "carcinogenic", result.predictions[result.compounds[1]][:value]
assert_equal 0.27, result.predictions[result.compounds[1]][:probabilities]["non-carcinogenic"].round(2)
- assert_match /High/i, result.predictions[result.compounds[1]][:confidence]
+ assert_match /Similar/i, result.predictions[result.compounds[1]][:confidence]
+ csv = result.to_prediction_csv
+ rows = csv.split("\n")
+ assert_equal "ID,Original SMILES,Canonical SMILES,Prediction,Confidence,Probability: carcinogenic,Probability: non-carcinogenic,Measurements", rows[0]
+ items = rows[2].split(",")
+ assert_equal "carcinogenic", items[3]
+ assert_equal 0.27, items[6].to_f.round(2) # probabilities
end
def test_carcinogenicity_rf_classification
diff --git a/test/dataset.rb b/test/dataset.rb
index 8e230e0..b978512 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -137,7 +137,6 @@ class DatasetTest < MiniTest::Test
d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
assert_equal Dataset, d.class
refute_nil d.id
- dataset = Dataset.find d.id
assert_equal 3, d.compounds.size
end
end
@@ -175,10 +174,16 @@ class DatasetTest < MiniTest::Test
datasets = [hansen,efsa,kazius]
map = {"mutagen" => "mutagenic", "nonmutagen" => "non-mutagenic"}
dataset = Dataset.merge datasets: datasets, features: datasets.collect{|d| d.bioactivity_features.first}, value_maps: [nil,nil,map], keep_original_features: true, remove_duplicates: true
- assert_equal 8281, dataset.compounds.size
- assert_equal 9, dataset.features.size
+ csv = dataset.to_training_csv
+ rows = csv.split("\n")
+ header = rows.shift
+ assert_equal "Canonical SMILES,Mutagenicity",header
+ values = rows.collect{|r| r.split(",")[1]}.uniq
+ assert_equal 2, values.size
+ assert_equal 8290, dataset.compounds.size
c = Compound.from_smiles("C/C=C/C=O")
assert_equal ["mutagenic"], dataset.values(c,dataset.merged_features.first)
+ assert_equal 9, dataset.features.size
end
# serialisation
@@ -203,6 +208,13 @@ class DatasetTest < MiniTest::Test
end
# special cases/details
+
+ def test_daphnia_import
+ d = Dataset.from_csv_file File.join(File.dirname(__FILE__),"..","data", "Acute_toxicity-Daphnia_magna.csv")
+ assert 3, d.features.size
+ assert 546, d.compounds.size
+ puts d.to_training_csv
+ end
def test_dataset_accessors
d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
diff --git a/test/regression-model.rb b/test/regression-model.rb
index 7f667dc..3b41171 100644
--- a/test/regression-model.rb
+++ b/test/regression-model.rb
@@ -173,13 +173,26 @@ class LazarRegressionTest < MiniTest::Test
model = Model::Lazar.create training_dataset: training_dataset
result = model.predict training_dataset
assert_kind_of Dataset, result
- assert_equal 6, result.features.size
+ assert_equal 8, result.features.size
assert_equal 88, result.compounds.size
assert_equal [1.95], result.values(result.compounds.first, result.bioactivity_features[0]).collect{|v| v.round(2)}
assert_equal [1.37], result.values(result.compounds[6], result.bioactivity_features[0]).collect{|v| v.round(2)}
- assert_equal [1.79], result.values(result.compounds[6], result.prediction_features[0]).collect{|v| v.round(2)}
+ assert_equal [1.79], result.values(result.compounds[6], result.prediction_feature).collect{|v| v.round(2)}
assert_equal [1.84,1.73], result.values(result.compounds[7], result.bioactivity_features[0]).collect{|v| v.round(2)}
assert_match /Low/i, result.predictions[result.compounds[6]][:confidence]
+ csv = result.to_prediction_csv
+ rows = csv.split("\n")
+ assert_equal "ID,Original SMILES,Canonical SMILES,Prediction,Confidence,Lower prediction interval,Upper prediction interval,Measurements", rows[0]
+ items = rows[3].split(",")
+ # prediction and measurement within prediction interval
+ prediction = items[3].to_f
+ pi_low = items[5].to_f
+ pi_hi = items[6].to_f
+ measurement = items[7].to_f
+ [prediction,measurement].each do |v|
+ assert(v > pi_low)
+ assert(v < pi_hi)
+ end
end
end