summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--  test/dataset.rb  2
-rw-r--r--  test/feature.rb  10
-rw-r--r--  test/model-classification.rb  8
-rw-r--r--  test/model-regression.rb  28
-rw-r--r--  test/model-validation.rb  2
-rw-r--r--  test/setup.rb  2
-rw-r--r--  test/validation-classification.rb  2
-rw-r--r--  test/validation-regression.rb  2
8 files changed, 30 insertions, 26 deletions
diff --git a/test/dataset.rb b/test/dataset.rb
index e91e65a..055a029 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -160,7 +160,7 @@ class DatasetTest < MiniTest::Test
if v.numeric?
assert_equal v.to_f, serialized[inchi][i].to_f
else
- assert_equal v, serialized[inchi][i]
+ assert_equal v.to_s, serialized[inchi][i].to_s
end
end
diff --git a/test/feature.rb b/test/feature.rb
index 40edb9f..85ce588 100644
--- a/test/feature.rb
+++ b/test/feature.rb
@@ -57,20 +57,20 @@ class FeatureTest < MiniTest::Test
def test_physchem_description
assert_equal 346, PhysChem.descriptors.size
assert_equal 15, PhysChem.openbabel_descriptors.size
- assert_equal 295, PhysChem.cdk_descriptors.size
+ assert_equal 286, PhysChem.cdk_descriptors.size
assert_equal 45, PhysChem.joelib_descriptors.size
- assert_equal 310, PhysChem.unique_descriptors.size
+ assert_equal 309, PhysChem.unique_descriptors.size
end
def test_physchem
assert_equal 346, PhysChem.descriptors.size
c = Compound.from_smiles "CC(=O)CC(C)C"
logP = PhysChem.find_or_create_by :name => "Openbabel.logP"
- assert_equal 1.6215, logP.calculate(c)
+ assert_equal 1.6215, c.calculate_properties([logP]).first
jlogP = PhysChem.find_or_create_by :name => "Joelib.LogP"
- assert_equal 3.5951, jlogP.calculate(c)
+ assert_equal 3.5951, c.calculate_properties([jlogP]).first
alogP = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP"
- assert_equal 0.35380000000000034, alogP.calculate(c)
+ assert_equal 0.35380000000000034, c.calculate_properties([alogP]).first
end
end
diff --git a/test/model-classification.rb b/test/model-classification.rb
index 1424f6a..f75598b 100644
--- a/test/model-classification.rb
+++ b/test/model-classification.rb
@@ -46,12 +46,14 @@ class LazarClassificationTest < MiniTest::Test
assert_equal compound_dataset.compounds, prediction_dataset.compounds
cid = prediction_dataset.compounds[7].id.to_s
- assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warning]
+ assert_equal "Could not find similar substances with experimental data in the training dataset.", prediction_dataset.predictions[cid][:warnings][0]
+ expectations = ["Cannot create prediction: Only one similar compound in the training set.",
+ "Could not find similar substances with experimental data in the training dataset."]
prediction_dataset.predictions.each do |cid,pred|
- assert_equal "Could not find similar substances with experimental data in the training dataset.", pred[:warning] if pred[:value].nil?
+ assert_includes expectations, pred[:warnings][0] if pred[:value].nil?
end
cid = Compound.from_smiles("CCOC(=O)N").id.to_s
- assert_match "excluded", prediction_dataset.predictions[cid][:warning]
+ assert_match "excluded", prediction_dataset.predictions[cid][:info]
# cleanup
[training_dataset,model,compound_dataset,prediction_dataset].each{|o| o.delete}
end
diff --git a/test/model-regression.rb b/test/model-regression.rb
index 86b927c..5903e88 100644
--- a/test/model-regression.rb
+++ b/test/model-regression.rb
@@ -10,21 +10,21 @@ class LazarRegressionTest < MiniTest::Test
},
:similarity => {
:method => "Algorithm::Similarity.tanimoto",
- :min => 0.1
+ :min => 0.5
},
:prediction => {
- :method => "Algorithm::Caret.pls",
+ :method => "Algorithm::Caret.rf",
},
:feature_selection => nil,
}
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM_log10.csv")
model = Model::Lazar.create training_dataset: training_dataset
assert_kind_of Model::LazarRegression, model
assert_equal algorithms, model.algorithms
- substance = training_dataset.substances[10]
+ substance = training_dataset.substances[145]
prediction = model.predict substance
assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
- substance = Compound.from_smiles "NC(=O)OCCC"
+ substance = Compound.from_smiles "c1ccc(cc1)Oc1ccccc1"
prediction = model.predict substance
refute_nil prediction[:value]
refute_nil prediction[:prediction_interval]
@@ -59,8 +59,8 @@ class LazarRegressionTest < MiniTest::Test
model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
compound = Compound.from_smiles "CCCSCCSCC"
prediction = model.predict compound
- assert_equal 4, prediction[:neighbors].size
- assert_equal 1.37, prediction[:value].round(2)
+ assert_equal 3, prediction[:neighbors].size
+ assert prediction[:value].round(2) > 1.37, "Prediction value (#{prediction[:value].round(2)}) should be larger than 1.37."
end
def test_local_physchem_regression
@@ -112,12 +112,12 @@ class LazarRegressionTest < MiniTest::Test
:method => "Algorithm::Similarity.cosine",
}
}
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
assert_kind_of Model::LazarRegression, model
- assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method]
assert_equal "Algorithm::Similarity.cosine", model.algorithms[:similarity][:method]
- assert_equal 0.1, model.algorithms[:similarity][:min]
+ assert_equal 0.5, model.algorithms[:similarity][:min]
algorithms[:descriptors].delete :features
assert_equal algorithms[:descriptors], model.algorithms[:descriptors]
prediction = model.predict training_dataset.substances[10]
@@ -130,14 +130,14 @@ class LazarRegressionTest < MiniTest::Test
:method => "Algorithm::FeatureSelection.correlation_filter",
},
}
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM_log10.csv")
model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
assert_kind_of Model::LazarRegression, model
- assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method]
assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
- assert_equal 0.1, model.algorithms[:similarity][:min]
+ assert_equal 0.5, model.algorithms[:similarity][:min]
assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method]
- prediction = model.predict training_dataset.substances[10]
+ prediction = model.predict training_dataset.substances[145]
refute_nil prediction[:value]
end
diff --git a/test/model-validation.rb b/test/model-validation.rb
index 83986d6..9304232 100644
--- a/test/model-validation.rb
+++ b/test/model-validation.rb
@@ -12,7 +12,7 @@ class ValidationModelTest < MiniTest::Test
m.crossvalidations.each do |cv|
assert cv.accuracy > 0.74, "Crossvalidation accuracy (#{cv.accuracy}) should be larger than 0.75. This may happen due to an unfavorable training/test set split."
end
- prediction = m.predict Compound.from_smiles("CCCC(NN)C")
+ prediction = m.predict Compound.from_smiles("OCC(CN(CC(O)C)N=O)O")
assert_equal "true", prediction[:value]
m.delete
end
diff --git a/test/setup.rb b/test/setup.rb
index 40c8ebf..c1cddfb 100644
--- a/test/setup.rb
+++ b/test/setup.rb
@@ -3,6 +3,8 @@ require 'minitest/autorun'
require_relative '../lib/lazar.rb'
#require 'lazar'
include OpenTox
+#$mongo.database.drop
+#$gridfs = $mongo.database.fs # recreate GridFS indexes
TEST_DIR ||= File.expand_path(File.dirname(__FILE__))
DATA_DIR ||= File.join(TEST_DIR,"data")
training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
diff --git a/test/validation-classification.rb b/test/validation-classification.rb
index fb4c3e7..ce06063 100644
--- a/test/validation-classification.rb
+++ b/test/validation-classification.rb
@@ -47,7 +47,7 @@ class ValidationClassificationTest < MiniTest::Test
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
model = Model::Lazar.create training_dataset: dataset
loo = ClassificationLeaveOneOut.create model
- assert_equal 14, loo.nr_unpredicted
+ assert_equal 24, loo.nr_unpredicted
refute_empty loo.confusion_matrix
assert loo.accuracy > 0.77
assert loo.weighted_accuracy > loo.accuracy, "Weighted accuracy (#{loo.weighted_accuracy}) should be larger than accuracy (#{loo.accuracy})."
diff --git a/test/validation-regression.rb b/test/validation-regression.rb
index 01ed644..c5ad312 100644
--- a/test/validation-regression.rb
+++ b/test/validation-regression.rb
@@ -84,7 +84,7 @@ class ValidationRegressionTest < MiniTest::Test
repeated_cv = RepeatedCrossValidation.create model
repeated_cv.crossvalidations.each do |cv|
assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
- assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
+ assert cv.rmse < 0.5, "RMSE (#{cv.rmse}) should be smaller than 0.5"
end
end