summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-03-15 18:53:12 +0100
committerChristoph Helma <helma@in-silico.ch>2016-03-15 18:53:12 +0100
commit2b0a7c725b23d8ef3f525b25fc7105de57ee3897 (patch)
tree866f2ea83fbaed796631c7863e1674eaa5c07c51 /test
parentabc3526e318a2bfa24dfe033d8879e7657c2ae5c (diff)
validation test cleanup
Diffstat (limited to 'test')
-rw-r--r--test/validation.rb81
1 files changed, 40 insertions, 41 deletions
diff --git a/test/validation.rb b/test/validation.rb
index c803c92..d8eea59 100644
--- a/test/validation.rb
+++ b/test/validation.rb
@@ -2,6 +2,8 @@ require_relative "setup.rb"
class ValidationTest < MiniTest::Test
+ # defaults
+
def test_default_classification_crossvalidation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
model = Model::LazarClassification.create dataset
@@ -17,48 +19,9 @@ class ValidationTest < MiniTest::Test
assert cv.mae < 1
end
- def test_regression_crossvalidation
- dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
- params = {
- :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average",
- :neighbor_algorithm => "fingerprint_neighbors",
- :neighbor_algorithm_parameters => {
- :type => "MACCS",
- :min_sim => 0.7,
- }
- }
- model = Model::LazarRegression.create dataset, params
- cv = RegressionCrossValidation.create model
- cv.validation_ids.each do |vid|
- model = Model::Lazar.find(Validation.find(vid).model_id)
- assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type]
- assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim]
- refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id]
- end
+ # parameters
- refute_nil cv.rmse
- refute_nil cv.mae
- end
-
- def test_pls_regression_crossvalidation
- dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
- params = { :prediction_algorithm => "OpenTox::Algorithm::Regression.local_fingerprint_regression", }
- model = Model::LazarRegression.create dataset, params
- cv = RegressionCrossValidation.create model
- assert cv.rmse < 1.5, "RMSE > 1.5"
- assert cv.mae < 1
- end
-
- def test_repeated_crossvalidation
- dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- model = Model::LazarClassification.create dataset
- repeated_cv = RepeatedCrossValidation.create model
- repeated_cv.crossvalidations.each do |cv|
- assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
- end
- end
-
- def test_crossvalidation_parameters
+ def test_classification_crossvalidation_parameters
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
params = {
:training_dataset_id => dataset.id,
@@ -80,6 +43,29 @@ class ValidationTest < MiniTest::Test
assert_equal params, validation_params
end
end
+
+ def test_regression_crossvalidation_params
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
+ params = {
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average",
+ :neighbor_algorithm => "fingerprint_neighbors",
+ :neighbor_algorithm_parameters => {
+ :type => "MACCS",
+ :min_sim => 0.7,
+ }
+ }
+ model = Model::LazarRegression.create dataset, params
+ cv = RegressionCrossValidation.create model
+ cv.validation_ids.each do |vid|
+ model = Model::Lazar.find(Validation.find(vid).model_id)
+ assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type]
+ assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim]
+ refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id]
+ end
+
+ refute_nil cv.rmse
+ refute_nil cv.mae
+ end
def test_physchem_regression_crossvalidation
@@ -90,6 +76,8 @@ class ValidationTest < MiniTest::Test
refute_nil cv.mae
end
+ # LOO
+
def test_classification_loo_validation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
model = Model::LazarClassification.create dataset
@@ -106,4 +94,15 @@ class ValidationTest < MiniTest::Test
assert loo.r_squared > 0.34
end
+ # repeated CV
+
+ def test_repeated_crossvalidation
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ model = Model::LazarClassification.create dataset
+ repeated_cv = RepeatedCrossValidation.create model
+ repeated_cv.crossvalidations.each do |cv|
+ assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
+ end
+ end
+
end