summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-10-27 11:58:07 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-10-27 11:58:07 +0200
commit 8519274487166d75b3b9ae28e61f7a7be9f7e83c (patch)
tree 6815047dae4740e57c33bca9f65b5716f32e0b0f /test
parent aada2ff67eaba251d1eeedb7f3eb29282706f997 (diff)
probability plot for classification validations
Diffstat (limited to 'test')
-rw-r--r-- test/validation-classification.rb 2
-rw-r--r-- test/validation-regression.rb 40
2 files changed, 40 insertions, 2 deletions
diff --git a/test/validation-classification.rb b/test/validation-classification.rb
index b71e427..c93e71f 100644
--- a/test/validation-classification.rb
+++ b/test/validation-classification.rb
@@ -11,6 +11,8 @@ class ValidationClassificationTest < MiniTest::Test
cv = ClassificationCrossValidation.create model
assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split"
assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) should be larger than accuracy (#{cv.accuracy})."
+ #p cv
+ #File.open("tmp.pdf","w+"){|f| f.puts cv.probability_plot}
end
# parameters
diff --git a/test/validation-regression.rb b/test/validation-regression.rb
index efce849..a0895f9 100644
--- a/test/validation-regression.rb
+++ b/test/validation-regression.rb
@@ -9,8 +9,9 @@ class ValidationRegressionTest < MiniTest::Test
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
model = Model::Lazar.create training_dataset: dataset
cv = RegressionCrossValidation.create model
- assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split"
- assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split"
+ assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
+ assert cv.mae < 1.1, "MAE #{cv.mae} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
+ assert cv.percent_within_prediction_interval > 80, "Only #{cv.percent_within_prediction_interval.round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
end
# parameters
@@ -54,4 +55,39 @@ class ValidationRegressionTest < MiniTest::Test
assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
end
+ def test_regression_loo_validation_with_feature_selection
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ algorithms = {
+ :descriptors => {
+ :method => "calculate_properties",
+ :features => PhysChem.openbabel_descriptors,
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.weighted_cosine",
+ :min => 0.5
+ },
+ :feature_selection => {
+ :method => "Algorithm::FeatureSelection.correlation_filter",
+ },
+ }
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
+ assert_raises OpenTox::BadRequestError do
+ loo = RegressionLeaveOneOut.create model
+ end
+ end
+
+ # repeated CV
+
+ def test_repeated_crossvalidation
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ model = Model::Lazar.create training_dataset: dataset
+ repeated_cv = RepeatedCrossValidation.create model
+ repeated_cv.crossvalidations.each do |cv|
+ #assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
+ #assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
+ end
+ p repeated_cv
+ File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
+ end
+
end