summary refs log tree commit diff
path: root/test/validation.rb
diff options
context:
space:
mode:
Diffstat (limited to 'test/validation.rb')
-rw-r--r-- test/validation.rb 143
1 files changed, 63 insertions, 80 deletions
diff --git a/test/validation.rb b/test/validation.rb
index 6764a32..d8eea59 100644
--- a/test/validation.rb
+++ b/test/validation.rb
@@ -2,54 +2,52 @@ require_relative "setup.rb"
class ValidationTest < MiniTest::Test
- def test_fminer_crossvalidation
+ # defaults
+
+ def test_default_classification_crossvalidation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- model = Model::LazarFminerClassification.create dataset
- cv = ClassificationCrossValidation.create model
- refute_empty cv.validation_ids
- assert cv.accuracy > 0.8, "Crossvalidation accuracy lower than 0.8"
- assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) "
- end
-
- def test_classification_crossvalidation
- dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- model = Model::LazarClassification.create dataset#, features
+ model = Model::LazarClassification.create dataset
cv = ClassificationCrossValidation.create model
- assert cv.accuracy > 0.7
- File.open("tmp.svg","w+"){|f| f.puts cv.confidence_plot}
- `inkview tmp.svg`
- p cv.nr_unpredicted
- p cv.accuracy
- #assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy should be larger than unweighted accuracy."
+ assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7"
end
def test_default_regression_crossvalidation
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
model = Model::LazarRegression.create dataset
cv = RegressionCrossValidation.create model
- #cv = RegressionCrossValidation.find '561503262b72ed54fd000001'
- p cv.id
- File.open("tmp.svg","w+"){|f| f.puts cv.correlation_plot}
- `inkview tmp.svg`
- File.open("tmp.svg","w+"){|f| f.puts cv.confidence_plot}
- `inkview tmp.svg`
-
- #puts cv.misclassifications.to_yaml
- p cv.rmse
- p cv.weighted_rmse
assert cv.rmse < 1.5, "RMSE > 1.5"
- #assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) "
- p cv.mae
- p cv.weighted_mae
assert cv.mae < 1
- #assert cv.weighted_mae < cv.mae
end
- def test_regression_crossvalidation
+ # parameters
+
+ def test_classification_crossvalidation_parameters
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ params = {
+ :training_dataset_id => dataset.id,
+ :neighbor_algorithm_parameters => {
+ :min_sim => 0.3,
+ :type => "FP3"
+ }
+ }
+ model = Model::LazarClassification.create dataset, params
+ model.save
+ cv = ClassificationCrossValidation.create model
+ params = model.neighbor_algorithm_parameters
+ params.delete :training_dataset_id
+ params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string
+
+ cv.validations.each do |validation|
+ validation_params = validation.model.neighbor_algorithm_parameters
+ validation_params.delete "training_dataset_id"
+ assert_equal params, validation_params
+ end
+ end
+
+ def test_regression_crossvalidation_params
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
- #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv"
params = {
- :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average",
:neighbor_algorithm => "fingerprint_neighbors",
:neighbor_algorithm_parameters => {
:type => "MACCS",
@@ -65,61 +63,46 @@ class ValidationTest < MiniTest::Test
refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id]
end
- assert cv.rmse < 1.5, "RMSE > 30"
- assert cv.mae < 1
+ refute_nil cv.rmse
+ refute_nil cv.mae
end
- def test_repeated_crossvalidation
- dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- model = Model::LazarClassification.create dataset
- repeated_cv = RepeatedCrossValidation.create model
- repeated_cv.crossvalidations.each do |cv|
- assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
- assert_operator cv.weighted_accuracy, :>, cv.accuracy
- end
+ def test_physchem_regression_crossvalidation
+
+ training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
+ model = Model::LazarRegression.create(training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression")
+ cv = RegressionCrossValidation.create model
+ refute_nil cv.rmse
+ refute_nil cv.mae
end
- def test_crossvalidation_parameters
+ # LOO
+
+ def test_classification_loo_validation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- params = {
- :neighbor_algorithm_parameters => {
- :min_sim => 0.3,
- :type => "FP3"
- }
- }
- model = Model::LazarClassification.create dataset, params
- model.save
- cv = ClassificationCrossValidation.create model
- params = model.neighbor_algorithm_parameters
- params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string
- cv.validations.each do |validation|
- assert_equal params, validation.model.neighbor_algorithm_parameters
- end
+ model = Model::LazarClassification.create dataset
+ loo = ClassificationLeaveOneOutValidation.create model
+ assert_equal 14, loo.nr_unpredicted
+ refute_empty loo.confusion_matrix
+ assert loo.accuracy > 0.77
end
- def test_physchem_regression_crossvalidation
- skip
+ def test_regression_loo_validation
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
+ model = Model::LazarRegression.create dataset
+ loo = RegressionLeaveOneOutValidation.create model
+ assert loo.r_squared > 0.34
+ end
- @descriptors = OpenTox::Algorithm::Descriptor::OBDESCRIPTORS.keys
- refute_empty @descriptors
+ # repeated CV
- # UPLOAD DATA
- training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
- feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors
- feature_dataset.save
- scaled_feature_dataset = feature_dataset.scale
- scaled_feature_dataset.save
- model = Model::LazarRegression.create training_dataset
- model.neighbor_algorithm = "physchem_neighbors"
- model.neighbor_algorithm_parameters = {
- :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.physchem",
- :descriptors => @descriptors,
- :feature_dataset_id => scaled_feature_dataset.id,
- :min_sim => 0.3
- }
- model.save
- cv = RegressionCrossValidation.create model
- p cv
+ def test_repeated_crossvalidation
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ model = Model::LazarClassification.create dataset
+ repeated_cv = RepeatedCrossValidation.create model
+ repeated_cv.crossvalidations.each do |cv|
+ assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
+ end
end
end