From 09452bba5c407c27721223d126e3f45c12b20a0c Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 13 Oct 2016 22:59:45 +0200 Subject: tests pass --- lib/caret.rb | 5 ++++ lib/model.rb | 32 +++++++---------------- lib/regression.rb | 2 +- test/dataset.rb | 3 --- test/model-nanoparticle.rb | 4 --- test/validation-nanoparticle.rb | 9 +------ test/validation-regression.rb | 57 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 73 insertions(+), 39 deletions(-) create mode 100644 test/validation-regression.rb diff --git a/lib/caret.rb b/lib/caret.rb index df86093..2c4cd0c 100644 --- a/lib/caret.rb +++ b/lib/caret.rb @@ -9,6 +9,11 @@ module OpenTox if independent_variables.flatten.uniq == ["NA"] prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights prediction[:warning] = "No variables for regression model. Using weighted average of similar substances." + elsif + dependent_variables.size < 3 + prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights + prediction[:warning] = "Insufficient number of neighbors (#{dependent_variables.size}) for regression model. Using weighted average of similar substances." + else dependent_variables.each_with_index do |v,i| dependent_variables[i] = to_r(v) diff --git a/lib/model.rb b/lib/model.rb index 4bbb7da..d7b072f 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -28,23 +28,9 @@ module OpenTox bad_request_error "Please provide a prediction_feature and/or a training_dataset." 
unless prediction_feature or training_dataset prediction_feature = training_dataset.features.first unless prediction_feature # TODO: prediction_feature without training_dataset: use all available data - # explicit prediction algorithm - if algorithms[:prediction] and algorithms[:prediction][:method] - case algorithms[:prediction][:method] - when /Classification/i - model = LazarClassification.new - when /Regression/i - model = LazarRegression.new - else - bad_request_error "Prediction method '#{algorithms[:prediction][:method]}' not implemented." - end # guess model type - elsif prediction_feature.numeric? - model = LazarRegression.new - else - model = LazarClassification.new - end + prediction_feature.numeric? ? model = LazarRegression.new : model = LazarClassification.new model.prediction_feature_id = prediction_feature.id model.training_dataset_id = training_dataset.id @@ -193,17 +179,17 @@ module OpenTox query_descriptors = substance.calculate_properties(features) similarity_descriptors = query_descriptors.collect_with_index{|v,i| (v-descriptor_means[i])/descriptor_sds[i]} else - similarity_descriptors = descriptor_ids.collect_with_index{|id,i| - prop = substance.properties[id] - prop = prop.median if prop.is_a? Array # measured - (prop-descriptor_means[i])/descriptor_sds[i] - } - query_descriptors = descriptor_ids.collect_with_index{|id,i| + similarity_descriptors = [] + query_descriptors = [] + descriptor_ids.each_with_index do |id,i| prop = substance.properties[id] prop = prop.median if prop.is_a? Array # measured - substance.properties[id] - } + if prop + similarity_descriptors[i] = (prop-descriptor_means[i])/descriptor_sds[i] + query_descriptors[i] = prop + end end + end else bad_request_error "Unknown descriptor type '#{descriptors}' for similarity method '#{similarity[:method]}'." 
end diff --git a/lib/regression.rb b/lib/regression.rb index d1724fd..3890987 100644 --- a/lib/regression.rb +++ b/lib/regression.rb @@ -3,7 +3,7 @@ module OpenTox class Regression - def self.weighted_average dependent_variables:, independent_variables:nil, weights:, query_variables: + def self.weighted_average dependent_variables:, independent_variables:nil, weights:, query_variables:nil # TODO: prediction_interval weighted_sum = 0.0 sim_sum = 0.0 diff --git a/test/dataset.rb b/test/dataset.rb index 2c0aa01..e91e65a 100644 --- a/test/dataset.rb +++ b/test/dataset.rb @@ -231,10 +231,7 @@ class DatasetTest < MiniTest::Test datasets.each{|d| d.delete} end - # skips, may be removed in the future - def test_simultanous_upload - skip threads = [] 3.times do |t| threads << Thread.new(t) do |up| diff --git a/test/model-nanoparticle.rb b/test/model-nanoparticle.rb index 6e18add..7244a29 100644 --- a/test/model-nanoparticle.rb +++ b/test/model-nanoparticle.rb @@ -31,10 +31,6 @@ class NanoparticleModelTest < MiniTest::Test model.delete end - def test_nanoparticle_parameters - skip - end - def test_import_ld skip # Ambit JSON-LD export defunct dataset_ids = Import::Enanomapper.import_ld diff --git a/test/validation-nanoparticle.rb b/test/validation-nanoparticle.rb index c5618e8..c0f2f92 100644 --- a/test/validation-nanoparticle.rb +++ b/test/validation-nanoparticle.rb @@ -31,8 +31,7 @@ class NanoparticleValidationTest < MiniTest::Test :prediction => {:method => 'Algorithm::Caret.pls' }, } model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - assert_equal "pls", model.algorithms[:prediction][:parameters] - assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method] cv = CrossValidation.create model p cv.rmse p cv.r_squared @@ -49,7 +48,6 @@ class NanoparticleValidationTest < MiniTest::Test :prediction 
=> {:method => 'Algorithm::Caret.pls' }, } model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - assert_equal "pls", model.algorithms[:prediction][:parameters] assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method] cv = CrossValidation.create model p cv.rmse @@ -73,9 +71,4 @@ class NanoparticleValidationTest < MiniTest::Test refute_nil cv.rmse end - - def test_import_ld - skip # Ambit JSON-LD export defunct - dataset_ids = Import::Enanomapper.import_ld - end end diff --git a/test/validation-regression.rb b/test/validation-regression.rb new file mode 100644 index 0000000..efce849 --- /dev/null +++ b/test/validation-regression.rb @@ -0,0 +1,57 @@ +require_relative "setup.rb" + +class ValidationRegressionTest < MiniTest::Test + include OpenTox::Validation + + # defaults + + def test_default_regression_crossvalidation + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" + model = Model::Lazar.create training_dataset: dataset + cv = RegressionCrossValidation.create model + assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split" + assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split" + end + + # parameters + + def test_regression_crossvalidation_params + dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" + algorithms = { + :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" }, + :descriptors => { :type => "MACCS", }, + :similarity => {:min => 0.7} + } + model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms + assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] + cv = RegressionCrossValidation.create model + cv.validation_ids.each do |vid| + model = Model::Lazar.find(Validation.find(vid).model_id) + assert_equal 
algorithms[:descriptors][:type], model.algorithms[:descriptors][:type] + assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min] + refute_nil model.training_dataset_id + refute_equal dataset.id, model.training_dataset_id + end + + refute_nil cv.rmse + refute_nil cv.mae + end + + def test_physchem_regression_crossvalidation + training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset:training_dataset + cv = RegressionCrossValidation.create model + refute_nil cv.rmse + refute_nil cv.mae + end + + # LOO + + def test_regression_loo_validation + dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset: dataset + loo = RegressionLeaveOneOut.create model + assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.34" + end + +end -- cgit v1.2.3