summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-10-13 22:59:45 +0200
committerChristoph Helma <helma@in-silico.ch>2016-10-13 22:59:45 +0200
commit09452bba5c407c27721223d126e3f45c12b20a0c (patch)
treeed4f73a874ddb12c98e7c62af49c2de5fcc4f4d1
parent2dc66aef3b7932105868ee8c7d32ad975e142d1b (diff)
tests pass
-rw-r--r--lib/caret.rb5
-rw-r--r--lib/model.rb32
-rw-r--r--lib/regression.rb2
-rw-r--r--test/dataset.rb3
-rw-r--r--test/model-nanoparticle.rb4
-rw-r--r--test/validation-nanoparticle.rb9
-rw-r--r--test/validation-regression.rb57
7 files changed, 73 insertions, 39 deletions
diff --git a/lib/caret.rb b/lib/caret.rb
index df86093..2c4cd0c 100644
--- a/lib/caret.rb
+++ b/lib/caret.rb
@@ -9,6 +9,11 @@ module OpenTox
if independent_variables.flatten.uniq == ["NA"]
prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
prediction[:warning] = "No variables for regression model. Using weighted average of similar substances."
+ elsif
+ dependent_variables.size < 3
+ prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
+ prediction[:warning] = "Insufficient number of neighbors (#{dependent_variables.size}) for regression model. Using weighted average of similar substances."
+
else
dependent_variables.each_with_index do |v,i|
dependent_variables[i] = to_r(v)
diff --git a/lib/model.rb b/lib/model.rb
index 4bbb7da..d7b072f 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -28,23 +28,9 @@ module OpenTox
bad_request_error "Please provide a prediction_feature and/or a training_dataset." unless prediction_feature or training_dataset
prediction_feature = training_dataset.features.first unless prediction_feature
# TODO: prediction_feature without training_dataset: use all available data
- # explicit prediction algorithm
- if algorithms[:prediction] and algorithms[:prediction][:method]
- case algorithms[:prediction][:method]
- when /Classification/i
- model = LazarClassification.new
- when /Regression/i
- model = LazarRegression.new
- else
- bad_request_error "Prediction method '#{algorithms[:prediction][:method]}' not implemented."
- end
# guess model type
- elsif prediction_feature.numeric?
- model = LazarRegression.new
- else
- model = LazarClassification.new
- end
+ prediction_feature.numeric? ? model = LazarRegression.new : model = LazarClassification.new
model.prediction_feature_id = prediction_feature.id
model.training_dataset_id = training_dataset.id
@@ -193,17 +179,17 @@ module OpenTox
query_descriptors = substance.calculate_properties(features)
similarity_descriptors = query_descriptors.collect_with_index{|v,i| (v-descriptor_means[i])/descriptor_sds[i]}
else
- similarity_descriptors = descriptor_ids.collect_with_index{|id,i|
- prop = substance.properties[id]
- prop = prop.median if prop.is_a? Array # measured
- (prop-descriptor_means[i])/descriptor_sds[i]
- }
- query_descriptors = descriptor_ids.collect_with_index{|id,i|
+ similarity_descriptors = []
+ query_descriptors = []
+ descriptor_ids.each_with_index do |id,i|
prop = substance.properties[id]
prop = prop.median if prop.is_a? Array # measured
- substance.properties[id]
- }
+ if prop
+ similarity_descriptors[i] = (prop-descriptor_means[i])/descriptor_sds[i]
+ query_descriptors[i] = prop
+ end
end
+ end
else
bad_request_error "Unknown descriptor type '#{descriptors}' for similarity method '#{similarity[:method]}'."
end
diff --git a/lib/regression.rb b/lib/regression.rb
index d1724fd..3890987 100644
--- a/lib/regression.rb
+++ b/lib/regression.rb
@@ -3,7 +3,7 @@ module OpenTox
class Regression
- def self.weighted_average dependent_variables:, independent_variables:nil, weights:, query_variables:
+ def self.weighted_average dependent_variables:, independent_variables:nil, weights:, query_variables:nil
# TODO: prediction_interval
weighted_sum = 0.0
sim_sum = 0.0
diff --git a/test/dataset.rb b/test/dataset.rb
index 2c0aa01..e91e65a 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -231,10 +231,7 @@ class DatasetTest < MiniTest::Test
datasets.each{|d| d.delete}
end
- # skips, may be removed in the future
-
def test_simultanous_upload
- skip
threads = []
3.times do |t|
threads << Thread.new(t) do |up|
diff --git a/test/model-nanoparticle.rb b/test/model-nanoparticle.rb
index 6e18add..7244a29 100644
--- a/test/model-nanoparticle.rb
+++ b/test/model-nanoparticle.rb
@@ -31,10 +31,6 @@ class NanoparticleModelTest < MiniTest::Test
model.delete
end
- def test_nanoparticle_parameters
- skip
- end
-
def test_import_ld
skip # Ambit JSON-LD export defunct
dataset_ids = Import::Enanomapper.import_ld
diff --git a/test/validation-nanoparticle.rb b/test/validation-nanoparticle.rb
index c5618e8..c0f2f92 100644
--- a/test/validation-nanoparticle.rb
+++ b/test/validation-nanoparticle.rb
@@ -31,8 +31,7 @@ class NanoparticleValidationTest < MiniTest::Test
:prediction => {:method => 'Algorithm::Caret.pls' },
}
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
- assert_equal "pls", model.algorithms[:prediction][:parameters]
- assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
cv = CrossValidation.create model
p cv.rmse
p cv.r_squared
@@ -49,7 +48,6 @@ class NanoparticleValidationTest < MiniTest::Test
:prediction => {:method => 'Algorithm::Caret.pls' },
}
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
- assert_equal "pls", model.algorithms[:prediction][:parameters]
assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
cv = CrossValidation.create model
p cv.rmse
@@ -73,9 +71,4 @@ class NanoparticleValidationTest < MiniTest::Test
refute_nil cv.rmse
end
-
- def test_import_ld
- skip # Ambit JSON-LD export defunct
- dataset_ids = Import::Enanomapper.import_ld
- end
end
diff --git a/test/validation-regression.rb b/test/validation-regression.rb
new file mode 100644
index 0000000..efce849
--- /dev/null
+++ b/test/validation-regression.rb
@@ -0,0 +1,57 @@
+require_relative "setup.rb"
+
+class ValidationRegressionTest < MiniTest::Test
+ include OpenTox::Validation
+
+ # defaults
+
+ def test_default_regression_crossvalidation
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
+ model = Model::Lazar.create training_dataset: dataset
+ cv = RegressionCrossValidation.create model
+ assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split"
+ assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split"
+ end
+
+ # parameters
+
+ def test_regression_crossvalidation_params
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
+ algorithms = {
+ :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
+ :descriptors => { :type => "MACCS", },
+ :similarity => {:min => 0.7}
+ }
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
+ assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
+ cv = RegressionCrossValidation.create model
+ cv.validation_ids.each do |vid|
+ model = Model::Lazar.find(Validation.find(vid).model_id)
+ assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+ refute_nil model.training_dataset_id
+ refute_equal dataset.id, model.training_dataset_id
+ end
+
+ refute_nil cv.rmse
+ refute_nil cv.mae
+ end
+
+ def test_physchem_regression_crossvalidation
+ training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ model = Model::Lazar.create training_dataset:training_dataset
+ cv = RegressionCrossValidation.create model
+ refute_nil cv.rmse
+ refute_nil cv.mae
+ end
+
+ # LOO
+
+ def test_regression_loo_validation
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ model = Model::Lazar.create training_dataset: dataset
+ loo = RegressionLeaveOneOut.create model
+ assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.34"
+ end
+
+end