summary refs log tree commit diff
path: root/test
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-10-06 19:14:10 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-10-06 19:14:10 +0200
commit 4348eec89033e6677c9f628646fc67bd03c73fe6 (patch)
tree 5b7465c3c286ba343268ce8e29b10cc1b03a667b /test
parent ec87f7e079f3a7ef8ea6a0fa57f3b40e81ecaed0 (diff)
nano caret regression fixed
Diffstat (limited to 'test')
-rw-r--r-- test/all.rb 2
-rw-r--r-- test/model.rb 31
-rw-r--r-- test/nanoparticles.rb 81
-rw-r--r-- test/validation.rb 61
4 files changed, 79 insertions, 96 deletions
diff --git a/test/all.rb b/test/all.rb
index a10bcaa..eddf4e6 100644
--- a/test/all.rb
+++ b/test/all.rb
@@ -1,5 +1,5 @@
# "./default_environment.rb" has to be executed separately
-exclude = ["./setup.rb","./all.rb", "./default_environment.rb","./nanoparticles.rb"]
+exclude = ["./setup.rb","./all.rb", "./default_environment.rb"]
(Dir[File.join(File.dirname(__FILE__),"*.rb")]-exclude).each do |test|
require_relative test
end
diff --git a/test/model.rb b/test/model.rb
index 563d081..02b8e73 100644
--- a/test/model.rb
+++ b/test/model.rb
@@ -13,7 +13,7 @@ class ModelTest < MiniTest::Test
:min => 0.1
},
:prediction => {
- :method => "Algorithm::Regression.caret",
+ :method => "Algorithm::Caret.regression",
:parameters => "pls",
},
:feature_selection => nil,
@@ -65,7 +65,7 @@ class ModelTest < MiniTest::Test
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
assert_kind_of Model::LazarRegression, model
- assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method]
assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method]
assert_equal 0.1, model.algorithms[:similarity][:min]
assert_equal algorithms[:descriptors], model.algorithms[:descriptors]
@@ -78,7 +78,7 @@ class ModelTest < MiniTest::Test
training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
end
model = Model::Lazar.create training_dataset: training_dataset
- assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method]
assert_equal "rf", model.algorithms[:prediction][:parameters]
assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method]
prediction = model.predict training_dataset.substances[14]
@@ -87,6 +87,7 @@ class ModelTest < MiniTest::Test
end
def test_nanoparticle_parameters
+ skip
end
def test_regression_with_feature_selection
@@ -98,13 +99,14 @@ class ModelTest < MiniTest::Test
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
assert_kind_of Model::LazarRegression, model
- assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method]
assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
assert_equal 0.1, model.algorithms[:similarity][:min]
assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method]
end
def test_caret_parameters
+ skip
end
def test_default_classification
@@ -153,25 +155,4 @@ class ModelTest < MiniTest::Test
assert_equal 4, prediction[:neighbors].size
end
-=begin
- def test_physchem_description
- assert_equal 355, PhysChem.descriptors.size
- assert_equal 15, PhysChem.openbabel_descriptors.size
- assert_equal 295, PhysChem.cdk_descriptors.size
- assert_equal 45, PhysChem.joelib_descriptors.size
- assert_equal 310, PhysChem.unique_descriptors.size
- end
-
- def test_physchem
- assert_equal 355, PhysChem.descriptors.size
- c = Compound.from_smiles "CC(=O)CC(C)C"
- logP = PhysChem.find_or_create_by :name => "Openbabel.logP"
- assert_equal 1.6215, logP.calculate(c)
- jlogP = PhysChem.find_or_create_by :name => "Joelib.LogP"
- assert_equal 3.5951, jlogP.calculate(c)
- alogP = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP"
- assert_equal 0.35380000000000034, alogP.calculate(c)
- end
-=end
-
end
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index 23c09e7..9b2d2d9 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -5,29 +5,26 @@ class NanoparticleTest < MiniTest::Test
include OpenTox::Validation
def setup
- # TODO: multiple runs create duplicates
- #$mongo.database.drop
- #Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
@training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
unless @training_dataset
Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
@training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
end
+ @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
end
def test_create_model
- skip
- @training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
- feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
- model = Model::LazarRegression.create(feature, @training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "physchem_neighbors"})
+ model = Model::Lazar.create training_dataset: @training_dataset
nanoparticle = @training_dataset.nanoparticles[-34]
prediction = model.predict nanoparticle
+ p prediction
refute_nil prediction[:value]
assert_includes nanoparticle.dataset_ids, @training_dataset.id
model.delete
end
def test_inspect_cv
+ skip
cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last
#p cv
#p cv.id
@@ -45,6 +42,7 @@ class NanoparticleTest < MiniTest::Test
end
end
def test_inspect_worst_prediction
+ skip
cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last
worst_predictions = cv.worst_predictions(n: 3,show_neigbors: false)
@@ -64,10 +62,8 @@ class NanoparticleTest < MiniTest::Test
end
def test_validate_model
- #feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
- feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
-
- model = Model::LazarRegression.create(feature, @training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :feature_selection_algorithm => :correlation_filter, :neighbor_algorithm => "physchem_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
+ algorithms = { :prediction => {:method => "Algorithm::Regression.weighted_average" } }
+ model = Model::Lazar.create training_dataset: @training_dataset
cv = RegressionCrossValidation.create model
p cv.rmse
p cv.r_squared
@@ -77,17 +73,14 @@ class NanoparticleTest < MiniTest::Test
end
def test_validate_pls_model
- feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
-
- model = Model::LazarRegression.create(feature, @training_dataset, {
- :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression",
- :feature_selection_algorithm => :correlation_filter,
- :prediction_algorithm_parameters => {:method => 'pls'},
- #:feature_selection_algorithm_parameters => {:category => "P-CHEM"},
- #:feature_selection_algorithm_parameters => {:category => "Proteomics"},
- :neighbor_algorithm => "physchem_neighbors",
- :neighbor_algorithm_parameters => {:min_sim => 0.5}
- })
+ algorithms = {
+ :descriptors => {
+ :method => "properties",
+ :types => ["P-CHEM"]
+ },
+ :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'pls' },
+ }
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
cv = RegressionCrossValidation.create model
p cv.rmse
p cv.r_squared
@@ -96,17 +89,14 @@ class NanoparticleTest < MiniTest::Test
end
def test_validate_random_forest_model
- feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
-
- model = Model::LazarRegression.create(feature, @training_dataset, {
- :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression",
- :prediction_algorithm_parameters => {:method => 'rf'},
- :feature_selection_algorithm => :correlation_filter,
- #:feature_selection_algorithm_parameters => {:category => "P-CHEM"},
- #:feature_selection_algorithm_parameters => {:category => "Proteomics"},
- :neighbor_algorithm => "physchem_neighbors",
- :neighbor_algorithm_parameters => {:min_sim => 0.5}
- })
+ algorithms = {
+ :descriptors => {
+ :method => "properties",
+ :types => ["P-CHEM"]
+ },
+ :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'rf' }
+ }
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
cv = RegressionCrossValidation.create model
p cv.rmse
p cv.r_squared
@@ -115,9 +105,28 @@ class NanoparticleTest < MiniTest::Test
end
def test_validate_proteomics_pls_model
- feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
-
- model = Model::LazarRegression.create(feature, @training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "proteomics_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
+ algorithms = {
+ :descriptors => {
+ :method => "properties",
+ :types => ["Proteomics"]
+ },
+ :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'rf' }
+ }
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
+ cv = RegressionCrossValidation.create model
+ p cv.rmse
+ p cv.r_squared
+ refute_nil cv.r_squared
+ refute_nil cv.rmse
+ end
+
+ def test_validate_all_default_model
+ algorithms = {
+ :descriptors => {
+ :types => ["Proteomics","P-CHEM"]
+ },
+ }
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
cv = RegressionCrossValidation.create model
p cv.rmse
p cv.r_squared
diff --git a/test/validation.rb b/test/validation.rb
index b4f5a92..03adf69 100644
--- a/test/validation.rb
+++ b/test/validation.rb
@@ -7,7 +7,7 @@ class ValidationTest < MiniTest::Test
def test_default_classification_crossvalidation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- model = Model::LazarClassification.create dataset.features.first, dataset
+ model = Model::Lazar.create training_dataset: dataset
cv = ClassificationCrossValidation.create model
assert cv.accuracy > 0.7, "Accuracy (#{cv.accuracy}) should be larger than 0.7, this may occur due to an unfavorable training/test set split"
assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) should be larger than accuracy (#{cv.accuracy})."
@@ -15,9 +15,9 @@ class ValidationTest < MiniTest::Test
def test_default_regression_crossvalidation
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
- model = Model::LazarRegression.create dataset.features.first, dataset
+ model = Model::Lazar.create training_dataset: dataset
cv = RegressionCrossValidation.create model
- assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be larger than 1.5, this may occur due to an unfavorable training/test set split"
+ assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to an unfavorable training/test set split"
assert cv.mae < 1, "MAE #{cv.mae} should be smaller than 1, this may occur due to an unfavorable training/test set split"
end
@@ -25,23 +25,20 @@ class ValidationTest < MiniTest::Test
def test_classification_crossvalidation_parameters
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- params = {
- :neighbor_algorithm_parameters => {
- :min_sim => 0.3,
- :type => "FP3"
- }
+ algorithms = {
+ :similarity => { :min => 0.3, },
+ :descriptors => { :type => "FP3" }
}
- model = Model::LazarClassification.create dataset.features.first, dataset, params
- model.save
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
cv = ClassificationCrossValidation.create model
- params = model.neighbor_algorithm_parameters
+ params = model.algorithms
params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string
-
+
cv.validations.each do |validation|
- validation_params = validation.model.neighbor_algorithm_parameters
- refute_nil params["dataset_id"]
- refute_nil validation_params[:dataset_id]
- refute_equal params["dataset_id"], validation_params[:dataset_id]
+ validation_params = validation.model.algorithms
+ refute_nil model.training_dataset_id
+ refute_nil validation.model.training_dataset_id
+ refute_equal model.training_dataset_id, validation.model.training_dataset_id
["min_sim","type","prediction_feature_id"].each do |k|
assert_equal params[k], validation_params[k]
end
@@ -50,24 +47,20 @@ class ValidationTest < MiniTest::Test
def test_regression_crossvalidation_params
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
- params = {
- :prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average",
- :neighbor_algorithm => "fingerprint_neighbors",
- :neighbor_algorithm_parameters => {
- :type => "MACCS",
- :min_sim => 0.7,
- }
+ algorithms = {
+ :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
+ :descriptors => { :type => "MACCS", },
+ :similarity => {:min => 0.7}
}
- model = Model::LazarRegression.create dataset.features.first, dataset, params
- assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type]
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
+ assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
cv = RegressionCrossValidation.create model
cv.validation_ids.each do |vid|
model = Model::Lazar.find(Validation.find(vid).model_id)
- assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type]
- assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim]
- refute_nil model[:neighbor_algorithm_parameters][:dataset_id]
- refute_equal dataset.id, model[:neighbor_algorithm_parameters][:dataset_id]
- assert_equal model.training_dataset_id, model[:neighbor_algorithm_parameters][:dataset_id]
+ assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+ refute_nil model.training_dataset_id
+ refute_equal dataset.id, model.training_dataset_id
end
refute_nil cv.rmse
@@ -77,7 +70,7 @@ class ValidationTest < MiniTest::Test
def test_physchem_regression_crossvalidation
skip # TODO: fix
training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
- model = Model::LazarRegression.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression")
+ model = Model::Lazar.create(training_dataset.features.first, training_dataset, :prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression")
cv = RegressionCrossValidation.create model
refute_nil cv.rmse
refute_nil cv.mae
@@ -87,7 +80,7 @@ class ValidationTest < MiniTest::Test
def test_classification_loo_validation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- model = Model::LazarClassification.create dataset.features.first, dataset
+ model = Model::Lazar.create training_dataset: dataset
loo = ClassificationLeaveOneOut.create model
assert_equal 14, loo.nr_unpredicted
refute_empty loo.confusion_matrix
@@ -97,7 +90,7 @@ class ValidationTest < MiniTest::Test
def test_regression_loo_validation
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
- model = Model::LazarRegression.create dataset.features.first, dataset
+ model = Model::Lazar.create training_dataset: dataset
loo = RegressionLeaveOneOut.create model
assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
end
@@ -106,7 +99,7 @@ class ValidationTest < MiniTest::Test
def test_repeated_crossvalidation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- model = Model::LazarClassification.create dataset.features.first, dataset
+ model = Model::Lazar.create training_dataset: dataset
repeated_cv = RepeatedCrossValidation.create model
repeated_cv.crossvalidations.each do |cv|
assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"