From 160e75e696452ac61e651664ac56d16ce1c9c4b6 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 13 Oct 2016 19:17:03 +0200 Subject: model tests separated and cleaned --- test/model-nanoparticle.rb | 42 ++++++++++ test/model-regression.rb | 170 ++++++++++++++++++++++++++++++++++++++++ test/model.rb | 106 ------------------------- test/nanoparticles.rb | 86 -------------------- test/regression.rb | 86 -------------------- test/validation-nanoparticle.rb | 74 +++++++++++++++++ 6 files changed, 286 insertions(+), 278 deletions(-) create mode 100644 test/model-nanoparticle.rb create mode 100644 test/model-regression.rb delete mode 100644 test/model.rb delete mode 100644 test/nanoparticles.rb delete mode 100644 test/regression.rb create mode 100644 test/validation-nanoparticle.rb (limited to 'test') diff --git a/test/model-nanoparticle.rb b/test/model-nanoparticle.rb new file mode 100644 index 0000000..fb81b83 --- /dev/null +++ b/test/model-nanoparticle.rb @@ -0,0 +1,42 @@ +require_relative "setup.rb" + +class NanoparticleTest < MiniTest::Test + include OpenTox::Validation + + def setup + @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first + unless @training_dataset + Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") + @training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first + end + @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first + end + + def test_nanoparticle_model + assert true, @prediction_feature.measured + model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature + refute_empty model.dependent_variables + refute_empty model.descriptor_ids + refute_empty model.independent_variables + assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method] + nanoparticle = @training_dataset.nanoparticles[-34] + assert_includes nanoparticle.dataset_ids, @training_dataset.id + prediction = model.predict nanoparticle + refute_nil prediction[:value] + assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions." + prediction = model.predict @training_dataset.substances[14] + refute_nil prediction[:value] + assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions." + model.delete + end + + def test_nanoparticle_parameters + skip + end + + def test_import_ld + skip # Ambit JSON-LD export defunct + dataset_ids = Import::Enanomapper.import_ld + end +end diff --git a/test/model-regression.rb b/test/model-regression.rb new file mode 100644 index 0000000..644ca1c --- /dev/null +++ b/test/model-regression.rb @@ -0,0 +1,170 @@ +require_relative "setup.rb" + +class LazarRegressionTest < MiniTest::Test + + def test_default_regression + algorithms = { + :descriptors => { + :method => "fingerprint", + :type => "MP2D" + }, + :similarity => { + :method => "Algorithm::Similarity.tanimoto", + :min => 0.1 + }, + :prediction => { + :method => "Algorithm::Caret.pls", + }, + :feature_selection => nil, + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset: training_dataset + assert_kind_of Model::LazarRegression, model + assert_equal algorithms, model.algorithms + substance = training_dataset.substances[10] + prediction = model.predict substance + assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions." + substance = Compound.from_smiles "NC(=O)OCCC" + prediction = model.predict substance + refute_nil prediction[:value] + refute_nil prediction[:prediction_interval] + refute_empty prediction[:neighbors] + end + + def test_weighted_average + training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" + algorithms = { + :similarity => { + :min => 0 + }, + :prediction => { + :method => "Algorithm::Regression.weighted_average", + }, + } + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + compound = Compound.from_smiles "CC(C)(C)CN" + prediction = model.predict compound + assert_equal -0.86, prediction[:value].round(2) + assert_equal model.substance_ids.size, prediction[:neighbors].size + end + + def test_mpd_fingerprints + training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" + algorithms = { + :descriptors => { + :method => "fingerprint", + :type => "MP2D" + }, + } + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + compound = Compound.from_smiles "CCCSCCSCC" + prediction = model.predict compound + assert_equal 4, prediction[:neighbors].size + assert_equal 1.37, prediction[:value].round(2) + end + + def test_local_physchem_regression + training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" + algorithms = { + :descriptors => { + :method => "calculate_properties", + :features => PhysChem.openbabel_descriptors, + }, + :similarity => { + :method => "Algorithm::Similarity.weighted_cosine", + :min => 0.5 + }, + } + model = Model::Lazar.create(training_dataset:training_dataset, algorithms:algorithms) + compound = Compound.from_smiles "NC(=O)OCCC" + prediction = model.predict compound + refute_nil prediction[:value] + end + + def test_local_physchem_regression_with_feature_selection + training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" + algorithms = { + :descriptors => { + :method => "calculate_properties", + :features => PhysChem.openbabel_descriptors, + }, + :similarity => { + :method => "Algorithm::Similarity.weighted_cosine", + :min => 0.5 + }, + :feature_selection => { + :method => "Algorithm::FeatureSelection.correlation_filter", + }, + } + model = Model::Lazar.create(training_dataset:training_dataset, algorithms:algorithms) + compound = Compound.from_smiles "NC(=O)OCCC" + prediction = model.predict compound + refute_nil prediction[:value] + end + + def test_unweighted_cosine_physchem_regression + algorithms = { + :descriptors => { + :method => "calculate_properties", + :features => PhysChem.openbabel_descriptors, + }, + :similarity => { + :method => "Algorithm::Similarity.cosine", + } + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + assert_kind_of Model::LazarRegression, model + assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Similarity.cosine", model.algorithms[:similarity][:method] + assert_equal 0.1, model.algorithms[:similarity][:min] + algorithms[:descriptors].delete :features + assert_equal algorithms[:descriptors], model.algorithms[:descriptors] + prediction = model.predict training_dataset.substances[10] + refute_nil prediction[:value] + # TODO test predictin + end + + def test_regression_with_feature_selection + algorithms = { + :feature_selection => { + :method => "Algorithm::FeatureSelection.correlation_filter", + }, + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + assert_kind_of Model::LazarRegression, model + assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] + assert_equal 0.1, model.algorithms[:similarity][:min] + assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method] + end + + def test_regression_parameters + algorithms = { + :descriptors => { + :method => "fingerprint", + :type => "MP2D" + }, + :similarity => { + :method => "Algorithm::Similarity.tanimoto", + :min => 0.3 + }, + :prediction => { + :method => "Algorithm::Regression.weighted_average", + }, + :feature_selection => nil, + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + assert_kind_of Model::LazarRegression, model + assert_equal "Algorithm::Regression.weighted_average", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] + assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min] + assert_equal algorithms[:prediction][:parameters], model.algorithms[:prediction][:parameters] + substance = training_dataset.substances[10] + prediction = model.predict substance + assert_equal 0.83, prediction[:value].round(2) + end + +end diff --git a/test/model.rb b/test/model.rb deleted file mode 100644 index 027efe4..0000000 --- a/test/model.rb +++ /dev/null @@ -1,106 +0,0 @@ -require_relative "setup.rb" - -class ModelTest < MiniTest::Test - - def test_default_regression - algorithms = { - :descriptors => [ "MP2D" ], - :similarity => { - :method => "Algorithm::Similarity.tanimoto", - :min => 0.1 - }, - :prediction => { - :method => "Algorithm::Caret.pls", - }, - :feature_selection => nil, - } - training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") - model = Model::Lazar.create training_dataset: training_dataset - assert_kind_of Model::LazarRegression, model - assert_equal algorithms, model.algorithms - substance = training_dataset.substances[10] - prediction = model.predict substance - assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions." - end - - def test_regression_parameters - algorithms = { - :descriptors => [ "MP2D" ], - :similarity => { - :method => "Algorithm::Similarity.tanimoto", - :min => 0.3 - }, - :prediction => { - :method => "Algorithm::Regression.weighted_average", - }, - :feature_selection => nil, - } - training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") - model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms - assert_kind_of Model::LazarRegression, model - assert_equal "Algorithm::Regression.weighted_average", model.algorithms[:prediction][:method] - assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] - assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min] - assert_equal algorithms[:prediction][:parameters], model.algorithms[:prediction][:parameters] - substance = training_dataset.substances[10] - prediction = model.predict substance - assert_equal 0.83, prediction[:value].round(2) - end - - def test_physchem_regression - algorithms = { - :descriptors => { - :method => "calculate_properties", - :features => PhysChem.openbabel_descriptors, - }, - :similarity => { - :method => "Algorithm::Similarity.cosine", - } - } - training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") - model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms - assert_kind_of Model::LazarRegression, model - assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method] - assert_equal "Algorithm::Similarity.cosine", model.algorithms[:similarity][:method] - assert_equal 0.1, model.algorithms[:similarity][:min] - algorithms[:descriptors].delete :features - assert_equal algorithms[:descriptors], model.algorithms[:descriptors] - prediction = model.predict training_dataset.substances[10] - refute_nil prediction[:value] - # TODO test predictin - end - - def test_nanoparticle_default - training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - unless training_dataset - Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") - training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - end - model = Model::Lazar.create training_dataset: training_dataset - assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method] - assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method] - prediction = model.predict training_dataset.substances[14] - assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions." - - end - - def test_nanoparticle_parameters - skip - end - - def test_regression_with_feature_selection - algorithms = { - :feature_selection => { - :method => "Algorithm::FeatureSelection.correlation_filter", - }, - } - training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") - model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms - assert_kind_of Model::LazarRegression, model - assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method] - assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] - assert_equal 0.1, model.algorithms[:similarity][:min] - assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method] - end - -end diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb deleted file mode 100644 index 9a67e63..0000000 --- a/test/nanoparticles.rb +++ /dev/null @@ -1,86 +0,0 @@ -require_relative "setup.rb" - -class NanoparticleTest < MiniTest::Test - include OpenTox::Validation - - def setup - @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - unless @training_dataset - Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") - @training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first - end - @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first - end - - def test_nanoparticle_model - model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature - nanoparticle = @training_dataset.nanoparticles[-34] - prediction = model.predict nanoparticle - refute_nil prediction[:value] - assert_includes nanoparticle.dataset_ids, @training_dataset.id - assert true, @prediction_feature.measured - model.delete - end - - # validations - - def test_validate_default_nanoparticle_model - model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature - cv = CrossValidation.create model - p cv.rmse - p cv.r_squared - #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot} - refute_nil cv.r_squared - refute_nil cv.rmse - end - - def test_validate_pls_nanoparticle_model - algorithms = { - :descriptors => { :types => ["P-CHEM"] }, - :prediction => {:parameters => 'pls' }, - } - model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - assert_equal "pls", model.algorithms[:prediction][:parameters] - assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method] - cv = CrossValidation.create model - p cv.rmse - p cv.r_squared - refute_nil cv.r_squared - refute_nil cv.rmse - end - - def test_validate_proteomics_pls_nanoparticle_model - algorithms = { - :descriptors => { :types => ["Proteomics"] }, - :prediction => { :parameters => 'pls' } - } - model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - assert_equal "pls", model.algorithms[:prediction][:parameters] - assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method] - cv = CrossValidation.create model - p cv.rmse - p cv.r_squared - refute_nil cv.r_squared - refute_nil cv.rmse - end - - def test_validate_all_default_nanoparticle_model - algorithms = { - :descriptors => { - :types => ["Proteomics","P-CHEM"] - }, - } - model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - cv = CrossValidation.create model - p cv.rmse - p cv.r_squared - refute_nil cv.r_squared - refute_nil cv.rmse - end - - - def test_import_ld - skip # Ambit JSON-LD export defunct - dataset_ids = Import::Enanomapper.import_ld - end -end diff --git a/test/regression.rb b/test/regression.rb deleted file mode 100644 index cdbac4b..0000000 --- a/test/regression.rb +++ /dev/null @@ -1,86 +0,0 @@ -require_relative "setup.rb" - -class LazarRegressionTest < MiniTest::Test - - def test_weighted_average - training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - algorithms = { - :similarity => { - :min => 0 - }, - :prediction => { - :method => "Algorithm::Regression.weighted_average", - }, - } - model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms - compound = Compound.from_smiles "CC(C)(C)CN" - prediction = model.predict compound - assert_equal -0.86, prediction[:value].round(2) - assert_equal 88, prediction[:neighbors].size - end - - def test_mpd_fingerprints - training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - algorithms = { - :descriptors => [ "MP2D" ] - } - model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms - compound = Compound.from_smiles "CCCSCCSCC" - prediction = model.predict compound - assert_equal 3, prediction[:neighbors].size - assert_equal 1.37, prediction[:value].round(2) - end - - def test_local_fingerprint_regression - training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - model = Model::Lazar.create training_dataset: training_dataset - compound = Compound.from_smiles "NC(=O)OCCC" - prediction = model.predict compound - refute_nil prediction[:value] - refute_nil prediction[:prediction_interval] - refute_empty prediction[:neighbors] - end - - def test_local_physchem_regression - training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - algorithms = { - :descriptors => [PhysChem::OPENBABEL], - :similarity => { - :method => "Algorithm::Similarity.weighted_cosine", - :min => 0.5 - }, - } - model = Model::Lazar.create(training_dataset:training_dataset, algorithms:algorithms) - p model - compound = Compound.from_smiles "NC(=O)OCCC" - prediction = model.predict compound - refute_nil prediction[:value] - end - - def test_local_physchem_regression_with_feature_selection - training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv" - algorithms = { - :descriptors => { - :method => "calculated_properties", - :types => ["OPENBABEL"] - }, - :similarity => { - :method => "Algorithm::Similarity.weighted_cosine", - :min => 0.5 - }, - :feature_selection => { - :method => "Algorithm::FeatureSelection.correlation_filter", - }, - } - model = Model::Lazar.create(training_dataset.features.first, training_dataset, algorithms) - p model - compound = Compound.from_smiles "NC(=O)OCCC" - prediction = model.predict compound - refute_nil prediction[:value] - end - - def test_local_physchem_classification - skip - end - -end diff --git a/test/validation-nanoparticle.rb b/test/validation-nanoparticle.rb new file mode 100644 index 0000000..3692515 --- /dev/null +++ b/test/validation-nanoparticle.rb @@ -0,0 +1,74 @@ +require_relative "setup.rb" + +class NanoparticleTest < MiniTest::Test + include OpenTox::Validation + + def setup + @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first + unless @training_dataset + Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") + @training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first + end + @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first + end + + def test_validate_default_nanoparticle_model + model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature + cv = CrossValidation.create model + p cv.rmse + p cv.r_squared + #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot} + refute_nil cv.r_squared + refute_nil cv.rmse + end + + def test_validate_pls_nanoparticle_model + algorithms = { + :descriptors => { :types => ["P-CHEM"] }, + :prediction => {:parameters => 'pls' }, + } + model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms + assert_equal "pls", model.algorithms[:prediction][:parameters] + assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method] + cv = CrossValidation.create model + p cv.rmse + p cv.r_squared + refute_nil cv.r_squared + refute_nil cv.rmse + end + + def test_validate_proteomics_pls_nanoparticle_model + algorithms = { + :descriptors => { :types => ["Proteomics"] }, + :prediction => { :parameters => 'pls' } + } + model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms + assert_equal "pls", model.algorithms[:prediction][:parameters] + assert_equal "Algorithm::Caret.regression", model.algorithms[:prediction][:method] + cv = CrossValidation.create model + p cv.rmse + p cv.r_squared + refute_nil cv.r_squared + refute_nil cv.rmse + end + + def test_validate_all_default_nanoparticle_model + algorithms = { + :descriptors => { + :types => ["Proteomics","P-CHEM"] + }, + } + model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms + cv = CrossValidation.create model + p cv.rmse + p cv.r_squared + refute_nil cv.r_squared + refute_nil cv.rmse + end + + + def test_import_ld + skip # Ambit JSON-LD export defunct + dataset_ids = Import::Enanomapper.import_ld + end +end -- cgit v1.2.3