From 91787edb3682900bc5a2feeca66e5142f387fcc6 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 7 Oct 2016 10:25:58 +0200 Subject: unified interface for prediction algorithms --- test/nanoparticles.rb | 129 +++++++------------------------------------------- 1 file changed, 17 insertions(+), 112 deletions(-) (limited to 'test') diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb index 9b2d2d9..074a429 100644 --- a/test/nanoparticles.rb +++ b/test/nanoparticles.rb @@ -14,57 +14,18 @@ class NanoparticleTest < MiniTest::Test end def test_create_model - model = Model::Lazar.create training_dataset: @training_dataset + model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature nanoparticle = @training_dataset.nanoparticles[-34] prediction = model.predict nanoparticle - p prediction refute_nil prediction[:value] assert_includes nanoparticle.dataset_ids, @training_dataset.id + asser_true @prediction_feature.measured model.delete end - def test_inspect_cv - skip - cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last - #p cv - #p cv.id - #cv.correlation_plot_id = nil - File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot} - #p cv.statistics - #p cv.model.@training_dataset.substances.first.physchem_descriptors.keys.collect{|d| Feature.find(d).name} - CrossValidation.all.sort_by{|cv| cv.created_at}.reverse.each do |cv| - p cv.name - p cv.created_at - begin - p cv.r_squared - rescue - end - end - end - def test_inspect_worst_prediction - skip - - cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last - worst_predictions = cv.worst_predictions(n: 3,show_neigbors: false) - assert_equal 3, worst_predictions.size - assert_kind_of Integer, worst_predictions.first[:neighbors] - worst_predictions = cv.worst_predictions - assert_equal 5, worst_predictions.size - assert_kind_of Array, worst_predictions.first[:neighbors] - assert_kind_of Integer, worst_predictions.first[:neighbors].first[:common_descriptors] - puts worst_predictions.to_yaml - worst_predictions = cv.worst_predictions(n: 2, show_common_descriptors: true) - #puts worst_predictions.to_yaml - assert_equal 2, worst_predictions.size - assert_kind_of Array, worst_predictions.first[:neighbors] - refute_nil worst_predictions.first[:neighbors].first[:common_descriptors] - #p cv.model.training_dataset.features - end - - def test_validate_model - algorithms = { :prediction => {:method => "Algorithm::Regression.weighted_average" } } - model = Model::Lazar.create training_dataset: @training_dataset - cv = RegressionCrossValidation.create model + def test_validate_default_nanoparticle_model + model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature + cv = CrossValidation.create model p cv.rmse p cv.r_squared #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot} @@ -72,62 +33,42 @@ class NanoparticleTest < MiniTest::Test refute_nil cv.rmse end - def test_validate_pls_model + def test_validate_pls_nanoparticle_model algorithms = { - :descriptors => { - :method => "properties", - :types => ["P-CHEM"] - }, - :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'pls' }, + :descriptors => { :types => ["P-CHEM"] }, + :prediction => {:parameters => 'pls' }, } model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - cv = RegressionCrossValidation.create model + assert_equal "pls", model.algorithms[:prediction][:method] + cv = CrossValidation.create model p cv.rmse p cv.r_squared refute_nil cv.r_squared refute_nil cv.rmse end - def test_validate_random_forest_model + def test_validate_proteomics_pls_nanoparticle_model algorithms = { - :descriptors => { - :method => "properties", - :types => ["P-CHEM"] - }, - :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'rf' } + :descriptors => { :types => ["Proteomics"] }, + :prediction => { :parameters => 'pls' } } model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - cv = RegressionCrossValidation.create model + assert_equal "pls", model.algorithms[:prediction][:method] + cv = CrossValidation.create model p cv.rmse p cv.r_squared refute_nil cv.r_squared refute_nil cv.rmse end - def test_validate_proteomics_pls_model - algorithms = { - :descriptors => { - :method => "properties", - :types => ["Proteomics"] - }, - :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'rf' } - } - model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - cv = RegressionCrossValidation.create model - p cv.rmse - p cv.r_squared - refute_nil cv.r_squared - refute_nil cv.rmse - end - - def test_validate_all_default_model + def test_validate_all_default_nanoparticle_model algorithms = { :descriptors => { :types => ["Proteomics","P-CHEM"] }, } model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms - cv = RegressionCrossValidation.create model + cv = CrossValidation.create model p cv.rmse p cv.r_squared refute_nil cv.r_squared @@ -141,42 +82,6 @@ class NanoparticleTest < MiniTest::Test end end - def test_summaries - skip - datasets = Dataset.all - datasets = datasets.select{|d| !d.name.nil?} - datasets.each do |d| - - #p d.features.select{|f| f.name.match (/Total/)} - #p d.features.collect{|f| "#{f.name} #{f.unit} #{f.conditions}"} - p d.features.uniq.collect{|f| f.name} - end - assert_equal 9, datasets.size -=begin - features = Feature.all.to_a - #p features.collect do |f| - #f if f.category == "TOX" - #end.to_a.flatten.size - toxcounts = {} - pccounts = {} - Nanoparticle.all.each do |np| - np.measurements.each do |t,v| - toxcounts[t] ||= 0 - toxcounts[t] += 1#v.uniq.size - end - np.physchem_descriptors.each do |t,v| - pccounts[t] ||= 0 - pccounts[t] += 1#v.uniq.size - end - end - #puts counts.keys.collect{|i| Feature.find(i)}.to_yaml - #pccounts.each{|e,n| p Feature.find(e),n if n > 100} - #p toxcounts.collect{|e,n| Feature.find(e).name if n > 1}.uniq - toxcounts.each{|e,n| p Feature.find(e),n if n > 100} -=end - end - - def test_import_ld skip dataset_ids = Import::Enanomapper.import_ld -- cgit v1.2.3