unified interface for prediction algorithms

author: Christoph Helma <helma@in-silico.ch> 2016-10-07 10:25:58 +0200
committer: Christoph Helma <helma@in-silico.ch> 2016-10-07 10:25:58 +0200
commit: 91787edb3682900bc5a2feeca66e5142f387fcc6 (patch)
tree: fcb189bb8e84c9b3727b29f0846a5f820b596aee /test
parent: 4348eec89033e6677c9f628646fc67bd03c73fe6 (diff)
1 files changed, 17 insertions, 112 deletions
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index 9b2d2d9..074a429 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -14,57 +14,18 @@ class NanoparticleTest  < MiniTest::Test
   end
 
   def test_create_model
-    model = Model::Lazar.create training_dataset: @training_dataset
+    model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
     nanoparticle = @training_dataset.nanoparticles[-34]
     prediction = model.predict nanoparticle
-    p prediction
     refute_nil prediction[:value]
     assert_includes nanoparticle.dataset_ids, @training_dataset.id
+    asser_true @prediction_feature.measured
     model.delete
   end
 
-  def test_inspect_cv
-    skip
-    cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last
-    #p cv
-    #p cv.id
-    #cv.correlation_plot_id = nil
-    File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot}
-    #p cv.statistics
-    #p cv.model.@training_dataset.substances.first.physchem_descriptors.keys.collect{|d| Feature.find(d).name}
-    CrossValidation.all.sort_by{|cv| cv.created_at}.reverse.each do |cv|
-      p cv.name
-      p cv.created_at
-      begin
-      p cv.r_squared
-      rescue
-      end
-    end
-  end
-  def test_inspect_worst_prediction
-    skip
-  
-    cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last
-    worst_predictions = cv.worst_predictions(n: 3,show_neigbors: false)
-    assert_equal 3, worst_predictions.size
-    assert_kind_of Integer, worst_predictions.first[:neighbors]
-    worst_predictions = cv.worst_predictions
-    assert_equal 5, worst_predictions.size
-    assert_kind_of Array, worst_predictions.first[:neighbors]
-    assert_kind_of Integer, worst_predictions.first[:neighbors].first[:common_descriptors]
-    puts worst_predictions.to_yaml
-    worst_predictions = cv.worst_predictions(n: 2, show_common_descriptors: true)
-    #puts worst_predictions.to_yaml
-    assert_equal 2, worst_predictions.size
-    assert_kind_of Array, worst_predictions.first[:neighbors]
-    refute_nil worst_predictions.first[:neighbors].first[:common_descriptors]
-    #p cv.model.training_dataset.features
-  end
-
-  def test_validate_model
-    algorithms = { :prediction => {:method => "Algorithm::Regression.weighted_average" } }
-    model = Model::Lazar.create training_dataset: @training_dataset
-    cv = RegressionCrossValidation.create model
+  def test_validate_default_nanoparticle_model
+    model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
+    cv = CrossValidation.create model
     p cv.rmse
     p cv.r_squared
     #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot}
@@ -72,62 +33,42 @@ class NanoparticleTest  < MiniTest::Test
     refute_nil cv.rmse
   end
 
-  def test_validate_pls_model
+  def test_validate_pls_nanoparticle_model
     algorithms = {
-      :descriptors => {
-        :method => "properties",
-        :types => ["P-CHEM"]
-      },
-      :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'pls' },
+      :descriptors => { :types => ["P-CHEM"] },
+      :prediction => {:parameters => 'pls' },
     }
     model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
-    cv = RegressionCrossValidation.create model
+    assert_equal "pls", model.algorithms[:prediction][:method]
+    cv = CrossValidation.create model
     p cv.rmse
     p cv.r_squared
     refute_nil cv.r_squared
     refute_nil cv.rmse
   end
 
-  def test_validate_random_forest_model
+  def test_validate_proteomics_pls_nanoparticle_model
     algorithms = {
-      :descriptors => {
-        :method => "properties",
-        :types => ["P-CHEM"]
-      },
-      :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'rf' }
+      :descriptors => { :types => ["Proteomics"] },
+      :prediction => { :parameters => 'pls' }
     }
     model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
-    cv = RegressionCrossValidation.create model
+    assert_equal "pls", model.algorithms[:prediction][:method]
+    cv = CrossValidation.create model
     p cv.rmse
     p cv.r_squared
     refute_nil cv.r_squared
     refute_nil cv.rmse
   end
 
-  def test_validate_proteomics_pls_model
-    algorithms = {
-      :descriptors => {
-        :method => "properties",
-        :types => ["Proteomics"]
-      },
-      :prediction => {:method => "Algorithm::Caret.regression", :parameters => 'rf' }
-    }
-    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
-    cv = RegressionCrossValidation.create model
-    p cv.rmse
-    p cv.r_squared
-    refute_nil cv.r_squared
-    refute_nil cv.rmse
-  end
-
-  def test_validate_all_default_model
+  def test_validate_all_default_nanoparticle_model
     algorithms = {
       :descriptors => {
         :types => ["Proteomics","P-CHEM"]
       },
     }
     model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
-    cv = RegressionCrossValidation.create model
+    cv = CrossValidation.create model
     p cv.rmse
     p cv.r_squared
     refute_nil cv.r_squared
@@ -141,42 +82,6 @@ class NanoparticleTest  < MiniTest::Test
     end
   end
 
-  def test_summaries
-    skip
-    datasets = Dataset.all
-    datasets = datasets.select{|d| !d.name.nil?}
-    datasets.each do |d|
-      
-      #p d.features.select{|f| f.name.match (/Total/)}
-      #p d.features.collect{|f| "#{f.name} #{f.unit} #{f.conditions}"}
-      p d.features.uniq.collect{|f| f.name}
-    end
-    assert_equal 9, datasets.size
-=begin
-    features = Feature.all.to_a
-    #p features.collect do |f|
-      #f if f.category == "TOX"
-    #end.to_a.flatten.size
-    toxcounts = {}
-    pccounts = {}
-    Nanoparticle.all.each do |np|
-      np.measurements.each do |t,v|
-        toxcounts[t] ||= 0
-        toxcounts[t] += 1#v.uniq.size
-      end
-      np.physchem_descriptors.each do |t,v|
-        pccounts[t] ||= 0
-        pccounts[t] += 1#v.uniq.size
-      end
-    end
-    #puts counts.keys.collect{|i| Feature.find(i)}.to_yaml
-    #pccounts.each{|e,n| p Feature.find(e),n if n > 100}
-    #p toxcounts.collect{|e,n| Feature.find(e).name if n > 1}.uniq
-    toxcounts.each{|e,n| p Feature.find(e),n if n > 100}
-=end
-  end
-
-
   def test_import_ld
     skip
     dataset_ids = Import::Enanomapper.import_ld
author	Christoph Helma <helma@in-silico.ch>	2016-10-07 10:25:58 +0200
committer	Christoph Helma <helma@in-silico.ch>	2016-10-07 10:25:58 +0200
commit	91787edb3682900bc5a2feeca66e5142f387fcc6 (patch)
tree	fcb189bb8e84c9b3727b29f0846a5f820b596aee /test
parent	4348eec89033e6677c9f628646fc67bd03c73fe6 (diff)