From 85f2308c101b4778508c2d767e08af4cfd671b7b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 2 Jun 2016 12:22:39 +0200 Subject: local pls regression for nanoparticles --- test/nanoparticles.rb | 46 ++++++++++++---------------------------------- test/setup.rb | 4 ++-- 2 files changed, 14 insertions(+), 36 deletions(-) (limited to 'test') diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb index a2c77b5..b6a2f00 100644 --- a/test/nanoparticles.rb +++ b/test/nanoparticles.rb @@ -5,6 +5,7 @@ class NanoparticleTest < MiniTest::Test include OpenTox::Validation def setup + # TODO: multiple runs create duplicates #Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") end @@ -35,39 +36,13 @@ class NanoparticleTest < MiniTest::Test # TODO move to validation-statistics def test_inspect_cv - skip cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last cv.correlation_plot_id = nil File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot} - #p cv -=begin - #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot} - cv.predictions.sort_by{|sid,p| -(p["value"] - p["measurements"].median).abs}[0,5].each do |sid,p| - s = Substance.find(sid) - puts - p s.name - p([p["value"],p["measurements"],(p["value"]-p["measured"].median).abs]) - neighbors = s.physchem_neighbors dataset_id: cv.model.training_dataset_id, prediction_feature_id: cv.model.prediction_feature_id, type: nil - neighbors.each do |n| - neighbor = Substance.find(n["_id"]) - p "==" - p neighbor.name, n["similarity"], n["measurements"] - p neighbor.core["name"] - p neighbor.coating.collect{|c| c["name"]} - n["common_descriptors"].each do |id| - f = Feature.find(id) - print "#{f.name} #{f.conditions["MEDIUM"]}" - print ", " - end - puts - end - - end -=end + p cv.statistics end def test_inspect_worst_prediction - skip -# TODO check/fix single/double neighbor prediction + cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last worst_predictions = cv.worst_predictions(n: 3,show_neigbors: false) assert_equal 3, worst_predictions.size @@ -100,15 +75,18 @@ class NanoparticleTest < MiniTest::Test refute_nil cv.r_squared refute_nil cv.rmse end - def test_validate_pls_model - skip training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles") - feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)") - model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "physchem_neighbors"}) - cv = Validation::RegressionCrossValidation.create model + #feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)") + feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX") + + model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "physchem_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}}) + cv = RegressionCrossValidation.create model p cv - File.open("tmp.png","w+"){|f| f.puts cv.correlation_plot} + #p cv.predictions.sort_by{|sid,p| (p["value"] - p["measurements"].median).abs} + p cv.rmse + p cv.r_squared + File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot} refute_nil cv.r_squared refute_nil cv.rmse end diff --git a/test/setup.rb b/test/setup.rb index e7c32b4..6c97282 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -5,5 +5,5 @@ require_relative '../lib/lazar.rb' include OpenTox TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") -$mongo.database.drop -$gridfs = $mongo.database.fs +#$mongo.database.drop +#$gridfs = $mongo.database.fs -- cgit v1.2.3