summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2016-05-31 18:08:08 +0200
committerChristoph Helma <helma@in-silico.ch>2016-05-31 18:08:08 +0200
commitb515a0cfedb887a2af753db6e4a08ae1af430cad (patch)
tree5d69d89d0031d581e932272aeb741ee38a0106d6 /test
parentf46ba3b7262f5b551c81fc9396c5b7f0cac7f030 (diff)
cleanup of validation modules/classes
Diffstat (limited to 'test')
-rw-r--r--test/classification.rb2
-rw-r--r--test/nanoparticles.rb70
-rw-r--r--test/setup.rb4
-rw-r--r--test/validation.rb5
4 files changed, 68 insertions, 13 deletions
diff --git a/test/classification.rb b/test/classification.rb
index df7cba9..9104022 100644
--- a/test/classification.rb
+++ b/test/classification.rb
@@ -20,7 +20,7 @@ class LazarClassificationTest < MiniTest::Test
compound = Compound.from_smiles "CCO"
prediction = model.predict compound
assert_equal "true", prediction[:value]
- assert_equal ["false"], prediction[:database_activities]
+ assert_equal ["false"], prediction[:measurements]
# make a dataset prediction
compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
diff --git a/test/nanoparticles.rb b/test/nanoparticles.rb
index 1cd1ff0..f0ded2f 100644
--- a/test/nanoparticles.rb
+++ b/test/nanoparticles.rb
@@ -11,7 +11,7 @@ class NanoparticleTest < MiniTest::Test
def test_create_model_with_feature_selection
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors", :feature_selection_algorithm => "correlation_filter"})
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "physchem_neighbors", :feature_selection_algorithm => "correlation_filter"})
nanoparticle = training_dataset.nanoparticles[-34]
#p nanoparticle.neighbors
prediction = model.predict nanoparticle
@@ -23,7 +23,7 @@ class NanoparticleTest < MiniTest::Test
def test_create_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors"})
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "physchem_neighbors"})
nanoparticle = training_dataset.nanoparticles[-34]
prediction = model.predict nanoparticle
refute_nil prediction[:value]
@@ -31,13 +31,67 @@ class NanoparticleTest < MiniTest::Test
model.delete
end
+ # TODO move to validation-statistics
+ def test_inspect_cv
+ cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last
+ cv.correlation_plot_id = nil
+ File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot}
+ #p cv
+=begin
+ #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot}
+ cv.predictions.sort_by{|sid,p| -(p["value"] - p["measurements"].median).abs}[0,5].each do |sid,p|
+ s = Substance.find(sid)
+ puts
+ p s.name
+ p([p["value"],p["measurements"],(p["value"]-p["measured"].median).abs])
+ neighbors = s.physchem_neighbors dataset_id: cv.model.training_dataset_id, prediction_feature_id: cv.model.prediction_feature_id, type: nil
+ neighbors.each do |n|
+ neighbor = Substance.find(n["_id"])
+ p "=="
+ p neighbor.name, n["similarity"], n["measurements"]
+ p neighbor.core["name"]
+ p neighbor.coating.collect{|c| c["name"]}
+ n["common_descriptors"].each do |id|
+ f = Feature.find(id)
+ print "#{f.name} #{f.conditions["MEDIUM"]}"
+ print ", "
+ end
+ puts
+ end
+
+ end
+=end
+ end
+ def test_inspect_worst_prediction
+# TODO check/fix single/double neighbor prediction
+ cv = CrossValidation.all.sort_by{|cv| cv.created_at}.last
+ worst_predictions = cv.worst_predictions(n: 3,show_neigbors: false)
+ assert_equal 3, worst_predictions.size
+ assert_kind_of Integer, worst_predictions.first[:neighbors]
+ worst_predictions = cv.worst_predictions
+ #puts worst_predictions.to_yaml
+ assert_equal 5, worst_predictions.size
+ assert_kind_of Array, worst_predictions.first[:neighbors]
+ assert_kind_of Integer, worst_predictions.first[:neighbors].first[:common_descriptors]
+ worst_predictions = cv.worst_predictions(n: 2, show_common_descriptors: true)
+ puts worst_predictions.to_yaml
+ assert_equal 2, worst_predictions.size
+ assert_kind_of Array, worst_predictions.first[:neighbors]
+ refute_nil worst_predictions.first[:neighbors].first[:common_descriptors]
+ #p cv.model.training_dataset.features
+ end
+
def test_validate_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
- feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "nanoparticle_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
+ #feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
+ feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
+
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_weighted_average", :neighbor_algorithm => "physchem_neighbors", :neighbor_algorithm_parameters => {:min_sim => 0.5}})
cv = RegressionCrossValidation.create model
- p cv
- File.open("tmp.png","w+"){|f| f.puts cv.correlation_plot}
+ p cv.predictions.sort_by{|sid,p| (p["value"] - p["measurements"].median).abs}
+ p cv.rmse
+ p cv.r_squared
+ File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot}
refute_nil cv.r_squared
refute_nil cv.rmse
end
@@ -45,7 +99,7 @@ class NanoparticleTest < MiniTest::Test
def test_validate_pls_model
training_dataset = Dataset.find_or_create_by(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
- model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "nanoparticle_neighbors"})
+ model = Model::LazarRegression.create(feature, training_dataset, {:prediction_algorithm => "OpenTox::Algorithm::Regression.local_physchem_regression", :neighbor_algorithm => "physchem_neighbors"})
cv = RegressionCrossValidation.create model
p cv
File.open("tmp.png","w+"){|f| f.puts cv.correlation_plot}
@@ -79,7 +133,7 @@ class NanoparticleTest < MiniTest::Test
toxcounts = {}
pccounts = {}
Nanoparticle.all.each do |np|
- np.toxicities.each do |t,v|
+ np.measurements.each do |t,v|
toxcounts[t] ||= 0
toxcounts[t] += 1#v.uniq.size
end
diff --git a/test/setup.rb b/test/setup.rb
index 6c97282..e7c32b4 100644
--- a/test/setup.rb
+++ b/test/setup.rb
@@ -5,5 +5,5 @@ require_relative '../lib/lazar.rb'
include OpenTox
TEST_DIR ||= File.expand_path(File.dirname(__FILE__))
DATA_DIR ||= File.join(TEST_DIR,"data")
-#$mongo.database.drop
-#$gridfs = $mongo.database.fs
+$mongo.database.drop
+$gridfs = $mongo.database.fs
diff --git a/test/validation.rb b/test/validation.rb
index 39314da..a259472 100644
--- a/test/validation.rb
+++ b/test/validation.rb
@@ -1,6 +1,7 @@
require_relative "setup.rb"
class ValidationTest < MiniTest::Test
+ include OpenTox::Validation
# defaults
@@ -86,7 +87,7 @@ class ValidationTest < MiniTest::Test
def test_classification_loo_validation
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
model = Model::LazarClassification.create dataset.features.first, dataset
- loo = ClassificationLeaveOneOutValidation.create model
+ loo = ClassificationLeaveOneOut.create model
assert_equal 14, loo.nr_unpredicted
refute_empty loo.confusion_matrix
assert loo.accuracy > 0.77
@@ -96,7 +97,7 @@ class ValidationTest < MiniTest::Test
def test_regression_loo_validation
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
model = Model::LazarRegression.create dataset.features.first, dataset
- loo = RegressionLeaveOneOutValidation.create model
+ loo = RegressionLeaveOneOut.create model
assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
end