summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2015-09-23 14:51:41 +0200
committer Christoph Helma <helma@in-silico.ch> 2015-09-23 14:51:41 +0200
commit d5bf97c2cb999539c56bf59aa1d7d3286745be84 (patch)
tree 91d5ab3fd9641c7349d45356d43aef867e4bee92 /test
parent 259cd085e053193b4c166495ae1af35cfa94bcf6 (diff)
validations fixed (all models were executed with default parameters)
Diffstat (limited to 'test')
-rw-r--r-- test/compound.rb 12
-rw-r--r-- test/dataset.rb 10
-rw-r--r-- test/experiment.rb 63
-rw-r--r-- test/lazar-physchem-short.rb 34
-rw-r--r-- test/lazar-regression.rb 10
-rw-r--r-- test/validation.rb 66
6 files changed, 167 insertions, 28 deletions
diff --git a/test/compound.rb b/test/compound.rb
index 6a3c696..b33a643 100644
--- a/test/compound.rb
+++ b/test/compound.rb
@@ -134,4 +134,16 @@ print c.sdf
end
end
end
+
+ def test_mna # smoke test: builds a compound from a fixed SMILES and prints c.mna(4); makes no assertions
+ c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
+ p c.mna 4 # NOTE(review): result is only printed, never checked — this test can only fail by raising
+ end
+
+ def test_mpd
+ c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
+ assert 13, c.mpd.size
+ assert 7, c.mpd.uniq.size
+ assert_equal c.mpd, c.openbabel_fingerprint("mpd")
+ end
end
diff --git a/test/dataset.rb b/test/dataset.rb
index 84be547..752073e 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -202,5 +202,15 @@ class DatasetTest < MiniTest::Test
assert_equal 0.00323, d2.data_entries[5][0]
end
+ def test_scaled_dataset # round trip: original_value applied to each scaled entry must equal the unscaled entry
+ original_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
+ scaled_dataset = original_dataset.scale
+ scaled_dataset.data_entries.each_with_index do |row,i|
+ row.each_with_index do |value,j|
+ assert_equal original_dataset.data_entries[i][j].round(4), scaled_dataset.original_value(value,j).round(4) if value # both sides rounded to 4 decimals; nil entries are skipped
+ end
+ end
+ end
+
end
diff --git a/test/experiment.rb b/test/experiment.rb
index 4b54768..76a0498 100644
--- a/test/experiment.rb
+++ b/test/experiment.rb
@@ -63,21 +63,26 @@ class ExperimentTest < MiniTest::Test
end
def test_regression_fingerprints
+=begin
datasets = [
- "LOAEL_mmol_corrected_smiles.csv"
+ "EPAFHM.medi.csv",
+ #"LOAEL_mmol_corrected_smiles.csv"
]
min_sims = [0.3,0.7]
- types = ["FP2","FP3","FP4","MACCS"]
+ #min_sims = [0.7]
+ #types = ["FP2","FP3","FP4","MACCS","mpd"]
+ types = ["mpd","FP3"]
experiment = Experiment.create(
- :name => "Fminer vs fingerprint classification for datasets #{datasets}.",
+ :name => "Fingerprint regression with different types for datasets #{datasets}.",
:dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
)
types.each do |type|
min_sims.each do |min_sim|
experiment.model_settings << {
- :algorithm => "OpenTox::Model::LazarRegression",
+ :model_algorithm => "OpenTox::Model::LazarRegression",
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
:neighbor_algorithm => "fingerprint_neighbors",
- :neighbor_algorithm_parameter => {
+ :neighbor_algorithm_parameters => {
:type => type,
:min_sim => min_sim,
}
@@ -85,7 +90,53 @@ class ExperimentTest < MiniTest::Test
end
end
experiment.run
- p experiment.report
+=end
+#=begin
+ experiment = Experiment.find '56029cb92b72ed673d000000'
+#=end
+ p experiment.id
+ experiment.results.each do |dataset,result|
+ result.each do |r|
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
+ cv.validation_ids.each do |vid|
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
+ assert_equal params[:type], model_params[:type]
+ assert_equal params[:min_sim], model_params[:min_sim]
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
+ end
+ end
+ end
+ end
+ puts experiment.report.to_yaml
+ p experiment.summary
+ end
+ def test_mpd_fingerprints
+=begin
+ datasets = [
+ "EPAFHM.medi.csv",
+ ]
+ types = ["FP2","mpd"]
+ experiment = Experiment.create(
+ :name => "FP2 vs mpd fingerprint regression for datasets #{datasets}.",
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
+ )
+ types.each do |type|
+ experiment.model_settings << {
+ :algorithm => "OpenTox::Model::LazarRegression",
+ :neighbor_algorithm => "fingerprint_neighbors",
+ :neighbor_algorithm_parameter => {
+ :type => type,
+ :min_sim => 0.7,
+ }
+ }
+ end
+ experiment.run
+ p experiment.id
+=end
+ experiment = Experiment.find '55ffd0c02b72ed123c000000'
+ p experiment
+ puts experiment.report.to_yaml
end
end
diff --git a/test/lazar-physchem-short.rb b/test/lazar-physchem-short.rb
index e74a4b9..59d8112 100644
--- a/test/lazar-physchem-short.rb
+++ b/test/lazar-physchem-short.rb
@@ -2,27 +2,29 @@ require_relative "setup.rb"
class LazarPhyschemDescriptorTest < MiniTest::Test
def test_epafhm
- skip "Physchem Regression not yet implemented."
- # check available descriptors
- @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
- assert_equal 111,@descriptors.size,"wrong number of physchem descriptors"
- @descriptor_values = OpenTox::Algorithm::Descriptor::DESCRIPTOR_VALUES
- # select descriptors for test
- @num_features_offset = 0
- @descriptors.keep_if{|x| x=~/^Openbabel\./}
- @descriptors.delete("Openbabel.L5") # TODO Openbabel.L5 does not work, investigate!!!
- puts "Descriptors: #{@descriptors}"
+ @descriptors = OpenTox::Algorithm::Descriptor::OBDESCRIPTORS.keys
+ refute_empty @descriptors
# UPLOAD DATA
training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
- puts "Dataset: "+training_dataset.id
-# feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors
- model = Model::LazarRegression.create training_dataset#, feature_dataset
- #p model
+ feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors
+ scaled_feature_dataset = feature_dataset.scale
+ model = Model::LazarRegression.create training_dataset
+ model.neighbor_algorithm = "physchem_neighbors"
+ model.neighbor_algorithm_parameters = {
+ :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.physchem",
+ :descriptors => @descriptors,
+ :feature_dataset_id => scaled_feature_dataset.id,
+ :min_sim => 0.3
+ }
+ model.save
compound = Compound.from_smiles "CC(C)(C)CN"
prediction = model.predict compound
- p prediction
-
+ refute_nil prediction[:value]
+ refute_nil prediction[:confidence]
+ prediction[:neighbors].each do |line|
+ assert_operator line[1], :>, 0.3
+ end
end
end
diff --git a/test/lazar-regression.rb b/test/lazar-regression.rb
index cc7f356..8b2d473 100644
--- a/test/lazar-regression.rb
+++ b/test/lazar-regression.rb
@@ -13,6 +13,16 @@ class LazarRegressionTest < MiniTest::Test
assert_equal 1, prediction[:neighbors].size
end
+ def test_mpd_fingerprints # regression prediction with the neighbor fingerprint :type set to "mpd"
+ training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
+ model = Model::LazarRegression.create training_dataset
+ model.neighbor_algorithm_parameters[:type] = "mpd" # NOTE(review): changed in memory only, no model.save here — confirm predict picks it up
+ compound = Compound.from_smiles "CCCSCCSCC"
+ prediction = model.predict compound
+ assert_equal 0.04, prediction[:value].round(2) # predicted value compared after rounding to 2 decimals
+ assert_equal 1, prediction[:neighbors].size # exactly one neighbor is expected for this compound
+ end
+
def test_local_linear_regression
skip
training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
diff --git a/test/validation.rb b/test/validation.rb
index dfa2c81..9717ccc 100644
--- a/test/validation.rb
+++ b/test/validation.rb
@@ -20,10 +20,25 @@ class ValidationTest < MiniTest::Test
end
def test_regression_crossvalidation
- #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
- dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv"
- model = Model::LazarRegression.create dataset
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
+ #dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.csv"
+ params = {
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
+ :neighbor_algorithm => "fingerprint_neighbors",
+ :neighbor_algorithm_parameters => {
+ :type => "MACCS",
+ :min_sim => 0.7,
+ }
+ }
+ model = Model::LazarRegression.create dataset, params
cv = RegressionCrossValidation.create model
+ cv.validation_ids.each do |vid|
+ model = Model::Lazar.find(Validation.find(vid).model_id)
+ assert_equal params[:neighbor_algorithm_parameters][:type], model[:neighbor_algorithm_parameters][:type]
+ assert_equal params[:neighbor_algorithm_parameters][:min_sim], model[:neighbor_algorithm_parameters][:min_sim]
+ refute_equal params[:neighbor_algorithm_parameters][:training_dataset_id], model[:neighbor_algorithm_parameters][:training_dataset_id]
+ end
+
#`inkview #{cv.plot}`
#puts JSON.pretty_generate(cv.misclassifications)#.collect{|l| l.join ", "}.join "\n"
#`inkview #{cv.plot}`
@@ -37,12 +52,51 @@ class ValidationTest < MiniTest::Test
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
model = Model::LazarClassification.create dataset
repeated_cv = RepeatedCrossValidation.create model
- p repeated_cv
repeated_cv.crossvalidations.each do |cv|
- p cv
- p cv.accuracy
assert cv.accuracy > 0.7
end
end
+ def test_crossvalidation_parameters # each validation's model must carry the same neighbor_algorithm_parameters as the source model
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ params = {
+ :neighbor_algorithm_parameters => {
+ :min_sim => 0.3,
+ :type => "FP3"
+ }
+ }
+ model = Model::LazarClassification.create dataset, params
+ model.save
+ cv = ClassificationCrossValidation.create model
+ params = model.neighbor_algorithm_parameters # params is reused: from here on it holds the model's parameters, not the creation hash
+ params = Hash[params.map{ |k, v| [k.to_s, v] }] # convert symbols to string
+ cv.validations.each do |validation|
+ assert_equal params, validation.model.neighbor_algorithm_parameters # presumably the stored models return string keys — hence the conversion above; verify
+ end
+ end
+
+ def test_physchem_regression_crossvalidation # runs RegressionCrossValidation on a model configured for physchem_neighbors; prints the result
+
+ @descriptors = OpenTox::Algorithm::Descriptor::OBDESCRIPTORS.keys
+ refute_empty @descriptors
+
+ # UPLOAD DATA
+ training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
+ feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors
+ feature_dataset.save
+ scaled_feature_dataset = feature_dataset.scale
+ scaled_feature_dataset.save
+ model = Model::LazarRegression.create training_dataset
+ model.neighbor_algorithm = "physchem_neighbors"
+ model.neighbor_algorithm_parameters = {
+ :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.physchem",
+ :descriptors => @descriptors,
+ :feature_dataset_id => scaled_feature_dataset.id, # the scaled dataset's id is passed, not the unscaled feature_dataset's
+ :min_sim => 0.3
+ }
+ model.save
+ cv = RegressionCrossValidation.create model
+ p cv # NOTE(review): the only assertion in this test is refute_empty above; the cv result is printed, not checked
+ end
+
end