summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorhelma@in-silico.ch <helma@in-silico.ch>2018-10-10 21:39:11 +0200
committerhelma@in-silico.ch <helma@in-silico.ch>2018-10-10 21:39:11 +0200
commit8b31acab67e22f30a87c995a94f1ee1e2a3d510f (patch)
tree8313b75ec509f4bfcb5abfff5237c00c1a134113 /test
parente1eeac0711af6a5e6139610d3ab4dc100beb0fa6 (diff)
dataset tests fixed
Diffstat (limited to 'test')
-rw-r--r--test/dataset.rb27
-rw-r--r--test/experiment.rb301
2 files changed, 14 insertions, 314 deletions
diff --git a/test/dataset.rb b/test/dataset.rb
index 4196fd8..2b439bb 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -24,10 +24,10 @@ class DatasetTest < MiniTest::Test
# real datasets
def test_import_pubchem
- d = Dataset.from_pubchem 1191
+ d = Dataset.from_pubchem_aid 1191
assert_equal 87, d.compounds.size
assert_equal 2, d.features.size
- assert_equal "Active", d.values(d.compounds[10],d.features[1])
+ assert_equal ["Active"], d.values(d.compounds[10],d.features[1])
# TODO endpoint name
# TODO regression import
end
@@ -37,7 +37,7 @@ class DatasetTest < MiniTest::Test
assert_equal 53, d.compounds.size
assert_equal 1, d.features.size
f = d.features[0]
- assert_equal "input_53.csv.ID", f.name
+ assert_equal "input_53.ID", f.name
assert_equal OriginalId, f.class
assert_equal ["123-30-8"], d.values(d.compounds.first,f)
end
@@ -47,18 +47,18 @@ class DatasetTest < MiniTest::Test
assert_equal 53, d.compounds.size
assert_equal 1, d.features.size
f = d.features[0]
- assert_equal "input_53.tsv.ID", f.name
+ assert_equal "input_53.ID", f.name
assert_equal OriginalId, f.class
assert_equal ["123-30-8"], d.values(d.compounds.first,f)
end
def test_import_sdf
- #d = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
d = Dataset.from_sdf_file "#{DATA_DIR}/PA.sdf"
- assert_equal Compound.from_smiles("C[C@H]1C(=O)O[C@@H]2CCN3[C@@H]2C(=CC3)COC(=O)[C@]([C@]1(C)O)(C)O").smiles, d.compounds.first.smiles
- f = Feature.find_by(:name => "original_id")
assert_equal 35, d.features.size
- assert_equal ["9415"], d.values(d.compounds.first,f)
+ assert_kind_of NumericSubstanceProperty, d.features[1]
+ assert_equal NominalSubstanceProperty, d.features.last.class
+ assert_equal 602, d.compounds.size
+ assert_match "PUBCHEM_XLOGP3_AA", d.warnings.last
end
def test_import_hamster
@@ -66,7 +66,7 @@ class DatasetTest < MiniTest::Test
assert_equal Dataset, d.class
assert_equal 1, d.features.size
assert_equal 85, d.compounds.size
- assert_equal true, d.features.first.measured
+ assert_equal NominalBioActivity, d.features.first.class
csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
csv.shift
csv.each do |row|
@@ -104,7 +104,7 @@ class DatasetTest < MiniTest::Test
f = File.join DATA_DIR, "multi_cell_call.csv"
d = OpenTox::Dataset.from_csv_file f
csv = CSV.read f
- assert_equal true, d.features.first.nominal?
+ assert_equal NominalBioActivity, d.features.first.class
assert_equal 1056, d.compounds.size
assert_equal csv.first.size-1, d.features.size
errors.each do |smi|
@@ -157,7 +157,7 @@ class DatasetTest < MiniTest::Test
def test_create_without_features_smiles_and_inchi
["smiles", "inchi"].each do |type|
- d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv"), true
+ d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
assert_equal Dataset, d.class
refute_nil d.id
dataset = Dataset.find d.id
@@ -169,6 +169,7 @@ class DatasetTest < MiniTest::Test
# dataset operations
def test_merge
+ skip # TODO use new Features
source_feature = Feature.where(:name => "Ames test categorisation").first
target_feature = Feature.where(:name => "Mutagenicity").first
kazius = Dataset.from_sdf_file "#{DATA_DIR}/cas_4337.sdf"
@@ -177,10 +178,11 @@ class DatasetTest < MiniTest::Test
d = Dataset.merge [kazius,hansen,efsa], {source_feature => target_feature}, {1 => "mutagen", 0 => "nonmutagen"}
#File.open("tmp.csv","w+"){|f| f.puts d.to_csv}
assert_equal 8281, d.compounds.size
- assert_equal 4, d.features.size
c = Compound.from_smiles("C/C=C/C=O")
assert_equal ["mutagen"], d.values(c,target_feature)
assert_equal "/home/ist/lazar/test/data/cas_4337.sdf, /home/ist/lazar/test/data/hansen.csv, /home/ist/lazar/test/data/efsa.csv", d.source
+ p d.features
+ assert_equal 4, d.features.size
end
def test_folds
@@ -219,7 +221,6 @@ class DatasetTest < MiniTest::Test
c = Compound.from_smiles row.shift
serialized[c.inchi] = row
end
- #puts serialized.to_yaml
original.each do |inchi,row|
row.each_with_index do |v,i|
if v.numeric?
diff --git a/test/experiment.rb b/test/experiment.rb
deleted file mode 100644
index 418f7fe..0000000
--- a/test/experiment.rb
+++ /dev/null
@@ -1,301 +0,0 @@
-require_relative "setup.rb"
-
-class ExperimentTest < MiniTest::Test
-
- def test_regression_experiment
- skip
- datasets = [
- "EPAFHM.medi_log10.csv",
- #"EPAFHM.csv",
- #"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv",
- "LOAEL_mmol_corrected_smiles.csv"
- ]
- experiment = Experiment.create(
- :name => "Default regression for datasets #{datasets}.",
- :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
- :model_settings => [
- {
- :algorithm => "OpenTox::Model::LazarRegression",
- }
- ]
- )
- #experiment.run
- puts experiment.report.to_yaml
- assert_equal datasets.size, experiment.results.size
- experiment.results.each do |dataset_id, result|
- assert_equal 1, result.size
- result.each do |r|
- assert_kind_of BSON::ObjectId, r[:model_id]
- assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
- end
- end
- end
-
- def test_classification_experiment
-
- skip
- datasets = [ "hamster_carcinogenicity.csv" ]
- experiment = Experiment.create(
- :name => "Fminer vs fingerprint classification for datasets #{datasets}.",
- :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
- :model_settings => [
- {
- :algorithm => "OpenTox::Model::LazarClassification",
- },{
- :algorithm => "OpenTox::Model::LazarClassification",
- :neighbor_algorithm_parameter => {:min_sim => 0.3}
- },
- #{
- #:algorithm => "OpenTox::Model::LazarFminerClassification",
- #}
- ]
- )
- #experiment.run
-=begin
- experiment = Experiment.find "55f944a22b72ed7de2000000"
-=end
- puts experiment.report.to_yaml
- experiment.results.each do |dataset_id, result|
- assert_equal 2, result.size
- result.each do |r|
- assert_kind_of BSON::ObjectId, r[:model_id]
- assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
- end
- end
- end
-
- def test_regression_fingerprints
- skip
-#=begin
- datasets = [
- "EPAFHM.medi_log10.csv",
- #"LOAEL_mmol_corrected_smiles.csv"
- ]
- min_sims = [0.3,0.7]
- #min_sims = [0.7]
- #types = ["FP2","FP3","FP4","MACCS","MP2D"]
- types = ["MP2D","FP3"]
- experiment = Experiment.create(
- :name => "Fingerprint regression with different types for datasets #{datasets}.",
- :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
- )
- types.each do |type|
- min_sims.each do |min_sim|
- experiment.model_settings << {
- :model_algorithm => "OpenTox::Model::LazarRegression",
- :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
- :neighbor_algorithm => "fingerprint_neighbors",
- :neighbor_algorithm_parameters => {
- :type => type,
- :min_sim => min_sim,
- }
- }
- end
- end
- experiment.run
-#=end
-=begin
- experiment = Experiment.find '56029cb92b72ed673d000000'
-=end
- p experiment.id
- experiment.results.each do |dataset,result|
- result.each do |r|
- params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
- RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
- cv.validation_ids.each do |vid|
- model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
- assert_equal params[:type], model_params[:type]
- assert_equal params[:min_sim], model_params[:min_sim]
- refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
- end
- end
- end
- end
- puts experiment.report.to_yaml
- p experiment.summary
- end
-
- def test_mpd_fingerprints
- skip
- datasets = [
- "EPAFHM.medi_log10.csv",
- ]
- types = ["FP2","MP2D"]
- experiment = Experiment.create(
- :name => "FP2 vs MP2D fingerprint regression for datasets #{datasets}.",
- :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
- )
- types.each do |type|
- experiment.model_settings << {
- :algorithm => "OpenTox::Model::LazarRegression",
- :neighbor_algorithm => "fingerprint_neighbors",
- :neighbor_algorithm_parameter => {
- :type => type,
- :min_sim => 0.7,
- }
- }
- end
- experiment.run
- p experiment.id
-=begin
-=end
- #experiment = Experiment.find '55ffd0c02b72ed123c000000'
- p experiment
- puts experiment.report.to_yaml
- end
-
- def test_multiple_datasets
- skip
- datasets = [
- "EPAFHM.medi_log10.csv",
- "LOAEL_mmol_corrected_smiles.csv"
- ]
- min_sims = [0.3]
- types = ["FP2"]
- experiment = Experiment.create(
- :name => "Fingerprint regression with mutiple datasets #{datasets}.",
- :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
- )
- types.each do |type|
- min_sims.each do |min_sim|
- experiment.model_settings << {
- :model_algorithm => "OpenTox::Model::LazarRegression",
- :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
- :neighbor_algorithm => "fingerprint_neighbors",
- :neighbor_algorithm_parameters => {
- :type => type,
- :min_sim => min_sim,
- }
- }
- end
- end
- experiment.run
- p experiment.id
- experiment.results.each do |dataset,result|
- result.each do |r|
- params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
- RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
- cv.validation_ids.each do |vid|
- model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
- assert_equal params[:type], model_params[:type]
- assert_equal params[:min_sim], model_params[:min_sim]
- refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
- end
- end
- end
- end
- puts experiment.report.to_yaml
- p experiment.summary
- end
-
- def test_mpd_mna_regression_fingerprints
- skip
- datasets = [
- "EPAFHM.medi.csv",
- #"hamster_carcinogenicity.csv"
- ]
- min_sims = [0.0,0.3]
- types = ["MP2D","MNA"]
- neighbor_algos = [
- "fingerprint_neighbors",
- "fingerprint_count_neighbors",
- ]
- experiment = Experiment.create(
- :name => "MNA vs MPD descriptors",
- :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
- )
- types.each do |type|
- min_sims.each do |min_sim|
- neighbor_algos.each do |neighbor_algo|
- experiment.model_settings << {
- :model_algorithm => "OpenTox::Model::LazarRegression",
- :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
- :neighbor_algorithm => neighbor_algo,
- :neighbor_algorithm_parameters => {
- :type => type,
- :min_sim => min_sim,
- }
- }
- end
- end
- end
- experiment.run
-#=end
-=begin
- experiment = Experiment.find '56029cb92b72ed673d000000'
-=end
- p experiment.id
- puts experiment.report.to_yaml
- #p experiment.summary
- experiment.results.each do |dataset,result|
- result.each do |r|
- p r
- # TODO fix r["model_id"]
- params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
- RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
- cv.validation_ids.each do |vid|
- model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
- assert_equal params[:type], model_params[:type]
- assert_equal params[:min_sim], model_params[:min_sim]
- refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
- end
- end
- end
- end
- end
-
- def test_mpd_mna_classification_fingerprints
- skip
- datasets = [
- #"EPAFHM.medi.csv",
- "hamster_carcinogenicity.csv"
- ]
- min_sims = [0.0,0.3]
- types = ["MP2D","MNA"]
- neighbor_algos = [
- "fingerprint_count_neighbors",
- "fingerprint_neighbors",
- ]
- experiment = Experiment.create(
- :name => "MNA vs MPD descriptors",
- :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
- )
- types.each do |type|
- min_sims.each do |min_sim|
- neighbor_algos.each do |neighbor_algo|
- experiment.model_settings << {
- :model_algorithm => "OpenTox::Model::LazarClassification",
- :prediction_algorithm => "OpenTox::Algorithm::Classification.weighted_majority_vote",
- :neighbor_algorithm => neighbor_algo,
- :neighbor_algorithm_parameters => {
- :type => type,
- :min_sim => min_sim,
- }
- }
- end
- end
- end
- experiment.run
-#=end
-=begin
- experiment = Experiment.find '56029cb92b72ed673d000000'
-=end
- p experiment.id
- puts experiment.report.to_yaml
- #p experiment.summary
- experiment.results.each do |dataset,result|
- result.each do |r|
- # TODO fix r["model_id"]
- params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
- RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
- cv.validation_ids.each do |vid|
- model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
- assert_equal params[:type], model_params[:type]
- assert_equal params[:min_sim], model_params[:min_sim]
- refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
- end
- end
- end
- end
- end
-end