summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-10-05 14:10:25 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-10-05 14:10:25 +0200
commit 016403f7db0dedf8237f29af41312b5ff2720c30 (patch)
tree b276ad7bbb11ff73049b9c7cfeda15c140846436
parent 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f (diff)
compound and descriptor tests fixed
-rw-r--r-- lib/compound.rb 6
-rw-r--r-- test/compound.rb 21
-rw-r--r-- test/descriptor.rb 14
-rw-r--r-- test/model.rb 177
4 files changed, 203 insertions, 15 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 4d62c53..93cfc03 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -77,7 +77,7 @@ module OpenTox
def calculated_physchem descriptors=PhysChem.openbabel_descriptors
# TODO: speedup java descriptors
- calculated_ids = descriptors.keys
+ calculated_ids = properties.keys
# BSON::ObjectId instances are not allowed as keys in a BSON document.
new_ids = descriptors.collect{|d| d.id.to_s} - calculated_ids
descs = {}
@@ -90,11 +90,11 @@ module OpenTox
# avoid recalculating Cdk features with multiple values
descs.keys.uniq.each do |k|
descs[k].send(k[0].downcase,k[1],self).each do |n,v|
- descriptors[algos[n].id.to_s] = v # BSON::ObjectId instances are not allowed as keys in a BSON document.
+ properties[algos[n].id.to_s] = v # BSON::ObjectId instances are not allowed as keys in a BSON document.
end
end
save
- descriptors.select{|id,v| descriptors.collect{|d| d.id.to_s}.include? id}
+ properties.select{|id,v| descriptors.collect{|d| d.id.to_s}.include? id}
end
def smarts_match smarts, count=false
diff --git a/test/compound.rb b/test/compound.rb
index c9faa21..c78acb1 100644
--- a/test/compound.rb
+++ b/test/compound.rb
@@ -85,7 +85,13 @@ print c.sdf
refute_nil c.fingerprint("MP2D")
end
c = d.compounds[371]
- n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :dataset_id => d.id, :prediction_feature_id => d.features.first.id })
+ n = c.neighbors(
+ descriptors: {:method => "fingerprint", :type => "FP4"},
+ similarity: {:method => "Algorithm::Similarity.tanimoto", :min => 0.7},
+ dataset_id: d.id,
+ prediction_feature_id: d.features.first.id
+ )
+
assert n.size >= 8, "Neighbors size (#{n.size}) should be larger than 7"
end
@@ -118,7 +124,12 @@ print c.sdf
].each do |smi|
c = OpenTox::Compound.from_smiles smi
types.each do |type|
- neighbors = c.fingerprint_neighbors({:type => type, :dataset_id => training_dataset.id, :min_sim => min_sim, :prediction_feature_id => training_dataset.features.first.id})
+ neighbors = c.fingerprint_neighbors(
+ descriptors: {:method => "fingerprint",:type => type},
+ dataset_id: training_dataset.id,
+ similarity: {:method => "Algorithm::Similarity.tanimoto", :min => min_sim},
+ prediction_feature_id: training_dataset.features.first.id
+ )
unless type == "FP2" and smi == "CC(=O)CC(C)C#N" or smi == "C(=O)CC(C)C#N" and (type == "FP2" or type == "MACCS")
refute_empty neighbors
end
@@ -197,8 +208,8 @@ print c.sdf
def test_physchem
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
- assert_equal PhysChem::OBDESCRIPTORS.size, c.physchem.size
- assert_equal PhysChem::OBDESCRIPTORS.size, c.physchem(PhysChem.openbabel_descriptors).size
- assert_equal PhysChem::unique_descriptors.size, c.physchem(PhysChem.unique_descriptors).size
+ assert_equal PhysChem::OBDESCRIPTORS.size, c.calculated_physchem.size
+ assert_equal PhysChem::OBDESCRIPTORS.size, c.calculated_physchem(PhysChem.openbabel_descriptors).size
+ assert_equal PhysChem::unique_descriptors.size, c.calculated_physchem(PhysChem.unique_descriptors).size
end
end
diff --git a/test/descriptor.rb b/test/descriptor.rb
index cd0c1ff..2a5be60 100644
--- a/test/descriptor.rb
+++ b/test/descriptor.rb
@@ -28,34 +28,34 @@ class DescriptorTest < MiniTest::Test
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
PhysChem.openbabel_descriptors # required for descriptor initialisation, TODO: move into libs
PhysChem.find_or_create_by(:name => "Openbabel.logP")
- result = c.physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")]
+ result = c.calculated_physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")]
assert_equal 1.12518, result.first.last.round(5)
end
def test_compound_cdk_single
PhysChem.cdk_descriptors # required for descriptor initialisation, TODO: move into libs
c = OpenTox::Compound.from_smiles "c1ccccc1"
- result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
+ result = c.calculated_physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
assert_equal 12, result.first.last
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
+ result = c.calculated_physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
assert_equal 17, result.first.last
c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)}
- result = c.physchem physchem_features
+ result = c.calculated_physchem physchem_features
assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result.values
end
def test_compound_joelib_single
PhysChem.joelib_descriptors # required for descriptor initialisation, TODO: move into libs
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
+ result = c.calculated_physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
assert_equal 2.65908, result.first.last
end
def test_compound_all
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem PhysChem.descriptors
+ result = c.calculated_physchem PhysChem.descriptors
amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk")
sbonds = PhysChem.find_by(:name => "Openbabel.sbonds")
assert_equal 30.8723, result[amr.id.to_s]
@@ -65,7 +65,7 @@ class DescriptorTest < MiniTest::Test
def test_compound_descriptor_parameters
PhysChem.descriptors
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
+ result = c.calculated_physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
assert_equal 3, result.size
result.each do |fid,v|
feature = Feature.find(fid)
diff --git a/test/model.rb b/test/model.rb
new file mode 100644
index 0000000..563d081
--- /dev/null
+++ b/test/model.rb
@@ -0,0 +1,177 @@
+require_relative "setup.rb"
+
+class ModelTest < MiniTest::Test
+
+ def test_default_regression
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D"
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.1
+ },
+ :prediction => {
+ :method => "Algorithm::Regression.caret",
+ :parameters => "pls",
+ },
+ :feature_selection => nil,
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ model = Model::Lazar.create training_dataset: training_dataset
+ assert_kind_of Model::LazarRegression, model
+ assert_equal algorithms, model.algorithms
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
+ end
+
+ def test_regression_parameters
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => "MP2D"
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.3
+ },
+ :prediction => {
+ :method => "Algorithm::Regression.weighted_average",
+ :parameters => "rf",
+ },
+ :feature_selection => nil,
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarRegression, model
+ assert_equal "Algorithm::Regression.weighted_average", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+ assert_equal algorithms[:prediction][:parameters], model.algorithms[:prediction][:parameters]
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_equal 0.83, prediction[:value].round(2)
+ end
+
+ def test_physchem_regression
+ algorithms = {
+ :descriptors => "physchem",
+ :similarity => {
+ :method => "Algorithm::Similarity.weighted_cosine",
+ }
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarRegression, model
+ assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method]
+ assert_equal 0.1, model.algorithms[:similarity][:min]
+ assert_equal algorithms[:descriptors], model.algorithms[:descriptors]
+ end
+
+ def test_nanoparticle_default
+ training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ unless training_dataset
+ Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
+ training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+ end
+ model = Model::Lazar.create training_dataset: training_dataset
+ assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "rf", model.algorithms[:prediction][:parameters]
+ assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method]
+ prediction = model.predict training_dataset.substances[14]
+ assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
+
+ end
+
+ def test_nanoparticle_parameters
+ end
+
+ def test_regression_with_feature_selection
+ algorithms = {
+ :feature_selection => {
+ :method => "Algorithm::FeatureSelection.correlation_filter",
+ },
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarRegression, model
+ assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
+ assert_equal 0.1, model.algorithms[:similarity][:min]
+ assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method]
+ end
+
+ def test_caret_parameters
+ end
+
+ def test_default_classification
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => 'MP2D',
+ },
+ :similarity => {
+ :method => "Algorithm::Similarity.tanimoto",
+ :min => 0.1
+ },
+ :prediction => {
+ :method => "Algorithm::Classification.weighted_majority_vote",
+ },
+ :feature_selection => nil,
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = Model::Lazar.create training_dataset: training_dataset
+ assert_kind_of Model::LazarClassification, model
+ assert_equal algorithms, model.algorithms
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_equal "false", prediction[:value]
+ end
+
+ def test_classification_parameters
+ algorithms = {
+ :descriptors => {
+ :method => "fingerprint",
+ :type => 'MACCS',
+ },
+ :similarity => {
+ :min => 0.4
+ },
+ }
+ training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+ model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
+ assert_kind_of Model::LazarClassification, model
+ assert_equal "Algorithm::Classification.weighted_majority_vote", model.algorithms[:prediction][:method]
+ assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+ substance = training_dataset.substances[10]
+ prediction = model.predict substance
+ assert_equal "false", prediction[:value]
+ assert_equal 4, prediction[:neighbors].size
+ end
+
+=begin
+ def test_physchem_description
+ assert_equal 355, PhysChem.descriptors.size
+ assert_equal 15, PhysChem.openbabel_descriptors.size
+ assert_equal 295, PhysChem.cdk_descriptors.size
+ assert_equal 45, PhysChem.joelib_descriptors.size
+ assert_equal 310, PhysChem.unique_descriptors.size
+ end
+
+ def test_physchem
+ assert_equal 355, PhysChem.descriptors.size
+ c = Compound.from_smiles "CC(=O)CC(C)C"
+ logP = PhysChem.find_or_create_by :name => "Openbabel.logP"
+ assert_equal 1.6215, logP.calculate(c)
+ jlogP = PhysChem.find_or_create_by :name => "Joelib.LogP"
+ assert_equal 3.5951, jlogP.calculate(c)
+ alogP = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP"
+ assert_equal 0.35380000000000034, alogP.calculate(c)
+ end
+=end
+
+end