From 016403f7db0dedf8237f29af41312b5ff2720c30 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 5 Oct 2016 14:10:25 +0200 Subject: compound and descriptor tests fixed --- lib/compound.rb | 6 +- test/compound.rb | 21 +++++-- test/descriptor.rb | 14 ++--- test/model.rb | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 203 insertions(+), 15 deletions(-) create mode 100644 test/model.rb diff --git a/lib/compound.rb b/lib/compound.rb index 4d62c53..93cfc03 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -77,7 +77,7 @@ module OpenTox def calculated_physchem descriptors=PhysChem.openbabel_descriptors # TODO: speedup java descriptors - calculated_ids = descriptors.keys + calculated_ids = properties.keys # BSON::ObjectId instances are not allowed as keys in a BSON document. new_ids = descriptors.collect{|d| d.id.to_s} - calculated_ids descs = {} @@ -90,11 +90,11 @@ module OpenTox # avoid recalculating Cdk features with multiple values descs.keys.uniq.each do |k| descs[k].send(k[0].downcase,k[1],self).each do |n,v| - descriptors[algos[n].id.to_s] = v # BSON::ObjectId instances are not allowed as keys in a BSON document. + properties[algos[n].id.to_s] = v # BSON::ObjectId instances are not allowed as keys in a BSON document. end end save - descriptors.select{|id,v| descriptors.collect{|d| d.id.to_s}.include? id} + properties.select{|id,v| descriptors.collect{|d| d.id.to_s}.include? id} end def smarts_match smarts, count=false diff --git a/test/compound.rb b/test/compound.rb index c9faa21..c78acb1 100644 --- a/test/compound.rb +++ b/test/compound.rb @@ -85,7 +85,13 @@ print c.sdf refute_nil c.fingerprint("MP2D") end c = d.compounds[371] - n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :dataset_id => d.id, :prediction_feature_id => d.features.first.id }) + n = c.neighbors( + descriptors: {:method => "fingerprint", :type => "FP4"}, + similarity: {:method => "Algorithm::Similarity.tanimoto", :min => 0.7}, + dataset_id: d.id, + prediction_feature_id: d.features.first.id + ) + assert n.size >= 8, "Neighbors size (#{n.size}) should be larger than 7" end @@ -118,7 +124,12 @@ print c.sdf ].each do |smi| c = OpenTox::Compound.from_smiles smi types.each do |type| - neighbors = c.fingerprint_neighbors({:type => type, :dataset_id => training_dataset.id, :min_sim => min_sim, :prediction_feature_id => training_dataset.features.first.id}) + neighbors = c.fingerprint_neighbors( + descriptors: {:method => "fingerprint",:type => type}, + dataset_id: training_dataset.id, + similarity: {:method => "Algorithm::Similarity.tanimoto", :min => min_sim}, + prediction_feature_id: training_dataset.features.first.id + ) unless type == "FP2" and smi == "CC(=O)CC(C)C#N" or smi == "C(=O)CC(C)C#N" and (type == "FP2" or type == "MACCS") refute_empty neighbors end @@ -197,8 +208,8 @@ print c.sdf def test_physchem c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C" - assert_equal PhysChem::OBDESCRIPTORS.size, c.physchem.size - assert_equal PhysChem::OBDESCRIPTORS.size, c.physchem(PhysChem.openbabel_descriptors).size - assert_equal PhysChem::unique_descriptors.size, c.physchem(PhysChem.unique_descriptors).size + assert_equal PhysChem::OBDESCRIPTORS.size, c.calculated_physchem.size + assert_equal PhysChem::OBDESCRIPTORS.size, c.calculated_physchem(PhysChem.openbabel_descriptors).size + assert_equal PhysChem::unique_descriptors.size, c.calculated_physchem(PhysChem.unique_descriptors).size end end diff --git a/test/descriptor.rb b/test/descriptor.rb index cd0c1ff..2a5be60 100644 --- a/test/descriptor.rb +++ b/test/descriptor.rb @@ -28,34 +28,34 @@ class DescriptorTest < MiniTest::Test c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N" PhysChem.openbabel_descriptors # required for descriptor initialisation, TODO: move into libs PhysChem.find_or_create_by(:name => "Openbabel.logP") - result = c.physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")] + result = c.calculated_physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")] assert_equal 1.12518, result.first.last.round(5) end def test_compound_cdk_single PhysChem.cdk_descriptors # required for descriptor initialisation, TODO: move into libs c = OpenTox::Compound.from_smiles "c1ccccc1" - result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")] + result = c.calculated_physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")] assert_equal 12, result.first.last c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N" - result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")] + result = c.calculated_physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")] assert_equal 17, result.first.last c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0} physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)} - result = c.physchem physchem_features + result = c.calculated_physchem physchem_features assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result.values end def test_compound_joelib_single PhysChem.joelib_descriptors # required for descriptor initialisation, TODO: move into libs c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N" - result = c.physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")] + result = c.calculated_physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")] assert_equal 2.65908, result.first.last end def test_compound_all c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N" - result = c.physchem PhysChem.descriptors + result = c.calculated_physchem PhysChem.descriptors amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk") sbonds = PhysChem.find_by(:name => "Openbabel.sbonds") assert_equal 30.8723, result[amr.id.to_s] @@ -65,7 +65,7 @@ class DescriptorTest < MiniTest::Test def test_compound_descriptor_parameters PhysChem.descriptors c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N" - result = c.physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)} + result = c.calculated_physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)} assert_equal 3, result.size result.each do |fid,v| feature = Feature.find(fid) diff --git a/test/model.rb b/test/model.rb new file mode 100644 index 0000000..563d081 --- /dev/null +++ b/test/model.rb @@ -0,0 +1,177 @@ +require_relative "setup.rb" + +class ModelTest < MiniTest::Test + + def test_default_regression + algorithms = { + :descriptors => { + :method => "fingerprint", + :type => "MP2D" + }, + :similarity => { + :method => "Algorithm::Similarity.tanimoto", + :min => 0.1 + }, + :prediction => { + :method => "Algorithm::Regression.caret", + :parameters => "pls", + }, + :feature_selection => nil, + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset: training_dataset + assert_kind_of Model::LazarRegression, model + assert_equal algorithms, model.algorithms + substance = training_dataset.substances[10] + prediction = model.predict substance + assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions." + end + + def test_regression_parameters + algorithms = { + :descriptors => { + :method => "fingerprint", + :type => "MP2D" + }, + :similarity => { + :method => "Algorithm::Similarity.tanimoto", + :min => 0.3 + }, + :prediction => { + :method => "Algorithm::Regression.weighted_average", + :parameters => "rf", + }, + :feature_selection => nil, + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv") + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + assert_kind_of Model::LazarRegression, model + assert_equal "Algorithm::Regression.weighted_average", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] + assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min] + assert_equal algorithms[:prediction][:parameters], model.algorithms[:prediction][:parameters] + substance = training_dataset.substances[10] + prediction = model.predict substance + assert_equal 0.83, prediction[:value].round(2) + end + + def test_physchem_regression + algorithms = { + :descriptors => "physchem", + :similarity => { + :method => "Algorithm::Similarity.weighted_cosine", + } + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + assert_kind_of Model::LazarRegression, model + assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method] + assert_equal 0.1, model.algorithms[:similarity][:min] + assert_equal algorithms[:descriptors], model.algorithms[:descriptors] + end + + def test_nanoparticle_default + training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first + unless training_dataset + Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") + training_dataset = Dataset.where(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first + end + model = Model::Lazar.create training_dataset: training_dataset + assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method] + assert_equal "rf", model.algorithms[:prediction][:parameters] + assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method] + prediction = model.predict training_dataset.substances[14] + assert_includes prediction[:prediction_interval][0]..prediction[:prediction_interval][1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions." + + end + + def test_nanoparticle_parameters + end + + def test_regression_with_feature_selection + algorithms = { + :feature_selection => { + :method => "Algorithm::FeatureSelection.correlation_filter", + }, + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv") + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + assert_kind_of Model::LazarRegression, model + assert_equal "Algorithm::Regression.caret", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] + assert_equal 0.1, model.algorithms[:similarity][:min] + assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method] + end + + def test_caret_parameters + end + + def test_default_classification + algorithms = { + :descriptors => { + :method => "fingerprint", + :type => 'MP2D', + }, + :similarity => { + :method => "Algorithm::Similarity.tanimoto", + :min => 0.1 + }, + :prediction => { + :method => "Algorithm::Classification.weighted_majority_vote", + }, + :feature_selection => nil, + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") + model = Model::Lazar.create training_dataset: training_dataset + assert_kind_of Model::LazarClassification, model + assert_equal algorithms, model.algorithms + substance = training_dataset.substances[10] + prediction = model.predict substance + assert_equal "false", prediction[:value] + end + + def test_classification_parameters + algorithms = { + :descriptors => { + :method => "fingerprint", + :type => 'MACCS', + }, + :similarity => { + :min => 0.4 + }, + } + training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv") + model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms + assert_kind_of Model::LazarClassification, model + assert_equal "Algorithm::Classification.weighted_majority_vote", model.algorithms[:prediction][:method] + assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method] + assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min] + substance = training_dataset.substances[10] + prediction = model.predict substance + assert_equal "false", prediction[:value] + assert_equal 4, prediction[:neighbors].size + end + +=begin + def test_physchem_description + assert_equal 355, PhysChem.descriptors.size + assert_equal 15, PhysChem.openbabel_descriptors.size + assert_equal 295, PhysChem.cdk_descriptors.size + assert_equal 45, PhysChem.joelib_descriptors.size + assert_equal 310, PhysChem.unique_descriptors.size + end + + def test_physchem + assert_equal 355, PhysChem.descriptors.size + c = Compound.from_smiles "CC(=O)CC(C)C" + logP = PhysChem.find_or_create_by :name => "Openbabel.logP" + assert_equal 1.6215, logP.calculate(c) + jlogP = PhysChem.find_or_create_by :name => "Joelib.LogP" + assert_equal 3.5951, jlogP.calculate(c) + alogP = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP" + assert_equal 0.35380000000000034, alogP.calculate(c) + end +=end + +end -- cgit v1.2.3