summary refs log tree commit diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2014-10-07 16:31:12 +0200
committer mguetlein <martin.guetlein@gmail.com> 2014-10-07 16:31:12 +0200
commit aac621fb93064542e25bbd1e8581332558908f3c (patch)
tree 38f83bbe8d453c64e76e0b2b18470a2f5d143c27
parent 1781a188d44faf2c0d9c9cf4ab82be966c1ea263 (diff)
adding cdk and joelib to physchem test, add pc-crossvalidation to validation test
-rw-r--r-- test/data/EPAFHM.medi.csv 4
-rw-r--r-- test/lazar-physchem-long.rb (renamed from test/lazar-physchem.rb) 30
-rw-r--r-- test/lazar-physchem-short.rb 3
-rw-r--r-- test/validation-long.rb 4
4 files changed, 28 insertions, 13 deletions
diff --git a/test/data/EPAFHM.medi.csv b/test/data/EPAFHM.medi.csv
index 975eaa1..4428fa2 100644
--- a/test/data/EPAFHM.medi.csv
+++ b/test/data/EPAFHM.medi.csv
@@ -26,10 +26,8 @@
"C1=CC=C2C(=C1)C(=O)C(C)=CC2=O",6.39E-04
"OC1=C(Cl)C(Cl)=C(Cl)C=C1Cl",4.44E-03
"OC1=CC(C)=C(Cl)C=C1",3.84E-02
-"[H]Cl.C1=CC=CC=C1CC2=NCCN2",1.80E+00
"O=S(O)(O)=O.C1(=CC=CC=C1CC(N)C).C2=CC=CC=C2CC(N)C",7.82E-02
"O(CC)CC",3.45E+01
-"O=C2N5[C@@]3([H])[C@@]1([H])[C@](C[C@]4([H])N(C7)CC[C@]34C6=C5C=CC=C6)([H])C7=CCO[C@]([H])1C2.O=C9N%12[C@@]%10([H])[C@@]8([H])[C@](C[C@]%11([H])N(C%14)CC[C@]%10%11C%13=C%12C=CC=C%13)([H])C%14=CCO[C@]([H])8C9.O=S(O)(O)=O",1.11E-03
"NC1=CC=CC=C1",1.13E+00
"O=C(OC1=C2C(=CC=C1)C=CC=C2)NC",4.35E-02
"CCO",3.19E+02
@@ -84,8 +82,6 @@
"ClC(Cl)C1=C(Cl)C=CC=C1Cl",4.22E-03
"C1=CC=C2C=CC=C3C2=C1CC3",1.12E-02
"CC1=CNC2=C1C=CC=C2",6.74E-02
-"O=C([C@](C(C=C4OC)=C(C=C4OC)OC3)([H])[C@]3([H])O2)C(C=C5)=C2C1=C5O[C@@H]([C@@](C)=C)C1",1.32E-05
-"O=C2C1=NC3=C(C=C(C)C(C)=C3)N(C[C@H](O)[C@H](O)[C@H](O)CO)C1=NC(N2)=O",
"C1=CC=CC=C1OC(=O)C2=CC=CC=C2C(=O)OC3=CC=CC=C3",2.51E-04
"O=C1C2=C(C=CC=C2)C(=O)C3=C1C=CC=C3",
"CCOC(=O)C1=CC=CC=C1C(=O)OCC",1.43E-01
diff --git a/test/lazar-physchem.rb b/test/lazar-physchem-long.rb
index d6f5894..0b2820d 100644
--- a/test/lazar-physchem.rb
+++ b/test/lazar-physchem-long.rb
@@ -3,13 +3,31 @@ require_relative "setup.rb"
class LazarPhyschemDescriptorTest < MiniTest::Test
def test_lazar_pc_descriptors
+
+ # check available descriptors
+ desc = OpenTox::Algorithm::Descriptor.physchem_descriptors.keys
+ assert_equal 111,desc.size,"wrong num physchem descriptors"
+ sum = 0
+ {"Openbabel"=>16,"Cdk"=>50,"Joelib"=>45}.each do |k,v|
+ assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors"
+ sum += v
+ end
+ assert_equal 111,sum
+
+ # select descriptors for test
+ desc.keep_if{|x| x=~/^Openbabel\./}
+ unless defined?($short_tests)
+ # the actual descriptor calculation is rather fast, computing 3D structures takes time
+ desc += ["Cdk.XLogP", "Cdk.WienerNumbers", "Joelib.LogP", "Joelib.count.HeteroCycles"]
+ end
+ puts "descriptors for modeling: #{desc}"
+
dataset = OpenTox::Dataset.new
dataset.upload File.join(DATA_DIR,"EPAFHM.medi.csv")
assert_equal dataset.uri.uri?, true
puts dataset.uri
- model_uri = OpenTox::Model::Lazar.create :dataset_uri => dataset.uri, :feature_generation_uri => File.join($algorithm[:uri],"descriptor","physchem"), :descriptors => [ "Openbabel.atoms", "Openbabel.bonds", "Openbabel.dbonds", "Openbabel.HBA1", "Openbabel.HBA2", "Openbabel.HBD", "Openbabel.MP", "Openbabel.MR", "Openbabel.MW", "Openbabel.nF", "Openbabel.sbonds", "Openbabel.tbonds", "Openbabel.TPSA"]
-# model_uri = "http://localhost:8085/model/437f008a-ca0f-4a85-83c1-d851ef2be60c"
+ model_uri = OpenTox::Model::Lazar.create :dataset_uri => dataset.uri, :feature_generation_uri => File.join($algorithm[:uri],"descriptor","physchem"), :descriptors => desc
puts model_uri
model = OpenTox::Model::Lazar.new model_uri
assert_equal model_uri.uri?, true
@@ -17,8 +35,6 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
compound_uri = "#{$compound[:uri]}/InChI=1S/C13H8Cl2O2/c14-12-5-4-11(7-13(12)15)17-10-3-1-2-9(6-10)8-16/h1-8H"
prediction_uri = model.predict :compound_uri => compound_uri
-# prediction_uri = "http://localhost:8083/dataset/1e2d48d2-f720-4575-b192-524586630ac3"
-
prediction = OpenTox::Dataset.new prediction_uri
assert_equal prediction.uri.uri?, true
puts prediction.uri
@@ -27,10 +43,10 @@ class LazarPhyschemDescriptorTest < MiniTest::Test
assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}"
assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset"
- predicted_value = prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable) #[model.predicted_variable]
+ predicted_value = prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable)
puts predicted_value
- assert predicted_value > 0.01
- assert predicted_value < 0.1
+ assert predicted_value > 0.005,"predicted values should be above 0.005, is #{predicted_value}"
+ assert predicted_value < 0.1,"predicted values should be below 0.1, is #{predicted_value}"
end
end
diff --git a/test/lazar-physchem-short.rb b/test/lazar-physchem-short.rb
new file mode 100644
index 0000000..5b00231
--- /dev/null
+++ b/test/lazar-physchem-short.rb
@@ -0,0 +1,3 @@
+$short_tests = true
+
+require File.join(File.expand_path(File.dirname(__FILE__)),"lazar-physchem-long.rb")
diff --git a/test/validation-long.rb b/test/validation-long.rb
index 48a886e..88ee315 100644
--- a/test/validation-long.rb
+++ b/test/validation-long.rb
@@ -34,7 +34,7 @@ FILES = {
unless defined?($short_tests)
FILES.merge!({
File.new(File.join(test_path,"data","hamster_carcinogenicity.csv")) => :crossvalidation,
- # File.new("data/EPAFHM.csv") => :crossvalidation,
+ File.new("data/EPAFHM.medi.csv") => :crossvalidation,
# File.new("data/hamster_carcinogenicity.csv") => :bootstrap_validation
})
end
@@ -61,7 +61,7 @@ class ValidationTest < MiniTest::Test
data = { :type => type,
:data => ValidationTestUtil.upload_dataset(file),
:feat => ValidationTestUtil.prediction_feature_for_file(file),
- :split_ratio => (file.path=~/EPAFHM/ ? 0.98 : 0.9),
+ :split_ratio => (file.path=~/EPAFHM/ ? 0.98 : 0.9),#only used for split_validation
:info => file.path, :delete => true}
FEAT_GEN[file].each do |feat_gen|
data[:alg_params] = "feature_generation_uri="+feat_gen