diff options
author | gebele <gebele@in-silico.ch> | 2014-10-08 08:35:19 +0200 |
---|---|---|
committer | gebele <gebele@in-silico.ch> | 2014-10-08 08:35:19 +0200 |
commit | a1f3d0396ab4d93d49cb7f0dfab654bd12494e85 (patch) | |
tree | f389c9478eae62f0c10ba1b0100af4a2c5db10e1 | |
parent | 17d839d6b24e6e30340bf64435663a1e6643d361 (diff) | |
parent | aac621fb93064542e25bbd1e8581332558908f3c (diff) |
Merge branch 'development' of github.com:opentox/opentox-test into development
-rw-r--r-- | test/data/EPAFHM.medi.csv | 97 | ||||
-rw-r--r-- | test/lazar-physchem-long.rb | 52 | ||||
-rw-r--r-- | test/lazar-physchem-short.rb | 3 | ||||
-rw-r--r-- | test/lazar-physchem.rb | 22 | ||||
-rw-r--r-- | test/task.rb | 1 | ||||
-rw-r--r-- | test/validation-long.rb | 136 | ||||
-rwxr-xr-x | test/validation_util.rb | 1 |
7 files changed, 228 insertions, 84 deletions
diff --git a/test/data/EPAFHM.medi.csv b/test/data/EPAFHM.medi.csv new file mode 100644 index 0000000..4428fa2 --- /dev/null +++ b/test/data/EPAFHM.medi.csv @@ -0,0 +1,97 @@ +"STRUCTURE_SMILES","LC50_mmol" +"C1=CC(C=O)=CC(OC)=C1OCCCCCC",1.13E-02 +"C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O",2.66E-01 +"CCCCCCCCOC(=O)C1=CC=CC(C(=O)OCCCCCCCC)=C1", +"C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2",7.69E-03 +"CC1=C(NC=O)C=CC=C1Cl",2.75E-01 +"CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1",3.23E-03 +"C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C",5.33E-02 +"CCCSCCSCCC",4.22E-02 +"CCCCCCCCOC(=O)C1=CC=C(C(=O)OCCCCCCCC)C=C1", +"OCCCCOC(=O)C1=CC=CC=C1C(=O)OCCCCOC(=O)C2=CC=CC=C2C(=O)OCCCCO", +"CCCSCCCCSCCC",1.45E-02 +"C1([N+](=O)[O-])=CC=C(C)C=C1OP(=O)(OC2=C([N+](=O)[O-])C=CC(C)=C2)OC3=C([N+]([O-])=O)C=CC(C)=C3", +"C1=C([N+]([O-])=O)C=CC=C1P(=O)(C2=CC([N+](=O)[O-])=CC=C2)C3=CC([N+](=O)[O-])=CC=C3", +"ClCCOC(=O)NC1CCCCC1",1.70E-01 +"O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC",2.08E+00 +"OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-]",5.92E-02 +"NC(=O)OCC",5.88E+01 +"[O-]C(C1=CC=CC=C1O)=O.[Na+]",1.25E+01 +"C1=CC=CC=C1C(=O)N",5.46E+00 +"CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-]",7.07E-01 +"CN(C)N",1.31E-01 +"CC(C(C(NC([O-])=N1)=O)(C1=O)CC)CCC.[Na+]",1.99E-01 +"N1C(=O)C(CC)(CCC(C)C)C(=O)NC1=O",3.77E-01 +"O=C1C2=C(N=CN2C)N(C(=O)N1C)C",7.78E-01 +"C1=CC=C2C(=C1)C(=O)C(C)=CC2=O",6.39E-04 +"OC1=C(Cl)C(Cl)=C(Cl)C=C1Cl",4.44E-03 +"OC1=CC(C)=C(Cl)C=C1",3.84E-02 +"O=S(O)(O)=O.C1(=CC=CC=C1CC(N)C).C2=CC=CC=C2CC(N)C",7.82E-02 +"O(CC)CC",3.45E+01 +"NC1=CC=CC=C1",1.13E+00 +"O=C(OC1=C2C(=CC=C1)C=CC=C2)NC",4.35E-02 +"CCO",3.19E+02 +"C1(=NC=CC=C1C2CCCN2C).OS(O)(=O)=O",5.30E-02 +"C1(O)=CC=CC=C1C(=O)N",7.36E-01 +"O=C1NC(=O)NC=C1", +"CCCCCC=O",1.75E-01 +"O=C1OC2=CC=CC=C2C(O)=C1CC3=C(O)C4=CC=CC=C4OC3=O",1.52E-02 +"C1(C=O)=CC=C(OC2=CC=CC=C2)C=C1",2.32E-02 +"CO",9.17E+02 +"OC(C)C",1.44E+02 +"CC(=O)C",1.23E+02 +"ClC(Cl)Cl",5.92E-01 +"CS(=O)C",4.35E+02 +"ClC(C(Cl)(Cl)Cl)(Cl)Cl",6.00E-03 +"OC1=C(C=C(C(=C1CC2=C(C(=CC(=C2Cl)Cl)Cl)O)Cl)Cl)Cl",5.16E-05 +"C1=CC(=CC=C1N)C(=O)CC",9.79E-01 +"OCCC",7.57E+01 +"CCCCO",2.33E+01 +"CCCCCO",5.36E+00 +"C1=CC=CC=C1",2.25E-01 +"CC(Cl)(Cl)Cl",3.55E-01 +"[S-]C1=NC(C(C(C)CCC)(CC)C(N1)=O)=O.[Na+]",9.91E-02 +"CC#N",4.01E+01 +"CC=O",7.67E-01 +"ClCCl",3.89E+00 +"IC(I)I",7.42E-03 +"[N+](C)(C)(C)C.[Cl-]",4.22E+00 +"CC(C)(C)O",8.65E+01 +"C(F)(F)(F)CO",1.19E+00 +"CC(=O)C(C)(C)C",8.69E-01 +"ClC(C(Cl)Cl)(Cl)Cl",3.72E-02 +"CC1(C)NC(=O)NC1=O",1.29E+02 +"CCC(O)(C)CC",6.58E+00 +"C#CC(O)(C)CC",1.24E+01 +"C1CCCC(C#C)(O)C1",2.06E+00 +"CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC",2.81E-02 +"OCC(C)C",1.93E+01 +"CC(Cl)CCl",1.12E+00 +"NCC(N)C",1.36E+01 +"CC(O)CC",4.95E+01 +"CCC(=O)C",4.47E+01 +"OC(C)CN",3.36E+01 +"ClC(CCl)Cl",6.12E-01 +"ClC(=CCl)Cl",3.36E-01 +"CC(=O)OC",4.82E+00 +"ClC(C(Cl)Cl)Cl",1.21E-01 +"C1(C)(C)CCCC(C)=C1C=CC(C)=O",2.65E-02 +"ClC1=C(O)C(Cl)=CC(=C1)C(C2=CC(Cl)=C(O)C(=C2)Cl)(C)C",3.63E-03 +"C(C1C=CC(=CC=1)O)(CC)(C)C",1.58E-02 +"C1CC(CCC1(N)C)C(C)(N)C",3.83E-01 +"ClC(Cl)C1=C(Cl)C=CC=C1Cl",4.22E-03 +"C1=CC=C2C=CC=C3C2=C1CC3",1.12E-02 +"CC1=CNC2=C1C=CC=C2",6.74E-02 +"C1=CC=CC=C1OC(=O)C2=CC=CC=C2C(=O)OC3=CC=CC=C3",2.51E-04 +"O=C1C2=C(C=CC=C2)C(=O)C3=C1C=CC=C3", +"CCOC(=O)C1=CC=CC=C1C(=O)OCC",1.43E-01 +"C1=CC=C(C(=O)OCCCC)C(=C1)C(=O)OCCCC",3.5900E-03 +"CCC1=C(Br)C(Br)=C(Br)C(Br)=C1Br", +"O=C1C2=C(C=CC=C2)N=NN1CSP(=S)(OC)OC",2.02E-04 +"C1=CC=CC=C1NC(=O)C2=C(O)C=CC=C2",1.85E-02 +"Cl\C(Cl)=C(Cl)/C(Cl)=C(Cl)\Cl",3.45E-04 +"OC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl",9.12E-04 +"OC1=C(C=C(C=C1Cl)Cl)Cl",2.48E-02 +"OC1=CC(C(F)(F)F)=C([N+]([O-])=O)C=C1",4.41E-02 +"C1(N)=CC=CC=C1C(=O)N",2.90E+00 +"OC1=C([N+]([O-])=O)C=CC=C1",1.15E+00 diff --git a/test/lazar-physchem-long.rb b/test/lazar-physchem-long.rb new file mode 100644 index 0000000..0b2820d --- /dev/null +++ b/test/lazar-physchem-long.rb @@ -0,0 +1,52 @@ +require_relative "setup.rb" + +class LazarPhyschemDescriptorTest < MiniTest::Test + + def test_lazar_pc_descriptors + + # check available descriptors + desc = OpenTox::Algorithm::Descriptor.physchem_descriptors.keys + assert_equal 111,desc.size,"wrong num physchem descriptors" + sum = 0 + {"Openbabel"=>16,"Cdk"=>50,"Joelib"=>45}.each do |k,v| + assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors" + sum += v + end + assert_equal 111,sum + + # select descriptors for test + desc.keep_if{|x| x=~/^Openbabel\./} + unless defined?($short_tests) + # the actual descriptor calculation is rather fast, computing 3D structures takes time + desc += ["Cdk.XLogP", "Cdk.WienerNumbers", "Joelib.LogP", "Joelib.count.HeteroCycles"] + end + puts "descriptors for modeling: #{desc}" + + dataset = OpenTox::Dataset.new + dataset.upload File.join(DATA_DIR,"EPAFHM.medi.csv") + assert_equal dataset.uri.uri?, true + puts dataset.uri + + model_uri = OpenTox::Model::Lazar.create :dataset_uri => dataset.uri, :feature_generation_uri => File.join($algorithm[:uri],"descriptor","physchem"), :descriptors => desc + puts model_uri + model = OpenTox::Model::Lazar.new model_uri + assert_equal model_uri.uri?, true + puts model.predicted_variable + + compound_uri = "#{$compound[:uri]}/InChI=1S/C13H8Cl2O2/c14-12-5-4-11(7-13(12)15)17-10-3-1-2-9(6-10)8-16/h1-8H" + prediction_uri = model.predict :compound_uri => compound_uri + prediction = OpenTox::Dataset.new prediction_uri + assert_equal prediction.uri.uri?, true + puts prediction.uri + + assert prediction.features.collect{|f| f.uri}.include?(model.predicted_variable),"prediction feature #{model.predicted_variable} not included prediction dataset #{prediction.features.collect{|f| f.uri}}" + assert prediction.compounds.collect{|c| c.uri}.include?(compound_uri),"compound #{compound_uri} not included in prediction dataset #{prediction.compounds.collect{|c| c.uri}}" + assert_equal 1,prediction.compound_indices(compound_uri).size,"compound should only be once in the dataset" + + predicted_value = prediction.data_entry_value(prediction.compound_indices(compound_uri).first,model.predicted_variable) + puts predicted_value + assert predicted_value > 0.005,"predicted values should be above 0.005, is #{predicted_value}" + assert predicted_value < 0.1,"predicted values should be below 0.1, is #{predicted_value}" + end + +end diff --git a/test/lazar-physchem-short.rb b/test/lazar-physchem-short.rb new file mode 100644 index 0000000..5b00231 --- /dev/null +++ b/test/lazar-physchem-short.rb @@ -0,0 +1,3 @@ +$short_tests = true + +require File.join(File.expand_path(File.dirname(__FILE__)),"lazar-physchem-long.rb") diff --git a/test/lazar-physchem.rb b/test/lazar-physchem.rb deleted file mode 100644 index 066460a..0000000 --- a/test/lazar-physchem.rb +++ /dev/null @@ -1,22 +0,0 @@ -require_relative "setup.rb" - -class LazarPhyschemDescriptorTest < MiniTest::Test - - def test_lazar_pc_descriptors - dataset = OpenTox::Dataset.new - dataset.upload File.join(DATA_DIR,"EPAFHM.mini.csv") - assert_equal dataset.uri.uri?, true - - model_uri = OpenTox::Model::Lazar.create :dataset_uri => dataset.uri, :feature_generation_uri => File.join($algorithm[:uri],"descriptor","physchem"), :descriptors => [ "Openbabel.atoms", "Openbabel.bonds", "Openbabel.dbonds", "Openbabel.HBA1", "Openbabel.HBA2", "Openbabel.HBD", "Openbabel.MP", "Openbabel.MR", "Openbabel.MW", "Openbabel.nF", "Openbabel.sbonds", "Openbabel.tbonds", "Openbabel.TPSA"] - - puts model_uri - model = OpenTox::Model::Lazar.new model_uri - assert_equal model_uri.uri?, true - prediction_uri = model.predict :compound_uri => "#{$compound[:uri]}/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H" - prediction = OpenTox::Dataset.new prediction_uri - assert_equal prediction.uri.uri?, true - #TODO check correct prediction - puts prediction.uri - end - -end diff --git a/test/task.rb b/test/task.rb index 4ee07c1..df78b1a 100644 --- a/test/task.rb +++ b/test/task.rb @@ -189,6 +189,7 @@ class TaskTest < MiniTest::Test assert ex.is_a?(test[:error]),"error type should be a #{test[:error]}, but is a #{ex.class}" assert ex.message=~/#{test[:msg]}/,"message should be #{test[:msg]}, but is #{ex.message}" assert ex.error_cause=~/test.rb:#{test[:line]}/,"code line number test.rb:#{test[:line]} is lost or wrong: #{ex.error_cause}" + assert ex.uri==test[:uri] end end end diff --git a/test/validation-long.rb b/test/validation-long.rb index 1a458a2..88ee315 100644 --- a/test/validation-long.rb +++ b/test/validation-long.rb @@ -26,21 +26,30 @@ DATA = [] # :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/533748", # :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/435293?page=0&pagesize=300" } -HAMSTER_CV_FEATURE_TYPES = ["bbrc"] FILES = { File.new(File.join(test_path,"data","hamster_carcinogenicity.csv")) => :split_validation, - #File.new("data/EPAFHM.medi.csv") => :split_validation - } + File.new(File.join(test_path,"data","EPAFHM.medi.csv")) => :split_validation, +} unless defined?($short_tests) - #HAMSTER_CV_FEATURE_TYPES = ["bbrc", "last"] FILES.merge!({ File.new(File.join(test_path,"data","hamster_carcinogenicity.csv")) => :crossvalidation, - # File.new("data/EPAFHM.csv") => :crossvalidation, - # File.new("data/hamster_carcinogenicity.csv") => :bootstrap_validation + File.new("data/EPAFHM.medi.csv") => :crossvalidation, + # File.new("data/hamster_carcinogenicity.csv") => :bootstrap_validation }) end +FEAT_GEN = {} +FILES.each do |f,t| + if f.path=~/hamster/ + FEAT_GEN[f] = [ File.join($algorithm[:uri],"fminer/bbrc") ] #FEAT_GEN[f] << File.join($algorithm[:uri],"fminer/last") + elsif f.path=~/EPAFHM/ + FEAT_GEN[f] = [ File.join($algorithm[:uri],"descriptor","physchem") ] + else + raise "please define feature generation uri for dataset: #{f.path}" + end +end + class ValidationTest < MiniTest::Test i_suck_and_my_tests_are_order_dependent! @@ -49,10 +58,17 @@ class ValidationTest < MiniTest::Test puts "login and upload datasets" OpenTox::RestClientWrapper.subjectid ? puts("logged in: "+OpenTox::RestClientWrapper.subjectid.to_s) : puts("AA disabled") FILES.each do |file,type| - DATA << { :type => type, - :data => ValidationTestUtil.upload_dataset(file), - :feat => ValidationTestUtil.prediction_feature_for_file(file), - :info => file.path, :delete => true} + data = { :type => type, + :data => ValidationTestUtil.upload_dataset(file), + :feat => ValidationTestUtil.prediction_feature_for_file(file), + :split_ratio => (file.path=~/EPAFHM/ ? 0.98 : 0.9),#only used for split_validation + :info => file.path, :delete => true} + FEAT_GEN[file].each do |feat_gen| + data[:alg_params] = "feature_generation_uri="+feat_gen ++ data[:alg_params] << ";backbone=false;min_chisq_significance=0.0" if feat_gen=~/fminer/ and data[:info] =~ /mini/ + data[:alg_params] << ";descriptors="+[ "Openbabel.atoms", "Openbabel.bonds", "Openbabel.dbonds", "Openbabel.HBA1", "Openbabel.HBA2", "Openbabel.HBD", "Openbabel.MP", "Openbabel.MR", "Openbabel.MW", "Openbabel.nF", "Openbabel.sbonds", "Openbabel.tbonds", "Openbabel.TPSA"].join(",") if feat_gen=~/physchem/ + DATA << data + end end end @@ -88,7 +104,7 @@ class ValidationTest < MiniTest::Test p = { :dataset_uri => data[:data], :algorithm_uri => File.join($algorithm[:uri],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join($algorithm[:uri],"fminer/bbrc"), + :algorithm_params => data[:alg_params], :prediction_feature => data[:feat], :random_seed => 2} t = OpenTox::SubTask.new(nil,0,1) @@ -130,9 +146,9 @@ class ValidationTest < MiniTest::Test p = { :dataset_uri => data[:data], :algorithm_uri => File.join($algorithm[:uri],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join($algorithm[:uri],"fminer/bbrc"), + :algorithm_params => data[:alg_params], :prediction_feature => data[:feat], - :split_ratio => 0.9, + :split_ratio => data[:split_ratio], :random_seed => 2} t = OpenTox::SubTask.new(nil,0,1) def t.progress(pct) @@ -161,7 +177,7 @@ class ValidationTest < MiniTest::Test train_compounds = OpenTox::Dataset.find(v.metadata[RDF::OT.trainingDataset.to_s]).compounds test_compounds = OpenTox::Dataset.find(v.metadata[RDF::OT.testDataset.to_s]).compounds orig_compounds = OpenTox::Dataset.find(data[:data]).compounds - assert_equal((orig_compounds.size*0.9).round,train_compounds.size) + assert_equal((orig_compounds.size*data[:split_ratio]).round,train_compounds.size) assert_equal(orig_compounds.size,(train_compounds+test_compounds).size) assert_equal(orig_compounds.uniq.size,(train_compounds+test_compounds).uniq.size) @@ -186,7 +202,7 @@ class ValidationTest < MiniTest::Test :training_dataset_uri => data[:train_data], :test_dataset_uri => data[:test_data], :algorithm_uri => File.join($algorithm[:uri],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join($algorithm[:uri],"fminer/bbrc"), + :algorithm_params => data[:alg_params], :prediction_feature => data[:feat]} t = OpenTox::SubTask.new(nil,0,1) def t.progress(pct) @@ -273,56 +289,52 @@ class ValidationTest < MiniTest::Test @@cv_identifiers = [] DATA.each do |data| if data[:type]==:crossvalidation - HAMSTER_CV_FEATURE_TYPES.each do |fminer| - next unless (fminer==HAMSTER_CV_FEATURE_TYPES[0] or data[:info].to_s =~ /hamster_carcinogenicity.csv/) - puts "test_crossvalidation "+data[:info].to_s+" "+fminer - p = { - :dataset_uri => data[:data], - :algorithm_uri => File.join($algorithm[:uri],"lazar"), - :algorithm_params => "feature_generation_uri="+File.join($algorithm[:uri],"fminer/"+fminer)+ - (data[:info] =~ /mini/ ? ";backbone=false;min_chisq_significance=0.0" : ""), - :prediction_feature => data[:feat], - :num_folds => 10 } - #:num_folds => 2 } - t = OpenTox::SubTask.new(nil,0,1) - def t.progress(pct) - if !defined?@last_msg or @last_msg+10<Time.new - puts "waiting for crossvalidation: "+pct.to_s - @last_msg=Time.new - end - end - def t.waiting_for(task_uri); end - cv = OpenTox::Crossvalidation.create(p, t) - assert cv.uri.uri? - if $aa[:uri] - assert_unauthorized do - OpenTox::Crossvalidation.find(cv.uri) - end + puts "test_crossvalidation "+data[:info].to_s+" "+data[:alg_params] + p = { + :dataset_uri => data[:data], + :algorithm_uri => File.join($algorithm[:uri],"lazar"), + :algorithm_params => data[:alg_params], + :prediction_feature => data[:feat], + :num_folds => 10 } + #:num_folds => 2 } + t = OpenTox::SubTask.new(nil,0,1) + def t.progress(pct) + if !defined?@last_msg or @last_msg+10<Time.new + puts "waiting for crossvalidation: "+pct.to_s + @last_msg=Time.new end - cv = OpenTox::Crossvalidation.find(cv.uri) - assert_valid_date cv - assert cv.uri.uri? - stats_val = cv.statistics - assert_kind_of OpenTox::Validation,stats_val - assert_prob_correct(stats_val) - - algorithm = cv.metadata[RDF::OT.algorithm.to_s] - assert algorithm.uri? - cv_list = OpenTox::Crossvalidation.list( {:algorithm => algorithm} ) - assert cv_list.include?(cv.uri) - cv_list.each do |cv_uri| - #begin catch not authorized somehow - alg = OpenTox::Crossvalidation.find(cv_uri).metadata[RDF::OT.algorithm.to_s] - assert alg==algorithm,"wrong algorithm for filtered crossvalidation, should be: '"+algorithm.to_s+"', is: '"+alg.to_s+"'" - #rescue - #end + end + def t.waiting_for(task_uri); end + cv = OpenTox::Crossvalidation.create(p, t) + assert cv.uri.uri? + if $aa[:uri] + assert_unauthorized do + OpenTox::Crossvalidation.find(cv.uri) end - puts cv.uri unless defined?(DELETE) and DELETE - - @@cvs << cv - @@cv_datasets << data - @@cv_identifiers << fminer end + cv = OpenTox::Crossvalidation.find(cv.uri) + assert_valid_date cv + assert cv.uri.uri? + stats_val = cv.statistics + assert_kind_of OpenTox::Validation,stats_val + assert_prob_correct(stats_val) + + algorithm = cv.metadata[RDF::OT.algorithm.to_s] + assert algorithm.uri? + cv_list = OpenTox::Crossvalidation.list( {:algorithm => algorithm} ) + assert cv_list.include?(cv.uri) + cv_list.each do |cv_uri| + #begin catch not authorized somehow + alg = OpenTox::Crossvalidation.find(cv_uri).metadata[RDF::OT.algorithm.to_s] + assert alg==algorithm,"wrong algorithm for filtered crossvalidation, should be: '"+algorithm.to_s+"', is: '"+alg.to_s+"'" + #rescue + #end + end + puts cv.uri unless defined?(DELETE) and DELETE + + @@cvs << cv + @@cv_datasets << data + @@cv_identifiers << data[:alg_params] end end end diff --git a/test/validation_util.rb b/test/validation_util.rb index d8373f4..0e82aec 100755 --- a/test/validation_util.rb +++ b/test/validation_util.rb @@ -35,6 +35,7 @@ class ValidationTestUtil end def self.prediction_feature_for_file(file) + raise "no prediction feature available for #{file.path}" unless @@prediction_features[file.path.to_s] @@prediction_features[file.path.to_s] end |