From 6f89d7448d861f665d6c506e27db31984bd62808 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 9 May 2011 16:11:28 +0200 Subject: moved validation-unit-tests to test repository --- Rakefile | 2 +- data/EPAFHM.mini.csv | 21 +++ data/hamster_carcinogenicity.mini.csv | 11 ++ validation.rb | 335 ++++++++++++++++++++++++++++++---- validation_util.rb | 46 +++++ 5 files changed, 379 insertions(+), 36 deletions(-) create mode 100644 data/EPAFHM.mini.csv create mode 100644 data/hamster_carcinogenicity.mini.csv create mode 100755 validation_util.rb diff --git a/Rakefile b/Rakefile index 42f8b15..49b72f8 100644 --- a/Rakefile +++ b/Rakefile @@ -31,7 +31,7 @@ task :teardown do end #[:all, :feature, :dataset, :fminer, :lazar, :authorization, :validation].each do |t| -[:all, :feature, :dataset, :fminer, :lazar, :authorization, :parser].each do |t| +[:all, :feature, :dataset, :fminer, :lazar, :authorization, :parser, :validation ].each do |t| task :teardown => t task t => :setup end diff --git a/data/EPAFHM.mini.csv b/data/EPAFHM.mini.csv new file mode 100644 index 0000000..c86cd33 --- /dev/null +++ b/data/EPAFHM.mini.csv @@ -0,0 +1,21 @@ +"STRUCTURE_SMILES","LC50_mmol" +"C1=CC(C=O)=CC(OC)=C1OCCCCCC",1.13E-02 +"C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O",2.66E-01 +"CCCCCCCCOC(=O)C1=CC=CC(C(=O)OCCCCCCCC)=C1", +"C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2",7.69E-03 +"CC1=C(NC=O)C=CC=C1Cl",2.75E-01 +"CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1",3.23E-03 +"C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C",5.33E-02 +"CCCSCCSCCC",4.22E-02 +"CCCCCCCCOC(=O)C1=CC=C(C(=O)OCCCCCCCC)C=C1", +"OCCCCOC(=O)C1=CC=CC=C1C(=O)OCCCCOC(=O)C2=CC=CC=C2C(=O)OCCCCO", +"CCCSCCCCSCCC",1.45E-02 +"C1([N+](=O)[O-])=CC=C(C)C=C1OP(=O)(OC2=C([N+](=O)[O-])C=CC(C)=C2)OC3=C([N+]([O-])=O)C=CC(C)=C3", +"C1=C([N+]([O-])=O)C=CC=C1P(=O)(C2=CC([N+](=O)[O-])=CC=C2)C3=CC([N+](=O)[O-])=CC=C3", +"ClCCOC(=O)NC1CCCCC1",1.70E-01 +"O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC",2.08E+00 +"OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-]",5.92E-02 +"NC(=O)OCC",5.88E+01 +"[O-]C(C1=CC=CC=C1O)=O.[Na+]",1.25E+01 +"C1=CC=CC=C1C(=O)N",5.46E+00 +"CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-]",7.07E-01 diff --git a/data/hamster_carcinogenicity.mini.csv b/data/hamster_carcinogenicity.mini.csv new file mode 100644 index 0000000..4267235 --- /dev/null +++ b/data/hamster_carcinogenicity.mini.csv @@ -0,0 +1,11 @@ +SMILES, Hamster Carcinogenicity +CC=O,1 +C12C3=C(C=CC=C3)CC1=CC(=CC=2)NC(C)=O,1 +O=C(N)\C(C2=CC=CO2)=C/C1=CC=C([N+]([O-])=O)O1,1 +C1(N=CNN=1)N,0 +Br(=O)(=O)[O-].[K+],1 +[Cl-].[Cd+2].[Cl-],0 +O=S(=O)([O-])[O-].[Cd+2],0 +ClC1=CC(=NC(=N1)SCC(=O)O)NC2=CC=CC(=C2C)C,0 +ClCOC,1 +C=C(Cl)C=C,0 diff --git a/validation.rb b/validation.rb index beb1c02..0946cf9 100644 --- a/validation.rb +++ b/validation.rb @@ -1,45 +1,310 @@ -require 'rubygems' -require 'opentox-ruby' +ENV['RACK_ENV'] = 'production' + require 'test/unit' +require 'validation_util.rb' + +#TEST_USER = "guest" +#TEST_PW = "guest" + +#LOGGER = OTLogger.new(STDOUT) +#LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S " +#LOGGER.formatter = Logger::Formatter.new -class Float - def round_to(x) - (self * 10**x).round.to_f / 10**x +module Sinatra + set :raise_errors, false + set :show_exceptions, false +end + +class Exception + def message + errorCause ? errorCause.to_yaml : to_s end end class ValidationTest < Test::Unit::TestCase + + def global_setup + puts "login and upload datasets" + if AA_SERVER + @@subjectid = OpenTox::Authorization.authenticate(TEST_USER,TEST_PW) + raise "could not log in" unless @@subjectid + puts "logged in: "+@@subjectid.to_s + else + puts "AA disabled" + @@subjectid = nil + end + + @@data = [] + files = { File.new("data/hamster_carcinogenicity.mini.csv") => :crossvalidation, + File.new("data/EPAFHM.mini.csv") => :crossvalidation, + File.new("data/hamster_carcinogenicity.csv") => :validation, + File.new("data/EPAFHM.csv") => :validation, +# File.new("data/StJudes-HepG2-testset_Class.csv") => :crossvalidation + } + files.each do |file,type| + @@data << { :type => type, + :data => ValidationTestUtil.upload_dataset(file, @@subjectid), + :feat => ValidationTestUtil.prediction_feature_for_file(file), + :info => file.path, :delete => true} + end +# @@data << { :type => :crossvalidation, +# :data => "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=50", +# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/21573", +# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=50" } +# @@data << { :type => :validation, +# :data => "http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50", +# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/26221", +# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50" } + end + + def global_teardown + puts "delete and logout" + @@data.each{|data| OpenTox::Dataset.find(data[:data],@@subjectid).delete(@@subjectid) if data[:delete]} + @@vs.each{|v| v.delete(@@subjectid)} if defined?@@vs + @@cvs.each{|cv| cv.delete(@@subjectid)} if defined?@@cvs + @@reports.each{|report| report.delete(@@subjectid)} if defined?@@reports + @@qmrfReports.each{|qmrfReport| qmrfReport.delete(@@subjectid)} if defined?@@qmrfReports + OpenTox::Authorization.logout(@@subjectid) if AA_SERVER + end + + def test_validation_list + puts "test_validation_list" + list = OpenTox::Validation.list + assert list.is_a?(Array) + list.each do |l| + assert l.uri? + end + end + + def test_training_test_split + + @@vs = [] + @@data.each do |data| + if data[:type]==:validation + puts "test_training_test_split "+data[:info].to_s + p = { + :dataset_uri => data[:data], + :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), + :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), + :prediction_feature => data[:feat], + :split_ratio => 0.95, + :random_seed => 2} + t = OpenTox::SubTask.new(nil,0,1) + def t.progress(pct) + if !defined?@last_msg or @last_msg+3 model} ) + assert v_list.size==1 and v_list.include?(v.uri) + @@vs << v + end + end + end + + def test_validation_report + #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/48", @@subjectid) + + @@reports = [] unless defined?@@reports + @@vs.each do |v| + puts "test_validation_report" + assert defined?v,"no validation defined" + assert_kind_of OpenTox::Validation,v + if @@subjectid + assert_rest_call_error OpenTox::NotAuthorizedError do + OpenTox::CrossvalidationReport.create(v.uri) + end + end + report = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid) + assert report==nil,"report already exists for validation\nreport: "+(report ? report.uri.to_s : "")+"\nvalidation: "+v.uri.to_s + report = OpenTox::ValidationReport.create(v.uri,@@subjectid) + assert report.uri.uri? + if @@subjectid + assert_rest_call_error OpenTox::NotAuthorizedError do + OpenTox::CrossvalidationReport.find(report.uri) + end + end + report = OpenTox::ValidationReport.find(report.uri,@@subjectid) + assert report.uri.uri? + report2 = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid) + assert_equal report.uri,report2.uri + report3_uri = v.find_or_create_report(@@subjectid) + assert_equal report.uri,report3_uri + @@reports << report2 + end + end + + def test_crossvalidation_list + puts "test_crossvalidation_list" + list = OpenTox::Crossvalidation.list + assert list.is_a?(Array) + list.each do |l| + assert l.uri? + end + end -=begin -=end def test_crossvalidation -=begin -=end - puts "creating model ..." - model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => @@classification_training_dataset.uri, :subjectid => @@subjectid}).to_s - lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid - puts @@classification_training_dataset.features.to_yaml - params = { - :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), - :dataset_uri => lazar.parameter("dataset_uri"), - #:dataset_uri => @@classification_training_dataset.uri, - :subjectid => @@subjectid, - #:prediction_feature => @@classification_training_dataset.features.keys.first, - #:algorithm_params => "feature_generation_uri=#{File.join(CONFIG[:services]["opentox-algorithm"],"bbrc")}" - :prediction_feature => lazar.parameter("prediction_feature"), - :algorithm_params => "feature_generation_uri=#{lazar.parameter("feature_generation_uri")}" - } - puts params.to_yaml - cv = OpenTox::Validation.create_crossvalidation(params) - puts cv.uri - #@@subjectid = OpenTox::Authorization.authenticate(TEST_USER,TEST_PW) - cv = OpenTox::Validation.new "http://opentox.informatik.uni-freiburg.de/validation/crossvalidation/6" - puts cv.uri - #puts cv.create_report(@@subjectid) - #puts cv.create_qmrf_report(@@subjectid) - #v = YAML.load OpenTox::RestClientWrapper.get(cv.uri,{:accept => "application/x-yaml", :subjectid => @@subjectid}).to_s - v = YAML.load OpenTox::RestClientWrapper.get(File.join(cv.uri, 'statistics'),{:accept => "application/x-yaml", :subjectid => @@subjectid}).to_s - puts v.to_yaml - #puts cv.summary("classification",@@subjectid) + + #assert_rest_call_error OpenTox::NotFoundError do + # OpenTox::Crossvalidation.find(File.join(CONFIG[:services]["opentox-validation"],"crossvalidation/noexistingid")) + #end + @@cvs = [] + @@data.each do |data| + if data[:type]==:crossvalidation + puts "test_crossvalidation "+data[:info].to_s + p = { + :dataset_uri => data[:data], + :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"), + :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"), + :prediction_feature => data[:feat], + :num_folds => 2 } + t = OpenTox::SubTask.new(nil,0,1) + def t.progress(pct) + if !defined?@last_msg or @last_msg+3 algorithm} ) + assert cv_list.include?(cv.uri) + cv_list.each do |cv_uri| + alg = OpenTox::Crossvalidation.find(cv_uri).metadata[OT.algorithm] + assert alg==algorithm,"wrong algorithm for filtered crossvalidation, should be: '"+algorithm.to_s+"', is: '"+alg.to_s+"'" + end + + @@cvs << cv + end + end + end + + def test_crossvalidation_report + #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/48", @@subjectid) + + @@reports = [] unless defined?@@reports + @@cvs.each do |cv| + puts "test_crossvalidation_report" + assert defined?cv,"no crossvalidation defined" + assert_kind_of OpenTox::Crossvalidation,cv + #assert_rest_call_error OpenTox::NotFoundError do + # OpenTox::CrossvalidationReport.find_for_crossvalidation(cv.uri) + #end + if @@subjectid + assert_rest_call_error OpenTox::NotAuthorizedError do + OpenTox::CrossvalidationReport.create(cv.uri) + end + end + assert OpenTox::ValidationReport.find_for_validation(cv.uri,@@subjectid)==nil + report = OpenTox::CrossvalidationReport.create(cv.uri,@@subjectid) + assert report.uri.uri? + if @@subjectid + assert_rest_call_error OpenTox::NotAuthorizedError do + OpenTox::CrossvalidationReport.find(report.uri) + end + end + report = OpenTox::CrossvalidationReport.find(report.uri,@@subjectid) + assert report.uri.uri? + report2 = OpenTox::CrossvalidationReport.find_for_crossvalidation(cv.uri,@@subjectid) + assert_equal report.uri,report2.uri + report3_uri = cv.find_or_create_report(@@subjectid) + assert_equal report.uri,report3_uri + @@reports << report2 + end + end + + def test_qmrf_report + #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/13", @@subjectid) + + @@qmrfReports = [] + @@cvs.each do |cv| + puts "test_qmrf_report" + assert defined?cv,"no crossvalidation defined" + validations = cv.metadata[OT.validation] + assert_kind_of Array,validations + assert validations.size==cv.metadata[OT.numFolds].to_i,validations.size.to_s+"!="+cv.metadata[OT.numFolds].to_s + val = OpenTox::Validation.find(validations[0], @@subjectid) + model_uri = val.metadata[OT.model] + + model = OpenTox::Model::Generic.find(model_uri, @@subjectid) + assert model!=nil + + #assert_rest_call_error OpenTox::NotFoundError do + # OpenTox::QMRFReport.find_for_model(model_uri, @@subjectid) + #end + + @@qmrfReports << OpenTox::QMRFReport.create(model_uri, @@subjectid) + end + end + + ################### utils and overrides ########################## + + def app + Sinatra::Application end + + # checks RestCallError type + def assert_rest_call_error( ex ) + if ex==OpenTox::NotAuthorizedError and @@subjectid==nil + puts "AA disabled: skipping test for not authorized" + return + end + begin + yield + rescue OpenTox::RestCallError => e + report = e.errorCause + while report.errorCause + report = report.errorCause + end + assert_equal report.errorType,ex.to_s + end + end + + # hack to have a global_setup and global_teardown + def teardown + if((@@expected_test_count-=1) == 0) + global_teardown + end + end + def setup + unless defined?@@expected_test_count + @@expected_test_count = (self.class.instance_methods.reject{|method| method[0..3] != 'test'}).length + global_setup + end + end + end + + diff --git a/validation_util.rb b/validation_util.rb new file mode 100755 index 0000000..a5206b3 --- /dev/null +++ b/validation_util.rb @@ -0,0 +1,46 @@ + +require 'test/unit' + + class ValidationTestUtil + + @@dataset_uris = {} + @@prediction_features = {} + + def self.upload_dataset(file, subjectid=nil, dataset_service=CONFIG[:services]["opentox-dataset"]) #, file_type="application/x-yaml") + raise "File not found: "+file.path.to_s unless File.exist?(file.path) + if @@dataset_uris[file.path.to_s]==nil + LOGGER.debug "uploading file: "+file.path.to_s + if (file.path =~ /yaml$/) + data = File.read(file.path) + #data_uri = OpenTox::RestClientWrapper.post(dataset_service,{:content_type => file_type},data).to_s.chomp + #@@dataset_uris[file.path.to_s] = data_uri + #LOGGER.debug "uploaded dataset: "+data_uri + d = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) + d.load_yaml(data) + d.save( subjectid ) + @@dataset_uris[file.path.to_s] = d.uri + elsif (file.path =~ /csv$/) + d = OpenTox::Dataset.create_from_csv_file(file.path, subjectid) + raise "num features not 1 (="+d.features.keys.size.to_s+"), what to predict??" if d.features.keys.size != 1 + @@prediction_features[file.path.to_s] = d.features.keys[0] + @@dataset_uris[file.path.to_s] = d.uri + elsif (file.path =~ /rdf$/) + d = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) + d.load_rdfxml_file(file, subjectid) + d.save(subjectid) + @@dataset_uris[file.path.to_s] = d.uri + else + raise "unknown file type: "+file.path.to_s + end + LOGGER.debug "uploaded dataset: "+d.uri + else + LOGGER.debug "file already uploaded: "+@@dataset_uris[file.path.to_s] + end + return @@dataset_uris[file.path.to_s] + end + + def self.prediction_feature_for_file(file) + @@prediction_features[file.path.to_s] + end + + end -- cgit v1.2.3