summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2011-05-09 16:11:28 +0200
committer mguetlein <martin.guetlein@gmail.com> 2011-05-09 16:11:28 +0200
commit 6f89d7448d861f665d6c506e27db31984bd62808 (patch)
tree 761dc6d1c59acef5d423c1cb8f4010fd94757960
parent 73bfe27cf9d62593c426d8742df18e3b47388ff1 (diff)
moved validation-unit-tests to test repository
-rw-r--r-- Rakefile 2
-rw-r--r-- data/EPAFHM.mini.csv 21
-rw-r--r-- data/hamster_carcinogenicity.mini.csv 11
-rw-r--r-- validation.rb 335
-rwxr-xr-x validation_util.rb 46
5 files changed, 379 insertions, 36 deletions
diff --git a/Rakefile b/Rakefile
index 42f8b15..49b72f8 100644
--- a/Rakefile
+++ b/Rakefile
@@ -31,7 +31,7 @@ task :teardown do
end
#[:all, :feature, :dataset, :fminer, :lazar, :authorization, :validation].each do |t|
-[:all, :feature, :dataset, :fminer, :lazar, :authorization, :parser].each do |t|
+[:all, :feature, :dataset, :fminer, :lazar, :authorization, :parser, :validation ].each do |t|
task :teardown => t
task t => :setup
end
diff --git a/data/EPAFHM.mini.csv b/data/EPAFHM.mini.csv
new file mode 100644
index 0000000..c86cd33
--- /dev/null
+++ b/data/EPAFHM.mini.csv
@@ -0,0 +1,21 @@
+"STRUCTURE_SMILES","LC50_mmol"
+"C1=CC(C=O)=CC(OC)=C1OCCCCCC",1.13E-02
+"C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O",2.66E-01
+"CCCCCCCCOC(=O)C1=CC=CC(C(=O)OCCCCCCCC)=C1",
+"C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2",7.69E-03
+"CC1=C(NC=O)C=CC=C1Cl",2.75E-01
+"CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1",3.23E-03
+"C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C",5.33E-02
+"CCCSCCSCCC",4.22E-02
+"CCCCCCCCOC(=O)C1=CC=C(C(=O)OCCCCCCCC)C=C1",
+"OCCCCOC(=O)C1=CC=CC=C1C(=O)OCCCCOC(=O)C2=CC=CC=C2C(=O)OCCCCO",
+"CCCSCCCCSCCC",1.45E-02
+"C1([N+](=O)[O-])=CC=C(C)C=C1OP(=O)(OC2=C([N+](=O)[O-])C=CC(C)=C2)OC3=C([N+]([O-])=O)C=CC(C)=C3",
+"C1=C([N+]([O-])=O)C=CC=C1P(=O)(C2=CC([N+](=O)[O-])=CC=C2)C3=CC([N+](=O)[O-])=CC=C3",
+"ClCCOC(=O)NC1CCCCC1",1.70E-01
+"O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC",2.08E+00
+"OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-]",5.92E-02
+"NC(=O)OCC",5.88E+01
+"[O-]C(C1=CC=CC=C1O)=O.[Na+]",1.25E+01
+"C1=CC=CC=C1C(=O)N",5.46E+00
+"CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-]",7.07E-01
diff --git a/data/hamster_carcinogenicity.mini.csv b/data/hamster_carcinogenicity.mini.csv
new file mode 100644
index 0000000..4267235
--- /dev/null
+++ b/data/hamster_carcinogenicity.mini.csv
@@ -0,0 +1,11 @@
+SMILES, Hamster Carcinogenicity
+CC=O,1
+C12C3=C(C=CC=C3)CC1=CC(=CC=2)NC(C)=O,1
+O=C(N)\C(C2=CC=CO2)=C/C1=CC=C([N+]([O-])=O)O1,1
+C1(N=CNN=1)N,0
+Br(=O)(=O)[O-].[K+],1
+[Cl-].[Cd+2].[Cl-],0
+O=S(=O)([O-])[O-].[Cd+2],0
+ClC1=CC(=NC(=N1)SCC(=O)O)NC2=CC=CC(=C2C)C,0
+ClCOC,1
+C=C(Cl)C=C,0
diff --git a/validation.rb b/validation.rb
index beb1c02..0946cf9 100644
--- a/validation.rb
+++ b/validation.rb
@@ -1,45 +1,310 @@
-require 'rubygems'
-require 'opentox-ruby'
+ENV['RACK_ENV'] = 'production'
+
require 'test/unit'
+require 'validation_util.rb'
+
+#TEST_USER = "guest"
+#TEST_PW = "guest"
+
+#LOGGER = OTLogger.new(STDOUT)
+#LOGGER.datetime_format = "%Y-%m-%d %H:%M:%S "
+#LOGGER.formatter = Logger::Formatter.new
-class Float
- def round_to(x)
- (self * 10**x).round.to_f / 10**x
+# Switches the :raise_errors and :show_exceptions settings off.
+# NOTE(review): presumably so that service errors come back as error responses
+# instead of being raised into the test process - confirm.
+module Sinatra
+ set :raise_errors, false
+ set :show_exceptions, false
+end
+
+class Exception
+ def message
+ errorCause ? errorCause.to_yaml : to_s
end
end
class ValidationTest < Test::Unit::TestCase
+
+ def global_setup
+ puts "login and upload datasets"
+ if AA_SERVER
+ @@subjectid = OpenTox::Authorization.authenticate(TEST_USER,TEST_PW)
+ raise "could not log in" unless @@subjectid
+ puts "logged in: "+@@subjectid.to_s
+ else
+ puts "AA disabled"
+ @@subjectid = nil
+ end
+
+ @@data = []
+ files = { File.new("data/hamster_carcinogenicity.mini.csv") => :crossvalidation,
+ File.new("data/EPAFHM.mini.csv") => :crossvalidation,
+ File.new("data/hamster_carcinogenicity.csv") => :validation,
+ File.new("data/EPAFHM.csv") => :validation,
+# File.new("data/StJudes-HepG2-testset_Class.csv") => :crossvalidation
+ }
+ files.each do |file,type|
+ @@data << { :type => type,
+ :data => ValidationTestUtil.upload_dataset(file, @@subjectid),
+ :feat => ValidationTestUtil.prediction_feature_for_file(file),
+ :info => file.path, :delete => true}
+ end
+# @@data << { :type => :crossvalidation,
+# :data => "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=50",
+# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/21573",
+# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/9?max=50" }
+# @@data << { :type => :validation,
+# :data => "http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50",
+# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/26221",
+# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50" }
+ end
+
+ # One-time cleanup, called from #teardown once the last test has finished.
+ # Deletes the datasets in @@data that are flagged with :delete, then every
+ # validation, crossvalidation, report and QMRF report the tests collected,
+ # and finally logs out when AA_SERVER is set.
+ def global_teardown
+ puts "delete and logout"
+ @@data.each{|data| OpenTox::Dataset.find(data[:data],@@subjectid).delete(@@subjectid) if data[:delete]}
+ # these class variables are only assigned inside individual tests, hence the defined? guards
+ @@vs.each{|v| v.delete(@@subjectid)} if defined?@@vs
+ @@cvs.each{|cv| cv.delete(@@subjectid)} if defined?@@cvs
+ @@reports.each{|report| report.delete(@@subjectid)} if defined?@@reports
+ @@qmrfReports.each{|qmrfReport| qmrfReport.delete(@@subjectid)} if defined?@@qmrfReports
+ OpenTox::Authorization.logout(@@subjectid) if AA_SERVER
+ end
+
+ # Checks that OpenTox::Validation.list returns an Array whose entries all
+ # answer true to #uri?. The list is requested without a subjectid.
+ def test_validation_list
+ puts "test_validation_list"
+ list = OpenTox::Validation.list
+ assert list.is_a?(Array)
+ list.each do |l|
+ assert l.uri?
+ end
+ end
+
+ def test_training_test_split
+
+ @@vs = []
+ @@data.each do |data|
+ if data[:type]==:validation
+ puts "test_training_test_split "+data[:info].to_s
+ p = {
+ :dataset_uri => data[:data],
+ :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"),
+ :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"),
+ :prediction_feature => data[:feat],
+ :split_ratio => 0.95,
+ :random_seed => 2}
+ t = OpenTox::SubTask.new(nil,0,1)
+ def t.progress(pct)
+ if !defined?@last_msg or @last_msg+3<Time.new
+ puts "waiting for training-test-split validation: "+pct.to_s
+ @last_msg=Time.new
+ end
+ end
+ def t.waiting_for(task_uri); end
+ v = OpenTox::Validation.create_training_test_split(p, @@subjectid, t)
+ assert v.uri.uri?
+ if @@subjectid
+ assert_rest_call_error OpenTox::NotAuthorizedError do
+ OpenTox::Crossvalidation.find(v.uri)
+ end
+ end
+ v = OpenTox::Validation.find(v.uri, @@subjectid)
+ assert v.uri.uri?
+
+ model = v.metadata[OT.model]
+ assert model.uri?
+ v_list = OpenTox::Validation.list( {:model => model} )
+ assert v_list.size==1 and v_list.include?(v.uri)
+ @@vs << v
+ end
+ end
+ end
+
+ def test_validation_report
+ #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/48", @@subjectid)
+
+ @@reports = [] unless defined?@@reports
+ @@vs.each do |v|
+ puts "test_validation_report"
+ assert defined?v,"no validation defined"
+ assert_kind_of OpenTox::Validation,v
+ if @@subjectid
+ assert_rest_call_error OpenTox::NotAuthorizedError do
+ OpenTox::CrossvalidationReport.create(v.uri)
+ end
+ end
+ report = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid)
+ assert report==nil,"report already exists for validation\nreport: "+(report ? report.uri.to_s : "")+"\nvalidation: "+v.uri.to_s
+ report = OpenTox::ValidationReport.create(v.uri,@@subjectid)
+ assert report.uri.uri?
+ if @@subjectid
+ assert_rest_call_error OpenTox::NotAuthorizedError do
+ OpenTox::CrossvalidationReport.find(report.uri)
+ end
+ end
+ report = OpenTox::ValidationReport.find(report.uri,@@subjectid)
+ assert report.uri.uri?
+ report2 = OpenTox::ValidationReport.find_for_validation(v.uri,@@subjectid)
+ assert_equal report.uri,report2.uri
+ report3_uri = v.find_or_create_report(@@subjectid)
+ assert_equal report.uri,report3_uri
+ @@reports << report2
+ end
+ end
+
+ # Checks that OpenTox::Crossvalidation.list returns an Array whose entries
+ # all answer true to #uri?. The list is requested without a subjectid.
+ def test_crossvalidation_list
+ puts "test_crossvalidation_list"
+ list = OpenTox::Crossvalidation.list
+ assert list.is_a?(Array)
+ list.each do |l|
+ assert l.uri?
+ end
+ end
-=begin
-=end
+ # Runs a 2-fold crossvalidation (lazar with fminer/bbrc feature generation)
+ # for every entry of @@data with type :crossvalidation and collects the
+ # results in @@cvs, which test_crossvalidation_report, test_qmrf_report and
+ # global_teardown read.
+ # Checks the returned URI, that unauthorized access is rejected (only when
+ # logged in), that the summary is a Hash, and that filtering the
+ # crossvalidation list by algorithm returns only matching crossvalidations.
def test_crossvalidation
-=begin
-=end
- puts "creating model ..."
- model_uri = OpenTox::Algorithm::Lazar.new.run({:dataset_uri => @@classification_training_dataset.uri, :subjectid => @@subjectid}).to_s
- lazar = OpenTox::Model::Lazar.find model_uri, @@subjectid
- puts @@classification_training_dataset.features.to_yaml
- params = {
- :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"),
- :dataset_uri => lazar.parameter("dataset_uri"),
- #:dataset_uri => @@classification_training_dataset.uri,
- :subjectid => @@subjectid,
- #:prediction_feature => @@classification_training_dataset.features.keys.first,
- #:algorithm_params => "feature_generation_uri=#{File.join(CONFIG[:services]["opentox-algorithm"],"bbrc")}"
- :prediction_feature => lazar.parameter("prediction_feature"),
- :algorithm_params => "feature_generation_uri=#{lazar.parameter("feature_generation_uri")}"
- }
- puts params.to_yaml
- cv = OpenTox::Validation.create_crossvalidation(params)
- puts cv.uri
- #@@subjectid = OpenTox::Authorization.authenticate(TEST_USER,TEST_PW)
- cv = OpenTox::Validation.new "http://opentox.informatik.uni-freiburg.de/validation/crossvalidation/6"
- puts cv.uri
- #puts cv.create_report(@@subjectid)
- #puts cv.create_qmrf_report(@@subjectid)
- #v = YAML.load OpenTox::RestClientWrapper.get(cv.uri,{:accept => "application/x-yaml", :subjectid => @@subjectid}).to_s
- v = YAML.load OpenTox::RestClientWrapper.get(File.join(cv.uri, 'statistics'),{:accept => "application/x-yaml", :subjectid => @@subjectid}).to_s
- puts v.to_yaml
- #puts cv.summary("classification",@@subjectid)
+
+ #assert_rest_call_error OpenTox::NotFoundError do
+ # OpenTox::Crossvalidation.find(File.join(CONFIG[:services]["opentox-validation"],"crossvalidation/noexistingid"))
+ #end
+ @@cvs = []
+ @@data.each do |data|
+ if data[:type]==:crossvalidation
+ puts "test_crossvalidation "+data[:info].to_s
+ p = {
+ :dataset_uri => data[:data],
+ :algorithm_uri => File.join(CONFIG[:services]["opentox-algorithm"],"lazar"),
+ :algorithm_params => "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"),
+ :prediction_feature => data[:feat],
+ :num_folds => 2 }
+ t = OpenTox::SubTask.new(nil,0,1)
+ # singleton override: print the progress, but at most once every 3 seconds
+ def t.progress(pct)
+ if !defined?@last_msg or @last_msg+3<Time.new
+ puts "waiting for crossvalidation: "+pct.to_s
+ @last_msg=Time.new
+ end
+ end
+ # singleton override: intentionally a no-op
+ def t.waiting_for(task_uri); end
+ cv = OpenTox::Crossvalidation.create(p, @@subjectid, t)
+ assert cv.uri.uri?
+ if @@subjectid
+ assert_rest_call_error OpenTox::NotAuthorizedError do
+ OpenTox::Crossvalidation.find(cv.uri)
+ end
+ end
+ cv = OpenTox::Crossvalidation.find(cv.uri, @@subjectid)
+ assert cv.uri.uri?
+ if @@subjectid
+ assert_rest_call_error OpenTox::NotAuthorizedError do
+ # NOTE(review): passes the crossvalidation object where the call below passes
+ # the subjectid; every other unauthorized probe simply omits the subjectid.
+ # Looks like a typo for cv.summary - confirm.
+ cv.summary(cv)
+ end
+ end
+ summary = cv.summary(@@subjectid)
+ assert_kind_of Hash,summary
+
+ algorithm = cv.metadata[OT.algorithm]
+ assert algorithm.uri?
+ cv_list = OpenTox::Crossvalidation.list( {:algorithm => algorithm} )
+ assert cv_list.include?(cv.uri)
+ # NOTE(review): the crossvalidations in the list are fetched without a subjectid - confirm this works with AA enabled
+ cv_list.each do |cv_uri|
+ alg = OpenTox::Crossvalidation.find(cv_uri).metadata[OT.algorithm]
+ assert alg==algorithm,"wrong algorithm for filtered crossvalidation, should be: '"+algorithm.to_s+"', is: '"+alg.to_s+"'"
+ end
+
+ @@cvs << cv
+ end
+ end
+ end
+
+ def test_crossvalidation_report
+ #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/48", @@subjectid)
+
+ @@reports = [] unless defined?@@reports
+ @@cvs.each do |cv|
+ puts "test_crossvalidation_report"
+ assert defined?cv,"no crossvalidation defined"
+ assert_kind_of OpenTox::Crossvalidation,cv
+ #assert_rest_call_error OpenTox::NotFoundError do
+ # OpenTox::CrossvalidationReport.find_for_crossvalidation(cv.uri)
+ #end
+ if @@subjectid
+ assert_rest_call_error OpenTox::NotAuthorizedError do
+ OpenTox::CrossvalidationReport.create(cv.uri)
+ end
+ end
+ assert OpenTox::ValidationReport.find_for_validation(cv.uri,@@subjectid)==nil
+ report = OpenTox::CrossvalidationReport.create(cv.uri,@@subjectid)
+ assert report.uri.uri?
+ if @@subjectid
+ assert_rest_call_error OpenTox::NotAuthorizedError do
+ OpenTox::CrossvalidationReport.find(report.uri)
+ end
+ end
+ report = OpenTox::CrossvalidationReport.find(report.uri,@@subjectid)
+ assert report.uri.uri?
+ report2 = OpenTox::CrossvalidationReport.find_for_crossvalidation(cv.uri,@@subjectid)
+ assert_equal report.uri,report2.uri
+ report3_uri = cv.find_or_create_report(@@subjectid)
+ assert_equal report.uri,report3_uri
+ @@reports << report2
+ end
+ end
+
+ # Creates a QMRF report for the model of the first fold of every
+ # crossvalidation in @@cvs and collects the reports in @@qmrfReports so that
+ # global_teardown can delete them.
+ # Also checks that the crossvalidation lists as many validations as it has folds.
+ def test_qmrf_report
+ #@@cv = OpenTox::Crossvalidation.find("http://local-ot/validation/crossvalidation/13", @@subjectid)
+
+ @@qmrfReports = []
+ @@cvs.each do |cv|
+ puts "test_qmrf_report"
+ # NOTE(review): defined? on a block parameter is always truthy, so this assertion cannot fail
+ assert defined?cv,"no crossvalidation defined"
+ validations = cv.metadata[OT.validation]
+ assert_kind_of Array,validations
+ assert validations.size==cv.metadata[OT.numFolds].to_i,validations.size.to_s+"!="+cv.metadata[OT.numFolds].to_s
+ # only the model of the first validation is used for the QMRF report
+ val = OpenTox::Validation.find(validations[0], @@subjectid)
+ model_uri = val.metadata[OT.model]
+
+ model = OpenTox::Model::Generic.find(model_uri, @@subjectid)
+ assert model!=nil
+
+ #assert_rest_call_error OpenTox::NotFoundError do
+ # OpenTox::QMRFReport.find_for_model(model_uri, @@subjectid)
+ #end
+
+ @@qmrfReports << OpenTox::QMRFReport.create(model_uri, @@subjectid)
+ end
+ end
+
+ ################### utils and overrides ##########################
+
+ def app
+ Sinatra::Application
end
+
+ # checks RestCallError type
+ def assert_rest_call_error( ex )
+ if ex==OpenTox::NotAuthorizedError and @@subjectid==nil
+ puts "AA disabled: skipping test for not authorized"
+ return
+ end
+ begin
+ yield
+ rescue OpenTox::RestCallError => e
+ report = e.errorCause
+ while report.errorCause
+ report = report.errorCause
+ end
+ assert_equal report.errorType,ex.to_s
+ end
+ end
+
+ # hack to have a global_setup and global_teardown
+ # Runs after every test: decrements the counter of outstanding tests that
+ # #setup initialised and calls global_teardown once it reaches zero.
+ def teardown
+ if((@@expected_test_count-=1) == 0)
+ global_teardown
+ end
+ end
+ # Runs before every test. On the first call only, it sets
+ # @@expected_test_count to the number of instance methods whose name starts
+ # with 'test' and then runs global_setup.
+ # NOTE(review): the counter is assigned before global_setup runs, so a failing
+ # global_setup is not retried by later tests.
+ def setup
+ unless defined?@@expected_test_count
+ @@expected_test_count = (self.class.instance_methods.reject{|method| method[0..3] != 'test'}).length
+ global_setup
+ end
+ end
+
end
+
+
diff --git a/validation_util.rb b/validation_util.rb
new file mode 100755
index 0000000..a5206b3
--- /dev/null
+++ b/validation_util.rb
@@ -0,0 +1,46 @@
+
+require 'test/unit'
+
+ class ValidationTestUtil
+
+ @@dataset_uris = {}
+ @@prediction_features = {}
+
+ def self.upload_dataset(file, subjectid=nil, dataset_service=CONFIG[:services]["opentox-dataset"]) #, file_type="application/x-yaml")
+ raise "File not found: "+file.path.to_s unless File.exist?(file.path)
+ if @@dataset_uris[file.path.to_s]==nil
+ LOGGER.debug "uploading file: "+file.path.to_s
+ if (file.path =~ /yaml$/)
+ data = File.read(file.path)
+ #data_uri = OpenTox::RestClientWrapper.post(dataset_service,{:content_type => file_type},data).to_s.chomp
+ #@@dataset_uris[file.path.to_s] = data_uri
+ #LOGGER.debug "uploaded dataset: "+data_uri
+ d = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
+ d.load_yaml(data)
+ d.save( subjectid )
+ @@dataset_uris[file.path.to_s] = d.uri
+ elsif (file.path =~ /csv$/)
+ d = OpenTox::Dataset.create_from_csv_file(file.path, subjectid)
+ raise "num features not 1 (="+d.features.keys.size.to_s+"), what to predict??" if d.features.keys.size != 1
+ @@prediction_features[file.path.to_s] = d.features.keys[0]
+ @@dataset_uris[file.path.to_s] = d.uri
+ elsif (file.path =~ /rdf$/)
+ d = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
+ d.load_rdfxml_file(file, subjectid)
+ d.save(subjectid)
+ @@dataset_uris[file.path.to_s] = d.uri
+ else
+ raise "unknown file type: "+file.path.to_s
+ end
+ LOGGER.debug "uploaded dataset: "+d.uri
+ else
+ LOGGER.debug "file already uploaded: "+@@dataset_uris[file.path.to_s]
+ end
+ return @@dataset_uris[file.path.to_s]
+ end
+
+ def self.prediction_feature_for_file(file)
+ @@prediction_features[file.path.to_s]
+ end
+
+ end