summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mguetlein <martin.guetlein@gmail.com>, 2014-10-06 22:21:05 +0200
committer: mguetlein <martin.guetlein@gmail.com>, 2014-10-06 22:21:05 +0200
commit: 1781a188d44faf2c0d9c9cf4ab82be966c1ea263 (patch)
tree: 50e74714669621603a27de5a1620830c609632f9
parent: 3d2869bef38438c994a548d3b4fa5bab45e61527 (diff)
add regression test to validation test
-rw-r--r-- test/validation-long.rb | 136
-rwxr-xr-x test/validation_util.rb | 1
2 files changed, 75 insertions, 62 deletions
diff --git a/test/validation-long.rb b/test/validation-long.rb
index 1a458a2..48a886e 100644
--- a/test/validation-long.rb
+++ b/test/validation-long.rb
@@ -26,21 +26,30 @@ DATA = []
# :feat => "http://apps.ideaconsult.net:8080/ambit2/feature/533748",
# :info => "http://apps.ideaconsult.net:8080/ambit2/dataset/435293?page=0&pagesize=300" }
-HAMSTER_CV_FEATURE_TYPES = ["bbrc"]
FILES = {
File.new(File.join(test_path,"data","hamster_carcinogenicity.csv")) => :split_validation,
- #File.new("data/EPAFHM.medi.csv") => :split_validation
- }
+ File.new(File.join(test_path,"data","EPAFHM.medi.csv")) => :split_validation,
+}
unless defined?($short_tests)
- #HAMSTER_CV_FEATURE_TYPES = ["bbrc", "last"]
FILES.merge!({
File.new(File.join(test_path,"data","hamster_carcinogenicity.csv")) => :crossvalidation,
- # File.new("data/EPAFHM.csv") => :crossvalidation,
- # File.new("data/hamster_carcinogenicity.csv") => :bootstrap_validation
+ # File.new("data/EPAFHM.csv") => :crossvalidation,
+ # File.new("data/hamster_carcinogenicity.csv") => :bootstrap_validation
})
end
+FEAT_GEN = {}
+FILES.each do |f,t|
+ if f.path=~/hamster/
+ FEAT_GEN[f] = [ File.join($algorithm[:uri],"fminer/bbrc") ] #FEAT_GEN[f] << File.join($algorithm[:uri],"fminer/last")
+ elsif f.path=~/EPAFHM/
+ FEAT_GEN[f] = [ File.join($algorithm[:uri],"descriptor","physchem") ]
+ else
+ raise "please define feature generation uri for dataset: #{f.path}"
+ end
+end
+
class ValidationTest < MiniTest::Test
i_suck_and_my_tests_are_order_dependent!
@@ -49,10 +58,17 @@ class ValidationTest < MiniTest::Test
puts "login and upload datasets"
OpenTox::RestClientWrapper.subjectid ? puts("logged in: "+OpenTox::RestClientWrapper.subjectid.to_s) : puts("AA disabled")
FILES.each do |file,type|
- DATA << { :type => type,
- :data => ValidationTestUtil.upload_dataset(file),
- :feat => ValidationTestUtil.prediction_feature_for_file(file),
- :info => file.path, :delete => true}
+ data = { :type => type,
+ :data => ValidationTestUtil.upload_dataset(file),
+ :feat => ValidationTestUtil.prediction_feature_for_file(file),
+ :split_ratio => (file.path=~/EPAFHM/ ? 0.98 : 0.9),
+ :info => file.path, :delete => true}
+ FEAT_GEN[file].each do |feat_gen|
+ data[:alg_params] = "feature_generation_uri="+feat_gen
++ data[:alg_params] << ";backbone=false;min_chisq_significance=0.0" if feat_gen=~/fminer/ and data[:info] =~ /mini/
+ data[:alg_params] << ";descriptors="+[ "Openbabel.atoms", "Openbabel.bonds", "Openbabel.dbonds", "Openbabel.HBA1", "Openbabel.HBA2", "Openbabel.HBD", "Openbabel.MP", "Openbabel.MR", "Openbabel.MW", "Openbabel.nF", "Openbabel.sbonds", "Openbabel.tbonds", "Openbabel.TPSA"].join(",") if feat_gen=~/physchem/
+ DATA << data
+ end
end
end
@@ -88,7 +104,7 @@ class ValidationTest < MiniTest::Test
p = {
:dataset_uri => data[:data],
:algorithm_uri => File.join($algorithm[:uri],"lazar"),
- :algorithm_params => "feature_generation_uri="+File.join($algorithm[:uri],"fminer/bbrc"),
+ :algorithm_params => data[:alg_params],
:prediction_feature => data[:feat],
:random_seed => 2}
t = OpenTox::SubTask.new(nil,0,1)
@@ -130,9 +146,9 @@ class ValidationTest < MiniTest::Test
p = {
:dataset_uri => data[:data],
:algorithm_uri => File.join($algorithm[:uri],"lazar"),
- :algorithm_params => "feature_generation_uri="+File.join($algorithm[:uri],"fminer/bbrc"),
+ :algorithm_params => data[:alg_params],
:prediction_feature => data[:feat],
- :split_ratio => 0.9,
+ :split_ratio => data[:split_ratio],
:random_seed => 2}
t = OpenTox::SubTask.new(nil,0,1)
def t.progress(pct)
@@ -161,7 +177,7 @@ class ValidationTest < MiniTest::Test
train_compounds = OpenTox::Dataset.find(v.metadata[RDF::OT.trainingDataset.to_s]).compounds
test_compounds = OpenTox::Dataset.find(v.metadata[RDF::OT.testDataset.to_s]).compounds
orig_compounds = OpenTox::Dataset.find(data[:data]).compounds
- assert_equal((orig_compounds.size*0.9).round,train_compounds.size)
+ assert_equal((orig_compounds.size*data[:split_ratio]).round,train_compounds.size)
assert_equal(orig_compounds.size,(train_compounds+test_compounds).size)
assert_equal(orig_compounds.uniq.size,(train_compounds+test_compounds).uniq.size)
@@ -186,7 +202,7 @@ class ValidationTest < MiniTest::Test
:training_dataset_uri => data[:train_data],
:test_dataset_uri => data[:test_data],
:algorithm_uri => File.join($algorithm[:uri],"lazar"),
- :algorithm_params => "feature_generation_uri="+File.join($algorithm[:uri],"fminer/bbrc"),
+ :algorithm_params => data[:alg_params],
:prediction_feature => data[:feat]}
t = OpenTox::SubTask.new(nil,0,1)
def t.progress(pct)
@@ -273,56 +289,52 @@ class ValidationTest < MiniTest::Test
@@cv_identifiers = []
DATA.each do |data|
if data[:type]==:crossvalidation
- HAMSTER_CV_FEATURE_TYPES.each do |fminer|
- next unless (fminer==HAMSTER_CV_FEATURE_TYPES[0] or data[:info].to_s =~ /hamster_carcinogenicity.csv/)
- puts "test_crossvalidation "+data[:info].to_s+" "+fminer
- p = {
- :dataset_uri => data[:data],
- :algorithm_uri => File.join($algorithm[:uri],"lazar"),
- :algorithm_params => "feature_generation_uri="+File.join($algorithm[:uri],"fminer/"+fminer)+
- (data[:info] =~ /mini/ ? ";backbone=false;min_chisq_significance=0.0" : ""),
- :prediction_feature => data[:feat],
- :num_folds => 10 }
- #:num_folds => 2 }
- t = OpenTox::SubTask.new(nil,0,1)
- def t.progress(pct)
- if !defined?@last_msg or @last_msg+10<Time.new
- puts "waiting for crossvalidation: "+pct.to_s
- @last_msg=Time.new
- end
- end
- def t.waiting_for(task_uri); end
- cv = OpenTox::Crossvalidation.create(p, t)
- assert cv.uri.uri?
- if $aa[:uri]
- assert_unauthorized do
- OpenTox::Crossvalidation.find(cv.uri)
- end
+ puts "test_crossvalidation "+data[:info].to_s+" "+data[:alg_params]
+ p = {
+ :dataset_uri => data[:data],
+ :algorithm_uri => File.join($algorithm[:uri],"lazar"),
+ :algorithm_params => data[:alg_params],
+ :prediction_feature => data[:feat],
+ :num_folds => 10 }
+ #:num_folds => 2 }
+ t = OpenTox::SubTask.new(nil,0,1)
+ def t.progress(pct)
+ if !defined?@last_msg or @last_msg+10<Time.new
+ puts "waiting for crossvalidation: "+pct.to_s
+ @last_msg=Time.new
end
- cv = OpenTox::Crossvalidation.find(cv.uri)
- assert_valid_date cv
- assert cv.uri.uri?
- stats_val = cv.statistics
- assert_kind_of OpenTox::Validation,stats_val
- assert_prob_correct(stats_val)
-
- algorithm = cv.metadata[RDF::OT.algorithm.to_s]
- assert algorithm.uri?
- cv_list = OpenTox::Crossvalidation.list( {:algorithm => algorithm} )
- assert cv_list.include?(cv.uri)
- cv_list.each do |cv_uri|
- #begin catch not authorized somehow
- alg = OpenTox::Crossvalidation.find(cv_uri).metadata[RDF::OT.algorithm.to_s]
- assert alg==algorithm,"wrong algorithm for filtered crossvalidation, should be: '"+algorithm.to_s+"', is: '"+alg.to_s+"'"
- #rescue
- #end
+ end
+ def t.waiting_for(task_uri); end
+ cv = OpenTox::Crossvalidation.create(p, t)
+ assert cv.uri.uri?
+ if $aa[:uri]
+ assert_unauthorized do
+ OpenTox::Crossvalidation.find(cv.uri)
end
- puts cv.uri unless defined?(DELETE) and DELETE
-
- @@cvs << cv
- @@cv_datasets << data
- @@cv_identifiers << fminer
end
+ cv = OpenTox::Crossvalidation.find(cv.uri)
+ assert_valid_date cv
+ assert cv.uri.uri?
+ stats_val = cv.statistics
+ assert_kind_of OpenTox::Validation,stats_val
+ assert_prob_correct(stats_val)
+
+ algorithm = cv.metadata[RDF::OT.algorithm.to_s]
+ assert algorithm.uri?
+ cv_list = OpenTox::Crossvalidation.list( {:algorithm => algorithm} )
+ assert cv_list.include?(cv.uri)
+ cv_list.each do |cv_uri|
+ #begin catch not authorized somehow
+ alg = OpenTox::Crossvalidation.find(cv_uri).metadata[RDF::OT.algorithm.to_s]
+ assert alg==algorithm,"wrong algorithm for filtered crossvalidation, should be: '"+algorithm.to_s+"', is: '"+alg.to_s+"'"
+ #rescue
+ #end
+ end
+ puts cv.uri unless defined?(DELETE) and DELETE
+
+ @@cvs << cv
+ @@cv_datasets << data
+ @@cv_identifiers << data[:alg_params]
end
end
end
diff --git a/test/validation_util.rb b/test/validation_util.rb
index d8373f4..0e82aec 100755
--- a/test/validation_util.rb
+++ b/test/validation_util.rb
@@ -35,6 +35,7 @@ class ValidationTestUtil
end
def self.prediction_feature_for_file(file)
+ raise "no prediction feature available for #{file.path}" unless @@prediction_features[file.path.to_s]
@@prediction_features[file.path.to_s]
end