From c019ecedcb54e0ccbfebcf6901b3007d1e24ba1d Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 28 Feb 2012 10:36:17 +0100 Subject: remove unneeded dataset files when performing loo-cv --- test/test_examples.rb | 18 ++++++ test/test_examples_util.rb | 80 +++++++++++++++++++++++++++ validation/validation_application.rb | 1 + validation/validation_service.rb | 15 +++++ validation/validation_test.rb | 104 +++++++++++++++++++++++++++++------ 5 files changed, 201 insertions(+), 17 deletions(-) diff --git a/test/test_examples.rb b/test/test_examples.rb index f3c0b7e..2b95cf2 100755 --- a/test/test_examples.rb +++ b/test/test_examples.rb @@ -274,6 +274,22 @@ module ValidationExamples end end + ######################################################################################################## + + class HamsterLooCrossvalidation < LooCrossValidation + def initialize + @dataset_file = File.new("data/hamster_carcinogenicity.csv","r") + end + end + + class LazarHamsterLooCrossvalidation < HamsterLooCrossvalidation + def initialize + @algorithm_uri = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") + @algorithm_params = "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc") + super + end + end + ######################################################################################################## class LazarHamsterMiniCrossvalidation < CrossValidation @@ -828,6 +844,8 @@ module ValidationExamples "22e" => [ AmbitVsNtuaTrainingTest ], "22f" => [ AnotherAmbitJ48TrainingTest ], "22g" => [ TumTrainingTest ], + + "23a" => [ LazarHamsterLooCrossvalidation ], } diff --git a/test/test_examples_util.rb b/test/test_examples_util.rb index b48096d..82c4c48 100755 --- a/test/test_examples_util.rb +++ b/test/test_examples_util.rb @@ -1,4 +1,15 @@ +class Numeric + def to_human + return "0" if self==0 + units = %w{B KB MB GB TB} + e = (Math.log(self)/Math.log(1024)).floor + s = "%.1f" % (to_f / 1024**e) + s.sub(/\.?0*$/, units[e]) + end +end + + module ValidationExamples class Util @@ -335,6 +346,57 @@ module ValidationExamples end end + def compute_dataset_size + if @validation_uri =~ /crossvalidation/ + cv = OpenTox::Crossvalidation.find(@validation_uri,@subjectid) + count = 0 + size = 0 + target = nil + + cv.metadata[OT.validation].each do |v| + val = OpenTox::Validation.find(v) + dataset = {} + dataset[:test] = val.metadata[OT.testDataset] + dataset[:training] = val.metadata[OT.trainingDataset] + #dataset[:target] = val.metadata[OT.testTargetDataset] + raise if (target!=nil and target!=val.metadata[OT.testTargetDataset]) + target = val.metadata[OT.testTargetDataset] + + dataset[:prediction] = val.metadata[OT.predictionDataset] + m = val.metadata[OT.model] + model = OpenTox::Model::Generic.find(m) + dataset[:feature] = model.metadata[OT.featureDataset] + + puts v + val_size = 0 + dataset.each do |k,v| + s = size(v) + val_size += s + puts k.to_s+" "+v+" "+s.to_human + end + puts val_size.to_human + puts "" + size += val_size + + count += 1 + #break if (count>2) + end + + puts "total "+size.to_human+" (count: "+count.to_s+")" + puts "avg "+(size/count.to_f).to_human + + puts "" + puts "orig file: "+target+" "+size(target).to_human + end + end + + private + def size(dataset) + f = "/home/martin/opentox-ruby/www/opentox/dataset/data/#{dataset.split("/")[-1]}.json" + File.exist?(f) ? File.new(f).size : 0 + end + + public def verify_yaml raise "cannot very validation, validation_uri is null" unless @validation_uri @@ -443,4 +505,22 @@ module ValidationExamples "crossvalidation" end end + + class LooCrossValidation < ValidationExample + def params + [:algorithm_uri, :dataset_uri, :prediction_feature] + end + + def opt_params + [ :algorithm_params ] + end + + def report_type + "crossvalidation" + end + + def validation_type + "crossvalidation/loo" + end + end end diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 279cd14..b07e814 100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -97,6 +97,7 @@ post '/crossvalidation/loo/?' do cv.perform_cv( params[:prediction_feature], params[:algorithm_params], OpenTox::SubTask.create(task,0,95)) # computation of stats is cheap as dataset are already loaded into the memory Validation::Validation.from_cv_statistics( cv.id, @subjectid, OpenTox::SubTask.create(task,95,100) ) + cv.clean_loo_files cv.crossvalidation_uri end return_task(task) diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 614363d..527e5ca 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -305,6 +305,21 @@ module Validation perform_cv_validations( algorithm_params, OpenTox::SubTask.create(task, 33, 100) ) end + def clean_loo_files + Validation.find( :crossvalidation_id => self.id, :validation_type => "crossvalidation" ).each do |v| + LOGGER.debug "loo-cleanup> delete training dataset "+v.training_dataset_uri + OpenTox::RestClientWrapper.delete v.training_dataset_uri,subjectid + begin + model = OpenTox::Model::Generic.find(v.model_uri) + if model.metadata[OT.featureDataset] + LOGGER.debug "loo-cleanup> delete feature dataset "+model.metadata[OT.featureDataset] + OpenTox::RestClientWrapper.delete model.metadata[OT.featureDataset],subjectid + end + rescue + end + end + end + # deletes a crossvalidation, all validations are deleted as well def delete_crossvalidation validations = Validation.find(:crossvalidation_id => self.id) diff --git a/validation/validation_test.rb b/validation/validation_test.rb index ae71749..70f3ca4 100755 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -60,6 +60,60 @@ class ValidationTest < Test::Unit::TestCase begin $test_case = self +# dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=0" +# test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=1" +# #prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321" +# prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528402" +# prediction_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/RandomForest" +# #ad_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/leverage" +# #ad_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/distanceMahalanobis" +# #ad_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/pcaRanges" +# ad_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/RandomForest" +# post "/training_test_validation",{:training_dataset_uri=>dataset_uri, :test_dataset_uri=>test_dataset_uri, +# :prediction_feature => prediction_feature, :algorithm_uri=>"http://local-ot/adwrap", +# :algorithm_params=>"prediction_algorithm=#{prediction_algorithm};ad_algorithm=#{ad_algorithm}"} +# puts last_response.body +# uri = last_response.body +# rep = wait_for_task(uri) +# puts rep +# +# post "/report/method_comparison", +# {:validation_uris=>"http://local-ot/validation/433,http://local-ot/validation/434,http://local-ot/validation/435,http://local-ot/validation/436,http://local-ot/validation/437,http://local-ot/validation/438,http://local-ot/validation/439,http://local-ot/validation/440,http://local-ot/validation/441,http://local-ot/validation/442,http://local-ot/validation/crossvalidation/30,", +# :identifier=>"random,random,random,random,random,random,random,random,random,random,crossvalidated,"} + +# post "/report/method_comparison", +# {:validation_uris=>"http://local-ot/validation/389,http://local-ot/validation/390,http://local-ot/validation/391,http://local-ot/validation/392", +# :identifier=>"split1,split1,split2,split2"} + + + #post "/report/validation",{:validation_uris=>"http://local-ot/validation/171"} + #post "/report/validation",{:validation_uris=>"http://local-ot/validation/389"} + + #dataset_uri = OpenTox::Dataset.create_from_csv_file(File.new("data/EPAFHM.csv").path, nil).uri + #puts dataset_uri + +# #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603306?feature_uris[]=http://apps.ideaconsult.net:8080/ambit2/feature/764036" +# #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603204" +# post "/plain_training_test_split",{:dataset_uri=>dataset_uri, :stratified=>"true", :split_ratio=>0.3} +# puts last_response.body +# uri = last_response.body +# rep = wait_for_task(uri) +# puts rep + #OpenTox::RestClientWrapper.post("http://opentox.informatik.uni-freiburg.de/validation/plain_training_test_split", + # {:dataset_uri=>dataset_uri, :stratified=>"true", :split_ratio=>0.7407407407}) + + #puts OpenTox::Dataset.create_from_csv_file(File.new("data/hamster_carcinogenicity.csv").path, nil).uri + #puts OpenTox::Dataset.create_from_csv_file(File.new("data/multi_cell_call.csv").path, nil).uri + + #puts OpenTox::Dataset.find("http://opentox.informatik.uni-freiburg.de/dataset/98").compounds.size + +# +# #post "/plain_training_test_split",{:dataset_uri=>"http://apps.ideaconsult.net:8080/ambit2/dataset/603204", :stratified=>"true"} +# +# +# + + # post "/validate_datasets",{ # :test_dataset_uri=>"http://local-ot/dataset/6907", # :prediction_dataset_uri=>"http://local-ot/dataset/6909", @@ -71,20 +125,19 @@ class ValidationTest < Test::Unit::TestCase # #:regression=>"true"} # :classification=>"true"} # -# puts last_response.body -# uri = last_response.body -# rep = wait_for_task(uri) -# puts rep + #get 'crossvalidation/19/statistics' #get 'crossvalidation/189/statistics' #puts last_response.body -# run_test("1b") + + #run_test("13a") + # run_test("1a",:validation_uri=>"http://local-ot/validation/513") #get '/crossvalidation/79/predictions',nil,'HTTP_ACCEPT' => "application/x-yaml" #puts last_response.body - run_test("22f") #,:validation_uri=>"http://local-ot/validation/84" ) + # run_test("22f") #,:validation_uri=>"http://local-ot/validation/84" ) #run_test("21b") @@ -109,12 +162,6 @@ class ValidationTest < Test::Unit::TestCase # puts rep # 205 206 207 -# post '/report/algorithm_comparison',{:validation_uris=>"http://local-ot/validation/crossvalidation/149,http://local-ot/validation/crossvalidation/210", -# :identifier=>"bbrc,last"} -# uri = last_response.body -# rep = wait_for_task(uri) -# puts rep - #run_test("1a", {:validation_uri=>"http://local-ot/validation/305"}) # puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" #run_test("3a",{:validation_uri=>"http://local-ot/validation/crossvalidation/6"}) @@ -123,14 +170,33 @@ class ValidationTest < Test::Unit::TestCase # puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" #run_test("14a") #,{:validation_uri=>"http://local-ot/validation/crossvalidation/148"}) # puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" - - #run_test("1a") -# run_test("3d",{ -# :dataset_uri => "http://local-ot/dataset/2897", -# :prediction_feature => "http://local-ot/dataset/2897/feature/Hamster%20Carcinogenicity", + #run_test("3a") + #run_test("3d",{ + # :dataset_uri => "http://local-ot/dataset/447", + # :prediction_feature => "http://local-ot/dataset/447/feature/Hamster%20Carcinogenicity", + # :random_seed => 1 + # }) + + #run_test("23a") + run_test("23a",{:validation_uri=>"http://local-ot/validation/crossvalidation/53"}) + #run_test("23a",{:validation_uri=>"http://local-ot/validation/crossvalidation/47"}) + #23a loo {:validation_uri=>"http://local-ot/validation/crossvalidation/47"}) + #loo mit datasets auf ortona {:validation_uri=>"http://local-ot/validation/crossvalidation/46"} + +# run_test("14d",{ +# :dataset_uri => "http://local-ot/dataset/508", +# :prediction_feature => "http://local-ot/dataset/508/feature/LC50_mmol", # :random_seed => 1 # }) + + #post '/report/algorithm_comparison',{ + # :validation_uris=>"http://local-ot/validation/crossvalidation/9,http://local-ot/validation/crossvalidation/10", + # :identifier=>"bbrc,last", + # :ttest_attributes=>"num_instances,num_without_class,num_unpredicted,real_runtime,percent_without_class,percent_unpredicted"} + #uri = last_response.body + #rep = wait_for_task(uri) + #puts rep #run_test("14",{ # :dataset_uri => "http://local-ot/dataset/3877", @@ -189,6 +255,10 @@ class ValidationTest < Test::Unit::TestCase LOGGER.debug "validation done '"+ex.validation_uri.to_s+"'" end + + #ex.compute_dataset_size + #break + if !delete and ex.validation_uri if SUBJECTID puts ex.validation_uri+"?subjectid="+CGI.escape(SUBJECTID) -- cgit v1.2.3