summary refs log tree commit diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2012-02-28 10:36:17 +0100
committer mguetlein <martin.guetlein@gmail.com> 2012-02-28 10:36:17 +0100
commit c019ecedcb54e0ccbfebcf6901b3007d1e24ba1d (patch)
tree 713fa0134ae15d0176e80026b362680a778735e5
parent 63320057e2a2b2121c5c405c31e2e7b709fa9e44 (diff)
remove unneeded dataset files when performing loo-cv
-rwxr-xr-x test/test_examples.rb 18
-rwxr-xr-x test/test_examples_util.rb 80
-rwxr-xr-x validation/validation_application.rb 1
-rwxr-xr-x validation/validation_service.rb 15
-rwxr-xr-x validation/validation_test.rb 104
5 files changed, 201 insertions, 17 deletions
diff --git a/test/test_examples.rb b/test/test_examples.rb
index f3c0b7e..2b95cf2 100755
--- a/test/test_examples.rb
+++ b/test/test_examples.rb
@@ -274,6 +274,22 @@ module ValidationExamples
end
end
+ ########################################################################################################
+
+ class HamsterLooCrossvalidation < LooCrossValidation
+ def initialize
+ @dataset_file = File.new("data/hamster_carcinogenicity.csv","r")
+ end
+ end
+
+ class LazarHamsterLooCrossvalidation < HamsterLooCrossvalidation
+ def initialize
+ @algorithm_uri = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
+ @algorithm_params = "feature_generation_uri="+File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc")
+ super
+ end
+ end
+
########################################################################################################
class LazarHamsterMiniCrossvalidation < CrossValidation
@@ -828,6 +844,8 @@ module ValidationExamples
"22e" => [ AmbitVsNtuaTrainingTest ],
"22f" => [ AnotherAmbitJ48TrainingTest ],
"22g" => [ TumTrainingTest ],
+
+ "23a" => [ LazarHamsterLooCrossvalidation ],
}
diff --git a/test/test_examples_util.rb b/test/test_examples_util.rb
index b48096d..82c4c48 100755
--- a/test/test_examples_util.rb
+++ b/test/test_examples_util.rb
@@ -1,4 +1,15 @@
+class Numeric
+ def to_human
+ return "0" if self==0
+ units = %w{B KB MB GB TB}
+ e = (Math.log(self)/Math.log(1024)).floor
+ s = "%.1f" % (to_f / 1024**e)
+ s.sub(/\.?0*$/, units[e])
+ end
+end
+
+
module ValidationExamples
class Util
@@ -335,6 +346,57 @@ module ValidationExamples
end
end
+ def compute_dataset_size
+ if @validation_uri =~ /crossvalidation/
+ cv = OpenTox::Crossvalidation.find(@validation_uri,@subjectid)
+ count = 0
+ size = 0
+ target = nil
+
+ cv.metadata[OT.validation].each do |v|
+ val = OpenTox::Validation.find(v)
+ dataset = {}
+ dataset[:test] = val.metadata[OT.testDataset]
+ dataset[:training] = val.metadata[OT.trainingDataset]
+ #dataset[:target] = val.metadata[OT.testTargetDataset]
+ raise if (target!=nil and target!=val.metadata[OT.testTargetDataset])
+ target = val.metadata[OT.testTargetDataset]
+
+ dataset[:prediction] = val.metadata[OT.predictionDataset]
+ m = val.metadata[OT.model]
+ model = OpenTox::Model::Generic.find(m)
+ dataset[:feature] = model.metadata[OT.featureDataset]
+
+ puts v
+ val_size = 0
+ dataset.each do |k,v|
+ s = size(v)
+ val_size += s
+ puts k.to_s+" "+v+" "+s.to_human
+ end
+ puts val_size.to_human
+ puts ""
+ size += val_size
+
+ count += 1
+ #break if (count>2)
+ end
+
+ puts "total "+size.to_human+" (count: "+count.to_s+")"
+ puts "avg "+(size/count.to_f).to_human
+
+ puts ""
+ puts "orig file: "+target+" "+size(target).to_human
+ end
+ end
+
+ private
+ def size(dataset)
+ f = "/home/martin/opentox-ruby/www/opentox/dataset/data/#{dataset.split("/")[-1]}.json"
+ File.exist?(f) ? File.new(f).size : 0
+ end
+
+ public
def verify_yaml
raise "cannot very validation, validation_uri is null" unless @validation_uri
@@ -443,4 +505,22 @@ module ValidationExamples
"crossvalidation"
end
end
+
+ class LooCrossValidation < ValidationExample
+ def params
+ [:algorithm_uri, :dataset_uri, :prediction_feature]
+ end
+
+ def opt_params
+ [ :algorithm_params ]
+ end
+
+ def report_type
+ "crossvalidation"
+ end
+
+ def validation_type
+ "crossvalidation/loo"
+ end
+ end
end
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index 279cd14..b07e814 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -97,6 +97,7 @@ post '/crossvalidation/loo/?' do
cv.perform_cv( params[:prediction_feature], params[:algorithm_params], OpenTox::SubTask.create(task,0,95))
# computation of stats is cheap as dataset are already loaded into the memory
Validation::Validation.from_cv_statistics( cv.id, @subjectid, OpenTox::SubTask.create(task,95,100) )
+ cv.clean_loo_files
cv.crossvalidation_uri
end
return_task(task)
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index 614363d..527e5ca 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -305,6 +305,21 @@ module Validation
perform_cv_validations( algorithm_params, OpenTox::SubTask.create(task, 33, 100) )
end
+ def clean_loo_files
+ Validation.find( :crossvalidation_id => self.id, :validation_type => "crossvalidation" ).each do |v|
+ LOGGER.debug "loo-cleanup> delete training dataset "+v.training_dataset_uri
+ OpenTox::RestClientWrapper.delete v.training_dataset_uri,subjectid
+ begin
+ model = OpenTox::Model::Generic.find(v.model_uri)
+ if model.metadata[OT.featureDataset]
+ LOGGER.debug "loo-cleanup> delete feature dataset "+model.metadata[OT.featureDataset]
+ OpenTox::RestClientWrapper.delete model.metadata[OT.featureDataset],subjectid
+ end
+ rescue
+ end
+ end
+ end
+
# deletes a crossvalidation, all validations are deleted as well
def delete_crossvalidation
validations = Validation.find(:crossvalidation_id => self.id)
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index ae71749..70f3ca4 100755
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -60,6 +60,60 @@ class ValidationTest < Test::Unit::TestCase
begin
$test_case = self
+# dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=0"
+# test_dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603206?pagesize=250&page=1"
+# #prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528321"
+# prediction_feature = "http://apps.ideaconsult.net:8080/ambit2/feature/528402"
+# prediction_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/RandomForest"
+# #ad_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/leverage"
+# #ad_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/distanceMahalanobis"
+# #ad_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/pcaRanges"
+# ad_algorithm = "http://apps.ideaconsult.net:8080/ambit2/algorithm/RandomForest"
+# post "/training_test_validation",{:training_dataset_uri=>dataset_uri, :test_dataset_uri=>test_dataset_uri,
+# :prediction_feature => prediction_feature, :algorithm_uri=>"http://local-ot/adwrap",
+# :algorithm_params=>"prediction_algorithm=#{prediction_algorithm};ad_algorithm=#{ad_algorithm}"}
+# puts last_response.body
+# uri = last_response.body
+# rep = wait_for_task(uri)
+# puts rep
+#
+# post "/report/method_comparison",
+# {:validation_uris=>"http://local-ot/validation/433,http://local-ot/validation/434,http://local-ot/validation/435,http://local-ot/validation/436,http://local-ot/validation/437,http://local-ot/validation/438,http://local-ot/validation/439,http://local-ot/validation/440,http://local-ot/validation/441,http://local-ot/validation/442,http://local-ot/validation/crossvalidation/30,",
+# :identifier=>"random,random,random,random,random,random,random,random,random,random,crossvalidated,"}
+
+# post "/report/method_comparison",
+# {:validation_uris=>"http://local-ot/validation/389,http://local-ot/validation/390,http://local-ot/validation/391,http://local-ot/validation/392",
+# :identifier=>"split1,split1,split2,split2"}
+
+
+ #post "/report/validation",{:validation_uris=>"http://local-ot/validation/171"}
+ #post "/report/validation",{:validation_uris=>"http://local-ot/validation/389"}
+
+ #dataset_uri = OpenTox::Dataset.create_from_csv_file(File.new("data/EPAFHM.csv").path, nil).uri
+ #puts dataset_uri
+
+# #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603306?feature_uris[]=http://apps.ideaconsult.net:8080/ambit2/feature/764036"
+# #dataset_uri = "http://apps.ideaconsult.net:8080/ambit2/dataset/603204"
+# post "/plain_training_test_split",{:dataset_uri=>dataset_uri, :stratified=>"true", :split_ratio=>0.3}
+# puts last_response.body
+# uri = last_response.body
+# rep = wait_for_task(uri)
+# puts rep
+ #OpenTox::RestClientWrapper.post("http://opentox.informatik.uni-freiburg.de/validation/plain_training_test_split",
+ # {:dataset_uri=>dataset_uri, :stratified=>"true", :split_ratio=>0.7407407407})
+
+ #puts OpenTox::Dataset.create_from_csv_file(File.new("data/hamster_carcinogenicity.csv").path, nil).uri
+ #puts OpenTox::Dataset.create_from_csv_file(File.new("data/multi_cell_call.csv").path, nil).uri
+
+ #puts OpenTox::Dataset.find("http://opentox.informatik.uni-freiburg.de/dataset/98").compounds.size
+
+#
+# #post "/plain_training_test_split",{:dataset_uri=>"http://apps.ideaconsult.net:8080/ambit2/dataset/603204", :stratified=>"true"}
+#
+#
+#
+
+
# post "/validate_datasets",{
# :test_dataset_uri=>"http://local-ot/dataset/6907",
# :prediction_dataset_uri=>"http://local-ot/dataset/6909",
@@ -71,20 +125,19 @@ class ValidationTest < Test::Unit::TestCase
# #:regression=>"true"}
# :classification=>"true"}
#
-# puts last_response.body
-# uri = last_response.body
-# rep = wait_for_task(uri)
-# puts rep
+
#get 'crossvalidation/19/statistics'
#get 'crossvalidation/189/statistics'
#puts last_response.body
-# run_test("1b")
+
+ #run_test("13a")
+ # run_test("1a",:validation_uri=>"http://local-ot/validation/513")
#get '/crossvalidation/79/predictions',nil,'HTTP_ACCEPT' => "application/x-yaml"
#puts last_response.body
- run_test("22f") #,:validation_uri=>"http://local-ot/validation/84" )
+ # run_test("22f") #,:validation_uri=>"http://local-ot/validation/84" )
#run_test("21b")
@@ -109,12 +162,6 @@ class ValidationTest < Test::Unit::TestCase
# puts rep
# 205 206 207
-# post '/report/algorithm_comparison',{:validation_uris=>"http://local-ot/validation/crossvalidation/149,http://local-ot/validation/crossvalidation/210",
-# :identifier=>"bbrc,last"}
-# uri = last_response.body
-# rep = wait_for_task(uri)
-# puts rep
-
#run_test("1a", {:validation_uri=>"http://local-ot/validation/305"})
# puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
#run_test("3a",{:validation_uri=>"http://local-ot/validation/crossvalidation/6"})
@@ -123,14 +170,33 @@ class ValidationTest < Test::Unit::TestCase
# puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
#run_test("14a") #,{:validation_uri=>"http://local-ot/validation/crossvalidation/148"})
# puts "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
-
- #run_test("1a")
-# run_test("3d",{
-# :dataset_uri => "http://local-ot/dataset/2897",
-# :prediction_feature => "http://local-ot/dataset/2897/feature/Hamster%20Carcinogenicity",
+ #run_test("3a")
+ #run_test("3d",{
+ # :dataset_uri => "http://local-ot/dataset/447",
+ # :prediction_feature => "http://local-ot/dataset/447/feature/Hamster%20Carcinogenicity",
+ # :random_seed => 1
+ # })
+
+ #run_test("23a")
+ run_test("23a",{:validation_uri=>"http://local-ot/validation/crossvalidation/53"})
+ #run_test("23a",{:validation_uri=>"http://local-ot/validation/crossvalidation/47"})
+ #23a loo {:validation_uri=>"http://local-ot/validation/crossvalidation/47"})
+ #loo with datasets on ortona {:validation_uri=>"http://local-ot/validation/crossvalidation/46"}
+
+# run_test("14d",{
+# :dataset_uri => "http://local-ot/dataset/508",
+# :prediction_feature => "http://local-ot/dataset/508/feature/LC50_mmol",
# :random_seed => 1
# })
+
+ #post '/report/algorithm_comparison',{
+ # :validation_uris=>"http://local-ot/validation/crossvalidation/9,http://local-ot/validation/crossvalidation/10",
+ # :identifier=>"bbrc,last",
+ # :ttest_attributes=>"num_instances,num_without_class,num_unpredicted,real_runtime,percent_without_class,percent_unpredicted"}
+ #uri = last_response.body
+ #rep = wait_for_task(uri)
+ #puts rep
#run_test("14",{
# :dataset_uri => "http://local-ot/dataset/3877",
@@ -189,6 +255,10 @@ class ValidationTest < Test::Unit::TestCase
LOGGER.debug "validation done '"+ex.validation_uri.to_s+"'"
end
+
+ #ex.compute_dataset_size
+ #break
+
if !delete and ex.validation_uri
if SUBJECTID
puts ex.validation_uri+"?subjectid="+CGI.escape(SUBJECTID)