From 5abed3eedb5e0bc763574ef6a24b006546bdb22f Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 26 May 2011 18:06:50 +0200 Subject: implemented loo crossvalidation --- validation/validation_application.rb | 48 ++++++++++++++++++++++++++++++++---- validation/validation_service.rb | 20 ++++++++++++--- 2 files changed, 59 insertions(+), 9 deletions(-) (limited to 'validation') diff --git a/validation/validation_application.rb b/validation/validation_application.rb index 38fa1c7..32a1c99 100755 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -12,8 +12,9 @@ get '/crossvalidation/?' do uri_list = Lib::OhmUtil.find( Validation::Crossvalidation, params ).sort.collect{|v| v.crossvalidation_uri}.join("\n") + "\n" if request.env['HTTP_ACCEPT'] =~ /text\/html/ related_links = - "Single validations: "+url_for("/",:full)+"\n"+ - "Crossvalidation reports: "+url_for("/report/crossvalidation",:full) + "Single validations: "+url_for("/",:full)+"\n"+ + "Leave-one-out crossvalidations: "+url_for("/crossvalidation/loo",:full)+"\n"+ + "Crossvalidation reports: "+url_for("/report/crossvalidation",:full) description = "A list of all crossvalidations.\n"+ "Use the POST method to perform a crossvalidation." @@ -36,7 +37,8 @@ post '/crossvalidation/?' do params[:num_folds].to_i>1 cv_params = { :dataset_uri => params[:dataset_uri], - :algorithm_uri => params[:algorithm_uri] } + :algorithm_uri => params[:algorithm_uri], + :loo => "false" } [ :num_folds, :random_seed ].each{ |sym| cv_params[sym] = params[sym] if params[sym] } cv_params[:stratified] = (params[:stratified].size>0 && params[:stratified]!="false" && params[:stratified]!="0") if params[:stratified] cv = Validation::Crossvalidation.create cv_params @@ -70,11 +72,47 @@ post '/crossvalidation/cleanup/?' do end post '/crossvalidation/loo/?' 
do - raise "not yet implemented" + task = OpenTox::Task.create( "Perform loo-crossvalidation", url_for("/crossvalidation/loo", :full) ) do |task| #, params + LOGGER.info "creating loo-crossvalidation "+params.inspect + raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri] + raise OpenTox::BadRequestError.new "algorithm_uri missing" unless params[:algorithm_uri] + raise OpenTox::BadRequestError.new "prediction_feature missing" unless params[:prediction_feature] + raise OpenTox::BadRequestError.new "illegal param: num_folds, stratified, random_seed not allowed for loo-crossvalidation" if params[:num_folds] or + params[:stratified] or params[:random_seed] + + cv_params = { :dataset_uri => params[:dataset_uri], + :algorithm_uri => params[:algorithm_uri], + :loo => "true" } + cv = Validation::Crossvalidation.create cv_params + cv.subjectid = @subjectid + cv.perform_cv( params[:prediction_feature], params[:algorithm_params], task ) + # computation of stats is cheap as dataset are already loaded into the memory + Validation::Validation.from_cv_statistics( cv.id, @subjectid ) + cv.crossvalidation_uri + end + return_task(task) end get '/crossvalidation/loo/?' do - raise OpenTox::BadRequestError.new "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results" + LOGGER.info "list all crossvalidations" + params[:loo]="true" + uri_list = Lib::OhmUtil.find( Validation::Crossvalidation, params ).sort.collect{|v| v.crossvalidation_uri}.join("\n") + "\n" + if request.env['HTTP_ACCEPT'] =~ /text\/html/ + related_links = + "Single validations: "+url_for("/",:full)+"\n"+ + "All crossvalidations: "+url_for("/crossvalidation",:full)+"\n"+ + "Crossvalidation reports: "+url_for("/report/crossvalidation",:full) + description = + "A list of all leave one out crossvalidations.\n"+ + "Use the POST method to perform a crossvalidation." 
+ post_params = [[:dataset_uri,:algorithm_uri,:prediction_feature,[:algorithm_params,""]]] + content_type "text/html" + OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params + else + content_type "text/uri-list" + uri_list + end + end get '/crossvalidation/:id' do diff --git a/validation/validation_service.rb b/validation/validation_service.rb index d135a2c..a18c6ab 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -306,9 +306,16 @@ module Validation # creates the cv folds def create_cv_datasets( prediction_feature, task=nil ) - self.random_seed = 1 unless self.random_seed - self.num_folds = 10 unless self.num_folds - self.stratified = false unless self.stratified + if self.loo=="true" + orig_dataset = Lib::DatasetCache.find(self.dataset_uri,self.subjectid) + self.num_folds = orig_dataset.compounds.size + self.random_seed = 0 + self.stratified = false + else + self.random_seed = 1 unless self.random_seed + self.num_folds = 10 unless self.num_folds + self.stratified = false unless self.stratified + end if copy_cv_datasets( prediction_feature ) # dataset folds of a previous crossvalidaiton could be used task.progress(100) if task @@ -350,6 +357,7 @@ module Validation :num_folds => self.num_folds, :stratified => self.stratified, :random_seed => self.random_seed, + :loo => self.loo, :finished => true} ).reject{ |cv| cv.id == self.id } cvs.each do |cv| next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",self.subjectid) @@ -386,7 +394,11 @@ module Validation orig_dataset = Lib::DatasetCache.find(self.dataset_uri,self.subjectid) raise OpenTox::NotFoundError.new "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset - shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed ) + if self.loo=="true" + shuffled_compounds = orig_dataset.compounds + else + shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed ) + end unless self.stratified 
split_compounds = shuffled_compounds.chunk( self.num_folds.to_i ) -- cgit v1.2.3