summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2011-05-26 18:06:50 +0200
committermguetlein <martin.guetlein@gmail.com>2011-05-26 18:06:50 +0200
commit5abed3eedb5e0bc763574ef6a24b006546bdb22f (patch)
tree82a508835acdb71a34c8652accbb9f703b98ad44
parentc70021d0ce1d7bb68a609f24f9888fc2285fe774 (diff)
implemented loo crossvalidation
-rw-r--r--lib/feature_util.rb3
-rwxr-xr-xlib/validation_db.rb2
-rwxr-xr-xvalidation/validation_application.rb48
-rwxr-xr-xvalidation/validation_service.rb20
4 files changed, 63 insertions, 10 deletions
diff --git a/lib/feature_util.rb b/lib/feature_util.rb
index 286c05e..e412416 100644
--- a/lib/feature_util.rb
+++ b/lib/feature_util.rb
@@ -52,7 +52,8 @@ module Lib
end
end
end
- raise "could not estimate predicted variable" unless predicted_variable
+ raise "could not estimate predicted variable, model: '"+model.uri.to_s+"', prediction_dataset: '"+
+ prediction_dataset_uri.to_s+"'" unless predicted_variable
end
{:predicted_variable => predicted_variable, :predicted_confidence => predicted_confidence}
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index e2595c5..c38b82e 100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -136,6 +136,7 @@ module Validation
attribute :random_seed
attribute :finished
attribute :stratified
+ attribute :loo
attr_accessor :subjectid
@@ -145,6 +146,7 @@ module Validation
index :random_seed
index :stratified
index :finished
+ index :loo
def self.create(params={})
params[:date] = Time.new
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index 38fa1c7..32a1c99 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -12,8 +12,9 @@ get '/crossvalidation/?' do
uri_list = Lib::OhmUtil.find( Validation::Crossvalidation, params ).sort.collect{|v| v.crossvalidation_uri}.join("\n") + "\n"
if request.env['HTTP_ACCEPT'] =~ /text\/html/
related_links =
- "Single validations: "+url_for("/",:full)+"\n"+
- "Crossvalidation reports: "+url_for("/report/crossvalidation",:full)
+ "Single validations: "+url_for("/",:full)+"\n"+
+ "Leave-one-out crossvalidations: "+url_for("/crossvalidation/loo",:full)+"\n"+
+ "Crossvalidation reports: "+url_for("/report/crossvalidation",:full)
description =
"A list of all crossvalidations.\n"+
"Use the POST method to perform a crossvalidation."
@@ -36,7 +37,8 @@ post '/crossvalidation/?' do
params[:num_folds].to_i>1
cv_params = { :dataset_uri => params[:dataset_uri],
- :algorithm_uri => params[:algorithm_uri] }
+ :algorithm_uri => params[:algorithm_uri],
+ :loo => "false" }
[ :num_folds, :random_seed ].each{ |sym| cv_params[sym] = params[sym] if params[sym] }
cv_params[:stratified] = (params[:stratified].size>0 && params[:stratified]!="false" && params[:stratified]!="0") if params[:stratified]
cv = Validation::Crossvalidation.create cv_params
@@ -70,11 +72,47 @@ post '/crossvalidation/cleanup/?' do
end
post '/crossvalidation/loo/?' do
- raise "not yet implemented"
+ task = OpenTox::Task.create( "Perform loo-crossvalidation", url_for("/crossvalidation/loo", :full) ) do |task| #, params
+ LOGGER.info "creating loo-crossvalidation "+params.inspect
+ raise OpenTox::BadRequestError.new "dataset_uri missing" unless params[:dataset_uri]
+ raise OpenTox::BadRequestError.new "algorithm_uri missing" unless params[:algorithm_uri]
+ raise OpenTox::BadRequestError.new "prediction_feature missing" unless params[:prediction_feature]
+ raise OpenTox::BadRequestError.new "illegal param: num_folds, stratified, random_seed not allowed for loo-crossvalidation" if params[:num_folds] or
+ params[:stratified] or params[:random_seed]
+
+ cv_params = { :dataset_uri => params[:dataset_uri],
+ :algorithm_uri => params[:algorithm_uri],
+ :loo => "true" }
+ cv = Validation::Crossvalidation.create cv_params
+ cv.subjectid = @subjectid
+ cv.perform_cv( params[:prediction_feature], params[:algorithm_params], task )
+ # computation of stats is cheap as dataset are already loaded into the memory
+ Validation::Validation.from_cv_statistics( cv.id, @subjectid )
+ cv.crossvalidation_uri
+ end
+ return_task(task)
end
get '/crossvalidation/loo/?' do
- raise OpenTox::BadRequestError.new "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results"
+ LOGGER.info "list all crossvalidations"
+ params[:loo]="true"
+ uri_list = Lib::OhmUtil.find( Validation::Crossvalidation, params ).sort.collect{|v| v.crossvalidation_uri}.join("\n") + "\n"
+ if request.env['HTTP_ACCEPT'] =~ /text\/html/
+ related_links =
+ "Single validations: "+url_for("/",:full)+"\n"+
+ "All crossvalidations: "+url_for("/crossvalidation",:full)+"\n"+
+ "Crossvalidation reports: "+url_for("/report/crossvalidation",:full)
+ description =
+ "A list of all leave one out crossvalidations.\n"+
+ "Use the POST method to perform a crossvalidation."
+ post_params = [[:dataset_uri,:algorithm_uri,:prediction_feature,[:algorithm_params,""]]]
+ content_type "text/html"
+ OpenTox.text_to_html uri_list,@subjectid,related_links,description,post_params
+ else
+ content_type "text/uri-list"
+ uri_list
+ end
+
end
get '/crossvalidation/:id' do
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index d135a2c..a18c6ab 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -306,9 +306,16 @@ module Validation
# creates the cv folds
def create_cv_datasets( prediction_feature, task=nil )
- self.random_seed = 1 unless self.random_seed
- self.num_folds = 10 unless self.num_folds
- self.stratified = false unless self.stratified
+ if self.loo=="true"
+ orig_dataset = Lib::DatasetCache.find(self.dataset_uri,self.subjectid)
+ self.num_folds = orig_dataset.compounds.size
+ self.random_seed = 0
+ self.stratified = false
+ else
+ self.random_seed = 1 unless self.random_seed
+ self.num_folds = 10 unless self.num_folds
+ self.stratified = false unless self.stratified
+ end
if copy_cv_datasets( prediction_feature )
# dataset folds of a previous crossvalidaiton could be used
task.progress(100) if task
@@ -350,6 +357,7 @@ module Validation
:num_folds => self.num_folds,
:stratified => self.stratified,
:random_seed => self.random_seed,
+ :loo => self.loo,
:finished => true} ).reject{ |cv| cv.id == self.id }
cvs.each do |cv|
next if AA_SERVER and !OpenTox::Authorization.authorized?(cv.crossvalidation_uri,"GET",self.subjectid)
@@ -386,7 +394,11 @@ module Validation
orig_dataset = Lib::DatasetCache.find(self.dataset_uri,self.subjectid)
raise OpenTox::NotFoundError.new "Dataset not found: "+self.dataset_uri.to_s unless orig_dataset
- shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed )
+ if self.loo=="true"
+ shuffled_compounds = orig_dataset.compounds
+ else
+ shuffled_compounds = orig_dataset.compounds.shuffle( self.random_seed )
+ end
unless self.stratified
split_compounds = shuffled_compounds.chunk( self.num_folds.to_i )