summary refs log tree commit diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2014-10-30 16:05:55 +0100
committer mguetlein <martin.guetlein@gmail.com> 2014-10-30 16:05:55 +0100
commit 1e2d786a13a16abb14b2200f49cbdc7b0063c600 (patch)
tree 60511db3fc53f0131668d44f2574aa9c9ef40722
parent c528b931f0b3718509d162ca73982e8367a62069 (diff)
add skip-fold for faster partial loo-cv and filtering of predictions
-rwxr-xr-x report/report_factory.rb 4
-rwxr-xr-x validation/validation_application.rb 24
-rwxr-xr-x validation/validation_service.rb 30
3 files changed, 38 insertions, 20 deletions
diff --git a/report/report_factory.rb b/report/report_factory.rb
index ac275a3..c830a97 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -136,8 +136,8 @@ module Reports::ReportFactory
bad_request_error("crossvalidation-id not unique and != nil: "+
validation_set.get_values(:crossvalidation_id,false).inspect) if validation_set.unique_value(:crossvalidation_id)==nil
validation_set.load_cv_attributes
- bad_request_error("num validations ("+validation_set.size.to_s+") is not equal to num folds ("+
- validation_set.unique_value(:num_folds).to_s+")") unless validation_set.unique_value(:num_folds).to_i==validation_set.size
+ #bad_request_error("num validations ("+validation_set.size.to_s+") is not equal to num folds ("+
+ # validation_set.unique_value(:num_folds).to_s+")") unless validation_set.unique_value(:num_folds).to_i==validation_set.size
bad_request_error("num different folds is not equal to num validations") unless validation_set.num_different_values(:crossvalidation_fold)==validation_set.size
bad_request_error("validations must have unique feature type, i.e. must be either all regression, "+
"or all classification validations") unless validation_set.unique_feature_type
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index 568e1f7..ed8083c 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -13,6 +13,15 @@ class Validation::Application < OpenTox::Application
bad_request_error "stratified != true|false|super, is #{params[:stratified]}" unless
params[:stratified]=~/true|false|super/
end
+
+ def filter_validation(validation, params)
+ if (params[:min_confidence] or params[:min_num_predictions] or params[:max_num_predictions])
+ min_confidence = params[:min_confidence] ? params[:min_confidence].to_f : nil
+ min_num_predictions = params[:min_num_predictions] ? params[:min_num_predictions].to_i : nil
+ max_num_predictions = params[:max_num_predictions] ? params[:max_num_predictions].to_i : nil
+ validation.filter_predictions(min_confidence,min_num_predictions,max_num_predictions)
+ end
+ end
end
before do
@@ -105,18 +114,19 @@ class Validation::Application < OpenTox::Application
bad_request_error "algorithm_uri missing" unless params[:algorithm_uri].to_s.size>0
bad_request_error "prediction_feature missing" unless params[:prediction_feature].to_s.size>0
bad_request_error "illegal param: num_folds, stratified, random_seed not allowed for loo-crossvalidation" if params[:num_folds] or
- params[:stratified] or params[:random_seed]
+ params[:stratified] or (params[:random_seed] and !params[:skip_ratio])
task = OpenTox::Task.run( "Perform loo-crossvalidation", to("/validation/crossvalidation/loo", :full) ) do |task| #, params
cv_params = { :dataset_uri => params[:dataset_uri],
:algorithm_params => params[:algorithm_params],
:prediction_feature => params[:prediction_feature],
:algorithm_uri => params[:algorithm_uri],
- :loo => (params[:loo]=="uniq" ? "uniq" : "true") }
+ :loo => (params[:loo]=="uniq" ? "uniq" : "true"),
+ :random_seed => params[:random_seed]}
cv = Validation::Crossvalidation.create cv_params
- cv.perform_cv( OpenTox::SubTask.create(task,0,95))
+ cv.perform_cv( OpenTox::SubTask.create(task,0,95), (params[:skip_ratio] ? params[:skip_ratio].to_f : nil))
# computation of stats is cheap as dataset are already loaded into the memory
Validation::Validation.from_cv_statistics( cv.id, OpenTox::SubTask.create(task,95,100) )
- cv.clean_loo_files( !(params[:algorithm_params] && params[:algorithm_params] =~ /feature_dataset_uri/) )
+ #cv.clean_loo_files( !(params[:algorithm_params] && params[:algorithm_params] =~ /feature_dataset_uri/) )
cv.crossvalidation_uri
end
return_task(task)
@@ -189,6 +199,7 @@ class Validation::Application < OpenTox::Application
$logger.info "get crossvalidation statistics for crossvalidation with id "+params[:id].to_s
v = Validation::Validation.from_cv_statistics( params[:id] )
+ filter_validation(v,params)
case request.env['HTTP_ACCEPT'].to_s
when /text\/html/
related_links =
@@ -219,6 +230,10 @@ class Validation::Application < OpenTox::Application
content_type "text/x-yaml"
props.to_yaml
end
+
+ get '/validation/crossvalidation/:id/prediction_data' do
+ Validation::Validation.from_cv_statistics( params[:id] ).prediction_data.to_yaml
+ end
delete '/validation/crossvalidation/:id/?' do
$logger.info "delete crossvalidation with id "+params[:id].to_s
@@ -664,6 +679,7 @@ class Validation::Application < OpenTox::Application
# end
validation = Validation::Validation[params[:id]]
resource_not_found_error "Validation '#{params[:id]}' not found." unless validation
+ filter_validation(validation,params)
case request.env['HTTP_ACCEPT'].to_s
when "application/rdf+xml"
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index 4fc4018..8b22713 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -269,8 +269,8 @@ module Validation
end
def filter_predictions( min_confidence, min_num_predictions, max_num_predictions, prediction=nil )
- self.prediction_data = nil
- self.save
+ #self.prediction_data = nil
+ #self.save
bad_request_error "only supported for classification" if prediction!=nil and classification_statistics==nil
bad_request_error "illegal confidence value #{min_confidence}" unless
@@ -315,9 +315,9 @@ module Validation
class Crossvalidation
- def perform_cv ( task=nil )
+ def perform_cv ( task=nil, skip_ratio=nil )
create_cv_datasets( OpenTox::SubTask.create(task, 0, 33) )
- perform_cv_validations( OpenTox::SubTask.create(task, 33, 100) )
+ perform_cv_validations( OpenTox::SubTask.create(task, 33, 100), skip_ratio )
end
def clean_loo_files( delete_feature_datasets )
@@ -370,13 +370,12 @@ module Validation
else
self.num_folds = orig_dataset.compounds.size
end
- self.random_seed = 0
self.stratified = "false"
else
- self.random_seed = 1 unless self.random_seed
self.num_folds = 10 unless self.num_folds
self.stratified = "false" unless self.stratified
end
+ self.random_seed = 1 unless self.random_seed
if copy_cv_datasets()
# dataset folds of a previous crossvalidaiton could be used
task.progress(100) if task
@@ -386,18 +385,20 @@ module Validation
end
# executes the cross-validation (build models and validates them)
- def perform_cv_validations( task=nil )
+ def perform_cv_validations( task=nil, skip_ratio=nil )
$logger.debug "perform cv validations"
- i = 0
+ i = -1
task_step = 100 / self.num_folds.to_f;
+ skip = @tmp_validations.size.times.collect{|i| i < (skip_ratio*@tmp_validations.size) }.shuffle(self.random_seed) if skip_ratio
@tmp_validations.each do | val |
+ i += 1
+ next if skip and skip[i]
validation = Validation.create val
validation.validate_algorithm( OpenTox::SubTask.create(task, i * task_step, ( i + 1 ) * task_step) )
internal_server_error "validation '"+validation.validation_uri+"' for crossvaldation could not be finished" unless
- validation.finished
- i += 1
- $logger.debug "fold "+i.to_s+" done: "+validation.validation_uri.to_s
+ validation.finished
+ $logger.debug "fold "+(i+1).to_s+" done: "+validation.validation_uri.to_s
end
# self.attributes = { :finished => true }
@@ -412,13 +413,14 @@ module Validation
# returns true if successfull, false otherwise
def copy_cv_datasets( )
# for downwards compatibilty: search prediction_feature=nil is ok
- cvs = Crossvalidation.find( {
+ p = {
:dataset_uri => self.dataset_uri,
:num_folds => self.num_folds,
:stratified => self.stratified,
- :random_seed => self.random_seed,
:loo => self.loo,
- :finished => true} ).reject{ |cv| (cv.id == self.id ||
+ :finished => true}
+ p[:random_seed] = self.random_seed unless self.loo=="uniq"
+ cvs = Crossvalidation.find( p ).reject{ |cv| (cv.id == self.id ||
(cv.prediction_feature &&
cv.prediction_feature != self.prediction_feature)) }
cvs.each do |cv|