diff options
author | mguetlein <martin.guetlein@gmail.com> | 2012-10-12 10:27:15 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2012-10-12 10:27:15 +0200 |
commit | ba9d3e628e424a84d8c892c4fdf49c2258a95352 (patch) | |
tree | 4dbbf4269e6e3dc1dd94024921255a506a5fa921 /validation/validation_service.rb | |
parent | cd8800cf377b5b8c07dc25f3629e503f01bc9112 (diff) |
add contra splitting, provide access to dataset-splits, minor stuff
Diffstat (limited to 'validation/validation_service.rb')
-rwxr-xr-x | validation/validation_service.rb | 10 |
1 files changed, 8 insertions, 2 deletions
```diff
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index e099e25..f57b3a5 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -653,15 +653,21 @@ module Validation
       meta = { DC.creator => $url_provider.url_for('/training_test_split',:full) }
       case stratified
-      when /^(true|super|super4|super5|anti)$/
+      when /^(true|super|super4|super5|contra)$/
         raise "store split clusters not available for true stratified splits" if store_split_clusters and stratified=="true"
         if stratified=="true"
           raise OpenTox::BadRequestError.new "prediction feature required for stratified splits" unless prediction_feature
           LOGGER.warn "split features are ignored for stratified splits (use super instead)" if features
           features = [prediction_feature]
+        elsif stratified=="contra"
+          raise OpenTox::BadRequestError.new "prediction feature required for anti-stratified splits" unless prediction_feature
+          LOGGER.debug "prediction feature is removed for anti-stratified splits"
+          features = orig_dataset.features.keys-[prediction_feature]
+          raise unless features.size==orig_dataset.features.size-1
         else
           LOGGER.warn "prediction feature is ignored for super- or anti-stratified splits" if prediction_feature
         end
+        LOGGER.debug "Using "+features.size.to_s+"/"+orig_dataset.features.size.to_s+" features for splitting" if features
         r_util = OpenTox::RUtil.new
         train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio, @subjectid, random_seed, features, stratified, store_split_clusters )
@@ -705,7 +711,7 @@ module Validation
         end
         LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
       else
-        raise OpenTox::BadRequestError.new "stratified != false|true|super, is #{stratified}"
+        raise OpenTox::BadRequestError.new "stratified != false|true|super|contra, is #{stratified}"
       end
       result
     end
```