summary refs log tree commit diff
path: root/validation/validation_service.rb
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2012-10-12 10:27:15 +0200
committer mguetlein <martin.guetlein@gmail.com> 2012-10-12 10:27:15 +0200
commit ba9d3e628e424a84d8c892c4fdf49c2258a95352 (patch)
tree 4dbbf4269e6e3dc1dd94024921255a506a5fa921 /validation/validation_service.rb
parent cd8800cf377b5b8c07dc25f3629e503f01bc9112 (diff)
add contra splitting, provide access to dataset-splits, minor stuff
Diffstat (limited to 'validation/validation_service.rb')
-rwxr-xr-x validation/validation_service.rb 10
1 files changed, 8 insertions, 2 deletions
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index e099e25..f57b3a5 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -653,15 +653,21 @@ module Validation
meta = { DC.creator => $url_provider.url_for('/training_test_split',:full) }
case stratified
- when /^(true|super|super4|super5|anti)$/
+ when /^(true|super|super4|super5|contra)$/
raise "store split clusters not available for true stratified splits" if store_split_clusters and stratified=="true"
if stratified=="true"
raise OpenTox::BadRequestError.new "prediction feature required for stratified splits" unless prediction_feature
LOGGER.warn "split features are ignored for stratified splits (use super instead)" if features
features = [prediction_feature]
+ elsif stratified=="contra"
+ raise OpenTox::BadRequestError.new "prediction feature required for anti-stratified splits" unless prediction_feature
+ LOGGER.debug "prediction feature is removed for anti-stratified splits"
+ features = orig_dataset.features.keys-[prediction_feature]
+ raise unless features.size==orig_dataset.features.size-1
else
LOGGER.warn "prediction feature is ignored for super- or anti-stratified splits" if prediction_feature
end
+ LOGGER.debug "Using "+features.size.to_s+"/"+orig_dataset.features.size.to_s+" features for splitting" if features
r_util = OpenTox::RUtil.new
train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio,
@subjectid, random_seed, features, stratified, store_split_clusters )
@@ -705,7 +711,7 @@ module Validation
end
LOGGER.debug "split done, training dataset: '"+result[:training_dataset_uri].to_s+"', test dataset: '"+result[:test_dataset_uri].to_s+"'"
else
- raise OpenTox::BadRequestError.new "stratified != false|true|super, is #{stratified}"
+ raise OpenTox::BadRequestError.new "stratified != false|true|super|contra, is #{stratified}"
end
result
end