diff options
Diffstat (limited to 'validation/validation_service.rb')
-rwxr-xr-x | validation/validation_service.rb | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 1b29630..a2e97d0 100755 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -500,7 +500,7 @@ module Validation meta, self.subjectid ).uri test_dataset_uris << test_dataset_uri end - when /^(true|super|super4)$/ + when /^(true|super|super4|super5)$/ raise "DO NOT USED SUPER-STRATIFICATION FOR VAL-EXPERIMENTS AND CV, IF SO SOLVE _MISSING_VAULE_NA_ PROBLEM" if stratified=~/super/ if stratified=="true" features = [ self.prediction_feature ] @@ -627,7 +627,7 @@ module Validation # splits a dataset into test and training dataset # returns map with training_dataset_uri and test_dataset_uri def self.train_test_dataset_split( orig_dataset_uri, prediction_feature, subjectid, stratified="false", - split_ratio=nil, random_seed=nil, missing_values=nil, task=nil, features=nil ) + split_ratio=nil, random_seed=nil, missing_values=nil, task=nil, features=nil, store_split_clusters=false ) split_ratio=0.67 unless split_ratio split_ratio = split_ratio.to_f @@ -651,7 +651,8 @@ module Validation meta = { DC.creator => $url_provider.url_for('/training_test_split',:full) } case stratified - when /^(true|super|super4|anti)$/ + when /^(true|super|super4|super5|anti)$/ + raise "store split clusters not available for true stratified splits" if store_split_clusters and stratified=="true" if stratified=="true" raise OpenTox::BadRequestError.new "prediction feature required for stratified splits" unless prediction_feature LOGGER.warn "split features are ignored for stratified splits (use super instead)" if features @@ -660,10 +661,12 @@ module Validation LOGGER.warn "prediction feature is ignored for super- or anti-stratified splits" if prediction_feature end r_util = OpenTox::RUtil.new - train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio, @subjectid, random_seed, features, stratified ) + train, test = r_util.stratified_split( orig_dataset, meta, missing_values, split_ratio, + @subjectid, random_seed, features, stratified, store_split_clusters ) r_util.quit_r result = {:training_dataset_uri => train.uri, :test_dataset_uri => test.uri} when "false" + raise "store split clusters not available for random splits" if store_split_clusters LOGGER.warn "split features are ignored for non-stratified splits (use super instead)" if features compounds = orig_dataset.compounds raise OpenTox::BadRequestError.new "Cannot split datset, num compounds in dataset < 2 ("+compounds.size.to_s+")" if compounds.size<2 |