diff options
author | mguetlein <martin.guetlein@gmail.com> | 2011-05-11 13:39:46 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2011-05-11 13:39:46 +0200 |
commit | 2f3bdab44bc49a65aa9843516c86337f26d4201d (patch) | |
tree | 3d2b150f0dc7dd64d69ba82cdd7c992fccc2e92c /validation | |
parent | 9436a6e0c6b8cfc0ebbc742fd6568c233a75006d (diff) |
fix stratified cv for datasets with multiple compounds
Diffstat (limited to 'validation')
-rwxr-xr-x | validation/validation_service.rb | 6 |
1 file changed, 4 insertions, 2 deletions
```diff
diff --git a/validation/validation_service.rb b/validation/validation_service.rb
index 247cdb3..dcfb8d7 100755
--- a/validation/validation_service.rb
+++ b/validation/validation_service.rb
@@ -366,7 +366,8 @@ module Validation
       accept_values.each do |value|
         class_compounds[value] = []
         shuffled_compounds.each do |c|
-          class_compounds[value] << c if orig_dataset.data_entries[c][prediction_feature].to_s==value
+          #PENDING accept values are type string, data_entries may be boolean
+          class_compounds[value] << c if orig_dataset.data_entries[c][prediction_feature].collect{|v| v.to_s}.include?(value)
         end
       end
       LOGGER.debug "stratified cv: different class values: "+class_compounds.keys.join(", ")
@@ -425,7 +426,8 @@ module Validation
         end
         raise "internal error, num test compounds not correct" unless (shuffled_compounds.size/self.num_folds.to_i - test_compounds.size).abs <= 1
-        raise "internal error, num train compounds not correct" unless shuffled_compounds.size - test_compounds.size == train_compounds.size
+        raise "internal error, num train compounds not correct, should be '"+(shuffled_compounds.size-test_compounds.size).to_s+
+          "', is '"+train_compounds.size.to_s+"'" unless shuffled_compounds.size - test_compounds.size == train_compounds.size
         LOGGER.debug "training set: "+datasetname+"_train, compounds: "+train_compounds.size.to_s
         #train_dataset_uri = orig_dataset.create_new_dataset( train_compounds, orig_dataset.features, datasetname + '_train', source )
```