diff options
Diffstat (limited to 'cv/data/utils/count_features.rb')
-rw-r--r-- | cv/data/utils/count_features.rb | 34 |
1 files changed, 27 insertions, 7 deletions
diff --git a/cv/data/utils/count_features.rb b/cv/data/utils/count_features.rb index 1e272ac..c8a5893 100644 --- a/cv/data/utils/count_features.rb +++ b/cv/data/utils/count_features.rb @@ -4,7 +4,11 @@ require 'yaml' @subjectid = nil - +if ARGV.size != 1 + puts "Args: path/to/dataset.yaml" + puts ARGV.size + exit +end def count_features(ds_uri) puts ds_uri @@ -13,9 +17,10 @@ def count_features(ds_uri) features = dataset.features.keys puts "# all features: #{features.size}" - + feature_names = [] delete_features = [] features.each{ |fn| + feature_names << fn.split("\/feature\/").last dataset.features[fn][RDF.type].each { |typestr| if typestr.include? "MissingFeature" delete_features << fn @@ -23,6 +28,9 @@ def count_features(ds_uri) end } } + @all_feature_names << feature_names.sort + @all_feature_names << "" + puts "# Missingfeatures: #{delete_features.size}" features = features - delete_features puts "# numeric features: #{features.size}" @@ -31,18 +39,30 @@ end @missing_features = [] - -ds = YAML::load_file("../datasets.yaml") +@all_feature_names = [] +path = ARGV[0] +puts path +ds = YAML::load_file("#{path}") +#ds = YAML::load_file("../datasets.yaml") ds.keys.each { |dataset| puts "----------" puts dataset + @all_feature_names << "" + @all_feature_names << "------ new dataset ------" + @all_feature_names << "-------- #{dataset} --------" ds[dataset].keys.each { |pc| - puts pc unless (pc == "dataset") || (pc == "test") || (pc == "training") - count_features(ds[dataset][pc]) unless (pc == "dataset") || (pc == "test") || (pc == "training") + if !(pc == "dataset") || (pc == "test") || (pc == "training") + puts pc + @all_feature_names << "--- new feature: #{pc} ---" + count_features(ds[dataset][pc]) + end } puts "----------" puts } puts puts "Missing features over all datasets:" -puts @missing_features.uniq!.to_yaml +puts @missing_features.uniq!.to_yaml +puts +puts "All feature names:" +puts @all_feature_names |