From d3248d702e0bb1886b222688ce1c066d92d36801 Mon Sep 17 00:00:00 2001 From: davor Date: Wed, 21 Mar 2012 10:28:03 +0100 Subject: Add argument to set dataset file path. --- cv/data/utils/count_features.rb | 34 ++++++++++++++++++++----- cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb | 15 ++++++----- cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb | 15 ++++++----- cv/scripts/wrapper_pc_cv_LOAEL.sh | 15 ++++++----- cv/scripts/wrapper_pc_cv_LOAEL_loo.sh | 15 ++++++----- 5 files changed, 61 insertions(+), 33 deletions(-) diff --git a/cv/data/utils/count_features.rb b/cv/data/utils/count_features.rb index 1e272ac..c8a5893 100644 --- a/cv/data/utils/count_features.rb +++ b/cv/data/utils/count_features.rb @@ -4,7 +4,11 @@ require 'yaml' @subjectid = nil - +if ARGV.size != 1 + puts "Args: path/to/dataset.yaml" + puts ARGV.size + exit +end def count_features(ds_uri) puts ds_uri @@ -13,9 +17,10 @@ def count_features(ds_uri) features = dataset.features.keys puts "# all features: #{features.size}" - + feature_names = [] delete_features = [] features.each{ |fn| + feature_names << fn.split("\/feature\/").last dataset.features[fn][RDF.type].each { |typestr| if typestr.include? 
"MissingFeature" delete_features << fn @@ -23,6 +28,9 @@ def count_features(ds_uri) end } } + @all_feature_names << feature_names.sort + @all_feature_names << "" + puts "# Missingfeatures: #{delete_features.size}" features = features - delete_features puts "# numeric features: #{features.size}" @@ -31,18 +39,30 @@ end @missing_features = [] - -ds = YAML::load_file("../datasets.yaml") +@all_feature_names = [] +path = ARGV[0] +puts path +ds = YAML::load_file("#{path}") +#ds = YAML::load_file("../datasets.yaml") ds.keys.each { |dataset| puts "----------" puts dataset + @all_feature_names << "" + @all_feature_names << "------ new dataset ------" + @all_feature_names << "-------- #{dataset} --------" ds[dataset].keys.each { |pc| - puts pc unless (pc == "dataset") || (pc == "test") || (pc == "training") - count_features(ds[dataset][pc]) unless (pc == "dataset") || (pc == "test") || (pc == "training") + unless (pc == "dataset") || (pc == "test") || (pc == "training") + puts pc + @all_feature_names << "--- new feature: #{pc} ---" + count_features(ds[dataset][pc]) + end } puts "----------" puts } puts puts "Missing features over all datasets:" -puts @missing_features.uniq!.to_yaml +puts @missing_features.uniq.to_yaml +puts +puts "All feature names:" +puts @all_feature_names diff --git a/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb b/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb index bd86579..2442b01 100644 --- a/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb +++ b/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb @@ -2,15 +2,18 @@ # # Author: Andreas Maunz, David Vorgrimmler # # @params: Dataset_name(see dataset_nestle.yaml), pc_type(electronic,cpsa or constitutional ... or nil to disable), prediction_algorithm(local_mlr_prop or local_svm_regression ...) 
-if ARGV.size != 4 - puts "Args: ds_name, pc_type, algo, random_seed" +if ARGV.size != 5 + puts "Args: ds_name, pc_type, algo, random_seed, path/to/dataset.yaml" puts ARGV.size exit end -ds_file = "datasets.yaml" -pwd=`pwd` -path = "#{pwd.chop}/../data/#{ds_file}" +#ds_file = "datasets.yaml" +#pwd=`pwd` +#path = "#{pwd.chop}/../data/#{ds_file}" +path = ARGV[4] +ds_file = path.split("/").last + if File.exists?(path) puts "#{ds_file} exists" else @@ -29,7 +32,7 @@ pc_type = ARGV[1] # e.g. electronic,cpsa or nil to disable algo = ARGV[2] # e.g. local_svm_regression, local_mlr_prop r_seed = ARGV[3] # 1, 2, ..., 10 -ds = YAML::load_file("../data/datasets.yaml") +ds = YAML::load_file("#{path}") ds_uri = ds[ds_name]["dataset"] pc_ds_uri = ds[ds_name][pc_type] diff --git a/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb b/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb index 88adac2..34ec4f9 100644 --- a/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb +++ b/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb @@ -2,15 +2,18 @@ # # Author: Andreas Maunz, David Vorgrimmler # # @params: Dataset_name(see dataset_nestle.yaml), pc_type(electronic,cpsa or constitutional ... or nil to disable), prediction_algorithm(local_mlr_prop or local_svm_regression ...) -if ARGV.size != 4 - puts "Args: ds_name, pc_type, algo, random_seed" +if ARGV.size != 5 + puts "Args: ds_name, pc_type, algo, random_seed, path/to/dataset.yaml" puts ARGV.size exit end -ds_file = "datasets.yaml" -pwd=`pwd` -path = "#{pwd.chop}/../data/#{ds_file}" +#ds_file = "datasets.yaml" +#pwd=`pwd` +#path = "#{pwd.chop}/../data/#{ds_file}" +path = ARGV[4] +ds_file = path.split("/").last + if File.exists?(path) puts "#{ds_file} exists" else @@ -29,7 +32,7 @@ pc_type = ARGV[1] # e.g. electronic,cpsa or nil to disable algo = ARGV[2] # e.g. 
local_svm_regression, local_mlr_prop r_seed = ARGV[3] # 1, 2, ..., 10 -ds = YAML::load_file("../data/datasets.yaml") +ds = YAML::load_file("#{path}") ds_uri = ds[ds_name]["dataset"] pc_ds_uri = ds[ds_name][pc_type] diff --git a/cv/scripts/wrapper_pc_cv_LOAEL.sh b/cv/scripts/wrapper_pc_cv_LOAEL.sh index 4d74596..e4f5685 100755 --- a/cv/scripts/wrapper_pc_cv_LOAEL.sh +++ b/cv/scripts/wrapper_pc_cv_LOAEL.sh @@ -3,14 +3,15 @@ # Reads factors_config, ../data/datasets.yaml and performs cv's # Andreas Maunz, David Vorgrimmler, 2012 -if [ $# -lt 1 ]; then - echo "Usage: $0 factors" +if [ $# -lt 2 ]; then + echo "Usage: $0 factors path/to/dataset.yaml" exit fi -PWD=`pwd` -echo $PWD -if [ ! -f $PWD/../data/datasets.yaml ] +#PWD=`pwd` +#echo $PWD +#if [ ! -f $PWD/../data/datasets.yaml ] +if [ ! -f "$2" ] then echo "datasets.yaml does not exist." exit @@ -36,8 +37,8 @@ cat $FACTORS | while read factor; do do factor="$factor $r_seed" echo "${THIS_DATE}: $factor" >> $LOGFILE>&1 - echo "ruby $CV $factor" >> $LOGFILE 2>&1 - ruby $CV $factor >> $LOGFILE 2>&1 + echo "ruby $CV $factor $2" >> $LOGFILE 2>&1 + ruby $CV $factor "$2" >> $LOGFILE 2>&1 echo >> $LOGFILE 2>&1 done fi diff --git a/cv/scripts/wrapper_pc_cv_LOAEL_loo.sh b/cv/scripts/wrapper_pc_cv_LOAEL_loo.sh index be3e6df..f75f385 100755 --- a/cv/scripts/wrapper_pc_cv_LOAEL_loo.sh +++ b/cv/scripts/wrapper_pc_cv_LOAEL_loo.sh @@ -3,14 +3,15 @@ # Reads factors_config, ../data/datasets.yaml and performs cv's # Andreas Maunz, David Vorgrimmler, 2012 -if [ $# -lt 1 ]; then - echo "Usage: $0 factors" +if [ $# -lt 2 ]; then + echo "Usage: $0 factors path/to/dataset.yaml" exit fi -PWD=`pwd` -echo $PWD -if [ ! -f $PWD/../data/datasets.yaml ] +#PWD=`pwd` +#echo $PWD +#if [ ! -f $PWD/../data/datasets.yaml ] +if [ ! -f "$2" ] then echo "datasets.yaml does not exist." 
exit @@ -36,8 +37,8 @@ cat $FACTORS | while read factor; do do factor="$factor $r_seed" echo "${THIS_DATE}: $factor" >> $LOGFILE>&1 - echo "ruby $CV $factor" >> $LOGFILE 2>&1 - ruby $CV $factor >> $LOGFILE 2>&1 + echo "ruby $CV $factor $2" >> $LOGFILE 2>&1 + ruby $CV $factor "$2" >> $LOGFILE 2>&1 echo >> $LOGFILE 2>&1 done fi -- cgit v1.2.3