summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author davor <vorgrimmlerdavid@gmx.de> 2012-03-21 10:28:03 +0100
committer davor <vorgrimmlerdavid@gmx.de> 2012-03-21 10:28:03 +0100
commit d3248d702e0bb1886b222688ce1c066d92d36801 (patch)
tree 2bb5f01f08cb76aa43917ab0881c3eb821330ffc
parent f87c7b3819345aa0880e518d3a83c76eb7198293 (diff)
Add argument to set dataset file path.
-rw-r--r-- cv/data/utils/count_features.rb 34
-rw-r--r-- cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb 15
-rw-r--r-- cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb 15
-rwxr-xr-x cv/scripts/wrapper_pc_cv_LOAEL.sh 15
-rwxr-xr-x cv/scripts/wrapper_pc_cv_LOAEL_loo.sh 15
5 files changed, 61 insertions, 33 deletions
diff --git a/cv/data/utils/count_features.rb b/cv/data/utils/count_features.rb
index 1e272ac..c8a5893 100644
--- a/cv/data/utils/count_features.rb
+++ b/cv/data/utils/count_features.rb
@@ -4,7 +4,11 @@ require 'yaml'
@subjectid = nil
-
+if ARGV.size != 1
+ puts "Args: path/to/dataset.yaml"
+ puts ARGV.size
+ exit
+end
def count_features(ds_uri)
puts ds_uri
@@ -13,9 +17,10 @@ def count_features(ds_uri)
features = dataset.features.keys
puts "# all features: #{features.size}"
-
+ feature_names = []
delete_features = []
features.each{ |fn|
+ feature_names << fn.split("\/feature\/").last
dataset.features[fn][RDF.type].each { |typestr|
if typestr.include? "MissingFeature"
delete_features << fn
@@ -23,6 +28,9 @@ def count_features(ds_uri)
end
}
}
+ @all_feature_names << feature_names.sort
+ @all_feature_names << ""
+
puts "# Missingfeatures: #{delete_features.size}"
features = features - delete_features
puts "# numeric features: #{features.size}"
@@ -31,18 +39,30 @@ end
@missing_features = []
-
-ds = YAML::load_file("../datasets.yaml")
+@all_feature_names = []
+path = ARGV[0]
+puts path
+ds = YAML::load_file("#{path}")
+#ds = YAML::load_file("../datasets.yaml")
ds.keys.each { |dataset|
puts "----------"
puts dataset
+ @all_feature_names << ""
+ @all_feature_names << "------ new dataset ------"
+ @all_feature_names << "-------- #{dataset} --------"
ds[dataset].keys.each { |pc|
- puts pc unless (pc == "dataset") || (pc == "test") || (pc == "training")
- count_features(ds[dataset][pc]) unless (pc == "dataset") || (pc == "test") || (pc == "training")
+ unless ["dataset", "test", "training"].include?(pc) # skip "dataset", "test" and "training" entries, matching the removed `unless` lines
+ puts pc
+ @all_feature_names << "--- new feature: #{pc} ---"
+ count_features(ds[dataset][pc])
+ end
}
puts "----------"
puts
}
puts
puts "Missing features over all datasets:"
-puts @missing_features.uniq!.to_yaml
+puts @missing_features.uniq.to_yaml # uniq, not uniq!: uniq! returns nil when no duplicates were removed
+puts
+puts "All feature names:"
+puts @all_feature_names
diff --git a/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb b/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb
index bd86579..2442b01 100644
--- a/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb
+++ b/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL.rb
@@ -2,15 +2,18 @@
# # Author: Andreas Maunz, David Vorgrimmler
# # @params: Dataset_name(see dataset_nestle.yaml), pc_type(electronic,cpsa or constitutional ... or nil to disable), prediction_algorithm(local_mlr_prop or local_svm_regression ...)
-if ARGV.size != 4
- puts "Args: ds_name, pc_type, algo, random_seed"
+if ARGV.size != 5
+ puts "Args: ds_name, pc_type, algo, random_seed, path/to/dataset.yaml"
puts ARGV.size
exit
end
-ds_file = "datasets.yaml"
-pwd=`pwd`
-path = "#{pwd.chop}/../data/#{ds_file}"
+#ds_file = "datasets.yaml"
+#pwd=`pwd`
+#path = "#{pwd.chop}/../data/#{ds_file}"
+path = ARGV[4]
+ds_file = path.split("/").last
+
if File.exists?(path)
puts "#{ds_file} exists"
else
@@ -29,7 +32,7 @@ pc_type = ARGV[1] # e.g. electronic,cpsa or nil to disable
algo = ARGV[2] # e.g. local_svm_regression, local_mlr_prop
r_seed = ARGV[3] # 1, 2, ..., 10
-ds = YAML::load_file("../data/datasets.yaml")
+ds = YAML::load_file("#{path}")
ds_uri = ds[ds_name]["dataset"]
pc_ds_uri = ds[ds_name][pc_type]
diff --git a/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb b/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb
index 88adac2..34ec4f9 100644
--- a/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb
+++ b/cv/scripts/CV_ds_pctype_algo_rseed_LOAEL_loo.rb
@@ -2,15 +2,18 @@
# # Author: Andreas Maunz, David Vorgrimmler
# # @params: Dataset_name(see dataset_nestle.yaml), pc_type(electronic,cpsa or constitutional ... or nil to disable), prediction_algorithm(local_mlr_prop or local_svm_regression ...)
-if ARGV.size != 4
- puts "Args: ds_name, pc_type, algo, random_seed"
+if ARGV.size != 5
+ puts "Args: ds_name, pc_type, algo, random_seed, path/to/dataset.yaml"
puts ARGV.size
exit
end
-ds_file = "datasets.yaml"
-pwd=`pwd`
-path = "#{pwd.chop}/../data/#{ds_file}"
+#ds_file = "datasets.yaml"
+#pwd=`pwd`
+#path = "#{pwd.chop}/../data/#{ds_file}"
+path = ARGV[4]
+ds_file = path.split("/").last
+
if File.exists?(path)
puts "#{ds_file} exists"
else
@@ -29,7 +32,7 @@ pc_type = ARGV[1] # e.g. electronic,cpsa or nil to disable
algo = ARGV[2] # e.g. local_svm_regression, local_mlr_prop
r_seed = ARGV[3] # 1, 2, ..., 10
-ds = YAML::load_file("../data/datasets.yaml")
+ds = YAML::load_file("#{path}")
ds_uri = ds[ds_name]["dataset"]
pc_ds_uri = ds[ds_name][pc_type]
diff --git a/cv/scripts/wrapper_pc_cv_LOAEL.sh b/cv/scripts/wrapper_pc_cv_LOAEL.sh
index 4d74596..e4f5685 100755
--- a/cv/scripts/wrapper_pc_cv_LOAEL.sh
+++ b/cv/scripts/wrapper_pc_cv_LOAEL.sh
@@ -3,14 +3,15 @@
# Reads factors_config, ../data/datasets.yaml and performs cv's
# Andreas Maunz, David Vorgrimmler, 2012
-if [ $# -lt 1 ]; then
- echo "Usage: $0 factors"
+if [ $# -lt 2 ]; then
+ echo "Usage: $0 factors path/to/dataset.yaml"
exit
fi
-PWD=`pwd`
-echo $PWD
-if [ ! -f $PWD/../data/datasets.yaml ]
+#PWD=`pwd`
+#echo $PWD
+#if [ ! -f $PWD/../data/datasets.yaml ]
+if [ ! -f "$2" ] # quoted so a path containing whitespace stays one argument to [
then
echo "datasets.yaml does not exist."
exit
@@ -36,8 +37,8 @@ cat $FACTORS | while read factor; do
do
factor="$factor $r_seed"
echo "${THIS_DATE}: $factor" >> $LOGFILE>&1
- echo "ruby $CV $factor" >> $LOGFILE 2>&1
- ruby $CV $factor >> $LOGFILE 2>&1
+ echo "ruby $CV $factor $2" >> $LOGFILE 2>&1
+ ruby $CV $factor "$2" >> $LOGFILE 2>&1 # $factor is split on purpose into separate args; the dataset path must stay a single arg
echo >> $LOGFILE 2>&1
done
fi
diff --git a/cv/scripts/wrapper_pc_cv_LOAEL_loo.sh b/cv/scripts/wrapper_pc_cv_LOAEL_loo.sh
index be3e6df..f75f385 100755
--- a/cv/scripts/wrapper_pc_cv_LOAEL_loo.sh
+++ b/cv/scripts/wrapper_pc_cv_LOAEL_loo.sh
@@ -3,14 +3,15 @@
# Reads factors_config, ../data/datasets.yaml and performs cv's
# Andreas Maunz, David Vorgrimmler, 2012
-if [ $# -lt 1 ]; then
- echo "Usage: $0 factors"
+if [ $# -lt 2 ]; then
+ echo "Usage: $0 factors path/to/dataset.yaml"
exit
fi
-PWD=`pwd`
-echo $PWD
-if [ ! -f $PWD/../data/datasets.yaml ]
+#PWD=`pwd`
+#echo $PWD
+#if [ ! -f $PWD/../data/datasets.yaml ]
+if [ ! -f "$2" ] # quoted so a path containing whitespace stays one argument to [
then
echo "datasets.yaml does not exist."
exit
@@ -36,8 +37,8 @@ cat $FACTORS | while read factor; do
do
factor="$factor $r_seed"
echo "${THIS_DATE}: $factor" >> $LOGFILE>&1
- echo "ruby $CV $factor" >> $LOGFILE 2>&1
- ruby $CV $factor >> $LOGFILE 2>&1
+ echo "ruby $CV $factor $2" >> $LOGFILE 2>&1
+ ruby $CV $factor "$2" >> $LOGFILE 2>&1 # $factor is split on purpose into separate args; the dataset path must stay a single arg
echo >> $LOGFILE 2>&1
done
fi