summary refs log tree commit diff
diff options
context:
space:
mode:
author davor <vorgrimmlerdavid@gmx.de> 2012-03-17 21:05:28 +0100
committer davor <vorgrimmlerdavid@gmx.de> 2012-03-17 21:05:28 +0100
commit 76c11057ecdf0908a48b750c098293af47f298f0 (patch)
tree 24000f38483b7b7a281c9e60ae4eb148e7de259e
parent 1624dee63eb6a64467f417f4469897cb611036c7 (diff)
Modified scripts.
-rw-r--r-- cv/data/utils/count_features.rb 34
-rw-r--r-- cv/data/utils/create_selected_feature_ds.rb 35
2 files changed, 50 insertions, 19 deletions
diff --git a/cv/data/utils/count_features.rb b/cv/data/utils/count_features.rb
index 1e272ac..c8a5893 100644
--- a/cv/data/utils/count_features.rb
+++ b/cv/data/utils/count_features.rb
@@ -4,7 +4,11 @@ require 'yaml'
@subjectid = nil
-
+if ARGV.size != 1
+ puts "Args: path/to/dataset.yaml"
+ puts ARGV.size
+ exit
+end
def count_features(ds_uri)
puts ds_uri
@@ -13,9 +17,10 @@ def count_features(ds_uri)
features = dataset.features.keys
puts "# all features: #{features.size}"
-
+ feature_names = []
delete_features = []
features.each{ |fn|
+ feature_names << fn.split("\/feature\/").last
dataset.features[fn][RDF.type].each { |typestr|
if typestr.include? "MissingFeature"
delete_features << fn
@@ -23,6 +28,9 @@ def count_features(ds_uri)
end
}
}
+ @all_feature_names << feature_names.sort
+ @all_feature_names << ""
+
puts "# Missingfeatures: #{delete_features.size}"
features = features - delete_features
puts "# numeric features: #{features.size}"
@@ -31,18 +39,30 @@ end
@missing_features = []
-
-ds = YAML::load_file("../datasets.yaml")
+@all_feature_names = []
+path = ARGV[0]
+puts path
+ds = YAML::load_file("#{path}")
+#ds = YAML::load_file("../datasets.yaml")
ds.keys.each { |dataset|
puts "----------"
puts dataset
+ @all_feature_names << ""
+ @all_feature_names << "------ new dataset ------"
+ @all_feature_names << "-------- #{dataset} --------"
ds[dataset].keys.each { |pc|
- puts pc unless (pc == "dataset") || (pc == "test") || (pc == "training")
- count_features(ds[dataset][pc]) unless (pc == "dataset") || (pc == "test") || (pc == "training")
+ if !(pc == "dataset") || (pc == "test") || (pc == "training")
+ puts pc
+ @all_feature_names << "--- new feature: #{pc} ---"
+ count_features(ds[dataset][pc])
+ end
}
puts "----------"
puts
}
puts
puts "Missing features over all datasets:"
-puts @missing_features.uniq!.to_yaml
+puts @missing_features.uniq!.to_yaml
+puts
+puts "All feature names:"
+puts @all_feature_names
diff --git a/cv/data/utils/create_selected_feature_ds.rb b/cv/data/utils/create_selected_feature_ds.rb
index c30a23e..c7c3445 100644
--- a/cv/data/utils/create_selected_feature_ds.rb
+++ b/cv/data/utils/create_selected_feature_ds.rb
@@ -4,14 +4,14 @@ require 'yaml'
@subjectid = nil
-
+@dataset = []
def create_f_ds(t_ds_uri, f_ds_uri, del)
regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid)
prediction_feature = regression_training_dataset.features.keys.first
regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid)
-
+
params = {}
params[:dataset_uri] = regression_training_dataset.uri
params[:prediction_feature_uri] = prediction_feature
@@ -20,12 +20,12 @@ def create_f_ds(t_ds_uri, f_ds_uri, del)
puts params.to_yaml
feature_selection_algo_uri = File.join(CONFIG[:services]["opentox-algorithm"],"feature_selection/rfe")
puts feature_selection_algo_uri
+ puts "--- Feature dataset is: ---"
result = OpenTox::RestClientWrapper.post( feature_selection_algo_uri, params)
- puts "--- Feature dataset is: ---"
puts result
-
puts
+ result
end
@@ -34,15 +34,26 @@ end
ds = YAML::load_file("../datasets.yaml")
ds.keys.each { |dataset|
puts "----------------- next dataset -----------------"
+ @dataset << "\"#{dataset}\": {"
ds[dataset].keys.each { |pc|
- puts pc unless (pc == "dataset") || (pc == "test") || (pc == "training")
- #[false, true].each { |del_missing|
- [false].each { |del_missing| #false is default
- begin
- create_f_ds(ds[dataset]["dataset"], ds[dataset][pc], del_missing) unless (pc == "dataset") || (pc == "test") || (pc == "training")
- rescue
- end
- }
+ if !((pc == "dataset") || (pc == "test") || (pc == "training") || (pc == "hybrid"))
+ puts pc
+ #[false, true].each { |del_missing|
+ [false].each { |del_missing| #false is default
+ begin
+ result = create_f_ds(ds[dataset]["dataset"], ds[dataset][pc], del_missing)
+ @dataset << " \"#{pc}\": \"#{result}\","
+ rescue
+ end
+ }
+ else
+ @dataset << " \"#{pc}\": \"#{ds[dataset][pc]}\","
+
+ end
puts "-----------------" unless pc == "dataset"
}
+ @dataset << "},"
}
+
+puts @dataset
+