diff options
author | davor <vorgrimmlerdavid@gmx.de> | 2012-03-17 21:05:28 +0100 |
---|---|---|
committer | davor <vorgrimmlerdavid@gmx.de> | 2012-03-17 21:05:28 +0100 |
commit | 76c11057ecdf0908a48b750c098293af47f298f0 (patch) | |
tree | 24000f38483b7b7a281c9e60ae4eb148e7de259e | |
parent | 1624dee63eb6a64467f417f4469897cb611036c7 (diff) |
Modified scripts.
-rw-r--r-- | cv/data/utils/count_features.rb | 34 | ||||
-rw-r--r-- | cv/data/utils/create_selected_feature_ds.rb | 35 |
2 files changed, 50 insertions, 19 deletions
diff --git a/cv/data/utils/count_features.rb b/cv/data/utils/count_features.rb index 1e272ac..c8a5893 100644 --- a/cv/data/utils/count_features.rb +++ b/cv/data/utils/count_features.rb @@ -4,7 +4,11 @@ require 'yaml' @subjectid = nil - +if ARGV.size != 1 + puts "Args: path/to/dataset.yaml" + puts ARGV.size + exit +end def count_features(ds_uri) puts ds_uri @@ -13,9 +17,10 @@ def count_features(ds_uri) features = dataset.features.keys puts "# all features: #{features.size}" - + feature_names = [] delete_features = [] features.each{ |fn| + feature_names << fn.split("\/feature\/").last dataset.features[fn][RDF.type].each { |typestr| if typestr.include? "MissingFeature" delete_features << fn @@ -23,6 +28,9 @@ def count_features(ds_uri) end } } + @all_feature_names << feature_names.sort + @all_feature_names << "" + puts "# Missingfeatures: #{delete_features.size}" features = features - delete_features puts "# numeric features: #{features.size}" @@ -31,18 +39,30 @@ end @missing_features = [] - -ds = YAML::load_file("../datasets.yaml") +@all_feature_names = [] +path = ARGV[0] +puts path +ds = YAML::load_file("#{path}") +#ds = YAML::load_file("../datasets.yaml") ds.keys.each { |dataset| puts "----------" puts dataset + @all_feature_names << "" + @all_feature_names << "------ new dataset ------" + @all_feature_names << "-------- #{dataset} --------" ds[dataset].keys.each { |pc| - puts pc unless (pc == "dataset") || (pc == "test") || (pc == "training") - count_features(ds[dataset][pc]) unless (pc == "dataset") || (pc == "test") || (pc == "training") + if !(pc == "dataset") || (pc == "test") || (pc == "training") + puts pc + @all_feature_names << "--- new feature: #{pc} ---" + count_features(ds[dataset][pc]) + end } puts "----------" puts } puts puts "Missing features over all datasets:" -puts @missing_features.uniq!.to_yaml +puts @missing_features.uniq!.to_yaml +puts +puts "All feature names:" +puts @all_feature_names diff --git a/cv/data/utils/create_selected_feature_ds.rb b/cv/data/utils/create_selected_feature_ds.rb index c30a23e..c7c3445 100644 --- a/cv/data/utils/create_selected_feature_ds.rb +++ b/cv/data/utils/create_selected_feature_ds.rb @@ -4,14 +4,14 @@ require 'yaml' @subjectid = nil - +@dataset = [] def create_f_ds(t_ds_uri, f_ds_uri, del) regression_training_dataset = OpenTox::Dataset.find(t_ds_uri, @subjectid) prediction_feature = regression_training_dataset.features.keys.first regression_feature_dataset = OpenTox::Dataset.find(f_ds_uri, @subjectid) - + params = {} params[:dataset_uri] = regression_training_dataset.uri params[:prediction_feature_uri] = prediction_feature @@ -20,12 +20,12 @@ def create_f_ds(t_ds_uri, f_ds_uri, del) puts params.to_yaml feature_selection_algo_uri = File.join(CONFIG[:services]["opentox-algorithm"],"feature_selection/rfe") puts feature_selection_algo_uri + puts "--- Feature dataset is: ---" result = OpenTox::RestClientWrapper.post( feature_selection_algo_uri, params) - puts "--- Feature dataset is: ---" puts result - puts + result end @@ -34,15 +34,26 @@ end ds = YAML::load_file("../datasets.yaml") ds.keys.each { |dataset| puts "----------------- next dataset -----------------" + @dataset << "\"#{dataset}\": {" ds[dataset].keys.each { |pc| - puts pc unless (pc == "dataset") || (pc == "test") || (pc == "training") - #[false, true].each { |del_missing| - [false].each { |del_missing| #false is default - begin - create_f_ds(ds[dataset]["dataset"], ds[dataset][pc], del_missing) unless (pc == "dataset") || (pc == "test") || (pc == "training") - rescue - end - } + if !((pc == "dataset") || (pc == "test") || (pc == "training") || (pc == "hybrid")) + puts pc + #[false, true].each { |del_missing| + [false].each { |del_missing| #false is default + begin + result = create_f_ds(ds[dataset]["dataset"], ds[dataset][pc], del_missing) + @dataset << " \"#{pc}\": \"#{result}\"," + rescue + end + } + else + @dataset << " \"#{pc}\": \"#{ds[dataset][pc]}\"," + + end puts "-----------------" unless pc == "dataset" } + @dataset << "}," } + +puts @dataset + |