summaryrefslogtreecommitdiff
path: root/cv/data/utils/count_features.rb
diff options
context:
space:
mode:
Diffstat (limited to 'cv/data/utils/count_features.rb')
-rw-r--r--cv/data/utils/count_features.rb48
1 files changed, 48 insertions, 0 deletions
diff --git a/cv/data/utils/count_features.rb b/cv/data/utils/count_features.rb
new file mode 100644
index 0000000..1e272ac
--- /dev/null
+++ b/cv/data/utils/count_features.rb
@@ -0,0 +1,48 @@
+require 'rubygems'
+require 'opentox-ruby'
+require 'yaml'
+# Passed as 2nd argument to OpenTox::Dataset.find in count_features; nil here (presumably an auth subject id - TODO confirm).
+@subjectid = nil
+
+
+
+def count_features(ds_uri)
+ puts ds_uri
+
+ dataset = OpenTox::Dataset.find(ds_uri, @subjectid)
+
+ features = dataset.features.keys
+ puts "# all features: #{features.size}"
+
+ delete_features = []
+ features.each{ |fn|
+ dataset.features[fn][RDF.type].each { |typestr|
+ if typestr.include? "MissingFeature"
+ delete_features << fn
+ @missing_features << dataset.features[fn][DC.title]
+ end
+ }
+ }
+ puts "# Missingfeatures: #{delete_features.size}"
+ features = features - delete_features
+ puts "# numeric features: #{features.size}"
+ puts "-----"
+end
+
+
+@missing_features = []
+
+ds = YAML::load_file("../datasets.yaml")
+ds.keys.each { |dataset|
+ puts "----------"
+ puts dataset
+ ds[dataset].keys.each { |pc|
+ puts pc unless (pc == "dataset") || (pc == "test") || (pc == "training")
+ count_features(ds[dataset][pc]) unless (pc == "dataset") || (pc == "test") || (pc == "training")
+ }
+ puts "----------"
+ puts
+}
+puts
+puts "Missing features over all datasets:"
+puts @missing_features.uniq!.to_yaml