summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author davor <vorgrimmlerdavid@gmx.de> 2012-03-07 17:07:44 +0100
committer davor <vorgrimmlerdavid@gmx.de> 2012-03-07 17:07:44 +0100
commit 9245dbb13a6add795b66053bc4d2a0485dee496b (patch)
tree 4a807980dab918543bf5e142ea4f7087ed3f6793
parent 90c012d4567da61758d43616a18c4fe306bbdf39 (diff)
Added del_nominal switch to load_csv, which removes all nominal features
(del_nominal is false by default).
-rw-r--r-- lib/parser.rb 28
-rw-r--r-- lib/utils.rb 2
2 files changed, 27 insertions, 3 deletions
diff --git a/lib/parser.rb b/lib/parser.rb
index 53765b1..f11ed48 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -351,12 +351,13 @@ module OpenTox
# @param [String] csv CSV representation of the dataset
# @param [Boolean] drop_missing Whether completely missing rows should be droppped
# @param [Boolean] all_numeric Whether all features should be treated as numeric
+ # @param [Boolean] del_nominal All nominal features will be removed
# @return [OpenTox::Dataset] Dataset object with CSV data
- def load_csv(csv, drop_missing=false, all_numeric=false)
+ def load_csv(csv, drop_missing=false, all_numeric=false, del_nominal=false)
+
row = 0
input = csv.split("\n")
headers = split_row(input.shift)
- add_features(headers)
value_maps = Array.new
regression_features=Array.new
@@ -371,6 +372,29 @@ module OpenTox
end
}
}
+
+ if del_nominal
+ # Collect all nominal features
+ del_features = Array.new
+ regression_features.each_with_index {|value, i|
+ if value == false
+ del_features << i
+ end
+ }
+ del_features = del_features.reverse
+ # Delete nominal entries in each row
+ del_features.each{|f|
+ regression_features.delete_at(f)
+ headers.delete_at(f+1)
+ input.map!{|row|
+ new_row = split_row(row)
+ new_row.delete_at(f+1)
+ row = new_row.join(",")
+ }
+ }
+ end
+
+ add_features(headers)
input.each_with_index { |row, i|
drop=false
diff --git a/lib/utils.rb b/lib/utils.rb
index 40988db..c9dd32a 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -60,7 +60,7 @@ module OpenTox
ds = OpenTox::Dataset.new
ds.save
parser.dataset = ds
- ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,true)
+ ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,false,true)
ds.save
rescue Exception => e