summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author davor <vorgrimmlerdavid@gmx.de> 2012-03-20 13:22:38 +0100
committer davor <vorgrimmlerdavid@gmx.de> 2012-03-20 13:22:38 +0100
commit ad84654d513b68821496d2e08c1a399cf142910b (patch)
tree 442fc645d88cb7961d22f66658da4786669cd41c
parent 8a4cdc83b210ad8087ca89235337e6b9b40cefb1 (diff)
Merged back to d7c0289 but includes fixes.
Moved back to the setting with the best performance (see Table 18, http://goo.gl/uGMJN).
-rw-r--r-- lib/algorithm.rb 9
-rw-r--r-- lib/parser.rb 27
-rw-r--r-- lib/utils.rb 8
3 files changed, 5 insertions, 39 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 19666ad..ebd2019 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -517,16 +517,9 @@ module OpenTox
# Recursive feature elimination
rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=150), sizes=subsets)
- optVar = rfProfile$optVariables
- if (rfProfile$bestSubset == dim(features)[2]) {
- newRMSE = rfProfile$results$RMSE
- newRMSE[which.min(rfProfile$results$RMSE)] = Inf
- newOptSize = rfProfile$results[which.min(newRMSE),]$Variables
- optVar = rfProfile$optVariables[1:newOptSize]
- }
# read existing dataset and select most useful features
- csv=feats[,c("SMILES", optVar)]
+ csv=feats[,c("SMILES", rfProfile$optVariables)]
write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='')
EOR
r_result_file
diff --git a/lib/parser.rb b/lib/parser.rb
index f11ed48..e871323 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -353,11 +353,11 @@ module OpenTox
# @param [Boolean] all_numeric Whether all features should be treated as numeric
# @param [Boolean] del_nominal All nominal features will be removed
# @return [OpenTox::Dataset] Dataset object with CSV data
- def load_csv(csv, drop_missing=false, all_numeric=false, del_nominal=false)
-
+ def load_csv(csv, drop_missing=false, all_numeric=false)
row = 0
input = csv.split("\n")
headers = split_row(input.shift)
+ add_features(headers)
value_maps = Array.new
regression_features=Array.new
@@ -372,29 +372,6 @@ module OpenTox
end
}
}
-
- if del_nominal
- # Collect all nominal features
- del_features = Array.new
- regression_features.each_with_index {|value, i|
- if value == false
- del_features << i
- end
- }
- del_features = del_features.reverse
- # Delete nominal entries in each row
- del_features.each{|f|
- regression_features.delete_at(f)
- headers.delete_at(f+1)
- input.map!{|row|
- new_row = split_row(row)
- new_row.delete_at(f+1)
- row = new_row.join(",")
- }
- }
- end
-
- add_features(headers)
input.each_with_index { |row, i|
drop=false
diff --git a/lib/utils.rb b/lib/utils.rb
index eccec46..40988db 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -55,16 +55,12 @@ module OpenTox
master = jl_master if jl_master
master = ambit_master if ambit_master
end
-
+
parser = OpenTox::Parser::Spreadsheets.new
ds = OpenTox::Dataset.new
ds.save
parser.dataset = ds
- if compounds.size < 4
- ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,true,false) # all_numeric = true, all features should be treated as numeric
- else
- ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,false,true) # del_nominal = true, which removes nominal features
- end
+ ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,true)
ds.save
rescue Exception => e