summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2012-03-05 13:21:57 +0100
committer Andreas Maunz <andreas@maunz.de> 2012-03-05 13:21:57 +0100
commit 377663911a0ac3ad8193c9dbc5544e748c31bf49 (patch)
tree c674c1593f94cb0e6e431394d8bad25890c6f331
parent bf1792b0353f0af5bf5f5383d193e315a8968245 (diff)
RFE selects proper subset of features
-rw-r--r-- lib/algorithm.rb 12
-rw-r--r-- lib/parser.rb 4
2 files changed, 12 insertions, 4 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index f57954d..a367656 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -505,10 +505,18 @@ module OpenTox
subsets = subsets[subsets>1]
# Recursive feature elimination
- rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=150), sizes=subsets)
+ rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=50), sizes=subsets)
+ save.image('/tmp/testam.R') # TODO: remove DBG
+ optVar = rfProfile$optVariables
+ if (rfProfile$bestSubset == dim(features)[2]) {
+ newRMSE = rfProfile$results$RMSE
+ newRMSE[which.min(rfProfile$results$RMSE)] = Inf
+ newOptSize = rfProfile$results[which.min(newRMSE),]$Variables
+ optVar = rfProfile$Variables(1:newOptSize)
+ }
# read existing dataset and select most useful features
- csv=feats[,c("SMILES", rfProfile$optVariables)]
+ csv=feats[,c("SMILES", optVar)]
write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='')
EOR
r_result_file
diff --git a/lib/parser.rb b/lib/parser.rb
index a7caf9d..53765b1 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -373,7 +373,7 @@ module OpenTox
}
input.each_with_index { |row, i|
- drop = false
+ drop=false
row = split_row(row)
raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size
if row.include?("")
@@ -653,7 +653,7 @@ module OpenTox
table.data[compound.uri] = row
end
- # finda and remove ignored_features
+ # find and remove ignored_features
@activity_errors = table.clean_features
table.add_to_dataset @dataset