RFE selects a proper subset of features

author: Andreas Maunz <andreas@maunz.de> 2012-03-05 13:21:57 +0100
committer: Andreas Maunz <andreas@maunz.de> 2012-03-05 13:21:57 +0100
commit: 377663911a0ac3ad8193c9dbc5544e748c31bf49 (patch)
tree: c674c1593f94cb0e6e431394d8bad25890c6f331
parent: bf1792b0353f0af5bf5f5383d193e315a8968245 (diff)
2 files changed, 12 insertions, 4 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index f57954d..a367656 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -505,10 +505,18 @@ module OpenTox
           subsets = subsets[subsets>1] 
           
           # Recursive feature elimination
-          rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=150), sizes=subsets)
+          rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=50), sizes=subsets)
+          save.image('/tmp/testam.R') # TODO: remove DBG
+          optVar = rfProfile$optVariables
+          if (rfProfile$bestSubset == dim(features)[2]) {
+            newRMSE = rfProfile$results$RMSE
+            newRMSE[which.min(rfProfile$results$RMSE)] = Inf
+            newOptSize = rfProfile$results[which.min(newRMSE),]$Variables
+            optVar = rfProfile$Variables(1:newOptSize)
+          }
           
           # read existing dataset and select most useful features
-          csv=feats[,c("SMILES", rfProfile$optVariables)]
+          csv=feats[,c("SMILES", optVar)]
           write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='')
         EOR
         r_result_file
diff --git a/lib/parser.rb b/lib/parser.rb
index a7caf9d..53765b1 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -373,7 +373,7 @@ module OpenTox
         }
 
         input.each_with_index { |row, i| 
-          drop = false
+          drop=false
           row = split_row(row)
           raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size
           if row.include?("")
@@ -653,7 +653,7 @@ module OpenTox
           table.data[compound.uri] = row
         end
 
-        # finda and remove ignored_features
+        # find and remove ignored_features
         @activity_errors = table.clean_features
         table.add_to_dataset @dataset
author	Andreas Maunz <andreas@maunz.de>	2012-03-05 13:21:57 +0100
committer	Andreas Maunz <andreas@maunz.de>	2012-03-05 13:21:57 +0100
commit	377663911a0ac3ad8193c9dbc5544e748c31bf49 (patch)
tree	c674c1593f94cb0e6e431394d8bad25890c6f331
parent	bf1792b0353f0af5bf5f5383d193e315a8968245 (diff)