diff options
author | Andreas Maunz <andreas@maunz.de> | 2012-03-05 13:21:57 +0100 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2012-03-05 13:21:57 +0100 |
commit | 377663911a0ac3ad8193c9dbc5544e748c31bf49 (patch) | |
tree | c674c1593f94cb0e6e431394d8bad25890c6f331 | |
parent | bf1792b0353f0af5bf5f5383d193e315a8968245 (diff) |
RFE selects proper subset of features
-rw-r--r-- | lib/algorithm.rb | 12 | ||||
-rw-r--r-- | lib/parser.rb | 4 |
2 files changed, 12 insertions, 4 deletions
```diff
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index f57954d..a367656 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -505,10 +505,18 @@ module OpenTox
 subsets = subsets[subsets>1]
 # Recursive feature elimination
- rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=150), sizes=subsets)
+ rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=50), sizes=subsets)
+ save.image('/tmp/testam.R') # TODO: remove DBG
+ optVar = rfProfile$optVariables
+ if (rfProfile$bestSubset == dim(features)[2]) {
+ newRMSE = rfProfile$results$RMSE
+ newRMSE[which.min(rfProfile$results$RMSE)] = Inf
+ newOptSize = rfProfile$results[which.min(newRMSE),]$Variables
+ optVar = rfProfile$Variables(1:newOptSize)
+ }
 # read existing dataset and select most useful features
- csv=feats[,c("SMILES", rfProfile$optVariables)]
+ csv=feats[,c("SMILES", optVar)]
 write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='')
 EOR
 r_result_file
diff --git a/lib/parser.rb b/lib/parser.rb
index a7caf9d..53765b1 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -373,7 +373,7 @@ module OpenTox
 }
 input.each_with_index { |row, i|
- drop = false
+ drop=false
 row = split_row(row)
 raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size
 if row.include?("")
@@ -653,7 +653,7 @@ module OpenTox
 table.data[compound.uri] = row
 end
- # finda and remove ignored_features
+ # find and remove ignored_features
 @activity_errors = table.clean_features
 table.add_to_dataset @dataset
```