diff options
author | davor <vorgrimmlerdavid@gmx.de> | 2012-03-20 13:22:38 +0100 |
---|---|---|
committer | davor <vorgrimmlerdavid@gmx.de> | 2012-03-20 13:22:38 +0100 |
commit | ad84654d513b68821496d2e08c1a399cf142910b (patch) | |
tree | 442fc645d88cb7961d22f66658da4786669cd41c | |
parent | 8a4cdc83b210ad8087ca89235337e6b9b40cefb1 (diff) |
Merged back to d7c0289 but includes fixes.
Moved back to the setting with the best performance (see table 18,
http://goo.gl/uGMJN).
-rw-r--r-- | lib/algorithm.rb | 9 | ||||
-rw-r--r-- | lib/parser.rb | 27 | ||||
-rw-r--r-- | lib/utils.rb | 8 |
3 files changed, 5 insertions, 39 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 19666ad..ebd2019 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -517,16 +517,9 @@ module OpenTox # Recursive feature elimination rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=150), sizes=subsets) - optVar = rfProfile$optVariables - if (rfProfile$bestSubset == dim(features)[2]) { - newRMSE = rfProfile$results$RMSE - newRMSE[which.min(rfProfile$results$RMSE)] = Inf - newOptSize = rfProfile$results[which.min(newRMSE),]$Variables - optVar = rfProfile$optVariables[1:newOptSize] - } # read existing dataset and select most useful features - csv=feats[,c("SMILES", optVar)] + csv=feats[,c("SMILES", rfProfile$optVariables)] write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='') EOR r_result_file diff --git a/lib/parser.rb b/lib/parser.rb index f11ed48..e871323 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -353,11 +353,11 @@ module OpenTox # @param [Boolean] all_numeric Whether all features should be treated as numeric # @param [Boolean] del_nominal All nominal features will be removed # @return [OpenTox::Dataset] Dataset object with CSV data - def load_csv(csv, drop_missing=false, all_numeric=false, del_nominal=false) - + def load_csv(csv, drop_missing=false, all_numeric=false) row = 0 input = csv.split("\n") headers = split_row(input.shift) + add_features(headers) value_maps = Array.new regression_features=Array.new @@ -372,29 +372,6 @@ module OpenTox end } } - - if del_nominal - # Collect all nominal features - del_features = Array.new - regression_features.each_with_index {|value, i| - if value == false - del_features << i - end - } - del_features = del_features.reverse - # Delete nominal entries in each row - del_features.each{|f| - regression_features.delete_at(f) - headers.delete_at(f+1) - input.map!{|row| - new_row = split_row(row) - new_row.delete_at(f+1) - row = new_row.join(",") - } - } - end - - add_features(headers) input.each_with_index { |row, i| 
drop=false diff --git a/lib/utils.rb b/lib/utils.rb index eccec46..40988db 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -55,16 +55,12 @@ module OpenTox master = jl_master if jl_master master = ambit_master if ambit_master end - + parser = OpenTox::Parser::Spreadsheets.new ds = OpenTox::Dataset.new ds.save parser.dataset = ds - if compounds.size < 4 - ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,true,false) # all_numeric = true, all features should be treated as numeric - else - ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,false,true) # del_nominal = true, which removes nominal features - end + ds = parser.load_csv(master.collect{|r| r.join(",")}.join("\n"),false,true) ds.save rescue Exception => e |