diff options
Diffstat (limited to 'lib/feature-selection.rb')
-rw-r--r-- | lib/feature-selection.rb | 56 |
1 files changed, 56 insertions, 0 deletions
module OpenTox
  module Algorithm

    # Feature selection algorithms
    module FeatureSelection

      # Feature selection that makes use of the model's dependent variables.
      class Supervised

        # Select the descriptors that correlate with the model's dependent
        # variables.
        #
        # Every entry of +model.independent_variables+ is tested against
        # +model.dependent_variables+ with R's +cor.test+ (Pearson). Entries
        # whose p-value is at most +p_value_threshold+ are kept; their squared
        # correlation estimate is appended to +model.descriptor_weights+.
        # On return the model's +independent_variables+, +descriptor_ids+ and
        # +descriptor_weights+ describe the selected descriptors only.
        #
        # The vectors passed in are not modified; booleans are converted to
        # 0/1 on copies, and those copies are what is stored back on the model.
        #
        # +R+ is not defined in this file -- presumably the project-wide R
        # session object; confirm against the caller.
        #
        # @param model [OpenTox::Model::Lazar] model to filter (modified in place)
        # @param p_value_threshold [Float] largest p-value that still counts as
        #   correlated
        # @return [OpenTox::Model::Lazar] the same +model+ object
        def self.correlation_filter(model, p_value_threshold: 0.05)
          R.assign "dependent", model.dependent_variables.map { |value| to_r(value) }
          model.descriptor_weights = []
          selected_variables = []
          selected_descriptor_ids = []

          model.independent_variables.each_with_index do |variable, index|
            # Work on a converted copy so the caller's arrays stay untouched.
            values = variable.map { |value| to_r(value) }
            R.assign "independent", values
            begin
              # NOTE(review): `use` is an argument of R's cor(); confirm that
              # cor.test honours it rather than silently ignoring it.
              R.eval "cor <- cor.test(dependent,independent,method = 'pearson',use='pairwise')"
              pvalue = R.eval("cor$p.value").to_ruby
              if pvalue <= p_value_threshold
                model.descriptor_weights << R.eval("cor$estimate").to_ruby**2
                selected_variables << values
                selected_descriptor_ids << model.descriptor_ids[index]
              end
            rescue
              # Best effort: a descriptor whose test fails (R error, or a
              # p-value that cannot be compared) is reported and skipped.
              warn "Correlation of '#{model.prediction_feature.name}' (#{model.dependent_variables}) with (#{values}) failed."
            end
          end

          model.independent_variables = selected_variables
          model.descriptor_ids = selected_descriptor_ids
          model
        end

        # Convert a Ruby value for use in the vectors assigned to R.
        #
        # @param v [Object] value to convert
        # @return [Object] 0 for +false+, 1 for +true+, otherwise +v+ unchanged
        #   (+nil+ stays +nil+)
        def self.to_r(v)
          case v
          when false then 0
          when true then 1
          else v
          end
        end

      end

      # Feature selection that does not use the dependent variables.
      class Unsupervised
        # Placeholder for the selection of non-redundant descriptors.
        # Not implemented yet: the body is empty, so the call always returns
        # +nil+ and leaves its argument untouched.
        #
        # @param independent_variables [Object] currently ignored
        # @return [nil]
        def self.nonredundant(independent_variables)
        end
      end

    end

  end
end