summaryrefslogtreecommitdiff
path: root/lib/feature-selection.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/feature-selection.rb')
-rw-r--r--lib/feature-selection.rb56
1 files changed, 56 insertions, 0 deletions
diff --git a/lib/feature-selection.rb b/lib/feature-selection.rb
new file mode 100644
index 0000000..8601737
--- /dev/null
+++ b/lib/feature-selection.rb
@@ -0,0 +1,56 @@
module OpenTox
  module Algorithm

    # Feature selection algorithms
    module FeatureSelection

      # Feature selection that takes the model's dependent variables into account.
      class Supervised

        # Keep only those independent variables of +model+ that are significantly
        # correlated with its dependent variables.
        #
        # Each independent variable is tested against the dependent variables
        # with R's +cor.test+ (Pearson). A variable is kept when the reported
        # p-value is less than or equal to +max_p_value+; the squared
        # correlation estimate is stored as its descriptor weight.
        #
        # The model is modified in place: +descriptor_weights+,
        # +independent_variables+ and +descriptor_ids+ are replaced by the
        # values belonging to the selected variables. Kept variables are stored
        # with booleans converted to 0/1 (see {to_r}). The arrays originally held
        # in +model.independent_variables+ are not mutated.
        #
        # NOTE(review): +R+ is not defined in this file; it is expected to be an
        # R session handle responding to +assign+ and +eval+ - confirm where it
        # is set up.
        #
        # @param model [OpenTox::Model::Lazar] model to filter
        # @param max_p_value [Numeric] highest p-value that still counts as
        #   significant (defaults to 0.05, the previously hard-coded cutoff)
        # @return [OpenTox::Model::Lazar] the same model object that was passed in
        def self.correlation_filter(model, max_p_value = 0.05)
          R.assign "dependent", model.dependent_variables.map { |value| to_r(value) }
          model.descriptor_weights = []
          selected_variables = []
          selected_descriptor_ids = []

          model.independent_variables.each_with_index do |variable, index|
            # Work on a converted copy so the caller's arrays stay untouched.
            values = variable.map { |value| to_r(value) }
            R.assign "independent", values
            begin
              R.eval "cor <- cor.test(dependent,independent,method = 'pearson',use='pairwise')"
              pvalue = R.eval("cor$p.value").to_ruby
              next unless pvalue <= max_p_value

              # Compute everything that may raise before touching the result
              # lists, so the three lists always stay the same length.
              weight = R.eval("cor$estimate").to_ruby**2
              descriptor_id = model.descriptor_ids[index]

              model.descriptor_weights << weight
              selected_variables << values
              selected_descriptor_ids << descriptor_id
            rescue StandardError
              # Best effort: a variable whose test fails is reported and skipped.
              warn "Correlation of '#{model.prediction_feature.name}' (#{model.dependent_variables}) with (#{values}) failed."
            end
          end

          model.independent_variables = selected_variables
          model.descriptor_ids = selected_descriptor_ids
          model
        end

        # Convert a Ruby boolean to the number handed over to R.
        #
        # @param v [Object] value to convert
        # @return [Object] 0 for +false+, 1 for +true+, otherwise +v+ unchanged
        def self.to_r(v)
          case v
          when false then 0
          when true then 1
          else v
          end
        end

      end

      # Feature selection that does not use the dependent variables.
      class Unsupervised
        # Placeholder: no selection is implemented yet. The method body is
        # empty, so it always returns nil and ignores its argument.
        #
        # @param independent_variables [Object] currently unused
        # @return [nil]
        def self.nonredundant(independent_variables)
        end
      end

    end

  end
end