diff options
author | gebele <gebele@in-silico.ch> | 2016-12-12 09:15:48 +0000 |
---|---|---|
committer | gebele <gebele@in-silico.ch> | 2016-12-12 09:15:48 +0000 |
commit | da086fad5b45c0d7b59feb40d0108ac620613933 (patch) | |
tree | 7e9cf8c9332e30552ab255ee9b30e04e904977b4 /lib/feature_selection.rb | |
parent | 32a16d99b51642cac8e75f90c43753d8d05ab770 (diff) | |
parent | 4570f11444bc10da88d849e9a2812e95a8933c8a (diff) |
merged development
Diffstat (limited to 'lib/feature_selection.rb')
-rw-r--r-- | lib/feature_selection.rb | 42 |
1 files changed, 42 insertions, 0 deletions
module OpenTox
  module Algorithm
    # Feature selection helpers that operate on a model object.
    #
    # NOTE(review): `R` is not defined in this file; it is presumably the
    # project-wide R bridge. Only `R.assign(name, value)` and
    # `R.eval(command).to_ruby` are relied upon here.
    class FeatureSelection
      # Largest p-value of the correlation test at which a descriptor is kept.
      P_VALUE_THRESHOLD = 0.05

      # Keeps only the descriptors whose Pearson correlation with the
      # dependent variables has a p-value of at most P_VALUE_THRESHOLD.
      #
      # The model is modified in place:
      # * `descriptor_weights` is reset and filled with the squared
      #   correlation estimate of every kept descriptor,
      # * `independent_variables` and `descriptor_ids` are replaced by the
      #   kept subset (same relative order),
      # * every array in the original `independent_variables` has its
      #   `true`/`false` entries replaced by 1/0 in place.
      #
      # A descriptor whose transfer to R or whose correlation test raises a
      # StandardError is skipped with a warning instead of aborting the run.
      #
      # @param model [Object] must respond to `dependent_variables`,
      #   `independent_variables`, `descriptor_ids`, `prediction_feature` and
      #   to the writers `descriptor_weights=`, `independent_variables=`,
      #   `descriptor_ids=`
      # @return [Object] the same model instance
      def self.correlation_filter(model)
        R.assign 'dependent', model.dependent_variables.map { |value| to_r(value) }
        model.descriptor_weights = []
        selected_variables = []
        selected_descriptor_ids = []

        model.independent_variables.each_with_index do |values, index|
          values.map! { |value| to_r(value) }
          begin
            # The assignment is inside the rescue scope so that a column the
            # R bridge rejects only drops this descriptor.
            R.assign 'independent', values
            R.eval "cor <- cor.test(dependent,independent,method = 'pearson',use='pairwise')"
            if R.eval('cor$p.value').to_ruby <= P_VALUE_THRESHOLD
              # Compute everything that can raise before touching the result
              # lists, so weights, variables and ids always stay aligned.
              weight = R.eval('cor$estimate').to_ruby**2
              descriptor_id = model.descriptor_ids[index]
              model.descriptor_weights << weight
              selected_variables << values
              selected_descriptor_ids << descriptor_id
            end
          rescue
            warn "Correlation of '#{model.prediction_feature.name}' (#{model.dependent_variables}) with (#{values}) failed."
          end
        end

        model.independent_variables = selected_variables
        model.descriptor_ids = selected_descriptor_ids
        model
      end

      # Maps Ruby booleans to the numbers handed to R.
      #
      # @param v [Object] any value
      # @return [Object] 1 for `true`, 0 for `false`, otherwise `v` unchanged
      def self.to_r(v)
        case v
        when true then 1
        when false then 0
        else v
        end
      end
    end
  end
end