summaryrefslogtreecommitdiff
path: root/lib/feature_selection.rb
blob: 65f975252583aa110f00c019c033c2b16a159844 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
module OpenTox
  module Algorithm
    
    # Feature selection helpers that delegate the statistics to R through the
    # `R` object (anything responding to `assign` and `eval`).
    class FeatureSelection

      # Keeps only those independent variables of +model+ whose Pearson
      # correlation with the dependent variables is significant.
      #
      # For every entry of +model.independent_variables+ R's `cor.test` is run
      # against the dependent variables. Entries whose p-value is at most
      # +significance+ are retained; their squared correlation estimate is
      # stored as the descriptor weight.
      #
      # The model is modified in place: +independent_variables+,
      # +descriptor_ids+ and +descriptor_weights+ are replaced by the
      # (index-aligned) selected subsets. Boolean values inside each
      # independent variable array are converted to 1/0 in place.
      #
      # Variables for which the test raises a StandardError are skipped with
      # a warning instead of aborting the whole selection.
      #
      # @param model an object exposing +dependent_variables+,
      #   +independent_variables+, +descriptor_ids+, +descriptor_weights=+
      #   and +prediction_feature+ (the latter is only read for the warning)
      # @param significance [Numeric] largest p-value that is still accepted
      #   (defaults to 0.05, the previously hard-coded threshold)
      # @return the same +model+ object that was passed in
      def self.correlation_filter(model, significance = 0.05)
        R.assign "dependent", model.dependent_variables.map { |value| to_r(value) }

        weights = []
        kept_variables = []
        kept_ids = []

        model.independent_variables.each_with_index do |variable, index|
          variable.map! { |value| to_r(value) }
          R.assign "independent", variable
          begin
            # NOTE(review): `use` is not among the documented arguments of
            # cor.test -- confirm whether it has any effect here.
            R.eval "cor <- cor.test(dependent,independent,method = 'pearson',use='pairwise')"
            next unless R.eval("cor$p.value").to_ruby <= significance

            # Compute everything that may raise before touching the result
            # arrays, so that the three arrays can never get out of step.
            weight = R.eval("cor$estimate").to_ruby**2
            descriptor_id = model.descriptor_ids[index]

            weights << weight
            kept_variables << variable
            kept_ids << descriptor_id
          rescue
            warn "Correlation of '#{model.prediction_feature.name}' (#{model.dependent_variables}) with (#{variable}) failed."
          end
        end

        # Publish the results only once the whole selection has finished.
        model.descriptor_weights = weights
        model.independent_variables = kept_variables
        model.descriptor_ids = kept_ids
        model
      end

      # Converts a Ruby boolean into the number passed on to R.
      #
      # @param v any value
      # @return 1 for +true+, 0 for +false+, otherwise +v+ itself unchanged
      def self.to_r(v)
        case v
        when true then 1
        when false then 0
        else v
        end
      end

    end

  end
end