summaryrefslogtreecommitdiff
path: root/lib/feature-selection.rb
blob: 8601737c4e8d025d6029242afb2c9dd6a47cfa29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
module OpenTox
  module Algorithm
    
    # Feature selection algorithms
    module FeatureSelection

      class Supervised

        # Largest p-value of the correlation test at which a descriptor is kept.
        SIGNIFICANCE_LEVEL = 0.05

        # Select features correlated to the model's prediction feature.
        #
        # Every entry of +model.independent_variables+ is tested against
        # +model.dependent_variables+ with R's +cor.test+ (Pearson). Entries
        # whose p-value is at most SIGNIFICANCE_LEVEL are kept. Afterwards the
        # model holds only the kept entries:
        # * +independent_variables+ - the kept value lists (booleans as 0/1)
        # * +descriptor_ids+        - the ids at the same positions
        # * +descriptor_weights+    - the squared correlation estimates
        #
        # A variable whose test raises is reported with +warn+ and skipped.
        # The arrays passed in are not modified; converted copies are used.
        #
        # @param [OpenTox::Model::Lazar] model
        # @return [OpenTox::Model::Lazar] the same model object, updated
        def self.correlation_filter model
          R.assign "dependent", model.dependent_variables.collect{|v| to_r(v)}
          weights = []
          selected_variables = []
          selected_descriptor_ids = []
          model.independent_variables.each_with_index do |v,i|
            # Convert into a copy so that the caller's (possibly frozen or
            # shared) arrays are left untouched.
            values = v.collect{|n| to_r(n)}
            R.assign "independent", values
            begin
              # NOTE(review): `use` is documented for R's cor(); confirm that
              # cor.test actually honours it.
              R.eval "cor <- cor.test(dependent,independent,method = 'pearson',use='pairwise')"
              pvalue = R.eval("cor$p.value").to_ruby
              next unless pvalue <= SIGNIFICANCE_LEVEL
              # Compute everything that may raise before storing anything, so
              # the three result lists always stay the same length.
              weight = R.eval("cor$estimate").to_ruby**2
              descriptor_id = model.descriptor_ids[i]
              weights << weight
              selected_variables << values
              selected_descriptor_ids << descriptor_id
            rescue => e
              warn "Correlation of '#{model.prediction_feature.name}' (#{model.dependent_variables}) with (#{values}) failed. (#{e.class}: #{e.message})"
            end
          end

          model.descriptor_weights = weights
          model.independent_variables = selected_variables
          model.descriptor_ids = selected_descriptor_ids
          model
        end

        # Convert a Ruby value into the form handed to R: +true+ becomes 1,
        # +false+ becomes 0, anything else is passed through unchanged.
        # @param [Object] v
        # @return [Object] 0, 1 or +v+ itself
        def self.to_r v
          case v
          when false then 0
          when true then 1
          else v
          end
        end

      end

      class Unsupervised
        # Not implemented: the argument is ignored and nil is returned.
        # NOTE(review): presumably meant to drop redundant descriptors - confirm
        # the intended behaviour before relying on it.
        # @param [Object] independent_variables currently unused
        # @return [nil]
        def self.nonredundant(independent_variables)
          nil
        end
      end

    end

  end
end