blob: c596b1fff878baf8081627d05f5b60ef92e9b819 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
module OpenTox
module Algorithm
# Feature selection algorithms
class FeatureSelection
# Select features correlated to the models prediction feature
# @param [OpenTox::Model::Lazar]
def self.correlation_filter model
relevant_features = {}
R.assign "dependent", model.dependent_variables.collect{|v| to_r(v)}
model.descriptor_weights = []
selected_variables = []
selected_descriptor_ids = []
model.independent_variables.each_with_index do |v,i|
v.collect!{|n| to_r(n)}
R.assign "independent", v
begin
R.eval "cor <- cor.test(dependent,independent,method = 'pearson',use='pairwise')"
pvalue = R.eval("cor$p.value").to_ruby
if pvalue <= 0.05
model.descriptor_weights << R.eval("cor$estimate").to_ruby**2
selected_variables << v
selected_descriptor_ids << model.descriptor_ids[i]
end
rescue
warn "Correlation of '#{model.prediction_feature.name}' (#{model.dependent_variables}) with (#{v}) failed."
end
end
model.independent_variables = selected_variables
model.descriptor_ids = selected_descriptor_ids
model
end
def self.to_r v
return 0 if v == false
return 1 if v == true
v
end
end
end
end
|