summaryrefslogtreecommitdiff
path: root/lib/feature_selection.rb
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2021-02-12 19:54:07 +0100
committerChristoph Helma <helma@in-silico.ch>2021-02-12 19:54:07 +0100
commita29eb3e38414cd252850c9c4fb356f8b2bef6fb4 (patch)
treea957d9ac455e7345c51f3ab6075698f552c497d1 /lib/feature_selection.rb
parent158e9a7ecbc467c3db77c354f203b1176b0fc3f2 (diff)
model.rb refactored, mp2d models updated
Diffstat (limited to 'lib/feature_selection.rb')
-rw-r--r--lib/feature_selection.rb45
1 files changed, 0 insertions, 45 deletions
diff --git a/lib/feature_selection.rb b/lib/feature_selection.rb
deleted file mode 100644
index c596b1f..0000000
--- a/lib/feature_selection.rb
+++ /dev/null
@@ -1,45 +0,0 @@
-module OpenTox
- module Algorithm
-
- # Feature selection algorithms
- class FeatureSelection
-
- # Select features correlated to the models prediction feature
- # @param [OpenTox::Model::Lazar]
- def self.correlation_filter model
- relevant_features = {}
- R.assign "dependent", model.dependent_variables.collect{|v| to_r(v)}
- model.descriptor_weights = []
- selected_variables = []
- selected_descriptor_ids = []
- model.independent_variables.each_with_index do |v,i|
- v.collect!{|n| to_r(n)}
- R.assign "independent", v
- begin
- R.eval "cor <- cor.test(dependent,independent,method = 'pearson',use='pairwise')"
- pvalue = R.eval("cor$p.value").to_ruby
- if pvalue <= 0.05
- model.descriptor_weights << R.eval("cor$estimate").to_ruby**2
- selected_variables << v
- selected_descriptor_ids << model.descriptor_ids[i]
- end
- rescue
- warn "Correlation of '#{model.prediction_feature.name}' (#{model.dependent_variables}) with (#{v}) failed."
- end
- end
-
- model.independent_variables = selected_variables
- model.descriptor_ids = selected_descriptor_ids
- model
- end
-
- def self.to_r v
- return 0 if v == false
- return 1 if v == true
- v
- end
-
- end
-
- end
-end