# Reconstructed from commit 5d4e5e463c2b87241bbb56e4658e1e26c0ed084f
# (Christoph Helma, 2016-10-05, "substance and nanoparticle model creation
# and predictions"), which created lib/feature_selection.rb.
module OpenTox
  module Algorithm

    # Feature selection algorithms.
    class FeatureSelection

      # Selects the substance properties whose values correlate significantly
      # (Pearson correlation test, p-value <= 0.05) with the measurements of
      # the prediction feature.
      #
      # Depends on two names defined outside this file: +R+ (used through
      # +R.assign+ and +R.eval(...).to_ruby+) and +Feature.find+.
      #
      # @param dataset object responding to +substances+ and
      #   +values(substance, prediction_feature)+; each substance is indexed
      #   with +["properties"]+, a mapping of feature id => list of values
      # @param prediction_feature handed unchanged to +dataset.values+; used
      #   for the failure warning either directly (when it responds to +name+)
      #   or through +Feature.find+
      # @param types [#include?, nil] when given, only features whose
      #   +category+ is included are tested
      # @return [Hash] feature id => {"pvalue", "r", "mean", "sd"}, ordered by
      #   ascending p-value
      def self.correlation_filter(dataset:, prediction_feature:, types: nil)
        # TODO: speedup, single assignment of all features to R + parallel computation of significance?
        measurements = []
        substances = []
        # One entry per measurement: a substance with several measurements is
        # repeated so that both arrays stay aligned index by index.
        dataset.substances.each do |substance|
          dataset.values(substance, prediction_feature).each do |activity|
            measurements << activity
            substances << substance
          end
        end
        R.assign "tox", measurements

        feature_ids = dataset.substances.flat_map { |s| s["properties"].keys }.uniq
        feature_ids.select! { |fid| types.include?(Feature.find(fid).category) } if types

        relevant_features = {}
        feature_ids.each do |feature_id|
          # Only the first value of each property is used; substances lacking
          # the property contribute nil.
          feature_values = substances.map do |s|
            values = s["properties"][feature_id]
            values.first if values
          end
          # Constant (or empty) vectors cannot be correlated with anything.
          next if feature_values.uniq.size < 2

          R.assign "feature", feature_values
          begin
            R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='pairwise')"
            pvalue = R.eval("cor$p.value").to_ruby
            if pvalue <= 0.05
              relevant_features[feature_id] = {
                "pvalue" => pvalue,
                "r" => R.eval("cor$estimate").to_ruby,
                "mean" => R.eval("mean(feature, na.rm=TRUE)").to_ruby,
                "sd" => R.eval("sd(feature, na.rm=TRUE)").to_ruby
              }
            end
          rescue
            # The original referenced an undefined local (prediction_feature_id)
            # here, which raised NameError instead of emitting the warning.
            target = prediction_feature.respond_to?(:name) ? prediction_feature : Feature.find(prediction_feature)
            warn "Correlation of '#{Feature.find(feature_id).name}' (#{feature_values}) with '#{target.name}' (#{measurements}) failed."
          end
        end

        relevant_features.sort_by { |_, stats| stats["pvalue"] }.to_h
      end

    end

  end
end