diff options
author | Christoph Helma <helma@in-silico.ch> | 2016-01-21 19:26:48 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2016-01-21 19:26:48 +0100 |
commit | d8f1e75ba45cb770f421fa950861c6ff502d64dd (patch) | |
tree | d489ad054770f4b1528fdb60f9a13a9e7de3a3bd /feature-filter.rb | |
parent | 9546c589f6852942ed85f8da1e12c351fb92e0f0 (diff) |
feature selection added
Diffstat (limited to 'feature-filter.rb')
-rw-r--r-- | feature-filter.rb | 36 |
1 files changed, 36 insertions, 0 deletions
# Originally added as feature-filter.rb (new file, mode 100644, blob 3765842).
#
# Correlates every physchem feature in a JSON data file with one toxicity
# endpoint (Pearson, computed in R through Rserve) and prints the features
# that pass the significance threshold as JSON, ordered by p-value.
require 'rserve'
require 'json'
require 'yaml'
require 'csv'

# Rserve connection, opened when the script is loaded; all statistics run in R.
R = Rserve::Connection.new
# Key looked up in each entry's "tox" hash to obtain the response variable.
ENDPOINT = "Cell.association (Net cell association [mL/ug(Mg)])"

# Selects the physchem features that correlate significantly with ENDPOINT.
#
# @param path [String] JSON file mapping an entry id to a hash that holds
#   "tox" and "physchem" sub-hashes
# @param reference [String] id of the entry whose "physchem" keys define the
#   list of features to test
# @param max_pvalue [Float] inclusive p-value threshold for keeping a feature
# @return [Hash] feature name => {"pvalue" => ..., "r" => ...}, ordered by
#   ascending p-value
# @raise [KeyError] if +reference+ (or its "physchem" hash) is missing
def feature_filter(path: "./data.json", reference: "G15.AC", max_pvalue: 0.05)
  data = JSON.parse(File.read(path))
  entries = data.values
  features = data.fetch(reference).fetch("physchem").keys

  # The response vector is identical for every feature, so assign it once.
  R.assign "tox", entries.map { |cats| cats["tox"][ENDPOINT] }

  significant = {}
  features.each do |feature|
    R.assign "feature", entries.map { |cats| cats["physchem"][feature] }
    begin
      # Earlier variant on log-transformed values, kept for reference:
      # R.eval "cor <- cor.test(-log(tox),-log(feature),use='complete')"
      # NOTE(review): 'use' is documented for R's cor(), not cor.test() - confirm it has any effect here.
      R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='complete')"
      pvalue = R.eval("cor$p.value").to_ruby
      next unless pvalue <= max_pvalue

      significant[feature] = { "pvalue" => pvalue, "r" => R.eval("cor$estimate").to_ruby }
    rescue StandardError => e
      # Best effort: a feature R cannot correlate is skipped. The report goes to
      # stderr so that the JSON written to stdout stays parseable.
      warn "feature_filter: skipping #{feature.inspect}: #{e.class}: #{e.message}"
    end
  end

  significant.sort_by { |_name, stats| stats["pvalue"] }.to_h
end

puts feature_filter.to_json