summary refs log tree commit diff
path: root/feature-filter.rb
diff options
context:
space:
mode:
author gebele <gebele@in-silico.ch> 2016-01-22 23:18:56 +0100
committer gebele <gebele@in-silico.ch> 2016-01-22 23:18:56 +0100
commit 88ac5fbe3d8d3141fbad81460b13d6cb8284da26 (patch)
tree 783f9da714155a5848c84905159a51b002c6a80c /feature-filter.rb
parent 09b750e1639d351d24cff3cca74681c761b17503 (diff)
refined prediction
Diffstat (limited to 'feature-filter.rb')
-rw-r--r-- feature-filter.rb 36
1 files changed, 36 insertions, 0 deletions
diff --git a/feature-filter.rb b/feature-filter.rb
new file mode 100644
index 0000000..3765842
--- /dev/null
+++ b/feature-filter.rb
@@ -0,0 +1,36 @@
+require 'rserve'
+require 'json'
+require 'yaml'
+require 'csv'
+
+R = Rserve::Connection.new # Rserve connection created once at load time; feature_filter sends its data and R code through it
+ENDPOINT = "Cell.association (Net cell association [mL/ug(Mg)])" # key read from each entry's "tox" hash in feature_filter
+
+def feature_filter
+ data = JSON.parse(File.read("./data.json"))
+ features = data["G15.AC"]["physchem"].keys
+ R.assign "tox", data.collect{|id,cats| cats["tox"][ENDPOINT]}
+ filtered_features = {}
+ features.each do |feature|
+ R.assign "feature", data.collect{|id,cats| cats["physchem"][feature]}
+ begin
+ #R.eval "cor <- cor.test(-log(tox),-log(feature),use='complete')"
+ R.eval "cor <- cor.test(tox,feature,method = 'pearson',use='complete')"
+ pvalue = R.eval("cor$p.value").to_ruby
+ if pvalue <= 0.05
+ r = R.eval("cor$estimate").to_ruby
+ filtered_features[feature] = {}
+ filtered_features[feature]["pvalue"] = pvalue
+ filtered_features[feature]["r"] = r
+ end
+ rescue
+ f = data.collect{|id,cats| cats["physchem"][feature]}
+ f = R.eval("feature").to_ruby
+ p f.collect{|f| p f; Math.log f}
+ p R.eval("log(feature)").to_ruby
+ end
+ end
+ filtered_features.sort{|a,b| a[1]["pvalue"] <=> b[1]["pvalue"]}.to_h
+end
+
+puts feature_filter.to_json # script entry point: run the filter and print its result as JSON on stdout