summary refs log tree commit diff
path: root/scripts/pa-table.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2021-02-06 20:21:58 +0100
committer Christoph Helma <helma@in-silico.ch> 2021-02-06 20:21:58 +0100
commit 771a2381ae0fd5e352f23d7223baeb26e8bb4e02 (patch)
tree 1a7182a83ec15b9d7a69e5508c8d3b2bfa5983df /scripts/pa-table.rb
parent 241f997c3a1a6a38fa47070f5efbd23852cc432b (diff)
svm validation, tensorflow mp2d pa predictions, r results removed, cleanup
Diffstat (limited to 'scripts/pa-table.rb')
-rwxr-xr-x scripts/pa-table.rb 28
1 files changed, 21 insertions, 7 deletions
diff --git a/scripts/pa-table.rb b/scripts/pa-table.rb
index 8c1037e..1b8ecc8 100755
--- a/scripts/pa-table.rb
+++ b/scripts/pa-table.rb
@@ -2,7 +2,7 @@
# red groups
tab = []
-File.read("pyrrolizidine-alkaloids/R/PA.RF.outcome.csv").each_line do |l|
+File.read("data/pyrrolizidine-alkaloids/pa-groups.csv").each_line do |l|
items = l.chomp.split(';')
if items.first.empty?
items[0] = "ID"
@@ -17,7 +17,7 @@ end
tab[0] += ["CID","SMILES","Canonical SMILES","Measured","lazar-MP2D","lazar-MP2D-high-confidence","lazar-CDK","lazar-CDK-high-confidence"]
i = 0
-File.read("pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv").each_line do |l|
+File.read("data/pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv").each_line do |l|
if i > 0
id,cid,name,smi = l.chomp.split(";")
tab[i] += [cid,'"'+smi+'"']
@@ -26,7 +26,7 @@ File.read("pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv").each_line do
end
i = 0
-File.read("pyrrolizidine-alkaloids/lazar/pa-mp2d-predictions.csv").each_line do |l|
+File.read("pyrrolizidine-alkaloids/mp2d/lazar/pa-mp2d-predictions.csv").each_line do |l|
if i > 0
id,cansmi,exp,mut,p0,p1,max_sim,nn = l.chomp.split(",")
max_sim.to_f < 0.5? hc = "F" : hc = "T"
@@ -36,8 +36,8 @@ File.read("pyrrolizidine-alkaloids/lazar/pa-mp2d-predictions.csv").each_line do
i += 1
end
-i=0
-File.read("pyrrolizidine-alkaloids/lazar/pa-cdk-predictions.csv").each_line do |l|
+i=1
+File.read("pyrrolizidine-alkaloids/cdk/lazar/pa-cdk-predictions.csv").each_line do |l|
#if i > 0
cansmi,exp,mut,p0,p1,max_sim,nn = l.chomp.split(",")
max_sim.to_f < 0.5? hc = "F" : hc = "T"
@@ -47,6 +47,7 @@ File.read("pyrrolizidine-alkaloids/lazar/pa-cdk-predictions.csv").each_line do |
i += 1
end
+=begin
Dir["pyrrolizidine-alkaloids/R/PA.*.outcome.csv"].each do |r|
tab[0] << "R-"+r.sub('pyrrolizidine-alkaloids/R/PA.','').sub('.outcome.csv','')
i = 0
@@ -63,9 +64,22 @@ Dir["pyrrolizidine-alkaloids/R/PA.*.outcome.csv"].each do |r|
i += 1
end
end
+=end
-Dir["pyrrolizidine-alkaloids/tensorflow/pred.*.v5-ext-Padel-2D.csv"].each do |r|
- tab[0] << "TF-"+r.sub('pyrrolizidine-alkaloids/tensorflow/pred.','').sub('.v5-ext-Padel-2D.csv','').sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("rf","RF").sub("nn","NN")
+Dir["pyrrolizidine-alkaloids/cdk/tensorflow/*.csv"].each do |r|
+ tab[0] << "TF-"+r.sub('pyrrolizidine-alkaloids/tensorflow/pred.','').sub('.v5-ext-Padel-2D.csv','').sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("rf","RF").sub("nn","NN").sub("svm","SVM")
+ i = 0
+ File.read(r).each_line do |l|
+ if i > 0
+ id,pred = l.chomp.split(",")
+ pred.to_f > 0.5 ? tab[i] << 1 : tab[i] << 0
+ end
+ i += 1
+ end
+end
+
+Dir["pyrrolizidine-alkaloids/cdk/tensorflow/*.csv"].each do |r|
+ tab[0] << "TF-"+r.sub('pyrrolizidine-alkaloids/tensorflow/pred.','').sub('.v5-ext-Padel-2D.csv','').sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("rf","RF").sub("nn","NN").sub("svm","SVM")
i = 0
File.read(r).each_line do |l|
if i > 0