diff options
author | Christoph Helma <helma@in-silico.ch> | 2021-02-06 20:21:58 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2021-02-06 20:21:58 +0100 |
commit | 771a2381ae0fd5e352f23d7223baeb26e8bb4e02 (patch) | |
tree | 1a7182a83ec15b9d7a69e5508c8d3b2bfa5983df /scripts/pa-table.rb | |
parent | 241f997c3a1a6a38fa47070f5efbd23852cc432b (diff) |
svm validation, tensorflow mp2d pa predictions, r results removed, cleanup
Diffstat (limited to 'scripts/pa-table.rb')
-rwxr-xr-x | scripts/pa-table.rb | 28 |
1 files changed, 21 insertions, 7 deletions
diff --git a/scripts/pa-table.rb b/scripts/pa-table.rb index 8c1037e..1b8ecc8 100755 --- a/scripts/pa-table.rb +++ b/scripts/pa-table.rb @@ -2,7 +2,7 @@ # red groups tab = [] -File.read("pyrrolizidine-alkaloids/R/PA.RF.outcome.csv").each_line do |l| +File.read("data/pyrrolizidine-alkaloids/pa-groups.csv").each_line do |l| items = l.chomp.split(';') if items.first.empty? items[0] = "ID" @@ -17,7 +17,7 @@ end tab[0] += ["CID","SMILES","Canonical SMILES","Measured","lazar-MP2D","lazar-MP2D-high-confidence","lazar-CDK","lazar-CDK-high-confidence"] i = 0 -File.read("pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv").each_line do |l| +File.read("data/pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv").each_line do |l| if i > 0 id,cid,name,smi = l.chomp.split(";") tab[i] += [cid,'"'+smi+'"'] @@ -26,7 +26,7 @@ File.read("pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv").each_line do end i = 0 -File.read("pyrrolizidine-alkaloids/lazar/pa-mp2d-predictions.csv").each_line do |l| +File.read("pyrrolizidine-alkaloids/mp2d/lazar/pa-mp2d-predictions.csv").each_line do |l| if i > 0 id,cansmi,exp,mut,p0,p1,max_sim,nn = l.chomp.split(",") max_sim.to_f < 0.5? hc = "F" : hc = "T" @@ -36,8 +36,8 @@ File.read("pyrrolizidine-alkaloids/lazar/pa-mp2d-predictions.csv").each_line do i += 1 end -i=0 -File.read("pyrrolizidine-alkaloids/lazar/pa-cdk-predictions.csv").each_line do |l| +i=1 +File.read("pyrrolizidine-alkaloids/cdk/lazar/pa-cdk-predictions.csv").each_line do |l| #if i > 0 cansmi,exp,mut,p0,p1,max_sim,nn = l.chomp.split(",") max_sim.to_f < 0.5? hc = "F" : hc = "T" @@ -47,6 +47,7 @@ File.read("pyrrolizidine-alkaloids/lazar/pa-cdk-predictions.csv").each_line do | i += 1 end +=begin Dir["pyrrolizidine-alkaloids/R/PA.*.outcome.csv"].each do |r| tab[0] << "R-"+r.sub('pyrrolizidine-alkaloids/R/PA.','').sub('.outcome.csv','') i = 0 @@ -63,9 +64,22 @@ Dir["pyrrolizidine-alkaloids/R/PA.*.outcome.csv"].each do |r| i += 1 end end +=end -Dir["pyrrolizidine-alkaloids/tensorflow/pred.*.v5-ext-Padel-2D.csv"].each do |r| - tab[0] << "TF-"+r.sub('pyrrolizidine-alkaloids/tensorflow/pred.','').sub('.v5-ext-Padel-2D.csv','').sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("rf","RF").sub("nn","NN") +Dir["pyrrolizidine-alkaloids/cdk/tensorflow/*.csv"].each do |r| + tab[0] << "TF-"+r.sub('pyrrolizidine-alkaloids/tensorflow/pred.','').sub('.v5-ext-Padel-2D.csv','').sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("rf","RF").sub("nn","NN").sub("svm","SVM") + i = 0 + File.read(r).each_line do |l| + if i > 0 + id,pred = l.chomp.split(",") + pred.to_f > 0.5 ? tab[i] << 1 : tab[i] << 0 + end + i += 1 + end +end + +Dir["pyrrolizidine-alkaloids/cdk/tensorflow/*.csv"].each do |r| + tab[0] << "TF-"+r.sub('pyrrolizidine-alkaloids/tensorflow/pred.','').sub('.v5-ext-Padel-2D.csv','').sub("lr2","LR-scikit").sub("lr","LR-sgd").sub("rf","RF").sub("nn","NN").sub("svm","SVM") i = 0 File.read(r).each_line do |l| if i > 0 |