1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
#!/usr/bin/env ruby
# Merges the pyrrolizidine alkaloid compound list with the lazar, R and
# tensorflow prediction files into one comma-separated table on stdout.
#
# Rows are matched purely by position: the n-th data line of every input file
# is appended to the n-th compound read from the SMILES file, so all inputs
# must list the compounds in the same order.

# Wraps a value in double quotes so that it is emitted as one quoted CSV cell.
def quoted(value)
  "\"#{value}\""
end

# Returns the value for a "high-confidence" column: "" when there is no
# prediction, "F" when max_sim is below 0.5 and "T" otherwise.
#
# String#split drops trailing empty fields, so a line without a prediction may
# yield nil for +mut+; to_s lets that case produce an empty cell instead of
# raising NoMethodError.
def high_confidence_flag(mut, max_sim)
  return "" if mut.to_s.empty?

  max_sim.to_f < 0.5 ? "F" : "T"
end

header = ["ID","CID","Name","SMILES","Canonical SMILES","Measured","lazar-MP2D","lazar-MP2D-high-confidence","lazar-PaDEL","lazar-PaDEL-high-confidence"]
tab = []

# Compound list (semicolon separated); the first line is a header and skipped.
File.foreach("pyrrolizidine-alkaloids/180920_PA_complete_SMILES.csv").drop(1).each do |line|
  id, cid, name, smi = line.chomp.split(";")
  # The first occurrence of '1: ' is stripped from the compound name.
  tab << [id, cid, quoted(name.sub('1: ', '')), quoted(smi)]
end

# lazar MP2D predictions; the first line is a header and skipped.
File.foreach("pyrrolizidine-alkaloids/lazar/pa-mp2d-predictions.csv").drop(1).each_with_index do |line, row|
  _id, cansmi, exp, mut, _p0, _p1, max_sim, _nn = line.chomp.split(",")
  tab[row].concat([quoted(cansmi), exp, mut, high_confidence_flag(mut, max_sim)])
end

# lazar PaDEL predictions. Unlike the other inputs this file is consumed from
# its very first line (the header skip was commented out in the original
# script) -- presumably the file has no header row; TODO confirm.
File.foreach("pyrrolizidine-alkaloids/lazar/pa-padel-predictions.csv").each_with_index do |line, row|
  _cansmi, _exp, mut, _p0, _p1, max_sim, _nn = line.chomp.split(",")
  tab[row].concat([mut, high_confidence_flag(mut, max_sim)])
end

# R outcome files, one output column per file. The glob result is sorted
# because Dir[] does not guarantee an order on Ruby versions before 3.0, which
# would make the column order differ between runs.
Dir["pyrrolizidine-alkaloids/R/PA.*.outcome.csv"].sort.each do |path|
  header << "R-" + path.sub('pyrrolizidine-alkaloids/R/PA.', '').sub('.outcome.csv', '')
  File.foreach(path).drop(1).each_with_index do |line, row|
    # The first field is dropped; the remaining fields are the outcomes.
    outcomes = line.chomp.split(";").drop(1)
    outcome = if outcomes.include?("1")
                1
              elsif outcomes.include?("0")
                0
              end
    # A row with neither "1" nor "0" gets nil, which join renders as an empty
    # cell. Appending nothing would shift every later column of this row left.
    tab[row] << outcome
  end
end

# Tensorflow prediction files, one output column per file (sorted for the same
# reason as above). A prediction above 0.5 is written as 1, anything else as 0.
Dir["pyrrolizidine-alkaloids/tensorflow/pred.*.v3-ext-Padel-2D.csv"].sort.each do |path|
  header << "TF-" + path.sub('pyrrolizidine-alkaloids/tensorflow/pred.', '').sub('.v3-ext-Padel-2D.csv', '').sub("lr2", "LR-scikit").sub("lr", "LR-sgd").sub("rf", "RF").sub("nn", "NN")
  File.foreach(path).drop(1).each_with_index do |line, row|
    _id, pred = line.chomp.split(",")
    tab[row] << (pred.to_f > 0.5 ? 1 : 0)
  end
end

puts header.join(",")
puts tab.map { |row| row.join(",") }.join("\n")
|