1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
#!/usr/bin/env ruby
require 'csv'
require 'open3' # stdlib; lets us feed obabel through stdin without a shell

# Builds the derived pyrrolizidine-alkaloid tables in `dest` from the raw
# semicolon-separated exports in `src`:
#   pa-cids.csv    CID,  canonical SMILES
#   pa-ids.csv     ID,   canonical SMILES
#   pa-names.tsv   canonical SMILES, name
#   pa-groups.csv  canonical SMILES, one 0/1 flag per group column
#   pa-cdk.csv     canonical SMILES followed by the columns of PA-Padel-2D_m2.csv
# Needs the Open Babel `obabel` executable on PATH.
src = File.join("pyrrolizidine-alkaloids", "src")
dest = "pyrrolizidine-alkaloids"

ids = CSV.read(File.join(src, "180920_PA_complete_SMILES.csv"), headers: true, col_sep: ";")

# Canonicalise all SMILES (fourth column) in a single obabel run. The input is
# written straight to obabel's stdin instead of being interpolated into an
# `echo "..."` shell string: SMILES may contain backslashes, which some
# /bin/sh implementations expand inside echo (e.g. "\c", "\n") and thereby
# corrupt or truncate the input.
smiles = ids.map { |row| row[3].delete(";") }
obabel_out, = Open3.capture2("obabel", "-ismi", "-", "-ocan", stdin_data: smiles.join("\n") + "\n")
cansmi = obabel_out.delete("\t").split("\n")
raise "Could not convert all smiles" unless ids.size == cansmi.size

# CID -> canonical SMILES; also kept in memory to relabel the descriptor table below.
smi_by_cid = {}
File.open(File.join(dest, "pa-cids.csv"), "w+") do |f|
  f.puts ["CID", "Canonical SMILES"].join(",")
  ids["CID"].zip(cansmi).each do |cid, smi|
    f.puts [cid, smi].join(",")
    smi_by_cid[cid] = smi
  end
end

File.open(File.join(dest, "pa-ids.csv"), "w+") do |f|
  f.puts ["ID", "Canonical SMILES"].join(",")
  ids["ID"].zip(cansmi).each do |id, smi|
    f.puts [id, smi].join(",")
  end
end

File.open(File.join(dest, "pa-names.tsv"), "w+") do |f|
  f.puts ["Canonical SMILES", "Name"].join("\t")
  ids["Name"].zip(cansmi).each do |name, smi|
    # Drop the first "1: " marker from the name; non-mutating so the parsed
    # table is left untouched. Missing names are written as an empty field.
    name = name.sub("1: ", "") if name
    f.puts [smi, name].join("\t")
  end
end

groups = CSV.read(File.join(src, "pa-groups.original.csv"), headers: true, col_sep: ";")
# groups[nil] selects the column whose header is nil (an unnamed column); it
# must list the same IDs, in the same order, as the SMILES table.
raise "Unequal IDs in 180920_PA_complete_SMILES.csv and pa-groups.original.csv" unless ids["ID"] == groups[nil]

File.open(File.join(dest, "pa-groups.csv"), "w+") do |f|
  f.puts((["Canonical SMILES"] + groups.headers.drop(1)).join(","))
  groups.each_with_index do |row, i|
    # Every column after the first is a group flag: "NA" becomes 0, anything
    # else 1. All remaining columns are taken (not a fixed 1..9 slice) so the
    # data rows always line up with the header row written above.
    flags = row.fields.drop(1).map { |g| g == "NA" ? 0 : 1 }
    f.puts(([cansmi[i]] + flags).join(","))
  end
end

# Fields that start with optional minus signs, digits and a comma are treated
# as decimal-comma numbers and converted to Float; everything else is kept.
CSV::Converters[:comma_numbers] = lambda { |s| s =~ /^-*\d+,/ ? s.sub(',', '.').to_f : s }
cdk = CSV.read(File.join(src, "PA-Padel-2D_m2.csv"), headers: true, col_sep: ";", converters: :comma_numbers)
headers = cdk.headers
headers[0] = "Canonical SMILES"
File.open(File.join(dest, "pa-cdk.csv"), "w+") do |f|
  f.puts headers.join(",")
  cdk.each do |row|
    # The first column is looked up as a CID and replaced by its canonical
    # SMILES (left empty when the CID is not present in the SMILES table).
    row[0] = smi_by_cid[row[0]]
    f.puts row.to_s
  end
end
|