blob: f04f8c370058f164827c4213c3ea352c3d310ce5 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
#!/usr/bin/env ruby
require "csv"
require "uri"
require "./config.rb"
# Convert the encoded CSV files found under data/01 into sorted CSV files
# under data/02.
#
# DATA is not defined in this script; it is expected to be provided by
# ./config.rb and to enumerate dataset names.
#
# For each dataset and each mode, data/01/<dataset>_<mode>_enc.csv (if present)
# is rewritten to data/02/<dataset>_<mode>.csv as follows:
# * the header row must start with "SMILES" (otherwise the row is raised);
#   that first column is emitted under the name "InChI"
# * a trailing "LipinksiFailures" column is dropped from every row
#   (the spelling is the one used in the input data, do not "fix" it)
# * a trailing "LOAEL_log_mmol_kg_bw_day" header is renamed to
#   "LOAEL_log_mol_kg_bw_day"
# * the first field of every data row is percent-decoded
# * data rows are sorted as plain strings; the header stays on top
DATA.each do |d|
  $stderr.puts "\ndataset name #{d}"
  %w[orig-features endpoint complete test].each do |mode|
    source = "data/01/#{d}_#{mode}_enc.csv"
    target = "data/02/#{d}_#{mode}.csv"
    next unless File.exist?(source)

    # Index of the last column to copy; becomes -2 when the trailing
    # "LipinksiFailures" column has to be dropped.
    last_col = -1
    header_seen = false

    csv_string = CSV.generate do |csv|
      CSV.foreach(source) do |row|
        if header_seen
          # URI.unescape was removed in Ruby 3.0. The default parser's
          # unescape is what it delegated to and, unlike
          # URI.decode_www_form_component, it leaves "+" untouched.
          inchi = URI::DEFAULT_PARSER.unescape(row[0])
          csv << [inchi] + row[1..last_col]
        else
          header_seen = true
          if row[-1] == "LipinksiFailures"
            $stderr.puts "Skipping col LipinksiFailures from #{mode}"
            last_col = -2
          end
          if row[-1] == "LOAEL_log_mmol_kg_bw_day"
            $stderr.puts "rename mmol to mol (LOAEL_log_mmol_kg_bw_day to LOAEL_log_mol_kg_bw_day)"
            row[-1] = "LOAEL_log_mol_kg_bw_day"
          end
          raise row.inspect unless row[0] == "SMILES"
          csv << ["InChI"] + row[1..last_col]
        end
      end
    end

    # Sorting works on the serialized lines, not on parsed rows.
    # NOTE(review): a quoted field containing a newline would be split here;
    # presumably the input never contains one - confirm against the data.
    header, *content = csv_string.split("\n")
    if header.nil?
      # An empty input file yields no header; there is nothing to write.
      $stderr.puts "Skipping empty file #{source}"
      next
    end
    content.sort!
    csv_string = header + "\n" + content.join("\n")

    # Block form closes (and therefore flushes) the file deterministically.
    File.open(target, "w") { |f| f.puts csv_string }
    $stderr.puts "written to #{target}"
  end
end
|