summaryrefslogtreecommitdiff
path: root/nch/02_decode_inchi.rb
diff options
context:
space:
mode:
Diffstat (limited to 'nch/02_decode_inchi.rb')
-rwxr-xr-xnch/02_decode_inchi.rb54
1 files changed, 54 insertions, 0 deletions
diff --git a/nch/02_decode_inchi.rb b/nch/02_decode_inchi.rb
new file mode 100755
index 0000000..f04f8c3
--- /dev/null
+++ b/nch/02_decode_inchi.rb
@@ -0,0 +1,54 @@
+#!/usr/bin/env ruby
+
+require "csv"
+require "uri"
+
+require "./config.rb"
+
+DATA.each do |d|
+ $stderr.puts "\ndataset name #{d}"
+
+ @inchis = []
+
+ ["orig-features","endpoint","complete","test"].each do |mode|
+
+ next unless File.exist?("data/01/#{d}_#{mode}_enc.csv")
+
+# csv_string = CSV.generate({:force_quotes=>true}) do |csv|
+ csv_string = CSV.generate() do |csv|
+
+ first = true
+ skip_lipinksi = false
+ CSV.foreach("data/01/#{d}_#{mode}_enc.csv") do |row|
+ if (first)
+ first = false
+ if row[-1]=="LipinksiFailures"
+ $stderr.puts "Skipping col LipinksiFailures from #{mode}"
+ skip_lipinksi=true
+ end
+ if row[-1]=="LOAEL_log_mmol_kg_bw_day"
+ $stderr.puts "rename mmol to mol (LOAEL_log_mmol_kg_bw_day to LOAEL_log_mol_kg_bw_day)"
+ row[-1] = "LOAEL_log_mol_kg_bw_day"
+ end
+ raise row.inspect unless row[0]=="SMILES"
+ csv << ["InChI"]+row[1..(skip_lipinksi ? -2 : -1)]
+ else
+ inchi = URI.unescape(row[0])
+ csv << [inchi]+row[1..(skip_lipinksi ? -2 : -1)]
+ end
+ end
+ end
+
+ header = csv_string.split("\n")[0]
+ content = csv_string.split("\n")[1..-1]
+ content.sort!
+ csv_string = header+"\n"+content.join("\n")
+
+# dest = (mode=="features" ? "orig-features-quotes" : mode)
+# dest = (mode=="features" ? "orig-features" : mode)
+ File.open("data/02/#{d}_#{mode}.csv","w").puts csv_string
+ $stderr.puts "written to data/02/#{d}_#{mode}.csv"
+ end
+
+end
+