From 36ea2f865014a761be9fe74719f4c88dbaffeb81 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 27 Oct 2014 15:48:01 +0100 Subject: inital commit more documentation will be added --- nch/02_decode_inchi.rb | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100755 nch/02_decode_inchi.rb (limited to 'nch/02_decode_inchi.rb') diff --git a/nch/02_decode_inchi.rb b/nch/02_decode_inchi.rb new file mode 100755 index 0000000..f04f8c3 --- /dev/null +++ b/nch/02_decode_inchi.rb @@ -0,0 +1,54 @@ +#!/usr/bin/env ruby + +require "csv" +require "uri" + +require "./config.rb" + +DATA.each do |d| + $stderr.puts "\ndataset name #{d}" + + @inchis = [] + + ["orig-features","endpoint","complete","test"].each do |mode| + + next unless File.exist?("data/01/#{d}_#{mode}_enc.csv") + +# csv_string = CSV.generate({:force_quotes=>true}) do |csv| + csv_string = CSV.generate() do |csv| + + first = true + skip_lipinksi = false + CSV.foreach("data/01/#{d}_#{mode}_enc.csv") do |row| + if (first) + first = false + if row[-1]=="LipinksiFailures" + $stderr.puts "Skipping col LipinksiFailures from #{mode}" + skip_lipinksi=true + end + if row[-1]=="LOAEL_log_mmol_kg_bw_day" + $stderr.puts "rename mmol to mol (LOAEL_log_mmol_kg_bw_day to LOAEL_log_mol_kg_bw_day)" + row[-1] = "LOAEL_log_mol_kg_bw_day" + end + raise row.inspect unless row[0]=="SMILES" + csv << ["InChI"]+row[1..(skip_lipinksi ? -2 : -1)] + else + inchi = URI.unescape(row[0]) + csv << [inchi]+row[1..(skip_lipinksi ? -2 : -1)] + end + end + end + + header = csv_string.split("\n")[0] + content = csv_string.split("\n")[1..-1] + content.sort! + csv_string = header+"\n"+content.join("\n") + +# dest = (mode=="features" ? "orig-features-quotes" : mode) +# dest = (mode=="features" ? "orig-features" : mode) + File.open("data/02/#{d}_#{mode}.csv","w").puts csv_string + $stderr.puts "written to data/02/#{d}_#{mode}.csv" + end + +end + -- cgit v1.2.3