#!/usr/bin/env ruby
# Derives the files pa-cids.csv, pa-ids.csv, pa-names.tsv, pa-groups.csv and
# pa-cdk.csv in pyrrolizidine-alkaloids/ from the raw tables in
# pyrrolizidine-alkaloids/src. Every output row is keyed by the canonical
# SMILES that the external `obabel` program prints for the corresponding row
# of 180920_PA_complete_SMILES.csv.
require 'csv'
require 'open3'

# Writes +header+ and then every entry of +rows+ to +path+, one line each,
# joining the cells with +sep+. This is a plain join: no CSV quoting is applied.
def write_rows(path, header, rows, sep = ",")
  File.open(path, "w") do |f|
    f.puts header.join(sep)
    rows.each { |cells| f.puts cells.join(sep) }
  end
end

src = File.join "pyrrolizidine-alkaloids", "src"
dest = "pyrrolizidine-alkaloids"

ids = CSV.read(File.join(src, "180920_PA_complete_SMILES.csv"), headers: true, col_sep: ";")

# The fourth field of every row is handed to obabel as SMILES input; any ";"
# (the column separator of the source file) left inside it is removed first.
smiles = ids.collect { |r| r[3].gsub(';', '') }

# Run obabel directly (argv form, no shell) and pass the SMILES through stdin.
# Interpolating them into `echo "..." | obabel` exposed them to shell and echo
# escape processing: backslashes are legal SMILES bond symbols, and "$",
# backticks or double quotes in the data would have been interpreted as well.
obabel_out, = Open3.capture2("obabel", "-ismi", "-", "-ocan", stdin_data: smiles.join("\n") + "\n")
cansmi = obabel_out.gsub("\t", "").split("\n")
raise "Could not convert all smiles" unless ids.size == cansmi.size

# CID -> canonical SMILES, used further down to re-key the descriptor table.
# Filled in row order, so a repeated CID keeps the SMILES of its last row.
cids = ids["CID"]
smi_by_cid = {}
cids.each_with_index { |cid, i| smi_by_cid[cid] = cansmi[i] }

write_rows(File.join(dest, "pa-cids.csv"), ["CID", "Canonical SMILES"], cids.zip(cansmi))
write_rows(File.join(dest, "pa-ids.csv"), ["ID", "Canonical SMILES"], ids["ID"].zip(cansmi))

# Names lose their first "1: " occurrence; a missing name yields an empty cell.
names = ids["Name"].collect { |name| name && name.sub("1: ", "") }
write_rows(File.join(dest, "pa-names.tsv"), ["Canonical SMILES", "Name"], cansmi.zip(names), "\t")

groups = CSV.read(File.join(src, "pa-groups.original.csv"), headers: true, col_sep: ";")
# The first column of the groups table has no header (hence the nil key) and
# must list the same IDs, in the same order, as the master table.
raise "Unequal IDs in 180920_PA_complete_SMILES.csv and pa-groups.original.csv" unless ids["ID"] == groups[nil]

# Every column after the first becomes a 0/1 flag: "NA" -> 0, anything else -> 1.
# Header and data both take "all columns after the first", so their widths
# cannot drift apart the way a hard-coded 1..9 slice could.
group_rows = groups.each_with_index.collect do |row, i|
  [cansmi[i]] + row.fields.drop(1).collect { |g| g == "NA" ? 0 : 1 }
end
write_rows(File.join(dest, "pa-groups.csv"), ["Canonical SMILES"] + groups.headers.drop(1), group_rows)

# Fields that start with optional "-" signs, digits and a comma are treated as
# decimal-comma numbers and converted to Float; everything else passes through.
# Kept as a local lambda instead of being stored in the global CSV::Converters
# registry (the original assignment put the parsed table there by mistake).
comma_numbers = ->(s) { (s =~ /^-*\d+,/) ? s.sub(',', '.').to_f : s }
cdk = CSV.read(File.join(src, "PA-Padel-2D_m2.csv"), headers: true, col_sep: ";", converters: comma_numbers)

headers = cdk.headers
headers[0] = "Canonical SMILES"
File.open(File.join(dest, "pa-cdk.csv"), "w") do |f|
  f.puts headers.join(",")
  cdk.each do |row|
    # Swap the first field (looked up as a CID) for its canonical SMILES;
    # an unknown CID leaves the field empty.
    row[0] = smi_by_cid[row[0]]
    # CSV::Row#to_s emits a comma-separated line including its own newline.
    f.puts row.to_s
  end
end