#!/usr/bin/env ruby
# frozen_string_literal: true

# Converts a comma-separated activity table into a "SMILES,Activity" CSV.
#
# Reads the file named by the first command-line argument, skips its leading
# lines, takes the identifier from column 1 and the activity from column 3 of
# every remaining row, looks the identifiers up at PubChem in chunks and
# prints one "SMILES,Activity" row per input row to stdout.
#
# Usage: <this script> INPUT_FILE > output.csv

require 'net/http'
require 'uri'

#require_relative '../../lazar/lib/lazar.rb'

# Number of leading lines of the input file that carry no data rows
# (presumably column headers/descriptions -- TODO confirm against the input format).
HEADER_LINES = 3

# Number of identifiers sent to PubChem per request.
CHUNK_SIZE = 100

# Zero-based column positions in the comma-separated input.
ID_COLUMN = 1
ACTIVITY_COLUMN = 3

PUBCHEM_BASE_URL = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid'

# Reads [identifier, activity] pairs from the data rows of +path+.
#
# Rows whose identifier is missing or not a plain integer are reported on
# stderr and skipped: they cannot be looked up, and passing them on would
# corrupt the comma-separated identifier list of the whole chunk.
#
# NOTE: fields are split on bare commas, quoted fields containing commas are
# not supported.
#
# @param path [String] path of the input file
# @return [Array<Array(String, String)>] identifier/activity pairs in file order
def read_activities(path)
  rows = []
  File.foreach(path).with_index(1) do |line, lineno|
    next if lineno <= HEADER_LINES
    next if line.strip.empty?

    # chomp first so that the line ending never ends up in the last column
    id, activity = line.chomp.split(',').values_at(ID_COLUMN, ACTIVITY_COLUMN)
    id = id.to_s.strip
    unless id.match?(/\A\d+\z/)
      # diagnostics go to stderr to keep stdout a clean CSV
      warn "Skipping line #{lineno}, missing or non-numeric identifier: #{line.chomp.inspect}"
      next
    end
    rows << [id, activity.to_s]
  end
  rows
end

# Fetches the canonical SMILES for +ids+ from PubChem with a single request.
#
# The request is made in-process (no shell involved), so identifier text can
# never be interpreted as a shell command. Aborts the script when PubChem
# does not answer with a success status.
#
# @param ids [Array<String>] numeric identifiers
# @return [Array<String>] one SMILES string per line of the response
def fetch_smiles(ids)
  uri = URI("#{PUBCHEM_BASE_URL}/#{ids.join(',')}/property/CanonicalSMILES/TXT")
  response = Net::HTTP.get_response(uri)
  unless response.is_a?(Net::HTTPSuccess)
    abort("PubChem request failed with HTTP status #{response.code}")
  end
  response.body.split("\n").map(&:chomp)
end

abort("Usage: #{File.basename($PROGRAM_NAME)} INPUT_FILE") if ARGV.empty?

activities = read_activities(ARGV[0])

puts 'SMILES,Activity'
activities.each_slice(CHUNK_SIZE) do |slice| # get SMILES in chunks
  ids = slice.map(&:first)
  smiles = fetch_smiles(ids)
  # Results are matched to input rows by position, so the counts must agree.
  # NOTE(review): the identifiers are queried as compound CIDs while this
  # message calls them SIDs -- confirm which identifier type column 1 holds.
  abort('Could not get SMILES for all SIDs from PubChem') unless ids.size == smiles.size

  slice.zip(smiles).each do |(_id, activity), smi|
    puts [smi, activity].join(',')
  end
end