diff options
Diffstat (limited to 'scripts/carcinogenicity2csv.rb')
-rwxr-xr-x | scripts/carcinogenicity2csv.rb | 25 |
1 files changed, 25 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# Converts a carcinogenicity table into a two-column "SMILES,Activity" CSV.
#
# Usage: carcinogenicity2csv.rb INPUT
#
# The first three lines of INPUT are skipped. From every remaining line the
# second comma-separated field is taken as a PubChem compound identifier and
# the fourth field as the activity. SMILES strings are fetched from the
# PubChem PUG REST service via curl, and the resulting CSV goes to stdout.
# Diagnostics go to stderr so they never end up inside the CSV.
require 'open3'
require_relative '../../lazar/lib/lazar.rb'

header_lines = 3   # leading lines of INPUT that carry no data
chunk_size   = 100 # identifiers sent to PubChem per request
pubchem_base = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid'

input = ARGV[0]
abort("Usage: #{File.basename($PROGRAM_NAME)} INPUT") unless input

# Collect [identifier, activity] pairs from the data lines.
activities = []
File.readlines(input).drop(header_lines).each do |line|
  tokens = line.chomp.split(',')
  id = tokens[1].to_s.strip # to_s: a line without any comma has no second field
  if id.empty?
    # An empty identifier would corrupt the batched PubChem request, so the
    # line is reported and left out instead of being queued.
    warn "Skipping line without compound identifier: #{line.inspect}"
    next
  end
  activities << [id, tokens[3]]
end

puts 'SMILES,Activity'
activities.each_slice(chunk_size) do |slice| # get SMILES in chunks
  ids = slice.map(&:first)
  url = "#{pubchem_base}/#{ids.join(',')}/property/CanonicalSMILES/TXT"
  # Argument-array form: no shell is involved, so identifier text read from
  # the input file cannot be interpreted as shell syntax.
  # --fail makes curl exit non-zero on HTTP errors instead of returning the
  # error page as if it were data.
  output, status = Open3.capture2('curl', '--silent', '--fail', url)
  smiles = output.split("\n")
  # NOTE(review): the message says "SIDs" although the request queries the
  # compound/cid endpoint -- confirm which identifier type the input holds.
  abort('Could not get SMILES for all SIDs from PubChem') unless status.success? && ids.size == smiles.size
  # The response is expected to contain one SMILES per requested identifier,
  # in request order, so the two lists are paired positionally.
  slice.zip(smiles) do |(_id, activity), smi|
    puts [smi.chomp, activity].join(',')
  end
end