summary refs log tree commit diff
path: root/scripts/carcinogenicity2csv.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2018-08-23 17:08:34 +0200
committer Christoph Helma <helma@in-silico.ch> 2018-08-23 17:08:34 +0200
commit 3146140b2530bc89d13c494f2e4317b952fc31cc (patch)
tree 10fd82e10e23c69ec7c36e8bc88345a9b4c3941d /scripts/carcinogenicity2csv.rb
parent c94ac24d68c137e93d11f0a7d7621ab0b2e808d7 (diff)
unique neighbor sets
Diffstat (limited to 'scripts/carcinogenicity2csv.rb')
-rwxr-xr-x scripts/carcinogenicity2csv.rb 25
1 files changed, 25 insertions, 0 deletions
diff --git a/scripts/carcinogenicity2csv.rb b/scripts/carcinogenicity2csv.rb
new file mode 100755
index 0000000..9501bdc
--- /dev/null
+++ b/scripts/carcinogenicity2csv.rb
@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+require_relative '../../lazar/lib/lazar.rb'
+
+i = 0
+activities = []
+File.readlines(ARGV[0]).each do |line|
+ if i > 2
+ tokens = line.split ","
+ p line if tokens[1].empty?
+ activities << [tokens[1],tokens[3]]
+ end
+ i += 1
+end
+
+puts "SMILES,Activity"
+activities.each_slice(100) do |slice| # get SMILES in chunks
+ sids = slice.collect{|e| e[0]}
+ smiles = `curl https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{sids.join(",")}/property/CanonicalSMILES/TXT`.split("\n")
+ abort("Could not get SMILES for all SIDs from PubChem") unless sids.size == smiles.size
+ smiles.each_with_index do |smi,i|
+ act = slice[i]
+ puts [smi.chomp,act[1]].join(",")
+ end
+end
+