summary refs log tree commit diff
path: root/pubchem.rb
diff options
context:
space:
mode:
Diffstat (limited to 'pubchem.rb')
-rw-r--r-- pubchem.rb 295
1 files changed, 200 insertions, 95 deletions
diff --git a/pubchem.rb b/pubchem.rb
index 891110c..a6893fb 100644
--- a/pubchem.rb
+++ b/pubchem.rb
@@ -1,154 +1,221 @@
require '../opentox-client/lib/opentox-client.rb'
require 'json'
+require 'base64'
# Gaussian weight of +x+ centred on 1.0: exp(-(1 - x)^2 / (2 * sigma^2)).
# +x+ is converted with to_f; +sigma+ defaults to 0.3.
def Math.gauss(x, sigma = 0.3)
  distance = 1.0 - x.to_f
  exponent = -(distance*distance)/(2*sigma*sigma)
  Math.exp(exponent)
end
-module PubChem
-
- attr_accessor :result
-
- def initialize
- @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
- end
-
- def pubchem_search url
- json = RestClient.get url#, :accept => "application/json", :timeout => 90000000
- @result = JSON.parse json
- rescue
- puts url
- puts $!.message
- @result = nil
- end
-
-end
-
module OpenTox
+ # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/
class PubChemCompound < Compound
- include PubChem
- # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/
attr_writer :cid
attr_accessor :similarity, :p, :assays
- def initialize
- super
- @summary = []
- @similarity_threshold = 75
- @neighbors = []
- @predicted_targets = []
+ # Creates a compound for the given PubChem CID (optional; it can also be
+ # assigned later through cid=).
+ # @assays, @neighbors and @predicted_assays start as nil and are filled in
+ # on first use by the reader methods of the same name.
+ # NOTE(review): unlike the previous version this does not call super --
+ # confirm that Compound#initialize needs no set-up.
+ def initialize cid=nil
+ @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
+ @cid = cid
+ @assays = nil
+ # passed as the Threshold query parameter of the similarity search in #neighbors
+ @similarity_threshold = 85
+ @neighbors = nil
+ @predicted_assays = nil
+ # @priors is not assigned here: both candidate assignments below are disabled
+ #@predicted_targets = nil
+ #@priors = {}
+ #@priors = JSON.parse(File.read("priors.json"))
end
- def from_name name
- @inchi = RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi")
+    # Returns the PubChem substructure fingerprint of this compound as an Array
+    # of booleans (one entry per bit), or nil if it could not be retrieved or
+    # decoded. The result is memoized in @fingerprint.
+    # Bit layout: ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
+    def fingerprint
+      unless @fingerprint
+        begin
+          # Fetch the SDF record through RestClient instead of shelling out to
+          # curl|grep|sed: interpolating the CID into a shell command line
+          # allowed command injection and required external tools.
+          sdf = RestClient.get(File.join(@pug_uri, "compound", "cid", cid.to_s, "SDF")).to_s.lines.to_a
+          tag = sdf.index{|line| line.include?("PUBCHEM_CACTVS_SUBSKEYS")}
+          # the base64 encoded key is the line directly after the tag line
+          base64key = sdf[tag+1].chomp
+          # skip the first 4 bytes and the last 7 bits of the decoded key
+          # (presumably length prefix and padding, see the specification above)
+          @fingerprint = Base64.decode64(base64key)[4..-1].unpack("B*").first[0..-8].split(//).collect{|c| c == "1"}
+        rescue
+          # best effort: report the problem and leave @fingerprint nil
+          puts "Could not retrieve fingerprint for CID #{cid}: #{$!.message}"
+        end
+      end
+      @fingerprint
+    end
+
+    # Looks up +name+ in PubChem and returns an Array of PubChemCompound
+    # objects, one per CID in the response (empty if no CID was returned).
+    def self.from_name name
+      pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name"
+      # NOTE(review): `session` is not defined in the visible code --
+      # presumably provided by the hosting web application; confirm.
+      session[:name] = name
+      response = RestClientWrapper.get(File.join(pug_uri,URI.escape(name),"cids","TXT"))
+      # The TXT response lists one CID per line. Build one compound per line
+      # instead of storing the whole response body as a single CID.
+      cids = response.to_s.split("\n").collect{|line| line.strip}.reject{|line| line.empty?}
+      cids.collect do |cid|
+        puts cid
+        compound = OpenTox::PubChemCompound.new
+        compound.cid = cid
+        compound
+      end
end
def neighbors
- if @neighbors.empty?
- pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100"
- listkey = @result["Waiting"]["ListKey"]
- while @result["Waiting"] do
- sleep 1
- pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON")
+ unless @neighbors
+ @neighbors = []
+ result = pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100"
+ while result["Waiting"] do
+ sleep 2
+ listkey = result["Waiting"]["ListKey"]
+ result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "cids", "JSON")
+ #result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON")
end
- columns = @result["Table"]["Columns"]["Column"]
- table = @result["Table"]["Row"].collect{|cell| cell.values.flatten}
- cid_idx = columns.index("CID")
- cids = table.collect{|r| r[cid_idx]}.uniq
- cids.each do |cid|
+ puts "Neighbor CIDs received"
+ result["IdentifierList"]["CID"].each do |cid|
unless cid.to_s == @cid.to_s
- tab = table.collect{|r| r if r[cid_idx] == cid}.compact
- c = PubChemCompound.new
- c.extract_result columns, tab
- c.similarity = tanimoto c
- @neighbors << c unless (c.targets + c.non_targets).empty?
+ c = PubChemCompound.new cid.to_s
+ @neighbors << c if c.assays #and !(c.targets + c.non_targets).empty?
+ end
+ end if result and result["IdentifierList"]
+=begin
+ if result and result["Table"]
+ columns = result["Table"]["Columns"]["Column"]
+ table = result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ cid_idx = columns.index("CID")
+ cids = table.collect{|r| r[cid_idx]}.uniq
+ cids.each do |cid|
+ unless cid.to_s == @cid.to_s
+ tab = table.collect{|r| r if r[cid_idx] == cid}.compact
+ c = PubChemCompound.new
+ c.extract_result columns, tab
+ c.similarity = tanimoto c
+ @neighbors << c unless (c.targets + c.non_targets).empty?
+ end
end
end
- @neighbors.sort!{|a,b| b.similarity <=> a.similarity}
+=end
+ #@neighbors.sort!{|a,b| b.similarity <=> a.similarity}
end
@neighbors
end
- def summary
- if @summary.empty?
- pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON")
- extract_result @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ def assays
+ unless @assays
+ result = pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON")
+ extract_result result["Table"]["Columns"]["Column"], result["Table"]["Row"].collect{|cell| cell.values.flatten} if result and result["Table"]
end
- @summary
+ @assays
end
def active_assays
- summary.select{|a| a["Activity Outcome"] == "active"}
+ assays.select{|a| a["Activity Outcome"] == "active"} if assays
end
def inactive_assays
- summary.select{|a| a["Activity Outcome"] == "inactive"}
+ assays.select{|a| a["Activity Outcome"] == "inactive"} if assays
end
def targets
- active_assays.select{|a| a["Target GI"]}
+ active_assays.select{|a| a["Target GI"]} if assays
end
def non_targets
- inactive_assays.select{|a| a["Target GI"]}
+ inactive_assays.select{|a| a["Target GI"]} if assays
end
- def predicted_targets
- if @predicted_targets.empty?
- target_gis = neighbors.collect{|n| n.summary.collect{|a| a["Target GI"]}}.flatten.compact.uniq
- target_gis.each do |gid|
- target = {:target_gi => gid}
+ # Predicts assay outcomes for this compound from the assays of its neighbors.
+ # Returns an Array with one Hash per assay ("AID", "Target GI", "Target Name",
+ # "Assay Name", :p_active, :p_inactive, :active_similarities,
+ # :inactive_similarities and, unless both probabilities are equal, :prediction).
+ # The result is memoized in @predicted_assays.
+ def predicted_assays
+ unless @predicted_assays
+ @predicted_assays = []
+ neighbors.collect{|n| n.assays.collect{|a| a["AID"]}}.flatten.compact.uniq.each do |aid|
+ predicted_assay = {"AID" => aid}
neighbors.each do |neighbor|
- if neighbor.similarity > 0.5 # avoid downweighting
- search = neighbor.summary.select{|a| a["Target GI"] == gid}
- unless search.empty? or search.size == 1
- print "+++ ("
- print search.size
- puts ")"
- puts search.inspect
- end
+ if similarity(neighbor) > 0.5 # avoid downweighting
+ search = neighbor.assays.select{|a| a["AID"] == aid}
search.each do |assay|
- target[:aid] ||= assay["AID"]
- target[:name] ||= assay["Target Name"]
- target[:assay_name] ||= assay["Assay Name"]
- target[:active_similarities] ||= []
- target[:inactive_similarities] ||= []
+ predicted_assay["Target GI"] ||= assay["Target GI"]
+ predicted_assay["Target Name"] ||= assay["Target Name"]
+ predicted_assay["Assay Name"] ||= assay["Assay Name"]
+ predicted_assay[:active_similarities] ||= []
+ predicted_assay[:inactive_similarities] ||= []
if assay["Activity Outcome"] == "active"
- target[:p_active] ? target[:p_active] = target[:p_active]*neighbor.similarity : target[:p_active] = neighbor.similarity
- target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*(1-neighbor.similarity) : target[:p_inactive] = 1-neighbor.similarity
- target[:active_similarities] << neighbor.similarity
+ predicted_assay[:p_active] ? predicted_assay[:p_active] = predicted_assay[:p_active]*similarity(neighbor) : predicted_assay[:p_active] = similarity(neighbor)
+ predicted_assay[:p_inactive] ? predicted_assay[:p_inactive] = predicted_assay[:p_inactive]*(1-similarity(neighbor)) : predicted_assay[:p_inactive] = 1-similarity(neighbor)
+ predicted_assay[:active_similarities] << similarity(neighbor)
elsif assay["Activity Outcome"] == "inactive"
- target[:p_active] ? target[:p_active] = target[:p_active]*(1-neighbor.similarity) : target[:p_active] = 1-neighbor.similarity
- target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*neighbor.similarity : target[:p_inactive] = neighbor.similarity
- target[:inactive_similarities] << neighbor.similarity
+ predicted_assay[:p_active] ? predicted_assay[:p_active] = predicted_assay[:p_active]*(1-similarity(neighbor)) : predicted_assay[:p_active] = 1-similarity(neighbor)
+ predicted_assay[:p_inactive] ? predicted_assay[:p_inactive] = predicted_assay[:p_inactive]*similarity(neighbor) : predicted_assay[:p_inactive] = similarity(neighbor)
+ predicted_assay[:inactive_similarities] << similarity(neighbor)
end
end
end
end
- if target[:p_active] and target[:p_inactive] and target[:p_active] + target[:p_inactive] != 0
- target[:p_active] = target[:p_active]/(target[:p_active]+target[:p_inactive])
- target[:p_inactive] = target[:p_inactive]/(target[:p_active]+target[:p_inactive])
- if target[:p_active] > target[:p_inactive]
- target[:prediction] = "active"
- elsif target[:p_active] < target[:p_inactive]
- target[:prediction] = "inactive"
+ if predicted_assay[:p_active] and predicted_assay[:p_inactive] and predicted_assay[:p_active] != 0 and predicted_assay[:p_inactive] != 0
+ # Normalise both probabilities with the same sum. Dividing in place used the
+ # already normalised :p_active when scaling :p_inactive, so they did not sum to 1.
+ p_sum = predicted_assay[:p_active]+predicted_assay[:p_inactive]
+ predicted_assay[:p_active] = predicted_assay[:p_active]/p_sum
+ predicted_assay[:p_inactive] = predicted_assay[:p_inactive]/p_sum
+ if predicted_assay[:p_active] > predicted_assay[:p_inactive]
+ predicted_assay[:prediction] = "active"
+ elsif predicted_assay[:p_active] < predicted_assay[:p_inactive]
+ predicted_assay[:prediction] = "inactive"
end
- @predicted_targets << target
+ @predicted_assays << predicted_assay
end
end
- @predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]}
+ #@predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]}
end
- @predicted_targets
+ @predicted_assays
+ end
+
+    # Predicted assays whose :prediction is "active"; nil when predicted_assays
+    # yields nothing.
+    def predicted_active_assays
+      predictions = predicted_assays
+      return unless predictions
+      predictions.select{|assay| assay[:prediction] == "active"}
+    end
+
+    # Predicted assays whose :prediction is "inactive"; nil when
+    # predicted_assays yields nothing.
+    def predicted_inactive_assays
+      predictions = predicted_assays
+      return unless predictions
+      predictions.select{|assay| assay[:prediction] == "inactive"}
+    end
+
+    # Assays predicted as active that have a target assigned.
+    # predicted_assays stores the target under the string key "Target GI"; the
+    # symbol key :target_gi is never set there, so filtering on it always
+    # produced an empty list.
+    def predicted_targets
+      predicted_active_assays.select{|a| a["Target GI"]} if predicted_assays
+    end
+
+    # Assays predicted as inactive that have a target assigned.
+    # Filters the predicted inactive assays (not the measured inactive_assays)
+    # and uses the "Target GI" string key under which predicted_assays stores
+    # the target.
+    def predicted_non_targets
+      predicted_inactive_assays.select{|a| a["Target GI"]} if predicted_assays
end
# Fetches the CanonicalSMILES property of this compound's CID as text and
# returns it with surrounding whitespace stripped.
def to_smiles
  smiles_uri = File.join(@pug_uri, "compound", "cid", cid.to_s, "property", "CanonicalSMILES", "TXT")
  RestClient.get(smiles_uri).strip
end
+    # URI of the PNG image for this compound's CID.
+    # @cid is converted with to_s because File.join raises TypeError for
+    # non-String parts (e.g. an Integer CID).
+    def image_uri
+      File.join @pug_uri, "compound", "cid", @cid.to_s, "PNG"#?record_type=3d&image_size=small"
+    end
+
+    # Similarity between this compound and +compound+; currently delegates to
+    # the cosine coefficient of the two fingerprints.
+    def similarity compound
+      cosine(compound)
+    end
+
def tanimoto compound
+      # Tanimoto coefficient of the two fingerprints: bits set in both divided
+      # by bits set in at least one. Returns nil when either fingerprint is
+      # unavailable and 0.0 when no bit is set at all (instead of NaN).
+      own = fingerprint
+      other = compound.fingerprint
+      if own and other
+        m11 = 0.0
+        m1 = 0.0
+        own.each_index do |i|
+          m11 += 1 if (own[i] and other[i])
+          m1 += 1 if (own[i] or other[i])
+        end
+        # guard against 0.0/0.0
+        m1 == 0 ? 0.0 : m11/m1
+      end
+    end
+
+    # Cosine coefficient of the two fingerprints:
+    # m11 / sqrt((m01 + m11) * (m10 + m11)), where m11 counts bits set in both,
+    # m10 bits set only here and m01 bits set only in +compound+.
+    # Returns nil when either fingerprint is unavailable and 0.0 when one of
+    # the fingerprints has no set bits (instead of NaN).
+    def cosine compound
+      own = fingerprint
+      other = compound.fingerprint
+      if own and other
+        m11 = 0.0
+        m01 = 0.0
+        m10 = 0.0
+        # bits unset in both fingerprints do not enter the formula, so they
+        # are no longer counted
+        own.each_index do |i|
+          m11 += 1 if (own[i] and other[i])
+          m01 += 1 if (!own[i] and other[i])
+          m10 += 1 if (own[i] and !other[i])
+        end
+        norm = ((m01+m11)*(m10+m11))**0.5
+        # guard against 0.0/0.0
+        norm == 0 ? 0.0 : m11/norm
+      end
+    end
+
+=begin
f1 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+")
f1.puts to_smiles
f1.close
@@ -160,20 +227,62 @@ module OpenTox
File.delete(f2.path)
sim
end
+=end
+
+    # GETs +url+ and returns the parsed JSON response, or nil on failure.
+    # Errors whose message matches /Timeout/i are retried (with a 2 second
+    # pause) until 4 attempts have been made; after that the url is appended
+    # to the file "timeouts". Errors whose message contains 404 return nil
+    # silently; every other failure prints the url and the error message.
+    def pubchem_search url
+      attempts = 0
+      begin
+        attempts += 1
+        # NOTE(review): RestClient.get takes request headers as its second
+        # argument, so :timeout is probably not applied as a timeout -- confirm.
+        json = RestClient.get url, :timeout => 90000000
+        puts url
+        JSON.parse json
+      rescue => error
+        timed_out = error.message =~ /Timeout/i
+        if timed_out
+          if attempts < 4
+            sleep 2
+            retry
+          end
+          File.open("timeouts","a+"){|f| f.puts url}
+        elsif error.message.match(/404/)
+          return nil
+        end
+        puts url
+        puts error.message
+        nil
+      end
+    end
def extract_result columns, table
+ @assays = []
table.each do |row|
- @summary << {}
+ @assays << {}
row.each_with_index do |cell,i|
if columns[i] == "CID"
@cid = cell if @cid.nil?
else
- cell.blank? ? @summary.last[columns[i]] = nil : @summary.last[columns[i]] = cell
+ cell.blank? ? @assays.last[columns[i]] = nil : @assays.last[columns[i]] = cell
end
end
end
end
+    # Returns {"nr_active" => n, "nr_inactive" => n} for assay +aid+, taken
+    # from the "Size" of the active/inactive CID lists of the assay. A count
+    # stays 0 when its lookup fails. Results are cached in @priors and the
+    # whole cache is written to priors.json after each new lookup.
+    def priors aid
+      # @priors is not set up in initialize; create it lazily so the lookup
+      # below does not fail on nil
+      @priors ||= {}
+      unless @priors[aid]
+        @priors[aid] = {"nr_active" => 0, "nr_inactive" => 0}
+        [["nr_active", "active"], ["nr_inactive", "inactive"]].each do |key, outcome|
+          result = pubchem_search File.join(@pug_uri, "assay", "aid", aid.to_s, "cids", "JSON?cids_type=#{outcome}&list_return=listkey")
+          # pubchem_search returns nil on failure; also tolerate a response without IdentifierList
+          @priors[aid][key] = result["IdentifierList"]["Size"].to_i if result and result["IdentifierList"]
+        end
+        File.open("priors.json","w+"){|f| f.puts @priors.to_json}
+      end
+      @priors[aid]
+    end
+
=begin
def assay_summary assay
if assay["Target GI"] and !@assays[assay["AID"]]
@@ -262,10 +371,6 @@ module OpenTox
def to_name
RestClient.get(File.join(@pug_uri, "compound", "cid", @cid, "property", "IUPACName", "TXT")).strip
end
-
- def to_image_uri
- File.join @pug_uri, "compound", "cid", @cid, "PNG?record_type=3d&image_size=small"
- end
=end
end