summaryrefslogtreecommitdiff
path: root/pubchem.rb
diff options
context:
space:
mode:
Diffstat (limited to 'pubchem.rb')
-rw-r--r--pubchem.rb320
1 files changed, 214 insertions, 106 deletions
diff --git a/pubchem.rb b/pubchem.rb
index e2574d1..891110c 100644
--- a/pubchem.rb
+++ b/pubchem.rb
@@ -1,68 +1,198 @@
require '../opentox-client/lib/opentox-client.rb'
require 'json'
-# get assay from endpoint
-# search in endpoint ontology
-#
-# get measurements
-# search for compound and assay
-#
-# get affected pathways
-# search for compound and genes
-# identify affected pathways
-# identify relations between affected genes/pathways and endpoint
-#
-# get related assays
-# search for assays in ontology tree
-# search for compound and related assays
-#
-# get similar compounds
-# search for similar compounds
+def Math.gauss(x, sigma = 0.3)
+ d = 1.0 - x.to_f
+ Math.exp(-(d*d)/(2*sigma*sigma))
+end
module PubChem
- def pubchem_search url
- puts url
- #json = RestClient.get url, :accept => "application/json", :timeout => 90000000
- json = `curl "#{url}"`#, :accept => "application/json", :timeout => 90000000
- @result = JSON.parse json
- end
+ attr_accessor :result
- class Assay
- attr_accessor :aid
+ def initialize
+ @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
end
- class Result
- attr_accessor :aid, :cid, :sid
+ def pubchem_search url
+ json = RestClient.get url#, :accept => "application/json", :timeout => 90000000
+ @result = JSON.parse json
+ rescue
+ puts url
+ puts $!.message
+ @result = nil
end
- class Substance
- end
+end
- class Compound
- # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/
- include OpenTox
+module OpenTox
+
+ class PubChemCompound < Compound
include PubChem
- attr_accessor :result, :cid, :neighbors, :tanimoto
+ # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/
+ attr_writer :cid
+ attr_accessor :similarity, :p, :assays
def initialize
- @uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
- @similarity_threshold = 95
+ super
@summary = []
+ @similarity_threshold = 75
@neighbors = []
+ @predicted_targets = []
+ end
+
+ def from_name name
+ @inchi = RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi")
+ end
+
+ def neighbors
+ if @neighbors.empty?
+ pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100"
+ listkey = @result["Waiting"]["ListKey"]
+ while @result["Waiting"] do
+ sleep 1
+ pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON")
+ end
+ columns = @result["Table"]["Columns"]["Column"]
+ table = @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ cid_idx = columns.index("CID")
+ cids = table.collect{|r| r[cid_idx]}.uniq
+ cids.each do |cid|
+ unless cid.to_s == @cid.to_s
+ tab = table.collect{|r| r if r[cid_idx] == cid}.compact
+ c = PubChemCompound.new
+ c.extract_result columns, tab
+ c.similarity = tanimoto c
+ @neighbors << c unless (c.targets + c.non_targets).empty?
+ end
+ end
+ @neighbors.sort!{|a,b| b.similarity <=> a.similarity}
+ end
+ @neighbors
+ end
+
+ def summary
+ if @summary.empty?
+ pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON")
+ extract_result @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ end
+ @summary
+ end
+
+ def active_assays
+ summary.select{|a| a["Activity Outcome"] == "active"}
+ end
+
+ def inactive_assays
+ summary.select{|a| a["Activity Outcome"] == "inactive"}
+ end
+
+ def targets
+ active_assays.select{|a| a["Target GI"]}
+ end
+
+ def non_targets
+ inactive_assays.select{|a| a["Target GI"]}
+ end
+
+ def predicted_targets
+ if @predicted_targets.empty?
+ target_gis = neighbors.collect{|n| n.summary.collect{|a| a["Target GI"]}}.flatten.compact.uniq
+ target_gis.each do |gid|
+ target = {:target_gi => gid}
+ neighbors.each do |neighbor|
+ if neighbor.similarity > 0.5 # avoid downweighting
+ search = neighbor.summary.select{|a| a["Target GI"] == gid}
+ unless search.empty? or search.size == 1
+ print "+++ ("
+ print search.size
+ puts ")"
+ puts search.inspect
+ end
+ search.each do |assay|
+ target[:aid] ||= assay["AID"]
+ target[:name] ||= assay["Target Name"]
+ target[:assay_name] ||= assay["Assay Name"]
+ target[:active_similarities] ||= []
+ target[:inactive_similarities] ||= []
+
+ if assay["Activity Outcome"] == "active"
+ target[:p_active] ? target[:p_active] = target[:p_active]*neighbor.similarity : target[:p_active] = neighbor.similarity
+ target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*(1-neighbor.similarity) : target[:p_inactive] = 1-neighbor.similarity
+ target[:active_similarities] << neighbor.similarity
+ elsif assay["Activity Outcome"] == "inactive"
+ target[:p_active] ? target[:p_active] = target[:p_active]*(1-neighbor.similarity) : target[:p_active] = 1-neighbor.similarity
+ target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*neighbor.similarity : target[:p_inactive] = neighbor.similarity
+ target[:inactive_similarities] << neighbor.similarity
+ end
+ end
+ end
+ end
+ if target[:p_active] and target[:p_inactive] and target[:p_active] + target[:p_inactive] != 0
+ target[:p_active] = target[:p_active]/(target[:p_active]+target[:p_inactive])
+ target[:p_inactive] = target[:p_inactive]/(target[:p_active]+target[:p_inactive])
+ if target[:p_active] > target[:p_inactive]
+ target[:prediction] = "active"
+ elsif target[:p_active] < target[:p_inactive]
+ target[:prediction] = "inactive"
+ end
+ @predicted_targets << target
+ end
+ end
+ @predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]}
+ end
+ @predicted_targets
end
def to_smiles
- RestClient.get(File.join(@uri, "compound", "cid", @cid, "property", "CanonicalSMILES", "TXT")).strip
+ RestClient.get(File.join(@pug_uri, "compound", "cid", cid.to_s, "property", "CanonicalSMILES", "TXT")).strip
end
- def name
- RestClient.get(File.join(@uri, "compound", "cid", @cid, "property", "IUPACName", "TXT")).strip
+ def tanimoto compound
+ f1 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+")
+ f1.puts to_smiles
+ f1.close
+ f2 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+")
+ f2.puts compound.to_smiles
+ f2.close
+ sim = `babel #{f1.path} #{f2.path} -ofpt 2>/dev/null| grep Tanimoto|cut -d "=" -f2`.strip.to_f
+ File.delete(f1.path)
+ File.delete(f2.path)
+ sim
end
- def image
- File.join @uri, "compound", "cid", @cid, "PNG?record_type=3d&image_size=small"
+ def extract_result columns, table
+ table.each do |row|
+ @summary << {}
+ row.each_with_index do |cell,i|
+ if columns[i] == "CID"
+ @cid = cell if @cid.nil?
+ else
+ cell.blank? ? @summary.last[columns[i]] = nil : @summary.last[columns[i]] = cell
+ end
+ end
+ end
+ end
+
+=begin
+ def assay_summary assay
+ if assay["Target GI"] and !@assays[assay["AID"]]
+ @assays[assay["AID"]] = {"nr_active" => 0, "nr_inactive" => 0}
+ pubchem_search File.join(@pug_uri, "assay", "aid", assay["AID"].to_s, "cids", "JSON?cids_type=active")
+ @assays[assay["AID"]]["nr_active"] = @result["InformationList"]["Information"].first["CID"].size if @result
+ pubchem_search File.join(@pug_uri, "assay", "aid", assay["AID"].to_s, "cids", "JSON?cids_type=inactive")
+ @assays[assay["AID"]]["nr_inactive"] = @result["InformationList"]["Information"].first["CID"].size if @result
+ print "getting (in)actives for aid "
+ puts assay["AID"]
+ print @assays[assay["AID"]]["nr_active"]
+ print " "
+ puts @assays[assay["AID"]]["nr_inactive"]
+ File.open("assays.json","w+"){|f| f.puts @assays.to_json}
+ end
end
+=end
+
+=begin
def properties
properties = [
@@ -99,104 +229,82 @@ module PubChem
"EffectiveRotorCount3D",
"ConformerCount3D",
]
- pubchem_search File.join(@uri, "compound", "cid", @cid, "property", properties.join(","), "JSON")
+ pubchem_search File.join(@pug_uri, "compound", "cid", @cid, "property", properties.join(","), "JSON")
@result["PropertyTable"]["Properties"].first
end
def from_smiles smiles
- pubchem_search File.join(@uri, "compound", "smiles", smiles, "assaysummary", "JSON")
- from_summary @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ pubchem_search File.join(@pug_uri, "compound", "smiles", smiles, "assaysummary", "JSON")
+ extract_result @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ end
+ def property_similarity compound
+ svd = OpenTox::SVD.new(GSL::Matrix [[properties, compound.properties]])
+ OpenTox::Algorithm::Similarity.cosine svd.data_transformed_matrix.first, svd.data_transformed_matrix.last
end
- def from_summary columns, table
- table.each do |row|
- @summary << {}
- row.each_with_index do |cell,i|
- if columns[i] == "CID"
- @cid = cell if @cid.nil?
- else
- cell.blank? ? @summary.last[columns[i]] = nil : @summary.last[columns[i]] = cell
- end
- end
- end
+ def assay_similarity compound
+ tanimoto [[active_assays,inactive_assays],[compound.active_assays,compound.inactive_assays]]
end
- def active_assays
- @summary.collect{|a| a if a["Activity Outcome"] == "active"}.compact
+ def target_similarity compound
+ tanimoto [[targets,non_targets],[compound.targets,compound.non_targets]]
end
- def inactive_assays
- @summary.collect{|a| a if a["Activity Outcome"] == "inactive"}.compact
+ def tanimoto features
+ common = features.first.flatten & features.last.flatten
+ same_outcome = (features.first.first & features.last.first) + (features.first.last & features.last.last)
+ same_outcome.size.to_f/common.size
end
- def targets
- active_assays.collect{|a| a["Target Name"]}.compact
+ def euclid features
end
- def non_targets
- inactive_assays.collect{|a| a["Target Name"]}.compact
+ def to_name
+ RestClient.get(File.join(@pug_uri, "compound", "cid", @cid, "property", "IUPACName", "TXT")).strip
end
- def assay_similarity compound
- a1 = active_assays.collect{|a| a["Assay Name"]}
- a2 = compound.active_assays.collect{|a| a["Assay Name"]}
- i1 = inactive_assays.collect{|a| a["Assay Name"]}
- i2 = compound.inactive_assays.collect{|a| a["Assay Name"]}
- self_assays = a1 + i1
- compound_assays = a2 + i2
- common_assays = self_assays & compound_assays
- same_outcome = (a1 & a2) + (i1 & i2)
- same_outcome.size.to_f/common_assays.size
+ def to_image_uri
+ File.join @pug_uri, "compound", "cid", @cid, "PNG?record_type=3d&image_size=small"
end
+=end
- def target_similarity compound
- self_assays = targets + non_targets
- compound_assays = compound.targets + compound.non_targets
- common_assays = self_assays & compound_assays
- same_outcome = (targets & compound.targets) + (non_targets & compound.non_targets)
- same_outcome.size.to_f/common_assays.size
- end
-
- def assay_genes
- active = []
- @aids[:active].each do |aid|
- begin
- pubchem_search File.join(@uri, "assay", "aid", aid.to_s, "genes", "JSON")
- active << @result["InformationList"]["Information"].collect{|i| i["GeneID"]}.flatten
- rescue; end
- end
- @aids[:inactive].each do |aid|
- begin
- pubchem_search File.join(@uri, "assay", "aid", aid.to_s, "genes", "JSON")
- inactive << @result["InformationList"]["Information"].collect{|i| i["GeneID"]}.flatten
- rescue; end
- end
- {:active => active, :inactive => inactive }
+ end
+
+=begin
+ class PubChemNeighbors < Dataset
+ include PubChem
+
+ attr_accessor :query, :neighbors
+
+ def initialize
+ @similarity_threshold = 95
+ @neighbors = []
+ @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
end
- def get_neighbors smiles
- pubchem_search File.join(@uri, "compound", "similarity", "smiles", smiles, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=250"
+ def from_smiles smiles
+ #@query = PubChemCompound.new.from_smiles smiles
+ pubchem_search File.join(@pug_uri, "compound", "similarity", "smiles", smiles, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=250"
listkey = @result["Waiting"]["ListKey"]
while @result["Waiting"] do
sleep 1
- pubchem_search File.join(@uri, "compound", "listkey", listkey, "assaysummary", "JSON")
+ pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON")
end
- File.open("search.yaml","w+"){|s| s.puts @result.to_yaml}
+ #File.open("search.yaml","w+"){|s| s.puts @result.to_yaml}
columns = @result["Table"]["Columns"]["Column"]
table = @result["Table"]["Row"].collect{|cell| cell.values.flatten}
cid_idx = columns.index("CID")
cids = table.collect{|r| r[cid_idx]}.uniq
cids.each do |cid|
tab = table.collect{|r| r if r[cid_idx] == cid}.compact
- c = PubChem::Compound.new
- c.from_summary columns, tab
+ c = PubChemCompound.new
+ c.extract_result columns, tab
@neighbors << c unless (c.targets + c.active_assays).flatten.compact.empty?
end
- File.open("smiles.smi","w+"){|f| f.puts @neighbors.collect{|n| n.to_smiles}.join("\n")}
- `babel smiles.smi -ofpt 2>/dev/null| grep Tanimoto|cut -d "=" -f2`.split("\n").each_with_index do |t,i|
- @neighbors[i].tanimoto = t.strip.to_f
- end
+ @query = @neighbors.shift
+ File.open("search.yaml","w+"){|s| s.puts self.to_yaml}
+ #puts @neighbors.query.to_name
end
end
+=end
end
-