summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2012-10-29 13:19:20 +0100
committerChristoph Helma <helma@in-silico.ch>2012-10-29 13:19:20 +0100
commit9661a67983ffc93ee02bc12b20b9afb38e199d79 (patch)
treef9b91ec0d8ad0425ecd74c9ff9b833857959e2bf
parentdbf513ce686f1c0db1ed2d6af1fa96c86352e709 (diff)
pubchem pathway prediction implemented
-rw-r--r--application.rb9
-rw-r--r--pubchem-test.rb92
-rw-r--r--pubchem.rb320
-rw-r--r--test.rb51
-rw-r--r--views/index.haml54
5 files changed, 379 insertions, 147 deletions
diff --git a/application.rb b/application.rb
index 09f3188..2a77013 100644
--- a/application.rb
+++ b/application.rb
@@ -6,16 +6,17 @@ require "./pubchem.rb"
also_reload './pubchem.rb'
get '/?' do
-=begin
- @compound = PubChem::Compound.new
+ #@neighbors = OpenTox::PubChemNeighbors.new
smiles = "OC(=O)C1=C(C=CC=C1)OC(=O)C"
#smiles = "c1cc(CC)ccc1"
#smiles = "CC(=O)Nc1ccc(O)cc1"
smiles = "C1=CC(=C(C=C1Cl)Cl)OCC(=O)O"
#@compound.from_smiles smiles
- @compound.get_neighbors smiles
+ #@neighbors.from_smiles smiles
+=begin
File.open("compound.yaml","w+"){|f| f.puts @compound.to_yaml}
-=end
@compound = YAML.load_file "compound.yaml"
+=end
+ @neighbors = YAML.load_file "search.yaml"
haml :index
end
diff --git a/pubchem-test.rb b/pubchem-test.rb
new file mode 100644
index 0000000..273698e
--- /dev/null
+++ b/pubchem-test.rb
@@ -0,0 +1,92 @@
+require 'test/unit'
+require '../opentox-client/lib/opentox-client'
+require './pubchem.rb'
+require 'yaml'
+
+class AOPTest < Test::Unit::TestCase
+
+ def setup
+ @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
+ @compound = OpenTox::PubChemCompound.new
+ @compound.cid = 1983
+ #@compound.from_name "2,4-D"
+ end
+
+ def test_initialize
+ print @compound.cid
+ print " "
+ puts @compound.to_smiles
+ puts "measured targets"
+ puts @compound.targets.collect{|t| t["Target Name"]}.to_yaml
+ puts "predicted targets"
+ puts @compound.predicted_targets.select{|t| t[:prediction] == "active"}.to_yaml
+
+ puts "predicted non_targets"
+ puts @compound.predicted_targets.select{|t| t[:prediction] == "inactive"}.size
+ #puts @compound.predicted_non_targets.values.inspect
+ measured_target_gis = @compound.targets.collect{|t| t["Target GI"]}.compact.uniq
+ measured_nontarget_gis = @compound.non_targets.collect{|t| t["Target GI"]}.compact.uniq
+ predicted_target_gis = @compound.predicted_targets.collect{|t| t[:target_gi] if t[:prediction] == "active"}.compact.uniq
+ predicted_nontarget_gis = @compound.predicted_targets.collect{|t| t[:target_gi] if t[:prediction] == "inactive"}.compact.uniq
+ print "correct predicted targets: "
+ puts (measured_target_gis & predicted_target_gis).size
+ print "new predicted targets: "
+ puts (predicted_target_gis - measured_target_gis).size
+ print "correct predicted non-targets: "
+ puts (measured_nontarget_gis & predicted_nontarget_gis).size
+ print "new predicted non-targets: "
+ puts (predicted_nontarget_gis - measured_nontarget_gis).size
+ print "incorrect predicted targets: "
+ puts (measured_nontarget_gis & predicted_target_gis).size
+ print "incorrect predicted non-targets: "
+ puts (measured_target_gis & predicted_nontarget_gis).size
+=begin
+ @compound.neighbors.each do |n|
+ #print n.cid
+ #print " "
+ print n.to_smiles
+ print " "
+ print n.similarity
+ print " "
+ puts n.p
+ puts n.targets.sort.inspect
+ #puts n.non_targets.inspect
+ end
+=end
+ #File.open("Acetaminophen.yaml","w+"){|f| f.puts @compound.to_yaml}
+ #puts @compound.neighbors.size
+ #assert_equal "7500", @p.cid
+ #assert_equal true, @p.aids[:active].include?(1188)
+ #assert_equal true, @p.aids[:inactive].include?(435)
+ end
+=begin
+ def test_similarity_search
+ #puts @p.to_smiles
+ @p.neighbors.each do |n|
+ #puts n.to_smiles
+ puts @p.target_similarity(n)
+ end
+ #puts @p.neighbors.inspect
+ #assert_equal 100, @p.neighbor_cids.size
+ end
+
+ def test_assay_description
+ puts @p.assay_description.to_yaml
+ end
+
+ def test_assay_genes
+ puts @p.assay_genes.to_yaml
+ end
+
+ def test_assay_similarity
+ @p2 = PubChem::Compound.new "OC(=O)C1=C(C=CC=C1)OC(=O)C"
+ puts @p.assay_similarity(@p2)
+ end
+
+ def test_target_similarity
+ @p2 = PubChem::Compound.new "OC(=O)C1=C(C=CC=C1)OC(=O)C"
+ puts @p.target_similarity(@p2)
+ end
+=end
+
+end
diff --git a/pubchem.rb b/pubchem.rb
index e2574d1..891110c 100644
--- a/pubchem.rb
+++ b/pubchem.rb
@@ -1,68 +1,198 @@
require '../opentox-client/lib/opentox-client.rb'
require 'json'
-# get assay from endpoint
-# search in endpoint ontology
-#
-# get measurements
-# search for compound and assay
-#
-# get affected pathways
-# search for compound and genes
-# identify affected pathways
-# identify relations between affected genes/pathways and endpoint
-#
-# get related assays
-# search for assays in ontology tree
-# search for compound and related assays
-#
-# get similar compounds
-# search for similar compounds
+def Math.gauss(x, sigma = 0.3)
+ d = 1.0 - x.to_f
+ Math.exp(-(d*d)/(2*sigma*sigma))
+end
module PubChem
- def pubchem_search url
- puts url
- #json = RestClient.get url, :accept => "application/json", :timeout => 90000000
- json = `curl "#{url}"`#, :accept => "application/json", :timeout => 90000000
- @result = JSON.parse json
- end
+ attr_accessor :result
- class Assay
- attr_accessor :aid
+ def initialize
+ @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
end
- class Result
- attr_accessor :aid, :cid, :sid
+ def pubchem_search url
+ json = RestClient.get url#, :accept => "application/json", :timeout => 90000000
+ @result = JSON.parse json
+ rescue
+ puts url
+ puts $!.message
+ @result = nil
end
- class Substance
- end
+end
- class Compound
- # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/
- include OpenTox
+module OpenTox
+
+ class PubChemCompound < Compound
include PubChem
- attr_accessor :result, :cid, :neighbors, :tanimoto
+ # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/
+ attr_writer :cid
+ attr_accessor :similarity, :p, :assays
def initialize
- @uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
- @similarity_threshold = 95
+ super
@summary = []
+ @similarity_threshold = 75
@neighbors = []
+ @predicted_targets = []
+ end
+
+ def from_name name
+ @inchi = RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi")
+ end
+
+ def neighbors
+ if @neighbors.empty?
+ pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100"
+ listkey = @result["Waiting"]["ListKey"]
+ while @result["Waiting"] do
+ sleep 1
+ pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON")
+ end
+ columns = @result["Table"]["Columns"]["Column"]
+ table = @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ cid_idx = columns.index("CID")
+ cids = table.collect{|r| r[cid_idx]}.uniq
+ cids.each do |cid|
+ unless cid.to_s == @cid.to_s
+ tab = table.collect{|r| r if r[cid_idx] == cid}.compact
+ c = PubChemCompound.new
+ c.extract_result columns, tab
+ c.similarity = tanimoto c
+ @neighbors << c unless (c.targets + c.non_targets).empty?
+ end
+ end
+ @neighbors.sort!{|a,b| b.similarity <=> a.similarity}
+ end
+ @neighbors
+ end
+
+ def summary
+ if @summary.empty?
+ pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON")
+ extract_result @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ end
+ @summary
+ end
+
+ def active_assays
+ summary.select{|a| a["Activity Outcome"] == "active"}
+ end
+
+ def inactive_assays
+ summary.select{|a| a["Activity Outcome"] == "inactive"}
+ end
+
+ def targets
+ active_assays.select{|a| a["Target GI"]}
+ end
+
+ def non_targets
+ inactive_assays.select{|a| a["Target GI"]}
+ end
+
+ def predicted_targets
+ if @predicted_targets.empty?
+ target_gis = neighbors.collect{|n| n.summary.collect{|a| a["Target GI"]}}.flatten.compact.uniq
+ target_gis.each do |gid|
+ target = {:target_gi => gid}
+ neighbors.each do |neighbor|
+ if neighbor.similarity > 0.5 # avoid downweighting
+ search = neighbor.summary.select{|a| a["Target GI"] == gid}
+ unless search.empty? or search.size == 1
+ print "+++ ("
+ print search.size
+ puts ")"
+ puts search.inspect
+ end
+ search.each do |assay|
+ target[:aid] ||= assay["AID"]
+ target[:name] ||= assay["Target Name"]
+ target[:assay_name] ||= assay["Assay Name"]
+ target[:active_similarities] ||= []
+ target[:inactive_similarities] ||= []
+
+ if assay["Activity Outcome"] == "active"
+ target[:p_active] ? target[:p_active] = target[:p_active]*neighbor.similarity : target[:p_active] = neighbor.similarity
+ target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*(1-neighbor.similarity) : target[:p_inactive] = 1-neighbor.similarity
+ target[:active_similarities] << neighbor.similarity
+ elsif assay["Activity Outcome"] == "inactive"
+ target[:p_active] ? target[:p_active] = target[:p_active]*(1-neighbor.similarity) : target[:p_active] = 1-neighbor.similarity
+ target[:p_inactive] ? target[:p_inactive] = target[:p_inactive]*neighbor.similarity : target[:p_inactive] = neighbor.similarity
+ target[:inactive_similarities] << neighbor.similarity
+ end
+ end
+ end
+ end
+ if target[:p_active] and target[:p_inactive] and target[:p_active] + target[:p_inactive] != 0
+ target[:p_active] = target[:p_active]/(target[:p_active]+target[:p_inactive])
+ target[:p_inactive] = target[:p_inactive]/(target[:p_active]+target[:p_inactive])
+ if target[:p_active] > target[:p_inactive]
+ target[:prediction] = "active"
+ elsif target[:p_active] < target[:p_inactive]
+ target[:prediction] = "inactive"
+ end
+ @predicted_targets << target
+ end
+ end
+ @predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]}
+ end
+ @predicted_targets
end
def to_smiles
- RestClient.get(File.join(@uri, "compound", "cid", @cid, "property", "CanonicalSMILES", "TXT")).strip
+ RestClient.get(File.join(@pug_uri, "compound", "cid", cid.to_s, "property", "CanonicalSMILES", "TXT")).strip
end
- def name
- RestClient.get(File.join(@uri, "compound", "cid", @cid, "property", "IUPACName", "TXT")).strip
+ def tanimoto compound
+ f1 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+")
+ f1.puts to_smiles
+ f1.close
+ f2 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+")
+ f2.puts compound.to_smiles
+ f2.close
+ sim = `babel #{f1.path} #{f2.path} -ofpt 2>/dev/null| grep Tanimoto|cut -d "=" -f2`.strip.to_f
+ File.delete(f1.path)
+ File.delete(f2.path)
+ sim
end
- def image
- File.join @uri, "compound", "cid", @cid, "PNG?record_type=3d&image_size=small"
+ def extract_result columns, table
+ table.each do |row|
+ @summary << {}
+ row.each_with_index do |cell,i|
+ if columns[i] == "CID"
+ @cid = cell if @cid.nil?
+ else
+ cell.blank? ? @summary.last[columns[i]] = nil : @summary.last[columns[i]] = cell
+ end
+ end
+ end
+ end
+
+=begin
+ def assay_summary assay
+ if assay["Target GI"] and !@assays[assay["AID"]]
+ @assays[assay["AID"]] = {"nr_active" => 0, "nr_inactive" => 0}
+ pubchem_search File.join(@pug_uri, "assay", "aid", assay["AID"].to_s, "cids", "JSON?cids_type=active")
+ @assays[assay["AID"]]["nr_active"] = @result["InformationList"]["Information"].first["CID"].size if @result
+ pubchem_search File.join(@pug_uri, "assay", "aid", assay["AID"].to_s, "cids", "JSON?cids_type=inactive")
+ @assays[assay["AID"]]["nr_inactive"] = @result["InformationList"]["Information"].first["CID"].size if @result
+ print "getting (in)actives for aid "
+ puts assay["AID"]
+ print @assays[assay["AID"]]["nr_active"]
+ print " "
+ puts @assays[assay["AID"]]["nr_inactive"]
+ File.open("assays.json","w+"){|f| f.puts @assays.to_json}
+ end
end
+=end
+
+=begin
def properties
properties = [
@@ -99,104 +229,82 @@ module PubChem
"EffectiveRotorCount3D",
"ConformerCount3D",
]
- pubchem_search File.join(@uri, "compound", "cid", @cid, "property", properties.join(","), "JSON")
+ pubchem_search File.join(@pug_uri, "compound", "cid", @cid, "property", properties.join(","), "JSON")
@result["PropertyTable"]["Properties"].first
end
def from_smiles smiles
- pubchem_search File.join(@uri, "compound", "smiles", smiles, "assaysummary", "JSON")
- from_summary @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ pubchem_search File.join(@pug_uri, "compound", "smiles", smiles, "assaysummary", "JSON")
+ extract_result @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten}
+ end
+ def property_similarity compound
+ svd = OpenTox::SVD.new(GSL::Matrix [[properties, compound.properties]])
+ OpenTox::Algorithm::Similarity.cosine svd.data_transformed_matrix.first, svd.data_transformed_matrix.last
end
- def from_summary columns, table
- table.each do |row|
- @summary << {}
- row.each_with_index do |cell,i|
- if columns[i] == "CID"
- @cid = cell if @cid.nil?
- else
- cell.blank? ? @summary.last[columns[i]] = nil : @summary.last[columns[i]] = cell
- end
- end
- end
+ def assay_similarity compound
+ tanimoto [[active_assays,inactive_assays],[compound.active_assays,compound.inactive_assays]]
end
- def active_assays
- @summary.collect{|a| a if a["Activity Outcome"] == "active"}.compact
+ def target_similarity compound
+ tanimoto [[targets,non_targets],[compound.targets,compound.non_targets]]
end
- def inactive_assays
- @summary.collect{|a| a if a["Activity Outcome"] == "inactive"}.compact
+ def tanimoto features
+ common = features.first.flatten & features.last.flatten
+ same_outcome = (features.first.first & features.last.first) + (features.first.last & features.last.last)
+ same_outcome.size.to_f/common.size
end
- def targets
- active_assays.collect{|a| a["Target Name"]}.compact
+ def euclid features
end
- def non_targets
- inactive_assays.collect{|a| a["Target Name"]}.compact
+ def to_name
+ RestClient.get(File.join(@pug_uri, "compound", "cid", @cid, "property", "IUPACName", "TXT")).strip
end
- def assay_similarity compound
- a1 = active_assays.collect{|a| a["Assay Name"]}
- a2 = compound.active_assays.collect{|a| a["Assay Name"]}
- i1 = inactive_assays.collect{|a| a["Assay Name"]}
- i2 = compound.inactive_assays.collect{|a| a["Assay Name"]}
- self_assays = a1 + i1
- compound_assays = a2 + i2
- common_assays = self_assays & compound_assays
- same_outcome = (a1 & a2) + (i1 & i2)
- same_outcome.size.to_f/common_assays.size
+ def to_image_uri
+ File.join @pug_uri, "compound", "cid", @cid, "PNG?record_type=3d&image_size=small"
end
+=end
- def target_similarity compound
- self_assays = targets + non_targets
- compound_assays = compound.targets + compound.non_targets
- common_assays = self_assays & compound_assays
- same_outcome = (targets & compound.targets) + (non_targets & compound.non_targets)
- same_outcome.size.to_f/common_assays.size
- end
-
- def assay_genes
- active = []
- @aids[:active].each do |aid|
- begin
- pubchem_search File.join(@uri, "assay", "aid", aid.to_s, "genes", "JSON")
- active << @result["InformationList"]["Information"].collect{|i| i["GeneID"]}.flatten
- rescue; end
- end
- @aids[:inactive].each do |aid|
- begin
- pubchem_search File.join(@uri, "assay", "aid", aid.to_s, "genes", "JSON")
- inactive << @result["InformationList"]["Information"].collect{|i| i["GeneID"]}.flatten
- rescue; end
- end
- {:active => active, :inactive => inactive }
+ end
+
+=begin
+ class PubChemNeighbors < Dataset
+ include PubChem
+
+ attr_accessor :query, :neighbors
+
+ def initialize
+ @similarity_threshold = 95
+ @neighbors = []
+ @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
end
- def get_neighbors smiles
- pubchem_search File.join(@uri, "compound", "similarity", "smiles", smiles, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=250"
+ def from_smiles smiles
+ #@query = PubChemCompound.new.from_smiles smiles
+ pubchem_search File.join(@pug_uri, "compound", "similarity", "smiles", smiles, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=250"
listkey = @result["Waiting"]["ListKey"]
while @result["Waiting"] do
sleep 1
- pubchem_search File.join(@uri, "compound", "listkey", listkey, "assaysummary", "JSON")
+ pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON")
end
- File.open("search.yaml","w+"){|s| s.puts @result.to_yaml}
+ #File.open("search.yaml","w+"){|s| s.puts @result.to_yaml}
columns = @result["Table"]["Columns"]["Column"]
table = @result["Table"]["Row"].collect{|cell| cell.values.flatten}
cid_idx = columns.index("CID")
cids = table.collect{|r| r[cid_idx]}.uniq
cids.each do |cid|
tab = table.collect{|r| r if r[cid_idx] == cid}.compact
- c = PubChem::Compound.new
- c.from_summary columns, tab
+ c = PubChemCompound.new
+ c.extract_result columns, tab
@neighbors << c unless (c.targets + c.active_assays).flatten.compact.empty?
end
- File.open("smiles.smi","w+"){|f| f.puts @neighbors.collect{|n| n.to_smiles}.join("\n")}
- `babel smiles.smi -ofpt 2>/dev/null| grep Tanimoto|cut -d "=" -f2`.split("\n").each_with_index do |t,i|
- @neighbors[i].tanimoto = t.strip.to_f
- end
+ @query = @neighbors.shift
+ File.open("search.yaml","w+"){|s| s.puts self.to_yaml}
+ #puts @neighbors.query.to_name
end
end
+=end
end
-
diff --git a/test.rb b/test.rb
new file mode 100644
index 0000000..bd15dcd
--- /dev/null
+++ b/test.rb
@@ -0,0 +1,51 @@
+require 'test/unit'
+#require 'yaml'
+require './pubchem.rb'
+
+class MRATest < Test::Unit::TestCase
+
+ def setup
+ #@p = PubChem::Compound.new "c1cc(CC)ccc1"
+ @p = PubChem::Compound.new
+ @p.from_smiles "CC(=O)Nc1ccc(O)cc1"
+ end
+
+ def test_initialize
+ puts @p.active_assays.size
+ puts @p.inactive_assays.size
+ puts @p.targets.size
+ puts @p.non_targets.size
+ assert_equal "7500", @p.cid
+ #assert_equal true, @p.aids[:active].include?(1188)
+ #assert_equal true, @p.aids[:inactive].include?(435)
+ end
+
+ def test_similarity_search
+ #puts @p.to_smiles
+ @p.neighbors.each do |n|
+ #puts n.to_smiles
+ puts @p.target_similarity(n)
+ end
+ #puts @p.neighbors.inspect
+ #assert_equal 100, @p.neighbor_cids.size
+ end
+
+ def test_assay_description
+ puts @p.assay_description.to_yaml
+ end
+
+ def test_assay_genes
+ puts @p.assay_genes.to_yaml
+ end
+
+ def test_assay_similarity
+ @p2 = PubChem::Compound.new "OC(=O)C1=C(C=CC=C1)OC(=O)C"
+ puts @p.assay_similarity(@p2)
+ end
+
+ def test_target_similarity
+ @p2 = PubChem::Compound.new "OC(=O)C1=C(C=CC=C1)OC(=O)C"
+ puts @p.target_similarity(@p2)
+ end
+
+end
diff --git a/views/index.haml b/views/index.haml
index c8481c3..b75a9ab 100644
--- a/views/index.haml
+++ b/views/index.haml
@@ -1,47 +1,27 @@
!!! 5
-= @compound.neighbors.size
-%table
+%script{:type => "text/javascript", :src => "sorttable.js"}
+%table{:class => "sortable"}
%tr
+ %th
+ = @neighbors.query.to_name
+ %br
+ %img{:src => @neighbors.query.to_image_uri}
%th Structure
%th Properties
%th Targets
%th Assays
- - @compound.neighbors.each do |neighbor|
- %tr
+ - @neighbors.neighbors.each do |neighbor|
+ - sim = neighbor.structure_similarity @neighbors.query
+ %tr{:sorttable_customkey => sim}
%td
- = neighbor.name
- %img{:src => neighbor.image}
- %dl
- %dt Similarity
- %dd
- = neighbor.tanimoto
+ = neighbor.to_name
+ %br
+ %img{:src => neighbor.to_image_uri}
%td
- %dl
- - neighbor.properties.each do |p,v|
- %dt
- = p
- %dd
- = v
+ = sim
%td
- %dl
- %dt Similarity
- %dd
- = neighbor.target_similarity @compound.neighbors.first
- %dt Targets
- %dd
- = neighbor.targets.uniq
- %dt Total
- %dd
- = neighbor.targets.size + neighbor.non_targets.size
+ = neighbor.property_similarity @neighbors.query
%td
- %dl
- %dt Similarity
- %dd
- = neighbor.assay_similarity @compound.neighbors.first
- %dt Active
- %dd
- = neighbor.active_assays.collect{|a| a["Assay Name"]}
- %dt Total
- %dd
- = neighbor.active_assays.size + neighbor.inactive_assays.size
- =# neighbor.inactive_assays.collect{|a| a["Assay Name"]}
+ = neighbor.target_similarity @neighbors.query
+ %td
+ = neighbor.assay_similarity @neighbors.query