diff options
Diffstat (limited to 'pubchem.rb')
-rw-r--r-- | pubchem.rb | 375 |
1 files changed, 22 insertions, 353 deletions
@@ -1,13 +1,6 @@ require '../opentox-client/lib/opentox-client.rb' require 'json' require 'base64' -require 'restclient/components' -require 'rack/cache' -#RestClient.enable Rack::Cache, :verbose => true#, :allow_reload => true, :allow_revalidate => true -RestClient.enable Rack::Cache, - :verbose => true, - :metastore => 'file:/tmp/cache/meta', - :entitystore => 'file:/tmp/cache/body' def Math.gauss(x, sigma = 0.3) d = 1.0 - x.to_f @@ -16,100 +9,40 @@ end module OpenTox - # doc @ http://pubchem.ncbi.nlm.nih.gov/pug_rest/ class PubChemCompound < Compound - attr_writer :cid - attr_accessor :similarity, :p, :assays + + attr_accessor :cid + @@pug_proxy = "http://localhost:8081/" - def initialize cid=nil - #@pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" - @pug_uri = "http://localhost:8081/" - @cid = cid - @assays = nil - @similarity_threshold = 90 - @neighbors = nil - @predicted_assays = nil - #@predicted_targets = nil - #@priors = {} - #@priors = JSON.parse(File.read("priors.json")) + def initialize cid + @cid = cid.to_s end def fingerprint - unless @fingerprint - begin - # ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt - base64key = `curl http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/#{cid}/SDF|grep -A1 PUBCHEM_CACTVS_SUBSKEYS|sed '1d'`.chomp - @fingerprint = Base64.decode64(base64key)[4..-1].unpack("B*").first[0..-8].split(//).collect{|c| c == "1"} - rescue - end - end - @fingerprint + JSON.parse RestClient.get(File.join(@@pug_proxy,"cid",@cid,"fingerprint")) end def self.from_name name - pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name" - compounds = [] - session[:name] = name - cid = RestClient.get(File.join(pug_uri,URI.escape(name),"cids","TXT")) - #puts response - #response.split("\n") do |cid| - puts cid - compound = OpenTox::PubChemCompound.new - compound.cid = cid.chomp - compounds << compound - #end - compounds + cids = JSON.parse(RestClient.get(File.join(@@pug_proxy,"name",CGI.escape(name)))) + if cids.size == 1 + PubChemCompound.new cids.first + elsif cids.empty? + nil + else + cids.collect{|cid| PubChemCompound.new cid} + end end def name - RestClient.get File.join(@pug_uri, "compound", "cid", cid.to_s, "property", "IUPACName","TXT").chomp + RestClient.get(File.join(@@pug_proxy,"cid",cid,"name")).chomp.sub(/^"/,'').sub(/"$/,'') end def neighbors - unless @neighbors - @neighbors = [] - result = pubchem_search File.join(@pug_uri, "compound", "similarity", "cid", cid.to_s, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=100" - while result["Waiting"] do - sleep 2 - listkey = result["Waiting"]["ListKey"] - result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "cids", "JSON") - #result = pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON") - end - puts "#{result["IdentifierList"]["CID"].size} Neighbor CIDs received" - result["IdentifierList"]["CID"].each do |cid| - unless cid.to_s == @cid.to_s - c = PubChemCompound.new cid.to_s - @neighbors << c if c.assays and !c.assays.empty? - end - end if result and result["IdentifierList"] -=begin - if result and result["Table"] - columns = result["Table"]["Columns"]["Column"] - table = result["Table"]["Row"].collect{|cell| cell.values.flatten} - cid_idx = columns.index("CID") - cids = table.collect{|r| r[cid_idx]}.uniq - cids.each do |cid| - unless cid.to_s == @cid.to_s - tab = table.collect{|r| r if r[cid_idx] == cid}.compact - c = PubChemCompound.new - c.extract_result columns, tab - c.similarity = tanimoto c - @neighbors << c unless (c.targets + c.non_targets).empty? - end - end - end -=end - #@neighbors.sort!{|a,b| b.similarity <=> a.similarity} - end - @neighbors + JSON.parse(RestClient.get(File.join(@@pug_proxy,"cid",@cid,"neighbors"))).collect{|n| PubChemCompound.new(n) } end def assays - unless @assays - result = pubchem_search File.join(@pug_uri, "compound", "cid", cid.to_s, "assaysummary", "JSON") - extract_result result["Table"]["Columns"]["Column"], result["Table"]["Row"].collect{|cell| cell.values.flatten} if result and result["Table"] - end - @assays + JSON.parse RestClient.get(File.join(@@pug_proxy,"cid",cid,"assays")) end def active_assays @@ -129,54 +62,15 @@ module OpenTox end def predicted_assays - unless @predicted_assays - @predicted_assays = [] - neighbors.collect{|n| n.assays.collect{|a| a["AID"]}}.flatten.compact.uniq.each do |aid| - predicted_assay = {"AID" => aid} - neighbors.each do |neighbor| - if similarity(neighbor) and similarity(neighbor) > 0.5 # avoid downweighting - search = neighbor.assays.select{|a| a["AID"] == aid} - search.each do |assay| - predicted_assay["Target GI"] ||= assay["Target GI"] - predicted_assay["Target Name"] ||= assay["Target Name"] - predicted_assay["Assay Name"] ||= assay["Assay Name"] - predicted_assay[:active_similarities] ||= [] - predicted_assay[:inactive_similarities] ||= [] - - if assay["Activity Outcome"] == "active" - predicted_assay[:p_active] ? predicted_assay[:p_active] = predicted_assay[:p_active]*similarity(neighbor) : predicted_assay[:p_active] = similarity(neighbor) - predicted_assay[:p_inactive] ? predicted_assay[:p_inactive] = predicted_assay[:p_inactive]*(1-similarity(neighbor)) : predicted_assay[:p_inactive] = 1-similarity(neighbor) - predicted_assay[:active_similarities] << similarity(neighbor) - elsif assay["Activity Outcome"] == "inactive" - predicted_assay[:p_active] ? predicted_assay[:p_active] = predicted_assay[:p_active]*(1-similarity(neighbor)) : predicted_assay[:p_active] = 1-similarity(neighbor) - predicted_assay[:p_inactive] ? predicted_assay[:p_inactive] = predicted_assay[:p_inactive]*similarity(neighbor) : predicted_assay[:p_inactive] = similarity(neighbor) - predicted_assay[:inactive_similarities] << similarity(neighbor) - end - end - end - end - if predicted_assay[:p_active] and predicted_assay[:p_inactive] and predicted_assay[:p_active] != 0 and predicted_assay[:p_inactive] != 0 - predicted_assay[:p_active] = predicted_assay[:p_active]/(predicted_assay[:p_active]+predicted_assay[:p_inactive]) - predicted_assay[:p_inactive] = predicted_assay[:p_inactive]/(predicted_assay[:p_active]+predicted_assay[:p_inactive]) - if predicted_assay[:p_active] > predicted_assay[:p_inactive] - predicted_assay[:prediction] = "active" - elsif predicted_assay[:p_active] < predicted_assay[:p_inactive] - predicted_assay[:prediction] = "inactive" - end - @predicted_assays << predicted_assay - end - end - #@predicted_targets.sort{|a,b| b[:p_active] <=> a[:p_active]} - end - @predicted_assays + JSON.parse RestClient.get(File.join(@@pug_proxy,"cid",cid,"predictions")) end def predicted_active_assays - predicted_assays.select{|a| a[:prediction] == "active"} if predicted_assays + predicted_assays.select{|a| a["p_active"] > a["p_inactive"]} if predicted_assays end def predicted_inactive_assays - predicted_assays.select{|a| a[:prediction] == "inactive"} if predicted_assays + predicted_assays.select{|a| a["p_active"] < a["p_inactive"]} if predicted_assays end def predicted_targets @@ -187,241 +81,16 @@ module OpenTox predicted_inactive_assays.select{|a| a["Target GI"]} if predicted_assays end - def to_smiles - RestClient.get(File.join(@pug_uri, "compound", "cid", cid.to_s, "property", "CanonicalSMILES", "TXT")).strip - end - def image_uri - File.join @pug_uri, "compound", "cid", @cid, "PNG"#?record_type=3d&image_size=small" + File.join @@pug_proxy, "cid", @cid, "image" end def similarity compound cosine compound end - def tanimoto compound - if fingerprint and compound.fingerprint - m11 = 0.0 - m1 = 0.0 - fingerprint.each_index do |i| - m11 += 1 if (@fingerprint[i] and compound.fingerprint[i]) - m1 += 1 if (@fingerprint[i] or compound.fingerprint[i]) - end - m11/m1 - end - end - def cosine compound - if fingerprint and compound.fingerprint - m11 = 0.0 - m01 = 0.0 - m10 = 0.0 - m00 = 0.0 - fingerprint.each_index do |i| - m11 += 1 if (@fingerprint[i] and compound.fingerprint[i]) - m01 += 1 if (!@fingerprint[i] and compound.fingerprint[i]) - m10 += 1 if (@fingerprint[i] and !compound.fingerprint[i]) - m00 += 1 if (!@fingerprint[i] and !compound.fingerprint[i]) - end - m11/((m01+m11)*(m10+m11))**0.5 - end - end - -=begin - f1 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+") - f1.puts to_smiles - f1.close - f2 = File.open(File.join(".","tmp",SecureRandom.uuid+".smi"),"w+") - f2.puts compound.to_smiles - f2.close - sim = `babel #{f1.path} #{f2.path} -ofpt 2>/dev/null| grep Tanimoto|cut -d "=" -f2`.strip.to_f - File.delete(f1.path) - File.delete(f2.path) - sim - end -=end - - def pubchem_search url - attempts = 0 - begin - attempts += 1 - json = RestClient.get url, :timeout => 90000000 - puts url - JSON.parse json - rescue - if $!.message =~ /Timeout/i and attempts < 4 - sleep 2 - retry - elsif $!.message =~ /Timeout/i and attempts >= 4 - File.open("timeouts","a+"){|f| f.puts url} - puts url - puts $!.message - nil - elsif $!.message.match /404/ - nil - else - puts url - puts $!.message - nil - end - end - end - - def extract_result columns, table - @assays = [] - table.each do |row| - @assays << {} - row.each_with_index do |cell,i| - if columns[i] == "CID" - @cid = cell if @cid.nil? - else - cell.blank? ? @assays.last[columns[i]] = nil : @assays.last[columns[i]] = cell - end - end - end - end - - def priors aid - unless @priors[aid] - @priors[aid] = {"nr_active" => 0, "nr_inactive" => 0} - result = nil - result = pubchem_search File.join(@pug_uri, "assay", "aid", aid.to_s, "cids", "JSON?cids_type=active&list_return=listkey") - @priors[aid]["nr_active"] = result["IdentifierList"]["Size"].to_i if result - result = nil - result = pubchem_search File.join(@pug_uri, "assay", "aid", aid.to_s, "cids", "JSON?cids_type=inactive&list_return=listkey") - @priors[aid]["nr_inactive"] = result["IdentifierList"]["Size"].to_i if result - File.open("priors.json","w+"){|f| f.puts @priors.to_json} - end - @priors[aid] - end - -=begin - def assay_summary assay - if assay["Target GI"] and !@assays[assay["AID"]] - @assays[assay["AID"]] = {"nr_active" => 0, "nr_inactive" => 0} - pubchem_search File.join(@pug_uri, "assay", "aid", assay["AID"].to_s, "cids", "JSON?cids_type=active") - @assays[assay["AID"]]["nr_active"] = @result["InformationList"]["Information"].first["CID"].size if @result - pubchem_search File.join(@pug_uri, "assay", "aid", assay["AID"].to_s, "cids", "JSON?cids_type=inactive") - @assays[assay["AID"]]["nr_inactive"] = @result["InformationList"]["Information"].first["CID"].size if @result - print "getting (in)actives for aid " - puts assay["AID"] - print @assays[assay["AID"]]["nr_active"] - print " " - puts @assays[assay["AID"]]["nr_inactive"] - File.open("assays.json","w+"){|f| f.puts @assays.to_json} - end - end -=end - -=begin - - def properties - properties = [ - "XLogP", - "ExactMass", - "MonoisotopicMass", - "TPSA", - "Complexity", - "Charge", - "HBondDonorCount", - "HBondAcceptorCount", - "RotatableBondCount", - "HeavyAtomCount", - "IsotopeAtomCount", - "AtomStereoCount", - "DefinedAtomStereoCount", - "UndefinedAtomStereoCount", - "BondStereoCount", - "DefinedBondStereoCount", - "UndefinedBondStereoCount", - "CovalentUnitCount", - "Volume3D", - "XStericQuadrupole3D", - "YStericQuadrupole3D", - "ZStericQuadrupole3D", - "FeatureCount3D", - "FeatureAcceptorCount3D", - "FeatureDonorCount3D", - "FeatureAnionCount3D", - "FeatureCationCount3D", - "FeatureRingCount3D", - "FeatureHydrophobeCount3D", - "ConformerModelRMSD3D", - "EffectiveRotorCount3D", - "ConformerCount3D", - ] - pubchem_search File.join(@pug_uri, "compound", "cid", @cid, "property", properties.join(","), "JSON") - @result["PropertyTable"]["Properties"].first - end - - def from_smiles smiles - pubchem_search File.join(@pug_uri, "compound", "smiles", smiles, "assaysummary", "JSON") - extract_result @result["Table"]["Columns"]["Column"], @result["Table"]["Row"].collect{|cell| cell.values.flatten} - end - def property_similarity compound - svd = OpenTox::SVD.new(GSL::Matrix [[properties, compound.properties]]) - OpenTox::Algorithm::Similarity.cosine svd.data_transformed_matrix.first, svd.data_transformed_matrix.last - end - - def assay_similarity compound - tanimoto [[active_assays,inactive_assays],[compound.active_assays,compound.inactive_assays]] - end - - def target_similarity compound - tanimoto [[targets,non_targets],[compound.targets,compound.non_targets]] - end - - def tanimoto features - common = features.first.flatten & features.last.flatten - same_outcome = (features.first.first & features.last.first) + (features.first.last & features.last.last) - same_outcome.size.to_f/common.size - end - - def euclid features - end - - def to_name - RestClient.get(File.join(@pug_uri, "compound", "cid", @cid, "property", "IUPACName", "TXT")).strip - end -=end - - end - -=begin - class PubChemNeighbors < Dataset - include PubChem - - attr_accessor :query, :neighbors - - def initialize - @similarity_threshold = 95 - @neighbors = [] - @pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" - end - - def from_smiles smiles - #@query = PubChemCompound.new.from_smiles smiles - pubchem_search File.join(@pug_uri, "compound", "similarity", "smiles", smiles, "JSON")+"?Threshold=#{@similarity_threshold}&MaxRecords=250" - listkey = @result["Waiting"]["ListKey"] - while @result["Waiting"] do - sleep 1 - pubchem_search File.join(@pug_uri, "compound", "listkey", listkey, "assaysummary", "JSON") - end - #File.open("search.yaml","w+"){|s| s.puts @result.to_yaml} - columns = @result["Table"]["Columns"]["Column"] - table = @result["Table"]["Row"].collect{|cell| cell.values.flatten} - cid_idx = columns.index("CID") - cids = table.collect{|r| r[cid_idx]}.uniq - cids.each do |cid| - tab = table.collect{|r| r if r[cid_idx] == cid}.compact - c = PubChemCompound.new - c.extract_result columns, tab - @neighbors << c unless (c.targets + c.active_assays).flatten.compact.empty? - end - @query = @neighbors.shift - File.open("search.yaml","w+"){|s| s.puts self.to_yaml} - #puts @neighbors.query.to_name + RestClient.get(File.join(@@pug_proxy,"cid",@cid,"cosine",compound.cid)).to_f end end -=end end |