From cd1cba67830505cd2d23ec83e64c0beed42a9f28 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 22 Jul 2015 20:08:12 +0200 Subject: mongo batch import workaround --- lib/compound.rb | 86 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 35 deletions(-) (limited to 'lib/compound.rb') diff --git a/lib/compound.rb b/lib/compound.rb index 6cc4707..4a8089b 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -1,16 +1,20 @@ CACTUS_URI="http://cactus.nci.nih.gov/chemical/structure/" require 'openbabel' +require "base64" module OpenTox - # Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure). class Compound - attr_reader :inchi - - def initialize inchi - @inchi = inchi - end + field :inchi, type: String + attr_readonly :inchi + field :smiles, type: String + field :inchikey, type: String + field :names, type: Array + field :cid, type: String + field :chemblid, type: String + field :image_id, type: BSON::ObjectId + field :sdf_id, type: BSON::ObjectId def == compound self.inchi == compound.inchi @@ -22,21 +26,23 @@ module OpenTox # @param [String] smiles Smiles string # @return [OpenTox::Compound] Compound def self.from_smiles smiles - OpenTox::Compound.new obconversion(smiles,"smi","inchi") + # do not store smiles because it might be noncanonical + Compound.find_or_create_by :inchi => obconversion(smiles,"smi","inchi") end # Create a compound from inchi string # @param inchi [String] smiles InChI string # @return [OpenTox::Compound] Compound def self.from_inchi inchi - OpenTox::Compound.new inchi + Compound.find_or_create_by :inchi => inchi end # Create a compound from sdf string # @param sdf [String] smiles SDF string # @return [OpenTox::Compound] Compound def self.from_sdf sdf - OpenTox::Compound.new obconversion(sdf,"sdf","inchi") + # do not store sdf because it might be 2D + Compound.find_or_create_by :inchi => obconversion(sdf,"sdf","inchi") end # Create a compound from name. Relies on an external service for name lookups. @@ -45,31 +51,32 @@ module OpenTox # @param name [String] can be also an InChI/InChiKey, CAS number, etc # @return [OpenTox::Compound] Compound def self.from_name name - OpenTox::Compound.new RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi") + Compound.find_or_create_by :inchi => RestClientWrapper.get(File.join(CACTUS_URI,URI.escape(name),"stdinchi")) end # Get InChIKey # @return [String] InChI string def inchikey - obconversion(@inchi,"inchi","inchikey") + update(:inchikey => obconversion(inchi,"inchi","inchikey")) unless self["inchikey"] + self["inchikey"] end # Get (canonical) smiles # @return [String] Smiles string def smiles - obconversion(@inchi,"inchi","smi") # "can" gives nonn-canonical smiles?? + update(:smiles => obconversion(inchi,"inchi","smi")) unless self["smiles"] # should give canonical smiles, "can" seems to give incorrect results + self["smiles"] end # Get sdf # @return [String] SDF string def sdf - obconversion(@inchi,"inchi","sdf") - end - - # Get gif image - # @return [image/gif] Image data - def gif - RestClientWrapper.get File.join(CACTUS_URI,inchi,"image") + if sdf_id.nil? + sdf = obconversion(inchi,"inchi","sdf") + file = Mongo::Grid::File.new(sdf, :filename => "#{id}.sdf",:content_type => "chemical/x-mdl-sdfile") + sdf_id = $gridfs.insert_one file + end + $gridfs.find_one(_id: sdf_id).data end # Get png image @@ -77,41 +84,37 @@ module OpenTox # image = compound.png # @return [image/png] Image data def png - obconversion(@inchi,"inchi","_png2") - end + if image_id.nil? + png = obconversion(inchi,"inchi","_png2") + file = Mongo::Grid::File.new(Base64.encode64(png), :filename => "#{id}.png", :content_type => "image/png") + update(:image_id => $gridfs.insert_one(file)) + end + Base64.decode64($gridfs.find_one(_id: image_id).data) -=begin - # Get URI of compound image - # @return [String] Compound image URI - def image_uri - File.join @data["uri"], "image" end -=end # Get all known compound names. Relies on an external service for name lookups. # @example # names = compound.names # @return [String] Compound names def names - RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names").split("\n") + update(:names => RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names").split("\n")) unless self["names"] + self["names"] end # @return [String] PubChem Compound Identifier (CID), derieved via restcall to pubchem def cid pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/" - @cid ||= RestClientWrapper.post(File.join(pug_uri, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip - end - - # @todo - def chebi - internal_server_error "not yet implemented" + update(:cid => RestClientWrapper.post(File.join(pug_uri, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip) unless self["cid"] + self["cid"] end # @return [String] ChEMBL database compound id, derieved via restcall to chembl def chemblid # https://www.ebi.ac.uk/chembldb/ws#individualCompoundByInChiKey uri = "http://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json" - @chemblid = JSON.parse(RestClientWrapper.get(uri))["compounds"].first["chemblId"] + update(:chemblid => JSON.parse(RestClientWrapper.get(uri))["compounds"].first["chemblId"]) unless self["chemblid"] + self["chemblid"] end private @@ -125,6 +128,19 @@ module OpenTox case output_format when /smi|can|inchi/ obconversion.write_string(obmol).gsub(/\s/,'').chomp + when /sdf/ + OpenBabel::OBOp.find_type("Gen3D").do(obmol) + sdf = obconversion.write_string(obmol) + if sdf.match(/.nan/) + $logger.warn "3D generation failed for compound #{compound.inchi}, trying to calculate 2D structure" + OpenBabel::OBOp.find_type("Gen2D").do(obmol) + sdf = obconversion.write_string(obmol) + if sdf.match(/.nan/) + $logger.warn "2D generation failed for compound #{compound.inchi}" + sdf = nil + end + end + sdf else obconversion.write_string(obmol) end -- cgit v1.2.3