summaryrefslogtreecommitdiff
path: root/lib/compound.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compound.rb')
-rw-r--r-- lib/compound.rb 475
1 files changed, 213 insertions, 262 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 6d0e075..615ea6e 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -1,296 +1,247 @@
-module OpenTox
+require 'openbabel'
- # Small molecules with defined chemical structures
- class Compound < Substance
- require_relative "unique_descriptors.rb"
- DEFAULT_FINGERPRINT = "MP2D"
+# Small molecules with defined chemical structures
+class Compound
+ DEFAULT_FINGERPRINT = "MP2D"
- field :inchi, type: String
- field :smiles, type: String
- field :inchikey, type: String
- field :names, type: Array
- field :cid, type: String
- field :png_id, type: BSON::ObjectId
- field :svg_id, type: BSON::ObjectId
- field :sdf_id, type: BSON::ObjectId
- field :fingerprints, type: Hash, default: {}
- field :default_fingerprint_size, type: Integer
-
- index({smiles: 1}, {unique: true})
-
- # Overwrites standard Mongoid method to create fingerprints before database insertion
- def self.find_or_create_by params
- compound = self.find_or_initialize_by params
- compound.default_fingerprint_size = compound.fingerprint(DEFAULT_FINGERPRINT).size
- compound.save
- compound
- end
+ def initialize smiles
+ @smiles = smiles
+ @fingerprints = {}
+ end
- # Create chemical fingerprint
- # @param [String] fingerprint type
- # @return [Array<String>]
- def fingerprint type=DEFAULT_FINGERPRINT
- unless fingerprints[type]
- return [] unless self.smiles
- if type == "MP2D" # http://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format
- fp = obconversion(smiles,"smi","mpd").strip.split("\t")
- name = fp.shift # remove Title
- fingerprints[type] = fp.uniq # no fingerprint counts
- elsif type== "MNA" # http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html
- level = 2 # TODO: level as parameter, evaluate level 1, see paper
- fp = obconversion(smiles,"smi","mna","xL\"#{level}\"").split("\n")
- fp.shift # remove Title
- fingerprints[type] = fp
- else # standard fingerprints
- fp = OpenBabel::OBFingerprint.find_fingerprint(type)
- obmol = OpenBabel::OBMol.new
- obconversion = OpenBabel::OBConversion.new
- obconversion.set_in_format "smi"
- obconversion.read_string obmol, self.smiles
- result = OpenBabel::VectorUnsignedInt.new
- fp.get_fingerprint(obmol,result)
- # TODO: %ignore *::DescribeBits @ line 163 openbabel/scripts/openbabel-ruby.i
- #p OpenBabel::OBFingerprint.describe_bits(result)
- # convert result to a list of the bits that are set
- # from openbabel/scripts/python/pybel.py line 830
- # see also http://openbabel.org/docs/dev/UseTheLibrary/Python_Pybel.html#fingerprints
- result = result.to_a
- bitsperint = OpenBabel::OBFingerprint.getbitsperint()
- bits_set = []
- start = 1
- result.each do |x|
- i = start
- while x > 0 do
- bits_set << i if (x % 2) == 1
- x >>= 1
- i += 1
- end
- start += bitsperint
+ # Create chemical fingerprint
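+ # @param type [String] fingerprint type
+ # @return [Array<String>, Array<Integer>] MP2D/MNA features as strings; for other types the 1-based indices of the set bits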
+ def fingerprint type=DEFAULT_FINGERPRINT
+ unless @fingerprints[type]
+ if type == "MP2D" # http://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format
+ fp = obconversion(@smiles,"smi","mpd").strip.split("\t")
+ fp.shift # remove Title
+ @fingerprints[type] = fp.uniq # no fingerprint counts
+ elsif type== "MNA" # http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html
+ level = 2 # TODO: level as parameter, evaluate level 1, see paper
+ fp = obconversion(@smiles,"smi","mna","xL\"#{level}\"").split("\n")
+ fp.shift # remove Title
+ @fingerprints[type] = fp
+ else # standard fingerprints
+ fp = OpenBabel::OBFingerprint.find_fingerprint(type)
+ obmol = OpenBabel::OBMol.new
+ obconversion = OpenBabel::OBConversion.new
+ obconversion.set_in_format "smi"
+ obconversion.read_string obmol, @smiles
+ result = OpenBabel::VectorUnsignedInt.new
+ fp.get_fingerprint(obmol,result)
+ # TODO: %ignore *::DescribeBits @ line 163 openbabel/scripts/openbabel-ruby.i
+ #p OpenBabel::OBFingerprint.describe_bits(result)
+ # convert result to a list of the bits that are set
+ # from openbabel/scripts/python/pybel.py line 830
+ # see also http://openbabel.org/docs/dev/UseTheLibrary/Python_Pybel.html#fingerprints
+ result = result.to_a
+ bitsperint = OpenBabel::OBFingerprint.getbitsperint()
+ bits_set = []
+ start = 1
+ result.each do |x|
+ i = start
+ while x > 0 do
+ bits_set << i if (x % 2) == 1
+ x >>= 1
+ i += 1
end
- fingerprints[type] = bits_set
+ start += bitsperint
end
- save
+ @fingerprints[type] = bits_set
end
- fingerprints[type]
end
+ @fingerprints[type]
+ end
- # Calculate physchem properties
- # @param [Array<Hash>] list of descriptors
- # @return [Array<Float>]
- def calculate_properties descriptors=PhysChem::OPENBABEL
- calculated_ids = properties.keys
- # BSON::ObjectId instances are not allowed as keys in a BSON document.
- new_ids = descriptors.collect{|d| d.id.to_s} - calculated_ids
- descs = {}
- algos = {}
- new_ids.each do |id|
- descriptor = PhysChem.find id
- descs[[descriptor.library, descriptor.descriptor]] = descriptor
- algos[descriptor.name] = descriptor
- end
- # avoid recalculating Cdk features with multiple values
- descs.keys.uniq.each do |k|
- descs[k].send(k[0].downcase,k[1],self).each do |n,v|
- properties[algos[n].id.to_s] = v # BSON::ObjectId instances are not allowed as keys in a BSON document.
- end
+=begin
+ # Calculate physchem properties
+ # @param [Array<Hash>] list of descriptors
+ # @return [Array<Float>]
+ def calculate_properties descriptors=PhysChem::OPENBABEL
+ calculated_ids = properties.keys
+ # BSON::ObjectId instances are not allowed as keys in a BSON document.
+ new_ids = descriptors.collect{|d| d.id.to_s} - calculated_ids
+ descs = {}
+ algos = {}
+ new_ids.each do |id|
+ descriptor = PhysChem.find id
+ descs[[descriptor.library, descriptor.descriptor]] = descriptor
+ algos[descriptor.name] = descriptor
+ end
+ # avoid recalculating Cdk features with multiple values
+ descs.keys.uniq.each do |k|
+ descs[k].send(k[0].downcase,k[1],self).each do |n,v|
+ properties[algos[n].id.to_s] = v # BSON::ObjectId instances are not allowed as keys in a BSON document.
end
- save
- descriptors.collect{|d| properties[d.id.to_s]}
end
-
- # Match a SMARTS substructure
- # @param [String] smarts
- # @param [TrueClass,FalseClass] count matches or return true/false
- # @return [TrueClass,FalseClass,Fixnum]
- def smarts_match smarts, count=false
- obconversion = OpenBabel::OBConversion.new
- obmol = OpenBabel::OBMol.new
- obconversion.set_in_format('smi')
- obconversion.read_string(obmol,self.smiles)
- smarts_pattern = OpenBabel::OBSmartsPattern.new
- smarts.collect do |sma|
- smarts_pattern.init(sma.smarts)
- if smarts_pattern.match(obmol)
- count ? value = smarts_pattern.get_map_list.to_a.size : value = 1
- else
- value = 0
- end
- value
+ save
+ descriptors.collect{|d| properties[d.id.to_s]}
+ end
+=end
+
+ # Match a SMARTS substructure
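+ # @param smarts [Enumerable] objects responding to #smarts, one SMARTS pattern each
+ # @param count [TrueClass,FalseClass] report the number of matches instead of 1/0
+ # @return [Array<Integer>] per pattern: match count (or 1 when count is false) on a match, 0 otherwise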
+ def smarts_match smarts, count=false
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_format('smi')
+ obconversion.read_string(obmol,@smiles)
+ smarts_pattern = OpenBabel::OBSmartsPattern.new
+ smarts.collect do |sma|
+ smarts_pattern.init(sma.smarts)
+ if smarts_pattern.match(obmol)
+ count ? value = smarts_pattern.get_map_list.to_a.size : value = 1
+ else
+ value = 0
end
+ value
end
+ end
- # Create a compound from smiles string
- # @example
- # compound = OpenTox::Compound.from_smiles("c1ccccc1")
- # @param [String] smiles
- # @return [OpenTox::Compound]
- def self.from_smiles smiles
- return nil if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles
- smiles = obconversion(smiles,"smi","can") # test if SMILES is correct and return canonical smiles (for compound comparisons)
- smiles.empty? ? nil : Compound.find_or_create_by(:smiles => smiles)
- end
-
- # Create a compound from InChI string
- # @param [String] InChI
- # @return [OpenTox::Compound]
- def self.from_inchi inchi
- smiles = obconversion(inchi,"inchi","can")
- smiles.empty? ? nil : Compound.find_or_create_by(:smiles => smiles)
- end
-
- # Create a compound from SDF
- # @param [String] SDF
- # @return [OpenTox::Compound]
- def self.from_sdf sdf
- # do not store sdf because it might be 2D
- Compound.from_smiles obconversion(sdf,"sdf","can")
- end
-
- # Create a compound from name. Relies on an external service for name lookups.
- # @example
- # compound = OpenTox::Compound.from_name("Benzene")
- # @param [String] name, can be also an InChI/InChiKey, CAS number, etc
- # @return [OpenTox::Compound]
- def self.from_name name
- Compound.from_smiles RestClientWrapper.get(File.join(PUBCHEM_URI,"compound","name",URI.escape(name),"property","CanonicalSMILES","TXT")).chomp
- end
+ # Validate a SMILES string and convert it to canonical SMILES
+ # @example
+ # canonical = Compound.from_smiles("c1ccccc1")
+ # @param smiles [String] SMILES string; input containing whitespace is rejected
+ # @return [String, nil] canonical SMILES, nil if rejected or unconvertible. NOTE(review): earlier docs promised a Compound instance -- confirm what callers expect
+ def self.from_smiles smiles
+ return nil if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles
+ canonical = obconversion(smiles,"smi","can") # test if SMILES is correct and return canonical smiles (for compound comparisons); kept local, no class-level @smiles
+ (canonical.nil? || canonical.empty?) ? nil : canonical # obconversion gives nil when the written output contains no token
+ end
- # Get InChI
- # @return [String]
- def inchi
- unless self["inchi"]
- result = obconversion(smiles,"smi","inchi")
- update(:inchi => result.chomp) if result and !result.empty?
- end
- self["inchi"]
- end
+ # Convert an InChI string to canonical SMILES
+ # @param inchi [String, nil] InChI identifier
+ # @return [String, nil] canonical SMILES; nil when obconversion returns nil or an empty string
+ def self.from_inchi inchi
+ canonical = obconversion(inchi,"inchi","can") # kept local, no class-level @smiles; nil for a nil identifier or token-less output
+ (canonical.nil? || canonical.empty?) ? nil : canonical
+ end
- # Get InChIKey
- # @return [String]
- def inchikey
- update(:inchikey => obconversion(smiles,"smi","inchikey")) unless self["inchikey"]
- self["inchikey"]
- end
+ # Convert an SDF record to canonical SMILES and hand the result to Compound.from_smiles
+ # @param sdf [String] SDF
+ # @return [String, nil] whatever Compound.from_smiles returns (canonical SMILES or nil)
+ def self.from_sdf sdf
+ # do not store sdf because it might be 2D
+ Compound.from_smiles obconversion(sdf,"sdf","can")
+ end
- # Get (canonical) smiles
- # @return [String]
- def smiles
- update(:smiles => obconversion(self["smiles"],"smi","can")) unless self["smiles"]
- self["smiles"]
- end
+ # Look up a name and pass the returned text to Compound.from_smiles. Relies on an external service for name lookups. NOTE(review): URI.escape is unavailable from Ruby 3.0 on -- confirm target Ruby version
+ # @example
+ # smiles = Compound.from_name("Benzene")
+ # @param name [String] name, can be also an InChI/InChiKey, CAS number, etc
+ # @return [String, nil] whatever Compound.from_smiles returns (canonical SMILES or nil)
+ def self.from_name name
+ Compound.from_smiles RestClientWrapper.get(File.join(PUBCHEM_URI,"compound","name",URI.escape(name),"property","CanonicalSMILES","TXT")).chomp
+ end
- # Get SDF
- # @return [String]
- def sdf
- if self.sdf_id.nil?
- sdf = obconversion(smiles,"smi","sdf")
- file = Mongo::Grid::File.new(sdf, :filename => "#{id}.sdf",:content_type => "chemical/x-mdl-sdfile")
- sdf_id = $gridfs.insert_one file
- update :sdf_id => sdf_id
- end
- $gridfs.find_one(_id: self.sdf_id).data
- end
+ # Get InChI by converting @smiles ("smi" -> "inchi") on every call; nothing is cached
+ # @return [String, nil] first whitespace-delimited token of the conversion output; nil if @smiles is nil or the output has no token
+ def inchi
+ obconversion(@smiles,"smi","inchi")
+ end
- # Get SVG image
- # @return [image/svg] Image data
- def svg
- if self.svg_id.nil?
- svg = obconversion(smiles,"smi","svg")
- file = Mongo::Grid::File.new(svg, :filename => "#{id}.svg", :content_type => "image/svg")
- update(:svg_id => $gridfs.insert_one(file))
- end
- $gridfs.find_one(_id: self.svg_id).data
- end
+ # Get InChIKey by converting @smiles ("smi" -> "inchikey") on every call; nothing is cached
+ # @return [String, nil] first whitespace-delimited token of the conversion output; nil if @smiles is nil or the output has no token
+ def inchikey
+ obconversion(@smiles,"smi","inchikey")
+ end
- # Get png image
- # @example
- # image = compound.png
- # @return [image/png] Image data
- def png
- if self.png_id.nil?
- png = obconversion(smiles,"smi","_png2")
- file = Mongo::Grid::File.new(Base64.encode64(png), :filename => "#{id}.png", :content_type => "image/png")
- update(:png_id => $gridfs.insert_one(file))
- end
- Base64.decode64($gridfs.find_one(_id: self.png_id).data)
- end
+ # Get SDF generated from @smiles ("smi" -> "sdf")
+ # @return [String, nil] SDF text; nil if @smiles is nil
+ def sdf
+ obconversion(@smiles,"smi","sdf") # read the ivar like inchi/inchikey: no smiles reader is defined in this file
+ end
- # Get all known compound names. Relies on an external service for name lookups.
- # @example
- # names = compound.names
- # @return [Array<String>]
- def names
- update(:names => RestClientWrapper.get(File.join(PUBCHEM_URI,"compound","smiles",URI.escape(smiles),"synonyms","TXT")).split("\n")) #unless self["names"]
- self["names"]
- end
+ # Get SVG image generated from @smiles ("smi" -> "svg")
+ # @return [image/svg] Image data
+ def svg
+ obconversion(@smiles,"smi","svg") # read the ivar like inchi/inchikey: no smiles reader is defined in this file
+ end
- # Get PubChem Compound Identifier (CID), obtained via REST call to PubChem
- # @return [String]
- def cid
- update(:cid => RestClientWrapper.post(File.join(PUBCHEM_URI, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip) unless self["cid"]
- self["cid"]
- end
-
- # Convert mmol to mg
- # @return [Float] value in mg
- def mmol_to_mg mmol
- mmol.to_f*molecular_weight
- end
+ # Get png image generated from @smiles ("smi" -> "_png2")
+ # @example
+ # image = compound.png
+ # @return [image/png] Image data
+ def png
+ obconversion(@smiles,"smi","_png2") # read the ivar like inchi/inchikey: no smiles reader is defined in this file
+ end
- # Convert mg to mmol
- # @return [Float] value in mmol
- def mg_to_mmol mg
- mg.to_f/molecular_weight
- end
-
- # Calculate molecular weight of Compound with OB and store it in compound object
- # @return [Float] molecular weight
- def molecular_weight
- mw_feature = PhysChem.find_or_create_by(:name => "Openbabel.MW")
- calculate_properties([mw_feature]).first
- end
+ # Get all known compound names for @smiles. Relies on an external service for name lookups. NOTE(review): URI.escape is unavailable from Ruby 3.0 on -- confirm target Ruby version
+ # @example
+ # names = compound.names
+ # @return [Array<String>] response body split into lines
+ def names
+ RestClientWrapper.get(File.join(PUBCHEM_URI,"compound","smiles",URI.escape(@smiles),"synonyms","TXT")).split("\n") # @smiles: no smiles reader is defined in this file
+ end
- private
+ # Get PubChem Compound Identifier (CID), obtained via REST call to PubChem
+ # @return [String]
+ def cid
+ RestClientWrapper.post(File.join(PUBCHEM_URI, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip
+ end
+
+ # Convert mmol to mg
+ # @return [Float] value in mg
+ def mmol_to_mg mmol
+ mmol.to_f*molecular_weight
+ end
- def self.obconversion(identifier,input_format,output_format,option=nil)
- obconversion = OpenBabel::OBConversion.new
- obconversion.set_options(option, OpenBabel::OBConversion::OUTOPTIONS) if option
- obmol = OpenBabel::OBMol.new
- obconversion.set_in_and_out_formats input_format, output_format
- return nil if identifier.nil?
- obconversion.read_string obmol, identifier
- case output_format
- when /smi|can|inchi/
- obconversion.write_string(obmol).split(/\s/).first
- when /sdf/
- # TODO: find disconnected structures
- # strip_salts
- # separate
- obmol.add_hydrogens
- builder = OpenBabel::OBBuilder.new
- builder.build(obmol)
+ # Convert mg to mmol
+ # @return [Float] value in mmol
+ def mg_to_mmol mg
+ mg.to_f/molecular_weight
+ end
+
+ # Calculate molecular weight of Compound through the "Openbabel.MW" PhysChem feature
+ # @return [Float] molecular weight. NOTE(review): calculate_properties sits inside the =begin/=end block earlier in this file, so this call looks unresolvable as written -- confirm
+ def molecular_weight
+ mw_feature = PhysChem.find_or_create_by(:name => "Openbabel.MW")
+ calculate_properties([mw_feature]).first
+ end
- sdf = obconversion.write_string(obmol)
+ def self.obconversion(identifier,input_format,output_format,option=nil)
+ obconversion = OpenBabel::OBConversion.new
+ obconversion.set_options(option, OpenBabel::OBConversion::OUTOPTIONS) if option
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_and_out_formats input_format, output_format
+ return nil if identifier.nil?
+ obconversion.read_string obmol, identifier
+ case output_format
+ when /smi|can|inchi/
+ obconversion.write_string(obmol).split(/\s/).first
+ when /sdf/
+ # TODO: find disconnected structures
+ # strip_salts
+ # separate
+ obmol.add_hydrogens
+ builder = OpenBabel::OBBuilder.new
+ builder.build(obmol)
+
+ sdf = obconversion.write_string(obmol)
print sdf
+ if sdf.match(/.nan/)
+
+ #warn "3D generation failed for compound #{identifier}, trying to calculate 2D structure"
+ obconversion.set_options("gen2D", OpenBabel::OBConversion::GENOPTIONS)
+ sdf = obconversion.write_string(obmol)
if sdf.match(/.nan/)
-
- #warn "3D generation failed for compound #{identifier}, trying to calculate 2D structure"
- obconversion.set_options("gen2D", OpenBabel::OBConversion::GENOPTIONS)
+ #warn "2D generation failed for compound #{identifier}, rendering without coordinates."
+ obconversion.remove_option("gen2D", OpenBabel::OBConversion::GENOPTIONS)
sdf = obconversion.write_string(obmol)
- if sdf.match(/.nan/)
- #warn "2D generation failed for compound #{identifier}, rendering without coordinates."
- obconversion.remove_option("gen2D", OpenBabel::OBConversion::GENOPTIONS)
- sdf = obconversion.write_string(obmol)
- end
end
- sdf
- else
- obconversion.write_string(obmol)
end
+ sdf
+ else
+ obconversion.write_string(obmol)
end
+ end
- def obconversion(identifier,input_format,output_format,option=nil)
- self.class.obconversion(identifier,input_format,output_format,option)
- end
+ def obconversion(identifier,input_format,output_format,option=nil)
+ self.class.obconversion(identifier,input_format,output_format,option)
end
+
end