summary | refs | log | tree | commit | diff
path: root/lib/compound.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compound.rb')
-rw-r--r-- lib/compound.rb 80
1 files changed, 11 insertions, 69 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index bfe69e3..6d0e075 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -1,5 +1,3 @@
-CACTUS_URI="https://cactus.nci.nih.gov/chemical/structure/"
-
module OpenTox
# Small molecules with defined chemical structures
@@ -12,7 +10,6 @@ module OpenTox
field :inchikey, type: String
field :names, type: Array
field :cid, type: String
- field :chemblid, type: String
field :png_id, type: BSON::ObjectId
field :svg_id, type: BSON::ObjectId
field :sdf_id, type: BSON::ObjectId
@@ -35,13 +32,11 @@ module OpenTox
def fingerprint type=DEFAULT_FINGERPRINT
unless fingerprints[type]
return [] unless self.smiles
- #http://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format
- if type == "MP2D"
+ if type == "MP2D" # http://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format
fp = obconversion(smiles,"smi","mpd").strip.split("\t")
name = fp.shift # remove Title
fingerprints[type] = fp.uniq # no fingerprint counts
- #http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html
- elsif type== "MNA"
+ elsif type== "MNA" # http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html
level = 2 # TODO: level as parameter, evaluate level 1, see paper
fp = obconversion(smiles,"smi","mna","xL\"#{level}\"").split("\n")
fp.shift # remove Title
@@ -130,30 +125,17 @@ module OpenTox
# @param [String] smiles
# @return [OpenTox::Compound]
def self.from_smiles smiles
- if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles
- $logger.warn "SMILES parsing failed for '#{smiles}'', SMILES string contains whitespaces."
- return nil
- end
+ return nil if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles
smiles = obconversion(smiles,"smi","can") # test if SMILES is correct and return canonical smiles (for compound comparisons)
- if smiles.empty?
- $logger.warn "SMILES parsing failed for '#{smiles}'', this may be caused by an incorrect SMILES string."
- return nil
- else
- Compound.find_or_create_by :smiles => smiles
- end
+ smiles.empty? ? nil : Compound.find_or_create_by(:smiles => smiles)
end
# Create a compound from InChI string
# @param [String] InChI
# @return [OpenTox::Compound]
def self.from_inchi inchi
- #smiles = `echo "#{inchi}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -iinchi - -ocan`.chomp.strip
smiles = obconversion(inchi,"inchi","can")
- if smiles.empty?
- Compound.find_or_create_by(:warnings => ["InChi parsing failed for #{inchi}, this may be caused by an incorrect InChi string or a bug in OpenBabel libraries."])
- else
- Compound.find_or_create_by(:smiles => smiles, :inchi => inchi)
- end
+ smiles.empty? ? nil : Compound.find_or_create_by(:smiles => smiles)
end
# Create a compound from SDF
@@ -170,7 +152,7 @@ module OpenTox
# @param [String] name, can be also an InChI/InChiKey, CAS number, etc
# @return [OpenTox::Compound]
def self.from_name name
- Compound.from_smiles RestClientWrapper.get(File.join(CACTUS_URI,URI.escape(name),"smiles"))
+ Compound.from_smiles RestClientWrapper.get(File.join(PUBCHEM_URI,"compound","name",URI.escape(name),"property","CanonicalSMILES","TXT")).chomp
end
# Get InChI
@@ -238,56 +220,16 @@ module OpenTox
# names = compound.names
# @return [Array<String>]
def names
- update(:names => RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names").split("\n")) unless self["names"]
+ update(:names => RestClientWrapper.get(File.join(PUBCHEM_URI,"compound","smiles",URI.escape(smiles),"synonyms","TXT")).split("\n")) #unless self["names"]
self["names"]
end
# Get PubChem Compound Identifier (CID), obtained via REST call to PubChem
# @return [String]
def cid
- pug_uri = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/"
- update(:cid => RestClientWrapper.post(File.join(pug_uri, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip) unless self["cid"]
+ update(:cid => RestClientWrapper.post(File.join(PUBCHEM_URI, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip) unless self["cid"]
self["cid"]
end
-
- # Get ChEMBL database compound id, obtained via REST call to ChEMBL
- # @return [String]
- def chemblid
- # https://www.ebi.ac.uk/chembldb/ws#individualCompoundByInChiKey
- uri = "https://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json"
- update(:chemblid => JSON.parse(RestClientWrapper.get(uri))["compounds"].first["chemblId"]) unless self["chemblid"]
- self["chemblid"]
- end
-
- def db_neighbors min_sim: 0.1, dataset_id:
- #p fingerprints[DEFAULT_FINGERPRINT]
- # from http://blog.matt-swain.com/post/87093745652/chemical-similarity-search-in-mongodb
-
- #qn = default_fingerprint_size
- #qmin = qn * threshold
- #qmax = qn / threshold
- #not sure if it is worth the effort of keeping feature counts up to date (compound deletions, additions, ...)
- #reqbits = [count['_id'] for count in db.mfp_counts.find({'_id': {'$in': qfp}}).sort('count', 1).limit(qn - qmin + 1)]
- aggregate = [
- #{'$match': {'mfp.count': {'$gte': qmin, '$lte': qmax}, 'mfp.bits': {'$in': reqbits}}},
- #{'$match' => {'_id' => {'$ne' => self.id}}}, # remove self
- {'$project' => {
- 'similarity' => {'$let' => {
- 'vars' => {'common' => {'$size' => {'$setIntersection' => ["$fingerprints.#{DEFAULT_FINGERPRINT}", fingerprints[DEFAULT_FINGERPRINT]]}}},
- 'in' => {'$divide' => ['$$common', {'$subtract' => [{'$add' => [default_fingerprint_size, '$default_fingerprint_size']}, '$$common']}]}
- }},
- '_id' => 1,
- #'measurements' => 1,
- 'dataset_ids' => 1
- }},
- {'$match' => {'similarity' => {'$gte' => min_sim}}},
- {'$sort' => {'similarity' => -1}}
- ]
-
- # TODO move into aggregate pipeline, see http://stackoverflow.com/questions/30537317/mongodb-aggregation-match-if-value-in-array
- $mongo["substances"].aggregate(aggregate).select{|r| r["dataset_ids"].include? dataset_id}
-
- end
# Convert mmol to mg
# @return [Float] value in mg
@@ -319,7 +261,7 @@ module OpenTox
obconversion.read_string obmol, identifier
case output_format
when /smi|can|inchi/
- obconversion.write_string(obmol).gsub(/\s/,'').chomp
+ obconversion.write_string(obmol).split(/\s/).first
when /sdf/
# TODO: find disconnected structures
# strip_salts
@@ -332,11 +274,11 @@ module OpenTox
print sdf
if sdf.match(/.nan/)
- $logger.warn "3D generation failed for compound #{identifier}, trying to calculate 2D structure"
+ #warn "3D generation failed for compound #{identifier}, trying to calculate 2D structure"
obconversion.set_options("gen2D", OpenBabel::OBConversion::GENOPTIONS)
sdf = obconversion.write_string(obmol)
if sdf.match(/.nan/)
- $logger.warn "2D generation failed for compound #{identifier}, rendering without coordinates."
+ #warn "2D generation failed for compound #{identifier}, rendering without coordinates."
obconversion.remove_option("gen2D", OpenBabel::OBConversion::GENOPTIONS)
sdf = obconversion.write_string(obmol)
end