summaryrefslogtreecommitdiff
path: root/lib/compound.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compound.rb')
-rw-r--r--lib/compound.rb71
1 files changed, 70 insertions, 1 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 0c27553..4d292f1 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -1,7 +1,7 @@
# TODO: check
# *** Open Babel Error in ParseFile
# Could not find contribution data file.
-# 3d creation??
+
CACTUS_URI="http://cactus.nci.nih.gov/chemical/structure/"
require 'openbabel'
require "base64"
@@ -9,6 +9,34 @@ require "base64"
module OpenTox
class Compound
+ include OpenTox
+
+# OpenBabel FP4 fingerprints
+# OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
+# TODO store in DB
+fp4 = FingerprintSmarts.find
+unless fp4
+ fp4 = []
+ File.open(File.join(File.dirname(__FILE__),"SMARTS_InteLigand.txt")).each do |l|
+ l.strip!
+ unless l.empty? or l.match /^#/
+ name,smarts = l.split(': ')
+ fp4 << OpenTox::FingerprintSmarts.find_or_create_by(:name => name, :smarts => smarts) unless smarts.nil?
+ end
+ end
+end
+FP4 = fp4
+
+# TODO investigate other types of fingerprints (MACCS)
+# OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
+# http://www.dalkescientific.com/writings/diary/archive/2008/06/26/fingerprint_background.html
+# OpenBabel MNA http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html#multilevel-neighborhoods-of-atoms-mna
+# Morgan ECFP, FCFP
+# http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/fingerprint/CircularFingerprinter.html
+# http://www.rdkit.org/docs/GettingStartedInPython.html
+# Chemfp
+# https://chemfp.readthedocs.org/en/latest/using-tools.html
+# CACTVS/PubChem
field :inchi, type: String
attr_readonly :inchi
@@ -19,6 +47,8 @@ module OpenTox
field :chemblid, type: String
field :image_id, type: BSON::ObjectId
field :sdf_id, type: BSON::ObjectId
+ field :fp4, type: Array
+ field :fp4_size, type: Integer
#belongs_to :dataset
#belongs_to :data_entry
@@ -26,6 +56,22 @@ module OpenTox
#self.inchi == compound.inchi
#end
+ def self.find_or_create_by params
+ compound = self.find_or_initialize_by params
+ unless compound.fp4
+ compound.fp4_size = 0
+ compound.fp4 = []
+ Algorithm::Descriptor.smarts_match(compound, FP4.collect{|f| f.smarts}).each_with_index do |m,i|
+ if m > 0
+ compound.fp4 << FP4[i].id
+ compound.fp4_size += 1
+ end
+ end
+ end
+ compound.save
+ compound
+ end
+
# Create a compound from smiles string
# @example
# compound = OpenTox::Compound.from_smiles("c1ccccc1")
@@ -124,6 +170,29 @@ module OpenTox
self["chemblid"]
end
+ def neighbors threshold=0.3
+ # from http://blog.matt-swain.com/post/87093745652/chemical-similarity-search-in-mongodb
+ qn = fp4.size
+ qmin = qn * threshold
+ qmax = qn / threshold
+ #not sure if it is worth the effort of keeping feature counts up to date (compound deletions, additions, ...)
+ #reqbits = [count['_id'] for count in db.mfp_counts.find({'_id': {'$in': qfp}}).sort('count', 1).limit(qn - qmin + 1)]
+ aggregate = [
+ #{'$match': {'mfp.count': {'$gte': qmin, '$lte': qmax}, 'mfp.bits': {'$in': reqbits}}},
+ {'$project': {
+ 'tanimoto': {'$let': {
+ 'vars': {'common': {'$size': {'$setIntersection': ['$fp4', fp4]}}},
+ 'in': {'$divide': ['$$common', {'$subtract': [{'$add': [qn, '$fp4_size']}, '$$common']}]}
+ }},
+ '_id': 1
+ }},
+ {'$match': {'tanimoto': {'$gte': threshold}}},
+ {'$sort': {'tanimoto': -1}}
+ ]
+
+ $mongo["compounds"].aggregate(aggregate).collect { |r| [Compound.find(r["_id"]), r["tanimoto"]]}
+ end
+
private
def self.obconversion(identifier,input_format,output_format,option=nil)