From ef76c077fd39d31fc795b842c32575f6afb9fdb2 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 9 Aug 2015 13:42:54 +0200 Subject: customized prediction algorithms implemented --- lib/compound.rb | 59 ++++++++++++++++++++++++++------------------------- lib/dataset.rb | 3 +-- lib/opentox-client.rb | 11 +++++----- 3 files changed, 36 insertions(+), 37 deletions(-) diff --git a/lib/compound.rb b/lib/compound.rb index 4d292f1..4e29938 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -11,32 +11,31 @@ module OpenTox class Compound include OpenTox -# OpenBabel FP4 fingerprints -# OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html -# TODO store in DB -fp4 = FingerprintSmarts.find -unless fp4 - fp4 = [] - File.open(File.join(File.dirname(__FILE__),"SMARTS_InteLigand.txt")).each do |l| - l.strip! - unless l.empty? or l.match /^#/ - name,smarts = l.split(': ') - fp4 << OpenTox::FingerprintSmarts.find_or_create_by(:name => name, :smarts => smarts) unless smarts.nil? + # OpenBabel FP4 fingerprints + # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html + fp4 = FingerprintSmarts.all + unless fp4 + fp4 = [] + File.open(File.join(File.dirname(__FILE__),"SMARTS_InteLigand.txt")).each do |l| + l.strip! + unless l.empty? or l.match /^#/ + name,smarts = l.split(': ') + fp4 << OpenTox::FingerprintSmarts.find_or_create_by(:name => name, :smarts => smarts) unless smarts.nil? + end + end end - end -end -FP4 = fp4 - -# TODO investigate other types of fingerprints (MACCS) -# OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html -# http://www.dalkescientific.com/writings/diary/archive/2008/06/26/fingerprint_background.html -# OpenBabel MNA http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html#multilevel-neighborhoods-of-atoms-mna -# Morgan ECFP, FCFP -# http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/fingerprint/CircularFingerprinter.html -# http://www.rdkit.org/docs/GettingStartedInPython.html -# Chemfp -# https://chemfp.readthedocs.org/en/latest/using-tools.html -# CACTVS/PubChem + FP4 = fp4 + + # TODO investigate other types of fingerprints (MACCS) + # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html + # http://www.dalkescientific.com/writings/diary/archive/2008/06/26/fingerprint_background.html + # OpenBabel MNA http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html#multilevel-neighborhoods-of-atoms-mna + # Morgan ECFP, FCFP + # http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/fingerprint/CircularFingerprinter.html + # http://www.rdkit.org/docs/GettingStartedInPython.html + # Chemfp + # https://chemfp.readthedocs.org/en/latest/using-tools.html + # CACTVS/PubChem field :inchi, type: String attr_readonly :inchi @@ -170,15 +169,16 @@ FP4 = fp4 self["chemblid"] end - def neighbors threshold=0.3 + def neighbors threshold=0.7 # from http://blog.matt-swain.com/post/87093745652/chemical-similarity-search-in-mongodb qn = fp4.size - qmin = qn * threshold - qmax = qn / threshold + #qmin = qn * threshold + #qmax = qn / threshold #not sure if it is worth the effort of keeping feature counts up to date (compound deletions, additions, ...) #reqbits = [count['_id'] for count in db.mfp_counts.find({'_id': {'$in': qfp}}).sort('count', 1).limit(qn - qmin + 1)] aggregate = [ #{'$match': {'mfp.count': {'$gte': qmin, '$lte': qmax}, 'mfp.bits': {'$in': reqbits}}}, + {'$match': {'_id': {'$ne': self.id}}}, # remove self {'$project': { 'tanimoto': {'$let': { 'vars': {'common': {'$size': {'$setIntersection': ['$fp4', fp4]}}}, @@ -190,7 +190,8 @@ FP4 = fp4 {'$sort': {'tanimoto': -1}} ] - $mongo["compounds"].aggregate(aggregate).collect { |r| [Compound.find(r["_id"]), r["tanimoto"]]} + $mongo["compounds"].aggregate(aggregate).collect{ |r| [r["_id"], r["tanimoto"]] } + end private diff --git a/lib/dataset.rb b/lib/dataset.rb index 0447bb0..509e897 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -123,8 +123,7 @@ module OpenTox # @param feature [OpenTox::Feature] OpenTox Feature object # @return [Array] Data entry values def values(compound, feature) - #data_entries.where(:compound_id => compound.id, :feature_id => feature.id).distinct(:value) - rows = (0 ... compound_ids.length).select { |r| compound_ids[r] == compound.id } + rows = compound_ids.each_index.select{|r| compound_ids[r] == compound.id } col = feature_ids.index feature.id rows.collect{|row| data_entries[row][col]} end diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb index 092b84e..e1e27c9 100644 --- a/lib/opentox-client.rb +++ b/lib/opentox-client.rb @@ -8,16 +8,16 @@ require 'mongoid' require 'rserve' # TODO store development/test, validation, production in separate databases -ENV["MONGOID_ENV"] = "development" +ENV["MONGOID_ENV"] ||= "development" Mongoid.load!("#{ENV['HOME']}/.opentox/config/mongoid.yml") R = Rserve::Connection.new -CLASSES = ["Feature","Compound", "Dataset"]#, "Validation", "Task", "Investigation"] +CLASSES = ["Feature","Compound", "Dataset", "Validation", "CrossValidation"]#, "Task", "Investigation"] #CLASSES = ["Feature", "Dataset", "Validation", "Task", "Investigation"] # Regular expressions for parsing classification data -TRUE_REGEXP = /^(true|active|1|1.0|tox|activating|carcinogen|mutagenic)$/i -FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-mutagenic)$/i +#TRUE_REGEXP = /^(true|active|1|1.0|tox|activating|carcinogen|mutagenic)$/i +#FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-mutagenic)$/i [ "overwrite.rb", @@ -49,5 +49,4 @@ $logger.level = Logger::DEBUG Mongo::Logger.level = Logger::WARN $mongo = Mongo::Client.new('mongodb://127.0.0.1:27017/opentox') $gridfs = $mongo.database.fs -Mongoid.logger.level = Logger::WARN -Mongoid.logger = $logger +#Mongoid.logger = $logger -- cgit v1.2.3