From ef76c077fd39d31fc795b842c32575f6afb9fdb2 Mon Sep 17 00:00:00 2001
From: Christoph Helma <helma@in-silico.ch>
Date: Sun, 9 Aug 2015 13:42:54 +0200
Subject: customized prediction algorithms implemented

---
 lib/compound.rb       | 59 ++++++++++++++++++++++++++-------------------------
 lib/dataset.rb        |  3 +--
 lib/opentox-client.rb | 11 +++++-----
 3 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/lib/compound.rb b/lib/compound.rb
index 4d292f1..4e29938 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -11,32 +11,31 @@ module OpenTox
   class Compound
     include OpenTox
 
-# OpenBabel FP4 fingerprints
-# OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
-# TODO store in DB
-fp4 = FingerprintSmarts.find
-unless fp4
-  fp4 = []
-  File.open(File.join(File.dirname(__FILE__),"SMARTS_InteLigand.txt")).each do |l| 
-    l.strip!
-    unless l.empty? or l.match /^#/
-      name,smarts = l.split(': ')
-      fp4 << OpenTox::FingerprintSmarts.find_or_create_by(:name => name, :smarts => smarts) unless smarts.nil?
+    # OpenBabel FP4 fingerprints: read the SMARTS patterns from the DB, seeding it from SMARTS_InteLigand.txt when it is empty
+    # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
+    fp4 = FingerprintSmarts.all.to_a # materialize: a query object is truthy even when nothing is stored
+    if fp4.empty?
+      # File.foreach closes the file handle once iteration is finished
+      File.foreach(File.join(File.dirname(__FILE__),"SMARTS_InteLigand.txt")) do |l|
+        l.strip!
+        unless l.empty? or l.match(/^#/) # skip blank lines and comment lines
+          name,smarts = l.split(': ')
+          fp4 << OpenTox::FingerprintSmarts.find_or_create_by(:name => name, :smarts => smarts) unless smarts.nil?
+        end
+      end
     end
-  end
-end
-FP4 = fp4
-
-# TODO investigate other types of fingerprints (MACCS)
-# OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
-# http://www.dalkescientific.com/writings/diary/archive/2008/06/26/fingerprint_background.html
-# OpenBabel MNA http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html#multilevel-neighborhoods-of-atoms-mna
-# Morgan ECFP, FCFP
-# http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/fingerprint/CircularFingerprinter.html
-# http://www.rdkit.org/docs/GettingStartedInPython.html
-# Chemfp
-# https://chemfp.readthedocs.org/en/latest/using-tools.html
-# CACTVS/PubChem
+    FP4 = fp4
+
+    # TODO investigate other types of fingerprints (MACCS)
+    # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
+    # http://www.dalkescientific.com/writings/diary/archive/2008/06/26/fingerprint_background.html
+    # OpenBabel MNA http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html#multilevel-neighborhoods-of-atoms-mna
+    # Morgan ECFP, FCFP
+    # http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/fingerprint/CircularFingerprinter.html
+    # http://www.rdkit.org/docs/GettingStartedInPython.html
+    # Chemfp
+    # https://chemfp.readthedocs.org/en/latest/using-tools.html
+    # CACTVS/PubChem
 
     field :inchi, type: String
     attr_readonly :inchi
@@ -170,15 +169,16 @@ FP4 = fp4
       self["chemblid"]
     end
 
-    def neighbors threshold=0.3
+    def neighbors threshold=0.7
       # from http://blog.matt-swain.com/post/87093745652/chemical-similarity-search-in-mongodb
       qn = fp4.size
-      qmin = qn * threshold
-      qmax = qn / threshold
+      #qmin = qn * threshold
+      #qmax = qn / threshold
       #not sure if it is worth the effort of keeping feature counts up to date (compound deletions, additions, ...)
       #reqbits = [count['_id'] for count in db.mfp_counts.find({'_id': {'$in': qfp}}).sort('count', 1).limit(qn - qmin + 1)]
       aggregate = [
         #{'$match': {'mfp.count': {'$gte': qmin, '$lte': qmax}, 'mfp.bits': {'$in': reqbits}}},
+        {'$match':  {'_id': {'$ne': self.id}}}, # remove self
         {'$project': {
           'tanimoto': {'$let': {
             'vars': {'common': {'$size': {'$setIntersection': ['$fp4', fp4]}}},
@@ -190,7 +190,8 @@ FP4 = fp4
         {'$sort': {'tanimoto': -1}}
       ]
       
-      $mongo["compounds"].aggregate(aggregate).collect { |r| [Compound.find(r["_id"]), r["tanimoto"]]}
+      $mongo["compounds"].aggregate(aggregate).collect{ |r| [r["_id"], r["tanimoto"]] }
+        
     end
 
     private
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 0447bb0..509e897 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -123,8 +123,7 @@ module OpenTox
     # @param feature [OpenTox::Feature] OpenTox Feature object
     # @return [Array] Data entry values
     def values(compound, feature)
-      #data_entries.where(:compound_id => compound.id, :feature_id => feature.id).distinct(:value)
-      rows = (0 ... compound_ids.length).select { |r| compound_ids[r] == compound.id }
+      rows = compound_ids.each_index.select{|r| compound_ids[r] == compound.id }
       col = feature_ids.index feature.id
       rows.collect{|row| data_entries[row][col]}
     end
diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb
index 092b84e..e1e27c9 100644
--- a/lib/opentox-client.rb
+++ b/lib/opentox-client.rb
@@ -8,16 +8,16 @@ require 'mongoid'
 require 'rserve'
 
 # TODO store development/test, validation, production in separate databases
-ENV["MONGOID_ENV"] = "development"
+ENV["MONGOID_ENV"] ||= "development"
 Mongoid.load!("#{ENV['HOME']}/.opentox/config/mongoid.yml")
 R = Rserve::Connection.new
 
-CLASSES = ["Feature","Compound",  "Dataset"]#, "Validation", "Task", "Investigation"]
+CLASSES = ["Feature","Compound",  "Dataset", "Validation", "CrossValidation"]#, "Task", "Investigation"]
 #CLASSES = ["Feature", "Dataset", "Validation", "Task", "Investigation"]
 
 # Regular expressions for parsing classification data
-TRUE_REGEXP = /^(true|active|1|1.0|tox|activating|carcinogen|mutagenic)$/i
-FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-mutagenic)$/i
+#TRUE_REGEXP = /^(true|active|1|1.0|tox|activating|carcinogen|mutagenic)$/i
+#FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-mutagenic)$/i
 
 [
   "overwrite.rb",
@@ -49,5 +49,4 @@ $logger.level = Logger::DEBUG
 Mongo::Logger.level = Logger::WARN 
 $mongo = Mongo::Client.new('mongodb://127.0.0.1:27017/opentox')
 $gridfs = $mongo.database.fs
-Mongoid.logger.level = Logger::WARN
-Mongoid.logger = $logger
+#Mongoid.logger = $logger
-- 
cgit v1.2.3