From d0c6234fed7d45227fcf9309cb6dc0854d17e647 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Sun, 28 Feb 2016 16:00:15 +0100 Subject: physchem calculation and storage in compouds --- lib/compound.rb | 30 +++++++++++++++++++----------- lib/physchem.rb | 31 ++++++++++++++----------------- lib/unique_descriptors.rb | 2 +- 3 files changed, 34 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/compound.rb b/lib/compound.rb index 4ea4db4..8c11831 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -17,7 +17,6 @@ module OpenTox field :smiles, type: String field :inchikey, type: String field :names, type: Array - #field :warnings, type: Array, default: [] field :cid, type: String field :chemblid, type: String field :png_id, type: BSON::ObjectId @@ -88,17 +87,26 @@ module OpenTox fingerprints[type] end - def physchem descriptor_ids - calculated_descriptor_ids = self[:physchem_descriptors].keys - p names - new = UNIQUEDESCRIPTORS-names - p new - d = self.physchem(self, new) - #p d - #self[:physchem_descriptors].merge! d - self.update_attribute(:physchem_descriptors, self[:physchem_descriptors].merge(d)) + def physchem descriptors=PhysChem.openbabel_descriptors + # TODO: speedup java descriptors + calculated_ids = physchem_descriptors.keys + # BSON::ObjectId instances are not allowed as keys in a BSON document. + new_ids = descriptors.collect{|d| d.id.to_s} - calculated_ids + descs = {} + algos = {} + new_ids.each do |id| + descriptor = PhysChem.find id + descs[[descriptor.library, descriptor.descriptor]] = descriptor + algos[descriptor.name] = descriptor + end + # avoid recalculating Cdk features with multiple values + descs.keys.uniq.each do |k| + descs[k].send(k[0].downcase,k[1],self).each do |n,v| + physchem_descriptors[algos[n].id.to_s] = v # BSON::ObjectId instances are not allowed as keys in a BSON document. + end + end save - self[:physchem_descriptors] + physchem_descriptors end # Create a compound from smiles string diff --git a/lib/physchem.rb b/lib/physchem.rb index 1126e69..64018ad 100644 --- a/lib/physchem.rb +++ b/lib/physchem.rb @@ -37,10 +37,12 @@ module OpenTox DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS)) + require_relative "unique_descriptors.rb" - def self.descriptors - DESCRIPTORS.collect do |name,description| + def self.descriptors desc=DESCRIPTORS + # TODO create PhysChem features @startup + desc.collect do |name,description| lib,desc = name.split('.',2) self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false) end @@ -64,25 +66,20 @@ module OpenTox udesc end - # Description of available descriptors - def self.description descriptor - lib = descriptor.split('_').first - case lib - when "Openbabel" - OBDESCRIPTORS[descriptor] - when "Cdk" - name = descriptor.split('_')[0..-2].join('_') - CDKDESCRIPTORS[name] - when "Joelib" - JOELIBDESCRIPTORS[descriptor] - when "lookup" - "Read feature values from a dataset" - end + def self.openbabel_descriptors + descriptors OBDESCRIPTORS + end + + def self.cdk_descriptors + descriptors CDKDESCRIPTORS + end + + def self.joelib_descriptors + descriptors JOELIBDESCRIPTORS end def calculate compound result = send library.downcase,descriptor,compound - p result result[self.name] end diff --git a/lib/unique_descriptors.rb b/lib/unique_descriptors.rb index 03a9b08..8341a67 100644 --- a/lib/unique_descriptors.rb +++ b/lib/unique_descriptors.rb @@ -24,7 +24,7 @@ UNIQUEDESCRIPTORS = [ "Cdk.ALOGP", #Calculates atom additive logP and molar refractivity values as described by Ghose and Crippen and "Cdk.APol", #Descriptor that calculates the sum of the atomic polarizabilities (including implicit hydrogens). "Cdk.AcidicGroupCount", #Returns the number of acidic groups. - "Cdk.AminoAcidCount", #Returns the number of amino acids found in the system + #"Cdk.AminoAcidCount", #Returns the number of amino acids found in the system #"Cdk.AromaticAtomsCount", #Descriptor based on the number of aromatic atoms of a molecule. #"Cdk.AromaticBondsCount", #Descriptor based on the number of aromatic bonds of a molecule. #"Cdk.AtomCount", #Descriptor based on the number of atoms of a certain element type. -- cgit v1.2.3