summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-02-28 16:00:15 +0100
committer Christoph Helma <helma@in-silico.ch> 2016-02-28 16:00:15 +0100
commit d0c6234fed7d45227fcf9309cb6dc0854d17e647 (patch)
tree 43f2327abbdbcb1688c2605308966e62f88c907e /lib
parent 8c973e16028cb95c978bb08cf79369a5c3520c31 (diff)
physchem calculation and storage in compounds
Diffstat (limited to 'lib')
-rw-r--r-- lib/compound.rb 30
-rw-r--r-- lib/physchem.rb 31
-rw-r--r-- lib/unique_descriptors.rb 2
3 files changed, 34 insertions, 29 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 4ea4db4..8c11831 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -17,7 +17,6 @@ module OpenTox
field :smiles, type: String
field :inchikey, type: String
field :names, type: Array
- #field :warnings, type: Array, default: []
field :cid, type: String
field :chemblid, type: String
field :png_id, type: BSON::ObjectId
@@ -88,17 +87,26 @@ module OpenTox
fingerprints[type]
end
- def physchem descriptor_ids
- calculated_descriptor_ids = self[:physchem_descriptors].keys
- p names
- new = UNIQUEDESCRIPTORS-names
- p new
- d = self.physchem(self, new)
- #p d
- #self[:physchem_descriptors].merge! d
- self.update_attribute(:physchem_descriptors, self[:physchem_descriptors].merge(d))
+ def physchem descriptors=PhysChem.openbabel_descriptors
+ # TODO: speedup java descriptors
+ calculated_ids = physchem_descriptors.keys
+ # BSON::ObjectId instances are not allowed as keys in a BSON document.
+ new_ids = descriptors.collect{|d| d.id.to_s} - calculated_ids
+ descs = {}
+ algos = {}
+ new_ids.each do |id|
+ descriptor = PhysChem.find id
+ descs[[descriptor.library, descriptor.descriptor]] = descriptor
+ algos[descriptor.name] = descriptor
+ end
+ # avoid recalculating Cdk features with multiple values
+ descs.keys.uniq.each do |k|
+ descs[k].send(k[0].downcase,k[1],self).each do |n,v|
+ physchem_descriptors[algos[n].id.to_s] = v # BSON::ObjectId instances are not allowed as keys in a BSON document.
+ end
+ end
save
- self[:physchem_descriptors]
+ physchem_descriptors
end
# Create a compound from smiles string
diff --git a/lib/physchem.rb b/lib/physchem.rb
index 1126e69..64018ad 100644
--- a/lib/physchem.rb
+++ b/lib/physchem.rb
@@ -37,10 +37,12 @@ module OpenTox
DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS))
+
require_relative "unique_descriptors.rb"
- def self.descriptors
- DESCRIPTORS.collect do |name,description|
+ def self.descriptors desc=DESCRIPTORS
+ # TODO create PhysChem features @startup
+ desc.collect do |name,description|
lib,desc = name.split('.',2)
self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
end
@@ -64,25 +66,20 @@ module OpenTox
udesc
end
- # Description of available descriptors
- def self.description descriptor
- lib = descriptor.split('_').first
- case lib
- when "Openbabel"
- OBDESCRIPTORS[descriptor]
- when "Cdk"
- name = descriptor.split('_')[0..-2].join('_')
- CDKDESCRIPTORS[name]
- when "Joelib"
- JOELIBDESCRIPTORS[descriptor]
- when "lookup"
- "Read feature values from a dataset"
- end
+ def self.openbabel_descriptors
+ descriptors OBDESCRIPTORS
+ end
+
+ def self.cdk_descriptors
+ descriptors CDKDESCRIPTORS
+ end
+
+ def self.joelib_descriptors
+ descriptors JOELIBDESCRIPTORS
end
def calculate compound
result = send library.downcase,descriptor,compound
- p result
result[self.name]
end
diff --git a/lib/unique_descriptors.rb b/lib/unique_descriptors.rb
index 03a9b08..8341a67 100644
--- a/lib/unique_descriptors.rb
+++ b/lib/unique_descriptors.rb
@@ -24,7 +24,7 @@ UNIQUEDESCRIPTORS = [
"Cdk.ALOGP", #Calculates atom additive logP and molar refractivity values as described by Ghose and Crippen and
"Cdk.APol", #Descriptor that calculates the sum of the atomic polarizabilities (including implicit hydrogens).
"Cdk.AcidicGroupCount", #Returns the number of acidic groups.
- "Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
+ #"Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
#"Cdk.AromaticAtomsCount", #Descriptor based on the number of aromatic atoms of a molecule.
#"Cdk.AromaticBondsCount", #Descriptor based on the number of aromatic bonds of a molecule.
#"Cdk.AtomCount", #Descriptor based on the number of atoms of a certain element type.