path: root/lib/physchem.rb
diff options
author Christoph Helma <> 2016-02-28 12:43:38 +0100
committer Christoph Helma <> 2016-02-28 12:43:38 +0100
commit 8c973e16028cb95c978bb08cf79369a5c3520c31 (patch)
tree 96909cb936fbbf2c3bc43776278953394b93b94f /lib/physchem.rb
parent b90720cc26d789a96fa6f7a054fe06fc8b4ef33d (diff)
physchem feature class
Diffstat (limited to 'lib/physchem.rb')
1 files changed, 138 insertions, 0 deletions
diff --git a/lib/physchem.rb b/lib/physchem.rb
new file mode 100644
index 0000000..1126e69
--- /dev/null
+++ b/lib/physchem.rb
@@ -0,0 +1,138 @@
+module OpenTox
+ # Feature for physico-chemical descriptors
+ class PhysChem < NumericFeature
+ field :library, type: String
+ field :descriptor, type: String
+ field :description, type: String
+ JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
+ CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
+ JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
+ LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
+ JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")
+ obexclude = ["cansmi","cansmiNS","formula","InChI","InChIKey","s","smarts","title","L5"]
+ OBDESCRIPTORS = Hash[OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
+ name,description = d.split(/\s+/,2)
+ ["Openbabel."+name,description] unless obexclude.include? name
+ end.compact.sort{|a,b| a[0] <=> b[0]}]
+ cdkdescriptors = {}
+ CDK_DESCRIPTIONS = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`)
+ CDK_DESCRIPTIONS.each do |d|
+ prefix="Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,'')
+ d[:names].each { |name| cdkdescriptors[prefix+"."+name] = d[:description] }
+ end
+ CDKDESCRIPTORS = cdkdescriptors
+ # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
+ joelibexclude = ["MoleculeHashcode","GlobalTopologicalChargeIndex"]
+ # strip Joelib messages from stdout
+ JOELIBDESCRIPTORS = Hash[YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptorInfo | sed '0,/---/d'`).collect do |d|
+ name = d[:java_class].sub(/^joelib2.feature.types./,'')
+ ["Joelib."+name, "JOELIb does not provide meaningful descriptions, see java/ for details."] unless joelibexclude.include? name
+ end.compact.sort{|a,b| a[0] <=> b[0]}]
+ require_relative "unique_descriptors.rb"
+ def self.descriptors
+ DESCRIPTORS.collect do |name,description|
+ lib,desc = name.split('.',2)
+ self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
+ end
+ end
+ def self.unique_descriptors
+ udesc = []
+ UNIQUEDESCRIPTORS.each do |name|
+ lib,desc = name.split('.',2)
+ if lib == "Cdk"
+{|d| desc == d[:java_class].split('.').last.sub('Descriptor','') }.first[:names].each do |n|
+ dname = "#{name}.#{n}"
+ description = DESCRIPTORS[dname]
+ udesc << self.find_or_create_by(:name => dname, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
+ end
+ else
+ description = DESCRIPTORS[name]
+ udesc << self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
+ end
+ end
+ udesc
+ end
+ # Description of available descriptors
+ def self.description descriptor
+ lib = descriptor.split('_').first
+ case lib
+ when "Openbabel"
+ OBDESCRIPTORS[descriptor]
+ when "Cdk"
+ name = descriptor.split('_')[0..-2].join('_')
+ when "Joelib"
+ when "lookup"
+ "Read feature values from a dataset"
+ end
+ end
+ def calculate compound
+ result = send library.downcase,descriptor,compound
+ p result
+ result[]
+ end
+ def openbabel descriptor, compound
+ obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
+ obmol =
+ obconversion =
+ obconversion.set_in_format 'smi'
+ obconversion.read_string obmol, compound.smiles
+ {"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
+ end
+ def cdk descriptor, compound
+ java_descriptor "cdk", descriptor, compound
+ end
+ def joelib descriptor, compound
+ java_descriptor "joelib", descriptor, compound
+ end
+ private
+ def java_descriptor lib, descriptor, compound
+ sdf_3d = "/tmp/#{SecureRandom.uuid}.sdf"
+,"w+"){|f| f.print compound.sdf}
+ # use java system call (rjb blocks within tasks)
+ # use Tempfiles to avoid "Argument list too long" error
+ case lib
+ when "cdk"
+ `java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{sdf_3d} #{descriptor}`
+ when "joelib"
+ `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{sdf_3d} #{descriptor}`
+ end
+ result = YAML.load_file("#{sdf_3d}#{lib}.yaml").first
+ result.keys.each{|k| result[k] = result.delete(k)}
+ result
+ end
+ def fix_value val
+ val = val.first if val.is_a? Array and val.size == 1
+ val = nil if val == "NaN"
+ if val.numeric?
+ val = Float(val)
+ val = nil if val.nan? or val.infinite?
+ end
+ val
+ end
+ end