1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
module OpenTox
# Feature for physico-chemical descriptors
class PhysChem < NumericFeature
field :library, type: String
field :descriptor, type: String
field :description, type: String
JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")
obexclude = ["cansmi","cansmiNS","formula","InChI","InChIKey","s","smarts","title","L5"]
OBDESCRIPTORS = Hash[OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
name,description = d.split(/\s+/,2)
["Openbabel."+name,description] unless obexclude.include? name
end.compact.sort{|a,b| a[0] <=> b[0]}]
cdkdescriptors = {}
CDK_DESCRIPTIONS = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`)
CDK_DESCRIPTIONS.each do |d|
prefix="Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,'')
d[:names].each { |name| cdkdescriptors[prefix+"."+name] = d[:description] }
end
CDKDESCRIPTORS = cdkdescriptors
# exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
joelibexclude = ["MoleculeHashcode","GlobalTopologicalChargeIndex"]
# strip Joelib messages from stdout
JOELIBDESCRIPTORS = Hash[YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptorInfo | sed '0,/---/d'`).collect do |d|
name = d[:java_class].sub(/^joelib2.feature.types./,'')
["Joelib."+name, "JOELIb does not provide meaningful descriptions, see java/JoelibDescriptors.java for details."] unless joelibexclude.include? name
end.compact.sort{|a,b| a[0] <=> b[0]}]
DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS))
require_relative "unique_descriptors.rb"
def self.descriptors desc=DESCRIPTORS
desc.collect do |name,description|
lib,desc = name.split('.',2)
self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
end
end
def self.unique_descriptors
udesc = []
UNIQUEDESCRIPTORS.each do |name|
lib,desc = name.split('.',2)
if lib == "Cdk"
CDK_DESCRIPTIONS.select{|d| desc == d[:java_class].split('.').last.sub('Descriptor','') }.first[:names].each do |n|
dname = "#{name}.#{n}"
description = DESCRIPTORS[dname]
udesc << self.find_or_create_by(:name => dname, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
end
else
description = DESCRIPTORS[name]
udesc << self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true)
end
end
udesc
end
def self.openbabel_descriptors
descriptors OBDESCRIPTORS
end
def self.cdk_descriptors
descriptors CDKDESCRIPTORS
end
def self.joelib_descriptors
descriptors JOELIBDESCRIPTORS
end
def calculate compound
result = send library.downcase,descriptor,compound
result[self.name]
end
def openbabel descriptor, compound
obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
obmol = OpenBabel::OBMol.new
obconversion = OpenBabel::OBConversion.new
obconversion.set_in_format 'smi'
obconversion.read_string obmol, compound.smiles
{"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
end
def cdk descriptor, compound
java_descriptor "cdk", descriptor, compound
end
def joelib descriptor, compound
java_descriptor "joelib", descriptor, compound
end
private
def java_descriptor lib, descriptor, compound
sdf_3d = "/tmp/#{SecureRandom.uuid}.sdf"
File.open(sdf_3d,"w+"){|f| f.print compound.sdf}
# use java system call (rjb blocks within tasks)
# use Tempfiles to avoid "Argument list too long" error
case lib
when "cdk"
`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{sdf_3d} #{descriptor}`
when "joelib"
`java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{sdf_3d} #{descriptor}`
end
result = YAML.load_file("#{sdf_3d}#{lib}.yaml").first
result.keys.each{|k| result[k] = result.delete(k)}
result
end
def fix_value val
val = val.first if val.is_a? Array and val.size == 1
val = nil if val == "NaN"
if val.numeric?
val = Float(val)
val = nil if val.nan? or val.infinite?
end
val
end
end
end
OpenTox::PhysChem.descriptors # load descriptor features
|