summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2014-10-10 11:31:22 +0200
committermguetlein <martin.guetlein@gmail.com>2014-10-10 11:31:22 +0200
commit02e63dd161f07b9ec750af61b933ce60c34be42c (patch)
treedc20c7e8d527a4c62a7c03479580a56fdee66765
parent7173d52d7c6e476c476b1eebbb46ada3ade4454e (diff)
fix storing feature datasets for dataset with duplicate compounds, preserve feature order
-rw-r--r--lib/descriptor.rb47
1 files changed, 33 insertions, 14 deletions
diff --git a/lib/descriptor.rb b/lib/descriptor.rb
index 36fb1a5..7403ebf 100644
--- a/lib/descriptor.rb
+++ b/lib/descriptor.rb
@@ -1,5 +1,8 @@
require 'digest/md5'
ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk"
+
+BABEL_3D_CACHE_DIR = File.join(Dir.pwd,'/babel_3d_cache')
+
module OpenTox
module Algorithm
@@ -118,7 +121,7 @@ module OpenTox
$logger.debug "> #{line.chomp}"
end
io.close
- raise "external cmd failed '#{cmd}' (error should be logged)" unless $?.to_i == 0
+ raise "external cmd failed '#{cmd}' (see log file for error msg)" unless $?.to_i == 0
end
end
@@ -172,7 +175,7 @@ module OpenTox
obconversion.set_out_format 'sdf'
digest = Digest::MD5.hexdigest compounds.collect{|c| c.uri}.inspect
- sdf_file = "/tmp/#{digest}.sdf"
+ sdf_file = File.join(BABEL_3D_CACHE_DIR,"#{digest}.sdf")
if File.exists? sdf_file # do not recreate existing 3d sdfs
$logger.debug "re-using cached 3d structures from #{sdf_file}"
else
@@ -185,20 +188,36 @@ module OpenTox
c = 0
compounds.each do |compound|
c += 1
- $logger.debug "compute 3d structures for compound #{c}/#{compounds.size}"
- obconversion.read_string obmol, compound.inchi
- sdf_2d = obconversion.write_string(obmol)
- OpenBabel::OBOp.find_type("Gen3D").do(obmol)
- sdf_3d = obconversion.write_string(obmol)
- if sdf_3d.match(/.nan/)
- warning = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure."
- $logger.warn warning
- # TODO
- #@feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning
- tmp_file.write sdf_2d
+ cmp_file = File.join(BABEL_3D_CACHE_DIR,Digest::MD5.hexdigest(compound.inchi)+".sdf")
+ cmp_sdf = nil
+ if File.exists? cmp_file
+ $logger.debug "read cached 3d structure for compound #{c}/#{compounds.size}"
+ cmp_sdf = File.read(cmp_file)
else
- tmp_file.write sdf_3d
+ $logger.debug "compute 3d structure for compound #{c}/#{compounds.size}"
+ obconversion.read_string obmol, compound.inchi
+ sdf_2d = obconversion.write_string(obmol)
+ error = nil
+ if compound.inchi.include?(";") # component includes multiple compounds (; in inchi, . in smiles)
+ error = "OpenBabel 3D generation failed for multi-compound #{compound.uri}, trying to calculate descriptors from 2D structure."
+ else
+ OpenBabel::OBOp.find_type("Gen3D").do(obmol)
+ sdf_3d = obconversion.write_string(obmol)
+ error = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure." if sdf_3d.match(/.nan/)
+ end
+ if error
+ $logger.warn error
+ # TODO
+ # @feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << error : @feature_dataset[RDF::OT.Warnings] = error
+ cmp_sdf = sdf_2d
+ else
+ cmp_sdf = sdf_3d
+ end
+ File.open(cmp_file,"w") do |f|
+ f.write(cmp_sdf)
+ end
end
+ tmp_file.write cmp_sdf
end
tmp_file.close
File.rename(tmp_file, sdf_file)