diff options
author | mguetlein <martin.guetlein@gmail.com> | 2014-10-10 11:31:22 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2014-10-10 11:31:22 +0200 |
commit | 02e63dd161f07b9ec750af61b933ce60c34be42c (patch) | |
tree | dc20c7e8d527a4c62a7c03479580a56fdee66765 | |
parent | 7173d52d7c6e476c476b1eebbb46ada3ade4454e (diff) |
fix storing feature datasets for dataset with duplicate compounds, preserve feature order
-rw-r--r-- | lib/descriptor.rb | 47 |
1 files changed, 33 insertions, 14 deletions
diff --git a/lib/descriptor.rb b/lib/descriptor.rb index 36fb1a5..7403ebf 100644 --- a/lib/descriptor.rb +++ b/lib/descriptor.rb @@ -1,5 +1,8 @@ require 'digest/md5' ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk" + +BABEL_3D_CACHE_DIR = File.join(Dir.pwd,'/babel_3d_cache') + module OpenTox module Algorithm @@ -118,7 +121,7 @@ module OpenTox $logger.debug "> #{line.chomp}" end io.close - raise "external cmd failed '#{cmd}' (error should be logged)" unless $?.to_i == 0 + raise "external cmd failed '#{cmd}' (see log file for error msg)" unless $?.to_i == 0 end end @@ -172,7 +175,7 @@ module OpenTox obconversion.set_out_format 'sdf' digest = Digest::MD5.hexdigest compounds.collect{|c| c.uri}.inspect - sdf_file = "/tmp/#{digest}.sdf" + sdf_file = File.join(BABEL_3D_CACHE_DIR,"#{digest}.sdf") if File.exists? sdf_file # do not recreate existing 3d sdfs $logger.debug "re-using cached 3d structures from #{sdf_file}" else @@ -185,20 +188,36 @@ module OpenTox c = 0 compounds.each do |compound| c += 1 - $logger.debug "compute 3d structures for compound #{c}/#{compounds.size}" - obconversion.read_string obmol, compound.inchi - sdf_2d = obconversion.write_string(obmol) - OpenBabel::OBOp.find_type("Gen3D").do(obmol) - sdf_3d = obconversion.write_string(obmol) - if sdf_3d.match(/.nan/) - warning = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure." - $logger.warn warning - # TODO - #@feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning - tmp_file.write sdf_2d + cmp_file = File.join(BABEL_3D_CACHE_DIR,Digest::MD5.hexdigest(compound.inchi)+".sdf") + cmp_sdf = nil + if File.exists? cmp_file + $logger.debug "read cached 3d structure for compound #{c}/#{compounds.size}" + cmp_sdf = File.read(cmp_file) else - tmp_file.write sdf_3d + $logger.debug "compute 3d structure for compound #{c}/#{compounds.size}" + obconversion.read_string obmol, compound.inchi + sdf_2d = obconversion.write_string(obmol) + error = nil + if compound.inchi.include?(";") # component includes multiple compounds (; in inchi, . in smiles) + error = "OpenBabel 3D generation failed for multi-compound #{compound.uri}, trying to calculate descriptors from 2D structure." + else + OpenBabel::OBOp.find_type("Gen3D").do(obmol) + sdf_3d = obconversion.write_string(obmol) + error = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure." if sdf_3d.match(/.nan/) + end + if error + $logger.warn error + # TODO + # @feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << error : @feature_dataset[RDF::OT.Warnings] = error + cmp_sdf = sdf_2d + else + cmp_sdf = sdf_3d + end + File.open(cmp_file,"w") do |f| + f.write(cmp_sdf) + end end + tmp_file.write cmp_sdf end tmp_file.close File.rename(tmp_file, sdf_file) |