diff options
author | Christoph Helma <helma@in-silico.ch> | 2013-04-24 19:49:49 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2013-04-24 19:49:49 +0200 |
commit | d0db6b77f462a240159268ea3721a805f5f795f6 (patch) | |
tree | 3dfdfec17c8420ad2515ee5e33195435cd8a2866 | |
parent | 95eecdd3583b33fa8aceab857468296443decd0c (diff) |
descriptor calculation with descriptor_uris parameter working.
-rw-r--r-- | descriptor.rb | 72 |
1 files changed, 32 insertions, 40 deletions
diff --git a/descriptor.rb b/descriptor.rb index 18a44d6..96599d6 100644 --- a/descriptor.rb +++ b/descriptor.rb @@ -27,6 +27,7 @@ module OpenTox OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").each do |d| title,description = d.split(/\s+/,2) unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s" + uri = File.join $algorithm[:uri], "descriptor/openbabel" ,title title = "OpenBabel "+title feature = OpenTox::Feature.find_or_create({ RDF::DC.title => title, @@ -35,6 +36,7 @@ module OpenTox }, @subjectid) descriptors[:openbabel] << { :title => title, + :uri => uri, :description => description, :calculator => OpenBabel::OBDescriptor.find_type(title.split(" ").last), :feature => feature @@ -45,7 +47,9 @@ module OpenTox # CDK cdk_descriptors = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`) cdk_descriptors.each do |descriptor| - descriptor[:title] = "Cdk " + descriptor[:java_class].split('.').last.sub(/Descriptor/,'') + title = descriptor[:java_class].split('.').last.sub(/Descriptor/,'') + descriptor[:title] = "Cdk " + title + descriptor[:uri] = File.join $algorithm[:uri], "descriptor/cdk" ,title descriptor[:features] = [] descriptor[:names].each do |name| descriptor[:features] << OpenTox::Feature.find_or_create({ @@ -62,7 +66,9 @@ module OpenTox joelib_descriptors.each do |descriptor| # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug) next if descriptor[:java_class] == "joelib2.feature.types.MoleculeHashcode" or descriptor[:java_class] == "joelib2.feature.types.GlobalTopologicalChargeIndex" - descriptor[:title] = "Joelib " + descriptor[:java_class].split('.').last + title = descriptor[:java_class].split('.').last + descriptor[:uri] = File.join $algorithm[:uri], "descriptor/joelib",title + descriptor[:title] = "Joelib " + title descriptor[:feature] = OpenTox::Feature.find_or_create({ RDF::DC.title => descriptor[:title], RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature], @@ -88,12 +94,9 @@ module OpenTox def cdk compounds, descriptors sdf_3d compounds - # rjb blocks within tasks - # Avoid "Argument list too long" error by sending only short descriptor names - #yaml = `export CDKDescriptors= ;echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}` - #yaml = `export CDKDescriptors='#{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}';echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors ` + # use java system call (rjb blocks within tasks) + # use Tempfiles to avoid "Argument list too long" error puts `java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}` - #puts yaml YAML.load_file(@sdf_file.path+"cdk.yaml").each_with_index do |calculation,i| $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty? calculation.each do |name,value| @@ -104,12 +107,10 @@ module OpenTox end def joelib compounds, descriptors + # use java system call (rjb blocks within tasks) + # use Tempfiles to avoid "Argument list too long" error sdf_3d compounds - # rjb blocks within tasks - #yaml = `echo "#{@sdf}" |java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{descriptors.collect{|d| d[:java_class]}.join(" ")}|grep "^[- ]"` - #puts "java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}" puts `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}` - #YAML.load(yaml).each_with_index do |calculation,i| YAML.load_file(@sdf_file.path+"joelib.yaml").each_with_index do |calculation,i| $logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty? calculation.each do |java_class,value| @@ -120,9 +121,8 @@ module OpenTox end def sdf_3d compounds - #unless @sdf_file and File.exists? @sdf_file.path - unless @sdf - @sdf = "" + unless @sdf_file and File.exists? @sdf_file.path + @sdf_file = Tempfile.open("sdf") @@obconversion.set_out_format 'sdf' # create 3d sdf file (faster in Openbabel than in CDK) compounds.each do |compound| @@ -134,13 +134,11 @@ module OpenTox warning = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure." $logger.warn warning @feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning - @sdf << sdf_2d + @sdf_file.puts sdf_2d else - @sdf << sdf_3d + @sdf_file.puts sdf_3d end end - @sdf_file = Tempfile.open("sdf") - @sdf_file.puts @sdf @sdf_file.close end end @@ -182,7 +180,7 @@ module OpenTox elsif params[:dataset_uri] @feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }] else - bad_request_error "Please provide a dataset_uri or compound_uri paramaeter", @uri + bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri end end end @@ -193,9 +191,11 @@ module OpenTox @descriptor = @descriptors.first end - after do - #@sdf_file.unlink if @sdf_file and File.exists @sdf_file.path - #TODO cleanup yamls + after do # Tempfile cleanup + if @sdf_file and File.exists? @sdf_file.path + FileUtils.rm Dir["#{@sdf_file.path}*.yaml"] + @sdf_file.unlink + end @sdf_file = nil end @@ -219,12 +219,13 @@ module OpenTox post '/descriptor/?' do task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task| - if params[:descriptors] + if params[:descriptor_uris] descriptors = {} - params[:descriptors].each do |descriptor| + params[:descriptor_uris].each do |descriptor_uri| #lib, title = descriptor.split('/') + lib = descriptor_uri.split('/')[-2] descriptors[lib.to_sym] ||= [] - descriptors[lib.to_sym] << DESCRIPTORS[lib.to_sym].select{|d| d[:title] == descriptor} + descriptors[lib.to_sym] += DESCRIPTORS[lib.to_sym].select{|d| d[:uri] == descriptor_uri} end else descriptors = DESCRIPTORS @@ -242,22 +243,13 @@ module OpenTox halt 202, task.uri end + # use /descriptor with dataset_uri and descriptor_uri parameters for efficient calculation of multiple compounds/descriptors post '/descriptor/:lib/:descriptor/?' do - if params[:compound_uri] - compounds = [ Compound.new(params[:compound_uri], @subjectid) ] - send params[:lib].to_sym, compounds, @descriptors - @feature_dataset.put - @feature_dataset.uri - elsif params[:dataset_uri] - task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task| - compounds = Dataset.new(params[:dataset_uri]).compounds - send params[:lib].to_sym, compounds, @descriptors - @feature_dataset.put - @feature_dataset.uri - end - response['Content-Type'] = 'text/uri-list' - halt 202, task.uri - end + bad_request_error "Please provide a compound_uri parameter", @uri unless params[:compound_uri] + compounds = [ Compound.new(params[:compound_uri], @subjectid) ] + send params[:lib].to_sym, compounds, @descriptors + @feature_dataset.put + @feature_dataset.uri end end |