summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2013-04-24 19:49:49 +0200
committerChristoph Helma <helma@in-silico.ch>2013-04-24 19:49:49 +0200
commitd0db6b77f462a240159268ea3721a805f5f795f6 (patch)
tree3dfdfec17c8420ad2515ee5e33195435cd8a2866
parent95eecdd3583b33fa8aceab857468296443decd0c (diff)
descriptor calculation with descriptor_uris parameter working.
-rw-r--r--descriptor.rb72
1 files changed, 32 insertions, 40 deletions
diff --git a/descriptor.rb b/descriptor.rb
index 18a44d6..96599d6 100644
--- a/descriptor.rb
+++ b/descriptor.rb
@@ -27,6 +27,7 @@ module OpenTox
OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").each do |d|
title,description = d.split(/\s+/,2)
unless title =~ /cansmi|formula|InChI|smarts|title/ or title == "s"
+ uri = File.join $algorithm[:uri], "descriptor/openbabel" ,title
title = "OpenBabel "+title
feature = OpenTox::Feature.find_or_create({
RDF::DC.title => title,
@@ -35,6 +36,7 @@ module OpenTox
}, @subjectid)
descriptors[:openbabel] << {
:title => title,
+ :uri => uri,
:description => description,
:calculator => OpenBabel::OBDescriptor.find_type(title.split(" ").last),
:feature => feature
@@ -45,7 +47,9 @@ module OpenTox
# CDK
cdk_descriptors = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`)
cdk_descriptors.each do |descriptor|
- descriptor[:title] = "Cdk " + descriptor[:java_class].split('.').last.sub(/Descriptor/,'')
+ title = descriptor[:java_class].split('.').last.sub(/Descriptor/,'')
+ descriptor[:title] = "Cdk " + title
+ descriptor[:uri] = File.join $algorithm[:uri], "descriptor/cdk" ,title
descriptor[:features] = []
descriptor[:names].each do |name|
descriptor[:features] << OpenTox::Feature.find_or_create({
@@ -62,7 +66,9 @@ module OpenTox
joelib_descriptors.each do |descriptor|
# exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
next if descriptor[:java_class] == "joelib2.feature.types.MoleculeHashcode" or descriptor[:java_class] == "joelib2.feature.types.GlobalTopologicalChargeIndex"
- descriptor[:title] = "Joelib " + descriptor[:java_class].split('.').last
+ title = descriptor[:java_class].split('.').last
+ descriptor[:uri] = File.join $algorithm[:uri], "descriptor/joelib",title
+ descriptor[:title] = "Joelib " + title
descriptor[:feature] = OpenTox::Feature.find_or_create({
RDF::DC.title => descriptor[:title],
RDF.type => [RDF::OT.Feature, RDF::OT.NumericFeature],
@@ -88,12 +94,9 @@ module OpenTox
def cdk compounds, descriptors
sdf_3d compounds
- # rjb blocks within tasks
- # Avoid "Argument list too long" error by sending only short descriptor names
- #yaml = `export CDKDescriptors= ;echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}`
- #yaml = `export CDKDescriptors='#{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}';echo "#{@sdf}" |java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors `
+ # use java system call (rjb blocks within tasks)
+ # use Tempfiles to avoid "Argument list too long" error
puts `java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:title].split("\s").last}.join(" ")}`
- #puts yaml
YAML.load_file(@sdf_file.path+"cdk.yaml").each_with_index do |calculation,i|
$logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
calculation.each do |name,value|
@@ -104,12 +107,10 @@ module OpenTox
end
def joelib compounds, descriptors
+ # use java system call (rjb blocks within tasks)
+ # use Tempfiles to avoid "Argument list too long" error
sdf_3d compounds
- # rjb blocks within tasks
- #yaml = `echo "#{@sdf}" |java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{descriptors.collect{|d| d[:java_class]}.join(" ")}|grep "^[- ]"`
- #puts "java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}"
puts `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{@sdf_file.path} #{descriptors.collect{|d| d[:java_class]}.join(" ")}`
- #YAML.load(yaml).each_with_index do |calculation,i|
YAML.load_file(@sdf_file.path+"joelib.yaml").each_with_index do |calculation,i|
$logger.error "Descriptor calculation failed for compound #{compounds[i].uri}." if calculation.empty?
calculation.each do |java_class,value|
@@ -120,9 +121,8 @@ module OpenTox
end
def sdf_3d compounds
- #unless @sdf_file and File.exists? @sdf_file.path
- unless @sdf
- @sdf = ""
+ unless @sdf_file and File.exists? @sdf_file.path
+ @sdf_file = Tempfile.open("sdf")
@@obconversion.set_out_format 'sdf'
# create 3d sdf file (faster in Openbabel than in CDK)
compounds.each do |compound|
@@ -134,13 +134,11 @@ module OpenTox
warning = "3D generation failed for compound #{compound.uri}, trying to calculate descriptors from 2D structure."
$logger.warn warning
@feature_dataset[RDF::OT.Warnings] ? @feature_dataset[RDF::OT.Warnings] << warning : @feature_dataset[RDF::OT.Warnings] = warning
- @sdf << sdf_2d
+ @sdf_file.puts sdf_2d
else
- @sdf << sdf_3d
+ @sdf_file.puts sdf_3d
end
end
- @sdf_file = Tempfile.open("sdf")
- @sdf_file.puts @sdf
@sdf_file.close
end
end
@@ -182,7 +180,7 @@ module OpenTox
elsif params[:dataset_uri]
@feature_dataset.parameters = [ { RDF::DC.title => "dataset_uri", RDF::OT.paramValue => params[:dataset_uri] }]
else
- bad_request_error "Please provide a dataset_uri or compound_uri paramaeter", @uri
+ bad_request_error "Please provide a dataset_uri or compound_uri parameter", @uri
end
end
end
@@ -193,9 +191,11 @@ module OpenTox
@descriptor = @descriptors.first
end
- after do
- #@sdf_file.unlink if @sdf_file and File.exists @sdf_file.path
- #TODO cleanup yamls
+ after do # Tempfile cleanup
+ if @sdf_file and File.exists? @sdf_file.path
+ FileUtils.rm Dir["#{@sdf_file.path}*.yaml"]
+ @sdf_file.unlink
+ end
@sdf_file = nil
end
@@ -219,12 +219,13 @@ module OpenTox
post '/descriptor/?' do
task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task|
- if params[:descriptors]
+ if params[:descriptor_uris]
descriptors = {}
- params[:descriptors].each do |descriptor|
+ params[:descriptor_uris].each do |descriptor_uri|
#lib, title = descriptor.split('/')
+ lib = descriptor_uri.split('/')[-2]
descriptors[lib.to_sym] ||= []
- descriptors[lib.to_sym] << DESCRIPTORS[lib.to_sym].select{|d| d[:title] == descriptor}
+ descriptors[lib.to_sym] += DESCRIPTORS[lib.to_sym].select{|d| d[:uri] == descriptor_uri}
end
else
descriptors = DESCRIPTORS
@@ -242,22 +243,13 @@ module OpenTox
halt 202, task.uri
end
+ # use /descriptor with dataset_uri and descriptor_uri parameters for efficient calculation of multiple compounds/descriptors
post '/descriptor/:lib/:descriptor/?' do
- if params[:compound_uri]
- compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
- send params[:lib].to_sym, compounds, @descriptors
- @feature_dataset.put
- @feature_dataset.uri
- elsif params[:dataset_uri]
- task = OpenTox::Task.run "Calculating PC descriptors", @uri, @subjectid do |task|
- compounds = Dataset.new(params[:dataset_uri]).compounds
- send params[:lib].to_sym, compounds, @descriptors
- @feature_dataset.put
- @feature_dataset.uri
- end
- response['Content-Type'] = 'text/uri-list'
- halt 202, task.uri
- end
+ bad_request_error "Please provide a compound_uri parameter", @uri unless params[:compound_uri]
+ compounds = [ Compound.new(params[:compound_uri], @subjectid) ]
+ send params[:lib].to_sym, compounds, @descriptors
+ @feature_dataset.put
+ @feature_dataset.uri
end
end