diff options
author | Christoph Helma <helma@in-silico.ch> | 2013-03-26 10:52:55 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2013-03-26 10:52:55 +0100 |
commit | ce7216c6da3767e3e55429eaa0b55c367939ee38 (patch) | |
tree | 20889a1f3d404984272c13054f9f45485a1ca4c5 | |
parent | bb35f26cc0e1bc60303465f991cde2ac01a54b38 (diff) |
code cleanup and refactoring.
-rw-r--r-- | application.rb | 353 | ||||
-rw-r--r-- | compound.rb | 6 | ||||
-rw-r--r-- | helper.rb | 347 | ||||
-rw-r--r-- | webapp/dataset.rb | 125 | ||||
-rw-r--r-- | webapp/sinatra.rb | 4 | ||||
-rw-r--r-- | webapp/test.rb | 16 |
6 files changed, 366 insertions, 485 deletions
diff --git a/application.rb b/application.rb index dfdc952..20fadad 100644 --- a/application.rb +++ b/application.rb @@ -4,325 +4,27 @@ require 'roo' require 'opentox-server' -require './compound.rb' +require_relative 'helper.rb' +require_relative 'compound.rb' +# TODO: remove and find a better way to test task errors +require_relative 'test.rb' # Library code $logger.debug "Dataset booting: #{$dataset.collect{|k,v| "#{k}: '#{v}'"} }" -Dir['./lib/compound/*.rb'].each { |f| require f; also_reload f } # Libs -Dir['./lib/*.rb'].each { |f| require f; also_reload f } # Libs -Dir['./webapp/*.rb'].each { |f| require f; also_reload f } # Webapps -#require 'profiler' +require 'profiler' # Entry point module OpenTox class Application < Service - @warnings = [] - - helpers do - - def from_csv(csv) - table = CSV.parse(csv) - # CSVs with unexpected encodings may have blanks instead of nil - table.collect! { |row| - row.collect! { |val| - (val.class == String and val.strip == "") ? nil : val - } - } - from_table table - end - - def from_spreadsheet spreadsheet - extensions = { Excel => ".xls", Excelx => ".xlsx", Openoffice => ".ods" } - input = params[:file][:tempfile].path + ".xls" - csv_file = params[:file][:tempfile].path + ".csv" - File.rename params[:file][:tempfile].path, input # roo needs "correct" extensions - spreadsheet.new(input).to_csv csv_file # roo cannot write to strings - @body = from_csv File.read(csv_file) - @content_type = "text/plain" - end - -=begin - def from_sdf(sdf) - - #obconversion = OpenBabel::OBConversion.new - #obmol = OpenBabel::OBMol.new - #obconversion.set_in_and_out_formats "sdf", "inchi" - - table = [] - - properties = [] - sdf.each_line { |l| properties << l.to_s if l.match(/</) } - properties.sort! - properties.uniq! - properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp } - properties.insert 0, "InChI" - table[0] = properties - - rec = 0 - sdf.split(/\$\$\$\$\r*\n/).each do |s| - rec += 1 - table << [] - begin - # TODO: use compound service - compound = OpenTox::Compound.from_sdf sdf - #obconversion.read_string obmol, s - table.last << obconversion.write_string(obmol).gsub(/\s/,'').chomp - rescue - # TODO: Fix, will lead to follow up errors - table.last << "Could not convert structure at record #{rec}) have been ignored! \n#{s}" - end - obmol.get_data.each { |d| table.last[table.first.index(d.get_attribute)] = d.get_value } - end - from_table table - end -=end - - def from_table table - - @warnings = [] - ntriples = ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.Dataset}>."] - ntriples << ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.OrderedDataset}>."] - - # features - feature_names = table.shift.collect{|f| f.strip} - @warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size - compound_format = feature_names.shift.strip - bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: URI, SMILES, InChI." unless compound_format =~ /URI|URL|SMILES|InChI/i - features = [] - ignored_feature_indices = [] - feature_names.each_with_index do |f,i| - values = table.collect{|row| val=row[i+1]; val.strip! unless val.nil?; val }.uniq.compact - types = values.collect{|v| feature_type(v)}.uniq - metadata = {} - if values.size == 0 # empty feature - elsif values.size <= 5 # max classes - metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] - metadata[RDF::OT.acceptValue] = values - end - if types.size == 1 and types[0] == RDF::OT.NumericFeature - metadata[RDF.type] = [] unless metadata[RDF.type] - metadata[RDF.type] << RDF::OT.NumericFeature - else - metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] # only nominal type for mixed cases - metadata[RDF::OT.acceptValue] = values - end - feature = OpenTox::Feature.find_by_title(f,metadata) # AM: find or generate - features << feature unless feature.nil? - ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}>." - ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> #{i} ." - end - - # compounds and values - compound_uris = [] - table.each_with_index do |values,j| - compound = values.shift - begin - case compound_format - when /URI|URL/i - compound_uri = compound - when /SMILES/i - compound_uri = OpenTox::Compound.from_smiles($compound[:uri], compound).uri - when /InChI/i - compound_uri = OpenTox::Compound.from_inchi($compound[:uri], compound).uri - end - rescue - @warnings << "Cannot parse compound '#{compound}' at position #{j+2}, all entries are ignored." # be careful with double quotes in literals! - next - end - compound_uris << compound_uri - unless values.size == features.size - @warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{features.size}), all entries are ignored." - next - end - ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>." - ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{j} ." - - values.each_with_index do |v,i| - @warnings << "Empty value for compound #{compound} (row #{j+2}) and feature \"#{feature_names[i]}\" (column #{i+2})." if v.blank? - #@warnings << "Empty value in row #{j+2}, column #{i+2} (feature \"#{feature_names[i]}\")." if v.blank? - - data_entry_node = "_:dataentry"+ j.to_s - value_node = data_entry_node+ "_value"+ i.to_s - ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} ." - ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> ." - ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{j} ." - ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound_uri}> ." - ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ." - ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ." - ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ." - - end - - end - compound_uris.duplicates.each do |uri| - positions = [] - compound_uris.each_with_index{|c,i| positions << i+1 if c == uri} - @warnings << "Duplicate compound #{uri} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments." - end - - ntriples << "<#{@uri}> <#{RDF::OT.Warnings}> \"#{@warnings.join('\n')}\" ." - ntriples.join("\n") - end - -=begin - def to_xlsx - - # both simple_xlsx and axlsx create empty documents with OLE2 errors - xlsx = @uri.split("/").last+".xlsx" - p = Axlsx::Package.new - wb = p.workbook - wb.add_worksheet(:name => "test") do |sheet| - to_table.each { |row| sheet.add_row row; puts row } - end - p.serialize("test.xlsx") - - p.to_stream -#``` - #Tempfile.open(@uri.split("/").last+".xlsx") do |xlsx| - SimpleXlsx::Serializer.new(xlsx) do |doc| - doc.add_sheet("People") do |sheet| - to_table.each { |row| sheet.add_row row } - end - end - send_file xlsx - #end - end -=end - - def to_csv - csv_string = CSV.generate do |csv| - to_table.each { |row| csv << row } - end - csv_string.gsub(/\"\"/,"") # AM: no quotes for missing values - end - - def to_table -=begin - table = [] - dataset = OpenTox::Dataset.new @uri - dataset.get - table << ["SMILES"] + dataset.features.collect{|f| f.get; f.title} - dataset.data_entries.each_with_index do |data_entry,i| - table << [dataset.compounds[i]] + data_entry - end - table -=end - accept = "text/uri-list" - table = [] - if ordered? - features = OpenTox::Dataset.find_features_sparql(@uri) - sparql_constraints = {:type => RDF.type, :title => RDF::DC.title} - feature_props = OpenTox::Dataset.find_props_sparql(features.collect { |f| f.uri }, sparql_constraints) - quoted_features = []; feature_names = [] - features.each { |feature| - quoted_features << feature_props[feature.uri][:type].include?(RDF::OT.NominalFeature) - feature_names << "\"#{feature_props[feature.uri][:title][0].strip}\"" - } - compounds = OpenTox::Dataset.find_compounds_sparql(@uri) - values = OpenTox::Dataset.find_data_entries_sparql(@uri) - values += Array.new(compounds.size*features.size-values.size, "") - clim=(compounds.size-1) - cidx = fidx = 0 - num=(!quoted_features[fidx]) - table = (Array.new((features.size)*(compounds.size))).each_slice(features.size).to_a - values.each { |val| - unless val.blank? - table[cidx][fidx] = (num ? val : "\"#{val}\"") - end - if (cidx < clim) - cidx+=1 - else - cidx=0 - fidx+=1 - num=(!quoted_features[fidx]) - end - } - table.each_with_index { |row,idx| row.unshift("\"#{compounds[idx].inchi}\"") } - table.unshift([ "\"InChI\"" ] + feature_names) - else - sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Feature}>}" - features = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Feature.new uri}.each { |f| f.get } - quoted_features = features.each_with_index.collect { |f,idx| - if (f[RDF.type].include?(RDF::OT.NominalFeature) or - f[RDF.type].include?(RDF::OT.StringFeature) and - !f[RDF.type].include?(RDF::OT.NumericFeature)) - idx+1 - end - }.compact - table << ["InChI"] + features.collect{ |f| "\"" + f[RDF::DC.title] + "\"" } - sparql = "SELECT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Compound}>. }" - compounds = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Compound.new uri} - compounds.each do |compound| - data_entries = [] - features.each do |feature| - sparql = "SELECT ?value FROM <#{@uri}> WHERE { - ?data_entry <#{RDF::OT.compound}> <#{compound.uri}>; - <#{RDF::OT.values}> ?v . - ?v <#{RDF::OT.feature}> <#{feature.uri}>; - <#{RDF::OT.value}> ?value. - } ORDER BY ?data_entry" - FourStore.query(sparql, accept).split("\n").each do |value| - data_entries << value - end - end - row = ["\"#{compound.inchi}\""] + data_entries - row = row.each_with_index.collect { |value,idx| (quoted_features.include?(idx) ? "\"#{value}\"" : value) } - table << row - end - end - table - end - - def feature_type(value) - if value.blank? - nil - elsif value.numeric? - RDF::OT.NumericFeature - else - RDF::OT.NominalFeature - end - end - - def ordered? - sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.OrderedDataset}>}" - FourStore.query(sparql, "text/uri-list").split("\n").empty? ? false : true - end + before do + #Profiler__::start_profile + end - def parse_put - task = OpenTox::Task.create $task[:uri], nil, RDF::DC.description => "Dataset upload: #{@uri}" do - #Profiler__::start_profile - case @content_type - when "text/plain", "text/turtle", "application/rdf+xml" # no conversion needed - when "text/csv" - @body = from_csv @body - @content_type = "text/plain" - when "application/vnd.ms-excel" - from_spreadsheet Excel - when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - from_spreadsheet Excelx - when "application/vnd.oasis.opendocument.spreadsheet" - from_spreadsheet Openoffice - # when "chemical/x-mdl-sdfile" - # @body = parse_sdf @body - # @content_type = "text/plain" - else - bad_request_error "#{@content_type} is not a supported content type." - end - #puts @body - FourStore.put @uri, @body, @content_type - if params[:file] - nt = "<#{@uri}> <#{RDF::DC.title}> \"#{params[:file][:filename]}\".\n<#{uri}> <#{RDF::OT.hasSource}> \"#{params[:file][:filename]}\"." - FourStore.post(@uri, nt, "text/plain") - end - #Profiler__::stop_profile - #Profiler__::print_profile($stdout) - @uri - end - response['Content-Type'] = "text/uri-list" - halt 202, task.uri - end + after do + #Profiler__::stop_profile + #Profiler__::print_profile($stdout) end before "/#{SERVICE}/:id/:property" do @@ -336,7 +38,6 @@ module OpenTox end get "/dataset/:id/?" do - #Profiler__::start_profile case @accept when "application/rdf+xml", "text/turtle", "text/plain", /html/ r = FourStore.get(@uri, @accept) @@ -355,8 +56,6 @@ module OpenTox bad_request_error "'#{@accept}' is not a supported content type." end end - #Profiler__::stop_profile - #Profiler__::print_profile($stdout) r end @@ -371,11 +70,9 @@ module OpenTox case @accept when "application/rdf+xml", "text/turtle", "text/plain" sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE { - { ?s ?p ?o. <#{@uri}> <#{RDF.type}> ?o. } UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::DC.title}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::DC.creator}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::OT.Warnings}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::OT.hasSource}> ?o.} + { ?s ?p ?o. <#{@uri}> ?p ?o. } UNION + { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Parameter}> . } + FILTER (?p != <#{RDF::OT.dataEntry}>) } " FourStore.query sparql, @accept else @@ -413,28 +110,6 @@ module OpenTox FourStore.query sparql, @accept end - # Get everything but the data entries - # @param [Header] Accept one of `application/rdf+xml, text/turtle, text/plain, text/uri-list` (default application/rdf+xml) - # @return [application/rdf+xml, text/turtle, text/plain, text/uri-list] The data - get '/dataset/:id/allnde' do - case @accept - when "application/rdf+xml", "text/turtle", "text/plain" - sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE { - { ?s ?p ?o. <#{@uri}> <#{RDF.type}> ?o. } UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::DC.title}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::DC.creator}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::OT.Warnings}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::OT.hasSource}> ?o.} UNION - { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Feature}> } UNION - { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Parameter}> } - }" - else - bad_request_error "'#{@accept}' is not a supported content type." - end - FourStore.query sparql, @accept - end - - end end diff --git a/compound.rb b/compound.rb index cb194ff..3373737 100644 --- a/compound.rb +++ b/compound.rb @@ -11,7 +11,7 @@ module OpenTox # compound = OpenTox::Compound.from_smiles("c1ccccc1") # @param [String] smiles Smiles string # @return [OpenTox::Compound] Compound - def self.from_smiles service_uri, smiles, subjectid=nil + def self.from_smiles smiles, subjectid=nil inchi = obconversion(smiles,'smi','inchi') compound = Compound.new(File.join service_uri, inchi) compound.inchi = inchi @@ -22,7 +22,7 @@ module OpenTox # Create a compound from inchi string # @param [String] smiles InChI string # @return [OpenTox::Compound] Compound - def self.from_inchi service_uri, inchi, subjectid=nil + def self.from_inchi inchi, subjectid=nil compound = Compound.new(File.join service_uri, inchi) compound.inchi = inchi compound @@ -31,7 +31,7 @@ module OpenTox # Create a compound from sdf string # @param [String] smiles SDF string # @return [OpenTox::Compound] Compound - def self.from_sdf service_uri, sdf, subjectid=nil + def self.from_sdf sdf, subjectid=nil inchi = obconversion(sdf,'sdf','inchi') compound = Compound.new(File.join service_uri, inchi) compound.inchi = inchi diff --git a/helper.rb b/helper.rb new file mode 100644 index 0000000..c405963 --- /dev/null +++ b/helper.rb @@ -0,0 +1,347 @@ +# Author: Christoph Helma, Andreas Maunz + +module OpenTox + class Application < Service + + @warnings = [] + + helpers do + + def from_csv(csv) + table = CSV.parse(csv) + # CSVs with unexpected encodings may have blanks instead of nil + table.collect! { |row| + row.collect! { |val| + (val.class == String and val.strip == "") ? nil : val + } + } + from_table table + end + + def from_spreadsheet spreadsheet + extensions = { Excel => ".xls", Excelx => ".xlsx", Openoffice => ".ods" } + input = params[:file][:tempfile].path + ".xls" + csv_file = params[:file][:tempfile].path + ".csv" + File.rename params[:file][:tempfile].path, input # roo needs "correct" extensions + spreadsheet.new(input).to_csv csv_file # roo cannot write to strings + @body = from_csv File.read(csv_file) + @content_type = "text/plain" + end + +=begin + def from_sdf(sdf) + + #obconversion = OpenBabel::OBConversion.new + #obmol = OpenBabel::OBMol.new + #obconversion.set_in_and_out_formats "sdf", "inchi" + + table = [] + + properties = [] + sdf.each_line { |l| properties << l.to_s if l.match(/</) } + properties.sort! + properties.uniq! + properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp } + properties.insert 0, "InChI" + table[0] = properties + + rec = 0 + sdf.split(/\$\$\$\$\r*\n/).each do |s| + rec += 1 + table << [] + begin + # TODO: use compound service + compound = OpenTox::Compound.from_sdf sdf + #obconversion.read_string obmol, s + table.last << obconversion.write_string(obmol).gsub(/\s/,'').chomp + rescue + # TODO: Fix, will lead to follow up errors + table.last << "Could not convert structure at record #{rec}) have been ignored! \n#{s}" + end + obmol.get_data.each { |d| table.last[table.first.index(d.get_attribute)] = d.get_value } + end + from_table table + end +=end + + def from_table table + + @warnings = [] + ntriples = ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.Dataset}>."] + ntriples << ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.OrderedDataset}>."] + + # features + feature_names = table.shift.collect{|f| f.strip} + @warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size + compound_format = feature_names.shift.strip + bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: URI, SMILES, InChI." unless compound_format =~ /URI|URL|SMILES|InChI/i + features = [] + ignored_feature_indices = [] + feature_names.each_with_index do |f,i| + values = table.collect{|row| val=row[i+1]; val.strip! unless val.nil?; val }.uniq.compact + types = values.collect{|v| feature_type(v)}.uniq + metadata = {RDF::DC.title => f} + if values.size == 0 # empty feature + elsif values.size <= 5 # max classes + metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] + metadata[RDF::OT.acceptValue] = values + end + if types.size == 1 and types[0] == RDF::OT.NumericFeature + metadata[RDF.type] = [] unless metadata[RDF.type] + metadata[RDF.type] << RDF::OT.NumericFeature + else + metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] # only nominal type for mixed cases + metadata[RDF::OT.acceptValue] = values + end + feature = OpenTox::Feature.find_or_create metadata, @subjectid # AM: find or generate + features << feature unless feature.nil? + ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}>." + ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> #{i} ." + end + + # compounds and values + compound_uris = [] + table.each_with_index do |values,j| + compound = values.shift + begin + case compound_format + when /URI|URL/i + compound_uri = compound + when /SMILES/i + compound_uri = OpenTox::Compound.from_smiles(compound).uri + when /InChI/i + compound_uri = OpenTox::Compound.from_inchi(compound).uri + end + rescue + @warnings << "Cannot parse compound '#{compound}' at position #{j+2}, all entries are ignored." # be careful with double quotes in literals! \C in smiles is an illegal Turtle string + next + end + compound_uris << compound_uri + unless values.size == features.size + @warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{features.size}), all entries are ignored." + next + end + ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>." + ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{j} ." + + values.each_with_index do |v,i| + @warnings << "Empty value for compound '#{compound}' (row #{j+2}) and feature '#{feature_names[i]}' (column #{i+2})." if v.blank? + + data_entry_node = "_:dataentry"+ j.to_s + value_node = data_entry_node+ "_value"+ i.to_s + ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} ." + ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> ." + ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{j} ." + ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound_uri}> ." + ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ." + ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ." + ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ." + + end + + end + compound_uris.duplicates.each do |uri| + positions = [] + compound_uris.each_with_index{|c,i| positions << i+1 if c == uri} + @warnings << "Duplicate compound #{uri} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments." + end + + ntriples << "<#{@uri}> <#{RDF::OT.Warnings}> \"#{@warnings.join('\n')}\" ." + ntriples.join("\n") + end + +=begin + def to_xlsx + + # both simple_xlsx and axlsx create empty documents with OLE2 errors + xlsx = @uri.split("/").last+".xlsx" + p = Axlsx::Package.new + wb = p.workbook + wb.add_worksheet(:name => "test") do |sheet| + to_table.each { |row| sheet.add_row row; puts row } + end + p.serialize("test.xlsx") + + p.to_stream +#``` + #Tempfile.open(@uri.split("/").last+".xlsx") do |xlsx| + SimpleXlsx::Serializer.new(xlsx) do |doc| + doc.add_sheet("People") do |sheet| + to_table.each { |row| sheet.add_row row } + end + end + send_file xlsx + #end + end +=end + + def to_csv + csv_string = CSV.generate do |csv| + to_table.each { |row| csv << row } + end + csv_string.gsub(/\"\"/,"") # AM: no quotes for missing values + #to_table + end + + def compound_uris + end + + def features + end + + def data_entries + end + + def to_table + # TODO: fix and speed up + sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE { + ?s <#{RDF.type}> <#{RDF::OT.Feature}> ; + <#{RDF::OLO.index}> ?fidx + } ORDER BY ?fidx" + features = FourStore.query(sparql, "text/uri-list").split("\n").collect { |uri| OpenTox::Feature.new uri } + sparql = "SELECT DISTINCT ?compound FROM <#{@uri}> WHERE { + ?compound <#{RDF.type}> <#{RDF::OT.Compound}> ; + <#{RDF::OLO.index}> ?cidx; + } ORDER BY ?cidx" + inchis = FourStore.query(sparql, "text/uri-list").split("\n").collect { |uri| "InChI#{uri.split("InChI").last}" } + + table = [["InChI"] + features.collect{|f| f.title}] + inchis.each{|inchi| table << [inchi]} + sparql = "SELECT ?cidx ?fidx ?value FROM <#{@uri}> WHERE { + ?data_entry <#{RDF::OLO.index}> ?cidx ; + <#{RDF::OT.values}> ?v . + ?v <#{RDF::OT.feature}> ?f; + <#{RDF::OT.value}> ?value . + ?f <#{RDF::OLO.index}> ?fidx. + } ORDER BY ?fidx ?cidx" + FourStore.query(sparql,"text/uri-list").split("\n").each do |row| + r,c,v = row.split("\t") + table[r.to_i+1][c.to_i+1] = v.to_s + end + table#.inspect +=begin + table = [] + dataset = OpenTox::Dataset.new @uri + table << ["SMILES"] + dataset.features.collect{|f| f.title} + dataset.data_entries.each_with_index do |data_entry,i| + table << [dataset.compounds[i]] + data_entry + end + table +=end +=begin + accept = "text/uri-list" + table = [] + if ordered? + features = OpenTox::Dataset.find_features_sparql(@uri) + sparql_constraints = {:type => RDF.type, :title => RDF::DC.title} + feature_props = OpenTox::Dataset.find_props_sparql(features.collect { |f| f.uri }, sparql_constraints) + quoted_features = []; feature_names = [] + features.each { |feature| + quoted_features << feature_props[feature.uri][:type].include?(RDF::OT.NominalFeature) + feature_names << "\"#{feature_props[feature.uri][:title][0].strip}\"" + } + compounds = OpenTox::Dataset.find_compounds_sparql(@uri) + values = OpenTox::Dataset.find_data_entries_sparql(@uri) + values += Array.new(compounds.size*features.size-values.size, "") + clim=(compounds.size-1) + cidx = fidx = 0 + num=(!quoted_features[fidx]) + table = (Array.new((features.size)*(compounds.size))).each_slice(features.size).to_a + values.each { |val| + unless val.blank? + table[cidx][fidx] = (num ? val : "\"#{val}\"") + end + if (cidx < clim) + cidx+=1 + else + cidx=0 + fidx+=1 + num=(!quoted_features[fidx]) + end + } + table.each_with_index { |row,idx| row.unshift("\"#{compounds[idx].inchi}\"") } + table.unshift([ "\"InChI\"" ] + feature_names) + else + sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Feature}>}" + features = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Feature.new uri}.each { |f| f.get } + quoted_features = features.each_with_index.collect { |f,idx| + if (f[RDF.type].include?(RDF::OT.NominalFeature) or + f[RDF.type].include?(RDF::OT.StringFeature) and + !f[RDF.type].include?(RDF::OT.NumericFeature)) + idx+1 + end + }.compact + table << ["InChI"] + features.collect{ |f| "\"" + f[RDF::DC.title] + "\"" } + sparql = "SELECT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Compound}>. }" + compounds = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Compound.new uri} + compounds.each do |compound| + data_entries = [] + features.each do |feature| + sparql = "SELECT ?value FROM <#{@uri}> WHERE { + ?data_entry <#{RDF::OT.compound}> <#{compound.uri}>; + <#{RDF::OT.values}> ?v . + ?v <#{RDF::OT.feature}> <#{feature.uri}>; + <#{RDF::OT.value}> ?value. + } ORDER BY ?data_entry" + FourStore.query(sparql, accept).split("\n").each do |value| + data_entries << value + end + end + row = ["\"#{compound.inchi}\""] + data_entries + row = row.each_with_index.collect { |value,idx| (quoted_features.include?(idx) ? "\"#{value}\"" : value) } + table << row + end + end + table +=end + end + + def feature_type(value) + if value.blank? + nil + elsif value.numeric? + RDF::OT.NumericFeature + else + RDF::OT.NominalFeature + end + end + + def ordered? + sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.OrderedDataset}>}" + FourStore.query(sparql, "text/uri-list").split("\n").empty? ? false : true + end + + def parse_put + task = OpenTox::Task.run "Dataset upload", @uri, @subjectid do + case @content_type + when "text/plain", "text/turtle", "application/rdf+xml" # no conversion needed + when "text/csv" + @body = from_csv @body + @content_type = "text/plain" + when "application/vnd.ms-excel" + from_spreadsheet Excel + when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + from_spreadsheet Excelx + when "application/vnd.oasis.opendocument.spreadsheet" + from_spreadsheet Openoffice + # when "chemical/x-mdl-sdfile" + # @body = parse_sdf @body + # @content_type = "text/plain" + else + bad_request_error "#{@content_type} is not a supported content type." + end + if params["file"] + nt = "<#{@uri}> <#{RDF::DC.title}> \"#{params["file"][:filename]}\".\n<#{uri}> <#{RDF::OT.hasSource}> \"#{params["file"][:filename]}\"." + FourStore.put(@uri, nt, "text/plain") + end + nt ? FourStore.post(@uri, @body, @content_type) : FourStore.put(@uri, @body, @content_type) + @uri + end + response['Content-Type'] = "text/uri-list" + halt 202, task.uri + end + end + + end +end + diff --git a/webapp/dataset.rb b/webapp/dataset.rb deleted file mode 100644 index 3ee6c04..0000000 --- a/webapp/dataset.rb +++ /dev/null @@ -1,125 +0,0 @@ -# dataset.rb -# OpenTox dataset -# Author: Andreas Maunz - -module OpenTox - - class Application < Service - - - # Get a list of descriptor calculation - # @return [text/uri-list] URIs - get '/dataset/*/pc' do - dataset=params["captures"][0] - algorithms = YAML::load_file RestClientWrapper.get(File.join($compound[:uri],"pc_descriptors.yaml")) - list = (algorithms.keys.sort << "AllDescriptors").collect { |name| to("/dataset/#{dataset}/pc/#{name}",:full) }.join("\n") + "\n" - format_output(list) - end - - # Get representation of descriptor calculation - # @return [String] Representation - get '/dataset/*/pc/*' do - dataset = params[:captures][0] - params[:descriptor] = params[:captures][1] - descriptors = YAML::load_file RestClientWrapper.get(File.join($compound[:uri],"pc_descriptors.yaml")) - alg_params = [ - { DC.description => "Dataset URI", - OT.paramScope => "mandatory", - DC.title => "dataset_uri" } - ] - if params[:descriptor] != "AllDescriptors" - descriptors = descriptors[params[:descriptor]] - else - alg_params << { - DC.description => "Physico-chemical type, one or more of '#{descriptors.collect { |id, info| info[:pc_type] }.uniq.sort.join(",")}'", - OT.paramScope => "optional", DC.title => "pc_type" - } - alg_params << { - DC.description => "Software Library, one or more of '#{descriptors.collect { |id, info| info[:lib] }.uniq.sort.join(",")}'", - OT.paramScope => "optional", DC.title => "lib" - } - descriptors = {:id => "AllDescriptors", :name => "All PC descriptors" } # Comes from pc_descriptors.yaml for single descriptors - end - - if descriptors - # Contents - algorithm = OpenTox::Algorithm.new(to("/dataset/#{dataset}/pc/#{params[:descriptor]}",:full)) - mmdata = { - DC.title => params[:descriptor], - DC.creator => "andreas@maunz.de", - DC.description => descriptors[:name], - RDF.type => [OTA.DescriptorCalculation], - } - mmdata[DC.description] << (", pc_type: " + descriptors[:pc_type]) unless descriptors[:id] == "AllDescriptors" - mmdata[DC.description] << (", lib: " + descriptors[:lib]) unless descriptors[:id] == "AllDescriptors" - algorithm.metadata=mmdata - algorithm.parameters = alg_params - format_output(algorithm) - else - resource_not_found_error "Unknown descriptor #{params[:descriptor]}." - end - end - - - # Calculate PC descriptors - # Single descriptors or sets of descriptors can be selected - # Sets are selected via lib and/or pc_type, and take precedence, when also a descriptor is submitted - # If none of descriptor, lib, and pc_type is submitted, all descriptors are calculated - # Set composition is induced by intersecting lib and pc_type sets, if appropriate - # @param [optional, HEADER] accept Accept one of 'application/rdf+xml', 'text/csv', defaults to 'application/rdf+xml' - # @param [optional, String] descriptor A single descriptor to calculate values for. - # @param [optional, String] lib One or more descriptor libraries out of [cdk,joelib,openbabel], for whose descriptors to calculate values. - # @param [optional, String] pc_type One or more descriptor types out of [constitutional,topological,geometrical,electronic,cpsa,hybrid], for whose descriptors to calculate values - # @return [application/rdf+xml,text/csv] Compound descriptors and values - post '/dataset/*/pc' do - dataset=params["captures"][0] - params.delete('splat') - params.delete('captures') - params_array = params.collect{ |k,v| [k.to_sym, v]} - params = Hash[params_array] - params[:dataset] = dataset - descriptor = params[:descriptor].nil? ? "" : params[:descriptor] - lib = params[:lib].nil? ? "" : params[:lib] - pc_type = params[:pc_type].nil? ? "" : params[:pc_type] - - task = OpenTox::Task.create( - $task[:uri], - @subjectid, - { RDF::DC.description => "Calculating PC descriptors", - RDF::DC.creator => to("/dataset/#{dataset}/pc",:full) - } - ) do |task| - - result_ds = OpenTox::Dataset.new(nil,@subjectid) - ds=OpenTox::Dataset.find("#{$dataset[:uri]}/#{dataset}",@subjectid) - ds.compounds.each { |cmpd| - ds_string = RestClientWrapper.post("#{$compound[:uri]}/#{cmpd.inchi}/pc", params, {:accept => "application/rdf+xml"}) - single_cmpd_ds = OpenTox::Dataset.new(OpenTox::Dataset.uri_from_rdf(ds_string),@subjectid) - single_cmpd_ds.parse_rdfxml(ds_string) - single_cmpd_ds.get(true) - unless result_ds.features.size>0 # features present already? - result_ds.features = single_cmpd_ds.features # AM: features - result_ds.parameters = ["pc_type", "lib", "descriptor"].collect{ |key| # AM: parameters - val = single_cmpd_ds.find_parameter_value(key) - { DC.title => key, OT.paramValue => (val.nil? ? "" : val) } - } - result_ds[DC.title] = single_cmpd_ds[DC.title] - result_ds[DC.creator] = to("/dataset/#{dataset}/pc",:full) - result_ds[OT.hasSource] = to("/dataset/#{dataset}/pc",:full) - end - result_ds << [ cmpd ] + single_cmpd_ds.data_entries[0] - } - result_ds.put @subjectid - $logger.debug result_ds.uri - result_ds.uri - - end - response['Content-Type'] = 'text/uri-list' - service_unavailable_error "Service unavailable" if task.cancelled? - halt 202,task.uri.to_s+"\n" - end - - end - -end - diff --git a/webapp/sinatra.rb b/webapp/sinatra.rb index fd0d354..b6becab 100644 --- a/webapp/sinatra.rb +++ b/webapp/sinatra.rb @@ -17,7 +17,7 @@ module OpenTox case @accept when /text\/html/ content_type "text/html" - OpenTox.text_to_html obj + obj.to_html else content_type 'text/uri-list' obj @@ -31,7 +31,7 @@ module OpenTox obj.to_rdfxml when /text\/html/ content_type "text/html" - OpenTox.text_to_html obj.to_turtle + obj.to_html else content_type "text/turtle" obj.to_turtle diff --git a/webapp/test.rb b/webapp/test.rb deleted file mode 100644 index 63b9521..0000000 --- a/webapp/test.rb +++ /dev/null @@ -1,16 +0,0 @@ -#for testing the error handling - -module OpenTox - class Application < Service - - post '/dataset/test/error_in_task/?' do - task = OpenTox::Task.create($task[:uri],@subjectid,{ RDF::DC.description => "error_in_task"}) do |task| - sleep 1 - internal_server_error "error_in_task_message" - end - response['Content-Type'] = 'text/uri-list' - halt 202,task.uri.to_s+"\n" - end - - end -end |