From ce7216c6da3767e3e55429eaa0b55c367939ee38 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 26 Mar 2013 10:52:55 +0100 Subject: code cleanup and refactoring. --- application.rb | 353 +++------------------------------------------------------ 1 file changed, 14 insertions(+), 339 deletions(-) (limited to 'application.rb') diff --git a/application.rb b/application.rb index dfdc952..20fadad 100644 --- a/application.rb +++ b/application.rb @@ -4,325 +4,27 @@ require 'roo' require 'opentox-server' -require './compound.rb' +require_relative 'helper.rb' +require_relative 'compound.rb' +# TODO: remove and find a better way to test task errors +require_relative 'test.rb' # Library code $logger.debug "Dataset booting: #{$dataset.collect{|k,v| "#{k}: '#{v}'"} }" -Dir['./lib/compound/*.rb'].each { |f| require f; also_reload f } # Libs -Dir['./lib/*.rb'].each { |f| require f; also_reload f } # Libs -Dir['./webapp/*.rb'].each { |f| require f; also_reload f } # Webapps -#require 'profiler' +require 'profiler' # Entry point module OpenTox class Application < Service - @warnings = [] - - helpers do - - def from_csv(csv) - table = CSV.parse(csv) - # CSVs with unexpected encodings may have blanks instead of nil - table.collect! { |row| - row.collect! { |val| - (val.class == String and val.strip == "") ? nil : val - } - } - from_table table - end - - def from_spreadsheet spreadsheet - extensions = { Excel => ".xls", Excelx => ".xlsx", Openoffice => ".ods" } - input = params[:file][:tempfile].path + ".xls" - csv_file = params[:file][:tempfile].path + ".csv" - File.rename params[:file][:tempfile].path, input # roo needs "correct" extensions - spreadsheet.new(input).to_csv csv_file # roo cannot write to strings - @body = from_csv File.read(csv_file) - @content_type = "text/plain" - end - -=begin - def from_sdf(sdf) - - #obconversion = OpenBabel::OBConversion.new - #obmol = OpenBabel::OBMol.new - #obconversion.set_in_and_out_formats "sdf", "inchi" - - table = [] - - properties = [] - sdf.each_line { |l| properties << l.to_s if l.match(//,'').strip.chomp } - properties.insert 0, "InChI" - table[0] = properties - - rec = 0 - sdf.split(/\$\$\$\$\r*\n/).each do |s| - rec += 1 - table << [] - begin - # TODO: use compound service - compound = OpenTox::Compound.from_sdf sdf - #obconversion.read_string obmol, s - table.last << obconversion.write_string(obmol).gsub(/\s/,'').chomp - rescue - # TODO: Fix, will lead to follow up errors - table.last << "Could not convert structure at record #{rec}) have been ignored! \n#{s}" - end - obmol.get_data.each { |d| table.last[table.first.index(d.get_attribute)] = d.get_value } - end - from_table table - end -=end - - def from_table table - - @warnings = [] - ntriples = ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.Dataset}>."] - ntriples << ["<#{@uri}> <#{RDF.type}> <#{RDF::OT.OrderedDataset}>."] - - # features - feature_names = table.shift.collect{|f| f.strip} - @warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size - compound_format = feature_names.shift.strip - bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: URI, SMILES, InChI." unless compound_format =~ /URI|URL|SMILES|InChI/i - features = [] - ignored_feature_indices = [] - feature_names.each_with_index do |f,i| - values = table.collect{|row| val=row[i+1]; val.strip! unless val.nil?; val }.uniq.compact - types = values.collect{|v| feature_type(v)}.uniq - metadata = {} - if values.size == 0 # empty feature - elsif values.size <= 5 # max classes - metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] - metadata[RDF::OT.acceptValue] = values - end - if types.size == 1 and types[0] == RDF::OT.NumericFeature - metadata[RDF.type] = [] unless metadata[RDF.type] - metadata[RDF.type] << RDF::OT.NumericFeature - else - metadata[RDF.type] = [ RDF::OT.NominalFeature, RDF::OT.StringFeature ] # only nominal type for mixed cases - metadata[RDF::OT.acceptValue] = values - end - feature = OpenTox::Feature.find_by_title(f,metadata) # AM: find or generate - features << feature unless feature.nil? - ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}>." - ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> #{i} ." - end - - # compounds and values - compound_uris = [] - table.each_with_index do |values,j| - compound = values.shift - begin - case compound_format - when /URI|URL/i - compound_uri = compound - when /SMILES/i - compound_uri = OpenTox::Compound.from_smiles($compound[:uri], compound).uri - when /InChI/i - compound_uri = OpenTox::Compound.from_inchi($compound[:uri], compound).uri - end - rescue - @warnings << "Cannot parse compound '#{compound}' at position #{j+2}, all entries are ignored." # be careful with double quotes in literals! - next - end - compound_uris << compound_uri - unless values.size == features.size - @warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{features.size}), all entries are ignored." - next - end - ntriples << "<#{compound_uri}> <#{RDF.type}> <#{RDF::OT.Compound}>." - ntriples << "<#{compound_uri}> <#{RDF::OLO.index}> #{j} ." - - values.each_with_index do |v,i| - @warnings << "Empty value for compound #{compound} (row #{j+2}) and feature \"#{feature_names[i]}\" (column #{i+2})." if v.blank? - #@warnings << "Empty value in row #{j+2}, column #{i+2} (feature \"#{feature_names[i]}\")." if v.blank? - - data_entry_node = "_:dataentry"+ j.to_s - value_node = data_entry_node+ "_value"+ i.to_s - ntriples << "<#{@uri}> <#{RDF::OT.dataEntry}> #{data_entry_node} ." - ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> ." - ntriples << "#{data_entry_node} <#{RDF::OLO.index}> #{j} ." - ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound_uri}> ." - ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} ." - ntriples << "#{value_node} <#{RDF::OT.feature}> <#{features[i].uri}> ." - ntriples << "#{value_node} <#{RDF::OT.value}> \"#{v}\" ." - - end - - end - compound_uris.duplicates.each do |uri| - positions = [] - compound_uris.each_with_index{|c,i| positions << i+1 if c == uri} - @warnings << "Duplicate compound #{uri} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments." - end - - ntriples << "<#{@uri}> <#{RDF::OT.Warnings}> \"#{@warnings.join('\n')}\" ." - ntriples.join("\n") - end - -=begin - def to_xlsx - - # both simple_xlsx and axlsx create empty documents with OLE2 errors - xlsx = @uri.split("/").last+".xlsx" - p = Axlsx::Package.new - wb = p.workbook - wb.add_worksheet(:name => "test") do |sheet| - to_table.each { |row| sheet.add_row row; puts row } - end - p.serialize("test.xlsx") - - p.to_stream -#``` - #Tempfile.open(@uri.split("/").last+".xlsx") do |xlsx| - SimpleXlsx::Serializer.new(xlsx) do |doc| - doc.add_sheet("People") do |sheet| - to_table.each { |row| sheet.add_row row } - end - end - send_file xlsx - #end - end -=end - - def to_csv - csv_string = CSV.generate do |csv| - to_table.each { |row| csv << row } - end - csv_string.gsub(/\"\"/,"") # AM: no quotes for missing values - end - - def to_table -=begin - table = [] - dataset = OpenTox::Dataset.new @uri - dataset.get - table << ["SMILES"] + dataset.features.collect{|f| f.get; f.title} - dataset.data_entries.each_with_index do |data_entry,i| - table << [dataset.compounds[i]] + data_entry - end - table -=end - accept = "text/uri-list" - table = [] - if ordered? - features = OpenTox::Dataset.find_features_sparql(@uri) - sparql_constraints = {:type => RDF.type, :title => RDF::DC.title} - feature_props = OpenTox::Dataset.find_props_sparql(features.collect { |f| f.uri }, sparql_constraints) - quoted_features = []; feature_names = [] - features.each { |feature| - quoted_features << feature_props[feature.uri][:type].include?(RDF::OT.NominalFeature) - feature_names << "\"#{feature_props[feature.uri][:title][0].strip}\"" - } - compounds = OpenTox::Dataset.find_compounds_sparql(@uri) - values = OpenTox::Dataset.find_data_entries_sparql(@uri) - values += Array.new(compounds.size*features.size-values.size, "") - clim=(compounds.size-1) - cidx = fidx = 0 - num=(!quoted_features[fidx]) - table = (Array.new((features.size)*(compounds.size))).each_slice(features.size).to_a - values.each { |val| - unless val.blank? - table[cidx][fidx] = (num ? val : "\"#{val}\"") - end - if (cidx < clim) - cidx+=1 - else - cidx=0 - fidx+=1 - num=(!quoted_features[fidx]) - end - } - table.each_with_index { |row,idx| row.unshift("\"#{compounds[idx].inchi}\"") } - table.unshift([ "\"InChI\"" ] + feature_names) - else - sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Feature}>}" - features = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Feature.new uri}.each { |f| f.get } - quoted_features = features.each_with_index.collect { |f,idx| - if (f[RDF.type].include?(RDF::OT.NominalFeature) or - f[RDF.type].include?(RDF::OT.StringFeature) and - !f[RDF.type].include?(RDF::OT.NumericFeature)) - idx+1 - end - }.compact - table << ["InChI"] + features.collect{ |f| "\"" + f[RDF::DC.title] + "\"" } - sparql = "SELECT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.Compound}>. }" - compounds = FourStore.query(sparql, accept).split("\n").collect{|uri| OpenTox::Compound.new uri} - compounds.each do |compound| - data_entries = [] - features.each do |feature| - sparql = "SELECT ?value FROM <#{@uri}> WHERE { - ?data_entry <#{RDF::OT.compound}> <#{compound.uri}>; - <#{RDF::OT.values}> ?v . - ?v <#{RDF::OT.feature}> <#{feature.uri}>; - <#{RDF::OT.value}> ?value. - } ORDER BY ?data_entry" - FourStore.query(sparql, accept).split("\n").each do |value| - data_entries << value - end - end - row = ["\"#{compound.inchi}\""] + data_entries - row = row.each_with_index.collect { |value,idx| (quoted_features.include?(idx) ? "\"#{value}\"" : value) } - table << row - end - end - table - end - - def feature_type(value) - if value.blank? - nil - elsif value.numeric? - RDF::OT.NumericFeature - else - RDF::OT.NominalFeature - end - end - - def ordered? - sparql = "SELECT DISTINCT ?s FROM <#{@uri}> WHERE {?s <#{RDF.type}> <#{RDF::OT.OrderedDataset}>}" - FourStore.query(sparql, "text/uri-list").split("\n").empty? ? false : true - end + before do + #Profiler__::start_profile + end - def parse_put - task = OpenTox::Task.create $task[:uri], nil, RDF::DC.description => "Dataset upload: #{@uri}" do - #Profiler__::start_profile - case @content_type - when "text/plain", "text/turtle", "application/rdf+xml" # no conversion needed - when "text/csv" - @body = from_csv @body - @content_type = "text/plain" - when "application/vnd.ms-excel" - from_spreadsheet Excel - when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - from_spreadsheet Excelx - when "application/vnd.oasis.opendocument.spreadsheet" - from_spreadsheet Openoffice - # when "chemical/x-mdl-sdfile" - # @body = parse_sdf @body - # @content_type = "text/plain" - else - bad_request_error "#{@content_type} is not a supported content type." - end - #puts @body - FourStore.put @uri, @body, @content_type - if params[:file] - nt = "<#{@uri}> <#{RDF::DC.title}> \"#{params[:file][:filename]}\".\n<#{uri}> <#{RDF::OT.hasSource}> \"#{params[:file][:filename]}\"." - FourStore.post(@uri, nt, "text/plain") - end - #Profiler__::stop_profile - #Profiler__::print_profile($stdout) - @uri - end - response['Content-Type'] = "text/uri-list" - halt 202, task.uri - end + after do + #Profiler__::stop_profile + #Profiler__::print_profile($stdout) end before "/#{SERVICE}/:id/:property" do @@ -336,7 +38,6 @@ module OpenTox end get "/dataset/:id/?" do - #Profiler__::start_profile case @accept when "application/rdf+xml", "text/turtle", "text/plain", /html/ r = FourStore.get(@uri, @accept) @@ -355,8 +56,6 @@ module OpenTox bad_request_error "'#{@accept}' is not a supported content type." end end - #Profiler__::stop_profile - #Profiler__::print_profile($stdout) r end @@ -371,11 +70,9 @@ module OpenTox case @accept when "application/rdf+xml", "text/turtle", "text/plain" sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE { - { ?s ?p ?o. <#{@uri}> <#{RDF.type}> ?o. } UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::DC.title}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::DC.creator}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::OT.Warnings}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::OT.hasSource}> ?o.} + { ?s ?p ?o. <#{@uri}> ?p ?o. } UNION + { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Parameter}> . } + FILTER (?p != <#{RDF::OT.dataEntry}>) } " FourStore.query sparql, @accept else @@ -413,28 +110,6 @@ module OpenTox FourStore.query sparql, @accept end - # Get everything but the data entries - # @param [Header] Accept one of `application/rdf+xml, text/turtle, text/plain, text/uri-list` (default application/rdf+xml) - # @return [application/rdf+xml, text/turtle, text/plain, text/uri-list] The data - get '/dataset/:id/allnde' do - case @accept - when "application/rdf+xml", "text/turtle", "text/plain" - sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{@uri}> WHERE { - { ?s ?p ?o. <#{@uri}> <#{RDF.type}> ?o. } UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::DC.title}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::DC.creator}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::OT.Warnings}> ?o.} UNION - { ?s ?p ?o. <#{@uri}> <#{RDF::OT.hasSource}> ?o.} UNION - { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Feature}> } UNION - { ?s ?p ?o. ?s <#{RDF.type}> <#{RDF::OT.Parameter}> } - }" - else - bad_request_error "'#{@accept}' is not a supported content type." - end - FourStore.query sparql, @accept - end - - end end -- cgit v1.2.3