From 4b90e58aa5873a01566f093ed1301ac4ccaf91f4 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 24 Aug 2011 16:07:58 +0000 Subject: datasets stored as json (with Yajl) to improve performance --- .gitignore | 1 + application.rb | 81 ++++++++++++++++++++++++++++++++++------------------------ 2 files changed, 49 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index f52f036..d21a58b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ api_key.rb tmp/* log/* public/* +data/* diff --git a/application.rb b/application.rb index cb36c2d..f5b6d0c 100644 --- a/application.rb +++ b/application.rb @@ -1,6 +1,7 @@ require 'rubygems' gem "opentox-ruby", "~> 2" require 'opentox-ruby' +require 'profiler' set :lock, true @@ -8,7 +9,7 @@ set :lock, true helpers do def next_id - id = Dir["./#{@@datadir}/*yaml"].collect{|f| File.basename(f.sub(/.yaml/,'')).to_i}.sort.last + id = Dir["./#{@@datadir}/*json"].collect{|f| File.basename(f.sub(/.json/,'')).to_i}.sort.last id = 0 if id.nil? id + 1 end @@ -24,7 +25,6 @@ helpers do raise "store subject-id in dataset-object, not in params" if params.has_key?(:subjectid) and @subjectid==nil content_type = "application/rdf+xml" if content_type.nil? - #dataset = OpenTox::Dataset.new(@uri, @subjectid) dataset = OpenTox::Dataset.new(nil, @subjectid) case content_type @@ -32,6 +32,9 @@ helpers do when /yaml/ dataset.load_yaml(input_data) + when /json/ + dataset.load_json(input_data) + when /application\/rdf\+xml/ dataset.load_rdfxml(input_data, @subjectid) @@ -45,6 +48,9 @@ helpers do when "chemical/x-mdl-sdfile" dataset.load_sdf(input_data, @subjectid) + when /json/ + dataset.load_json(params[:file][:tempfile].read) + when /yaml/ dataset.load_yaml(params[:file][:tempfile].read) @@ -52,7 +58,6 @@ helpers do dataset.load_rdfxml_file(params[:file][:tempfile], @subjectid) when "text/csv" - #dataset = OpenTox::Dataset.new @uri dataset.load_csv(params[:file][:tempfile].read, @subjectid) dataset.add_metadata({ DC.title => File.basename(params[:file][:filename],".csv"), @@ -89,7 +94,7 @@ helpers do dataset.uri = @uri # update uri (also in metdata) dataset.features.keys.each { |f| dataset.features[f][OT.hasSource] = dataset.metadata[OT.hasSource] unless dataset.features[f][OT.hasSource]} - File.open("#{@@datadir}/#{@id}.yaml","w+"){|f| f.puts dataset.to_yaml} + File.open("#{@@datadir}/#{@id}.json","w+"){|f| f.puts dataset.to_json} end end @@ -102,14 +107,16 @@ before do @id = @id.to_s.sub(/\//,'').to_i @uri = uri @id - @yaml_file = "#{@@datadir}/#{@id}.yaml" - raise OpenTox::NotFoundError.new "Dataset #{@id} not found." unless File.exists? @yaml_file + @json_file = "#{@@datadir}/#{@id}.json" + raise OpenTox::NotFoundError.new "Dataset #{@id} not found." unless File.exists? @json_file extension = File.extname(request.path_info) unless extension.empty? case extension when ".html" @accept = 'text/html' + when ".json" + @accept = 'application/json' when ".yaml" @accept = 'application/x-yaml' when ".csv" @@ -155,40 +162,42 @@ get '/:id' do when /rdf/ # redland sends text/rdf instead of application/rdf+xml file = "#{@@datadir}/#{params[:id]}.rdfxml" unless File.exists? file # lazy rdfxml generation - dataset = YAML.load_file(@yaml_file) + dataset = OpenTox::Dataset.from_json File.read(@json_file) File.open(file,"w+") { |f| f.puts dataset.to_rdfxml } end - #response['Content-Type'] = 'application/rdf+xml' - #File.read(file) send_file file, :type => 'application/rdf+xml' + when /json/ + send_file @json_file, :type => 'application/x-yaml' + when /yaml/ - #response['Content-Type'] = 'application/x-yaml' - #File.read(@yaml_file) - send_file @yaml_file, :type => 'application/x-yaml' + file = "#{@@datadir}/#{params[:id]}.yaml" + unless File.exists? file # lazy yaml generation + dataset = OpenTox::Dataset.from_json File.read(@json_file) + File.open(file,"w+") { |f| f.puts dataset.to_rdfxml } + end + send_file file, :type => 'application/x-yaml' when /html/ response['Content-Type'] = 'text/html' - OpenTox.text_to_html File.read(@yaml_file) + OpenTox.text_to_html File.read(@json_file) when "text/csv" response['Content-Type'] = 'text/csv' - YAML.load_file(@yaml_file).to_csv + OpenTox::Dataset.from_json(File.read(@json_file)).to_csv when /ms-excel/ file = "#{@@datadir}/#{params[:id]}.xls" - YAML.load_file(@yaml_file).to_xls.write(file) unless File.exists? file # lazy xls generation - #response['Content-Type'] = 'application/ms-excel' - #File.open(file).read + OpenTox::Dataset.from_json(File.read(@json_file)).to_xls.write(file) unless File.exists? file # lazy xls generation send_file file, :type => 'application/ms-excel' when /sdfile/ response['Content-Type'] = 'chemical/x-mdl-sdfile' - YAML.load_file(@yaml_file).to_sdf + OpenTox::Dataset.from_json(File.read(@json_file)).to_sdf - when /uri-list/ - response['Content-Type'] = 'text/uri-list' - YAML.load_file(@yaml_file).to_urilist +# when /uri-list/ +# response['Content-Type'] = 'text/uri-list' +# Yajl::Parser.parse(File.read(@json_file)).to_urilist else raise OpenTox::NotFoundError.new "Content-type #{@accept} not supported." @@ -198,7 +207,7 @@ end # Get metadata of the dataset # @return [application/rdf+xml] Metadata OWL-DL get '/:id/metadata' do - metadata = YAML.load_file(@yaml_file).metadata + metadata = OpenTox::Dataset.from_json(File.read(@json_file)).metadata case @accept when /rdf/ # redland sends text/rdf instead of application/rdf+xml @@ -220,9 +229,9 @@ get %r{/(\d+)/feature/(.*)$} do |id,feature| @id = id @uri = uri @id - @yaml_file = "#{@@datadir}/#{@id}.yaml" + @json_file = "#{@@datadir}/#{@id}.json" feature_uri = url_for("/#{@id}/feature/#{URI.encode(feature)}",:full) # work around racks internal uri decoding - metadata = YAML.load_file(@yaml_file).features[feature_uri] + metadata = OpenTox::Dataset.from_json(File.read(@json_file)).features[feature_uri] case @accept when /rdf/ # redland sends text/rdf instead of application/rdf+xml @@ -233,6 +242,9 @@ get %r{/(\d+)/feature/(.*)$} do |id,feature| when /yaml/ response['Content-Type'] = 'application/x-yaml' metadata.to_yaml + when /json/ + response['Content-Type'] = 'application/json' + Yajl::Encoder.encode(metadata) end end @@ -242,7 +254,7 @@ end # @return [application/rdf+xml, application-x-yaml, text/uri-list] Feature list get '/:id/features' do - features = YAML.load_file(@yaml_file).features + features = OpenTox::Dataset.from_json(File.read(@json_file)).features case @accept when /rdf/ # redland sends text/rdf instead of application/rdf+xml @@ -253,6 +265,9 @@ get '/:id/features' do when /yaml/ response['Content-Type'] = 'application/x-yaml' features.to_yaml + when /json/ + response['Content-Type'] = 'application/json' + Yajl::Encoder.encode(features) when "text/uri-list" response['Content-Type'] = 'text/uri-list' features.keys.join("\n") + "\n" @@ -263,7 +278,7 @@ end # @return [text/uri-list] Feature list get '/:id/compounds' do response['Content-Type'] = 'text/uri-list' - YAML.load_file(@yaml_file).compounds.join("\n") + "\n" + OpenTox::Dataset.from_json(File.read(@json_file)).compounds.join("\n") + "\n" end # Create a new dataset. @@ -280,22 +295,22 @@ end # @param [optional] file, for file uploads, Content-type should be multipart/form-data, please specify the file type `application/rdf+xml, application-x-yaml, text/csv, application/ms-excel` # @return [text/uri-list] Task URI or Dataset URI (empty datasets) post '/?' do - + response['Content-Type'] = 'text/uri-list' # it could be that the read function works only once!, store in varible input_data = request.env["rack.input"].read @id = next_id @uri = uri @id - @yaml_file = "#{@@datadir}/#{@id}.yaml" + @json_file = "#{@@datadir}/#{@id}.json" if params.size == 0 and input_data.size==0 - File.open(@yaml_file,"w+"){|f| f.puts OpenTox::Dataset.new(@uri).to_yaml} - OpenTox::Authorization.check_policy(@uri, @subjectid) if File.exists? @yaml_file + File.open(@json_file,"w+"){|f| f.puts OpenTox::Dataset.new(@uri).to_json} + OpenTox::Authorization.check_policy(@uri, @subjectid) if File.exists? @json_file @uri else task = OpenTox::Task.create("Converting and saving dataset ", @uri) do load_dataset @id, params, request.content_type, input_data - OpenTox::Authorization.check_policy(@uri, @subjectid) if File.exists? @yaml_file + OpenTox::Authorization.check_policy(@uri, @subjectid) if File.exists? @json_file @uri end raise OpenTox::ServiceUnavailableError.newtask.uri+"\n" if task.status == "Cancelled" @@ -315,7 +330,6 @@ end # @param [optional] file, for file uploads, Content-type should be multipart/form-data, please specify the file type `application/rdf+xml, application-x-yaml, text/csv, application/ms-excel` # @return [text/uri-list] Task ID post '/:id' do - LOGGER.debug @uri response['Content-Type'] = 'text/uri-list' task = OpenTox::Task.create("Converting and saving dataset ", @uri) do FileUtils.rm Dir["#{@@datadir}/#{@id}.*"] @@ -332,7 +346,7 @@ delete '/:id' do LOGGER.debug "deleting dataset with id #{@id}" begin FileUtils.rm Dir["#{@@datadir}/#{@id}.*"] - if @subjectid and !File.exists? @yaml_file and @uri + if @subjectid and !File.exists? @json_file and @uri begin res = OpenTox::Authorization.delete_policies_from_uri(@uri, @subjectid) LOGGER.debug "Policy deleted for Dataset URI: #{@uri} with result: #{res}" @@ -353,6 +367,7 @@ delete '/?' do FileUtils.rm Dir["#{@@datadir}/*.rdfxml"] FileUtils.rm Dir["#{@@datadir}/*.xls"] FileUtils.rm Dir["#{@@datadir}/*.yaml"] + FileUtils.rm Dir["#{@@datadir}/*.json"] response['Content-Type'] = 'text/plain' "All datasets deleted." end -- cgit v1.2.3