From 931e7f00c8a5df5d5c8ea4ae6d8fdd841ec63c90 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Tue, 30 Oct 2012 16:02:28 +0100 Subject: Loading entries by sparql (mv'd 4store.rb), ordered status via rdf --- lib/4store.rb | 122 ++++++++++++++++++++++++++++++++++++++++++++ lib/dataset.rb | 19 ++++--- lib/opentox-client.rb | 3 +- lib/utils/rdf/dataset.rb | 15 +++++- lib/utils/sparql/dataset.rb | 4 +- 5 files changed, 151 insertions(+), 12 deletions(-) create mode 100644 lib/4store.rb diff --git a/lib/4store.rb b/lib/4store.rb new file mode 100644 index 0000000..3ed081d --- /dev/null +++ b/lib/4store.rb @@ -0,0 +1,122 @@ +module OpenTox + module Backend + class FourStore + + @@accept_formats = [ "application/rdf+xml", "text/turtle", "text/plain", "text/uri-list", "text/html", 'application/sparql-results+xml' ] + @@content_type_formats = [ "application/rdf+xml", "text/turtle", "text/plain" ] + + def self.list mime_type + mime_type = "text/html" if mime_type.match(%r{\*/\*}) + bad_request_error "'#{mime_type}' is not a supported mime type. Please specify one of #{@@accept_formats.join(", ")} in the Accept Header." unless @@accept_formats.include? mime_type + if mime_type =~ /uri-list/ + sparql = "SELECT DISTINCT ?g WHERE {GRAPH ?g {?s <#{RDF.type}> <#{klass}>; ?p ?o. } }" + else + sparql = "CONSTRUCT {?s ?p ?o.} WHERE {?s <#{RDF.type}> <#{klass}>; ?p ?o. }" + end + query sparql, mime_type + end + + def self.get uri, mime_type + mime_type = "text/html" if mime_type.match(%r{\*/\*}) + bad_request_error "'#{mime_type}' is not a supported mime type. Please specify one of #{@@accept_formats.join(", ")} in the Accept Header." unless @@accept_formats.include? mime_type + sparql = "CONSTRUCT {?s ?p ?o.} FROM <#{uri}> WHERE { ?s ?p ?o. }" + rdf = query sparql, mime_type + resource_not_found_error "#{uri} not found." if rdf.empty? + rdf + end + + def self.post uri, rdf, mime_type + bad_request_error "'#{mime_type}' is not a supported content type. Please use one of #{@@content_type_formats.join(", ")}." unless @@content_type_formats.include? mime_type or mime_type == "multipart/form-data" + bad_request_error "Reqest body empty." unless rdf + mime_type = "application/x-turtle" if mime_type == "text/plain" # ntriples is turtle in 4store + begin + RestClient.post File.join(four_store_uri,"data")+"/", :data => rdf, :graph => uri, "mime-type" => mime_type + rescue + bad_request_error $!.message, File.join(four_store_uri,"data")+"/" + end + end + + def self.put uri, rdf, mime_type + bad_request_error "'#{mime_type}' is not a supported content type. Please use one of #{@@content_type_formats.join(", ")}." unless @@content_type_formats.include? mime_type + bad_request_error "Reqest body empty." unless rdf + mime_type = "application/x-turtle" if mime_type == "text/plain" + #begin + RestClientWrapper.put File.join(four_store_uri,"data",uri), rdf, :content_type => mime_type + #rescue + #bad_request_error $!.message, File.join(four_store_uri,"data",uri) + #end + end + + def self.delete uri + RestClientWrapper.delete data_uri(uri) + end + + def self.update sparql + RestClient.post(update_uri, :update => sparql ) + end + + def self.query sparql, mime_type + if sparql =~ /SELECT/i +# return list unless mime_type + case mime_type + when 'application/sparql-results+xml' + RestClient.get(sparql_uri, :params => { :query => sparql }, :accept => mime_type).body + when "text/uri-list" + RestClient.get(sparql_uri, :params => { :query => sparql }, :accept => "text/plain").body.gsub(/"|<|>/,'').split("\n").drop(1).join("\n") + else + bad_request_error "#{mime_type} is not a supported mime type for SELECT statements." + end + elsif sparql =~ /CONSTRUCT/i + case mime_type + when "text/plain", "application/rdf+xml" + RestClient.get(sparql_uri, :params => { :query => sparql }, :accept => mime_type).body + when /html|turtle/ + # TODO: fix and improve + nt = RestClient.get(sparql_uri, :params => { :query => sparql }, :accept => "text/plain").body # 4store returns ntriples for turtle + + rdf = RDF::Graph.new + RDF::Reader.for(:ntriples).new(nt) do |reader| + reader.each_statement { |statement| rdf << statement } + end + prefixes = {:rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"} + ['OT', 'DC', 'XSD', 'OLO'].each{|p| prefixes[p.downcase.to_sym] = eval("RDF::#{p}.to_s") } + # TODO: fails for large datasets?? multi_cell_call + turtle = RDF::N3::Writer.for(:turtle).buffer(:prefixes => prefixes) do |writer| + rdf.each{|statement| writer << statement} + end + regex = Regexp.new '(https?:\/\/[\S]+)([>"])' + turtle = "" + turtle.gsub( regex, '\1\2' ).gsub(/\n/,'
') + "" if mime_type =~ /html/ and !turtle.empty? + turtle + end + else + # TODO: check if this prevents SPARQL injections + bad_request_error "Only SELECT and CONSTRUCT are accepted SPARQL statements." + end + rescue + bad_request_error $!.message, sparql_uri + end + + def self.klass + RDF::OT[SERVICE.capitalize] + end + + def self.four_store_uri + # credentials are removed from uri in error.rb + $four_store[:uri].sub(%r{//},"//#{$four_store[:user]}:#{$four_store[:password]}@") + end + + def self.sparql_uri + File.join(four_store_uri, "sparql") + '/' + end + + def self.update_uri + File.join(four_store_uri, "update") + '/' + end + + def self.data_uri uri + File.join(four_store_uri, "data","?graph=#{uri}") + end + + end + end +end diff --git a/lib/dataset.rb b/lib/dataset.rb index e700ad0..8d135a6 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -28,25 +28,28 @@ module OpenTox end def get(force_no_backend_query=false) - super() unless (force_no_backend_query and @rdf.size>0) + have_rdf = (force_no_backend_query and @rdf.size>0) + super() unless have_rdf + ordered = (have_rdf or OpenTox::Dataset.ordered?(@uri)) @features = [] @compounds = [] @data_entries = [] - query = RDF::Query.new do - pattern [:uri, RDF.type, RDF::OT.OrderedDataset] - end - s=query.execute(@rdf) # AM: read ordered dataset from RDF - if s.first - @uri = s[0].uri.to_s if force_no_backend_query # AM: must rewrite URI + if ordered + @uri = s[0].uri.to_s if have_rdf # AM: must rewrite URI @compounds = OpenTox::Dataset.find_compounds_rdf(@rdf) @features = OpenTox::Dataset.find_features_rdf(@rdf) numeric_features = @features.collect{|f| f.get f[RDF.type].include?(RDF::OT.NumericFeature) or f[RDF.type].include?(RDF::OT.Substructure) } - table = OpenTox::Dataset.find_data_entries_rdf(@rdf) + if have_rdf + table = OpenTox::Dataset.find_data_entries_rdf(@rdf) + else + values = OpenTox::Dataset.find_data_entries_sparql(@uri) + table = values + Array.new(@compounds.size*@features.size-values.size, "") + end clim=(@compounds.size-1) cidx = fidx = 0 num=numeric_features[fidx] diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb index 8b56411..ac7f4e6 100644 --- a/lib/opentox-client.rb +++ b/lib/opentox-client.rb @@ -36,7 +36,8 @@ FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-m "compound.rb", "dataset.rb", "model.rb", - "algorithm.rb" + "algorithm.rb", + "4store.rb" ].each{ |f| require File.join(File.dirname(__FILE__),f) } Dir["#{File.dirname(__FILE__)}/utils/shims/*.rb"].each { |f| require f } # Shims for legacy code diff --git a/lib/utils/rdf/dataset.rb b/lib/utils/rdf/dataset.rb index b2deeb8..ab720d7 100644 --- a/lib/utils/rdf/dataset.rb +++ b/lib/utils/rdf/dataset.rb @@ -32,7 +32,7 @@ module OpenTox # Load data entries via RDF (slow) # @param [String] uri Dataset uri - # @return [Array] entries Data entries, ordered primarily over rows and secondarily over cols + # @return [Array] entries Data entries, ordered primarily over cols and secondarily over rows def self.find_data_entries_rdf(rdf) query = RDF::Query.new do pattern [:data_entry, RDF::OLO.index, :cidx] # compound index: now a free variable @@ -44,5 +44,18 @@ module OpenTox query.execute(rdf).order_by(:fidx, :cidx).collect { |s| s.val.to_s } end + # Query a dataset URI for ordered status + # by loading its metadata (OpenTox compliant) + # @param [String] uri Dataset uri + # @return [TrueClass,FalseClass] status Whether the dataset is ordered + def self.ordered?(uri) + ds = OpenTox::Dataset.new # dummy + ds.parse_rdfxml(RestClient.get([uri,"metadata"].join("/"),{:accept => "application/rdf+xml"})) + query = RDF::Query.new do + pattern [:dataset, RDF.type, RDF::OT.OrderedDataset] + end + query.execute(ds.rdf).size>0 + end + end end diff --git a/lib/utils/sparql/dataset.rb b/lib/utils/sparql/dataset.rb index ecc0321..7ba57ee 100644 --- a/lib/utils/sparql/dataset.rb +++ b/lib/utils/sparql/dataset.rb @@ -60,7 +60,7 @@ module OpenTox # Load data entries via SPARQL (fast) # @param [String] uri Dataset uri - # @return [Array] entries Data entries, ordered primarily over rows and secondarily over cols + # @return [Array] entries Data entries, ordered primarily over cols and secondarily over rows def self.find_data_entries_sparql(uri) sparql = "SELECT ?value FROM <#{uri}> WHERE { ?data_entry <#{RDF::OLO.index}> ?cidx ; @@ -68,7 +68,7 @@ module OpenTox ?v <#{RDF::OT.feature}> ?f; <#{RDF::OT.value}> ?value . ?f <#{RDF::OLO.index}> ?fidx. - } ORDER BY ?cidx ?fidx" + } ORDER BY ?fidx ?cidx" OpenTox::Backend::FourStore.query(sparql,"text/uri-list").split("\n").collect { |val| val.strip } end -- cgit v1.2.3