summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Maunz <andreas@maunz.de>2012-10-30 16:02:28 +0100
committerAndreas Maunz <andreas@maunz.de>2012-10-30 16:02:28 +0100
commit931e7f00c8a5df5d5c8ea4ae6d8fdd841ec63c90 (patch)
tree10ecad664abb8a481c96e4ec72bfd03e072f766f
parent19582925f0496e4cb07e71ead8aea1261abf0bc8 (diff)
Loading entries by sparql (mv'd 4store.rb), ordered status via rdf
-rw-r--r--lib/4store.rb122
-rw-r--r--lib/dataset.rb19
-rw-r--r--lib/opentox-client.rb3
-rw-r--r--lib/utils/rdf/dataset.rb15
-rw-r--r--lib/utils/sparql/dataset.rb4
5 files changed, 151 insertions, 12 deletions
diff --git a/lib/4store.rb b/lib/4store.rb
new file mode 100644
index 0000000..3ed081d
--- /dev/null
+++ b/lib/4store.rb
@@ -0,0 +1,122 @@
+module OpenTox
+ module Backend
+ class FourStore
+
+ # MIME types accepted for read requests (checked against the Accept header in list/get).
+ @@accept_formats = [
+   "application/rdf+xml",
+   "text/turtle",
+   "text/plain",
+   "text/uri-list",
+   "text/html",
+   'application/sparql-results+xml'
+ ]
+ # MIME types accepted as content type for uploaded RDF (checked in post/put).
+ @@content_type_formats = [
+   "application/rdf+xml",
+   "text/turtle",
+   "text/plain"
+ ]
+
+ # List all resources of this service's class (see klass).
+ # @param [String] mime_type Requested representation; "*/*" is treated as "text/html"
+ # @return [String] Graph URIs for uri-list requests, otherwise the query result in the requested format
+ def self.list mime_type
+   mime_type = "text/html" if mime_type.match(%r{\*/\*})
+   unless @@accept_formats.include?(mime_type)
+     bad_request_error "'#{mime_type}' is not a supported mime type. Please specify one of #{@@accept_formats.join(", ")} in the Accept Header."
+   end
+   # uri-list requests only need the graph names; everything else gets the full triples
+   statement = if mime_type =~ /uri-list/
+     "SELECT DISTINCT ?g WHERE {GRAPH ?g {?s <#{RDF.type}> <#{klass}>; ?p ?o. } }"
+   else
+     "CONSTRUCT {?s ?p ?o.} WHERE {?s <#{RDF.type}> <#{klass}>; ?p ?o. }"
+   end
+   query(statement, mime_type)
+ end
+
+ # Fetch all statements of a single named graph.
+ # @param [String] uri Graph URI
+ # @param [String] mime_type Requested representation; "*/*" is treated as "text/html"
+ # @return [String] The graph in the requested format; resource_not_found_error is called when the result is empty
+ def self.get uri, mime_type
+   mime_type = "text/html" if mime_type.match(%r{\*/\*})
+   unless @@accept_formats.include?(mime_type)
+     bad_request_error "'#{mime_type}' is not a supported mime type. Please specify one of #{@@accept_formats.join(", ")} in the Accept Header."
+   end
+   graph = query("CONSTRUCT {?s ?p ?o.} FROM <#{uri}> WHERE { ?s ?p ?o. }", mime_type)
+   resource_not_found_error "#{uri} not found." if graph.empty?
+   graph
+ end
+
+ # Add RDF statements to a graph via HTTP POST to the "data" endpoint.
+ # @param [String] uri Graph URI the statements are stored under
+ # @param [String] rdf Serialized RDF (request body)
+ # @param [String] mime_type Content type of rdf; "multipart/form-data" is let through as well
+ # @return The RestClient response; bad_request_error is called for unsupported types, missing/empty bodies and failed requests
+ def self.post uri, rdf, mime_type
+   unless @@content_type_formats.include?(mime_type) || mime_type == "multipart/form-data"
+     bad_request_error "'#{mime_type}' is not a supported content type. Please use one of #{@@content_type_formats.join(", ")}."
+   end
+   # An empty string is truthy in Ruby, so test for emptiness explicitly
+   # (objects without #empty?, e.g. IO-like uploads, are only checked for nil).
+   bad_request_error "Request body empty." if rdf.nil? || (rdf.respond_to?(:empty?) && rdf.empty?)
+   mime_type = "application/x-turtle" if mime_type == "text/plain" # ntriples is turtle in 4store
+   endpoint = File.join(four_store_uri, "data") + "/"
+   begin
+     RestClient.post(endpoint, :data => rdf, :graph => uri, "mime-type" => mime_type)
+   rescue => e
+     bad_request_error e.message, endpoint
+   end
+ end
+
+ # Store RDF under a graph URI via HTTP PUT to the "data" endpoint.
+ # @param [String] uri Graph URI
+ # @param [String] rdf Serialized RDF (request body)
+ # @param [String] mime_type Content type of rdf
+ # @return The result of RestClientWrapper.put; bad_request_error is called for unsupported types and missing/empty bodies
+ def self.put uri, rdf, mime_type
+   unless @@content_type_formats.include?(mime_type)
+     bad_request_error "'#{mime_type}' is not a supported content type. Please use one of #{@@content_type_formats.join(", ")}."
+   end
+   # An empty string is truthy in Ruby, so test for emptiness explicitly.
+   bad_request_error "Request body empty." if rdf.nil? || (rdf.respond_to?(:empty?) && rdf.empty?)
+   mime_type = "application/x-turtle" if mime_type == "text/plain" # ntriples is turtle in 4store
+   # No local rescue: exceptions raised by RestClientWrapper.put propagate to the caller.
+   RestClientWrapper.put(File.join(four_store_uri, "data", uri), rdf, :content_type => mime_type)
+ end
+
+ # Issue an HTTP DELETE for the given graph.
+ # @param [String] uri Graph URI
+ # @return The result of RestClientWrapper.delete
+ def self.delete uri
+   target = data_uri(uri)
+   RestClientWrapper.delete(target)
+ end
+
+ # POST a SPARQL statement to the update endpoint.
+ # @param [String] sparql SPARQL update statement
+ # @return The RestClient response
+ def self.update sparql
+   payload = { :update => sparql }
+   RestClient.post(update_uri, payload)
+ end
+
+ # Run a SPARQL SELECT or CONSTRUCT statement against the sparql endpoint.
+ # SELECT supports 'application/sparql-results+xml' and "text/uri-list";
+ # CONSTRUCT supports "text/plain", "application/rdf+xml" and any type matching html or turtle.
+ # @param [String] sparql SPARQL statement
+ # @param [String] mime_type Requested result format
+ # @return [String] Query result in the requested format
+ # NOTE(review): the statement type is detected by a case-insensitive substring match,
+ # so a CONSTRUCT statement that merely contains "select" (e.g. inside a URI) takes the SELECT branch.
+ def self.query sparql, mime_type
+   if sparql =~ /SELECT/i
+     case mime_type
+     when 'application/sparql-results+xml'
+       RestClient.get(sparql_uri, :params => { :query => sparql }, :accept => mime_type).body
+     when "text/uri-list"
+       # Results are requested as text/plain; quotes and angle brackets are stripped
+       # and the first line (presumably the column header) is dropped.
+       RestClient.get(sparql_uri, :params => { :query => sparql }, :accept => "text/plain").body.gsub(/"|<|>/,'').split("\n").drop(1).join("\n")
+     else
+       bad_request_error "#{mime_type} is not a supported mime type for SELECT statements."
+     end
+   elsif sparql =~ /CONSTRUCT/i
+     case mime_type
+     when "text/plain", "application/rdf+xml"
+       RestClient.get(sparql_uri, :params => { :query => sparql }, :accept => mime_type).body
+     when /html|turtle/
+       # TODO: fix and improve
+       nt = RestClient.get(sparql_uri, :params => { :query => sparql }, :accept => "text/plain").body # 4store returns ntriples for turtle
+       rdf = RDF::Graph.new
+       RDF::Reader.for(:ntriples).new(nt) do |reader|
+         reader.each_statement { |statement| rdf << statement }
+       end
+       # A prefix must map to the namespace, not to a single term such as rdf:type.
+       prefixes = {:rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}
+       # Look the vocabularies up as constants instead of building code for eval.
+       ['OT', 'DC', 'XSD', 'OLO'].each { |name| prefixes[name.downcase.to_sym] = RDF.const_get(name).to_s }
+       # TODO: fails for large datasets?? multi_cell_call
+       turtle = RDF::N3::Writer.for(:turtle).buffer(:prefixes => prefixes) do |writer|
+         rdf.each { |statement| writer << statement }
+       end
+       if mime_type =~ /html/ && !turtle.empty?
+         # Turn every http(s) URI into a link and keep the line structure for browsers.
+         regex = Regexp.new '(https?:\/\/[\S]+)([>"])'
+         turtle = "<html><body>" + turtle.gsub( regex, '<a href="\1">\1</a>\2' ).gsub(/\n/,'<br/>') + "</body></html>"
+       end
+       turtle
+     end
+   else
+     # TODO: check if this prevents SPARQL injections
+     bad_request_error "Only SELECT and CONSTRUCT are accepted SPARQL statements."
+   end
+ rescue => e
+   # Any StandardError raised above is reported as a bad request against the sparql endpoint.
+   bad_request_error e.message, sparql_uri
+ end
+
+ # OT vocabulary term named after the capitalized SERVICE constant.
+ # @return The term RDF::OT[SERVICE.capitalize]
+ def self.klass
+   service_name = SERVICE.capitalize
+   RDF::OT[service_name]
+ end
+
+ # Base URI from $four_store with "user:password@" inserted after the first "//".
+ # @return [String] Base URI including credentials
+ def self.four_store_uri
+   # credentials are removed from uri in error.rb
+   base = $four_store[:uri]
+   credentials = "#{$four_store[:user]}:#{$four_store[:password]}"
+   base.sub(%r{//}, "//#{credentials}@")
+ end
+
+ # URI of the "sparql" endpoint, with trailing slash.
+ # @return [String]
+ def self.sparql_uri
+   "#{File.join(four_store_uri, "sparql")}/"
+ end
+
+ # URI of the "update" endpoint, with trailing slash.
+ # @return [String]
+ def self.update_uri
+   "#{File.join(four_store_uri, "update")}/"
+ end
+
+ # URI under the "data" endpoint that addresses a graph through a "graph" query parameter.
+ # @param [String] uri Graph URI (inserted as given, without escaping)
+ # @return [String]
+ def self.data_uri uri
+   graph_param = "?graph=#{uri}"
+   File.join(four_store_uri, "data", graph_param)
+ end
+
+ end
+ end
+end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index e700ad0..8d135a6 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -28,25 +28,28 @@ module OpenTox
end
def get(force_no_backend_query=false)
- super() unless (force_no_backend_query and @rdf.size>0)
+ have_rdf = (force_no_backend_query and @rdf.size>0)
+ super() unless have_rdf
+ ordered = (have_rdf or OpenTox::Dataset.ordered?(@uri))
@features = []
@compounds = []
@data_entries = []
- query = RDF::Query.new do
- pattern [:uri, RDF.type, RDF::OT.OrderedDataset]
- end
- s=query.execute(@rdf)
# AM: read ordered dataset from RDF
- if s.first
- @uri = s[0].uri.to_s if force_no_backend_query # AM: must rewrite URI
+ if ordered
+ @uri = s[0].uri.to_s if have_rdf # AM: must rewrite URI
@compounds = OpenTox::Dataset.find_compounds_rdf(@rdf)
@features = OpenTox::Dataset.find_features_rdf(@rdf)
numeric_features = @features.collect{|f|
f.get
f[RDF.type].include?(RDF::OT.NumericFeature) or f[RDF.type].include?(RDF::OT.Substructure)
}
- table = OpenTox::Dataset.find_data_entries_rdf(@rdf)
+ if have_rdf
+ table = OpenTox::Dataset.find_data_entries_rdf(@rdf)
+ else
+ values = OpenTox::Dataset.find_data_entries_sparql(@uri)
+ table = values + Array.new(@compounds.size*@features.size-values.size, "")
+ end
clim=(@compounds.size-1)
cidx = fidx = 0
num=numeric_features[fidx]
diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb
index 8b56411..ac7f4e6 100644
--- a/lib/opentox-client.rb
+++ b/lib/opentox-client.rb
@@ -36,7 +36,8 @@ FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-m
"compound.rb",
"dataset.rb",
"model.rb",
- "algorithm.rb"
+ "algorithm.rb",
+ "4store.rb"
].each{ |f| require File.join(File.dirname(__FILE__),f) }
Dir["#{File.dirname(__FILE__)}/utils/shims/*.rb"].each { |f| require f } # Shims for legacy code
diff --git a/lib/utils/rdf/dataset.rb b/lib/utils/rdf/dataset.rb
index b2deeb8..ab720d7 100644
--- a/lib/utils/rdf/dataset.rb
+++ b/lib/utils/rdf/dataset.rb
@@ -32,7 +32,7 @@ module OpenTox
# Load data entries via RDF (slow)
# @param [String] uri Dataset uri
- # @return [Array] entries Data entries, ordered primarily over rows and secondarily over cols
+ # @return [Array] entries Data entries, ordered primarily over cols and secondarily over rows
def self.find_data_entries_rdf(rdf)
query = RDF::Query.new do
pattern [:data_entry, RDF::OLO.index, :cidx] # compound index: now a free variable
@@ -44,5 +44,18 @@ module OpenTox
query.execute(rdf).order_by(:fidx, :cidx).collect { |s| s.val.to_s }
end
+ # Check whether a dataset is ordered.
+ # Fetches "<uri>/metadata" as RDF/XML and looks for a statement typing
+ # a resource as OT.OrderedDataset (OpenTox compliant).
+ # @param [String] uri Dataset uri
+ # @return [TrueClass,FalseClass] status Whether the dataset is ordered
+ def self.ordered?(uri)
+   holder = OpenTox::Dataset.new # dummy, only used to parse the metadata
+   metadata_uri = [uri, "metadata"].join("/")
+   holder.parse_rdfxml(RestClient.get(metadata_uri, {:accept => "application/rdf+xml"}))
+   ordered_query = RDF::Query.new do
+     pattern [:dataset, RDF.type, RDF::OT.OrderedDataset]
+   end
+   matches = ordered_query.execute(holder.rdf)
+   matches.size > 0
+ end
+
end
end
diff --git a/lib/utils/sparql/dataset.rb b/lib/utils/sparql/dataset.rb
index ecc0321..7ba57ee 100644
--- a/lib/utils/sparql/dataset.rb
+++ b/lib/utils/sparql/dataset.rb
@@ -60,7 +60,7 @@ module OpenTox
# Load data entries via SPARQL (fast)
# @param [String] uri Dataset uri
- # @return [Array] entries Data entries, ordered primarily over rows and secondarily over cols
+ # @return [Array] entries Data entries, ordered primarily over cols and secondarily over rows
def self.find_data_entries_sparql(uri)
sparql = "SELECT ?value FROM <#{uri}> WHERE {
?data_entry <#{RDF::OLO.index}> ?cidx ;
@@ -68,7 +68,7 @@ module OpenTox
?v <#{RDF::OT.feature}> ?f;
<#{RDF::OT.value}> ?value .
?f <#{RDF::OLO.index}> ?fidx.
- } ORDER BY ?cidx ?fidx"
+ } ORDER BY ?fidx ?cidx"
OpenTox::Backend::FourStore.query(sparql,"text/uri-list").split("\n").collect { |val| val.strip }
end