From b55f670feb82dff3c782b4f86bf90ac1212a0361 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Fri, 26 Oct 2012 16:11:43 +0200 Subject: Separate libs for sparql and rdf --- lib/dataset.rb | 32 +++++++++-------------------- lib/opentox-client.rb | 2 ++ lib/utils/rdf/dataset.rb | 48 ++++++++++++++++++++++++++++++++++++++++++++ lib/utils/shims/dataset.rb | 38 ----------------------------------- lib/utils/sparql/dataset.rb | 49 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 109 insertions(+), 60 deletions(-) create mode 100644 lib/utils/rdf/dataset.rb create mode 100644 lib/utils/sparql/dataset.rb (limited to 'lib') diff --git a/lib/dataset.rb b/lib/dataset.rb index 286c3cb..e700ad0 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -36,36 +36,22 @@ module OpenTox pattern [:uri, RDF.type, RDF::OT.OrderedDataset] end s=query.execute(@rdf) - if s.first # ordered dataset + + # AM: read ordered dataset from RDF + if s.first @uri = s[0].uri.to_s if force_no_backend_query # AM: must rewrite URI - query = RDF::Query.new do - pattern [:uri, RDF.type, RDF::OT.Compound] - pattern [:uri, RDF::OLO.index, :idx] - end - @compounds = query.execute(@rdf).sort_by{|s| s.idx}.collect{|s| OpenTox::Compound.new s.uri.to_s} - query = RDF::Query.new do - pattern [:uri, RDF.type, RDF::OT.Feature] - pattern [:uri, RDF::OLO.index, :idx] - end - @features = query.execute(@rdf).sort_by{|s| s.idx}.collect{|s| OpenTox::Feature.new(s.uri.to_s)} + @compounds = OpenTox::Dataset.find_compounds_rdf(@rdf) + @features = OpenTox::Dataset.find_features_rdf(@rdf) numeric_features = @features.collect{|f| f.get f[RDF.type].include?(RDF::OT.NumericFeature) or f[RDF.type].include?(RDF::OT.Substructure) } - query = RDF::Query.new do - pattern [:data_entry, RDF::OLO.index, :cidx] # compound index: now a free variable - pattern [:data_entry, RDF::OT.values, :vals] - pattern [:vals, RDF::OT.feature, :f] - pattern [:f, RDF::OLO.index, :fidx] - pattern [:vals, RDF::OT.value, :val] - end + table = OpenTox::Dataset.find_data_entries_rdf(@rdf) clim=(@compounds.size-1) - cidx=0 - fidx=0 + cidx = fidx = 0 num=numeric_features[fidx] @data_entries = (Array.new(@compounds.size*@features.size)).each_slice(@features.size).to_a # init to nil - query.execute(@rdf).order_by(:fidx, :cidx).each { |entry| # order by feature index as to compute numeric status less frequently - val = entry.val.to_s + table.each { |val| unless val.blank? @data_entries[cidx][fidx] = (num ? val.to_f : val) end @@ -77,6 +63,8 @@ module OpenTox num=numeric_features[fidx] end } + + # AM: read unordered dataset from RDF else query = RDF::Query.new do pattern [:uri, RDF.type, RDF::OT.Feature] diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb index 67e0ce7..8b56411 100644 --- a/lib/opentox-client.rb +++ b/lib/opentox-client.rb @@ -40,5 +40,7 @@ FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-m ].each{ |f| require File.join(File.dirname(__FILE__),f) } Dir["#{File.dirname(__FILE__)}/utils/shims/*.rb"].each { |f| require f } # Shims for legacy code +Dir["#{File.dirname(__FILE__)}/utils/sparql/*.rb"].each { |f| require f } # SPARQL code +Dir["#{File.dirname(__FILE__)}/utils/rdf/*.rb"].each { |f| require f } # RDF code Dir["#{File.dirname(__FILE__)}/utils/*.rb"].each { |f| require f } # Utils for Libs diff --git a/lib/utils/rdf/dataset.rb b/lib/utils/rdf/dataset.rb new file mode 100644 index 0000000..5cfb827 --- /dev/null +++ b/lib/utils/rdf/dataset.rb @@ -0,0 +1,48 @@ +=begin +* Name: dataset.rb +* Description: Dataset RDF tools +* Author: Andreas Maunz +* Date: 10/2012 +=end + +module OpenTox + class Dataset + + # Load features via RDF (slow) + # @param [String] Dataset URI + # @return [Array] Features in order + def self.find_features_rdf(rdf) + query = RDF::Query.new do + pattern [:uri, RDF.type, RDF::OT.Feature] + pattern [:uri, RDF::OLO.index, :idx] + end + query.execute(rdf).sort_by{|s| s.idx}.collect{|s| OpenTox::Feature.new(s.uri.to_s)} + end + + # Load compounds via RDF (slow) + # @param [String] Dataset URI + # @return [Array] Compounds in order + def self.find_compounds_rdf(rdf) + query = RDF::Query.new do + pattern [:uri, RDF.type, RDF::OT.Compound] + pattern [:uri, RDF::OLO.index, :idx] + end + query.execute(rdf).sort_by{|s| s.idx}.collect{|s| OpenTox::Compound.new(s.uri.to_s)} + end + + # Load data entries via RDF (slow) + # @param [String] Dataset uri + # @return [Array] Data entries, ordered primarily over rows and secondarily over cols + def self.find_data_entries_rdf(rdf) + query = RDF::Query.new do + pattern [:data_entry, RDF::OLO.index, :cidx] # compound index: now a free variable + pattern [:data_entry, RDF::OT.values, :vals] + pattern [:vals, RDF::OT.feature, :f] + pattern [:f, RDF::OLO.index, :fidx] + pattern [:vals, RDF::OT.value, :val] + end + query.execute(rdf).order_by(:fidx, :cidx).collect { |s| s.val.to_s } + end + + end +end diff --git a/lib/utils/shims/dataset.rb b/lib/utils/shims/dataset.rb index b5faf18..912510c 100644 --- a/lib/utils/shims/dataset.rb +++ b/lib/utils/shims/dataset.rb @@ -23,44 +23,6 @@ module OpenTox end - # Load features via SPARQL (fast) - # @param [String] Dataset URI - # @return [Array] Features in order - def self.find_features(uri) - sparql = "SELECT DISTINCT ?s FROM <#{uri}> WHERE { - ?s <#{RDF.type}> <#{RDF::OT.Feature}> ; - <#{RDF::OLO.index}> ?fidx - } ORDER BY ?fidx" - OpenTox::Backend::FourStore.query(sparql, "text/uri-list").split("\n").collect { |uri| OpenTox::Feature.new uri.strip } - end - - # Load compounds via SPARQL (fast) - # @param [String] Dataset URI - # @return [Array] Compounds in order - def self.find_compounds(uri) - sparql = "SELECT DISTINCT ?compound FROM <#{uri}> WHERE { - ?s <#{RDF.type}> <#{RDF::OT.DataEntry}> ; - <#{RDF::OLO.index}> ?cidx; - <#{RDF::OT.compound}> ?compound - } ORDER BY ?cidx" - OpenTox::Backend::FourStore.query(sparql, "text/uri-list").split("\n").collect { |uri| OpenTox::Compound.new uri.strip } - end - - # Load data entries via SPARQL (fast) - # @param [String] Dataset uri - # @return [Array] Data entries, ordered primarily over rows and secondarily over cols - def self.find_data_entries(uri) - sparql = "SELECT ?value FROM <#{uri}> WHERE { - ?data_entry <#{RDF::OLO.index}> ?cidx ; - <#{RDF::OT.values}> ?v . - ?v <#{RDF::OT.feature}> ?f; - <#{RDF::OT.value}> ?value . - ?f <#{RDF::OLO.index}> ?fidx. - } ORDER BY ?cidx ?fidx" - OpenTox::Backend::FourStore.query(sparql,"text/uri-list").split("\n").collect { |val| val.strip } - end - - ### Index Structures # Create value map diff --git a/lib/utils/sparql/dataset.rb b/lib/utils/sparql/dataset.rb new file mode 100644 index 0000000..e781f08 --- /dev/null +++ b/lib/utils/sparql/dataset.rb @@ -0,0 +1,49 @@ +=begin +* Name: dataset.rb +* Description: Dataset SPARQL tools +* Author: Andreas Maunz +* Date: 10/2012 +=end + +module OpenTox + class Dataset + + # Load features via SPARQL (fast) + # @param [String] Dataset URI + # @return [Array] Features in order + def self.find_features_sparql(uri) + sparql = "SELECT DISTINCT ?s FROM <#{uri}> WHERE { + ?s <#{RDF.type}> <#{RDF::OT.Feature}> ; + <#{RDF::OLO.index}> ?fidx + } ORDER BY ?fidx" + OpenTox::Backend::FourStore.query(sparql, "text/uri-list").split("\n").collect { |uri| OpenTox::Feature.new uri.strip } + end + + # Load compounds via SPARQL (fast) + # @param [String] Dataset URI + # @return [Array] Compounds in order + def self.find_compounds_sparql(uri) + sparql = "SELECT DISTINCT ?compound FROM <#{uri}> WHERE { + ?s <#{RDF.type}> <#{RDF::OT.DataEntry}> ; + <#{RDF::OLO.index}> ?cidx; + <#{RDF::OT.compound}> ?compound + } ORDER BY ?cidx" + OpenTox::Backend::FourStore.query(sparql, "text/uri-list").split("\n").collect { |uri| OpenTox::Compound.new uri.strip } + end + + # Load data entries via SPARQL (fast) + # @param [String] Dataset uri + # @return [Array] Data entries, ordered primarily over rows and secondarily over cols + def self.find_data_entries_sparql(uri) + sparql = "SELECT ?value FROM <#{uri}> WHERE { + ?data_entry <#{RDF::OLO.index}> ?cidx ; + <#{RDF::OT.values}> ?v . + ?v <#{RDF::OT.feature}> ?f; + <#{RDF::OT.value}> ?value . + ?f <#{RDF::OLO.index}> ?fidx. + } ORDER BY ?cidx ?fidx" + OpenTox::Backend::FourStore.query(sparql,"text/uri-list").split("\n").collect { |val| val.strip } + end + + end +end -- cgit v1.2.3