From 66ae34a7f1fcf01767d94f8c11a0ab2842e19112 Mon Sep 17 00:00:00 2001
From: Andreas Maunz
Date: Fri, 26 Oct 2012 10:17:13 +0200
Subject: Improved ds read performance (see http://goo.gl/ajKQn)

---
 lib/dataset.rb | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

(limited to 'lib')

diff --git a/lib/dataset.rb b/lib/dataset.rb
index 85b942a..286c3cb 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -52,19 +52,31 @@ module OpenTox
           f.get
           f[RDF.type].include?(RDF::OT.NumericFeature) or f[RDF.type].include?(RDF::OT.Substructure)
         }
-        @compounds.each_with_index do |compound,i|
-          query = RDF::Query.new do
-            pattern [:data_entry, RDF::OLO.index, i]
-            pattern [:data_entry, RDF::OT.values, :values]
-            pattern [:values, RDF::OT.feature, :feature]
-            pattern [:feature, RDF::OLO.index, :feature_idx]
-            pattern [:values, RDF::OT.value, :value]
+        query = RDF::Query.new do
+          pattern [:data_entry, RDF::OLO.index, :cidx] # compound index: now a free variable
+          pattern [:data_entry, RDF::OT.values, :vals]
+          pattern [:vals, RDF::OT.feature, :f]
+          pattern [:f, RDF::OLO.index, :fidx]
+          pattern [:vals, RDF::OT.value, :val]
+        end
+        clim=(@compounds.size-1)
+        cidx=0
+        fidx=0
+        num=numeric_features[fidx]
+        @data_entries = (Array.new(@compounds.size*@features.size)).each_slice(@features.size).to_a # init to nil
+        query.execute(@rdf).order_by(:fidx, :cidx).each { |entry| # order by feature index as to compute numeric status less frequently
+          val = entry.val.to_s
+          unless val.blank?
+            @data_entries[cidx][fidx] = (num ? val.to_f : val)
           end
-          values = query.execute(@rdf).sort_by{|s| s.feature_idx}.collect do |s|
-            (numeric_features[s.feature_idx] and s.value.to_s != "") ? s.value.to_s.to_f : s.value.to_s
+          if (cidx < clim)
+            cidx+=1
+          else
+            cidx=0
+            fidx+=1
+            num=numeric_features[fidx]
           end
-          @data_entries << values.collect{|v| v == "" ? nil : v}
-        end
+        }
       else
         query = RDF::Query.new do
           pattern [:uri, RDF.type, RDF::OT.Feature]
-- 
cgit v1.2.3