summary | refs | log | tree | commit | diff
path: root/lib/dataset.rb
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2012-10-26 10:17:13 +0200
committer Andreas Maunz <andreas@maunz.de> 2012-10-26 10:17:13 +0200
commit 66ae34a7f1fcf01767d94f8c11a0ab2842e19112 (patch)
tree 174816a201775c4d3ed9aa0e31523a16d8c7a32f /lib/dataset.rb
parent 9c8b20a910d316c19d24e79dcf52868b6b8383b7 (diff)
Improved ds read performance (see http://goo.gl/ajKQn)
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r-- lib/dataset.rb 34
1 files changed, 23 insertions, 11 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 85b942a..286c3cb 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -52,19 +52,31 @@ module OpenTox
f.get
f[RDF.type].include?(RDF::OT.NumericFeature) or f[RDF.type].include?(RDF::OT.Substructure)
}
- @compounds.each_with_index do |compound,i|
- query = RDF::Query.new do
- pattern [:data_entry, RDF::OLO.index, i]
- pattern [:data_entry, RDF::OT.values, :values]
- pattern [:values, RDF::OT.feature, :feature]
- pattern [:feature, RDF::OLO.index, :feature_idx]
- pattern [:values, RDF::OT.value, :value]
+ query = RDF::Query.new do
+ pattern [:data_entry, RDF::OLO.index, :cidx] # compound index: now a free variable
+ pattern [:data_entry, RDF::OT.values, :vals]
+ pattern [:vals, RDF::OT.feature, :f]
+ pattern [:f, RDF::OLO.index, :fidx]
+ pattern [:vals, RDF::OT.value, :val]
+ end
+ clim=(@compounds.size-1)
+ cidx=0
+ fidx=0
+ num=numeric_features[fidx]
+ @data_entries = (Array.new(@compounds.size*@features.size)).each_slice(@features.size).to_a # init to nil
+ query.execute(@rdf).order_by(:fidx, :cidx).each { |entry| # order by feature index as to compute numeric status less frequently
+ val = entry.val.to_s
+ unless val.blank?
+ @data_entries[cidx][fidx] = (num ? val.to_f : val)
end
- values = query.execute(@rdf).sort_by{|s| s.feature_idx}.collect do |s|
- (numeric_features[s.feature_idx] and s.value.to_s != "") ? s.value.to_s.to_f : s.value.to_s
+ if (cidx < clim)
+ cidx+=1
+ else
+ cidx=0
+ fidx+=1
+ num=numeric_features[fidx]
end
- @data_entries << values.collect{|v| v == "" ? nil : v}
- end
+ }
else
query = RDF::Query.new do
pattern [:uri, RDF.type, RDF::OT.Feature]