diff options
author | Andreas Maunz <andreas@maunz.de> | 2012-04-18 10:56:15 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2012-04-18 10:56:15 +0200 |
commit | 3bcc5faba7c72ec9d7efc9a80e1f28ccbe9cc54f (patch) | |
tree | 80f533bd14cce1a9b065001d8c772d81b590bfe2 | |
parent | e50b7083a584e1387d885a594d13320b0013ef55 (diff) |
Major fix: CSV download
-rw-r--r-- | lib/dataset.rb | 7 | ||||
-rw-r--r-- | lib/serializer.rb | 53 |
2 files changed, 37 insertions, 23 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb index aafa656..c916722 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -339,11 +339,14 @@ module OpenTox end # Complete feature values by adding zeroes - def complete_data_entries + # @param [Hash] key: compound, value: duplicate sizes + def complete_data_entries(compound_sizes) all_features = @features.keys @data_entries.each { |c, e| (Set.new(all_features.collect)).subtract(Set.new e.keys).to_a.each { |f| - self.add(c,f,0) + compound_sizes[c].times { + self.add(c,f,0) + } } } end diff --git a/lib/serializer.rb b/lib/serializer.rb index d3616ce..4c26329 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -466,7 +466,6 @@ module OpenTox have_substructures = features.collect{ |id| dataset.features[id][RDF.type].include? OT.Substructure}.compact.uniq if have_substructures.size == 1 && have_substructures[0] features_smarts = features.collect{ |id| "'" + dataset.features[id][OT.smarts] + "'" } - dataset.complete_data_entries end # gather missing features @@ -478,34 +477,46 @@ module OpenTox end } } - features = features - delete_features - features_smarts && @rows.first << features_smarts || @rows.first << features + # detect nr duplicates per compound + compound_sizes = {} + dataset.compounds.each do |compound| + entries=dataset.data_entries[compound] + entries.each do |feature, values| + compound_sizes[compound] || compound_sizes[compound] = [] + compound_sizes[compound] << values.size + end + compound_sizes[compound].uniq! + raise "Inappropriate data for CSV export" if compound_sizes[compound].size > 1 + compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array + end + + # substructures: complete data entries with zeroes of appropriate duplicates + features_smarts && dataset.complete_data_entries(compound_sizes) + + # get headers + features_smarts && @rows.first << features_smarts || @rows.first << features @rows.first.flatten! + dataset.compounds.each do |compound| entries=dataset.data_entries[compound] cmpd = Compound.new(compound) - smiles = cmpd.to_smiles inchi = URI.encode_www_form_component(cmpd.to_inchi) - row_container = Array.new - row = Array.new(@rows.first.size) - row_container << row - #row[0] = smiles - row[0] = inchi - entries.each do |feature, values| - i = features.index(feature)+1 - values.each do |value| - if row_container[0][i] - #LOGGER.debug "Feature '#{feature}' (nr '#{i}'): '#{value}'" - row_container << row_container.last.collect - row_container.last[i] = value - #LOGGER.debug "RC: #{row_container.to_yaml}" - else - row_container.each { |r| r[i] = value } - end - end + + # allocate container + row_container = Array.new(compound_sizes[compound]) + (0...row_container.size).each do |i| + row_container[i] = Array.new(@rows.first.size) end + + entries.each { |feature, values| + (0...compound_sizes[compound]).each { |i| + j = features.index(feature)+1 + row_container[i][0] = inchi + row_container[i][j] = values[i] + } + } row_container.each { |r| @rows << r } end end |