Major fix: CSV download

author: Andreas Maunz <andreas@maunz.de> 2012-04-18 10:56:15 +0200
committer: Andreas Maunz <andreas@maunz.de> 2012-04-18 10:56:15 +0200
commit: 3bcc5faba7c72ec9d7efc9a80e1f28ccbe9cc54f (patch)
tree: 80f533bd14cce1a9b065001d8c772d81b590bfe2
parent: e50b7083a584e1387d885a594d13320b0013ef55 (diff)
2 files changed, 37 insertions, 23 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index aafa656..c916722 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -339,11 +339,14 @@ module OpenTox
     end
 
     # Complete feature values by adding zeroes
-    def complete_data_entries
+    # @param [Hash] compound_sizes key: compound, value: number of duplicate entries for that compound
+    def complete_data_entries(compound_sizes)
       all_features = @features.keys
       @data_entries.each { |c, e|
         (Set.new(all_features.collect)).subtract(Set.new e.keys).to_a.each { |f|
-          self.add(c,f,0)
+          compound_sizes[c].times { 
+            self.add(c,f,0) 
+          }
         }
       }
     end
diff --git a/lib/serializer.rb b/lib/serializer.rb
index d3616ce..4c26329 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -466,7 +466,6 @@ module OpenTox
         have_substructures = features.collect{ |id| dataset.features[id][RDF.type].include? OT.Substructure}.compact.uniq
         if have_substructures.size == 1 && have_substructures[0] 
           features_smarts = features.collect{ |id| "'" + dataset.features[id][OT.smarts] + "'" }
-          dataset.complete_data_entries
         end
       
         # gather missing features
@@ -478,34 +477,46 @@ module OpenTox
             end
           }
         }
-
         features = features - delete_features
-        features_smarts && @rows.first << features_smarts || @rows.first << features
 
+        # detect number of duplicates per compound
+        compound_sizes = {}
+        dataset.compounds.each do |compound|
+          entries=dataset.data_entries[compound]
+          entries.each do |feature, values|
+            compound_sizes[compound] || compound_sizes[compound] = []
+            compound_sizes[compound] << values.size
+          end
+          compound_sizes[compound].uniq!
+          raise "Inappropriate data for CSV export" if compound_sizes[compound].size > 1
+          compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array
+        end
+ 
+        # substructures: complete data entries with zeroes of appropriate duplicates
+        features_smarts && dataset.complete_data_entries(compound_sizes)
+
+        # get headers
+        features_smarts && @rows.first << features_smarts || @rows.first << features
         @rows.first.flatten!
+
         dataset.compounds.each do |compound|
           entries=dataset.data_entries[compound]
           cmpd = Compound.new(compound)
-          smiles = cmpd.to_smiles
           inchi = URI.encode_www_form_component(cmpd.to_inchi)
-          row_container = Array.new
-          row = Array.new(@rows.first.size)
-          row_container << row
-          #row[0] = smiles
-          row[0] = inchi
-          entries.each do |feature, values|
-            i = features.index(feature)+1
-            values.each do |value|
-              if row_container[0][i]
-                #LOGGER.debug "Feature '#{feature}' (nr '#{i}'): '#{value}'"
-                row_container << row_container.last.collect
-                row_container.last[i] = value
-                #LOGGER.debug "RC: #{row_container.to_yaml}"
-              else
-                row_container.each { |r| r[i] = value }
-              end
-            end
+
+          # allocate container
+          row_container = Array.new(compound_sizes[compound])
+          (0...row_container.size).each do |i|
+            row_container[i] = Array.new(@rows.first.size)
           end
+
+          entries.each { |feature, values|
+            (0...compound_sizes[compound]).each { |i|
+              j = features.index(feature)+1
+              row_container[i][0] = inchi
+              row_container[i][j] = values[i]
+            }
+          }
           row_container.each { |r| @rows << r }
         end
       end
author	Andreas Maunz <andreas@maunz.de>	2012-04-18 10:56:15 +0200
committer	Andreas Maunz <andreas@maunz.de>	2012-04-18 10:56:15 +0200
commit	3bcc5faba7c72ec9d7efc9a80e1f28ccbe9cc54f (patch)
tree	80f533bd14cce1a9b065001d8c772d81b590bfe2
parent	e50b7083a584e1387d885a594d13320b0013ef55 (diff)