summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2012-04-18 10:56:15 +0200
committer Andreas Maunz <andreas@maunz.de> 2012-04-18 10:56:15 +0200
commit 3bcc5faba7c72ec9d7efc9a80e1f28ccbe9cc54f (patch)
tree 80f533bd14cce1a9b065001d8c772d81b590bfe2
parent e50b7083a584e1387d885a594d13320b0013ef55 (diff)
Major fix: CSV download
-rw-r--r-- lib/dataset.rb 7
-rw-r--r-- lib/serializer.rb 53
2 files changed, 37 insertions, 23 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index aafa656..c916722 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -339,11 +339,14 @@ module OpenTox
end
# Complete feature values by adding zeroes
- def complete_data_entries
+ # @param [Hash] key: compound, value: duplicate sizes
+ def complete_data_entries(compound_sizes)
all_features = @features.keys
@data_entries.each { |c, e|
(Set.new(all_features.collect)).subtract(Set.new e.keys).to_a.each { |f|
- self.add(c,f,0)
+ compound_sizes[c].times {
+ self.add(c,f,0)
+ }
}
}
end
diff --git a/lib/serializer.rb b/lib/serializer.rb
index d3616ce..4c26329 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -466,7 +466,6 @@ module OpenTox
have_substructures = features.collect{ |id| dataset.features[id][RDF.type].include? OT.Substructure}.compact.uniq
if have_substructures.size == 1 && have_substructures[0]
features_smarts = features.collect{ |id| "'" + dataset.features[id][OT.smarts] + "'" }
- dataset.complete_data_entries
end
# gather missing features
@@ -478,34 +477,46 @@ module OpenTox
end
}
}
-
features = features - delete_features
- features_smarts && @rows.first << features_smarts || @rows.first << features
+ # detect nr duplicates per compound
+ compound_sizes = {}
+ dataset.compounds.each do |compound|
+ entries=dataset.data_entries[compound]
+ entries.each do |feature, values|
+ compound_sizes[compound] || compound_sizes[compound] = []
+ compound_sizes[compound] << values.size
+ end
+ compound_sizes[compound].uniq!
+ raise "Inappropriate data for CSV export" if compound_sizes[compound].size > 1
+ compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array
+ end
+
+ # substructures: complete data entries with zeroes of appropriate duplicates
+ features_smarts && dataset.complete_data_entries(compound_sizes)
+
+ # get headers
+ features_smarts && @rows.first << features_smarts || @rows.first << features
@rows.first.flatten!
+
dataset.compounds.each do |compound|
entries=dataset.data_entries[compound]
cmpd = Compound.new(compound)
- smiles = cmpd.to_smiles
inchi = URI.encode_www_form_component(cmpd.to_inchi)
- row_container = Array.new
- row = Array.new(@rows.first.size)
- row_container << row
- #row[0] = smiles
- row[0] = inchi
- entries.each do |feature, values|
- i = features.index(feature)+1
- values.each do |value|
- if row_container[0][i]
- #LOGGER.debug "Feature '#{feature}' (nr '#{i}'): '#{value}'"
- row_container << row_container.last.collect
- row_container.last[i] = value
- #LOGGER.debug "RC: #{row_container.to_yaml}"
- else
- row_container.each { |r| r[i] = value }
- end
- end
+
+ # allocate container
+ row_container = Array.new(compound_sizes[compound])
+ (0...row_container.size).each do |i|
+ row_container[i] = Array.new(@rows.first.size)
end
+
+ entries.each { |feature, values|
+ (0...compound_sizes[compound]).each { |i|
+ j = features.index(feature)+1
+ row_container[i][0] = inchi
+ row_container[i][j] = values[i]
+ }
+ }
row_container.each { |r| @rows << r }
end
end