summary | refs | log | tree | commit | diff
path: root/lib/dataset.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2015-08-13 18:57:11 +0200
committer Christoph Helma <helma@in-silico.ch> 2015-08-13 18:57:11 +0200
commit d0850e2983a219da214a67190fe881c7650f532f (patch)
tree a917334a1a70823dc979a27e453b2598e98c8027 /lib/dataset.rb
parent 6ab86c253ba0eb79b9e6a20effa2d18626accf2b (diff)
majority of tests working
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r-- lib/dataset.rb 23
1 files changed, 12 insertions, 11 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 4f6f0b5..8c5ffc0 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -208,30 +208,29 @@ module OpenTox
value_time = 0
# compounds and values
- @data_entries = Array.new(table.size){Array.new(table.first.size-1)}
+ @data_entries = [] #Array.new(table.size){Array.new(table.first.size-1)}
table.each_with_index do |vals,i|
ct = Time.now
identifier = vals.shift
warnings << "No feature values for compound at position #{i+2}." if vals.compact.empty?
begin
- # TODO parse inchi and catch openbabel errors (and segfaults) in compound.rb
case compound_format
when /SMILES/i
compound = OpenTox::Compound.from_smiles(identifier)
- if compound.inchi.empty?
- warnings << "Cannot parse #{compound_format} compound '#{identifier}' at position #{i+2}, all entries are ignored."
- next
- end
when /InChI/i
compound = OpenTox::Compound.from_inchi(identifier)
end
- rescue
+ rescue
+ compound = nil
+ end
+ if compound.nil?
+ # compound parsers may return nil
warnings << "Cannot parse #{compound_format} compound '#{identifier}' at position #{i+2}, all entries are ignored."
next
end
+ # TODO insert empty compounds to keep positions?
compound_time += Time.now-ct
- compound_ids << compound.id
r += 1
unless vals.size == feature_ids.size # way cheaper than accessing features
@@ -239,15 +238,17 @@ module OpenTox
next
end
- cid = compound.id.to_s
+ compound_ids << compound.id
+ @data_entries << Array.new(table.first.size-1)
+
vals.each_with_index do |v,j|
if v.blank?
warnings << "Empty value for compound '#{identifier}' (row #{r+2}) and feature '#{feature_names[j]}' (column #{j+2})."
next
elsif numeric[j]
- @data_entries[i][j] = v.to_f
+ @data_entries.last[j] = v.to_f
else
- @data_entries[i][j] = v.strip
+ @data_entries.last[j] = v.strip
end
end
end