diff options
author | Christoph Helma <helma@in-silico.ch> | 2015-11-04 17:50:17 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2015-11-04 17:50:17 +0100 |
commit | ca2bb0f90335b1f2c4ecc28ee423e85b281ffcf0 (patch) | |
tree | 71f823d08e0799b8ebb59713f06f646888135cd7 /lib/dataset.rb | |
parent | 61fda66b5bc86e600b27f9a2c2eaea97603fbb92 (diff) |
neighbor search delegated to database backend
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r-- | lib/dataset.rb | 21 |
1 files changed, 17 insertions, 4 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb index d989bdf..af116a9 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -105,10 +105,18 @@ module OpenTox test_cids = test_idxs.collect{|i| self.compound_ids[i]} test_data_entries = test_idxs.collect{|i| self.data_entries[i]} test_dataset = self.class.new(:compound_ids => test_cids, :feature_ids => self.feature_ids, :data_entries => test_data_entries) + test_dataset.compounds.each do |compound| + compound.dataset_ids << test_dataset.id + compound.save + end training_idxs = indices-test_idxs training_cids = training_idxs.collect{|i| self.compound_ids[i]} training_data_entries = training_idxs.collect{|i| self.data_entries[i]} training_dataset = self.class.new(:compound_ids => training_cids, :feature_ids => self.feature_ids, :data_entries => training_data_entries) + training_dataset.compounds.each do |compound| + compound.dataset_ids << training_dataset.id + compound.save + end test_dataset.save_all training_dataset.save_all chunks << [training_dataset,test_dataset] @@ -229,7 +237,7 @@ module OpenTox table.each_with_index do |vals,i| ct = Time.now - identifier = vals.shift + identifier = vals.shift.strip warnings << "No feature values for compound at position #{i+2}." if vals.compact.empty? begin case compound_format @@ -246,7 +254,7 @@ module OpenTox warnings << "Cannot parse #{compound_format} compound '#{identifier}' at position #{i+2}, all entries are ignored." next end - # TODO insert empty compounds to keep positions? + compound.dataset_ids << self.id unless compound.dataset_ids.include? self.id compound_time += Time.now-ct r += 1 @@ -263,10 +271,15 @@ module OpenTox warnings << "Empty value for compound '#{identifier}' (row #{r+2}) and feature '#{feature_names[j]}' (column #{j+2})." next elsif numeric[j] - self.data_entries.last[j] = v.to_f + v = v.to_f else - self.data_entries.last[j] = v.strip + v = v.strip end + self.data_entries.last[j] = v + #i = compound.feature_ids.index feature_ids[j] + compound.features[feature_ids[j].to_s] ||= [] + compound.features[feature_ids[j].to_s] << v + compound.save end end compounds.duplicates.each do |compound| |