summary | refs | log | tree | commit | diff
path: root/lib/dataset.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2015-08-26 14:20:23 +0200
committer Christoph Helma <helma@in-silico.ch> 2015-08-26 14:20:23 +0200
commit d542e9fe92567c54423f39904111bd5293236416 (patch)
tree 68d04fe73e7012a2732a15703b25f5934c7e7dad /lib/dataset.rb
parent f8faf510b4574df1a00fa61a9f0a1681fc2f4857 (diff)
Parallel Crossvalidations
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r-- lib/dataset.rb 18
1 files changed, 11 insertions, 7 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index b3f5392..979753c 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -12,7 +12,6 @@ module OpenTox
field :compound_ids, type: Array, default: []
field :data_entries_id, type: BSON::ObjectId#, default: []
field :source, type: String
- field :warnings, type: Array, default: []
# Save all data including data_entries
# Should be used instead of save
@@ -21,7 +20,6 @@ module OpenTox
file = Mongo::Grid::File.new(dump, :filename => "#{self.id.to_s}.data_entries")
entries_id = $gridfs.insert_one(file)
update(:data_entries_id => entries_id)
- #save
end
# Readers
@@ -50,7 +48,7 @@ module OpenTox
bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
- $logger.debug "Retrieving data: #{Time.now-t}"
+ #$logger.debug "Retrieving data: #{Time.now-t}"
end
end
@data_entries
@@ -149,11 +147,17 @@ module OpenTox
# Create a dataset from CSV file
# TODO: document structure
def self.from_csv_file file, source=nil, bioassay=true
- $logger.debug "Parsing #{file}."
source ||= file
- table = CSV.read file, :skip_blanks => true
- dataset = self.new(:source => source, :name => File.basename(file,".*"))
- dataset.parse_table table, bioassay
+ name = File.basename(file,".*")
+ dataset = self.find_by(:source => source, :name => name)
+ if dataset
+ $logger.debug "#{file} already in database."
+ else
+ $logger.debug "Parsing #{file}."
+ table = CSV.read file, :skip_blanks => true
+ dataset = self.new(:source => source, :name => name)
+ dataset.parse_table table, bioassay
+ end
dataset
end