From cdab5069ded9490afe81095059e9a407faf864d9 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Tue, 10 Jan 2017 13:44:43 +0100 Subject: independent_variables stored in GridFS to avoid Mongo database size limit problems --- lib/compound.rb | 2 -- lib/model.rb | 19 +++++++++++++++++-- test/setup.rb | 6 +----- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/compound.rb b/lib/compound.rb index 8a1143b..1c308d8 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -209,7 +209,6 @@ module OpenTox update(:svg_id => $gridfs.insert_one(file)) end $gridfs.find_one(_id: self.svg_id).data - end # Get png image @@ -223,7 +222,6 @@ module OpenTox update(:png_id => $gridfs.insert_one(file)) end Base64.decode64($gridfs.find_one(_id: self.png_id).data) - end # Get all known compound names. Relies on an external service for name lookups. diff --git a/lib/model.rb b/lib/model.rb index 9c4a93f..e5834ae 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -9,6 +9,8 @@ module OpenTox include Mongoid::Timestamps store_in collection: "models" + attr_writer :independent_variables # store in GridFS to avoid Mongo database size limit problems + field :name, type: String field :creator, type: String, default: __FILE__ field :algorithms, type: Hash, default:{} @@ -17,7 +19,7 @@ module OpenTox field :prediction_feature_id, type: BSON::ObjectId field :dependent_variables, type: Array, default:[] field :descriptor_ids, type:Array, default:[] - field :independent_variables, type: Array, default:[] + field :independent_variables_id, type: BSON::ObjectId field :fingerprints, type: Array, default:[] field :descriptor_weights, type: Array, default:[] field :descriptor_means, type: Array, default:[] @@ -119,6 +121,7 @@ module OpenTox end descriptor_method = model.algorithms[:descriptors][:method] + model.independent_variables = [] case descriptor_method # parse fingerprints when "fingerprint" @@ -179,6 +182,7 @@ module OpenTox def predict_substance substance + @independent_variables = Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data case algorithms[:similarity][:method] when /tanimoto/ # binary features similarity_descriptors = substance.fingerprint algorithms[:descriptors][:type] @@ -234,7 +238,7 @@ module OpenTox neighbor_dependent_variables << dependent_variables[i] independent_variables.each_with_index do |c,j| neighbor_independent_variables[j] ||= [] - neighbor_independent_variables[j] << independent_variables[j][i] + neighbor_independent_variables[j] << @independent_variables[j][i] end end end @@ -302,6 +306,17 @@ module OpenTox end + def save # store independent_variables in GridFS to avoid Mongo database size limit problems + file = Mongo::Grid::File.new(Marshal.dump(@independent_variables), :filename => "#{id}.independent_variables") + self.independent_variables_id = $gridfs.insert_one(file) + super + end + + def independent_variables + @independent_variables ||= Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data + @independent_variables + end + def training_dataset Dataset.find(training_dataset_id) end diff --git a/test/setup.rb b/test/setup.rb index 63b59fb..40c8ebf 100644 --- a/test/setup.rb +++ b/test/setup.rb @@ -6,8 +6,4 @@ include OpenTox TEST_DIR ||= File.expand_path(File.dirname(__FILE__)) DATA_DIR ||= File.join(TEST_DIR,"data") training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first -unless training_dataset - Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm") -end -#$mongo.database.drop -#$gridfs = $mongo.database.fs +Import::Enanomapper.import unless training_dataset -- cgit v1.2.3