summary refs log tree commit diff
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2017-01-10 13:44:43 +0100
committer Christoph Helma <helma@in-silico.ch> 2017-01-10 13:44:43 +0100
commit cdab5069ded9490afe81095059e9a407faf864d9 (patch)
tree 26a3337e350540d440d1725d9e0ad878f26fec26
parent a5abdd27b8c9b3f1cf65a567bfd9eb17ecc91a72 (diff)
independent_variables stored in GridFS to avoid Mongo database size limit problems
-rw-r--r-- lib/compound.rb 2
-rw-r--r-- lib/model.rb 19
-rw-r--r-- test/setup.rb 6
3 files changed, 18 insertions, 9 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 8a1143b..1c308d8 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -209,7 +209,6 @@ module OpenTox
update(:svg_id => $gridfs.insert_one(file))
end
$gridfs.find_one(_id: self.svg_id).data
-
end
# Get png image
@@ -223,7 +222,6 @@ module OpenTox
update(:png_id => $gridfs.insert_one(file))
end
Base64.decode64($gridfs.find_one(_id: self.png_id).data)
-
end
# Get all known compound names. Relies on an external service for name lookups.
diff --git a/lib/model.rb b/lib/model.rb
index 9c4a93f..e5834ae 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -9,6 +9,8 @@ module OpenTox
include Mongoid::Timestamps
store_in collection: "models"
+ attr_writer :independent_variables # store in GridFS to avoid Mongo database size limit problems
+
field :name, type: String
field :creator, type: String, default: __FILE__
field :algorithms, type: Hash, default:{}
@@ -17,7 +19,7 @@ module OpenTox
field :prediction_feature_id, type: BSON::ObjectId
field :dependent_variables, type: Array, default:[]
field :descriptor_ids, type:Array, default:[]
- field :independent_variables, type: Array, default:[]
+ field :independent_variables_id, type: BSON::ObjectId
field :fingerprints, type: Array, default:[]
field :descriptor_weights, type: Array, default:[]
field :descriptor_means, type: Array, default:[]
@@ -119,6 +121,7 @@ module OpenTox
end
descriptor_method = model.algorithms[:descriptors][:method]
+ model.independent_variables = []
case descriptor_method
# parse fingerprints
when "fingerprint"
@@ -179,6 +182,7 @@ module OpenTox
def predict_substance substance
+ @independent_variables = Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
case algorithms[:similarity][:method]
when /tanimoto/ # binary features
similarity_descriptors = substance.fingerprint algorithms[:descriptors][:type]
@@ -234,7 +238,7 @@ module OpenTox
neighbor_dependent_variables << dependent_variables[i]
independent_variables.each_with_index do |c,j|
neighbor_independent_variables[j] ||= []
- neighbor_independent_variables[j] << independent_variables[j][i]
+ neighbor_independent_variables[j] << @independent_variables[j][i]
end
end
end
@@ -302,6 +306,17 @@ module OpenTox
end
+ def save # store independent_variables in GridFS to avoid Mongo database size limit problems
+ file = Mongo::Grid::File.new(Marshal.dump(@independent_variables), :filename => "#{id}.independent_variables")
+ self.independent_variables_id = $gridfs.insert_one(file)
+ super
+ end
+
+ def independent_variables
+ @independent_variables ||= Marshal.load $gridfs.find_one(_id: self.independent_variables_id).data
+ @independent_variables
+ end
+
def training_dataset
Dataset.find(training_dataset_id)
end
diff --git a/test/setup.rb b/test/setup.rb
index 63b59fb..40c8ebf 100644
--- a/test/setup.rb
+++ b/test/setup.rb
@@ -6,8 +6,4 @@ include OpenTox
TEST_DIR ||= File.expand_path(File.dirname(__FILE__))
DATA_DIR ||= File.join(TEST_DIR,"data")
training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
-unless training_dataset
- Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
-end
-#$mongo.database.drop
-#$gridfs = $mongo.database.fs
+Import::Enanomapper.import unless training_dataset