summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author gebele <gebele@in-silico.ch> 2017-05-04 09:59:37 +0000
committer gebele <gebele@in-silico.ch> 2017-05-04 09:59:37 +0000
commit 658e0f706622eabce6900134fb1d968b440fd704 (patch)
tree dff22dfe3c3573fd4c9cf0d942678249cde95ab6
parent d4d914052de0a37489f9491dbe491093bd14a03a (diff)
cleanup and hack for mongos file size limit
-rw-r--r-- lib/model.rb 1
-rw-r--r-- lib/train-test-validation.rb 2
2 files changed, 2 insertions, 1 deletions
diff --git a/lib/model.rb b/lib/model.rb
index 80affd5..9c5c19b 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -145,7 +145,6 @@ module OpenTox
end
model.descriptor_ids = model.fingerprints.flatten.uniq
model.descriptor_ids.each do |d|
- # resulting model may break BSON size limit (e.g. f Kazius dataset)
model.independent_variables << model.substance_ids.collect_with_index{|s,i| model.fingerprints[i].include? d} if model.algorithms[:prediction][:method].match /Caret/
end
# calculate physchem properties
diff --git a/lib/train-test-validation.rb b/lib/train-test-validation.rb
index 034ae3a..9a5532d 100644
--- a/lib/train-test-validation.rb
+++ b/lib/train-test-validation.rb
@@ -27,6 +27,8 @@ module OpenTox
end
end
predictions.select!{|cid,p| p[:value] and p[:measurements]}
+ # hack to avoid mongos file size limit error on large datasets
+ #predictions.each{|cid,p| p[:neighbors] = []} if model.training_dataset.name.match(/mutagenicity/i)
validation = self.new(
:model_id => validation_model.id,
:test_dataset_id => test_set.id,