summary | refs | log | tree | commit | diff
path: root/lib/train-test-validation.rb
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2016-05-31 18:08:08 +0200
committer Christoph Helma <helma@in-silico.ch> 2016-05-31 18:08:08 +0200
commit b515a0cfedb887a2af753db6e4a08ae1af430cad (patch)
tree 5d69d89d0031d581e932272aeb741ee38a0106d6 /lib/train-test-validation.rb
parent f46ba3b7262f5b551c81fc9396c5b7f0cac7f030 (diff)
cleanup of validation modules/classes
Diffstat (limited to 'lib/train-test-validation.rb')
-rw-r--r-- lib/train-test-validation.rb 58
1 files changed, 58 insertions, 0 deletions
diff --git a/lib/train-test-validation.rb b/lib/train-test-validation.rb
new file mode 100644
index 0000000..286614a
--- /dev/null
+++ b/lib/train-test-validation.rb
@@ -0,0 +1,58 @@
+module OpenTox
+
+ module Validation
+
+ class TrainTest < Validation
+
+ field :training_dataset_id, type: BSON::ObjectId
+ field :test_dataset_id, type: BSON::ObjectId
+
+ def self.create model, training_set, test_set
+
+ atts = model.attributes.dup # do not modify attributes of the original model
+ atts["_id"] = BSON::ObjectId.new
+ atts[:training_dataset_id] = training_set.id
+ validation_model = model.class.create model.prediction_feature, training_set, atts
+ validation_model.save
+ predictions = validation_model.predict test_set.substances
+ nr_unpredicted = 0
+ predictions.each do |cid,prediction|
+ if prediction[:value]
+ prediction[:measurements] = test_set.values(cid, prediction[:prediction_feature_id])
+ else
+ nr_unpredicted += 1
+ end
+ end
+ predictions.select!{|cid,p| p[:value] and p[:measurements]}
+ validation = self.new(
+ :model_id => validation_model.id,
+ :test_dataset_id => test_set.id,
+ :nr_instances => test_set.substances.size,
+ :nr_unpredicted => nr_unpredicted,
+ :predictions => predictions
+ )
+ validation.save
+ validation
+ end
+
+ def test_dataset
+ Dataset.find test_dataset_id
+ end
+
+ def training_dataset
+ Dataset.find training_dataset_id
+ end
+
+ end
+
+ class ClassificationTrainTest < TrainTest
+ include ClassificationStatistics
+ end
+
+ class RegressionTrainTest < TrainTest
+ include RegressionStatistics
+ end
+
+ end
+
+end