summaryrefslogtreecommitdiff
path: root/lib/train-test-validation.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/train-test-validation.rb')
-rw-r--r--lib/train-test-validation.rb76
1 files changed, 0 insertions, 76 deletions
diff --git a/lib/train-test-validation.rb b/lib/train-test-validation.rb
deleted file mode 100644
index d034cd1..0000000
--- a/lib/train-test-validation.rb
+++ /dev/null
@@ -1,76 +0,0 @@
-module OpenTox
-
- module Validation
-
- # Training test set validation
- class TrainTest < Validation
-
- field :training_dataset_id, type: BSON::ObjectId
- field :test_dataset_id, type: BSON::ObjectId
-
- # Create a training test set validation
- # @param [OpenTox::Model::Lazar]
- # @param [OpenTox::Dataset] training dataset
- # @param [OpenTox::Dataset] test dataset
- # @return [OpenTox::Validation::TrainTest]
- def self.create model, training_set, test_set
-
- validation_model = model.class.create prediction_feature: model.prediction_feature, training_dataset: training_set, algorithms: model.algorithms
- validation_model.save
- predictions = validation_model.predict test_set.substances
- predictions.each do |cid,prediction|
- prediction[:measurements] = test_set.values(cid, prediction[:prediction_feature_id]) if prediction[:value]
- end
- predictions.select!{|cid,p| p[:value] and p[:measurements]}
- # remove neighbors to avoid mongos file size limit error on large datasets
- predictions.each{|cid,p| p.delete(:neighbors)} #if model.training_dataset.name.match(/mutagenicity/i)
- validation = self.new(
- :model_id => validation_model.id,
- :test_dataset_id => test_set.id,
- :predictions => predictions
- )
- validation.save
- validation
- end
-
- # Get test dataset
- # @return [OpenTox::Dataset]
- def test_dataset
- Dataset.find test_dataset_id
- end
-
- # Get training dataset
- # @return [OpenTox::Dataset]
- def training_dataset
- Dataset.find training_dataset_id
- end
-
- end
-
- # Training test set validation for classification models
- class ClassificationTrainTest < TrainTest
- include ClassificationStatistics
- field :accept_values, type: Array
- field :confusion_matrix, type: Array
- field :weighted_confusion_matrix, type: Array
- field :accuracy, type: Float
- field :weighted_accuracy, type: Float
- field :true_rate, type: Hash
- field :predictivity, type: Hash
- field :probability_plot_id, type: BSON::ObjectId
- end
-
- # Training test set validation for regression models
- class RegressionTrainTest < TrainTest
- include RegressionStatistics
- field :rmse, type: Float, default:0
- field :mae, type: Float, default:0
- field :r_squared, type: Float
- field :within_prediction_interval, type: Integer, default:0
- field :out_of_prediction_interval, type: Integer, default:0
- field :correlation_plot_id, type: BSON::ObjectId
- end
-
- end
-
-end