1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
module OpenTox
module Validation
# Training test set validation
#
# Stores the ids of the training and test datasets next to the attributes
# passed to the constructor in +create+ (model id, instance counts and
# the predictions themselves).
class TrainTest < Validation
  field :training_dataset_id, type: BSON::ObjectId
  field :test_dataset_id, type: BSON::ObjectId

  # Create a training test set validation
  #
  # A new model of the same class as +model+ is created from +training_set+
  # (reusing the prediction feature and algorithms of +model+), saved, and
  # used to predict the substances of +test_set+. Predictions that carry a
  # value get the corresponding test set measurements attached; predictions
  # without a value are counted as unpredicted and dropped from the result.
  #
  # @param [OpenTox::Model::Lazar] model template model (its class, prediction feature and algorithms are reused)
  # @param [OpenTox::Dataset] training_set training dataset
  # @param [OpenTox::Dataset] test_set test dataset
  # @return [OpenTox::Validation::TrainTest] the saved validation
  def self.create model, training_set, test_set
    validation_model = model.class.create prediction_feature: model.prediction_feature, training_dataset: training_set, algorithms: model.algorithms
    validation_model.save
    predictions = validation_model.predict test_set.substances
    nr_unpredicted = 0
    # predictions is iterated as key/value pairs (presumably substance id => prediction hash)
    predictions.each do |cid, prediction|
      if prediction[:value]
        prediction[:measurements] = test_set.values(cid, prediction[:prediction_feature_id])
      else
        nr_unpredicted += 1
      end
    end
    # keep only entries that have both a predicted value and measurements
    predictions.select! { |_cid, prediction| prediction[:value] && prediction[:measurements] }
    # hack to avoid mongos file size limit error on large datasets (currently disabled)
    #predictions.each{|cid,p| p[:neighbors] = []} if model.training_dataset.name.match(/mutagenicity/i)
    validation = self.new(
      :model_id => validation_model.id,
      # record the training set so that #training_dataset can resolve it
      :training_dataset_id => training_set.id,
      :test_dataset_id => test_set.id,
      :nr_instances => test_set.substances.size,
      :nr_unpredicted => nr_unpredicted,
      :predictions => predictions
    )
    validation.save
    validation
  end

  # Get test dataset
  # @return [OpenTox::Dataset] dataset looked up by test_dataset_id
  def test_dataset
    Dataset.find test_dataset_id
  end

  # Get training dataset
  # @return [OpenTox::Dataset] dataset looked up by training_dataset_id
  def training_dataset
    Dataset.find training_dataset_id
  end
end
# Training test set validation for classification models
#
# Mixes in ClassificationStatistics and declares the fields below;
# fields are grouped by type, in the same declaration order as before.
class ClassificationTrainTest < TrainTest
  include ClassificationStatistics

  # Array fields: accepted values and both confusion matrices
  [:accept_values, :confusion_matrix, :weighted_confusion_matrix].each do |name|
    field name, type: Array
  end

  # Float fields: plain and weighted accuracy
  [:accuracy, :weighted_accuracy].each do |name|
    field name, type: Float
  end

  # Hash fields
  [:true_rate, :predictivity].each do |name|
    field name, type: Hash
  end

  # ObjectId of the probability plot
  field :probability_plot_id, type: BSON::ObjectId
end
# Training test set validation for regression models
#
# Mixes in RegressionStatistics and declares the fields below;
# fields are grouped by type and default, in the same declaration order as before.
class RegressionTrainTest < TrainTest
  include RegressionStatistics

  # Float error measures, defaulting to 0
  [:rmse, :mae].each do |name|
    field name, type: Float, default: 0
  end

  # Float field without a default
  field :r_squared, type: Float

  # Integer counters for the prediction interval, defaulting to 0
  [:within_prediction_interval, :out_of_prediction_interval].each do |name|
    field name, type: Integer, default: 0
  end

  # ObjectId of the correlation plot
  field :correlation_plot_id, type: BSON::ObjectId
end
end
end
|