summaryrefslogtreecommitdiff
path: root/lib/train-test-validation.rb
blob: d034cd1e641aa861db4162e5112806b7cf2267c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
module OpenTox

  module Validation

    # Training test set validation: a model is built from a training dataset
    # and its predictions for a separate test dataset are stored together with
    # the measurements found in that test dataset.
    class TrainTest < Validation

      field :training_dataset_id, type: BSON::ObjectId
      field :test_dataset_id, type: BSON::ObjectId

      # Create a training test set validation
      #
      # A new model of the same class as +model+ is created from +training_set+
      # (reusing the prediction feature and algorithms of +model+), saved, and
      # used to predict the substances of +test_set+. Only predictions that have
      # both a predicted value and measurements are kept.
      # @param [OpenTox::Model::Lazar] model model whose class, prediction feature and algorithms are reused
      # @param [OpenTox::Dataset] training_set training dataset
      # @param [OpenTox::Dataset] test_set test dataset
      # @return [OpenTox::Validation::TrainTest] the validation, after calling save on it
      def self.create model, training_set, test_set

        validation_model = model.class.create prediction_feature: model.prediction_feature, training_dataset: training_set, algorithms: model.algorithms
        validation_model.save
        predictions = validation_model.predict test_set.substances
        # attach the test set measurements to every prediction that produced a value
        predictions.each do |cid, prediction|
          prediction[:measurements] = test_set.values(cid, prediction[:prediction_feature_id]) if prediction[:value]
        end
        # keep only predictions that can be compared with measurements
        predictions.select! { |_cid, prediction| prediction[:value] && prediction[:measurements] }
        # remove neighbors to avoid mongos file size limit error on large datasets
        predictions.each { |_cid, prediction| prediction.delete(:neighbors) }
        validation = self.new(
          :model_id => validation_model.id,
          # store the training dataset id so that #training_dataset can resolve it
          :training_dataset_id => training_set.id,
          :test_dataset_id => test_set.id,
          :predictions => predictions
        )
        validation.save
        validation
      end

      # Get test dataset
      # @return [OpenTox::Dataset] dataset looked up by test_dataset_id
      def test_dataset
        Dataset.find test_dataset_id
      end

      # Get training dataset
      # @return [OpenTox::Dataset] dataset looked up by training_dataset_id
      def training_dataset
        Dataset.find training_dataset_id
      end

    end

    # Training test set validation for classification models
    class ClassificationTrainTest < TrainTest
      include ClassificationStatistics

      # Result fields, grouped by their declared type.
      # NOTE(review): presumably filled in by ClassificationStatistics - confirm.
      {
        Array => [:accept_values, :confusion_matrix, :weighted_confusion_matrix],
        Float => [:accuracy, :weighted_accuracy],
        Hash => [:true_rate, :predictivity]
      }.each do |field_type, field_names|
        field_names.each { |field_name| field field_name, type: field_type }
      end

      # Id of the probability plot (what it points to is not visible here - confirm)
      field :probability_plot_id, type: BSON::ObjectId
    end

    # Training test set validation for regression models
    class RegressionTrainTest < TrainTest
      include RegressionStatistics

      # Error measures, both declared as Float with a default of 0
      [:rmse, :mae].each { |error_measure| field error_measure, type: Float, default: 0 }

      # Declared without a default
      field :r_squared, type: Float

      # Prediction interval counters, both declared as Integer with a default of 0
      [:within_prediction_interval, :out_of_prediction_interval].each do |counter|
        field counter, type: Integer, default: 0
      end

      # Id of the correlation plot (what it points to is not visible here - confirm)
      field :correlation_plot_id, type: BSON::ObjectId
    end

  end

end