summaryrefslogtreecommitdiff
path: root/lib/validation.rb
blob: 334efd783835f63c6b96b1d6e57b23edf42e23b5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
module OpenTox

  # Result of validating a model against a test dataset.
  #
  # The +field+ declarations below use a persistence DSL that is defined
  # outside this file (NOTE(review): presumably Mongoid -- confirm).
  class Validation

    field :model_id, type: BSON::ObjectId # id of the model copy built in .create
    field :prediction_dataset_id, type: BSON::ObjectId # read by #prediction_dataset; not assigned in .create
    field :crossvalidation_id, type: BSON::ObjectId # set only when .create receives a crossvalidation
    field :test_dataset_id, type: BSON::ObjectId # id of the test set passed to .create
    field :nr_instances, type: Integer # number of substances in the test set
    field :nr_unpredicted, type: Integer # predictions that came back without a :value
    field :predictions, type: Hash # keyed by substance id; only entries with :value and :measured

    # Look up the dataset referenced by +prediction_dataset_id+.
    def prediction_dataset
      Dataset.find prediction_dataset_id
    end

    # Look up the dataset referenced by +test_dataset_id+.
    def test_dataset
      Dataset.find test_dataset_id
    end

    # Look up the model referenced by +model_id+.
    def model
      Model::Lazar.find model_id
    end

    # Build a copy of +model+ trained on +training_set+, predict the
    # substances of +test_set+ with it and store the outcome.
    #
    # Predictions are kept only when they carry both a predicted +:value+
    # and a +:measured+ value; predictions without a +:value+ are counted in
    # +nr_unpredicted+.
    #
    # @param model the model to validate; its attributes are copied, the original is left untouched
    # @param training_set dataset whose id becomes the copy's +training_dataset_id+
    # @param test_set dataset whose substances are predicted
    # @param crossvalidation optional object whose id is stored as +crossvalidation_id+
    # @return the saved validation (an instance of the receiving class)
    def self.create model, training_set, test_set, crossvalidation=nil
      atts = model.attributes.dup # do not modify attributes of the original model
      atts["_id"] = BSON::ObjectId.new # the copy gets an id of its own
      # NOTE(review): string key above, symbol key below -- kept exactly as in
      # the original; confirm the attributes hash treats both alike.
      atts[:training_dataset_id] = training_set.id
      validation_model = model.class.create model.prediction_feature, training_set, atts
      validation_model.save

      predictions = validation_model.predict test_set.substances
      predictions.each { |_cid, prediction| prediction.delete(:neighbors) }

      test_set_key = test_set.id.to_s # loop invariant
      nr_unpredicted = 0
      # A single delete_if pass attaches the measured values and drops
      # incomplete predictions, instead of calling Hash#delete on the hash
      # from inside its own #each.
      predictions.delete_if do |cid, prediction|
        if prediction[:value]
          tox = Substance.find(cid).toxicities[prediction[:prediction_feature_id].to_s]
          prediction[:measured] = tox[test_set_key] if tox
        else
          nr_unpredicted += 1
        end
        !(prediction[:value] && prediction[:measured])
      end

      # Predictions are stored in the order returned by #predict; sorting by
      # confidence was sketched in an earlier revision but never enabled.
      validation = self.new(
        :model_id => validation_model.id,
        :test_dataset_id => test_set.id,
        :nr_instances => test_set.substances.size,
        :nr_unpredicted => nr_unpredicted,
        :predictions => predictions
      )
      validation.crossvalidation_id = crossvalidation.id if crossvalidation
      validation.save
      validation
    end

  end

  # Validation subclass that adds no behaviour of its own in this file.
  # NOTE(review): presumably used for classification models and extended
  # elsewhere -- confirm.
  class ClassificationValidation < Validation; end

  # Validation subclass that adds no behaviour of its own in this file.
  # NOTE(review): presumably used for regression models and extended
  # elsewhere -- confirm.
  class RegressionValidation < Validation; end

end