summaryrefslogtreecommitdiff
path: root/test/model.rb
blob: 017ce10c0bfe92a1c0df7ca901ed41f851866063 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
require_relative "setup.rb"

# Tests for Model::Lazar: model creation with default and user-supplied
# algorithm settings, and predictions for substances of the training dataset.
class ModelTest < Minitest::Test

  # A model created without an algorithms argument should be a
  # LazarRegression whose algorithms equal the settings listed below.
  def test_default_regression
    algorithms = {
      :descriptors => [ "MP2D" ],
      :similarity => {
        :method => "Algorithm::Similarity.tanimoto",
        :min => 0.1
      },
      :prediction => {
        :method => "Algorithm::Caret.pls",
      },
      :feature_selection => nil,
    }
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
    model = Model::Lazar.create training_dataset: training_dataset
    assert_kind_of Model::LazarRegression, model
    assert_equal algorithms, model.algorithms
    prediction = model.predict training_dataset.substances[10]
    assert_measurements_within_prediction_interval prediction
  end

  # Explicitly supplied similarity and prediction settings should be kept by
  # the created model and lead to the expected predicted value.
  def test_regression_parameters
    algorithms = {
      :descriptors => [ "MP2D" ],
      :similarity => {
        :method => "Algorithm::Similarity.tanimoto",
        :min => 0.3
      },
      :prediction => {
        :method => "Algorithm::Regression.weighted_average",
      },
      :feature_selection => nil,
    }
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    assert_kind_of Model::LazarRegression, model
    assert_equal "Algorithm::Regression.weighted_average", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
    assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
    # No prediction parameters are configured above, so the model must not
    # report any. assert_nil is used because assert_equal with a nil
    # expectation is deprecated in Minitest 5.
    assert_nil model.algorithms[:prediction][:parameters]
    prediction = model.predict training_dataset.substances[10]
    assert_equal 0.83, prediction[:value].round(2)
  end

  # Only descriptors and the similarity method are supplied; the remaining
  # settings are expected to be Caret.pls and a minimum similarity of 0.1.
  def test_physchem_regression
    algorithms = {
      :descriptors => ["PhysChem::OPENBABEL"],
      :similarity => {
        :method => "Algorithm::Similarity.cosine",
      }
    }
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    assert_kind_of Model::LazarRegression, model
    assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.cosine", model.algorithms[:similarity][:method]
    assert_equal 0.1, model.algorithms[:similarity][:min]
    assert_equal algorithms[:descriptors], model.algorithms[:descriptors]
    prediction = model.predict training_dataset.substances[10]
    refute_nil prediction[:value]
    # TODO: test the predicted value
  end

  # A model built from the nanoparticle dataset without an algorithms
  # argument is expected to use Caret.rf and weighted cosine similarity.
  def test_nanoparticle_default
    dataset_name = "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles"
    training_dataset = Dataset.where(:name => dataset_name).first
    unless training_dataset
      # Dataset not found: import the eNanoMapper data and look it up again.
      Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
      training_dataset = Dataset.where(name: dataset_name).first
    end
    model = Model::Lazar.create training_dataset: training_dataset
    assert_equal "Algorithm::Caret.rf", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.weighted_cosine", model.algorithms[:similarity][:method]
    prediction = model.predict training_dataset.substances[14]
    assert_measurements_within_prediction_interval prediction
  end

  # Placeholder: this test has no assertions yet.
  def test_nanoparticle_parameters
    skip "nanoparticle parameter test has no assertions yet"
  end

  # Supplying only a feature selection method should keep that method and
  # leave prediction and similarity at Caret.pls / tanimoto (min 0.1).
  def test_regression_with_feature_selection
    algorithms = {
      :feature_selection => {
        :method => "Algorithm::FeatureSelection.correlation_filter",
      },
    }
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini_log10.csv")
    model = Model::Lazar.create training_dataset: training_dataset, algorithms: algorithms
    assert_kind_of Model::LazarRegression, model
    assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
    assert_equal "Algorithm::Similarity.tanimoto", model.algorithms[:similarity][:method]
    assert_equal 0.1, model.algorithms[:similarity][:min]
    assert_equal algorithms[:feature_selection][:method], model.algorithms[:feature_selection][:method]
  end

  private

  # Asserts that the median of prediction[:measurements] lies between the two
  # bounds stored in prediction[:prediction_interval].
  def assert_measurements_within_prediction_interval prediction
    interval = prediction[:prediction_interval]
    assert_includes interval[0]..interval[1], prediction[:measurements].median, "This assertion assures that measured values are within the prediction interval. It may fail in 5% of the predictions."
  end

end