summaryrefslogtreecommitdiff
path: root/test/regression.rb
blob: b1051f156f1843ec348dc1a8f256456e9b9f8e00 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
require_relative "setup.rb"

# Regression tests for Model::Lazar. Every test builds a model from the
# EPAFHM.medi_log10.csv file in DATA_DIR and predicts a single compound
# given as a SMILES string.
class LazarRegressionTest < Minitest::Test

  # Weighted-average prediction with the similarity minimum set to 0.
  # Pins the rounded predicted value and the number of neighbors.
  def test_weighted_average
    algorithms = {
      similarity: {
        min: 0
      },
      prediction: {
        method: "Algorithm::Regression.weighted_average",
      },
    }
    model = Model::Lazar.create(training_dataset: load_training_dataset, algorithms: algorithms)
    prediction = model.predict(Compound.from_smiles("CC(C)(C)CN"))
    # Parentheses keep the negative literal from being parsed as a binary
    # minus (Ruby warns about an "ambiguous first argument" otherwise).
    assert_equal(-0.86, prediction[:value].round(2))
    assert_equal(88, prediction[:neighbors].size)
  end

  # Model created with "MP2D" as the only descriptor; pins the neighbor
  # count and the rounded predicted value.
  def test_mpd_fingerprints
    algorithms = {
      descriptors: ["MP2D"]
    }
    model = Model::Lazar.create(training_dataset: load_training_dataset, algorithms: algorithms)
    prediction = model.predict(Compound.from_smiles("CCCSCCSCC"))
    assert_equal(3, prediction[:neighbors].size)
    assert_equal(1.37, prediction[:value].round(2))
  end

  # Model created without an explicit algorithms hash; only checks that a
  # value, a prediction interval and at least one neighbor are returned.
  def test_local_fingerprint_regression
    model = Model::Lazar.create(training_dataset: load_training_dataset)
    prediction = model.predict(Compound.from_smiles("NC(=O)OCCC"))
    refute_nil prediction[:value]
    refute_nil prediction[:prediction_interval]
    refute_empty prediction[:neighbors]
  end

  # "PhysChem::OPENBABEL" descriptors with weighted-cosine similarity
  # (minimum 0.5); only checks that a value is predicted.
  def test_local_physchem_regression
    algorithms = {
      descriptors: ["PhysChem::OPENBABEL"],
      similarity: {
        method: "Algorithm::Similarity.weighted_cosine",
        min: 0.5
      },
    }
    model = Model::Lazar.create(training_dataset: load_training_dataset, algorithms: algorithms)
    prediction = model.predict(Compound.from_smiles("NC(=O)OCCC"))
    refute_nil prediction[:value]
  end

  # Same similarity settings as above, with "calculated_properties"
  # descriptors and the correlation-filter feature selection; only checks
  # that a value is predicted.
  def test_local_physchem_regression_with_feature_selection
    algorithms = {
      descriptors: {
        method: "calculated_properties",
        types: ["OPENBABEL"]
      },
      similarity: {
        method: "Algorithm::Similarity.weighted_cosine",
        min: 0.5
      },
      feature_selection: {
        method: "Algorithm::FeatureSelection.correlation_filter",
      },
    }
    # Keyword-argument call, matching every other test in this class.
    # NOTE(review): no prediction feature is passed explicitly here, as in
    # the sibling tests - confirm the model defaults to the dataset's
    # first feature.
    model = Model::Lazar.create(training_dataset: load_training_dataset, algorithms: algorithms)
    prediction = model.predict(Compound.from_smiles("NC(=O)OCCC"))
    refute_nil prediction[:value]
  end

  # Placeholder: this test has no body yet, so it is reported as skipped.
  def test_local_physchem_classification
    skip "test_local_physchem_classification has no test body"
  end

  private

  # Loads the training dataset shared by all tests in this class.
  def load_training_dataset
    Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
  end

end