summaryrefslogtreecommitdiff
path: root/test/nanoparticles.rb
blob: b6a2f00793a3a2618fb053943e00f4415ed17447 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
require_relative "setup.rb"


# Regression tests for nanoparticle datasets (the "Protein Corona" study):
# lazar model creation, cross-validation, and dataset inspection/export.
# Several tests are skipped because re-running the import creates
# duplicate records (see setup).
class NanoparticleTest < MiniTest::Test
  include OpenTox::Validation

  def setup
    # TODO: multiple runs create duplicates
    #Import::Enanomapper.import File.join(File.dirname(__FILE__),"data","enm")
  end

  # Build a local-weighted-average model with correlation-based feature
  # selection and predict a single nanoparticle from the training set.
  def test_create_model_with_feature_selection
    skip
    dataset = Dataset.find_or_create_by(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
    feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
    params = {
      prediction_algorithm: "OpenTox::Algorithm::Regression.local_weighted_average",
      neighbor_algorithm: "physchem_neighbors",
      feature_selection_algorithm: "correlation_filter"
    }
    model = Model::LazarRegression.create(feature, dataset, params)
    nanoparticle = dataset.nanoparticles[-34]
    prediction = model.predict(nanoparticle)
    p prediction
    refute_nil prediction[:value]
  end

  # Same model without feature selection; additionally checks that the
  # predicted nanoparticle belongs to the training dataset.
  def test_create_model
    skip
    dataset = Dataset.find_or_create_by(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
    feature = Feature.find_or_create_by(name: "Net cell association", category: "TOX", unit: "mL/ug(Mg)")
    params = {
      prediction_algorithm: "OpenTox::Algorithm::Regression.local_weighted_average",
      neighbor_algorithm: "physchem_neighbors"
    }
    model = Model::LazarRegression.create(feature, dataset, params)
    nanoparticle = dataset.nanoparticles[-34]
    prediction = model.predict(nanoparticle)
    refute_nil prediction[:value]
    assert_includes nanoparticle.dataset_ids, dataset.id
    model.delete
  end

  # TODO move to validation-statistics
  # Regenerate and dump the correlation plot of the most recent CV.
  def test_inspect_cv
    latest = CrossValidation.all.sort_by(&:created_at).last
    latest.correlation_plot_id = nil # force the plot to be regenerated
    File.open("tmp.pdf", "w+") { |f| f.puts latest.correlation_plot }
    p latest.statistics
  end

  # Exercise the worst_predictions report of the most recent CV with and
  # without neighbor details.
  def test_inspect_worst_prediction
    latest = CrossValidation.all.sort_by(&:created_at).last
    # NOTE(review): "show_neigbors" is presumably the (misspelled) keyword
    # declared by CrossValidation#worst_predictions — verify before renaming.
    worst = latest.worst_predictions(n: 3, show_neigbors: false)
    assert_equal 3, worst.size
    assert_kind_of Integer, worst.first[:neighbors]
    worst = latest.worst_predictions
    assert_equal 5, worst.size
    assert_kind_of Array, worst.first[:neighbors]
    assert_kind_of Integer, worst.first[:neighbors].first[:common_descriptors]
    worst = latest.worst_predictions(n: 2, show_common_descriptors: true)
    puts worst.to_yaml
    assert_equal 2, worst.size
    assert_kind_of Array, worst.first[:neighbors]
    refute_nil worst.first[:neighbors].first[:common_descriptors]
  end

  # Cross-validate a local-weighted-average model on the log2-transformed
  # endpoint and require validation statistics to be present.
  def test_validate_model
    dataset = Dataset.find_or_create_by(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
    feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
    params = {
      prediction_algorithm: "OpenTox::Algorithm::Regression.local_weighted_average",
      neighbor_algorithm: "physchem_neighbors",
      neighbor_algorithm_parameters: { min_sim: 0.5 }
    }
    model = Model::LazarRegression.create(feature, dataset, params)
    cv = RegressionCrossValidation.create(model)
    p cv
    p cv.rmse
    p cv.r_squared
    refute_nil cv.r_squared
    refute_nil cv.rmse
  end

  # Same as above but with local physchem regression (PLS); also writes
  # the correlation plot to tmp.pdf for manual inspection.
  def test_validate_pls_model
    dataset = Dataset.find_or_create_by(name: "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles")
    feature = Feature.find_or_create_by(name: "Log2 transformed", category: "TOX")
    params = {
      prediction_algorithm: "OpenTox::Algorithm::Regression.local_physchem_regression",
      neighbor_algorithm: "physchem_neighbors",
      neighbor_algorithm_parameters: { min_sim: 0.5 }
    }
    model = Model::LazarRegression.create(feature, dataset, params)
    cv = RegressionCrossValidation.create(model)
    p cv
    p cv.rmse
    p cv.r_squared
    File.open("tmp.pdf", "w+") { |f| f.puts cv.correlation_plot }
    refute_nil cv.r_squared
    refute_nil cv.rmse
  end

  # Dump every dataset as CSV (manual inspection helper).
  def test_export
    skip
    Dataset.all.each { |d| puts d.to_csv }
  end

  # Print the feature names of every named dataset and check the count.
  def test_summaries
    skip
    named = Dataset.all.reject { |d| d.name.nil? }
    named.each { |d| p d.features.uniq.map(&:name) }
    assert_equal 9, named.size
=begin
    features = Feature.all.to_a
    #p features.collect do |f|
      #f if f.category == "TOX"
    #end.to_a.flatten.size
    toxcounts = {}
    pccounts = {}
    Nanoparticle.all.each do |np|
      np.measurements.each do |t,v|
        toxcounts[t] ||= 0
        toxcounts[t] += 1#v.uniq.size
      end
      np.physchem_descriptors.each do |t,v|
        pccounts[t] ||= 0
        pccounts[t] += 1#v.uniq.size
      end
    end
    #puts counts.keys.collect{|i| Feature.find(i)}.to_yaml
    #pccounts.each{|e,n| p Feature.find(e),n if n > 100}
    #p toxcounts.collect{|e,n| Feature.find(e).name if n > 1}.uniq
    toxcounts.each{|e,n| p Feature.find(e),n if n > 100}
=end
  end

  # Smoke test for the linked-data importer (no assertions yet).
  def test_import_ld
    skip
    Import::Enanomapper.import_ld
  end
end