summaryrefslogtreecommitdiff
path: root/lib/experiment.rb
blob: 0a76c5352084c1f43e9f7fd924db3a15c93fc34b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
module OpenTox

  # An experiment builds one model per (dataset, model setting) combination,
  # validates each model with a repeated crossvalidation and reports the
  # outcomes together with a one-way ANOVA across the settings of a dataset.
  class Experiment
    # Ids of the datasets to evaluate (each one is looked up with Dataset.find).
    field :dataset_ids, type: Array
    # One Hash per model configuration. Keys read by #run: :algorithm,
    # :prediction_algorithm, :neighbor_algorithm, :neighbor_algorithm_parameter.
    field :model_settings, type: Array, default: []
    # Filled by #run: dataset id (String) => Array of
    # {:model_id => ..., :repeated_crossvalidation_id => ...}
    field :results, type: Hash, default: {}

    # Create and crossvalidate a model for every dataset/setting combination,
    # record the resulting ids in +results+ and save the experiment.
    # @return the result of +save+
    def run
      dataset_ids.each do |dataset_id|
        dataset = Dataset.find(dataset_id)
        results[dataset_id.to_s] = []
        model_settings.each do |setting|
          # The model class is resolved by name from the setting.
          model = Object.const_get(setting[:algorithm]).create dataset
          model.prediction_algorithm = setting[:prediction_algorithm] if setting[:prediction_algorithm]
          model.neighbor_algorithm = setting[:neighbor_algorithm] if setting[:neighbor_algorithm]
          # NOTE(review): the setting key is singular while the model attribute
          # is plural - confirm that callers really pass :neighbor_algorithm_parameter.
          model.neighbor_algorithm_parameters = setting[:neighbor_algorithm_parameter] if setting[:neighbor_algorithm_parameter]
          model.save
          repeated_crossvalidation = RepeatedCrossValidation.create model
          results[dataset_id.to_s] << {:model_id => model.id, :repeated_crossvalidation_id => repeated_crossvalidation.id}
        end
      end
      save
    end

    # Instantiate a new experiment without running it.
    # @param params currently unused (running with params is disabled below)
    # @return [Experiment] the new, unsaved experiment
    def self.create params
      experiment = self.new
      $logger.debug "Experiment started ..."
      #experiment.run params
      experiment
    end

    # Collect the crossvalidation outcomes of all models and compute, per
    # dataset and per measure, the p-value of a one-way ANOVA across models.
    # @return [Hash] {:name, :experiment_id, :results => {dataset name => {:anova => {...}, :data => [...]}}}
    def report
      # TODO significances
      # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/
      report = {}
      report[:name] = name
      report[:experiment_id] = self.id.to_s
      report[:results] = {}
      dataset_ids.each do |dataset_id|
        dataset_name = Dataset.find(dataset_id).name
        # Scoped per dataset, so that the measures of one dataset are never
        # applied to the results of another one (e.g. regression measures to a
        # classification dataset).
        parameters = []
        data = []
        results[dataset_id.to_s].each do |result|
          model = Model::Lazar.find(result[:model_id])
          repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id])
          crossvalidations = repeated_cv.crossvalidations
          if crossvalidations.first.is_a? ClassificationCrossValidation
            parameters = [:accuracy,:true_rate,:predictivity]
          elsif crossvalidations.first.is_a? RegressionCrossValidation
            parameters = [:rmse,:mae,:r_squared]
          end
          entry = {}
          [:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key|
            entry[key] = model[key]
          end
          entry[:nr_instances] = crossvalidations.first.nr_instances
          entry[:nr_unpredicted] = crossvalidations.collect{|cv| cv.nr_unpredicted}
          entry[:time] = crossvalidations.collect{|cv| cv.time}
          parameters.each do |param|
            entry[param] = crossvalidations.collect{|cv| cv.send(param)}
          end
          data << entry
        end
        anova = {}
        ([:time,:nr_unpredicted]+parameters).each do |param|
          anova[param] = anova_p_value data, param
        end
        report[:results][dataset_name] = {:anova => anova, :data => data}
      end
      report
    end

    # Measures per dataset whose ANOVA p-value is below 0.1.
    # Recomputes the complete report on every call.
    # @return [Array<Hash>] one {dataset name => {measure => p-value}} Hash per dataset
    def summary
      report[:results].collect do |dataset,data|
        # Entries without a numeric p-value are skipped instead of raising.
        {dataset => data[:anova].select{|param,p_val| p_val.is_a?(Numeric) and p_val < 0.1}}
      end
    end

    private

    # One-way ANOVA (computed in R) of the values of +param+ grouped by the
    # position of each entry in +data+.
    # @param data [Array<Hash>] entries as built in #report; data[i][param] is a list of values
    # @param param [Symbol] measure to compare
    # @return the p-value as converted by +to_ruby+
    def anova_p_value data, param
      labels = []
      outcome = []
      data.each_with_index do |entry,i|
        entry[param].each do |value|
          labels << "Experiment #{i}"
          outcome << value
        end
      end
      R.assign "experiment_nr",labels
      R.eval "experiment_nr = factor(experiment_nr)"
      R.assign "outcome",outcome
      R.eval "data = data.frame(experiment_nr,outcome)"
      # one-way ANOVA
      R.eval "fit = aov(outcome ~ experiment_nr, data=data)"
      # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov
      p_value = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby
      # equivalent:
      # sum = R.eval("summary(fit)")
      # p_value = sum.to_ruby.first.last.first
      #
      # Disabled significance markers:
      # if p_value < 0.01
      #   p_value = "#{p_value} ***"
      # elsif p_value < 0.05
      #   p_value = "#{p_value} **"
      # elsif p_value < 0.1
      #   p_value = "#{p_value} *"
      # end
      p_value
    end

  end

end