1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
module OpenTox
  # An experiment builds one model per (dataset, model setting) pair, runs a
  # repeated crossvalidation for each model and compares the outcomes of the
  # different settings with a one-way ANOVA.
  class Experiment
    field :dataset_ids, type: Array
    field :model_settings, type: Array, default: []
    field :results, type: Hash, default: {}

    # Builds a new experiment from +params+ and logs that it was started.
    #
    # NOTE(review): +params+ is forwarded to the constructor on the assumption
    # that it is an attribute Hash (the original accepted it but silently
    # ignored it) - confirm against callers. This method shadows any +create+
    # the class inherits and does not save the instance itself.
    #
    # @param params attributes handed to +new+
    # @return [Experiment] the new experiment
    def self.create params
      experiment = new(params)
      # NOTE(review): assumes the project-wide logger lives in $logger; the
      # guard keeps this from raising when no logger has been assigned.
      $logger.debug "Experiment started ..." if $logger
      #experiment.run params
      experiment
    end

    # Creates and crossvalidates a model for every combination of dataset and
    # model setting, records the resulting ids in +results+ (keyed by the
    # dataset id as a String) and saves the experiment.
    #
    # @return the return value of +save+
    def run
      dataset_ids.each do |dataset_id|
        dataset = Dataset.find(dataset_id)
        results[dataset_id.to_s] = []
        model_settings.each do |setting|
          # setting[:algorithm] names the model class to instantiate
          model = Object.const_get(setting[:algorithm]).create dataset
          model.prediction_algorithm = setting[:prediction_algorithm] if setting[:prediction_algorithm]
          model.neighbor_algorithm = setting[:neighbor_algorithm] if setting[:neighbor_algorithm]
          # NOTE(review): the setting key is singular while the model attribute
          # is plural - kept as is because callers may rely on it; confirm.
          model.neighbor_algorithm_parameters = setting[:neighbor_algorithm_parameter] if setting[:neighbor_algorithm_parameter]
          model.save
          repeated_crossvalidation = RepeatedCrossValidation.create model
          results[dataset_id.to_s] << {:model_id => model.id, :repeated_crossvalidation_id => repeated_crossvalidation.id}
        end
      end
      save
    end

    # Collects the crossvalidation outcomes of every model and tests, per
    # dataset and per outcome parameter, whether the models differ (one-way
    # ANOVA evaluated through the R connection).
    #
    # @return [Hash] :name, :experiment_id and :results; :results maps each
    #   dataset name to {:anova => {parameter => p-value}, :data => [summaries]}
    def report
      # TODO significances
      # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/
      report = {}
      report[:name] = name
      report[:experiment_id] = id.to_s
      report[:results] = {}
      dataset_ids.each do |dataset_id|
        dataset_name = Dataset.find(dataset_id).name
        # Tracked per dataset so that the parameters of one dataset are never
        # applied to the data of another one.
        parameters = []
        # A dataset without recorded results (e.g. run was never called)
        # contributes an empty data list instead of raising.
        summaries = (results[dataset_id.to_s] || []).collect do |result|
          model = Model::Lazar.find(result[:model_id])
          repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id])
          crossvalidations = repeated_cv.crossvalidations
          # An unrecognised validation type keeps the previous parameters.
          parameters = crossvalidation_parameters(crossvalidations.first) || parameters
          crossvalidation_summary(model, crossvalidations, parameters)
        end
        report[:results][dataset_name] = {
          :anova => anova_p_values(summaries, [:time, :nr_unpredicted] + parameters),
          :data => summaries
        }
      end
      report
    end

    # Lists, per dataset, the parameters whose ANOVA p-value is below
    # +significance_level+.
    #
    # @param significance_level [Numeric] exclusive upper bound for p-values
    # @return [Array<Hash>] one {dataset name => {parameter => p-value}} per dataset
    def summary(significance_level = 0.1)
      report[:results].collect do |dataset, data|
        # p-values that could not be determined (nil) are skipped
        {dataset => data[:anova].select { |_param, p_val| p_val && p_val < significance_level }}
      end
    end

    private

    # Outcome parameters reported for the given crossvalidation, or nil when
    # it is neither a classification nor a regression crossvalidation.
    def crossvalidation_parameters(crossvalidation)
      if crossvalidation.is_a? ClassificationCrossValidation
        [:accuracy, :true_rate, :predictivity]
      elsif crossvalidation.is_a? RegressionCrossValidation
        [:rmse, :mae, :r_squared]
      end
    end

    # Model settings plus the per-crossvalidation outcomes of one model.
    def crossvalidation_summary(model, crossvalidations, parameters)
      row = {}
      [:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key|
        row[key] = model[key]
      end
      row[:nr_instances] = crossvalidations.first.nr_instances
      row[:nr_unpredicted] = crossvalidations.collect { |cv| cv.nr_unpredicted }
      row[:time] = crossvalidations.collect { |cv| cv.time }
      parameters.each do |param|
        row[param] = crossvalidations.collect { |cv| cv.send(param) }
      end
      row
    end

    # One-way ANOVA p-value for every parameter, using the position of each
    # summary as the group label. Returns an empty Hash when there are fewer
    # than two summaries, because there is nothing to compare.
    def anova_p_values(summaries, parameters)
      p_values = {}
      return p_values if summaries.size < 2
      parameters.each do |param|
        experiments = []
        outcome = []
        summaries.each_with_index do |row, i|
          row[param].each do |value|
            experiments << "Experiment #{i}"
            outcome << value
          end
        end
        R.assign "experiment_nr", experiments
        R.eval "experiment_nr = factor(experiment_nr)"
        R.assign "outcome", outcome
        R.eval "data = data.frame(experiment_nr,outcome)"
        # one-way ANOVA
        R.eval "fit = aov(outcome ~ experiment_nr, data=data)"
        # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov
        # equivalent: R.eval("summary(fit)").to_ruby.first.last.first
        p_values[param] = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby
        # Disabled idea: append "***" / "**" / "*" to the p-value when it is
        # below 0.01 / 0.05 / 0.1.
      end
      p_values
    end
  end
end
|