summaryrefslogtreecommitdiff
path: root/lib/experiment.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/experiment.rb')
-rw-r--r--lib/experiment.rb162
1 files changed, 75 insertions, 87 deletions
diff --git a/lib/experiment.rb b/lib/experiment.rb
index 0a76c53..616a273 100644
--- a/lib/experiment.rb
+++ b/lib/experiment.rb
@@ -4,105 +4,93 @@ module OpenTox
field :dataset_ids, type: Array
field :model_settings, type: Array, default: []
field :results, type: Hash, default: {}
- end
- def run
- dataset_ids.each do |dataset_id|
- dataset = Dataset.find(dataset_id)
- results[dataset_id.to_s] = []
- model_settings.each do |setting|
- model = Object.const_get(setting[:algorithm]).create dataset
- model.prediction_algorithm = setting[:prediction_algorithm] if setting[:prediction_algorithm]
- model.neighbor_algorithm = setting[:neighbor_algorithm] if setting[:neighbor_algorithm]
- model.neighbor_algorithm_parameters = setting[:neighbor_algorithm_parameter] if setting[:neighbor_algorithm_parameter]
- model.save
- repeated_crossvalidation = RepeatedCrossValidation.create model
- results[dataset_id.to_s] << {:model_id => model.id, :repeated_crossvalidation_id => repeated_crossvalidation.id}
+ def run
+ dataset_ids.each do |dataset_id|
+ dataset = Dataset.find(dataset_id)
+ results[dataset_id.to_s] = []
+ model_settings.each do |setting|
+ model_algorithm = setting.delete :model_algorithm
+ model = Object.const_get(model_algorithm).create dataset, setting
+ #model.prediction_algorithm = setting[:prediction_algorithm] if setting[:prediction_algorithm]
+ #model.neighbor_algorithm = setting[:neighbor_algorithm] if setting[:neighbor_algorithm]
+ #model.neighbor_algorithm_parameters = setting[:neighbor_algorithm_parameter] if setting[:neighbor_algorithm_parameter]
+ p model
+ model.save
+ repeated_crossvalidation = RepeatedCrossValidation.create model
+ results[dataset_id.to_s] << {:model_id => model.id, :repeated_crossvalidation_id => repeated_crossvalidation.id}
+ end
end
+ save
end
- save
- end
-
- def self.create params
- experiment = self.new
- $logge.debug "Experiment started ..."
- #experiment.run params
- experiment
- end
- def report
- # TODO significances
- # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/
- report = {}
- report[:name] = name
- report[:experiment_id] = self.id.to_s
- report[:results] = {}
- parameters = []
- dataset_ids.each do |dataset_id|
- dataset_name = Dataset.find(dataset_id).name
- report[:results][dataset_name] = {}
- report[:results][dataset_name][:anova] = {}
- report[:results][dataset_name][:data] = []
- results[dataset_id.to_s].each do |result|
- model = Model::Lazar.find(result[:model_id])
- repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id])
- crossvalidations = repeated_cv.crossvalidations
- if crossvalidations.first.is_a? ClassificationCrossValidation
- parameters = [:accuracy,:true_rate,:predictivity]
- elsif crossvalidations.first.is_a? RegressionCrossValidation
- parameters = [:rmse,:mae,:r_squared]
- end
- summary = {}
- [:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key|
- summary[key] = model[key]
- end
- summary[:nr_instances] = crossvalidations.first.nr_instances
- summary[:nr_unpredicted] = crossvalidations.collect{|cv| cv.nr_unpredicted}
- summary[:time] = crossvalidations.collect{|cv| cv.time}
- parameters.each do |param|
- summary[param] = crossvalidations.collect{|cv| cv.send(param)}
+ def report
+ # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/
+ report = {}
+ report[:name] = name
+ report[:experiment_id] = self.id.to_s
+ report[:results] = {}
+ parameters = []
+ dataset_ids.each do |dataset_id|
+ dataset_name = Dataset.find(dataset_id).name
+ report[:results][dataset_name] = {}
+ report[:results][dataset_name][:anova] = {}
+ report[:results][dataset_name][:data] = []
+ results[dataset_id.to_s].each do |result|
+ model = Model::Lazar.find(result[:model_id])
+ repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id])
+ crossvalidations = repeated_cv.crossvalidations
+ if crossvalidations.first.is_a? ClassificationCrossValidation
+ parameters = [:accuracy,:true_rate,:predictivity]
+ elsif crossvalidations.first.is_a? RegressionCrossValidation
+ parameters = [:rmse,:mae,:r_squared]
+ end
+ summary = {}
+ [:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key|
+ summary[key] = model[key]
+ end
+ summary[:nr_instances] = crossvalidations.first.nr_instances
+ summary[:nr_unpredicted] = crossvalidations.collect{|cv| cv.nr_unpredicted}
+ summary[:time] = crossvalidations.collect{|cv| cv.time}
+ parameters.each do |param|
+ summary[param] = crossvalidations.collect{|cv| cv.send(param)}
+ end
+ report[:results][dataset_name][:data] << summary
end
- report[:results][dataset_name][:data] << summary
end
- end
- report[:results].each do |dataset,results|
- ([:time,:nr_unpredicted]+parameters).each do |param|
- experiments = []
- outcome = []
- results[:data].each_with_index do |result,i|
- result[param].each do |p|
- experiments << i
- outcome << p
+ report[:results].each do |dataset,results|
+ ([:time,:nr_unpredicted]+parameters).each do |param|
+ experiments = []
+ outcome = []
+ results[:data].each_with_index do |result,i|
+ result[param].each do |p|
+ experiments << i
+ p = nil if p.kind_of? Float and p.infinite? # TODO fix @ division by 0
+ outcome << p
+ end
end
- end
- R.assign "experiment_nr",experiments.collect{|i| "Experiment #{i}"}
- R.eval "experiment_nr = factor(experiment_nr)"
- R.assign "outcome",outcome
- R.eval "data = data.frame(experiment_nr,outcome)"
- # one-way ANOVA
- R.eval "fit = aov(outcome ~ experiment_nr, data=data)"
- # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov
- p_value = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby
- # aequivalent
- # sum = R.eval("summary(fit)")
- #p_value = sum.to_ruby.first.last.first
+ R.assign "experiment_nr",experiments.collect{|i| "Experiment #{i}"}
+ R.eval "experiment_nr = factor(experiment_nr)"
+ R.assign "outcome", outcome
+ R.eval "data = data.frame(experiment_nr,outcome)"
+ # one-way ANOVA
+ R.eval "fit = aov(outcome ~ experiment_nr, data=data,na.action='na.omit')"
+ # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov
+ p_value = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby
+ # aequivalent
+ # sum = R.eval("summary(fit)")
+ #p_value = sum.to_ruby.first.last.first
+ report[:results][dataset][:anova][param] = p_value
=begin
- if p_value < 0.01
- p_value = "#{p_value} ***"
- elsif p_value < 0.05
- p_value = "#{p_value} **"
- elsif p_value < 0.1
- p_value = "#{p_value} *"
- end
=end
- report[:results][dataset][:anova][param] = p_value
+ end
end
+ report
end
- report
- end
- def summary
- report[:results].collect{|dataset,data| {dataset => data[:anova].select{|param,p_val| p_val < 0.1}}}
+ def summary
+ report[:results].collect{|dataset,data| {dataset => data[:anova].select{|param,p_val| p_val < 0.1}}}
+ end
end
end