summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2015-09-18 14:48:39 +0200
committerChristoph Helma <helma@in-silico.ch>2015-09-18 14:48:39 +0200
commit259cd085e053193b4c166495ae1af35cfa94bcf6 (patch)
treed199332cbaedba2ce100636b6f6792bf18f5f448
parent1255775bca9bcb9ca9100c38504e42541249865d (diff)
import of datasets without features fixed
-rw-r--r--lib/dataset.rb2
-rw-r--r--lib/experiment.rb61
-rw-r--r--test/dataset.rb4
3 files changed, 54 insertions, 13 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 00e2bc3..946fd90 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -245,7 +245,7 @@ module OpenTox
end
compound_ids << compound.id
- @data_entries << Array.new(table.first.size-1) if (table.first.size-1) > 0
+ table.first.size == 0 ? @data_entries << Array.new(0) : @data_entries << Array.new(table.first.size-1)
vals.each_with_index do |v,j|
if v.blank?
diff --git a/lib/experiment.rb b/lib/experiment.rb
index 985a491..0a76c53 100644
--- a/lib/experiment.rb
+++ b/lib/experiment.rb
@@ -32,16 +32,26 @@ module OpenTox
def report
# TODO significances
+ # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/
report = {}
report[:name] = name
report[:experiment_id] = self.id.to_s
+ report[:results] = {}
+ parameters = []
dataset_ids.each do |dataset_id|
dataset_name = Dataset.find(dataset_id).name
- report[dataset_name] = []
+ report[:results][dataset_name] = {}
+ report[:results][dataset_name][:anova] = {}
+ report[:results][dataset_name][:data] = []
results[dataset_id.to_s].each do |result|
model = Model::Lazar.find(result[:model_id])
repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id])
crossvalidations = repeated_cv.crossvalidations
+ if crossvalidations.first.is_a? ClassificationCrossValidation
+ parameters = [:accuracy,:true_rate,:predictivity]
+ elsif crossvalidations.first.is_a? RegressionCrossValidation
+ parameters = [:rmse,:mae,:r_squared]
+ end
summary = {}
[:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key|
summary[key] = model[key]
@@ -49,19 +59,50 @@ module OpenTox
summary[:nr_instances] = crossvalidations.first.nr_instances
summary[:nr_unpredicted] = crossvalidations.collect{|cv| cv.nr_unpredicted}
summary[:time] = crossvalidations.collect{|cv| cv.time}
- if crossvalidations.first.is_a? ClassificationCrossValidation
- summary[:accuracies] = crossvalidations.collect{|cv| cv.accuracy}
- elsif crossvalidations.first.is_a? RegressionCrossValidation
- summary[:r_squared] = crossvalidations.collect{|cv| cv.r_squared}
+ parameters.each do |param|
+ summary[param] = crossvalidations.collect{|cv| cv.send(param)}
end
- report[dataset_name] << summary
- #p repeated_cv.crossvalidations.collect{|cv| cv.accuracy}
- #file = "/tmp/#{id}.svg"
- #File.open(file,"w+"){|f| f.puts cv.correlation_plot}
- #`inkview '#{file}'`
+ report[:results][dataset_name][:data] << summary
+ end
+ end
+ report[:results].each do |dataset,results|
+ ([:time,:nr_unpredicted]+parameters).each do |param|
+ experiments = []
+ outcome = []
+ results[:data].each_with_index do |result,i|
+ result[param].each do |p|
+ experiments << i
+ outcome << p
+ end
+ end
+ R.assign "experiment_nr",experiments.collect{|i| "Experiment #{i}"}
+ R.eval "experiment_nr = factor(experiment_nr)"
+ R.assign "outcome",outcome
+ R.eval "data = data.frame(experiment_nr,outcome)"
+ # one-way ANOVA
+ R.eval "fit = aov(outcome ~ experiment_nr, data=data)"
+ # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov
+ p_value = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby
+ # equivalent
+ # sum = R.eval("summary(fit)")
+ #p_value = sum.to_ruby.first.last.first
+=begin
+ if p_value < 0.01
+ p_value = "#{p_value} ***"
+ elsif p_value < 0.05
+ p_value = "#{p_value} **"
+ elsif p_value < 0.1
+ p_value = "#{p_value} *"
+ end
+=end
+ report[:results][dataset][:anova][param] = p_value
end
end
report
end
+ def summary
+ report[:results].collect{|dataset,data| {dataset => data[:anova].select{|param,p_val| p_val < 0.1}}}
+ end
+
end
diff --git a/test/dataset.rb b/test/dataset.rb
index d30a898..84be547 100644
--- a/test/dataset.rb
+++ b/test/dataset.rb
@@ -11,7 +11,7 @@ class DatasetTest < MiniTest::Test
assert_equal Dataset, datasets.first.class
d1.delete
end
-=begin
+
#TODO
def test_create_without_features_csv
d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction.csv")
@@ -22,7 +22,7 @@ class DatasetTest < MiniTest::Test
assert_equal 24, d.compounds.size.to_i
d.delete
end
-=end
+
def test_create_empty
d = Dataset.new
assert_equal Dataset, d.class