summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2011-07-01 11:18:41 +0200
committermguetlein <martin.guetlein@gmail.com>2011-07-01 11:18:41 +0200
commit2ae0e7a344954b97b66c47094292084741b320e4 (patch)
tree2c6ca7f4f4d8551bd630b49a7aaa4f3a3cd351d7
parent95de1906994672c12d4d4b6fd1f5f09695fb1c60 (diff)
add control of t-tests to algorithm-comparison reports
-rwxr-xr-xreport/report_application.rb10
-rwxr-xr-xreport/report_content.rb37
-rwxr-xr-xreport/report_factory.rb27
-rw-r--r--report/report_service.rb5
-rw-r--r--report/statistical_test.rb10
-rwxr-xr-xreport/validation_data.rb27
6 files changed, 75 insertions, 41 deletions
diff --git a/report/report_application.rb b/report/report_application.rb
index 8f74bd9..b96fb27 100755
--- a/report/report_application.rb
+++ b/report/report_application.rb
@@ -68,8 +68,12 @@ get '/report/:report_type' do
# if searching for a report, ?validation="uri" or ?crossvalidaiton="uri" is given as search param
# use this (search param has equal name as report type) as default value for validation_uri
post_command.attributes << OpenTox::PostAttribute.new("validation_uris",true,params[params[:report_type]],val_uri_description)
- post_command.attributes << OpenTox::PostAttribute.new("identifier",true,nil,"Specifiy one identifier for each uri, separated with ','") if
- params[:report_type]=="algorithm_comparison"
+ if params[:report_type]=="algorithm_comparison"
+ post_command.attributes << OpenTox::PostAttribute.new("identifier",true,nil,"Specifiy one identifier for each uri, separated with ','")
+ post_command.attributes << OpenTox::PostAttribute.new("ttest_significance",false,"0.9","Significance level for t-tests (Set to '0' to disable t-test).")
+ post_command.attributes << OpenTox::PostAttribute.new("ttest_attributes",false,nil,"Attributes for t-test; default for classification: '"+
+ VAL_ATTR_TTEST_CLASS.join(",")+"', default for regression: '"+VAL_ATTR_TTEST_REGR.join(",")+"'")
+ end
content_type "text/html"
OpenTox.text_to_html rs.get_all_reports(params[:report_type], params),@subjectid,related_links,description,post_command
else
@@ -129,7 +133,7 @@ post '/report/:type' do
task = OpenTox::Task.create("Create report",url_for("/report/"+params[:type], :full)) do |task| #,params
perform do |rs|
rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil,
- params[:identifier]?params[:identifier].split(/\n|,/):nil,@subjectid,task)
+ params[:identifier]?params[:identifier].split(/\n|,/):nil,params,@subjectid,task)
end
end
return_task(task)
diff --git a/report/report_content.rb b/report/report_content.rb
index 30118cf..ea2ad5a 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -25,28 +25,35 @@ class Reports::ReportContent
def add_paired_ttest_tables( validation_set,
group_attribute,
test_attributes,
+ ttest_level = 0.9,
section_title = "Paired t-test",
section_text = nil)
+ raise "no test_attributes given: "+test_attributes.inspect unless test_attributes.is_a?(Array) and test_attributes.size>0
section_test = @xml_report.add_section(@current_section, section_title)
@xml_report.add_paragraph(section_test, section_text) if section_text
+
+ test_attributes.each do |test_attribute|
+ accept_values = validation_set.get_accept_values_for_attr(test_attribute)
+ accept_values = [nil] unless accept_values and accept_values.size>0
+ #puts "t-test for "+test_attribute.to_s+", class values: "+accept_values.to_s
- test_attributes.each do |test_attribute|
- level = 0.90
- test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations,
- group_attribute, test_attribute, "paired_ttest", level )
- #puts test_matrix.inspect
- titles = test_matrix[:titles]
- matrix = test_matrix[:matrix]
- table = []
- #puts titles.inspect
- table << [""] + titles
- titles.size.times do |i|
- table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? "-" : "+") }
+ accept_values.each do |accept_value|
+ test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations,
+ group_attribute, test_attribute, accept_value, "paired_ttest", ttest_level )
+ #puts test_matrix.inspect
+ titles = test_matrix[:titles]
+ matrix = test_matrix[:matrix]
+ table = []
+ #puts titles.inspect
+ table << [""] + titles
+ titles.size.times do |i|
+ table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? "-" : "+") }
+ end
+ accept_value_str = accept_value!=nil ? " for class-value '"+accept_value.to_s+"'" : ""
+ @xml_report.add_table(section_test, test_attribute.to_s+accept_value_str+", significance-level: "+ttest_level.to_s+", num results: "+
+ test_matrix[:num_results].to_s, table, true, true)
end
-
- @xml_report.add_table(section_test, test_attribute.to_s+", significance-level: "+level.to_s+", num results: "+
- test_matrix[:num_results].to_s, table, true, true)
end
Reports::ReportStatisticalTest.quit_r
end
diff --git a/report/report_factory.rb b/report/report_factory.rb
index fcd9bab..25bc4d9 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -15,6 +15,9 @@ VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error,
VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
+VAL_ATTR_TTEST_REGR = [:r_square, :root_mean_squared_error]
+VAL_ATTR_TTEST_CLASS = [:percent_correct, :weighted_area_under_roc]
+
# = Reports::ReportFactory
#
@@ -33,14 +36,14 @@ module Reports::ReportFactory
# call-seq:
# self.create_report(type, validation_set) => Reports::ReportContent
#
- def self.create_report(type, validation_set, task=nil)
+ def self.create_report(type, validation_set, params={}, task=nil)
case type
when RT_VALIDATION
create_report_validation(validation_set, task)
when RT_CV
create_report_crossvalidation(validation_set, task)
when RT_ALG_COMP
- create_report_compare_algorithms(validation_set, task)
+ create_report_compare_algorithms(validation_set, params, task)
else
raise "unknown report type "+type.to_s
end
@@ -151,7 +154,7 @@ module Reports::ReportFactory
report
end
- def self.create_report_compare_algorithms(validation_set, task=nil)
+ def self.create_report_compare_algorithms(validation_set, params={}, task=nil)
#validation_set.to_array([:test_dataset_uri, :model_uri, :algorithm_uri], false).each{|a| puts a.inspect}
raise OpenTox::BadRequestError.new("num validations is not >1") unless validation_set.size>1
@@ -165,13 +168,13 @@ module Reports::ReportFactory
else
raise OpenTox::BadRequestError.new("num different cross-validation-ids <2") if validation_set.num_different_values(:crossvalidation_id)<2
validation_set.load_cv_attributes
- compare_algorithms_crossvalidation(validation_set, task)
+ compare_algorithms_crossvalidation(validation_set, params, task)
end
end
# create Algorithm Comparison report
# crossvalidations, 1-n datasets, 2-n algorithms
- def self.compare_algorithms_crossvalidation(validation_set, task=nil)
+ def self.compare_algorithms_crossvalidation(validation_set, params={}, task=nil)
# groups results into sets with equal dataset
if (validation_set.num_different_values(:dataset_uri)>1)
@@ -203,13 +206,21 @@ module Reports::ReportFactory
case validation_set.unique_feature_type
when "classification"
result_attributes += VAL_ATTR_CLASS
- ttest_attributes = [:percent_correct, :weighted_area_under_roc]
+ ttest_attributes = VAL_ATTR_TTEST_CLASS
bar_plot_attributes = VAL_ATTR_BAR_PLOT_CLASS
else
result_attributes += VAL_ATTR_REGR
- ttest_attributes = [:r_square, :root_mean_squared_error]
+ ttest_attributes = VAL_ATTR_TTEST_REGR
bar_plot_attributes = VAL_ATTR_BAR_PLOT_REGR
end
+
+ if params[:ttest_attributes] and params[:ttest_attributes].chomp.size>0
+ ttest_attributes = params[:ttest_attributes].split(",").collect{|a| a.to_sym}
+ end
+ ttest_significance = 0.9
+ if params[:ttest_significance]
+ ttest_significance = params[:ttest_significance].to_f
+ end
dataset_grouping.each do |validations|
@@ -230,7 +241,7 @@ module Reports::ReportFactory
report.add_result(merged,result_attributes,res_titel,res_titel,res_text)
# pending: regression stats have different scales!!!
report.add_bar_plot(merged, :identifier, bar_plot_attributes) if validation_set.unique_feature_type=="classification"
- report.add_paired_ttest_tables(set, :identifier, ttest_attributes)
+ report.add_paired_ttest_tables(set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0
report.end_section
end
task.progress(100) if task
diff --git a/report/report_service.rb b/report/report_service.rb
index 3e23889..f299122 100644
--- a/report/report_service.rb
+++ b/report/report_service.rb
@@ -60,8 +60,9 @@ module Reports
# call-seq:
# create_report(type, validation_uris) => string
#
- def create_report(type, validation_uris, identifier=nil, subjectid=nil, task=nil)
+ def create_report(type, validation_uris, identifier=nil, params={}, subjectid=nil, task=nil)
+ raise "params is no hash" unless params.is_a?(Hash)
LOGGER.info "create report of type '"+type.to_s+"'"
check_report_type(type)
@@ -77,7 +78,7 @@ module Reports
task.progress(10) if task
#step 2: create report of type
- report_content = Reports::ReportFactory.create_report(type, validation_set,
+ report_content = Reports::ReportFactory.create_report(type, validation_set, params,
OpenTox::SubTask.create(task,10,90))
LOGGER.debug "report created"
diff --git a/report/statistical_test.rb b/report/statistical_test.rb
index 9461cac..8d6bd62 100644
--- a/report/statistical_test.rb
+++ b/report/statistical_test.rb
@@ -38,7 +38,7 @@ module Reports
class ReportStatisticalTest
# __grouped_validations__ : array of validation arrays
- def self.test_matrix( validations, group_attribute, test_attribute, test_method="paired_ttest", significance_level=0.95 )
+ def self.test_matrix( validations, group_attribute, test_attribute, class_value, test_method="paired_ttest", significance_level=0.95 )
raise "statistical-test: '"+test_method+"' does not exist" unless ReportStatisticalTest.respond_to?(test_method)
grouped_validations = Reports::Util.group(validations, [group_attribute])
@@ -60,17 +60,17 @@ module Reports
validations2 = grouped_validations[j]
title2 = validations2[0].send(group_attribute)
matrix[i][j] = ReportStatisticalTest.send(test_method,validations1,validations2,
- test_attribute, significance_level)
+ test_attribute, class_value, significance_level)
end
end
end
{:titles => titles, :matrix => matrix, :num_results => grouped_validations[0].size}
end
- def self.paired_ttest( validations1, validations2, attribute, significance_level=0.95 )
+ def self.paired_ttest( validations1, validations2, attribute, class_value, significance_level=0.95 )
- array1 = validations1.collect{ |v| v.send(attribute) }
- array2 = validations2.collect{ |v| v.send(attribute) }
+ array1 = validations1.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value] : v.send(attribute)) }
+ array2 = validations2.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value] : v.send(attribute)) }
LOGGER.debug "paired-t-testing "+attribute.to_s+" "+array1.inspect+" vs "+array2.inspect
LIB::StatisticalTest.pairedTTest(array1, array2, significance_level)
end
diff --git a/report/validation_data.rb b/report/validation_data.rb
index fa0af8e..aa146a6 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -1,8 +1,9 @@
# the variance is computed when merging results for these attributes
VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error,
- :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy ]
-VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc ] #:accuracy ]
+ :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy, :weighted_root_mean_squared_error, :weighted_mean_absolute_error,
+ :weighted_r_square ]
+VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc, :accuracy, :f_measure ]
ATTR_NICE_NAME = {}
@@ -24,7 +25,7 @@ class Object
if self==0
return "0"
elsif abs>0.1
- return "%.2f" % self
+ return "%.3f" % self
elsif abs>0.01
return "%.3f" % self
else
@@ -310,9 +311,9 @@ module Reports
def to_table( attribute_col, attribute_row, attribute_val)
row_values = get_values(attribute_row)
- #puts row_values.inspect
+ #puts "row: "+row_values.inspect
col_values = get_values(attribute_col)
- #puts col_values.inspect
+ #puts "col: "+col_values.inspect
# get domain for classification attribute, i.e. ["true","false"]
accept_values = get_accept_values_for_attr(attribute_val)
@@ -326,7 +327,7 @@ module Reports
val = nil
@validations.each do |v|
if v.send(attribute_row)==row and v.send(attribute_col)==col
- raise "two validation have equal row and column values"if val!=nil
+ #raise "two validation have equal row and column values: "+val.to_s if val!=nil
val = v.send(attribute_val)
val = val[accept_values[0]] if first_value_elem
val = val.to_nice_s
@@ -449,8 +450,18 @@ module Reports
return new_set
end
- def sort(attribute, ascending=true)
- @validations.sort!{ |a,b| a.send(attribute).to_s <=> b.send(attribute).to_s }
+ def sort(attributes, ascending=true)
+ attributes = [attributes] unless attributes.is_a?(Array)
+ @validations.sort! do |a,b|
+ val = 0
+ attributes.each do |attr|
+ if a.send(attr).to_s != b.send(attr).to_s
+ val = a.send(attr).to_s <=> b.send(attr).to_s
+ break
+ end
+ end
+ val
+ end
end
# creates a new validaiton set, that contains a ranking for __ranking_attribute__