add control of t-tests to algorithm-comparison reports

author: mguetlein <martin.guetlein@gmail.com> 2011-07-01 11:18:41 +0200
committer: mguetlein <martin.guetlein@gmail.com> 2011-07-01 11:18:41 +0200
commit: 2ae0e7a344954b97b66c47094292084741b320e4 (patch)
tree: 2c6ca7f4f4d8551bd630b49a7aaa4f3a3cd351d7
parent: 95de1906994672c12d4d4b6fd1f5f09695fb1c60 (diff)
6 files changed, 75 insertions, 41 deletions
diff --git a/report/report_application.rb b/report/report_application.rb
index 8f74bd9..b96fb27 100755
--- a/report/report_application.rb
+++ b/report/report_application.rb
@@ -68,8 +68,12 @@ get '/report/:report_type' do
       # if searching for a report, ?validation="uri" or ?crossvalidaiton="uri" is given as search param
       # use this (search param has equal name as report type) as default value for validation_uri 
       post_command.attributes << OpenTox::PostAttribute.new("validation_uris",true,params[params[:report_type]],val_uri_description)
-      post_command.attributes << OpenTox::PostAttribute.new("identifier",true,nil,"Specifiy one identifier for each uri, separated with ','") if
-        params[:report_type]=="algorithm_comparison"
+      if params[:report_type]=="algorithm_comparison"
+        post_command.attributes << OpenTox::PostAttribute.new("identifier",true,nil,"Specifiy one identifier for each uri, separated with ','")
+        post_command.attributes << OpenTox::PostAttribute.new("ttest_significance",false,"0.9","Significance level for t-tests (Set to '0' to disable t-test).")
+        post_command.attributes << OpenTox::PostAttribute.new("ttest_attributes",false,nil,"Attributes for t-test; default for classification: '"+
+          VAL_ATTR_TTEST_CLASS.join(",")+"', default for regression: '"+VAL_ATTR_TTEST_REGR.join(",")+"'")
+      end
       content_type "text/html"
       OpenTox.text_to_html rs.get_all_reports(params[:report_type], params),@subjectid,related_links,description,post_command
     else
@@ -129,7 +133,7 @@ post '/report/:type' do
   task = OpenTox::Task.create("Create report",url_for("/report/"+params[:type], :full)) do |task| #,params
     perform do |rs|
       rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil,
-        params[:identifier]?params[:identifier].split(/\n|,/):nil,@subjectid,task)
+        params[:identifier]?params[:identifier].split(/\n|,/):nil,params,@subjectid,task)
     end
   end
   return_task(task)
diff --git a/report/report_content.rb b/report/report_content.rb
index 30118cf..ea2ad5a 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -25,28 +25,35 @@ class Reports::ReportContent
   def add_paired_ttest_tables( validation_set,
                        group_attribute, 
                        test_attributes,
+                       ttest_level = 0.9,
                        section_title = "Paired t-test",
                        section_text = nil)
 
+    raise "no test_attributes given: "+test_attributes.inspect unless test_attributes.is_a?(Array) and test_attributes.size>0
     section_test = @xml_report.add_section(@current_section, section_title)
     @xml_report.add_paragraph(section_test, section_text) if section_text
+        
+    test_attributes.each do |test_attribute|         
+      accept_values = validation_set.get_accept_values_for_attr(test_attribute)
+      accept_values = [nil] unless accept_values and accept_values.size>0
+      #puts "t-test for "+test_attribute.to_s+", class values: "+accept_values.to_s
       
-    test_attributes.each do |test_attribute|                    
-      level = 0.90                       
-      test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations, 
-        group_attribute, test_attribute, "paired_ttest", level )
-      #puts test_matrix.inspect
-      titles = test_matrix[:titles]
-      matrix = test_matrix[:matrix]
-      table = []
-      #puts titles.inspect
-      table << [""] + titles
-      titles.size.times do |i|
-        table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? "-" : "+") }
+      accept_values.each do |accept_value| 
+        test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations, 
+          group_attribute, test_attribute, accept_value, "paired_ttest", ttest_level )
+        #puts test_matrix.inspect
+        titles = test_matrix[:titles]
+        matrix = test_matrix[:matrix]
+        table = []
+        #puts titles.inspect
+        table << [""] + titles
+        titles.size.times do |i|
+          table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? "-" : "+") }
+        end
+        accept_value_str = accept_value!=nil ? " for class-value '"+accept_value.to_s+"'" : ""
+        @xml_report.add_table(section_test, test_attribute.to_s+accept_value_str+", significance-level: "+ttest_level.to_s+", num results: "+
+          test_matrix[:num_results].to_s, table, true, true)
       end
-      
-      @xml_report.add_table(section_test, test_attribute.to_s+", significance-level: "+level.to_s+", num results: "+
-        test_matrix[:num_results].to_s, table, true, true)
     end
     Reports::ReportStatisticalTest.quit_r
   end
diff --git a/report/report_factory.rb b/report/report_factory.rb
index fcd9bab..25bc4d9 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -15,6 +15,9 @@ VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error,
 VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate ]
 VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
 
+VAL_ATTR_TTEST_REGR = [:r_square, :root_mean_squared_error]
+VAL_ATTR_TTEST_CLASS = [:percent_correct, :weighted_area_under_roc]
+
 
 # = Reports::ReportFactory 
 #
@@ -33,14 +36,14 @@ module Reports::ReportFactory
   # call-seq:
   #   self.create_report(type, validation_set) => Reports::ReportContent
   #
-  def self.create_report(type, validation_set, task=nil)
+  def self.create_report(type, validation_set, params={}, task=nil)
     case type
     when RT_VALIDATION
       create_report_validation(validation_set, task)
     when RT_CV
       create_report_crossvalidation(validation_set, task)
     when RT_ALG_COMP
-      create_report_compare_algorithms(validation_set, task)
+      create_report_compare_algorithms(validation_set, params, task)
     else
       raise "unknown report type "+type.to_s
     end
@@ -151,7 +154,7 @@ module Reports::ReportFactory
     report
   end
   
-  def self.create_report_compare_algorithms(validation_set, task=nil)
+  def self.create_report_compare_algorithms(validation_set, params={}, task=nil)
     
     #validation_set.to_array([:test_dataset_uri, :model_uri, :algorithm_uri], false).each{|a| puts a.inspect}
     raise OpenTox::BadRequestError.new("num validations is not >1") unless validation_set.size>1
@@ -165,13 +168,13 @@ module Reports::ReportFactory
     else
       raise OpenTox::BadRequestError.new("num different cross-validation-ids <2") if validation_set.num_different_values(:crossvalidation_id)<2
       validation_set.load_cv_attributes
-      compare_algorithms_crossvalidation(validation_set, task)
+      compare_algorithms_crossvalidation(validation_set, params, task)
     end
   end  
   
   # create Algorithm Comparison report
   # crossvalidations, 1-n datasets, 2-n algorithms
-  def self.compare_algorithms_crossvalidation(validation_set, task=nil)
+  def self.compare_algorithms_crossvalidation(validation_set, params={}, task=nil)
     
     # groups results into sets with equal dataset 
     if (validation_set.num_different_values(:dataset_uri)>1)
@@ -203,13 +206,21 @@ module Reports::ReportFactory
     case validation_set.unique_feature_type
     when "classification"
       result_attributes += VAL_ATTR_CLASS
-      ttest_attributes = [:percent_correct, :weighted_area_under_roc]
+      ttest_attributes = VAL_ATTR_TTEST_CLASS
       bar_plot_attributes = VAL_ATTR_BAR_PLOT_CLASS
     else 
       result_attributes += VAL_ATTR_REGR
-      ttest_attributes = [:r_square, :root_mean_squared_error]
+      ttest_attributes = VAL_ATTR_TTEST_REGR
       bar_plot_attributes = VAL_ATTR_BAR_PLOT_REGR
     end
+    
+    if params[:ttest_attributes] and params[:ttest_attributes].chomp.size>0
+      ttest_attributes = params[:ttest_attributes].split(",").collect{|a| a.to_sym}
+    end
+    ttest_significance = 0.9
+    if params[:ttest_significance]
+      ttest_significance = params[:ttest_significance].to_f
+    end
       
     dataset_grouping.each do |validations|
     
@@ -230,7 +241,7 @@ module Reports::ReportFactory
       report.add_result(merged,result_attributes,res_titel,res_titel,res_text)
       # pending: regression stats have different scales!!!
       report.add_bar_plot(merged, :identifier, bar_plot_attributes) if validation_set.unique_feature_type=="classification"
-      report.add_paired_ttest_tables(set, :identifier, ttest_attributes)
+      report.add_paired_ttest_tables(set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0
       report.end_section
     end
     task.progress(100) if task
diff --git a/report/report_service.rb b/report/report_service.rb
index 3e23889..f299122 100644
--- a/report/report_service.rb
+++ b/report/report_service.rb
@@ -60,8 +60,9 @@ module Reports
     # call-seq:
     #   create_report(type, validation_uris) => string
     # 
-    def create_report(type, validation_uris, identifier=nil, subjectid=nil, task=nil)
+    def create_report(type, validation_uris, identifier=nil, params={}, subjectid=nil, task=nil)
       
+      raise "params is no hash" unless params.is_a?(Hash)
       LOGGER.info "create report of type '"+type.to_s+"'"
       check_report_type(type)
       
@@ -77,7 +78,7 @@ module Reports
       task.progress(10) if task
       
       #step 2: create report of type
-      report_content = Reports::ReportFactory.create_report(type, validation_set, 
+      report_content = Reports::ReportFactory.create_report(type, validation_set, params,
         OpenTox::SubTask.create(task,10,90))
       LOGGER.debug "report created"
       
diff --git a/report/statistical_test.rb b/report/statistical_test.rb
index 9461cac..8d6bd62 100644
--- a/report/statistical_test.rb
+++ b/report/statistical_test.rb
@@ -38,7 +38,7 @@ module Reports
   class ReportStatisticalTest
     
     # __grouped_validations__ : array of validation arrays
-    def self.test_matrix( validations, group_attribute, test_attribute, test_method="paired_ttest", significance_level=0.95 )
+    def self.test_matrix( validations, group_attribute, test_attribute, class_value, test_method="paired_ttest", significance_level=0.95 )
       
       raise "statistical-test: '"+test_method+"' does not exist" unless ReportStatisticalTest.respond_to?(test_method)
       grouped_validations = Reports::Util.group(validations, [group_attribute])
@@ -60,17 +60,17 @@ module Reports
             validations2 = grouped_validations[j]
             title2 = validations2[0].send(group_attribute)
             matrix[i][j] = ReportStatisticalTest.send(test_method,validations1,validations2,
-              test_attribute, significance_level)
+              test_attribute, class_value, significance_level)
           end
         end
       end
       {:titles => titles, :matrix => matrix, :num_results => grouped_validations[0].size}
     end
     
-    def self.paired_ttest( validations1, validations2, attribute, significance_level=0.95 )
+    def self.paired_ttest( validations1, validations2, attribute, class_value, significance_level=0.95 )
       
-      array1 = validations1.collect{ |v| v.send(attribute) }
-      array2 = validations2.collect{ |v| v.send(attribute) }
+      array1 = validations1.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value] : v.send(attribute)) }
+      array2 = validations2.collect{ |v| (v.send(attribute).is_a?(Hash) ? v.send(attribute)[class_value] : v.send(attribute)) }
       LOGGER.debug "paired-t-testing "+attribute.to_s+" "+array1.inspect+" vs "+array2.inspect
       LIB::StatisticalTest.pairedTTest(array1, array2, significance_level)
     end
diff --git a/report/validation_data.rb b/report/validation_data.rb
index fa0af8e..aa146a6 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -1,8 +1,9 @@
 
 # the variance is computed when merging results for these attributes 
 VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, 
-  :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy  ]
-VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc ] #:accuracy ]
+  :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy, :weighted_root_mean_squared_error, :weighted_mean_absolute_error, 
+  :weighted_r_square  ]
+VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc, :accuracy, :f_measure ]
 
 ATTR_NICE_NAME = {}
 
@@ -24,7 +25,7 @@ class Object
       if self==0
         return "0"
       elsif abs>0.1
-        return "%.2f" % self
+        return "%.3f" % self
       elsif abs>0.01
         return "%.3f" % self
       else
@@ -310,9 +311,9 @@ module Reports
     def to_table( attribute_col, attribute_row, attribute_val)
       
       row_values = get_values(attribute_row)
-      #puts row_values.inspect
+      #puts "row: "+row_values.inspect
       col_values = get_values(attribute_col)
-      #puts col_values.inspect
+      #puts "col: "+col_values.inspect
       
       # get domain for classification attribute, i.e. ["true","false"]
       accept_values = get_accept_values_for_attr(attribute_val)
@@ -326,7 +327,7 @@ module Reports
           val = nil
           @validations.each do |v|
             if v.send(attribute_row)==row and v.send(attribute_col)==col
-              raise "two validation have equal row and column values"if val!=nil
+              #raise "two validation have equal row and column values: "+val.to_s if val!=nil
               val = v.send(attribute_val)
               val = val[accept_values[0]] if first_value_elem
               val = val.to_nice_s
@@ -449,8 +450,18 @@ module Reports
       return new_set
     end
     
-    def sort(attribute, ascending=true)
-      @validations.sort!{ |a,b| a.send(attribute).to_s <=> b.send(attribute).to_s }
+    def sort(attributes, ascending=true)
+      attributes = [attributes] unless attributes.is_a?(Array)
+      @validations.sort! do |a,b|
+        val = 0
+        attributes.each do |attr|
+          if a.send(attr).to_s != b.send(attr).to_s
+            val = a.send(attr).to_s <=> b.send(attr).to_s
+            break
+          end
+        end
+        val
+      end
     end
     
     # creates a new validaiton set, that contains a ranking for __ranking_attribute__
author	mguetlein <martin.guetlein@gmail.com>	2011-07-01 11:18:41 +0200
committer	mguetlein <martin.guetlein@gmail.com>	2011-07-01 11:18:41 +0200
commit	2ae0e7a344954b97b66c47094292084741b320e4 (patch)
tree	2c6ca7f4f4d8551bd630b49a7aaa4f3a3cd351d7
parent	95de1906994672c12d4d4b6fd1f5f09695fb1c60 (diff)