Merge branch 'release/v3.0.0' (tag: v3.0.0)

author: mr <mr@mrautenberg.de> 2011-09-23 12:22:40 +0200
committer: mr <mr@mrautenberg.de> 2011-09-23 12:22:40 +0200
commit: 41b1e72eba222a337efa70e61734b34118f832c6 (patch)
tree: 15645033d7cf516f9c19cc11ec9d16bbdba43327
parent: b076615734eb9a51a29e3dc01e0c96d86e5c75c9 (diff)
parent: 9db1f68871ad3e9be92744fd908f9fee9eeb18a0 (diff)
10 files changed, 244 insertions, 90 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb
index b71359d..6c0e996 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -254,7 +254,6 @@ module Lib
       return res
     end
     
-    # does only take the instances that are classified as <class-index> into account
     def area_under_roc(class_index=nil)
       return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if 
         class_index==nil
@@ -268,11 +267,12 @@ module Lib
       tp_conf = []
       fp_conf = []
       (0..@predicted_values.size-1).each do |i|
-        if @predicted_values[i]==class_index
-          if @actual_values[i]==@predicted_values[i]
-            tp_conf.push(@confidence_values[i])
+        if @predicted_values[i]!=nil
+          c = @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1)
+          if @actual_values[i]==class_index
+            tp_conf << c
           else
-            fp_conf.push(@confidence_values[i])
+            fp_conf << c
           end
         end
       end
@@ -300,7 +300,11 @@ module Lib
     end
     
     def precision(class_index=nil)
-      return prediction_feature_value_map( lambda{ |i| precision(i) } ) if class_index==nil
+      return positive_predictive_value(class_index)
+    end
+    
+    def positive_predictive_value(class_index=nil)
+      return prediction_feature_value_map( lambda{ |i| positive_predictive_value(i) } ) if class_index==nil
       
       correct = 0 # all instances with prediction class_index that are correctly classified 
       total = 0 # all instances with prediciton class_index
@@ -312,6 +316,23 @@ module Lib
       return correct/total.to_f
     end
     
+    def negative_predictive_value(class_index=nil)
+      return prediction_feature_value_map( lambda{ |i| negative_predictive_value(i) } ) if class_index==nil
+      
+      correct = 0 # all instances not predicted as class_index whose actual class is not class_index either
+      total = 0 # all instances not predicted as class_index
+      (0..@num_classes-1).each do |i|
+        if i != class_index
+          (0..@num_classes-1).each do |j|
+            correct += @confusion_matrix[j][i] if j != class_index
+            total += @confusion_matrix[j][i]
+          end
+        end
+      end
+      return 0 if total==0
+      return correct/total.to_f
+    end
+    
     def recall(class_index=nil)
       return true_positive_rate(class_index)
     end
@@ -432,22 +453,18 @@ module Lib
       return incorrect
     end
     
-    # Note:
-    # * (un-weighted) area under roc is computed with all __predicted__ isntances for a certain class
-    # * weighted weights each auc with the number of __acutal__ instances
-    # its like that, because its like that in weka   
-    def weighted_area_under_roc
-      w_auc = weighted_measure( :area_under_roc )
+    def average_area_under_roc
+      w_auc = average_measure( :area_under_roc )
       w_auc.nan? ? 0 : w_auc
     end
     
-    def weighted_f_measure
-      return weighted_measure( :f_measure )
+    def average_f_measure
+      return average_measure( :f_measure )
     end
     
     private
-    # the <measure> is weighted with the number of instances for each actual class value 
-    def weighted_measure( measure )
+    # the <measure> is averaged over all classes, weighted by the number of instances of each actual class value
+    def average_measure( measure )
       
       sum_instances = 0
       num_instances_per_class = Array.new(@num_classes, 0)
@@ -562,19 +579,53 @@ module Lib
 
     # data for (roc-)plots ###################################################################################
     
-    def get_prediction_values(class_value)
+     def get_roc_prediction_values(class_value)
       
       #puts "get_roc_values for class_value: "+class_value.to_s
       raise "no confidence values" unless confidence_values_available?
-      #raise "no class-value specified" if class_value==nil
+      raise "no class-value specified" if class_value==nil
       
       class_index = @accept_values.index(class_value) if class_value!=nil
       raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil)
       
+      c = []; tp = []
+      (0..@predicted_values.size-1).each do |i|
+        if @predicted_values[i]!=nil
+          c << @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1)
+          if (@actual_values[i]==class_index)
+            tp << 1
+          else
+            tp << 0
+          end
+        end
+      end
+      
+      # DO NOT raise an exception here; the results of different validations may be concatenated
+      #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+      
+      h = {:true_positives => tp, :confidence_values => c}
+      #puts h.inspect
+      return h
+    end
+    
+    def get_prediction_values(actual_accept_value, predicted_accept_value)
+      
+      #puts "get_roc_values for class_value: "+class_value.to_s
+      raise "no confidence values" unless confidence_values_available?
+      #raise "no class-value specified" if class_value==nil
+      
+      actual_class_index = @accept_values.index(actual_accept_value) if actual_accept_value!=nil
+      raise "class not found '"+actual_accept_value.to_s+"' in "+@accept_values.inspect if (actual_accept_value!=nil && actual_class_index==nil)
+      
+      predicted_class_index = @accept_values.index(predicted_accept_value) if predicted_accept_value!=nil
+      raise "class not found "+predicted_accept_value.to_s+" in "+@accept_values.inspect if (predicted_accept_value!=nil && predicted_class_index==nil)
+      
       c = []; p = []; a = []
       (0..@predicted_values.size-1).each do |i|
         # NOTE: not predicted instances are ignored here
-        if @predicted_values[i]!=nil and (class_index==nil || @predicted_values[i]==class_index)
+        if @predicted_values[i]!=nil and 
+            (predicted_class_index==nil || @predicted_values[i]==predicted_class_index) and
+            (actual_class_index==nil || @actual_values[i]==actual_class_index)
           c << @confidence_values[i]
           p << @predicted_values[i]
           a << @actual_values[i]
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index fb7a8b5..be004fb 100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -18,19 +18,20 @@ module Validation
   # :classification_statistics
   VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix  ]
   VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, 
-    :weighted_area_under_roc, :accuracy, :weighted_accuracy ] 
+    :average_area_under_roc, :accuracy, :weighted_accuracy ] 
   VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
   
   # :class_value_statistics
   VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives, :num_false_negatives, 
                                 :num_true_positives, :num_true_negatives ]
   VAL_CLASS_PROPS_PER_CLASS_AVG = [ :area_under_roc, :false_negative_rate, :false_positive_rate,
-                                :f_measure, :precision, 
+                                :f_measure, :positive_predictive_value, :negative_predictive_value, 
                                 :true_negative_rate, :true_positive_rate ] #:recall,
   VAL_CLASS_PROPS_PER_CLASS = VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_CLASS_PROPS_PER_CLASS_AVG
   VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS = [ :num_false_positives, :num_false_negatives, 
                                 :num_true_positives, :num_true_negatives, :false_negative_rate, :false_positive_rate,
-                                :true_negative_rate, :true_positive_rate ] #:precision, :recall, 
+                                :true_negative_rate, :true_positive_rate, :area_under_roc, 
+                                :positive_predictive_value, :negative_predictive_value ] #:precision, :recall, 
                                 
   VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
 
diff --git a/reach_reports/reach_application.rb b/reach_reports/reach_application.rb
index e35df7b..b380c92 100755
--- a/reach_reports/reach_application.rb
+++ b/reach_reports/reach_application.rb
@@ -48,6 +48,7 @@ end
 get '/reach_report/:type' do
   type = extract_type(params)
   LOGGER.info "list all "+type+" reports"
+  uris = ReachReports.list_reports(type,params[:model] || params[:model_uri])
   if request.env['HTTP_ACCEPT'] =~ /text\/html/
     content_type "text/html"
     related_links = 
@@ -66,10 +67,10 @@ get '/reach_report/:type' do
     when /(?i)QPRF/
       #TODO
     end
-    OpenTox.text_to_html ReachReports.list_reports(type),@subjectid,related_links,description,post_command
+    OpenTox.text_to_html uris,@subjectid,related_links,description,post_command
   else
     content_type "text/uri-list"
-    ReachReports.list_reports(type)
+    uris
   end
 end
 
@@ -181,6 +182,12 @@ get '/reach_report/:type/:id/editor' do
 <j2se version="1.6+" java-vm-args="-Xincgc"/>
 
 <jar href="qmrfedit/OT_QMRFEditor.jar" download="eager" main="true"/>
+<jar href="qmrfedit/OT_QMRFEditor_lib/xercesImpl.jar" download="eager"/>
+<jar href="qmrfedit/OT_QMRFEditor_lib/itext-1.4.5.jar" download="lazy"/>
+<jar href="qmrfedit/OT_QMRFEditor_lib/poi-3.0.jar" download="lazy"/>
+<jar href="qmrfedit/OT_QMRFEditor_lib/poi-contrib.jar" download="lazy"/>
+<jar href="qmrfedit/OT_QMRFEditor_lib/poi-scratchpad.jar" download="lazy"/>
+<jar href="qmrfedit/OT_QMRFEditor_lib/commons-lang-2.3.jar" download="lazy"/>
 <jar href="qmrfedit/OT_QMRFEditor_lib/cdk-applications.jar" download="lazy" />
 <jar href="qmrfedit/OT_QMRFEditor_lib/cdk-builder3d.jar" download="lazy" />
 <jar href="qmrfedit/OT_QMRFEditor_lib/cdk-charges.jar" download="lazy" />
diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb
index fa4c0d7..5dd68e1 100755
--- a/reach_reports/reach_service.rb
+++ b/reach_reports/reach_service.rb
@@ -12,10 +12,12 @@ end
   
 module ReachReports
   
-  def self.list_reports(type)
+  def self.list_reports(type, model_uri=nil)
     case type
     when /(?i)QMRF/
-      ReachReports::QmrfReport.all.collect{ |r| r.report_uri }.join("\n")+"\n"
+      params = {}
+      params[:model_uri]=model_uri if model_uri
+      ReachReports::QmrfReport.all(params).collect{ |r| r.report_uri }.join("\n")+"\n"
     when /(?i)QPRF/
       ReachReports::QprfReport.all.collect{ |r| r.report_uri }.join("\n")+"\n"
     end
@@ -108,8 +110,8 @@ module ReachReports
     # TODO app_domain_description, app_domain_method, app_domain_software, applicability_limits
 
     #training_dataset = model.trainingDataset ? OpenTox::Dataset.find(model.trainingDataset+"/metadata") : nil
-    if ( OpenTox::Dataset.exist?(model.metadata[OT.trainingDataset]) )
-      training_dataset = OpenTox::Dataset.new( model.metadata[OT.trainingDataset] )
+    if ( OpenTox::Dataset.exist?(model.metadata[OT.trainingDataset], r.subjectid) )
+      training_dataset = OpenTox::Dataset.new( model.metadata[OT.trainingDataset], r.subjectid )
       training_dataset.load_metadata( r.subjectid )
     else
       training_dataset = nil
@@ -227,7 +229,7 @@ module ReachReports
         case feature_type
         when "classification"
           v << "percent_correct: "+validation.classification_statistics[:percent_correct].to_s
-          v << "weighted AUC: "+validation.classification_statistics[:weighted_area_under_roc].to_s
+          v << "average AUC: "+validation.classification_statistics[:average_area_under_roc].to_s
         when "regression"
           v << "root_mean_squared_error: "+validation.regression_statistics[:root_mean_squared_error].to_s
           v << "r_square "+validation.regression_statistics[:r_square].to_s
@@ -270,7 +272,7 @@ module ReachReports
         
     val_datasets.each do |data_uri|
       if OpenTox::Dataset.exist?(data_uri, r.subjectid)
-        d = OpenTox::Dataset.new(data_uri)
+        d = OpenTox::Dataset.new(data_uri, r.subjectid)
         d.load_metadata( r.subjectid)
         r.qsar_miscellaneous.attachment_validation_data << AttachmentValidationData.new( 
           { :description => d.title, 
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index bf59960..2074ce5 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -130,8 +130,43 @@ module Reports
       end
     end
     
+    def self.confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value )
+      true_class = nil
+      if actual_accept_value==nil and predicted_accept_value==nil
+        perf = "Accuracy"
+      elsif actual_accept_value!=nil
+        if validation_set.get_true_accept_value==actual_accept_value
+          perf = "True Positive Rate"
+          true_class = actual_accept_value
+        elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[actual_accept_value])[0]
+          perf = "True Negative Rate"
+          true_class = validation_set.get_true_accept_value
+        else
+          perf = "True Positive Rate"
+          true_class = actual_accept_value
+        end
+      elsif predicted_accept_value!=nil
+         if validation_set.get_true_accept_value==predicted_accept_value
+          perf = "Positive Predictive Value"
+          true_class = predicted_accept_value
+        elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[predicted_accept_value])[0]
+          perf = "Negative Predictive Value"
+          true_class = validation_set.get_true_accept_value
+        else
+          perf = "Positive Predictive Value"
+          true_class = predicted_accept_value
+        end
+      end
+      title = perf+" vs Confidence Plot"
+      title += " (with True-Class: '"+true_class.to_s+"')" if true_class!=nil 
+      {:title =>title, :performance => perf}
+    end
+    
     
-    def self.create_confidence_plot( out_files, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
+    def self.create_confidence_plot( out_files, validation_set, actual_accept_value = nil,
+                            predicted_accept_value = nil, split_set_attribute=nil, show_single_curves=false )
+                            
+      raise "param combination not supported" if actual_accept_value!=nil and predicted_accept_value!=nil
       
       out_files = [out_files] unless out_files.is_a?(Array)
       LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_files.inspect
@@ -143,7 +178,7 @@ module Reports
         performance = []
         attribute_values.each do |value|
           begin
-            data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
+            data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), actual_accept_value, predicted_accept_value, false)
             names << split_set_attribute.to_s.nice_attr+" "+value.to_s
             confidence << data[:confidence][0]
             performance << data[:performance][0]
@@ -155,17 +190,19 @@ module Reports
         out_files.each do |out_file|
           case validation_set.unique_feature_type
           when "classification"
-            RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance)
+            info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value )
+            RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], names, confidence, performance)
           when "regression"
             RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true)
           end
         end
       else
-        data = transform_confidence_predictions(validation_set, class_value, show_single_curves)
+        data = transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, show_single_curves)
         out_files.each do |out_file|
           case validation_set.unique_feature_type
           when "classification"
-            RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance])
+            info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value )
+            RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], data[:names], data[:confidence], data[:performance])
           when "regression"
             RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true)
           end
@@ -294,15 +331,14 @@ module Reports
     private
     def self.transform_roc_predictions(validation_set, class_value, add_label=true )
       if (validation_set.size > 1)
-        values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
+        values = { :true_positives  => [], :confidence_values => []}
         (0..validation_set.size-1).each do |i|
-          roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
-          values[:predicted_values] += roc_values[:predicted_values]
+          roc_values = validation_set.get(i).get_predictions.get_roc_prediction_values(class_value)
+          values[:true_positives ] += roc_values[:true_positives ]
           values[:confidence_values] += roc_values[:confidence_values]
-          values[:actual_values] += roc_values[:actual_values]
         end
       else
-        values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
+        values = validation_set.validations[0].get_predictions.get_roc_prediction_values(class_value)
       end
       tp_fp_rates = get_tp_fp_rates(values)
       labels = []
@@ -313,7 +349,7 @@ module Reports
     end
     
     
-    def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false)
+    def self.transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, add_single_folds=false)
       
       if (validation_set.size > 1)
         
@@ -321,7 +357,7 @@ module Reports
         sum_confidence_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
         
         (0..validation_set.size-1).each do |i|
-          confidence_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
+          confidence_values = validation_set.get(i).get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value)
           sum_confidence_values[:predicted_values] += confidence_values[:predicted_values]
           sum_confidence_values[:confidence_values] += confidence_values[:confidence_values]
           sum_confidence_values[:actual_values] += confidence_values[:actual_values]
@@ -346,7 +382,7 @@ module Reports
         return { :names => names, :performance => performance, :confidence => confidence, :faint => faint }
         
       else
-        confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
+        confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value)
         pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type)
         return { :names => [""], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] }
       end
@@ -357,8 +393,7 @@ module Reports
 #                    :predicted_values =>  [1, 0, 0, 1, 0, 1],
 #                    :actual_values =>     [0, 1, 0, 0, 1, 1]}
       roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4], 
-                    :predicted_values =>  [1, 1, 1, 1, 1, 1],
-                    :actual_values =>     [1, 0, 1, 0, 1, 0]}
+                    :true_positives =>    [1, 1, 1, 0, 1, 0]}
       tp_fp_rates = get_tp_fp_rates(roc_values)
       labels = []
       tp_fp_rates[:youden].each do |point,confidence|
@@ -431,16 +466,15 @@ module Reports
     def self.get_tp_fp_rates(roc_values)
       
       c = roc_values[:confidence_values]
-      p = roc_values[:predicted_values]
-      a = roc_values[:actual_values]
-      raise "no prediction values for roc-plot" if p.size==0
+      tp = roc_values[:true_positives]
+      raise "no prediction values for roc-plot" if tp.size==0
       
       # hack for painting perfect/worst roc curve, otherwhise fp/tp-rate will always be 100%
       # determine if perfect/worst roc curve
       fp_found = false
       tp_found = false
-      (0..p.size-1).each do |i|
-        if a[i]!=p[i]
+      (0..tp.size-1).each do |i|
+        if tp[i]==0
           fp_found |= true
         else
           tp_found |=true
@@ -448,28 +482,26 @@ module Reports
         break if tp_found and fp_found
       end
       unless fp_found and tp_found #if perfect/worst add wrong/right instance with lowest confidence
-        a << (tp_found ? 0 : 1)
-        p << 1
+        tp << (tp_found ? 0 : 1)
         c << -Float::MAX
       end
       
-      (0..p.size-2).each do |i|
-        ((i+1)..p.size-1).each do |j|
+      (0..tp.size-2).each do |i|
+        ((i+1)..tp.size-1).each do |j|
           if c[i]<c[j]
             c.swap!(i,j)
-            a.swap!(i,j)
-            p.swap!(i,j)
+            tp.swap!(i,j)
           end
         end
       end
-      #puts c.inspect+"\n"+a.inspect+"\n"+p.inspect+"\n\n"
+      #puts c.inspect+"\n"+tp.inspect+"\n\n"
       
       tp_rate = [0]
       fp_rate = [0]
       w = [1]
       c2 = [Float::MAX]
-      (0..p.size-1).each do |i|
-        if a[i]==p[i]
+      (0..tp.size-1).each do |i|
+        if tp[i]==1
           tp_rate << tp_rate[-1]+1
           fp_rate << fp_rate[-1]
         else
diff --git a/report/report_application.rb b/report/report_application.rb
index b96fb27..5fec6d1 100755
--- a/report/report_application.rb
+++ b/report/report_application.rb
@@ -50,6 +50,10 @@ get '/report/?' do
   end
 end
 
+def wrap(s, width=78)
+  s.gsub(/(.{1,#{width}})(\s+|\Z)/, "\\1\n")
+end
+
 get '/report/:report_type' do
   perform do |rs|
     case request.env['HTTP_ACCEPT'].to_s
@@ -60,8 +64,15 @@ get '/report/:report_type' do
         "Crossvalidations:       "+url_for("/crossvalidation",:full)
       description = 
         "A list of all "+params[:report_type]+" reports. To create a report, use the POST method."
+      if params[:report_type]=="algorithm_comparison"
+        description += "\n\nThis report can be used to compare the validation results of different algorithms that have been validated on the same dataset."
+        description += "\nThe following attributes can be compared with the t-test:"
+        description += "\n\n* All validation types:\n"+wrap((Validation::VAL_PROPS_SUM+Validation::VAL_PROPS_AVG).join(", "),120)
+        description += "\n* Classification validations:\n"+wrap(Validation::VAL_CLASS_PROPS.join(", "),120)
+        description += "\n* Regresssion validations:\n"+wrap(Validation::VAL_REGR_PROPS.join(", "),120)
+      end
+        
       post_params = [[:validation_uris]]
-      
       post_command = OpenTox::PostCommand.new request.url,"Create validation report"
       val_uri_description = params[:report_type]=="algorithm_comparison" ? "Separate multiple uris with ','" : nil
       # trick for easy report creation
diff --git a/report/report_content.rb b/report/report_content.rb
index 8c437a8..8d6d44b 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -179,13 +179,14 @@ class Reports::ReportContent
   def add_roc_plot( validation_set, 
                     accept_value, 
                     split_set_attribute=nil, 
-                    image_title = "ROC Plot", 
+                    image_title = nil, 
                     section_text="")
                             
     #section_roc = @xml_report.add_section(@current_section, section_title)
     section_roc = @current_section
     prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? }
-        
+    image_title = "ROC Plot (true class is '"+accept_value.to_s+"')" unless image_title
+    
     if prediction_set.size>0
       if prediction_set.size!=validation_set.size
         section_text += "\nWARNING: roc plot information not available for all validation results"
@@ -212,9 +213,10 @@ class Reports::ReportContent
   end
   
   def add_confidence_plot( validation_set,
-                            accept_value = nil,
+                            actual_accept_value = nil,
+                            predicted_accept_value = nil,
                             split_set_attribute = nil,
-                            image_title = "Percent Correct vs Confidence Plot",
+                            image_title = "Confidence Plot",
                             section_text="")
                             
     #section_conf = @xml_report.add_section(@current_section, section_title)
@@ -232,7 +234,7 @@ class Reports::ReportContent
       begin
         plot_png = add_tmp_file("conf_plot", "png")
         plot_svg = add_tmp_file("conf_plot", "svg")
-        Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, accept_value, split_set_attribute, false )
+        Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, actual_accept_value, predicted_accept_value, split_set_attribute, false )
         @xml_report.add_imagefigure(section_conf, image_title, plot_png[:name], "PNG", 100, plot_svg[:name])
       rescue Exception => ex
         msg = "WARNING could not create confidence plot: "+ex.message
diff --git a/report/report_factory.rb b/report/report_factory.rb
index 340f276..9995b42 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -5,19 +5,19 @@ VAL_ATTR_TRAIN_TEST = [ :model_uri, :training_dataset_uri, :test_dataset_uri, :p
 VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold ]
 
 # selected attributes of interest when performing classification
-VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :weighted_area_under_roc,
-  :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
+VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :average_area_under_roc,
+  :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ]
 VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error, 
   :weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square,
   :sample_correlation_coefficient ]
 
-#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc, 
+#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :average_area_under_roc, 
 #  :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
-VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate ]
+VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ]
 VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
 
-VAL_ATTR_TTEST_REGR = [:r_square, :root_mean_squared_error]
-VAL_ATTR_TTEST_CLASS = [:percent_correct, :weighted_area_under_roc]
+VAL_ATTR_TTEST_REGR = [ :r_square, :root_mean_squared_error ]
+VAL_ATTR_TTEST_CLASS = [ :accuracy, :average_area_under_roc ]
 
 
 # = Reports::ReportFactory 
@@ -76,11 +76,20 @@ module Reports::ReportFactory
       report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results")
       report.add_confusion_matrix(val)
       report.add_section("Plots")
-      ([nil] + validation_set.get_accept_values).each do |accept_value|
-        report.add_roc_plot(validation_set, accept_value)
-        report.add_confidence_plot(validation_set, accept_value)
-        title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
-        report.align_last_two_images title
+      if (validation_set.get_accept_values.size == 2)
+        if validation_set.get_true_accept_value!=nil
+          report.add_roc_plot(validation_set, validation_set.get_true_accept_value)
+        else
+          report.add_roc_plot(validation_set, validation_set.get_accept_values[0])
+          report.add_roc_plot(validation_set, validation_set.get_accept_values[1])
+          report.align_last_two_images "ROC Plots"
+        end
+      end
+      report.add_confidence_plot(validation_set)
+      validation_set.get_accept_values.each do |accept_value|
+        report.add_confidence_plot(validation_set, accept_value, nil)
+        report.add_confidence_plot(validation_set, nil, accept_value)
+        report.align_last_two_images "Confidence Plots"
       end
       report.end_section
     when "regression"
@@ -127,12 +136,21 @@ module Reports::ReportFactory
       report.add_confusion_matrix(cv_set.validations[0])
       report.add_section("Plots")
       [nil, :crossvalidation_fold].each do |split_attribute|
-        ([nil] + validation_set.get_accept_values).each do |accept_value|
-          report.add_roc_plot(validation_set, accept_value, split_attribute)
-          report.add_confidence_plot(validation_set, accept_value, split_attribute)
-          title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
-          title += split_attribute ? ", separated by crossvalidation fold" : " (accumulated over all folds)"
-          report.align_last_two_images title
+        
+        if (validation_set.get_accept_values.size == 2)
+          if validation_set.get_true_accept_value!=nil
+            report.add_roc_plot(validation_set, validation_set.get_true_accept_value,split_attribute)
+          else
+            report.add_roc_plot(validation_set, validation_set.get_accept_values[0],split_attribute)
+            report.add_roc_plot(validation_set, validation_set.get_accept_values[1],split_attribute)
+            report.align_last_two_images "ROC Plots"
+          end
+        end
+        report.add_confidence_plot(validation_set,nil,nil,split_attribute)
+        validation_set.get_accept_values.each do |accept_value|
+          report.add_confidence_plot(validation_set, accept_value, nil,split_attribute)
+          report.add_confidence_plot(validation_set, nil, accept_value,split_attribute)
+          report.align_last_two_images "Confidence Plots"
         end
       end
       report.end_section
@@ -199,8 +217,8 @@ module Reports::ReportFactory
     if (validation_set.num_different_values(:dataset_uri)>1)
       all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri])
       report.add_ranking_plots(all_merged, :algorithm_uri, :dataset_uri,
-        [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate] )
-      report.add_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate])
+        [:percent_correct, :average_area_under_roc, :true_positive_rate, :true_negative_rate] )
+      report.add_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :average_area_under_roc, :true_positive_rate, :true_negative_rate])
     end
       
     result_attributes = [:identifier,:crossvalidation_uri,:crossvalidation_report_uri]+VAL_ATTR_CV-[:crossvalidation_fold,:num_folds,:dataset_uri]
@@ -222,6 +240,12 @@ module Reports::ReportFactory
     if params[:ttest_significance]
       ttest_significance = params[:ttest_significance].to_f
     end
+    
+    bar_plot_attributes += ttest_attributes
+    bar_plot_attributes.uniq!
+    
+    result_attributes += ttest_attributes
+    result_attributes.uniq!
       
     dataset_grouping.each do |validations|
     
diff --git a/report/validation_data.rb b/report/validation_data.rb
index aa146a6..f5ecae7 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -1,9 +1,9 @@
 
 # the variance is computed when merging results for these attributes 
 VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, 
-  :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy, :weighted_root_mean_squared_error, :weighted_mean_absolute_error, 
+  :r_square, :accuracy, :average_area_under_roc, :weighted_accuracy, :weighted_root_mean_squared_error, :weighted_mean_absolute_error, 
   :weighted_r_square  ]
-VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc, :accuracy, :f_measure ]
+VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :average_area_under_roc, :accuracy, :f_measure ]
 
 ATTR_NICE_NAME = {}
 
@@ -263,6 +263,18 @@ module Reports
       return unique_value("get_accept_values")
     end
     
+    # Returns the one accept value that matches TRUE_REGEXP when there are exactly
+    # two accept values and only one of them matches; returns nil in any other case.
+    def get_true_accept_value()
+      candidates = get_accept_values()
+      return nil unless candidates.size==2
+      first_matches = candidates[0] =~ TRUE_REGEXP
+      second_matches = candidates[1] =~ TRUE_REGEXP
+      return candidates[0] if first_matches && !second_matches
+      return candidates[1] if second_matches && !first_matches
+      nil
+    end
+    
     def get_accept_values_for_attr( attribute )
       if !Validation::Validation.classification_property?(attribute)
         []
@@ -270,9 +282,8 @@ module Reports
         accept_values = get_accept_values()
         if !Validation::Validation.depends_on_class_value?(attribute)
           [ nil ]
-        elsif accept_values.size==2 and 
-            Validation::Validation.complement_exists?(attribute)
-          [ accept_values[0] ]
+        elsif accept_values.size==2 and get_true_accept_value()!=nil and Validation::Validation.complement_exists?(attribute)
+          [ get_true_accept_value() ]
         else
           accept_values
         end
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index d2dfef0..7e0e10f 100755
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -8,6 +8,12 @@ require 'validation/validation_service.rb'
 
 get '/crossvalidation/?' do
   LOGGER.info "list all crossvalidations"
+  model_uri = params.delete("model") || params.delete("model_uri")
+  if model_uri
+    model = OpenTox::Model::Generic.find(model_uri, @subjectid)
+    params[:algorithm] = model.metadata[OT.algorithm]
+    params[:dataset] = model.metadata[OT.trainingDataset]
+  end
   uri_list = Lib::OhmUtil.find( Validation::Crossvalidation, params ).sort.collect{|v| v.crossvalidation_uri}.join("\n") + "\n"
   if request.env['HTTP_ACCEPT'] =~ /text\/html/
     related_links = 
@@ -400,7 +406,14 @@ get '/bootstrapping' do
       "All validations:    "+url_for("/",:full)+"\n"+
       "Validation reports: "+url_for("/report/validation",:full)
     description = 
-        "A list of all bootstrapping-validations.\n"+
+        "A list of all bootstrapping-validations.\n\n"+
+        "Bootstrapping performs sampling with replacement to create a training dataset and test dataset from the orignial dataset.\n"+
+        "Subsequently, a model is built with the training dataset and validated on the test-dataset.\n\n"+
+        "Quote from R Kohavi - A study of cross-validation and bootstrap for accuracy estimation and model selection,\n"+
+        "International joint Conference on artificial intelligence, 1995:\n"+
+        "'Given a dataset of size n, a bootstrap sample is created by sampling n instances uniformly from the data (with replacement).\n"+
+        " Since the dataset is sampled with replacement, the probability of any given instance not being chosen after n samples is (1 - 1/n)^n = e^-1 = 0.368;\n"+
+        " the expected number of distinct instances from the original dataset appearing in the test set is thus 0.632n.'\n\n"+
         "To perform a bootstrapping-validation use the POST method."
     post_command = OpenTox::PostCommand.new request.url,"Perform bootstrapping-validation"
     post_command.attributes << OpenTox::PostAttribute.new("algorithm_uri")
author	mr <mr@mrautenberg.de>	2011-09-23 12:22:40 +0200
committer	mr <mr@mrautenberg.de>	2011-09-23 12:22:40 +0200
commit	41b1e72eba222a337efa70e61734b34118f832c6 (patch)
tree	15645033d7cf516f9c19cc11ec9d16bbdba43327
parent	b076615734eb9a51a29e3dc01e0c96d86e5c75c9 (diff)
parent	9db1f68871ad3e9be92744fd908f9fee9eeb18a0 (diff)