fix ROC stuff, rename weighted_auc to average_auc

author: mguetlein <martin.guetlein@gmail.com> 2011-08-18 10:38:51 +0200
committer: mguetlein <martin.guetlein@gmail.com> 2011-08-18 10:38:51 +0200
commit: d27d53d98238ede80fc3b1a0c277ca890a84c736 (patch)
tree: c40f2952c7b569976f5de8e754937e85c9a75ed6 /lib
parent: 01cc1d014f1f9ccdeb5925e3fa7d64b2d06c2085 (diff)
2 files changed, 43 insertions, 18 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb
index b71359d..bfb25da 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -254,7 +254,6 @@ module Lib
       return res
     end
     
-    # does only take the instances that are classified as <class-index> into account
     def area_under_roc(class_index=nil)
       return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if 
         class_index==nil
@@ -268,15 +267,16 @@ module Lib
       tp_conf = []
       fp_conf = []
       (0..@predicted_values.size-1).each do |i|
-        if @predicted_values[i]==class_index
-          if @actual_values[i]==@predicted_values[i]
-            tp_conf.push(@confidence_values[i])
+        if @predicted_values[i]!=nil
+          c = @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1)
+          if @actual_values[i]==class_index
+            tp_conf << c
           else
-            fp_conf.push(@confidence_values[i])
+            fp_conf << c
           end
         end
       end
-      #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
+      puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
       
       return 0.0 if tp_conf.size == 0
       return 1.0 if fp_conf.size == 0
@@ -432,22 +432,18 @@ module Lib
       return incorrect
     end
     
-    # Note:
-    # * (un-weighted) area under roc is computed with all __predicted__ isntances for a certain class
-    # * weighted weights each auc with the number of __acutal__ instances
-    # its like that, because its like that in weka   
-    def weighted_area_under_roc
-      w_auc = weighted_measure( :area_under_roc )
+    def average_area_under_roc
+      w_auc = average_measure( :area_under_roc )
       w_auc.nan? ? 0 : w_auc
     end
     
-    def weighted_f_measure
-      return weighted_measure( :f_measure )
+    def average_f_measure
+      return average_measure( :f_measure )
     end
     
     private
-    # the <measure> is weighted with the number of instances for each actual class value 
-    def weighted_measure( measure )
+    # the <measure> is averaged over the number of instances for each actual class value 
+    def average_measure( measure )
       
       sum_instances = 0
       num_instances_per_class = Array.new(@num_classes, 0)
@@ -562,6 +558,35 @@ module Lib
 
     # data for (roc-)plots ###################################################################################
     
+     def get_roc_prediction_values(class_value)
+      # Collects ROC input for class_value: for every instance with a non-nil prediction, a signed confidence and a 1/0 actual-class flag.
+      #puts "get_roc_values for class_value: "+class_value.to_s
+      raise "no confidence values" unless confidence_values_available?
+      raise "no class-value specified" if class_value==nil
+      
+      class_index = @accept_values.index(class_value) if class_value!=nil
+      raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil)
+      # c: confidence, negated when a different class was predicted; tp: 1 if the actual class is class_index, else 0 (same order as c)
+      c = []; tp = []
+      (0..@predicted_values.size-1).each do |i|
+        if @predicted_values[i]!=nil
+          c << @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1)
+          if (@actual_values[i]==class_index)
+            tp << 1
+          else
+            tp << 0
+          end
+        end
+      end
+      
+      # Deliberately do NOT raise when nothing was collected: predictions of several validations may have been concatenated
+      #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+      
+      h = {:true_positives => tp, :confidence_values => c}
+      #puts h.inspect
+      return h
+    end
+    
     def get_prediction_values(class_value)
       
       #puts "get_roc_values for class_value: "+class_value.to_s
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index fb7a8b5..9af43de 100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -18,7 +18,7 @@ module Validation
   # :classification_statistics
   VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix  ]
   VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect, 
-    :weighted_area_under_roc, :accuracy, :weighted_accuracy ] 
+    :average_area_under_roc, :accuracy, :weighted_accuracy ] 
   VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
   
   # :class_value_statistics
@@ -30,7 +30,7 @@ module Validation
   VAL_CLASS_PROPS_PER_CLASS = VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_CLASS_PROPS_PER_CLASS_AVG
   VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS = [ :num_false_positives, :num_false_negatives, 
                                 :num_true_positives, :num_true_negatives, :false_negative_rate, :false_positive_rate,
-                                :true_negative_rate, :true_positive_rate ] #:precision, :recall, 
+                                :true_negative_rate, :true_positive_rate, :area_under_roc ] #:precision, :recall, 
                                 
   VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
author	mguetlein <martin.guetlein@gmail.com>	2011-08-18 10:38:51 +0200
committer	mguetlein <martin.guetlein@gmail.com>	2011-08-18 10:38:51 +0200
commit	d27d53d98238ede80fc3b1a0c277ca890a84c736 (patch)
tree	c40f2952c7b569976f5de8e754937e85c9a75ed6 /lib
parent	01cc1d014f1f9ccdeb5925e3fa7d64b2d06c2085 (diff)