summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mguetlein <martin.guetlein@gmail.com> 2011-08-18 10:38:51 +0200
committer: mguetlein <martin.guetlein@gmail.com> 2011-08-18 10:38:51 +0200
commit: d27d53d98238ede80fc3b1a0c277ca890a84c736 (patch)
tree: c40f2952c7b569976f5de8e754937e85c9a75ed6
parent: 01cc1d014f1f9ccdeb5925e3fa7d64b2d06c2085 (diff)
fix ROC stuff, rename weighted_auc to average_auc
-rwxr-xr-x lib/predictions.rb 57
-rwxr-xr-x lib/validation_db.rb 4
-rwxr-xr-x reach_reports/reach_service.rb 2
-rw-r--r-- report/plot_factory.rb 37
-rwxr-xr-x report/report_content.rb 5
-rwxr-xr-x report/report_factory.rb 36
-rwxr-xr-x report/validation_data.rb 4
7 files changed, 85 insertions, 60 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb
index b71359d..bfb25da 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -254,7 +254,6 @@ module Lib
return res
end
- # does only take the instances that are classified as <class-index> into account
def area_under_roc(class_index=nil)
return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if
class_index==nil
@@ -268,15 +267,16 @@ module Lib
tp_conf = []
fp_conf = []
(0..@predicted_values.size-1).each do |i|
- if @predicted_values[i]==class_index
- if @actual_values[i]==@predicted_values[i]
- tp_conf.push(@confidence_values[i])
+ if @predicted_values[i]!=nil
+ c = @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1)
+ if @actual_values[i]==class_index
+ tp_conf << c
else
- fp_conf.push(@confidence_values[i])
+ fp_conf << c
end
end
end
- #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
+ puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
return 0.0 if tp_conf.size == 0
return 1.0 if fp_conf.size == 0
@@ -432,22 +432,18 @@ module Lib
return incorrect
end
- # Note:
- # * (un-weighted) area under roc is computed with all __predicted__ isntances for a certain class
- # * weighted weights each auc with the number of __acutal__ instances
- # its like that, because its like that in weka
- def weighted_area_under_roc
- w_auc = weighted_measure( :area_under_roc )
+ def average_area_under_roc
+ w_auc = average_measure( :area_under_roc )
w_auc.nan? ? 0 : w_auc
end
- def weighted_f_measure
- return weighted_measure( :f_measure )
+ def average_f_measure
+ return average_measure( :f_measure )
end
private
- # the <measure> is weighted with the number of instances for each actual class value
- def weighted_measure( measure )
+ # the <measure> is averaged over the number of instances for each actual class value
+ def average_measure( measure )
sum_instances = 0
num_instances_per_class = Array.new(@num_classes, 0)
@@ -562,6 +558,35 @@ module Lib
# data for (roc-)plots ###################################################################################
+ def get_roc_prediction_values(class_value)
+
+ #puts "get_roc_values for class_value: "+class_value.to_s
+ raise "no confidence values" unless confidence_values_available?
+ raise "no class-value specified" if class_value==nil
+
+ class_index = @accept_values.index(class_value) if class_value!=nil
+ raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil)
+
+ c = []; tp = []
+ (0..@predicted_values.size-1).each do |i|
+ if @predicted_values[i]!=nil
+ c << @confidence_values[i] * (@predicted_values[i]==class_index ? 1 : -1)
+ if (@actual_values[i]==class_index)
+ tp << 1
+ else
+ tp << 0
+ end
+ end
+ end
+
+ # DO NOT raise exception here, maybe different validations are concated
+ #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+
+ h = {:true_positives => tp, :confidence_values => c}
+ #puts h.inspect
+ return h
+ end
+
def get_prediction_values(class_value)
#puts "get_roc_values for class_value: "+class_value.to_s
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index fb7a8b5..9af43de 100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -18,7 +18,7 @@ module Validation
# :classification_statistics
VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ]
VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect,
- :weighted_area_under_roc, :accuracy, :weighted_accuracy ]
+ :average_area_under_roc, :accuracy, :weighted_accuracy ]
VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
# :class_value_statistics
@@ -30,7 +30,7 @@ module Validation
VAL_CLASS_PROPS_PER_CLASS = VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_CLASS_PROPS_PER_CLASS_AVG
VAL_CLASS_PROPS_PER_CLASS_COMPLEMENT_EXISTS = [ :num_false_positives, :num_false_negatives,
:num_true_positives, :num_true_negatives, :false_negative_rate, :false_positive_rate,
- :true_negative_rate, :true_positive_rate ] #:precision, :recall,
+ :true_negative_rate, :true_positive_rate, :area_under_roc ] #:precision, :recall,
VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
diff --git a/reach_reports/reach_service.rb b/reach_reports/reach_service.rb
index 2030dbd..bfa760e 100755
--- a/reach_reports/reach_service.rb
+++ b/reach_reports/reach_service.rb
@@ -229,7 +229,7 @@ module ReachReports
case feature_type
when "classification"
v << "percent_correct: "+validation.classification_statistics[:percent_correct].to_s
- v << "weighted AUC: "+validation.classification_statistics[:weighted_area_under_roc].to_s
+ v << "average AUC: "+validation.classification_statistics[:average_area_under_roc].to_s
when "regression"
v << "root_mean_squared_error: "+validation.regression_statistics[:root_mean_squared_error].to_s
v << "r_square "+validation.regression_statistics[:r_square].to_s
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index bf59960..27e934d 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -294,15 +294,14 @@ module Reports
private
def self.transform_roc_predictions(validation_set, class_value, add_label=true )
if (validation_set.size > 1)
- values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
+ values = { :true_positives => [], :confidence_values => []}
(0..validation_set.size-1).each do |i|
- roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
- values[:predicted_values] += roc_values[:predicted_values]
+ roc_values = validation_set.get(i).get_predictions.get_roc_prediction_values(class_value)
+ values[:true_positives ] += roc_values[:true_positives ]
values[:confidence_values] += roc_values[:confidence_values]
- values[:actual_values] += roc_values[:actual_values]
end
else
- values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
+ values = validation_set.validations[0].get_predictions.get_roc_prediction_values(class_value)
end
tp_fp_rates = get_tp_fp_rates(values)
labels = []
@@ -357,8 +356,7 @@ module Reports
# :predicted_values => [1, 0, 0, 1, 0, 1],
# :actual_values => [0, 1, 0, 0, 1, 1]}
roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
- :predicted_values => [1, 1, 1, 1, 1, 1],
- :actual_values => [1, 0, 1, 0, 1, 0]}
+ :true_positives => [1, 1, 1, 0, 1, 0]}
tp_fp_rates = get_tp_fp_rates(roc_values)
labels = []
tp_fp_rates[:youden].each do |point,confidence|
@@ -431,16 +429,15 @@ module Reports
def self.get_tp_fp_rates(roc_values)
c = roc_values[:confidence_values]
- p = roc_values[:predicted_values]
- a = roc_values[:actual_values]
- raise "no prediction values for roc-plot" if p.size==0
+ tp = roc_values[:true_positives]
+ raise "no prediction values for roc-plot" if tp.size==0
# hack for painting perfect/worst roc curve, otherwhise fp/tp-rate will always be 100%
# determine if perfect/worst roc curve
fp_found = false
tp_found = false
- (0..p.size-1).each do |i|
- if a[i]!=p[i]
+ (0..tp.size-1).each do |i|
+ if tp[i]==0
fp_found |= true
else
tp_found |=true
@@ -448,28 +445,26 @@ module Reports
break if tp_found and fp_found
end
unless fp_found and tp_found #if perfect/worst add wrong/right instance with lowest confidence
- a << (tp_found ? 0 : 1)
- p << 1
+ tp << (tp_found ? 0 : 1)
c << -Float::MAX
end
- (0..p.size-2).each do |i|
- ((i+1)..p.size-1).each do |j|
+ (0..tp.size-2).each do |i|
+ ((i+1)..tp.size-1).each do |j|
if c[i]<c[j]
c.swap!(i,j)
- a.swap!(i,j)
- p.swap!(i,j)
+ tp.swap!(i,j)
end
end
end
- #puts c.inspect+"\n"+a.inspect+"\n"+p.inspect+"\n\n"
+ #puts c.inspect+"\n"+tp.inspect+"\n\n"
tp_rate = [0]
fp_rate = [0]
w = [1]
c2 = [Float::MAX]
- (0..p.size-1).each do |i|
- if a[i]==p[i]
+ (0..tp.size-1).each do |i|
+ if tp[i]==1
tp_rate << tp_rate[-1]+1
fp_rate << fp_rate[-1]
else
diff --git a/report/report_content.rb b/report/report_content.rb
index 8c437a8..9c33038 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -179,13 +179,14 @@ class Reports::ReportContent
def add_roc_plot( validation_set,
accept_value,
split_set_attribute=nil,
- image_title = "ROC Plot",
+ image_title = nil,
section_text="")
#section_roc = @xml_report.add_section(@current_section, section_title)
section_roc = @current_section
prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? }
-
+ image_title = "ROC Plot (true class is '"+accept_value.to_s+"')" unless image_title
+
if prediction_set.size>0
if prediction_set.size!=validation_set.size
section_text += "\nWARNING: roc plot information not available for all validation results"
diff --git a/report/report_factory.rb b/report/report_factory.rb
index 340f276..1cf7b94 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -5,19 +5,19 @@ VAL_ATTR_TRAIN_TEST = [ :model_uri, :training_dataset_uri, :test_dataset_uri, :p
VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold ]
# selected attributes of interest when performing classification
-VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :weighted_area_under_roc,
+VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :average_area_under_roc,
:area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error,
:weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square,
:sample_correlation_coefficient ]
-#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc,
+#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :average_area_under_roc,
# :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
VAL_ATTR_TTEST_REGR = [:r_square, :root_mean_squared_error]
-VAL_ATTR_TTEST_CLASS = [:percent_correct, :weighted_area_under_roc]
+VAL_ATTR_TTEST_CLASS = [:percent_correct, :average_area_under_roc]
# = Reports::ReportFactory
@@ -76,11 +76,13 @@ module Reports::ReportFactory
report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results")
report.add_confusion_matrix(val)
report.add_section("Plots")
- ([nil] + validation_set.get_accept_values).each do |accept_value|
- report.add_roc_plot(validation_set, accept_value)
- report.add_confidence_plot(validation_set, accept_value)
- title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
- report.align_last_two_images title
+ report.add_confidence_plot(validation_set)
+ if (validation_set.get_accept_values.size == 2)
+ report.add_roc_plot(validation_set, validation_set.get_accept_values[0])
+ else
+ validation_set.get_accept_values.each do |accept_value|
+ report.add_roc_plot(validation_set, accept_value)
+ end
end
report.end_section
when "regression"
@@ -127,12 +129,14 @@ module Reports::ReportFactory
report.add_confusion_matrix(cv_set.validations[0])
report.add_section("Plots")
[nil, :crossvalidation_fold].each do |split_attribute|
- ([nil] + validation_set.get_accept_values).each do |accept_value|
- report.add_roc_plot(validation_set, accept_value, split_attribute)
- report.add_confidence_plot(validation_set, accept_value, split_attribute)
- title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
- title += split_attribute ? ", separated by crossvalidation fold" : " (accumulated over all folds)"
- report.align_last_two_images title
+
+ report.add_confidence_plot(validation_set,nil,split_attribute)
+ if (validation_set.get_accept_values.size == 2)
+ report.add_roc_plot(validation_set, validation_set.get_accept_values[0], split_attribute)
+ else
+ validation_set.get_accept_values.each do |accept_value|
+ report.add_roc_plot(validation_set, accept_value, split_attribute)
+ end
end
end
report.end_section
@@ -199,8 +203,8 @@ module Reports::ReportFactory
if (validation_set.num_different_values(:dataset_uri)>1)
all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri])
report.add_ranking_plots(all_merged, :algorithm_uri, :dataset_uri,
- [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate] )
- report.add_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate])
+ [:percent_correct, :average_area_under_roc, :true_positive_rate, :true_negative_rate] )
+ report.add_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :average_area_under_roc, :true_positive_rate, :true_negative_rate])
end
result_attributes = [:identifier,:crossvalidation_uri,:crossvalidation_report_uri]+VAL_ATTR_CV-[:crossvalidation_fold,:num_folds,:dataset_uri]
diff --git a/report/validation_data.rb b/report/validation_data.rb
index aa146a6..b6522b6 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -1,9 +1,9 @@
# the variance is computed when merging results for these attributes
VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error,
- :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy, :weighted_root_mean_squared_error, :weighted_mean_absolute_error,
+ :r_square, :accuracy, :average_area_under_roc, :weighted_accuracy, :weighted_root_mean_squared_error, :weighted_mean_absolute_error,
:weighted_r_square ]
-VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc, :accuracy, :f_measure ]
+VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :average_area_under_roc, :accuracy, :f_measure ]
ATTR_NICE_NAME = {}