From bad2d7444ab40a59770678c0b0e4057d5edeceef Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 18 Aug 2011 12:57:33 +0200 Subject: add confidence plots for various classification stats --- lib/predictions.rb | 13 ++++++++---- report/plot_factory.rb | 53 ++++++++++++++++++++++++++++++++++++++++-------- report/report_content.rb | 7 ++++--- report/report_factory.rb | 16 +++++++++++++-- 4 files changed, 72 insertions(+), 17 deletions(-) diff --git a/lib/predictions.rb b/lib/predictions.rb index bfb25da..7de1751 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -587,19 +587,24 @@ module Lib return h end - def get_prediction_values(class_value) + def get_prediction_values(actual_accept_value, predicted_accept_value) #puts "get_roc_values for class_value: "+class_value.to_s raise "no confidence values" unless confidence_values_available? #raise "no class-value specified" if class_value==nil - class_index = @accept_values.index(class_value) if class_value!=nil - raise "class not found "+class_value.to_s if (class_value!=nil && class_index==nil) + actual_class_index = @accept_values.index(actual_accept_value) if actual_accept_value!=nil + raise "class not found '"+actual_accept_value.to_s+"' in "+@accept_values.inspect if (actual_accept_value!=nil && actual_class_index==nil) + + predicted_class_index = @accept_values.index(predicted_accept_value) if predicted_accept_value!=nil + raise "class not found "+predicted_accept_value.to_s+" in "+@accept_values.inspect if (predicted_accept_value!=nil && predicted_class_index==nil) c = []; p = []; a = [] (0..@predicted_values.size-1).each do |i| # NOTE: not predicted instances are ignored here - if @predicted_values[i]!=nil and (class_index==nil || @predicted_values[i]==class_index) + if @predicted_values[i]!=nil and + (predicted_class_index==nil || @predicted_values[i]==predicted_class_index) and + (actual_class_index==nil || @actual_values[i]==actual_class_index) c << @confidence_values[i] p << @predicted_values[i] a << @actual_values[i] diff --git a/report/plot_factory.rb b/report/plot_factory.rb index 27e934d..2074ce5 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -130,8 +130,43 @@ module Reports end end + def self.confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) + true_class = nil + if actual_accept_value==nil and predicted_accept_value==nil + perf = "Accuracy" + elsif actual_accept_value!=nil + if validation_set.get_true_accept_value==actual_accept_value + perf = "True Positive Rate" + true_class = actual_accept_value + elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[actual_accept_value])[0] + perf = "True Negative Rate" + true_class = validation_set.get_true_accept_value + else + perf = "True Positive Rate" + true_class = actual_accept_value + end + elsif predicted_accept_value!=nil + if validation_set.get_true_accept_value==predicted_accept_value + perf = "Positive Predictive Value" + true_class = predicted_accept_value + elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[predicted_accept_value])[0] + perf = "Negative Predictive Value" + true_class = validation_set.get_true_accept_value + else + perf = "Positive Predictive Value" + true_class = predicted_accept_value + end + end + title = perf+" vs Confidence Plot" + title += " (with True-Class: '"+true_class.to_s+"')" if true_class!=nil + {:title =>title, :performance => perf} + end + - def self.create_confidence_plot( out_files, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) + def self.create_confidence_plot( out_files, validation_set, actual_accept_value = nil, + predicted_accept_value = nil, split_set_attribute=nil, show_single_curves=false ) + + raise "param combination not supported" if actual_accept_value!=nil and predicted_accept_value!=nil out_files = [out_files] unless out_files.is_a?(Array) LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_files.inspect @@ -143,7 +178,7 @@ module Reports performance = [] attribute_values.each do |value| begin - data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), actual_accept_value, predicted_accept_value, false) names << split_set_attribute.to_s.nice_attr+" "+value.to_s confidence << data[:confidence][0] performance << data[:performance][0] @@ -155,17 +190,19 @@ module Reports out_files.each do |out_file| case validation_set.unique_feature_type when "classification" - RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance) + info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) + RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], names, confidence, performance) when "regression" RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true) end end else - data = transform_confidence_predictions(validation_set, class_value, show_single_curves) + data = transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, show_single_curves) out_files.each do |out_file| case validation_set.unique_feature_type when "classification" - RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance]) + info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) + RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], data[:names], data[:confidence], data[:performance]) when "regression" RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true) end @@ -312,7 +349,7 @@ module Reports end - def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false) + def self.transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, add_single_folds=false) if (validation_set.size > 1) @@ -320,7 +357,7 @@ module Reports sum_confidence_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} (0..validation_set.size-1).each do |i| - confidence_values = validation_set.get(i).get_predictions.get_prediction_values(class_value) + confidence_values = validation_set.get(i).get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value) sum_confidence_values[:predicted_values] += confidence_values[:predicted_values] sum_confidence_values[:confidence_values] += confidence_values[:confidence_values] sum_confidence_values[:actual_values] += confidence_values[:actual_values] @@ -345,7 +382,7 @@ module Reports return { :names => names, :performance => performance, :confidence => confidence, :faint => faint } else - confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) + confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value) pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type) return { :names => [""], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] } end diff --git a/report/report_content.rb b/report/report_content.rb index 9c33038..8d6d44b 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -213,9 +213,10 @@ class Reports::ReportContent end def add_confidence_plot( validation_set, - accept_value = nil, + actual_accept_value = nil, + predicted_accept_value = nil, split_set_attribute = nil, - image_title = "Percent Correct vs Confidence Plot", + image_title = "Confidence Plot", section_text="") #section_conf = @xml_report.add_section(@current_section, section_title) @@ -233,7 +234,7 @@ class Reports::ReportContent begin plot_png = add_tmp_file("conf_plot", "png") plot_svg = add_tmp_file("conf_plot", "svg") - Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, accept_value, split_set_attribute, false ) + Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, actual_accept_value, predicted_accept_value, split_set_attribute, false ) @xml_report.add_imagefigure(section_conf, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) rescue Exception => ex msg = "WARNING could not create confidence plot: "+ex.message diff --git a/report/report_factory.rb b/report/report_factory.rb index e3e0f3c..2a50869 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -76,15 +76,21 @@ module Reports::ReportFactory report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") report.add_confusion_matrix(val) report.add_section("Plots") - report.add_confidence_plot(validation_set) if (validation_set.get_accept_values.size == 2) if validation_set.get_true_accept_value!=nil report.add_roc_plot(validation_set, validation_set.get_true_accept_value) else report.add_roc_plot(validation_set, validation_set.get_accept_values[0]) report.add_roc_plot(validation_set, validation_set.get_accept_values[1]) + report.align_last_two_images "ROC Plots" end end + report.add_confidence_plot(validation_set) + validation_set.get_accept_values.each do |accept_value| + report.add_confidence_plot(validation_set, accept_value, nil) + report.add_confidence_plot(validation_set, nil, accept_value) + report.align_last_two_images "Confidence Plots" + end report.end_section when "regression" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") @@ -131,15 +137,21 @@ module Reports::ReportFactory report.add_section("Plots") [nil, :crossvalidation_fold].each do |split_attribute| - report.add_confidence_plot(validation_set,nil,split_attribute) if (validation_set.get_accept_values.size == 2) if validation_set.get_true_accept_value!=nil report.add_roc_plot(validation_set, validation_set.get_true_accept_value,split_attribute) else report.add_roc_plot(validation_set, validation_set.get_accept_values[0],split_attribute) report.add_roc_plot(validation_set, validation_set.get_accept_values[1],split_attribute) + report.align_last_two_images "ROC Plots" end end + report.add_confidence_plot(validation_set,nil,nil,split_attribute) + validation_set.get_accept_values.each do |accept_value| + report.add_confidence_plot(validation_set, accept_value, nil,split_attribute) + report.add_confidence_plot(validation_set, nil, accept_value,split_attribute) + report.align_last_two_images "Confidence Plots" + end end report.end_section report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri], -- cgit v1.2.3