From bad2d7444ab40a59770678c0b0e4057d5edeceef Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 18 Aug 2011 12:57:33 +0200 Subject: add confidence plots for various classification stats --- report/plot_factory.rb | 53 ++++++++++++++++++++++++++++++++++++++++-------- report/report_content.rb | 7 ++++--- report/report_factory.rb | 16 +++++++++++++-- 3 files changed, 63 insertions(+), 13 deletions(-) (limited to 'report') diff --git a/report/plot_factory.rb b/report/plot_factory.rb index 27e934d..2074ce5 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -130,8 +130,43 @@ module Reports end end + def self.confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) + true_class = nil + if actual_accept_value==nil and predicted_accept_value==nil + perf = "Accuracy" + elsif actual_accept_value!=nil + if validation_set.get_true_accept_value==actual_accept_value + perf = "True Positive Rate" + true_class = actual_accept_value + elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[actual_accept_value])[0] + perf = "True Negative Rate" + true_class = validation_set.get_true_accept_value + else + perf = "True Positive Rate" + true_class = actual_accept_value + end + elsif predicted_accept_value!=nil + if validation_set.get_true_accept_value==predicted_accept_value + perf = "Positive Predictive Value" + true_class = predicted_accept_value + elsif validation_set.get_accept_values.size==2 and validation_set.get_true_accept_value==(validation_set.get_accept_values-[predicted_accept_value])[0] + perf = "Negative Predictive Value" + true_class = validation_set.get_true_accept_value + else + perf = "Positive Predictive Value" + true_class = predicted_accept_value + end + end + title = perf+" vs Confidence Plot" + title += " (with True-Class: '"+true_class.to_s+"')" if true_class!=nil + {:title =>title, :performance => perf} + end + - def self.create_confidence_plot( out_files, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) + def self.create_confidence_plot( out_files, validation_set, actual_accept_value = nil, + predicted_accept_value = nil, split_set_attribute=nil, show_single_curves=false ) + + raise "param combination not supported" if actual_accept_value!=nil and predicted_accept_value!=nil out_files = [out_files] unless out_files.is_a?(Array) LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_files.inspect @@ -143,7 +178,7 @@ module Reports performance = [] attribute_values.each do |value| begin - data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), actual_accept_value, predicted_accept_value, false) names << split_set_attribute.to_s.nice_attr+" "+value.to_s confidence << data[:confidence][0] performance << data[:performance][0] @@ -155,17 +190,19 @@ module Reports out_files.each do |out_file| case validation_set.unique_feature_type when "classification" - RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance) + info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) + RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], names, confidence, performance) when "regression" RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true) end end else - data = transform_confidence_predictions(validation_set, class_value, show_single_curves) + data = transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, show_single_curves) out_files.each do |out_file| case validation_set.unique_feature_type when "classification" - RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance]) + info = confidence_plot_class_performance( validation_set, actual_accept_value, predicted_accept_value ) + RubyPlot::accuracy_confidence_plot(out_file, info[:title], "Confidence", info[:performance], data[:names], data[:confidence], data[:performance]) when "regression" RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true) end @@ -312,7 +349,7 @@ module Reports end - def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false) + def self.transform_confidence_predictions(validation_set, actual_accept_value, predicted_accept_value, add_single_folds=false) if (validation_set.size > 1) @@ -320,7 +357,7 @@ module Reports sum_confidence_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} (0..validation_set.size-1).each do |i| - confidence_values = validation_set.get(i).get_predictions.get_prediction_values(class_value) + confidence_values = validation_set.get(i).get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value) sum_confidence_values[:predicted_values] += confidence_values[:predicted_values] sum_confidence_values[:confidence_values] += confidence_values[:confidence_values] sum_confidence_values[:actual_values] += confidence_values[:actual_values] @@ -345,7 +382,7 @@ module Reports return { :names => names, :performance => performance, :confidence => confidence, :faint => faint } else - confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) + confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(actual_accept_value, predicted_accept_value) pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type) return { :names => [""], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] } end diff --git a/report/report_content.rb b/report/report_content.rb index 9c33038..8d6d44b 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -213,9 +213,10 @@ class Reports::ReportContent end def add_confidence_plot( validation_set, - accept_value = nil, + actual_accept_value = nil, + predicted_accept_value = nil, split_set_attribute = nil, - image_title = "Percent Correct vs Confidence Plot", + image_title = "Confidence Plot", section_text="") #section_conf = @xml_report.add_section(@current_section, section_title) @@ -233,7 +234,7 @@ class Reports::ReportContent begin plot_png = add_tmp_file("conf_plot", "png") plot_svg = add_tmp_file("conf_plot", "svg") - Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, accept_value, split_set_attribute, false ) + Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, actual_accept_value, predicted_accept_value, split_set_attribute, false ) @xml_report.add_imagefigure(section_conf, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) rescue Exception => ex msg = "WARNING could not create confidence plot: "+ex.message diff --git a/report/report_factory.rb b/report/report_factory.rb index e3e0f3c..2a50869 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -76,15 +76,21 @@ module Reports::ReportFactory report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") report.add_confusion_matrix(val) report.add_section("Plots") - report.add_confidence_plot(validation_set) if (validation_set.get_accept_values.size == 2) if validation_set.get_true_accept_value!=nil report.add_roc_plot(validation_set, validation_set.get_true_accept_value) else report.add_roc_plot(validation_set, validation_set.get_accept_values[0]) report.add_roc_plot(validation_set, validation_set.get_accept_values[1]) + report.align_last_two_images "ROC Plots" end end + report.add_confidence_plot(validation_set) + validation_set.get_accept_values.each do |accept_value| + report.add_confidence_plot(validation_set, accept_value, nil) + report.add_confidence_plot(validation_set, nil, accept_value) + report.align_last_two_images "Confidence Plots" + end report.end_section when "regression" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") @@ -131,15 +137,21 @@ module Reports::ReportFactory report.add_section("Plots") [nil, :crossvalidation_fold].each do |split_attribute| - report.add_confidence_plot(validation_set,nil,split_attribute) if (validation_set.get_accept_values.size == 2) if validation_set.get_true_accept_value!=nil report.add_roc_plot(validation_set, validation_set.get_true_accept_value,split_attribute) else report.add_roc_plot(validation_set, validation_set.get_accept_values[0],split_attribute) report.add_roc_plot(validation_set, validation_set.get_accept_values[1],split_attribute) + report.align_last_two_images "ROC Plots" end end + report.add_confidence_plot(validation_set,nil,nil,split_attribute) + validation_set.get_accept_values.each do |accept_value| + report.add_confidence_plot(validation_set, accept_value, nil,split_attribute) + report.add_confidence_plot(validation_set, nil, accept_value,split_attribute) + report.align_last_two_images "Confidence Plots" + end end report.end_section report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri], -- cgit v1.2.3