diff options
author | mguetlein <martin.guetlein@gmail.com> | 2011-05-19 17:24:55 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2011-05-19 17:24:55 +0200 |
commit | b0ca028f1d9eaa30774f8c843c8b2ec8943247f6 (patch) | |
tree | a4d9d0dc24b78dcea39b04c9976deabbaa8367c0 /report | |
parent | ac6a536d12697a98db6847c5229c67372cbbd1e7 (diff) |
extended validation report plotting:
* roc plot for predictions
* confidence plots for class-values
* aligned roc plots and confidence plots
* added confidence label into roc plots
Diffstat (limited to 'report')
-rw-r--r-- | report/plot_factory.rb | 42 | ||||
-rwxr-xr-x | report/report_content.rb | 71 | ||||
-rwxr-xr-x | report/report_factory.rb | 23 | ||||
-rwxr-xr-x | report/xml_report.rb | 66 |
4 files changed, 131 insertions, 71 deletions
diff --git a/report/plot_factory.rb b/report/plot_factory.rb index 74c89f5..7535eb4 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -91,7 +91,8 @@ module Reports # * the validation set is splitted into sets of validation_sets with equal attribute values # * each of theses validation sets is plotted as a roc-curve # - def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) + def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, + x_label="False positive rate", y_label="True Positive Rate", show_single_curves=false ) LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s @@ -110,10 +111,16 @@ module Reports LOGGER.warn "could not create ROC plot for "+value.to_s end end - RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates ) + RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, names, fp_rates, tp_rates ) else data = transform_roc_predictions(validation_set, class_value, show_single_curves) - RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) + labels = [] + data[:youden].each do |points| + points.each do |point,confidence| + labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]] + end + end + RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data[:names], data[:fp_rate], data[:tp_rate], data[:faint], labels ) end end @@ -275,7 +282,7 @@ module Reports if (validation_set.size > 1) - names = []; fp_rate = []; tp_rate = []; faint = [] + names = []; fp_rate = []; tp_rate = []; faint = []; youden = [] sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} (0..validation_set.size-1).each do |i| @@ -299,12 +306,13 @@ module Reports names << nil # "all" fp_rate << tp_fp_rates[:fp_rate] tp_rate << tp_fp_rates[:tp_rate] + youden << tp_fp_rates[:youden] faint << false - return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint } + return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint, :youden => youden } else roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) tp_fp_rates = get_tp_fp_rates(roc_values) - return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } + return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]], :youden => [tp_fp_rates[:youden]] } end end @@ -472,13 +480,33 @@ module Reports w = w.compress_sum(c2) #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n" + youden = [] + (0..tp_rate.size-1).each do |i| + tpr = tp_rate[i]/tp_rate[-1].to_f + fpr = fp_rate[i]/fp_rate[-1].to_f + youden << tpr + (1 - fpr) + #puts youden[-1].to_s+" ("+tpr.to_s+" "+fpr.to_s+")" + end + max = youden.max + youden_hash = {} + (0..tp_rate.size-1).each do |i| + youden_hash[i] = c2[i] if youden[i]==max + end + #puts youden.inspect+"\n"+youden_hash.inspect+"\n\n" + (0..tp_rate.size-1).each do |i| tp_rate[i] = tp_rate[-1]>0 ? tp_rate[i]/tp_rate[-1].to_f*100 : 100 fp_rate[i] = fp_rate[-1]>0 ? fp_rate[i]/fp_rate[-1].to_f*100 : 100 end #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n\n" - return {:tp_rate => tp_rate,:fp_rate => fp_rate} + youden_coordinates_hash = {} + youden_hash.each do |i,c| + youden_coordinates_hash[[fp_rate[i],tp_rate[i]]] = c + end + #puts youden_coordinates_hash.inspect+"\n\n" + + return {:tp_rate => tp_rate,:fp_rate => fp_rate,:youden => youden_coordinates_hash} end end end diff --git a/report/report_content.rb b/report/report_content.rb index ac64bab..893ac34 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -124,6 +124,11 @@ class Reports::ReportContent Reports::XMLReportUtil::create_confusion_matrix( validation.confusion_matrix ), true, true) end + # bit of a hack to algin the last two plots in the report in to one row + def align_last_two_images( title ) + @xml_report.align_last_two_elements(@current_section, title ) + end + def add_regression_plot( validation_set, name_attribute, section_title="Regression Plot", @@ -155,13 +160,13 @@ class Reports::ReportContent @xml_report.add_paragraph(section_regr, "No prediction info for regression available.") end end - - def add_roc_plot( validation_set, - split_set_attribute = nil, - section_title="ROC Plots", - section_text=nil, - image_titles=nil, - image_captions=nil) + + def add_roc_plot( validation_set, + accept_value, + split_set_attribute=nil, + image_title = "ROC Plot", + section_text=nil, + image_caption=nil) #section_roc = @xml_report.add_section(@current_section, section_title) section_roc = @current_section @@ -174,25 +179,18 @@ class Reports::ReportContent "validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s end @xml_report.add_paragraph(section_roc, section_text) if section_text - - accept_values = validation_set.get_accept_values - accept_values.size.times do |i| - class_value = accept_values[i] - image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value.to_s+"'" - image_caption = image_captions ? image_captions[i] : nil - plot_file_name = "roc_plot"+@tmp_file_count.to_s+".png" - @tmp_file_count += 1 - begin - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 ) - @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "PNG", 100, image_caption) - rescue Exception => ex - msg = "WARNING could not create roc plot for class value '"+class_value.to_s+"': "+ex.message - LOGGER.error(msg) - rm_tmp_file(plot_file_name) - @xml_report.add_paragraph(section_roc, msg) - end - end + plot_file_name = "roc_plot"+@tmp_file_count.to_s+".png" + @tmp_file_count += 1 + begin + plot_file_path = add_tmp_file(plot_file_name) + Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, accept_value, split_set_attribute )#prediction_set.size>1 ) + @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "PNG", 100, image_caption) + rescue Exception => ex + msg = "WARNING could not create roc plot for class value '"+accept_value.to_s+"': "+ex.message + LOGGER.error(msg) + rm_tmp_file(plot_file_name) + @xml_report.add_paragraph(section_roc, msg) + end else @xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.") end @@ -200,11 +198,11 @@ class Reports::ReportContent end def add_confidence_plot( validation_set, + accept_value = nil, split_set_attribute = nil, - section_title="Confidence plots", + image_title = "Percent Correct vs Confidence Plot", section_text=nil, - image_titles=nil, - image_captions=nil) + image_caption=nil) #section_conf = @xml_report.add_section(@current_section, section_title) section_conf = @current_section @@ -217,30 +215,23 @@ class Reports::ReportContent "validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s end @xml_report.add_paragraph(section_conf, section_text) if section_text - - image_title = image_titles ? image_titles[i] : "Percent Correct vs Confidence Plot" - image_caption = image_captions ? image_captions[i] : nil + plot_file_name = "conf_plot"+@tmp_file_count.to_s+".png" @tmp_file_count += 1 - begin - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_confidence_plot( plot_file_path, prediction_set, nil, split_set_attribute, false ) + Reports::PlotFactory.create_confidence_plot( plot_file_path, prediction_set, accept_value, split_set_attribute, false ) @xml_report.add_imagefigure(section_conf, image_title, plot_file_name, "PNG", 100, image_caption) - rescue Exception => ex msg = "WARNING could not create confidence plot: "+ex.message LOGGER.error(msg) rm_tmp_file(plot_file_name) @xml_report.add_paragraph(section_conf, msg) - end - + end else @xml_report.add_paragraph(section_conf, "No prediction-confidence info for confidence plot available.") end - - end + end def add_ranking_plots( validation_set, compare_attribute, diff --git a/report/report_factory.rb b/report/report_factory.rb index 7e74cb4..d16066e 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -70,8 +70,12 @@ module Reports::ReportFactory report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") report.add_confusion_matrix(val) report.add_section("Plots") - report.add_roc_plot(validation_set) - report.add_confidence_plot(validation_set) + ([nil] + validation_set.get_accept_values).each do |accept_value| + report.add_roc_plot(validation_set, accept_value) + report.add_confidence_plot(validation_set, accept_value) + title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions" + report.align_last_two_images title + end report.end_section when "regression" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") @@ -116,10 +120,15 @@ module Reports::ReportFactory report.add_result(cv_set, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold], res_titel, res_titel, res_text) report.add_confusion_matrix(cv_set.validations[0]) report.add_section("Plots") - report.add_roc_plot(validation_set) - report.add_roc_plot(validation_set, :crossvalidation_fold) - report.add_confidence_plot(validation_set) - report.add_confidence_plot(validation_set, :crossvalidation_fold) + [nil, :crossvalidation_fold].each do |split_attribute| + ([nil] + validation_set.get_accept_values).each do |accept_value| + report.add_roc_plot(validation_set, accept_value, split_attribute) + report.add_confidence_plot(validation_set, accept_value, split_attribute) + title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions" + title += split_attribute ? ", separated by crossvalidation fold" : " (accumulated over all folds)" + report.align_last_two_images title + end + end report.end_section report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results") @@ -128,7 +137,7 @@ module Reports::ReportFactory report.add_section("Plots") report.add_regression_plot(validation_set, :crossvalidation_fold) report.add_confidence_plot(validation_set) - report.add_confidence_plot(validation_set, :crossvalidation_fold) + report.add_confidence_plot(validation_set, nil, :crossvalidation_fold) report.end_section report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results") end diff --git a/report/xml_report.rb b/report/xml_report.rb index 7467c47..b308c01 100755 --- a/report/xml_report.rb +++ b/report/xml_report.rb @@ -93,38 +93,70 @@ module Reports end end - # adds a new image to a REXML:Element, returns the figure as element - # - # example: <tt>add_imagefigure( section2, "Nice graph", "/images/graph1.svg", "SVG", "This graph shows..." )</tt> - # - # call-seq: - # add_imagefigure( element, title, path, filetype, caption = nil ) => REXML::Element - # - def add_imagefigure( element, title, path, filetype, size_pct=100, caption = nil ) - + def imagefigure( title, path, filetype, size_pct=100, caption = nil ) figure = Reports::XMLReportUtil.attribute_element("figure", {"float" => 0}) figure << Reports::XMLReportUtil.text_element("title", title) media = Element.new("mediaobject") image = Element.new("imageobject") imagedata = Reports::XMLReportUtil.attribute_element("imagedata", - {"fileref" => path, "format"=>filetype, "contentwidth" => size_pct.to_s+"%", - #"contentdepth"=> "4in" - })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"}) + {"fileref" => path, "format"=>filetype, "contentwidth" => size_pct.to_s+"%", + #"contentdepth"=> "4in" + })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"}) #imagedata = Reports::XMLReportUtil.attribute_element("imagedata",{"width" => "6in", "fileref" => path, "format"=>filetype}) @resource_path_elements[imagedata] = "fileref" image << imagedata - media << image - # ulink = Element.new("ulink") # ulink.add_attributes({"url" => "http://google.de"}) # ulink << image # media << ulink - media << Reports::XMLReportUtil.text_element("caption", caption) if caption - figure << media + figure << media + figure + end + + # adds a new image to a REXML:Element, returns the figure as element + # + # example: <tt>add_imagefigure( section2, "Nice graph", "/images/graph1.svg", "SVG", "This graph shows..." )</tt> + # + # call-seq: + # add_imagefigure( element, title, path, filetype, caption = nil ) => REXML::Element + # + def add_imagefigure( element, title, path, filetype, size_pct=100, caption = nil ) + figure = imagefigure( title, path, filetype, size_pct, caption) element << figure - return figure + return figure + end + + # bit of a hack to algin the last two elements that have been added to element into one row + def align_last_two_elements( element, title ) + imgs = [] + element.elements.each do |e| + imgs[0] = imgs[1] + imgs[1] = e + end + element.delete_element imgs[0] + element.delete_element imgs[1] + add_imagefigures_in_row( element, imgs, title ) + end + + def add_imagefigures_in_row( element, imagefigures, title ) + params = {"frame" => "none", "colsep" => 0, "rowsep" => 0 } + table = Reports::XMLReportUtil.attribute_element("table",params) + table << Reports::XMLReportUtil.text_element("title", title) + tgroup = Reports::XMLReportUtil.attribute_element("tgroup",{"cols" => 2}) + tbody = Element.new("tbody") + row = Element.new("row") + imagefigures.each do |f| + entry = Element.new("entry") + entry << f + row << entry + end + tbody << row + tgroup << tbody + table << tgroup + element << table + table end def add_image( element, url ) |