summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2011-05-19 17:24:55 +0200
committermguetlein <martin.guetlein@gmail.com>2011-05-19 17:24:55 +0200
commitb0ca028f1d9eaa30774f8c843c8b2ec8943247f6 (patch)
treea4d9d0dc24b78dcea39b04c9976deabbaa8367c0
parentac6a536d12697a98db6847c5229c67372cbbd1e7 (diff)
extended validation report plotting:
* roc plot for predictions * confidence plots for class-values * aligned roc plots and confidence plots * added confidence label into roc plots
-rw-r--r--report/plot_factory.rb42
-rwxr-xr-xreport/report_content.rb71
-rwxr-xr-xreport/report_factory.rb23
-rwxr-xr-xreport/xml_report.rb66
4 files changed, 131 insertions, 71 deletions
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index 74c89f5..7535eb4 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -91,7 +91,8 @@ module Reports
# * the validation set is splitted into sets of validation_sets with equal attribute values
# * each of theses validation sets is plotted as a roc-curve
#
- def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
+ def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil,
+ x_label="False positive rate", y_label="True Positive Rate", show_single_curves=false )
LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s
@@ -110,10 +111,16 @@ module Reports
LOGGER.warn "could not create ROC plot for "+value.to_s
end
end
- RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates )
+ RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, names, fp_rates, tp_rates )
else
data = transform_roc_predictions(validation_set, class_value, show_single_curves)
- RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
+ labels = []
+ data[:youden].each do |points|
+ points.each do |point,confidence|
+ labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]]
+ end
+ end
+ RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data[:names], data[:fp_rate], data[:tp_rate], data[:faint], labels )
end
end
@@ -275,7 +282,7 @@ module Reports
if (validation_set.size > 1)
- names = []; fp_rate = []; tp_rate = []; faint = []
+ names = []; fp_rate = []; tp_rate = []; faint = []; youden = []
sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
(0..validation_set.size-1).each do |i|
@@ -299,12 +306,13 @@ module Reports
names << nil # "all"
fp_rate << tp_fp_rates[:fp_rate]
tp_rate << tp_fp_rates[:tp_rate]
+ youden << tp_fp_rates[:youden]
faint << false
- return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint }
+ return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint, :youden => youden }
else
roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
tp_fp_rates = get_tp_fp_rates(roc_values)
- return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
+ return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]], :youden => [tp_fp_rates[:youden]] }
end
end
@@ -472,13 +480,33 @@ module Reports
w = w.compress_sum(c2)
#puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n"
+ youden = []
+ (0..tp_rate.size-1).each do |i|
+ tpr = tp_rate[i]/tp_rate[-1].to_f
+ fpr = fp_rate[i]/fp_rate[-1].to_f
+ youden << tpr + (1 - fpr)
+ #puts youden[-1].to_s+" ("+tpr.to_s+" "+fpr.to_s+")"
+ end
+ max = youden.max
+ youden_hash = {}
+ (0..tp_rate.size-1).each do |i|
+ youden_hash[i] = c2[i] if youden[i]==max
+ end
+ #puts youden.inspect+"\n"+youden_hash.inspect+"\n\n"
+
(0..tp_rate.size-1).each do |i|
tp_rate[i] = tp_rate[-1]>0 ? tp_rate[i]/tp_rate[-1].to_f*100 : 100
fp_rate[i] = fp_rate[-1]>0 ? fp_rate[i]/fp_rate[-1].to_f*100 : 100
end
#puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n\n"
- return {:tp_rate => tp_rate,:fp_rate => fp_rate}
+ youden_coordinates_hash = {}
+ youden_hash.each do |i,c|
+ youden_coordinates_hash[[fp_rate[i],tp_rate[i]]] = c
+ end
+ #puts youden_coordinates_hash.inspect+"\n\n"
+
+ return {:tp_rate => tp_rate,:fp_rate => fp_rate,:youden => youden_coordinates_hash}
end
end
end
diff --git a/report/report_content.rb b/report/report_content.rb
index ac64bab..893ac34 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -124,6 +124,11 @@ class Reports::ReportContent
Reports::XMLReportUtil::create_confusion_matrix( validation.confusion_matrix ), true, true)
end
+ # bit of a hack to align the last two plots in the report into one row
+ def align_last_two_images( title )
+ @xml_report.align_last_two_elements(@current_section, title )
+ end
+
def add_regression_plot( validation_set,
name_attribute,
section_title="Regression Plot",
@@ -155,13 +160,13 @@ class Reports::ReportContent
@xml_report.add_paragraph(section_regr, "No prediction info for regression available.")
end
end
-
- def add_roc_plot( validation_set,
- split_set_attribute = nil,
- section_title="ROC Plots",
- section_text=nil,
- image_titles=nil,
- image_captions=nil)
+
+ def add_roc_plot( validation_set,
+ accept_value,
+ split_set_attribute=nil,
+ image_title = "ROC Plot",
+ section_text=nil,
+ image_caption=nil)
#section_roc = @xml_report.add_section(@current_section, section_title)
section_roc = @current_section
@@ -174,25 +179,18 @@ class Reports::ReportContent
"validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s
end
@xml_report.add_paragraph(section_roc, section_text) if section_text
-
- accept_values = validation_set.get_accept_values
- accept_values.size.times do |i|
- class_value = accept_values[i]
- image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value.to_s+"'"
- image_caption = image_captions ? image_captions[i] : nil
- plot_file_name = "roc_plot"+@tmp_file_count.to_s+".png"
- @tmp_file_count += 1
- begin
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 )
- @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "PNG", 100, image_caption)
- rescue Exception => ex
- msg = "WARNING could not create roc plot for class value '"+class_value.to_s+"': "+ex.message
- LOGGER.error(msg)
- rm_tmp_file(plot_file_name)
- @xml_report.add_paragraph(section_roc, msg)
- end
- end
+ plot_file_name = "roc_plot"+@tmp_file_count.to_s+".png"
+ @tmp_file_count += 1
+ begin
+ plot_file_path = add_tmp_file(plot_file_name)
+ Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, accept_value, split_set_attribute )#prediction_set.size>1 )
+ @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "PNG", 100, image_caption)
+ rescue Exception => ex
+ msg = "WARNING could not create roc plot for class value '"+accept_value.to_s+"': "+ex.message
+ LOGGER.error(msg)
+ rm_tmp_file(plot_file_name)
+ @xml_report.add_paragraph(section_roc, msg)
+ end
else
@xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.")
end
@@ -200,11 +198,11 @@ class Reports::ReportContent
end
def add_confidence_plot( validation_set,
+ accept_value = nil,
split_set_attribute = nil,
- section_title="Confidence plots",
+ image_title = "Percent Correct vs Confidence Plot",
section_text=nil,
- image_titles=nil,
- image_captions=nil)
+ image_caption=nil)
#section_conf = @xml_report.add_section(@current_section, section_title)
section_conf = @current_section
@@ -217,30 +215,23 @@ class Reports::ReportContent
"validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s
end
@xml_report.add_paragraph(section_conf, section_text) if section_text
-
- image_title = image_titles ? image_titles[i] : "Percent Correct vs Confidence Plot"
- image_caption = image_captions ? image_captions[i] : nil
+
plot_file_name = "conf_plot"+@tmp_file_count.to_s+".png"
@tmp_file_count += 1
-
begin
-
plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_confidence_plot( plot_file_path, prediction_set, nil, split_set_attribute, false )
+ Reports::PlotFactory.create_confidence_plot( plot_file_path, prediction_set, accept_value, split_set_attribute, false )
@xml_report.add_imagefigure(section_conf, image_title, plot_file_name, "PNG", 100, image_caption)
-
rescue Exception => ex
msg = "WARNING could not create confidence plot: "+ex.message
LOGGER.error(msg)
rm_tmp_file(plot_file_name)
@xml_report.add_paragraph(section_conf, msg)
- end
-
+ end
else
@xml_report.add_paragraph(section_conf, "No prediction-confidence info for confidence plot available.")
end
-
- end
+ end
def add_ranking_plots( validation_set,
compare_attribute,
diff --git a/report/report_factory.rb b/report/report_factory.rb
index 7e74cb4..d16066e 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -70,8 +70,12 @@ module Reports::ReportFactory
report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results")
report.add_confusion_matrix(val)
report.add_section("Plots")
- report.add_roc_plot(validation_set)
- report.add_confidence_plot(validation_set)
+ ([nil] + validation_set.get_accept_values).each do |accept_value|
+ report.add_roc_plot(validation_set, accept_value)
+ report.add_confidence_plot(validation_set, accept_value)
+ title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
+ report.align_last_two_images title
+ end
report.end_section
when "regression"
report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results")
@@ -116,10 +120,15 @@ module Reports::ReportFactory
report.add_result(cv_set, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold], res_titel, res_titel, res_text)
report.add_confusion_matrix(cv_set.validations[0])
report.add_section("Plots")
- report.add_roc_plot(validation_set)
- report.add_roc_plot(validation_set, :crossvalidation_fold)
- report.add_confidence_plot(validation_set)
- report.add_confidence_plot(validation_set, :crossvalidation_fold)
+ [nil, :crossvalidation_fold].each do |split_attribute|
+ ([nil] + validation_set.get_accept_values).each do |accept_value|
+ report.add_roc_plot(validation_set, accept_value, split_attribute)
+ report.add_confidence_plot(validation_set, accept_value, split_attribute)
+ title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
+ title += split_attribute ? ", separated by crossvalidation fold" : " (accumulated over all folds)"
+ report.align_last_two_images title
+ end
+ end
report.end_section
report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri],
"Results","Results")
@@ -128,7 +137,7 @@ module Reports::ReportFactory
report.add_section("Plots")
report.add_regression_plot(validation_set, :crossvalidation_fold)
report.add_confidence_plot(validation_set)
- report.add_confidence_plot(validation_set, :crossvalidation_fold)
+ report.add_confidence_plot(validation_set, nil, :crossvalidation_fold)
report.end_section
report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results")
end
diff --git a/report/xml_report.rb b/report/xml_report.rb
index 7467c47..b308c01 100755
--- a/report/xml_report.rb
+++ b/report/xml_report.rb
@@ -93,38 +93,70 @@ module Reports
end
end
- # adds a new image to a REXML:Element, returns the figure as element
- #
- # example: <tt>add_imagefigure( section2, "Nice graph", "/images/graph1.svg", "SVG", "This graph shows..." )</tt>
- #
- # call-seq:
- # add_imagefigure( element, title, path, filetype, caption = nil ) => REXML::Element
- #
- def add_imagefigure( element, title, path, filetype, size_pct=100, caption = nil )
-
+ def imagefigure( title, path, filetype, size_pct=100, caption = nil )
figure = Reports::XMLReportUtil.attribute_element("figure", {"float" => 0})
figure << Reports::XMLReportUtil.text_element("title", title)
media = Element.new("mediaobject")
image = Element.new("imageobject")
imagedata = Reports::XMLReportUtil.attribute_element("imagedata",
- {"fileref" => path, "format"=>filetype, "contentwidth" => size_pct.to_s+"%",
- #"contentdepth"=> "4in"
- })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"})
+ {"fileref" => path, "format"=>filetype, "contentwidth" => size_pct.to_s+"%",
+ #"contentdepth"=> "4in"
+ })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"})
#imagedata = Reports::XMLReportUtil.attribute_element("imagedata",{"width" => "6in", "fileref" => path, "format"=>filetype})
@resource_path_elements[imagedata] = "fileref"
image << imagedata
-
media << image
-
# ulink = Element.new("ulink")
# ulink.add_attributes({"url" => "http://google.de"})
# ulink << image
# media << ulink
-
media << Reports::XMLReportUtil.text_element("caption", caption) if caption
- figure << media
+ figure << media
+ figure
+ end
+
+ # adds a new image to a REXML::Element, returns the figure as element
+ #
+ # example: <tt>add_imagefigure( section2, "Nice graph", "/images/graph1.svg", "SVG", 100, "This graph shows..." )</tt>
+ #
+ # call-seq:
+ # add_imagefigure( element, title, path, filetype, size_pct = 100, caption = nil ) => REXML::Element
+ #
+ def add_imagefigure( element, title, path, filetype, size_pct=100, caption = nil )
+ figure = imagefigure( title, path, filetype, size_pct, caption)
element << figure
- return figure
+ return figure
+ end
+
+ # bit of a hack to align the last two elements that have been added to element into one row
+ def align_last_two_elements( element, title )
+ imgs = []
+ element.elements.each do |e|
+ imgs[0] = imgs[1]
+ imgs[1] = e
+ end
+ element.delete_element imgs[0]
+ element.delete_element imgs[1]
+ add_imagefigures_in_row( element, imgs, title )
+ end
+
+ def add_imagefigures_in_row( element, imagefigures, title )
+ params = {"frame" => "none", "colsep" => 0, "rowsep" => 0 }
+ table = Reports::XMLReportUtil.attribute_element("table",params)
+ table << Reports::XMLReportUtil.text_element("title", title)
+ tgroup = Reports::XMLReportUtil.attribute_element("tgroup",{"cols" => 2})
+ tbody = Element.new("tbody")
+ row = Element.new("row")
+ imagefigures.each do |f|
+ entry = Element.new("entry")
+ entry << f
+ row << entry
+ end
+ tbody << row
+ tgroup << tbody
+ table << tgroup
+ element << table
+ table
end
def add_image( element, url )