diff options
author | mguetlein <martin.guetlein@gmail.com> | 2011-05-16 14:46:50 +0200 |
---|---|---|
committer | mguetlein <martin.guetlein@gmail.com> | 2011-05-16 14:46:50 +0200 |
commit | eb5f8b5da9b247d62abc8a7b9eb2e44fe46a1c79 (patch) | |
tree | 99bbf52ad3b7495114ffe50194b8f9c606f5f248 /report | |
parent | 8afc018a179b254905f93ef8607338a7826baf4e (diff) |
add confidence plots
Diffstat (limited to 'report')
-rwxr-xr-x | report/environment.rb | 2 |
-rw-r--r-- | report/plot_factory.rb | 147 |
-rwxr-xr-x | report/report_content.rb | 50 |
-rwxr-xr-x | report/report_factory.rb | 40 |
4 files changed, 224 insertions, 15 deletions
diff --git a/report/environment.rb b/report/environment.rb index 19ea3a2..59465aa 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -4,7 +4,7 @@ 'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g| require g end -gem 'ruby-plot', "~>0.3.0" +gem 'ruby-plot', "~>0.4.0" #R.quit diff --git a/report/plot_factory.rb b/report/plot_factory.rb index 5fd20bb..a4e415a 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -79,7 +79,7 @@ module Reports end raise "no predictions performed" if x.size==0 || x[0].size==0 - RubyPlot::plot_points(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) + RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) end @@ -102,7 +102,7 @@ module Reports tp_rates = [] attribute_values.each do |value| begin - data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) names << value.to_s fp_rates << data[:fp_rate][0] tp_rates << data[:tp_rate][0] @@ -112,11 +112,50 @@ module Reports end RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates ) else - data = transform_predictions(validation_set, class_value, show_single_curves) + data = transform_roc_predictions(validation_set, class_value, show_single_curves) RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) end end + + def self.create_confidence_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) + + LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s + + if split_set_attribute + attribute_values = validation_set.get_values(split_set_attribute) + names = [] + 
confidence = [] + performance = [] + attribute_values.each do |value| + begin + data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + names << value.to_s + confidence << data[:confidence][0] + performance << data[:performance][0] + rescue + LOGGER.warn "could not create confidence plot for "+value.to_s + end + end + #RubyPlot::plot_lines(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, fp_rates, tp_rates ) + case validation_set.unique_feature_type + when "classification" + RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance) + when "regression" + RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true) + end + else + data = transform_confidence_predictions(validation_set, class_value, show_single_curves) + case validation_set.unique_feature_type + when "classification" + RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance]) + when "regression" + RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true) + end + end + end + + def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes ) LOGGER.debug "creating bar plot, out-file:"+out_file.to_s @@ -128,7 +167,7 @@ module Reports validation_set.validations.each do |v| values = [] value_attributes.each do |a| - validation_set.get_domain_for_attr(a).each do |class_value| + validation_set.get_accept_values_for_attr(a).each do |class_value| value = v.send(a) if value.is_a?(Hash) if class_value==nil @@ -222,7 +261,7 @@ module Reports end private - def self.transform_predictions(validation_set, class_value, 
add_single_folds=false) + def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false) if (validation_set.size > 1) @@ -230,7 +269,7 @@ module Reports sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} (0..validation_set.size-1).each do |i| - roc_values = validation_set.get(i).get_predictions.get_roc_values(class_value) + roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value) sum_roc_values[:predicted_values] += roc_values[:predicted_values] sum_roc_values[:confidence_values] += roc_values[:confidence_values] sum_roc_values[:actual_values] += roc_values[:actual_values] @@ -253,12 +292,51 @@ module Reports faint << false return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint } else - roc_values = validation_set.validations[0].get_predictions.get_roc_values(class_value) + roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) tp_fp_rates = get_tp_fp_rates(roc_values) return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } end end + def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false) + + if (validation_set.size > 1) + + names = []; performance = []; confidence = []; faint = [] + sum_confidence_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} + + (0..validation_set.size-1).each do |i| + confidence_values = validation_set.get(i).get_predictions.get_prediction_values(class_value) + sum_confidence_values[:predicted_values] += confidence_values[:predicted_values] + sum_confidence_values[:confidence_values] += confidence_values[:confidence_values] + sum_confidence_values[:actual_values] += confidence_values[:actual_values] + + if add_single_folds + begin + pref_conf_rates = get_performance_confidence_rates(confidence_values) + names << "fold "+i.to_s + performance << 
pref_conf_rates[:performance] + confidence << pref_conf_rates[:confidence] + faint << true + rescue + LOGGER.warn "could not get confidence vals for fold "+i.to_s + end + end + end + pref_conf_rates = get_performance_confidence_rates(sum_confidence_values, validation_set.unique_feature_type) + names << nil # "all" + performance << pref_conf_rates[:performance] + confidence << pref_conf_rates[:confidence] + faint << false + return { :names => names, :performance => performance, :confidence => confidence, :faint => faint } + + else + confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) + pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type) + return { :names => ["default"], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] } + end + end + def self.demo_rock_plot roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], :predicted_values => [1, 0, 0, 1, 0, 1], @@ -271,6 +349,61 @@ module Reports "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) end + def self.get_performance_confidence_rates(roc_values, feature_type) + + c = roc_values[:confidence_values] + p = roc_values[:predicted_values] + a = roc_values[:actual_values] + raise "no prediction values for roc-plot" if p.size==0 + + (0..p.size-2).each do |i| + ((i+1)..p.size-1).each do |j| + if c[i]<c[j] + c.swap!(i,j) + a.swap!(i,j) + p.swap!(i,j) + end + end + end + #puts c.inspect+"\n"+a.inspect+"\n"+p.inspect+"\n\n" + + perf = [] + conf = [] + + case feature_type + when "classification" + count = 0 + correct = 0 + (0..p.size-1).each do |i| + count += 1 + correct += 1 if p[i]==a[i] + if i>0 && (c[i]>=conf[-1]-0.00001) + perf.pop + conf.pop + end + perf << correct/count.to_f * 100 + conf << c[i] + end + when "regression" + count = 0 + sum_squared_error = 0 + (0..p.size-1).each do |i| + count += 1 + sum_squared_error += 
(p[i]-a[i])**2 + if i>0 && (c[i]>=conf[-1]-0.00001) + perf.pop + conf.pop + end + perf << Math.sqrt(sum_squared_error/count.to_f) + conf << c[i] + end + end + #puts perf.inspect + + return {:performance => perf,:confidence => conf} + end + + def self.get_tp_fp_rates(roc_values) c = roc_values[:confidence_values] diff --git a/report/report_content.rb b/report/report_content.rb index 6c8148e..ca04f25 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -147,8 +147,8 @@ class Reports::ReportContent image_caption=nil) image_title = "Regression plot" unless image_title - - section_regr = @xml_report.add_section(@current_section, section_title) + #section_regr = @xml_report.add_section(@current_section, section_title) + section_regr = @current_section prediction_set = validation_set.collect{ |v| v.get_predictions } if prediction_set.size>0 @@ -178,7 +178,8 @@ class Reports::ReportContent image_titles=nil, image_captions=nil) - section_roc = @xml_report.add_section(@current_section, section_title) + #section_roc = @xml_report.add_section(@current_section, section_title) + section_roc = @current_section prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? } if prediction_set.size>0 @@ -213,6 +214,49 @@ class Reports::ReportContent end + def add_confidence_plot( validation_set, + split_set_attribute = nil, + section_title="Confidence plots", + section_text=nil, + image_titles=nil, + image_captions=nil) + + #section_conf = @xml_report.add_section(@current_section, section_title) + section_conf = @current_section + prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? 
} + + if prediction_set.size>0 + if prediction_set.size!=validation_set.size + section_text += "\nWARNING: plot information not available for all validation results" + LOGGER.error "WARNING: plot information not available for all validation results:\n"+ + "validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s + end + @xml_report.add_paragraph(section_conf, section_text) if section_text + + image_title = image_titles ? image_titles[i] : "Percent Correct vs Confidence Plot" + image_caption = image_captions ? image_captions[i] : nil + plot_file_name = "conf_plot"+@tmp_file_count.to_s+".png" + @tmp_file_count += 1 + + begin + + plot_file_path = add_tmp_file(plot_file_name) + Reports::PlotFactory.create_confidence_plot( plot_file_path, prediction_set, nil, split_set_attribute, false ) + @xml_report.add_imagefigure(section_conf, image_title, plot_file_name, "PNG", 100, image_caption) + + rescue Exception => ex + msg = "WARNING could not create confidence plot: "+ex.message + LOGGER.error(msg) + rm_tmp_file(plot_file_name) + @xml_report.add_paragraph(section_conf, msg) + end + + else + @xml_report.add_paragraph(section_conf, "No prediction-confidence info for confidence plot available.") + end + + end + def add_ranking_plots( validation_set, compare_attribute, equal_attribute, diff --git a/report/report_factory.rb b/report/report_factory.rb index f48d11a..08d9418 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -68,11 +68,17 @@ module Reports::ReportFactory case val.feature_type when "classification" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") - report.add_roc_plot(validation_set) report.add_confusion_matrix(val) + report.add_section("Plots") + report.add_roc_plot(validation_set) + report.add_confidence_plot(validation_set) + report.end_section when "regression" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + 
VAL_ATTR_REGR, "Results", "Results") + report.add_section("Plots") report.add_regression_plot(validation_set, :model_uri) + report.add_confidence_plot(validation_set) + report.end_section end task.progress(90) if task @@ -104,14 +110,22 @@ module Reports::ReportFactory case validation_set.unique_feature_type when "classification" report.add_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") - report.add_roc_plot(validation_set, nil, "ROC Plots over all folds") - report.add_roc_plot(validation_set, :crossvalidation_fold) report.add_confusion_matrix(merged.validations[0]) + report.add_section("Plots") + report.add_roc_plot(validation_set) + report.add_roc_plot(validation_set, :crossvalidation_fold) + report.add_confidence_plot(validation_set) + report.add_confidence_plot(validation_set, :crossvalidation_fold) + report.end_section report.add_result(validation_set, VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds], "Results","Results",nil,"validation") when "regression" report.add_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") + report.add_section("Plots") report.add_regression_plot(validation_set, :crossvalidation_fold) + report.add_confidence_plot(validation_set) + report.add_confidence_plot(validation_set, :crossvalidation_fold) + report.end_section report.add_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results") end task.progress(90) if task @@ -194,7 +208,25 @@ module Reports::ReportFactory end when "regression" - raise OpenTox::BadRequestError.new("algorithm comparison for regression not yet implemented") + + attributes = VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold] + attributes = ([ :dataset_uri ] + attributes).uniq + + dataset_grouping.each do |validations| + + set = Reports::ValidationSet.create(validations) + + dataset = validations[0].dataset_uri + merged = set.merge([:algorithm_uri, 
:dataset_uri, :crossvalidation_id, :crossvalidation_uri]) + merged.sort(:dataset_uri) + + report.add_section("Dataset: "+dataset) + report.add_result(merged,attributes, + "Mean Results","Mean Results",nil,"crossvalidation") + report.add_paired_ttest_table(set, :algorithm_uri, :r_square) + report.end_section + end + end task.progress(100) if task report |