diff options
Diffstat (limited to 'report/report_factory.rb')
-rwxr-xr-x | report/report_factory.rb | 132 |
1 files changed, 98 insertions, 34 deletions
diff --git a/report/report_factory.rb b/report/report_factory.rb index 9995b42..2bb74ee 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -11,10 +11,10 @@ VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error, :weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square, :sample_correlation_coefficient ] -#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :average_area_under_roc, +#VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :average_area_under_roc, # :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] -VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ] -VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] +VAL_ATTR_BOX_PLOT_CLASS = [ :accuracy, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate, :positive_predictive_value, :negative_predictive_value ] +VAL_ATTR_BOX_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] VAL_ATTR_TTEST_REGR = [ :r_square, :root_mean_squared_error ] VAL_ATTR_TTEST_CLASS = [ :accuracy, :average_area_under_roc ] @@ -29,8 +29,9 @@ module Reports::ReportFactory RT_VALIDATION = "validation" RT_CV = "crossvalidation" RT_ALG_COMP = "algorithm_comparison" + RT_METHOD_COMP = "method_comparison" - REPORT_TYPES = [RT_VALIDATION, RT_CV, RT_ALG_COMP ] + REPORT_TYPES = [RT_VALIDATION, RT_CV, RT_ALG_COMP, RT_METHOD_COMP ] # creates a report of a certain type according to the validation data in validation_set # @@ -40,11 +41,13 @@ module Reports::ReportFactory def self.create_report(type, validation_set, params={}, task=nil) case type when RT_VALIDATION - create_report_validation(validation_set, task) + create_report_validation(validation_set, {}, task) when RT_CV - create_report_crossvalidation(validation_set, task) + create_report_crossvalidation(validation_set, {}, task) when RT_ALG_COMP create_report_compare_algorithms(validation_set, params, task) + when RT_METHOD_COMP + create_report_compare_methods(validation_set, params, task) else raise "unknown report type "+type.to_s end @@ -63,14 +66,26 @@ module Reports::ReportFactory end end - def self.create_report_validation(validation_set, task=nil) + def self.add_filter_warning(report, filter_params) + msg = "The validation results for this report have been filtered." + msg += " Minimum confidence: "+ filter_params[:min_confidence].to_s if + filter_params[:min_confidence]!=nil + msg += " Minimum number of predictions (sorted with confidence): "+ filter_params[:min_num_predictions].to_s if + filter_params[:min_num_predictions]!=nil + msg += " Maximum number of predictions: "+ filter_params[:max_num_predictions].to_s if + filter_params[:max_num_predictions]!=nil + report.add_warning(msg) + end + + def self.create_report_validation(validation_set, params, task=nil) raise OpenTox::BadRequestError.new("num validations is not equal to 1") unless validation_set.size==1 val = validation_set.validations[0] - pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) + pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,50) ) report = Reports::ReportContent.new("Validation report") - + add_filter_warning(report, validation_set.filter_params) if validation_set.filter_params!=nil + case val.feature_type when "classification" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") @@ -85,30 +100,35 @@ module Reports::ReportFactory report.align_last_two_images "ROC Plots" end end - report.add_confidence_plot(validation_set) + report.add_confidence_plot(validation_set, :accuracy, nil) validation_set.get_accept_values.each do |accept_value| - report.add_confidence_plot(validation_set, accept_value, nil) - report.add_confidence_plot(validation_set, nil, accept_value) + report.add_confidence_plot(validation_set, :true_positive_rate, accept_value) + report.add_confidence_plot(validation_set, :positive_predictive_value, accept_value) report.align_last_two_images "Confidence Plots" end - report.end_section when "regression" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") report.add_section("Plots") report.add_regression_plot(validation_set, :model_uri) - report.add_confidence_plot(validation_set) - report.end_section + report.add_confidence_plot(validation_set, :root_mean_squared_error, nil) + report.add_confidence_plot(validation_set, :r_square, nil) + report.align_last_two_images "Confidence Plots" end - task.progress(90) if task - + task.progress(70) if task + report.add_train_test_plot( validation_set, false, OpenTox::SubTask.create(task,70,80) ) + report.add_train_test_plot( validation_set, true, OpenTox::SubTask.create(task,80,90) ) + report.align_last_two_images "Training Test Data Distribution Plots" + report.end_section + report.add_result(validation_set, Validation::ALL_PROPS, "All Results", "All Results") report.add_predictions( validation_set ) task.progress(100) if task report end - def self.create_report_crossvalidation(validation_set, task=nil) + def self.create_report_crossvalidation(validation_set, params, task=nil) + raise OpenTox::BadRequestError.new "cv report not implemented for filter params" if validation_set.filter_params!=nil raise OpenTox::BadRequestError.new("num validations is not >1") unless validation_set.size>1 raise OpenTox::BadRequestError.new("crossvalidation-id not unique and != nil: "+ validation_set.get_values(:crossvalidation_id,false).inspect) if validation_set.unique_value(:crossvalidation_id)==nil @@ -117,7 +137,7 @@ module Reports::ReportFactory validation_set.unique_value(:num_folds).to_s+")") unless validation_set.unique_value(:num_folds).to_i==validation_set.size raise OpenTox::BadRequestError.new("num different folds is not equal to num validations") unless validation_set.num_different_values(:crossvalidation_fold)==validation_set.size raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+ - "or all classification validations") unless validation_set.unique_feature_type + "or all classification validations") unless validation_set.unique_feature_type pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) validation_set.validations.sort! do |x,y| x.crossvalidation_fold.to_f <=> y.crossvalidation_fold.to_f @@ -136,34 +156,40 @@ module Reports::ReportFactory report.add_confusion_matrix(cv_set.validations[0]) report.add_section("Plots") [nil, :crossvalidation_fold].each do |split_attribute| - if (validation_set.get_accept_values.size == 2) if validation_set.get_true_accept_value!=nil report.add_roc_plot(validation_set, validation_set.get_true_accept_value,split_attribute) else - report.add_roc_plot(validation_set, validation_set.get_accept_values[0],split_attribute) - report.add_roc_plot(validation_set, validation_set.get_accept_values[1],split_attribute) + report.add_roc_plot(validation_set, validation_set.get_accept_values[0], split_attribute) + report.add_roc_plot(validation_set, validation_set.get_accept_values[1], split_attribute) report.align_last_two_images "ROC Plots" end end - report.add_confidence_plot(validation_set,nil,nil,split_attribute) + report.add_confidence_plot(validation_set,:accuracy,nil,split_attribute) validation_set.get_accept_values.each do |accept_value| - report.add_confidence_plot(validation_set, accept_value, nil,split_attribute) - report.add_confidence_plot(validation_set, nil, accept_value,split_attribute) + report.add_confidence_plot(validation_set, :true_positive_rate, accept_value, split_attribute) + report.add_confidence_plot(validation_set, :positive_predictive_value, accept_value, split_attribute) report.align_last_two_images "Confidence Plots" end end report.end_section - report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri], + report.add_result(validation_set, + [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results") when "regression" report.add_result(cv_set, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],res_titel, res_titel, res_text) report.add_section("Plots") report.add_regression_plot(validation_set, :crossvalidation_fold) - report.add_confidence_plot(validation_set) - report.add_confidence_plot(validation_set, nil, :crossvalidation_fold) + report.add_confidence_plot(validation_set, :root_mean_squared_error, nil) + report.add_confidence_plot(validation_set, :r_square, nil) + report.align_last_two_images "Confidence Plots" + report.add_confidence_plot(validation_set, :root_mean_squared_error, nil, :crossvalidation_fold) + report.add_confidence_plot(validation_set, :r_square, nil, :crossvalidation_fold) + report.align_last_two_images "Confidence Plots Across Folds" report.end_section - report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results") + report.add_result(validation_set, + [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds, :dataset_uri, :algorithm_uri], + "Results","Results") end task.progress(90) if task @@ -213,6 +239,7 @@ module Reports::ReportFactory pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) report = Reports::ReportContent.new("Algorithm comparison report") + add_filter_warning(report, validation_set.filter_params) if validation_set.filter_params!=nil if (validation_set.num_different_values(:dataset_uri)>1) all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri]) @@ -226,11 +253,11 @@ module Reports::ReportFactory when "classification" result_attributes += VAL_ATTR_CLASS ttest_attributes = VAL_ATTR_TTEST_CLASS - bar_plot_attributes = VAL_ATTR_BAR_PLOT_CLASS + box_plot_attributes = VAL_ATTR_BOX_PLOT_CLASS else result_attributes += VAL_ATTR_REGR ttest_attributes = VAL_ATTR_TTEST_REGR - bar_plot_attributes = VAL_ATTR_BAR_PLOT_REGR + box_plot_attributes = VAL_ATTR_BOX_PLOT_REGR end if params[:ttest_attributes] and params[:ttest_attributes].chomp.size>0 @@ -241,8 +268,8 @@ module Reports::ReportFactory ttest_significance = params[:ttest_significance].to_f end - bar_plot_attributes += ttest_attributes - bar_plot_attributes.uniq! + box_plot_attributes += ttest_attributes + box_plot_attributes.uniq! result_attributes += ttest_attributes result_attributes.uniq! @@ -265,13 +292,50 @@ module Reports::ReportFactory res_text = "These performance statistics have been derieved by computing the mean of the statistics on each crossvalidation fold." report.add_result(merged,result_attributes,res_titel,res_titel,res_text) # pending: regression stats have different scales!!! - report.add_bar_plot(merged, :identifier, bar_plot_attributes) if validation_set.unique_feature_type=="classification" + report.add_box_plot(set, :identifier, box_plot_attributes) report.add_paired_ttest_tables(set, :identifier, ttest_attributes, ttest_significance) if ttest_significance>0 report.end_section end task.progress(100) if task report end + + def self.create_report_compare_methods(validation_set, params={}, task=nil) + raise OpenTox::BadRequestError.new("num validations is not >1") unless validation_set.size>1 + raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+ + "or all classification validations") unless validation_set.unique_feature_type + raise OpenTox::BadRequestError.new("number of different identifiers <2: "+ + validation_set.get_values(:identifier).inspect) if validation_set.num_different_values(:identifier)<2 + #validation_set.load_cv_attributes + + pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) + report = Reports::ReportContent.new("Method comparison report") + add_filter_warning(report, validation_set.filter_params) if validation_set.filter_params!=nil + + result_attributes = [:identifier,:validation_uri,:validation_report_uri]+VAL_ATTR_CV-[:crossvalidation_fold,:num_folds,:dataset_uri] + case validation_set.unique_feature_type + when "classification" + result_attributes += VAL_ATTR_CLASS + box_plot_attributes = VAL_ATTR_BOX_PLOT_CLASS + else + result_attributes += VAL_ATTR_REGR + box_plot_attributes = VAL_ATTR_BOX_PLOT_REGR + end + + merged = validation_set.merge([:identifier]) + merged.sort(:identifier) + + merged.validations.each do |v| + v.validation_uri = v.validation_uri.split(";").uniq.join(" ") + v.validation_report_uri = v.validation_report_uri.split(";").uniq.join(" ") if v.validation_report_uri + end + + msg = merged.validations.collect{|v| v.identifier+" ("+Lib::MergeObjects.merge_count(v).to_s+"x)"}.join(", ") + report.add_result(merged,result_attributes,"Average Results","Results",msg) + + report.add_box_plot(validation_set, :identifier, box_plot_attributes) + report + end end |