diff options
Diffstat (limited to 'report')
-rwxr-xr-x | report/environment.rb | 8 | ||||
-rw-r--r-- | report/plot_factory.rb | 197 | ||||
-rwxr-xr-x | report/report_application.rb | 9 | ||||
-rwxr-xr-x | report/report_content.rb | 185 | ||||
-rwxr-xr-x | report/report_factory.rb | 125 | ||||
-rwxr-xr-x | report/report_persistance.rb | 3 | ||||
-rw-r--r-- | report/report_service.rb | 7 | ||||
-rw-r--r-- | report/statistical_test.rb | 15 | ||||
-rwxr-xr-x | report/validation_access.rb | 60 | ||||
-rwxr-xr-x | report/validation_data.rb | 116 | ||||
-rwxr-xr-x | report/xml_report.rb | 115 |
11 files changed, 489 insertions, 351 deletions
diff --git a/report/environment.rb b/report/environment.rb index 59465aa..72320a0 100755 --- a/report/environment.rb +++ b/report/environment.rb @@ -1,12 +1,10 @@ ['rubygems', 'logger', 'fileutils', 'sinatra', 'sinatra/url_for', 'rest_client', - 'yaml', 'fileutils', 'mime/types', 'abbrev', 'rinruby', + 'yaml', 'fileutils', 'mime/types', 'abbrev', 'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g| require g end -gem 'ruby-plot', "~>0.4.0" - -#R.quit +gem 'ruby-plot', "~>0.5.0" module Reports end @@ -27,6 +25,8 @@ require "report/validation_data.rb" require "report/util.rb" require "report/statistical_test.rb" +ICON_ERROR = File.join(CONFIG[:services]["opentox-validation"],"resources/error.png") +ICON_OK = File.join(CONFIG[:services]["opentox-validation"],"resources/ok.png") diff --git a/report/plot_factory.rb b/report/plot_factory.rb index a4e415a..78d2e05 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -52,9 +52,10 @@ module Reports module PlotFactory - def self.create_regression_plot( out_file, validation_set, name_attribute ) + def self.create_regression_plot( out_files, validation_set, name_attribute ) - LOGGER.debug "Creating regression plot, out-file:"+out_file.to_s + out_files = [out_files] unless out_files.is_a?(Array) + LOGGER.debug "Creating regression plot, out-file:"+out_files.to_s names = [] x = [] @@ -79,7 +80,9 @@ module Reports end raise "no predictions performed" if x.size==0 || x[0].size==0 - RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) + out_files.each do |out_file| + RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) + end end @@ -91,36 +94,36 @@ module Reports # * the validation set is splitted into sets of validation_sets with equal attribute values # * each of theses validation sets is plotted as a roc-curve # - def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) + def self.create_roc_plot( out_files, validation_set, class_value, split_set_attribute=nil, + x_label="False positive rate", y_label="True Positive Rate" ) - LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s + out_files = [out_files] unless out_files.is_a?(Array) + LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-files:"+out_files.inspect + data = [] if split_set_attribute attribute_values = validation_set.get_values(split_set_attribute) - names = [] - fp_rates = [] - tp_rates = [] attribute_values.each do |value| begin - data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) - names << value.to_s - fp_rates << data[:fp_rate][0] - tp_rates << data[:tp_rate][0] + data << transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false ) rescue LOGGER.warn "could not create ROC plot for "+value.to_s end end - RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates ) else - data = transform_roc_predictions(validation_set, class_value, show_single_curves) - RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) + data << transform_roc_predictions(validation_set, class_value ) end + + out_files.each do |out_file| + RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data ) + end end - def self.create_confidence_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) + def self.create_confidence_plot( out_files, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) - LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s + out_files = [out_files] unless out_files.is_a?(Array) + LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_files.inspect if split_set_attribute attribute_values = validation_set.get_values(split_set_attribute) @@ -138,27 +141,32 @@ module Reports end end #RubyPlot::plot_lines(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, fp_rates, tp_rates ) - case validation_set.unique_feature_type - when "classification" - RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance) - when "regression" - RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true) + out_files.each do |out_file| + case validation_set.unique_feature_type + when "classification" + RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance) + when "regression" + RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true) + end end else data = transform_confidence_predictions(validation_set, class_value, show_single_curves) - case validation_set.unique_feature_type - when "classification" - RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance]) - when "regression" - RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true) + out_files.each do |out_file| + case validation_set.unique_feature_type + when "classification" + RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance]) + when "regression" + RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true) + end end end end - def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes ) + def self.create_bar_plot( out_files, validation_set, title_attribute, value_attributes ) - LOGGER.debug "creating bar plot, out-file:"+out_file.to_s + out_files = [out_files] unless out_files.is_a?(Array) + LOGGER.debug "creating bar plot, out-files:"+out_files.inspect data = [] titles = [] @@ -167,25 +175,35 @@ module Reports validation_set.validations.each do |v| values = [] value_attributes.each do |a| - validation_set.get_accept_values_for_attr(a).each do |class_value| - value = v.send(a) - if value.is_a?(Hash) - if class_value==nil - avg_value = 0 - value.values.each{ |val| avg_value+=val } - value = avg_value/value.values.size.to_f - else - raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value) - value = value[class_value] + + accept = validation_set.get_accept_values_for_attr(a) + if accept and accept.size>0 + accept.each do |class_value| + value = v.send(a) + if value.is_a?(Hash) + if class_value==nil + avg_value = 0 + value.values.each{ |val| avg_value+=val } + value = avg_value/value.values.size.to_f + else + raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value) + value = value[class_value] + end end + raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil + values.push(value) + labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" )) end - raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil + else + value = v.send(a) values.push(value) - labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" )) + labels.push(a.to_s.gsub("_","-")) end + end titles << v.send(title_attribute).to_s + raise "no title for '"+title_attribute.to_s+"' in validation: "+v.to_yaml if titles[-1].to_s.size==0 data << values end @@ -197,7 +215,9 @@ module Reports LOGGER.debug "bar plot labels: "+labels.inspect LOGGER.debug "bar plot data: "+data.inspect - RubyPlot::plot_bars('Bar plot', labels, data, out_file) + out_files.each do |out_file| + RubyPlot::plot_bars('Bar plot', labels, data, out_file) + end end @@ -261,43 +281,27 @@ module Reports end private - def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false) - + def self.transform_roc_predictions(validation_set, class_value, add_label=true ) if (validation_set.size > 1) - - names = []; fp_rate = []; tp_rate = []; faint = [] - sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []} - + values = { :predicted_values => [], :actual_values => [], :confidence_values => []} (0..validation_set.size-1).each do |i| roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value) - sum_roc_values[:predicted_values] += roc_values[:predicted_values] - sum_roc_values[:confidence_values] += roc_values[:confidence_values] - sum_roc_values[:actual_values] += roc_values[:actual_values] - if add_single_folds - begin - tp_fp_rates = get_tp_fp_rates(roc_values) - names << "fold "+i.to_s - fp_rate << tp_fp_rates[:fp_rate] - tp_rate << tp_fp_rates[:tp_rate] - faint << true - rescue - LOGGER.warn "could not get ROC vals for fold "+i.to_s - end - end + values[:predicted_values] += roc_values[:predicted_values] + values[:confidence_values] += roc_values[:confidence_values] + values[:actual_values] += roc_values[:actual_values] end - tp_fp_rates = get_tp_fp_rates(sum_roc_values) - names << nil # "all" - fp_rate << tp_fp_rates[:fp_rate] - tp_rate << tp_fp_rates[:tp_rate] - faint << false - return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint } else - roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) - tp_fp_rates = get_tp_fp_rates(roc_values) - return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } + values = validation_set.validations[0].get_predictions.get_prediction_values(class_value) end + tp_fp_rates = get_tp_fp_rates(values) + labels = [] + tp_fp_rates[:youden].each do |point,confidence| + labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]] + end if add_label + RubyPlot::LinePlotData.new(:name => "default", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels) end + def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false) if (validation_set.size > 1) @@ -337,16 +341,25 @@ module Reports end end - def self.demo_rock_plot - roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], - :predicted_values => [1, 0, 0, 1, 0, 1], - :actual_values => [0, 1, 0, 0, 1, 1]} + def self.demo_roc_plot +# roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], +# :predicted_values => [1, 0, 0, 1, 0, 1], +# :actual_values => [0, 1, 0, 0, 1, 1]} + roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4], + :predicted_values => [1, 1, 1, 1, 1, 1], + :actual_values => [1, 0, 1, 0, 1, 0]} tp_fp_rates = get_tp_fp_rates(roc_values) - data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } + labels = [] + tp_fp_rates[:youden].each do |point,confidence| + labels << ["confidence: "+confidence.to_s, point[0], point[1]] + end + + plot_data = [] + plot_data << RubyPlot::LinePlotData.new(:name => "testname", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels) RubyPlot::plot_lines("/tmp/plot.png", "ROC-Plot", "False positive rate", - "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) + "True Positive Rate", plot_data ) end def self.get_performance_confidence_rates(roc_values, feature_type) @@ -354,7 +367,7 @@ module Reports c = roc_values[:confidence_values] p = roc_values[:predicted_values] a = roc_values[:actual_values] - raise "no prediction values for roc-plot" if p.size==0 + raise "no prediction values for confidence plot" if p.size==0 (0..p.size-2).each do |i| ((i+1)..p.size-1).each do |j| @@ -462,21 +475,43 @@ module Reports w = w.compress_sum(c2) #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n" + youden = [] + (0..tp_rate.size-1).each do |i| + tpr = tp_rate[i]/tp_rate[-1].to_f + fpr = fp_rate[i]/fp_rate[-1].to_f + youden << tpr + (1 - fpr) + #puts youden[-1].to_s+" ("+tpr.to_s+" "+fpr.to_s+")" + end + max = youden.max + youden_hash = {} + (0..tp_rate.size-1).each do |i| + if youden[i]==max and i>0 + youden_hash[i] = c2[i] + end + end + #puts youden.inspect+"\n"+youden_hash.inspect+"\n\n" + (0..tp_rate.size-1).each do |i| tp_rate[i] = tp_rate[-1]>0 ? tp_rate[i]/tp_rate[-1].to_f*100 : 100 fp_rate[i] = fp_rate[-1]>0 ? fp_rate[i]/fp_rate[-1].to_f*100 : 100 end #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n\n" - return {:tp_rate => tp_rate,:fp_rate => fp_rate} + youden_coordinates_hash = {} + youden_hash.each do |i,c| + youden_coordinates_hash[[fp_rate[i],tp_rate[i]]] = c + end + #puts youden_coordinates_hash.inspect+"\n\n" + + return {:tp_rate => tp_rate,:fp_rate => fp_rate,:youden => youden_coordinates_hash} end end end #require "rubygems" #require "ruby-plot" -#Reports::PlotFactory::demo_ranking_plot -#Reports::PlotFactory::demo_rock_plot +##Reports::PlotFactory::demo_ranking_plot +#Reports::PlotFactory::demo_roc_plot #a = [1, 0, 1, 2, 3, 0, 2] #puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect diff --git a/report/report_application.rb b/report/report_application.rb index 258daa7..debfe07 100755 --- a/report/report_application.rb +++ b/report/report_application.rb @@ -7,7 +7,7 @@ end def get_docbook_resource(filepath) perform do |rs| - halt 404,"not found: "+filepath unless File.exist?(filepath) + raise OpenTox::NotFoundError.new"not found: "+filepath unless File.exist?(filepath) types = MIME::Types.type_for(filepath) content_type(types[0].content_type) if types and types.size>0 and types[0] result = body(File.new(filepath)) @@ -23,6 +23,10 @@ get '/'+ENV['DOCBOOK_DIRECTORY']+'/:resource' do get_docbook_resource ENV['DOCBOOK_DIRECTORY']+"/"+request.env['REQUEST_URI'].split("/")[-1] end +get '/resources/:resource' do + get_docbook_resource "resources/"+request.env['REQUEST_URI'].split("/")[-1] +end + get '/report/:type/css_style_sheet/?' do perform do |rs| "@import \""+params[:css_style_sheet]+"\";" @@ -114,7 +118,8 @@ end post '/report/:type' do task = OpenTox::Task.create("Create report",url_for("/report/"+params[:type], :full)) do |task| #,params perform do |rs| - rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil,@subjectid,task) + rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil, + params[:identifier]?params[:identifier].split(/\n|,/):nil,@subjectid,task) end end return_task(task) diff --git a/report/report_content.rb b/report/report_content.rb index 3e3c3d4..30118cf 100755 --- a/report/report_content.rb +++ b/report/report_content.rb @@ -22,36 +22,40 @@ class Reports::ReportContent @current_section = @xml_report.get_root_element end - def add_paired_ttest_table( validation_set, + def add_paired_ttest_tables( validation_set, group_attribute, - test_attribute, + test_attributes, section_title = "Paired t-test", section_text = nil) - - level = 0.90 - test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations, - group_attribute, test_attribute, "paired_ttest", level ) - puts test_matrix.inspect - titles = test_matrix[:titles] - matrix = test_matrix[:matrix] - table = [] - puts titles.inspect - table << [""] + titles - titles.size.times do |i| - table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? "-" : "+") } - end - + section_test = @xml_report.add_section(@current_section, section_title) @xml_report.add_paragraph(section_test, section_text) if section_text - @xml_report.add_table(section_test, test_attribute.to_s+", significance-level: "+level.to_s, table, true, true) + + test_attributes.each do |test_attribute| + level = 0.90 + test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations, + group_attribute, test_attribute, "paired_ttest", level ) + #puts test_matrix.inspect + titles = test_matrix[:titles] + matrix = test_matrix[:matrix] + table = [] + #puts titles.inspect + table << [""] + titles + titles.size.times do |i| + table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? "-" : "+") } + end + + @xml_report.add_table(section_test, test_attribute.to_s+", significance-level: "+level.to_s+", num results: "+ + test_matrix[:num_results].to_s, table, true, true) + end Reports::ReportStatisticalTest.quit_r end def add_predictions( validation_set, - validation_attributes=[], - section_title="Predictions", - section_text=nil, - table_title="Predictions") + validation_attributes=[], + section_title="Predictions", + section_text=nil, + table_title="Predictions") #PENING raise "validation attributes not implemented in get prediction array" if validation_attributes.size>0 @@ -99,32 +103,13 @@ class Reports::ReportContent validation_attributes, table_title, section_title="Results", - section_text=nil, - #rem_equal_vals_attr=[], - search_for_existing_report_type=nil) + section_text=nil) + #rem_equal_vals_attr=[]) section_table = @xml_report.add_section(@current_section, section_title) @xml_report.add_paragraph(section_table, section_text) if section_text vals = validation_set.to_array(validation_attributes, true) vals = vals.collect{|a| a.collect{|v| v.to_s }} - - if (search_for_existing_report_type) - vals.size.times do |i| - puts i - if (i==0) - vals[i] = [ "Reports" ] + vals[i] - puts vals[i].inspect - else - if search_for_existing_report_type=="validation" - vals[i] = [ validation_set.validations[i-1].validation_report_uri() ] + vals[i] - elsif search_for_existing_report_type=="crossvalidation" - vals[i] = [ validation_set.validations[i-1].cv_report_uri() ] + vals[i] - else - raise "illegal report type: "+search_for_existing_report_type.to_s - end - end - end - end #PENDING transpose values if there more than 4 columns, and there are more than columns than rows transpose = vals[0].size>4 && vals[0].size>vals.size @xml_report.add_table(section_table, table_title, vals, !transpose, transpose, transpose) @@ -140,12 +125,16 @@ class Reports::ReportContent Reports::XMLReportUtil::create_confusion_matrix( validation.confusion_matrix ), true, true) end + # bit of a hack to algin the last two plots in the report in to one row + def align_last_two_images( title ) + @xml_report.align_last_two_images(@current_section, title ) + end + def add_regression_plot( validation_set, name_attribute, section_title="Regression Plot", section_text=nil, - image_title=nil, - image_caption=nil) + image_title=nil) image_title = "Regression plot" unless image_title #section_regr = @xml_report.add_section(@current_section, section_title) @@ -156,28 +145,27 @@ class Reports::ReportContent section_text += "\nWARNING: regression plot information not available for all validation results" if prediction_set.size!=validation_set.size @xml_report.add_paragraph(section_regr, section_text) if section_text - plot_file_name = "regr_plot"+@tmp_file_count.to_s+".png" - @tmp_file_count += 1 begin - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_regression_plot( plot_file_path, prediction_set, name_attribute ) - @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "PNG", 100, image_caption) + plot_png = add_tmp_file("regr_plot", "png") + plot_svg = add_tmp_file("regr_plot", "svg") + Reports::PlotFactory.create_regression_plot( [plot_png[:path], plot_svg[:path]], prediction_set, name_attribute ) + @xml_report.add_imagefigure(section_regr, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) rescue Exception => ex LOGGER.error("Could not create regression plot: "+ex.message) - rm_tmp_file(plot_file_name) + rm_tmp_file(plot_png[:name]) + rm_tmp_file(plot_svg[:name]) @xml_report.add_paragraph(section_regr, "could not create regression plot: "+ex.message) end else @xml_report.add_paragraph(section_regr, "No prediction info for regression available.") end end - - def add_roc_plot( validation_set, - split_set_attribute = nil, - section_title="ROC Plots", - section_text=nil, - image_titles=nil, - image_captions=nil) + + def add_roc_plot( validation_set, + accept_value, + split_set_attribute=nil, + image_title = "ROC Plot", + section_text="") #section_roc = @xml_report.add_section(@current_section, section_title) section_roc = @current_section @@ -190,25 +178,18 @@ class Reports::ReportContent "validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s end @xml_report.add_paragraph(section_roc, section_text) if section_text - - accept_values = validation_set.get_accept_values - accept_values.size.times do |i| - class_value = accept_values[i] - image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value.to_s+"'" - image_caption = image_captions ? image_captions[i] : nil - plot_file_name = "roc_plot"+@tmp_file_count.to_s+".png" - @tmp_file_count += 1 - begin - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 ) - @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "PNG", 100, image_caption) - rescue Exception => ex - msg = "WARNING could not create roc plot for class value '"+class_value.to_s+"': "+ex.message - LOGGER.error(msg) - rm_tmp_file(plot_file_name) - @xml_report.add_paragraph(section_roc, msg) - end - end + begin + plot_png = add_tmp_file("roc_plot", "png") + plot_svg = add_tmp_file("roc_plot", "svg") + Reports::PlotFactory.create_roc_plot( [plot_png[:path], plot_svg[:path]], prediction_set, accept_value, split_set_attribute )#prediction_set.size>1 ) + @xml_report.add_imagefigure(section_roc, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) + rescue Exception => ex + msg = "WARNING could not create roc plot for class value '"+accept_value.to_s+"': "+ex.message + LOGGER.error(msg) + rm_tmp_file(plot_png[:name]) + rm_tmp_file(plot_svg[:name]) + @xml_report.add_paragraph(section_roc, msg) + end else @xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.") end @@ -216,11 +197,10 @@ class Reports::ReportContent end def add_confidence_plot( validation_set, + accept_value = nil, split_set_attribute = nil, - section_title="Confidence plots", - section_text=nil, - image_titles=nil, - image_captions=nil) + image_title = "Percent Correct vs Confidence Plot", + section_text="") #section_conf = @xml_report.add_section(@current_section, section_title) section_conf = @current_section @@ -232,31 +212,24 @@ class Reports::ReportContent LOGGER.error "WARNING: plot information not available for all validation results:\n"+ "validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s end - @xml_report.add_paragraph(section_conf, section_text) if section_text - - image_title = image_titles ? image_titles[i] : "Percent Correct vs Confidence Plot" - image_caption = image_captions ? image_captions[i] : nil - plot_file_name = "conf_plot"+@tmp_file_count.to_s+".png" - @tmp_file_count += 1 + @xml_report.add_paragraph(section_conf, section_text) if section_text and section_text.size>0 begin - - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_confidence_plot( plot_file_path, prediction_set, nil, split_set_attribute, false ) - @xml_report.add_imagefigure(section_conf, image_title, plot_file_name, "PNG", 100, image_caption) - + plot_png = add_tmp_file("conf_plot", "png") + plot_svg = add_tmp_file("conf_plot", "svg") + Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, accept_value, split_set_attribute, false ) + @xml_report.add_imagefigure(section_conf, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) rescue Exception => ex msg = "WARNING could not create confidence plot: "+ex.message LOGGER.error(msg) - rm_tmp_file(plot_file_name) + rm_tmp_file(plot_png[:name]) + rm_tmp_file(plot_svg[:name]) @xml_report.add_paragraph(section_conf, msg) - end - + end else @xml_report.add_paragraph(section_conf, "No prediction-confidence info for confidence plot available.") end - - end + end def add_ranking_plots( validation_set, compare_attribute, @@ -309,27 +282,25 @@ class Reports::ReportContent value_attributes, section_title="Bar Plot", section_text=nil, - image_title="Bar Plot", - image_caption=nil) + image_title="Bar Plot") section_bar = @xml_report.add_section(@current_section, section_title) @xml_report.add_paragraph(section_bar, section_text) if section_text - - plot_file_name = "bar_plot"+@tmp_file_count.to_s+".png" - @tmp_file_count += 1 - plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_bar_plot(plot_file_path, validation_set, title_attribute, value_attributes ) - @xml_report.add_imagefigure(section_bar, image_title, plot_file_name, "PNG", 100, image_caption) + plot_png = add_tmp_file("bar_plot", "png") + plot_svg = add_tmp_file("bar_plot", "svg") + Reports::PlotFactory.create_bar_plot([plot_png[:path], plot_svg[:path]], validation_set, title_attribute, value_attributes ) + @xml_report.add_imagefigure(section_bar, image_title, plot_png[:name], "PNG", 100, plot_svg[:name]) end private - def add_tmp_file(tmp_file_name) - + def add_tmp_file(name, extension) + tmp_file_name = name.to_s+@tmp_file_count.to_s+"."+extension.to_s + @tmp_file_count += 1 @tmp_files = {} unless @tmp_files raise "file name already exits" if @tmp_files[tmp_file_name] || (@text_files && @text_files[tmp_file_name]) tmp_file_path = Reports::Util.create_tmp_file(tmp_file_name) @tmp_files[tmp_file_name] = tmp_file_path - return tmp_file_path + return {:name => tmp_file_name, :path => tmp_file_path} end def rm_tmp_file(tmp_file_name) diff --git a/report/report_factory.rb b/report/report_factory.rb index 08d9418..d16066e 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -70,8 +70,12 @@ module Reports::ReportFactory report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results") report.add_confusion_matrix(val) report.add_section("Plots") - report.add_roc_plot(validation_set) - report.add_confidence_plot(validation_set) + ([nil] + validation_set.get_accept_values).each do |accept_value| + report.add_roc_plot(validation_set, accept_value) + report.add_confidence_plot(validation_set, accept_value) + title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions" + report.align_last_two_images title + end report.end_section when "regression" report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results") @@ -100,33 +104,42 @@ module Reports::ReportFactory raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+ +"or all classification validations") unless validation_set.unique_feature_type pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) + validation_set.validations.sort! do |x,y| + x.crossvalidation_fold.to_f <=> y.crossvalidation_fold.to_f + end + cv_set = validation_set.replace_with_cv_stats + raise unless cv_set.size==1 - merged = validation_set.merge([:crossvalidation_id]) - raise unless merged.size==1 - - #puts merged.get_values(:percent_correct_variance, false).inspect + #puts cv_set.get_values(:percent_correct_variance, false).inspect report = Reports::ReportContent.new("Crossvalidation report") + res_titel = "Crossvalidation Results" + res_text = "These performance statistics have been derieved by accumulating all predictions on the various fold (i.e. these numbers are NOT averaged results over all crossvalidation folds)." case validation_set.unique_feature_type when "classification" - report.add_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") - report.add_confusion_matrix(merged.validations[0]) + report.add_result(cv_set, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold], res_titel, res_titel, res_text) + report.add_confusion_matrix(cv_set.validations[0]) report.add_section("Plots") - report.add_roc_plot(validation_set) - report.add_roc_plot(validation_set, :crossvalidation_fold) - report.add_confidence_plot(validation_set) - report.add_confidence_plot(validation_set, :crossvalidation_fold) + [nil, :crossvalidation_fold].each do |split_attribute| + ([nil] + validation_set.get_accept_values).each do |accept_value| + report.add_roc_plot(validation_set, accept_value, split_attribute) + report.add_confidence_plot(validation_set, accept_value, split_attribute) + title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions" + title += split_attribute ? ", separated by crossvalidation fold" : " (accumulated over all folds)" + report.align_last_two_images title + end + end report.end_section - report.add_result(validation_set, VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds], - "Results","Results",nil,"validation") + report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri], + "Results","Results") when "regression" - report.add_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") + report.add_result(cv_set, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],res_titel, res_titel, res_text) report.add_section("Plots") report.add_regression_plot(validation_set, :crossvalidation_fold) report.add_confidence_plot(validation_set) - report.add_confidence_plot(validation_set, :crossvalidation_fold) + report.add_confidence_plot(validation_set, nil, :crossvalidation_fold) report.end_section - report.add_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results") + report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results") end task.progress(90) if task @@ -142,8 +155,8 @@ module Reports::ReportFactory raise OpenTox::BadRequestError.new("num validations is not >1") unless validation_set.size>1 raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+ +"or all classification validations") unless validation_set.unique_feature_type - raise OpenTox::BadRequestError.new("number of different algorithms <2: "+ - validation_set.get_values(:algorithm_uri).inspect) if validation_set.num_different_values(:algorithm_uri)<2 + raise OpenTox::BadRequestError.new("number of different identifiers <2: "+ + validation_set.get_values(:identifier).inspect) if validation_set.num_different_values(:identifier)<2 if validation_set.has_nil_values?(:crossvalidation_id) raise OpenTox::BadRequestError.new("algorithm comparison for non crossvalidation not yet implemented") @@ -160,73 +173,63 @@ module Reports::ReportFactory # groups results into sets with equal dataset if (validation_set.num_different_values(:dataset_uri)>1) + LOGGER.debug "compare report -- num different datasets: "+validation_set.num_different_values(:dataset_uri).to_s dataset_grouping = Reports::Util.group(validation_set.validations, [:dataset_uri]) # check if equal values in each group exist - Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri, :crossvalidation_fold, :num_folds, :stratified, :random_seed]) + Reports::Util.check_group_matching(dataset_grouping, [:crossvalidation_fold, :num_folds, :stratified, :random_seed]) else dataset_grouping = [ validation_set.validations ] end - # we only checked that equal validations exist in each dataset group, now check for each algorithm + # we only checked that equal validations exist in each dataset group, now check for each identifier dataset_grouping.each do |validations| - algorithm_grouping = Reports::Util.group(validations, [:algorithm_uri]) + algorithm_grouping = Reports::Util.group(validations, [:identifier]) Reports::Util.check_group_matching(algorithm_grouping, [:crossvalidation_fold, :num_folds, :stratified, :random_seed]) end pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) ) - report = Reports::ReportContent.new("Algorithm comparison report - Many datasets") + report = Reports::ReportContent.new("Algorithm comparison report") if (validation_set.num_different_values(:dataset_uri)>1) all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri]) report.add_ranking_plots(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate] ) report.add_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate]) - end - + + result_attributes = [:identifier,:crossvalidation_uri,:crossvalidation_report_uri]+VAL_ATTR_CV-[:crossvalidation_fold,:num_folds,:dataset_uri] case validation_set.unique_feature_type when "classification" - attributes = VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold] - attributes = ([ :dataset_uri ] + attributes).uniq - - dataset_grouping.each do |validations| - - set = Reports::ValidationSet.create(validations) - - dataset = validations[0].dataset_uri - merged = set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri]) - merged.sort(:dataset_uri) - - report.add_section("Dataset: "+dataset) - report.add_result(merged,attributes, - "Mean Results","Mean Results",nil,"crossvalidation") - report.add_paired_ttest_table(set, :algorithm_uri, :percent_correct) - - report.add_bar_plot(merged, :algorithm_uri, VAL_ATTR_BAR_PLOT_CLASS) - report.add_roc_plot(set, :algorithm_uri) - report.end_section - end - - when "regression" + result_attributes += VAL_ATTR_CLASS + ttest_attributes = [:percent_correct, :weighted_area_under_roc] + bar_plot_attributes = VAL_ATTR_BAR_PLOT_CLASS + else + result_attributes += VAL_ATTR_REGR + ttest_attributes = [:r_square, :root_mean_squared_error] + bar_plot_attributes = VAL_ATTR_BAR_PLOT_REGR + end - attributes = VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold] - attributes = ([ :dataset_uri ] + attributes).uniq + dataset_grouping.each do |validations| + + set = Reports::ValidationSet.create(validations) - dataset_grouping.each do |validations| + dataset = validations[0].dataset_uri + merged = set.merge([:identifier, :dataset_uri]) #, :crossvalidation_id, :crossvalidation_uri]) + merged.sort(:identifier) - set = Reports::ValidationSet.create(validations) - - dataset = validations[0].dataset_uri - merged = set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri]) - merged.sort(:dataset_uri) - - report.add_section("Dataset: "+dataset) - report.add_result(merged,attributes, - "Mean Results","Mean Results",nil,"crossvalidation") - report.add_paired_ttest_table(set, :algorithm_uri, :r_square) - report.end_section + merged.validations.each do |v| + v.crossvalidation_uri = v.crossvalidation_uri.split(";").uniq.join(" ") + v.crossvalidation_report_uri = v.crossvalidation_report_uri.split(";").uniq.join(" ") if v.crossvalidation_report_uri end + report.add_section("Dataset: "+dataset) + res_titel = "Average Results on Folds" + res_text = "These performance statistics have been derieved by computing the mean of the statistics on each crossvalidation fold." + report.add_result(merged,result_attributes,res_titel,res_titel,res_text) + # pending: regression stats have different scales!!! + report.add_bar_plot(merged, :identifier, bar_plot_attributes) if validation_set.unique_feature_type=="classification" + report.add_paired_ttest_tables(set, :identifier, ttest_attributes) + report.end_section end task.progress(100) if task report diff --git a/report/report_persistance.rb b/report/report_persistance.rb index c85ad68..e02387f 100755 --- a/report/report_persistance.rb +++ b/report/report_persistance.rb @@ -250,6 +250,7 @@ module Reports end def list_reports(type, filter_params={}) + filter_params[:report_type] = type LOGGER.debug "find reports for params: "+filter_params.inspect reports = Lib::OhmUtil.find( ReportData, filter_params ) reports.collect{ |r| r.id } @@ -314,7 +315,7 @@ end # unless prop_names.include?(key) # err = "no attribute found: '"+k.to_s+"'" # if $sinatra -# $sinatra.halt 400,err +# $sinatra.raise OpenTox::BadRequestError.newerr # else # raise err # end diff --git a/report/report_service.rb b/report/report_service.rb index 722c3d6..3e23889 100644 --- a/report/report_service.rb +++ b/report/report_service.rb @@ -60,7 +60,7 @@ module Reports # call-seq: # create_report(type, validation_uris) => string # - def create_report(type, validation_uris, subjectid=nil, task=nil) + def create_report(type, validation_uris, identifier=nil, subjectid=nil, task=nil) LOGGER.info "create report of type '"+type.to_s+"'" check_report_type(type) @@ -68,7 +68,10 @@ module Reports # step1: load validations raise OpenTox::BadRequestError.new("validation_uris missing") unless validation_uris LOGGER.debug "validation_uri(s): '"+validation_uris.inspect+"'" - validation_set = Reports::ValidationSet.new(validation_uris, subjectid) + LOGGER.debug "identifier: '"+identifier.inspect+"'" + raise "illegal num identifiers: "+identifier.size.to_s+" should be equal to num validation-uris ("+validation_uris.size.to_s+")" if + identifier and identifier.size!=validation_uris.size + validation_set = Reports::ValidationSet.new(validation_uris, identifier, subjectid) raise OpenTox::BadRequestError.new("cannot get validations from validation_uris '"+validation_uris.inspect+"'") unless validation_set and validation_set.size > 0 LOGGER.debug "loaded "+validation_set.size.to_s+" validation/s" task.progress(10) if task diff --git a/report/statistical_test.rb b/report/statistical_test.rb index 5e5ea3a..9461cac 100644 --- a/report/statistical_test.rb +++ b/report/statistical_test.rb @@ -9,8 +9,8 @@ module LIB # 1 -> array2 > array1 # def self.pairedTTest(array1, array2, significance_level=0.95) - - @@r = RinRuby.new(true,false) unless defined?(@@r) and @@r + + @@r = RinRuby.new(true,false) unless defined?(@@r) and @@r @@r.assign "v1",array1 @@r.assign "v2",array2 @@r.eval "ttest = t.test(v1,v2,paired=T)" @@ -64,7 +64,7 @@ module Reports end end end - {:titles => titles, :matrix => matrix} + {:titles => titles, :matrix => matrix, :num_results => grouped_validations[0].size} end def self.paired_ttest( validations1, validations2, attribute, significance_level=0.95 ) @@ -83,5 +83,12 @@ module Reports end -#puts LIB::StatisticalTest.pairedTTest([1,2,3],[2,3,3]) +#t1 = Time.new +#10.times do +# puts LIB::StatisticalTest.pairedTTest([1,2,3,4,5,12,4,2],[2,3,3,3,56,3,4,5]) +#end +#LIB::StatisticalTest.quitR +#t2 = Time.new +#puts t2-t1 + diff --git a/report/validation_access.rb b/report/validation_access.rb index e9b6e19..299b124 100755 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -7,8 +7,9 @@ require "lib/validation_db.rb" # class Reports::ValidationDB - def resolve_cv_uris(validation_uris, subjectid=nil) - res = [] + def resolve_cv_uris(validation_uris, identifier=nil, subjectid=nil) + res = {} + count = 0 validation_uris.each do |u| if u.to_s =~ /.*\/crossvalidation\/[0-9]+/ cv_id = u.split("/")[-1].to_i @@ -25,17 +26,20 @@ class Reports::ValidationDB raise OpenTox::NotFoundError.new "crossvalidation with id "+cv_id.to_s+" not found" unless cv raise OpenTox::BadRequestError.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished #res += Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s} - res += Validation::Validation.find( :crossvalidation_id => cv_id ).collect{|v| v.validation_uri.to_s } + Validation::Validation.find( :crossvalidation_id => cv_id, :validation_type => "crossvalidation" ).each do |v| + res[v.validation_uri.to_s] = identifier ? identifier[count] : nil + end else - res += [u.to_s] + res[u.to_s] = identifier ? identifier[count] : nil end + count += 1 end res end def init_validation(validation, uri, subjectid=nil) - raise OpenTox::BadRequestError.new "not a validation uri: "+uri.to_s unless uri =~ /.*\/[0-9]+/ + raise OpenTox::BadRequestError.new "not a validation uri: "+uri.to_s unless uri =~ /\/[0-9]+$/ validation_id = uri.split("/")[-1] raise OpenTox::BadRequestError.new "invalid validation id "+validation_id.to_s unless validation_id!=nil and (validation_id.to_i > 0 || validation_id.to_s=="0" ) @@ -56,6 +60,31 @@ class Reports::ValidationDB subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset end end + + def init_validation_from_cv_statistics( validation, cv_uri, subjectid=nil ) + + raise OpenTox::BadRequestError.new "not a crossvalidation uri: "+cv_uri.to_s unless cv_uri.uri? and cv_uri =~ /crossvalidation.*\/[0-9]+$/ + cv_id = cv_uri.split("/")[-1] + raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+cv_uri.to_s if + AA_SERVER and !OpenTox::Authorization.authorized?(cv_uri,"GET",subjectid) + cv = Validation::Crossvalidation.get(cv_id) + raise OpenTox::NotFoundError.new "crossvalidation with id "+crossvalidation_id.to_s+" not found" unless cv + raise OpenTox::BadRequestError.new "crossvalidation with id "+crossvalidation_id.to_s+" is not finished yet" unless cv.finished + v = Validation::Validation.from_cv_statistics(cv_id, subjectid) + (Validation::VAL_PROPS + Validation::VAL_CV_PROPS).each do |p| + validation.send("#{p.to_s}=".to_sym, v.send(p)) + end + {:classification_statistics => Validation::VAL_CLASS_PROPS, + :regression_statistics => Validation::VAL_REGR_PROPS}.each do |subset_name,subset_props| + subset = v.send(subset_name) + subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset + end + #cv props + Validation::CROSS_VAL_PROPS.each do |p| + validation.send("#{p.to_s}=".to_sym, cv.send(p.to_s)) + end + validation.crossvalidation_uri = cv_uri + end def init_cv(validation) @@ -71,14 +100,17 @@ class Reports::ValidationDB def get_predictions(validation, subjectid=nil, task=nil) Lib::OTPredictions.new( validation.feature_type, validation.test_dataset_uri, validation.test_target_dataset_uri, validation.prediction_feature, validation.prediction_dataset_uri, - validation.predicted_variable, subjectid, task) + validation.predicted_variable, validation.predicted_confidence, subjectid, task) end def get_accept_values( validation, subjectid=nil ) # PENDING So far, one has to load the whole dataset to get the accept_value from ambit - d = OpenTox::Dataset.find( validation.test_target_dataset_uri, subjectid ) - accept_values = d.features[validation.prediction_feature][OT.acceptValue] - raise "cannot get accept values from dataset "+validation.test_target_dataset_uri.to_s+" for feature "+ + test_target_dataset = validation.test_target_dataset_uri + test_target_dataset = validation.test_dataset_uri unless test_target_dataset + d = Lib::DatasetCache.find( test_target_dataset, subjectid ) + raise "cannot get test target dataset for accept values, dataset: "+test_target_dataset.to_s unless d + accept_values = d.accept_values(validation.prediction_feature) + raise "cannot get accept values from dataset "+test_target_dataset.to_s+" for feature "+ validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil accept_values end @@ -92,8 +124,14 @@ class Reports::ValidationDB raise "cannot derive model depended props for merged validations" if Lib::MergeObjects.merged?(validation) model = OpenTox::Model::Generic.find(validation.model_uri, subjectid) raise OpenTox::NotFoundError.new "model not found '"+validation.model_uri+"'" unless model - model.metadata[OT.predictedVariables] - #get_model(validation).predictedVariables + model.predicted_variable(subjectid) + end + + def predicted_confidence(validation, subjectid=nil) + raise "cannot derive model depended props for merged validations" if Lib::MergeObjects.merged?(validation) + model = OpenTox::Model::Generic.find(validation.model_uri, subjectid) + raise OpenTox::NotFoundError.new "model not found '"+validation.model_uri+"'" unless model + model.predicted_confidence(subjectid) end # private diff --git a/report/validation_data.rb b/report/validation_data.rb index 42b179b..fa0af8e 100755 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -1,6 +1,7 @@ # the variance is computed when merging results for these attributes -VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, :r_square, :accuracy ] +VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, + :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy ] VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc ] #:accuracy ] ATTR_NICE_NAME = {} @@ -51,21 +52,31 @@ end module Reports + @@validation_access = ValidationDB.new + @@persistance = ReportService.persistance + + def self.persistance + @@persistance + end + + def self.validation_access + @@validation_access + end + + # for overwriting validation source (other than using webservices) + def self.reset_validation_access(validation_access) + @@validation_access = validation_access + end + + # = ReportValidation # # contains all values of a validation object # class ReportValidation - @@validation_access = ValidationDB.new - - # for overwriting validation source (other than using webservices) - def self.reset_validation_access(validation_access) - @@validation_access = validation_access - end - - def self.resolve_cv_uris(validation_uris, subjectid) - @@validation_access.resolve_cv_uris(validation_uris, subjectid) + def self.resolve_cv_uris(validation_uris, identifier, subjectid) + Reports.validation_access.resolve_cv_uris(validation_uris, identifier, subjectid) end # create member variables for all validation properties @@ -74,13 +85,20 @@ module Reports VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym } @@validation_attributes.each{ |a| attr_accessor a } - attr_reader :predictions + attr_reader :predictions, :subjectid + attr_accessor :identifier, :validation_report_uri, :crossvalidation_report_uri def initialize(uri = nil, subjectid = nil) - @@validation_access.init_validation(self, uri, subjectid) if uri + Reports.validation_access.init_validation(self, uri, subjectid) if uri @subjectid = subjectid #raise "subjectid is nil" unless subjectid end + + def self.from_cv_statistics( cv_uri, subjectid = nil ) + v = ReportValidation.new(nil, subjectid) + Reports.validation_access.init_validation_from_cv_statistics(v, cv_uri, subjectid) + v + end # returns/creates predictions, cache to save rest-calls/computation time # @@ -97,7 +115,7 @@ module Reports task.progress(100) if task nil else - @predictions = @@validation_access.get_predictions( self, @subjectid, task ) + @predictions = Reports.validation_access.get_predictions( self, @subjectid, task ) end end end @@ -105,7 +123,7 @@ module Reports # returns the predictions feature values (i.e. the domain of the class attribute) # def get_accept_values() - @accept_values = @@validation_access.get_accept_values(self, @subjectid) unless @accept_values + @accept_values = Reports.validation_access.get_accept_values(self, @subjectid) unless @accept_values @accept_values end @@ -113,36 +131,26 @@ module Reports # def feature_type return @feature_type if @feature_type!=nil - @feature_type = @@validation_access.feature_type(self, @subjectid) + @feature_type = Reports.validation_access.feature_type(self, @subjectid) end def predicted_variable return @predicted_variable if @predicted_variable!=nil - @predicted_variable = @@validation_access.predicted_variable(self, @subjectid) + @predicted_variable = Reports.validation_access.predicted_variable(self, @subjectid) end + def predicted_confidence + return @predicted_confidence if @predicted_confidence!=nil + @predicted_confidence = Reports.validation_access.predicted_confidence(self, @subjectid) + end + # loads all crossvalidation attributes, of the corresponding cv into this object def load_cv_attributes raise "crossvalidation-id not set" unless @crossvalidation_id - @@validation_access.init_cv(self) - end - - @@persistance = ReportService.persistance - - def validation_report_uri - #puts "searching for validation report: "+self.validation_uri.to_s - return @validation_report_uri if @validation_report_uri!=nil - ids = @@persistance.list_reports("validation",{:validation_uris=>validation_uri }) - @validation_report_uri = ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0 - end - - def cv_report_uri - #puts "searching for cv report: "+self.crossvalidation_uri.to_s - return @cv_report_uri if @cv_report_uri!=nil - raise "no cv uri "+to_yaml unless self.crossvalidation_uri - ids = @@persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s }) - #puts "-> "+ids.inspect - @cv_report_uri = ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0 + Reports.validation_access.init_cv(self) + # load cv report + ids = Reports.persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s }) + @crossvalidation_report_uri = ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0 end def clone_validation @@ -158,13 +166,20 @@ module Reports # class ValidationSet - def initialize(validation_uris=nil, subjectid=nil) + def initialize(validation_uris=nil, identifier=nil, subjectid=nil) @unique_values = {} - validation_uris = ReportValidation.resolve_cv_uris(validation_uris, subjectid) if validation_uris - @validations = Array.new - validation_uris.each{|u| @validations.push(ReportValidation.new(u, subjectid))} if validation_uris + @validations = [] + if validation_uris + validation_uri_and_ids = ReportValidation.resolve_cv_uris(validation_uris, identifier, subjectid) + validation_uri_and_ids.each do |u,id| + v = ReportValidation.new(u, subjectid) + v.identifier = id if id + ids = Reports.persistance.list_reports("validation",{:validation_uris=>v.validation_uri }) + v.validation_report_uri = ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0 + @validations << v + end + end end - def self.create(validations) set = ValidationSet.new @@ -393,6 +408,17 @@ module Reports return array end + def replace_with_cv_stats + new_set = ValidationSet.new + grouping = Util.group(@validations, [:crossvalidation_id]) + grouping.each do |g| + v = ReportValidation.from_cv_statistics(g[0].crossvalidation_uri, g[0].subjectid) + v.identifier = g.collect{|vv| vv.identifier}.uniq.join(";") + new_set.validations << v + end + return new_set + end + # creates a new validaiton set, that contains merged validations # all validation with equal values for __equal_attributes__ are summed up in one validation, i.e. merged # @@ -409,19 +435,17 @@ module Reports #compute grouping grouping = Util.group(@validations, equal_attributes) #puts "groups "+grouping.size.to_s - + + #merge Lib::MergeObjects.register_merge_attributes( ReportValidation, - Validation::VAL_MERGE_AVG,Validation::VAL_MERGE_SUM,Validation::VAL_MERGE_GENERAL) unless + Validation::VAL_MERGE_AVG+Validation::VAL_MERGE_SUM,[],Validation::VAL_MERGE_GENERAL+[:identifier, :validation_report_uri, :crossvalidation_report_uri]) unless Lib::MergeObjects.merge_attributes_registered?(ReportValidation) - - #merge grouping.each do |g| - new_set.validations.push(g[0].clone_validation) + new_set.validations << g[0].clone_validation g[1..-1].each do |v| new_set.validations[-1] = Lib::MergeObjects.merge_objects(new_set.validations[-1],v) end end - return new_set end diff --git a/report/xml_report.rb b/report/xml_report.rb index 4fbfae3..5be5fdc 100755 --- a/report/xml_report.rb +++ b/report/xml_report.rb @@ -93,50 +93,89 @@ module Reports end end - # adds a new image to a REXML:Element, returns the figure as element - # - # example: <tt>add_imagefigure( section2, "Nice graph", "/images/graph1.svg", "SVG", "This graph shows..." )</tt> - # - # call-seq: - # add_imagefigure( element, title, path, filetype, caption = nil ) => REXML::Element - # - def add_imagefigure( element, title, path, filetype, size_pct=100, caption = nil ) - + def imagefigure( title, path, filetype, size_pct=100, altPath = nil ) figure = Reports::XMLReportUtil.attribute_element("figure", {"float" => 0}) figure << Reports::XMLReportUtil.text_element("title", title) - media = Element.new("mediaobject") + + #media = Element.new("mediaobject") + media = Element.new("inlinemediaobject") image = Element.new("imageobject") imagedata = Reports::XMLReportUtil.attribute_element("imagedata", - {"fileref" => path, "format"=>filetype, "contentwidth" => size_pct.to_s+"%", - #"contentdepth"=> "4in" - })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"}) + {"fileref" => path, "format"=>filetype, "contentwidth" => size_pct.to_s+"%", + #"contentdepth"=> "4in" + })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"}) #imagedata = Reports::XMLReportUtil.attribute_element("imagedata",{"width" => "6in", "fileref" => path, "format"=>filetype}) @resource_path_elements[imagedata] = "fileref" image << imagedata - media << image + #media << Reports::XMLReportUtil.text_element("caption", caption) if caption + #figure << media -# ulink = Element.new("ulink") -# ulink.add_attributes({"url" => "http://google.de"}) -# ulink << image -# media << ulink + ulink = Element.new("ulink") + ulink.add_attributes({"url" => altPath ? altPath : path }) + @resource_path_elements[ulink] = "url" + ulink << media - media << Reports::XMLReportUtil.text_element("caption", caption) if caption - figure << media + figure << ulink + figure + end + + # adds a new image to a REXML:Element, returns the figure as element + # + # example: <tt>add_imagefigure( section2, "Nice graph", "/images/graph1.svg", "SVG", "This graph shows..." )</tt> + # + # call-seq: + # add_imagefigure( element, title, path, filetype, caption = nil ) => REXML::Element + # + def add_imagefigure( element, title, path, filetype, size_pct=100, altPath = nil ) + figure = imagefigure( title, path, filetype, size_pct, altPath) element << figure - return figure + return figure end - def add_image( element, url ) + # bit of a hack to algin the last two figures that have been added to element into one row + def align_last_two_images( element, title ) + imgs = [] + element.elements.each do |e| + imgs[0] = imgs[1] + imgs[1] = e if e.name=="figure" + end + if (imgs[0] and imgs[1]) + element.delete_element imgs[0] + element.delete_element imgs[1] + add_imagefigures_in_row( element, imgs, title ) + end + end + + def add_imagefigures_in_row( element, imagefigures, title ) + params = {"frame" => "none", "colsep" => 0, "rowsep" => 0 } + table = Reports::XMLReportUtil.attribute_element("table",params) + table << Reports::XMLReportUtil.text_element("title", title) + tgroup = Reports::XMLReportUtil.attribute_element("tgroup",{"cols" => 2}) + tbody = Element.new("tbody") + row = Element.new("row") + imagefigures.each do |f| + entry = Element.new("entry") + entry << f + row << entry + end + tbody << row + tgroup << tbody + table << tgroup + element << table + table + end + + def add_image( element, url ) #, scale=false ) image = Element.new("imageobject") - imagedata = Reports::XMLReportUtil.attribute_element("imagedata", - {"fileref" => url, "format"=>"PNG", "contentwidth" => "2in" }) #PENDING: do not hardcode size + params = {"fileref" => url, "format"=>"PNG"} + #params["contentwidth"] = "2in" + imagedata = Reports::XMLReportUtil.attribute_element("imagedata",params) image << imagedata element << image return image end - # adds a table to a REXML:Element, _table_values_ should be a multi-dimensional-array, returns the table as element # # call-seq: @@ -144,7 +183,7 @@ module Reports # def add_table( element, title, table_values, first_row_header=true, first_col_header=false, transpose=false, auto_link_urls=true ) - raise "table_values is not mulit-dimensional-array" unless table_values && table_values.is_a?(Array) && table_values[0].is_a?(Array) + raise "table_values is not multi-dimensional-array" unless table_values && table_values.is_a?(Array) && table_values[0].is_a?(Array) values = transpose ? table_values.transpose : table_values @@ -184,12 +223,20 @@ module Reports row = Element.new("row") r.each do |v| entry = Element.new("entry") - if auto_link_urls && v.to_s =~ /depict/ || v.to_s =~ /image\/png$/ #PENDING + if auto_link_urls && v.to_s =~ /depict/ || v.to_s =~ /png$/ #PENDING add_image(entry, v.to_s) elsif auto_link_urls && v.to_s =~ /^http(s?):\/\// - add_url(entry, v.to_s, v.to_s) - else - entry.text = v.to_s + #add_url(entry, v.to_s, v.to_s) + v.to_s.split(" ").each do |vv| + add_url(entry, vv.to_s, vv.to_s) + space = Element.new("para") + space.text = " " + entry << space + end + else + text = v.to_s + text.gsub!(/\+\-/,"±") + entry << Text.new(text, true, nil, true) end row << entry end @@ -221,11 +268,15 @@ module Reports return list end - def add_url (element, url, description=url ) - + def url_element( url, description=url ) ulink = Element.new("ulink") ulink.add_attributes({"url" => url}) ulink.text = description + ulink + end + + def add_url (element, url, description=url ) + ulink = url_element(url, description) element << ulink return ulink end |