summaryrefslogtreecommitdiff
path: root/report
diff options
context:
space:
mode:
Diffstat (limited to 'report')
-rwxr-xr-xreport/environment.rb8
-rw-r--r--report/plot_factory.rb197
-rwxr-xr-xreport/report_application.rb9
-rwxr-xr-xreport/report_content.rb185
-rwxr-xr-xreport/report_factory.rb125
-rwxr-xr-xreport/report_persistance.rb3
-rw-r--r--report/report_service.rb7
-rw-r--r--report/statistical_test.rb15
-rwxr-xr-xreport/validation_access.rb60
-rwxr-xr-xreport/validation_data.rb116
-rwxr-xr-xreport/xml_report.rb115
11 files changed, 489 insertions, 351 deletions
diff --git a/report/environment.rb b/report/environment.rb
index 59465aa..72320a0 100755
--- a/report/environment.rb
+++ b/report/environment.rb
@@ -1,12 +1,10 @@
['rubygems', 'logger', 'fileutils', 'sinatra', 'sinatra/url_for', 'rest_client',
- 'yaml', 'fileutils', 'mime/types', 'abbrev', 'rinruby',
+ 'yaml', 'fileutils', 'mime/types', 'abbrev',
'rexml/document', 'ruby-plot', 'opentox-ruby' ].each do |g|
require g
end
-gem 'ruby-plot', "~>0.4.0"
-
-#R.quit
+gem 'ruby-plot', "~>0.5.0"
module Reports
end
@@ -27,6 +25,8 @@ require "report/validation_data.rb"
require "report/util.rb"
require "report/statistical_test.rb"
+ICON_ERROR = File.join(CONFIG[:services]["opentox-validation"],"resources/error.png")
+ICON_OK = File.join(CONFIG[:services]["opentox-validation"],"resources/ok.png")
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index a4e415a..78d2e05 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -52,9 +52,10 @@ module Reports
module PlotFactory
- def self.create_regression_plot( out_file, validation_set, name_attribute )
+ def self.create_regression_plot( out_files, validation_set, name_attribute )
- LOGGER.debug "Creating regression plot, out-file:"+out_file.to_s
+ out_files = [out_files] unless out_files.is_a?(Array)
+ LOGGER.debug "Creating regression plot, out-file:"+out_files.to_s
names = []
x = []
@@ -79,7 +80,9 @@ module Reports
end
raise "no predictions performed" if x.size==0 || x[0].size==0
- RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y )
+ out_files.each do |out_file|
+ RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y )
+ end
end
@@ -91,36 +94,36 @@ module Reports
# * the validation set is splitted into sets of validation_sets with equal attribute values
# * each of theses validation sets is plotted as a roc-curve
#
- def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
+ def self.create_roc_plot( out_files, validation_set, class_value, split_set_attribute=nil,
+ x_label="False positive rate", y_label="True Positive Rate" )
- LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s
+ out_files = [out_files] unless out_files.is_a?(Array)
+ LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-files:"+out_files.inspect
+ data = []
if split_set_attribute
attribute_values = validation_set.get_values(split_set_attribute)
- names = []
- fp_rates = []
- tp_rates = []
attribute_values.each do |value|
begin
- data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
- names << value.to_s
- fp_rates << data[:fp_rate][0]
- tp_rates << data[:tp_rate][0]
+ data << transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false )
rescue
LOGGER.warn "could not create ROC plot for "+value.to_s
end
end
- RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates )
else
- data = transform_roc_predictions(validation_set, class_value, show_single_curves)
- RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
+ data << transform_roc_predictions(validation_set, class_value )
end
+
+ out_files.each do |out_file|
+ RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data )
+ end
end
- def self.create_confidence_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
+ def self.create_confidence_plot( out_files, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
- LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s
+ out_files = [out_files] unless out_files.is_a?(Array)
+ LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_files.inspect
if split_set_attribute
attribute_values = validation_set.get_values(split_set_attribute)
@@ -138,27 +141,32 @@ module Reports
end
end
#RubyPlot::plot_lines(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, fp_rates, tp_rates )
- case validation_set.unique_feature_type
- when "classification"
- RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance)
- when "regression"
- RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true)
+ out_files.each do |out_file|
+ case validation_set.unique_feature_type
+ when "classification"
+ RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance)
+ when "regression"
+ RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true)
+ end
end
else
data = transform_confidence_predictions(validation_set, class_value, show_single_curves)
- case validation_set.unique_feature_type
- when "classification"
- RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance])
- when "regression"
- RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true)
+ out_files.each do |out_file|
+ case validation_set.unique_feature_type
+ when "classification"
+ RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance])
+ when "regression"
+ RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true)
+ end
end
end
end
- def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes )
+ def self.create_bar_plot( out_files, validation_set, title_attribute, value_attributes )
- LOGGER.debug "creating bar plot, out-file:"+out_file.to_s
+ out_files = [out_files] unless out_files.is_a?(Array)
+ LOGGER.debug "creating bar plot, out-files:"+out_files.inspect
data = []
titles = []
@@ -167,25 +175,35 @@ module Reports
validation_set.validations.each do |v|
values = []
value_attributes.each do |a|
- validation_set.get_accept_values_for_attr(a).each do |class_value|
- value = v.send(a)
- if value.is_a?(Hash)
- if class_value==nil
- avg_value = 0
- value.values.each{ |val| avg_value+=val }
- value = avg_value/value.values.size.to_f
- else
- raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value)
- value = value[class_value]
+
+ accept = validation_set.get_accept_values_for_attr(a)
+ if accept and accept.size>0
+ accept.each do |class_value|
+ value = v.send(a)
+ if value.is_a?(Hash)
+ if class_value==nil
+ avg_value = 0
+ value.values.each{ |val| avg_value+=val }
+ value = avg_value/value.values.size.to_f
+ else
+ raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value)
+ value = value[class_value]
+ end
end
+ raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil
+ values.push(value)
+ labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" ))
end
- raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil
+ else
+ value = v.send(a)
values.push(value)
- labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" ))
+ labels.push(a.to_s.gsub("_","-"))
end
+
end
titles << v.send(title_attribute).to_s
+ raise "no title for '"+title_attribute.to_s+"' in validation: "+v.to_yaml if titles[-1].to_s.size==0
data << values
end
@@ -197,7 +215,9 @@ module Reports
LOGGER.debug "bar plot labels: "+labels.inspect
LOGGER.debug "bar plot data: "+data.inspect
- RubyPlot::plot_bars('Bar plot', labels, data, out_file)
+ out_files.each do |out_file|
+ RubyPlot::plot_bars('Bar plot', labels, data, out_file)
+ end
end
@@ -261,43 +281,27 @@ module Reports
end
private
- def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false)
-
+ def self.transform_roc_predictions(validation_set, class_value, add_label=true )
if (validation_set.size > 1)
-
- names = []; fp_rate = []; tp_rate = []; faint = []
- sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
-
+ values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
(0..validation_set.size-1).each do |i|
roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
- sum_roc_values[:predicted_values] += roc_values[:predicted_values]
- sum_roc_values[:confidence_values] += roc_values[:confidence_values]
- sum_roc_values[:actual_values] += roc_values[:actual_values]
- if add_single_folds
- begin
- tp_fp_rates = get_tp_fp_rates(roc_values)
- names << "fold "+i.to_s
- fp_rate << tp_fp_rates[:fp_rate]
- tp_rate << tp_fp_rates[:tp_rate]
- faint << true
- rescue
- LOGGER.warn "could not get ROC vals for fold "+i.to_s
- end
- end
+ values[:predicted_values] += roc_values[:predicted_values]
+ values[:confidence_values] += roc_values[:confidence_values]
+ values[:actual_values] += roc_values[:actual_values]
end
- tp_fp_rates = get_tp_fp_rates(sum_roc_values)
- names << nil # "all"
- fp_rate << tp_fp_rates[:fp_rate]
- tp_rate << tp_fp_rates[:tp_rate]
- faint << false
- return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint }
else
- roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
- tp_fp_rates = get_tp_fp_rates(roc_values)
- return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
+ values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
end
+ tp_fp_rates = get_tp_fp_rates(values)
+ labels = []
+ tp_fp_rates[:youden].each do |point,confidence|
+ labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]]
+ end if add_label
+ RubyPlot::LinePlotData.new(:name => "default", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels)
end
+
def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false)
if (validation_set.size > 1)
@@ -337,16 +341,25 @@ module Reports
end
end
- def self.demo_rock_plot
- roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6],
- :predicted_values => [1, 0, 0, 1, 0, 1],
- :actual_values => [0, 1, 0, 0, 1, 1]}
+ def self.demo_roc_plot
+# roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6],
+# :predicted_values => [1, 0, 0, 1, 0, 1],
+# :actual_values => [0, 1, 0, 0, 1, 1]}
+ roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
+ :predicted_values => [1, 1, 1, 1, 1, 1],
+ :actual_values => [1, 0, 1, 0, 1, 0]}
tp_fp_rates = get_tp_fp_rates(roc_values)
- data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
+ labels = []
+ tp_fp_rates[:youden].each do |point,confidence|
+ labels << ["confidence: "+confidence.to_s, point[0], point[1]]
+ end
+
+ plot_data = []
+ plot_data << RubyPlot::LinePlotData.new(:name => "testname", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels)
RubyPlot::plot_lines("/tmp/plot.png",
"ROC-Plot",
"False positive rate",
- "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
+ "True Positive Rate", plot_data )
end
def self.get_performance_confidence_rates(roc_values, feature_type)
@@ -354,7 +367,7 @@ module Reports
c = roc_values[:confidence_values]
p = roc_values[:predicted_values]
a = roc_values[:actual_values]
- raise "no prediction values for roc-plot" if p.size==0
+ raise "no prediction values for confidence plot" if p.size==0
(0..p.size-2).each do |i|
((i+1)..p.size-1).each do |j|
@@ -462,21 +475,43 @@ module Reports
w = w.compress_sum(c2)
#puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n"
+ youden = []
+ (0..tp_rate.size-1).each do |i|
+ tpr = tp_rate[i]/tp_rate[-1].to_f
+ fpr = fp_rate[i]/fp_rate[-1].to_f
+ youden << tpr + (1 - fpr)
+ #puts youden[-1].to_s+" ("+tpr.to_s+" "+fpr.to_s+")"
+ end
+ max = youden.max
+ youden_hash = {}
+ (0..tp_rate.size-1).each do |i|
+ if youden[i]==max and i>0
+ youden_hash[i] = c2[i]
+ end
+ end
+ #puts youden.inspect+"\n"+youden_hash.inspect+"\n\n"
+
(0..tp_rate.size-1).each do |i|
tp_rate[i] = tp_rate[-1]>0 ? tp_rate[i]/tp_rate[-1].to_f*100 : 100
fp_rate[i] = fp_rate[-1]>0 ? fp_rate[i]/fp_rate[-1].to_f*100 : 100
end
#puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n\n"
- return {:tp_rate => tp_rate,:fp_rate => fp_rate}
+ youden_coordinates_hash = {}
+ youden_hash.each do |i,c|
+ youden_coordinates_hash[[fp_rate[i],tp_rate[i]]] = c
+ end
+ #puts youden_coordinates_hash.inspect+"\n\n"
+
+ return {:tp_rate => tp_rate,:fp_rate => fp_rate,:youden => youden_coordinates_hash}
end
end
end
#require "rubygems"
#require "ruby-plot"
-#Reports::PlotFactory::demo_ranking_plot
-#Reports::PlotFactory::demo_rock_plot
+##Reports::PlotFactory::demo_ranking_plot
+#Reports::PlotFactory::demo_roc_plot
#a = [1, 0, 1, 2, 3, 0, 2]
#puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect
diff --git a/report/report_application.rb b/report/report_application.rb
index 258daa7..debfe07 100755
--- a/report/report_application.rb
+++ b/report/report_application.rb
@@ -7,7 +7,7 @@ end
def get_docbook_resource(filepath)
perform do |rs|
- halt 404,"not found: "+filepath unless File.exist?(filepath)
+ raise OpenTox::NotFoundError.new"not found: "+filepath unless File.exist?(filepath)
types = MIME::Types.type_for(filepath)
content_type(types[0].content_type) if types and types.size>0 and types[0]
result = body(File.new(filepath))
@@ -23,6 +23,10 @@ get '/'+ENV['DOCBOOK_DIRECTORY']+'/:resource' do
get_docbook_resource ENV['DOCBOOK_DIRECTORY']+"/"+request.env['REQUEST_URI'].split("/")[-1]
end
+get '/resources/:resource' do
+ get_docbook_resource "resources/"+request.env['REQUEST_URI'].split("/")[-1]
+end
+
get '/report/:type/css_style_sheet/?' do
perform do |rs|
"@import \""+params[:css_style_sheet]+"\";"
@@ -114,7 +118,8 @@ end
post '/report/:type' do
task = OpenTox::Task.create("Create report",url_for("/report/"+params[:type], :full)) do |task| #,params
perform do |rs|
- rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil,@subjectid,task)
+ rs.create_report(params[:type],params[:validation_uris]?params[:validation_uris].split(/\n|,/):nil,
+ params[:identifier]?params[:identifier].split(/\n|,/):nil,@subjectid,task)
end
end
return_task(task)
diff --git a/report/report_content.rb b/report/report_content.rb
index 3e3c3d4..30118cf 100755
--- a/report/report_content.rb
+++ b/report/report_content.rb
@@ -22,36 +22,40 @@ class Reports::ReportContent
@current_section = @xml_report.get_root_element
end
- def add_paired_ttest_table( validation_set,
+ def add_paired_ttest_tables( validation_set,
group_attribute,
- test_attribute,
+ test_attributes,
section_title = "Paired t-test",
section_text = nil)
-
- level = 0.90
- test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations,
- group_attribute, test_attribute, "paired_ttest", level )
- puts test_matrix.inspect
- titles = test_matrix[:titles]
- matrix = test_matrix[:matrix]
- table = []
- puts titles.inspect
- table << [""] + titles
- titles.size.times do |i|
- table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? "-" : "+") }
- end
-
+
section_test = @xml_report.add_section(@current_section, section_title)
@xml_report.add_paragraph(section_test, section_text) if section_text
- @xml_report.add_table(section_test, test_attribute.to_s+", significance-level: "+level.to_s, table, true, true)
+
+ test_attributes.each do |test_attribute|
+ level = 0.90
+ test_matrix = Reports::ReportStatisticalTest.test_matrix( validation_set.validations,
+ group_attribute, test_attribute, "paired_ttest", level )
+ #puts test_matrix.inspect
+ titles = test_matrix[:titles]
+ matrix = test_matrix[:matrix]
+ table = []
+ #puts titles.inspect
+ table << [""] + titles
+ titles.size.times do |i|
+ table << [titles[i]] + matrix[i].collect{|v| (v==nil || v==0) ? "" : (v<0 ? "-" : "+") }
+ end
+
+ @xml_report.add_table(section_test, test_attribute.to_s+", significance-level: "+level.to_s+", num results: "+
+ test_matrix[:num_results].to_s, table, true, true)
+ end
Reports::ReportStatisticalTest.quit_r
end
def add_predictions( validation_set,
- validation_attributes=[],
- section_title="Predictions",
- section_text=nil,
- table_title="Predictions")
+ validation_attributes=[],
+ section_title="Predictions",
+ section_text=nil,
+ table_title="Predictions")
#PENING
raise "validation attributes not implemented in get prediction array" if validation_attributes.size>0
@@ -99,32 +103,13 @@ class Reports::ReportContent
validation_attributes,
table_title,
section_title="Results",
- section_text=nil,
- #rem_equal_vals_attr=[],
- search_for_existing_report_type=nil)
+ section_text=nil)
+ #rem_equal_vals_attr=[])
section_table = @xml_report.add_section(@current_section, section_title)
@xml_report.add_paragraph(section_table, section_text) if section_text
vals = validation_set.to_array(validation_attributes, true)
vals = vals.collect{|a| a.collect{|v| v.to_s }}
-
- if (search_for_existing_report_type)
- vals.size.times do |i|
- puts i
- if (i==0)
- vals[i] = [ "Reports" ] + vals[i]
- puts vals[i].inspect
- else
- if search_for_existing_report_type=="validation"
- vals[i] = [ validation_set.validations[i-1].validation_report_uri() ] + vals[i]
- elsif search_for_existing_report_type=="crossvalidation"
- vals[i] = [ validation_set.validations[i-1].cv_report_uri() ] + vals[i]
- else
- raise "illegal report type: "+search_for_existing_report_type.to_s
- end
- end
- end
- end
#PENDING transpose values if there more than 4 columns, and there are more than columns than rows
transpose = vals[0].size>4 && vals[0].size>vals.size
@xml_report.add_table(section_table, table_title, vals, !transpose, transpose, transpose)
@@ -140,12 +125,16 @@ class Reports::ReportContent
Reports::XMLReportUtil::create_confusion_matrix( validation.confusion_matrix ), true, true)
end
+ # bit of a hack to algin the last two plots in the report in to one row
+ def align_last_two_images( title )
+ @xml_report.align_last_two_images(@current_section, title )
+ end
+
def add_regression_plot( validation_set,
name_attribute,
section_title="Regression Plot",
section_text=nil,
- image_title=nil,
- image_caption=nil)
+ image_title=nil)
image_title = "Regression plot" unless image_title
#section_regr = @xml_report.add_section(@current_section, section_title)
@@ -156,28 +145,27 @@ class Reports::ReportContent
section_text += "\nWARNING: regression plot information not available for all validation results" if prediction_set.size!=validation_set.size
@xml_report.add_paragraph(section_regr, section_text) if section_text
- plot_file_name = "regr_plot"+@tmp_file_count.to_s+".png"
- @tmp_file_count += 1
begin
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_regression_plot( plot_file_path, prediction_set, name_attribute )
- @xml_report.add_imagefigure(section_regr, image_title, plot_file_name, "PNG", 100, image_caption)
+ plot_png = add_tmp_file("regr_plot", "png")
+ plot_svg = add_tmp_file("regr_plot", "svg")
+ Reports::PlotFactory.create_regression_plot( [plot_png[:path], plot_svg[:path]], prediction_set, name_attribute )
+ @xml_report.add_imagefigure(section_regr, image_title, plot_png[:name], "PNG", 100, plot_svg[:name])
rescue Exception => ex
LOGGER.error("Could not create regression plot: "+ex.message)
- rm_tmp_file(plot_file_name)
+ rm_tmp_file(plot_png[:name])
+ rm_tmp_file(plot_svg[:name])
@xml_report.add_paragraph(section_regr, "could not create regression plot: "+ex.message)
end
else
@xml_report.add_paragraph(section_regr, "No prediction info for regression available.")
end
end
-
- def add_roc_plot( validation_set,
- split_set_attribute = nil,
- section_title="ROC Plots",
- section_text=nil,
- image_titles=nil,
- image_captions=nil)
+
+ def add_roc_plot( validation_set,
+ accept_value,
+ split_set_attribute=nil,
+ image_title = "ROC Plot",
+ section_text="")
#section_roc = @xml_report.add_section(@current_section, section_title)
section_roc = @current_section
@@ -190,25 +178,18 @@ class Reports::ReportContent
"validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s
end
@xml_report.add_paragraph(section_roc, section_text) if section_text
-
- accept_values = validation_set.get_accept_values
- accept_values.size.times do |i|
- class_value = accept_values[i]
- image_title = image_titles ? image_titles[i] : "ROC Plot for class-value '"+class_value.to_s+"'"
- image_caption = image_captions ? image_captions[i] : nil
- plot_file_name = "roc_plot"+@tmp_file_count.to_s+".png"
- @tmp_file_count += 1
- begin
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, false )#prediction_set.size>1 )
- @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "PNG", 100, image_caption)
- rescue Exception => ex
- msg = "WARNING could not create roc plot for class value '"+class_value.to_s+"': "+ex.message
- LOGGER.error(msg)
- rm_tmp_file(plot_file_name)
- @xml_report.add_paragraph(section_roc, msg)
- end
- end
+ begin
+ plot_png = add_tmp_file("roc_plot", "png")
+ plot_svg = add_tmp_file("roc_plot", "svg")
+ Reports::PlotFactory.create_roc_plot( [plot_png[:path], plot_svg[:path]], prediction_set, accept_value, split_set_attribute )#prediction_set.size>1 )
+ @xml_report.add_imagefigure(section_roc, image_title, plot_png[:name], "PNG", 100, plot_svg[:name])
+ rescue Exception => ex
+ msg = "WARNING could not create roc plot for class value '"+accept_value.to_s+"': "+ex.message
+ LOGGER.error(msg)
+ rm_tmp_file(plot_png[:name])
+ rm_tmp_file(plot_svg[:name])
+ @xml_report.add_paragraph(section_roc, msg)
+ end
else
@xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.")
end
@@ -216,11 +197,10 @@ class Reports::ReportContent
end
def add_confidence_plot( validation_set,
+ accept_value = nil,
split_set_attribute = nil,
- section_title="Confidence plots",
- section_text=nil,
- image_titles=nil,
- image_captions=nil)
+ image_title = "Percent Correct vs Confidence Plot",
+ section_text="")
#section_conf = @xml_report.add_section(@current_section, section_title)
section_conf = @current_section
@@ -232,31 +212,24 @@ class Reports::ReportContent
LOGGER.error "WARNING: plot information not available for all validation results:\n"+
"validation set size: "+validation_set.size.to_s+", prediction set size: "+prediction_set.size.to_s
end
- @xml_report.add_paragraph(section_conf, section_text) if section_text
-
- image_title = image_titles ? image_titles[i] : "Percent Correct vs Confidence Plot"
- image_caption = image_captions ? image_captions[i] : nil
- plot_file_name = "conf_plot"+@tmp_file_count.to_s+".png"
- @tmp_file_count += 1
+ @xml_report.add_paragraph(section_conf, section_text) if section_text and section_text.size>0
begin
-
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_confidence_plot( plot_file_path, prediction_set, nil, split_set_attribute, false )
- @xml_report.add_imagefigure(section_conf, image_title, plot_file_name, "PNG", 100, image_caption)
-
+ plot_png = add_tmp_file("conf_plot", "png")
+ plot_svg = add_tmp_file("conf_plot", "svg")
+ Reports::PlotFactory.create_confidence_plot( [plot_png[:path], plot_svg[:path]], prediction_set, accept_value, split_set_attribute, false )
+ @xml_report.add_imagefigure(section_conf, image_title, plot_png[:name], "PNG", 100, plot_svg[:name])
rescue Exception => ex
msg = "WARNING could not create confidence plot: "+ex.message
LOGGER.error(msg)
- rm_tmp_file(plot_file_name)
+ rm_tmp_file(plot_png[:name])
+ rm_tmp_file(plot_svg[:name])
@xml_report.add_paragraph(section_conf, msg)
- end
-
+ end
else
@xml_report.add_paragraph(section_conf, "No prediction-confidence info for confidence plot available.")
end
-
- end
+ end
def add_ranking_plots( validation_set,
compare_attribute,
@@ -309,27 +282,25 @@ class Reports::ReportContent
value_attributes,
section_title="Bar Plot",
section_text=nil,
- image_title="Bar Plot",
- image_caption=nil)
+ image_title="Bar Plot")
section_bar = @xml_report.add_section(@current_section, section_title)
@xml_report.add_paragraph(section_bar, section_text) if section_text
-
- plot_file_name = "bar_plot"+@tmp_file_count.to_s+".png"
- @tmp_file_count += 1
- plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_bar_plot(plot_file_path, validation_set, title_attribute, value_attributes )
- @xml_report.add_imagefigure(section_bar, image_title, plot_file_name, "PNG", 100, image_caption)
+ plot_png = add_tmp_file("bar_plot", "png")
+ plot_svg = add_tmp_file("bar_plot", "svg")
+ Reports::PlotFactory.create_bar_plot([plot_png[:path], plot_svg[:path]], validation_set, title_attribute, value_attributes )
+ @xml_report.add_imagefigure(section_bar, image_title, plot_png[:name], "PNG", 100, plot_svg[:name])
end
private
- def add_tmp_file(tmp_file_name)
-
+ def add_tmp_file(name, extension)
+ tmp_file_name = name.to_s+@tmp_file_count.to_s+"."+extension.to_s
+ @tmp_file_count += 1
@tmp_files = {} unless @tmp_files
raise "file name already exits" if @tmp_files[tmp_file_name] || (@text_files && @text_files[tmp_file_name])
tmp_file_path = Reports::Util.create_tmp_file(tmp_file_name)
@tmp_files[tmp_file_name] = tmp_file_path
- return tmp_file_path
+ return {:name => tmp_file_name, :path => tmp_file_path}
end
def rm_tmp_file(tmp_file_name)
diff --git a/report/report_factory.rb b/report/report_factory.rb
index 08d9418..d16066e 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -70,8 +70,12 @@ module Reports::ReportFactory
report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_CLASS, "Results", "Results")
report.add_confusion_matrix(val)
report.add_section("Plots")
- report.add_roc_plot(validation_set)
- report.add_confidence_plot(validation_set)
+ ([nil] + validation_set.get_accept_values).each do |accept_value|
+ report.add_roc_plot(validation_set, accept_value)
+ report.add_confidence_plot(validation_set, accept_value)
+ title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
+ report.align_last_two_images title
+ end
report.end_section
when "regression"
report.add_result(validation_set, [:validation_uri] + VAL_ATTR_TRAIN_TEST + VAL_ATTR_REGR, "Results", "Results")
@@ -100,33 +104,42 @@ module Reports::ReportFactory
raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+
+"or all classification validations") unless validation_set.unique_feature_type
pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) )
+ validation_set.validations.sort! do |x,y|
+ x.crossvalidation_fold.to_f <=> y.crossvalidation_fold.to_f
+ end
+ cv_set = validation_set.replace_with_cv_stats
+ raise unless cv_set.size==1
- merged = validation_set.merge([:crossvalidation_id])
- raise unless merged.size==1
-
- #puts merged.get_values(:percent_correct_variance, false).inspect
+ #puts cv_set.get_values(:percent_correct_variance, false).inspect
report = Reports::ReportContent.new("Crossvalidation report")
+ res_titel = "Crossvalidation Results"
+ res_text = "These performance statistics have been derieved by accumulating all predictions on the various fold (i.e. these numbers are NOT averaged results over all crossvalidation folds)."
case validation_set.unique_feature_type
when "classification"
- report.add_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results")
- report.add_confusion_matrix(merged.validations[0])
+ report.add_result(cv_set, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold], res_titel, res_titel, res_text)
+ report.add_confusion_matrix(cv_set.validations[0])
report.add_section("Plots")
- report.add_roc_plot(validation_set)
- report.add_roc_plot(validation_set, :crossvalidation_fold)
- report.add_confidence_plot(validation_set)
- report.add_confidence_plot(validation_set, :crossvalidation_fold)
+ [nil, :crossvalidation_fold].each do |split_attribute|
+ ([nil] + validation_set.get_accept_values).each do |accept_value|
+ report.add_roc_plot(validation_set, accept_value, split_attribute)
+ report.add_confidence_plot(validation_set, accept_value, split_attribute)
+ title = accept_value ? "Plots for predicted class-value '"+accept_value.to_s+"'" : "Plots for all predictions"
+ title += split_attribute ? ", separated by crossvalidation fold" : " (accumulated over all folds)"
+ report.align_last_two_images title
+ end
+ end
report.end_section
- report.add_result(validation_set, VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds],
- "Results","Results",nil,"validation")
+ report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_CLASS-[:num_folds, :dataset_uri, :algorithm_uri],
+ "Results","Results")
when "regression"
- report.add_result(merged, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results")
+ report.add_result(cv_set, [:crossvalidation_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],res_titel, res_titel, res_text)
report.add_section("Plots")
report.add_regression_plot(validation_set, :crossvalidation_fold)
report.add_confidence_plot(validation_set)
- report.add_confidence_plot(validation_set, :crossvalidation_fold)
+ report.add_confidence_plot(validation_set, nil, :crossvalidation_fold)
report.end_section
- report.add_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results")
+ report.add_result(validation_set, [:validation_uri, :validation_report_uri]+VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds, :dataset_uri, :algorithm_uri], "Results","Results")
end
task.progress(90) if task
@@ -142,8 +155,8 @@ module Reports::ReportFactory
raise OpenTox::BadRequestError.new("num validations is not >1") unless validation_set.size>1
raise OpenTox::BadRequestError.new("validations must have unique feature type, i.e. must be either all regression, "+
+"or all classification validations") unless validation_set.unique_feature_type
- raise OpenTox::BadRequestError.new("number of different algorithms <2: "+
- validation_set.get_values(:algorithm_uri).inspect) if validation_set.num_different_values(:algorithm_uri)<2
+ raise OpenTox::BadRequestError.new("number of different identifiers <2: "+
+ validation_set.get_values(:identifier).inspect) if validation_set.num_different_values(:identifier)<2
if validation_set.has_nil_values?(:crossvalidation_id)
raise OpenTox::BadRequestError.new("algorithm comparison for non crossvalidation not yet implemented")
@@ -160,73 +173,63 @@ module Reports::ReportFactory
# groups results into sets with equal dataset
if (validation_set.num_different_values(:dataset_uri)>1)
+ LOGGER.debug "compare report -- num different datasets: "+validation_set.num_different_values(:dataset_uri).to_s
dataset_grouping = Reports::Util.group(validation_set.validations, [:dataset_uri])
# check if equal values in each group exist
- Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri, :crossvalidation_fold, :num_folds, :stratified, :random_seed])
+ Reports::Util.check_group_matching(dataset_grouping, [:crossvalidation_fold, :num_folds, :stratified, :random_seed])
else
dataset_grouping = [ validation_set.validations ]
end
- # we only checked that equal validations exist in each dataset group, now check for each algorithm
+ # we only checked that equal validations exist in each dataset group, now check for each identifier
dataset_grouping.each do |validations|
- algorithm_grouping = Reports::Util.group(validations, [:algorithm_uri])
+ algorithm_grouping = Reports::Util.group(validations, [:identifier])
Reports::Util.check_group_matching(algorithm_grouping, [:crossvalidation_fold, :num_folds, :stratified, :random_seed])
end
pre_load_predictions( validation_set, OpenTox::SubTask.create(task,0,80) )
- report = Reports::ReportContent.new("Algorithm comparison report - Many datasets")
+ report = Reports::ReportContent.new("Algorithm comparison report")
if (validation_set.num_different_values(:dataset_uri)>1)
all_merged = validation_set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri])
report.add_ranking_plots(all_merged, :algorithm_uri, :dataset_uri,
[:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate] )
report.add_result_overview(all_merged, :algorithm_uri, :dataset_uri, [:percent_correct, :weighted_area_under_roc, :true_positive_rate, :true_negative_rate])
-
end
-
+
+ result_attributes = [:identifier,:crossvalidation_uri,:crossvalidation_report_uri]+VAL_ATTR_CV-[:crossvalidation_fold,:num_folds,:dataset_uri]
case validation_set.unique_feature_type
when "classification"
- attributes = VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold]
- attributes = ([ :dataset_uri ] + attributes).uniq
-
- dataset_grouping.each do |validations|
-
- set = Reports::ValidationSet.create(validations)
-
- dataset = validations[0].dataset_uri
- merged = set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri])
- merged.sort(:dataset_uri)
-
- report.add_section("Dataset: "+dataset)
- report.add_result(merged,attributes,
- "Mean Results","Mean Results",nil,"crossvalidation")
- report.add_paired_ttest_table(set, :algorithm_uri, :percent_correct)
-
- report.add_bar_plot(merged, :algorithm_uri, VAL_ATTR_BAR_PLOT_CLASS)
- report.add_roc_plot(set, :algorithm_uri)
- report.end_section
- end
-
- when "regression"
+ result_attributes += VAL_ATTR_CLASS
+ ttest_attributes = [:percent_correct, :weighted_area_under_roc]
+ bar_plot_attributes = VAL_ATTR_BAR_PLOT_CLASS
+ else
+ result_attributes += VAL_ATTR_REGR
+ ttest_attributes = [:r_square, :root_mean_squared_error]
+ bar_plot_attributes = VAL_ATTR_BAR_PLOT_REGR
+ end
- attributes = VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold]
- attributes = ([ :dataset_uri ] + attributes).uniq
+ dataset_grouping.each do |validations|
+
+ set = Reports::ValidationSet.create(validations)
- dataset_grouping.each do |validations|
+ dataset = validations[0].dataset_uri
+ merged = set.merge([:identifier, :dataset_uri]) #, :crossvalidation_id, :crossvalidation_uri])
+ merged.sort(:identifier)
- set = Reports::ValidationSet.create(validations)
-
- dataset = validations[0].dataset_uri
- merged = set.merge([:algorithm_uri, :dataset_uri, :crossvalidation_id, :crossvalidation_uri])
- merged.sort(:dataset_uri)
-
- report.add_section("Dataset: "+dataset)
- report.add_result(merged,attributes,
- "Mean Results","Mean Results",nil,"crossvalidation")
- report.add_paired_ttest_table(set, :algorithm_uri, :r_square)
- report.end_section
+ merged.validations.each do |v|
+ v.crossvalidation_uri = v.crossvalidation_uri.split(";").uniq.join(" ")
+ v.crossvalidation_report_uri = v.crossvalidation_report_uri.split(";").uniq.join(" ") if v.crossvalidation_report_uri
end
+ report.add_section("Dataset: "+dataset)
+ res_titel = "Average Results on Folds"
+      res_text = "These performance statistics have been derived by computing the mean of the statistics on each crossvalidation fold."
+ report.add_result(merged,result_attributes,res_titel,res_titel,res_text)
+ # pending: regression stats have different scales!!!
+ report.add_bar_plot(merged, :identifier, bar_plot_attributes) if validation_set.unique_feature_type=="classification"
+ report.add_paired_ttest_tables(set, :identifier, ttest_attributes)
+ report.end_section
end
task.progress(100) if task
report
diff --git a/report/report_persistance.rb b/report/report_persistance.rb
index c85ad68..e02387f 100755
--- a/report/report_persistance.rb
+++ b/report/report_persistance.rb
@@ -250,6 +250,7 @@ module Reports
end
def list_reports(type, filter_params={})
+ filter_params[:report_type] = type
LOGGER.debug "find reports for params: "+filter_params.inspect
reports = Lib::OhmUtil.find( ReportData, filter_params )
reports.collect{ |r| r.id }
@@ -314,7 +315,7 @@ end
# unless prop_names.include?(key)
# err = "no attribute found: '"+k.to_s+"'"
# if $sinatra
-# $sinatra.halt 400,err
+# $sinatra.raise OpenTox::BadRequestError.new(err)
# else
# raise err
# end
diff --git a/report/report_service.rb b/report/report_service.rb
index 722c3d6..3e23889 100644
--- a/report/report_service.rb
+++ b/report/report_service.rb
@@ -60,7 +60,7 @@ module Reports
# call-seq:
# create_report(type, validation_uris) => string
#
- def create_report(type, validation_uris, subjectid=nil, task=nil)
+ def create_report(type, validation_uris, identifier=nil, subjectid=nil, task=nil)
LOGGER.info "create report of type '"+type.to_s+"'"
check_report_type(type)
@@ -68,7 +68,10 @@ module Reports
# step1: load validations
raise OpenTox::BadRequestError.new("validation_uris missing") unless validation_uris
LOGGER.debug "validation_uri(s): '"+validation_uris.inspect+"'"
- validation_set = Reports::ValidationSet.new(validation_uris, subjectid)
+ LOGGER.debug "identifier: '"+identifier.inspect+"'"
+ raise "illegal num identifiers: "+identifier.size.to_s+" should be equal to num validation-uris ("+validation_uris.size.to_s+")" if
+ identifier and identifier.size!=validation_uris.size
+ validation_set = Reports::ValidationSet.new(validation_uris, identifier, subjectid)
raise OpenTox::BadRequestError.new("cannot get validations from validation_uris '"+validation_uris.inspect+"'") unless validation_set and validation_set.size > 0
LOGGER.debug "loaded "+validation_set.size.to_s+" validation/s"
task.progress(10) if task
diff --git a/report/statistical_test.rb b/report/statistical_test.rb
index 5e5ea3a..9461cac 100644
--- a/report/statistical_test.rb
+++ b/report/statistical_test.rb
@@ -9,8 +9,8 @@ module LIB
# 1 -> array2 > array1
#
def self.pairedTTest(array1, array2, significance_level=0.95)
-
- @@r = RinRuby.new(true,false) unless defined?(@@r) and @@r
+
+ @@r = RinRuby.new(true,false) unless defined?(@@r) and @@r
@@r.assign "v1",array1
@@r.assign "v2",array2
@@r.eval "ttest = t.test(v1,v2,paired=T)"
@@ -64,7 +64,7 @@ module Reports
end
end
end
- {:titles => titles, :matrix => matrix}
+ {:titles => titles, :matrix => matrix, :num_results => grouped_validations[0].size}
end
def self.paired_ttest( validations1, validations2, attribute, significance_level=0.95 )
@@ -83,5 +83,12 @@ module Reports
end
-#puts LIB::StatisticalTest.pairedTTest([1,2,3],[2,3,3])
+#t1 = Time.new
+#10.times do
+# puts LIB::StatisticalTest.pairedTTest([1,2,3,4,5,12,4,2],[2,3,3,3,56,3,4,5])
+#end
+#LIB::StatisticalTest.quitR
+#t2 = Time.new
+#puts t2-t1
+
diff --git a/report/validation_access.rb b/report/validation_access.rb
index e9b6e19..299b124 100755
--- a/report/validation_access.rb
+++ b/report/validation_access.rb
@@ -7,8 +7,9 @@ require "lib/validation_db.rb"
#
class Reports::ValidationDB
- def resolve_cv_uris(validation_uris, subjectid=nil)
- res = []
+ def resolve_cv_uris(validation_uris, identifier=nil, subjectid=nil)
+ res = {}
+ count = 0
validation_uris.each do |u|
if u.to_s =~ /.*\/crossvalidation\/[0-9]+/
cv_id = u.split("/")[-1].to_i
@@ -25,17 +26,20 @@ class Reports::ValidationDB
raise OpenTox::NotFoundError.new "crossvalidation with id "+cv_id.to_s+" not found" unless cv
raise OpenTox::BadRequestError.new("crossvalidation with id '"+cv_id.to_s+"' not finished") unless cv.finished
#res += Validation::Validation.find( :all, :conditions => { :crossvalidation_id => cv_id } ).collect{|v| v.validation_uri.to_s}
- res += Validation::Validation.find( :crossvalidation_id => cv_id ).collect{|v| v.validation_uri.to_s }
+ Validation::Validation.find( :crossvalidation_id => cv_id, :validation_type => "crossvalidation" ).each do |v|
+ res[v.validation_uri.to_s] = identifier ? identifier[count] : nil
+ end
else
- res += [u.to_s]
+ res[u.to_s] = identifier ? identifier[count] : nil
end
+ count += 1
end
res
end
def init_validation(validation, uri, subjectid=nil)
- raise OpenTox::BadRequestError.new "not a validation uri: "+uri.to_s unless uri =~ /.*\/[0-9]+/
+ raise OpenTox::BadRequestError.new "not a validation uri: "+uri.to_s unless uri =~ /\/[0-9]+$/
validation_id = uri.split("/")[-1]
raise OpenTox::BadRequestError.new "invalid validation id "+validation_id.to_s unless validation_id!=nil and
(validation_id.to_i > 0 || validation_id.to_s=="0" )
@@ -56,6 +60,31 @@ class Reports::ValidationDB
subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset
end
end
+
+ def init_validation_from_cv_statistics( validation, cv_uri, subjectid=nil )
+
+ raise OpenTox::BadRequestError.new "not a crossvalidation uri: "+cv_uri.to_s unless cv_uri.uri? and cv_uri =~ /crossvalidation.*\/[0-9]+$/
+ cv_id = cv_uri.split("/")[-1]
+ raise OpenTox::NotAuthorizedError.new "Not authorized: GET "+cv_uri.to_s if
+ AA_SERVER and !OpenTox::Authorization.authorized?(cv_uri,"GET",subjectid)
+ cv = Validation::Crossvalidation.get(cv_id)
+ raise OpenTox::NotFoundError.new "crossvalidation with id "+crossvalidation_id.to_s+" not found" unless cv
+ raise OpenTox::BadRequestError.new "crossvalidation with id "+crossvalidation_id.to_s+" is not finished yet" unless cv.finished
+ v = Validation::Validation.from_cv_statistics(cv_id, subjectid)
+ (Validation::VAL_PROPS + Validation::VAL_CV_PROPS).each do |p|
+ validation.send("#{p.to_s}=".to_sym, v.send(p))
+ end
+ {:classification_statistics => Validation::VAL_CLASS_PROPS,
+ :regression_statistics => Validation::VAL_REGR_PROPS}.each do |subset_name,subset_props|
+ subset = v.send(subset_name)
+ subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset
+ end
+ #cv props
+ Validation::CROSS_VAL_PROPS.each do |p|
+ validation.send("#{p.to_s}=".to_sym, cv.send(p.to_s))
+ end
+ validation.crossvalidation_uri = cv_uri
+ end
def init_cv(validation)
@@ -71,14 +100,17 @@ class Reports::ValidationDB
def get_predictions(validation, subjectid=nil, task=nil)
Lib::OTPredictions.new( validation.feature_type, validation.test_dataset_uri,
validation.test_target_dataset_uri, validation.prediction_feature, validation.prediction_dataset_uri,
- validation.predicted_variable, subjectid, task)
+ validation.predicted_variable, validation.predicted_confidence, subjectid, task)
end
def get_accept_values( validation, subjectid=nil )
# PENDING So far, one has to load the whole dataset to get the accept_value from ambit
- d = OpenTox::Dataset.find( validation.test_target_dataset_uri, subjectid )
- accept_values = d.features[validation.prediction_feature][OT.acceptValue]
- raise "cannot get accept values from dataset "+validation.test_target_dataset_uri.to_s+" for feature "+
+ test_target_dataset = validation.test_target_dataset_uri
+ test_target_dataset = validation.test_dataset_uri unless test_target_dataset
+ d = Lib::DatasetCache.find( test_target_dataset, subjectid )
+ raise "cannot get test target dataset for accept values, dataset: "+test_target_dataset.to_s unless d
+ accept_values = d.accept_values(validation.prediction_feature)
+ raise "cannot get accept values from dataset "+test_target_dataset.to_s+" for feature "+
validation.prediction_feature+":\n"+d.features[validation.prediction_feature].to_yaml unless accept_values!=nil
accept_values
end
@@ -92,8 +124,14 @@ class Reports::ValidationDB
raise "cannot derive model depended props for merged validations" if Lib::MergeObjects.merged?(validation)
model = OpenTox::Model::Generic.find(validation.model_uri, subjectid)
raise OpenTox::NotFoundError.new "model not found '"+validation.model_uri+"'" unless model
- model.metadata[OT.predictedVariables]
- #get_model(validation).predictedVariables
+ model.predicted_variable(subjectid)
+ end
+
+ def predicted_confidence(validation, subjectid=nil)
+ raise "cannot derive model depended props for merged validations" if Lib::MergeObjects.merged?(validation)
+ model = OpenTox::Model::Generic.find(validation.model_uri, subjectid)
+ raise OpenTox::NotFoundError.new "model not found '"+validation.model_uri+"'" unless model
+ model.predicted_confidence(subjectid)
end
# private
diff --git a/report/validation_data.rb b/report/validation_data.rb
index 42b179b..fa0af8e 100755
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -1,6 +1,7 @@
# the variance is computed when merging results for these attributes
-VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, :r_square, :accuracy ]
+VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error,
+ :r_square, :accuracy, :weighted_area_under_roc, :weighted_accuracy ]
VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :weighted_area_under_roc ] #:accuracy ]
ATTR_NICE_NAME = {}
@@ -51,21 +52,31 @@ end
module Reports
+ @@validation_access = ValidationDB.new
+ @@persistance = ReportService.persistance
+
+ def self.persistance
+ @@persistance
+ end
+
+ def self.validation_access
+ @@validation_access
+ end
+
+ # for overwriting validation source (other than using webservices)
+ def self.reset_validation_access(validation_access)
+ @@validation_access = validation_access
+ end
+
+
# = ReportValidation
#
# contains all values of a validation object
#
class ReportValidation
- @@validation_access = ValidationDB.new
-
- # for overwriting validation source (other than using webservices)
- def self.reset_validation_access(validation_access)
- @@validation_access = validation_access
- end
-
- def self.resolve_cv_uris(validation_uris, subjectid)
- @@validation_access.resolve_cv_uris(validation_uris, subjectid)
+ def self.resolve_cv_uris(validation_uris, identifier, subjectid)
+ Reports.validation_access.resolve_cv_uris(validation_uris, identifier, subjectid)
end
# create member variables for all validation properties
@@ -74,13 +85,20 @@ module Reports
VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym }
@@validation_attributes.each{ |a| attr_accessor a }
- attr_reader :predictions
+ attr_reader :predictions, :subjectid
+ attr_accessor :identifier, :validation_report_uri, :crossvalidation_report_uri
def initialize(uri = nil, subjectid = nil)
- @@validation_access.init_validation(self, uri, subjectid) if uri
+ Reports.validation_access.init_validation(self, uri, subjectid) if uri
@subjectid = subjectid
#raise "subjectid is nil" unless subjectid
end
+
+ def self.from_cv_statistics( cv_uri, subjectid = nil )
+ v = ReportValidation.new(nil, subjectid)
+ Reports.validation_access.init_validation_from_cv_statistics(v, cv_uri, subjectid)
+ v
+ end
# returns/creates predictions, cache to save rest-calls/computation time
#
@@ -97,7 +115,7 @@ module Reports
task.progress(100) if task
nil
else
- @predictions = @@validation_access.get_predictions( self, @subjectid, task )
+ @predictions = Reports.validation_access.get_predictions( self, @subjectid, task )
end
end
end
@@ -105,7 +123,7 @@ module Reports
# returns the predictions feature values (i.e. the domain of the class attribute)
#
def get_accept_values()
- @accept_values = @@validation_access.get_accept_values(self, @subjectid) unless @accept_values
+ @accept_values = Reports.validation_access.get_accept_values(self, @subjectid) unless @accept_values
@accept_values
end
@@ -113,36 +131,26 @@ module Reports
#
def feature_type
return @feature_type if @feature_type!=nil
- @feature_type = @@validation_access.feature_type(self, @subjectid)
+ @feature_type = Reports.validation_access.feature_type(self, @subjectid)
end
def predicted_variable
return @predicted_variable if @predicted_variable!=nil
- @predicted_variable = @@validation_access.predicted_variable(self, @subjectid)
+ @predicted_variable = Reports.validation_access.predicted_variable(self, @subjectid)
end
+ def predicted_confidence
+ return @predicted_confidence if @predicted_confidence!=nil
+ @predicted_confidence = Reports.validation_access.predicted_confidence(self, @subjectid)
+ end
+
# loads all crossvalidation attributes, of the corresponding cv into this object
def load_cv_attributes
raise "crossvalidation-id not set" unless @crossvalidation_id
- @@validation_access.init_cv(self)
- end
-
- @@persistance = ReportService.persistance
-
- def validation_report_uri
- #puts "searching for validation report: "+self.validation_uri.to_s
- return @validation_report_uri if @validation_report_uri!=nil
- ids = @@persistance.list_reports("validation",{:validation_uris=>validation_uri })
- @validation_report_uri = ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0
- end
-
- def cv_report_uri
- #puts "searching for cv report: "+self.crossvalidation_uri.to_s
- return @cv_report_uri if @cv_report_uri!=nil
- raise "no cv uri "+to_yaml unless self.crossvalidation_uri
- ids = @@persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s })
- #puts "-> "+ids.inspect
- @cv_report_uri = ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0
+ Reports.validation_access.init_cv(self)
+ # load cv report
+ ids = Reports.persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s })
+ @crossvalidation_report_uri = ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids and ids.size>0
end
def clone_validation
@@ -158,13 +166,20 @@ module Reports
#
class ValidationSet
- def initialize(validation_uris=nil, subjectid=nil)
+ def initialize(validation_uris=nil, identifier=nil, subjectid=nil)
@unique_values = {}
- validation_uris = ReportValidation.resolve_cv_uris(validation_uris, subjectid) if validation_uris
- @validations = Array.new
- validation_uris.each{|u| @validations.push(ReportValidation.new(u, subjectid))} if validation_uris
+ @validations = []
+ if validation_uris
+ validation_uri_and_ids = ReportValidation.resolve_cv_uris(validation_uris, identifier, subjectid)
+ validation_uri_and_ids.each do |u,id|
+ v = ReportValidation.new(u, subjectid)
+ v.identifier = id if id
+ ids = Reports.persistance.list_reports("validation",{:validation_uris=>v.validation_uri })
+ v.validation_report_uri = ReportService.instance.get_uri("validation",ids[-1]) if ids and ids.size>0
+ @validations << v
+ end
+ end
end
-
def self.create(validations)
set = ValidationSet.new
@@ -393,6 +408,17 @@ module Reports
return array
end
+ def replace_with_cv_stats
+ new_set = ValidationSet.new
+ grouping = Util.group(@validations, [:crossvalidation_id])
+ grouping.each do |g|
+ v = ReportValidation.from_cv_statistics(g[0].crossvalidation_uri, g[0].subjectid)
+ v.identifier = g.collect{|vv| vv.identifier}.uniq.join(";")
+ new_set.validations << v
+ end
+ return new_set
+ end
+
# creates a new validaiton set, that contains merged validations
# all validation with equal values for __equal_attributes__ are summed up in one validation, i.e. merged
#
@@ -409,19 +435,17 @@ module Reports
#compute grouping
grouping = Util.group(@validations, equal_attributes)
#puts "groups "+grouping.size.to_s
-
+
+ #merge
Lib::MergeObjects.register_merge_attributes( ReportValidation,
- Validation::VAL_MERGE_AVG,Validation::VAL_MERGE_SUM,Validation::VAL_MERGE_GENERAL) unless
+ Validation::VAL_MERGE_AVG+Validation::VAL_MERGE_SUM,[],Validation::VAL_MERGE_GENERAL+[:identifier, :validation_report_uri, :crossvalidation_report_uri]) unless
Lib::MergeObjects.merge_attributes_registered?(ReportValidation)
-
- #merge
grouping.each do |g|
- new_set.validations.push(g[0].clone_validation)
+ new_set.validations << g[0].clone_validation
g[1..-1].each do |v|
new_set.validations[-1] = Lib::MergeObjects.merge_objects(new_set.validations[-1],v)
end
end
-
return new_set
end
diff --git a/report/xml_report.rb b/report/xml_report.rb
index 4fbfae3..5be5fdc 100755
--- a/report/xml_report.rb
+++ b/report/xml_report.rb
@@ -93,50 +93,89 @@ module Reports
end
end
- # adds a new image to a REXML:Element, returns the figure as element
- #
- # example: <tt>add_imagefigure( section2, "Nice graph", "/images/graph1.svg", "SVG", "This graph shows..." )</tt>
- #
- # call-seq:
- # add_imagefigure( element, title, path, filetype, caption = nil ) => REXML::Element
- #
- def add_imagefigure( element, title, path, filetype, size_pct=100, caption = nil )
-
+ def imagefigure( title, path, filetype, size_pct=100, altPath = nil )
figure = Reports::XMLReportUtil.attribute_element("figure", {"float" => 0})
figure << Reports::XMLReportUtil.text_element("title", title)
- media = Element.new("mediaobject")
+
+ #media = Element.new("mediaobject")
+ media = Element.new("inlinemediaobject")
image = Element.new("imageobject")
imagedata = Reports::XMLReportUtil.attribute_element("imagedata",
- {"fileref" => path, "format"=>filetype, "contentwidth" => size_pct.to_s+"%",
- #"contentdepth"=> "4in"
- })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"})
+ {"fileref" => path, "format"=>filetype, "contentwidth" => size_pct.to_s+"%",
+ #"contentdepth"=> "4in"
+ })#"width" => "6in", "height" => "5in"}) #"contentwidth" => "100%"})
#imagedata = Reports::XMLReportUtil.attribute_element("imagedata",{"width" => "6in", "fileref" => path, "format"=>filetype})
@resource_path_elements[imagedata] = "fileref"
image << imagedata
-
media << image
+ #media << Reports::XMLReportUtil.text_element("caption", caption) if caption
+ #figure << media
-# ulink = Element.new("ulink")
-# ulink.add_attributes({"url" => "http://google.de"})
-# ulink << image
-# media << ulink
+ ulink = Element.new("ulink")
+ ulink.add_attributes({"url" => altPath ? altPath : path })
+ @resource_path_elements[ulink] = "url"
+ ulink << media
- media << Reports::XMLReportUtil.text_element("caption", caption) if caption
- figure << media
+ figure << ulink
+ figure
+ end
+
+ # adds a new image to a REXML:Element, returns the figure as element
+ #
+ # example: <tt>add_imagefigure( section2, "Nice graph", "/images/graph1.svg", "SVG", "This graph shows..." )</tt>
+ #
+ # call-seq:
+ # add_imagefigure( element, title, path, filetype, caption = nil ) => REXML::Element
+ #
+ def add_imagefigure( element, title, path, filetype, size_pct=100, altPath = nil )
+ figure = imagefigure( title, path, filetype, size_pct, altPath)
element << figure
- return figure
+ return figure
end
- def add_image( element, url )
+  # bit of a hack to align the last two figures that have been added to element into one row
+ def align_last_two_images( element, title )
+ imgs = []
+ element.elements.each do |e|
+ imgs[0] = imgs[1]
+ imgs[1] = e if e.name=="figure"
+ end
+ if (imgs[0] and imgs[1])
+ element.delete_element imgs[0]
+ element.delete_element imgs[1]
+ add_imagefigures_in_row( element, imgs, title )
+ end
+ end
+
+ def add_imagefigures_in_row( element, imagefigures, title )
+ params = {"frame" => "none", "colsep" => 0, "rowsep" => 0 }
+ table = Reports::XMLReportUtil.attribute_element("table",params)
+ table << Reports::XMLReportUtil.text_element("title", title)
+ tgroup = Reports::XMLReportUtil.attribute_element("tgroup",{"cols" => 2})
+ tbody = Element.new("tbody")
+ row = Element.new("row")
+ imagefigures.each do |f|
+ entry = Element.new("entry")
+ entry << f
+ row << entry
+ end
+ tbody << row
+ tgroup << tbody
+ table << tgroup
+ element << table
+ table
+ end
+
+ def add_image( element, url ) #, scale=false )
image = Element.new("imageobject")
- imagedata = Reports::XMLReportUtil.attribute_element("imagedata",
- {"fileref" => url, "format"=>"PNG", "contentwidth" => "2in" }) #PENDING: do not hardcode size
+ params = {"fileref" => url, "format"=>"PNG"}
+ #params["contentwidth"] = "2in"
+ imagedata = Reports::XMLReportUtil.attribute_element("imagedata",params)
image << imagedata
element << image
return image
end
-
# adds a table to a REXML:Element, _table_values_ should be a multi-dimensional-array, returns the table as element
#
# call-seq:
@@ -144,7 +183,7 @@ module Reports
#
def add_table( element, title, table_values, first_row_header=true, first_col_header=false, transpose=false, auto_link_urls=true )
- raise "table_values is not mulit-dimensional-array" unless table_values && table_values.is_a?(Array) && table_values[0].is_a?(Array)
+ raise "table_values is not multi-dimensional-array" unless table_values && table_values.is_a?(Array) && table_values[0].is_a?(Array)
values = transpose ? table_values.transpose : table_values
@@ -184,12 +223,20 @@ module Reports
row = Element.new("row")
r.each do |v|
entry = Element.new("entry")
- if auto_link_urls && v.to_s =~ /depict/ || v.to_s =~ /image\/png$/ #PENDING
+ if auto_link_urls && v.to_s =~ /depict/ || v.to_s =~ /png$/ #PENDING
add_image(entry, v.to_s)
elsif auto_link_urls && v.to_s =~ /^http(s?):\/\//
- add_url(entry, v.to_s, v.to_s)
- else
- entry.text = v.to_s
+ #add_url(entry, v.to_s, v.to_s)
+ v.to_s.split(" ").each do |vv|
+ add_url(entry, vv.to_s, vv.to_s)
+ space = Element.new("para")
+ space.text = " "
+ entry << space
+ end
+ else
+ text = v.to_s
+ text.gsub!(/\+\-/,"&plusmn;")
+ entry << Text.new(text, true, nil, true)
end
row << entry
end
@@ -221,11 +268,15 @@ module Reports
return list
end
- def add_url (element, url, description=url )
-
+ def url_element( url, description=url )
ulink = Element.new("ulink")
ulink.add_attributes({"url" => url})
ulink.text = description
+ ulink
+ end
+
+ def add_url (element, url, description=url )
+ ulink = url_element(url, description)
element << ulink
return ulink
end