summary | refs | log | tree | commit | diff
path: root/report
diff options
context:
space:
mode:
author	Martin Gütlein <martin.guetlein@gmail.com>	2010-03-02 14:11:57 +0100
committer	Martin Gütlein <martin.guetlein@gmail.com>	2010-03-02 14:11:57 +0100
commit	b7efeaaf79233de8bbc173fa426e4561c458d44f (patch)
tree	9326f1d711a0536f069266a4613e0e0c65e26b62 /report
parent	e2b814301c323bc787ad9d75eceb786e3cb7dde9 (diff)
cv statistics, ie accept header hack, alg comp report
Diffstat (limited to 'report')
-rw-r--r--	report/external/mimeparse.rb	4
-rw-r--r--	report/plot_factory.rb	21
-rw-r--r--	report/prediction_util.rb	7
-rw-r--r--	report/report_application.rb	11
-rw-r--r--	report/report_factory.rb	83
-rw-r--r--	report/report_test.rb	241
-rw-r--r--	report/validation_access.rb	4
-rw-r--r--	report/validation_data.rb	42
8 files changed, 262 insertions, 151 deletions
diff --git a/report/external/mimeparse.rb b/report/external/mimeparse.rb
index f572c64..553c431 100644
--- a/report/external/mimeparse.rb
+++ b/report/external/mimeparse.rb
@@ -214,3 +214,7 @@ if __FILE__ == $0
end
end
end
+
+
+#puts MIMEParse::best_match(["text/xml","text/html","application/pdf"],
+# 'application/x-ms-application,image/jpeg, application/xaml+xml, image/gif, image/pjpeg, application/x-ms-xbap, */*')
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index d2884e3..c1a731f 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -34,8 +34,8 @@ module Reports
fp_rates = []
tp_rates = []
attribute_values.each do |value|
- names << value
data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
+ names << value
fp_rates << data[:fp_rate][0]
tp_rates << data[:tp_rate][0]
end
@@ -53,14 +53,21 @@ module Reports
data = []
validation_set.validations.each do |v|
values = []
- value_attributes.collect do |a|
+ value_attributes.each do |a|
value = v.send(a)
if value.is_a?(Hash)
- raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+")" unless value.key?(class_value)
- value = value[class_value]
+ if class_value==nil
+ avg_value = 0
+ value.values.each{ |val| avg_value+=val }
+ value = avg_value/value.values.size.to_f
+ else
+ raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value)
+ value = value[class_value]
+ end
end
values.push(value)
end
+
data << [v.send(title_attribute).to_s] + values
end
@@ -74,10 +81,10 @@ module Reports
end
- def self.create_ranking_plot( svg_out_file, validation_set, compare_attribute, equal_attribute, rank_attribute )
+ def self.create_ranking_plot( svg_out_file, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value=nil )
#compute ranks
- rank_set = validation_set.compute_ranking([equal_attribute],rank_attribute)
+ rank_set = validation_set.compute_ranking([equal_attribute],rank_attribute,class_value)
#puts rank_set.to_array([:algorithm_uri, :dataset_uri, :acc, :acc_ranking]).collect{|a| a.inspect}.join("\n")
#compute avg ranks
@@ -85,7 +92,7 @@ module Reports
#puts merge_set.to_array([:algorithm_uri, :dataset_uri, :acc, :acc_ranking]).collect{|a| a.inspect}.join("\n")
comparables = merge_set.get_values(compare_attribute)
- ranks = merge_set.get_values((rank_attribute.to_s+"_ranking").to_sym)
+ ranks = merge_set.get_values((rank_attribute.to_s+"_ranking").to_sym,false)
plot_ranking( rank_attribute.to_s+" ranking",
comparables,
diff --git a/report/prediction_util.rb b/report/prediction_util.rb
index f35d73a..fbe7531 100644
--- a/report/prediction_util.rb
+++ b/report/prediction_util.rb
@@ -14,6 +14,7 @@ module Reports::PredictionUtil
res = []
+
validation_set.validations.each do |v|
(0..v.get_predictions.num_instances-1).each do |i|
a = []
@@ -22,15 +23,15 @@ module Reports::PredictionUtil
a.push(v.get_predictions.actual_value(i).to_nice_s)
a.push(v.get_predictions.predicted_value(i).to_nice_s)
a.push(v.get_predictions.classification_miss?(i)?"X":"") if v.get_predictions.classification?
- a.push(v.get_predictions.confidence_value(i).to_nice_s)
+ a.push(v.get_predictions.confidence_value(i).to_nice_s) if v.get_predictions.confidence_values_available?
res.push(a)
end
end
#res = res.sort{|x,y| y[3] <=> x[3] }
header = [ "compound", "actual value", "predicted value"]
- header.push "missclassified" if validation_set.validations[0].get_predictions.classification?
- header.push "confidence value"
+ header.push "missclassified" if validation_set.first.get_predictions.classification?
+ header.push "confidence value" if validation_set.first.get_predictions.confidence_values_available?
res.insert(0, validation_attributes + header)
#puts res.collect{|c| c.inspect}.join("\n")
diff --git a/report/report_application.rb b/report/report_application.rb
index c68df11..4346f7f 100644
--- a/report/report_application.rb
+++ b/report/report_application.rb
@@ -31,9 +31,16 @@ end
get '/report/:type/:id' do
perform do |rs|
+
+ accept_header = request.env['HTTP_ACCEPT']
+ if request.env['HTTP_USER_AGENT'] =~ /MSIE/
+ LOGGER.info "Changing MSIE accept-header to text/html"
+ accept_header = "text/html"
+ end
#request.env['HTTP_ACCEPT'] = "application/pdf"
- content_type Reports::ReportFormat.get_format(request.env['HTTP_ACCEPT'])
- result = body(File.new( rs.get_report(params[:type],params[:id],request.env['HTTP_ACCEPT']) ))
+
+ content_type Reports::ReportFormat.get_format(accept_header)
+ result = body(File.new( rs.get_report(params[:type],params[:id],accept_header) ))
end
end
diff --git a/report/report_factory.rb b/report/report_factory.rb
index e577d70..a522901 100644
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -5,6 +5,7 @@ VAL_ATTR_TRAIN_TEST = [ :model_uri, :training_dataset_uri, :test_dataset_uri, :p
VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold ]
# selected attributes of interest when performing classification
VAL_ATTR_CLASS = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate ]
+VAL_ATTR_BAR_PLOT_CLASS = [ :area_under_roc, :accuracy, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
@@ -81,7 +82,7 @@ module Reports::ReportFactory
#puts merged.get_values(:percent_correct_variance, false).inspect
report = Reports::ReportContent.new("Crossvalidation report")
- if (validation_set.validations[0].percent_correct!=nil) #classification
+ if (validation_set.first.classification?)
report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results")
report.add_section_roc_plot(validation_set, nil, nil, "roc-plot.svg")
@@ -106,13 +107,52 @@ module Reports::ReportFactory
raise Reports::BadRequest.new("num validations is not >1") unless validation_set.size>1
raise Reports::BadRequest.new("validations must be either all regression, "+
+"or all classification validations") unless validation_set.all_classification? or validation_set.all_regression?
+ raise Reports::BadRequest.new("number of different algorithms <2") if validation_set.num_different_values(:algorithm_uri)<2
if validation_set.has_nil_values?(:crossvalidation_id)
- raise Reports::BadRequest.new("so far, algorithm comparison is only supported for crossvalidation results")
+ if validation_set.num_different_values(:test_dataset_uri)>1
+
+ # groups results into sets with equal test and training dataset
+ dataset_grouping = Reports::Util.group(validation_set.validations, [:test_dataset_uri, :training_dataset_uri])
+ # check if the same algorithms exists for each test and training dataset
+ Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri])
+
+ #merged = validation_set.merge([:algorithm_uri, :dataset_uri])
+ report = Reports::ReportContent.new("Algorithm comparison report Many datasets")
+
+ if (validation_set.first.classification?)
+ report.add_section_result(validation_set,[:algorithm_uri, :test_dataset_uri]+VAL_ATTR_CLASS,"Mean Results","Mean Results")
+ report.add_section_ranking_plots(validation_set, :algorithm_uri, :test_dataset_uri,
+ [:accuracy, :true_positive_rate, :true_negative_rate], "true")
+ else # regression
+ raise Reports::BadRequest.new("not implemented yet for regression")
+ end
+ return report
+ else
+ # this groups all validations in x different groups (arrays) according to there algorithm-uri
+ algorithm_grouping = Reports::Util.group(validation_set.validations, [:algorithm_uri])
+ # we check if there are corresponding validations in each group that have equal attributes (folds, num-folds,..)
+ Reports::Util.check_group_matching(algorithm_grouping, [:training_dataset_uri, :test_dataset_uri, :prediction_feature])
+
+ report = Reports::ReportContent.new("Algorithm comparison report")
+
+ if (validation_set.first.classification?)
+ report.add_section_bar_plot(validation_set,nil,:algorithm_uri,VAL_ATTR_BAR_PLOT_CLASS, "bar-plot.svg")
+ report.add_section_roc_plot(validation_set,nil, :algorithm_uri, "roc-plot.svg")
+ #validation_set.validations[0].get_prediction_feature_values.each do |class_value|
+ #report.add_section_bar_plot(validation_set,class_value,:algorithm_uri,VAL_ATTR_CLASS, "bar-plot-"+class_value+".svg")
+ #report.add_section_roc_plot(validation_set, class_value, :algorithm_uri, "roc-plot-"+class_value+".svg")
+ #end
+ report.add_section_result(validation_set,[:algorithm_uri]+VAL_ATTR_CLASS,"Results","Results")
+ else #regression
+ #report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results")
+ #report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results")
+ end
+ return report
+ end
else
raise Reports::BadRequest.new("num different cross-validation-ids <2") if validation_set.num_different_values(:crossvalidation_id)<2
validation_set.load_cv_attributes
- raise Reports::BadRequest.new("number of different algorithms <2") if validation_set.num_different_values(:algorithm_uri)<2
if validation_set.num_different_values(:dataset_uri)>1
# groups results into sets with equal dataset
@@ -128,9 +168,9 @@ module Reports::ReportFactory
merged = validation_set.merge([:algorithm_uri, :dataset_uri])
report = Reports::ReportContent.new("Algorithm comparison report - Many datasets")
- if (validation_set.validations[0].percent_correct!=nil) #classification
+ if (validation_set.first.classification?)
report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results")
- report.add_section_ranking_plots(merged, :algorithm_uri, :dataset_uri, [:acc, :auc, :sens, :spec])
+ report.add_section_ranking_plots(merged, :algorithm_uri, :dataset_uri, [:acc, :auc, :sens, :spec], "true")
else # regression
report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results")
end
@@ -145,7 +185,7 @@ module Reports::ReportFactory
report = Reports::ReportContent.new("Algorithm comparison report")
- if (validation_set.validations[0].percent_correct!=nil) #classification
+ if (validation_set.first.classification?)
validation_set.validations[0].get_prediction_feature_values.each do |class_value|
report.add_section_bar_plot(merged,class_value,:algorithm_uri,VAL_ATTR_CLASS, "bar-plot-"+class_value+".svg")
report.add_section_roc_plot(validation_set, class_value, :algorithm_uri, "roc-plot-"+class_value+".svg")
@@ -204,7 +244,7 @@ class Reports::ReportContent
vals = vals.collect{|a| a.collect{|v| v.to_s[0,66] }}
#PENDING transpose values if there more than 4 columns, and there are more than columns than rows
transpose = vals[0].size>4 && vals[0].size>vals.size
- @xml_report.add_table(section_table, table_title, vals, !transpose, transpose)
+ @xml_report.add_table(section_table, table_title, vals, !transpose, transpose)
end
def add_section_confusion_matrix( validation,
@@ -235,12 +275,16 @@ class Reports::ReportContent
end
section_roc = @xml_report.add_section(@xml_report.get_root_element, section_title)
- if validation_set.first.get_predictions
+
+ prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? }
+
+ if prediction_set.size>0
+
+ section_text += "\nWARNING: roc plot information not available for all validation results" if prediction_set.size!=validation_set.size
@xml_report.add_paragraph(section_roc, section_text) if section_text
-
begin
plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory.create_roc_plot( plot_file_path, validation_set, class_value, split_set_attribute, validation_set.size>1 )
+ Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, prediction_set.size>1 )
@xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "SVG", image_caption)
rescue RuntimeError => ex
LOGGER.error("could not create roc plot: "+ex.message)
@@ -248,7 +292,7 @@ class Reports::ReportContent
@xml_report.add_paragraph(section_roc, "could not create roc plot: "+ex.message)
end
else
- @xml_report.add_paragraph(section_roc, "No prediction info for roc plot available.")
+ @xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.")
end
end
@@ -257,13 +301,14 @@ class Reports::ReportContent
compare_attribute,
equal_attribute,
rank_attributes,
+ class_value,
section_title="Ranking Plots",
section_text="This section contains the ranking plots.")
section_rank = @xml_report.add_section(@xml_report.get_root_element, section_title)
@xml_report.add_paragraph(section_rank, section_text) if section_text
- rank_attributes.each{|a| add_ranking_plot(section_rank, validation_set, compare_attribute, equal_attribute, a, a.to_s+"-ranking.svg")}
+ rank_attributes.each{|a| add_ranking_plot(section_rank, validation_set, compare_attribute, equal_attribute, a, class_value, a.to_s+"-ranking.svg")}
end
def add_ranking_plot( report_section,
@@ -271,12 +316,13 @@ class Reports::ReportContent
compare_attribute,
equal_attribute,
rank_attribute,
+ class_value=nil,
plot_file_name="ranking.svg",
image_title="Ranking Plot",
image_caption=nil)
plot_file_path = add_tmp_file(plot_file_name)
- Reports::PlotFactory::create_ranking_plot(plot_file_path, validation_set, compare_attribute, equal_attribute, rank_attribute)
+ Reports::PlotFactory::create_ranking_plot(plot_file_path, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value)
@xml_report.add_imagefigure(report_section, image_title, plot_file_name, "SVG", image_caption)
end
@@ -287,11 +333,16 @@ class Reports::ReportContent
value_attributes,
plot_file_name="bar-plot.svg",
section_title="Bar Plot",
- section_text="This section contains the bar plot.",
+ section_text=nil,
image_title=nil,
image_caption=nil)
- image_title = "Bar Plot for class-value '"+class_value+"'" unless image_title
-
+ if class_value
+ section_text = "This section contains the bar plot for class '"+class_value+"'." unless section_text
+ image_title = "Bar Plot for class-value '"+class_value+"'" unless image_title
+ else
+ section_text = "This section contains the bar plot." unless section_text
+ image_title = "Bar Plot for all classes" unless image_title
+ end
section_bar = @xml_report.add_section(@xml_report.get_root_element, section_title)
@xml_report.add_paragraph(section_bar, section_text) if section_text
diff --git a/report/report_test.rb b/report/report_test.rb
index 3e0d093..ed6b377 100644
--- a/report/report_test.rb
+++ b/report/report_test.rb
@@ -8,12 +8,19 @@ require 'rack/test'
require "lib/test_util.rb"
-#class Reports::ApplicationTest < Test::Unit::TestCase
-# include Rack::Test::Methods
-#
-# def app
-# Sinatra::Application
-# end
+class Reports::ApplicationTest < Test::Unit::TestCase
+ include Rack::Test::Methods
+
+ def app
+ Sinatra::Application
+ end
+
+ def test_nothing
+
+ get '/report/validation/1'
+
+ puts last_response.body
+ end
#
# def test_webservice
#
@@ -68,122 +75,122 @@ require "lib/test_util.rb"
# end
# end
#
-#end
-
-
-
-class Reports::ReportServiceTest < Test::Unit::TestCase
- include Lib::TestUtil
+end
- WS_VAL = @@config[:services]["opentox-validation"]
- WS_DATA=@@config[:services]["opentox-dataset"]
- FILE=File.new("data/hamster_carcinogenicity.owl","r")
-
- WS_CLASS_ALG=File.join(@@config[:services]["opentox-algorithm"],"lazar")
- WS_FEATURE_ALG=File.join(@@config[:services]["opentox-algorithm"],"fminer")
-
- #WS_CLASS_ALG_2="localhost:4008/algorithm"
- #WS_FEATURE_ALG_2=nil
- def test_service_ot_webservice
- begin
-
- rep = Reports::ReportService.new("http://some.location")
- types = rep.get_report_types
- assert types.is_a?(String)
- assert types.split("\n").size == Reports::ReportFactory::REPORT_TYPES.size
- #Reports::ReportFactory::REPORT_TYPES.each{|t| rep.get_all_reports(t)}
- #assert_raise(Reports::NotFound){rep.get_all_reports("osterhase")}
-
- ### using ot_mock_layer (reporting component does not rely on ot validation webservice)
-
- #ENV['REPORT_VALIDATION_ACCESS'] = "mock_layer"
- #Reports::Validation.reset_validation_access
-
-# create_report(rep, "validation_uri_1", "validation")
-# assert_raise(Reports::BadRequest){create_report(rep, ["validation_uri_1","validation_uri_2"], "validation")}
+#class Reports::ReportServiceTest < Test::Unit::TestCase
+# include Lib::TestUtil
+#
+# WS_VAL = @@config[:services]["opentox-validation"]
+# WS_DATA=@@config[:services]["opentox-dataset"]
+# FILE=File.new("data/hamster_carcinogenicity.owl","r")
+#
+# WS_CLASS_ALG=File.join(@@config[:services]["opentox-algorithm"],"lazar")
+# WS_FEATURE_ALG=File.join(@@config[:services]["opentox-algorithm"],"fminer")
+#
+# #WS_CLASS_ALG_2="localhost:4008/algorithm"
+# #WS_FEATURE_ALG_2=nil
+#
+# def test_service_ot_webservice
+#
+# begin
#
-# create_report(rep, "crossvalidation_uri_1", "crossvalidation")
-# create_report(rep, ["validation_uri_1"]*Reports::OTMockLayer::NUM_FOLDS, "crossvalidation")
-# assert_raise(Reports::BadRequest){create_report(rep, ["validation_uri_1"]*(Reports::OTMockLayer::NUM_FOLDS-1), "crossvalidation")}
+# rep = Reports::ReportService.new("http://some.location")
+# types = rep.get_report_types
+# assert types.is_a?(String)
+# assert types.split("\n").size == Reports::ReportFactory::REPORT_TYPES.size
+# #Reports::ReportFactory::REPORT_TYPES.each{|t| rep.get_all_reports(t)}
+# #assert_raise(Reports::NotFound){rep.get_all_reports("osterhase")}
#
-# create_report(rep, ["crossvalidation_uri_1"]* (Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS), "algorithm_comparison")
-# create_report(rep, ["validation_uri_1"]* (Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS * Reports::OTMockLayer::NUM_FOLDS), "algorithm_comparison")
-
- ### using ot webservices (instead of mock layer)
-
- #ENV['REPORT_VALIDATION_ACCESS'] = nil
- #Reports::Validation.reset_validation_access
-
- #data_uri = upload_data WS_DATA, FILE
- #data_uri= File.join(WS_DATA,"1")
-
-# #val_uri = create_single_validation(data_uri)
-# #val_uri = create_single_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2)
-# val_uri = File.join(WS_VAL,"15")
-## #add_resource val_uri
-# create_report(rep, val_uri, "validation")
-
- #val_uri = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2)
- #val_uri = create_cross_validation(data_uri)
- val_uri = File.join(WS_VAL,"crossvalidation/1")
- #val_uri2 = "http://localhost:4007/crossvalidation/14"
-# # add_resource val_uri
- create_report(rep, val_uri, "crossvalidation")
-
-# #val_uri2 = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2)
-# #val_uri = ["http://localhost:4007/crossvalidation/6", "http://localhost:4007/crossvalidation/8"]
- #val_uri = ["http://localhost:4007/crossvalidation/7", "http://localhost:4007/crossvalidation/8"]
-# #add_resource val_uri
- #create_report(rep, val_uri, "algorithm_comparison")
-
- ensure
- # delete_resources
- end
- end
-
- private
- def create_single_validation(data_uri, ws_class_alg=WS_CLASS_ALG, ws_feat_alg=WS_FEATURE_ALG)
- puts "validating"
- val_params = {
- :dataset_uri => data_uri,
- :algorithm_uri => ws_class_alg,
- :split_ratio=>0.7,
- :prediction_feature => "classification",}
- val_params[:feature_generation_uri] = ws_feat_alg if ws_feat_alg
- begin
- RestClient.post WS_VAL+"/validation/training_test_split", val_params
- rescue => ex
- raise "error validating "+WS_VAL+"/validation/training_test_split\n "+val_params.inspect+" \n -> "+ex.message
- end
- end
-
- def create_cross_validation(data_uri, ws_class_alg=WS_CLASS_ALG, ws_feat_alg=WS_FEATURE_ALG)
- puts "cross-validating"
- ext("curl -X POST -d num_folds=3 -d dataset_uri="+data_uri+" -d algorithm_uri="+ws_class_alg+" -d prediction_feature=classification"+
- (ws_feat_alg ? " -d feature_generation_uri="+ws_feat_alg : "")+
- " "+WS_VAL+"/crossvalidation",nil)
- end
-
- def create_report(report_service, val_uri, type)
-
- Reports.reset_ot_access if ENV['USE_OT_MOCK_LAYER']
- report_uri = report_service.create_report(type, val_uri)
- assert type == report_service.parse_type(report_uri)
- id = report_service.parse_id(report_uri)
-
- #puts "created report with id "+id.to_s
-
- #assert_raise(Reports::BadRequest){report_service.get_report(type, id, "weihnachtsmann")}
-
- report_service.get_report(type, id, "text/html")
- #report_service.get_report(type, id, "application/pdf")
- #assert_raise(Reports::NotFound){report_service.delete_report(type, 877658)}
-
-# rep.delete_report(type, id)
- end
-end
+# ### using ot_mock_layer (reporting component does not rely on ot validation webservice)
+#
+# #ENV['REPORT_VALIDATION_ACCESS'] = "mock_layer"
+# #Reports::Validation.reset_validation_access
+#
+## create_report(rep, "validation_uri_1", "validation")
+## assert_raise(Reports::BadRequest){create_report(rep, ["validation_uri_1","validation_uri_2"], "validation")}
+##
+## create_report(rep, "crossvalidation_uri_1", "crossvalidation")
+## create_report(rep, ["validation_uri_1"]*Reports::OTMockLayer::NUM_FOLDS, "crossvalidation")
+## assert_raise(Reports::BadRequest){create_report(rep, ["validation_uri_1"]*(Reports::OTMockLayer::NUM_FOLDS-1), "crossvalidation")}
+##
+## create_report(rep, ["crossvalidation_uri_1"]* (Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS), "algorithm_comparison")
+## create_report(rep, ["validation_uri_1"]* (Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS * Reports::OTMockLayer::NUM_FOLDS), "algorithm_comparison")
+#
+# ### using ot webservices (instead of mock layer)
+#
+# #ENV['REPORT_VALIDATION_ACCESS'] = nil
+# #Reports::Validation.reset_validation_access
+#
+# #data_uri = upload_data WS_DATA, FILE
+# #data_uri= File.join(WS_DATA,"1")
+#
+## #val_uri = create_single_validation(data_uri)
+## #val_uri = create_single_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2)
+## val_uri = File.join(WS_VAL,"15")
+### #add_resource val_uri
+## create_report(rep, val_uri, "validation")
+#
+# #val_uri = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2)
+# #val_uri = create_cross_validation(data_uri)
+# val_uri = File.join(WS_VAL,"crossvalidation/1")
+# #val_uri2 = "http://localhost:4007/crossvalidation/14"
+## # add_resource val_uri
+# create_report(rep, val_uri, "crossvalidation")
+#
+## #val_uri2 = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2)
+## #val_uri = ["http://localhost:4007/crossvalidation/6", "http://localhost:4007/crossvalidation/8"]
+# #val_uri = ["http://localhost:4007/crossvalidation/7", "http://localhost:4007/crossvalidation/8"]
+## #add_resource val_uri
+# #create_report(rep, val_uri, "algorithm_comparison")
+#
+# ensure
+# # delete_resources
+# end
+# end
+#
+# private
+# def create_single_validation(data_uri, ws_class_alg=WS_CLASS_ALG, ws_feat_alg=WS_FEATURE_ALG)
+# puts "validating"
+# val_params = {
+# :dataset_uri => data_uri,
+# :algorithm_uri => ws_class_alg,
+# :split_ratio=>0.7,
+# :prediction_feature => "classification",}
+# val_params[:feature_generation_uri] = ws_feat_alg if ws_feat_alg
+# begin
+# RestClient.post WS_VAL+"/validation/training_test_split", val_params
+# rescue => ex
+# raise "error validating "+WS_VAL+"/validation/training_test_split\n "+val_params.inspect+" \n -> "+ex.message
+# end
+# end
+#
+# def create_cross_validation(data_uri, ws_class_alg=WS_CLASS_ALG, ws_feat_alg=WS_FEATURE_ALG)
+# puts "cross-validating"
+# ext("curl -X POST -d num_folds=3 -d dataset_uri="+data_uri+" -d algorithm_uri="+ws_class_alg+" -d prediction_feature=classification"+
+# (ws_feat_alg ? " -d feature_generation_uri="+ws_feat_alg : "")+
+# " "+WS_VAL+"/crossvalidation",nil)
+# end
+#
+# def create_report(report_service, val_uri, type)
+#
+# Reports.reset_ot_access if ENV['USE_OT_MOCK_LAYER']
+# report_uri = report_service.create_report(type, val_uri)
+# assert type == report_service.parse_type(report_uri)
+# id = report_service.parse_id(report_uri)
+#
+# #puts "created report with id "+id.to_s
+#
+# #assert_raise(Reports::BadRequest){report_service.get_report(type, id, "weihnachtsmann")}
+#
+# report_service.get_report(type, id, "text/html")
+# #report_service.get_report(type, id, "application/pdf")
+# #assert_raise(Reports::NotFound){report_service.delete_report(type, 877658)}
+#
+## rep.delete_report(type, id)
+# end
+#end
diff --git a/report/validation_access.rb b/report/validation_access.rb
index 2a5ce51..e06c1f0 100644
--- a/report/validation_access.rb
+++ b/report/validation_access.rb
@@ -73,7 +73,7 @@ class Reports::ValidationDB < Reports::ValidationAccess
validation.send("#{p.to_s}=".to_sym, v[p])
end
- {:classification_statistics => Lib::VAL_CLASS_PROPS,
+ {:classification_statistics => Lib::VAL_CLASS_PROPS_EXTENDED,
:regression_statistics => Lib::VAL_REGR_PROPS}.each do |subset_name,subset_props|
subset = v[subset_name]
subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset
@@ -148,7 +148,7 @@ class Reports::ValidationWebservice < Reports::ValidationAccess
#validation.prediction_feature = model.get_prediction_feature
{Lib::VAL_CV_PROP => Lib::VAL_CV_PROPS,
- Lib::VAL_CLASS_PROP => Lib::VAL_CLASS_PROPS}.each do |subset_name,subset_props|
+ Lib::VAL_CLASS_PROP => Lib::VAL_CLASS_PROPS_EXTENDED}.each do |subset_name,subset_props|
subset = data[subset_name]
subset_props.each{ |prop| validation.send("#{prop}=".to_sym, subset[prop]) } if subset
end
diff --git a/report/validation_data.rb b/report/validation_data.rb
index 13a5175..a2b8905 100644
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -1,7 +1,7 @@
# the variance is computed when merging results for these attributes
-VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, :r_square ]
-VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate ]
+VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, :r_square, :accuracy ]
+VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :accuracy ]
class Object
@@ -25,6 +25,19 @@ class Object
end
end
+class Hash
+
+ def mean_value
+ sum = 0
+ self.values.collect do |v|
+ raise "cannot compute mean of non-numeric values '"+self.inspect+"'" unless v!=nil and v.is_a?(Numeric)
+ sum+=v
+ end
+ sum/=self.values.size.to_f
+ end
+
+end
+
module Reports
@@ -261,6 +274,18 @@ module Reports
return new_set
end
+ # returns a new set with all validation that the attached block accepted
+ # e.g. create set with predictions: collect{ |validation| validation.get_predictions!=null }
+ #
+ # call-seq:
+ # filter_proc(proc) => Reports::ValidationSet
+ #
+ def collect
+ new_set = Reports::ValidationSet.new
+ validations.each{ |v| new_set.validations.push(v) if yield(v) }
+ return new_set
+ end
+
# returns an array, with values for __attributes__, that can be use for a table
# * first row is header row
# * other rows are values
@@ -321,7 +346,7 @@ module Reports
# call-seq:
# compute_ranking(equal_attributes, ranking_attribute) => array
#
- def compute_ranking(equal_attributes, ranking_attribute)
+ def compute_ranking(equal_attributes, ranking_attribute, class_value=nil )
new_set = Reports::ValidationSet.new
(0..@validations.size-1).each do |i|
@@ -334,7 +359,16 @@ module Reports
# put indices and ranking values for current group into hash
rank_hash = {}
(0..group.size-1).each do |i|
- rank_hash[i] = group[i].send(ranking_attribute)
+ val = group[i].send(ranking_attribute)
+ if val.is_a?(Hash)
+ if class_value != nil
+ raise "no value for class value "+class_value.class.to_s+" "+class_value.to_s+" in hash "+val.inspect.to_s unless val.has_key?(class_value)
+ val = val[class_value]
+ else
+ val = val.mean_value
+ end
+ end
+ rank_hash[i] = val
end
# sort group accrording to second value (= ranking value)