diff options
-rw-r--r-- | lib/merge.rb | 141 | ||||
-rw-r--r-- | lib/ot_predictions.rb | 2 | ||||
-rw-r--r-- | lib/predictions.rb | 22 | ||||
-rw-r--r-- | lib/rdf_provider.rb | 2 | ||||
-rw-r--r-- | lib/validation_db.rb | 7 | ||||
-rw-r--r-- | report/external/mimeparse.rb | 4 | ||||
-rw-r--r-- | report/plot_factory.rb | 21 | ||||
-rw-r--r-- | report/prediction_util.rb | 7 | ||||
-rw-r--r-- | report/report_application.rb | 11 | ||||
-rw-r--r-- | report/report_factory.rb | 83 | ||||
-rw-r--r-- | report/report_test.rb | 241 | ||||
-rw-r--r-- | report/validation_access.rb | 4 | ||||
-rw-r--r-- | report/validation_data.rb | 42 | ||||
-rw-r--r-- | validation/validation_application.rb | 21 | ||||
-rw-r--r-- | validation/validation_service.rb | 1 | ||||
-rw-r--r-- | validation/validation_test.rb | 6 |
16 files changed, 447 insertions, 168 deletions
diff --git a/lib/merge.rb b/lib/merge.rb new file mode 100644 index 0000000..f35198d --- /dev/null +++ b/lib/merge.rb @@ -0,0 +1,141 @@ + +$merge_count = {} + +class Array + def merge_array( merge_attributes, equal_attributes=nil ) + return nil if self.size == nil + return self[0] if self.size==1 + + m = self[0].merge_object(self[1], merge_attributes, equal_attributes) + (2..self.size-1).each do |i| + m = m.merge_object(self[i], merge_attributes, equal_attributes) + end + return m + end +end + +class Object + + def merge_count() + $merge_count[self] = 1 if $merge_count[self]==nil + return $merge_count[self] + end + + def set_merge_count(merge_count) + $merge_count[self] = merge_count + end + + def self.compute_variance( old_variance, n, new_mean, old_mean, new_value ) + # use revursiv formular for computing the variance + # ( see Tysiak, Folgen: explizit und rekursiv, ISSN: 0025-5866 + # http://www.frl.de/tysiakpapers/07_TY_Papers.pdf ) + return (n>1 ? old_variance * (n-2)/(n-1) : 0) + + (new_mean - old_mean)**2 + + (n>1 ? (new_value - new_mean)**2/(n-1) : 0 ) + end + + def self.merge_value( value1, weight1, compute_variance, variance1, value2 ) + + if value1.is_a?(Numeric) and value2.is_a?(Numeric) + value = (value1 * weight1 + value2) / (weight1 + 1).to_f; + if compute_variance + variance = compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 ) + end + elsif value1.is_a?(Array) and value2.is_a?(Array) + raise "cannot merge arrays with unequal sizes" if !value2.is_a?(Array) || value1.size!=value2.size + value = [] + variance = [] + (0..value1.size-1).each do |i| + m = merge_value( value1[i], weight1, compute_variance, variance1==nil ? nil : variance1[i], value2[i] ) + value[i] = m[:value] + variance[i] = m[:variance] if compute_variance + end + elsif value1.is_a?(Hash) and value2.is_a?(Hash) + value = {} + variance = {} + value1.keys.each do |k| + m = merge_value( value1[k], weight1, compute_variance, variance1==nil ? nil : variance1[k], value2[k] ) + value[k] = m[:value] + variance[k] = m[:variance] if compute_variance + end + else + if value1.to_s != value2.to_s + value = value1.to_s + "/" + value2.to_s + else + value = value2.to_s + end + end + + {:value => value, :variance => (compute_variance ? variance : nil) } + end + + def merge_object( object, merge_attributes, equal_attributes=nil ) + + raise "classes not equal" if object.class != self.class + raise "not supported, successivly add unmerged object to a merge object" if object.merge_count>1 + + new_object = self.class.new + merge_attributes.each do |variable| + next if variable.to_s =~ /_variance$/ + + if (equal_attributes and equal_attributes.index(variable) != nil) + new_object.send("#{variable.to_s}=".to_sym, send(variable)) + else + compute_variance = self.respond_to?( (variable.to_s+"_variance").to_sym ) #VAL_ATTR_VARIANCE.index(a)!=nil + old_variance = compute_variance ? send((variable.to_s+"_variance").to_sym) : nil + m = Object::merge_value( send(variable), self.merge_count, compute_variance, old_variance, object.send(variable) ) + new_object.send("#{variable.to_s}=".to_sym, m[:value]) + new_object.send("#{variable.to_s}_variance=".to_sym, m[:variance]) if compute_variance + end + end + + new_object.set_merge_count self.merge_count+1 + return new_object + end + +end + +class MergeTest + + attr_accessor :string, :integer, :float, :hash_value, :float_variance + + def to_s + res = [:string, :integer, :float, :hash_value].collect do |var| + variance = nil + variance = "+-"+send((var.to_s+"_variance")).inspect if self.respond_to?( (var.to_s+"_variance").to_sym ) + var.to_s+":"+send(var).inspect+variance.to_s + end + res.join(" ") + end + + def self.demo + to_merge = [] + p = MergeTest.new + p.string = "asdf" + p.integer = 39 + p.float = 78.6 + p.hash_value = {:mixed_key=>80, :string_key=>"tu", :int_key=>70} + to_merge << p + + p = MergeTest.new + p.string = "jkl" + p.integer = 25 + p.float = 35.6 + p.hash_value = {:mixed_key=>"bla", :string_key=>"iu", :int_key=>34} + to_merge << p + + p = MergeTest.new + p.string = "qwert" + p.integer = 100 + p.float = 100 + p.hash_value = {:mixed_key=>45, :string_key=>"op", :int_key=>20} + to_merge << p + + puts "merged: "+to_merge.merge_array([:string, :integer, :float, :hash_value]).to_s + end + +end + +#MergeTest.demo + + diff --git a/lib/ot_predictions.rb b/lib/ot_predictions.rb index ee67c2c..3e11f2a 100644 --- a/lib/ot_predictions.rb +++ b/lib/ot_predictions.rb @@ -86,7 +86,7 @@ module Lib res = {} if @is_classification - (Lib::VAL_CLASS_PROPS).each{ |s| res[s] = send(s)} + (Lib::VAL_CLASS_PROPS_EXTENDED).each{ |s| res[s] = send(s)} else (Lib::VAL_REGR_PROPS).each{ |s| res[s] = send(s) } end diff --git a/lib/predictions.rb b/lib/predictions.rb index f673f8c..259a990 100644 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -35,6 +35,12 @@ module Lib raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size @confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) } + conf_val_tmp = {} + @confidence_values.each{ |c| conf_val_tmp[c] = nil } + if conf_val_tmp.keys.size<2 + LOGGER.warn("prediction w/o confidence values"); + @confidence_values=nil + end if @is_classification raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values @@ -54,13 +60,13 @@ module Lib init_stats() (0..@predicted_values.size-1).each do |i| - update_stats( @predicted_values[i], @actual_values[i], @confidence_values[i] ) + update_stats( @predicted_values[i], @actual_values[i], (@confidence_values!=nil)?@confidence_values[i]:nil ) end end def init_stats @num_no_actual_value = 0 - @num_with_actual_value = 0 + @num_with_actual_value = 0 @num_predicted = 0 @num_unpredicted = 0 @@ -137,6 +143,10 @@ module Lib return 100 * @num_incorrect / @num_with_actual_value.to_f end + def accuracy + return percent_correct / 100.0 + end + def percent_unpredicted return 0 if @num_with_actual_value==0 return 100 * @num_unpredicted / @num_with_actual_value.to_f @@ -188,6 +198,7 @@ module Lib def area_under_roc(class_index=nil) return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil + return 0.0 if @confidence_values==nil LOGGER.warn("TODO: implement approx computiation of AUC,"+ "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000 @@ -212,6 +223,7 @@ module Lib sum += 1 if tp>fp end end + return sum / (tp_conf.size * fp_conf.size).to_f end @@ -378,7 +390,7 @@ module Lib # data for roc-plots ################################################################################### def get_roc_values(class_value) - + raise "no confidence values" if @confidence_values==nil class_index = @prediction_feature_values.index(class_value) raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil @@ -430,6 +442,10 @@ module Lib @is_classification end + def confidence_values_available? + return @confidence_values!=nil + end + ################################################################################################################### private diff --git a/lib/rdf_provider.rb b/lib/rdf_provider.rb index fab0eaf..1715566 100644 --- a/lib/rdf_provider.rb +++ b/lib/rdf_provider.rb @@ -78,7 +78,7 @@ module Lib set_literal( k, v, node) elsif @rdf_provider.object_property?(k) add_object_property( k, v, node) - elsif [ :uri, :id, :finished ].index(k)!=nil + elsif [ :uri, :id ].index(k)!=nil #skip else raise "illegal value k:"+k.to_s+" v:"+v.to_s diff --git a/lib/validation_db.rb b/lib/validation_db.rb index ca4a7e0..01607ce 100644 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -6,7 +6,7 @@ end module Lib VAL_PROPS = [ :id, :uri, :model_uri, :training_dataset_uri, :prediction_feature, - :test_dataset_uri, :prediction_dataset_uri, :finished, + :test_dataset_uri, :prediction_dataset_uri, :created_at, :real_runtime, # :cpu_runtime, :num_instances, :num_without_class, :percent_without_class, :num_unpredicted, :percent_unpredicted ] @@ -21,13 +21,14 @@ module Lib :num_true_positives, :num_true_negatives, :precision, :recall, :true_negative_rate, :true_positive_rate ] VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS + [ :confusion_matrix ] + VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy] # :regression_statistics VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] CROSS_VAL_PROPS = [:algorithm_uri, :dataset_uri, :num_folds, :stratified, :random_seed] - ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS + VAL_REGR_PROPS + CROSS_VAL_PROPS + ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS class Validation include DataMapper::Resource @@ -39,7 +40,6 @@ module Lib property :test_dataset_uri, String, :length => 255 property :prediction_dataset_uri, String, :length => 255 property :prediction_feature, String, :length => 255 - property :finished, Boolean, :default => false property :created_at, DateTime property :real_runtime, Float @@ -65,7 +65,6 @@ module Lib property :num_folds, Integer, :default => 10 property :stratified, Boolean, :default => false property :random_seed, Integer, :default => 1 - property :finished, Boolean, :default => false end end diff --git a/report/external/mimeparse.rb b/report/external/mimeparse.rb index f572c64..553c431 100644 --- a/report/external/mimeparse.rb +++ b/report/external/mimeparse.rb @@ -214,3 +214,7 @@ if __FILE__ == $0 end end end + + +#puts MIMEParse::best_match(["text/xml","text/html","application/pdf"], +# 'application/x-ms-application,image/jpeg, application/xaml+xml, image/gif, image/pjpeg, application/x-ms-xbap, */*') diff --git a/report/plot_factory.rb b/report/plot_factory.rb index d2884e3..c1a731f 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -34,8 +34,8 @@ module Reports fp_rates = [] tp_rates = [] attribute_values.each do |value| - names << value data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + names << value fp_rates << data[:fp_rate][0] tp_rates << data[:tp_rate][0] end @@ -53,14 +53,21 @@ module Reports data = [] validation_set.validations.each do |v| values = [] - value_attributes.collect do |a| + value_attributes.each do |a| value = v.send(a) if value.is_a?(Hash) - raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+")" unless value.key?(class_value) - value = value[class_value] + if class_value==nil + avg_value = 0 + value.values.each{ |val| avg_value+=val } + value = avg_value/value.values.size.to_f + else + raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value) + value = value[class_value] + end end values.push(value) end + data << [v.send(title_attribute).to_s] + values end @@ -74,10 +81,10 @@ module Reports end - def self.create_ranking_plot( svg_out_file, validation_set, compare_attribute, equal_attribute, rank_attribute ) + def self.create_ranking_plot( svg_out_file, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value=nil ) #compute ranks - rank_set = validation_set.compute_ranking([equal_attribute],rank_attribute) + rank_set = validation_set.compute_ranking([equal_attribute],rank_attribute,class_value) #puts rank_set.to_array([:algorithm_uri, :dataset_uri, :acc, :acc_ranking]).collect{|a| a.inspect}.join("\n") #compute avg ranks @@ -85,7 +92,7 @@ module Reports #puts merge_set.to_array([:algorithm_uri, :dataset_uri, :acc, :acc_ranking]).collect{|a| a.inspect}.join("\n") comparables = merge_set.get_values(compare_attribute) - ranks = merge_set.get_values((rank_attribute.to_s+"_ranking").to_sym) + ranks = merge_set.get_values((rank_attribute.to_s+"_ranking").to_sym,false) plot_ranking( rank_attribute.to_s+" ranking", comparables, diff --git a/report/prediction_util.rb b/report/prediction_util.rb index f35d73a..fbe7531 100644 --- a/report/prediction_util.rb +++ b/report/prediction_util.rb @@ -14,6 +14,7 @@ module Reports::PredictionUtil res = [] + validation_set.validations.each do |v| (0..v.get_predictions.num_instances-1).each do |i| a = [] @@ -22,15 +23,15 @@ module Reports::PredictionUtil a.push(v.get_predictions.actual_value(i).to_nice_s) a.push(v.get_predictions.predicted_value(i).to_nice_s) a.push(v.get_predictions.classification_miss?(i)?"X":"") if v.get_predictions.classification? - a.push(v.get_predictions.confidence_value(i).to_nice_s) + a.push(v.get_predictions.confidence_value(i).to_nice_s) if v.get_predictions.confidence_values_available? res.push(a) end end #res = res.sort{|x,y| y[3] <=> x[3] } header = [ "compound", "actual value", "predicted value"] - header.push "missclassified" if validation_set.validations[0].get_predictions.classification? - header.push "confidence value" + header.push "missclassified" if validation_set.first.get_predictions.classification? + header.push "confidence value" if validation_set.first.get_predictions.confidence_values_available? res.insert(0, validation_attributes + header) #puts res.collect{|c| c.inspect}.join("\n") diff --git a/report/report_application.rb b/report/report_application.rb index c68df11..4346f7f 100644 --- a/report/report_application.rb +++ b/report/report_application.rb @@ -31,9 +31,16 @@ end get '/report/:type/:id' do perform do |rs| + + accept_header = request.env['HTTP_ACCEPT'] + if request.env['HTTP_USER_AGENT'] =~ /MSIE/ + LOGGER.info "Changing MSIE accept-header to text/html" + accept_header = "text/html" + end #request.env['HTTP_ACCEPT'] = "application/pdf" - content_type Reports::ReportFormat.get_format(request.env['HTTP_ACCEPT']) - result = body(File.new( rs.get_report(params[:type],params[:id],request.env['HTTP_ACCEPT']) )) + + content_type Reports::ReportFormat.get_format(accept_header) + result = body(File.new( rs.get_report(params[:type],params[:id],accept_header) )) end end diff --git a/report/report_factory.rb b/report/report_factory.rb index e577d70..a522901 100644 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -5,6 +5,7 @@ VAL_ATTR_TRAIN_TEST = [ :model_uri, :training_dataset_uri, :test_dataset_uri, :p VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold ] # selected attributes of interest when performing classification VAL_ATTR_CLASS = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate ] +VAL_ATTR_BAR_PLOT_CLASS = [ :area_under_roc, :accuracy, :true_positive_rate, :true_negative_rate ] VAL_ATTR_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] @@ -81,7 +82,7 @@ module Reports::ReportFactory #puts merged.get_values(:percent_correct_variance, false).inspect report = Reports::ReportContent.new("Crossvalidation report") - if (validation_set.validations[0].percent_correct!=nil) #classification + if (validation_set.first.classification?) report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") report.add_section_roc_plot(validation_set, nil, nil, "roc-plot.svg") @@ -106,13 +107,52 @@ module Reports::ReportFactory raise Reports::BadRequest.new("num validations is not >1") unless validation_set.size>1 raise Reports::BadRequest.new("validations must be either all regression, "+ +"or all classification validations") unless validation_set.all_classification? or validation_set.all_regression? + raise Reports::BadRequest.new("number of different algorithms <2") if validation_set.num_different_values(:algorithm_uri)<2 if validation_set.has_nil_values?(:crossvalidation_id) - raise Reports::BadRequest.new("so far, algorithm comparison is only supported for crossvalidation results") + if validation_set.num_different_values(:test_dataset_uri)>1 + + # groups results into sets with equal test and training dataset + dataset_grouping = Reports::Util.group(validation_set.validations, [:test_dataset_uri, :training_dataset_uri]) + # check if the same algorithms exists for each test and training dataset + Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri]) + + #merged = validation_set.merge([:algorithm_uri, :dataset_uri]) + report = Reports::ReportContent.new("Algorithm comparison report Many datasets") + + if (validation_set.first.classification?) + report.add_section_result(validation_set,[:algorithm_uri, :test_dataset_uri]+VAL_ATTR_CLASS,"Mean Results","Mean Results") + report.add_section_ranking_plots(validation_set, :algorithm_uri, :test_dataset_uri, + [:accuracy, :true_positive_rate, :true_negative_rate], "true") + else # regression + raise Reports::BadRequest.new("not implemented yet for regression") + end + return report + else + # this groups all validations in x different groups (arrays) according to there algorithm-uri + algorithm_grouping = Reports::Util.group(validation_set.validations, [:algorithm_uri]) + # we check if there are corresponding validations in each group that have equal attributes (folds, num-folds,..) + Reports::Util.check_group_matching(algorithm_grouping, [:training_dataset_uri, :test_dataset_uri, :prediction_feature]) + + report = Reports::ReportContent.new("Algorithm comparison report") + + if (validation_set.first.classification?) + report.add_section_bar_plot(validation_set,nil,:algorithm_uri,VAL_ATTR_BAR_PLOT_CLASS, "bar-plot.svg") + report.add_section_roc_plot(validation_set,nil, :algorithm_uri, "roc-plot.svg") + #validation_set.validations[0].get_prediction_feature_values.each do |class_value| + #report.add_section_bar_plot(validation_set,class_value,:algorithm_uri,VAL_ATTR_CLASS, "bar-plot-"+class_value+".svg") + #report.add_section_roc_plot(validation_set, class_value, :algorithm_uri, "roc-plot-"+class_value+".svg") + #end + report.add_section_result(validation_set,[:algorithm_uri]+VAL_ATTR_CLASS,"Results","Results") + else #regression + #report.add_section_result(merged, VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") + #report.add_section_result(validation_set, VAL_ATTR_CV+VAL_ATTR_REGR-[:num_folds], "Results","Results") + end + return report + end else raise Reports::BadRequest.new("num different cross-validation-ids <2") if validation_set.num_different_values(:crossvalidation_id)<2 validation_set.load_cv_attributes - raise Reports::BadRequest.new("number of different algorithms <2") if validation_set.num_different_values(:algorithm_uri)<2 if validation_set.num_different_values(:dataset_uri)>1 # groups results into sets with equal dataset @@ -128,9 +168,9 @@ module Reports::ReportFactory merged = validation_set.merge([:algorithm_uri, :dataset_uri]) report = Reports::ReportContent.new("Algorithm comparison report - Many datasets") - if (validation_set.validations[0].percent_correct!=nil) #classification + if (validation_set.first.classification?) report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_CLASS-[:crossvalidation_fold],"Mean Results","Mean Results") - report.add_section_ranking_plots(merged, :algorithm_uri, :dataset_uri, [:acc, :auc, :sens, :spec]) + report.add_section_ranking_plots(merged, :algorithm_uri, :dataset_uri, [:acc, :auc, :sens, :spec], "true") else # regression report.add_section_result(merged,VAL_ATTR_CV+VAL_ATTR_REGR-[:crossvalidation_fold],"Mean Results","Mean Results") end @@ -145,7 +185,7 @@ module Reports::ReportFactory report = Reports::ReportContent.new("Algorithm comparison report") - if (validation_set.validations[0].percent_correct!=nil) #classification + if (validation_set.first.classification?) validation_set.validations[0].get_prediction_feature_values.each do |class_value| report.add_section_bar_plot(merged,class_value,:algorithm_uri,VAL_ATTR_CLASS, "bar-plot-"+class_value+".svg") report.add_section_roc_plot(validation_set, class_value, :algorithm_uri, "roc-plot-"+class_value+".svg") @@ -204,7 +244,7 @@ class Reports::ReportContent vals = vals.collect{|a| a.collect{|v| v.to_s[0,66] }} #PENDING transpose values if there more than 4 columns, and there are more than columns than rows transpose = vals[0].size>4 && vals[0].size>vals.size - @xml_report.add_table(section_table, table_title, vals, !transpose, transpose) + @xml_report.add_table(section_table, table_title, vals, !transpose, transpose) end def add_section_confusion_matrix( validation, @@ -235,12 +275,16 @@ class Reports::ReportContent end section_roc = @xml_report.add_section(@xml_report.get_root_element, section_title) - if validation_set.first.get_predictions + + prediction_set = validation_set.collect{ |v| v.get_predictions && v.get_predictions.confidence_values_available? } + + if prediction_set.size>0 + + section_text += "\nWARNING: roc plot information not available for all validation results" if prediction_set.size!=validation_set.size @xml_report.add_paragraph(section_roc, section_text) if section_text - begin plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory.create_roc_plot( plot_file_path, validation_set, class_value, split_set_attribute, validation_set.size>1 ) + Reports::PlotFactory.create_roc_plot( plot_file_path, prediction_set, class_value, split_set_attribute, prediction_set.size>1 ) @xml_report.add_imagefigure(section_roc, image_title, plot_file_name, "SVG", image_caption) rescue RuntimeError => ex LOGGER.error("could not create roc plot: "+ex.message) @@ -248,7 +292,7 @@ class Reports::ReportContent @xml_report.add_paragraph(section_roc, "could not create roc plot: "+ex.message) end else - @xml_report.add_paragraph(section_roc, "No prediction info for roc plot available.") + @xml_report.add_paragraph(section_roc, "No prediction-confidence info for roc plot available.") end end @@ -257,13 +301,14 @@ class Reports::ReportContent compare_attribute, equal_attribute, rank_attributes, + class_value, section_title="Ranking Plots", section_text="This section contains the ranking plots.") section_rank = @xml_report.add_section(@xml_report.get_root_element, section_title) @xml_report.add_paragraph(section_rank, section_text) if section_text - rank_attributes.each{|a| add_ranking_plot(section_rank, validation_set, compare_attribute, equal_attribute, a, a.to_s+"-ranking.svg")} + rank_attributes.each{|a| add_ranking_plot(section_rank, validation_set, compare_attribute, equal_attribute, a, class_value, a.to_s+"-ranking.svg")} end def add_ranking_plot( report_section, @@ -271,12 +316,13 @@ class Reports::ReportContent compare_attribute, equal_attribute, rank_attribute, + class_value=nil, plot_file_name="ranking.svg", image_title="Ranking Plot", image_caption=nil) plot_file_path = add_tmp_file(plot_file_name) - Reports::PlotFactory::create_ranking_plot(plot_file_path, validation_set, compare_attribute, equal_attribute, rank_attribute) + Reports::PlotFactory::create_ranking_plot(plot_file_path, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value) @xml_report.add_imagefigure(report_section, image_title, plot_file_name, "SVG", image_caption) end @@ -287,11 +333,16 @@ class Reports::ReportContent value_attributes, plot_file_name="bar-plot.svg", section_title="Bar Plot", - section_text="This section contains the bar plot.", + section_text=nil, image_title=nil, image_caption=nil) - image_title = "Bar Plot for class-value '"+class_value+"'" unless image_title - + if class_value + section_text = "This section contains the bar plot for class '"+class_value+"'." unless section_text + image_title = "Bar Plot for class-value '"+class_value+"'" unless image_title + else + section_text = "This section contains the bar plot." unless section_text + image_title = "Bar Plot for all classes" unless image_title + end section_bar = @xml_report.add_section(@xml_report.get_root_element, section_title) @xml_report.add_paragraph(section_bar, section_text) if section_text diff --git a/report/report_test.rb b/report/report_test.rb index 3e0d093..ed6b377 100644 --- a/report/report_test.rb +++ b/report/report_test.rb @@ -8,12 +8,19 @@ require 'rack/test' require "lib/test_util.rb" -#class Reports::ApplicationTest < Test::Unit::TestCase -# include Rack::Test::Methods -# -# def app -# Sinatra::Application -# end +class Reports::ApplicationTest < Test::Unit::TestCase + include Rack::Test::Methods + + def app + Sinatra::Application + end + + def test_nothing + + get '/report/validation/1' + + puts last_response.body + end # # def test_webservice # @@ -68,122 +75,122 @@ require "lib/test_util.rb" # end # end # -#end - - - -class Reports::ReportServiceTest < Test::Unit::TestCase - include Lib::TestUtil +end - WS_VAL = @@config[:services]["opentox-validation"] - WS_DATA=@@config[:services]["opentox-dataset"] - FILE=File.new("data/hamster_carcinogenicity.owl","r") - - WS_CLASS_ALG=File.join(@@config[:services]["opentox-algorithm"],"lazar") - WS_FEATURE_ALG=File.join(@@config[:services]["opentox-algorithm"],"fminer") - - #WS_CLASS_ALG_2="localhost:4008/algorithm" - #WS_FEATURE_ALG_2=nil - def test_service_ot_webservice - begin - - rep = Reports::ReportService.new("http://some.location") - types = rep.get_report_types - assert types.is_a?(String) - assert types.split("\n").size == Reports::ReportFactory::REPORT_TYPES.size - #Reports::ReportFactory::REPORT_TYPES.each{|t| rep.get_all_reports(t)} - #assert_raise(Reports::NotFound){rep.get_all_reports("osterhase")} - - ### using ot_mock_layer (reporting component does not rely on ot validation webservice) - - #ENV['REPORT_VALIDATION_ACCESS'] = "mock_layer" - #Reports::Validation.reset_validation_access - -# create_report(rep, "validation_uri_1", "validation") -# assert_raise(Reports::BadRequest){create_report(rep, ["validation_uri_1","validation_uri_2"], "validation")} +#class Reports::ReportServiceTest < Test::Unit::TestCase +# include Lib::TestUtil +# +# WS_VAL = @@config[:services]["opentox-validation"] +# WS_DATA=@@config[:services]["opentox-dataset"] +# FILE=File.new("data/hamster_carcinogenicity.owl","r") +# +# WS_CLASS_ALG=File.join(@@config[:services]["opentox-algorithm"],"lazar") +# WS_FEATURE_ALG=File.join(@@config[:services]["opentox-algorithm"],"fminer") +# +# #WS_CLASS_ALG_2="localhost:4008/algorithm" +# #WS_FEATURE_ALG_2=nil +# +# def test_service_ot_webservice +# +# begin # -# create_report(rep, "crossvalidation_uri_1", "crossvalidation") -# create_report(rep, ["validation_uri_1"]*Reports::OTMockLayer::NUM_FOLDS, "crossvalidation") -# assert_raise(Reports::BadRequest){create_report(rep, ["validation_uri_1"]*(Reports::OTMockLayer::NUM_FOLDS-1), "crossvalidation")} +# rep = Reports::ReportService.new("http://some.location") +# types = rep.get_report_types +# assert types.is_a?(String) +# assert types.split("\n").size == Reports::ReportFactory::REPORT_TYPES.size +# #Reports::ReportFactory::REPORT_TYPES.each{|t| rep.get_all_reports(t)} +# #assert_raise(Reports::NotFound){rep.get_all_reports("osterhase")} # -# create_report(rep, ["crossvalidation_uri_1"]* (Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS), "algorithm_comparison") -# create_report(rep, ["validation_uri_1"]* (Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS * Reports::OTMockLayer::NUM_FOLDS), "algorithm_comparison") - - ### using ot webservices (instead of mock layer) - - #ENV['REPORT_VALIDATION_ACCESS'] = nil - #Reports::Validation.reset_validation_access - - #data_uri = upload_data WS_DATA, FILE - #data_uri= File.join(WS_DATA,"1") - -# #val_uri = create_single_validation(data_uri) -# #val_uri = create_single_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) -# val_uri = File.join(WS_VAL,"15") -## #add_resource val_uri -# create_report(rep, val_uri, "validation") - - #val_uri = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) - #val_uri = create_cross_validation(data_uri) - val_uri = File.join(WS_VAL,"crossvalidation/1") - #val_uri2 = "http://localhost:4007/crossvalidation/14" -# # add_resource val_uri - create_report(rep, val_uri, "crossvalidation") - -# #val_uri2 = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) -# #val_uri = ["http://localhost:4007/crossvalidation/6", "http://localhost:4007/crossvalidation/8"] - #val_uri = ["http://localhost:4007/crossvalidation/7", "http://localhost:4007/crossvalidation/8"] -# #add_resource val_uri - #create_report(rep, val_uri, "algorithm_comparison") - - ensure - # delete_resources - end - end - - private - def create_single_validation(data_uri, ws_class_alg=WS_CLASS_ALG, ws_feat_alg=WS_FEATURE_ALG) - puts "validating" - val_params = { - :dataset_uri => data_uri, - :algorithm_uri => ws_class_alg, - :split_ratio=>0.7, - :prediction_feature => "classification",} - val_params[:feature_generation_uri] = ws_feat_alg if ws_feat_alg - begin - RestClient.post WS_VAL+"/validation/training_test_split", val_params - rescue => ex - raise "error validating "+WS_VAL+"/validation/training_test_split\n "+val_params.inspect+" \n -> "+ex.message - end - end - - def create_cross_validation(data_uri, ws_class_alg=WS_CLASS_ALG, ws_feat_alg=WS_FEATURE_ALG) - puts "cross-validating" - ext("curl -X POST -d num_folds=3 -d dataset_uri="+data_uri+" -d algorithm_uri="+ws_class_alg+" -d prediction_feature=classification"+ - (ws_feat_alg ? " -d feature_generation_uri="+ws_feat_alg : "")+ - " "+WS_VAL+"/crossvalidation",nil) - end - - def create_report(report_service, val_uri, type) - - Reports.reset_ot_access if ENV['USE_OT_MOCK_LAYER'] - report_uri = report_service.create_report(type, val_uri) - assert type == report_service.parse_type(report_uri) - id = report_service.parse_id(report_uri) - - #puts "created report with id "+id.to_s - - #assert_raise(Reports::BadRequest){report_service.get_report(type, id, "weihnachtsmann")} - - report_service.get_report(type, id, "text/html") - #report_service.get_report(type, id, "application/pdf") - #assert_raise(Reports::NotFound){report_service.delete_report(type, 877658)} - -# rep.delete_report(type, id) - end -end +# ### using ot_mock_layer (reporting component does not rely on ot validation webservice) +# +# #ENV['REPORT_VALIDATION_ACCESS'] = "mock_layer" +# #Reports::Validation.reset_validation_access +# +## create_report(rep, "validation_uri_1", "validation") +## assert_raise(Reports::BadRequest){create_report(rep, ["validation_uri_1","validation_uri_2"], "validation")} +## +## create_report(rep, "crossvalidation_uri_1", "crossvalidation") +## create_report(rep, ["validation_uri_1"]*Reports::OTMockLayer::NUM_FOLDS, "crossvalidation") +## assert_raise(Reports::BadRequest){create_report(rep, ["validation_uri_1"]*(Reports::OTMockLayer::NUM_FOLDS-1), "crossvalidation")} +## +## create_report(rep, ["crossvalidation_uri_1"]* (Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS), "algorithm_comparison") +## create_report(rep, ["validation_uri_1"]* (Reports::OTMockLayer::NUM_DATASETS * Reports::OTMockLayer::NUM_ALGS * Reports::OTMockLayer::NUM_FOLDS), "algorithm_comparison") +# +# ### using ot webservices (instead of mock layer) +# +# #ENV['REPORT_VALIDATION_ACCESS'] = nil +# #Reports::Validation.reset_validation_access +# +# #data_uri = upload_data WS_DATA, FILE +# #data_uri= File.join(WS_DATA,"1") +# +## #val_uri = create_single_validation(data_uri) +## #val_uri = create_single_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) +## val_uri = File.join(WS_VAL,"15") +### #add_resource val_uri +## create_report(rep, val_uri, "validation") +# +# #val_uri = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) +# #val_uri = create_cross_validation(data_uri) +# val_uri = File.join(WS_VAL,"crossvalidation/1") +# #val_uri2 = "http://localhost:4007/crossvalidation/14" +## # add_resource val_uri +# create_report(rep, val_uri, "crossvalidation") +# +## #val_uri2 = create_cross_validation(data_uri, WS_CLASS_ALG_2, WS_FEATURE_ALG_2) +## #val_uri = ["http://localhost:4007/crossvalidation/6", "http://localhost:4007/crossvalidation/8"] +# #val_uri = ["http://localhost:4007/crossvalidation/7", "http://localhost:4007/crossvalidation/8"] +## #add_resource val_uri +# #create_report(rep, val_uri, "algorithm_comparison") +# +# ensure +# # delete_resources +# end +# end +# +# private +# def create_single_validation(data_uri, ws_class_alg=WS_CLASS_ALG, ws_feat_alg=WS_FEATURE_ALG) +# puts "validating" +# val_params = { +# :dataset_uri => data_uri, +# :algorithm_uri => ws_class_alg, +# :split_ratio=>0.7, +# :prediction_feature => "classification",} +# val_params[:feature_generation_uri] = ws_feat_alg if ws_feat_alg +# begin +# RestClient.post WS_VAL+"/validation/training_test_split", val_params +# rescue => ex +# raise "error validating "+WS_VAL+"/validation/training_test_split\n "+val_params.inspect+" \n -> "+ex.message +# end +# end +# +# def create_cross_validation(data_uri, ws_class_alg=WS_CLASS_ALG, ws_feat_alg=WS_FEATURE_ALG) +# puts "cross-validating" +# ext("curl -X POST -d num_folds=3 -d dataset_uri="+data_uri+" -d algorithm_uri="+ws_class_alg+" -d prediction_feature=classification"+ +# (ws_feat_alg ? " -d feature_generation_uri="+ws_feat_alg : "")+ +# " "+WS_VAL+"/crossvalidation",nil) +# end +# +# def create_report(report_service, val_uri, type) +# +# Reports.reset_ot_access if ENV['USE_OT_MOCK_LAYER'] +# report_uri = report_service.create_report(type, val_uri) +# assert type == report_service.parse_type(report_uri) +# id = report_service.parse_id(report_uri) +# +# #puts "created report with id "+id.to_s +# +# #assert_raise(Reports::BadRequest){report_service.get_report(type, id, "weihnachtsmann")} +# +# report_service.get_report(type, id, "text/html") +# #report_service.get_report(type, id, "application/pdf") +# #assert_raise(Reports::NotFound){report_service.delete_report(type, 877658)} +# +## rep.delete_report(type, id) +# end +#end diff --git a/report/validation_access.rb b/report/validation_access.rb index 2a5ce51..e06c1f0 100644 --- a/report/validation_access.rb +++ b/report/validation_access.rb @@ -73,7 +73,7 @@ class Reports::ValidationDB < Reports::ValidationAccess validation.send("#{p.to_s}=".to_sym, v[p]) end - {:classification_statistics => Lib::VAL_CLASS_PROPS, + {:classification_statistics => Lib::VAL_CLASS_PROPS_EXTENDED, :regression_statistics => Lib::VAL_REGR_PROPS}.each do |subset_name,subset_props| subset = v[subset_name] subset_props.each{ |prop| validation.send("#{prop.to_s}=".to_sym, subset[prop]) } if subset @@ -148,7 +148,7 @@ class Reports::ValidationWebservice < Reports::ValidationAccess #validation.prediction_feature = model.get_prediction_feature {Lib::VAL_CV_PROP => Lib::VAL_CV_PROPS, - Lib::VAL_CLASS_PROP => Lib::VAL_CLASS_PROPS}.each do |subset_name,subset_props| + Lib::VAL_CLASS_PROP => Lib::VAL_CLASS_PROPS_EXTENDED}.each do |subset_name,subset_props| subset = data[subset_name] subset_props.each{ |prop| validation.send("#{prop}=".to_sym, subset[prop]) } if subset end diff --git a/report/validation_data.rb b/report/validation_data.rb index 13a5175..a2b8905 100644 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -1,7 +1,7 @@ # the variance is computed when merging results for these attributes -VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, :r_square ] -VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate ] +VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error, :r_square, :accuracy ] +VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate, :accuracy ] class Object @@ -25,6 +25,19 @@ class Object end end +class Hash + + def mean_value + sum = 0 + self.values.collect do |v| + raise "cannot compute mean of non-numeric values '"+self.inspect+"'" unless v!=nil and v.is_a?(Numeric) + sum+=v + end + sum/=self.values.size.to_f + end + +end + module Reports @@ -261,6 +274,18 @@ module Reports return new_set end + # returns a new set with all validation that the attached block accepted + # e.g. create set with predictions: collect{ |validation| validation.get_predictions!=null } + # + # call-seq: + # filter_proc(proc) => Reports::ValidationSet + # + def collect + new_set = Reports::ValidationSet.new + validations.each{ |v| new_set.validations.push(v) if yield(v) } + return new_set + end + # returns an array, with values for __attributes__, that can be use for a table # * first row is header row # * other rows are values @@ -321,7 +346,7 @@ module Reports # call-seq: # compute_ranking(equal_attributes, ranking_attribute) => array # - def compute_ranking(equal_attributes, ranking_attribute) + def compute_ranking(equal_attributes, ranking_attribute, class_value=nil ) new_set = Reports::ValidationSet.new (0..@validations.size-1).each do |i| @@ -334,7 +359,16 @@ module Reports # put indices and ranking values for current group into hash rank_hash = {} (0..group.size-1).each do |i| - rank_hash[i] = group[i].send(ranking_attribute) + val = group[i].send(ranking_attribute) + if val.is_a?(Hash) + if class_value != nil + raise "no value for class value "+class_value.class.to_s+" "+class_value.to_s+" in hash "+val.inspect.to_s unless val.has_key?(class_value) + val = val[class_value] + else + val = val.mean_value + end + end + rank_hash[i] = val end # sort group accrording to second value (= ranking value) diff --git a/validation/validation_application.rb b/validation/validation_application.rb index d4e1a2e..eb3e4a4 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -4,6 +4,7 @@ end require 'validation/validation_service.rb' +require 'lib/merge.rb' # hack: store self in $sinatra to make url_for method accessible in validation_service @@ -46,8 +47,6 @@ get '/crossvalidation/:id' do else halt 400, "MIME type '"+request.env['HTTP_ACCEPT'].to_s+"' not supported." end - - halt 202, result unless crossvalidation.finished result end @@ -65,6 +64,22 @@ get '/crossvalidation/:id/validations' do Validation::Validation.all(:crossvalidation_id => params[:id]).collect{ |v| v.uri.to_s }.join("\n")+"\n" end + +get '/crossvalidation/:id/statistics' do + LOGGER.info "get merged validation-result for crossvalidation with id "+params[:id].to_s + halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) + + to_merge = [:prediction_feature, :num_instances,:num_without_class,:percent_without_class,:num_unpredicted,:percent_unpredicted, + :classification_statistics,:regression_statistics,:crossvalidation_id] + v = Validation::Validation.all(:crossvalidation_id => params[:id]).merge_array(to_merge) + v.uri = nil + v.created_at = nil + v.id = nil + content_type "text/x-yaml" + v.to_yaml +end + + post '/crossvalidation/?' do OpenTox::Task.as_task do LOGGER.info "creating crossvalidation "+params.inspect @@ -102,8 +117,6 @@ get '/:id' do else halt 400, "MIME type '"+request.env['HTTP_ACCEPT'].to_s+"' not supported." end - - halt 202, result unless validation.finished result end diff --git a/validation/validation_service.rb b/validation/validation_service.rb index 1e2c103..469a717 100644 --- a/validation/validation_service.rb +++ b/validation/validation_service.rb @@ -110,7 +110,6 @@ module Validation end update :prediction_dataset_uri => prediction_dataset_uri, - :finished => true, :real_runtime => benchmark.real, :num_instances => prediction.num_instances, :num_without_class => prediction.num_without_class, diff --git a/validation/validation_test.rb b/validation/validation_test.rb index 5cfd319..643ea81 100644 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -190,16 +190,16 @@ class ValidationTest < Test::Unit::TestCase #get '/' #get '/prepare_examples' - get '/test_examples' + #get '/test_examples' #get '/1',nil,'HTTP_ACCEPT' => "application/rdf+xml" #get '/1',nil,'HTTP_ACCEPT' => "text/x-yaml" #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "application/rdf+xml" - #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "text/x-yaml" + get '/crossvalidation/1/statistics',nil,'HTTP_ACCEPT' => "text/x-yaml" - #puts last_response.body + puts last_response.body end # private |