diff options
Diffstat (limited to 'report/plot_factory.rb')
-rw-r--r-- | report/plot_factory.rb | 153 |
1 files changed, 122 insertions, 31 deletions
diff --git a/report/plot_factory.rb b/report/plot_factory.rb index daaba52..43c45fc 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -8,6 +8,43 @@ class Array self[i] = self[j] self[j] = tmp end + + # summing up values of fields where array __groups__ has equal values + # EXAMPLE + # self: [1, 0, 1, 2, 3, 0, 2] + # __groups__: [100, 90, 70, 70, 30, 10, 0] + # returns: + # [ 1, 0, 3, 3, 0, 2] + # (fields with equal value 70 are compressed) + # PRECONDITION + # __groups__ has to be sorted + def compress_sum(groups) + compress(groups) do |a,b| + a+b + end + end + + # see compress_sum, replace sum with max + def compress_max(groups) + compress(groups) do |a,b| + a > b ? a : b + end + end + + private + def compress(groups) + raise "length not equal" unless self.size==groups.size + raise "to small" unless self.size>=2 + a = [ self[0] ] + (1..groups.size-1).each do |i| + if groups[i]!=groups[i-1] + a << self[i] + else + a[-1] = yield a[-1],self[i] + end + end + a + end end @@ -15,7 +52,7 @@ module Reports module PlotFactory - def self.create_regression_plot( out_file, validation_set ) + def self.create_regression_plot( out_file, validation_set, name_attribute ) LOGGER.debug "Creating regression plot, out-file:"+out_file.to_s @@ -23,14 +60,28 @@ module Reports x = [] y = [] validation_set.validations.each do |v| - names << v.algorithm_uri - x << v.get_predictions.predicted_values - y << v.get_predictions.actual_values + x_i = v.get_predictions.predicted_values + y_i = v.get_predictions.actual_values + + # filter out nil-predictions + not_nil_indices = [] + x_i.size.times do |i| + not_nil_indices << i if x_i[i]!=nil && y_i[i]!=nil + end + if not_nil_indices.size < x_i.size + x_i = not_nil_indices.collect{ |i| x_i[i] } + y_i = not_nil_indices.collect{ |i| y_i[i] } + end + + names << ( name_attribute==:crossvalidation_fold ? "fold " : "" ) + v.send(name_attribute).to_s + x << x_i + y << y_i end RubyPlot::plot_points(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y ) end + # creates a roc plot (result is plotted into out_file) # * if (split_set_attributes == nil?) # * the predictions of all validations in the validation set are plotted as one average roc-curve @@ -41,19 +92,22 @@ module Reports # def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false ) - LOGGER.debug "creating roc plot, out-file:"+out_file.to_s + LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s if split_set_attribute attribute_values = validation_set.get_values(split_set_attribute) - names = [] fp_rates = [] tp_rates = [] attribute_values.each do |value| - data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) - names << value.to_s - fp_rates << data[:fp_rate][0] - tp_rates << data[:tp_rate][0] + begin + data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false) + names << value.to_s + fp_rates << data[:fp_rate][0] + tp_rates << data[:tp_rate][0] + rescue + LOGGER.warn "could not create ROC plot for "+value.to_s + end end RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates ) else @@ -62,28 +116,33 @@ module Reports end end - def self.create_bar_plot( out_file, validation_set, class_value, title_attribute, value_attributes ) + def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes ) LOGGER.debug "creating bar plot, out-file:"+out_file.to_s data = [] titles = [] + labels = [] validation_set.validations.each do |v| values = [] value_attributes.each do |a| - value = v.send(a) - if value.is_a?(Hash) - if class_value==nil - avg_value = 0 - value.values.each{ |val| avg_value+=val } - value = avg_value/value.values.size.to_f - else - raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value) - value = value[class_value] + validation_set.get_domain_for_attr(a).each do |class_value| + value = v.send(a) + if value.is_a?(Hash) + if class_value==nil + avg_value = 0 + value.values.each{ |val| avg_value+=val } + value = avg_value/value.values.size.to_f + else + raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value) + value = value[class_value] + end end + raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil + values.push(value) + labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" )) end - values.push(value) end titles << v.send(title_attribute).to_s @@ -95,8 +154,6 @@ module Reports data[i] = [titles[i]] + data[i] end - labels = value_attributes.collect{|a| a.to_s.gsub("_","-")} - LOGGER.debug "bar plot labels: "+labels.inspect LOGGER.debug "bar plot data: "+data.inspect @@ -177,11 +234,15 @@ module Reports sum_roc_values[:confidence_values] += roc_values[:confidence_values] sum_roc_values[:actual_values] += roc_values[:actual_values] if add_single_folds - tp_fp_rates = get_tp_fp_rates(roc_values) - names << "fold "+i.to_s - fp_rate << tp_fp_rates[:fp_rate] - tp_rate << tp_fp_rates[:tp_rate] - faint << true + begin + tp_fp_rates = get_tp_fp_rates(roc_values) + names << "fold "+i.to_s + fp_rate << tp_fp_rates[:fp_rate] + tp_rate << tp_fp_rates[:tp_rate] + faint << true + rescue + LOGGER.warn "could not get ROC vals for fold "+i.to_s + end end end tp_fp_rates = get_tp_fp_rates(sum_roc_values) @@ -197,6 +258,18 @@ module Reports end end + def self.demo_rock_plot + roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6], + :predicted_values => [1, 0, 0, 1, 0, 1], + :actual_values => [0, 1, 0, 0, 1, 1]} + tp_fp_rates = get_tp_fp_rates(roc_values) + data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] } + RubyPlot::plot_lines("/tmp/plot.svg", + "ROC-Plot", + "False positive rate", + "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] ) + end + def self.get_tp_fp_rates(roc_values) c = roc_values[:confidence_values] @@ -232,9 +305,11 @@ module Reports end end #puts c.inspect+"\n"+a.inspect+"\n"+p.inspect+"\n\n" - + tp_rate = [0] fp_rate = [0] + w = [1] + c2 = [Float::MAX] (0..p.size-1).each do |i| if a[i]==p[i] tp_rate << tp_rate[-1]+1 @@ -243,8 +318,15 @@ module Reports fp_rate << fp_rate[-1]+1 tp_rate << tp_rate[-1] end + w << 1 + c2 << c[i] end - #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n\n" + #puts c2.inspect+"\n"+tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n" + + tp_rate = tp_rate.compress_max(c2) + fp_rate = fp_rate.compress_max(c2) + w = w.compress_sum(c2) + #puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n" (0..tp_rate.size-1).each do |i| tp_rate[i] = tp_rate[-1]>0 ? tp_rate[i]/tp_rate[-1].to_f*100 : 100 @@ -256,5 +338,14 @@ module Reports end end end - + +#require "rubygems" +#require "ruby-plot" #Reports::PlotFactory::demo_ranking_plot +#Reports::PlotFactory::demo_rock_plot + +#a = [1, 0, 1, 2, 3, 0, 2] +#puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect +#puts a.compress_max([100, 90, 70, 70, 30, 10, 0]).inspect + + |