summaryrefslogtreecommitdiff
path: root/report/plot_factory.rb
diff options
context:
space:
mode:
Diffstat (limited to 'report/plot_factory.rb')
-rw-r--r--report/plot_factory.rb228
1 files changed, 137 insertions, 91 deletions
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index a4e415a..bf59960 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -52,10 +52,12 @@ module Reports
module PlotFactory
- def self.create_regression_plot( out_file, validation_set, name_attribute )
+ def self.create_regression_plot( out_files, validation_set, name_attribute, logscale=true )
- LOGGER.debug "Creating regression plot, out-file:"+out_file.to_s
+ out_files = [out_files] unless out_files.is_a?(Array)
+ LOGGER.debug "Creating regression plot, out-file:"+out_files.to_s
+ omit_count = 0
names = []
x = []
y = []
@@ -63,23 +65,34 @@ module Reports
x_i = v.get_predictions.predicted_values
y_i = v.get_predictions.actual_values
- # filter out nil-predictions
- not_nil_indices = []
+ # filter out nil-predictions and <=0 predictions if log-scale wanted
+ valid_indices = []
x_i.size.times do |i|
- not_nil_indices << i if x_i[i]!=nil && y_i[i]!=nil
+ if x_i[i]!=nil and y_i[i]!=nil
+ if !logscale or (x_i[i]>0 and y_i[i]>0)
+ valid_indices << i
+ else
+ omit_count += 1
+ end
+ end
end
- if not_nil_indices.size < x_i.size
- x_i = not_nil_indices.collect{ |i| x_i[i] }
- y_i = not_nil_indices.collect{ |i| y_i[i] }
+ if valid_indices.size < x_i.size
+ x_i = valid_indices.collect{ |i| x_i[i] }
+ y_i = valid_indices.collect{ |i| y_i[i] }
end
names << ( name_attribute==:crossvalidation_fold ? "fold " : "" ) + v.send(name_attribute).to_s
x << x_i
y << y_i
end
-
- raise "no predictions performed" if x.size==0 || x[0].size==0
- RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y )
+ names = [""] if names.size==1
+
+ omit_str = omit_count>0 ? " ("+omit_count.to_s+" predictions omitted)" : ""
+ raise "no predictions performed"+omit_str if x.size==0 || x[0].size==0
+ out_files.each do |out_file|
+ RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y, logscale)
+ end
+ omit_count
end
@@ -91,36 +104,37 @@ module Reports
# * the validation set is split into sets of validation_sets with equal attribute values
# * each of these validation sets is plotted as a roc-curve
#
# Diff view: lines prefixed "-" are the previous implementation, lines prefixed "+" the new one.
#
# New version: plots one ROC curve (or one curve per value of split_set_attribute) and
# writes the same plot to every path in out_files.
#
# out_files           - a single output path or an Array of paths (a single value is wrapped below)
# validation_set      - set of validations to plot; with split_set_attribute it is filtered per attribute value
# class_value         - passed through to transform_roc_predictions
# split_set_attribute - optional attribute; when given, one curve is produced per attribute value
# x_label, y_label    - axis labels handed to RubyPlot::plot_lines
#
# NOTE(review): the fifth positional parameter changed from show_single_curves to x_label;
# a caller still passing a boolean there would now supply it as the x-axis label — confirm all call sites.
- def self.create_roc_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
+ def self.create_roc_plot( out_files, validation_set, class_value, split_set_attribute=nil,
+ x_label="False positive rate", y_label="True Positive Rate" )
- LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s
# accept a single path as well as an Array of paths
+ out_files = [out_files] unless out_files.is_a?(Array)
+ LOGGER.debug "creating roc plot for '"+validation_set.size.to_s+"' validations, out-files:"+out_files.inspect
# collects one transform_roc_predictions result per curve
+ data = []
if split_set_attribute
attribute_values = validation_set.get_values(split_set_attribute)
- names = []
- fp_rates = []
- tp_rates = []
attribute_values.each do |value|
begin
- data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
- names << value.to_s
- fp_rates << data[:fp_rate][0]
- tp_rates << data[:tp_rate][0]
# third argument false: no labels are requested for the per-value curves
+ data << transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false )
# NOTE(review): the curve is appended before it is named; if the naming line raises, the
# rescue below logs a warning but the already-appended (unnamed) curve stays in data — confirm intended.
+ data[-1].name = split_set_attribute.to_s.nice_attr+" "+value.to_s
rescue
# best effort: a failing attribute value is logged and skipped, the remaining values are still processed
LOGGER.warn "could not create ROC plot for "+value.to_s
end
end
- RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates )
else
- data = transform_roc_predictions(validation_set, class_value, show_single_curves)
- RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
# single curve over the whole validation set, using the default third argument of transform_roc_predictions
+ data << transform_roc_predictions(validation_set, class_value )
end
+
# NOTE(review): data may be empty here if every attribute value failed above; behaviour of
# RubyPlot::plot_lines with an empty list is not visible from this file — TODO confirm.
+ out_files.each do |out_file|
+ RubyPlot::plot_lines(out_file, "ROC-Plot", x_label, y_label, data )
+ end
end
- def self.create_confidence_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
+ def self.create_confidence_plot( out_files, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
- LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s
+ out_files = [out_files] unless out_files.is_a?(Array)
+ LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_files.inspect
if split_set_attribute
attribute_values = validation_set.get_values(split_set_attribute)
@@ -130,7 +144,7 @@ module Reports
attribute_values.each do |value|
begin
data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
- names << value.to_s
+ names << split_set_attribute.to_s.nice_attr+" "+value.to_s
confidence << data[:confidence][0]
performance << data[:performance][0]
rescue
@@ -138,27 +152,32 @@ module Reports
end
end
#RubyPlot::plot_lines(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, fp_rates, tp_rates )
- case validation_set.unique_feature_type
- when "classification"
- RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance)
- when "regression"
- RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true)
+ out_files.each do |out_file|
+ case validation_set.unique_feature_type
+ when "classification"
+ RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance)
+ when "regression"
+ RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true)
+ end
end
else
data = transform_confidence_predictions(validation_set, class_value, show_single_curves)
- case validation_set.unique_feature_type
- when "classification"
- RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance])
- when "regression"
- RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true)
+ out_files.each do |out_file|
+ case validation_set.unique_feature_type
+ when "classification"
+ RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance])
+ when "regression"
+ RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true)
+ end
end
end
end
- def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes )
+ def self.create_bar_plot( out_files, validation_set, title_attribute, value_attributes )
- LOGGER.debug "creating bar plot, out-file:"+out_file.to_s
+ out_files = [out_files] unless out_files.is_a?(Array)
+ LOGGER.debug "creating bar plot, out-files:"+out_files.inspect
data = []
titles = []
@@ -167,25 +186,35 @@ module Reports
validation_set.validations.each do |v|
values = []
value_attributes.each do |a|
- validation_set.get_accept_values_for_attr(a).each do |class_value|
- value = v.send(a)
- if value.is_a?(Hash)
- if class_value==nil
- avg_value = 0
- value.values.each{ |val| avg_value+=val }
- value = avg_value/value.values.size.to_f
- else
- raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value)
- value = value[class_value]
+
+ accept = validation_set.get_accept_values_for_attr(a)
+ if accept and accept.size>0
+ accept.each do |class_value|
+ value = v.send(a)
+ if value.is_a?(Hash)
+ if class_value==nil
+ avg_value = 0
+ value.values.each{ |val| avg_value+=val }
+ value = avg_value/value.values.size.to_f
+ else
+ raise "bar plot value is hash, but no entry for class-value ("+class_value.to_s+"); value for "+a.to_s+" -> "+value.inspect unless value.key?(class_value)
+ value = value[class_value]
+ end
end
+ raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil
+ values.push(value)
+ labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" ))
end
- raise "value is nil\nattribute: "+a.to_s+"\nvalidation: "+v.inspect if value==nil
+ else
+ value = v.send(a)
values.push(value)
- labels.push(a.to_s.gsub("_","-") + ( class_value==nil ? "" : "("+class_value.to_s+")" ))
+ labels.push(a.to_s.gsub("_","-"))
end
+
end
titles << v.send(title_attribute).to_s
+ raise "no title for '"+title_attribute.to_s+"' in validation: "+v.to_yaml if titles[-1].to_s.size==0
data << values
end
@@ -197,7 +226,9 @@ module Reports
LOGGER.debug "bar plot labels: "+labels.inspect
LOGGER.debug "bar plot data: "+data.inspect
- RubyPlot::plot_bars('Bar plot', labels, data, out_file)
+ out_files.each do |out_file|
+ RubyPlot::plot_bars('Bar plot', labels, data, out_file)
+ end
end
@@ -261,43 +292,27 @@ module Reports
end
private
# Diff view: lines prefixed "-" are the previous implementation, lines prefixed "+" the new one.
#
# New version: gathers predicted, confidence and actual values of the validation set
# (concatenated over all validations when there is more than one), computes the ROC
# rates via get_tp_fp_rates and returns them as a single RubyPlot::LinePlotData with an
# empty name.
#
# validation_set - validations whose predictions are used
# class_value    - passed to get_prediction_values
# add_label      - when true, one label is added per entry of the :youden result of get_tp_fp_rates
#
# NOTE(review): the third parameter changed meaning and default (add_single_folds=false ->
# add_label=true); the per-fold curves of the old version are no longer produced.
# NOTE(review): the bare `private` preceding this method does not change the visibility of
# methods defined with `def self.`; private_class_method would be needed if privacy is intended.
- def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false)
-
+ def self.transform_roc_predictions(validation_set, class_value, add_label=true )
if (validation_set.size > 1)
-
- names = []; fp_rate = []; tp_rate = []; faint = []
- sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
-
# accumulator for the values of all validations in the set
+ values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
(0..validation_set.size-1).each do |i|
roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
- sum_roc_values[:predicted_values] += roc_values[:predicted_values]
- sum_roc_values[:confidence_values] += roc_values[:confidence_values]
- sum_roc_values[:actual_values] += roc_values[:actual_values]
- if add_single_folds
- begin
- tp_fp_rates = get_tp_fp_rates(roc_values)
- names << "fold "+i.to_s
- fp_rate << tp_fp_rates[:fp_rate]
- tp_rate << tp_fp_rates[:tp_rate]
- faint << true
- rescue
- LOGGER.warn "could not get ROC vals for fold "+i.to_s
- end
- end
+ values[:predicted_values] += roc_values[:predicted_values]
+ values[:confidence_values] += roc_values[:confidence_values]
+ values[:actual_values] += roc_values[:actual_values]
end
- tp_fp_rates = get_tp_fp_rates(sum_roc_values)
- names << nil # "all"
- fp_rate << tp_fp_rates[:fp_rate]
- tp_rate << tp_fp_rates[:tp_rate]
- faint << false
- return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint }
else
- roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
- tp_fp_rates = get_tp_fp_rates(roc_values)
- return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
# single validation: use its prediction values directly
+ values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
end
+ tp_fp_rates = get_tp_fp_rates(values)
+ labels = []
# :youden maps [fp_rate, tp_rate] coordinate pairs to a confidence value (see the return value
# of get_tp_fp_rates); each label is [text, x, y]. The trailing `if add_label` guards the whole loop.
# NOTE(review): to_nice_s is not a core Ruby method — presumably a project extension; confirm it
# is defined for every confidence type that can occur here.
+ tp_fp_rates[:youden].each do |point,confidence|
+ labels << ["confidence: "+confidence.to_nice_s, point[0], point[1]]
+ end if add_label
# name is left empty here; create_roc_plot overwrites it for split curves
+ RubyPlot::LinePlotData.new(:name => "", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels)
end
+
+
def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false)
if (validation_set.size > 1)
@@ -333,20 +348,29 @@ module Reports
else
confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type)
- return { :names => ["default"], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] }
+ return { :names => [""], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] }
end
end
# Diff view: lines prefixed "-" are the previous implementation, lines prefixed "+" the new one.
#
# Demo helper (renamed from demo_rock_plot): builds a ROC plot from hard-coded example
# values, labels the points reported under :youden by get_tp_fp_rates, and writes the
# result to /tmp/plot.png.
- def self.demo_rock_plot
- roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6],
- :predicted_values => [1, 0, 0, 1, 0, 1],
- :actual_values => [0, 1, 0, 0, 1, 1]}
+ def self.demo_roc_plot
# previous example data, kept commented out for reference
+# roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6],
+# :predicted_values => [1, 0, 0, 1, 0, 1],
+# :actual_values => [0, 1, 0, 0, 1, 1]}
# example data: strictly decreasing confidences, all predictions 1, alternating actual values
+ roc_values = {:confidence_values => [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
+ :predicted_values => [1, 1, 1, 1, 1, 1],
+ :actual_values => [1, 0, 1, 0, 1, 0]}
tp_fp_rates = get_tp_fp_rates(roc_values)
- data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
+ labels = []
# each label is [text, x, y]; note that plain to_s is used here, whereas
# transform_roc_predictions formats the confidence with to_nice_s
+ tp_fp_rates[:youden].each do |point,confidence|
+ labels << ["confidence: "+confidence.to_s, point[0], point[1]]
+ end
+
+ plot_data = []
+ plot_data << RubyPlot::LinePlotData.new(:name => "testname", :x_values => tp_fp_rates[:fp_rate], :y_values => tp_fp_rates[:tp_rate], :labels => labels)
# output path is hard-coded; the plot is handed over as a list of LinePlotData objects
RubyPlot::plot_lines("/tmp/plot.png",
"ROC-Plot",
"False positive rate",
- "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
+ "True Positive Rate", plot_data )
end
def self.get_performance_confidence_rates(roc_values, feature_type)
@@ -354,7 +378,7 @@ module Reports
c = roc_values[:confidence_values]
p = roc_values[:predicted_values]
a = roc_values[:actual_values]
- raise "no prediction values for roc-plot" if p.size==0
+ raise "no prediction values for confidence plot" if p.size==0
(0..p.size-2).each do |i|
((i+1)..p.size-1).each do |j|
@@ -462,21 +486,43 @@ module Reports
w = w.compress_sum(c2)
#puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n"+w.inspect+"\n\n"
+ youden = []
+ (0..tp_rate.size-1).each do |i|
+ tpr = tp_rate[i]/tp_rate[-1].to_f
+ fpr = fp_rate[i]/fp_rate[-1].to_f
+ youden << tpr + (1 - fpr)
+ #puts youden[-1].to_s+" ("+tpr.to_s+" "+fpr.to_s+")"
+ end
+ max = youden.max
+ youden_hash = {}
+ (0..tp_rate.size-1).each do |i|
+ if youden[i]==max and i>0
+ youden_hash[i] = c2[i]
+ end
+ end
+ #puts youden.inspect+"\n"+youden_hash.inspect+"\n\n"
+
(0..tp_rate.size-1).each do |i|
tp_rate[i] = tp_rate[-1]>0 ? tp_rate[i]/tp_rate[-1].to_f*100 : 100
fp_rate[i] = fp_rate[-1]>0 ? fp_rate[i]/fp_rate[-1].to_f*100 : 100
end
#puts tp_rate.inspect+"\n"+fp_rate.inspect+"\n\n"
- return {:tp_rate => tp_rate,:fp_rate => fp_rate}
+ youden_coordinates_hash = {}
+ youden_hash.each do |i,c|
+ youden_coordinates_hash[[fp_rate[i],tp_rate[i]]] = c
+ end
+ #puts youden_coordinates_hash.inspect+"\n\n"
+
+ return {:tp_rate => tp_rate,:fp_rate => fp_rate,:youden => youden_coordinates_hash}
end
end
end
#require "rubygems"
#require "ruby-plot"
-#Reports::PlotFactory::demo_ranking_plot
-#Reports::PlotFactory::demo_rock_plot
+##Reports::PlotFactory::demo_ranking_plot
+#Reports::PlotFactory::demo_roc_plot
#a = [1, 0, 1, 2, 3, 0, 2]
#puts a.compress_sum([100, 90, 70, 70, 30, 10, 0]).inspect