path: root/report/plot_factory.rb
Diffstat (limited to 'report/plot_factory.rb')
-rw-r--r--  report/plot_factory.rb  166
1 file changed, 150 insertions(+), 16 deletions(-)
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index 43c45fc..a4e415a 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -77,8 +77,9 @@ module Reports
x << x_i
y << y_i
end
-
- RubyPlot::plot_points(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y )
+
+ raise "no predictions performed" if x.size==0 || x[0].size==0
+ RubyPlot::regression_point_plot(out_file, "Regression plot", "Predicted values", "Actual values", names, x, y )
end
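The hunk above adds an empty-input guard and switches to the renamed RubyPlot::regression_point_plot. For reference, a minimal standalone sketch of that call, assuming the project's RubyPlot module is loaded and keeps the signature shown in the diff (out_file, title, axis labels, series names, predicted values, actual values); file name and data below are illustrative only:

    # hypothetical usage of the renamed plotting call
    names = ["default"]
    x = [[1.0, 2.0, 3.0]]   # predicted values, one array per named series
    y = [[0.9, 2.2, 2.8]]   # actual values, aligned with x
    raise "no predictions performed" if x.size==0 || x[0].size==0
    RubyPlot::regression_point_plot("/tmp/regression.png", "Regression plot",
      "Predicted values", "Actual values", names, x, y)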
@@ -101,7 +102,7 @@ module Reports
tp_rates = []
attribute_values.each do |value|
begin
- data = transform_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
+ data = transform_roc_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
names << value.to_s
fp_rates << data[:fp_rate][0]
tp_rates << data[:tp_rate][0]
@@ -111,11 +112,50 @@ module Reports
end
RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", names, fp_rates, tp_rates )
else
- data = transform_predictions(validation_set, class_value, show_single_curves)
+ data = transform_roc_predictions(validation_set, class_value, show_single_curves)
RubyPlot::plot_lines(out_file, "ROC-Plot", "False positive rate", "True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
end
end
+
+ def self.create_confidence_plot( out_file, validation_set, class_value, split_set_attribute=nil, show_single_curves=false )
+
+ LOGGER.debug "creating confidence plot for '"+validation_set.size.to_s+"' validations, out-file:"+out_file.to_s
+
+ if split_set_attribute
+ attribute_values = validation_set.get_values(split_set_attribute)
+ names = []
+ confidence = []
+ performance = []
+ attribute_values.each do |value|
+ begin
+ data = transform_confidence_predictions(validation_set.filter({split_set_attribute => value}), class_value, false)
+ names << value.to_s
+ confidence << data[:confidence][0]
+ performance << data[:performance][0]
+ rescue
+ LOGGER.warn "could not create confidence plot for "+value.to_s
+ end
+ end
+ #RubyPlot::plot_lines(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, fp_rates, tp_rates )
+ case validation_set.unique_feature_type
+ when "classification"
+ RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", names, confidence, performance)
+ when "regression"
+ RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", names, confidence, performance, true)
+ end
+ else
+ data = transform_confidence_predictions(validation_set, class_value, show_single_curves)
+ case validation_set.unique_feature_type
+ when "classification"
+ RubyPlot::accuracy_confidence_plot(out_file, "Percent Correct vs Confidence Plot", "Confidence", "Percent Correct", data[:names], data[:confidence], data[:performance])
+ when "regression"
+ RubyPlot::accuracy_confidence_plot(out_file, "RMSE vs Confidence Plot", "Confidence", "RMSE", data[:names], data[:confidence], data[:performance], true)
+ end
+ end
+ end
+
+
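A sketch of how the new create_confidence_plot might be called, assuming the enclosing class is Reports::PlotFactory (as the file name suggests) and that validation_set is a populated validation set responding to size, unique_feature_type, get_values and filter; the split attribute below is a hypothetical example:

    # pooled confidence curve over all validations (class_value nil, e.g. for regression)
    Reports::PlotFactory.create_confidence_plot("/tmp/confidence.png", validation_set, nil)

    # one curve per value of a split attribute, e.g. per crossvalidation fold
    Reports::PlotFactory.create_confidence_plot("/tmp/confidence_folds.png",
      validation_set, nil, :crossvalidation_fold)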
def self.create_bar_plot( out_file, validation_set, title_attribute, value_attributes )
LOGGER.debug "creating bar plot, out-file:"+out_file.to_s
@@ -127,7 +167,7 @@ module Reports
validation_set.validations.each do |v|
values = []
value_attributes.each do |a|
- validation_set.get_domain_for_attr(a).each do |class_value|
+ validation_set.get_accept_values_for_attr(a).each do |class_value|
value = v.send(a)
if value.is_a?(Hash)
if class_value==nil
@@ -161,7 +201,7 @@ module Reports
end
- def self.create_ranking_plot( svg_out_file, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value=nil )
+ def self.create_ranking_plot( out_file, validation_set, compare_attribute, equal_attribute, rank_attribute, class_value=nil )
#compute ranks
 #puts "rank attribute is "+rank_attribute.to_s
@@ -184,14 +224,14 @@ module Reports
ranks,
nil, #0.1,
validation_set.num_different_values(equal_attribute),
- svg_out_file)
+ out_file)
end
protected
- def self.plot_ranking( title, comparables_array, ranks_array, confidence = nil, numdatasets = nil, svg_out_file = nil )
+ def self.plot_ranking( title, comparables_array, ranks_array, confidence = nil, numdatasets = nil, out_file = nil )
(confidence and numdatasets) ? conf = "-q "+confidence.to_s+" -k "+numdatasets.to_s : conf = ""
- svg_out_file ? show = "-o" : show = ""
+ out_file ? show = "-o" : show = ""
(title and title.length > 0) ? tit = '-t "'+title+'"' : tit = ""
#title = "-t \""+ranking_value_prop+"-Ranking ("+comparables.size.to_s+" "+comparable_prop+"s, "+num_groups.to_s+" "+ranking_group_prop+"s, p < "+p.to_s+")\" "
@@ -208,12 +248,12 @@ module Reports
end
raise "rank plot failed" unless $?==0
- if svg_out_file
- f = File.new(svg_out_file, "w")
+ if out_file
+ f = File.new(out_file, "w")
f.puts res
end
- svg_out_file ? svg_out_file : res
+ out_file ? out_file : res
end
def self.demo_ranking_plot
@@ -221,7 +261,7 @@ module Reports
end
private
- def self.transform_predictions(validation_set, class_value, add_single_folds=false)
+ def self.transform_roc_predictions(validation_set, class_value, add_single_folds=false)
if (validation_set.size > 1)
@@ -229,7 +269,7 @@ module Reports
sum_roc_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
(0..validation_set.size-1).each do |i|
- roc_values = validation_set.get(i).get_predictions.get_roc_values(class_value)
+ roc_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
sum_roc_values[:predicted_values] += roc_values[:predicted_values]
sum_roc_values[:confidence_values] += roc_values[:confidence_values]
sum_roc_values[:actual_values] += roc_values[:actual_values]
@@ -252,24 +292,118 @@ module Reports
faint << false
return { :names => names, :fp_rate => fp_rate, :tp_rate => tp_rate, :faint => faint }
else
- roc_values = validation_set.validations[0].get_predictions.get_roc_values(class_value)
+ roc_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
tp_fp_rates = get_tp_fp_rates(roc_values)
return { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
end
end
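For a multi-validation set, transform_roc_predictions returns a hash whose arrays line up positionally with the arguments of RubyPlot::plot_lines; roughly (values illustrative, the pooled curve is named nil and drawn solid):

    { :names   => ["fold 0", "fold 1", nil],
      :fp_rate => [[0.0, 0.5, 1.0], [0.0, 0.3, 1.0], [0.0, 0.4, 1.0]],
      :tp_rate => [[0.0, 0.8, 1.0], [0.0, 0.7, 1.0], [0.0, 0.75, 1.0]],
      :faint   => [true, true, false] }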
+ def self.transform_confidence_predictions(validation_set, class_value, add_single_folds=false)
+
+ if (validation_set.size > 1)
+
+ names = []; performance = []; confidence = []; faint = []
+ sum_confidence_values = { :predicted_values => [], :actual_values => [], :confidence_values => []}
+
+ (0..validation_set.size-1).each do |i|
+ confidence_values = validation_set.get(i).get_predictions.get_prediction_values(class_value)
+ sum_confidence_values[:predicted_values] += confidence_values[:predicted_values]
+ sum_confidence_values[:confidence_values] += confidence_values[:confidence_values]
+ sum_confidence_values[:actual_values] += confidence_values[:actual_values]
+
+ if add_single_folds
+ begin
+ pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type)
+ names << "fold "+i.to_s
+ performance << pref_conf_rates[:performance]
+ confidence << pref_conf_rates[:confidence]
+ faint << true
+ rescue
+ LOGGER.warn "could not get confidence vals for fold "+i.to_s
+ end
+ end
+ end
+ pref_conf_rates = get_performance_confidence_rates(sum_confidence_values, validation_set.unique_feature_type)
+ names << nil # "all"
+ performance << pref_conf_rates[:performance]
+ confidence << pref_conf_rates[:confidence]
+ faint << false
+ return { :names => names, :performance => performance, :confidence => confidence, :faint => faint }
+
+ else
+ confidence_values = validation_set.validations[0].get_predictions.get_prediction_values(class_value)
+ pref_conf_rates = get_performance_confidence_rates(confidence_values, validation_set.unique_feature_type)
+ return { :names => ["default"], :performance => [pref_conf_rates[:performance]], :confidence => [pref_conf_rates[:confidence]] }
+ end
+ end
+
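transform_confidence_predictions mirrors the ROC helper but yields :performance and :confidence arrays, which create_confidence_plot hands to RubyPlot::accuracy_confidence_plot (the trailing true flag selecting the RMSE/regression variant). The pooled result for a classification set looks roughly like this (values illustrative):

    { :names       => [nil],                     # nil marks the pooled curve
      :performance => [[100.0, 87.5, 80.0]],     # percent correct at decreasing confidence cut-offs
      :confidence  => [[0.9, 0.7, 0.5]],
      :faint       => [false] }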
def self.demo_rock_plot
roc_values = {:confidence_values => [0.1, 0.9, 0.5, 0.6, 0.6, 0.6],
:predicted_values => [1, 0, 0, 1, 0, 1],
:actual_values => [0, 1, 0, 0, 1, 1]}
tp_fp_rates = get_tp_fp_rates(roc_values)
data = { :names => ["default"], :fp_rate => [tp_fp_rates[:fp_rate]], :tp_rate => [tp_fp_rates[:tp_rate]] }
- RubyPlot::plot_lines("/tmp/plot.svg",
+ RubyPlot::plot_lines("/tmp/plot.png",
"ROC-Plot",
"False positive rate",
"True Positive Rate", data[:names], data[:fp_rate], data[:tp_rate], data[:faint] )
end
+ def self.get_performance_confidence_rates(roc_values, feature_type)
+
+ c = roc_values[:confidence_values]
+ p = roc_values[:predicted_values]
+ a = roc_values[:actual_values]
+ raise "no prediction values for confidence plot" if p.size==0
+
+ (0..p.size-2).each do |i|
+ ((i+1)..p.size-1).each do |j|
+ if c[i]<c[j]
+ c.swap!(i,j)
+ a.swap!(i,j)
+ p.swap!(i,j)
+ end
+ end
+ end
+ #puts c.inspect+"\n"+a.inspect+"\n"+p.inspect+"\n\n"
+
+ perf = []
+ conf = []
+
+ case feature_type
+ when "classification"
+ count = 0
+ correct = 0
+ (0..p.size-1).each do |i|
+ count += 1
+ correct += 1 if p[i]==a[i]
+ if i>0 && (c[i]>=conf[-1]-0.00001)
+ perf.pop
+ conf.pop
+ end
+ perf << correct/count.to_f * 100
+ conf << c[i]
+ end
+ when "regression"
+ count = 0
+ sum_squared_error = 0
+ (0..p.size-1).each do |i|
+ count += 1
+ sum_squared_error += (p[i]-a[i])**2
+ if i>0 && (c[i]>=conf[-1]-0.00001)
+ perf.pop
+ conf.pop
+ end
+ perf << Math.sqrt(sum_squared_error/count.to_f)
+ conf << c[i]
+ end
+ end
+ #puts perf.inspect
+
+ return {:performance => perf,:confidence => conf}
+ end
+
+
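get_performance_confidence_rates first orders the three parallel arrays by descending confidence (Array#swap! is assumed to be a core extension defined elsewhere in the project) and then accumulates percent correct or RMSE, collapsing points whose confidence differs by less than 0.00001. A compact sketch of the classification branch using sort_by instead of the in-place swaps, under those assumptions (helper name is hypothetical):

    # sketch: cumulative percent correct at decreasing confidence cut-offs
    def percent_correct_vs_confidence(vals)
      order = (0...vals[:confidence_values].size).sort_by { |i| -vals[:confidence_values][i] }
      c = vals[:confidence_values].values_at(*order)
      p = vals[:predicted_values].values_at(*order)
      a = vals[:actual_values].values_at(*order)
      perf, conf, correct = [], [], 0
      c.each_index do |i|
        correct += 1 if p[i] == a[i]
        if i > 0 && c[i] >= conf[-1] - 0.00001
          perf.pop; conf.pop            # merge points with (nearly) equal confidence
        end
        perf << correct / (i + 1).to_f * 100
        conf << c[i]
      end
      { :performance => perf, :confidence => conf }
    end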
def self.get_tp_fp_rates(roc_values)
c = roc_values[:confidence_values]