From 95de1906994672c12d4d4b6fd1f5f09695fb1c60 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 30 Jun 2011 23:49:22 +0200 Subject: adding confidence-weighted regression stats --- lib/predictions.rb | 43 ++++++++++++++++++++++++++++++++++++++++++- lib/validation_db.rb | 5 +++-- report/report_factory.rb | 8 +++++--- 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/lib/predictions.rb b/lib/predictions.rb index 2409375..4e34c90 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -78,6 +78,8 @@ module Lib @num_predicted = 0 @num_unpredicted = 0 + @mean_confidence = 0 + case @feature_type when "classification" @@ -111,6 +113,9 @@ module Lib @sum_multiply = 0 @sum_squares_actual = 0 @sum_squares_predicted = 0 + + @sum_weighted_abs_error = 0 + @sum_weighted_squared_error = 0 end end @@ -127,6 +132,7 @@ module Lib @num_predicted += 1 @conf_provided |= confidence_value!=nil + @mean_confidence = (confidence_value + @mean_confidence*(@num_predicted-1)) / @num_predicted.to_f if @conf_provided case @feature_type when "classification" @@ -140,7 +146,9 @@ module Lib delta = predicted_value - actual_value @sum_error += delta @sum_abs_error += delta.abs + @sum_weighted_abs_error += delta.abs*confidence_value @sum_squared_error += delta**2 + @sum_weighted_squared_error += (delta**2)*confidence_value old_prediction_mean = @prediction_mean @prediction_mean = (@prediction_mean * (@num_predicted-1) + predicted_value) / @num_predicted.to_f @@ -466,11 +474,23 @@ module Lib Math.sqrt(@sum_squared_error / (@num_with_actual_value - @num_unpredicted).to_f) end + def weighted_root_mean_squared_error + return 0 unless confidence_values_available? + return 0 if (@num_with_actual_value - @num_unpredicted)==0 + Math.sqrt(@sum_weighted_squared_error / ((@num_with_actual_value - @num_unpredicted).to_f * @mean_confidence )) + end + def mean_absolute_error return 0 if (@num_with_actual_value - @num_unpredicted)==0 @sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f end + def weighted_mean_absolute_error + return 0 unless confidence_values_available? + return 0 if (@num_with_actual_value - @num_unpredicted)==0 + @sum_weighted_abs_error / ((@num_with_actual_value - @num_unpredicted).to_f * @mean_confidence ) + end + def sum_squared_error return @sum_squared_error end @@ -486,6 +506,14 @@ module Lib ( r_2.infinite? || r_2.nan? ) ? 0 : r_2 end + def weighted_r_square + return 0 unless confidence_values_available? + ss_tot = weighted_total_sum_of_squares + return 0 if ss_tot==0 + r_2 = 1 - weighted_residual_sum_of_squares / ss_tot + ( r_2.infinite? || r_2.nan? ) ? 0 : r_2 + end + def sample_correlation_coefficient # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient scc = ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) / @@ -498,7 +526,16 @@ module Lib #return @variance_actual * ( @num_predicted - 1 ) sum = 0 @predicted_values.size.times do |i| - sum += (@actual_values[i]-@actual_mean)**2 if @predicted_values[i]!=nil + sum += (@actual_values[i]-@actual_mean)**2 if @actual_values[i]!=nil and @predicted_values[i]!=nil + end + sum + end + + def weighted_total_sum_of_squares + return 0 unless confidence_values_available? + sum = 0 + @predicted_values.size.times do |i| + sum += ((@actual_values[i]-@actual_mean)**2)*@confidence_values[i] if @actual_values[i]!=nil and @predicted_values[i]!=nil end sum end @@ -507,6 +544,10 @@ module Lib sum_squared_error end + def weighted_residual_sum_of_squares + @sum_weighted_squared_error + end + def target_variance_predicted return @variance_predicted end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index c38b82e..fb7a8b5 100755 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -35,8 +35,9 @@ module Validation VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS # :regression_statistics - VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, - :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ] + VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, :weighted_r_square, + :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient, + :weighted_mean_absolute_error, :weighted_root_mean_squared_error ] CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed] CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS diff --git a/report/report_factory.rb b/report/report_factory.rb index d16066e..fcd9bab 100755 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -7,10 +7,12 @@ VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold # selected attributes of interest when performing classification VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :weighted_area_under_roc, :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] -VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error, :mean_absolute_error, :r_square ] +VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error, + :weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square ] -VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc, - :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] +#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc, +# :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ] +VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate ] VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] -- cgit v1.2.3