summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2011-06-30 23:49:22 +0200
committer mguetlein <martin.guetlein@gmail.com> 2011-06-30 23:49:22 +0200
commit 95de1906994672c12d4d4b6fd1f5f09695fb1c60 (patch)
tree 93ac2ce86b72c265d942d2b7f12a38939d5badb2
parent 0b0d101c77c2f40769036fb1455e8143f4dace9c (diff)
adding confidence-weighted regression stats
-rwxr-xr-x lib/predictions.rb 43
-rwxr-xr-x lib/validation_db.rb 5
-rwxr-xr-x report/report_factory.rb 8
3 files changed, 50 insertions, 6 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 2409375..4e34c90 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -78,6 +78,8 @@ module Lib
@num_predicted = 0
@num_unpredicted = 0
+ @mean_confidence = 0
+
case @feature_type
when "classification"
@@ -111,6 +113,9 @@ module Lib
@sum_multiply = 0
@sum_squares_actual = 0
@sum_squares_predicted = 0
+
+ @sum_weighted_abs_error = 0
+ @sum_weighted_squared_error = 0
end
end
@@ -127,6 +132,7 @@ module Lib
@num_predicted += 1
@conf_provided |= confidence_value!=nil
+ @mean_confidence = (confidence_value + @mean_confidence*(@num_predicted-1)) / @num_predicted.to_f if @conf_provided
case @feature_type
when "classification"
@@ -140,7 +146,9 @@ module Lib
delta = predicted_value - actual_value
@sum_error += delta
@sum_abs_error += delta.abs
+ @sum_weighted_abs_error += delta.abs*confidence_value
@sum_squared_error += delta**2
+ @sum_weighted_squared_error += (delta**2)*confidence_value
old_prediction_mean = @prediction_mean
@prediction_mean = (@prediction_mean * (@num_predicted-1) + predicted_value) / @num_predicted.to_f
@@ -466,11 +474,23 @@ module Lib
Math.sqrt(@sum_squared_error / (@num_with_actual_value - @num_unpredicted).to_f)
end
+ def weighted_root_mean_squared_error
+ return 0 unless confidence_values_available?
+ return 0 if (@num_with_actual_value - @num_unpredicted)==0
+ Math.sqrt(@sum_weighted_squared_error / ((@num_with_actual_value - @num_unpredicted).to_f * @mean_confidence ))
+ end
+
def mean_absolute_error
return 0 if (@num_with_actual_value - @num_unpredicted)==0
@sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f
end
+ def weighted_mean_absolute_error
+ return 0 unless confidence_values_available?
+ return 0 if (@num_with_actual_value - @num_unpredicted)==0
+ @sum_weighted_abs_error / ((@num_with_actual_value - @num_unpredicted).to_f * @mean_confidence )
+ end
+
def sum_squared_error
return @sum_squared_error
end
@@ -486,6 +506,14 @@ module Lib
( r_2.infinite? || r_2.nan? ) ? 0 : r_2
end
+ def weighted_r_square
+ return 0 unless confidence_values_available?
+ ss_tot = weighted_total_sum_of_squares
+ return 0 if ss_tot==0
+ r_2 = 1 - weighted_residual_sum_of_squares / ss_tot
+ ( r_2.infinite? || r_2.nan? ) ? 0 : r_2
+ end
+
def sample_correlation_coefficient
# formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
scc = ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
@@ -498,7 +526,16 @@ module Lib
#return @variance_actual * ( @num_predicted - 1 )
sum = 0
@predicted_values.size.times do |i|
- sum += (@actual_values[i]-@actual_mean)**2 if @predicted_values[i]!=nil
+ sum += (@actual_values[i]-@actual_mean)**2 if @actual_values[i]!=nil and @predicted_values[i]!=nil
+ end
+ sum
+ end
+
+ def weighted_total_sum_of_squares
+ return 0 unless confidence_values_available?
+ sum = 0
+ @predicted_values.size.times do |i|
+ sum += ((@actual_values[i]-@actual_mean)**2)*@confidence_values[i] if @actual_values[i]!=nil and @predicted_values[i]!=nil
end
sum
end
@@ -507,6 +544,10 @@ module Lib
sum_squared_error
end
+ def weighted_residual_sum_of_squares
+ @sum_weighted_squared_error
+ end
+
def target_variance_predicted
return @variance_predicted
end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index c38b82e..fb7a8b5 100755
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -35,8 +35,9 @@ module Validation
VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
# :regression_statistics
- VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square,
- :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ]
+ VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square, :weighted_r_square,
+ :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient,
+ :weighted_mean_absolute_error, :weighted_root_mean_squared_error ]
CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed]
CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS
diff --git a/report/report_factory.rb b/report/report_factory.rb
index d16066e..fcd9bab 100755
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -7,10 +7,12 @@ VAL_ATTR_CV = [ :algorithm_uri, :dataset_uri, :num_folds, :crossvalidation_fold
# selected attributes of interest when performing classification
VAL_ATTR_CLASS = [ :num_instances, :num_unpredicted, :accuracy, :weighted_accuracy, :weighted_area_under_roc,
:area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
-VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error, :mean_absolute_error, :r_square ]
+VAL_ATTR_REGR = [ :num_instances, :num_unpredicted, :root_mean_squared_error,
+ :weighted_root_mean_squared_error, :mean_absolute_error, :weighted_mean_absolute_error, :r_square, :weighted_r_square ]
-VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc,
- :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
+#VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :weighted_area_under_roc,
+# :area_under_roc, :f_measure, :true_positive_rate, :true_negative_rate ]
+VAL_ATTR_BAR_PLOT_CLASS = [ :accuracy, :f_measure, :true_positive_rate, :true_negative_rate ]
VAL_ATTR_BAR_PLOT_REGR = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]