summary | refs | log | tree | commit | diff
path: root/lib/predictions.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/predictions.rb')
-rwxr-xr-x lib/predictions.rb 121
1 files changed, 93 insertions, 28 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 420790e..b71359d 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -36,7 +36,7 @@ module Lib
#puts "actual: "+actual_values.inspect
#puts "confidence: "+confidence_values.inspect
- raise "unknown feature_type: "+@feature_type.to_s unless
+ raise "unknown feature_type: '"+@feature_type.to_s+"'" unless
@feature_type=="classification" || @feature_type=="regression"
raise "no predictions" if @predicted_values.size == 0
num_info = "predicted:"+@predicted_values.size.to_s+
@@ -45,16 +45,6 @@ module Lib
raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size
@confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) }
- ## check if there is more than one different conf value
- ## DEPRECATED? not sure anymore what this was about,
- ## I am pretty sure this was for r-plot of roc curves
- ## roc curvers are now plotted manually
- #conf_val_tmp = {}
- #@confidence_values.each{ |c| conf_val_tmp[c] = nil }
- #if conf_val_tmp.keys.size<2
- # LOGGER.warn("prediction w/o confidence values");
- # @confidence_values=nil
- #end
case @feature_type
when "classification"
@@ -65,27 +55,31 @@ module Lib
values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
"has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)}
end
- when "regresssion"
+ when "regression"
raise "accept_values != nil while performing regression" if @accept_values
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
- "has to be either nil or number" unless v==nil or v.is_a?(Numeric)}
+ " has to be either nil or number (not NaN, not Infinite)" unless v==nil or (v.is_a?(Numeric) and !v.nan? and v.finite?)}
end
end
init_stats()
(0..@predicted_values.size-1).each do |i|
- update_stats( @predicted_values[i], @actual_values[i], (@confidence_values!=nil)?@confidence_values[i]:nil )
+ update_stats( @predicted_values[i], @actual_values[i], @confidence_values[i] )
end
end
def init_stats
+ @conf_provided = false
+
@num_no_actual_value = 0
@num_with_actual_value = 0
@num_predicted = 0
@num_unpredicted = 0
+ @mean_confidence = 0
+
case @feature_type
when "classification"
@@ -119,6 +113,9 @@ module Lib
@sum_multiply = 0
@sum_squares_actual = 0
@sum_squares_predicted = 0
+
+ @sum_weighted_abs_error = 0
+ @sum_weighted_squared_error = 0
end
end
@@ -134,6 +131,9 @@ module Lib
else
@num_predicted += 1
+ @conf_provided |= confidence_value!=nil
+ @mean_confidence = (confidence_value + @mean_confidence*(@num_predicted-1)) / @num_predicted.to_f if @conf_provided
+
case @feature_type
when "classification"
@confusion_matrix[actual_value][predicted_value] += 1
@@ -146,7 +146,9 @@ module Lib
delta = predicted_value - actual_value
@sum_error += delta
@sum_abs_error += delta.abs
+ @sum_weighted_abs_error += delta.abs*confidence_value if @conf_provided
@sum_squared_error += delta**2
+ @sum_weighted_squared_error += (delta**2)*confidence_value if @conf_provided
old_prediction_mean = @prediction_mean
@prediction_mean = (@prediction_mean * (@num_predicted-1) + predicted_value) / @num_predicted.to_f
@@ -170,8 +172,8 @@ module Lib
def percent_correct
raise "no classification" unless @feature_type=="classification"
- return 0 if @num_with_actual_value==0
- return 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f
+ pct = 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f
+ pct.nan? ? 0 : pct
end
def percent_incorrect
@@ -181,10 +183,12 @@ module Lib
end
def accuracy
- return percent_correct / 100.0
+ acc = percent_correct / 100.0
+ acc.nan? ? 0 : acc
end
def weighted_accuracy
+ return 0 unless confidence_values_available?
raise "no classification" unless @feature_type=="classification"
total = 0
correct = 0
@@ -250,10 +254,11 @@ module Lib
return res
end
+ # only takes the instances that are classified as <class-index> into account
def area_under_roc(class_index=nil)
return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if
class_index==nil
- return 0.0 if @confidence_values==nil
+ return 0 unless confidence_values_available?
LOGGER.warn("TODO: implement approx computiation of AUC,"+
"so far Wilcoxon-Man-Whitney is used (exponential)") if
@@ -427,8 +432,13 @@ module Lib
return incorrect
end
+ # Note:
+ # * (un-weighted) area under roc is computed with all __predicted__ instances for a certain class
+ # * weighted area under roc weights each auc with the number of __actual__ instances
+ # it's like that because that is how weka does it
def weighted_area_under_roc
- return weighted_measure( :area_under_roc )
+ w_auc = weighted_measure( :area_under_roc )
+ w_auc.nan? ? 0 : w_auc
end
def weighted_f_measure
@@ -436,6 +446,7 @@ module Lib
end
private
+ # the <measure> is weighted with the number of instances for each actual class value
def weighted_measure( measure )
sum_instances = 0
@@ -460,31 +471,85 @@ module Lib
public
def root_mean_squared_error
return 0 if (@num_with_actual_value - @num_unpredicted)==0
- Math.sqrt(@sum_squared_error / (@num_with_actual_value - @num_unpredicted).to_f)
+ mse = @sum_squared_error / (@num_with_actual_value - @num_unpredicted).to_f
+ return 0 if mse.nan?
+ Math.sqrt(mse)
end
+ def weighted_root_mean_squared_error
+ return 0 unless confidence_values_available?
+ return 0 if (@num_with_actual_value - @num_unpredicted)==0
+ Math.sqrt(@sum_weighted_squared_error / ((@num_with_actual_value - @num_unpredicted).to_f * @mean_confidence ))
+ end
+
def mean_absolute_error
return 0 if (@num_with_actual_value - @num_unpredicted)==0
@sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f
end
+ def weighted_mean_absolute_error
+ return 0 unless confidence_values_available?
+ return 0 if (@num_with_actual_value - @num_unpredicted)==0
+ @sum_weighted_abs_error / ((@num_with_actual_value - @num_unpredicted).to_f * @mean_confidence )
+ end
+
def sum_squared_error
return @sum_squared_error
end
def r_square
- return sample_correlation_coefficient ** 2
+ #return sample_correlation_coefficient ** 2
+
+ # see http://en.wikipedia.org/wiki/Coefficient_of_determination#Definitions
+ # see http://web.maths.unsw.edu.au/~adelle/Garvan/Assays/GoodnessOfFit.html
+ ss_tot = total_sum_of_squares
+ return 0 if ss_tot==0
+ r_2 = 1 - residual_sum_of_squares / ss_tot
+ ( r_2.infinite? || r_2.nan? ) ? 0 : r_2
+ end
+
+ def weighted_r_square
+ return 0 unless confidence_values_available?
+ ss_tot = weighted_total_sum_of_squares
+ return 0 if ss_tot==0
+ r_2 = 1 - weighted_residual_sum_of_squares / ss_tot
+ ( r_2.infinite? || r_2.nan? ) ? 0 : r_2
end
def sample_correlation_coefficient
- # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
- return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
- ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) *
- Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) )
+ begin
+ # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
+ scc = ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
+ ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) *
+ Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) )
+ ( scc.infinite? || scc.nan? ) ? 0 : scc
+ rescue; 0; end
end
def total_sum_of_squares
- return @variance_actual * ( @num_predicted - 1 )
+ #return @variance_actual * ( @num_predicted - 1 )
+ sum = 0
+ @predicted_values.size.times do |i|
+ sum += (@actual_values[i]-@actual_mean)**2 if @actual_values[i]!=nil and @predicted_values[i]!=nil
+ end
+ sum
+ end
+
+ def weighted_total_sum_of_squares
+ return 0 unless confidence_values_available?
+ sum = 0
+ @predicted_values.size.times do |i|
+ sum += ((@actual_values[i]-@actual_mean)**2)*@confidence_values[i] if @actual_values[i]!=nil and @predicted_values[i]!=nil
+ end
+ sum
+ end
+
+ def residual_sum_of_squares
+ sum_squared_error
+ end
+
+ def weighted_residual_sum_of_squares
+ @sum_weighted_squared_error
end
def target_variance_predicted
@@ -500,7 +565,7 @@ module Lib
def get_prediction_values(class_value)
#puts "get_roc_values for class_value: "+class_value.to_s
- raise "no confidence values" if @confidence_values==nil
+ raise "no confidence values" unless confidence_values_available?
#raise "no class-value specified" if class_value==nil
class_index = @accept_values.index(class_value) if class_value!=nil
@@ -571,7 +636,7 @@ module Lib
end
def confidence_values_available?
- return @confidence_values!=nil
+ @conf_provided
end
###################################################################################################################