diff options
Diffstat (limited to 'lib/predictions.rb')
-rwxr-xr-x | lib/predictions.rb | 121 |
1 file changed, 93 insertions, 28 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb index 420790e..b71359d 100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -36,7 +36,7 @@ module Lib #puts "actual: "+actual_values.inspect #puts "confidence: "+confidence_values.inspect - raise "unknown feature_type: "+@feature_type.to_s unless + raise "unknown feature_type: '"+@feature_type.to_s+"'" unless @feature_type=="classification" || @feature_type=="regression" raise "no predictions" if @predicted_values.size == 0 num_info = "predicted:"+@predicted_values.size.to_s+ @@ -45,16 +45,6 @@ module Lib raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size @confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) } - ## check if there is more than one different conf value - ## DEPRECATED? not sure anymore what this was about, - ## I am pretty sure this was for r-plot of roc curves - ## roc curvers are now plotted manually - #conf_val_tmp = {} - #@confidence_values.each{ |c| conf_val_tmp[c] = nil } - #if conf_val_tmp.keys.size<2 - # LOGGER.warn("prediction w/o confidence values"); - # @confidence_values=nil - #end case @feature_type when "classification" @@ -65,27 +55,31 @@ module Lib values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)} end - when "regresssion" + when "regression" raise "accept_values != nil while performing regression" if @accept_values { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ - "has to be either nil or number" unless v==nil or v.is_a?(Numeric)} + " has to be either nil or number (not NaN, not Infinite)" unless v==nil or (v.is_a?(Numeric) and !v.nan? 
and v.finite?)} end end init_stats() (0..@predicted_values.size-1).each do |i| - update_stats( @predicted_values[i], @actual_values[i], (@confidence_values!=nil)?@confidence_values[i]:nil ) + update_stats( @predicted_values[i], @actual_values[i], @confidence_values[i] ) end end def init_stats + @conf_provided = false + @num_no_actual_value = 0 @num_with_actual_value = 0 @num_predicted = 0 @num_unpredicted = 0 + @mean_confidence = 0 + case @feature_type when "classification" @@ -119,6 +113,9 @@ module Lib @sum_multiply = 0 @sum_squares_actual = 0 @sum_squares_predicted = 0 + + @sum_weighted_abs_error = 0 + @sum_weighted_squared_error = 0 end end @@ -134,6 +131,9 @@ module Lib else @num_predicted += 1 + @conf_provided |= confidence_value!=nil + @mean_confidence = (confidence_value + @mean_confidence*(@num_predicted-1)) / @num_predicted.to_f if @conf_provided + case @feature_type when "classification" @confusion_matrix[actual_value][predicted_value] += 1 @@ -146,7 +146,9 @@ module Lib delta = predicted_value - actual_value @sum_error += delta @sum_abs_error += delta.abs + @sum_weighted_abs_error += delta.abs*confidence_value if @conf_provided @sum_squared_error += delta**2 + @sum_weighted_squared_error += (delta**2)*confidence_value if @conf_provided old_prediction_mean = @prediction_mean @prediction_mean = (@prediction_mean * (@num_predicted-1) + predicted_value) / @num_predicted.to_f @@ -170,8 +172,8 @@ module Lib def percent_correct raise "no classification" unless @feature_type=="classification" - return 0 if @num_with_actual_value==0 - return 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f + pct = 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f + pct.nan? ? 0 : pct end def percent_incorrect @@ -181,10 +183,12 @@ module Lib end def accuracy - return percent_correct / 100.0 + acc = percent_correct / 100.0 + acc.nan? ? 0 : acc end def weighted_accuracy + return 0 unless confidence_values_available? 
raise "no classification" unless @feature_type=="classification" total = 0 correct = 0 @@ -250,10 +254,11 @@ module Lib return res end + # does only take the instances that are classified as <class-index> into account def area_under_roc(class_index=nil) return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil - return 0.0 if @confidence_values==nil + return 0 unless confidence_values_available? LOGGER.warn("TODO: implement approx computiation of AUC,"+ "so far Wilcoxon-Man-Whitney is used (exponential)") if @@ -427,8 +432,13 @@ module Lib return incorrect end + # Note: + # * (un-weighted) area under roc is computed with all __predicted__ isntances for a certain class + # * weighted weights each auc with the number of __acutal__ instances + # its like that, because its like that in weka def weighted_area_under_roc - return weighted_measure( :area_under_roc ) + w_auc = weighted_measure( :area_under_roc ) + w_auc.nan? ? 0 : w_auc end def weighted_f_measure @@ -436,6 +446,7 @@ module Lib end private + # the <measure> is weighted with the number of instances for each actual class value def weighted_measure( measure ) sum_instances = 0 @@ -460,31 +471,85 @@ module Lib public def root_mean_squared_error return 0 if (@num_with_actual_value - @num_unpredicted)==0 - Math.sqrt(@sum_squared_error / (@num_with_actual_value - @num_unpredicted).to_f) + mse = @sum_squared_error / (@num_with_actual_value - @num_unpredicted).to_f + return 0 if mse.nan? + Math.sqrt(mse) end + def weighted_root_mean_squared_error + return 0 unless confidence_values_available? 
+ return 0 if (@num_with_actual_value - @num_unpredicted)==0 + Math.sqrt(@sum_weighted_squared_error / ((@num_with_actual_value - @num_unpredicted).to_f * @mean_confidence )) + end + def mean_absolute_error return 0 if (@num_with_actual_value - @num_unpredicted)==0 @sum_abs_error / (@num_with_actual_value - @num_unpredicted).to_f end + def weighted_mean_absolute_error + return 0 unless confidence_values_available? + return 0 if (@num_with_actual_value - @num_unpredicted)==0 + @sum_weighted_abs_error / ((@num_with_actual_value - @num_unpredicted).to_f * @mean_confidence ) + end + def sum_squared_error return @sum_squared_error end def r_square - return sample_correlation_coefficient ** 2 + #return sample_correlation_coefficient ** 2 + + # see http://en.wikipedia.org/wiki/Coefficient_of_determination#Definitions + # see http://web.maths.unsw.edu.au/~adelle/Garvan/Assays/GoodnessOfFit.html + ss_tot = total_sum_of_squares + return 0 if ss_tot==0 + r_2 = 1 - residual_sum_of_squares / ss_tot + ( r_2.infinite? || r_2.nan? ) ? 0 : r_2 + end + + def weighted_r_square + return 0 unless confidence_values_available? + ss_tot = weighted_total_sum_of_squares + return 0 if ss_tot==0 + r_2 = 1 - weighted_residual_sum_of_squares / ss_tot + ( r_2.infinite? || r_2.nan? ) ? 
0 : r_2 end def sample_correlation_coefficient - # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient - return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) / - ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) * - Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) ) + begin + # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient + scc = ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) / + ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) * + Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) ) + ( scc.infinite? || scc.nan? ) ? 0 : scc + rescue; 0; end end def total_sum_of_squares - return @variance_actual * ( @num_predicted - 1 ) + #return @variance_actual * ( @num_predicted - 1 ) + sum = 0 + @predicted_values.size.times do |i| + sum += (@actual_values[i]-@actual_mean)**2 if @actual_values[i]!=nil and @predicted_values[i]!=nil + end + sum + end + + def weighted_total_sum_of_squares + return 0 unless confidence_values_available? + sum = 0 + @predicted_values.size.times do |i| + sum += ((@actual_values[i]-@actual_mean)**2)*@confidence_values[i] if @actual_values[i]!=nil and @predicted_values[i]!=nil + end + sum + end + + def residual_sum_of_squares + sum_squared_error + end + + def weighted_residual_sum_of_squares + @sum_weighted_squared_error end def target_variance_predicted @@ -500,7 +565,7 @@ module Lib def get_prediction_values(class_value) #puts "get_roc_values for class_value: "+class_value.to_s - raise "no confidence values" if @confidence_values==nil + raise "no confidence values" unless confidence_values_available? #raise "no class-value specified" if class_value==nil class_index = @accept_values.index(class_value) if class_value!=nil @@ -571,7 +636,7 @@ module Lib end def confidence_values_available? 
- return @confidence_values!=nil + @conf_provided end ################################################################################################################### |