diff options
Diffstat (limited to 'lib/predictions.rb')
-rwxr-xr-x[-rw-r--r--] | lib/predictions.rb | 143 |
1 files changed, 92 insertions, 51 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb index f6351f8..5850024 100644..100755 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -22,20 +22,22 @@ module Lib def initialize( predicted_values, actual_values, confidence_values, - is_classification, - prediction_feature_values=nil ) + feature_type, + class_domain=nil ) @predicted_values = predicted_values @actual_values = actual_values @confidence_values = confidence_values - @is_classification = is_classification - @prediction_feature_values = prediction_feature_values + @feature_type = feature_type + @class_domain = class_domain @num_classes = 1 #puts "predicted: "+predicted_values.inspect #puts "actual: "+actual_values.inspect #puts "confidence: "+confidence_values.inspect + raise "unknown feature_type: "+@feature_type.to_s unless + @feature_type=="classification" || @feature_type=="regression" raise "no predictions" if @predicted_values.size == 0 num_info = "predicted:"+@predicted_values.size.to_s+ " confidence:"+@confidence_values.size.to_s+" actual:"+@actual_values.size.to_s @@ -43,23 +45,28 @@ module Lib raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size @confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) } - conf_val_tmp = {} - @confidence_values.each{ |c| conf_val_tmp[c] = nil } - if conf_val_tmp.keys.size<2 - LOGGER.warn("prediction w/o confidence values"); - @confidence_values=nil - end + ## check if there is more than one different conf value + ## DEPRECATED? not sure anymore what this was about, + ## I am pretty sure this was for r-plot of roc curves + ## roc curvers are now plotted manually + #conf_val_tmp = {} + #@confidence_values.each{ |c| conf_val_tmp[c] = nil } + #if conf_val_tmp.keys.size<2 + # LOGGER.warn("prediction w/o confidence values"); + # @confidence_values=nil + #end - if @is_classification - raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values - @num_classes = @prediction_feature_values.size + case @feature_type + when "classification" + raise "class_domain missing while performing classification" unless @class_domain + @num_classes = @class_domain.size raise "num classes < 2" if @num_classes<2 { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+ - "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)} + "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)} end - else - raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values + when "regresssion" + raise "class_domain != nil while performing regression" if @class_domain { "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values| values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+ "has to be either nil or number" unless v==nil or v.is_a?(Numeric)} @@ -79,15 +86,16 @@ module Lib @num_predicted = 0 @num_unpredicted = 0 - if @is_classification + case @feature_type + when "classification" @confusion_matrix = [] - @prediction_feature_values.each do |v| + @class_domain.each do |v| @confusion_matrix.push( Array.new( @num_classes, 0 ) ) end @num_correct = 0 @num_incorrect = 0 - else + when "regression" @sum_error = 0 @sum_abs_error = 0 @sum_squared_error = 0 @@ -118,14 +126,15 @@ module Lib else @num_predicted += 1 - if @is_classification + case @feature_type + when "classification" @confusion_matrix[actual_value][predicted_value] += 1 if (predicted_value == actual_value) @num_correct += 1 else @num_incorrect += 1 end - else + when "regression" delta = predicted_value - actual_value @sum_error += delta @sum_abs_error += delta.abs @@ -152,21 +161,38 @@ module Lib end def percent_correct - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return 0 if @num_with_actual_value==0 - return 100 * @num_correct / @num_with_actual_value.to_f + return 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f end def percent_incorrect - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return 0 if @num_with_actual_value==0 - return 100 * @num_incorrect / @num_with_actual_value.to_f + return 100 * @num_incorrect / (@num_with_actual_value - @num_unpredicted).to_f end def accuracy return percent_correct / 100.0 end + def weighted_accuracy + raise "no classification" unless @feature_type=="classification" + total = 0 + correct = 0 + (0..@predicted_values.size-1).each do |i| + if @predicted_values[i]!=nil + total += @confidence_values[i] + correct += @confidence_values[i] if @actual_values[i]==@predicted_values[i] + end + end + if total==0 || correct == 0 + return 0 + else + return correct / total + end + end + def percent_unpredicted return 0 if @num_with_actual_value==0 return 100 * @num_unpredicted / @num_with_actual_value.to_f @@ -186,17 +212,17 @@ module Lib end def num_correct - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_correct end def num_incorrect - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_incorrect end def num_unclassified - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return @num_unpredicted end @@ -205,35 +231,39 @@ module Lib # and values: <int-value> def confusion_matrix - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" res = {} (0..@num_classes-1).each do |actual| (0..@num_classes-1).each do |predicted| - res[{:confusion_matrix_actual => @prediction_feature_values[actual], - :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted] + res[{:confusion_matrix_actual => @class_domain[actual], + :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted] end end return res end def area_under_roc(class_index=nil) - return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil + return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if + class_index==nil return 0.0 if @confidence_values==nil LOGGER.warn("TODO: implement approx computiation of AUC,"+ - "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000 + "so far Wilcoxon-Man-Whitney is used (exponential)") if + @predicted_values.size>1000 + #puts "COMPUTING AUC "+class_index.to_s tp_conf = [] fp_conf = [] (0..@predicted_values.size-1).each do |i| if @predicted_values[i]==class_index - if @actual_values[i]==class_index + if @actual_values[i]==@predicted_values[i] tp_conf.push(@confidence_values[i]) else fp_conf.push(@confidence_values[i]) end end end + #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n" return 0.0 if tp_conf.size == 0 return 1.0 if fp_conf.size == 0 @@ -241,9 +271,9 @@ module Lib tp_conf.each do |tp| fp_conf.each do |fp| sum += 1 if tp>fp + sum += 0.5 if tp==fp end end - return sum / (tp_conf.size * fp_conf.size).to_f end @@ -441,8 +471,8 @@ module Lib def sample_correlation_coefficient # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) / - ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) * - Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) ) + ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) * + Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) ) end def total_sum_of_squares @@ -460,21 +490,30 @@ module Lib # data for roc-plots ################################################################################### def get_roc_values(class_value) + + #puts "get_roc_values for class_value: "+class_value.to_s raise "no confidence values" if @confidence_values==nil - class_index = @prediction_feature_values.index(class_value) - raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil + raise "no class-value specified" if class_value==nil + + class_index = @class_domain.index(class_value) + raise "class not found "+class_value.to_s if class_index==nil c = []; p = []; a = [] (0..@predicted_values.size-1).each do |i| # NOTE: not predicted instances are ignored here - if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index)) + if @predicted_values[i]!=nil and @predicted_values[i]==class_index c << @confidence_values[i] p << @predicted_values[i] a << @actual_values[i] end end - return {:predicted_values => p, :actual_values => a, :confidence_values => c} + # DO NOT raise exception here, maybe different validations are concated + #raise "no instance predicted as '"+class_value+"'" if p.size == 0 + + h = {:predicted_values => p, :actual_values => a, :confidence_values => c} + #puts h.inspect + return h end ######################################################################################## @@ -488,9 +527,10 @@ module Lib end def predicted_value(instance_index) - if @is_classification - @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]] - else + case @feature_type + when "classification" + @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]] + when "regression" @predicted_values[instance_index] end end @@ -500,9 +540,10 @@ module Lib end def actual_value(instance_index) - if @is_classification - @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]] - else + case @feature_type + when "classification" + @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]] + when "regression" @actual_values[instance_index] end end @@ -512,13 +553,13 @@ module Lib end def classification_miss?(instance_index) - raise "no classification" unless @is_classification + raise "no classification" unless @feature_type=="classification" return false if predicted_value(instance_index)==nil or actual_value(instance_index)==nil return predicted_value(instance_index) != actual_value(instance_index) end - def classification? - @is_classification + def feature_type + @feature_type end def confidence_values_available? @@ -535,7 +576,7 @@ module Lib def prediction_feature_value_map(proc) res = {} (0..@num_classes-1).each do |i| - res[@prediction_feature_values[i]] = proc.call(i) + res[@class_domain[i]] = proc.call(i) end return res end |