summaryrefslogtreecommitdiff
path: root/lib/predictions.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/predictions.rb')
-rwxr-xr-x[-rw-r--r--]lib/predictions.rb143
1 files changed, 92 insertions, 51 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb
index f6351f8..5850024 100644..100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -22,20 +22,22 @@ module Lib
def initialize( predicted_values,
actual_values,
confidence_values,
- is_classification,
- prediction_feature_values=nil )
+ feature_type,
+ class_domain=nil )
@predicted_values = predicted_values
@actual_values = actual_values
@confidence_values = confidence_values
- @is_classification = is_classification
- @prediction_feature_values = prediction_feature_values
+ @feature_type = feature_type
+ @class_domain = class_domain
@num_classes = 1
#puts "predicted: "+predicted_values.inspect
#puts "actual: "+actual_values.inspect
#puts "confidence: "+confidence_values.inspect
+ raise "unknown feature_type: "+@feature_type.to_s unless
+ @feature_type=="classification" || @feature_type=="regression"
raise "no predictions" if @predicted_values.size == 0
num_info = "predicted:"+@predicted_values.size.to_s+
" confidence:"+@confidence_values.size.to_s+" actual:"+@actual_values.size.to_s
@@ -43,23 +45,28 @@ module Lib
raise "illegal num confidence values "+num_info if @confidence_values.size != @predicted_values.size
@confidence_values.each{ |c| raise "illegal confidence value: '"+c.to_s+"'" unless c==nil or (c.is_a?(Numeric) and c>=0 and c<=1) }
- conf_val_tmp = {}
- @confidence_values.each{ |c| conf_val_tmp[c] = nil }
- if conf_val_tmp.keys.size<2
- LOGGER.warn("prediction w/o confidence values");
- @confidence_values=nil
- end
+ ## check if there is more than one different conf value
+ ## DEPRECATED? not sure anymore what this was about,
+ ## I am pretty sure this was for r-plot of roc curves
+ ## roc curves are now plotted manually
+ #conf_val_tmp = {}
+ #@confidence_values.each{ |c| conf_val_tmp[c] = nil }
+ #if conf_val_tmp.keys.size<2
+ # LOGGER.warn("prediction w/o confidence values");
+ # @confidence_values=nil
+ #end
- if @is_classification
- raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values
- @num_classes = @prediction_feature_values.size
+ case @feature_type
+ when "classification"
+ raise "class_domain missing while performing classification" unless @class_domain
+ @num_classes = @class_domain.size
raise "num classes < 2" if @num_classes<2
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" classification-value ("+v.to_s+"),"+
- "has to be either nil or index of predicted-values" if v!=nil and (v<0 or v>@num_classes)}
+ "has to be either nil or index of predicted-values" if v!=nil and (!v.is_a?(Numeric) or v<0 or v>@num_classes)}
end
- else
- raise "prediction_feature_values != nil while performing regression" if @prediction_feature_values
+ when "regresssion"
+ raise "class_domain != nil while performing regression" if @class_domain
{ "predicted"=>@predicted_values, "actual"=>@actual_values }.each do |s,values|
values.each{ |v| raise "illegal "+s+" regression-value ("+v.to_s+"),"+
"has to be either nil or number" unless v==nil or v.is_a?(Numeric)}
@@ -79,15 +86,16 @@ module Lib
@num_predicted = 0
@num_unpredicted = 0
- if @is_classification
+ case @feature_type
+ when "classification"
@confusion_matrix = []
- @prediction_feature_values.each do |v|
+ @class_domain.each do |v|
@confusion_matrix.push( Array.new( @num_classes, 0 ) )
end
@num_correct = 0
@num_incorrect = 0
- else
+ when "regression"
@sum_error = 0
@sum_abs_error = 0
@sum_squared_error = 0
@@ -118,14 +126,15 @@ module Lib
else
@num_predicted += 1
- if @is_classification
+ case @feature_type
+ when "classification"
@confusion_matrix[actual_value][predicted_value] += 1
if (predicted_value == actual_value)
@num_correct += 1
else
@num_incorrect += 1
end
- else
+ when "regression"
delta = predicted_value - actual_value
@sum_error += delta
@sum_abs_error += delta.abs
@@ -152,21 +161,38 @@ module Lib
end
def percent_correct
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return 0 if @num_with_actual_value==0
- return 100 * @num_correct / @num_with_actual_value.to_f
+ return 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f
end
def percent_incorrect
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return 0 if @num_with_actual_value==0
- return 100 * @num_incorrect / @num_with_actual_value.to_f
+ return 100 * @num_incorrect / (@num_with_actual_value - @num_unpredicted).to_f
end
def accuracy
return percent_correct / 100.0
end
+ def weighted_accuracy
+ raise "no classification" unless @feature_type=="classification"
+ total = 0
+ correct = 0
+ (0..@predicted_values.size-1).each do |i|
+ if @predicted_values[i]!=nil
+ total += @confidence_values[i]
+ correct += @confidence_values[i] if @actual_values[i]==@predicted_values[i]
+ end
+ end
+ if total==0 || correct == 0
+ return 0
+ else
+ return correct / total
+ end
+ end
+
def percent_unpredicted
return 0 if @num_with_actual_value==0
return 100 * @num_unpredicted / @num_with_actual_value.to_f
@@ -186,17 +212,17 @@ module Lib
end
def num_correct
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_correct
end
def num_incorrect
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_incorrect
end
def num_unclassified
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return @num_unpredicted
end
@@ -205,35 +231,39 @@ module Lib
# and values: <int-value>
def confusion_matrix
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
res = {}
(0..@num_classes-1).each do |actual|
(0..@num_classes-1).each do |predicted|
- res[{:confusion_matrix_actual => @prediction_feature_values[actual],
- :confusion_matrix_predicted => @prediction_feature_values[predicted]}] = @confusion_matrix[actual][predicted]
+ res[{:confusion_matrix_actual => @class_domain[actual],
+ :confusion_matrix_predicted => @class_domain[predicted]}] = @confusion_matrix[actual][predicted]
end
end
return res
end
def area_under_roc(class_index=nil)
- return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index==nil
+ return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if
+ class_index==nil
return 0.0 if @confidence_values==nil
LOGGER.warn("TODO: implement approx computiation of AUC,"+
- "so far Wilcoxon-Man-Whitney is used (exponential)") if @predicted_values.size>1000
+ "so far Wilcoxon-Man-Whitney is used (exponential)") if
+ @predicted_values.size>1000
+ #puts "COMPUTING AUC "+class_index.to_s
tp_conf = []
fp_conf = []
(0..@predicted_values.size-1).each do |i|
if @predicted_values[i]==class_index
- if @actual_values[i]==class_index
+ if @actual_values[i]==@predicted_values[i]
tp_conf.push(@confidence_values[i])
else
fp_conf.push(@confidence_values[i])
end
end
end
+ #puts tp_conf.inspect+"\n"+fp_conf.inspect+"\n\n"
return 0.0 if tp_conf.size == 0
return 1.0 if fp_conf.size == 0
@@ -241,9 +271,9 @@ module Lib
tp_conf.each do |tp|
fp_conf.each do |fp|
sum += 1 if tp>fp
+ sum += 0.5 if tp==fp
end
end
-
return sum / (tp_conf.size * fp_conf.size).to_f
end
@@ -441,8 +471,8 @@ module Lib
def sample_correlation_coefficient
# formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
- ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) *
- Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) )
+ ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) *
+ Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) )
end
def total_sum_of_squares
@@ -460,21 +490,30 @@ module Lib
# data for roc-plots ###################################################################################
def get_roc_values(class_value)
+
+ #puts "get_roc_values for class_value: "+class_value.to_s
raise "no confidence values" if @confidence_values==nil
- class_index = @prediction_feature_values.index(class_value)
- raise "class not found "+class_value.to_s if class_index==nil and class_value!=nil
+ raise "no class-value specified" if class_value==nil
+
+ class_index = @class_domain.index(class_value)
+ raise "class not found "+class_value.to_s if class_index==nil
c = []; p = []; a = []
(0..@predicted_values.size-1).each do |i|
# NOTE: not predicted instances are ignored here
- if (@predicted_values[i]!=nil and (class_value==nil or @predicted_values[i]==class_index))
+ if @predicted_values[i]!=nil and @predicted_values[i]==class_index
c << @confidence_values[i]
p << @predicted_values[i]
a << @actual_values[i]
end
end
- return {:predicted_values => p, :actual_values => a, :confidence_values => c}
+ # DO NOT raise an exception here: results of different validations may be concatenated
+ #raise "no instance predicted as '"+class_value+"'" if p.size == 0
+
+ h = {:predicted_values => p, :actual_values => a, :confidence_values => c}
+ #puts h.inspect
+ return h
end
########################################################################################
@@ -488,9 +527,10 @@ module Lib
end
def predicted_value(instance_index)
- if @is_classification
- @predicted_values[instance_index]==nil ? nil : @prediction_feature_values[@predicted_values[instance_index]]
- else
+ case @feature_type
+ when "classification"
+ @predicted_values[instance_index]==nil ? nil : @class_domain[@predicted_values[instance_index]]
+ when "regression"
@predicted_values[instance_index]
end
end
@@ -500,9 +540,10 @@ module Lib
end
def actual_value(instance_index)
- if @is_classification
- @actual_values[instance_index]==nil ? nil : @prediction_feature_values[@actual_values[instance_index]]
- else
+ case @feature_type
+ when "classification"
+ @actual_values[instance_index]==nil ? nil : @class_domain[@actual_values[instance_index]]
+ when "regression"
@actual_values[instance_index]
end
end
@@ -512,13 +553,13 @@ module Lib
end
def classification_miss?(instance_index)
- raise "no classification" unless @is_classification
+ raise "no classification" unless @feature_type=="classification"
return false if predicted_value(instance_index)==nil or actual_value(instance_index)==nil
return predicted_value(instance_index) != actual_value(instance_index)
end
- def classification?
- @is_classification
+ def feature_type
+ @feature_type
end
def confidence_values_available?
@@ -535,7 +576,7 @@ module Lib
def prediction_feature_value_map(proc)
res = {}
(0..@num_classes-1).each do |i|
- res[@prediction_feature_values[i]] = proc.call(i)
+ res[@class_domain[i]] = proc.call(i)
end
return res
end