summaryrefslogtreecommitdiff
path: root/lib/predictions.rb
diff options
context:
space:
mode:
authorMartin Gütlein <martin.guetlein@gmail.com>2009-12-14 15:12:37 +0100
committerMartin Gütlein <martin.guetlein@gmail.com>2009-12-14 15:12:37 +0100
commitf5a6f160afd864848c3b3a11a0f643b395159763 (patch)
tree74ca62c51f62116a19f567a98d2b88476e54b220 /lib/predictions.rb
parentc647dd756a74bbd2ad61b2e8158d6afe7a175965 (diff)
added support for non-binary classification
Diffstat (limited to 'lib/predictions.rb')
-rw-r--r--lib/predictions.rb385
1 files changed, 385 insertions, 0 deletions
diff --git a/lib/predictions.rb b/lib/predictions.rb
new file mode 100644
index 0000000..a1b5292
--- /dev/null
+++ b/lib/predictions.rb
@@ -0,0 +1,385 @@
+
# Load-time setup for the R bridge: R_HOME must be set, and it is put at the
# front of PATH (unless already listed) before rinruby is required.
# NOTE(review): presumably rinruby locates the R executable via PATH -- confirm.
raise "Environment variable R_HOME missing" unless ENV['R_HOME']

r_home = ENV['R_HOME']
current_path = ENV['PATH'].to_s # PATH may be unset; treat that as empty instead of crashing on nil
unless current_path.split(File::PATH_SEPARATOR).include?(r_home)
  # File::PATH_SEPARATOR is ":" on Unix-like systems, so behaviour there is unchanged
  ENV['PATH'] = current_path.empty? ? r_home : r_home + File::PATH_SEPARATOR + current_path
end
require 'rinruby'
+
module Lib

  # Pairs the predictions of a model with the actual values and derives
  # validation statistics from them.
  #
  # For classification, predicted and actual values are indices into
  # prediction_feature_values (nil marks a missing value). Regression is not
  # implemented yet: constructing a non-classification instance raises
  # "regression not yet implemented".
  class Predictions

    # predicted_values::           one entry per instance: class index, or nil if nothing was predicted
    # actual_values::              one entry per instance: class index, or nil if the actual value is unknown
    # confidence_values::          one entry per instance (same length as predicted_values)
    # prediction_feature::         stored as given; not used for any statistic in this class
    # is_classification::          true for classification, false/nil for regression
    # prediction_feature_values::  the class values (at least 2); required for classification,
    #                              must be nil for regression
    #
    # Raises RuntimeError if the inputs are empty, differ in length, contain an
    # index outside 0...num_classes, or do not fit the chosen mode.
    def initialize(predicted_values,
                   actual_values,
                   confidence_values,
                   prediction_feature,
                   is_classification,
                   prediction_feature_values=nil)

      @predicted_values = predicted_values
      @actual_values = actual_values
      @confidence_values = confidence_values
      @prediction_feature = prediction_feature
      @is_classification = is_classification
      @prediction_feature_values = prediction_feature_values
      @num_classes = 1

      raise "no predictions" if @predicted_values.size == 0
      num_info = "predicted:#{@predicted_values.size}" +
                 " confidence:#{@confidence_values.size} actual:#{@actual_values.size}"
      raise "illegal num actual values " + num_info if @actual_values.size != @predicted_values.size
      raise "illegal num confidence values " + num_info if @confidence_values.size != @predicted_values.size

      if @is_classification
        raise "prediction_feature_values missing while performing classification" unless @prediction_feature_values
        @num_classes = @prediction_feature_values.size
        raise "num classes < 2" if @num_classes < 2
        [["predicted", @predicted_values], ["actual", @actual_values]].each do |name, values|
          values.each do |v|
            # valid indices are 0...num_classes; an index equal to num_classes
            # would address a non-existing confusion matrix cell later on
            next if v.nil? || (v >= 0 && v < @num_classes)
            raise "illegal #{name} index (#{v}), has to be either nil or index of predicted-values"
          end
        end
      elsif @prediction_feature_values
        raise "prediction_feature_values != nil while performing regression"
      end

      init_stats
      @predicted_values.each_index do |i|
        update_stats(@predicted_values[i], @actual_values[i], @confidence_values[i])
      end
    end

    # Resets all counters and, for classification, the confusion matrix
    # (indexed as [actual][predicted]). Raises for regression.
    def init_stats
      @num_no_actual_value = 0
      @num_with_actual_value = 0

      @num_predicted = 0
      @num_unpredicted = 0

      raise "regression not yet implemented" unless @is_classification

      @confusion_matrix = Array.new(@num_classes) { Array.new(@num_classes, 0) }
      @num_correct = 0
      @num_incorrect = 0
    end

    # Adds a single instance to the statistics. Instances without an actual
    # value are only counted as such; instances with an actual value but
    # without a prediction count as unpredicted. confidence_value is accepted
    # but not used here.
    def update_stats(predicted_value, actual_value, confidence_value)
      if actual_value.nil?
        @num_no_actual_value += 1
        return
      end
      @num_with_actual_value += 1

      if predicted_value.nil?
        @num_unpredicted += 1
        return
      end
      @num_predicted += 1

      raise "regression not yet implemented" unless @is_classification

      @confusion_matrix[actual_value][predicted_value] += 1
      if predicted_value == actual_value
        @num_correct += 1
      else
        @num_incorrect += 1
      end
    end

    # Percentage of correctly predicted instances among those with an actual
    # value (0.0 if no instance has an actual value).
    def percent_correct
      raise "no classification" unless @is_classification
      percentage(@num_correct, @num_with_actual_value)
    end

    # Percentage of wrongly predicted instances among those with an actual
    # value (0.0 if no instance has an actual value).
    def percent_incorrect
      raise "no classification" unless @is_classification
      percentage(@num_incorrect, @num_with_actual_value)
    end

    # Percentage of instances without prediction among those with an actual
    # value (0.0 if no instance has an actual value).
    def percent_unpredicted
      percentage(@num_unpredicted, @num_with_actual_value)
    end

    # Number of instances that have an actual value but no prediction.
    def num_unpredicted
      @num_unpredicted
    end

    # Percentage of all instances that have no actual value.
    def percent_without_class
      percentage(@num_no_actual_value, @predicted_values.size)
    end

    # Number of instances that have no actual value.
    def num_without_class
      @num_no_actual_value
    end

    def num_correct
      raise "no classification" unless @is_classification
      @num_correct
    end

    def num_incorrect
      raise "no classification" unless @is_classification
      @num_incorrect
    end

    # Same count as num_unpredicted, but only available for classification.
    def num_unclassified
      raise "no classification" unless @is_classification
      @num_unpredicted
    end

    # Returns the confusion matrix as a hash that maps
    # {:actual => class_value, :predicted => class_value} to the number of
    # instances in that cell.
    def confusion_matrix
      raise "no classification" unless @is_classification
      res = {}
      @prediction_feature_values.each_with_index do |actual_label, actual|
        @prediction_feature_values.each_with_index do |predicted_label, predicted|
          res[{:actual => actual_label, :predicted => predicted_label}] = @confusion_matrix[actual][predicted]
        end
      end
      res
    end

    # Area under the ROC curve for the class with the given index, computed as
    # the Wilcoxon-Mann-Whitney statistic over the instances predicted as that
    # class: the fraction of (true positive, false positive) pairs in which the
    # true positive has the higher confidence; ties count one half.
    # Returns 0.0 without true positives, 1.0 with true but no false positives.
    # Without class_index, returns a hash class value => AUC.
    def area_under_roc(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if class_index.nil?

      # the pairwise comparison below is quadratic in the number of predictions;
      # LOGGER is defined outside this file, so only use it when available
      if @predicted_values.size > 1000 && defined?(LOGGER)
        LOGGER.warn("TODO: implement approx computation of AUC, " +
          "so far Wilcoxon-Mann-Whitney is used (quadratic)")
      end

      tp_conf = []
      fp_conf = []
      @predicted_values.each_with_index do |predicted, i|
        next unless predicted == class_index
        if @actual_values[i] == class_index
          tp_conf.push(@confidence_values[i])
        else
          fp_conf.push(@confidence_values[i])
        end
      end

      return 0.0 if tp_conf.size == 0
      return 1.0 if fp_conf.size == 0

      score = 0.0
      tp_conf.each do |tp|
        fp_conf.each do |fp|
          if tp > fp
            score += 1.0
          elsif tp == fp
            score += 0.5 # a tie is half a win, otherwise equal confidences bias the AUC towards 0
          end
        end
      end
      score / (tp_conf.size * fp_conf.size)
    end

    # Harmonic mean of precision and recall (0 if both are 0).
    # Without class_index, returns a hash class value => f-measure.
    def f_measure(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| f_measure(i) } ) if class_index.nil?

      prec = precision(class_index)
      rec = recall(class_index)
      return 0 if prec == 0 && rec == 0
      2 * prec * rec / (prec + rec).to_f
    end

    # Correct predictions of the class divided by all predictions of the class
    # (0 if the class was never predicted).
    # Without class_index, returns a hash class value => precision.
    def precision(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| precision(i) } ) if class_index.nil?

      ratio(@confusion_matrix[class_index][class_index], column_total(class_index))
    end

    # Alias for true_positive_rate.
    def recall(class_index=nil)
      true_positive_rate(class_index)
    end

    # True negatives divided by all instances whose actual class is another
    # class (0 if there are none).
    # Without class_index, returns a hash class value => rate.
    def true_negative_rate(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| true_negative_rate(i) } ) if class_index.nil?

      ratio(num_true_negatives(class_index), matrix_total - row_total(class_index))
    end

    # Number of instances where neither the actual nor the predicted class is
    # the given class. Without class_index, returns a hash class value => count.
    def num_true_negatives(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| num_true_negatives(i) } ) if class_index.nil?

      # inclusion-exclusion: the diagonal cell is part of both the row and the column
      matrix_total - row_total(class_index) - column_total(class_index) +
        @confusion_matrix[class_index][class_index]
    end

    # Correct predictions of the class divided by all instances that actually
    # belong to the class (0 if there are none).
    # Without class_index, returns a hash class value => rate.
    def true_positive_rate(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| true_positive_rate(i) } ) if class_index.nil?

      ratio(@confusion_matrix[class_index][class_index], row_total(class_index))
    end

    # Number of instances of the class that were predicted as this class.
    # Without class_index, returns a hash class value => count.
    def num_true_positives(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| num_true_positives(i) } ) if class_index.nil?

      @confusion_matrix[class_index][class_index]
    end

    # Instances of the class that were predicted as another class, divided by
    # all instances that actually belong to the class (0 if there are none).
    # Without class_index, returns a hash class value => rate.
    def false_negative_rate(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| false_negative_rate(i) } ) if class_index.nil?

      ratio(num_false_negatives(class_index), row_total(class_index))
    end

    # Number of instances of the class that were predicted as another class.
    # Without class_index, returns a hash class value => count.
    def num_false_negatives(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| num_false_negatives(i) } ) if class_index.nil?

      row_total(class_index) - @confusion_matrix[class_index][class_index]
    end

    # Instances of other classes that were predicted as the class, divided by
    # all instances whose actual class is another class (0 if there are none).
    # Without class_index, returns a hash class value => rate.
    def false_positive_rate(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| false_positive_rate(i) } ) if class_index.nil?

      ratio(num_false_positives(class_index), matrix_total - row_total(class_index))
    end

    # Number of instances of other classes that were predicted as the class.
    # Without class_index, returns a hash class value => count.
    def num_false_positives(class_index=nil)
      return prediction_feature_value_map( lambda{ |i| num_false_positives(i) } ) if class_index.nil?

      column_total(class_index) - @confusion_matrix[class_index][class_index]
    end

    ########################################################################################

    # Confidence values of all instances predicted as class_value, in instance
    # order. Raises if class_value is not one of the prediction feature values.
    def roc_confidence_values(class_value)
      class_index = index_of_class(class_value)
      res = []
      @predicted_values.each_with_index do |predicted, i|
        res.push(@confidence_values[i]) if predicted == class_index
      end
      res
    end

    # For all instances predicted as class_value, in instance order: 1 if the
    # actual value is class_value as well, 0 otherwise.
    # Raises if class_value is not one of the prediction feature values.
    def roc_actual_values(class_value)
      class_index = index_of_class(class_value)
      res = []
      @predicted_values.each_with_index do |predicted, i|
        next unless predicted == class_index
        res.push(@actual_values[i] == class_index ? 1 : 0)
      end
      res
    end

    ########################################################################################

    def num_instances
      @predicted_values.size
    end

    # Predicted class value (not the index) of the instance, nil if unpredicted.
    def predicted_value(instance_index)
      index = @predicted_values[instance_index]
      index.nil? ? nil : @prediction_feature_values[index]
    end

    # Actual class value (not the index) of the instance, nil if unknown.
    def actual_value(instance_index)
      index = @actual_values[instance_index]
      index.nil? ? nil : @prediction_feature_values[index]
    end

    def confidence_value(instance_index)
      @confidence_values[instance_index]
    end

    # True if the instance has both a prediction and an actual value and the
    # two differ.
    def classification_miss?(instance_index)
      raise "no classification" unless @is_classification
      predicted = predicted_value(instance_index)
      actual = actual_value(instance_index)
      return false if predicted.nil? || actual.nil?
      predicted != actual
    end

    def classification?
      @is_classification
    end

    ###################################################################################################################

    private

    # Calls proc with every class index and returns a hash
    # class value => result of the call.
    def prediction_feature_value_map(proc)
      res = {}
      @prediction_feature_values.each_with_index do |value, i|
        res[value] = proc.call(i)
      end
      res
    end

    # Index of class_value within the prediction feature values; raises if absent.
    def index_of_class(class_value)
      class_index = @prediction_feature_values.index(class_value)
      raise "class not found " + class_value.to_s if class_index.nil?
      class_index
    end

    # Number of counted instances whose actual class is class_index.
    def row_total(class_index)
      @confusion_matrix[class_index].inject(0) { |sum, count| sum + count }
    end

    # Number of counted instances that were predicted as class_index.
    def column_total(class_index)
      @confusion_matrix.inject(0) { |sum, row| sum + row[class_index] }
    end

    # Number of instances counted in the confusion matrix.
    def matrix_total
      @confusion_matrix.inject(0) { |sum, row| sum + row.inject(0) { |s, count| s + count } }
    end

    # count / total as float; 0 if total is 0.
    def ratio(count, total)
      return 0 if total == 0
      count / total.to_f
    end

    # count as percentage of total; 0.0 (instead of NaN) if total is 0.
    def percentage(count, total)
      return 0.0 if total == 0
      100 * count / total.to_f
    end

  end
end