summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authormguetlein <martin.guetlein@gmail.com>2011-05-18 15:35:52 +0200
committermguetlein <martin.guetlein@gmail.com>2011-05-18 15:35:52 +0200
commitac6a536d12697a98db6847c5229c67372cbbd1e7 (patch)
tree1b5bfaabceb8a8d809724e91d5474ac7f66ef7be /lib
parent9ce03c0f50bb9129b584327d56fa4c9277849227 (diff)
new feature: algorithm comparison report
Diffstat (limited to 'lib')
-rw-r--r--lib/merge.rb2
-rwxr-xr-xlib/predictions.rb19
2 files changed, 15 insertions, 6 deletions
diff --git a/lib/merge.rb b/lib/merge.rb
index ecbe133..f30a3c1 100644
--- a/lib/merge.rb
+++ b/lib/merge.rb
@@ -126,7 +126,7 @@ module Lib
if value1==nil && value2==nil
value = nil
elsif value1.to_s != value2.to_s
- value = value1.to_s + "/" + value2.to_s
+ value = value1.to_s + ";" + value2.to_s
else
value = value2.to_s
end
diff --git a/lib/predictions.rb b/lib/predictions.rb
index 420790e..f97b764 100755
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -170,8 +170,8 @@ module Lib
def percent_correct
raise "no classification" unless @feature_type=="classification"
- return 0 if @num_with_actual_value==0
- return 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f
+ pct = 100 * @num_correct / (@num_with_actual_value - @num_unpredicted).to_f
+ pct.nan? ? 0 : pct
end
def percent_incorrect
@@ -181,7 +181,8 @@ module Lib
end
def accuracy
- return percent_correct / 100.0
+ acc = percent_correct / 100.0
+ acc.nan? ? 0 : acc
end
def weighted_accuracy
@@ -250,6 +251,7 @@ module Lib
return res
end
+ # only takes the instances that are classified as <class-index> into account
def area_under_roc(class_index=nil)
return prediction_feature_value_map( lambda{ |i| area_under_roc(i) } ) if
class_index==nil
@@ -427,8 +429,13 @@ module Lib
return incorrect
end
+ # Note:
+ # * (un-weighted) area under roc is computed with all __predicted__ instances for a certain class
+ # * weighted area under roc weights each auc with the number of __actual__ instances
+ # it's like that because it's like that in weka
def weighted_area_under_roc
- return weighted_measure( :area_under_roc )
+ w_auc = weighted_measure( :area_under_roc )
+ w_auc.nan? ? 0 : w_auc
end
def weighted_f_measure
@@ -436,6 +443,7 @@ module Lib
end
private
+ # the <measure> is weighted with the number of instances for each actual class value
def weighted_measure( measure )
sum_instances = 0
@@ -478,9 +486,10 @@ module Lib
def sample_correlation_coefficient
# formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
- return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
+ scc = ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) *
Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) )
+ ( scc.infinite? || scc.nan? ) ? 0 : scc
end
def total_sum_of_squares