diff options
author | Martin Gütlein <martin.guetlein@gmail.com> | 2009-12-14 15:12:37 +0100 |
---|---|---|
committer | Martin Gütlein <martin.guetlein@gmail.com> | 2009-12-14 15:12:37 +0100 |
commit | f5a6f160afd864848c3b3a11a0f643b395159763 (patch) | |
tree | 74ca62c51f62116a19f567a98d2b88476e54b220 /report/validation_data.rb | |
parent | c647dd756a74bbd2ad61b2e8158d6afe7a175965 (diff) |
added support for non-binary classification
Diffstat (limited to 'report/validation_data.rb')
-rw-r--r-- | report/validation_data.rb | 107 |
1 files changed, 76 insertions, 31 deletions
diff --git a/report/validation_data.rb b/report/validation_data.rb index eb092a1..8696351 100644 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -1,15 +1,18 @@ # the variance is computed when merging results for these attributes -VAL_ATTR_VARIANCE = [ :auc, :acc ] -VAL_ATTR_RANKING = [ :auc, :acc, :spec, :sens ] +VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct ] +VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate ] class Object def to_nice_s return "%.2f" % self if is_a?(Float) + return collect{ |i| i.to_nice_s }.join(", ") if is_a?(Array) + return collect{ |i,j| i.to_nice_s+": "+j.to_nice_s }.join(", ") if is_a?(Hash) return to_s end + # checks weather an object has equal values as stored in the map # example o.att = "a", o.att2 = 12, o.has_values?({ att => a }) is true # @@ -82,6 +85,13 @@ module Reports @predictions = Reports.validation_access.get_predictions( self ) end + # returns the predictions feature values (i.e. the range of the class attribute) + # + def get_prediction_feature_values + return @prediction_feature_values if @prediction_feature_values + @prediction_feature_values = Reports.validation_access.get_prediction_feature_values(:prediction_feature) + end + # loads all crossvalidation attributes, of the corresponding cv into this object def load_cv_attributes raise "crossvalidation-id not set" unless @crossvalidation_id @@ -104,7 +114,7 @@ module Reports # call-seq: # merge( validation, equal_attributes) => Reports::Validation # - def merge( validation, equal_attributes) + def merge_validation( validation, equal_attributes) new_validation = Reports::Validation.new raise "not working" if validation.merge_count > 1 @@ -115,32 +125,13 @@ module Reports if (equal_attributes.index(a) != nil) new_validation.send("#{a.to_s}=".to_sym, send(a)) else - value = nil - variance = nil - - if (send(a).is_a?(Float) || send(a).is_a?(Integer)) - old_value = send(a) - value = (send(a) * @merge_count + validation.send(a)) / (@merge_count + 1).to_f; - if (VAL_ATTR_VARIANCE.index(a) != nil) - # use revursiv formular for computing the variance - # ( see Tysiak, Folgen: explizit und rekursiv, ISSN: 0025-5866 - # http://www.frl.de/tysiakpapers/07_TY_Papers.pdf ) - old_variance = 0 unless (old_variance = send((a.to_s+"_variance").to_sym)) - variance = old_variance*(@merge_count-1)/@merge_count + - (value-old_value)**2 + - (validation.send(a)-value)**2/@merge_count - end - else - if send(a).to_s != validation.send(a).to_s - value = send(a).to_s + "/" + validation.send(a).to_s - else - value = validation.send(a).to_s - end - end - - #value = "test" - new_validation.send("#{a.to_s}=".to_sym, value) - new_validation.send("#{a.to_s+"_variance"}=".to_sym, variance) if variance + + compute_variance = VAL_ATTR_VARIANCE.index(a)!=nil + old_variance = compute_variance ? send((a.to_s+"_variance").to_sym) : nil + m = Validation::merge_value( send(a), @merge_count, compute_variance, old_variance, validation.send(a) ) + + new_validation.send("#{a.to_s}=".to_sym, m[:value]) + new_validation.send("#{a.to_s+"_variance"}=".to_sym, m[:variance]) if compute_variance end end @@ -156,7 +147,45 @@ module Reports def set_merge_count(c) @merge_count = c end - + + # merges to values (value1 and value2), value1 has weight weight1, value2 has weight 1, + # computes variance if corresponding params are set + # + # return hash with merge value (:value) and :variance (if necessary) + # + def self.merge_value( value1, weight1, compute_variance, variance1, value2 ) + + if (value1.is_a?(Numeric)) + value = (value1 * weight1 + value2) / (weight1 + 1).to_f; + if compute_variance + variance1 = 0 if variance1==nil + # use revursiv formular for computing the variance + # ( see Tysiak, Folgen: explizit und rekursiv, ISSN: 0025-5866 + # http://www.frl.de/tysiakpapers/07_TY_Papers.pdf ) + variance = variance1*(weight1-1)/weight1.to_f + + (value-value1)**2 + + (value2-value)**2/weight1.to_f + end + elsif value1.is_a?(Array) + raise "not yet implemented : merging arrays" + elsif value1.is_a?(Hash) + value = {} + variance = {} + value1.keys.each do |k| + m = merge_value( value1[k], weight1, compute_variance, variance1==nil ? nil : variance1[k], value2[k] ) + value[k] = m[:value] + variance[k] = m[:variance] if compute_variance + end + else + if value1.to_s != value2.to_s + value = value1.to_s + "/" + value2.to_s + else + value = value2.to_s + end + end + + {:value => value, :variance => (compute_variance ? variance : nil) } + end end # = Reports:ValidationSet @@ -210,10 +239,26 @@ module Reports return false end + # loads the attributes of the related crossvalidation into all validation objects + # def load_cv_attributes @validations.each{ |v| v.load_cv_attributes } end + # checks weather all validations are classification validations + # + def all_classification? + @validations.each{ |v| return false if v.percent_correct==nil } + true + end + + # checks weather all validations are regression validations + # + def all_regression? + @validations.each{ |v| return false if v.root_mean_squared_error==nil } + true + end + # returns a new set with all validation that have values as specified in the map # # call-seq: @@ -261,7 +306,7 @@ module Reports grouping.each do |g| new_set.validations.push(g[0].clone_validation) g[1..-1].each do |v| - new_set.validations[-1] = new_set.validations[-1].merge(v, equal_attributes) + new_set.validations[-1] = new_set.validations[-1].merge_validation(v, equal_attributes) end end |