# the variance is computed when merging results for these attributes
VAL_ATTR_VARIANCE = [ :area_under_roc, :percent_correct, :root_mean_squared_error, :mean_absolute_error,
  :r_square, :accuracy ]
# for these attributes a "<attribute>_ranking" accessor is created on Reports::Validation
# (filled by Reports::ValidationSet#compute_ranking)
VAL_ATTR_RANKING = [ :area_under_roc, :percent_correct, :true_positive_rate, :true_negative_rate,
  :weighted_area_under_roc ] #:accuracy ]

# lookup table used by String#nice_attr (attribute name => display name), empty by default
ATTR_NICE_NAME = {}

class String
  # returns a human readable version of an attribute name
  # * the entry of ATTR_NICE_NAME is used if there is one
  # * otherwise a trailing "_id" is removed, underscores are replaced by blanks and
  #   the result is capitalized, e.g. "crossvalidation_id" => "Crossvalidation"
  #
  # call-seq:
  #   nice_attr => string
  #
  def nice_attr()
    return ATTR_NICE_NAME[self] if ATTR_NICE_NAME.has_key?(self)
    to_s.gsub(/_id$/, "").gsub(/_/, " ").capitalize
  end
end

class Object

  # returns a compact string representation for reports
  # * Float: "0" for zero, 2 decimals if abs>0.1, 3 decimals if abs>0.01, else scientific notation
  # * Array: nice strings of the elements, joined with ", "
  # * Hash: "key: value" pairs (both as nice strings), joined with ", "
  # * everything else: to_s
  #
  # call-seq:
  #   to_nice_s => string
  #
  def to_nice_s
    if is_a?(Float)
      if self==0
        return "0"
      elsif abs>0.1
        return "%.2f" % self
      elsif abs>0.01
        return "%.3f" % self
      else
        return "%.2e" % self
      end
    end
    return collect{ |i| i.to_nice_s }.join(", ") if is_a?(Array)
    return collect{ |i,j| i.to_nice_s+": "+j.to_nice_s }.join(", ") if is_a?(Hash)
    return to_s
  end

  # checks whether an object has equal values as stored in the map
  # (each key is sent to the object as method name, the result is compared to the map value)
  # example o.att = "a", o.att2 = 12, o.has_values?({ :att => "a" }) is true
  #
  # call-seq:
  #   has_values?(map) => boolean
  #
  def has_values?(map)
    map.each { |k,v| return false if send(k)!=v }
    return true
  end
end

module Reports

  # = Reports::Validation
  #
  # contains all values of a validation object
  #
  class Validation

    # data source that is used to load validations, predictions, etc.
    @@validation_access = Reports::ValidationDB.new

    # for overwriting validation source (other than using webservices)
    def self.reset_validation_access(validation_access)
      @@validation_access = validation_access
    end

    # delegates to the validation source, see resolve_cv_uris there
    def self.resolve_cv_uris(validation_uris, subjectid)
      @@validation_access.resolve_cv_uris(validation_uris, subjectid)
    end

    # create member variables for all validation properties
    # (plus "<attr>_variance" and "<attr>_ranking" for the attributes listed above)
    @@validation_attributes = Lib::ALL_PROPS +
      VAL_ATTR_VARIANCE.collect{ |a| (a.to_s+"_variance").to_sym } +
      VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym }
    @@validation_attributes.each{ |a| attr_accessor a }

    attr_reader :predictions

    # the validation source initializes this object only if a uri is given,
    # otherwise an empty validation object is created
    def initialize(uri = nil, subjectid = nil)
      @@validation_access.init_validation(self, uri, subjectid) if uri
      @subjectid = subjectid
      #raise "subjectid is nil" unless subjectid
    end

    # returns/creates predictions, cache to save rest-calls/computation time
    # * returns nil (and logs a message) if prediction_dataset_uri is not set
    # * if a task is given, its progress is set to 100 when no predictions have to be loaded;
    #   otherwise the task is handed to the validation source
    #
    # call-seq:
    #   get_predictions => Reports::Predictions
    #
    def get_predictions( task=nil )
      if @predictions
        task.progress(100) if task
        @predictions
      elsif @prediction_dataset_uri
        @predictions = @@validation_access.get_predictions( self, @subjectid, task )
      else
        LOGGER.info("no predictions available, prediction_dataset_uri not set")
        task.progress(100) if task
        nil
      end
    end

    # returns the predictions feature values (i.e. the domain of the class attribute)
    # the result of the validation source is cached
    #
    def get_class_domain()
      @class_domain ||= @@validation_access.get_class_domain(self)
    end

    # is classification/regression validation? cache to save rest-calls
    #
    def feature_type
      return @feature_type unless @feature_type.nil?
      @feature_type = @@validation_access.feature_type(self, @subjectid)
    end

    # returns the predicted variable as provided by the validation source, the result is cached
    #
    def predicted_variable
      return @predicted_variable unless @predicted_variable.nil?
      @predicted_variable = @@validation_access.predicted_variable(self, @subjectid)
    end

    # loads all crossvalidation attributes, of the corresponding cv into this object
    # raises an error if the crossvalidation-id is not set
    def load_cv_attributes
      raise "crossvalidation-id not set" unless @crossvalidation_id
      @@validation_access.init_cv(self)
    end

    @@persistance = Reports::ReportService.persistance

    # returns the uri of the last validation report that is listed for this validation,
    # nil if no report is listed; a uri that was found is cached
    #
    def validation_report_uri
      #puts "searching for validation report: "+self.validation_uri.to_s
      return @validation_report_uri unless @validation_report_uri.nil?
      ids = @@persistance.list_reports("validation",{:validation_uris=>validation_uri })
      @validation_report_uri = Reports::ReportService.instance.get_uri("validation",ids[-1]) if ids && ids.size>0
    end

    # returns the uri of the last crossvalidation report that is listed for the crossvalidation
    # of this validation, nil if no report is listed; a uri that was found is cached
    # raises an error if crossvalidation_uri is not set
    #
    def cv_report_uri
      #puts "searching for cv report: "+self.crossvalidation_uri.to_s
      return @cv_report_uri unless @cv_report_uri.nil?
      raise "no cv uri "+to_yaml unless self.crossvalidation_uri
      ids = @@persistance.list_reports("crossvalidation",{:crossvalidation=>self.crossvalidation_uri.to_s })
      #puts "-> "+ids.inspect
      @cv_report_uri = Reports::ReportService.instance.get_uri("crossvalidation",ids[-1]) if ids && ids.size>0
    end

    # returns a (shallow) clone of this validation, all "<attr>_variance" values are reset to nil
    #
    def clone_validation
      new_val = clone
      VAL_ATTR_VARIANCE.each { |a| new_val.send((a.to_s+"_variance=").to_sym,nil) }
      return new_val
    end
  end

  # = Reports:ValidationSet
  #
  # contains an array of validations, including some functionality as merging validations..
  #
  class ValidationSet

    # creates a set with one Reports::Validation for each uri
    # (the uris are passed through Reports::Validation.resolve_cv_uris first),
    # the set is empty if no uris are given
    def initialize(validation_uris=nil, subjectid=nil)
      @unique_values = {}
      validation_uris = Reports::Validation.resolve_cv_uris(validation_uris, subjectid) if validation_uris
      @validations = Array.new
      validation_uris.each{|u| @validations.push(Reports::Validation.new(u, subjectid))} if validation_uris
    end

    # creates a set from already existing validation objects
    #
    # call-seq:
    #   create(validations) => Reports::ValidationSet
    #
    def self.create(validations)
      set = ValidationSet.new
      validations.each{ |v| set.validations.push(v) }
      set
    end

    # returns the validation at position index
    def get(index)
      return @validations[index]
    end

    #def first()
      #return @validations.first
    #end

    # returns the values of the validations for __attribute__
    # * if unique is true a set is returned, i.e. not redundant info
    # * => if unique is false the size of the returned array is equal to the number of validations
    #
    # call-seq:
    #   get_values(attribute, unique=true) => array
    #
    def get_values(attribute, unique=true)
      a = Array.new
      @validations.each{ |v| a.push(v.send(attribute)) if !unique || a.index(v.send(attribute))==nil }
      return a
    end

    # returns the number of different values that exist for an attribute in the validation set
    #
    # call-seq:
    #   num_different_values(attribute) => integer
    #
    def num_different_values(attribute)
      return get_values(attribute).size
    end

    # returns true if at least one validation has a nil (or false) value for __attribute__
    #
    # call-seq:
    #   has_nil_values?(attribute) => boolean
    #
    def has_nil_values?(attribute)
      @validations.each{ |v| return true unless v.send(attribute) }
      return false
    end

    # loads the attributes of the related crossvalidation into all validation objects
    #
    def load_cv_attributes
      @validations.each{ |v| v.load_cv_attributes }
    end

    # returns the value of __validation_prop__ if it is equal for all validations, nil otherwise
    # * the result is cached per property (including nil)
    # * an empty set has no common value: nil is returned and not cached
    #
    # call-seq:
    #   unique_value(validation_prop) => object or nil
    #
    def unique_value(validation_prop)
      return @unique_values[validation_prop] if @unique_values.has_key?(validation_prop)
      # guard against empty sets (e.g. the result of filter), do not cache as validations may be added later
      return nil if @validations.empty?
      val = @validations[0].send(validation_prop)
      val = nil if @validations[1..-1].any?{ |v| v.send(validation_prop)!=val }
      @unique_values[validation_prop] = val
      return val
    end

#    def get_true_prediction_feature_value
#      if all_classification?
#        class_values = get_class_domain
#        if class_values.size == 2
#          (0..1).each do |i|
#            return class_values[i] if (class_values[i].to_s.downcase == "true" || class_values[i].to_s.downcase == "active")
#          end
#        end
#      end
#      return nil
#    end

    # returns the class domain if it is equal for all validations, nil otherwise
    def get_class_domain( )
      return unique_value("get_class_domain")
    end

    # returns the class values that have to be reported for __attribute__
    # * [ nil ] for classification properties that do not depend on the class value
    # * only the first class value for classification properties with a complement,
    #   if there are exactly two class values
    # * the complete class domain otherwise
    #
    def get_domain_for_attr( attribute )
      class_domain = get_class_domain()
      if Lib::Validation.classification_property?(attribute) and
         !Lib::Validation.depends_on_class_value?(attribute)
        [ nil ]
      elsif Lib::Validation.classification_property?(attribute) and
          class_domain.size==2 and
          Lib::Validation.complement_exists?(attribute)
        [ class_domain[0] ]
      else
        class_domain
      end
    end

    # checks whether all validations are classification/regression validations
    # (returns the common feature type, nil if the feature types differ)
    #
    def unique_feature_type
      return unique_value("feature_type")
    end

    # returns a new set with all validation that have values as specified in the map
    #
    # call-seq:
    #   filter(map) => Reports::ValidationSet
    #
    def filter(map)
      new_set = Reports::ValidationSet.new
      validations.each{ |v| new_set.validations.push(v) if v.has_values?(map) }
      return new_set
    end

    # returns a new set with all validation that the attached block accepted
    # e.g. create set with predictions: collect{ |validation| validation.get_predictions!=nil }
    #
    # call-seq:
    #   collect { |validation| block } => Reports::ValidationSet
    #
    def collect
      new_set = Reports::ValidationSet.new
      validations.each{ |v| new_set.validations.push(v) if yield(v) }
      return new_set
    end

    # returns a 2-dimensional array that can be used as table
    # * first row is the header row: "" followed by the different values of __attribute_col__
    # * each other row starts with a value of __attribute_row__, followed by the
    #   (nice string) values of __attribute_val__ for the matching validations
    # * a cell is nil if no validation matches
    # raises an error if more than one validation matches a cell
    #
    # call-seq:
    #   to_table(attribute_col, attribute_row, attribute_val) => array
    #
    def to_table( attribute_col, attribute_row, attribute_val)

      row_values = get_values(attribute_row)
      #puts row_values.inspect
      col_values = get_values(attribute_col)
      #puts col_values.inspect

      # get domain for classification attribute, i.e. ["true","false"]
      class_domain = get_domain_for_attr(attribute_val)
      # or the attribute has a complementary value, i.e. true_positive_rate
      # -> domain is reduced to one class value
      first_value_elem = (class_domain.size==1 && class_domain[0]!=nil)

      cell_values = {}
      row_values.each do |row|
        col_values.each do |col|
          val = nil
          @validations.each do |v|
            if v.send(attribute_row)==row and v.send(attribute_col)==col
              raise "two validation have equal row and column values"if val!=nil
              val = v.send(attribute_val)
              val = val[class_domain[0]] if first_value_elem
              val = val.to_nice_s
            end
          end
          cell_values[row] = [] if cell_values[row]==nil
          cell_values[row] << val
        end
      end
      #puts cell_values.inspect

      table = []
      table << [ "" ] + col_values
      row_values.each do |row|
        table << [ row ] + cell_values[row]
      end
      #puts table.inspect

      table
    end

    # returns an array, with values for __attributes__, that can be use for a table
    # * first row is header row
    # * other rows are values
    # * for attributes in VAL_ATTR_VARIANCE the variance (if available) is appended with " +- "
    # * if remove_nil_attributes is true, columns without any value are removed
    #
    # call-seq:
    #   to_array(attributes, remove_nil_attributes) => array
    #
    def to_array(attributes, remove_nil_attributes=true)
      array = Array.new
      array.push(attributes.collect{|a| a.to_s.nice_attr})
      attribute_not_nil = Array.new(attributes.size)
      @validations.each do |v|
        index = -1
        array.push(attributes.collect do |a|
          index += 1
          if VAL_ATTR_VARIANCE.index(a)
            variance = v.send( (a.to_s+"_variance").to_sym )
          end

          #variance = " +- "+variance.to_nice_s if variance
          val = v.send(a)
          if val==nil || val.to_s.chomp.size==0
            ''
          else
            attribute_not_nil[index] = true if remove_nil_attributes

            class_domain = get_domain_for_attr(a)
            # get domain for classification attribute, i.e. ["true","false"]
            if class_domain.size==1 && class_domain[0]!=nil
              # or the attribute has a complementary value, i.e. true_positive_rate
              # -> domain is reduced to one class value
              raise "illegal state, value for "+a.to_s+" is no hash: '"+val.to_s+"'" unless (val.is_a?(Hash))
              val = val[class_domain[0]]
              # a hash variance is indexed with the same keys as the value (see hash case below),
              # reduce it to the same class value, otherwise the complete hash would be printed
              variance = variance[class_domain[0]] if variance.is_a?(Hash)
            end

            if variance
              if (val.is_a?(Array))
                raise "not implemented"
              elsif (val.is_a?(Hash))
                val.collect{ |i,j| i.to_nice_s+": "+j.to_nice_s + " +- " +
                  variance[i].to_nice_s }.join(", ")
              else
                val.to_nice_s + " +- " + variance.to_nice_s
              end
            else
              val.to_nice_s
            end
          end
        end)
      end

      if remove_nil_attributes #delete in reverse order to avoid shifting of indices
        (0..attribute_not_nil.size-1).to_a.reverse.each do |i|
          array.each{|row| row.delete_at(i)} unless attribute_not_nil[i]
        end
      end

      return array
    end

    # creates a new validation set, that contains merged validations
    # all validation with equal values for __equal_attributes__ are summed up in one validation, i.e. merged
    #
    # call-seq:
    #   merge(equal_attributes) => Reports::ValidationSet
    #
    def merge(equal_attributes)
      new_set = Reports::ValidationSet.new

      # unique values stay unique when merging
      # derive unique values before, because model dependent props cannot be accessed later (when merging validations from different models)
      # NOTE(review): the hash object itself is handed over, i.e. both sets share one cache - confirm that this is intended
      new_set.unique_values = @unique_values

      #compute grouping
      grouping = Reports::Util.group(@validations, equal_attributes)
      #puts "groups "+grouping.size.to_s

      unless Lib::MergeObjects.merge_attributes_registered?(Reports::Validation)
        Lib::MergeObjects.register_merge_attributes( Reports::Validation,
          Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL)
      end

      #merge
      grouping.each do |g|
        # start with a clone of the first validation, merge the remaining ones into it one by one
        new_set.validations.push(g[0].clone_validation)
        g[1..-1].each do |v|
          new_set.validations[-1] = Lib::MergeObjects.merge_objects(new_set.validations[-1],v)
        end
      end

      return new_set
    end

    # sorts the validations of this set (in place) according to the string representation
    # of __attribute__, in descending order if ascending is false
    #
    # call-seq:
    #   sort(attribute, ascending=true) => array
    #
    def sort(attribute, ascending=true)
      @validations.sort! do |a,b|
        val_a = a.send(attribute).to_s
        val_b = b.send(attribute).to_s
        ascending ? (val_a <=> val_b) : (val_b <=> val_a)
      end
    end

    # creates a new validation set, that contains a ranking for __ranking_attribute__
    # (i.e. for ranking attribute :acc, :acc_ranking is calculated)
    # all validation with equal values for __equal_attributes__ are compared
    # (the one with highest value of __ranking_attribute__ has rank 1, and so on)
    # * neighboring values that differ by less than 0.0001 share the average of their ranks
    # * if the value of the ranking attribute is a hash, __class_value__ selects the value to rank,
    #   an error is raised if it is missing
    #
    # call-seq:
    #   compute_ranking(equal_attributes, ranking_attribute, class_value=nil) => Reports::ValidationSet
    #
    def compute_ranking(equal_attributes, ranking_attribute, class_value=nil )

      #puts "compute_ranking("+equal_attributes.inspect+", "+ranking_attribute.inspect+", "+class_value.to_s+" )"
      # work on clones, the validations of this set are not modified
      new_set = Reports::ValidationSet.new
      (0..@validations.size-1).each do |i|
        new_set.validations.push(@validations[i].clone_validation)
      end

      grouping = Reports::Util.group(new_set.validations, equal_attributes)
      grouping.each do |group|

        # put indices and ranking values for current group into hash
        rank_hash = {}
        (0..group.size-1).each do |i|
          val = group[i].send(ranking_attribute)
          if val.is_a?(Hash)
            if class_value != nil
              raise "no value for class value "+class_value.class.to_s+" "+class_value.to_s+" in hash "+val.inspect.to_s unless val.has_key?(class_value)
              val = val[class_value]
            else
              raise "value for '"+ranking_attribute.to_s+"' is a hash, specify class value plz"
            end
          end
          rank_hash[i] = val
        end
        #puts rank_hash.inspect

        # sort group according to second value (= ranking value), highest value first
        rank_array = rank_hash.sort { |a, b| b[1] <=> a[1] }
        #puts rank_array.inspect

        # create ranks array
        ranks = Array.new
        (0..rank_array.size-1).each do |j|

          val = rank_array.at(j)[1]
          rank = j+1
          ranks.push(rank.to_f)

          # check if previous ranks have equal value
          equal_count = 1
          equal_rank_sum = rank

          while ( j - equal_count >= 0 && (val - rank_array.at(j - equal_count)[1]).abs < 0.0001 )
            equal_rank_sum += ranks.at(j - equal_count)
            equal_count += 1
          end

          # if previous ranks have equal values -> replace with avg rank
          if (equal_count > 1)
            (0..equal_count-1).each do |k|
              ranks[j-k] = equal_rank_sum / equal_count.to_f
            end
          end
        end
        #puts ranks.inspect

        # set rank as validation value
        (0..rank_array.size-1).each do |j|
          index = rank_array.at(j)[0]
          group[index].send( (ranking_attribute.to_s+"_ranking=").to_sym, ranks[j])
        end
      end

      return new_set
    end

    # returns the number of validations in this set
    def size
      return @validations.size
    end

    # returns the (modifiable) array of validations of this set
    def validations
      @validations
    end

    protected
    # hands over already computed unique values to another set, used by merge
    def unique_values=(unique_values)
      @unique_values = unique_values
    end
  end

end