diff options
author | Martin Gütlein <martin.guetlein@gmail.com> | 2010-03-08 17:01:23 +0100 |
---|---|---|
committer | Martin Gütlein <martin.guetlein@gmail.com> | 2010-03-08 17:01:23 +0100 |
commit | 9c41e91c6a6067d8b254e0ef5da66c752fabdb4d (patch) | |
tree | 74d4e9702bed59f2d6d5b3fd035e88020dba9f79 | |
parent | e93ada015dbe91cff5b72eb8628c4f52814e3bdb (diff) |
fixed: percent float instead of int, sum of number counts for cv
-rw-r--r-- | lib/merge.rb | 316 | ||||
-rw-r--r-- | lib/validation_db.rb | 36 | ||||
-rw-r--r-- | report/plot_factory.rb | 3 | ||||
-rw-r--r-- | report/report_factory.rb | 2 | ||||
-rw-r--r-- | report/report_test.rb | 9 | ||||
-rw-r--r-- | report/validation_data.rb | 91 | ||||
-rw-r--r-- | report/xml_report.rb | 2 | ||||
-rw-r--r-- | report/xml_report_util.rb | 1 | ||||
-rw-r--r-- | validation/validation_application.rb | 20 | ||||
-rw-r--r-- | validation/validation_format.rb | 3 | ||||
-rw-r--r-- | validation/validation_test.rb | 94 |
11 files changed, 353 insertions, 224 deletions
diff --git a/lib/merge.rb b/lib/merge.rb index f35198d..b42df1e 100644 --- a/lib/merge.rb +++ b/lib/merge.rb @@ -1,29 +1,5 @@ -$merge_count = {} - -class Array - def merge_array( merge_attributes, equal_attributes=nil ) - return nil if self.size == nil - return self[0] if self.size==1 - - m = self[0].merge_object(self[1], merge_attributes, equal_attributes) - (2..self.size-1).each do |i| - m = m.merge_object(self[i], merge_attributes, equal_attributes) - end - return m - end -end - -class Object - - def merge_count() - $merge_count[self] = 1 if $merge_count[self]==nil - return $merge_count[self] - end - - def set_merge_count(merge_count) - $merge_count[self] = merge_count - end +module Lib def self.compute_variance( old_variance, n, new_mean, old_mean, new_value ) # use revursiv formular for computing the variance @@ -33,109 +9,219 @@ class Object (new_mean - old_mean)**2 + (n>1 ? (new_value - new_mean)**2/(n-1) : 0 ) end - - def self.merge_value( value1, weight1, compute_variance, variance1, value2 ) - - if value1.is_a?(Numeric) and value2.is_a?(Numeric) - value = (value1 * weight1 + value2) / (weight1 + 1).to_f; - if compute_variance - variance = compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 ) - end - elsif value1.is_a?(Array) and value2.is_a?(Array) - raise "cannot merge arrays with unequal sizes" if !value2.is_a?(Array) || value1.size!=value2.size - value = [] - variance = [] - (0..value1.size-1).each do |i| - m = merge_value( value1[i], weight1, compute_variance, variance1==nil ? nil : variance1[i], value2[i] ) - value[i] = m[:value] - variance[i] = m[:variance] if compute_variance - end - elsif value1.is_a?(Hash) and value2.is_a?(Hash) - value = {} - variance = {} - value1.keys.each do |k| - m = merge_value( value1[k], weight1, compute_variance, variance1==nil ? nil : variance1[k], value2[k] ) - value[k] = m[:value] - variance[k] = m[:variance] if compute_variance + + module MergeObjects + + @@merge_count = {} + @@avg_attributes = {} + @@sum_attributes = {} + @@non_numeric_attributes = {} + + def self.merge_array_objects( array ) + return nil if array.size == nil + return array[0] if array.size==1 + + m = self.merge_objects(array[0], array[1] ) + (2..array.size-1).each do |i| + m = self.merge_objects(m, array[i] ) end - else - if value1.to_s != value2.to_s - value = value1.to_s + "/" + value2.to_s - else - value = value2.to_s + return m + end + + def self.merge_objects( object1, object2 ) + + raise "classes not equal" if object1.class != object2.class + object_class = object1.class + raise "register which attributes to merge first, nothing found for class "+object_class.to_s unless merge_attributes_registered?(object_class) + raise "not supported, successivly add unmerged object to a merge object" if merge_count(object2)>1 + + new_object = object_class.new + # actually instance_variables would be appropriate, but the datamanager creates objects dynamically + object1.public_methods.each do |method| + v = method.to_sym + if merge_attribute?(object_class, v) + old_variance = (avg_attribute?(object_class,v) and variance_attribute?(new_object,v)) ? object1.send(variance_symbol(v)) : nil + m = merge_value( object_class, v, object1.send(v), object2.send(v), merge_count(object1), old_variance ) + new_object.send("#{v.to_s}=".to_sym, m[:value]) + new_object.send("#{v.to_s}_variance=".to_sym, m[:variance]) if (m[:variance] and variance_attribute?(new_object,v)) + end end + set_merge_count(new_object,merge_count(object1)+1) + return new_object + end + + def self.register_merge_attributes( object_class, avg_attributes, sum_attributes, non_numeric_attributes) + @@avg_attributes[object_class] = avg_attributes + @@sum_attributes[object_class] = sum_attributes + @@non_numeric_attributes[object_class] = non_numeric_attributes end - {:value => value, :variance => (compute_variance ? variance : nil) } - end - - def merge_object( object, merge_attributes, equal_attributes=nil ) - - raise "classes not equal" if object.class != self.class - raise "not supported, successivly add unmerged object to a merge object" if object.merge_count>1 + def self.merge_attributes_registered?( object_class ) + [ @@avg_attributes, @@sum_attributes, @@non_numeric_attributes ].each{ |map| return false unless map.has_key?(object_class) } + return true + end - new_object = self.class.new - merge_attributes.each do |variable| - next if variable.to_s =~ /_variance$/ + protected + def self.merge_value( object_class, attribute, value1, value2, weight1=1, variance1=nil ) - if (equal_attributes and equal_attributes.index(variable) != nil) - new_object.send("#{variable.to_s}=".to_sym, send(variable)) + variance = nil + + if (avg=avg_attribute?(object_class, attribute)) || sum_attribute?(object_class, attribute) + if (value1==nil and value2==nil ) + #do nothing + elsif value1.is_a?(Numeric) and value2.is_a?(Numeric) + if avg + value = (value1 * weight1 + value2) / (weight1 + 1).to_f; + variance = Lib::compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 ) + else + value = value1 + value2 + end + elsif value1.is_a?(Array) and value2.is_a?(Array) + raise "cannot merge arrays with unequal sizes" if !value2.is_a?(Array) || value1.size!=value2.size + value = [] + variance = [] if avg + (0..value1.size-1).each do |i| + if avg + value << (value1[i] * weight1 + value2[i]) / (weight1 + 1).to_f; + variance << Lib::compute_variance( (variance1!=nil && variance1[i]!=nil) ? variance1[i] : 0, weight1+1, value[-1], value1[i], value2[i] ) + else + value << value1[i] + value2[i] + end + end + elsif value1.is_a?(Hash) and value2.is_a?(Hash) + value = {} + variance = {} if avg + value1.keys.each do |k| + if avg + value[k] = (value1[k] * weight1 + value2[k]) / (weight1 + 1).to_f; + variance[k] = Lib::compute_variance( (variance1!=nil && variance1[k]!=nil) ? variance1[k] : 0, weight1+1, value[k], value1[k], value2[k] ) + else + value[k] = value1[k] + value2[k] + end + end + else + raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"', '"+value2.to_s+"'" + end + elsif non_numeric_attribute?(object_class, attribute) + if (value1.is_a?(Hash) and value2.is_a?(Hash)) + value = {} + value1.keys.each do |k| + if merge_attribute?(object_class, k) + m = merge_value( object_class, k, value1[k], value2[k], weight1, (variance1!=nil ? variance1[k] : nil) ) + value[k] = m[:value] + value[variance_symbol(k)] = m[:variance] if m[:variance] + end + end + elsif value1.is_a?(Array) + raise "non-numerical arrays not yet supported" + else + if value1.to_s != value2.to_s + value = value1.to_s + "/" + value2.to_s + else + value = value2.to_s + end + end else - compute_variance = self.respond_to?( (variable.to_s+"_variance").to_sym ) #VAL_ATTR_VARIANCE.index(a)!=nil - old_variance = compute_variance ? send((variable.to_s+"_variance").to_sym) : nil - m = Object::merge_value( send(variable), self.merge_count, compute_variance, old_variance, object.send(variable) ) - new_object.send("#{variable.to_s}=".to_sym, m[:value]) - new_object.send("#{variable.to_s}_variance=".to_sym, m[:variance]) if compute_variance + raise "invalid type '"+attribute.to_s+"'" end + {:value => value, :variance => variance } + end + + def self.merge_count( object ) + @@merge_count[object] = 1 if @@merge_count[object]==nil + return @@merge_count[object] end - - new_object.set_merge_count self.merge_count+1 - return new_object - end - -end - -class MergeTest - - attr_accessor :string, :integer, :float, :hash_value, :float_variance - - def to_s - res = [:string, :integer, :float, :hash_value].collect do |var| - variance = nil - variance = "+-"+send((var.to_s+"_variance")).inspect if self.respond_to?( (var.to_s+"_variance").to_sym ) - var.to_s+":"+send(var).inspect+variance.to_s + + def self.set_merge_count(object, merge_count) + @@merge_count[object] = merge_count + end + + def self.avg_attribute?(object_class, attribute) + return @@avg_attributes[object_class].index(attribute) != nil + end + + def self.sum_attribute?(object_class, attribute) + return @@sum_attributes[object_class].index(attribute) != nil + end + + def self.non_numeric_attribute?(object_class, attribute) + return @@non_numeric_attributes[object_class].index(attribute) != nil + end + + def self.merge_attribute?(object_class, attribute) + return avg_attribute?(object_class, attribute)|| + sum_attribute?(object_class, attribute) || + non_numeric_attribute?(object_class,attribute) + end + + def self.variance_symbol(attribute) + return (attribute.to_s+"_variance").to_sym + end + + def self.variance_attribute?(object, attribute) + return false unless avg_attribute?(object.class, attribute) + begin + return object.respond_to?( variance_symbol(attribute) ) + rescue + return false + end end - res.join(" ") end - def self.demo - to_merge = [] - p = MergeTest.new - p.string = "asdf" - p.integer = 39 - p.float = 78.6 - p.hash_value = {:mixed_key=>80, :string_key=>"tu", :int_key=>70} - to_merge << p - - p = MergeTest.new - p.string = "jkl" - p.integer = 25 - p.float = 35.6 - p.hash_value = {:mixed_key=>"bla", :string_key=>"iu", :int_key=>34} - to_merge << p - - p = MergeTest.new - p.string = "qwert" - p.integer = 100 - p.float = 100 - p.hash_value = {:mixed_key=>45, :string_key=>"op", :int_key=>20} - to_merge << p - - puts "merged: "+to_merge.merge_array([:string, :integer, :float, :hash_value]).to_s + class MergeTest + + attr_accessor :string, :integer, :float, :hash_value, :float, :float_array, :float_variance, :float_array_variance + + AVG = [:float, :float_array, :int_key ] + SUM = [:integer ] + ELSE = [:string, :hash_value] + + def to_s + res = [:string, :integer, :float, :hash_value, :float_array].collect do |var| + variance = nil + begin + variance = "+-"+send((var.to_s+"_variance")).inspect if AVG.index(var)!=nil + rescue + end + var.to_s+":"+send(var).inspect+variance.to_s + end + res.join(" ") + end + + def self.demo + to_merge = [] + p = MergeTest.new + p.string = "asdf" + p.integer = 39 + p.float = 78.6 + p.float_array = [1, 2] + p.hash_value = {:mixed_key=>80, :string_key=>"tu", :int_key=>70} + to_merge << p + + p = MergeTest.new + p.string = "jkl" + p.integer = 25 + p.float = 35.6 + p.float_array = [1, 3] + p.hash_value = {:mixed_key=>"bla", :string_key=>"iu", :int_key=>34} + to_merge << p + + p = MergeTest.new + p.string = "qwert" + p.integer = 100 + p.float = 100 + p.float_array = [2, 3] + p.hash_value = {:mixed_key=>45, :string_key=>"op", :int_key=>20} + to_merge << p + + puts "single:\n"+to_merge.collect{|t| t.to_s+"\n"}.to_s+"\n" + + MergeObjects.register_merge_attributes(to_merge[0].class, AVG, SUM, ELSE) + puts "merged:\n"+MergeObjects.merge_array_objects(to_merge).to_s + end + end - end -#MergeTest.demo - +#Lib::MergeTest.demo diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 01607ce..8f4a540 100644 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -3,33 +3,45 @@ require lib end +require "lib/merge.rb" + module Lib - VAL_PROPS = [ :id, :uri, :model_uri, :training_dataset_uri, :prediction_feature, - :test_dataset_uri, :prediction_dataset_uri, - :created_at, :real_runtime, # :cpu_runtime, - :num_instances, :num_without_class, :percent_without_class, :num_unpredicted, :percent_unpredicted ] + VAL_PROPS_GENERAL = [ :id, :uri, :model_uri, :training_dataset_uri, :prediction_feature, + :test_dataset_uri, :prediction_dataset_uri, :created_at ] + VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ] + VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ] + VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG # :crossvalidation_info VAL_CV_PROPS = [ :crossvalidation_id, :crossvalidation_fold ] # :classification_statistics - VAL_CLASS_PROPS_SINGLE = [ :num_correct, :num_incorrect, :percent_correct, :percent_incorrect ] + VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ] + VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect ] + VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG + # :class_value_statistics - VAL_CLASS_PROPS_PER_CLASS = [ :area_under_roc, :false_negative_rate, :false_positive_rate, - :f_measure, :num_false_positives, :num_false_negatives, - :num_true_positives, :num_true_negatives, :precision, + VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives, :num_false_negatives, + :num_true_positives, :num_true_negatives ] + VAL_CLASS_PROPS_PER_CLASS_AVG = [ :area_under_roc, :false_negative_rate, :false_positive_rate, + :f_measure, :precision, :recall, :true_negative_rate, :true_positive_rate ] - VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS + [ :confusion_matrix ] + VAL_CLASS_PROPS_PER_CLASS = VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_CLASS_PROPS_PER_CLASS_AVG + + VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy] # :regression_statistics VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square ] - CROSS_VAL_PROPS = [:algorithm_uri, :dataset_uri, :num_folds, :stratified, :random_seed] ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS + VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS + VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS + class Validation include DataMapper::Resource @@ -45,9 +57,9 @@ module Lib property :num_instances, Integer property :num_without_class, Integer - property :percent_without_class, Integer + property :percent_without_class, Float property :num_unpredicted, Integer - property :percent_unpredicted, Integer + property :percent_unpredicted, Float property :classification_statistics, Object #Hash property :regression_statistics, Object diff --git a/report/plot_factory.rb b/report/plot_factory.rb index c1a731f..afe98de 100644 --- a/report/plot_factory.rb +++ b/report/plot_factory.rb @@ -109,10 +109,11 @@ module Reports svg_out_file ? show = "-o" : show = "" (title and title.length > 0) ? tit = '-t "'+title+'"' : tit = "" #title = "-t \""+ranking_value_prop+"-Ranking ("+comparables.size.to_s+" "+comparable_prop+"s, "+num_groups.to_s+" "+ranking_group_prop+"s, p < "+p.to_s+")\" " - + cmd = "java -jar "+ENV['RANK_PLOTTER_JAR']+" "+tit+" -c '"+ comparables_array.join(",")+"' -r '"+ranks_array.join(",")+"' "+conf+" "+show #+" > /home/martin/tmp/test.svg" #puts "\nplotting: "+cmd + LOGGER.debug "Plotting ranks: "+cmd.to_s res = "" IO.popen(cmd) do |f| diff --git a/report/report_factory.rb b/report/report_factory.rb index a522901..7484eb1 100644 --- a/report/report_factory.rb +++ b/report/report_factory.rb @@ -118,7 +118,7 @@ module Reports::ReportFactory Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri]) #merged = validation_set.merge([:algorithm_uri, :dataset_uri]) - report = Reports::ReportContent.new("Algorithm comparison report Many datasets") + report = Reports::ReportContent.new("Algorithm comparison report - Many datasets") if (validation_set.first.classification?) report.add_section_result(validation_set,[:algorithm_uri, :test_dataset_uri]+VAL_ATTR_CLASS,"Mean Results","Mean Results") diff --git a/report/report_test.rb b/report/report_test.rb index a75dd76..3960856 100644 --- a/report/report_test.rb +++ b/report/report_test.rb @@ -22,9 +22,14 @@ class Reports::ApplicationTest < Test::Unit::TestCase #get uri #get '/report/validation/1',nil,'HTTP_ACCEPT' => "text/html" - post '/report/validation/1/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css" + #post '/report/validation/1/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css" + + post 'http://ot.validation.de/report/crossvalidation',:validation_uris=>"http://ot.validation.de/crossvalidation/1" + uri = last_response.body.to_s + + #post uri.to_s+'/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css" + #puts last_response.body.to_s.gsub(/\n.*/,"") - puts last_response.body.to_s.gsub(/\n.*/,"") end # # def test_webservice diff --git a/report/validation_data.rb b/report/validation_data.rb index a2b8905..c164674 100644 --- a/report/validation_data.rb +++ b/report/validation_data.rb @@ -48,7 +48,7 @@ module Reports class Validation @@validation_access = Reports::ValidationDB.new - + # for overwriting validation source (other than using webservices) def self.reset_validation_access(validation_access) @@validation_access = validation_access @@ -65,11 +65,10 @@ module Reports VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym } @@validation_attributes.each{ |a| attr_accessor a } - attr_reader :predictions, :merge_count + attr_reader :predictions def initialize(uri = nil) @@validation_access.init_validation(self, uri) if uri - @merge_count = 1 end # returns/creates predictions, cache to save rest-calls/computation time @@ -109,86 +108,8 @@ module Reports def clone_validation new_val = clone VAL_ATTR_VARIANCE.each { |a| new_val.send((a.to_s+"_variance=").to_sym,nil) } - new_val.set_merge_count(1) return new_val end - - # merges this validation and another validation object to a new validation object - # * v1.att = "a", v2.att = "a" => r.att = "a" - # * v1.att = "a", v2.att = "b" => r.att = "a / b" - # * v1.att = "1", v2.att = "2" => r.att = "1.5" - # * the attributes in __equal_attributes__ are assumed to be equal - # - # call-seq: - # merge( validation, equal_attributes) => Reports::Validation - # - def merge_validation( validation, equal_attributes ) - - new_validation = Reports::Validation.new - # validation cannot be merged before - raise "not working" if validation.merge_count > 1 - - @@validation_attributes.each do |a| - next if a.to_s =~ /_variance$/ - - if (equal_attributes.index(a) != nil) - new_validation.send("#{a.to_s}=".to_sym, send(a)) - else - - compute_variance = VAL_ATTR_VARIANCE.index(a)!=nil - old_variance = compute_variance ? send((a.to_s+"_variance").to_sym) : nil - m = Validation::merge_value( send(a), @merge_count, compute_variance, old_variance, validation.send(a) ) - - new_validation.send("#{a.to_s}=".to_sym, m[:value]) - new_validation.send("#{a.to_s+"_variance"}=".to_sym, m[:variance]) if compute_variance - end - end - - new_validation.set_merge_count(@merge_count + 1); - return new_validation - end - - def merge_count - @merge_count - end - - protected - def set_merge_count(c) - @merge_count = c - end - - # merges to values (value1 and value2), value1 has weight weight1, value2 has weight 1, - # computes variance if corresponding params are set - # - # return hash with merge value (:value) and :variance (if necessary) - # - def self.merge_value( value1, weight1, compute_variance, variance1, value2 ) - - if (value1.is_a?(Numeric)) - value = (value1 * weight1 + value2) / (weight1 + 1).to_f; - if compute_variance - variance = Lib::Util::compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 ) - end - elsif value1.is_a?(Array) - raise "not yet implemented : merging arrays" - elsif value1.is_a?(Hash) - value = {} - variance = {} - value1.keys.each do |k| - m = merge_value( value1[k], weight1, compute_variance, variance1==nil ? nil : variance1[k], value2[k] ) - value[k] = m[:value] - variance[k] = m[:variance] if compute_variance - end - else - if value1.to_s != value2.to_s - value = value1.to_s + "/" + value2.to_s - else - value = value2.to_s - end - end - - {:value => value, :variance => (compute_variance ? variance : nil) } - end end # = Reports:ValidationSet @@ -327,11 +248,15 @@ module Reports #compute grouping grouping = Reports::Util.group(@validations, equal_attributes) + Lib::MergeObjects.register_merge_attributes( Reports::Validation, + Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless + Lib::MergeObjects.merge_attributes_registered?(Reports::Validation) + #merge grouping.each do |g| new_set.validations.push(g[0].clone_validation) g[1..-1].each do |v| - new_set.validations[-1] = new_set.validations[-1].merge_validation(v, equal_attributes) + new_set.validations[-1] = Lib::MergeObjects.merge_objects(new_set.validations[-1],v) end end @@ -419,4 +344,4 @@ module Reports end -end
\ No newline at end of file +end diff --git a/report/xml_report.rb b/report/xml_report.rb index be66851..4b62457 100644 --- a/report/xml_report.rb +++ b/report/xml_report.rb @@ -1,6 +1,5 @@ require 'rexml/document' -include REXML ENV['REPORT_DTD'] = "docbook-xml-4.5/docbookx.dtd" unless ENV['REPORT_DTD'] #transfer to absolute path @@ -13,6 +12,7 @@ ENV['REPORT_DTD'] = File.expand_path(ENV['REPORT_DTD']) if File.exist?(ENV['REPO # uses Env-Variable _XMLREPORT_DTD_ to specifiy the dtd # class Reports::XMLReport + include REXML # create new xmlreport def initialize(title, pubdate=nil, author_firstname = nil, author_surname = nil) diff --git a/report/xml_report_util.rb b/report/xml_report_util.rb index 00ff608..d047421 100644 --- a/report/xml_report_util.rb +++ b/report/xml_report_util.rb @@ -4,6 +4,7 @@ # Utilities for XMLReport # module Reports::XMLReportUtil + include REXML # creates a confusion matrix as array (to be used as input for Reports::XMLReport::add_table) # input is confusion matrix as returned by Lib::Predictions.confusion_matrix diff --git a/validation/validation_application.rb b/validation/validation_application.rb index eb3e4a4..eb9574d 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -33,6 +33,14 @@ get '/crossvalidation/?' do Validation::Crossvalidation.all.collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n") end +post '/crossvalidation/loo/?' do + halt 500, "not yet implemented" +end + +get '/crossvalidation/loo/?' do + halt 400, "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results" +end + get '/crossvalidation/:id' do LOGGER.info "get crossvalidation with id "+params[:id].to_s halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) @@ -69,9 +77,11 @@ get '/crossvalidation/:id/statistics' do LOGGER.info "get merged validation-result for crossvalidation with id "+params[:id].to_s halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id]) - to_merge = [:prediction_feature, :num_instances,:num_without_class,:percent_without_class,:num_unpredicted,:percent_unpredicted, - :classification_statistics,:regression_statistics,:crossvalidation_id] - v = Validation::Validation.all(:crossvalidation_id => params[:id]).merge_array(to_merge) + Lib::MergeObjects.register_merge_attributes( Validation::Validation, + Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless + Lib::MergeObjects.merge_attributes_registered?(Validation::Validation) + + v = Lib::MergeObjects.merge_array_objects( Validation::Validation.all(:crossvalidation_id => params[:id]) ) v.uri = nil v.created_at = nil v.id = nil @@ -97,6 +107,10 @@ post '/crossvalidation/?' do end end +get '/training_test_split' do + halt 400, "GET operation not supported, use POST to perform a training_test_split, see "+url_for("/", :full)+" for validation results" +end + get '/?' do LOGGER.info "list all validations" content_type "text/uri-list" diff --git a/validation/validation_format.rb b/validation/validation_format.rb index ed5c7b1..d03dee4 100644 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -30,6 +30,7 @@ module Validation # transpose results per class class_values = {} Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p| + raise "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect unless classification_statistics[p] classification_statistics[p].each do |class_value, property_value| class_values[class_value] = {:class_value => class_value} unless class_values.has_key?(class_value) map = class_values[class_value] @@ -40,6 +41,7 @@ module Validation #converting confusion matrix cells = [] + raise "confusion matrix missing" unless classification_statistics[:confusion_matrix]!=nil classification_statistics[:confusion_matrix].each do |k,v| cell = {} # key in confusion matrix is map with predicted and actual attribute @@ -62,6 +64,7 @@ module Validation # build hash structure and return with to_yaml def to_yaml get_content_as_hash.to_yaml + #super.to_yaml end def rdf_title diff --git a/validation/validation_test.rb b/validation/validation_test.rb index b854f0c..3cecef5 100644 --- a/validation/validation_test.rb +++ b/validation/validation_test.rb @@ -134,13 +134,22 @@ class ValidationTest < Test::Unit::TestCase # #model_uri = "http://ot.model.de/12" # #data_uri_test = "http://ot.dataset.de/67" # -# model_uri = "http://ot.model.de/9" -# data_uri_test = "http://ot.dataset.de/33" +# model_uri = "http://ot.model.de/1" +# data_uri_test = "http://ot.dataset.de/3" # # post '', {:test_dataset_uri => data_uri_test, :model_uri => model_uri, :prediction_feature => FEATURE_URI} # # puts last_response.body # #verify_validation +# +# task = OpenTox::Task.find(last_response.body) +# task.wait_for_completion +# val_uri = task.resource +# puts val_uri +# +# get val_uri +# verify_validation(last_response.body) +# # ensure # #delete_resources # end @@ -151,8 +160,9 @@ class ValidationTest < Test::Unit::TestCase # # #get '/41',nil,'HTTP_ACCEPT' => "application/rdf+xml" #"text/x-yaml" # #puts last_response.body -# data_uri_train = upload_data(WS_DATA, FILE_TRAIN) -# data_uri_test = upload_data(WS_DATA, FILE_TEST) +# +# #data_uri_train = upload_data(WS_DATA, FILE_TRAIN) +# #data_uri_test = upload_data(WS_DATA, FILE_TEST) # # #data_uri_train = WS_DATA+"/"+DATA_TRAIN # #data_uri_test = WS_DATA+"/"+DATA_TEST @@ -177,18 +187,86 @@ class ValidationTest < Test::Unit::TestCase # post '/training_test_split', { :dataset_uri => data_uri, :algorithm_uri => WS_CLASS_ALG, :prediction_feature => FEATURE_URI, # :algorithm_params => "feature_generation_uri="+WS_FEATURE_ALG, :split_ratio=>0.75, :random_seed=>6} # puts last_response.body +# +# task = OpenTox::Task.find(last_response.body) +# task.wait_for_completion +# val_uri = task.resource +# puts val_uri +# +# get val_uri +# puts last_response.body # #verify_validation # ensure # #delete_resources # end # end + + def verify_validation(val_yaml) + + val = YAML.load(val_yaml) + + puts val.inspect + assert_integer val["num_instances".to_sym],0,1000 + num_instances = val["num_instances".to_sym].to_i + + assert_integer val["num_unpredicted".to_sym],0,num_instances + num_unpredicted = val["num_unpredicted".to_sym].to_i + assert_float val["percent_unpredicted".to_sym],0,100 + assert_float_equal(val["percent_unpredicted".to_sym].to_f,100*num_unpredicted/num_instances.to_f,"percent_unpredicted") + + assert_integer val["num_without_class".to_sym],0,num_instances + num_without_class = val["num_without_class".to_sym].to_i + assert_float val["percent_without_class".to_sym],0,100 + assert_float_equal(val["percent_without_class".to_sym].to_f,100*num_without_class/num_instances.to_f,"percent_without_class") + + class_stats = val["classification_statistics".to_sym] + class_value_stats = class_stats["class_value_statistics".to_sym] + class_values = [] + class_value_stats.each do |cvs| + class_values << cvs["class_value".to_sym] + end + puts class_values.inspect + + confusion_matrix = class_stats["confusion_matrix".to_sym] + confusion_matrix_cells = confusion_matrix["confusion_matrix_cell".to_sym] + predictions = 0 + confusion_matrix_cells.each do |confusion_matrix_cell| + predictions += confusion_matrix_cell["confusion_matrix_value".to_sym].to_i + end + assert_int_equal(predictions, num_instances-num_unpredicted) + end + + def assert_int_equal(val1,val2,msg_suffix=nil) + assert(val1==val2,msg_suffix.to_s+" not equal: "+val1.to_s+" != "+val2.to_s) + end + + def assert_float_equal(val1,val2,msg_suffix=nil,epsilon=0.0001) + assert((val1-val2).abs<epsilon,msg_suffix.to_s+" not equal: "+val1.to_s+" != "+val2.to_s+", diff:"+(val1-val2).abs.to_s) + end + + def assert_integer(string_val, min=nil, max=nil) + assert string_val.to_i.to_s==string_val.to_s, string_val.to_s+" not an integer" + assert string_val.to_i>=min if min!=nil + assert string_val.to_i<=max if max!=nil + end + + def assert_float(string_val, min=nil, max=nil) + assert( string_val.to_f.to_s==string_val.to_s || (string_val.to_f.to_s==(string_val.to_s+".0")), + string_val.to_s+" not a float (!="+string_val.to_f.to_s+")") + assert string_val.to_f>=min if min!=nil + assert string_val.to_f<=max if max!=nil + end + def test_nothing #puts "testing nothing" #get '/' + #get '/crossvalidation/loo' + #get '/training_test_split' + #get '/prepare_examples' #get '/test_examples' @@ -197,9 +275,13 @@ class ValidationTest < Test::Unit::TestCase #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "application/rdf+xml" - get '/crossvalidation/1/statistics',:bla=>blub,'HTTP_ACCEPT' => "text/x-yaml" + get '/crossvalidation/1/statistics',nil,'HTTP_ACCEPT' => "text/x-yaml" + + #puts last_response.body + + #get '/2' + verify_validation(last_response.body) - puts last_response.body end # private |