fixed: percent properties are Float instead of Integer; number counts are summed (not averaged) when merging cross-validation results

author: Martin Gütlein <martin.guetlein@gmail.com> 2010-03-08 17:01:23 +0100
committer: Martin Gütlein <martin.guetlein@gmail.com> 2010-03-08 17:01:23 +0100
commit: 9c41e91c6a6067d8b254e0ef5da66c752fabdb4d (patch)
tree: 74d4e9702bed59f2d6d5b3fd035e88020dba9f79
parent: e93ada015dbe91cff5b72eb8628c4f52814e3bdb (diff)
11 files changed, 353 insertions, 224 deletions
diff --git a/lib/merge.rb b/lib/merge.rb
index f35198d..b42df1e 100644
--- a/lib/merge.rb
+++ b/lib/merge.rb
@@ -1,29 +1,5 @@
 
-$merge_count = {}
-
-class Array
-  def merge_array( merge_attributes, equal_attributes=nil )
-    return nil if self.size == nil
-    return self[0] if self.size==1
-      
-    m = self[0].merge_object(self[1], merge_attributes, equal_attributes)
-    (2..self.size-1).each do |i|
-      m = m.merge_object(self[i], merge_attributes, equal_attributes)
-    end
-    return m
-  end
-end
-
-class Object
-  
-  def merge_count()
-    $merge_count[self] = 1 if $merge_count[self]==nil
-    return $merge_count[self] 
-  end
-  
-  def set_merge_count(merge_count)
-    $merge_count[self] = merge_count
-  end
+module Lib
   
   def self.compute_variance( old_variance, n, new_mean, old_mean, new_value )
     # use recursive formula for computing the variance
@@ -33,109 +9,219 @@ class Object
            (new_mean - old_mean)**2 +
            (n>1 ? (new_value - new_mean)**2/(n-1) : 0 )
   end
-    
-  def self.merge_value( value1, weight1, compute_variance, variance1, value2 )
-    
-    if value1.is_a?(Numeric) and value2.is_a?(Numeric)
-      value = (value1 * weight1 + value2) / (weight1 + 1).to_f;
-      if compute_variance
-        variance = compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 )
-      end
-    elsif value1.is_a?(Array) and value2.is_a?(Array)
-      raise "cannot merge arrays with unequal sizes" if !value2.is_a?(Array) || value1.size!=value2.size
-      value = []
-      variance = []
-      (0..value1.size-1).each do |i|
-        m = merge_value( value1[i], weight1, compute_variance, variance1==nil ? nil : variance1[i], value2[i] )
-        value[i] = m[:value]
-        variance[i] = m[:variance] if compute_variance
-      end
-    elsif value1.is_a?(Hash) and value2.is_a?(Hash)
-      value = {}
-      variance = {}
-      value1.keys.each do |k|
-        m = merge_value( value1[k], weight1, compute_variance, variance1==nil ? nil : variance1[k], value2[k] )
-        value[k] = m[:value]
-        variance[k] = m[:variance] if compute_variance
+  
+  module MergeObjects
+  
+    @@merge_count = {}
+    @@avg_attributes = {}
+    @@sum_attributes = {}
+    @@non_numeric_attributes = {}
+  
+    def self.merge_array_objects( array )
+      return nil if array.size == nil
+      return array[0] if array.size==1
+        
+      m = self.merge_objects(array[0], array[1] )
+      (2..array.size-1).each do |i|
+        m = self.merge_objects(m, array[i] )
       end
-    else
-      if value1.to_s != value2.to_s
-        value = value1.to_s + "/" + value2.to_s
-      else
-        value = value2.to_s
+      return m
+    end
+    
+    def self.merge_objects( object1, object2 )
+      
+      raise "classes not equal" if object1.class != object2.class
+      object_class = object1.class
+      raise "register which attributes to merge first, nothing found for class "+object_class.to_s unless merge_attributes_registered?(object_class)
+      raise "not supported, successivly add unmerged object to a merge object" if merge_count(object2)>1
+      
+      new_object = object_class.new
+      # actually instance_variables would be appropriate, but the datamanager creates objects dynamically
+      object1.public_methods.each do |method|
+        v = method.to_sym
+        if merge_attribute?(object_class, v)
+          old_variance = (avg_attribute?(object_class,v) and variance_attribute?(new_object,v)) ? object1.send(variance_symbol(v)) : nil
+          m = merge_value( object_class, v, object1.send(v), object2.send(v), merge_count(object1), old_variance  )
+          new_object.send("#{v.to_s}=".to_sym, m[:value])
+          new_object.send("#{v.to_s}_variance=".to_sym, m[:variance]) if (m[:variance] and variance_attribute?(new_object,v))
+        end
       end
+      set_merge_count(new_object,merge_count(object1)+1)
+      return new_object
+    end 
+     
+    def self.register_merge_attributes( object_class, avg_attributes, sum_attributes, non_numeric_attributes)
+      @@avg_attributes[object_class] = avg_attributes
+      @@sum_attributes[object_class] = sum_attributes
+      @@non_numeric_attributes[object_class] = non_numeric_attributes
     end
     
-    {:value => value, :variance => (compute_variance ? variance : nil) }
-  end 
-  
-  def merge_object( object, merge_attributes, equal_attributes=nil )
-
-    raise "classes not equal" if object.class != self.class
-    raise "not supported, successivly add unmerged object to a merge object" if object.merge_count>1
+    def self.merge_attributes_registered?( object_class )
+      [ @@avg_attributes, @@sum_attributes, @@non_numeric_attributes ].each{ |map| return false unless map.has_key?(object_class) }
+      return true
+    end
     
-    new_object = self.class.new
-    merge_attributes.each do |variable|
-      next if variable.to_s =~ /_variance$/
+    protected
+    def self.merge_value( object_class, attribute, value1, value2, weight1=1, variance1=nil )
       
-      if (equal_attributes and equal_attributes.index(variable) != nil)
-        new_object.send("#{variable.to_s}=".to_sym, send(variable))
+      variance = nil
+      
+      if (avg=avg_attribute?(object_class, attribute)) || sum_attribute?(object_class, attribute)
+        if (value1==nil and value2==nil )
+          #do nothing
+        elsif value1.is_a?(Numeric) and value2.is_a?(Numeric)
+          if avg
+            value = (value1 * weight1 + value2) / (weight1 + 1).to_f;
+            variance = Lib::compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 )
+          else
+            value = value1 + value2
+          end
+        elsif value1.is_a?(Array) and value2.is_a?(Array)
+          raise "cannot merge arrays with unequal sizes" if !value2.is_a?(Array) || value1.size!=value2.size
+          value = []
+          variance = [] if avg
+          (0..value1.size-1).each do |i|
+            if avg 
+              value << (value1[i] * weight1 + value2[i]) / (weight1 + 1).to_f;
+              variance << Lib::compute_variance( (variance1!=nil && variance1[i]!=nil) ? variance1[i] : 0, weight1+1, value[-1], value1[i], value2[i] )
+            else
+              value << value1[i] + value2[i]
+            end
+          end
+        elsif value1.is_a?(Hash) and value2.is_a?(Hash)
+          value = {}
+          variance = {} if avg
+          value1.keys.each do |k|
+            if avg 
+              value[k] = (value1[k] * weight1 + value2[k]) / (weight1 + 1).to_f;
+              variance[k] = Lib::compute_variance( (variance1!=nil && variance1[k]!=nil) ? variance1[k] : 0, weight1+1, value[k], value1[k], value2[k] )
+            else
+              value[k] = value1[k] + value2[k]
+            end
+          end        
+        else
+          raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"', '"+value2.to_s+"'"
+        end
+      elsif non_numeric_attribute?(object_class, attribute)
+        if (value1.is_a?(Hash) and value2.is_a?(Hash))
+          value = {}
+          value1.keys.each do |k|
+            if merge_attribute?(object_class, k)
+              m = merge_value( object_class, k, value1[k], value2[k], weight1, (variance1!=nil ? variance1[k] : nil) )
+              value[k] = m[:value]
+              value[variance_symbol(k)] = m[:variance] if m[:variance] 
+            end
+          end
+        elsif value1.is_a?(Array)
+          raise "non-numerical arrays not yet supported"
+        else
+          if value1.to_s != value2.to_s
+            value = value1.to_s + "/" + value2.to_s
+          else
+            value = value2.to_s
+          end
+        end
       else
-        compute_variance = self.respond_to?( (variable.to_s+"_variance").to_sym ) #VAL_ATTR_VARIANCE.index(a)!=nil
-        old_variance = compute_variance ? send((variable.to_s+"_variance").to_sym) : nil 
-        m = Object::merge_value( send(variable), self.merge_count, compute_variance, old_variance, object.send(variable) )
-        new_object.send("#{variable.to_s}=".to_sym, m[:value])
-        new_object.send("#{variable.to_s}_variance=".to_sym, m[:variance]) if compute_variance
+        raise "invalid type '"+attribute.to_s+"'"
       end
+      {:value => value, :variance => variance }
+    end 
+    
+    def self.merge_count( object )
+      @@merge_count[object] = 1 if @@merge_count[object]==nil
+      return @@merge_count[object] 
     end
-
-    new_object.set_merge_count self.merge_count+1
-    return new_object
-  end 
-  
-end
-
-class MergeTest
-  
-  attr_accessor :string, :integer, :float, :hash_value, :float_variance 
- 
-  def to_s
-    res = [:string, :integer, :float, :hash_value].collect do |var|
-       variance = nil
-       variance = "+-"+send((var.to_s+"_variance")).inspect if self.respond_to?( (var.to_s+"_variance").to_sym )
-       var.to_s+":"+send(var).inspect+variance.to_s
+    
+    def self.set_merge_count(object, merge_count)
+      @@merge_count[object] = merge_count
+    end
+    
+    def self.avg_attribute?(object_class, attribute)
+      return @@avg_attributes[object_class].index(attribute) != nil
+    end
+    
+    def self.sum_attribute?(object_class, attribute)
+      return @@sum_attributes[object_class].index(attribute) != nil
+    end
+    
+    def self.non_numeric_attribute?(object_class, attribute)
+      return @@non_numeric_attributes[object_class].index(attribute) != nil
+    end
+    
+    def self.merge_attribute?(object_class, attribute)
+      return avg_attribute?(object_class, attribute)|| 
+        sum_attribute?(object_class, attribute) || 
+        non_numeric_attribute?(object_class,attribute)
+    end
+    
+    def self.variance_symbol(attribute)
+      return (attribute.to_s+"_variance").to_sym
+    end
+    
+    def self.variance_attribute?(object, attribute)
+      return false unless avg_attribute?(object.class, attribute)    
+      begin
+        return object.respond_to?( variance_symbol(attribute) )
+      rescue
+        return false
+      end
     end
-    res.join(" ")
   end
   
-  def self.demo
-    to_merge = []
-    p = MergeTest.new
-    p.string = "asdf"
-    p.integer = 39
-    p.float = 78.6
-    p.hash_value = {:mixed_key=>80, :string_key=>"tu", :int_key=>70}
-    to_merge << p
-    
-    p = MergeTest.new
-    p.string = "jkl"
-    p.integer = 25
-    p.float = 35.6
-    p.hash_value = {:mixed_key=>"bla", :string_key=>"iu", :int_key=>34}
-    to_merge << p
-    
-    p = MergeTest.new
-    p.string = "qwert"
-    p.integer = 100
-    p.float = 100
-    p.hash_value = {:mixed_key=>45, :string_key=>"op", :int_key=>20}
-    to_merge << p
-    
-    puts "merged: "+to_merge.merge_array([:string, :integer, :float, :hash_value]).to_s    
+  class MergeTest
+    
+    attr_accessor :string, :integer, :float, :hash_value, :float, :float_array, :float_variance, :float_array_variance 
+    
+    AVG = [:float, :float_array, :int_key ] 
+    SUM = [:integer ]
+    ELSE = [:string, :hash_value]
+    
+    def to_s
+      res = [:string, :integer, :float, :hash_value, :float_array].collect do |var|
+         variance = nil
+         begin
+            variance = "+-"+send((var.to_s+"_variance")).inspect if AVG.index(var)!=nil
+         rescue
+         end
+         var.to_s+":"+send(var).inspect+variance.to_s
+      end
+      res.join(" ")
+    end
+    
+    def self.demo
+      to_merge = []
+      p = MergeTest.new
+      p.string = "asdf"
+      p.integer = 39
+      p.float = 78.6
+      p.float_array = [1, 2]
+      p.hash_value = {:mixed_key=>80, :string_key=>"tu", :int_key=>70}
+      to_merge << p
+      
+      p = MergeTest.new
+      p.string = "jkl"
+      p.integer = 25
+      p.float = 35.6
+      p.float_array = [1, 3]
+      p.hash_value = {:mixed_key=>"bla", :string_key=>"iu", :int_key=>34}
+      to_merge << p
+      
+      p = MergeTest.new
+      p.string = "qwert"
+      p.integer = 100
+      p.float = 100
+      p.float_array = [2, 3]
+      p.hash_value = {:mixed_key=>45, :string_key=>"op", :int_key=>20}
+      to_merge << p
+      
+      puts "single:\n"+to_merge.collect{|t| t.to_s+"\n"}.to_s+"\n"
+      
+      MergeObjects.register_merge_attributes(to_merge[0].class, AVG, SUM, ELSE)
+      puts "merged:\n"+MergeObjects.merge_array_objects(to_merge).to_s    
+    end
+    
   end
-  
 end
 
-#MergeTest.demo
-
+#Lib::MergeTest.demo
 
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 01607ce..8f4a540 100644
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -3,33 +3,45 @@
   require lib
 end
 
+require "lib/merge.rb"
+
 module Lib
 
-  VAL_PROPS = [ :id, :uri, :model_uri, :training_dataset_uri, :prediction_feature,
-                :test_dataset_uri, :prediction_dataset_uri,  
-                :created_at, :real_runtime, # :cpu_runtime, 
-                :num_instances, :num_without_class, :percent_without_class, :num_unpredicted, :percent_unpredicted ] 
+  VAL_PROPS_GENERAL = [ :id, :uri, :model_uri, :training_dataset_uri, :prediction_feature,
+                :test_dataset_uri, :prediction_dataset_uri, :created_at ] 
+  VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ]
+  VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ]
+  VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG
   
   # :crossvalidation_info
   VAL_CV_PROPS = [ :crossvalidation_id, :crossvalidation_fold ]
   
   # :classification_statistics
-  VAL_CLASS_PROPS_SINGLE = [ :num_correct, :num_incorrect, :percent_correct, :percent_incorrect ]
+  VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix  ]
+  VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect ]
+  VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
+  
   # :class_value_statistics
-  VAL_CLASS_PROPS_PER_CLASS = [ :area_under_roc, :false_negative_rate, :false_positive_rate,
-                                :f_measure, :num_false_positives, :num_false_negatives, 
-                                :num_true_positives, :num_true_negatives, :precision, 
+  VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives, :num_false_negatives, 
+                                :num_true_positives, :num_true_negatives ]
+  VAL_CLASS_PROPS_PER_CLASS_AVG = [ :area_under_roc, :false_negative_rate, :false_positive_rate,
+                                :f_measure, :precision, 
                                 :recall, :true_negative_rate, :true_positive_rate ]
-  VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS + [ :confusion_matrix ]
+  VAL_CLASS_PROPS_PER_CLASS = VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_CLASS_PROPS_PER_CLASS_AVG
+                                
+  VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
   VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy]
 
   # :regression_statistics
   VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
-  
   CROSS_VAL_PROPS = [:algorithm_uri, :dataset_uri, :num_folds, :stratified, :random_seed]
   
   ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS
 
+  VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS
+  VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM
+  VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS
+ 
   class Validation
     include DataMapper::Resource 
   
@@ -45,9 +57,9 @@ module Lib
     
     property :num_instances, Integer
     property :num_without_class, Integer
-    property :percent_without_class, Integer
+    property :percent_without_class, Float
     property :num_unpredicted, Integer
-    property :percent_unpredicted, Integer
+    property :percent_unpredicted, Float
         
     property :classification_statistics, Object #Hash
     property :regression_statistics, Object
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index c1a731f..afe98de 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -109,10 +109,11 @@ module Reports
       svg_out_file ? show = "-o" : show = ""  
       (title and title.length > 0) ? tit = '-t "'+title+'"' : tit = ""  
       #title = "-t \""+ranking_value_prop+"-Ranking ("+comparables.size.to_s+" "+comparable_prop+"s, "+num_groups.to_s+" "+ranking_group_prop+"s, p < "+p.to_s+")\" "
-        
+      
       cmd = "java -jar "+ENV['RANK_PLOTTER_JAR']+" "+tit+" -c '"+
         comparables_array.join(",")+"' -r '"+ranks_array.join(",")+"' "+conf+" "+show #+" > /home/martin/tmp/test.svg" 
       #puts "\nplotting: "+cmd
+      LOGGER.debug "Plotting ranks: "+cmd.to_s
       
       res = ""
       IO.popen(cmd) do |f|
diff --git a/report/report_factory.rb b/report/report_factory.rb
index a522901..7484eb1 100644
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -118,7 +118,7 @@ module Reports::ReportFactory
         Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri])
         
         #merged = validation_set.merge([:algorithm_uri, :dataset_uri])
-        report = Reports::ReportContent.new("Algorithm comparison report Many datasets")
+        report = Reports::ReportContent.new("Algorithm comparison report - Many datasets")
         
         if (validation_set.first.classification?)
           report.add_section_result(validation_set,[:algorithm_uri, :test_dataset_uri]+VAL_ATTR_CLASS,"Mean Results","Mean Results")
diff --git a/report/report_test.rb b/report/report_test.rb
index a75dd76..3960856 100644
--- a/report/report_test.rb
+++ b/report/report_test.rb
@@ -22,9 +22,14 @@ class Reports::ApplicationTest < Test::Unit::TestCase
     #get uri
     
     #get '/report/validation/1',nil,'HTTP_ACCEPT' => "text/html"     
-    post '/report/validation/1/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css"
+    #post '/report/validation/1/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css"
+    
+    post 'http://ot.validation.de/report/crossvalidation',:validation_uris=>"http://ot.validation.de/crossvalidation/1"
+    uri = last_response.body.to_s
+    
+    #post uri.to_s+'/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css"
+    #puts last_response.body.to_s.gsub(/\n.*/,"")
     
-    puts last_response.body.to_s.gsub(/\n.*/,"")
   end
 #
 #  def test_webservice
diff --git a/report/validation_data.rb b/report/validation_data.rb
index a2b8905..c164674 100644
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -48,7 +48,7 @@ module Reports
   class Validation
     
     @@validation_access = Reports::ValidationDB.new
-
+    
     # for overwriting validation source (other than using webservices)
     def self.reset_validation_access(validation_access)
       @@validation_access = validation_access
@@ -65,11 +65,10 @@ module Reports
       VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym }
     @@validation_attributes.each{ |a| attr_accessor a } 
   
-    attr_reader :predictions, :merge_count
+    attr_reader :predictions
     
     def initialize(uri = nil)
       @@validation_access.init_validation(self, uri) if uri
-      @merge_count = 1
     end
   
     # returns/creates predictions, cache to save rest-calls/computation time
@@ -109,86 +108,8 @@ module Reports
     def clone_validation
       new_val = clone
       VAL_ATTR_VARIANCE.each { |a| new_val.send((a.to_s+"_variance=").to_sym,nil) }
-      new_val.set_merge_count(1)
       return new_val
     end
-    
-    # merges this validation and another validation object to a new validation object
-    # * v1.att = "a", v2.att = "a" => r.att = "a"
-    # * v1.att = "a", v2.att = "b" => r.att = "a / b"
-    # * v1.att = "1", v2.att = "2" => r.att = "1.5"
-    # * the attributes in __equal_attributes__ are assumed to be equal
-    #
-    # call-seq:
-    #   merge( validation, equal_attributes) => Reports::Validation
-    # 
-    def merge_validation( validation, equal_attributes )
-  
-      new_validation = Reports::Validation.new
-      # validation cannot be merged before 
-      raise "not working" if validation.merge_count > 1
-
-      @@validation_attributes.each do |a|
-        next if a.to_s =~ /_variance$/
-      
-        if (equal_attributes.index(a) != nil)
-          new_validation.send("#{a.to_s}=".to_sym, send(a))
-        else
-          
-          compute_variance = VAL_ATTR_VARIANCE.index(a)!=nil
-          old_variance = compute_variance ? send((a.to_s+"_variance").to_sym) : nil 
-          m = Validation::merge_value( send(a), @merge_count, compute_variance, old_variance, validation.send(a) )
-          
-          new_validation.send("#{a.to_s}=".to_sym, m[:value])
-          new_validation.send("#{a.to_s+"_variance"}=".to_sym, m[:variance]) if compute_variance
-        end
-      end
-  
-      new_validation.set_merge_count(@merge_count + 1);
-      return new_validation
-    end  
-    
-    def merge_count
-      @merge_count
-    end
-    
-    protected
-    def set_merge_count(c)
-      @merge_count = c
-    end
-    
-    # merges to values (value1 and value2), value1 has weight weight1, value2 has weight 1,
-    # computes variance if corresponding params are set
-    #
-    # return hash with merge value (:value) and :variance (if necessary)
-    # 
-    def self.merge_value( value1, weight1, compute_variance, variance1, value2 )
-      
-      if (value1.is_a?(Numeric))
-        value = (value1 * weight1 + value2) / (weight1 + 1).to_f;
-        if compute_variance
-          variance = Lib::Util::compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 )
-        end
-      elsif value1.is_a?(Array)
-        raise "not yet implemented : merging arrays"
-      elsif value1.is_a?(Hash)
-        value = {}
-        variance = {}
-        value1.keys.each do |k|
-          m = merge_value( value1[k], weight1, compute_variance, variance1==nil ? nil : variance1[k], value2[k] )
-          value[k] = m[:value]
-          variance[k] = m[:variance] if compute_variance
-        end
-      else
-        if value1.to_s != value2.to_s
-          value = value1.to_s + "/" + value2.to_s
-        else
-          value = value2.to_s
-        end
-      end
-      
-      {:value => value, :variance => (compute_variance ? variance : nil) }
-    end    
   end
   
   # = Reports:ValidationSet
@@ -327,11 +248,15 @@ module Reports
       #compute grouping
       grouping = Reports::Util.group(@validations, equal_attributes)
   
+      Lib::MergeObjects.register_merge_attributes( Reports::Validation,
+        Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless 
+          Lib::MergeObjects.merge_attributes_registered?(Reports::Validation)
+  
       #merge
       grouping.each do |g|
         new_set.validations.push(g[0].clone_validation)
         g[1..-1].each do |v|
-          new_set.validations[-1] = new_set.validations[-1].merge_validation(v, equal_attributes)
+          new_set.validations[-1] = Lib::MergeObjects.merge_objects(new_set.validations[-1],v)
         end
       end
       
@@ -419,4 +344,4 @@ module Reports
     
   end
   
-end 
-\ No newline at end of file
+end 
diff --git a/report/xml_report.rb b/report/xml_report.rb
index be66851..4b62457 100644
--- a/report/xml_report.rb
+++ b/report/xml_report.rb
@@ -1,6 +1,5 @@
 
 require 'rexml/document'
-include REXML
 
 ENV['REPORT_DTD'] = "docbook-xml-4.5/docbookx.dtd" unless ENV['REPORT_DTD']
 #transfer to absolute path
@@ -13,6 +12,7 @@ ENV['REPORT_DTD'] = File.expand_path(ENV['REPORT_DTD']) if File.exist?(ENV['REPO
 # uses Env-Variable _XMLREPORT_DTD_ to specify the dtd
 #  
 class Reports::XMLReport
+  include REXML
   
   # create new xmlreport
   def initialize(title, pubdate=nil, author_firstname = nil, author_surname = nil)
diff --git a/report/xml_report_util.rb b/report/xml_report_util.rb
index 00ff608..d047421 100644
--- a/report/xml_report_util.rb
+++ b/report/xml_report_util.rb
@@ -4,6 +4,7 @@
 # Utilities for XMLReport
 #
 module Reports::XMLReportUtil
+  include REXML
   
   # creates a confusion matrix as array (to be used as input for Reports::XMLReport::add_table)
   # input is confusion matrix as returned by Lib::Predictions.confusion_matrix
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index eb3e4a4..eb9574d 100644
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -33,6 +33,14 @@ get '/crossvalidation/?' do
   Validation::Crossvalidation.all.collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n")
 end
 
+post '/crossvalidation/loo/?' do
+  halt 500, "not yet implemented"
+end
+
+get '/crossvalidation/loo/?' do
+  halt 400, "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results"
+end
+
 get '/crossvalidation/:id' do
   LOGGER.info "get crossvalidation with id "+params[:id].to_s
   halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id])
@@ -69,9 +77,11 @@ get '/crossvalidation/:id/statistics' do
   LOGGER.info "get merged validation-result for crossvalidation with id "+params[:id].to_s
   halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id])
   
-  to_merge = [:prediction_feature, :num_instances,:num_without_class,:percent_without_class,:num_unpredicted,:percent_unpredicted,
-    :classification_statistics,:regression_statistics,:crossvalidation_id]
-  v = Validation::Validation.all(:crossvalidation_id => params[:id]).merge_array(to_merge)
+  Lib::MergeObjects.register_merge_attributes( Validation::Validation,
+    Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless 
+      Lib::MergeObjects.merge_attributes_registered?(Validation::Validation)
+  
+  v = Lib::MergeObjects.merge_array_objects( Validation::Validation.all(:crossvalidation_id => params[:id]) )
   v.uri = nil
   v.created_at = nil
   v.id = nil
@@ -97,6 +107,10 @@ post '/crossvalidation/?' do
   end
 end
 
+get '/training_test_split' do
+  halt 400, "GET operation not supported, use POST to perform a training_test_split, see "+url_for("/", :full)+" for validation results"
+end
+
 get '/?' do
   LOGGER.info "list all validations"
   content_type "text/uri-list"
diff --git a/validation/validation_format.rb b/validation/validation_format.rb
index ed5c7b1..d03dee4 100644
--- a/validation/validation_format.rb
+++ b/validation/validation_format.rb
@@ -30,6 +30,7 @@ module Validation
         # transpose results per class
         class_values = {}
         Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p|
+          raise "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect unless classification_statistics[p]
           classification_statistics[p].each do |class_value, property_value|
             class_values[class_value] = {:class_value => class_value} unless class_values.has_key?(class_value)
             map = class_values[class_value]
@@ -40,6 +41,7 @@ module Validation
         
         #converting confusion matrix
         cells = []
+        raise "confusion matrix missing" unless classification_statistics[:confusion_matrix]!=nil
         classification_statistics[:confusion_matrix].each do |k,v|
           cell = {}
           # key in confusion matrix is map with predicted and actual attribute 
@@ -62,6 +64,7 @@ module Validation
     # build hash structure and return with to_yaml
     def to_yaml
       get_content_as_hash.to_yaml
+      #super.to_yaml
     end
     
     def rdf_title
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index b854f0c..3cecef5 100644
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -134,13 +134,22 @@ class ValidationTest < Test::Unit::TestCase
 #      #model_uri = "http://ot.model.de/12"
 #      #data_uri_test = "http://ot.dataset.de/67"
 #      
-#      model_uri = "http://ot.model.de/9" 
-#      data_uri_test = "http://ot.dataset.de/33"
+#      model_uri = "http://ot.model.de/1" 
+#      data_uri_test = "http://ot.dataset.de/3"
 #      
 #      post '', {:test_dataset_uri => data_uri_test, :model_uri => model_uri, :prediction_feature => FEATURE_URI}
 #      
 #      puts last_response.body
 #      #verify_validation
+#      
+#      task = OpenTox::Task.find(last_response.body)
+#      task.wait_for_completion
+#      val_uri = task.resource
+#      puts val_uri
+#      
+#      get val_uri
+#      verify_validation(last_response.body)
+#
 #    ensure
 #      #delete_resources
 #    end
@@ -151,8 +160,9 @@ class ValidationTest < Test::Unit::TestCase
 #      
 #      #get '/41',nil,'HTTP_ACCEPT' => "application/rdf+xml" #"text/x-yaml"
 #      #puts last_response.body
-#      data_uri_train = upload_data(WS_DATA, FILE_TRAIN)
-#      data_uri_test = upload_data(WS_DATA, FILE_TEST)
+#      
+#      #data_uri_train = upload_data(WS_DATA, FILE_TRAIN)
+#      #data_uri_test = upload_data(WS_DATA, FILE_TEST)
 #      
 #      #data_uri_train = WS_DATA+"/"+DATA_TRAIN
 #      #data_uri_test = WS_DATA+"/"+DATA_TEST
@@ -177,18 +187,86 @@ class ValidationTest < Test::Unit::TestCase
 #      post '/training_test_split', { :dataset_uri => data_uri, :algorithm_uri => WS_CLASS_ALG, :prediction_feature => FEATURE_URI,
 #        :algorithm_params => "feature_generation_uri="+WS_FEATURE_ALG, :split_ratio=>0.75, :random_seed=>6}
 #      puts last_response.body
+#      
+#      task = OpenTox::Task.find(last_response.body)
+#      task.wait_for_completion
+#      val_uri = task.resource
+#      puts val_uri
+#            
+#      get val_uri
+#      puts last_response.body
 #      #verify_validation
 #    ensure
 #      #delete_resources
 #    end
 #  end
   
+  
+  def verify_validation(val_yaml)
+    
+    val = YAML.load(val_yaml)
+
+    puts val.inspect
+    assert_integer val["num_instances".to_sym],0,1000
+    num_instances = val["num_instances".to_sym].to_i
+    
+    assert_integer val["num_unpredicted".to_sym],0,num_instances
+    num_unpredicted = val["num_unpredicted".to_sym].to_i
+    assert_float val["percent_unpredicted".to_sym],0,100
+    assert_float_equal(val["percent_unpredicted".to_sym].to_f,100*num_unpredicted/num_instances.to_f,"percent_unpredicted")
+    
+    assert_integer val["num_without_class".to_sym],0,num_instances
+    num_without_class = val["num_without_class".to_sym].to_i
+    assert_float val["percent_without_class".to_sym],0,100
+    assert_float_equal(val["percent_without_class".to_sym].to_f,100*num_without_class/num_instances.to_f,"percent_without_class")
+    
+    class_stats = val["classification_statistics".to_sym]
+    class_value_stats = class_stats["class_value_statistics".to_sym]
+    class_values = []
+    class_value_stats.each do |cvs|
+      class_values << cvs["class_value".to_sym]
+    end
+    puts class_values.inspect
+    
+    confusion_matrix = class_stats["confusion_matrix".to_sym]
+    confusion_matrix_cells = confusion_matrix["confusion_matrix_cell".to_sym]
+    predictions = 0
+    confusion_matrix_cells.each do |confusion_matrix_cell|
+      predictions += confusion_matrix_cell["confusion_matrix_value".to_sym].to_i
+    end
+    assert_int_equal(predictions, num_instances-num_unpredicted)
+  end
+  
+  def assert_int_equal(val1,val2,msg_suffix=nil)
+    assert(val1==val2,msg_suffix.to_s+" not equal: "+val1.to_s+" != "+val2.to_s)
+  end
+  
+  def assert_float_equal(val1,val2,msg_suffix=nil,epsilon=0.0001)
+    assert((val1-val2).abs<epsilon,msg_suffix.to_s+" not equal: "+val1.to_s+" != "+val2.to_s+", diff:"+(val1-val2).abs.to_s)
+  end
+  
+  def assert_integer(string_val, min=nil, max=nil)
+    assert string_val.to_i.to_s==string_val.to_s, string_val.to_s+" not an integer"
+    assert string_val.to_i>=min if min!=nil
+    assert string_val.to_i<=max if max!=nil
+  end
+  
+  def assert_float(string_val, min=nil, max=nil)
+    assert( string_val.to_f.to_s==string_val.to_s || (string_val.to_f.to_s==(string_val.to_s+".0")),
+      string_val.to_s+" not a float (!="+string_val.to_f.to_s+")")
+    assert string_val.to_f>=min if min!=nil
+    assert string_val.to_f<=max if max!=nil
+  end
+  
   def test_nothing
     
     #puts "testing nothing"
     
     #get '/'     
 
+    #get '/crossvalidation/loo'
+    #get '/training_test_split'
+
     #get '/prepare_examples'
     #get '/test_examples'
 
@@ -197,9 +275,13 @@ class ValidationTest < Test::Unit::TestCase
 
     
     #get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "application/rdf+xml"
-    get '/crossvalidation/1/statistics',:bla=>blub,'HTTP_ACCEPT' => "text/x-yaml"
+    get '/crossvalidation/1/statistics',nil,'HTTP_ACCEPT' => "text/x-yaml"
+    
+    #puts last_response.body
+    
+    #get '/2'
+    verify_validation(last_response.body)
     
-    puts last_response.body
   end
   
 #  private
author	Martin Gütlein <martin.guetlein@gmail.com>	2010-03-08 17:01:23 +0100
committer	Martin Gütlein <martin.guetlein@gmail.com>	2010-03-08 17:01:23 +0100
commit	9c41e91c6a6067d8b254e0ef5da66c752fabdb4d (patch)
tree	74d4e9702bed59f2d6d5b3fd035e88020dba9f79
parent	e93ada015dbe91cff5b72eb8628c4f52814e3bdb (diff)