summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Gütlein <martin.guetlein@gmail.com>2010-03-08 17:01:23 +0100
committerMartin Gütlein <martin.guetlein@gmail.com>2010-03-08 17:01:23 +0100
commit9c41e91c6a6067d8b254e0ef5da66c752fabdb4d (patch)
tree74d4e9702bed59f2d6d5b3fd035e88020dba9f79
parente93ada015dbe91cff5b72eb8628c4f52814e3bdb (diff)
fixed: percent float instead of int, sum of number counts for cv
-rw-r--r--lib/merge.rb316
-rw-r--r--lib/validation_db.rb36
-rw-r--r--report/plot_factory.rb3
-rw-r--r--report/report_factory.rb2
-rw-r--r--report/report_test.rb9
-rw-r--r--report/validation_data.rb91
-rw-r--r--report/xml_report.rb2
-rw-r--r--report/xml_report_util.rb1
-rw-r--r--validation/validation_application.rb20
-rw-r--r--validation/validation_format.rb3
-rw-r--r--validation/validation_test.rb94
11 files changed, 353 insertions, 224 deletions
diff --git a/lib/merge.rb b/lib/merge.rb
index f35198d..b42df1e 100644
--- a/lib/merge.rb
+++ b/lib/merge.rb
@@ -1,29 +1,5 @@
-$merge_count = {}
-
-class Array
- def merge_array( merge_attributes, equal_attributes=nil )
- return nil if self.size == nil
- return self[0] if self.size==1
-
- m = self[0].merge_object(self[1], merge_attributes, equal_attributes)
- (2..self.size-1).each do |i|
- m = m.merge_object(self[i], merge_attributes, equal_attributes)
- end
- return m
- end
-end
-
-class Object
-
- def merge_count()
- $merge_count[self] = 1 if $merge_count[self]==nil
- return $merge_count[self]
- end
-
- def set_merge_count(merge_count)
- $merge_count[self] = merge_count
- end
+module Lib
def self.compute_variance( old_variance, n, new_mean, old_mean, new_value )
# use revursiv formular for computing the variance
@@ -33,109 +9,219 @@ class Object
(new_mean - old_mean)**2 +
(n>1 ? (new_value - new_mean)**2/(n-1) : 0 )
end
-
- def self.merge_value( value1, weight1, compute_variance, variance1, value2 )
-
- if value1.is_a?(Numeric) and value2.is_a?(Numeric)
- value = (value1 * weight1 + value2) / (weight1 + 1).to_f;
- if compute_variance
- variance = compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 )
- end
- elsif value1.is_a?(Array) and value2.is_a?(Array)
- raise "cannot merge arrays with unequal sizes" if !value2.is_a?(Array) || value1.size!=value2.size
- value = []
- variance = []
- (0..value1.size-1).each do |i|
- m = merge_value( value1[i], weight1, compute_variance, variance1==nil ? nil : variance1[i], value2[i] )
- value[i] = m[:value]
- variance[i] = m[:variance] if compute_variance
- end
- elsif value1.is_a?(Hash) and value2.is_a?(Hash)
- value = {}
- variance = {}
- value1.keys.each do |k|
- m = merge_value( value1[k], weight1, compute_variance, variance1==nil ? nil : variance1[k], value2[k] )
- value[k] = m[:value]
- variance[k] = m[:variance] if compute_variance
+
+ module MergeObjects
+
+ @@merge_count = {}
+ @@avg_attributes = {}
+ @@sum_attributes = {}
+ @@non_numeric_attributes = {}
+
+ def self.merge_array_objects( array )
+ return nil if array.size == nil
+ return array[0] if array.size==1
+
+ m = self.merge_objects(array[0], array[1] )
+ (2..array.size-1).each do |i|
+ m = self.merge_objects(m, array[i] )
end
- else
- if value1.to_s != value2.to_s
- value = value1.to_s + "/" + value2.to_s
- else
- value = value2.to_s
+ return m
+ end
+
+ def self.merge_objects( object1, object2 )
+
+ raise "classes not equal" if object1.class != object2.class
+ object_class = object1.class
+ raise "register which attributes to merge first, nothing found for class "+object_class.to_s unless merge_attributes_registered?(object_class)
+ raise "not supported, successivly add unmerged object to a merge object" if merge_count(object2)>1
+
+ new_object = object_class.new
+ # actually instance_variables would be appropriate, but the datamanager creates objects dynamically
+ object1.public_methods.each do |method|
+ v = method.to_sym
+ if merge_attribute?(object_class, v)
+ old_variance = (avg_attribute?(object_class,v) and variance_attribute?(new_object,v)) ? object1.send(variance_symbol(v)) : nil
+ m = merge_value( object_class, v, object1.send(v), object2.send(v), merge_count(object1), old_variance )
+ new_object.send("#{v.to_s}=".to_sym, m[:value])
+ new_object.send("#{v.to_s}_variance=".to_sym, m[:variance]) if (m[:variance] and variance_attribute?(new_object,v))
+ end
end
+ set_merge_count(new_object,merge_count(object1)+1)
+ return new_object
+ end
+
+ def self.register_merge_attributes( object_class, avg_attributes, sum_attributes, non_numeric_attributes)
+ @@avg_attributes[object_class] = avg_attributes
+ @@sum_attributes[object_class] = sum_attributes
+ @@non_numeric_attributes[object_class] = non_numeric_attributes
end
- {:value => value, :variance => (compute_variance ? variance : nil) }
- end
-
- def merge_object( object, merge_attributes, equal_attributes=nil )
-
- raise "classes not equal" if object.class != self.class
- raise "not supported, successivly add unmerged object to a merge object" if object.merge_count>1
+ def self.merge_attributes_registered?( object_class )
+ [ @@avg_attributes, @@sum_attributes, @@non_numeric_attributes ].each{ |map| return false unless map.has_key?(object_class) }
+ return true
+ end
- new_object = self.class.new
- merge_attributes.each do |variable|
- next if variable.to_s =~ /_variance$/
+ protected
+ def self.merge_value( object_class, attribute, value1, value2, weight1=1, variance1=nil )
- if (equal_attributes and equal_attributes.index(variable) != nil)
- new_object.send("#{variable.to_s}=".to_sym, send(variable))
+ variance = nil
+
+ if (avg=avg_attribute?(object_class, attribute)) || sum_attribute?(object_class, attribute)
+ if (value1==nil and value2==nil )
+ #do nothing
+ elsif value1.is_a?(Numeric) and value2.is_a?(Numeric)
+ if avg
+ value = (value1 * weight1 + value2) / (weight1 + 1).to_f;
+ variance = Lib::compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 )
+ else
+ value = value1 + value2
+ end
+ elsif value1.is_a?(Array) and value2.is_a?(Array)
+ raise "cannot merge arrays with unequal sizes" if !value2.is_a?(Array) || value1.size!=value2.size
+ value = []
+ variance = [] if avg
+ (0..value1.size-1).each do |i|
+ if avg
+ value << (value1[i] * weight1 + value2[i]) / (weight1 + 1).to_f;
+ variance << Lib::compute_variance( (variance1!=nil && variance1[i]!=nil) ? variance1[i] : 0, weight1+1, value[-1], value1[i], value2[i] )
+ else
+ value << value1[i] + value2[i]
+ end
+ end
+ elsif value1.is_a?(Hash) and value2.is_a?(Hash)
+ value = {}
+ variance = {} if avg
+ value1.keys.each do |k|
+ if avg
+ value[k] = (value1[k] * weight1 + value2[k]) / (weight1 + 1).to_f;
+ variance[k] = Lib::compute_variance( (variance1!=nil && variance1[k]!=nil) ? variance1[k] : 0, weight1+1, value[k], value1[k], value2[k] )
+ else
+ value[k] = value1[k] + value2[k]
+ end
+ end
+ else
+ raise "invalid, cannot avg/sum non-numeric content for attribute: "+attribute.to_s+" contents: '"+value1.to_s+"', '"+value2.to_s+"'"
+ end
+ elsif non_numeric_attribute?(object_class, attribute)
+ if (value1.is_a?(Hash) and value2.is_a?(Hash))
+ value = {}
+ value1.keys.each do |k|
+ if merge_attribute?(object_class, k)
+ m = merge_value( object_class, k, value1[k], value2[k], weight1, (variance1!=nil ? variance1[k] : nil) )
+ value[k] = m[:value]
+ value[variance_symbol(k)] = m[:variance] if m[:variance]
+ end
+ end
+ elsif value1.is_a?(Array)
+ raise "non-numerical arrays not yet supported"
+ else
+ if value1.to_s != value2.to_s
+ value = value1.to_s + "/" + value2.to_s
+ else
+ value = value2.to_s
+ end
+ end
else
- compute_variance = self.respond_to?( (variable.to_s+"_variance").to_sym ) #VAL_ATTR_VARIANCE.index(a)!=nil
- old_variance = compute_variance ? send((variable.to_s+"_variance").to_sym) : nil
- m = Object::merge_value( send(variable), self.merge_count, compute_variance, old_variance, object.send(variable) )
- new_object.send("#{variable.to_s}=".to_sym, m[:value])
- new_object.send("#{variable.to_s}_variance=".to_sym, m[:variance]) if compute_variance
+ raise "invalid type '"+attribute.to_s+"'"
end
+ {:value => value, :variance => variance }
+ end
+
+ def self.merge_count( object )
+ @@merge_count[object] = 1 if @@merge_count[object]==nil
+ return @@merge_count[object]
end
-
- new_object.set_merge_count self.merge_count+1
- return new_object
- end
-
-end
-
-class MergeTest
-
- attr_accessor :string, :integer, :float, :hash_value, :float_variance
-
- def to_s
- res = [:string, :integer, :float, :hash_value].collect do |var|
- variance = nil
- variance = "+-"+send((var.to_s+"_variance")).inspect if self.respond_to?( (var.to_s+"_variance").to_sym )
- var.to_s+":"+send(var).inspect+variance.to_s
+
+ def self.set_merge_count(object, merge_count)
+ @@merge_count[object] = merge_count
+ end
+
+ def self.avg_attribute?(object_class, attribute)
+ return @@avg_attributes[object_class].index(attribute) != nil
+ end
+
+ def self.sum_attribute?(object_class, attribute)
+ return @@sum_attributes[object_class].index(attribute) != nil
+ end
+
+ def self.non_numeric_attribute?(object_class, attribute)
+ return @@non_numeric_attributes[object_class].index(attribute) != nil
+ end
+
+ def self.merge_attribute?(object_class, attribute)
+ return avg_attribute?(object_class, attribute)||
+ sum_attribute?(object_class, attribute) ||
+ non_numeric_attribute?(object_class,attribute)
+ end
+
+ def self.variance_symbol(attribute)
+ return (attribute.to_s+"_variance").to_sym
+ end
+
+ def self.variance_attribute?(object, attribute)
+ return false unless avg_attribute?(object.class, attribute)
+ begin
+ return object.respond_to?( variance_symbol(attribute) )
+ rescue
+ return false
+ end
end
- res.join(" ")
end
- def self.demo
- to_merge = []
- p = MergeTest.new
- p.string = "asdf"
- p.integer = 39
- p.float = 78.6
- p.hash_value = {:mixed_key=>80, :string_key=>"tu", :int_key=>70}
- to_merge << p
-
- p = MergeTest.new
- p.string = "jkl"
- p.integer = 25
- p.float = 35.6
- p.hash_value = {:mixed_key=>"bla", :string_key=>"iu", :int_key=>34}
- to_merge << p
-
- p = MergeTest.new
- p.string = "qwert"
- p.integer = 100
- p.float = 100
- p.hash_value = {:mixed_key=>45, :string_key=>"op", :int_key=>20}
- to_merge << p
-
- puts "merged: "+to_merge.merge_array([:string, :integer, :float, :hash_value]).to_s
+ class MergeTest
+
+ attr_accessor :string, :integer, :float, :hash_value, :float, :float_array, :float_variance, :float_array_variance
+
+ AVG = [:float, :float_array, :int_key ]
+ SUM = [:integer ]
+ ELSE = [:string, :hash_value]
+
+ def to_s
+ res = [:string, :integer, :float, :hash_value, :float_array].collect do |var|
+ variance = nil
+ begin
+ variance = "+-"+send((var.to_s+"_variance")).inspect if AVG.index(var)!=nil
+ rescue
+ end
+ var.to_s+":"+send(var).inspect+variance.to_s
+ end
+ res.join(" ")
+ end
+
+ def self.demo
+ to_merge = []
+ p = MergeTest.new
+ p.string = "asdf"
+ p.integer = 39
+ p.float = 78.6
+ p.float_array = [1, 2]
+ p.hash_value = {:mixed_key=>80, :string_key=>"tu", :int_key=>70}
+ to_merge << p
+
+ p = MergeTest.new
+ p.string = "jkl"
+ p.integer = 25
+ p.float = 35.6
+ p.float_array = [1, 3]
+ p.hash_value = {:mixed_key=>"bla", :string_key=>"iu", :int_key=>34}
+ to_merge << p
+
+ p = MergeTest.new
+ p.string = "qwert"
+ p.integer = 100
+ p.float = 100
+ p.float_array = [2, 3]
+ p.hash_value = {:mixed_key=>45, :string_key=>"op", :int_key=>20}
+ to_merge << p
+
+ puts "single:\n"+to_merge.collect{|t| t.to_s+"\n"}.to_s+"\n"
+
+ MergeObjects.register_merge_attributes(to_merge[0].class, AVG, SUM, ELSE)
+ puts "merged:\n"+MergeObjects.merge_array_objects(to_merge).to_s
+ end
+
end
-
end
-#MergeTest.demo
-
+#Lib::MergeTest.demo
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 01607ce..8f4a540 100644
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -3,33 +3,45 @@
require lib
end
+require "lib/merge.rb"
+
module Lib
- VAL_PROPS = [ :id, :uri, :model_uri, :training_dataset_uri, :prediction_feature,
- :test_dataset_uri, :prediction_dataset_uri,
- :created_at, :real_runtime, # :cpu_runtime,
- :num_instances, :num_without_class, :percent_without_class, :num_unpredicted, :percent_unpredicted ]
+ VAL_PROPS_GENERAL = [ :id, :uri, :model_uri, :training_dataset_uri, :prediction_feature,
+ :test_dataset_uri, :prediction_dataset_uri, :created_at ]
+ VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ]
+ VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ]
+ VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG
# :crossvalidation_info
VAL_CV_PROPS = [ :crossvalidation_id, :crossvalidation_fold ]
# :classification_statistics
- VAL_CLASS_PROPS_SINGLE = [ :num_correct, :num_incorrect, :percent_correct, :percent_incorrect ]
+ VAL_CLASS_PROPS_SINGLE_SUM = [ :num_correct, :num_incorrect, :confusion_matrix ]
+ VAL_CLASS_PROPS_SINGLE_AVG = [ :percent_correct, :percent_incorrect ]
+ VAL_CLASS_PROPS_SINGLE = VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_SINGLE_AVG
+
# :class_value_statistics
- VAL_CLASS_PROPS_PER_CLASS = [ :area_under_roc, :false_negative_rate, :false_positive_rate,
- :f_measure, :num_false_positives, :num_false_negatives,
- :num_true_positives, :num_true_negatives, :precision,
+ VAL_CLASS_PROPS_PER_CLASS_SUM = [ :num_false_positives, :num_false_negatives,
+ :num_true_positives, :num_true_negatives ]
+ VAL_CLASS_PROPS_PER_CLASS_AVG = [ :area_under_roc, :false_negative_rate, :false_positive_rate,
+ :f_measure, :precision,
:recall, :true_negative_rate, :true_positive_rate ]
- VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS + [ :confusion_matrix ]
+ VAL_CLASS_PROPS_PER_CLASS = VAL_CLASS_PROPS_PER_CLASS_SUM + VAL_CLASS_PROPS_PER_CLASS_AVG
+
+ VAL_CLASS_PROPS = VAL_CLASS_PROPS_SINGLE + VAL_CLASS_PROPS_PER_CLASS
VAL_CLASS_PROPS_EXTENDED = VAL_CLASS_PROPS + [:accuracy]
# :regression_statistics
VAL_REGR_PROPS = [ :root_mean_squared_error, :mean_absolute_error, :r_square ]
-
CROSS_VAL_PROPS = [:algorithm_uri, :dataset_uri, :num_folds, :stratified, :random_seed]
ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS
+ VAL_MERGE_GENERAL = VAL_PROPS_GENERAL + VAL_CV_PROPS + [:classification_statistics, :regression_statistics] + CROSS_VAL_PROPS
+ VAL_MERGE_SUM = VAL_PROPS_SUM + VAL_CLASS_PROPS_SINGLE_SUM + VAL_CLASS_PROPS_PER_CLASS_SUM
+ VAL_MERGE_AVG = VAL_PROPS_AVG + VAL_CLASS_PROPS_SINGLE_AVG + VAL_CLASS_PROPS_PER_CLASS_AVG + VAL_REGR_PROPS
+
class Validation
include DataMapper::Resource
@@ -45,9 +57,9 @@ module Lib
property :num_instances, Integer
property :num_without_class, Integer
- property :percent_without_class, Integer
+ property :percent_without_class, Float
property :num_unpredicted, Integer
- property :percent_unpredicted, Integer
+ property :percent_unpredicted, Float
property :classification_statistics, Object #Hash
property :regression_statistics, Object
diff --git a/report/plot_factory.rb b/report/plot_factory.rb
index c1a731f..afe98de 100644
--- a/report/plot_factory.rb
+++ b/report/plot_factory.rb
@@ -109,10 +109,11 @@ module Reports
svg_out_file ? show = "-o" : show = ""
(title and title.length > 0) ? tit = '-t "'+title+'"' : tit = ""
#title = "-t \""+ranking_value_prop+"-Ranking ("+comparables.size.to_s+" "+comparable_prop+"s, "+num_groups.to_s+" "+ranking_group_prop+"s, p < "+p.to_s+")\" "
-
+
cmd = "java -jar "+ENV['RANK_PLOTTER_JAR']+" "+tit+" -c '"+
comparables_array.join(",")+"' -r '"+ranks_array.join(",")+"' "+conf+" "+show #+" > /home/martin/tmp/test.svg"
#puts "\nplotting: "+cmd
+ LOGGER.debug "Plotting ranks: "+cmd.to_s
res = ""
IO.popen(cmd) do |f|
diff --git a/report/report_factory.rb b/report/report_factory.rb
index a522901..7484eb1 100644
--- a/report/report_factory.rb
+++ b/report/report_factory.rb
@@ -118,7 +118,7 @@ module Reports::ReportFactory
Reports::Util.check_group_matching(dataset_grouping, [:algorithm_uri])
#merged = validation_set.merge([:algorithm_uri, :dataset_uri])
- report = Reports::ReportContent.new("Algorithm comparison report Many datasets")
+ report = Reports::ReportContent.new("Algorithm comparison report - Many datasets")
if (validation_set.first.classification?)
report.add_section_result(validation_set,[:algorithm_uri, :test_dataset_uri]+VAL_ATTR_CLASS,"Mean Results","Mean Results")
diff --git a/report/report_test.rb b/report/report_test.rb
index a75dd76..3960856 100644
--- a/report/report_test.rb
+++ b/report/report_test.rb
@@ -22,9 +22,14 @@ class Reports::ApplicationTest < Test::Unit::TestCase
#get uri
#get '/report/validation/1',nil,'HTTP_ACCEPT' => "text/html"
- post '/report/validation/1/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css"
+ #post '/report/validation/1/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css"
+
+ post 'http://ot.validation.de/report/crossvalidation',:validation_uris=>"http://ot.validation.de/crossvalidation/1"
+ uri = last_response.body.to_s
+
+ #post uri.to_s+'/format_html',:css_style_sheet=>"http://apps.ideaconsult.net:8180/ToxPredict/style/global.css"
+ #puts last_response.body.to_s.gsub(/\n.*/,"")
- puts last_response.body.to_s.gsub(/\n.*/,"")
end
#
# def test_webservice
diff --git a/report/validation_data.rb b/report/validation_data.rb
index a2b8905..c164674 100644
--- a/report/validation_data.rb
+++ b/report/validation_data.rb
@@ -48,7 +48,7 @@ module Reports
class Validation
@@validation_access = Reports::ValidationDB.new
-
+
# for overwriting validation source (other than using webservices)
def self.reset_validation_access(validation_access)
@@validation_access = validation_access
@@ -65,11 +65,10 @@ module Reports
VAL_ATTR_RANKING.collect{ |a| (a.to_s+"_ranking").to_sym }
@@validation_attributes.each{ |a| attr_accessor a }
- attr_reader :predictions, :merge_count
+ attr_reader :predictions
def initialize(uri = nil)
@@validation_access.init_validation(self, uri) if uri
- @merge_count = 1
end
# returns/creates predictions, cache to save rest-calls/computation time
@@ -109,86 +108,8 @@ module Reports
def clone_validation
new_val = clone
VAL_ATTR_VARIANCE.each { |a| new_val.send((a.to_s+"_variance=").to_sym,nil) }
- new_val.set_merge_count(1)
return new_val
end
-
- # merges this validation and another validation object to a new validation object
- # * v1.att = "a", v2.att = "a" => r.att = "a"
- # * v1.att = "a", v2.att = "b" => r.att = "a / b"
- # * v1.att = "1", v2.att = "2" => r.att = "1.5"
- # * the attributes in __equal_attributes__ are assumed to be equal
- #
- # call-seq:
- # merge( validation, equal_attributes) => Reports::Validation
- #
- def merge_validation( validation, equal_attributes )
-
- new_validation = Reports::Validation.new
- # validation cannot be merged before
- raise "not working" if validation.merge_count > 1
-
- @@validation_attributes.each do |a|
- next if a.to_s =~ /_variance$/
-
- if (equal_attributes.index(a) != nil)
- new_validation.send("#{a.to_s}=".to_sym, send(a))
- else
-
- compute_variance = VAL_ATTR_VARIANCE.index(a)!=nil
- old_variance = compute_variance ? send((a.to_s+"_variance").to_sym) : nil
- m = Validation::merge_value( send(a), @merge_count, compute_variance, old_variance, validation.send(a) )
-
- new_validation.send("#{a.to_s}=".to_sym, m[:value])
- new_validation.send("#{a.to_s+"_variance"}=".to_sym, m[:variance]) if compute_variance
- end
- end
-
- new_validation.set_merge_count(@merge_count + 1);
- return new_validation
- end
-
- def merge_count
- @merge_count
- end
-
- protected
- def set_merge_count(c)
- @merge_count = c
- end
-
- # merges to values (value1 and value2), value1 has weight weight1, value2 has weight 1,
- # computes variance if corresponding params are set
- #
- # return hash with merge value (:value) and :variance (if necessary)
- #
- def self.merge_value( value1, weight1, compute_variance, variance1, value2 )
-
- if (value1.is_a?(Numeric))
- value = (value1 * weight1 + value2) / (weight1 + 1).to_f;
- if compute_variance
- variance = Lib::Util::compute_variance( variance1!=nil ? variance1 : 0, weight1+1, value, value1, value2 )
- end
- elsif value1.is_a?(Array)
- raise "not yet implemented : merging arrays"
- elsif value1.is_a?(Hash)
- value = {}
- variance = {}
- value1.keys.each do |k|
- m = merge_value( value1[k], weight1, compute_variance, variance1==nil ? nil : variance1[k], value2[k] )
- value[k] = m[:value]
- variance[k] = m[:variance] if compute_variance
- end
- else
- if value1.to_s != value2.to_s
- value = value1.to_s + "/" + value2.to_s
- else
- value = value2.to_s
- end
- end
-
- {:value => value, :variance => (compute_variance ? variance : nil) }
- end
end
# = Reports:ValidationSet
@@ -327,11 +248,15 @@ module Reports
#compute grouping
grouping = Reports::Util.group(@validations, equal_attributes)
+ Lib::MergeObjects.register_merge_attributes( Reports::Validation,
+ Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless
+ Lib::MergeObjects.merge_attributes_registered?(Reports::Validation)
+
#merge
grouping.each do |g|
new_set.validations.push(g[0].clone_validation)
g[1..-1].each do |v|
- new_set.validations[-1] = new_set.validations[-1].merge_validation(v, equal_attributes)
+ new_set.validations[-1] = Lib::MergeObjects.merge_objects(new_set.validations[-1],v)
end
end
@@ -419,4 +344,4 @@ module Reports
end
-end \ No newline at end of file
+end
diff --git a/report/xml_report.rb b/report/xml_report.rb
index be66851..4b62457 100644
--- a/report/xml_report.rb
+++ b/report/xml_report.rb
@@ -1,6 +1,5 @@
require 'rexml/document'
-include REXML
ENV['REPORT_DTD'] = "docbook-xml-4.5/docbookx.dtd" unless ENV['REPORT_DTD']
#transfer to absolute path
@@ -13,6 +12,7 @@ ENV['REPORT_DTD'] = File.expand_path(ENV['REPORT_DTD']) if File.exist?(ENV['REPO
# uses Env-Variable _XMLREPORT_DTD_ to specifiy the dtd
#
class Reports::XMLReport
+ include REXML
# create new xmlreport
def initialize(title, pubdate=nil, author_firstname = nil, author_surname = nil)
diff --git a/report/xml_report_util.rb b/report/xml_report_util.rb
index 00ff608..d047421 100644
--- a/report/xml_report_util.rb
+++ b/report/xml_report_util.rb
@@ -4,6 +4,7 @@
# Utilities for XMLReport
#
module Reports::XMLReportUtil
+ include REXML
# creates a confusion matrix as array (to be used as input for Reports::XMLReport::add_table)
# input is confusion matrix as returned by Lib::Predictions.confusion_matrix
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index eb3e4a4..eb9574d 100644
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -33,6 +33,14 @@ get '/crossvalidation/?' do
Validation::Crossvalidation.all.collect{ |d| url_for("/crossvalidation/", :full) + d.id.to_s }.join("\n")
end
+post '/crossvalidation/loo/?' do
+ halt 500, "not yet implemented"
+end
+
+get '/crossvalidation/loo/?' do
+ halt 400, "GET operation not supported, use POST for performing a loo-crossvalidation, see "+url_for("/crossvalidation", :full)+" for crossvalidation results"
+end
+
get '/crossvalidation/:id' do
LOGGER.info "get crossvalidation with id "+params[:id].to_s
halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id])
@@ -69,9 +77,11 @@ get '/crossvalidation/:id/statistics' do
LOGGER.info "get merged validation-result for crossvalidation with id "+params[:id].to_s
halt 404, "Crossvalidation #{params[:id]} not found." unless crossvalidation = Validation::Crossvalidation.get(params[:id])
- to_merge = [:prediction_feature, :num_instances,:num_without_class,:percent_without_class,:num_unpredicted,:percent_unpredicted,
- :classification_statistics,:regression_statistics,:crossvalidation_id]
- v = Validation::Validation.all(:crossvalidation_id => params[:id]).merge_array(to_merge)
+ Lib::MergeObjects.register_merge_attributes( Validation::Validation,
+ Lib::VAL_MERGE_AVG,Lib::VAL_MERGE_SUM,Lib::VAL_MERGE_GENERAL) unless
+ Lib::MergeObjects.merge_attributes_registered?(Validation::Validation)
+
+ v = Lib::MergeObjects.merge_array_objects( Validation::Validation.all(:crossvalidation_id => params[:id]) )
v.uri = nil
v.created_at = nil
v.id = nil
@@ -97,6 +107,10 @@ post '/crossvalidation/?' do
end
end
+get '/training_test_split' do
+ halt 400, "GET operation not supported, use POST to perform a training_test_split, see "+url_for("/", :full)+" for validation results"
+end
+
get '/?' do
LOGGER.info "list all validations"
content_type "text/uri-list"
diff --git a/validation/validation_format.rb b/validation/validation_format.rb
index ed5c7b1..d03dee4 100644
--- a/validation/validation_format.rb
+++ b/validation/validation_format.rb
@@ -30,6 +30,7 @@ module Validation
# transpose results per class
class_values = {}
Lib::VAL_CLASS_PROPS_PER_CLASS.each do |p|
+ raise "missing classification statitstics: "+p.to_s+" "+classification_statistics.inspect unless classification_statistics[p]
classification_statistics[p].each do |class_value, property_value|
class_values[class_value] = {:class_value => class_value} unless class_values.has_key?(class_value)
map = class_values[class_value]
@@ -40,6 +41,7 @@ module Validation
#converting confusion matrix
cells = []
+ raise "confusion matrix missing" unless classification_statistics[:confusion_matrix]!=nil
classification_statistics[:confusion_matrix].each do |k,v|
cell = {}
# key in confusion matrix is map with predicted and actual attribute
@@ -62,6 +64,7 @@ module Validation
# build hash structure and return with to_yaml
def to_yaml
get_content_as_hash.to_yaml
+ #super.to_yaml
end
def rdf_title
diff --git a/validation/validation_test.rb b/validation/validation_test.rb
index b854f0c..3cecef5 100644
--- a/validation/validation_test.rb
+++ b/validation/validation_test.rb
@@ -134,13 +134,22 @@ class ValidationTest < Test::Unit::TestCase
# #model_uri = "http://ot.model.de/12"
# #data_uri_test = "http://ot.dataset.de/67"
#
-# model_uri = "http://ot.model.de/9"
-# data_uri_test = "http://ot.dataset.de/33"
+# model_uri = "http://ot.model.de/1"
+# data_uri_test = "http://ot.dataset.de/3"
#
# post '', {:test_dataset_uri => data_uri_test, :model_uri => model_uri, :prediction_feature => FEATURE_URI}
#
# puts last_response.body
# #verify_validation
+#
+# task = OpenTox::Task.find(last_response.body)
+# task.wait_for_completion
+# val_uri = task.resource
+# puts val_uri
+#
+# get val_uri
+# verify_validation(last_response.body)
+#
# ensure
# #delete_resources
# end
@@ -151,8 +160,9 @@ class ValidationTest < Test::Unit::TestCase
#
# #get '/41',nil,'HTTP_ACCEPT' => "application/rdf+xml" #"text/x-yaml"
# #puts last_response.body
-# data_uri_train = upload_data(WS_DATA, FILE_TRAIN)
-# data_uri_test = upload_data(WS_DATA, FILE_TEST)
+#
+# #data_uri_train = upload_data(WS_DATA, FILE_TRAIN)
+# #data_uri_test = upload_data(WS_DATA, FILE_TEST)
#
# #data_uri_train = WS_DATA+"/"+DATA_TRAIN
# #data_uri_test = WS_DATA+"/"+DATA_TEST
@@ -177,18 +187,86 @@ class ValidationTest < Test::Unit::TestCase
# post '/training_test_split', { :dataset_uri => data_uri, :algorithm_uri => WS_CLASS_ALG, :prediction_feature => FEATURE_URI,
# :algorithm_params => "feature_generation_uri="+WS_FEATURE_ALG, :split_ratio=>0.75, :random_seed=>6}
# puts last_response.body
+#
+# task = OpenTox::Task.find(last_response.body)
+# task.wait_for_completion
+# val_uri = task.resource
+# puts val_uri
+#
+# get val_uri
+# puts last_response.body
# #verify_validation
# ensure
# #delete_resources
# end
# end
+
+ def verify_validation(val_yaml)
+
+ val = YAML.load(val_yaml)
+
+ puts val.inspect
+ assert_integer val["num_instances".to_sym],0,1000
+ num_instances = val["num_instances".to_sym].to_i
+
+ assert_integer val["num_unpredicted".to_sym],0,num_instances
+ num_unpredicted = val["num_unpredicted".to_sym].to_i
+ assert_float val["percent_unpredicted".to_sym],0,100
+ assert_float_equal(val["percent_unpredicted".to_sym].to_f,100*num_unpredicted/num_instances.to_f,"percent_unpredicted")
+
+ assert_integer val["num_without_class".to_sym],0,num_instances
+ num_without_class = val["num_without_class".to_sym].to_i
+ assert_float val["percent_without_class".to_sym],0,100
+ assert_float_equal(val["percent_without_class".to_sym].to_f,100*num_without_class/num_instances.to_f,"percent_without_class")
+
+ class_stats = val["classification_statistics".to_sym]
+ class_value_stats = class_stats["class_value_statistics".to_sym]
+ class_values = []
+ class_value_stats.each do |cvs|
+ class_values << cvs["class_value".to_sym]
+ end
+ puts class_values.inspect
+
+ confusion_matrix = class_stats["confusion_matrix".to_sym]
+ confusion_matrix_cells = confusion_matrix["confusion_matrix_cell".to_sym]
+ predictions = 0
+ confusion_matrix_cells.each do |confusion_matrix_cell|
+ predictions += confusion_matrix_cell["confusion_matrix_value".to_sym].to_i
+ end
+ assert_int_equal(predictions, num_instances-num_unpredicted)
+ end
+
+ def assert_int_equal(val1,val2,msg_suffix=nil)
+ assert(val1==val2,msg_suffix.to_s+" not equal: "+val1.to_s+" != "+val2.to_s)
+ end
+
+ def assert_float_equal(val1,val2,msg_suffix=nil,epsilon=0.0001)
+ assert((val1-val2).abs<epsilon,msg_suffix.to_s+" not equal: "+val1.to_s+" != "+val2.to_s+", diff:"+(val1-val2).abs.to_s)
+ end
+
+ def assert_integer(string_val, min=nil, max=nil)
+ assert string_val.to_i.to_s==string_val.to_s, string_val.to_s+" not an integer"
+ assert string_val.to_i>=min if min!=nil
+ assert string_val.to_i<=max if max!=nil
+ end
+
+ def assert_float(string_val, min=nil, max=nil)
+ assert( string_val.to_f.to_s==string_val.to_s || (string_val.to_f.to_s==(string_val.to_s+".0")),
+ string_val.to_s+" not a float (!="+string_val.to_f.to_s+")")
+ assert string_val.to_f>=min if min!=nil
+ assert string_val.to_f<=max if max!=nil
+ end
+
def test_nothing
#puts "testing nothing"
#get '/'
+ #get '/crossvalidation/loo'
+ #get '/training_test_split'
+
#get '/prepare_examples'
#get '/test_examples'
@@ -197,9 +275,13 @@ class ValidationTest < Test::Unit::TestCase
#get '/crossvalidation/1',nil,'HTTP_ACCEPT' => "application/rdf+xml"
- get '/crossvalidation/1/statistics',:bla=>blub,'HTTP_ACCEPT' => "text/x-yaml"
+ get '/crossvalidation/1/statistics',nil,'HTTP_ACCEPT' => "text/x-yaml"
+
+ #puts last_response.body
+
+ #get '/2'
+ verify_validation(last_response.body)
- puts last_response.body
end
# private