summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/format_util.rb 32
-rw-r--r-- lib/predictions.rb 4
-rw-r--r-- lib/rdf_provider.rb 188
-rw-r--r-- lib/validation_db.rb 7
-rw-r--r-- report/environment.rb 2
-rw-r--r-- report/report_persistance.rb 46
-rw-r--r-- report/report_service.rb 2
-rw-r--r-- test/test_examples_util.rb 53
-rw-r--r-- validation/validation_application.rb 2
-rw-r--r-- validation/validation_format.rb 94
10 files changed, 136 insertions, 294 deletions
diff --git a/lib/format_util.rb b/lib/format_util.rb
new file mode 100644
index 0000000..abd83d6
--- /dev/null
+++ b/lib/format_util.rb
@@ -0,0 +1,32 @@
+
+
+class String
+ # converts an underscore_separated name into camelCase, examples:
+ # :prediction_feature -> predictionFeature
+ # :test_dataset_uri -> testDataset
+ def to_rdf_format
+ s = gsub(/_uri(s|)$/,"") # drop a trailing "_uri" or "_uris"
+ s.gsub(/_./) do |m| # m is an underscore plus the character following it
+ m.gsub!(/^_/,"") # remove the underscore ...
+ m.upcase # ... and replace the match with the remaining character, upcased
+ end
+ end
+end
+
+class Hash
+ # returns a new hash with converted keys, the receiver itself is not modified
+ # applies to_rdf_format to all keys
+ def keys_to_rdf_format
+ res = {}
+ keys.each do |k|
+ v = self[k]
+ if v.is_a?(Hash) # nested hash: convert its keys recursively
+ v = v.keys_to_rdf_format
+ elsif v.is_a?(Array) # array: convert hash elements, keep all other elements as they are
+ v = v.collect{ |vv| vv.is_a?(Hash) ? vv.keys_to_rdf_format : vv }
+ end
+ res[k.to_s.to_rdf_format] = v # new keys are strings (k is passed through to_s first)
+ end
+ return res
+ end
+end \ No newline at end of file
diff --git a/lib/predictions.rb b/lib/predictions.rb
index f6351f8..2873689 100644
--- a/lib/predictions.rb
+++ b/lib/predictions.rb
@@ -441,8 +441,8 @@ module Lib
def sample_correlation_coefficient
# formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient
return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) /
- ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) *
- Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) )
+ ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) *
+ Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) )
end
def total_sum_of_squares
diff --git a/lib/rdf_provider.rb b/lib/rdf_provider.rb
deleted file mode 100644
index 7fa3ecc..0000000
--- a/lib/rdf_provider.rb
+++ /dev/null
@@ -1,188 +0,0 @@
-
-class String
- def convert_underscore
- gsub(/_./) do |m|
- m.gsub!(/^_/,"")
- m.upcase
- end
- end
-end
-
-module Lib
- module RDFProvider
-
- def to_rdf
- HashToOwl.to_rdf(self)
- end
-
- def uri
- raise "not implemented"
- end
-
- def rdf_title
- raise "not implemented"
- end
-
- # the rdf output is generated from the hash that is provided by this method
- # the keys in the hash structure are used to defined type of the resource (literal, objectProperty, dataProperty)
- # example: if the structure should contain a literal named "size" with value 5
- # * add :property_xy => 5 to your hash
- # * make sure literal?(:property_xy) returns true
- # * literal_name(:property_xy) must return "size"
- #
- def get_content_as_hash
- raise "not implemented"
- end
-
- def to_yaml
- get_content_as_hash.to_yaml
- end
-
- def rdf_ignore?( prop )
- self.class::IGNORE.index( prop ) != nil
- end
-
- def literal?( prop )
- self.class::LITERALS.index( prop ) != nil
- end
-
- def literal_name( prop )
- if self.class::LITERAL_NAMES.has_key?(prop)
- self.class::LITERAL_NAMES[prop]
- else
- OT[prop.to_s.convert_underscore]
- end
- end
-
- def object_property?( prop )
- self.class::OBJECT_PROPERTIES.has_key?( prop )
- end
-
- def object_property_name( prop )
- return self.class::OBJECT_PROPERTIES[ prop ]
- end
-
- def object_type( prop )
- return self.class::OBJECTS[ prop ]
- end
-
- def class?(prop)
- self.class::CLASSES.has_key?( prop )
- end
-
- def class_name( prop )
- return self.class::CLASSES[ prop ]
- end
-
- end
-
- class HashToOwl
- #include OpenTox::Owl
-
- def self.to_rdf( rdf_provider )
-
- owl = OpenTox::Owl.create(rdf_provider.rdf_title, rdf_provider.uri )
- toOwl = HashToOwl.new(owl)
- toOwl.add_content(rdf_provider)
- toOwl.rdf
- end
-
- def add_content( rdf_provider )
- @rdf_provider = rdf_provider
- recursiv_add_content( @rdf_provider.get_content_as_hash, @owl.root_node )
- end
-
- def rdf
- @owl.rdf
- end
-
- private
- def initialize(owl)
- @owl = owl
- @model = owl.model
- end
-
- def recursiv_add_content( output, node )
- output.each do |k,v|
- if v==nil
- LOGGER.warn "skipping nil value: "+k.to_s
- next
- end
- if @rdf_provider.rdf_ignore?(k)
- #do nothing
- elsif v.is_a?(Hash)
- new_node = add_class( k, node )
- recursiv_add_content( v, new_node )
- elsif v.is_a?(Array)
- v.each do |value|
- if @rdf_provider.class?(k)
- new_node = add_class( k, node )
- recursiv_add_content( value, new_node )
- else
- add_object_property( k, value, node)
- end
- end
- elsif @rdf_provider.literal?(k)
- set_literal( k, v, node)
- elsif @rdf_provider.object_property?(k)
- add_object_property( k, v, node)
- else
- raise "illegal value k:"+k.to_s+" v:"+v.to_s
- end
- end
- end
-
- def add_class( property, node )
- raise "no object prop: "+property.to_s unless @rdf_provider.object_property?(property)
- raise "no class name: "+property.to_s unless @rdf_provider.class_name(property)
- # to avoid anonymous nodes, make up uris for sub-objects
- # use counter to make sure each uri is unique
- # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
- count = 1
- while (true)
- res = Redland::Resource.new( File.join(node.uri.to_s,property.to_s+"/"+count.to_s) )
- break if @model.subject(@rdf_provider.object_property_name(property), res).nil?
- count += 1
- end
- clazz = Redland::Resource.new(@rdf_provider.class_name(property))
- @model.add res, RDF['type'], clazz
- @model.add res, DC['title'], clazz
- @model.add clazz, RDF['type'], OWL['Class']
- @model.add DC['title'], RDF['type'],OWL['AnnotationProperty']
-
- objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property))
- @model.add objectProp, RDF['type'], OWL['ObjectProperty']
- @model.add node, objectProp, res
- return res
- end
-
- def set_literal(property, value, node )
- raise "empty literal value "+property.to_s if value==nil || value.to_s.size==0
- raise "no literal name "+propety.to_s unless @rdf_provider.literal_name(property)
- begin
- l = @model.object(subject, @rdf_provider.literal_name(property))
- @model.delete node, @rdf_provider.literal_name(property), l
- rescue
- end
- literalProp = Redland::Resource.new(@rdf_provider.literal_name(property))
- @model.add literalProp, RDF['type'],OWL['AnnotationProperty']
- @model.add node, literalProp, Redland::Literal.create(value)
- end
-
- def add_object_property(property, value, node )
- raise "empty object property value "+property.to_s if value==nil || value.to_s.size==0
- raise "no object property name "+propety.to_s unless @rdf_provider.object_property_name(property)
- raise "no object type "+property.to_s unless @rdf_provider.object_type(property)
-
- objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property))
- @model.add objectProp, RDF['type'], OWL['ObjectProperty']
-
- val = Redland::Resource.new(value)
- type = Redland::Resource.new(@rdf_provider.object_type(property))
- @model.add node, objectProp, val
- @model.add val, RDF['type'], type
- @model.add type, RDF['type'], OWL['Class']
- end
-
- end
-end
diff --git a/lib/validation_db.rb b/lib/validation_db.rb
index 1152abf..09316ca 100644
--- a/lib/validation_db.rb
+++ b/lib/validation_db.rb
@@ -18,7 +18,7 @@ end
module Lib
VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature,
- :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :created_at ]
+ :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ]
VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ]
VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ]
VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG
@@ -50,7 +50,7 @@ module Lib
:target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ]
CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed]
- CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :created_at] + CROSS_VAL_PROPS
+ CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS
ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS
@@ -61,8 +61,11 @@ module Lib
class Validation < ActiveRecord::Base
serialize :classification_statistics
serialize :regression_statistics
+
+ alias_attribute :date, :created_at
end
class Crossvalidation < ActiveRecord::Base
+ alias_attribute :date, :created_at
end
end
diff --git a/report/environment.rb b/report/environment.rb
index 714cebe..d1321e7 100644
--- a/report/environment.rb
+++ b/report/environment.rb
@@ -20,8 +20,6 @@ end
module Reports
end
-require "lib/rdf_provider.rb"
-
require "report/plot_factory.rb"
require "report/xml_report.rb"
require "report/xml_report_util.rb"
diff --git a/report/report_persistance.rb b/report/report_persistance.rb
index 46a014e..d79cf7e 100644
--- a/report/report_persistance.rb
+++ b/report/report_persistance.rb
@@ -1,5 +1,6 @@
REPORT_DIR = File.join(Dir.pwd,'/reports')
+require "lib/format_util.rb"
# = Reports::ReportPersistance
#
@@ -182,41 +183,32 @@ end
module Reports
class ReportData < ActiveRecord::Base
- include Lib::RDFProvider
+ serialize :validation_uris
+ serialize :crossvalidation_uris
+ serialize :algorithm_uris
+ serialize :model_uris
+
+ alias_attribute :date, :created_at
+
def get_content_as_hash
map = {}
- map[:created_at] = created_at
- map[:report_uri] = report_uri
- map[:report_type] = report_type
- map[:validation_uris] = validation_uris
- map[:crossvalidation_uris] = crossvalidation_uris
- map[:algorithm_uris] = algorithm_uris
- map[:model_uris] = model_uris
+ [ :date, :report_type, :validation_uris, :crossvalidation_uris,
+ :algorithm_uris, :model_uris ].each do |p|
+ map[p] = self.send(p)
+ end
map
end
- def rdf_title
- "ValidationReport"
- end
+ def to_yaml
+ get_content_as_hash.to_yaml
+ end
- def uri
- report_uri
+ def to_rdf
+ owl = OpenTox::Owl.create("ValidationReport",report_uri)
+ owl.set_data(get_content_as_hash.keys_to_rdf_format)
+ owl.rdf
end
-
- LITERALS = [ :created_at, :report_type ]
- LITERAL_NAMES = {:created_at => OT["date"] }
- OBJECT_PROPERTIES = { :crossvalidation_uris => OT['reportCrossvalidation'], :algorithm_uris => OT['reportAlgorithm'],
- :validation_uris => OT['reportValidation'], :model_uris => OT['reportModel'] }
- OBJECTS = { :crossvalidation_uris => OT['Crossvalidation'], :algorithm_uris => OT['Algorithm'],
- :validation_uris => OT['Validation'], :model_uris => OT['Model'] }
- CLASSES = {}
- IGNORE = [ :id, :report_uri ]
-
- serialize :validation_uris
- serialize :crossvalidation_uris
- serialize :algorithm_uris
- serialize :model_uris
end
class ExtendedFileReportPersistance < FileReportPersistance
diff --git a/report/report_service.rb b/report/report_service.rb
index d6d0e1a..04a2d2e 100644
--- a/report/report_service.rb
+++ b/report/report_service.rb
@@ -133,7 +133,7 @@ module Reports
protected
def create_meta_data(type, validation_set, validation_uris)
- # the validtion_set contains the resolved single validations
+ # the validation_set contains the resolved single validations
# crossvalidation uris are only added if given as validation_uris - param
meta_data = {}
{ :validation_uri => "validation_uris",
diff --git a/test/test_examples_util.rb b/test/test_examples_util.rb
index 09f7d6c..8d945f0 100644
--- a/test/test_examples_util.rb
+++ b/test/test_examples_util.rb
@@ -67,14 +67,14 @@ module ValidationExamples
def self.verify_crossvalidation(val_yaml)
val = YAML.load(val_yaml)
- puts val.inspect
+ #puts val.inspect
assert_integer val["random_seed".to_sym],nil,nil,"random_seed"
assert_boolean val["stratified".to_sym],"stratified"
assert_integer val["num_folds".to_sym],0,1000,"num_folds"
num_folds = val["num_folds".to_sym].to_i
- validations = val["validations".to_sym]
+ validations = val["validation_uris".to_sym]
assert_int_equal(num_folds, validations.size, "num_folds != validations.size")
end
@@ -82,7 +82,7 @@ module ValidationExamples
val = YAML.load(val_yaml)
- puts val.inspect
+ #puts val.inspect
assert_integer val["num_instances".to_sym],0,1000,"num_instances"
num_instances = val["num_instances".to_sym].to_i
@@ -103,7 +103,7 @@ module ValidationExamples
class_value_stats.each do |cvs|
class_values << cvs["class_value".to_sym]
end
- puts class_values.inspect
+ #puts class_values.inspect
confusion_matrix = class_stats["confusion_matrix".to_sym]
confusion_matrix_cells = confusion_matrix["confusion_matrix_cell".to_sym]
@@ -118,12 +118,40 @@ module ValidationExamples
end
end
+ def self.compare_yaml_and_owl(hash, owl, nested_params=[] )
+ # checks each leaf value of hash against owl.get_nested(<path of rdf-formatted keys>), raises on mismatch
+ hash.each do |k,v|
+ p = nested_params + [ k.to_s.to_rdf_format ] # path of converted key names from the root down to k
+ if (v.is_a?(Hash))
+ compare_yaml_and_owl( v, owl, p ) # descend into nested hash
+ elsif (v.is_a?(Array))
+ v.each do |vv|
+ compare_yaml_and_owl( vv, owl, p ) # NOTE(review): vv is passed on as 'hash' even if it is not a Hash (e.g. a uri string) - confirm this is intended
+ end
+ else
+ owl_value = owl.get_nested( p ) # presumably the list of rdf values found under path p - confirm against OpenTox::Owl
+ if owl_value.size == 0
+ raise "owl_value is nil, yaml value is '"+v.to_s+"'" unless v==nil or v.to_s.size==0 # nothing in rdf is only accepted for nil/empty yaml values
+ elsif owl_value.size == 1
+ assert_equal(v, owl_value[0], p.join(".")+" (yaml != rdf)")
+ else
+ raise p.join(".")+" yaml value '"+v.to_s+"' not included in rdf values '"+
+ owl_value.inspect+"'" unless owl_value.include?(v) # several rdf values: the yaml value has to be one of them
+ end
+ end
+ end
+ end
+
private
def self.assert_not_nil(val,msg_suffix=nil)
raise msg_suffix.to_s+" is nil" if val==nil
end
def self.assert_int_equal(val1,val2,msg_suffix=nil)
+ assert_equal(val1, val2, msg_suffix)
+ end
+
+ def self.assert_equal(val1,val2,msg_suffix=nil)
raise msg_suffix.to_s+" not equal: "+val1.to_s+" != "+val2.to_s unless val1==val2
end
@@ -211,7 +239,24 @@ module ValidationExamples
end
end
+ def compare_yaml_vs_rdf # fetches validation and report as yaml and as owl and compares both via Util.compare_yaml_and_owl
+ if @validation_uri
+ yaml = YAML.load(Util.validation_get(@validation_uri.split("/")[-1],'application/x-yaml')) # last uri segment is used as request path
+ owl = OpenTox::Owl.from_data(Util.validation_get(@validation_uri.split("/")[-1]),@validation_uri,"Validation") # same request without explicit content type, presumably returns rdf - confirm
+ Util.compare_yaml_and_owl(yaml,owl)
+ end
+ if @report_uri
+ yaml = YAML.load(Util.validation_get(@report_uri.split("/")[-3..-1].join("/"),'application/x-yaml')) # last three uri segments are used as request path
+ owl = OpenTox::Owl.from_data(Util.validation_get(@report_uri.split("/")[-3..-1].join("/")),@report_uri,"ValidationReport")
+ Util.compare_yaml_and_owl(yaml,owl)
+ else
+ puts "no report" # a missing report is only logged, a missing validation uri is skipped silently
+ end
+ end
+
+
def verify_yaml
+ raise "cannot very validation, validation_uri is null" unless @validation_uri
if @validation_uri =~ /crossvalidation/
Util.verify_crossvalidation(Util.validation_get("crossvalidation/"+@validation_uri.split("/")[-1],'application/x-yaml'))
Util.validation_get("crossvalidation/"+@validation_uri.split("/")[-1]+"/statistics",'application/x-yaml')
diff --git a/validation/validation_application.rb b/validation/validation_application.rb
index a43a2a6..a1affa2 100644
--- a/validation/validation_application.rb
+++ b/validation/validation_application.rb
@@ -79,7 +79,7 @@ get '/crossvalidation/:id/statistics' do
v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) )
v.validation_uri = nil
- v.created_at = nil
+ v.date = nil
v.id = nil
content_type "application/x-yaml"
v.to_yaml
diff --git a/validation/validation_format.rb b/validation/validation_format.rb
index a172f8a..0db8579 100644
--- a/validation/validation_format.rb
+++ b/validation/validation_format.rb
@@ -1,29 +1,26 @@
-require "lib/rdf_provider.rb"
+require "lib/format_util.rb"
module Validation
-
# adding to_yaml and to_rdf functionality to validation
class Validation < Lib::Validation
- include Lib::RDFProvider
- # get_content_as_hash is the basis for to_yaml and to_rdf
- # the idea is that everything is stored in a hash structure
- # the hash is directly printed in to_yaml, whereas the has_keys can be used to resolve
- # the right properties, classes for to_rdf
- def get_content_as_hash
+ # builds hash for validation, as the internal representation differs from the owl-object
+ # the hash is directly printed in to_yaml, or added to the owl-structure
+ def get_content_as_hash()
LOGGER.debug self.validation_uri
h = {}
- Lib::VAL_PROPS.each{|p| h[p] = self.send(p)}
+ (Lib::VAL_PROPS - [:validation_uri]).each do |p|
+ h[p] = self.send(p)
+ end
if crossvalidation_id!=nil
cv = {}
- Lib::VAL_CV_PROPS.each do |p|
- cv[p] = self.send(p)
- end
- # replace crossvalidation id with uri
+ #skip crossvalidation_id
+ cv[:crossvalidation_fold] = self.crossvalidation_fold
+ cv[:crossvalidation_uri] = self.crossvalidation_uri
h[:crossvalidation_info] = cv
end
if classification_statistics
@@ -64,79 +61,42 @@ module Validation
return h
end
- def rdf_title
- "Validation"
+ def to_rdf
+ owl = OpenTox::Owl.create("Validation",validation_uri)
+ owl.set_data(get_content_as_hash.keys_to_rdf_format)
+ owl.rdf
end
- def uri
- validation_uri
+ def to_yaml
+ get_content_as_hash.to_yaml
end
- LITERALS = [ :created_at, :real_runtime, :num_instances, :num_without_class,
- :percent_without_class, :num_unpredicted, :percent_unpredicted,
- :crossvalidation_fold ] +
- (Lib::VAL_CLASS_PROPS - [ :confusion_matrix ]) + Lib::VAL_REGR_PROPS +
- [ :class_value, :confusion_matrix_value,
- :confusion_matrix_actual, :confusion_matrix_predicted ]
-
- LITERAL_NAMES = {:created_at => OT["date"] }
-
- OBJECT_PROPERTIES = { :model_uri => OT['validationModel'], :training_dataset_uri => OT['validationTrainingDataset'], :algorithm_uri => OT['validationAlgorithm'],
- :prediction_feature => OT['predictedFeature'], :test_dataset_uri => OT['validationTestDataset'], :test_target_dataset_uri => OT['validationTestTargetDataset'],
- :prediction_dataset_uri => OT['validationPredictionDataset'], :crossvalidation_info => OT['hasValidationInfo'],
- :crossvalidation_uri => OT['validationCrossvalidation'],
- :classification_statistics => OT['hasValidationInfo'], :regression_statistics => OT['hasValidationInfo'],
- :class_value_statistics => OT['classValueStatistics'], :confusion_matrix => OT['confusionMatrix'],
- :confusion_matrix_cell => OT['confusionMatrixCell'], #:class_value => OT['classValue'],
- #:confusion_matrix_actual => OT['confusionMatrixActual'], :confusion_matrix_predicted => OT['confusionMatrixPredicted']
- }
-
- OBJECTS = { :model_uri => OT['Model'], :training_dataset_uri => OT['Dataset'], :test_dataset_uri => OT['Dataset'],
- :test_target_dataset_uri => OT['Dataset'], :prediction_dataset_uri => OT['Dataset'], :prediction_feature => OT['Feature'],
- :algorithm_uri => OT['Algorithm'],}
-
- CLASSES = { :crossvalidation_info => OT['CrossvalidationInfo'], :classification_statistics => OT['ClassificationStatistics'],
- :regression_statistics => OT['RegresssionStatistics'], :class_value_statistics => OT['ClassValueStatistics'],
- :confusion_matrix => OT['ConfusionMatrix'], :confusion_matrix_cell => OT['ConfusionMatrixCell']}
-
- IGNORE = [ :id, :validation_uri, :crossvalidation_id ]
-
end
class Crossvalidation < Lib::Crossvalidation
- include Lib::RDFProvider
-
+
def get_content_as_hash
h = {}
- Lib::CROSS_VAL_PROPS_REDUNDANT.each{|p| h[p] = self.send(p)}
+ (Lib::CROSS_VAL_PROPS_REDUNDANT - [:crossvalidation_uri]).each do |p|
+ h[p] = self.send(p)
+ end
v = []
Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val|
v.push( val.validation_uri.to_s )
end
- h[:validations] = v
+ h[:validation_uris] = v
h
end
- def uri
- crossvalidation_uri
+ def to_rdf # returns the rdf representation of this crossvalidation, built from get_content_as_hash
+ owl = OpenTox::Owl.create("Crossvalidation",crossvalidation_uri) # title is the plain class name (as "Validation" / "ValidationReport" elsewhere), without stray quote
+ owl.set_data(get_content_as_hash.keys_to_rdf_format) # hash keys are converted to rdf naming before being added
+ owl.rdf
end
- def rdf_title
- "Crossvalidation"
+ def to_yaml
+ get_content_as_hash.to_yaml
end
-
- LITERALS = [ :created_at, :stratified, :num_folds, :random_seed ]
-
- LITERAL_NAMES = {:created_at => OT["date"] }
-
- OBJECT_PROPERTIES = { :dataset_uri => OT['crossvalidationDataset'], :algorithm_uri => OT['crossvalidationAlgorithm'],
- :validations => OT['crossvalidationValidation'] }
-
- OBJECTS = { :dataset_uri => OT['Dataset'], :validations => OT['Validation'], :algorithm_uri => OT['Algorithm']}
-
- CLASSES = {}
-
- IGNORE = [ :id, :crossvalidation_uri ]
end
end