diff options
-rw-r--r-- | lib/format_util.rb | 32 | ||||
-rw-r--r-- | lib/predictions.rb | 4 | ||||
-rw-r--r-- | lib/rdf_provider.rb | 188 | ||||
-rw-r--r-- | lib/validation_db.rb | 7 | ||||
-rw-r--r-- | report/environment.rb | 2 | ||||
-rw-r--r-- | report/report_persistance.rb | 46 | ||||
-rw-r--r-- | report/report_service.rb | 2 | ||||
-rw-r--r-- | test/test_examples_util.rb | 53 | ||||
-rw-r--r-- | validation/validation_application.rb | 2 | ||||
-rw-r--r-- | validation/validation_format.rb | 94 |
10 files changed, 136 insertions, 294 deletions
diff --git a/lib/format_util.rb b/lib/format_util.rb new file mode 100644 index 0000000..abd83d6 --- /dev/null +++ b/lib/format_util.rb @@ -0,0 +1,32 @@ + + +class String + + # :prediction_feature -> predictionFeature + # :test_dataset_uri -> testDataset + def to_rdf_format + s = gsub(/_uri(s|)$/,"") + s.gsub(/_./) do |m| + m.gsub!(/^_/,"") + m.upcase + end + end +end + +class Hash + + # applies to_rdf_format to all keys + def keys_to_rdf_format + res = {} + keys.each do |k| + v = self[k] + if v.is_a?(Hash) + v = v.keys_to_rdf_format + elsif v.is_a?(Array) + v = v.collect{ |vv| vv.is_a?(Hash) ? vv.keys_to_rdf_format : vv } + end + res[k.to_s.to_rdf_format] = v + end + return res + end +end
\ No newline at end of file diff --git a/lib/predictions.rb b/lib/predictions.rb index f6351f8..2873689 100644 --- a/lib/predictions.rb +++ b/lib/predictions.rb @@ -441,8 +441,8 @@ module Lib def sample_correlation_coefficient # formula see http://en.wikipedia.org/wiki/Correlation_and_dependence#Pearson.27s_product-moment_coefficient return ( @num_predicted * @sum_multiply - @sum_actual * @sum_predicted ) / - ( Math.sqrt( @num_predicted * @sum_squares_actual - @sum_actual**2 ) * - Math.sqrt( @num_predicted * @sum_squares_predicted - @sum_predicted**2 ) ) + ( Math.sqrt( [0, @num_predicted * @sum_squares_actual - @sum_actual**2].max ) * + Math.sqrt( [0, @num_predicted * @sum_squares_predicted - @sum_predicted**2].max ) ) end def total_sum_of_squares diff --git a/lib/rdf_provider.rb b/lib/rdf_provider.rb deleted file mode 100644 index 7fa3ecc..0000000 --- a/lib/rdf_provider.rb +++ /dev/null @@ -1,188 +0,0 @@ - -class String - def convert_underscore - gsub(/_./) do |m| - m.gsub!(/^_/,"") - m.upcase - end - end -end - -module Lib - module RDFProvider - - def to_rdf - HashToOwl.to_rdf(self) - end - - def uri - raise "not implemented" - end - - def rdf_title - raise "not implemented" - end - - # the rdf output is generated from the hash that is provided by this method - # the keys in the hash structure are used to defined type of the resource (literal, objectProperty, dataProperty) - # example: if the structure should contain a literal named "size" with value 5 - # * add :property_xy => 5 to your hash - # * make sure literal?(:property_xy) returns true - # * literal_name(:property_xy) must return "size" - # - def get_content_as_hash - raise "not implemented" - end - - def to_yaml - get_content_as_hash.to_yaml - end - - def rdf_ignore?( prop ) - self.class::IGNORE.index( prop ) != nil - end - - def literal?( prop ) - self.class::LITERALS.index( prop ) != nil - end - - def literal_name( prop ) - if self.class::LITERAL_NAMES.has_key?(prop) - self.class::LITERAL_NAMES[prop] - else - OT[prop.to_s.convert_underscore] - end - end - - def object_property?( prop ) - self.class::OBJECT_PROPERTIES.has_key?( prop ) - end - - def object_property_name( prop ) - return self.class::OBJECT_PROPERTIES[ prop ] - end - - def object_type( prop ) - return self.class::OBJECTS[ prop ] - end - - def class?(prop) - self.class::CLASSES.has_key?( prop ) - end - - def class_name( prop ) - return self.class::CLASSES[ prop ] - end - - end - - class HashToOwl - #include OpenTox::Owl - - def self.to_rdf( rdf_provider ) - - owl = OpenTox::Owl.create(rdf_provider.rdf_title, rdf_provider.uri ) - toOwl = HashToOwl.new(owl) - toOwl.add_content(rdf_provider) - toOwl.rdf - end - - def add_content( rdf_provider ) - @rdf_provider = rdf_provider - recursiv_add_content( @rdf_provider.get_content_as_hash, @owl.root_node ) - end - - def rdf - @owl.rdf - end - - private - def initialize(owl) - @owl = owl - @model = owl.model - end - - def recursiv_add_content( output, node ) - output.each do |k,v| - if v==nil - LOGGER.warn "skipping nil value: "+k.to_s - next - end - if @rdf_provider.rdf_ignore?(k) - #do nothing - elsif v.is_a?(Hash) - new_node = add_class( k, node ) - recursiv_add_content( v, new_node ) - elsif v.is_a?(Array) - v.each do |value| - if @rdf_provider.class?(k) - new_node = add_class( k, node ) - recursiv_add_content( value, new_node ) - else - add_object_property( k, value, node) - end - end - elsif @rdf_provider.literal?(k) - set_literal( k, v, node) - elsif @rdf_provider.object_property?(k) - add_object_property( k, v, node) - else - raise "illegal value k:"+k.to_s+" v:"+v.to_s - end - end - end - - def add_class( property, node ) - raise "no object prop: "+property.to_s unless @rdf_provider.object_property?(property) - raise "no class name: "+property.to_s unless @rdf_provider.class_name(property) - # to avoid anonymous nodes, make up uris for sub-objects - # use counter to make sure each uri is unique - # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ... - count = 1 - while (true) - res = Redland::Resource.new( File.join(node.uri.to_s,property.to_s+"/"+count.to_s) ) - break if @model.subject(@rdf_provider.object_property_name(property), res).nil? - count += 1 - end - clazz = Redland::Resource.new(@rdf_provider.class_name(property)) - @model.add res, RDF['type'], clazz - @model.add res, DC['title'], clazz - @model.add clazz, RDF['type'], OWL['Class'] - @model.add DC['title'], RDF['type'],OWL['AnnotationProperty'] - - objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property)) - @model.add objectProp, RDF['type'], OWL['ObjectProperty'] - @model.add node, objectProp, res - return res - end - - def set_literal(property, value, node ) - raise "empty literal value "+property.to_s if value==nil || value.to_s.size==0 - raise "no literal name "+propety.to_s unless @rdf_provider.literal_name(property) - begin - l = @model.object(subject, @rdf_provider.literal_name(property)) - @model.delete node, @rdf_provider.literal_name(property), l - rescue - end - literalProp = Redland::Resource.new(@rdf_provider.literal_name(property)) - @model.add literalProp, RDF['type'],OWL['AnnotationProperty'] - @model.add node, literalProp, Redland::Literal.create(value) - end - - def add_object_property(property, value, node ) - raise "empty object property value "+property.to_s if value==nil || value.to_s.size==0 - raise "no object property name "+propety.to_s unless @rdf_provider.object_property_name(property) - raise "no object type "+property.to_s unless @rdf_provider.object_type(property) - - objectProp = Redland::Resource.new(@rdf_provider.object_property_name(property)) - @model.add objectProp, RDF['type'], OWL['ObjectProperty'] - - val = Redland::Resource.new(value) - type = Redland::Resource.new(@rdf_provider.object_type(property)) - @model.add node, objectProp, val - @model.add val, RDF['type'], type - @model.add type, RDF['type'], OWL['Class'] - end - - end -end diff --git a/lib/validation_db.rb b/lib/validation_db.rb index 1152abf..09316ca 100644 --- a/lib/validation_db.rb +++ b/lib/validation_db.rb @@ -18,7 +18,7 @@ end module Lib VAL_PROPS_GENERAL = [ :validation_uri, :model_uri, :algorithm_uri, :training_dataset_uri, :prediction_feature, - :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :created_at ] + :test_dataset_uri, :test_target_dataset_uri, :prediction_dataset_uri, :date ] VAL_PROPS_SUM = [ :num_instances, :num_without_class, :num_unpredicted ] VAL_PROPS_AVG = [:real_runtime, :percent_without_class, :percent_unpredicted ] VAL_PROPS = VAL_PROPS_GENERAL + VAL_PROPS_SUM + VAL_PROPS_AVG @@ -50,7 +50,7 @@ module Lib :target_variance_actual, :target_variance_predicted, :sum_squared_error, :sample_correlation_coefficient ] CROSS_VAL_PROPS = [:dataset_uri, :num_folds, :stratified, :random_seed] - CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :created_at] + CROSS_VAL_PROPS + CROSS_VAL_PROPS_REDUNDANT = [:crossvalidation_uri, :algorithm_uri, :date] + CROSS_VAL_PROPS ALL_PROPS = VAL_PROPS + VAL_CV_PROPS + VAL_CLASS_PROPS_EXTENDED + VAL_REGR_PROPS + CROSS_VAL_PROPS @@ -61,8 +61,11 @@ module Lib class Validation < ActiveRecord::Base serialize :classification_statistics serialize :regression_statistics + + alias_attribute :date, :created_at end class Crossvalidation < ActiveRecord::Base + alias_attribute :date, :created_at end end diff --git a/report/environment.rb b/report/environment.rb index 714cebe..d1321e7 100644 --- a/report/environment.rb +++ b/report/environment.rb @@ -20,8 +20,6 @@ end module Reports end -require "lib/rdf_provider.rb" - require "report/plot_factory.rb" require "report/xml_report.rb" require "report/xml_report_util.rb" diff --git a/report/report_persistance.rb b/report/report_persistance.rb index 46a014e..d79cf7e 100644 --- a/report/report_persistance.rb +++ b/report/report_persistance.rb @@ -1,5 +1,6 @@ REPORT_DIR = File.join(Dir.pwd,'/reports') +require "lib/format_util.rb" # = Reports::ReportPersistance # @@ -182,41 +183,32 @@ end module Reports class ReportData < ActiveRecord::Base - include Lib::RDFProvider + serialize :validation_uris + serialize :crossvalidation_uris + serialize :algorithm_uris + serialize :model_uris + + alias_attribute :date, :created_at + def get_content_as_hash map = {} - map[:created_at] = created_at - map[:report_uri] = report_uri - map[:report_type] = report_type - map[:validation_uris] = validation_uris - map[:crossvalidation_uris] = crossvalidation_uris - map[:algorithm_uris] = algorithm_uris - map[:model_uris] = model_uris + [ :date, :report_type, :validation_uris, :crossvalidation_uris, + :algorithm_uris, :model_uris ].each do |p| + map[p] = self.send(p) + end map end - def rdf_title - "ValidationReport" - end + def to_yaml + get_content_as_hash.to_yaml + end - def uri - report_uri + def to_rdf + owl = OpenTox::Owl.create("ValidationReport",report_uri) + owl.set_data(get_content_as_hash.keys_to_rdf_format) + owl.rdf end - - LITERALS = [ :created_at, :report_type ] - LITERAL_NAMES = {:created_at => OT["date"] } - OBJECT_PROPERTIES = { :crossvalidation_uris => OT['reportCrossvalidation'], :algorithm_uris => OT['reportAlgorithm'], - :validation_uris => OT['reportValidation'], :model_uris => OT['reportModel'] } - OBJECTS = { :crossvalidation_uris => OT['Crossvalidation'], :algorithm_uris => OT['Algorithm'], - :validation_uris => OT['Validation'], :model_uris => OT['Model'] } - CLASSES = {} - IGNORE = [ :id, :report_uri ] - - serialize :validation_uris - serialize :crossvalidation_uris - serialize :algorithm_uris - serialize :model_uris end class ExtendedFileReportPersistance < FileReportPersistance diff --git a/report/report_service.rb b/report/report_service.rb index d6d0e1a..04a2d2e 100644 --- a/report/report_service.rb +++ b/report/report_service.rb @@ -133,7 +133,7 @@ module Reports protected def create_meta_data(type, validation_set, validation_uris) - # the validtion_set contains the resolved single validations + # the validation_set contains the resolved single validations # crossvalidation uris are only added if given as validation_uris - param meta_data = {} { :validation_uri => "validation_uris", diff --git a/test/test_examples_util.rb b/test/test_examples_util.rb index 09f7d6c..8d945f0 100644 --- a/test/test_examples_util.rb +++ b/test/test_examples_util.rb @@ -67,14 +67,14 @@ module ValidationExamples def self.verify_crossvalidation(val_yaml) val = YAML.load(val_yaml) - puts val.inspect + #puts val.inspect assert_integer val["random_seed".to_sym],nil,nil,"random_seed" assert_boolean val["stratified".to_sym],"stratified" assert_integer val["num_folds".to_sym],0,1000,"num_folds" num_folds = val["num_folds".to_sym].to_i - validations = val["validations".to_sym] + validations = val["validation_uris".to_sym] assert_int_equal(num_folds, validations.size, "num_folds != validations.size") end @@ -82,7 +82,7 @@ module ValidationExamples val = YAML.load(val_yaml) - puts val.inspect + #puts val.inspect assert_integer val["num_instances".to_sym],0,1000,"num_instances" num_instances = val["num_instances".to_sym].to_i @@ -103,7 +103,7 @@ module ValidationExamples class_value_stats.each do |cvs| class_values << cvs["class_value".to_sym] end - puts class_values.inspect + #puts class_values.inspect confusion_matrix = class_stats["confusion_matrix".to_sym] confusion_matrix_cells = confusion_matrix["confusion_matrix_cell".to_sym] @@ -118,12 +118,40 @@ module ValidationExamples end end + def self.compare_yaml_and_owl(hash, owl, nested_params=[] ) + + hash.each do |k,v| + p = nested_params + [ k.to_s.to_rdf_format ] + if (v.is_a?(Hash)) + compare_yaml_and_owl( v, owl, p ) + elsif (v.is_a?(Array)) + v.each do |vv| + compare_yaml_and_owl( vv, owl, p ) + end + else + owl_value = owl.get_nested( p ) + if owl_value.size == 0 + raise "owl_value is nil, yaml value is '"+v.to_s+"'" unless v==nil or v.to_s.size==0 + elsif owl_value.size == 1 + assert_equal(v, owl_value[0], p.join(".")+" (yaml != rdf)") + else + raise p.join(".")+" yaml value '"+v.to_s+"' not included in rdf values '"+ + owl_value.inspect+"'" unless owl_value.include?(v) + end + end + end + end + private def self.assert_not_nil(val,msg_suffix=nil) raise msg_suffix.to_s+" is nil" if val==nil end def self.assert_int_equal(val1,val2,msg_suffix=nil) + assert_equal(val1, val2, msg_suffix) + end + + def self.assert_equal(val1,val2,msg_suffix=nil) raise msg_suffix.to_s+" not equal: "+val1.to_s+" != "+val2.to_s unless val1==val2 end @@ -211,7 +239,24 @@ module ValidationExamples end end + def compare_yaml_vs_rdf + if @validation_uri + yaml = YAML.load(Util.validation_get(@validation_uri.split("/")[-1],'application/x-yaml')) + owl = OpenTox::Owl.from_data(Util.validation_get(@validation_uri.split("/")[-1]),@validation_uri,"Validation") + Util.compare_yaml_and_owl(yaml,owl) + end + if @report_uri + yaml = YAML.load(Util.validation_get(@report_uri.split("/")[-3..-1].join("/"),'application/x-yaml')) + owl = OpenTox::Owl.from_data(Util.validation_get(@report_uri.split("/")[-3..-1].join("/")),@report_uri,"ValidationReport") + Util.compare_yaml_and_owl(yaml,owl) + else + puts "no report" + end + end + + def verify_yaml + raise "cannot very validation, validation_uri is null" unless @validation_uri if @validation_uri =~ /crossvalidation/ Util.verify_crossvalidation(Util.validation_get("crossvalidation/"+@validation_uri.split("/")[-1],'application/x-yaml')) Util.validation_get("crossvalidation/"+@validation_uri.split("/")[-1]+"/statistics",'application/x-yaml') diff --git a/validation/validation_application.rb b/validation/validation_application.rb index a43a2a6..a1affa2 100644 --- a/validation/validation_application.rb +++ b/validation/validation_application.rb @@ -79,7 +79,7 @@ get '/crossvalidation/:id/statistics' do v = Lib::MergeObjects.merge_array_objects( Validation::Validation.find( :all, :conditions => { :crossvalidation_id => params[:id] } ) ) v.validation_uri = nil - v.created_at = nil + v.date = nil v.id = nil content_type "application/x-yaml" v.to_yaml diff --git a/validation/validation_format.rb b/validation/validation_format.rb index a172f8a..0db8579 100644 --- a/validation/validation_format.rb +++ b/validation/validation_format.rb @@ -1,29 +1,26 @@ -require "lib/rdf_provider.rb" +require "lib/format_util.rb" module Validation - # adding to_yaml and to_rdf functionality to validation class Validation < Lib::Validation - include Lib::RDFProvider - # get_content_as_hash is the basis for to_yaml and to_rdf - # the idea is that everything is stored in a hash structure - # the hash is directly printed in to_yaml, whereas the has_keys can be used to resolve - # the right properties, classes for to_rdf - def get_content_as_hash + # builds hash for valiation, as the internal presentation differs from the owl-object + # the hash is directly printed in to_yaml, or added to the owl-structure + def get_content_as_hash() LOGGER.debug self.validation_uri h = {} - Lib::VAL_PROPS.each{|p| h[p] = self.send(p)} + (Lib::VAL_PROPS - [:validation_uri]).each do |p| + h[p] = self.send(p) + end if crossvalidation_id!=nil cv = {} - Lib::VAL_CV_PROPS.each do |p| - cv[p] = self.send(p) - end - # replace crossvalidation id with uri + #skip crossvalidation_id + cv[:crossvalidation_fold] = self.crossvalidation_fold + cv[:crossvalidation_uri] = self.crossvalidation_uri h[:crossvalidation_info] = cv end if classification_statistics @@ -64,79 +61,42 @@ module Validation return h end - def rdf_title - "Validation" + def to_rdf + owl = OpenTox::Owl.create("Validation",validation_uri) + owl.set_data(get_content_as_hash.keys_to_rdf_format) + owl.rdf end - def uri - validation_uri + def to_yaml + get_content_as_hash.to_yaml end - LITERALS = [ :created_at, :real_runtime, :num_instances, :num_without_class, - :percent_without_class, :num_unpredicted, :percent_unpredicted, - :crossvalidation_fold ] + - (Lib::VAL_CLASS_PROPS - [ :confusion_matrix ]) + Lib::VAL_REGR_PROPS + - [ :class_value, :confusion_matrix_value, - :confusion_matrix_actual, :confusion_matrix_predicted ] - - LITERAL_NAMES = {:created_at => OT["date"] } - - OBJECT_PROPERTIES = { :model_uri => OT['validationModel'], :training_dataset_uri => OT['validationTrainingDataset'], :algorithm_uri => OT['validationAlgorithm'], - :prediction_feature => OT['predictedFeature'], :test_dataset_uri => OT['validationTestDataset'], :test_target_dataset_uri => OT['validationTestTargetDataset'], - :prediction_dataset_uri => OT['validationPredictionDataset'], :crossvalidation_info => OT['hasValidationInfo'], - :crossvalidation_uri => OT['validationCrossvalidation'], - :classification_statistics => OT['hasValidationInfo'], :regression_statistics => OT['hasValidationInfo'], - :class_value_statistics => OT['classValueStatistics'], :confusion_matrix => OT['confusionMatrix'], - :confusion_matrix_cell => OT['confusionMatrixCell'], #:class_value => OT['classValue'], - #:confusion_matrix_actual => OT['confusionMatrixActual'], :confusion_matrix_predicted => OT['confusionMatrixPredicted'] - } - - OBJECTS = { :model_uri => OT['Model'], :training_dataset_uri => OT['Dataset'], :test_dataset_uri => OT['Dataset'], - :test_target_dataset_uri => OT['Dataset'], :prediction_dataset_uri => OT['Dataset'], :prediction_feature => OT['Feature'], - :algorithm_uri => OT['Algorithm'],} - - CLASSES = { :crossvalidation_info => OT['CrossvalidationInfo'], :classification_statistics => OT['ClassificationStatistics'], - :regression_statistics => OT['RegresssionStatistics'], :class_value_statistics => OT['ClassValueStatistics'], - :confusion_matrix => OT['ConfusionMatrix'], :confusion_matrix_cell => OT['ConfusionMatrixCell']} - - IGNORE = [ :id, :validation_uri, :crossvalidation_id ] - end class Crossvalidation < Lib::Crossvalidation - include Lib::RDFProvider - + def get_content_as_hash h = {} - Lib::CROSS_VAL_PROPS_REDUNDANT.each{|p| h[p] = self.send(p)} + (Lib::CROSS_VAL_PROPS_REDUNDANT - [:crossvalidation_uri]).each do |p| + h[p] = self.send(p) + end v = [] Validation.find( :all, :conditions => { :crossvalidation_id => self.id } ).each do |val| v.push( val.validation_uri.to_s ) end - h[:validations] = v + h[:validation_uris] = v h end - def uri - crossvalidation_uri + def to_rdf + owl = OpenTox::Owl.create("'Crossvalidation",crossvalidation_uri) + owl.set_data(get_content_as_hash.keys_to_rdf_format) + owl.rdf end - def rdf_title - "Crossvalidation" + def to_yaml + get_content_as_hash.to_yaml end - - LITERALS = [ :created_at, :stratified, :num_folds, :random_seed ] - - LITERAL_NAMES = {:created_at => OT["date"] } - - OBJECT_PROPERTIES = { :dataset_uri => OT['crossvalidationDataset'], :algorithm_uri => OT['crossvalidationAlgorithm'], - :validations => OT['crossvalidationValidation'] } - - OBJECTS = { :dataset_uri => OT['Dataset'], :validations => OT['Validation'], :algorithm_uri => OT['Algorithm']} - - CLASSES = {} - - IGNORE = [ :id, :crossvalidation_uri ] end end |