diff options
author | Christoph Helma <helma@in-silico.ch> | 2010-09-13 17:09:37 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2010-09-13 17:09:37 +0200 |
commit | a05eefb79f82b79d609448e44d0ab533d2593a0f (patch) | |
tree | 693c0708d59134778a11f80e46fb424e0da2c1e5 /lib | |
parent | 0e69de27fca7c1a13e3f38dd52d625e6b6e31758 (diff) |
intermediary commit for new owl serializer
Diffstat (limited to 'lib')
-rw-r--r-- | lib/opentox-ruby-api-wrapper.rb | 2 | ||||
-rw-r--r-- | lib/owl-serializer.rb | 313 | ||||
-rw-r--r-- | lib/owl.rb | 7 |
3 files changed, 320 insertions, 2 deletions
diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb index bae1155..d9db4ac 100644 --- a/lib/opentox-ruby-api-wrapper.rb +++ b/lib/opentox-ruby-api-wrapper.rb @@ -8,6 +8,6 @@ rescue LoadError puts "Please install Openbabel with 'rake openbabel:install' in the compound component" end -['owl', 'compound','dataset','algorithm','model','task','validation','utils','features', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib| +['owl-serializer', 'compound','dataset','algorithm','model','task','validation','utils','features', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib| require lib end diff --git a/lib/owl-serializer.rb b/lib/owl-serializer.rb new file mode 100644 index 0000000..8257b78 --- /dev/null +++ b/lib/owl-serializer.rb @@ -0,0 +1,313 @@ +require 'rdf' +require 'rdf/raptor' +require 'rdf/ntriples' + +# RDF namespaces +include RDF +OT = RDF::Vocabulary.new 'http://www.opentox.org/api/1.1#' + +module OpenTox + + class OwlSerializer + + # to get correct owl-dl, properties and objects have to be typed + # i.e. the following triple is insufficient: + # ModelXY,ot:algorithm,AlgorithmXY + # further needed: + # ot:algorithm,rdf:type,owl:ObjectProperty + # AlgorithmXY,rdf:type,ot:Algorithm + # ot:Algorithm,rdf:type,owl:Class DONE + attr_accessor :model + + def initialize(klass,uri) + @model = RDF::Graph.new(uri) + @model << [ RDF::URI.new(uri), RDF.type, OT[klass] ] + @model << [ OT[klass], RDF.type, OWL.Class ] + # add class statements from OT +=begin + RDF::Reader.open('http://www.opentox.org/api/1.1#', :format => :rdfxml).each_statement do |statement| + @model << statement if statement.predicate == RDF.type #and statement.object == OWL.class + end +=end + end + + # build new owl object + # klass is the class of this object, should be a string like "Model", "Task", ... + # uri is name and identifier of this object + + def self.create( klass, uri ) + OpenTox::OwlSerializer.new(klass,uri) + end + + def rdf + RDF::Writer.for(:rdfxml).buffer do |writer| + writer << @model + #@model.each do |statement| + #writer << statement + #end + end + end + + # sets values of current_node (by default root_node) + # + # note: this does not delete existing triples + # * there can be several triples for the same subject and predicate + # ( e.g. after set("description","bla1") and set("description","bla2") + # both descriptions are in the model, + # but the get("description") will give you only one object (by chance) + # * this does not matter in pratice (only dataset uses this -> load_dataset-methods) + # * identical values appear only once in rdf + def annotate(predicate, object) + @model << [ @model.to_uri, DC[predicate], RDF::Literal.new(object, :datatype => XSD.String) ] + @model << [ DC[predicate], RDF.type, OWL.AnnotationProperty ] + end +=begin + def set(predicate, object, current_node=@root_node ) + + pred = predicate.to_s + raise "uri is no prop, cannot set uri" if pred=="uri" + raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier" + if (object.is_a?(Redland::Node) and object.blank?) or nil==object or object.to_s.size==0 + # set only not-nil values + LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'" + return + end + + if pred=="type" + # predicate is type, set class of current node + set_type(object, current_node) + elsif LITERAL_TYPES.has_key?(pred) + # predicate is literal + set_literal(pred,object,LITERAL_TYPES[pred],current_node) + elsif OBJECT_PROPERTY_CLASS.has_key?(pred) + # predicte is objectProperty, object is another resource + set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node) + else + raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES" + end + end + + # example-triples for setting rdf-type to model: + # model_xy,rdf:type,ot:Model + # ot:Model,rdf:type,owl:Class + def set_type(ot_class, current_node=@root_node) + add current_node, RDF.type, node(ot_class) + add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS + end + + # example-triples for setting description of a model: + # model_xy,ot:description,bla..bla^^xml:string + # ot:description,rdf:type,owl:Literal + def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node) + add current_node, node(literal_name), literal_value# TODO add literal_datatype + add node(literal_name), RDF_TYPE, OWL_TYPE_LITERAL + end + + # example-triples for setting algorithm property of a model: + # model_xy,ot:algorithm,algorihtm_xy + # ot:algorithm,rdf:type,owl:ObjectProperty + # algorihtm_xy,rdf:type,ot:Algorithm + # ot:Algorithm,rdf:type,owl:Class + def set_object_property(property, object, object_class, current_node=@root_node) + object_node = Redland::Resource.new(object) + add current_node, node(property), object_node + add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY + add object_node, RDF_TYPE, node(object_class) + add node(object_class), RDF_TYPE, OWL_TYPE_CLASS + end + + def add(s,p,o) + @triples << "#{s} #{p} #{o}.\n".gsub(/\[/,'<').gsub(/\]/,'>') + end + + # this is (a recursiv method) to set nested-data via hashes (not only simple properties) + # example (for a dataset) + # { :description => "bla", + # :dataEntry => { :compound => "compound_uri", + # :values => [ { :class => "FeatureValue" + # :feature => "feat1", + # :value => 42 }, + # { :class => "FeatureValue" + # :feature => "feat2", + # :value => 123 } ] } } + def set_data(hash, current_node=@root_node) + + hash.each do |k,v| + if v.is_a?(Hash) + # value is again a hash + prop = k.to_s + + # :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS + object_class = v.has_key?(:class) ? v.delete(:class) : OBJECT_PROPERTY_CLASS[prop] + raise "hash key must be a object-property, please add '"+prop.to_s+ + "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class + + # the new node is a class node, to specify the uri of the resource use key :uri + if v[:uri] + # identifier is either a specified uri + class_node = Redland::Resource.new(v.delete(:uri)) + else + # or a new uri, make up internal uri with increment + class_node = new_class_node(object_class,current_node) + end + set_object_property(prop,class_node,object_class,current_node) + # recursivly call set_data method with new node + set_data(v,class_node) + elsif v.is_a?(Array) + # value is an array, each array element is added with current key as predicate + v.each do |value| + set_data( { k => value }, current_node ) + end + else + # neither hash nor array, call simple set-method + set( k, v, current_node ) + end + end + end + + # create a new (internal class) node with unique, uri-like name + def new_class_node(name, current_node=@root_node) + # to avoid anonymous nodes, make up uris for sub-objects + # use counter to make sure each uri is unique + # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ... + count = 1 + while (true) + res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) ) + match = false + @model.find(nil, nil, res) do |s,p,o| + match = true + break + end + if match + count += 1 + else + break + end + end + return res + end + + # for "backwards-compatiblity" + # better use directly: + # set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] ) + def parameters=(params) + + converted_params = [] + params.each do |name, settings| + converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] } + end + set_data( :parameters => converted_params ) + end +=end + + # PENDING move to dataset.rb + # this is for dataset.to_owl + # adds feautre value for a single compound + def add_data_entries(compound_uri,features) + + data_entry = { :compound => compound_uri } + if features + feature_values = [] + features.each do |f| + f.each do |feature_uri,value| + if value.is_a?(Hash) + complex_values = [] + value.each do |uri,v| + complex_values << { :feature => uri, :value => v } + end + feature_values << { :class => "Tuple", :feature => feature_uri, :complexValue => complex_values } + else + feature_values << { :class => "FeatureValue", :feature => feature_uri, :value => value } + end + end + end + data_entry[:values] = feature_values + end + set_data( :dataEntry => data_entry ) + end + + # PENDING move to dataset.rb + # feature values are not loaded for performance reasons + # loading compounds and features into arrays that are given as params + def load_dataset( compounds, features ) + + @model.subjects(RDF_TYPE, node('Compound')).each do |compound| + compounds << get_value(compound) + end + + @model.subjects(RDF_TYPE, node('Feature')).each do |feature| + feature_value_found=false + @model.find(nil, node("feature"), feature) do |potential_feature_value,p,o| + @model.find(nil, node("values"), potential_feature_value) do |s,p,o| + feature_value_found=true + break + end + break if feature_value_found + end + features << get_value(feature) if feature_value_found + end + LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s + end + + # PENDING move to dataset.rb + # loading feature values for the specified feature + # if feature is nil, all feature values are loaded + # + # general remark on the rdf loading (found out with some testing): + # the search methods (subjects/find) are fast, the time consuming parts is creating resources, + # which cannot be avoided in general + def load_dataset_feature_values( compounds, data, feature_uris ) + + raise "no feature-uri array" unless feature_uris.is_a?(Array) + + # values are stored in the data-hash, hash has a key for each compound + compounds.each{|c| data[c] = [] unless data[c]} + + count = 0 + + feature_uris.each do |feature_uri| + LOGGER.debug("load feature values for feature: "+feature_uri ) + feature_node = Redland::Resource.new(feature_uri) + + # search for all feature_value_node with property 'ot_feature' and the feature we are looking for + @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o| + + # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound' + value_nodes = @model.subjects(node('values'),feature_value_node) + if value_nodes.size>0 + raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1 + value_node = value_nodes[0] + + compound_uri = get_value( @model.object(value_node, node('compound')) ) + unless compound_uri + LOGGER.warn "'compound' missing for data-entry of feature "+feature_uri.to_s+ + ", value: "+@model.object(feature_value_node,node("value")).to_s + next + end + + value_node_type = @model.object(feature_value_node, RDF_TYPE) + if (value_node_type == node('FeatureValue')) + value_literal = @model.object( feature_value_node, node('value')) + raise "plain feature value no literal: "+value_literal.to_s unless value_literal.is_a?(Redland::Literal) + data[compound_uri] << {feature_uri => value_literal.get_value } + elsif (value_node_type == node('Tuple')) + complex_values = {} + @model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value| + complex_value_type = @model.object(complex_value, RDF_TYPE) + raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue') + complex_feature_uri = get_value(@model.object( complex_value, node('feature'))) + complex_value = @model.object( complex_value, node('value')) + raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal) + complex_values[ complex_feature_uri ] = complex_value.get_value + end + data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0 + end + count += 1 + LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0) + end + end + LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s + end + end + end +end @@ -271,7 +271,12 @@ module OpenTox #output = RDF::Writer.for(:rdfxml).buffer do |writer| RDF::Writer.for(:rdfxml).buffer do |writer| @triples.each do |statement| + begin writer << statement + rescue => e + LOGGER.error e + LOGGER.info statement.inspect + end end end #output @@ -392,7 +397,7 @@ module OpenTox def add(s,p,o) #@triples << "#{s} #{p} #{o}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - @triples << [s.to_s.sub(/\[/,'').sub(/\]/,''),p.to_s.sub(/\[/,'').sub(/\]/,''),o.to_s.sub(/\[/,'').sub(/\]/,'')] + @triples << [RDF::URI.new(s.to_s.sub(/\[/,'').sub(/\]/,'')),RDF::URI.new(p.to_s.sub(/\[/,'').sub(/\]/,'')),o.to_s.sub(/\[/,'').sub(/\]/,'')] #@model.add s,p,o end |