summaryrefslogtreecommitdiff
path: root/lib/owl-serializer.rb
blob: 8257b7822d1f854887b35651f9ebfc00078392b8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
require 'rdf'
require 'rdf/raptor'
require 'rdf/ntriples'

# RDF namespaces
# Mixing RDF into the top-level object; the code below refers to OWL, DC and
# XSD without an RDF:: prefix, which presumably relies on this include.
include RDF
# Vocabulary rooted at the OpenTox API 1.1 namespace; terms are looked up as OT[name].
OT = RDF::Vocabulary.new 'http://www.opentox.org/api/1.1#'

module OpenTox

  class OwlSerializer
    
    # To produce correct OWL-DL, properties and objects have to be typed.
    # For example, the single triple
    #   ModelXY, ot:algorithm, AlgorithmXY
    # is insufficient; the following typing triples are needed as well:
    #   ot:algorithm, rdf:type, owl:ObjectProperty
    #   AlgorithmXY,  rdf:type, ot:Algorithm
    #   ot:Algorithm, rdf:type, owl:Class   (DONE)
    #
    # The graph that holds the statements of this object; assigned in initialize.
    attr_accessor :model

    # Creates the graph for a new OWL object and adds its typing statements.
    #
    # klass:: name of the class, looked up in the OT vocabulary as OT[klass]
    # uri::   identifier of the object; passed to RDF::Graph.new and used as
    #         the subject of the first statement
    #
    # Statements inserted:
    #   <uri>      rdf:type  OT[klass]
    #   OT[klass]  rdf:type  owl:Class
    def initialize(klass,uri)
      @model = RDF::Graph.new(uri)
      subject = RDF::URI.new(uri)
      ot_class = OT[klass]
      @model << [ subject, RDF.type, ot_class ]
      @model << [ ot_class, RDF.type, OWL.Class ]
      # Disabled: add class statements from OT
      #   RDF::Reader.open('http://www.opentox.org/api/1.1#', :format => :rdfxml).each_statement do |statement|
      #     @model << statement if statement.predicate == RDF.type #and statement.object == OWL.class
      #   end
    end

    # Factory for a new OWL object.
    #
    # klass:: class of the object, should be a string like "Model", "Task", ...
    # uri::   name and identifier of the object
    #
    # Both arguments are handed unchanged to OpenTox::OwlSerializer.new.
    def self.create( klass, uri )
      serializer_class = OpenTox::OwlSerializer
      serializer_class.new(klass, uri)
    end

    # Serializes the graph in @model using the writer class registered for
    # :rdfxml. The graph is pushed into the writer as a whole; the result of
    # the writer's buffer call is returned.
    def rdf
      rdfxml_writer = RDF::Writer.for(:rdfxml)
      rdfxml_writer.buffer { |out| out << @model }
    end
    
    # Adds a Dublin Core annotation to the object described by this graph.
    #
    # predicate:: name of the term, looked up in the DC vocabulary as DC[predicate]
    # object::    annotation value, stored as a literal typed xsd:string
    #
    # Statements inserted:
    #   <@model.to_uri>  DC[predicate]  "object"^^xsd:string
    #   DC[predicate]    rdf:type       owl:AnnotationProperty
    #
    # note: this does not delete existing statements; annotating the same
    # predicate twice with different values leaves both values in the graph.
    def annotate(predicate, object)
      property = DC[predicate]
      # The XML Schema datatype is spelled xsd:string (lower case). XSD.String
      # would name a datatype IRI (...#String) that does not exist.
      literal = RDF::Literal.new(object, :datatype => XSD.string)
      @model << [ @model.to_uri, property, literal ]
      @model << [ property, RDF.type, OWL.AnnotationProperty ]
    end
=begin
    def set(predicate, object, current_node=@root_node )
      
      pred = predicate.to_s
      raise "uri is no prop, cannot set uri" if pred=="uri"
      raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier"
      if (object.is_a?(Redland::Node) and object.blank?) or nil==object or object.to_s.size==0
        # set only not-nil values
        LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'"
        return 
      end
      
      if pred=="type"
        # predicate is type, set class of current node
        set_type(object, current_node)
      elsif LITERAL_TYPES.has_key?(pred)
        # predicate is literal
        set_literal(pred,object,LITERAL_TYPES[pred],current_node)
      elsif OBJECT_PROPERTY_CLASS.has_key?(pred)
        # predicte is objectProperty, object is another resource
        set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node)
      else
        raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES"
      end
    end
   
    # example-triples for setting rdf-type to model:
    # model_xy,rdf:type,ot:Model
    # ot:Model,rdf:type,owl:Class 
    def set_type(ot_class, current_node=@root_node)
      add current_node, RDF.type, node(ot_class)
      add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS
    end
    
    # example-triples for setting description of a model:
    # model_xy,ot:description,bla..bla^^xml:string
    # ot:description,rdf:type,owl:Literal
    def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node)
      add current_node, node(literal_name), literal_value# TODO add literal_datatype
      add node(literal_name), RDF_TYPE, OWL_TYPE_LITERAL
    end
    
    # example-triples for setting algorithm property of a model:
    # model_xy,ot:algorithm,algorihtm_xy
    # ot:algorithm,rdf:type,owl:ObjectProperty
    # algorihtm_xy,rdf:type,ot:Algorithm
    # ot:Algorithm,rdf:type,owl:Class
    def set_object_property(property, object, object_class, current_node=@root_node)
      object_node = Redland::Resource.new(object)
      add current_node, node(property), object_node
      add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY
      add object_node, RDF_TYPE, node(object_class)
      add node(object_class), RDF_TYPE, OWL_TYPE_CLASS
    end

    def add(s,p,o)
      @triples << "#{s} #{p} #{o}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
    end

    # this is a recursive method to set nested data via hashes (not only simple properties)
    # example (for a dataset)
    # { :description => "bla", 
    #   :dataEntry => { :compound => "compound_uri", 
    #                   :values => [ { :class => "FeatureValue"
    #                                  :feature => "feat1", 
    #                                  :value => 42 },
    #                                { :class => "FeatureValue"
    #                                  :feature => "feat2", 
    #                                  :value => 123 } ] } }
    def set_data(hash, current_node=@root_node)
      
      hash.each do |k,v|
        if v.is_a?(Hash)
          # value is again a hash
          prop = k.to_s
          
          # :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS
          object_class = v.has_key?(:class) ? v.delete(:class) : OBJECT_PROPERTY_CLASS[prop]
          raise "hash key must be a object-property, please add '"+prop.to_s+
            "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class
            
          # the new node is a class node, to specify the uri of the resource use key :uri
          if v[:uri] 
            # identifier is either a specified uri
            class_node = Redland::Resource.new(v.delete(:uri))
          else
            # or a new uri, make up internal uri with increment
            class_node = new_class_node(object_class,current_node)
          end
          set_object_property(prop,class_node,object_class,current_node)
          # recursivly call set_data method with new node
          set_data(v,class_node)
        elsif v.is_a?(Array)
          # value is an array, each array element is added with current key as predicate
          v.each do |value|
            set_data( { k => value }, current_node )
          end
        else
          # neither hash nor array, call simple set-method
          set( k, v, current_node )
        end
      end
    end
    
    # create a new (internal class) node with unique, uri-like name
    def new_class_node(name, current_node=@root_node)
      # to avoid anonymous nodes, make up uris for sub-objects
      # use counter to make sure each uri is unique
      # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
      count = 1
      while (true)
        res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) )
        match = false
        @model.find(nil, nil, res) do |s,p,o|
          match = true
          break
        end
        if match
          count += 1
        else
          break
        end
      end
      return res
    end

    # for "backwards-compatibility"
    # better to use directly:
    # set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] } )
    def parameters=(params)
      
      converted_params = []
      params.each do |name, settings|
        converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] }
      end
      set_data( :parameters => converted_params )
    end
=end

    # PENDING move to dataset.rb
    # this is for dataset.to_owl
    # Adds the feature values of a single compound as one data entry.
    #
    # compound_uri:: stored under :compound of the data entry
    # features::     nil/false (no :values key is added), or a collection whose
    #                elements each yield (feature_uri, value) pairs via #each
    #
    # A Hash value becomes a "Tuple" entry whose :complexValue lists one
    # { :feature, :value } hash per pair; any other value becomes a
    # "FeatureValue" entry. The result is passed to set_data as
    # { :dataEntry => entry }.
    #
    # NOTE(review): set_data only appears inside the =begin/=end section of
    # this file - confirm it is defined before this method is called.
    def add_data_entries(compound_uri,features)
      entry = { :compound => compound_uri }
      if features
        collected = []
        features.each do |feature_map|
          feature_map.each do |feature_uri, value|
            item =
              if value.is_a?(Hash)
                parts = value.map { |uri, v| { :feature => uri, :value => v } }
                { :class => "Tuple", :feature => feature_uri, :complexValue => parts }
              else
                { :class => "FeatureValue", :feature => feature_uri, :value => value }
              end
            collected << item
          end
        end
        entry[:values] = collected
      end
      set_data( :dataEntry => entry )
    end

    # PENDING move to dataset.rb
    # Fills the two given arrays from the model. Feature values themselves are
    # not loaded here (for performance reasons).
    #
    # compounds:: receives get_value(node) for every subject typed 'Compound'
    # features::  receives get_value(node) for every subject typed 'Feature'
    #             that is the 'feature' object of some node which is itself
    #             the 'values' object of at least one statement
    #
    # NOTE(review): RDF_TYPE, node, get_value, LOGGER, uri and @model.find are
    # not defined in the visible part of this file - confirm where they come from.
    def load_dataset( compounds, features )
      compound_nodes = @model.subjects(RDF_TYPE, node('Compound'))
      compound_nodes.each { |compound_node| compounds << get_value(compound_node) }

      feature_nodes = @model.subjects(RDF_TYPE, node('Feature'))
      feature_nodes.each do |feature_node|
        in_use = false
        @model.find(nil, node("feature"), feature_node) do |candidate, _pred, _obj|
          # a single hit is enough, so both searches stop at the first match
          @model.find(nil, node("values"), candidate) do |_subj, _pred2, _obj2|
            in_use = true
            break
          end
          break if in_use
        end
        features << get_value(feature_node) if in_use
      end

      summary = "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s
      LOGGER.debug summary
    end
  
    # PENDING move to dataset.rb
    # Loads the feature values for the given features into the data hash.
    #
    # compounds::    each element becomes a key of data (initialised to [] when
    #                data has no truthy entry for it yet)
    # data::         hash that is filled in place; for every value found, a
    #                one-entry hash { feature_uri => value } is appended to
    #                data[compound_uri]
    # feature_uris:: must be an Array, otherwise "no feature-uri array" is raised
    #
    # NOTE(review): an earlier version of this comment said "if feature is nil,
    # all feature values are loaded"; the code below raises for anything that
    # is not an Array, so that no longer holds.
    # NOTE(review): LOGGER, Redland, node, get_value and RDF_TYPE are not
    # defined in the visible part of this file - confirm where they come from.
    #
    # general remark on the rdf loading (found out with some testing):
    # the search methods (subjects/find) are fast, the time-consuming part is creating resources,
    # which cannot be avoided in general
    def load_dataset_feature_values( compounds, data, feature_uris )
      
      raise "no feature-uri array" unless feature_uris.is_a?(Array)
  
       # values are stored in the data-hash, hash has a key for each compound
      compounds.each{|c| data[c] = [] unless data[c]}
      
      # number of feature values handled so far; it is not reset per feature,
      # so the per-feature log line below reports a running total
      count = 0

      feature_uris.each do |feature_uri|
        # NOTE(review): string concatenation without to_s - presumably feature_uri is a String; verify against callers
        LOGGER.debug("load feature values for feature: "+feature_uri )
        feature_node = Redland::Resource.new(feature_uri)
        
         # search for all feature_value_node with property 'feature' and the feature we are looking for
         @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
      
          # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
          value_nodes = @model.subjects(node('values'),feature_value_node)
          # feature value nodes that no 'values' statement points to are ignored
          if value_nodes.size>0
            raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1
            value_node = value_nodes[0]
            
            compound_uri = get_value( @model.object(value_node, node('compound')) )
            unless compound_uri
               LOGGER.warn "'compound' missing for data-entry of feature "+feature_uri.to_s+
                 ", value: "+@model.object(feature_value_node,node("value")).to_s
               # skip this feature value; it is not counted either
               next
            end
            
            # NOTE(review): data[compound_uri] is only initialised for entries of
            # compounds above - confirm every compound found here is in that list
            value_node_type = @model.object(feature_value_node, RDF_TYPE)
            if (value_node_type == node('FeatureValue'))
              # plain value: must be a literal, stored as { feature_uri => value }
              value_literal = @model.object( feature_value_node, node('value'))
              raise "plain feature value no literal: "+value_literal.to_s unless value_literal.is_a?(Redland::Literal)
              data[compound_uri] << {feature_uri => value_literal.get_value }
            elsif (value_node_type == node('Tuple'))
              # tuple: collect { complex_feature_uri => value } over all 'complexValue' nodes
              complex_values = {}
              @model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value|
                complex_value_type = @model.object(complex_value, RDF_TYPE)
                raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue')
                complex_feature_uri = get_value(@model.object( complex_value, node('feature')))
                # from here on complex_value refers to the value object, no longer to the node
                complex_value = @model.object( complex_value, node('value'))
                raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal)
                complex_values[ complex_feature_uri ] = complex_value.get_value
              end
              # tuples without any complex value are not stored
              data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0
            end
            # counted even when the node type matched neither branch above
            count += 1
            # progress message every 1000 values
            LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0)
          end
        end
        LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s
      end
    end
  end
end