# cgit page header (extraction residue): summary | refs | log | tree | commit | diff
# path: root/lib/owl.rb
# blob: 700e6eefe61a6ea11a583474914706c10e921ea1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
# Extensions to Redland::Literal: creating typed literals from ruby values
# and converting typed literals back into ruby values.
class Redland::Literal
  
  # Creates a typed literal.
  #
  # value    - ruby object, its to_s representation becomes the literal value
  # datatype - optional datatype; a Redland::Uri is used as is, anything else is
  #            converted with to_s and wrapped into a Redland::Uri.
  #            If omitted, the datatype is derived from the class of value
  #            (see parse_datatype_uri).
  #
  # Raises RuntimeError (from parse_datatype_uri) if no datatype is given and
  # the class of value is not supported.
  def self.create(value, datatype=nil)
    type_uri =
      if !datatype
        Redland::Literal.parse_datatype_uri(value)
      elsif datatype.is_a?(Redland::Uri)
        datatype
      else
        Redland::Uri.new(datatype.to_s)
      end
    Redland::Literal.new(value.to_s, nil, type_uri)
  end
  
  # the literal node of the ruby swig api provides the 'value' of a literal but not the 'datatype'
  # (solution found in mailing list)
  #
  # Returns the datatype uri as string, or nil if the literal has no datatype uri.
  def datatype
    uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
    return Redland.librdf_uri_to_string(uri) if uri
  end
  
  # gets value of literal, value class is set according to literal datatype
  def get_value
    Redland::Literal.parse_value( self.value, self.datatype )
  end
  
  # Datatype uris, looked up via the XML constant (presumably the XML Schema
  # namespace, it is defined outside of this file section).
  # There is deliberately no 'private' marker here: it would not affect the
  # 'def self.' methods below, and they have to stay callable with an explicit
  # receiver (get_value calls Redland::Literal.parse_value).
  @@type_string = XML["string"].uri
  @@type_uri = XML["anyURI"].uri
  @@type_float = XML["float"].uri
  @@type_double = XML["double"].uri
  @@type_date = XML["date"].uri
  @@type_boolean = XML["boolean"].uri
  @@type_datetime = XML["dateTime"].uri
  @@type_integer = XML["integer"].uri
  
  # parses value according to datatype uri
  #
  # string_value - lexical value of the literal
  # datatype_uri - datatype uri (string, or any object whose to_s is the uri); may be nil/empty
  #
  # Returns the string itself for string, anyURI, date and dateTime (and, with
  # a warning, for a missing datatype), a Float for float/double, true/false for
  # boolean and an Integer for integer.
  # Raises RuntimeError for any other datatype.
  def self.parse_value(string_value, datatype_uri)
    type = datatype_uri.to_s
    if type.empty?
      LOGGER.warn("empty datatype for literal with value: "+string_value.to_s)
      return string_value
    end
    case type
    when @@type_string.to_s
      string_value
    when @@type_uri.to_s
      string_value #PENDING uri as string?
    when @@type_float.to_s, @@type_double.to_s
      string_value.to_f
    when @@type_boolean.to_s
      # xsd:boolean allows "true"/"false" as well as "1"/"0"
      ["TRUE", "1"].include?(string_value.strip.upcase)
    when @@type_date.to_s, @@type_datetime.to_s
      string_value #PENDING date as string?
    when @@type_integer.to_s
      string_value.to_i
    else
      raise "unknown literal datatype: '"+type+"', value is "+string_value.to_s
    end
  end
  
  # parse datatype uri according to value class
  #
  # Returns the datatype uri matching the class of value.
  # Raises RuntimeError if value is nil or its class is not supported.
  def self.parse_datatype_uri(value)
    case value
    when nil
      raise "illegal datatype: value is nil"
    when String
      # PENDING: uri check too slow?
      OpenTox::Utils.is_uri?(value) ? @@type_uri : @@type_string
    when Float
      @@type_float
    when TrueClass, FalseClass
      @@type_boolean
    when Integer
      @@type_integer
    when DateTime, Time
      @@type_datetime
    when Date
      # has to be checked after DateTime, which is a subclass of Date
      @@type_date
    else
      raise "illegal datatype: "+value.class.to_s+" "+value.to_s
    end
  end
end

module OpenTox

	class Owl
   
    # ot_class is the class of the object, e.g. "Model","Dataset", ...
    # root_node is the root-object node in the rdf
    # uri the uri of the object
		attr_accessor :ot_class, :root_node, :uri, :model

		# Sets up an empty rdf model that is backed by a Redland memory store.
		def initialize
			store = Redland::MemoryStore.new
			@model = Redland::Model.new(store)
		end

		# Creates a new owl object for the given class and uri.
		#
		# ot_class - class of the object, e.g. "Model", "Dataset"; added as rdf type of the root node
		# uri      - uri of the object, converted with to_s and stripped
		#
		# Returns the new OpenTox::Owl with ot_class, root_node and uri set.
		def self.create( ot_class, uri )
			clean_uri = uri.to_s.strip
			owl = OpenTox::Owl.new
			owl.ot_class = ot_class
			owl.root_node = Redland::Resource.new(clean_uri)
			# keep the uri accessor in sync with the root node, as from_data does
			owl.uri = clean_uri
			owl.set("type",owl.node(owl.ot_class)) #,true))
			owl
		end
  
    # Loads an owl object from rdf data.
    #
    # data     - rdf data, handed to the Redland parser
    # base_uri - base uri for parsing, also reported when an error is raised
    # ot_class - class of the root object, e.g. "Model", "Dataset"
    #
    # The root node is the node of type ot_class that is not the object of any
    # statement. Every error raised while loading is passed on to
    # RestClientWrapper.raise_uri_error together with base_uri.
    def self.from_data(data, base_uri, ot_class)
      
      owl = OpenTox::Owl.new
      parser = Redland::Parser.new
      
      begin
        parser.parse_string_into_model(owl.model, data, base_uri)
        
        # inspect every node of the requested type to determine root_node and uri
        owl.model.find(nil, owl.node("type"), owl.node(ot_class)) do |candidate, _predicate, _object|
          # a node that occurs as object of some statement cannot be the root
          referenced = false
          owl.model.find(nil, nil, candidate) do |_s, _p, _o|
            referenced = true
            break
          end
          unless referenced
            raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
            raise "illegal root node type, no uri specified\n"+data.to_s if candidate.blank?
            owl.uri = candidate.uri.to_s
            owl.root_node = candidate
          end
        end
        
        # no root node found: list the available types in the error message
        unless owl.root_node
          available_types = []
          owl.model.find(nil, owl.node("type"), nil) do |_s, _p, type_node|
            available_types << type_node.to_s
          end
          raise "root node for class '"+ot_class+"' not found (available type nodes: "+available_types.inspect+")"
        end
        if !owl.uri or !Utils.is_uri?(owl.uri)
          raise "no uri in rdf: '"+owl.uri+"'"
        end
        owl.ot_class = ot_class
        owl
      rescue => e
        RestClientWrapper.raise_uri_error(e.message, base_uri)
      end
    end
  
	  # Requests the "application/rdf+xml" representation of uri and loads it via from_data,
	  # using uri as base uri.
	  def self.from_uri(uri, ot_class)
	    rdf_xml = RestClientWrapper.get(uri, :accept => "application/rdf+xml").to_s
	    from_data(rdf_xml, uri, ot_class)
	  end

		# Returns the result of to_string of the underlying rdf model.
		def rdf
			serialized = @model.to_string
			serialized
		end
  
    # Gets the value of a property of the root node.
    #
    # name - name of the property (String or Symbol), resolved via node()
    #
    # Returns the result of get_value for the object node that the model
    # returns for root node and property.
    # Raises RuntimeError if name is "uri" (use owl.uri instead).
    def get(name)
      property_name = name.to_s
      # check the normalized name, so that :uri is rejected just like "uri"
      raise "uri is no prop, use owl.uri instead" if property_name=="uri"
      get_value( @model.object(@root_node, node(property_name)) )
    end
    
    private
    # Converts a node into a ruby value:
    # nil for a missing or blank node, the typed value for a literal,
    # the uri as string for every other node.
    def get_value( node )
      return nil unless node
      return node.get_value if node.is_a?(Redland::Literal)
      node.blank? ? nil : node.uri.to_s
    end
    
    public
    # Sets a property of the root node, replacing an existing entry.
    #
    # name     - name of the property (String or Symbol), resolved via node()
    # value    - a Redland::Node is added as is, for any other value a literal is created
    # datatype - optional datatype of the literal, not allowed if value is a Redland::Node
    #
    # Raises RuntimeError if name is "uri", or if a datatype is given together with a node.
    def set(name, value, datatype=nil)
      
      property_name = name.to_s
      # check the normalized name, so that :uri is rejected just like "uri"
      raise "uri is no prop, cannot set uri" if property_name=="uri"
      value_is_node = value.is_a?(Redland::Node)
      # validate before touching the model, a rejected call must not drop the existing entry
      raise "not nil datatype not allowed when setting redland node as value" if value_is_node and datatype
      
      property_node = node(property_name) #, true)
      begin # delete existing entry
        existing = @model.object(@root_node, property_node)
        @model.delete @root_node, property_node, existing
      rescue
        # deliberately best effort: a failing delete must not prevent setting the new value
      end
      if value_is_node
        @model.add @root_node, property_node, value
      else # if value is no node, a literal is created
        # NOTE(review): value is converted to a string before Redland::Literal.create is called,
        # so without explicit datatype the datatype is derived from a String - confirm this is intended
        @model.add @root_node, property_node, Redland::Literal.create(value.to_s, datatype)
      end
    end

		# Adds one Parameter resource per entry of params and links it to the root node.
		#
		# params - iterated with each, yielding name and settings;
		#          settings[:scope] and settings[:value] are added as paramScope and paramValue
		def parameters=(params)
			params.each do |param_name, param_settings|
				param_node = @model.create_resource
				@model.add(param_node, node('type'), node('Parameter'))
				@model.add(param_node, node('title'), param_name)
				@model.add(param_node, node('paramScope'), param_settings[:scope])
				@model.add(param_node, node('paramValue'), param_settings[:value])
				@model.add(@root_node, node('parameters'), param_node)
			end
		end

		# Adds the feature values of a compound to the model.
		#
		# compound_uri - uri of the compound
		# features     - array of hashes, each mapping a feature uri to a value;
		#                a Hash value (sub-feature uri => value) is stored as Tuple with
		#                one complexValue per pair, any other value as FeatureValue
		#
		# The compound gets a single DataEntry (created on first use) that
		# references all value nodes via 'values'.
		def add_data_entries(compound_uri,features)
			# add compound
			# NOTE(review): compounds created below never get a dc identifier, so this lookup
			# presumably only matches compounds from loaded rdf - confirm
			compound = @model.subject(DC["identifier"], compound_uri)
			if compound.nil?
				compound = @model.create_resource(compound_uri)
				@model.add compound, node('type'), node("Compound")
			end
			# looks up the data entry of the compound, creates and registers it if missing
			data_entry_for_compound = lambda do
				entry = @model.subject node('compound'), compound
				if entry.nil?
					entry = @model.create_resource
					@model.add @root_node, node('dataEntry'), entry
					@model.add entry, node('type'), node("DataEntry")
					@model.add entry, node('compound'), compound
				end
				entry
			end
			features.each do |feature_values|
				feature_values.each do |feature_uri,value|
					# add feature
					feature = find_or_create_feature feature_uri
					if value.is_a?(Hash)
						# create tuple
						tuple = @model.create_resource
						@model.add tuple, node('type'), node("Tuple")
						@model.add tuple, node('feature'), feature
						value.each do |uri,v|
							# own variable name, must not overwrite the variables of the outer blocks
							sub_feature = find_or_create_feature uri
							complex_value = @model.create_resource
							@model.add tuple, node('complexValue'), complex_value
							@model.add complex_value, node('type'), node("FeatureValue")
							@model.add complex_value, node('feature'), sub_feature
							@model.add complex_value, node('value'), Redland::Literal.create(v)
						end
						# add data entry
						data_entry = data_entry_for_compound.call
						@model.add data_entry, node('values'), tuple
					else
						data_entry = data_entry_for_compound.call
						values = @model.create_resource
						@model.add data_entry, node('values'), values
						@model.add values, node('type'), node('FeatureValue')
						@model.add values, node('feature'), feature
						@model.add values, node('value'),  Redland::Literal.create(value)
					end
				end
			end
		end
  
  private
  # Searches the feature node for a feature uri.
  #
  # Returns the first node of type Feature whose get_value result equals
  # feature_uri, or nil if there is none.
  def find_feature(feature_uri)
    # PENDING: more efficiently get feature node?
    # nodes are taken from node(), like everywhere else in this class, to benefit from its cache
    @model.subjects(node('type'), node('Feature')).find do |feature|
      feature_uri==get_value(feature)
    end
  end

  public
	# Returns the feature node for feature_uri, the node is created if it is not yet in the model.
	#
	# A new feature gets the type Feature, the feature_uri as creator and a title
	# that is derived from the last path segment of the uri: the part after '#',
	# or the segment itself if the uri has no fragment.
	def find_or_create_feature(feature_uri)
		feature = find_feature(feature_uri)
		unless feature
			feature = @model.create_resource(feature_uri)
			@model.add feature, node('type'), node("Feature")
			name_parts = File.basename(feature_uri).split(/#/)
			# uris without fragment have no second part, fall back to the basename instead of adding nil
			title = name_parts[1] || name_parts[0]
			@model.add(feature, node("title"), title) if title
			@model.add feature, node('creator'), feature_uri
		end
		feature
	end
 
  # Appends the values of all Compound nodes to compounds and the values of
  # all Feature nodes to features (both arrays are given as params and modified in place).
  # feature values are not loaded for performance reasons
  def load_dataset( compounds, features )
    
    [['Compound', compounds], ['Feature', features]].each do |ot_type, target|
      @model.subjects(node('type'), node(ot_type)).each { |resource| target << get_value(resource) }
    end
    LOGGER.debug "loaded #{compounds.size} compounds and #{features.size} features"
  end

  # loading feature values for the specified feature
  # if feature is nil, all feature values are loaded
  #
  # compounds   - compound uris, each gets an (empty) array in data
  # data        - hash, filled with compound uri => array of { feature uri => value }
  # feature_uri - uri of the feature to load, nil loads the values of all features
  #
  # Raises RuntimeError if the feature is not found, if a feature value node is not
  # referenced by exactly one 'values' statement, or if it is no FeatureValue with a literal value.
  #
  # general remark on the rdf loading (found out with some testing):
  # the search methods (subjects/find) are fast, the time consuming part is creating resources,
  # which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features) 
  def load_dataset_feature_values( compounds, data, feature_uri=nil )
    
    LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") ) 

    # values are stored in the data-hash, hash has a key for each compound
    compounds.each{|c| data[c] = [] unless data[c]}
    
    load_all_features = feature_uri==nil
    feature_node = nil
    
    # create feature node for feature uri if specified
    unless load_all_features
      feature_node = find_feature(feature_uri)
      raise "feature node not found" unless feature_node
    end
    
    count = 0
    
    # performance tweak: store uris to save some resource init time
    compound_uri_store = {}
    feature_uri_store = {}
    
    # search for all feature_value_nodes with property 'feature'
    # feature_node is either nil, i.e. a wildcard, or specified
    @model.find(nil, node('feature'), feature_node) do |feature_value_node,_predicate,o|
  
      # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
      value_nodes = @model.subjects(node('values'),feature_value_node)
      raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1
      value_node = value_nodes[0]
      compound_node  = @model.object(value_node, node('compound'))
      compound_uri = (compound_uri_store[compound_node.to_s] ||= get_value(compound_node))
      
      if load_all_features
        # if load all features, feature_uri is not specified, derive from feature node
        feature_uri = (feature_uri_store[o.to_s] ||= get_value(o))
      end
      
      value_node_type = @model.object(feature_value_node, node('type'))
      if (value_node_type == node('FeatureValue'))
         value_literal = @model.object( feature_value_node, node('value'))
         raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)
         # the model may contain compounds that are not listed in compounds, create their array on demand
         (data[compound_uri] ||= []) << {feature_uri => value_literal.get_value }
      else
        raise "feature value type not yet implemented "+value_node_type.to_s
      end
      count += 1
      LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
    end
    
    LOGGER.debug "loaded "+count.to_s+" feature values"
  end
  
  # Cache of nodes by short name, pre-filled with the rdf- and dc-properties.
  # node() adds the OT node of every other name on first use.
  @@property_nodes = {
    "type"    => RDF["type"],
    "about"   => RDF["about"],
    "title"   => DC["title"],
    "creator" => DC["creator"],
    #"identifier" => DC["identifier"], identifier is deprecated
    "date"    => DC["date"],
    "format"  => DC["format"]
  }
  
#  @object_prop = OWL["ObjectProperty"]
#  @@type = { "Validation" => OWL["Class"],
#             "Model" => OWL["Class"],
#             "title" => OWL["AnnotationProperty"],
#             "creator" => OWL["AnnotationProperty"],
#             "date" => OWL["AnnotationProperty"],
#             "format" => OWL["AnnotationProperty"],
#             "predictedVariables" => @object_prop}
  
  # Returns the node for a short property or class name.
  # * names that are pre-filled in @@property_nodes yield their rdf- or dc-node,
  #   every other name is resolved via OT[name]
  # * resolved nodes are kept in @@property_nodes, as creating nodes is costly
  #
  # Raises RuntimeError for "identifier", which is deprecated in favour of owl.uri.
  def node(name) #, write_type_to_model=false)
    raise "dc[identifier] deprecated, use owl.uri" if name=="identifier"
    resolved = (@@property_nodes[name] ||= OT[name])
    
#    if write_type_to_model and name!="type"
#      raise "no type defined for '"+name+"'" unless @@type[name] 
#      @model.add n,RDF['type'],@@type[name]
#    end
    resolved
  end

=begin
    def data
      LOGGER.debug("getting data from model")
      
      data = {}
      @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
        compound_node  = @model.object(data_entry, OT['compound'])
        compound_uri = @model.object(compound_node, DC['identifier']).to_s
        @model.find(data_entry, OT['values'], nil) do |s,p,values|
          feature_node = @model.object values, OT['feature']
          feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
          type = @model.object(values, RDF['type'])
          if type == OT['FeatureValue']
            value = @model.object(values, OT['value']).to_s
            case value.to_s
            when TRUE_REGEXP # defined in environment.rb
              value = true
            when FALSE_REGEXP # defined in environment.rb
              value = false
            when /.*\^\^<.*XMLSchema#.*>/
              #HACK for reading ambit datasets
              case value.to_s
              when /XMLSchema#string/
                value = value.to_s[0..(value.to_s.index("^^")-1)]
              when /XMLSchema#double/
                value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
              else
                LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
                value = nil
              end
            else
              LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
              value = nil
            end
            LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
            
            return data if (data.keys.size)>9 && !data.has_key?(compound_uri)
            
            #puts "c "+compound_uri.to_s
            #puts "f "+feature_uri.to_s
            #puts "v "+value.to_s
            #puts ""
            data[compound_uri] = [] unless data[compound_uri]
            data[compound_uri] << {feature_uri => value} unless value.nil?
          elsif type == OT['Tuple']
            entry = {}
            data[compound_uri] = [] unless data[compound_uri]
            #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
            @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
              name_node = @model.object complex_value, OT['feature']
              name = @model.object(name_node, DC['title']).to_s
              value = @model.object(complex_value, OT['value']).to_s
              v = value.sub(/\^\^.*$/,'') # remove XML datatype
              v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
              entry[name] = v
            end
            data[compound_uri] << {feature_uri => entry} unless entry.empty?
          end
        end
      end
      data
    end
=end

  end
end