1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
|
# Reopens Redland::Literal to make literals datatype-aware: typed construction,
# datatype lookup, and conversion of the lexical value into a Ruby object.
class Redland::Literal
  # Datatype uris this class can produce and parse, looked up via XML[...]
  # once, when the class body is evaluated.
  @@type_string   = XML["string"].uri
  @@type_uri      = XML["anyURI"].uri
  @@type_float    = XML["float"].uri
  @@type_double   = XML["double"].uri
  @@type_date     = XML["date"].uri
  @@type_boolean  = XML["boolean"].uri
  @@type_datetime = XML["dateTime"].uri

  # Creates a typed literal whose lexical value is value.to_s.
  #
  # value    - any object; its to_s becomes the literal value
  # datatype - optional datatype. A Redland::Uri is used as is, any other
  #            truthy object is converted with to_s and wrapped in a
  #            Redland::Uri. When omitted, the datatype is derived from the
  #            Ruby class of value (see parse_datatype_uri).
  #
  # Raises RuntimeError when no datatype is given and none can be derived.
  def self.create(value, datatype=nil)
    type_uri =
      if !datatype
        Redland::Literal.parse_datatype_uri(value)
      elsif datatype.is_a?(Redland::Uri)
        datatype
      else
        Redland::Uri.new(datatype.to_s)
      end
    Redland::Literal.new(value.to_s, nil, type_uri)
  end

  # Returns the datatype uri of this literal as reported by librdf, converted
  # with librdf_uri_to_string, or nil when librdf returns no datatype uri.
  # The literal node of the ruby swig api provides the 'value' of a literal
  # but not the 'datatype' (solution found on the mailing list).
  def datatype
    uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
    uri ? Redland.librdf_uri_to_string(uri) : nil
  end

  # Returns the value of this literal, converted according to its datatype.
  def get_value
    Redland::Literal.parse_value( self.value, self.datatype )
  end

  # NOTE: the two class-level helpers below are intentionally public. They are
  # invoked with an explicit receiver (Redland::Literal.parse_value /
  # Redland::Literal.parse_datatype_uri), and a bare `private` keyword does
  # not apply to `def self.` methods anyway.

  # Converts the lexical form string_value according to datatype_uri.
  #
  # string_value - lexical value of the literal
  # datatype_uri - datatype uri as String; nil or empty means "untyped"
  #
  # Returns
  # * string_value unchanged for an empty datatype (a warning is logged) and
  #   for string, anyURI, date and dateTime
  # * a Float for float and double
  # * true/false for boolean: "true" in any letter case and "1" are true,
  #   every other lexical form is false
  #
  # Raises RuntimeError for any other datatype uri.
  def self.parse_value(string_value, datatype_uri)
    if datatype_uri.nil? || datatype_uri.empty?
      LOGGER.warn("empty datatype for literal with value: #{string_value}")
      return string_value
    end
    case datatype_uri
    when @@type_string.to_s
      string_value
    when @@type_uri.to_s
      string_value # PENDING uri as string?
    when @@type_float.to_s, @@type_double.to_s
      string_value.to_f
    when @@type_boolean.to_s
      # xsd:boolean allows the lexical forms true/false and 1/0
      ["TRUE", "1"].include?(string_value.upcase)
    when @@type_date.to_s, @@type_datetime.to_s
      string_value # PENDING date as string?
    else
      raise "unknown literal datatype: '#{datatype_uri}', value is #{string_value}"
    end
  end

  # Derives the datatype uri from the Ruby class of value.
  #
  # String      -> anyURI if OpenTox::Utils.is_uri?(value), otherwise string
  # Float       -> float
  # true, false -> boolean
  #
  # Raises RuntimeError for nil and for every other class.
  def self.parse_datatype_uri(value)
    case value
    when nil
      raise "illegal datatype: value is nil"
    when String
      # PENDING: uri check too slow?
      OpenTox::Utils.is_uri?(value) ? @@type_uri : @@type_string
    when Float
      @@type_float
    when true, false
      @@type_boolean
    else
      raise "illegal datatype: #{value.class} #{value}"
    end
  end
end
module OpenTox
class Owl
# ot_class is the class of the object, e.g. "Model", "Dataset", ...
# root_node is the root-object node in the rdf
# uri is the uri of the object
# model is the Redland model that holds the rdf statements
attr_accessor :ot_class, :root_node, :uri, :model
# Sets up an empty Redland model on top of a fresh Redland::MemoryStore.
def initialize
  store = Redland::MemoryStore.new
  @model = Redland::Model.new(store)
end
# Creates a new Owl for an object of class ot_class.
#
# ot_class - OpenTox class name, e.g. "Model"; also added as the rdf type of
#            the root node
# uri      - uri of the object; converted with to_s and stripped before it
#            becomes the root resource
#
# Returns the new OpenTox::Owl.
# NOTE(review): owl.uri is not assigned here (from_data does assign it) -
# confirm that callers set it themselves.
def self.create( ot_class, uri )
  root_uri = uri.to_s.strip
  owl = OpenTox::Owl.new
  owl.ot_class = ot_class
  owl.root_node = Redland::Resource.new(root_uri)
  type_node = owl.node(ot_class)
  owl.set("type", type_node)
  owl
end
# Loads an owl from serialized rdf data.
#
# data     - rdf handed to Redland::Parser#parse_string_into_model
# base_uri - base uri for the parser; also passed on to
#            RestClientWrapper.raise_uri_error if anything fails
# ot_class - class name the root object has to be typed with
#
# The root object is the subject typed as ot_class that does not occur as the
# object of any statement; its uri becomes owl.uri. Every error raised while
# parsing or while determining the root node is handed to
# RestClientWrapper.raise_uri_error together with base_uri.
#
# Returns the loaded OpenTox::Owl.
def self.from_data(data, base_uri, ot_class)
  owl = OpenTox::Owl.new
  parser = Redland::Parser.new
  begin
    parser.parse_string_into_model(owl.model, data, base_uri)

    # determine root_node and uri among all subjects typed as ot_class
    owl.model.find(nil, owl.node("type"), owl.node(ot_class)) do |candidate, _pred, _obj|
      # a candidate that some statement points to is nested, hence not the root
      referenced = false
      owl.model.find(nil, nil, candidate) do |_s, _p, _o|
        referenced = true
        break
      end
      unless referenced
        raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
        raise "illegal root node type, no uri specified\n"+data.to_s if candidate.blank?
        owl.uri = candidate.uri.to_s
        owl.root_node = candidate
      end
    end

    # no root node: report which type nodes are present instead
    unless owl.root_node
      available_types = []
      owl.model.find(nil, owl.node("type"), nil) do |_s, _p, type_node|
        available_types << type_node.to_s
      end
      raise "root node for class '"+ot_class+"' not found (available type nodes: "+available_types.inspect+")"
    end

    if !owl.uri || !Utils.is_uri?(owl.uri)
      raise "no uri in rdf: '"+owl.uri+"'"
    end
    owl.ot_class = ot_class
    owl
  rescue => e
    RestClientWrapper.raise_uri_error(e.message, base_uri)
  end
end
# Requests uri via RestClientWrapper.get with accept "application/rdf+xml"
# and loads the response (converted with to_s) through from_data, using uri
# as base uri as well.
def self.from_uri(uri, ot_class)
  rdf_xml = RestClientWrapper.get(uri, :accept => "application/rdf+xml").to_s
  from_data(rdf_xml, uri, ot_class)
end
# Returns the serialized model, i.e. whatever @model.to_string produces.
def rdf
  serialized = @model.to_string
  serialized
end
# Reads the property name of the root node.
#
# name - property name; converted with to_s and resolved via node
#
# Returns the object of the statement converted by get_value.
# Raises RuntimeError for "uri", which is no property (use owl.uri).
def get(name)
  raise "uri is no prop, use owl.uri instead" if name=="uri"
  object_node = @model.object(@root_node, node(name.to_s))
  get_value(object_node)
end
private
# Converts a Redland node into a plain Ruby value.
#
# Returns nil for a missing (nil/false) or blank node, the result of
# get_value for a Redland::Literal, and the uri as String for anything else.
def get_value( node )
  return nil unless node
  return node.get_value if node.is_a?(Redland::Literal)
  node.blank? ? nil : node.uri.to_s
end
public
# Sets the property name of the root node, replacing an existing entry.
#
# name     - property name; converted with to_s and resolved via node
# value    - a Redland::Node is added as is, everything else is converted
#            with to_s and wrapped in a literal
# datatype - datatype for the literal; must be nil when value is a node
#
# Raises RuntimeError for "uri" and for a node value with a datatype.
# NOTE(review): value is stringified before Redland::Literal.create sees it,
# so with datatype=nil the derived datatype can only be string or anyURI -
# confirm that this is intended.
def set(name, value, datatype=nil)
  raise "uri is no prop, cannot set uri" if name=="uri"
  property_node = node(name.to_s)

  # best effort: drop the current entry, any error is ignored on purpose
  begin
    previous = @model.object(@root_node, property_node)
    @model.delete(@root_node, property_node, previous)
  rescue
  end

  new_object =
    if value.is_a?(Redland::Node)
      raise "not nil datatype not allowed when setting redland node as value" if datatype
      value
    else
      # value is no node, so a literal is created
      Redland::Literal.create(value.to_s, datatype)
    end
  @model.add(@root_node, property_node, new_object)
end
# Adds one Parameter resource per entry of params and links each of them to
# the root node via 'parameters'.
#
# params - maps the parameter name (stored as title) to a settings object
#          that is read with [:scope] (paramScope) and [:value] (paramValue)
def parameters=(params)
  params.each do |param_name, param_settings|
    param_node = @model.create_resource
    @model.add(param_node, node('type'), node('Parameter'))
    @model.add(param_node, node('title'), param_name)
    @model.add(param_node, node('paramScope'), param_settings[:scope])
    @model.add(param_node, node('paramValue'), param_settings[:value])
    @model.add(@root_node, node('parameters'), param_node)
  end
end
# Adds the feature values of one compound to the model.
#
# compound_uri - uri of the compound
# features     - enumerable of hashes mapping feature uri => value. A Hash
#                value (including Hash subclasses) is stored as a Tuple with
#                one complexValue per entry (sub-feature uri => value);
#                every other value is stored as a single FeatureValue
#                literal created with Redland::Literal.create.
#
# All values of a compound are attached to one DataEntry, which is created
# on first use. Returns features.
# NOTE(review): the compound is looked up via DC["identifier"], but this
# method never adds such a statement (and node() calls identifier
# deprecated), so compounds created here are presumably never found by that
# lookup - confirm.
def add_data_entries(compound_uri,features)
  # add compound
  compound = @model.subject(DC["identifier"], compound_uri)
  if compound.nil?
    compound = @model.create_resource(compound_uri)
    @model.add compound, node('type'), node("Compound")
  end

  features.each do |feature_values|
    feature_values.each do |feature_uri, value|
      # add feature
      feature = find_or_create_feature feature_uri
      if value.is_a?(Hash)
        # create tuple
        tuple = @model.create_resource
        @model.add tuple, node('type'), node("Tuple")
        @model.add tuple, node('feature'), feature
        value.each do |sub_feature_uri, sub_value|
          # own local name: must not overwrite a variable of an outer block
          sub_feature = find_or_create_feature sub_feature_uri
          complex_value = @model.create_resource
          @model.add tuple, node('complexValue'), complex_value
          @model.add complex_value, node('type'), node("FeatureValue")
          @model.add complex_value, node('feature'), sub_feature
          @model.add complex_value, node('value'), Redland::Literal.create(sub_value)
        end
        # add data entry
        data_entry = find_or_create_data_entry(compound)
        @model.add data_entry, node('values'), tuple
      else
        data_entry = find_or_create_data_entry(compound)
        values = @model.create_resource
        @model.add data_entry, node('values'), values
        @model.add values, node('type'), node('FeatureValue')
        @model.add values, node('feature'), feature
        @model.add values, node('value'), Redland::Literal.create(value)
      end
    end
  end
end

# Returns the DataEntry node that references compound via 'compound'; if
# there is none, creates it and links it to the root node via 'dataEntry'.
def find_or_create_data_entry(compound)
  data_entry = @model.subject node('compound'), compound
  if data_entry.nil?
    data_entry = @model.create_resource
    @model.add @root_node, node('dataEntry'), data_entry
    @model.add data_entry, node('type'), node("DataEntry")
    @model.add data_entry, node('compound'), compound
  end
  data_entry
end
private :find_or_create_data_entry
private
# Returns the first subject typed as OT['Feature'] whose get_value equals
# feature_uri, or nil if there is none.
# PENDING: more efficiently get feature node?
def find_feature(feature_uri)
  feature_nodes = @model.subjects(RDF['type'], OT['Feature'])
  feature_nodes.find { |candidate| feature_uri==get_value(candidate) }
end
public
# Returns the feature node for feature_uri, creating it if find_feature does
# not find one.
#
# A new feature gets the type Feature, feature_uri as creator, and a title
# taken from the last path segment of feature_uri: the part after the first
# '#' if there is one, otherwise the whole segment (so that uris without a
# fragment no longer end up with a nil title).
def find_or_create_feature(feature_uri)
  feature = find_feature(feature_uri)
  unless feature
    feature = @model.create_resource(feature_uri)
    @model.add feature, node('type'), node("Feature")
    basename = File.basename(feature_uri)
    title = basename.split(/#/)[1] || basename
    @model.add feature, node("title"), title
    @model.add feature, node('creator'), feature_uri
  end
  feature
end
# Loads compounds and features into the arrays that are given as params.
# Feature values are not loaded here for performance reasons.
#
# compounds - receives (via <<) the get_value of every subject typed Compound
# features  - receives (via <<) the get_value of every subject typed Feature
def load_dataset( compounds, features )
  compound_nodes = @model.subjects(node('type'), node('Compound'))
  compound_nodes.each { |compound_node| compounds << get_value(compound_node) }

  feature_nodes = @model.subjects(node('type'), node('Feature'))
  feature_nodes.each { |feature_node| features << get_value(feature_node) }

  LOGGER.debug "loaded #{compounds.size} compounds and #{features.size} features"
end
# Loads the feature values for the specified feature; if feature_uri is nil,
# the values of all features are loaded.
#
# compounds   - compound uris; each one gets a (possibly empty) array in data
# data        - hash that is filled: compound uri => array of
#               { feature uri => value } hashes
# feature_uri - uri of the feature to load, nil for all features
#
# Raises RuntimeError if the feature is unknown, if a feature value node is
# not referenced by exactly one 'values' statement, if its type is not
# FeatureValue, or if its value is no literal.
#
# General remark on the rdf loading (found out with some testing):
# the search methods (subjects/find) are fast, the time consuming part is
# creating resources, which cannot be avoided in general (hence the uri
# caches below that are used when loading all features).
def load_dataset_feature_values( compounds, data, feature_uri=nil )
  LOGGER.debug("load feature values"+ ( feature_uri.nil? ? "" : " for feature: "+feature_uri ) )

  # values are stored in the data-hash, hash has a key for each compound
  compounds.each { |compound| data[compound] ||= [] }

  # the feature node is either nil, i.e. a wildcard, or the specified feature
  load_all_features = feature_uri.nil?
  feature_node = load_all_features ? nil : find_feature(feature_uri)
  raise "feature node not found" if !load_all_features && !feature_node

  loaded = 0
  # performance tweak: cache uris to save some resource init time
  compound_uris = {}
  feature_uris = {}

  # search for all feature value nodes that have the property 'feature'
  @model.find(nil, node('feature'), feature_node) do |feature_value_node, _predicate, found_feature|
    # get the compound by "backtracking" to the node that references this
    # value via 'values', then follow its 'compound' property
    parents = @model.subjects(node('values'), feature_value_node)
    raise "more than one value node #{parents.size}" unless parents.size==1
    data_entry = parents[0]
    compound_node = @model.object(data_entry, node('compound'))
    compound_uri = (compound_uris[compound_node.to_s] ||= get_value(compound_node))

    if load_all_features
      # no feature uri was specified, derive it from the matched feature node
      feature_uri = (feature_uris[found_feature.to_s] ||= get_value(found_feature))
    end

    value_node_type = @model.object(feature_value_node, node('type'))
    unless value_node_type == node('FeatureValue')
      raise "feature value type not yet implemented #{value_node_type}"
    end
    value_literal = @model.object(feature_value_node, node('value'))
    raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)
    data[compound_uri] << { feature_uri => value_literal.get_value }

    loaded += 1
    LOGGER.debug "loaded #{loaded} feature values" if loaded % 500 == 0
  end

  LOGGER.debug "loaded #{loaded} feature values"
end
# Cache of property nodes, keyed by name. It starts out with the properties
# taken from RDF[...] and DC[...]; every other name is resolved through
# OT[...] on first use by node() and then stored here as well.
@@property_nodes = {
  "type"    => RDF["type"],
  "about"   => RDF["about"],
  "title"   => DC["title"],
  "creator" => DC["creator"],
  #"identifier" => DC["identifier"], identifier is deprecated
  "date"    => DC["date"],
  "format"  => DC["format"]
}

# Returns the node for name. This method has two purposes:
# * distinguishing ot-properties from dc- and rdf-properties
# * caching nodes, as creating nodes is costly
#
# Raises RuntimeError for "identifier", which is deprecated (use owl.uri).
def node(name)
  raise "dc[identifier] deprecated, use owl.uri" if name=="identifier"
  @@property_nodes[name] ||= OT[name]
end
=begin
def data
LOGGER.debug("getting data from model")
data = {}
@model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
compound_node = @model.object(data_entry, OT['compound'])
compound_uri = @model.object(compound_node, DC['identifier']).to_s
@model.find(data_entry, OT['values'], nil) do |s,p,values|
feature_node = @model.object values, OT['feature']
feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
type = @model.object(values, RDF['type'])
if type == OT['FeatureValue']
value = @model.object(values, OT['value']).to_s
case value.to_s
when TRUE_REGEXP # defined in environment.rb
value = true
when FALSE_REGEXP # defined in environment.rb
value = false
when /.*\^\^<.*XMLSchema#.*>/
#HACK for reading ambit datasets
case value.to_s
when /XMLSchema#string/
value = value.to_s[0..(value.to_s.index("^^")-1)]
when /XMLSchema#double/
value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
else
LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
value = nil
end
else
LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
value = nil
end
LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
return data if (data.keys.size)>9 && !data.has_key?(compound_uri)
#puts "c "+compound_uri.to_s
#puts "f "+feature_uri.to_s
#puts "v "+value.to_s
#puts ""
data[compound_uri] = [] unless data[compound_uri]
data[compound_uri] << {feature_uri => value} unless value.nil?
elsif type == OT['Tuple']
entry = {}
data[compound_uri] = [] unless data[compound_uri]
#data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
@model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
name_node = @model.object complex_value, OT['feature']
name = @model.object(name_node, DC['title']).to_s
value = @model.object(complex_value, OT['value']).to_s
v = value.sub(/\^\^.*$/,'') # remove XML datatype
v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
entry[name] = v
end
data[compound_uri] << {feature_uri => entry} unless entry.empty?
end
end
end
data
end
=end
end
end
|