1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
|
require 'time' # Time.parse (used by parse_value) is only available after requiring the stdlib 'time' extension

# Reopens Redland::Literal to give access to the datatype of a literal
# and to the stored value converted to the matching ruby type.
class Redland::Literal
  # Builds a typed literal.
  #
  # value    - any object, stored as value.to_s
  # datatype - a Redland::Uri, or any object whose to_s is the datatype uri
  #
  # Raises RuntimeError if datatype is nil or false.
  def self.create(value, datatype)
    raise "literal datatype may not be nil" unless datatype
    datatype_uri = datatype.is_a?(Redland::Uri) ? datatype : Redland::Uri.new(datatype.to_s)
    Redland::Literal.new(value.to_s, nil, datatype_uri)
  end

  # The literal node of the ruby swig api provides the 'value' of a literal but
  # not the 'datatype' (solution found in the mailing list).
  #
  # Returns the datatype uri as string, or nil if the literal carries no datatype uri.
  def datatype
    uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
    uri ? Redland.librdf_uri_to_string(uri) : nil
  end

  # Returns the value of the literal; the class of the returned object depends
  # on the literal datatype (see parse_value).
  def get_value
    Redland::Literal.parse_value(self.value, self.datatype)
  end

  # Parses string_value according to datatype_uri.
  #
  # NOTE: this is a class method, so a bare `private` marker would not affect
  # it; it stays public because get_value calls it with an explicit receiver.
  #
  # string_value - the lexical form of the literal
  # datatype_uri - datatype uri as string; nil or empty yields the unparsed
  #                string (and a warning)
  #
  # Raises RuntimeError for a datatype that is none of the
  # OpenTox::Owl::LITERAL_DATATYPE_* constants.
  def self.parse_value(string_value, datatype_uri)
    if datatype_uri.nil? || datatype_uri.size == 0
      # interpolation (instead of +) keeps the warning usable for a nil value
      LOGGER.warn("empty datatype for literal with value: '#{string_value}'")
      return string_value
    end
    case datatype_uri
    when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s,
         OpenTox::Owl::LITERAL_DATATYPE_URI.to_s
      string_value # PENDING uri as string?
    when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s,
         OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s
      string_value.to_f
    when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s
      # xsd:boolean has the lexical forms true/false and 1/0
      %w[TRUE 1].include?(string_value.strip.upcase)
    when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s,
         OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s
      Time.parse(string_value)
    when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s
      string_value.to_i
    else
      raise "unknown literal datatype: '#{datatype_uri}' (value is #{string_value}), please specify new OpenTox::Owl::LITERAL_DATATYPE"
    end
  end
end
module OpenTox
class Owl
# To produce correct owl-dl, properties and objects have to be typed,
# i.e. the following triple alone is insufficient:
#   ModelXY,ot:algorithm,AlgorithmXY
# further needed are:
#   ot:algorithm,rdf:type,owl:ObjectProperty
#   AlgorithmXY,rdf:type,ot:Algorithm
#   ot:Algorithm,rdf:type,owl:Class
#
# Therefore OpenTox::Owl needs info about the opentox-ontology;
# this info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES.
#
# OBJECT_PROPERTY_CLASS contains all owl:ObjectProperty names as keys
# and the name of the respective class as value.
OBJECT_PROPERTY_CLASS = {}
[
  [ "Model",                    [ "model" ] ],
  [ "Algorithm",                [ "algorithm" ] ],
  [ "Dataset",                  [ "trainingDataset", "testTargetDataset", "predictionDataset",
                                  "testDataset", "dataset" ] ],
  [ "Feature",                  [ "feature", "dependentVariables", "independentVariables",
                                  "predictedVariables", "predictionFeature" ] ],
  [ "Parameter",                [ "parameters" ] ],
  [ "Compound",                 [ "compound" ] ],
  [ "DataEntry",                [ "dataEntry" ] ],
  [ "FeatureValue",             [ "values" ] ],
  [ "ClassificationStatistics", [ "classificationStatistics" ] ],
  [ "ClassValueStatistics",     [ "classValueStatistics" ] ],
  [ "ConfusionMatrix",          [ "confusionMatrix" ] ],
  [ "ConfusionMatrixCell",      [ "confusionMatrixCell" ] ],
  [ "RegressionStatistics",     [ "regressionStatistics" ] ],
  [ "Validation",               [ "validation" ] ],
  [ "CrossvalidationInfo",      [ "crossvalidationInfo" ] ],
  [ "Crossvalidation",          [ "crossvalidation" ] ]
].each do |class_name, properties|
  properties.each { |property| OBJECT_PROPERTY_CLASS[property] = class_name }
end
# Literals point to primitive values (not to other resources);
# the datatype of a literal is encoded as uri:
LITERAL_DATATYPE_STRING   = XML["string"].uri
LITERAL_DATATYPE_URI      = XML["anyURI"].uri
LITERAL_DATATYPE_FLOAT    = XML["float"].uri
LITERAL_DATATYPE_DOUBLE   = XML["double"].uri
LITERAL_DATATYPE_DATE     = XML["date"].uri
LITERAL_DATATYPE_BOOLEAN  = XML["boolean"].uri
LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri
LITERAL_DATATYPE_INTEGER  = XML["integer"].uri

# LITERAL_TYPES lists all literal properties (to distinguish them from
# objectProperties) as keys, with their datatype as value.
# (do not add dc-identifier, it is deprecated; objects are identified via name=uri)
#
# NOTE(review): "numCorrect" is typed as double while the other num* counters
# (e.g. "numIncorrect") are typed as integer - confirm this is intended.
LITERAL_TYPES = {}
[
  [ LITERAL_DATATYPE_STRING,
    [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue",
      "value", "classValue", "reportType", "confusionMatrixActual",
      "confusionMatrixPredicted" ] ],
  [ LITERAL_DATATYPE_DATE,
    [ "date", "due_to_time" ] ],
  [ LITERAL_DATATYPE_DOUBLE,
    [ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate",
      "areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall",
      "percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect",
      "percentUnpredicted", "realRuntime",
      "percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare",
      "targetVarianceActual", "targetVariancePredicted", "sumSquaredError",
      "sampleCorrelationCoefficient" ] ],
  [ LITERAL_DATATYPE_INTEGER,
    [ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives",
      "numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted",
      "randomSeed", "numFolds", "confusionMatrixValue",
      "crossvalidationFold" ] ],
  [ LITERAL_DATATYPE_URI,
    [ "resultURI" ] ],
  [ LITERAL_DATATYPE_BOOLEAN,
    [ "stratified" ] ]
].each do |datatype, names|
  names.each { |name| LITERAL_TYPES[name] = datatype }
end
# Constants for frequently used redland resources.
# Literal properties are typed as owl:AnnotationProperty.
OWL_TYPE_LITERAL         = OWL["AnnotationProperty"]
OWL_TYPE_CLASS           = OWL["Class"]
OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"]
RDF_TYPE                 = RDF['type']

# Cache of redland resources (=nodes), keyed by property/class name. It is used to
#  * separate namespaces (OT from RDF and DC)
#  * save time, as generating resources is time-consuming in redland
@@nodes = {}
%w[type about].each { |name| @@nodes[name] = RDF[name] }
%w[title creator date format].each { |name| @@nodes[name] = DC[name] }
# Returns the redland resource for the given property (or class) name.
# Names already present in the @@nodes cache (pre-filled with some RDF and DC
# names) are served from there; every other name is created in the OT
# namespace and cached.
#
# property - non-empty String
#
# Raises RuntimeError for anything but a non-empty String and for the
# deprecated name "identifier".
def node(property)
  unless property.is_a?(String) && property.size > 0
    raise "can only create node for non-empty-string, but given #{property.class} (value: #{property})"
  end
  raise "dc[identifier] deprecated, use owl.uri" if property == "identifier"
  @@nodes.fetch(property) { @@nodes[property] = OT[property] }
end
# ot_class  - the class of the object as string, e.g. "Model", "Dataset", ...
# root_node - the root-object node in the rdf
# uri       - the uri of the object
# model     - the redland model holding the statements (created in initialize)
attr_accessor :ot_class, :root_node, :uri, :model

# Creates an owl object with a fresh redland model on top of a new
# Redland::MemoryStore; ot_class, root_node and uri stay unset.
def initialize
  store = Redland::MemoryStore.new
  @model = Redland::Model.new(store)
end
# Builds a new owl object.
#
# ot_class - the class of this object, should be a string like "Model", "Task", ...
# uri      - name and identifier of this object; the root node is created from
#            uri.to_s.strip, while the uri attribute keeps the value as given
#
# Returns the new OpenTox::Owl, already carrying its rdf:type statements.
def self.create(ot_class, uri)
  created = OpenTox::Owl.new
  created.ot_class = ot_class
  root_uri = uri.to_s.strip
  created.root_node = Redland::Resource.new(root_uri)
  created.set("type", created.ot_class)
  created.uri = uri
  return created
end
# Loads an owl object from rdf data.
#
# data     - the rdf content, parsed via Redland::Parser#parse_string_into_model
# base_uri - base uri handed to the parser; also reported on errors
# ot_class - name of the class the root object is expected to have
#
# The root node is the subject of type ot_class that does not occur as object
# of any statement. Every error raised while parsing or while locating the
# root node is passed on as RestClientWrapper.raise_uri_error(message, base_uri).
def self.from_data(data, base_uri, ot_class)
  owl = OpenTox::Owl.new
  parser = Redland::Parser.new
  begin
    parser.parse_string_into_model(owl.model, data, base_uri)

    # locate root_node and uri among all subjects typed as ot_class
    owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |candidate, _type_pred, _type_obj|
      # a candidate referenced as object of some statement is a nested object, not the root
      referenced = false
      owl.model.find(nil, nil, candidate) do |_subj, _pred, _obj|
        referenced = true
        break
      end
      next if referenced

      # a second unreferenced candidate means the root is ambiguous
      raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
      raise "illegal root node type, no uri specified\n#{data}" if candidate.blank?
      owl.uri = candidate.uri.to_s
      owl.root_node = candidate
    end

    if owl.root_node.nil? || owl.root_node == false
      # no root found: report the types that are available to ease debugging
      available = []
      owl.model.find(nil, RDF_TYPE, nil) { |_subj, _pred, type| available << type.to_s }
      raise "root node for class '#{ot_class}' not found (available type nodes: #{available.inspect})"
    end
    raise "no uri in rdf: '#{owl.uri}'" unless owl.uri && Utils.is_uri?(owl.uri)

    owl.ot_class = ot_class
    owl
  rescue => e
    RestClientWrapper.raise_uri_error(e.message, base_uri)
  end
end
# Loads an owl object from a uri: fetches the resource via RestClientWrapper.get
# (requesting "application/rdf+xml") and hands the response, converted to a
# string, to from_data with the uri as base uri.
def self.from_uri(uri, ot_class)
  rdf_xml = RestClientWrapper.get(uri, :accept => "application/rdf+xml").to_s
  from_data(rdf_xml, uri, ot_class)
end
# Returns the serialization of the underlying redland model,
# i.e. the result of @model.to_string.
def rdf
  return @model.to_string
end
# Returns the value of the first object for subject root_node and the given
# property (sufficient for accessing simple, root-node properties).
#
# property - property name as String or Symbol; it is normalized with to_s
#            before any check, so :uri is rejected exactly like "uri"
#
# Raises RuntimeError for "uri", which is no rdf property (use owl.uri).
def get( property )
  name = property.to_s
  raise "uri is no prop, use owl.uri instead" if name == "uri"
  get_value( @model.object( @root_node, node(name) ) )
end
# Returns an array with the values of all objects (not only the first one)
# reachable from the root node by following the given properties in order;
# this allows access to values that do not hang directly on the root node, e.g.
#   validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ] )
# returns an array of all confusionMatrixValues.
def get_nested( property_array )
  reached = property_array.inject([ @root_node ]) do |subjects, property|
    objects = []
    subjects.each do |subject|
      @model.find( subject, node(property), nil ) { |_s, _p, object| objects << object }
    end
    objects
  end
  reached.collect { |found| get_value( found ) }
end
# Converts a node into a plain ruby value:
#  * nil (or false)   -> nil
#  * Redland::Literal -> the typed value (node.get_value)
#  * blank node       -> nil
#  * any other node   -> its uri as string
def get_value( node )
  return nil unless node
  return node.get_value if node.is_a?(Redland::Literal)
  node.blank? ? nil : node.uri.to_s
end
# internal helper; the methods defined below stay public
private :get_value
# Sets a value on current_node (by default the root node) and adds the typing
# statements that are needed for owl-dl.
#
# predicate    - property name (String or Symbol): "type", a key of
#                LITERAL_TYPES or a key of OBJECT_PROPERTY_CLASS
# object       - class name (for "type"), literal value, or uri/resource of
#                another object; nil/false/empty values are skipped with a warning
# current_node - subject of the new statement
#
# Raises RuntimeError for "uri", "identifier" and unknown properties.
def set(predicate, object, current_node=@root_node)
  pred = predicate.to_s
  raise "uri is no prop, cannot set uri" if pred == "uri"
  raise "dc[identifier] deprecated, use owl.uri" if pred == "identifier"

  # only non-empty values are written
  if !object || object.to_s.size == 0
    LOGGER.warn "skipping (not setting) empty value in rdf for property: '#{pred}'"
    return
  end

  if pred == "type"
    # predicate is type: set class of current node, example-triples:
    #   model_xy,rdf:type,ot:Model
    #   ot:Model,rdf:type,owl:Class
    class_node = node(object)
    @model.add(current_node, RDF_TYPE, class_node)
    return @model.add(class_node, RDF_TYPE, OWL_TYPE_CLASS)
  end

  if LITERAL_TYPES.key?(pred)
    # predicate is a literal, example-triples:
    #   model_xy,ot:description,bla..bla^^xml:string
    #   ot:description,rdf:type,owl:AnnotationProperty
    predicate_node = node(pred)
    literal = Redland::Literal.create(object, LITERAL_TYPES[pred])
    @model.add(current_node, predicate_node, literal)
    return @model.add(predicate_node, RDF_TYPE, OWL_TYPE_LITERAL)
  end

  unless OBJECT_PROPERTY_CLASS.key?(pred)
    raise "unkonwn rdf-property, please add: '#{pred}' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES"
  end

  # predicate is an objectProperty, object is another resource, example-triples:
  #   model_xy,ot:algorithm,algorithm_xy
  #   ot:algorithm,rdf:type,owl:ObjectProperty
  #   algorithm_xy,rdf:type,ot:Algorithm
  #   ot:Algorithm,rdf:type,owl:Class
  predicate_node = node(pred)
  object_node = Redland::Resource.new(object)
  @model.add(current_node, predicate_node, object_node)
  @model.add(predicate_node, RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY)
  object_class_node = node(OBJECT_PROPERTY_CLASS[pred])
  @model.add(object_node, RDF_TYPE, object_class_node)
  @model.add(object_class_node, RDF_TYPE, OWL_TYPE_CLASS)
end
# Recursive method to set not only simple properties but nested data via hashes.
# Example (for a dataset):
#   { :description => "bla",
#     :compound => { :uri => "compound_uri",
#                    :dataEntry => { :values => [ { :feature => "feat1",
#                                                   :value => 42 },
#                                                 { :feature => "feat2",
#                                                   :value => 43 } ] } } }
#
# hash         - property => value; a value may be
#                 * a Hash:  the key must be an object-property; the hash
#                            describes the object the property points to. Its
#                            uri is taken from the entry "uri" / :uri if given,
#                            otherwise an internal uri is made up
#                 * an Array: each element is added with the same key
#                 * anything else: handed to set
# current_node - node the properties are attached to
#
# The given hashes are not modified.
#
# Raises RuntimeError if a hash value is given for a key that is not listed in
# OBJECT_PROPERTY_CLASS.
def set_data(hash, current_node=@root_node)
  hash.each do |k,v|
    if v.is_a?(Hash)
      # value is again a hash, i.e. it describes another object
      prop = k.to_s
      unless OBJECT_PROPERTY_CLASS[prop]
        raise "hash key must be a object-property, please add '#{prop}' to OpenTox::OWL.OBJECT_PROPERTY_CLASS"
      end
      # the uri entry is accepted with String and with Symbol key; it identifies
      # the object and must not reach set as a property
      uri = v["uri"] || v[:uri]
      properties = v.reject { |key, _value| key.to_s == "uri" }
      class_node = if uri
        # identifier is a specified uri
        Redland::Resource.new(uri)
      else
        # or a new uri, make up internal uri with increment
        new_class_node(OBJECT_PROPERTY_CLASS[prop], current_node)
      end
      set(prop, class_node, current_node)
      # recursively set the remaining properties on the new node
      set_data(properties, class_node)
    elsif v.is_a?(Array)
      # each array element is added with the current key as predicate
      v.each { |value| set_data({ k => value }, current_node) }
    else
      # neither hash nor array, call simple set-method
      set(k, v, current_node)
    end
  end
end
# Creates a new (internal class) node with a unique, uri-like name.
#
# To avoid anonymous nodes, uris are made up for sub-objects by appending the
# class name and a counter to the uri of current_node, for example
# ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
# The first uri that does not yet occur as object of a statement in the model
# is returned.
def new_class_node(name, current_node=@root_node)
  index = 1
  loop do
    candidate = Redland::Resource.new( File.join(current_node.uri.to_s, name.to_s, index.to_s) )
    taken = false
    @model.find(nil, nil, candidate) do |_s, _p, _o|
      taken = true
      break
    end
    return candidate unless taken
    index += 1
  end
end
# Kept for "backwards-compatibility", better use directly:
#   set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] } )
#
# params - pairs of name => settings, where settings[:scope] and
#          settings[:value] hold scope and value of the parameter
def parameters=(params)
  converted = params.collect do |name, settings|
    { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] }
  end
  set_data( :parameters => converted )
end
# This is for dataset.to_owl: adds the feature values of a single compound.
#
# compound_uri - uri of the compound
# features     - array of hashes feature_uri => value; a value that is itself
#                a hash (uri => value) is added as :complexValue
#
# The compound uri is passed under the String key "uri", which is the key
# set_data looks up to name the compound node.
#
# NOTE(review): "complexValue" is not listed in the OBJECT_PROPERTY_CLASS /
# LITERAL_TYPES tables visible in this file, so hash values are presumably
# rejected further down - confirm before relying on them.
def add_data_entries(compound_uri,features)
  data_entry_values = []
  features.each do |f|
    f.each do |feature_uri,value|
      entry = { :feature => feature_uri }
      if value.is_a?(Hash)
        entry[:complexValue] = value.collect { |uri, v| { :feature => uri, :value => v } }
      else
        entry[:value] = value
      end
      data_entry_values << entry
    end
  end
  set_data( :compound => { "uri" => compound_uri, :dataEntry => { :values => data_entry_values } } )
end
# Loads compounds and features of a dataset into the arrays that are given as
# params: the value of every subject typed as 'Compound' is appended to
# compounds, the value of every subject typed as 'Feature' to features.
# Feature values are not loaded for performance reasons.
def load_dataset( compounds, features )
  [ [ 'Compound', compounds ], [ 'Feature', features ] ].each do |class_name, target|
    @model.subjects(RDF_TYPE, node(class_name)).each { |subject| target << get_value(subject) }
  end
  LOGGER.debug "loaded #{compounds.size} compounds and #{features.size} features"
end
# Loads the feature values for the specified feature;
# if feature_uri is nil, all feature values are loaded.
#
# compounds   - compound uris; each gets an (initially empty) array in data
# data        - hash compound_uri => array of { feature_uri => value }; filled
#               by this method. Values of compounds that are not listed in
#               compounds are added as well
# feature_uri - uri of the feature to load, nil for all features
#
# Raises RuntimeError if the feature is not found, if a feature value is not
# attached to exactly one data entry, is no literal, or has a type other than
# FeatureValue.
#
# General remark on the rdf loading (found out with some testing):
# the search methods (subjects/find) are fast, the time-consuming part is
# creating resources, which cannot be avoided in general.
def load_dataset_feature_values( compounds, data, feature_uri=nil )
  load_all_features = feature_uri.nil?
  LOGGER.debug("load feature values" + (load_all_features ? "" : " for feature: #{feature_uri}"))

  # values are stored in the data-hash, hash has a key for each compound
  compounds.each { |c| data[c] ||= [] }

  # look up the feature node for the feature uri if specified
  feature_node = nil
  unless load_all_features
    # node('Feature') reuses the cached resource, like the rest of this class
    feature_node = @model.subjects(RDF_TYPE, node('Feature')).detect do |feature|
      feature_uri == get_value(feature)
    end
    raise "feature node not found" unless feature_node
  end

  count = 0
  # search for all feature_value_nodes with property 'feature';
  # feature_node is either nil, i.e. a wildcard, or specified
  @model.find(nil, node('feature'), feature_node) do |feature_value_node, _predicate, found_feature|
    # get compound_uri by "backtracking" to the node pointing here via 'values',
    # then follow its 'compound' property
    value_nodes = @model.subjects(node('values'), feature_value_node)
    raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1
    compound_uri = get_value( @model.object(value_nodes[0], node('compound')) )

    # if all features are loaded, feature_uri is derived from the matched feature node
    feature_uri = get_value(found_feature) if load_all_features

    value_node_type = @model.object(feature_value_node, RDF_TYPE)
    unless value_node_type == node('FeatureValue')
      raise "feature value type not yet implemented "+value_node_type.to_s
    end
    value_literal = @model.object( feature_value_node, node('value') )
    raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)

    # the compound may be missing from the compounds param, so create its array on demand
    (data[compound_uri] ||= []) << { feature_uri => value_literal.get_value }

    count += 1
    LOGGER.debug "loaded #{count} feature values" if count % 500 == 0
  end
  LOGGER.debug "loaded #{count} feature values"
end
end
end
|