From 5f54e69f9ed332bfb73e30bb6daed24955fe90f6 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 28 Jul 2010 10:15:04 +0200 Subject: new owl version (should be backwards compatible), fix dirty features bug in dataset --- lib/dataset.rb | 3 +- lib/owl.rb | 652 +++++++++++++++++++++++++++++---------------------------- lib/task.rb | 12 +- 3 files changed, 338 insertions(+), 329 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index af72403..e43ce96 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -22,6 +22,7 @@ module OpenTox accept_header = "application/rdf+xml" end end + case accept_header when "application/x-yaml" d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s @@ -207,7 +208,7 @@ module OpenTox end def init_dirty_features(owl) - @dirty_features = @features + @dirty_features = @features.dclone @owl = owl end end diff --git a/lib/owl.rb b/lib/owl.rb index 700e6ee..fe96a6b 100644 --- a/lib/owl.rb +++ b/lib/owl.rb @@ -1,22 +1,22 @@ + +# overriding literal to give nice access to datatype +# and to access the stored value as correct ruby type class Redland::Literal - def self.create(value, datatype=nil) - if datatype - if datatype.is_a?(Redland::Uri) - Redland::Literal.new(value.to_s,nil,datatype) - else - Redland::Literal.new(value.to_s,nil,Redland::Uri.new(datatype.to_s)) - end + def self.create(value, datatype) + raise "literal datatype may not be nil" unless datatype + if datatype.is_a?(Redland::Uri) + Redland::Literal.new(value.to_s,nil,datatype) else - Redland::Literal.new(value.to_s,nil,Redland::Literal.parse_datatype_uri(value)) + Redland::Literal.new(value.to_s,nil,Redland::Uri.new(datatype.to_s)) end end # the literal node of the ruby swig api provdides the 'value' of a literal but not the 'datatype' # found solution in mailing list def datatype - uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node) - return Redland.librdf_uri_to_string(uri) if uri + uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node) + return Redland.librdf_uri_to_string(uri) if uri end # gets value of literal, value class is se according to literal datatype @@ -25,66 +25,33 @@ class Redland::Literal end private - @@type_string = XML["string"].uri - @@type_uri = XML["anyURI"].uri - @@type_float = XML["float"].uri - @@type_double = XML["double"].uri - @@type_date = XML["date"].uri - @@type_boolean = XML["boolean"].uri - @@type_datetime = XML["dateTime"].uri - @@type_integer = XML["integer"].uri - # parses value according to datatype uri def self.parse_value(string_value, datatype_uri) + if (datatype_uri==nil || datatype_uri.size==0) - LOGGER.warn("empty datatype for literal with value: "+string_value) + LOGGER.warn("empty datatype for literal with value: '"+string_value+"'") return string_value end case datatype_uri - when @@type_string.to_s + when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s return string_value - when @@type_uri.to_s + when OpenTox::Owl::LITERAL_DATATYPE_URI.to_s return string_value #PENDING uri as string? - when @@type_float.to_s + when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s return string_value.to_f - when @@type_double.to_s + when OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s return string_value.to_f - when @@type_boolean.to_s + when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s return string_value.upcase=="TRUE" - when @@type_date.to_s - return string_value #PENDING date as string? - when @@type_datetime.to_s - return string_value #PENDING date as string? - when @@type_integer.to_s + when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s + return Time.parse(string_value) + when OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s + return Time.parse(string_value) + when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s return string_value.to_i else - raise "unknown literal datatype: '"+datatype_uri.to_s+"', value is "+string_value - end - end - - # parse datatype uri accoring to value class - def self.parse_datatype_uri(value) - if value==nil - raise "illegal datatype: value is nil" - elsif value.is_a?(String) - # PENDING: uri check too slow? - if OpenTox::Utils.is_uri?(value) - return @@type_uri - else - return @@type_string - end - elsif value.is_a?(Float) - return @@type_float - elsif value.is_a?(TrueClass) or value.is_a?(FalseClass) - return @@type_boolean - elsif value.is_a?(Integer) - return @@type_integer - elsif value.is_a?(DateTime) - return @@type_datetime - elsif value.is_a?(Time) - return @@type_datetime - else - raise "illegal datatype: "+value.class.to_s+" "+value.to_s + raise "unknown literal datatype: '"+datatype_uri.to_s+"' (value is "+string_value+ + "), please specify new OpenTox::Owl::LITERAL_DATATYPE" end end end @@ -92,8 +59,93 @@ end module OpenTox class Owl + + # to get correct owl-dl, properties and objects have to be typed + # i.e. the following triple is insufficient: + # ModelXY,ot:algorithm,AlgorithmXY + # furhter needed: + # ot:algorithm,rdf:type,owl:ObjectProperty + # AlgorithmXY,rdf:type,ot:Algorithm + # ot:Algorithm,rdf:type,owl:Class + # + # therefore OpentoxOwl needs info about the opentox-ontology + # the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES + + # contains all owl:ObjectProperty as keys, and the respective classes as value + OBJECT_PROPERTY_CLASS = {} + [ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"} + [ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"} + [ "trainingDataset", "testTargetDataset", "predictionDataset", + "testDataset", "dataset" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Dataset"} + [ "feature", "dependentVariables", "independentVariables", + "predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"} + [ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"} + [ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"} + [ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"} + [ "values" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"} + [ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"} + [ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"} + [ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"} + [ "confusionMatrixCell" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrixCell"} + [ "regressionStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "RegressionStatistics"} + [ "validation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Validation"} + [ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"} + [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"} + + # literals point to primitive values (not to other resources) + # the literal datatype is encoded is uri: + LITERAL_DATATYPE_STRING = XML["string"].uri + LITERAL_DATATYPE_URI = XML["anyURI"].uri + LITERAL_DATATYPE_FLOAT = XML["float"].uri + LITERAL_DATATYPE_DOUBLE = XML["double"].uri + LITERAL_DATATYPE_DATE = XML["date"].uri + LITERAL_DATATYPE_BOOLEAN = XML["boolean"].uri + LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri + LITERAL_DATATYPE_INTEGER = XML["integer"].uri + + # list all literals (to distinguish from objectProperties) as keys, datatype as values + # (do not add dc-identifier, deprecated, object are identified over via name=uri) + LITERAL_TYPES = {} + [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue", + "value", "classValue", "reportType", "confusionMatrixActual", + "confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING } + [ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE } + [ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate", + "areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall", + "percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect", + "percentIncorrect", "percentUnpredicted", "realRuntime", + "percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare", + "targetVarianceActual", "targetVariancePredicted", "sumSquaredError", + "sampleCorrelationCoefficient" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DOUBLE } + [ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives", + "numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted", + "randomSeed", "numFolds", "confusionMatrixValue", + "crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER } + [ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI } + [ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN } + + # constants for often used redland-resources + OWL_TYPE_LITERAL = OWL["AnnotationProperty"] + OWL_TYPE_CLASS = OWL["Class"] + OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"] + RDF_TYPE = RDF['type'] + + # store redland:resources (=nodes) to: + # * separate namespaces (OT from RDF and DC) + # * save time, as generating resources is timeconsuming in redland + @@nodes = {} + [ "type", "about"].each{ |l| @@nodes[l] = RDF[l] } + [ "title", "creator", "date", "format" ].each{ |l| @@nodes[l] = DC[l] } + + def node(property) + raise "can only create node for non-empty-string, but given "+property.class.to_s+" (value: "+ + property.to_s+")" unless property.is_a?(String) and property.size>0 + raise "dc[identifier] deprecated, use owl.uri" if property=="identifier" + @@nodes[property] = OT[property] unless @@nodes.has_key?(property) + return @@nodes[property] + end - # ot_class is the class of the object, e.g. "Model","Dataset", ... + # ot_class is the class of the object as string, e.g. "Model","Dataset", ... # root_node is the root-object node in the rdf # uri the uri of the object attr_accessor :ot_class, :root_node, :uri, :model @@ -102,12 +154,16 @@ module OpenTox @model = Redland::Model.new Redland::MemoryStore.new end + # build new owl object + # ot_class is the class of this object, should be a string like "Model", "Task", ... + # uri is name and identifier of this object def self.create( ot_class, uri ) - owl = OpenTox::Owl.new + owl = OpenTox::Owl.new owl.ot_class = ot_class owl.root_node = Redland::Resource.new(uri.to_s.strip) - owl.set("type",owl.node(owl.ot_class)) #,true)) + owl.set("type",owl.ot_class) + owl.uri = uri owl end @@ -121,7 +177,7 @@ module OpenTox parser.parse_string_into_model(owl.model, data, base_uri) # now loading root_node and uri - owl.model.find(nil, owl.node("type"), owl.node(ot_class)) do |s,p,o| + owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |s,p,o| #LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s is_root = true owl.model.find(nil, nil, s) do |ss,pp,oo| @@ -129,8 +185,10 @@ module OpenTox break end if is_root + # handle error if root is already set raise "cannot derieve root object from rdf, more than one object specified" if owl.uri raise "illegal root node type, no uri specified\n"+data.to_s if s.blank? + #store root note and uri owl.uri = s.uri.to_s owl.root_node = s end @@ -139,7 +197,7 @@ module OpenTox # handle error if no root node was found unless owl.root_node types = [] - owl.model.find(nil, owl.node("type"), nil){ |s,p,o| types << o.to_s } + owl.model.find(nil, RDF_TYPE, nil){ |s,p,o| types << o.to_s } raise "root node for class '"+ot_class+"' not found (available type nodes: "+types.inspect+")" end raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri) @@ -158,13 +216,33 @@ module OpenTox @model.to_string end - def get(name) - raise "uri is no prop, use owl.uri instead" if name=="uri" - property_node = node(name.to_s) - return get_value( @model.object(@root_node, property_node) ) + # returns the first object for subject:root_node and property + # (sufficient for accessing simple, root-node properties) + def get( property ) + raise "uri is no prop, use owl.uri instead" if property=="uri" + return get_value( @model.object( @root_node, node(property.to_s)) ) + end + + # returns an array of objects (not only the first one) that fit for the property + # accepts array of properties to access not-root-node vaules + # i.e. validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ] + # returns an array of all confusionMatrixValues + def get_nested( property_array ) + n = [ @root_node ] + property_array.each do |p| + new_nodes = [] + n.each do |nn| + @model.find( nn, node(p), nil ) do |sub,pred,obj| + new_nodes << obj + end + end + n = new_nodes + end + return n.collect{|nn| get_value( nn )} end private + # returns node-value def get_value( node ) return nil unless node if node.is_a?(Redland::Literal) @@ -177,285 +255,215 @@ module OpenTox end public - def set(name, value, datatype=nil) + # sets values of current_node, by default root_node + def set(predicate, object, current_node=@root_node) - raise "uri is no prop, cannot set uri" if name=="uri" - property_node = node(name.to_s) #, true) - begin # delete existing entry - t = @model.object(@root_node, property_node) - @model.delete @root_node, property_node, t - rescue + pred = predicate.to_s + raise "uri is no prop, cannot set uri" if pred=="uri" + raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier" + unless object && object.to_s.size>0 + # set only not-nil values + LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'" + return end - if value.is_a?(Redland::Node) - raise "not nil datatype not allowed when setting redland node as value" if datatype - @model.add @root_node, property_node, value - else # if value is no node, a literal is created - @model.add @root_node, property_node, Redland::Literal.create(value.to_s, datatype) + + if pred=="type" + # predicat is type, set class of current node + @model.add current_node, RDF_TYPE, node(object) + @model.add node(object), RDF_TYPE, OWL_TYPE_CLASS + # example-triples: + # model_xy,rdf:type,ot:Model + # ot:Model,rdf:type,owl:Class + elsif LITERAL_TYPES.has_key?(pred) + # predicate is literal + predicate_node = node(pred) + @model.add current_node, predicate_node, Redland::Literal.create(object, LITERAL_TYPES[pred]) + @model.add predicate_node, RDF_TYPE, OWL_TYPE_LITERAL + # example-triples: + # model_xy,ot:description,bla..bla^^xml:string + # ot:description,rdf:type,owl:Literal + elsif OBJECT_PROPERTY_CLASS.has_key?(pred) + # predicte is objectProperty, object is another resource + predicate_node = node(pred) + object_node = Redland::Resource.new(object) + @model.add current_node, predicate_node, object_node + @model.add predicate_node, RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY + object_class_node = node(OBJECT_PROPERTY_CLASS[pred]) + @model.add object_node, RDF_TYPE, object_class_node + @model.add object_class_node, RDF_TYPE, OWL_TYPE_CLASS + # example-triples: + # model_xy,ot:algorithm,algorihtm_xy + # ot:algorithm,rdf:type,owl:ObjectProperty + # algorihtm_xy,rdf:type,ot:Algorithm + # ot:Algorithm,rdf:type,owl:Class + else + raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES" end end - def parameters=(params) - params.each do |name, settings| - parameter = @model.create_resource - @model.add parameter, node('type'), node('Parameter') - @model.add parameter, node('title'), name - @model.add parameter, node('paramScope'), settings[:scope] - @model.add parameter, node('paramValue'), settings[:value] - @model.add @root_node, node('parameters'), parameter - end - end - - def add_data_entries(compound_uri,features) - # add compound - compound = @model.subject(DC["identifier"], compound_uri) - if compound.nil? - compound = @model.create_resource(compound_uri) - @model.add compound, node('type'), node("Compound") - end - features.each do |f| - f.each do |feature_uri,value| - # add feature - feature = find_or_create_feature feature_uri - if value.class.to_s == 'Hash' - # create tuple - tuple = @model.create_resource - @model.add tuple, node('type'), node("Tuple") - @model.add tuple, node('feature'), feature - value.each do |uri,v| - f = find_or_create_feature uri - complex_value = @model.create_resource - @model.add tuple, node('complexValue'), complex_value - @model.add complex_value, node('type'), node("FeatureValue") - @model.add complex_value, node('feature'), f - @model.add complex_value, node('value'), Redland::Literal.create(v) - end - # add data entry - data_entry = @model.subject node('compound'), compound - if data_entry.nil? - data_entry = @model.create_resource - @model.add @root_node, node('dataEntry'), data_entry - @model.add data_entry, node('type'), node("DataEntry") - @model.add data_entry, node('compound'), compound - end - @model.add data_entry, node('values'), tuple - else - data_entry = @model.subject node('compound'), compound - if data_entry.nil? - data_entry = @model.create_resource - @model.add @root_node, node('dataEntry'), data_entry - @model.add data_entry,node('type'), node("DataEntry") - @model.add data_entry, node('compound'), compound - end - values = @model.create_resource - @model.add data_entry, node('values'), values - @model.add values, node('type'), node('FeatureValue') - @model.add values, node('feature'), feature - @model.add values, node('value'), Redland::Literal.create(value) - end - end - end - end - - private - def find_feature(feature_uri) - # PENDING: more efficiently get feature node? - @model.subjects(RDF['type'], OT['Feature']).each do |feature| - return feature if feature_uri==get_value(feature) + # this is (a recursiv method) to set not only simple properties but nested-data via hashes + # example (for a dataset) + # { :description => "bla", + # :compound => { :uri => "compound_uri", + # :dataEntry: => { :values => [ { :feature => "feat1", + # :value => 42 }, + # { :feature => "feat2", + # :value => 43 } ] } } } + def set_data(hash, current_node=@root_node) + + hash.each do |k,v| + if v.is_a?(Hash) + # value is again a hash + prop = k.to_s + raise "hash key must be a object-property, please add '"+prop.to_s+ + "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS" unless OBJECT_PROPERTY_CLASS[prop] + # the new node is a class node + if v["uri"] + # identifier is either a specified uri + class_node = Redland::Resource.new(v.delete("uri")) + else + # or a new uri, make up internal uri with increment + class_node = new_class_node(OBJECT_PROPERTY_CLASS[prop],current_node) + end + set(prop,class_node,current_node) + # recursivly call set_data method with new node + set_data(v,class_node) + elsif v.is_a?(Array) + # value is an array, each array element is added with current key as predicate + v.each do |value| + set_data( { k => value }, current_node ) + end + else + # neither hash nor array, call simple set-method + set( k, v, current_node ) + end + end end - return nil - end - - public - def find_or_create_feature(feature_uri) - feature = find_feature(feature_uri) - unless feature - feature = @model.create_resource(feature_uri) - @model.add feature, node('type'), node("Feature") - @model.add feature, node("title"), File.basename(feature_uri).split(/#/)[1] - @model.add feature, node('creator'), feature_uri - end - feature - end - - # feature values are not loaded for performance reasons - # loading compounds and features into arrays that are given as params - def load_dataset( compounds, features ) - @model.subjects(node('type'), node('Compound')).each do |compound| - compounds << get_value(compound) - end - @model.subjects(node('type'), node('Feature')).each do |feature| - features << get_value(feature) + # create a new (internal class) node with unique, uri-like name + def new_class_node(name, current_node=@root_node) + # to avoid anonymous nodes, make up uris for sub-objects + # use counter to make sure each uri is unique + # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ... + count = 1 + while (true) + res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) ) + match = false + @model.find(nil, nil, res) do |s,p,o| + match = true + break + end + if match + count += 1 + else + break + end + end + return res end - LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features" - end - - # loading feature values for the specified feature - # if feature is nil, all feature values are loaded - # - # general remark on the rdf loading (found out with some testing): - # the search methods (subjects/find) are fast, the time consuming parts is creating resources, - # which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features) - def load_dataset_feature_values( compounds, data, feature_uri=nil ) - - LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") ) - # values are stored in the data-hash, hash has a key for each compound - compounds.each{|c| data[c] = [] unless data[c]} - - load_all_features = feature_uri==nil - feature_node = nil - - # create feature node for feature uri if specified - unless load_all_features - feature_node = find_feature(feature_uri) - raise "feature node not found" unless feature_node - end - - count = 0 - - # preformance tweak: store uirs to save some resource init time - compound_uri_store = {} - feature_uri_store = {} - - # search for all feature_value_node with property 'ot_feature' - # feature_node is either nil, i.e. a wildcard or specified - @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o| - - # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound' - value_nodes = @model.subjects(node('values'),feature_value_node) - raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1 - value_node = value_nodes[0] - compound_node = @model.object(value_node, node('compound')) - compound_uri = compound_uri_store[compound_node.to_s] - unless compound_uri - compound_uri = get_value(compound_node) - compound_uri_store[compound_node.to_s] = compound_uri + # for "backwards-compatiblity" + # better use directly: + # set_data( { "parameters" => [ { "title" => , "paramScope" => , "paramValue" => } ] ) + def parameters=(params) + + converted_params = [] + params.each do |name, settings| + converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] } end + set_data( :parameters => converted_params ) + end + + + # this is for dataset.to_owl + # adds feautre value for a single compound + def add_data_entries(compound_uri,features) - if load_all_features - # if load all features, feautre_uri is not specified, derieve from feature_node - feature_uri = feature_uri_store[o.to_s] - unless feature_uri - feature_uri = get_value(o) - feature_uri_store[o.to_s] = feature_uri + data_entry_values = [] + features.each do |f| + f.each do |feature_uri,value| + if value.is_a?(Hash) + complex_values = [] + value.each do |uri,v| + complex_values << { :feature => uri, :value => v } + end + data_entry_values << { :feature => feature_uri, :complexValue => complex_values } + else + data_entry_values << { :feature => feature_uri, :value => value } + end end end + set_data( :compound => { :uri => compound_uri, :dataEntry => { :values => data_entry_values } } ) + end + + # feature values are not loaded for performance reasons + # loading compounds and features into arrays that are given as params + def load_dataset( compounds, features ) - value_node_type = @model.object(feature_value_node, node('type')) - if (value_node_type == node('FeatureValue')) - value_literal = @model.object( feature_value_node, node('value')) - raise "feature value no literal" unless value_literal.is_a?(Redland::Literal) - data[compound_uri] << {feature_uri => value_literal.get_value } - else - raise "feature value type not yet implemented "+value_node_type.to_s + @model.subjects(RDF_TYPE, node('Compound')).each do |compound| + compounds << get_value(compound) end - count += 1 - LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0) + @model.subjects(RDF_TYPE, node('Feature')).each do |feature| + features << get_value(feature) + end + LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features" end - - LOGGER.debug "loaded "+count.to_s+" feature values" - end - @@property_nodes = { "type" => RDF["type"], - "about" => RDF["about"], - "title" => DC["title"], - "creator" => DC["creator"], - #"identifier" => DC["identifier"], identifier is deprecated - "date" => DC["date"], - "format" => DC["format"]} - -# @object_prop = OWL["ObjectProperty"] -# @@type = { "Validation" => OWL["Class"], -# "Model" => OWL["Class"], -# "title" => OWL["AnnotationProperty"], -# "creator" => OWL["AnnotationProperty"], -# "date" => OWL["AnnotationProperty"], -# "format" => OWL["AnnotationProperty"], -# "predictedVariables" => @object_prop} + # loading feature values for the specified feature + # if feature is nil, all feature values are loaded + # + # general remark on the rdf loading (found out with some testing): + # the search methods (subjects/find) are fast, the time consuming parts is creating resources, + # which cannot be avoided in general + def load_dataset_feature_values( compounds, data, feature_uri=nil ) + + LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") ) - # this method has two purposes: - # * distinguishing ot-properties from dc- and rdf- properties - # * caching nodes, as creating nodes is costly - def node(name) #, write_type_to_model=false) - raise "dc[identifier] deprecated, use owl.uri" if name=="identifier" - n = @@property_nodes[name] - unless n - n = OT[name] - @@property_nodes[name] = n - end - -# if write_type_to_model and name!="type" -# raise "no type defined for '"+name+"'" unless @@type[name] -# @model.add n,RDF['type'],@@type[name] -# end - return n - end - -=begin - def data - LOGGER.debug("getting data from model") + # values are stored in the data-hash, hash has a key for each compound + compounds.each{|c| data[c] = [] unless data[c]} - data = {} - @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry| - compound_node = @model.object(data_entry, OT['compound']) - compound_uri = @model.object(compound_node, DC['identifier']).to_s - @model.find(data_entry, OT['values'], nil) do |s,p,values| - feature_node = @model.object values, OT['feature'] - feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype - type = @model.object(values, RDF['type']) - if type == OT['FeatureValue'] - value = @model.object(values, OT['value']).to_s - case value.to_s - when TRUE_REGEXP # defined in environment.rb - value = true - when FALSE_REGEXP # defined in environment.rb - value = false - when /.*\^\^<.*XMLSchema#.*>/ - #HACK for reading ambit datasets - case value.to_s - when /XMLSchema#string/ - value = value.to_s[0..(value.to_s.index("^^")-1)] - when /XMLSchema#double/ - value = value.to_s[0..(value.to_s.index("^^")-1)].to_f - else - LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri - value = nil - end - else - LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri - value = nil - end - LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri) - - return data if (data.keys.size)>9 && !data.has_key?(compound_uri) - - #puts "c "+compound_uri.to_s - #puts "f "+feature_uri.to_s - #puts "v "+value.to_s - #puts "" - data[compound_uri] = [] unless data[compound_uri] - data[compound_uri] << {feature_uri => value} unless value.nil? - elsif type == OT['Tuple'] - entry = {} - data[compound_uri] = [] unless data[compound_uri] - #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri] - @model.find(values, OT['complexValue'],nil) do |s,p,complex_value| - name_node = @model.object complex_value, OT['feature'] - name = @model.object(name_node, DC['title']).to_s - value = @model.object(complex_value, OT['value']).to_s - v = value.sub(/\^\^.*$/,'') # remove XML datatype - v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype - entry[name] = v - end - data[compound_uri] << {feature_uri => entry} unless entry.empty? + load_all_features = feature_uri==nil + feature_node = nil + + # create feature node for feature uri if specified + unless load_all_features + @model.subjects(RDF_TYPE, OT['Feature']).each do |feature| + if feature_uri==get_value(feature) + feature_node = feature + break end end + raise "feature node not found" unless feature_node end - data + + count = 0 + + # search for all feature_value_node with property 'ot_feature' + # feature_node is either nil, i.e. a wildcard or specified + @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o| + + # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound' + value_nodes = @model.subjects(node('values'),feature_value_node) + raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1 + value_node = value_nodes[0] + + compound_uri = get_value( @model.object(value_node, node('compound')) ) + # if load all features, feautre_uri is not specified, derieve from feature_node + feature_uri = get_value(o) if load_all_features + + value_node_type = @model.object(feature_value_node, RDF_TYPE) + if (value_node_type == node('FeatureValue')) + value_literal = @model.object( feature_value_node, node('value')) + raise "feature value no literal" unless value_literal.is_a?(Redland::Literal) + data[compound_uri] << {feature_uri => value_literal.get_value } + else + raise "feature value type not yet implemented "+value_node_type.to_s + end + count += 1 + LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0) + end + LOGGER.debug "loaded "+count.to_s+" feature values" end -=end - end end diff --git a/lib/task.rb b/lib/task.rb index 88bcb71..b563fe6 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -23,9 +23,9 @@ module OpenTox end public - def self.find(uri) + def self.find( uri, accept_header='application/rdf+xml' ) task = Task.new(uri) - task.reload + task.reload( accept_header ) return task end @@ -36,8 +36,8 @@ module OpenTox return task end - def reload - result = RestClientWrapper.get(uri, {:accept => 'application/rdf+xml'}, false)#'application/x-yaml'}) + def reload( accept_header='application/rdf+xml' ) + result = RestClientWrapper.get(uri, {:accept => accept_header}, false)#'application/x-yaml'}) @http_code = result.code reload_from_data(result, result.content_type, uri) end @@ -95,8 +95,8 @@ module OpenTox def wait_for_completion(dur=0.3) if (@uri.match(@@config[:services]["opentox-task"])) - due_to_time = Time.parse(@due_to_time) - running_time = due_to_time - Time.parse(@date) + due_to_time = (@due_to_time.is_a?(Time) ? @due_to_time : Time.parse(@due_to_time)) + running_time = due_to_time - (@date.is_a?(Time) ? @date : Time.parse(@date)) else # the date of the external task cannot be trusted, offest to local time might be to big due_to_time = Time.new + EXTERNAL_TASK_MAX_DURATION -- cgit v1.2.3 From 92bb08ac2e9e21a7bade2c272740fe3a1691cb85 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 30 Jul 2010 15:35:14 +0200 Subject: fix dataset.find to not request yaml from ambit --- lib/dataset.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index e43ce96..3efff7d 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -16,7 +16,7 @@ module OpenTox unless accept_header #if uri.match(@@config[:services]["opentox-dataset"]) || uri=~ /188.40.32.88/ || uri =~ /informatik/ - if !@@config[:accept_headers]["opentox-dataset"].grep(/yaml/).empty? + if uri.match(@@config[:services]["opentox-dataset"]) && !@@config[:accept_headers]["opentox-dataset"].grep(/yaml/).empty? accept_header = 'application/x-yaml' else accept_header = "application/rdf+xml" -- cgit v1.2.3 From 87c815a3eedd87d08f7dc74c3dbfdaf9867103a6 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 2 Aug 2010 15:20:44 +0200 Subject: handle missing features, some more documentation for owl --- lib/dataset.rb | 4 ++-- lib/owl.rb | 33 +++++++++++++++++++++------------ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 3efff7d..257cc17 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -25,7 +25,7 @@ module OpenTox case accept_header when "application/x-yaml" - d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s + d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s d.uri = uri unless d.uri when "application/rdf+xml" owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset") @@ -172,7 +172,7 @@ module OpenTox raise "invalid internal value type" end end - raise "feature value no found: "+feature.to_s + return nil #missing value else raise "value is not an array\n"+ "value "+v.to_s+"\n"+ diff --git a/lib/owl.rb b/lib/owl.rb index fe96a6b..6d50d69 100644 --- a/lib/owl.rb +++ b/lib/owl.rb @@ -93,7 +93,7 @@ module OpenTox [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"} # literals point to primitive values (not to other resources) - # the literal datatype is encoded is uri: + # the literal datatype is encoded via uri: LITERAL_DATATYPE_STRING = XML["string"].uri LITERAL_DATATYPE_URI = XML["anyURI"].uri LITERAL_DATATYPE_FLOAT = XML["float"].uri @@ -104,7 +104,7 @@ module OpenTox LITERAL_DATATYPE_INTEGER = XML["integer"].uri # list all literals (to distinguish from objectProperties) as keys, datatype as values - # (do not add dc-identifier, deprecated, object are identified over via name=uri) + # (do not add dc-identifier, deprecated, object are identified via name=uri) LITERAL_TYPES = {} [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue", "value", "classValue", "reportType", "confusionMatrixActual", @@ -150,6 +150,7 @@ module OpenTox # uri the uri of the object attr_accessor :ot_class, :root_node, :uri, :model + private def initialize @model = Redland::Model.new Redland::MemoryStore.new end @@ -157,6 +158,7 @@ module OpenTox # build new owl object # ot_class is the class of this object, should be a string like "Model", "Task", ... # uri is name and identifier of this object + public def self.create( ot_class, uri ) owl = OpenTox::Owl.new @@ -255,7 +257,15 @@ module OpenTox end public - # sets values of current_node, by default root_node + # sets values of current_node (by default root_node) + # + # note: this does not delete existing triples + # * there can be several triples for the same subject and predicate + # ( e.g. after set("description","bla1") and set("description","bla2") + # both descriptions are in the model, + # but the get("description") will give you only one object (by chance) + # * this does not matter in pratice (only dataset uses this -> load_dataset-methods) + # * identical values appear only once in rdf def set(predicate, object, current_node=@root_node) pred = predicate.to_s @@ -268,10 +278,10 @@ module OpenTox end if pred=="type" - # predicat is type, set class of current node + # predicate is type, set class of current node @model.add current_node, RDF_TYPE, node(object) @model.add node(object), RDF_TYPE, OWL_TYPE_CLASS - # example-triples: + # example-triples for setting rdf-type to model: # model_xy,rdf:type,ot:Model # ot:Model,rdf:type,owl:Class elsif LITERAL_TYPES.has_key?(pred) @@ -279,7 +289,7 @@ module OpenTox predicate_node = node(pred) @model.add current_node, predicate_node, Redland::Literal.create(object, LITERAL_TYPES[pred]) @model.add predicate_node, RDF_TYPE, OWL_TYPE_LITERAL - # example-triples: + # example-triples for setting description of a model: # model_xy,ot:description,bla..bla^^xml:string # ot:description,rdf:type,owl:Literal elsif OBJECT_PROPERTY_CLASS.has_key?(pred) @@ -291,7 +301,7 @@ module OpenTox object_class_node = node(OBJECT_PROPERTY_CLASS[pred]) @model.add object_node, RDF_TYPE, object_class_node @model.add object_class_node, RDF_TYPE, OWL_TYPE_CLASS - # example-triples: + # example-triples for setting algorithm property of a model: # model_xy,ot:algorithm,algorihtm_xy # ot:algorithm,rdf:type,owl:ObjectProperty # algorihtm_xy,rdf:type,ot:Algorithm @@ -301,7 +311,7 @@ module OpenTox end end - # this is (a recursiv method) to set not only simple properties but nested-data via hashes + # this is (a recursiv method) to set nested-data via hashes (not only simple properties) # example (for a dataset) # { :description => "bla", # :compound => { :uri => "compound_uri", @@ -406,7 +416,7 @@ module OpenTox @model.subjects(RDF_TYPE, node('Feature')).each do |feature| features << get_value(feature) end - LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features" + LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s end # loading feature values for the specified feature @@ -460,10 +470,9 @@ module OpenTox raise "feature value type not yet implemented "+value_node_type.to_s end count += 1 - LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0) + LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0) end LOGGER.debug "loaded "+count.to_s+" feature values" end end -end - +end \ No newline at end of file -- cgit v1.2.3 From ff2a7af228fd6d0d23d29ef249422890841b526c Mon Sep 17 00:00:00 2001 From: mguetlein Date: Tue, 3 Aug 2010 08:54:09 +0200 Subject: fix dataset rdf support --- lib/owl.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/owl.rb b/lib/owl.rb index 6d50d69..62ef5d2 100644 --- a/lib/owl.rb +++ b/lib/owl.rb @@ -81,6 +81,7 @@ module OpenTox "predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"} [ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"} [ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"} + [ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Tuple"} [ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"} [ "values" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"} [ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"} @@ -92,6 +93,7 @@ module OpenTox [ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"} [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"} + # literals point to primitive values (not to other resources) # the literal datatype is encoded via uri: LITERAL_DATATYPE_STRING = XML["string"].uri @@ -328,9 +330,9 @@ module OpenTox raise "hash key must be a object-property, please add '"+prop.to_s+ "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS" unless OBJECT_PROPERTY_CLASS[prop] # the new node is a class node - if v["uri"] + if v[:uri] # identifier is either a specified uri - class_node = Redland::Resource.new(v.delete("uri")) + class_node = Redland::Resource.new(v.delete(:uri)) else # or a new uri, make up internal uri with increment class_node = new_class_node(OBJECT_PROPERTY_CLASS[prop],current_node) -- cgit v1.2.3 From 9d2f25cdfc340bc7d9df7a041a5b23c1552c7d53 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 4 Aug 2010 10:55:48 +0200 Subject: dataset to/from rdf now working again --- lib/dataset.rb | 16 ++-- lib/owl.rb | 254 +++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 162 insertions(+), 108 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 257cc17..4ce9ffe 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -16,7 +16,7 @@ module OpenTox unless accept_header #if uri.match(@@config[:services]["opentox-dataset"]) || uri=~ /188.40.32.88/ || uri =~ /informatik/ - if uri.match(@@config[:services]["opentox-dataset"]) && !@@config[:accept_headers]["opentox-dataset"].grep(/yaml/).empty? + if (uri.match(@@config[:services]["opentox-dataset"]) || uri =~ /in-silico.ch/) && !@@config[:accept_headers]["opentox-dataset"].grep(/yaml/).empty? accept_header = 'application/x-yaml' else accept_header = "application/rdf+xml" @@ -144,12 +144,6 @@ module OpenTox else LOGGER.warn "no confidence for compound: "+compound.to_s+", feature: "+feature.to_s return 1 -# raise "prediction confidence value is not a hash value\n"+ -# "value "+v.to_s+"\n"+ -# "value-class "+v.class.to_s+"\n"+ -# "dataset "+@uri.to_s+"\n"+ -# "compound "+compound.to_s+"\n"+ -# "feature "+feature.to_s+"\n" end end @@ -160,7 +154,7 @@ module OpenTox end v = @data[compound] - raise "no values for compound "+compound.to_s if v==nil + return nil if v == nil # missing values for all features if v.is_a?(Array) # PENDING: why using an array here? v.each do |e| @@ -187,11 +181,11 @@ module OpenTox def load_feature_values(feature=nil) if feature raise "feature already loaded" unless @dirty_features.include?(feature) - @owl.load_dataset_feature_values(@compounds, @data, feature) + @owl.load_dataset_feature_values(@compounds, @data, [feature]) @dirty_features.delete(feature) else - @data = {} - @owl.load_dataset_feature_values(@compounds, @data) + @data = {} unless @data + @owl.load_dataset_feature_values(@compounds, @data, @dirty_features) @dirty_features.clear end end diff --git a/lib/owl.rb b/lib/owl.rb index 62ef5d2..7447ce6 100644 --- a/lib/owl.rb +++ b/lib/owl.rb @@ -3,12 +3,14 @@ # and to access the stored value as correct ruby type class Redland::Literal - def self.create(value, datatype) - raise "literal datatype may not be nil" unless datatype - if datatype.is_a?(Redland::Uri) - Redland::Literal.new(value.to_s,nil,datatype) + def self.create(value, type) + raise "literal datatype may not be nil" unless type + type = parse_datatype_uri(value) if OpenTox::Owl::PARSE_LITERAL_TYPE==type + + if type.is_a?(Redland::Uri) + Redland::Literal.new(value.to_s,nil,type) else - Redland::Literal.new(value.to_s,nil,Redland::Uri.new(datatype.to_s)) + Redland::Literal.new(value.to_s,nil,Redland::Uri.new(type.to_s)) end end @@ -54,6 +56,32 @@ class Redland::Literal "), please specify new OpenTox::Owl::LITERAL_DATATYPE" end end + + # parse datatype uri accoring to value class + def self.parse_datatype_uri(value) + if value==nil + raise "illegal datatype: value is nil" + elsif value.is_a?(String) + # PENDING: uri check too slow? + if OpenTox::Utils.is_uri?(value) + return OpenTox::Owl::LITERAL_DATATYPE_URI + else + return OpenTox::Owl::LITERAL_DATATYPE_STRING + end + elsif value.is_a?(Float) + return OpenTox::Owl::LITERAL_DATATYPE_FLOAT + elsif value.is_a?(TrueClass) or value.is_a?(FalseClass) + return OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN + elsif value.is_a?(Integer) + return OpenTox::Owl::LITERAL_DATATYPE_INTEGER + elsif value.is_a?(DateTime) + return OpenTox::Owl::LITERAL_DATATYPE_DATETIME + elsif value.is_a?(Time) + return OpenTox::Owl::LITERAL_DATATYPE_DATETIME + else + raise "illegal datatype: "+value.class.to_s+" "+value.to_s + end + end end module OpenTox @@ -63,15 +91,17 @@ module OpenTox # to get correct owl-dl, properties and objects have to be typed # i.e. the following triple is insufficient: # ModelXY,ot:algorithm,AlgorithmXY - # furhter needed: + # further needed: # ot:algorithm,rdf:type,owl:ObjectProperty # AlgorithmXY,rdf:type,ot:Algorithm # ot:Algorithm,rdf:type,owl:Class # # therefore OpentoxOwl needs info about the opentox-ontology # the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES - + # contains all owl:ObjectProperty as keys, and the respective classes as value + # some object properties link to objects from different classes (e.g. "values can be "Tuple", or "FeatureValue") + # in this case, use set_object_property() (instead of set()) and specify class manually OBJECT_PROPERTY_CLASS = {} [ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"} [ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"} @@ -81,9 +111,8 @@ module OpenTox "predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"} [ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"} [ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"} - [ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Tuple"} [ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"} - [ "values" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"} + [ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"} [ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"} [ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"} [ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"} @@ -93,7 +122,6 @@ module OpenTox [ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"} [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"} - # literals point to primitive values (not to other resources) # the literal datatype is encoded via uri: LITERAL_DATATYPE_STRING = XML["string"].uri @@ -109,7 +137,7 @@ module OpenTox # (do not add dc-identifier, deprecated, object are identified via name=uri) LITERAL_TYPES = {} [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue", - "value", "classValue", "reportType", "confusionMatrixActual", + "classValue", "reportType", "confusionMatrixActual", "confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING } [ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE } [ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate", @@ -125,6 +153,9 @@ module OpenTox "crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER } [ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI } [ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN } + # some literals can have different types, parse from ruby type + PARSE_LITERAL_TYPE = "PARSE_LITERAL_TYPE" + [ "value" ].each{ |l| LITERAL_TYPES[l] = PARSE_LITERAL_TYPE } # constants for often used redland-resources OWL_TYPE_LITERAL = OWL["AnnotationProperty"] @@ -268,12 +299,12 @@ module OpenTox # but the get("description") will give you only one object (by chance) # * this does not matter in pratice (only dataset uses this -> load_dataset-methods) # * identical values appear only once in rdf - def set(predicate, object, current_node=@root_node) + def set(predicate, object, current_node=@root_node ) pred = predicate.to_s raise "uri is no prop, cannot set uri" if pred=="uri" raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier" - unless object && object.to_s.size>0 + if (object.is_a?(Redland::Node) and object.blank?) or nil==object or object.to_s.size==0 # set only not-nil values LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'" return @@ -281,63 +312,78 @@ module OpenTox if pred=="type" # predicate is type, set class of current node - @model.add current_node, RDF_TYPE, node(object) - @model.add node(object), RDF_TYPE, OWL_TYPE_CLASS - # example-triples for setting rdf-type to model: - # model_xy,rdf:type,ot:Model - # ot:Model,rdf:type,owl:Class + set_type(object, current_node) elsif LITERAL_TYPES.has_key?(pred) # predicate is literal - predicate_node = node(pred) - @model.add current_node, predicate_node, Redland::Literal.create(object, LITERAL_TYPES[pred]) - @model.add predicate_node, RDF_TYPE, OWL_TYPE_LITERAL - # example-triples for setting description of a model: - # model_xy,ot:description,bla..bla^^xml:string - # ot:description,rdf:type,owl:Literal + set_literal(pred,object,LITERAL_TYPES[pred],current_node) elsif OBJECT_PROPERTY_CLASS.has_key?(pred) # predicte is objectProperty, object is another resource - predicate_node = node(pred) - object_node = Redland::Resource.new(object) - @model.add current_node, predicate_node, object_node - @model.add predicate_node, RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY - object_class_node = node(OBJECT_PROPERTY_CLASS[pred]) - @model.add object_node, RDF_TYPE, object_class_node - @model.add object_class_node, RDF_TYPE, OWL_TYPE_CLASS - # example-triples for setting algorithm property of a model: - # model_xy,ot:algorithm,algorihtm_xy - # ot:algorithm,rdf:type,owl:ObjectProperty - # algorihtm_xy,rdf:type,ot:Algorithm - # ot:Algorithm,rdf:type,owl:Class + set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node) else raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES" end end + + # example-triples for setting rdf-type to model: + # model_xy,rdf:type,ot:Model + # ot:Model,rdf:type,owl:Class + def set_type(ot_class, current_node=@root_node) + @model.add current_node, RDF_TYPE, node(ot_class) + @model.add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS + end + + # example-triples for setting description of a model: + # model_xy,ot:description,bla..bla^^xml:string + # ot:description,rdf:type,owl:Literal + def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node) + @model.add current_node, node(literal_name), Redland::Literal.create(literal_value, literal_datatype) + @model.add node(literal_name), RDF_TYPE, OWL_TYPE_LITERAL + end + + # example-triples for setting algorithm property of a model: + # model_xy,ot:algorithm,algorihtm_xy + # ot:algorithm,rdf:type,owl:ObjectProperty + # algorihtm_xy,rdf:type,ot:Algorithm + # ot:Algorithm,rdf:type,owl:Class + def set_object_property(property, object, object_class, current_node=@root_node) + object_node = Redland::Resource.new(object) + @model.add current_node, node(property), object_node + @model.add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY + @model.add object_node, RDF_TYPE, node(object_class) + @model.add node(object_class), RDF_TYPE, OWL_TYPE_CLASS + end # this is (a recursiv method) to set nested-data via hashes (not only simple properties) # example (for a dataset) # { :description => "bla", - # :compound => { :uri => "compound_uri", - # :dataEntry: => { :values => [ { :feature => "feat1", - # :value => 42 }, - # { :feature => "feat2", - # :value => 43 } ] } } } + # :dataEntry => { :compound => "compound_uri", + # :values => [ { :class => "FeatureValue" + # :feature => "feat1", + # :value => 42 }, + # { :class => "FeatureValue" + # :feature => "feat2", + # :value => 123 } ] } } def set_data(hash, current_node=@root_node) hash.each do |k,v| if v.is_a?(Hash) # value is again a hash prop = k.to_s + + # :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS + object_class = v.has_key?(:class) ? v.delete(:class) : OBJECT_PROPERTY_CLASS[prop] raise "hash key must be a object-property, please add '"+prop.to_s+ - "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS" unless OBJECT_PROPERTY_CLASS[prop] - # the new node is a class node + "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class + + # the new node is a class node, to specify the uri of the resource use key :uri if v[:uri] # identifier is either a specified uri class_node = Redland::Resource.new(v.delete(:uri)) else # or a new uri, make up internal uri with increment - class_node = new_class_node(OBJECT_PROPERTY_CLASS[prop],current_node) + class_node = new_class_node(object_class,current_node) end - set(prop,class_node,current_node) + set_object_property(prop,class_node,object_class,current_node) # recursivly call set_data method with new node set_data(v,class_node) elsif v.is_a?(Array) @@ -386,28 +432,33 @@ module OpenTox set_data( :parameters => converted_params ) end - + # PENDING move to dataset.rb # this is for dataset.to_owl # adds feautre value for a single compound def add_data_entries(compound_uri,features) - data_entry_values = [] - features.each do |f| - f.each do |feature_uri,value| - if value.is_a?(Hash) - complex_values = [] - value.each do |uri,v| - complex_values << { :feature => uri, :value => v } + data_entry = { :compound => compound_uri } + if features + feature_values = [] + features.each do |f| + f.each do |feature_uri,value| + if value.is_a?(Hash) + complex_values = [] + value.each do |uri,v| + complex_values << { :feature => uri, :value => v } + end + feature_values << { :class => "Tuple", :feature => feature_uri, :complexValue => complex_values } + else + feature_values << { :class => "FeatureValue", :feature => feature_uri, :value => value } end - data_entry_values << { :feature => feature_uri, :complexValue => complex_values } - else - data_entry_values << { :feature => feature_uri, :value => value } end end + data_entry[:values] = feature_values end - set_data( :compound => { :uri => compound_uri, :dataEntry => { :values => data_entry_values } } ) + set_data( :dataEntry => data_entry ) end + # PENDING move to dataset.rb # feature values are not loaded for performance reasons # loading compounds and features into arrays that are given as params def load_dataset( compounds, features ) @@ -415,66 +466,75 @@ module OpenTox @model.subjects(RDF_TYPE, node('Compound')).each do |compound| compounds << get_value(compound) end + @model.subjects(RDF_TYPE, node('Feature')).each do |feature| - features << get_value(feature) + feature_value_found=false + @model.find(nil, node("feature"), feature) do |potential_feature_value,p,o| + @model.find(nil, node("values"), potential_feature_value) do |s,p,o| + feature_value_found=true + break + end + break if feature_value_found + end + features << get_value(feature) if feature_value_found end LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s end + # PENDING move to dataset.rb # loading feature values for the specified feature # if feature is nil, all feature values are loaded # # general remark on the rdf loading (found out with some testing): # the search methods (subjects/find) are fast, the time consuming parts is creating resources, # which cannot be avoided in general - def load_dataset_feature_values( compounds, data, feature_uri=nil ) + def load_dataset_feature_values( compounds, data, feature_uris ) - LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") ) + raise "no feature-uri array" unless feature_uris.is_a?(Array) # values are stored in the data-hash, hash has a key for each compound compounds.each{|c| data[c] = [] unless data[c]} - load_all_features = feature_uri==nil - feature_node = nil - - # create feature node for feature uri if specified - unless load_all_features - @model.subjects(RDF_TYPE, OT['Feature']).each do |feature| - if feature_uri==get_value(feature) - feature_node = feature - break - end - end - raise "feature node not found" unless feature_node - end - count = 0 - - # search for all feature_value_node with property 'ot_feature' - # feature_node is either nil, i.e. a wildcard or specified - @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o| - - # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound' - value_nodes = @model.subjects(node('values'),feature_value_node) - raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1 - value_node = value_nodes[0] - - compound_uri = get_value( @model.object(value_node, node('compound')) ) - # if load all features, feautre_uri is not specified, derieve from feature_node - feature_uri = get_value(o) if load_all_features + + feature_uris.each do |feature_uri| + LOGGER.debug("load feature values for feature: "+feature_uri ) + feature_node = Redland::Resource.new(feature_uri) - value_node_type = @model.object(feature_value_node, RDF_TYPE) - if (value_node_type == node('FeatureValue')) - value_literal = @model.object( feature_value_node, node('value')) - raise "feature value no literal" unless value_literal.is_a?(Redland::Literal) - data[compound_uri] << {feature_uri => value_literal.get_value } - else - raise "feature value type not yet implemented "+value_node_type.to_s + # search for all feature_value_node with property 'ot_feature' and the feature we are looking for + @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o| + + # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound' + value_nodes = @model.subjects(node('values'),feature_value_node) + if value_nodes.size>0 + raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1 + value_node = value_nodes[0] + + compound_uri = get_value( @model.object(value_node, node('compound')) ) + + value_node_type = @model.object(feature_value_node, RDF_TYPE) + if (value_node_type == node('FeatureValue')) + value_literal = @model.object( feature_value_node, node('value')) + raise "plain feature value no literal: "+value_literal.to_s unless value_literal.is_a?(Redland::Literal) + data[compound_uri] << {feature_uri => value_literal.get_value } + elsif (value_node_type == node('Tuple')) + complex_values = {} + @model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value| + complex_value_type = @model.object(complex_value, RDF_TYPE) + raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue') + complex_feature_uri = get_value(@model.object( complex_value, node('feature'))) + complex_value = @model.object( complex_value, node('value')) + raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal) + complex_values[ complex_feature_uri ] = complex_value.get_value + end + data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0 + end + count += 1 + LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0) + end end - count += 1 - LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0) + LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s end - LOGGER.debug "loaded "+count.to_s+" feature values" end end end \ No newline at end of file -- cgit v1.2.3