summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/dataset.rb3
-rw-r--r--lib/owl.rb652
-rw-r--r--lib/task.rb12
3 files changed, 338 insertions, 329 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index af72403..e43ce96 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -22,6 +22,7 @@ module OpenTox
accept_header = "application/rdf+xml"
end
end
+
case accept_header
when "application/x-yaml"
d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
@@ -207,7 +208,7 @@ module OpenTox
end
def init_dirty_features(owl)
- @dirty_features = @features
+ @dirty_features = @features.dclone
@owl = owl
end
end
diff --git a/lib/owl.rb b/lib/owl.rb
index 700e6ee..fe96a6b 100644
--- a/lib/owl.rb
+++ b/lib/owl.rb
@@ -1,22 +1,22 @@
+
+# overriding literal to give nice access to datatype
+# and to access the stored value as correct ruby type
class Redland::Literal
- def self.create(value, datatype=nil)
- if datatype
- if datatype.is_a?(Redland::Uri)
- Redland::Literal.new(value.to_s,nil,datatype)
- else
- Redland::Literal.new(value.to_s,nil,Redland::Uri.new(datatype.to_s))
- end
+ def self.create(value, datatype)
+ raise "literal datatype may not be nil" unless datatype
+ if datatype.is_a?(Redland::Uri)
+ Redland::Literal.new(value.to_s,nil,datatype)
else
- Redland::Literal.new(value.to_s,nil,Redland::Literal.parse_datatype_uri(value))
+ Redland::Literal.new(value.to_s,nil,Redland::Uri.new(datatype.to_s))
end
end
# the literal node of the ruby swig api provides the 'value' of a literal but not the 'datatype'
# found solution in mailing list
def datatype
- uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
- return Redland.librdf_uri_to_string(uri) if uri
+ uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
+ return Redland.librdf_uri_to_string(uri) if uri
end
# gets value of literal, value class is set according to literal datatype
@@ -25,66 +25,33 @@ class Redland::Literal
end
private
- @@type_string = XML["string"].uri
- @@type_uri = XML["anyURI"].uri
- @@type_float = XML["float"].uri
- @@type_double = XML["double"].uri
- @@type_date = XML["date"].uri
- @@type_boolean = XML["boolean"].uri
- @@type_datetime = XML["dateTime"].uri
- @@type_integer = XML["integer"].uri
-
# parses value according to datatype uri
def self.parse_value(string_value, datatype_uri)
+
if (datatype_uri==nil || datatype_uri.size==0)
- LOGGER.warn("empty datatype for literal with value: "+string_value)
+ LOGGER.warn("empty datatype for literal with value: '"+string_value+"'")
return string_value
end
case datatype_uri
- when @@type_string.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s
return string_value
- when @@type_uri.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_URI.to_s
return string_value #PENDING uri as string?
- when @@type_float.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s
return string_value.to_f
- when @@type_double.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s
return string_value.to_f
- when @@type_boolean.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s
return string_value.upcase=="TRUE"
- when @@type_date.to_s
- return string_value #PENDING date as string?
- when @@type_datetime.to_s
- return string_value #PENDING date as string?
- when @@type_integer.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s
+ return Time.parse(string_value)
+ when OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s
+ return Time.parse(string_value)
+ when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s
return string_value.to_i
else
- raise "unknown literal datatype: '"+datatype_uri.to_s+"', value is "+string_value
- end
- end
-
- # parse datatype uri accoring to value class
- def self.parse_datatype_uri(value)
- if value==nil
- raise "illegal datatype: value is nil"
- elsif value.is_a?(String)
- # PENDING: uri check too slow?
- if OpenTox::Utils.is_uri?(value)
- return @@type_uri
- else
- return @@type_string
- end
- elsif value.is_a?(Float)
- return @@type_float
- elsif value.is_a?(TrueClass) or value.is_a?(FalseClass)
- return @@type_boolean
- elsif value.is_a?(Integer)
- return @@type_integer
- elsif value.is_a?(DateTime)
- return @@type_datetime
- elsif value.is_a?(Time)
- return @@type_datetime
- else
- raise "illegal datatype: "+value.class.to_s+" "+value.to_s
+ raise "unknown literal datatype: '"+datatype_uri.to_s+"' (value is "+string_value+
+ "), please specify new OpenTox::Owl::LITERAL_DATATYPE"
end
end
end
@@ -92,8 +59,93 @@ end
module OpenTox
class Owl
+
+ # to get correct owl-dl, properties and objects have to be typed
+ # i.e. the following triple is insufficient:
+ # ModelXY,ot:algorithm,AlgorithmXY
+ # further needed:
+ # ot:algorithm,rdf:type,owl:ObjectProperty
+ # AlgorithmXY,rdf:type,ot:Algorithm
+ # ot:Algorithm,rdf:type,owl:Class
+ #
+ # therefore OpentoxOwl needs info about the opentox-ontology
+ # the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES
+
+ # contains all owl:ObjectProperty as keys, and the respective classes as value
+ OBJECT_PROPERTY_CLASS = {}
+ [ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"}
+ [ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"}
+ [ "trainingDataset", "testTargetDataset", "predictionDataset",
+ "testDataset", "dataset" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Dataset"}
+ [ "feature", "dependentVariables", "independentVariables",
+ "predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"}
+ [ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"}
+ [ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"}
+ [ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"}
+ [ "values" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"}
+ [ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"}
+ [ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"}
+ [ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"}
+ [ "confusionMatrixCell" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrixCell"}
+ [ "regressionStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "RegressionStatistics"}
+ [ "validation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Validation"}
+ [ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"}
+ [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"}
+
+ # literals point to primitive values (not to other resources)
+ # the literal datatype is encoded as uri:
+ LITERAL_DATATYPE_STRING = XML["string"].uri
+ LITERAL_DATATYPE_URI = XML["anyURI"].uri
+ LITERAL_DATATYPE_FLOAT = XML["float"].uri
+ LITERAL_DATATYPE_DOUBLE = XML["double"].uri
+ LITERAL_DATATYPE_DATE = XML["date"].uri
+ LITERAL_DATATYPE_BOOLEAN = XML["boolean"].uri
+ LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri
+ LITERAL_DATATYPE_INTEGER = XML["integer"].uri
+
+ # list all literals (to distinguish from objectProperties) as keys, datatype as values
+ # (do not add dc-identifier, it is deprecated; objects are identified via name=uri)
+ LITERAL_TYPES = {}
+ [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue",
+ "value", "classValue", "reportType", "confusionMatrixActual",
+ "confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING }
+ [ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE }
+ [ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate",
+ "areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall",
+ "percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect",
+ "percentIncorrect", "percentUnpredicted", "realRuntime",
+ "percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare",
+ "targetVarianceActual", "targetVariancePredicted", "sumSquaredError",
+ "sampleCorrelationCoefficient" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DOUBLE }
+ [ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives",
+ "numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted",
+ "randomSeed", "numFolds", "confusionMatrixValue",
+ "crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER }
+ [ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI }
+ [ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN }
+
+ # constants for often used redland-resources
+ OWL_TYPE_LITERAL = OWL["AnnotationProperty"]
+ OWL_TYPE_CLASS = OWL["Class"]
+ OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"]
+ RDF_TYPE = RDF['type']
+
+ # store redland:resources (=nodes) to:
+ # * separate namespaces (OT from RDF and DC)
+ # * save time, as generating resources is timeconsuming in redland
+ @@nodes = {}
+ [ "type", "about"].each{ |l| @@nodes[l] = RDF[l] }
+ [ "title", "creator", "date", "format" ].each{ |l| @@nodes[l] = DC[l] }
+
+ def node(property)
+ raise "can only create node for non-empty-string, but given "+property.class.to_s+" (value: "+
+ property.to_s+")" unless property.is_a?(String) and property.size>0
+ raise "dc[identifier] deprecated, use owl.uri" if property=="identifier"
+ @@nodes[property] = OT[property] unless @@nodes.has_key?(property)
+ return @@nodes[property]
+ end
- # ot_class is the class of the object, e.g. "Model","Dataset", ...
+ # ot_class is the class of the object as string, e.g. "Model","Dataset", ...
# root_node is the root-object node in the rdf
# uri the uri of the object
attr_accessor :ot_class, :root_node, :uri, :model
@@ -102,12 +154,16 @@ module OpenTox
@model = Redland::Model.new Redland::MemoryStore.new
end
+ # build new owl object
+ # ot_class is the class of this object, should be a string like "Model", "Task", ...
+ # uri is name and identifier of this object
def self.create( ot_class, uri )
- owl = OpenTox::Owl.new
+ owl = OpenTox::Owl.new
owl.ot_class = ot_class
owl.root_node = Redland::Resource.new(uri.to_s.strip)
- owl.set("type",owl.node(owl.ot_class)) #,true))
+ owl.set("type",owl.ot_class)
+ owl.uri = uri
owl
end
@@ -121,7 +177,7 @@ module OpenTox
parser.parse_string_into_model(owl.model, data, base_uri)
# now loading root_node and uri
- owl.model.find(nil, owl.node("type"), owl.node(ot_class)) do |s,p,o|
+ owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |s,p,o|
#LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s
is_root = true
owl.model.find(nil, nil, s) do |ss,pp,oo|
@@ -129,8 +185,10 @@ module OpenTox
break
end
if is_root
+ # handle error if root is already set
raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
raise "illegal root node type, no uri specified\n"+data.to_s if s.blank?
+ # store root node and uri
owl.uri = s.uri.to_s
owl.root_node = s
end
@@ -139,7 +197,7 @@ module OpenTox
# handle error if no root node was found
unless owl.root_node
types = []
- owl.model.find(nil, owl.node("type"), nil){ |s,p,o| types << o.to_s }
+ owl.model.find(nil, RDF_TYPE, nil){ |s,p,o| types << o.to_s }
raise "root node for class '"+ot_class+"' not found (available type nodes: "+types.inspect+")"
end
raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri)
@@ -158,13 +216,33 @@ module OpenTox
@model.to_string
end
- def get(name)
- raise "uri is no prop, use owl.uri instead" if name=="uri"
- property_node = node(name.to_s)
- return get_value( @model.object(@root_node, property_node) )
+ # returns the first object for subject:root_node and property
+ # (sufficient for accessing simple, root-node properties)
+ def get( property )
+ raise "uri is no prop, use owl.uri instead" if property=="uri"
+ return get_value( @model.object( @root_node, node(property.to_s)) )
+ end
+
+ # returns an array of objects (not only the first one) that fit for the property
+ # accepts array of properties to access not-root-node values
+ # i.e. validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ] )
+ # returns an array of all confusionMatrixValues
+ def get_nested( property_array )
+ n = [ @root_node ]
+ property_array.each do |p|
+ new_nodes = []
+ n.each do |nn|
+ @model.find( nn, node(p), nil ) do |sub,pred,obj|
+ new_nodes << obj
+ end
+ end
+ n = new_nodes
+ end
+ return n.collect{|nn| get_value( nn )}
end
private
+ # returns node-value
def get_value( node )
return nil unless node
if node.is_a?(Redland::Literal)
@@ -177,285 +255,215 @@ module OpenTox
end
public
- def set(name, value, datatype=nil)
+ # sets values of current_node, by default root_node
+ def set(predicate, object, current_node=@root_node)
- raise "uri is no prop, cannot set uri" if name=="uri"
- property_node = node(name.to_s) #, true)
- begin # delete existing entry
- t = @model.object(@root_node, property_node)
- @model.delete @root_node, property_node, t
- rescue
+ pred = predicate.to_s
+ raise "uri is no prop, cannot set uri" if pred=="uri"
+ raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier"
+ unless object && object.to_s.size>0
+ # set only not-nil values
+ LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'"
+ return
end
- if value.is_a?(Redland::Node)
- raise "not nil datatype not allowed when setting redland node as value" if datatype
- @model.add @root_node, property_node, value
- else # if value is no node, a literal is created
- @model.add @root_node, property_node, Redland::Literal.create(value.to_s, datatype)
+
+ if pred=="type"
+ # predicate is type, set class of current node
+ @model.add current_node, RDF_TYPE, node(object)
+ @model.add node(object), RDF_TYPE, OWL_TYPE_CLASS
+ # example-triples:
+ # model_xy,rdf:type,ot:Model
+ # ot:Model,rdf:type,owl:Class
+ elsif LITERAL_TYPES.has_key?(pred)
+ # predicate is literal
+ predicate_node = node(pred)
+ @model.add current_node, predicate_node, Redland::Literal.create(object, LITERAL_TYPES[pred])
+ @model.add predicate_node, RDF_TYPE, OWL_TYPE_LITERAL
+ # example-triples:
+ # model_xy,ot:description,bla..bla^^xml:string
+ # ot:description,rdf:type,owl:AnnotationProperty
+ elsif OBJECT_PROPERTY_CLASS.has_key?(pred)
+ # predicate is objectProperty, object is another resource
+ predicate_node = node(pred)
+ object_node = Redland::Resource.new(object)
+ @model.add current_node, predicate_node, object_node
+ @model.add predicate_node, RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY
+ object_class_node = node(OBJECT_PROPERTY_CLASS[pred])
+ @model.add object_node, RDF_TYPE, object_class_node
+ @model.add object_class_node, RDF_TYPE, OWL_TYPE_CLASS
+ # example-triples:
+ # model_xy,ot:algorithm,algorithm_xy
+ # ot:algorithm,rdf:type,owl:ObjectProperty
+ # algorithm_xy,rdf:type,ot:Algorithm
+ # ot:Algorithm,rdf:type,owl:Class
+ else
+ raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES"
end
end
- def parameters=(params)
- params.each do |name, settings|
- parameter = @model.create_resource
- @model.add parameter, node('type'), node('Parameter')
- @model.add parameter, node('title'), name
- @model.add parameter, node('paramScope'), settings[:scope]
- @model.add parameter, node('paramValue'), settings[:value]
- @model.add @root_node, node('parameters'), parameter
- end
- end
-
- def add_data_entries(compound_uri,features)
- # add compound
- compound = @model.subject(DC["identifier"], compound_uri)
- if compound.nil?
- compound = @model.create_resource(compound_uri)
- @model.add compound, node('type'), node("Compound")
- end
- features.each do |f|
- f.each do |feature_uri,value|
- # add feature
- feature = find_or_create_feature feature_uri
- if value.class.to_s == 'Hash'
- # create tuple
- tuple = @model.create_resource
- @model.add tuple, node('type'), node("Tuple")
- @model.add tuple, node('feature'), feature
- value.each do |uri,v|
- f = find_or_create_feature uri
- complex_value = @model.create_resource
- @model.add tuple, node('complexValue'), complex_value
- @model.add complex_value, node('type'), node("FeatureValue")
- @model.add complex_value, node('feature'), f
- @model.add complex_value, node('value'), Redland::Literal.create(v)
- end
- # add data entry
- data_entry = @model.subject node('compound'), compound
- if data_entry.nil?
- data_entry = @model.create_resource
- @model.add @root_node, node('dataEntry'), data_entry
- @model.add data_entry, node('type'), node("DataEntry")
- @model.add data_entry, node('compound'), compound
- end
- @model.add data_entry, node('values'), tuple
- else
- data_entry = @model.subject node('compound'), compound
- if data_entry.nil?
- data_entry = @model.create_resource
- @model.add @root_node, node('dataEntry'), data_entry
- @model.add data_entry,node('type'), node("DataEntry")
- @model.add data_entry, node('compound'), compound
- end
- values = @model.create_resource
- @model.add data_entry, node('values'), values
- @model.add values, node('type'), node('FeatureValue')
- @model.add values, node('feature'), feature
- @model.add values, node('value'), Redland::Literal.create(value)
- end
- end
- end
- end
-
- private
- def find_feature(feature_uri)
- # PENDING: more efficiently get feature node?
- @model.subjects(RDF['type'], OT['Feature']).each do |feature|
- return feature if feature_uri==get_value(feature)
+ # this is a recursive method to set not only simple properties but also nested data via hashes
+ # example (for a dataset)
+ # { :description => "bla",
+ # :compound => { :uri => "compound_uri",
+ # :dataEntry => { :values => [ { :feature => "feat1",
+ # :value => 42 },
+ # { :feature => "feat2",
+ # :value => 43 } ] } } }
+ def set_data(hash, current_node=@root_node)
+
+ hash.each do |k,v|
+ if v.is_a?(Hash)
+ # value is again a hash
+ prop = k.to_s
+ raise "hash key must be a object-property, please add '"+prop.to_s+
+ "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS" unless OBJECT_PROPERTY_CLASS[prop]
+ # the new node is a class node
+ if v["uri"]
+ # identifier is either a specified uri
+ class_node = Redland::Resource.new(v.delete("uri"))
+ else
+ # or a new uri, make up internal uri with increment
+ class_node = new_class_node(OBJECT_PROPERTY_CLASS[prop],current_node)
+ end
+ set(prop,class_node,current_node)
+ # recursively call set_data method with new node
+ set_data(v,class_node)
+ elsif v.is_a?(Array)
+ # value is an array, each array element is added with current key as predicate
+ v.each do |value|
+ set_data( { k => value }, current_node )
+ end
+ else
+ # neither hash nor array, call simple set-method
+ set( k, v, current_node )
+ end
+ end
end
- return nil
- end
-
- public
- def find_or_create_feature(feature_uri)
- feature = find_feature(feature_uri)
- unless feature
- feature = @model.create_resource(feature_uri)
- @model.add feature, node('type'), node("Feature")
- @model.add feature, node("title"), File.basename(feature_uri).split(/#/)[1]
- @model.add feature, node('creator'), feature_uri
- end
- feature
- end
-
- # feature values are not loaded for performance reasons
- # loading compounds and features into arrays that are given as params
- def load_dataset( compounds, features )
- @model.subjects(node('type'), node('Compound')).each do |compound|
- compounds << get_value(compound)
- end
- @model.subjects(node('type'), node('Feature')).each do |feature|
- features << get_value(feature)
+ # create a new (internal class) node with unique, uri-like name
+ def new_class_node(name, current_node=@root_node)
+ # to avoid anonymous nodes, make up uris for sub-objects
+ # use counter to make sure each uri is unique
+ # for example we will get ../ConfusionMatrixCell/1, ../ConfusionMatrixCell/2, ...
+ count = 1
+ while (true)
+ res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) )
+ match = false
+ @model.find(nil, nil, res) do |s,p,o|
+ match = true
+ break
+ end
+ if match
+ count += 1
+ else
+ break
+ end
+ end
+ return res
end
- LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features"
- end
-
- # loading feature values for the specified feature
- # if feature is nil, all feature values are loaded
- #
- # general remark on the rdf loading (found out with some testing):
- # the search methods (subjects/find) are fast, the time consuming parts is creating resources,
- # which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features)
- def load_dataset_feature_values( compounds, data, feature_uri=nil )
-
- LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") )
- # values are stored in the data-hash, hash has a key for each compound
- compounds.each{|c| data[c] = [] unless data[c]}
-
- load_all_features = feature_uri==nil
- feature_node = nil
-
- # create feature node for feature uri if specified
- unless load_all_features
- feature_node = find_feature(feature_uri)
- raise "feature node not found" unless feature_node
- end
-
- count = 0
-
- # preformance tweak: store uirs to save some resource init time
- compound_uri_store = {}
- feature_uri_store = {}
-
- # search for all feature_value_node with property 'ot_feature'
- # feature_node is either nil, i.e. a wildcard or specified
- @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
-
- # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
- value_nodes = @model.subjects(node('values'),feature_value_node)
- raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1
- value_node = value_nodes[0]
- compound_node = @model.object(value_node, node('compound'))
- compound_uri = compound_uri_store[compound_node.to_s]
- unless compound_uri
- compound_uri = get_value(compound_node)
- compound_uri_store[compound_node.to_s] = compound_uri
+ # for "backwards-compatiblity"
+ # better use directly:
+ # set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] } )
+ def parameters=(params)
+
+ converted_params = []
+ params.each do |name, settings|
+ converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] }
end
+ set_data( :parameters => converted_params )
+ end
+
+
+ # this is for dataset.to_owl
+ # adds feature values for a single compound
+ def add_data_entries(compound_uri,features)
- if load_all_features
- # if load all features, feautre_uri is not specified, derieve from feature_node
- feature_uri = feature_uri_store[o.to_s]
- unless feature_uri
- feature_uri = get_value(o)
- feature_uri_store[o.to_s] = feature_uri
+ data_entry_values = []
+ features.each do |f|
+ f.each do |feature_uri,value|
+ if value.is_a?(Hash)
+ complex_values = []
+ value.each do |uri,v|
+ complex_values << { :feature => uri, :value => v }
+ end
+ data_entry_values << { :feature => feature_uri, :complexValue => complex_values }
+ else
+ data_entry_values << { :feature => feature_uri, :value => value }
+ end
end
end
+ set_data( :compound => { :uri => compound_uri, :dataEntry => { :values => data_entry_values } } )
+ end
+
+ # feature values are not loaded for performance reasons
+ # loading compounds and features into arrays that are given as params
+ def load_dataset( compounds, features )
- value_node_type = @model.object(feature_value_node, node('type'))
- if (value_node_type == node('FeatureValue'))
- value_literal = @model.object( feature_value_node, node('value'))
- raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)
- data[compound_uri] << {feature_uri => value_literal.get_value }
- else
- raise "feature value type not yet implemented "+value_node_type.to_s
+ @model.subjects(RDF_TYPE, node('Compound')).each do |compound|
+ compounds << get_value(compound)
end
- count += 1
- LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
+ @model.subjects(RDF_TYPE, node('Feature')).each do |feature|
+ features << get_value(feature)
+ end
+ LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features"
end
-
- LOGGER.debug "loaded "+count.to_s+" feature values"
- end
- @@property_nodes = { "type" => RDF["type"],
- "about" => RDF["about"],
- "title" => DC["title"],
- "creator" => DC["creator"],
- #"identifier" => DC["identifier"], identifier is deprecated
- "date" => DC["date"],
- "format" => DC["format"]}
-
-# @object_prop = OWL["ObjectProperty"]
-# @@type = { "Validation" => OWL["Class"],
-# "Model" => OWL["Class"],
-# "title" => OWL["AnnotationProperty"],
-# "creator" => OWL["AnnotationProperty"],
-# "date" => OWL["AnnotationProperty"],
-# "format" => OWL["AnnotationProperty"],
-# "predictedVariables" => @object_prop}
+ # loading feature values for the specified feature
+ # if feature is nil, all feature values are loaded
+ #
+ # general remark on the rdf loading (found out with some testing):
+ # the search methods (subjects/find) are fast, the time consuming parts is creating resources,
+ # which cannot be avoided in general
+ def load_dataset_feature_values( compounds, data, feature_uri=nil )
+
+ LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") )
- # this method has two purposes:
- # * distinguishing ot-properties from dc- and rdf- properties
- # * caching nodes, as creating nodes is costly
- def node(name) #, write_type_to_model=false)
- raise "dc[identifier] deprecated, use owl.uri" if name=="identifier"
- n = @@property_nodes[name]
- unless n
- n = OT[name]
- @@property_nodes[name] = n
- end
-
-# if write_type_to_model and name!="type"
-# raise "no type defined for '"+name+"'" unless @@type[name]
-# @model.add n,RDF['type'],@@type[name]
-# end
- return n
- end
-
-=begin
- def data
- LOGGER.debug("getting data from model")
+ # values are stored in the data-hash, hash has a key for each compound
+ compounds.each{|c| data[c] = [] unless data[c]}
- data = {}
- @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
- compound_node = @model.object(data_entry, OT['compound'])
- compound_uri = @model.object(compound_node, DC['identifier']).to_s
- @model.find(data_entry, OT['values'], nil) do |s,p,values|
- feature_node = @model.object values, OT['feature']
- feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
- type = @model.object(values, RDF['type'])
- if type == OT['FeatureValue']
- value = @model.object(values, OT['value']).to_s
- case value.to_s
- when TRUE_REGEXP # defined in environment.rb
- value = true
- when FALSE_REGEXP # defined in environment.rb
- value = false
- when /.*\^\^<.*XMLSchema#.*>/
- #HACK for reading ambit datasets
- case value.to_s
- when /XMLSchema#string/
- value = value.to_s[0..(value.to_s.index("^^")-1)]
- when /XMLSchema#double/
- value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
- else
- LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
- value = nil
- end
- else
- LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
- value = nil
- end
- LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
-
- return data if (data.keys.size)>9 && !data.has_key?(compound_uri)
-
- #puts "c "+compound_uri.to_s
- #puts "f "+feature_uri.to_s
- #puts "v "+value.to_s
- #puts ""
- data[compound_uri] = [] unless data[compound_uri]
- data[compound_uri] << {feature_uri => value} unless value.nil?
- elsif type == OT['Tuple']
- entry = {}
- data[compound_uri] = [] unless data[compound_uri]
- #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
- @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
- name_node = @model.object complex_value, OT['feature']
- name = @model.object(name_node, DC['title']).to_s
- value = @model.object(complex_value, OT['value']).to_s
- v = value.sub(/\^\^.*$/,'') # remove XML datatype
- v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
- entry[name] = v
- end
- data[compound_uri] << {feature_uri => entry} unless entry.empty?
+ load_all_features = feature_uri==nil
+ feature_node = nil
+
+ # create feature node for feature uri if specified
+ unless load_all_features
+ @model.subjects(RDF_TYPE, OT['Feature']).each do |feature|
+ if feature_uri==get_value(feature)
+ feature_node = feature
+ break
end
end
+ raise "feature node not found" unless feature_node
end
- data
+
+ count = 0
+
+ # search for all feature_value_node with property 'ot_feature'
+ # feature_node is either nil, i.e. a wildcard or specified
+ @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
+
+ # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
+ value_nodes = @model.subjects(node('values'),feature_value_node)
+ raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1
+ value_node = value_nodes[0]
+
+ compound_uri = get_value( @model.object(value_node, node('compound')) )
+ # if loading all features, feature_uri is not specified, derive it from feature_node
+ feature_uri = get_value(o) if load_all_features
+
+ value_node_type = @model.object(feature_value_node, RDF_TYPE)
+ if (value_node_type == node('FeatureValue'))
+ value_literal = @model.object( feature_value_node, node('value'))
+ raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)
+ data[compound_uri] << {feature_uri => value_literal.get_value }
+ else
+ raise "feature value type not yet implemented "+value_node_type.to_s
+ end
+ count += 1
+ LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
+ end
+ LOGGER.debug "loaded "+count.to_s+" feature values"
end
-=end
-
end
end
diff --git a/lib/task.rb b/lib/task.rb
index 88bcb71..b563fe6 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -23,9 +23,9 @@ module OpenTox
end
public
- def self.find(uri)
+ def self.find( uri, accept_header='application/rdf+xml' )
task = Task.new(uri)
- task.reload
+ task.reload( accept_header )
return task
end
@@ -36,8 +36,8 @@ module OpenTox
return task
end
- def reload
- result = RestClientWrapper.get(uri, {:accept => 'application/rdf+xml'}, false)#'application/x-yaml'})
+ def reload( accept_header='application/rdf+xml' )
+ result = RestClientWrapper.get(uri, {:accept => accept_header}, false)#'application/x-yaml'})
@http_code = result.code
reload_from_data(result, result.content_type, uri)
end
@@ -95,8 +95,8 @@ module OpenTox
def wait_for_completion(dur=0.3)
if (@uri.match(@@config[:services]["opentox-task"]))
- due_to_time = Time.parse(@due_to_time)
- running_time = due_to_time - Time.parse(@date)
+ due_to_time = (@due_to_time.is_a?(Time) ? @due_to_time : Time.parse(@due_to_time))
+ running_time = due_to_time - (@date.is_a?(Time) ? @date : Time.parse(@date))
else
# the date of the external task cannot be trusted, offset to local time might be too big
due_to_time = Time.new + EXTERNAL_TASK_MAX_DURATION