summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Christoph Helma <helma@in-silico.ch> 2010-08-05 12:55:58 +0200
committer Christoph Helma <helma@in-silico.ch> 2010-08-05 12:55:58 +0200
commit b92d78620f7e9ed3a35730f867019723f51c4462 (patch)
tree 1aab98345f8802edebdaf976748bf62f94aef641 /lib
parent 34a7b50fb278fc37c937f2fe170b86332eb054c5 (diff)
parent 9d2f25cdfc340bc7d9df7a041a5b23c1552c7d53 (diff)
Merge commit 'mguetlein/test' into development
Conflicts: lib/owl.rb
Diffstat (limited to 'lib')
-rw-r--r-- lib/algorithm.rb 8
-rw-r--r-- lib/compound.rb 23
-rw-r--r-- lib/dataset.rb 23
-rw-r--r-- lib/model.rb 5
-rw-r--r-- lib/owl.rb 674
-rw-r--r-- lib/task.rb 12
-rw-r--r-- lib/tasks/opentox.rb 107
7 files changed, 419 insertions, 433 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index cc06032..f2a3618 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -22,9 +22,7 @@ module OpenTox
LOGGER.debug params
LOGGER.debug File.basename(__FILE__) + ": creating model"
LOGGER.debug File.join(@@config[:services]["opentox-algorithm"], "lazar")
- #resource = RestClient::Resource.new(File.join(@@config[:services]["opentox-algorithm"], "lazar"), :user => @@users[:users].keys[0], :password => @@users[:users].values[0], :content_type => "application/x-yaml")
resource = RestClient::Resource.new(File.join(@@config[:services]["opentox-algorithm"], "lazar"), :user => @@users[:users].keys[0], :password => @@users[:users].values[0], :content_type => "application/x-yaml")
- #@uri = resource.post(:dataset_uri => params[:dataset_uri], :feature_uri => params[:feature_uri], :feature_generation_uri => File.join(@@config[:services]["opentox-algorithm"], "fminer")).chomp
@uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(@@config[:services]["opentox-algorithm"], "fminer")).body.chomp
end
@@ -37,12 +35,12 @@ module OpenTox
class Similarity
def self.weighted_tanimoto(fp_a,fp_b,p)
common_features = fp_a & fp_b
- all_features = fp_a + fp_b
+ all_features = (fp_a + fp_b).uniq
common_p_sum = 0.0
if common_features.size > 0
- common_features.each{|f| common_p_sum += p[f]}
+ common_features.each{|f| common_p_sum += OpenTox::Utils.gauss(p[f])}
all_p_sum = 0.0
- all_features.each{|f| all_p_sum += p[f]}
+ all_features.each{|f| all_p_sum += OpenTox::Utils.gauss(p[f])}
common_p_sum/all_p_sum
else
0.0
diff --git a/lib/compound.rb b/lib/compound.rb
index 9366db4..49c166f 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -52,13 +52,30 @@ module OpenTox
end
def png
- RestClientWrapper.get("#{@@ambit_uri}#{smiles}")
+ RestClientWrapper.get(File.join @uri, "image")
end
+ def names
+ begin
+ RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names")
+ rescue
+ "not available"
+ end
+ end
+
+ def display_smarts_uri(activating, deactivating, highlight = nil)
+ LOGGER.debug activating.to_yaml unless activating.nil?
+ activating_smarts = URI.encode "\"#{activating.join("\"/\"")}\""
+ deactivating_smarts = URI.encode "\"#{deactivating.join("\"/\"")}\""
+ if highlight.nil?
+ File.join @@config[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts)
+ else
+ File.join @@config[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts), "highlight", URI.encode(highlight)
+ end
+ end
+
def image_uri
File.join @uri, "image"
- #"#{@@ambit_uri}#{smiles}"
- #"#{@@cactus_uri}#{@inchi}/image"
end
# Matchs a smarts string
diff --git a/lib/dataset.rb b/lib/dataset.rb
index af72403..4ce9ffe 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -16,15 +16,16 @@ module OpenTox
unless accept_header
#if uri.match(@@config[:services]["opentox-dataset"]) || uri=~ /188.40.32.88/ || uri =~ /informatik/
- if !@@config[:accept_headers]["opentox-dataset"].grep(/yaml/).empty?
+ if (uri.match(@@config[:services]["opentox-dataset"]) || uri =~ /in-silico.ch/) && !@@config[:accept_headers]["opentox-dataset"].grep(/yaml/).empty?
accept_header = 'application/x-yaml'
else
accept_header = "application/rdf+xml"
end
end
+
case accept_header
when "application/x-yaml"
- d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
+ d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
d.uri = uri unless d.uri
when "application/rdf+xml"
owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset")
@@ -143,12 +144,6 @@ module OpenTox
else
LOGGER.warn "no confidence for compound: "+compound.to_s+", feature: "+feature.to_s
return 1
-# raise "prediction confidence value is not a hash value\n"+
-# "value "+v.to_s+"\n"+
-# "value-class "+v.class.to_s+"\n"+
-# "dataset "+@uri.to_s+"\n"+
-# "compound "+compound.to_s+"\n"+
-# "feature "+feature.to_s+"\n"
end
end
@@ -159,7 +154,7 @@ module OpenTox
end
v = @data[compound]
- raise "no values for compound "+compound.to_s if v==nil
+ return nil if v == nil # missing values for all features
if v.is_a?(Array)
# PENDING: why using an array here?
v.each do |e|
@@ -171,7 +166,7 @@ module OpenTox
raise "invalid internal value type"
end
end
- raise "feature value no found: "+feature.to_s
+ return nil #missing value
else
raise "value is not an array\n"+
"value "+v.to_s+"\n"+
@@ -186,11 +181,11 @@ module OpenTox
def load_feature_values(feature=nil)
if feature
raise "feature already loaded" unless @dirty_features.include?(feature)
- @owl.load_dataset_feature_values(@compounds, @data, feature)
+ @owl.load_dataset_feature_values(@compounds, @data, [feature])
@dirty_features.delete(feature)
else
- @data = {}
- @owl.load_dataset_feature_values(@compounds, @data)
+ @data = {} unless @data
+ @owl.load_dataset_feature_values(@compounds, @data, @dirty_features)
@dirty_features.clear
end
end
@@ -207,7 +202,7 @@ module OpenTox
end
def init_dirty_features(owl)
- @dirty_features = @features
+ @dirty_features = @features.dclone
@owl = owl
end
end
diff --git a/lib/model.rb b/lib/model.rb
index b6cef46..0077cfe 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -104,6 +104,11 @@ module OpenTox
def self.find_all
RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n")
end
+
+ def self.predict(compound_uri,model_uri)
+ #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'})
+ `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}`
+ end
end
end
end
diff --git a/lib/owl.rb b/lib/owl.rb
index a3000e4..f8d0017 100644
--- a/lib/owl.rb
+++ b/lib/owl.rb
@@ -6,25 +6,26 @@ OT = Redland::Namespace.new 'http://www.opentox.org/api/1.1#'
#OT = Redland::Namespace.new 'http://ortona.informatik.uni-freiburg.de/opentox.owl#'
XML = Redland::Namespace.new 'http://www.w3.org/2001/XMLSchema#'
+# overriding literal to give nice access to datatype
+# and to access the stored value as correct ruby type
class Redland::Literal
- def self.create(value, datatype=nil)
- if datatype
- if datatype.is_a?(Redland::Uri)
- Redland::Literal.new(value.to_s,nil,datatype)
- else
- Redland::Literal.new(value.to_s,nil,Redland::Uri.new(datatype.to_s))
- end
+ def self.create(value, type)
+ raise "literal datatype may not be nil" unless type
+ type = parse_datatype_uri(value) if OpenTox::Owl::PARSE_LITERAL_TYPE==type
+
+ if type.is_a?(Redland::Uri)
+ Redland::Literal.new(value.to_s,nil,type)
else
- Redland::Literal.new(value.to_s,nil,Redland::Literal.parse_datatype_uri(value))
+ Redland::Literal.new(value.to_s,nil,Redland::Uri.new(type.to_s))
end
end
# the literal node of the ruby swig api provdides the 'value' of a literal but not the 'datatype'
# found solution in mailing list
def datatype
- uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
- return Redland.librdf_uri_to_string(uri) if uri
+ uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
+ return Redland.librdf_uri_to_string(uri) if uri
end
# gets value of literal, value class is se according to literal datatype
@@ -33,40 +34,33 @@ class Redland::Literal
end
private
- @@type_string = XML["string"].uri
- @@type_uri = XML["anyURI"].uri
- @@type_float = XML["float"].uri
- @@type_double = XML["double"].uri
- @@type_date = XML["date"].uri
- @@type_boolean = XML["boolean"].uri
- @@type_datetime = XML["dateTime"].uri
- @@type_integer = XML["integer"].uri
-
# parses value according to datatype uri
def self.parse_value(string_value, datatype_uri)
+
if (datatype_uri==nil || datatype_uri.size==0)
- LOGGER.warn("empty datatype for literal with value: "+string_value)
+ LOGGER.warn("empty datatype for literal with value: '"+string_value+"'")
return string_value
end
case datatype_uri
- when @@type_string.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s
return string_value
- when @@type_uri.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_URI.to_s
return string_value #PENDING uri as string?
- when @@type_float.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s
return string_value.to_f
- when @@type_double.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s
return string_value.to_f
- when @@type_boolean.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s
return string_value.upcase=="TRUE"
- when @@type_date.to_s
- return string_value #PENDING date as string?
- when @@type_datetime.to_s
- return string_value #PENDING date as string?
- when @@type_integer.to_s
+ when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s
+ return Time.parse(string_value)
+ when OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s
+ return Time.parse(string_value)
+ when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s
return string_value.to_i
else
- raise "unknown literal datatype: '"+datatype_uri.to_s+"', value is "+string_value
+ raise "unknown literal datatype: '"+datatype_uri.to_s+"' (value is "+string_value+
+ "), please specify new OpenTox::Owl::LITERAL_DATATYPE"
end
end
@@ -77,20 +71,20 @@ class Redland::Literal
elsif value.is_a?(String)
# PENDING: uri check too slow?
if OpenTox::Utils.is_uri?(value)
- return @@type_uri
+ return OpenTox::Owl::LITERAL_DATATYPE_URI
else
- return @@type_string
+ return OpenTox::Owl::LITERAL_DATATYPE_STRING
end
elsif value.is_a?(Float)
- return @@type_float
+ return OpenTox::Owl::LITERAL_DATATYPE_FLOAT
elsif value.is_a?(TrueClass) or value.is_a?(FalseClass)
- return @@type_boolean
+ return OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN
elsif value.is_a?(Integer)
- return @@type_integer
+ return OpenTox::Owl::LITERAL_DATATYPE_INTEGER
elsif value.is_a?(DateTime)
- return @@type_datetime
+ return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
elsif value.is_a?(Time)
- return @@type_datetime
+ return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
else
raise "illegal datatype: "+value.class.to_s+" "+value.to_s
end
@@ -100,22 +94,118 @@ end
module OpenTox
class Owl
+
+ # to get correct owl-dl, properties and objects have to be typed
+ # i.e. the following triple is insufficient:
+ # ModelXY,ot:algorithm,AlgorithmXY
+ # further needed:
+ # ot:algorithm,rdf:type,owl:ObjectProperty
+ # AlgorithmXY,rdf:type,ot:Algorithm
+ # ot:Algorithm,rdf:type,owl:Class
+ #
+ # therefore OpentoxOwl needs info about the opentox-ontology
+ # the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES
+
+ # contains all owl:ObjectProperty as keys, and the respective classes as value
+ # some object properties link to objects from different classes (e.g. "values can be "Tuple", or "FeatureValue")
+ # in this case, use set_object_property() (instead of set()) and specify class manually
+ OBJECT_PROPERTY_CLASS = {}
+ [ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"}
+ [ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"}
+ [ "trainingDataset", "testTargetDataset", "predictionDataset",
+ "testDataset", "dataset" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Dataset"}
+ [ "feature", "dependentVariables", "independentVariables",
+ "predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"}
+ [ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"}
+ [ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"}
+ [ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"}
+ [ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"}
+ [ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"}
+ [ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"}
+ [ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"}
+ [ "confusionMatrixCell" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrixCell"}
+ [ "regressionStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "RegressionStatistics"}
+ [ "validation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Validation"}
+ [ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"}
+ [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"}
+
+ # literals point to primitive values (not to other resources)
+ # the literal datatype is encoded via uri:
+ LITERAL_DATATYPE_STRING = XML["string"].uri
+ LITERAL_DATATYPE_URI = XML["anyURI"].uri
+ LITERAL_DATATYPE_FLOAT = XML["float"].uri
+ LITERAL_DATATYPE_DOUBLE = XML["double"].uri
+ LITERAL_DATATYPE_DATE = XML["date"].uri
+ LITERAL_DATATYPE_BOOLEAN = XML["boolean"].uri
+ LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri
+ LITERAL_DATATYPE_INTEGER = XML["integer"].uri
+
+ # list all literals (to distinguish from objectProperties) as keys, datatype as values
+ # (do not add dc-identifier, deprecated, object are identified via name=uri)
+ LITERAL_TYPES = {}
+ [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue",
+ "classValue", "reportType", "confusionMatrixActual",
+ "confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING }
+ [ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE }
+ [ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate",
+ "areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall",
+ "percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect",
+ "percentIncorrect", "percentUnpredicted", "realRuntime",
+ "percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare",
+ "targetVarianceActual", "targetVariancePredicted", "sumSquaredError",
+ "sampleCorrelationCoefficient" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DOUBLE }
+ [ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives",
+ "numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted",
+ "randomSeed", "numFolds", "confusionMatrixValue",
+ "crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER }
+ [ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI }
+ [ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN }
+ # some literals can have different types, parse from ruby type
+ PARSE_LITERAL_TYPE = "PARSE_LITERAL_TYPE"
+ [ "value" ].each{ |l| LITERAL_TYPES[l] = PARSE_LITERAL_TYPE }
+
+ # constants for often used redland-resources
+ OWL_TYPE_LITERAL = OWL["AnnotationProperty"]
+ OWL_TYPE_CLASS = OWL["Class"]
+ OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"]
+ RDF_TYPE = RDF['type']
+
+ # store redland:resources (=nodes) to:
+ # * separate namespaces (OT from RDF and DC)
+ # * save time, as generating resources is timeconsuming in redland
+ @@nodes = {}
+ [ "type", "about"].each{ |l| @@nodes[l] = RDF[l] }
+ [ "title", "creator", "date", "format" ].each{ |l| @@nodes[l] = DC[l] }
+
+ def node(property)
+ raise "can only create node for non-empty-string, but given "+property.class.to_s+" (value: "+
+ property.to_s+")" unless property.is_a?(String) and property.size>0
+ raise "dc[identifier] deprecated, use owl.uri" if property=="identifier"
+ @@nodes[property] = OT[property] unless @@nodes.has_key?(property)
+ return @@nodes[property]
+ end
- # ot_class is the class of the object, e.g. "Model","Dataset", ...
+ # ot_class is the class of the object as string, e.g. "Model","Dataset", ...
# root_node is the root-object node in the rdf
# uri the uri of the object
attr_accessor :ot_class, :root_node, :uri, :model
+ private
def initialize
@model = Redland::Model.new Redland::MemoryStore.new
end
+ # build new owl object
+ # ot_class is the class of this object, should be a string like "Model", "Task", ...
+ # uri is name and identifier of this object
+ public
def self.create( ot_class, uri )
- owl = OpenTox::Owl.new
+ owl = OpenTox::Owl.new
owl.ot_class = ot_class
owl.root_node = Redland::Resource.new(uri.to_s.strip)
- owl.set("type",owl.node(owl.ot_class)) #,true))
+ owl.set("type",owl.ot_class)
+ owl.uri = uri
owl
end
@@ -129,7 +219,7 @@ module OpenTox
parser.parse_string_into_model(owl.model, data, base_uri)
# now loading root_node and uri
- owl.model.find(nil, owl.node("type"), owl.node(ot_class)) do |s,p,o|
+ owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |s,p,o|
#LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s
is_root = true
owl.model.find(nil, nil, s) do |ss,pp,oo|
@@ -137,8 +227,10 @@ module OpenTox
break
end
if is_root
+ # handle error if root is already set
raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
raise "illegal root node type, no uri specified\n"+data.to_s if s.blank?
+ #store root note and uri
owl.uri = s.uri.to_s
owl.root_node = s
end
@@ -147,7 +239,7 @@ module OpenTox
# handle error if no root node was found
unless owl.root_node
types = []
- owl.model.find(nil, owl.node("type"), nil){ |s,p,o| types << o.to_s }
+ owl.model.find(nil, RDF_TYPE, nil){ |s,p,o| types << o.to_s }
raise "root node for class '"+ot_class+"' not found (available type nodes: "+types.inspect+")"
end
raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri)
@@ -166,13 +258,33 @@ module OpenTox
@model.to_string
end
- def get(name)
- raise "uri is no prop, use owl.uri instead" if name=="uri"
- property_node = node(name.to_s)
- return get_value( @model.object(@root_node, property_node) )
+ # returns the first object for subject:root_node and property
+ # (sufficient for accessing simple, root-node properties)
+ def get( property )
+ raise "uri is no prop, use owl.uri instead" if property=="uri"
+ return get_value( @model.object( @root_node, node(property.to_s)) )
+ end
+
+ # returns an array of objects (not only the first one) that fit for the property
+ # accepts array of properties to access not-root-node vaules
+ # i.e. validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ]
+ # returns an array of all confusionMatrixValues
+ def get_nested( property_array )
+ n = [ @root_node ]
+ property_array.each do |p|
+ new_nodes = []
+ n.each do |nn|
+ @model.find( nn, node(p), nil ) do |sub,pred,obj|
+ new_nodes << obj
+ end
+ end
+ n = new_nodes
+ end
+ return n.collect{|nn| get_value( nn )}
end
private
+ # returns node-value
def get_value( node )
return nil unless node
if node.is_a?(Redland::Literal)
@@ -185,285 +297,251 @@ module OpenTox
end
public
- def set(name, value, datatype=nil)
+ # sets values of current_node (by default root_node)
+ #
+ # note: this does not delete existing triples
+ # * there can be several triples for the same subject and predicate
+ # ( e.g. after set("description","bla1") and set("description","bla2")
+ # both descriptions are in the model,
+ # but the get("description") will give you only one object (by chance)
+ # * this does not matter in pratice (only dataset uses this -> load_dataset-methods)
+ # * identical values appear only once in rdf
+ def set(predicate, object, current_node=@root_node )
- raise "uri is no prop, cannot set uri" if name=="uri"
- property_node = node(name.to_s) #, true)
- begin # delete existing entry
- t = @model.object(@root_node, property_node)
- @model.delete @root_node, property_node, t
- rescue
+ pred = predicate.to_s
+ raise "uri is no prop, cannot set uri" if pred=="uri"
+ raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier"
+ if (object.is_a?(Redland::Node) and object.blank?) or nil==object or object.to_s.size==0
+ # set only not-nil values
+ LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'"
+ return
end
- if value.is_a?(Redland::Node)
- raise "not nil datatype not allowed when setting redland node as value" if datatype
- @model.add @root_node, property_node, value
- else # if value is no node, a literal is created
- @model.add @root_node, property_node, Redland::Literal.create(value.to_s, datatype)
+
+ if pred=="type"
+ # predicate is type, set class of current node
+ set_type(object, current_node)
+ elsif LITERAL_TYPES.has_key?(pred)
+ # predicate is literal
+ set_literal(pred,object,LITERAL_TYPES[pred],current_node)
+ elsif OBJECT_PROPERTY_CLASS.has_key?(pred)
+ # predicte is objectProperty, object is another resource
+ set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node)
+ else
+ raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES"
end
end
-
- def parameters=(params)
- params.each do |name, settings|
- parameter = @model.create_resource
- @model.add parameter, node('type'), node('Parameter')
- @model.add parameter, node('title'), name
- @model.add parameter, node('paramScope'), settings[:scope]
- @model.add parameter, node('paramValue'), settings[:value]
- @model.add @root_node, node('parameters'), parameter
- end
- end
-
- def add_data_entries(compound_uri,features)
- # add compound
- compound = @model.subject(DC["identifier"], compound_uri)
- if compound.nil?
- compound = @model.create_resource(compound_uri)
- @model.add compound, node('type'), node("Compound")
- end
- features.each do |f|
- f.each do |feature_uri,value|
- # add feature
- feature = find_or_create_feature feature_uri
- if value.class.to_s == 'Hash'
- # create tuple
- tuple = @model.create_resource
- @model.add tuple, node('type'), node("Tuple")
- @model.add tuple, node('feature'), feature
- value.each do |uri,v|
- f = find_or_create_feature uri
- complex_value = @model.create_resource
- @model.add tuple, node('complexValue'), complex_value
- @model.add complex_value, node('type'), node("FeatureValue")
- @model.add complex_value, node('feature'), f
- @model.add complex_value, node('value'), Redland::Literal.create(v)
- end
- # add data entry
- data_entry = @model.subject node('compound'), compound
- if data_entry.nil?
- data_entry = @model.create_resource
- @model.add @root_node, node('dataEntry'), data_entry
- @model.add data_entry, node('type'), node("DataEntry")
- @model.add data_entry, node('compound'), compound
- end
- @model.add data_entry, node('values'), tuple
- else
- data_entry = @model.subject node('compound'), compound
- if data_entry.nil?
- data_entry = @model.create_resource
- @model.add @root_node, node('dataEntry'), data_entry
- @model.add data_entry,node('type'), node("DataEntry")
- @model.add data_entry, node('compound'), compound
- end
- values = @model.create_resource
- @model.add data_entry, node('values'), values
- @model.add values, node('type'), node('FeatureValue')
- @model.add values, node('feature'), feature
- @model.add values, node('value'), Redland::Literal.create(value)
- end
- end
- end
- end
-
- private
- def find_feature(feature_uri)
- # PENDING: more efficiently get feature node?
- @model.subjects(RDF['type'], OT['Feature']).each do |feature|
- return feature if feature_uri==get_value(feature)
+
+ # example-triples for setting rdf-type to model:
+ # model_xy,rdf:type,ot:Model
+ # ot:Model,rdf:type,owl:Class
+ def set_type(ot_class, current_node=@root_node)
+ @model.add current_node, RDF_TYPE, node(ot_class)
+ @model.add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS
end
- return nil
- end
-
- public
- def find_or_create_feature(feature_uri)
- feature = find_feature(feature_uri)
- unless feature
- feature = @model.create_resource(feature_uri)
- @model.add feature, node('type'), node("Feature")
- @model.add feature, node("title"), File.basename(feature_uri).split(/#/)[1]
- @model.add feature, node('creator'), feature_uri
- end
- feature
- end
-
- # feature values are not loaded for performance reasons
- # loading compounds and features into arrays that are given as params
- def load_dataset( compounds, features )
- @model.subjects(node('type'), node('Compound')).each do |compound|
- compounds << get_value(compound)
- end
- @model.subjects(node('type'), node('Feature')).each do |feature|
- features << get_value(feature)
+ # example-triples for setting description of a model:
+ # model_xy,ot:description,bla..bla^^xml:string
+ # ot:description,rdf:type,owl:Literal
+ def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node)
+ @model.add current_node, node(literal_name), Redland::Literal.create(literal_value, literal_datatype)
+ @model.add node(literal_name), RDF_TYPE, OWL_TYPE_LITERAL
end
- LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features"
- end
-
- # loading feature values for the specified feature
- # if feature is nil, all feature values are loaded
- #
- # general remark on the rdf loading (found out with some testing):
- # the search methods (subjects/find) are fast, the time consuming parts is creating resources,
- # which cannot be avoided in general (implemented some performance tweaks with uri storing when loading all features)
- def load_dataset_feature_values( compounds, data, feature_uri=nil )
- LOGGER.debug("load feature values"+ ( (feature_uri!=nil)?(" for feature: "+feature_uri):"") )
+ # example-triples for setting algorithm property of a model:
+ # model_xy,ot:algorithm,algorihtm_xy
+ # ot:algorithm,rdf:type,owl:ObjectProperty
+ # algorihtm_xy,rdf:type,ot:Algorithm
+ # ot:Algorithm,rdf:type,owl:Class
+ def set_object_property(property, object, object_class, current_node=@root_node)
+ object_node = Redland::Resource.new(object)
+ @model.add current_node, node(property), object_node
+ @model.add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY
+ @model.add object_node, RDF_TYPE, node(object_class)
+ @model.add node(object_class), RDF_TYPE, OWL_TYPE_CLASS
+ end
- # values are stored in the data-hash, hash has a key for each compound
- compounds.each{|c| data[c] = [] unless data[c]}
-
- load_all_features = feature_uri==nil
- feature_node = nil
-
- # create feature node for feature uri if specified
- unless load_all_features
- feature_node = find_feature(feature_uri)
- raise "feature node not found" unless feature_node
+ # this is (a recursiv method) to set nested-data via hashes (not only simple properties)
+ # example (for a dataset)
+ # { :description => "bla",
+ # :dataEntry => { :compound => "compound_uri",
+ # :values => [ { :class => "FeatureValue"
+ # :feature => "feat1",
+ # :value => 42 },
+ # { :class => "FeatureValue"
+ # :feature => "feat2",
+ # :value => 123 } ] } }
+ def set_data(hash, current_node=@root_node)
+
+ hash.each do |k,v|
+ if v.is_a?(Hash)
+ # value is again a hash
+ prop = k.to_s
+
+ # :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS
+ object_class = v.has_key?(:class) ? v.delete(:class) : OBJECT_PROPERTY_CLASS[prop]
+ raise "hash key must be a object-property, please add '"+prop.to_s+
+ "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class
+
+ # the new node is a class node, to specify the uri of the resource use key :uri
+ if v[:uri]
+ # identifier is either a specified uri
+ class_node = Redland::Resource.new(v.delete(:uri))
+ else
+ # or a new uri, make up internal uri with increment
+ class_node = new_class_node(object_class,current_node)
+ end
+ set_object_property(prop,class_node,object_class,current_node)
+ # recursivly call set_data method with new node
+ set_data(v,class_node)
+ elsif v.is_a?(Array)
+ # value is an array, each array element is added with current key as predicate
+ v.each do |value|
+ set_data( { k => value }, current_node )
+ end
+ else
+ # neither hash nor array, call simple set-method
+ set( k, v, current_node )
+ end
+ end
end
- count = 0
-
- # preformance tweak: store uirs to save some resource init time
- compound_uri_store = {}
- feature_uri_store = {}
-
- # search for all feature_value_node with property 'ot_feature'
- # feature_node is either nil, i.e. a wildcard or specified
- @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
-
- # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
- value_nodes = @model.subjects(node('values'),feature_value_node)
- raise "more than one value node "+value_nodes.size.to_s unless value_nodes.size==1
- value_node = value_nodes[0]
- compound_node = @model.object(value_node, node('compound'))
- compound_uri = compound_uri_store[compound_node.to_s]
- unless compound_uri
- compound_uri = get_value(compound_node)
- compound_uri_store[compound_node.to_s] = compound_uri
+ # create a new (internal class) node with unique, uri-like name
+ def new_class_node(name, current_node=@root_node)
+ # to avoid anonymous nodes, make up uris for sub-objects
+ # use counter to make sure each uri is unique
+ # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
+ count = 1
+ while (true)
+ res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) )
+ match = false
+ @model.find(nil, nil, res) do |s,p,o|
+ match = true
+ break
+ end
+ if match
+ count += 1
+ else
+ break
+ end
end
+ return res
+ end
+
+ # for "backwards-compatiblity"
+ # better use directly:
+ # set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] )
+ def parameters=(params)
- if load_all_features
- # if load all features, feautre_uri is not specified, derieve from feature_node
- feature_uri = feature_uri_store[o.to_s]
- unless feature_uri
- feature_uri = get_value(o)
- feature_uri_store[o.to_s] = feature_uri
+ converted_params = []
+ params.each do |name, settings|
+ converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] }
+ end
+ set_data( :parameters => converted_params )
+ end
+
+ # PENDING move to dataset.rb
+ # this is for dataset.to_owl
+ # adds feautre value for a single compound
+ def add_data_entries(compound_uri,features)
+
+ data_entry = { :compound => compound_uri }
+ if features
+ feature_values = []
+ features.each do |f|
+ f.each do |feature_uri,value|
+ if value.is_a?(Hash)
+ complex_values = []
+ value.each do |uri,v|
+ complex_values << { :feature => uri, :value => v }
+ end
+ feature_values << { :class => "Tuple", :feature => feature_uri, :complexValue => complex_values }
+ else
+ feature_values << { :class => "FeatureValue", :feature => feature_uri, :value => value }
+ end
+ end
end
+ data_entry[:values] = feature_values
+ end
+ set_data( :dataEntry => data_entry )
+ end
+
+ # PENDING move to dataset.rb
+ # feature values are not loaded for performance reasons
+ # loading compounds and features into arrays that are given as params
+ def load_dataset( compounds, features )
+
+ @model.subjects(RDF_TYPE, node('Compound')).each do |compound|
+ compounds << get_value(compound)
end
- value_node_type = @model.object(feature_value_node, node('type'))
- if (value_node_type == node('FeatureValue'))
- value_literal = @model.object( feature_value_node, node('value'))
- raise "feature value no literal" unless value_literal.is_a?(Redland::Literal)
- data[compound_uri] << {feature_uri => value_literal.get_value }
- else
- raise "feature value type not yet implemented "+value_node_type.to_s
+ @model.subjects(RDF_TYPE, node('Feature')).each do |feature|
+ feature_value_found=false
+ @model.find(nil, node("feature"), feature) do |potential_feature_value,p,o|
+ @model.find(nil, node("values"), potential_feature_value) do |s,p,o|
+ feature_value_found=true
+ break
+ end
+ break if feature_value_found
+ end
+ features << get_value(feature) if feature_value_found
end
- count += 1
- LOGGER.debug "loaded "+count.to_s+" feature values" if (count%500 == 0)
+ LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s
end
-
- LOGGER.debug "loaded "+count.to_s+" feature values"
- end
-
- @@property_nodes = { "type" => RDF["type"],
- "about" => RDF["about"],
- "title" => DC["title"],
- "creator" => DC["creator"],
- #"identifier" => DC["identifier"], identifier is deprecated
- "date" => DC["date"],
- "format" => DC["format"]}
-# @object_prop = OWL["ObjectProperty"]
-# @@type = { "Validation" => OWL["Class"],
-# "Model" => OWL["Class"],
-# "title" => OWL["AnnotationProperty"],
-# "creator" => OWL["AnnotationProperty"],
-# "date" => OWL["AnnotationProperty"],
-# "format" => OWL["AnnotationProperty"],
-# "predictedVariables" => @object_prop}
+ # PENDING move to dataset.rb
+ # loading feature values for the specified feature
+ # if feature is nil, all feature values are loaded
+ #
+ # general remark on the rdf loading (found out with some testing):
+ # the search methods (subjects/find) are fast, the time-consuming part is creating resources,
+ # which cannot be avoided in general
+ def load_dataset_feature_values( compounds, data, feature_uris )
+
+ raise "no feature-uri array" unless feature_uris.is_a?(Array)
- # this method has two purposes:
- # * distinguishing ot-properties from dc- and rdf- properties
- # * caching nodes, as creating nodes is costly
- def node(name) #, write_type_to_model=false)
- raise "dc[identifier] deprecated, use owl.uri" if name=="identifier"
- n = @@property_nodes[name]
- unless n
- n = OT[name]
- @@property_nodes[name] = n
- end
-
-# if write_type_to_model and name!="type"
-# raise "no type defined for '"+name+"'" unless @@type[name]
-# @model.add n,RDF['type'],@@type[name]
-# end
- return n
- end
+ # values are stored in the data-hash, hash has a key for each compound
+ compounds.each{|c| data[c] = [] unless data[c]}
+
+ count = 0
-=begin
- def data
- LOGGER.debug("getting data from model")
+ feature_uris.each do |feature_uri|
+ LOGGER.debug("load feature values for feature: "+feature_uri )
+ feature_node = Redland::Resource.new(feature_uri)
+
+ # search for all feature_value_node with property 'ot_feature' and the feature we are looking for
+ @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
- data = {}
- @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
- compound_node = @model.object(data_entry, OT['compound'])
- compound_uri = @model.object(compound_node, DC['identifier']).to_s
- @model.find(data_entry, OT['values'], nil) do |s,p,values|
- feature_node = @model.object values, OT['feature']
- feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
- type = @model.object(values, RDF['type'])
- if type == OT['FeatureValue']
- value = @model.object(values, OT['value']).to_s
- case value.to_s
- when TRUE_REGEXP # defined in environment.rb
- value = true
- when FALSE_REGEXP # defined in environment.rb
- value = false
- when /.*\^\^<.*XMLSchema#.*>/
- #HACK for reading ambit datasets
- case value.to_s
- when /XMLSchema#string/
- value = value.to_s[0..(value.to_s.index("^^")-1)]
- when /XMLSchema#double/
- value = value.to_s[0..(value.to_s.index("^^")-1)].to_f
- else
- LOGGER.warn " ILLEGAL TYPE "+compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
- value = nil
- end
- else
- LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
- value = nil
- end
- LOGGER.debug "converting owl to yaml, #compounds: "+(data.keys.size+1).to_s if (data.keys.size+1)%10==0 && !data.has_key?(compound_uri)
+ # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
+ value_nodes = @model.subjects(node('values'),feature_value_node)
+ if value_nodes.size>0
+ raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1
+ value_node = value_nodes[0]
- return data if (data.keys.size)>9 && !data.has_key?(compound_uri)
+ compound_uri = get_value( @model.object(value_node, node('compound')) )
- #puts "c "+compound_uri.to_s
- #puts "f "+feature_uri.to_s
- #puts "v "+value.to_s
- #puts ""
- data[compound_uri] = [] unless data[compound_uri]
- data[compound_uri] << {feature_uri => value} unless value.nil?
- elsif type == OT['Tuple']
- entry = {}
- data[compound_uri] = [] unless data[compound_uri]
- #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
- @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
- name_node = @model.object complex_value, OT['feature']
- name = @model.object(name_node, DC['title']).to_s
- value = @model.object(complex_value, OT['value']).to_s
- v = value.sub(/\^\^.*$/,'') # remove XML datatype
- v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
- entry[name] = v
+ value_node_type = @model.object(feature_value_node, RDF_TYPE)
+ if (value_node_type == node('FeatureValue'))
+ value_literal = @model.object( feature_value_node, node('value'))
+ raise "plain feature value no literal: "+value_literal.to_s unless value_literal.is_a?(Redland::Literal)
+ data[compound_uri] << {feature_uri => value_literal.get_value }
+ elsif (value_node_type == node('Tuple'))
+ complex_values = {}
+ @model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value|
+ complex_value_type = @model.object(complex_value, RDF_TYPE)
+ raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue')
+ complex_feature_uri = get_value(@model.object( complex_value, node('feature')))
+ complex_value = @model.object( complex_value, node('value'))
+ raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal)
+ complex_values[ complex_feature_uri ] = complex_value.get_value
+ end
+ data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0
end
- data[compound_uri] << {feature_uri => entry} unless entry.empty?
+ count += 1
+ LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0)
end
end
+ LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s
end
- data
end
-=end
-
end
end
-
diff --git a/lib/task.rb b/lib/task.rb
index 88bcb71..b563fe6 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -23,9 +23,9 @@ module OpenTox
end
public
- def self.find(uri)
+ def self.find( uri, accept_header='application/rdf+xml' )
task = Task.new(uri)
- task.reload
+ task.reload( accept_header )
return task
end
@@ -36,8 +36,8 @@ module OpenTox
return task
end
- def reload
- result = RestClientWrapper.get(uri, {:accept => 'application/rdf+xml'}, false)#'application/x-yaml'})
+ def reload( accept_header='application/rdf+xml' )
+ result = RestClientWrapper.get(uri, {:accept => accept_header}, false)#'application/x-yaml'})
@http_code = result.code
reload_from_data(result, result.content_type, uri)
end
@@ -95,8 +95,8 @@ module OpenTox
def wait_for_completion(dur=0.3)
if (@uri.match(@@config[:services]["opentox-task"]))
- due_to_time = Time.parse(@due_to_time)
- running_time = due_to_time - Time.parse(@date)
+ due_to_time = (@due_to_time.is_a?(Time) ? @due_to_time : Time.parse(@due_to_time))
+ running_time = due_to_time - (@date.is_a?(Time) ? @date : Time.parse(@date))
else
# the date of the external task cannot be trusted, offest to local time might be to big
due_to_time = Time.new + EXTERNAL_TASK_MAX_DURATION
diff --git a/lib/tasks/opentox.rb b/lib/tasks/opentox.rb
deleted file mode 100644
index 7fce35b..0000000
--- a/lib/tasks/opentox.rb
+++ /dev/null
@@ -1,107 +0,0 @@
-require File.join(File.dirname(__FILE__), '..', 'opentox-ruby-api-wrapper.rb')
-
-namespace :opentox do
-
- namespace :services do
-
- desc "Run opentox services"
- task :start do
- @@config[:services].each do |service,uri|
- dir = File.join(@@config[:base_dir], service)
- server = @@config[:webserver]
- case server
- when /thin|mongrel|webrick/
- port = uri.sub(/^.*:/,'').sub(/\/$/,'')
- Dir.chdir dir
- pid_file = File.join(TMP_DIR,"#{service}.pid")
- begin
- `#{server} --trace --rackup config.ru start -p #{port} -e #{ENV['RACK_ENV']} -P #{pid_file} -d &`
- puts "#{service} started on localhost:#{port} in #{ENV['RACK_ENV']} environment with PID file #{pid_file}."
- rescue
- puts "Cannot start #{service} on port #{port}."
- end
- when 'passenger'
- FileUtils.mkdir_p File.join(dir, 'tmp')
- FileUtils.touch File.join(dir, 'tmp/restart.txt')
- puts "#{service} restarted."
- else
- puts "not yet implemented"
- end
- end
- end
-
- desc "Stop opentox services"
- task :stop do
- server = @@config[:webserver]
- if server =~ /thin|mongrel|webrick/
- @@config[:services].each do |service,uri|
- port = uri.sub(/^.*:/,'').sub(/\/$/,'')
- pid_file = File.join(TMP_DIR,"#{service}.pid")
- begin
- puts `#{server} stop -P #{pid_file}`
- puts "#{service} stopped on localhost:#{port}"
- rescue
- puts "Cannot stop #{service} on port #{port}."
- end
- end
- end
- end
-
- desc "Restart opentox services"
- task :restart => [:stop, :start]
-
- end
-
- desc "Run all OpenTox tests"
- task :test do
- @@config[:services].each do |service,uri|
- dir = File.join(@@config[:base_dir], service)
- Dir.chdir dir
- puts "Running tests in #{dir}"
- `rake test -t 1>&2`
- end
- end
-
-end
-
-desc "Start service in current directory"
-task :start do
- service = File.basename(Dir.pwd).intern
- server = @@config[:webserver]
- case server
- when /thin|mongrel|webrick/
- port = @@config[:services][service].sub(/^.*:/,'').sub(/\/$/,'')
- pid_file = File.join(TMP_DIR,"#{service}.pid")
- begin
- `#{server} --trace --rackup config.ru start -p #{port} -e #{ENV['RACK_ENV']} -P #{pid_file} -d &`
- puts "#{service} started on localhost:#{port} in #{ENV['RACK_ENV']} environment with PID file #{pid_file}."
- rescue
- puts "Cannot start #{service} on port #{port}."
- end
- when 'passenger'
- FileUtils.mkdir_p File.join(dir, 'tmp')
- FileUtils.touch File.join(dir, 'tmp/restart.txt')
- puts "#{service} restarted."
- else
- puts "not yet implemented"
- end
-end
-
-desc "Stop service in current directory"
-task :stop do
- service = File.basename(Dir.pwd).intern
- server = @@config[:webserver]
- if server =~ /thin|mongrel|webrick/
- port = @@config[:services][service].sub(/^.*:/,'').sub(/\/$/,'')
- pid_file = File.join(TMP_DIR,"#{service}.pid")
- begin
- puts `thin stop -P #{pid_file}`
- puts "#{service} stopped on localhost:#{port}"
- rescue
- puts "Cannot stop #{service} on port #{port}."
- end
- end
-end
-
-desc "Restart service in current directory"
-task :restart => [:stop, :start]