summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Christoph Helma <helma@in-silico.de> 2010-03-19 16:24:41 +0100
committer: Christoph Helma <helma@in-silico.de> 2010-03-19 16:24:41 +0100
commit: 95f6ca7e8c02daea41ae00b85807cae245142092 (patch)
tree: aafa8d34625d320c3316d589c8a7ebe70bc1e582 /lib
parent: 59d50e68b52b207e1a8207e2a7baeb5b986c9784 (diff)
owl parsing implemented
Diffstat (limited to 'lib')
-rw-r--r-- lib/dataset.rb 61
-rw-r--r-- lib/model.rb 53
-rw-r--r-- lib/owl.rb 159
3 files changed, 103 insertions, 170 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index dad6403..09cafe2 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -13,7 +13,25 @@ module OpenTox
end
def self.find(uri)
- YAML.load RestClient.get(uri, :accept => 'application/x-yaml').to_s
+ if uri.match(/webservices.in-silico.ch|localhost/) # try to get YAML first
+ YAML.load RestClient.get(uri, :accept => 'application/x-yaml').to_s
+ else # get default rdf+xml
+ owl = OpenTox::Owl.from_uri(uri)
+ @title = owl.title
+ @source = owl.source
+ @identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'')
+ @uri = @identifier
+ @data = owl.data
+ halt 404, "Dataset #{uri} empty!" if @data.empty?
+ @data.each do |compound,features|
+ @compounds << compound
+ features.each do |f,v|
+ @features << f
+ end
+ end
+ @compounds.uniq!
+ @features.uniq!
+ end
end
@@ -114,47 +132,6 @@ module OpenTox
features
end
- def data
- data = {}
- @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
- compound_node = @model.object(data_entry, OT['compound'])
- compound_uri = @model.object(compound_node, DC['identifier']).to_s
- @model.find(data_entry, OT['values'], nil) do |s,p,values|
- feature_node = @model.object values, OT['feature']
- feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
- type = @model.object(values, RDF['type'])
- if type == OT['FeatureValue']
- value = @model.object(values, OT['value']).to_s
- case value.to_s
- when TRUE_REGEXP # defined in environment.rb
- value = true
- when FALSE_REGEXP # defined in environment.rb
- value = false
- else
- LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
- value = nil
- end
- data[compound_uri] = {} unless data[compound_uri]
- data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
- data[compound_uri][feature_uri] << value unless value.nil?
- elsif type == OT['Tuple']
- entry = {}
- data[compound_uri] = {} unless data[compound_uri]
- data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
- @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
- name_node = @model.object complex_value, OT['feature']
- name = @model.object(name_node, DC['title']).to_s
- value = @model.object(complex_value, OT['value']).to_s
- v = value.sub(/\^\^.*$/,'') # remove XML datatype
- v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
- entry[name] = v
- end
- data[compound_uri][feature_uri] << entry
- end
- end
- end
- data
- end
def compounds
compounds = []
diff --git a/lib/model.rb b/lib/model.rb
index c8d501b..e6418f0 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -1,10 +1,26 @@
module OpenTox
module Model
-
- class Lazar
- attr_accessor :dependent_variable, :activity_dataset_uri, :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features, :algorithm
+ class Generic
+
+ attr_accessor :predicted_variables, :independent_variables, :dependent_variables, :activity_dataset_uri, :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features, :algorithm
+
+ def self.find(uri)
+ owl = OpenTox::Owl.from_uri(uri)
+ @title = owl.title
+ @source = owl.source
+ @identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'')
+ @uri = @identifier
+ @algorithm = owl.algorithm
+ @dependent_variables = owl.dependentVariables
+ @independent_variables = owl.independentVariables
+ @predicted_variables = owl.predictedVariables
+ end
+ end
+
+ class Lazar < Generic
+
def initialize
@source = "http://github.com/helma/opentox-model"
@algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar")
@@ -22,41 +38,10 @@ module OpenTox
resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s
end
-
def self.find_all
RestClient.get(@@config[:services]["opentox-model"]).chomp.split("\n")
end
=begin
- include Owl
-
- # Create a new prediction model from a dataset
- def initialize
- super
- self.source = "http://github.com/helma/opentox-model"
- self.algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar")
- self.independentVariables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative") # TODO read this from dataset
- end
-
- def self.from_yaml(yaml)
- yaml = YAML.load yaml
- lazar = Lazar.new
- lazar.title = "lazar model for #{yaml[:endpoint]}"
- lazar.parameters = {
- "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri=#{yaml[:activity_dataset]}" },
- "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri=#{yaml[:endpoint]}" },
- "Feature generation URI" => { :scope => "mandatory", :value => "feature_generation_uri=#{File.join(@@config[:services]["opentox-algorithm"],"fminer")}"} #TODO write to yaml
- }
- lazar.algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar")
- lazar.trainingDataset = yaml[:activity_dataset]
- lazar.dependentVariables = yaml[:endpoint]
- lazar.predictedVariables = yaml[:endpoint] + "_lazar_prediction"
- lazar
- end
-
- def self.find(uri)
- yaml = RestClient.get(uri, :accept => "application/x-yaml")
- OpenTox::Model::Lazar.from_yaml(yaml)
- end
# Predict a compound
def predict(compound)
diff --git a/lib/owl.rb b/lib/owl.rb
index c247c45..1d47d2c 100644
--- a/lib/owl.rb
+++ b/lib/owl.rb
@@ -2,15 +2,35 @@ module OpenTox
class Owl
- attr_reader :uri, :ot_class
+ attr_accessor :uri, :ot_class, :model
- def initialize(ot_class,uri)
+ def initialize
@model = Redland::Model.new Redland::MemoryStore.new
- @parser = Redland::Parser.new
- @ot_class = ot_class
- @uri = Redland::Uri.new(uri.chomp)
- @model.add @uri, RDF['type'], OT[@ot_class]
- @model.add @uri, DC['identifier'], @uri
+ end
+
+ def self.create(ot_class,uri)
+ owl = OpenTox::Owl.new
+ owl.ot_class = ot_class
+ owl.uri = Redland::Uri.new(uri.chomp)
+ owl.model.add owl.uri, RDF['type'], OT[owl.ot_class]
+ owl.model.add owl.uri, DC['identifier'], owl.uri
+ owl
+ end
+
+ def self.from_uri(uri)
+ owl = OpenTox::Owl.new
+ parser = Redland::Parser.new
+ begin
+ parser.parse_into_model(owl.model,uri)
+ rescue => e
+ raise "Error parsing #{uri}: #{e.message + e.backtrace}"
+ end
+ owl.uri = Redland::Uri.new(uri.chomp)
+ owl
+ end
+
+ def rdf
+ @model.to_string
end
def method_missing(name, *args)
@@ -25,7 +45,7 @@ module OpenTox
end
@model.add @uri, DC[name], args.first
else # getter
- @model.object(@uri, DC['title']).to_s
+ @model.object(@uri, DC[name.to_s]).to_s
end
else
raise "Method '#{name.to_s}' not found."
@@ -106,95 +126,46 @@ module OpenTox
feature
end
- def rdf
- @model.to_string
- end
-
-=begin
-
- def to_ntriples
- @serializer.model_to_string(Redland::Uri.new(@uri), @model)
- end
-
- def uri=(uri)
- @uri = uri.chomp
- # rewrite uri
- @model.subjects(RDF['type'],OT[@ot_class]).each do |me|
- @model.delete(me,RDF['type'],OT[@ot_class])
- @model.add(uri,RDF['type'],OT[@ot_class])
- id = @model.object(me, DC['identifier'])
- @model.delete me, DC['identifier'], id
- # find/replace metadata
- @model.find(me, nil, nil) do |s,p,o|
- @model.delete s,p,o
- @model.add uri,p,o
+ def data
+ data = {}
+ @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
+ compound_node = @model.object(data_entry, OT['compound'])
+ compound_uri = @model.object(compound_node, DC['identifier']).to_s
+ @model.find(data_entry, OT['values'], nil) do |s,p,values|
+ feature_node = @model.object values, OT['feature']
+ feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype
+ type = @model.object(values, RDF['type'])
+ if type == OT['FeatureValue']
+ value = @model.object(values, OT['value']).to_s
+ case value.to_s
+ when TRUE_REGEXP # defined in environment.rb
+ value = true
+ when FALSE_REGEXP # defined in environment.rb
+ value = false
+ else
+ LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri
+ value = nil
+ end
+ data[compound_uri] = [] unless data[compound_uri]
+ data[compound_uri] << {feature_uri => value} unless value.nil?
+ elsif type == OT['Tuple']
+ entry = {}
+ data[compound_uri] = [] unless data[compound_uri]
+ #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri]
+ @model.find(values, OT['complexValue'],nil) do |s,p,complex_value|
+ name_node = @model.object complex_value, OT['feature']
+ name = @model.object(name_node, DC['title']).to_s
+ value = @model.object(complex_value, OT['value']).to_s
+ v = value.sub(/\^\^.*$/,'') # remove XML datatype
+ v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype
+ entry[name] = v
+ end
+ data[compound_uri] << {feature_uri => entry} unless entry.empty?
+ end
end
- @model.add uri, DC['identifier'], @uri
end
+ data
end
- def read(uri)
- @parser.parse_into_model(@model,uri)
- @uri = uri
- end
-
- def identifier
- me = @model.subject(RDF['type'],OT[@ot_class])
- @model.object(me, DC['identifier']).to_s unless me.nil?
- end
-
- def title=(title)
- me = @model.subject(RDF['type'],OT[@ot_class])
- begin
- t = @model.object(me, DC['title'])
- @model.delete me, DC['title'], t
- rescue
- end
- @model.add me, DC['title'], title
- end
-
- def source=(source)
- me = @model.subject(RDF['type'],OT[@ot_class])
- begin
- t = @model.object(me, DC['source'])
- @model.delete me, DC['source'], t
- rescue
- end
- @model.add me, DC['source'], source
- end
-
- def title
- # I have no idea, why 2 subjects are returned
- # iterating over all subjects leads to memory allocation problems
- # SPARQL queries also do not work
- #me = @model.subjects(RDF['type'],OT[@ot_class])[1]
- me = @model.subject(RDF['type'],OT[@ot_class])
- @model.object(me, DC['title']).to_s
- end
-
- def source
- me = @model.subject(RDF['type'],OT[@ot_class])
- @model.object(me, DC['source']).to_s unless me.nil?
- end
- def create_owl_statement(name,value)
- r = @model.create_resource
- dc_class = DC[name.gsub(/^[a-z]/) { |a| a.upcase }] # capitalize only the first letter
- #puts "DC:" + name.gsub(/^[a-z]/) { |a| a.upcase }
- @model.add dc_class, RDF['type'], OWL["Class"]
- @model.add r, RDF['type'], dc_class
- @model.add r, DC[name], value
- end
-
- def method_missing(name, *args)
- # create magic setter methods
- if /=/ =~ name.to_s
- create_owl_statement name.to_s.sub(/=/,''), args.first
- else
- raise "No method #{name}"
- end
- end
-=end
-
end
-
end