diff options
author | Christoph Helma <helma@in-silico.de> | 2010-03-19 16:24:41 +0100 |
---|---|---|
committer | Christoph Helma <helma@in-silico.de> | 2010-03-19 16:24:41 +0100 |
commit | 95f6ca7e8c02daea41ae00b85807cae245142092 (patch) | |
tree | aafa8d34625d320c3316d589c8a7ebe70bc1e582 /lib | |
parent | 59d50e68b52b207e1a8207e2a7baeb5b986c9784 (diff) |
owl parsing implemented
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dataset.rb | 61 | ||||
-rw-r--r-- | lib/model.rb | 53 | ||||
-rw-r--r-- | lib/owl.rb | 159 |
3 files changed, 103 insertions, 170 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb index dad6403..09cafe2 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -13,7 +13,25 @@ module OpenTox end def self.find(uri) - YAML.load RestClient.get(uri, :accept => 'application/x-yaml').to_s + if uri.match(/webservices.in-silico.ch|localhost/) # try to get YAML first + YAML.load RestClient.get(uri, :accept => 'application/x-yaml').to_s + else # get default rdf+xml + owl = OpenTox::Owl.from_uri(uri) + @title = owl.title + @source = owl.source + @identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'') + @uri = @identifier + @data = owl.data + halt 404, "Dataset #{uri} empty!" if @data.empty? + @data.each do |compound,features| + @compounds << compound + features.each do |f,v| + @features << f + end + end + @compounds.uniq! + @features.uniq! + end end @@ -114,47 +132,6 @@ module OpenTox features end - def data - data = {} - @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry| - compound_node = @model.object(data_entry, OT['compound']) - compound_uri = @model.object(compound_node, DC['identifier']).to_s - @model.find(data_entry, OT['values'], nil) do |s,p,values| - feature_node = @model.object values, OT['feature'] - feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype - type = @model.object(values, RDF['type']) - if type == OT['FeatureValue'] - value = @model.object(values, OT['value']).to_s - case value.to_s - when TRUE_REGEXP # defined in environment.rb - value = true - when FALSE_REGEXP # defined in environment.rb - value = false - else - LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri - value = nil - end - data[compound_uri] = {} unless data[compound_uri] - data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri] - data[compound_uri][feature_uri] << value unless value.nil? - elsif type == OT['Tuple'] - entry = {} - data[compound_uri] = {} unless data[compound_uri] - data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri] - @model.find(values, OT['complexValue'],nil) do |s,p,complex_value| - name_node = @model.object complex_value, OT['feature'] - name = @model.object(name_node, DC['title']).to_s - value = @model.object(complex_value, OT['value']).to_s - v = value.sub(/\^\^.*$/,'') # remove XML datatype - v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype - entry[name] = v - end - data[compound_uri][feature_uri] << entry - end - end - end - data - end def compounds compounds = [] diff --git a/lib/model.rb b/lib/model.rb index c8d501b..e6418f0 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -1,10 +1,26 @@ module OpenTox module Model - - class Lazar - attr_accessor :dependent_variable, :activity_dataset_uri, :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features, :algorithm + class Generic + + attr_accessor :predicted_variables, :independent_variables, :dependent_variables, :activity_dataset_uri, :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features, :algorithm + + def self.find(uri) + owl = OpenTox::Owl.from_uri(uri) + @title = owl.title + @source = owl.source + @identifier = owl.identifier.sub(/^\[/,'').sub(/\]$/,'') + @uri = @identifier + @algorithm = owl.algorithm + @dependent_variables = owl.dependentVariables + @independent_variables = owl.independentVariables + @predicted_variables = owl.predictedVariables + end + end + + class Lazar < Generic + def initialize @source = "http://github.com/helma/opentox-model" @algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar") @@ -22,41 +38,10 @@ module OpenTox resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s end - def self.find_all RestClient.get(@@config[:services]["opentox-model"]).chomp.split("\n") end =begin - include Owl - - # Create a new prediction model from a dataset - def initialize - super - self.source = "http://github.com/helma/opentox-model" - self.algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar") - self.independentVariables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative") # TODO read this from dataset - end - - def self.from_yaml(yaml) - yaml = YAML.load yaml - lazar = Lazar.new - lazar.title = "lazar model for #{yaml[:endpoint]}" - lazar.parameters = { - "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri=#{yaml[:activity_dataset]}" }, - "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri=#{yaml[:endpoint]}" }, - "Feature generation URI" => { :scope => "mandatory", :value => "feature_generation_uri=#{File.join(@@config[:services]["opentox-algorithm"],"fminer")}"} #TODO write to yaml - } - lazar.algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar") - lazar.trainingDataset = yaml[:activity_dataset] - lazar.dependentVariables = yaml[:endpoint] - lazar.predictedVariables = yaml[:endpoint] + "_lazar_prediction" - lazar - end - - def self.find(uri) - yaml = RestClient.get(uri, :accept => "application/x-yaml") - OpenTox::Model::Lazar.from_yaml(yaml) - end # Predict a compound def predict(compound) @@ -2,15 +2,35 @@ module OpenTox class Owl - attr_reader :uri, :ot_class + attr_accessor :uri, :ot_class, :model - def initialize(ot_class,uri) + def initialize @model = Redland::Model.new Redland::MemoryStore.new - @parser = Redland::Parser.new - @ot_class = ot_class - @uri = Redland::Uri.new(uri.chomp) - @model.add @uri, RDF['type'], OT[@ot_class] - @model.add @uri, DC['identifier'], @uri + end + + def self.create(ot_class,uri) + owl = OpenTox::Owl.new + owl.ot_class = ot_class + owl.uri = Redland::Uri.new(uri.chomp) + owl.model.add owl.uri, RDF['type'], OT[owl.ot_class] + owl.model.add owl.uri, DC['identifier'], owl.uri + owl + end + + def self.from_uri(uri) + owl = OpenTox::Owl.new + parser = Redland::Parser.new + begin + parser.parse_into_model(owl.model,uri) + rescue => e + raise "Error parsing #{uri}: #{e.message + e.backtrace}" + end + owl.uri = Redland::Uri.new(uri.chomp) + owl + end + + def rdf + @model.to_string end def method_missing(name, *args) @@ -25,7 +45,7 @@ module OpenTox end @model.add @uri, DC[name], args.first else # getter - @model.object(@uri, DC['title']).to_s + @model.object(@uri, DC[name.to_s]).to_s end else raise "Method '#{name.to_s}' not found." @@ -106,95 +126,46 @@ module OpenTox feature end - def rdf - @model.to_string - end - -=begin - - def to_ntriples - @serializer.model_to_string(Redland::Uri.new(@uri), @model) - end - - def uri=(uri) - @uri = uri.chomp - # rewrite uri - @model.subjects(RDF['type'],OT[@ot_class]).each do |me| - @model.delete(me,RDF['type'],OT[@ot_class]) - @model.add(uri,RDF['type'],OT[@ot_class]) - id = @model.object(me, DC['identifier']) - @model.delete me, DC['identifier'], id - # find/replace metadata - @model.find(me, nil, nil) do |s,p,o| - @model.delete s,p,o - @model.add uri,p,o + def data + data = {} + @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry| + compound_node = @model.object(data_entry, OT['compound']) + compound_uri = @model.object(compound_node, DC['identifier']).to_s + @model.find(data_entry, OT['values'], nil) do |s,p,values| + feature_node = @model.object values, OT['feature'] + feature_uri = @model.object(feature_node, DC['identifier']).to_s.sub(/\^\^.*$/,'') # remove XML datatype + type = @model.object(values, RDF['type']) + if type == OT['FeatureValue'] + value = @model.object(values, OT['value']).to_s + case value.to_s + when TRUE_REGEXP # defined in environment.rb + value = true + when FALSE_REGEXP # defined in environment.rb + value = false + else + LOGGER.warn compound_uri + " has value '" + value.to_s + "' for feature " + feature_uri + value = nil + end + data[compound_uri] = [] unless data[compound_uri] + data[compound_uri] << {feature_uri => value} unless value.nil? + elsif type == OT['Tuple'] + entry = {} + data[compound_uri] = [] unless data[compound_uri] + #data[compound_uri][feature_uri] = [] unless data[compound_uri][feature_uri] + @model.find(values, OT['complexValue'],nil) do |s,p,complex_value| + name_node = @model.object complex_value, OT['feature'] + name = @model.object(name_node, DC['title']).to_s + value = @model.object(complex_value, OT['value']).to_s + v = value.sub(/\^\^.*$/,'') # remove XML datatype + v = v.to_f if v.match(/^[\.|\d]+$/) # guess numeric datatype + entry[name] = v + end + data[compound_uri] << {feature_uri => entry} unless entry.empty? + end end - @model.add uri, DC['identifier'], @uri end + data end - def read(uri) - @parser.parse_into_model(@model,uri) - @uri = uri - end - - def identifier - me = @model.subject(RDF['type'],OT[@ot_class]) - @model.object(me, DC['identifier']).to_s unless me.nil? - end - - def title=(title) - me = @model.subject(RDF['type'],OT[@ot_class]) - begin - t = @model.object(me, DC['title']) - @model.delete me, DC['title'], t - rescue - end - @model.add me, DC['title'], title - end - - def source=(source) - me = @model.subject(RDF['type'],OT[@ot_class]) - begin - t = @model.object(me, DC['source']) - @model.delete me, DC['source'], t - rescue - end - @model.add me, DC['source'], source - end - - def title - # I have no idea, why 2 subjects are returned - # iterating over all subjects leads to memory allocation problems - # SPARQL queries also do not work - #me = @model.subjects(RDF['type'],OT[@ot_class])[1] - me = @model.subject(RDF['type'],OT[@ot_class]) - @model.object(me, DC['title']).to_s - end - - def source - me = @model.subject(RDF['type'],OT[@ot_class]) - @model.object(me, DC['source']).to_s unless me.nil? - end - def create_owl_statement(name,value) - r = @model.create_resource - dc_class = DC[name.gsub(/^[a-z]/) { |a| a.upcase }] # capitalize only the first letter - #puts "DC:" + name.gsub(/^[a-z]/) { |a| a.upcase } - @model.add dc_class, RDF['type'], OWL["Class"] - @model.add r, RDF['type'], dc_class - @model.add r, DC[name], value - end - - def method_missing(name, *args) - # create magic setter methods - if /=/ =~ name.to_s - create_owl_statement name.to_s.sub(/=/,''), args.first - else - raise "No method #{name}" - end - end -=end - end - end |