From fc3bda0095ca7e6968edf01bec034a243d294af4 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Mon, 14 Dec 2009 09:41:59 +0100 Subject: Initial adaptation to RDF/OWL --- Rakefile | 7 +- lib/algorithm.rb | 52 ++++----- lib/compound.rb | 19 +++- lib/dataset.rb | 239 +++++++++++++++++++++++++++++++++++++--- lib/environment.rb | 6 + lib/model.rb | 66 ++++++++--- lib/opentox-ruby-api-wrapper.rb | 6 +- lib/opentox.rb | 27 ----- lib/owl.rb | 102 +++++++++++++++++ lib/task.rb | 11 +- 10 files changed, 433 insertions(+), 102 deletions(-) delete mode 100644 lib/opentox.rb create mode 100644 lib/owl.rb diff --git a/Rakefile b/Rakefile index 1c79e15..c5cb026 100644 --- a/Rakefile +++ b/Rakefile @@ -15,16 +15,11 @@ begin gem.add_dependency "rack" gem.add_dependency "rack-contrib" gem.add_dependency "thin" - #gem.add_dependency "ezmobius-redis-rb" gem.add_dependency "emk-sinatra-url-for" gem.add_dependency "cehoffman-sinatra-respond_to" - #gem.add_dependency "dm-core" - #gem.add_dependency "datamapper" - #gem.add_dependency "do_sqlite3" gem.add_development_dependency "cucumber" gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] - #gem.files.include %w(lib/tasks/opentox.rb, lib/tasks/redis.rb, lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/feature.rb, lib/model.rb, lib/utils.rb, lib/templates/*) - gem.files.include %w(lib/tasks/opentox.rb, lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/utils.rb, lib/templates/*) + gem.files.include %w(lib/tasks/owl.rb, lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/utils.rb, lib/templates/*) # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings end Jeweler::GemcutterTasks.new diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 43f7ede..a32a462 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -1,35 +1,37 @@ module OpenTox module Algorithm - class Fminer < OpenTox - # Create a new dataset with BBRC features - def self.create(params) - puts params[:dataset_uri] - uri = RestClient.post File.join(@@config[:services]["opentox-algorithm"],'fminer'), :dataset_uri => params[:dataset_uri] - print "fminer finsihed " - puts uri - uri - end - end + class Fminer #< OpenTox + include Owl - class Similarity < OpenTox - - def self.tanimoto(dataset1,compound1,dataset2,compound2) - RestClient.get File.join(@@config[:services]["opentox-algorithm"], 'tanimoto/dataset',dataset1.name,compound1.inchi,'dataset',dataset2.name,compound2.inchi) + def initialize + super + self.uri = File.join(@@config[:services]["opentox-algorithm"],'fminer') + self.title = "fminer" + self.source = "http://github.com/amaunz/libfminer" + self.parameters = { + "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri" }, + "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" } + } end - - def self.weighted_tanimoto(dataset1,compound1,dataset2,compound2) - # URI.escape does not work here - uri = File.join(@@config[:services]["opentox-algorithm"], 'weighted_tanimoto/dataset',CGI.escape(dataset1.name),'compound',CGI.escape(compound1.inchi),'dataset',CGI.escape(dataset2.name),'compound',CGI.escape(compound2.inchi)) - RestClient.get uri - end - end - class Lazar < OpenTox - # Create a new prediction model from a dataset - def self.create(params) - RestClient.post File.join(@@config[:services]["opentox-algorithm"],"lazar_classification"), params + class Lazar #< OpenTox + include Owl + + def initialize + super + self.uri = File.join(@@config[:services]["opentox-algorithm"],'lazar') + self.title = "lazar" + self.source = "http://github.com/helma/opentox-algorithm" + self.parameters = { + "Dataset URI" => + { :scope => "mandatory", :value => "dataset_uri" }, + "Feature URI for dependent variable" => + { :scope => "mandatory", :value => "feature_uri" }, + "Feature generation URI" => + { :scope => "mandatory", :value => "feature_generation_uri" } + } end end diff --git a/lib/compound.rb b/lib/compound.rb index 416acab..9e1cee7 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -1,9 +1,8 @@ module OpenTox - # uri: /compound/:inchi - class Compound < OpenTox + class Compound #< OpenTox - attr_reader :inchi + attr_reader :inchi, :uri # Initialize with :uri => uri, :smiles => smiles or :name => name (name can be also an InChI/InChiKey, CAS number, etc) def initialize(params) @@ -14,6 +13,9 @@ module OpenTox elsif params[:inchi] @inchi = params[:inchi] @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi)) + elsif params[:sdf] + @inchi = sdf2inchi(params[:sdf]) + @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi)) elsif params[:name] @inchi = RestClient.get("#{@@cactus_uri}#{params[:name]}/stdinchi").chomp @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi)) @@ -48,6 +50,10 @@ module OpenTox smarts_array.collect{|s| s if match?(s)}.compact end + def sdf2inchi(sdf) + obconversion(sdf,'sdf','inchi') + end + def smiles2inchi(smiles) obconversion(smiles,'smi','inchi') end @@ -61,7 +67,12 @@ module OpenTox obmol = OpenBabel::OBMol.new obconversion.set_in_and_out_formats input_format, output_format obconversion.read_string obmol, identifier - obconversion.write_string(obmol).gsub(/\s/,'').chomp + case output_format + when /smi|can|inchi/ + obconversion.write_string(obmol).gsub(/\s/,'').chomp + else + obconversion.write_string(obmol) + end end end end diff --git a/lib/dataset.rb b/lib/dataset.rb index fe49622..fad6baa 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -1,23 +1,184 @@ module OpenTox - class Dataset < OpenTox + class Dataset + include Owl - # Initialize with :uri => uri or :name => name (creates a new dataset) - def initialize(uri) - super(uri) + #attr_accessor :model + + def initialize + super + end + + # find or create a new compound and return the resource + def find_or_create_compound(uri) + compound = @model.subject(DC["identifier"], uri) + if compound.nil? + compound = @model.create_resource + @model.add compound, RDF['type'], OT["Compound"] + @model.add compound, DC["identifier"], uri + end + compound + end + + # find or create a new feature and return the resource + def find_or_create_feature(f) + feature = @model.subject(DC["title"], f[:name].to_s) + if feature.nil? + feature = @model.create_resource + @model.add feature, RDF['type'], OT["Feature"] + @model.add feature, DC["identifier"], File.join("feature",feature.to_s.gsub(/[()]/,'')) # relative uri as we don know the final uri + @model.add feature, DC["title"], f[:name].to_s + @model.add feature, OT['hasSource'], f[:source].to_s if f[:source] + end + feature + end + + # find or create a new value and return the resource + def find_or_create_value(v) + value = @model.subject OT["value"], v.to_s + if value.nil? + value = @model.create_resource + @model.add value, RDF['type'], OT["FeatureValue"] + @model.add value, OT["value"], v.to_s + end + value + end + + def tuple?(t) + statements = [] + has_tuple = true + t.each do |name,v| + feature = self.find_or_create_feature(:name => name) + value = self.find_or_create_value(v) + tuple = @model.subject(feature,value) + has_tuple = false if tuple.nil? + statements << [tuple,feature,value] + end + tuples_found = statements.collect{|s| s[0]}.uniq + has_tuple = false unless tuples_found.size == 1 + has_tuple end - def self.create(data) - uri = RestClient.post @@config[:services]["opentox-dataset"], data, :content_type => 'application/rdf+xml' - Dataset.new(uri.to_s) + def create_tuple(t) + tuple = @model.create_resource + @model.add tuple, RDF['type'], OT["Tuple"] + t.each do |name,value| + feature = self.find_or_create_feature(:name => name) + value = self.find_or_create_value(value) + pair = @model.create_resource + @model.add tuple, OT['tuple'], pair + @model.add pair, OT['feature'], feature + @model.add pair, OT['value'], value + end + tuple + end + + def find_or_create_tuple(t) + if self.tuple?(t) + t + else + self.create_tuple(t) + end + end + + def add_data_entry(compound,feature,value) + data_entry = @model.create_resource + @model.add data_entry, RDF['type'], OT["DataEntry"] + @model.add data_entry, OT['compound'], compound + @model.add data_entry, OT['feature'], feature + @model.add data_entry, OT['values'], value + end + + def self.create(data, content_type = 'application/rdf+xml') + uri = RestClient.post @@config[:services]["opentox-dataset"], data, :content_type => content_type + dataset = Dataset.new + dataset.read uri.to_s + dataset end def self.find(uri) - RestClient.get uri # check if the resource is available + begin + RestClient.get uri # check if the resource is available + dataset = Dataset.new + dataset.read uri.to_s + dataset + rescue + nil + end + end + + def features end - def self.base_uri - @@config[:services]["opentox-dataset"] + def feature_values(uri) + features = {} + feature = @model.subject(DC["identifier"],uri) + @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node| + compound = @model.object(compound_node, DC["identifier"]).to_s.sub(/^\[(.*)\]$/,'\1') + features[compound] = [] unless features[compound] + @model.subjects(OT['compound'], compound_node).each do |data_entry| + if feature == @model.object(data_entry, OT['feature']) + values_node = @model.object(data_entry, OT['values']) + @model.find(values_node, OT['value'], nil) do |s,p,value| + case value.to_s + when "true" + features[compound] << true + when "false" + features[compound] << false + else + features[compound] << value.to_s + end + end + end + end + end + features + end + + def tuples + tuples = [] + @model.subjects(RDF['type'], OT["Tuple"]).each do |t| + tuple = {} + compounds = [] + @model.subjects(OT['values'], t).each do |data_entry| + compound_node = @model.object(data_entry,OT['compound']) + compounds << @model.object(compound_node, DC["identifier"]).to_s + end + @model.find(t, OT['tuple'],nil) do |s,p,pair| + feature_node = @model.object(pair, OT['feature']) + feature_name = @model.object(feature_node, DC['title']).to_s + value_node = @model.object(pair, OT['value']) + value = @model.object(value_node, OT['value']).to_s + value = value.to_f if value.match(/^[\d\.]+$/) + tuple[feature_name.to_sym] = value + end + tuple[:compounds] = compounds + tuples << tuple + end + tuples + end + + def tuple(compound_uri) + compound_node = @model.subject(DC["identifier"],compound_uri) + #puts compound_uri + @model.subjects(OT['compound'], compound_node).each do |data_entry| + values_node = @model.object(data_entry, OT['values']) + @model.find(values_node, OT['tuple'], nil) do |s,p,tuple| + @model.find(tuple, OT['feature'], nil) do |s,p,feature| + name = @model.object(feature,DC['title']).to_s + #puts name + end + end + #puts values_node + end + end + + def compounds + compounds = [] + @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node| + compounds << @model.object(compound_node, DC["identifier"])#.to_s.sub(/^\[(.*)\]$/,'\1') + end + compounds end # Delete a dataset @@ -25,6 +186,15 @@ module OpenTox RestClient.delete @uri end + def save + RestClient.post(@@config[:services]["opentox-dataset"], self.rdf, :content_type => "application/rdf+xml").to_s + end + + end + +end + + # def tanimoto(dataset) # RestClient.get(File.join(@uri,'tanimoto',dataset.path)) # end @@ -32,7 +202,48 @@ module OpenTox # def weighted_tanimoto(dataset) # RestClient.get(File.join(@uri,'weighted_tanimoto',dataset.path)) # end - - end - -end +=begin + def data_entries + data = {} + @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node| + compound = @model.object(compound_node, DC["identifier"]).to_s#.sub(/^\[(.*)\]$/,'\1') + #compound = OpenTox::Compound.new(:inchi => compound).smiles + data[compound] = [] unless data[compound] + #puts compound + @model.subjects(OT['compound'], compound_node).each do |data_entry| + feature_node = @model.object(data_entry, OT['feature']) + feature = @model.object(feature_node, DC["identifier"]).to_s + values_node = @model.object(data_entry, OT['values']) + type = @model.object(values_node,RDF['type']).to_s + case type + when /FeatureValue/ + @model.find(values_node, OT['value'], nil) do |s,p,value| + case value.to_s + when "true" + data[compound] << {feature => true} + when "false" + data[compound] << {feature => false} + else + data[compound] << {feature => value.to_s} + end + end + when /Tuple/ # this is really slow + t = {} + @model.find(values_node, OT['tuple'], nil) do |s,p,tuple| + @model.find(tuple, OT['feature'], nil) do |s,p,feature| + @name = @model.object(feature,DC['title']).to_s + end + @model.find(tuple, OT['value'], nil) do |s,p,value| + v = @model.object(value,OT['value']).to_s + t[@name] = v + #print @name + ": " + #puts v + end + end + data[compound] << t + end + end + end + data + end +=end diff --git a/lib/environment.rb b/lib/environment.rb index 7ce6c7e..6100928 100644 --- a/lib/environment.rb +++ b/lib/environment.rb @@ -17,6 +17,12 @@ else exit end +# RDF namespaces +RDF = Redland::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' +OWL = Redland::Namespace.new 'http://www.w3.org/2002/07/owl#' +DC = Redland::Namespace.new 'http://purl.org/dc/elements/1.1/' +OT = Redland::Namespace.new 'http://www.opentox.org/api/1.1#' + # configure redis database =begin begin diff --git a/lib/model.rb b/lib/model.rb index 4b36aad..50d6bea 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -1,38 +1,69 @@ module OpenTox - - module Model - - class LazarClassification < OpenTox - + module Model + class Lazar + include Owl + # Create a new prediction model from a dataset - def initialize(uri) - super(uri) + def initialize + super end - def self.create(params) - uri = RestClient.post File.join(@@config[:services]["opentox-model"], 'lazar_classification'), params - puts "URI: " + uri - LazarClassification.new(uri.to_s) + def read_yaml(id,yaml) + @lazar = YAML.load yaml + self.identifier = File.join(@@config[:services]["opentox-model"],'lazar',id) + self.title = "lazar model for #{@lazar[:endpoint]}" + self.source = "http://github.com/helma/opentox-model" + self.parameters = { + "Dataset URI" => { :scope => "mandatory", :value => "dataset_uri=#{@lazar[:activity_dataset]}" }, + "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri=#{@lazar[:endpoint]}" }, + "Feature generation URI" => { :scope => "mandatory", :value => "feature_generation_uri=" } #TODO write to yaml + } + self.algorithm = File.join(@@config[:services]["opentox-model"],"lazar") + self.trainingDataset = @lazar[:activity_dataset] + self.dependentVariables = @lazar[:endpoint] + self.predictedVariables = @lazar[:endpoint] + " lazar prediction" end - def self.find(name) - uri = RestClient.get File.join(@@config[:services]["opentox-model"], 'lazar_classification', URI.encode(params[:name])) - LazarClassification.new(uri) + def self.find(uri) + begin + YAML.load(RestClient.get uri) + Lazar.new uri + rescue + halt 404, "Model #{uri} not found." + end end def self.find_all - RestClient.get File.join(@@config[:services]["opentox-model"], 'lazar_classification')#.split("\n") + RestClient.get(@@config[:services]["opentox-model"]).split("\n") end - + # Predict a compound def predict(compound) - LazarPrediction.new(:uri => RestClient.post(@uri, :compound_uri => compound.uri)) + RestClient.post(@uri, :compound_uri => compound.uri) end def self.base_uri @@config[:services]["opentox-model"] end + def self.create(data) + RestClient.post(@@config[:services]["opentox-model"], data, :content_type => "application/x-yaml").to_s + end + + def endpoint + YAML.load(RestClient.get uri)[:endpoint] + end + + end + end + + +=begin + module Model + + class LazarClassification < OpenTox + + end end @@ -68,4 +99,5 @@ module OpenTox end end +=end end diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb index a55b59e..ca57fc9 100644 --- a/lib/opentox-ruby-api-wrapper.rb +++ b/lib/opentox-ruby-api-wrapper.rb @@ -1,5 +1,4 @@ -#['rubygems', 'sinatra', 'sinatra/url_for', 'redis','builder', 'rest_client', 'yaml', 'cgi', 'spork', 'environment'].each do |lib| -['rubygems', 'sinatra', 'sinatra/url_for', 'builder', 'rest_client', 'yaml', 'cgi', 'spork', 'environment'].each do |lib| +['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'redland', 'rdf/redland', 'rdf/redland/util', 'environment'].each do |lib| require lib end @@ -9,7 +8,6 @@ rescue LoadError puts "Please install Openbabel with 'rake openbabel:install' in the compound component" end -#['opentox', 'compound','feature','dataset','algorithm','model','task','utils'].each do |lib| -['opentox', 'compound','dataset','algorithm','model','task','utils'].each do |lib| +['owl', 'compound','dataset','algorithm','model','task','utils'].each do |lib| require lib end diff --git a/lib/opentox.rb b/lib/opentox.rb deleted file mode 100644 index 9b1226b..0000000 --- a/lib/opentox.rb +++ /dev/null @@ -1,27 +0,0 @@ -module OpenTox - - class OpenTox - attr_accessor :uri - - def initialize(uri) - @uri = uri - end - - # Get the object name - def name - RestClient.get @uri + '/name' - end - - # Deletes an object - def destroy - RestClient.delete @uri - end - - # Object path without hostname - def path - URI.split(@uri)[5] - end - - end - -end diff --git a/lib/owl.rb b/lib/owl.rb new file mode 100644 index 0000000..c858fec --- /dev/null +++ b/lib/owl.rb @@ -0,0 +1,102 @@ +module OpenTox + + module Owl + + attr_reader :uri, :model + + def initialize + + @model = Redland::Model.new Redland::MemoryStore.new + @parser = Redland::Parser.new + @serializer = Redland::Serializer.ntriples + + # explicit typing + # this should come from http://opentox.org/data/documents/development/RDF%20files/OpenToxOntology/at_download/file (does not pass OWL-DL validation) + @model.add @uri, RDF['type'], OWL['Ontology'] + # annotation properties + @model.add DC['source'], RDF['type'], OWL["AnnotationProperty"] + @model.add DC['identifier'], RDF['type'], OWL["AnnotationProperty"] + @model.add DC['title'], RDF['type'], OWL["AnnotationProperty"] + # object properties + @model.add OT['feature'], RDF['type'], OWL["ObjectProperty"] + @model.add OT['compound'], RDF['type'], OWL["ObjectProperty"] + @model.add OT['values'], RDF['type'], OWL["ObjectProperty"] + @model.add OT['tuple'], RDF['type'], OWL["ObjectProperty"] # added by ch + @model.add OT['parameters'], RDF['type'], OWL["ObjectProperty"] + # datatype properties + @model.add OT['value'], RDF['type'], OWL["DatatypeProperty"] + @model.add OT['paramValue'], RDF['type'], OWL["DatatypeProperty"] + @model.add OT['paramScope'], RDF['type'], OWL["DatatypeProperty"] + @model.add OT['hasSource'], RDF['type'], OWL["DatatypeProperty"] + # classes + @model.add OT['Dataset'], RDF['type'], OWL["Class"] + @model.add OT['FeatureValue'], RDF['type'], OWL["Class"] + @model.add OT['Tuple'], RDF['type'], OWL["Class"] # added by ch + @model.add OT['Feature'], RDF['type'], OWL["Class"] + @model.add OT['Compound'], RDF['type'], OWL["Class"] + @model.add OT['DataEntry'], RDF['type'], OWL["Class"] + @model.add OT['Parameter'], RDF['type'], OWL["Class"] + @model.add OT['Algorithm'], RDF['type'], OWL["Class"] + end + + def owl_class + self.class.to_s.sub(/^OpenTox::/,'') + #@model.subject RDF['type'], OT[self.class.to_s.sub(/^OpenTox::/,'')] + end + + def read(uri) + @parser.parse_into_model(@model,uri) + @uri = uri + end + + def rdf=(rdf) + @uri = '/' unless @uri + @parser.parse_string_into_model(@model,rdf,@uri) + end + + def rdf + @model.to_string + end + + def uri=(uri) + identifier = uri + end + + def to_ntriples + @serializer.model_to_string(Redland::Uri.new(@uri), @model) + end + + def title + puts OT[self.owl_class] + @model.object(OT[self.owl_class], DC['title']).to_s + end + + def parameters=(params) + params.each do |name, settings| + parameter = @model.create_resource + @model.add parameter, RDF['type'], OT['Parameter'] + @model.add parameter, DC['title'], name + @model.add parameter, OT['paramScope'], settings[:scope] + @model.add parameter, OT['paramValue'], settings[:value] + end + end + + def create_owl_statement(name,value) + r = @model.create_resource + @model.add r, RDF['type'], DC[name.gsub(/^[a-z]/) { |a| a.upcase }] # capitalize only the first letter + @model.add r, DC[name], value + end + + def method_missing(name, *args) + # create magic setter methods + if /=/ =~ name.to_s + puts "create_owl_statement #{name.to_s.sub(/=/,'')}, #{args.first}" + create_owl_statement name.to_s.sub(/=/,''), args.first + else + raise "No method #{name}" + end + end + + end + +end diff --git a/lib/task.rb b/lib/task.rb index 74c3fb8..079d387 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -1,15 +1,16 @@ module OpenTox - class Task < OpenTox - #private :new + class Task #< OpenTox def initialize(uri) super(uri) end - def self.create(params) - uri = RestClient.post @@config[:services]["opentox-task"], :resource_uri => params[:resource_uri] - Task.new uri + #def self.create(uri) + def self.create + puts @@config[:services]["opentox-task"] + uri = RestClient.post @@config[:services]["opentox-task"], ''#, :dataset_uri => uri + Task.new(uri) end def self.find(params) -- cgit v1.2.3