From 1c74a684b8b2992d7e6fce5f1a4677d397ec4dd6 Mon Sep 17 00:00:00 2001
From: Christoph Helma
Date: Sat, 18 Jul 2015 08:52:39 +0200
Subject: dataset tests pass

---
 lib/compound.rb       |  55 +++++----
 lib/dataset.rb        | 334 ++++++++++++++++++++------------------------------
 lib/feature.rb        |  15 ++-
 lib/opentox-client.rb |  18 ++-
 lib/opentox.rb        | 179 ++++----------------------
 lib/overwrite.rb      |   2 +
 lib/task.rb           |  50 +++++---
 7 files changed, 250 insertions(+), 403 deletions(-)

(limited to 'lib')

diff --git a/lib/compound.rb b/lib/compound.rb
index 82ea94e..b588c75 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -1,17 +1,15 @@
 CACTUS_URI="http://cactus.nci.nih.gov/chemical/structure/"
+require 'openbabel'
 
 module OpenTox
 
   # Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure).
   class Compound
 
-    def initialize uri
-      @data = {}
-      @data["uri"] = uri
-    end
+    attr_reader :inchi
 
-    def ==(c)
-      @data["uri"] == c.uri
+    def initialize inchi
+      @inchi = inchi
     end
 
     # Create a compound from smiles string
@@ -20,21 +18,21 @@ module OpenTox
     # @param [String] smiles Smiles string
     # @return [OpenTox::Compound] Compound
     def self.from_smiles smiles
-      Compound.new RestClientWrapper.post(service_uri, smiles, {:content_type => 'chemical/x-daylight-smiles'})
+      OpenTox::Compound.new obconversion(smiles,"smi","inchi")
     end
 
     # Create a compound from inchi string
     # @param inchi [String] InChI string
     # @return [OpenTox::Compound] Compound
     def self.from_inchi inchi
-      Compound.new RestClientWrapper.post(service_uri, inchi, {:content_type => 'chemical/x-inchi'})
+      OpenTox::Compound.new inchi
     end
 
     # Create a compound from sdf string
     # @param sdf [String] SDF string
     # @return [OpenTox::Compound] Compound
     def self.from_sdf sdf
-      Compound.new RestClientWrapper.post(service_uri, sdf, {:content_type => 'chemical/x-mdl-sdfile'})
+      OpenTox::Compound.new obconversion(sdf,"sdf","inchi")
     end
 
     # Create a compound from name. Relies on an external service for name lookups.
@@ -43,32 +41,25 @@ module OpenTox
     # @param name [String] can also be an InChI/InChIKey, CAS number, etc
     # @return [OpenTox::Compound] Compound
     def self.from_name name
-      @inchi = RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi")
-      Compound.new RestClientWrapper.post(service_uri, @inchi, {:content_type => 'chemical/x-inchi'})
-    end
-
-    # Get InChI
-    # @return [String] InChI string
-    def inchi
-      @inchi ||= RestClientWrapper.get(@data["uri"],{},{:accept => 'chemical/x-inchi'}).chomp
+      OpenTox::Compound.new RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi")
     end
 
     # Get InChIKey
     # @return [String] InChIKey string
    def inchikey
-      @inchikey ||= RestClientWrapper.get(@data["uri"],{},{:accept => 'chemical/x-inchikey'}).chomp
+      obconversion(@inchi,"inchi","inchikey")
    end
 
     # Get (canonical) smiles
     # @return [String] Smiles string
    def smiles
-      @smiles ||= RestClientWrapper.get(@data["uri"],{},{:accept => 'chemical/x-daylight-smiles'}).chomp
+      obconversion(@inchi,"inchi","smi") # "can" gives non-canonical smiles??
    end
 
     # Get sdf
     # @return [String] SDF string
    def sdf
-      RestClientWrapper.get(@data["uri"],{},{:accept => 'chemical/x-mdl-sdfile'}).chomp
+      obconversion(@inchi,"inchi","sdf")
     end
 
     # Get gif image
@@ -82,14 +73,16 @@ module OpenTox
     #   image = compound.png
     # @return [image/png] Image data
     def png
-      RestClientWrapper.get(File.join @data["uri"], "image")
+      obconversion(@inchi,"inchi","_png2")
     end
 
+=begin
     # Get URI of compound image
     # @return [String] Compound image URI
     def image_uri
       File.join @data["uri"], "image"
     end
+=end
 
     # Get all known compound names. Relies on an external service for name lookups.
     # @example
@@ -116,5 +109,25 @@ module OpenTox
       uri = "http://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json"
       @chemblid = JSON.parse(RestClientWrapper.get(uri))["compounds"].first["chemblId"]
     end
+
+    private
+
+    def self.obconversion(identifier,input_format,output_format,option=nil)
+      obconversion = OpenBabel::OBConversion.new
+      obconversion.set_options(option, OpenBabel::OBConversion::OUTOPTIONS) if option
+      obmol = OpenBabel::OBMol.new
+      obconversion.set_in_and_out_formats input_format, output_format
+      obconversion.read_string obmol, identifier
+      case output_format
+      when /smi|can|inchi/
+        obconversion.write_string(obmol).gsub(/\s/,'').chomp
+      else
+        obconversion.write_string(obmol)
+      end
+    end
+
+    def obconversion(identifier,input_format,output_format,option=nil)
+      self.class.obconversion(identifier,input_format,output_format,option)
+    end
   end
 end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 1c938da..bcbacb2 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -4,95 +4,48 @@ module OpenTox
   # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
   class Dataset
-
-    def initialize uri=nil
-      super uri
-      @data["features"] ||= []
-      @data["compounds"] ||= []
-      @data["data_entries"] ||= []
+    include Mongoid::Document
+
+    field :feature_ids, type: Array
+    field :inchis, type: Array
+    field :data_entries, type: Array
+    field :warnings, type: Array
+    field :source, type: String
+
+    def initialize
+      super
+      self.feature_ids = []
+      self.inchis = []
+      self.data_entries = []
+      self.warnings = []
     end
 
-    def data_entries
-      @data["data_entries"]
+    # Readers
+
+    def compounds
+      inchis.collect{|i| OpenTox::Compound.new i}
     end
 
-    [:features, :compounds, :data_entries].each do |method|
-      send :define_method, "#{method}=" do |value|
-        @data[method.to_s] = value.collect{|v| v.uri}
-      end
-      send :define_method, "#{method}<<" do |value|
-        @data[method.to_s] << value.uri
-      end
+    def features
+      self.feature_ids.collect{|id| OpenTox::Feature.find(id)}
     end
 
-    # Get data (lazy loading from dataset service)
-    # overrides {OpenTox#metadata} to only load the metadata instead of the whole dataset
-    # @return [Hash] the metadata
-    def metadata force_update=false
-      if @data.empty? or force_update
-        uri = File.join(@data["uri"],"metadata")
-        #begin
-          @data = JSON.parse RestClientWrapper.get(uri,{},{:accept => "application/json"})
-          #parse_ntriples RestClientWrapper.get(uri,{},{:accept => "text/plain"})
-        #rescue # fall back to rdfxml
-          #parse_rdfxml RestClientWrapper.get(uri,{},{:accept => "application/rdf+xml"})
-        #end
-        #@data = @rdf.to_hash[RDF::URI.new(@data["uri"])].inject({}) { |h, (predicate, values)| h[predicate] = values.collect{|v| v.to_s}; h }
-      end
-      @data
+    # Writers
+
+    def compounds=(compounds)
+      self.inchis = compounds.collect{|c| c.inchi}
     end
 
-    # @return [Array] feature objects (NOT uris)
-    def features force_update=false
-      if @data["features"].empty? or force_update
-        uri = File.join(@data["uri"],"features")
-        begin
-          uris = JSON.parse RestClientWrapper.get(uri,{},{:accept => "application/json"}) # ordered datasets return ordered features
-        rescue
-          uris = []
-        end
-        @data["features"] = uris#.collect{|uri| Feature.new(uri)}
-      end
-      @data["features"].collect{|uri| Feature.new uri}
+    def add_compound(compound)
+      self.inchis << compound.inchi
     end
 
-    # @return [Array] compound objects (NOT uris)
-    def compounds force_update=false
-      if @data["compounds"].empty? or force_update
-        uri = File.join(@data["uri"],"compounds")
-        begin
-          uris = JSON.parse RestClientWrapper.get(uri,{},{:accept => "application/json"}) # ordered datasets return ordered compounds
-        rescue
-          uris = []
-        end
-        @data["compounds"] = uris
-      end
-      @data["compounds"].collect{|uri| Compound.new(uri)}
+    def features=(features)
+      self.feature_ids = features.collect{|f| f.id}
     end
 
-    # @return [Array] with two dimensions,
-    # first index: compounds, second index: features, values: compound feature values
-    def data_entries force_update=false
-      if @data["data_entries"].empty? or force_update
-        sparql = "SELECT ?cidx ?fidx ?value FROM <#{uri}> WHERE {
-          ?data_entry <#{RDF::OLO.index}> ?cidx ;
-            <#{RDF::OT.values}> ?v .
-          ?v <#{RDF::OT.feature}> ?f;
-            <#{RDF::OT.value}> ?value .
-          ?f <#{RDF::OLO.index}> ?fidx.
-          } ORDER BY ?fidx ?cidx"
-        RestClientWrapper.get(service_uri,{:query => sparql},{:accept => "text/uri-list"}).split("\n").each do |row|
-          r,c,v = row.split("\t")
-          @data["data_entries"][r.to_i] ||= []
-          # adjust value class depending on feature type, StringFeature takes precedence over NumericFeature
-          if features[c.to_i][RDF.type].include? RDF::OT.NumericFeature and ! features[c.to_i][RDF.type].include? RDF::OT.StringFeature
-            v = v.to_f if v
-          end
-          @data["data_entries"][r.to_i][c.to_i] = v if v
-        end
-        # TODO: fallbacks for external and unordered datasets
-      end
-      @data["data_entries"]
+    def add_feature(feature)
+      self.feature_ids << feature.id
     end
 
     # Find data entry values for a given compound and feature
@@ -100,8 +53,8 @@ module OpenTox
     # @param feature [OpenTox::Feature] OpenTox Feature object
     # @return [Array] Data entry values
     def values(compound, feature)
-      rows = (0 ... compounds.length).select { |r| compounds[r].uri == compound.uri }
-      col = features.collect{|f| f.uri}.index feature.uri
+      rows = (0 ... inchis.length).select { |r| inchis[r] == compound.inchi }
+      col = feature_ids.index feature.id
       rows.collect{|row| data_entries[row][col]}
     end
 
@@ -122,7 +75,7 @@ module OpenTox
     end
 
     # for prediction result datasets
-    # assumes that there are features with title prediction and confidence
+    # assumes that there are feature_ids with title prediction and confidence
     # @return [Array] of Hashes with keys { :compound, :value ,:confidence } (compound value is object not uri)
     def predictions
       predictions = []
@@ -149,18 +102,17 @@ module OpenTox
     end
 
     # Adding data methods
-    # (Alternatively, you can directly change @data["features"] and @data["compounds"])
+    # (Alternatively, you can directly change feature_ids, inchis and data_entries)
 
     # Create a dataset from file (csv,sdf,...)
# @param filename [String] # @return [String] dataset uri def upload filename, wait=true - uri = RestClientWrapper.put(@data["uri"], {:file => File.new(filename)}) - wait_for_task uri if URI.task?(uri) and wait - compounds true - features true - metadata true - @data["uri"] + self.title = File.basename(filename) + self.source = filename + table = CSV.read filename, :skip_blanks => true + from_table table + save end # @param compound [OpenTox::Compound] @@ -182,6 +134,7 @@ module OpenTox end end + # TODO: remove? might be dangerous if feature ordering is incorrect # MG: I would not remove this because add_data_entry is very slow (4 times searching in arrays) # CH: do you have measurements? compound and feature arrays are not that big, I suspect that feature search/creation is the time critical step @@ -193,11 +146,11 @@ module OpenTox # d << [ Compound.new("c1ccccc1"), feature-value-a, feature-value-b ] def << row compound = row.shift # removes the compound from the array - bad_request_error "Dataset features are empty." unless @data["features"] - bad_request_error "Row size '#{row.size}' does not match features size '#{@data["features"].size}'." unless row.size == @data["features"].size + bad_request_error "Dataset features are empty." unless feature_ids + bad_request_error "Row size '#{row.size}' does not match features size '#{feature_ids.size}'." unless row.size == feature_ids.size bad_request_error "First column is not a OpenTox::Compound" unless compound.class == OpenTox::Compound - @data["compounds"] << compound.uri - @data["data_entries"] << row + self.inchis << compound.inchi + self.data_entries << row end # Serialisation @@ -213,107 +166,6 @@ module OpenTox end end -=begin - RDF_FORMATS.each do |format| - - # redefine rdf parse methods for all formats e.g. parse_rdfxml - send :define_method, "parse_#{format}".to_sym do |rdf| - # TODO: parse ordered dataset - # TODO: parse data entries - # TODO: parse metadata - @rdf = RDF::Graph.new - RDF::Reader.for(format).new(rdf) do |reader| - reader.each_statement{ |statement| @rdf << statement } - end - query = RDF::Query.new({ :uri => { RDF.type => RDF::OT.Compound } }) - @data["compounds"] = query.execute(@rdf).collect { |solution| OpenTox::Compound.new solution.uri } - query = RDF::Query.new({ :uri => { RDF.type => RDF::OT.Feature } }) - @data["features"] = query.execute(@rdf).collect { |solution| OpenTox::Feature.new solution.uri } - @data["compounds"].each_with_index do |c,i| - @data["features"].each_with_index do |f,j| - end - end - end - - - # redefine rdf serialization methods - send :define_method, "to_#{format}".to_sym do - @data[RDF.type] = [RDF::OT.Dataset, RDF::OT.OrderedDataset] - create_rdf - @data["features"].each_with_index do |feature,i| - @rdf << [RDF::URI.new(feature.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Feature)] - @rdf << [RDF::URI.new(feature.uri), RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)] - end - @data["compounds"].each_with_index do |compound,i| - @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Compound)] - if defined? @neighbors and neighbors.include? 
compound - @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Neighbor)] - end - - @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)] - data_entry_node = RDF::Node.new - @rdf << [RDF::URI.new(@data["uri"]), RDF::URI.new(RDF::OT.dataEntry), data_entry_node] - @rdf << [data_entry_node, RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.DataEntry)] - @rdf << [data_entry_node, RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)] - @rdf << [data_entry_node, RDF::URI.new(RDF::OT.compound), RDF::URI.new(compound.uri)] - @data["data_entries"][i].each_with_index do |value,j| - value_node = RDF::Node.new - @rdf << [data_entry_node, RDF::URI.new(RDF::OT.values), value_node] - @rdf << [value_node, RDF::URI.new(RDF::OT.feature), RDF::URI.new(@data["features"][j].uri)] - @rdf << [value_node, RDF::URI.new(RDF::OT.value), RDF::Literal.new(value)] - end - end - RDF::Writer.for(format).buffer do |writer| - writer << @rdf - end - end - - end -=end - -# TODO: fix bug that affects data_entry positions # DG: who wrotes this comment ? - def to_ntriples # redefined string version for better performance - ntriples = "" - @data[RDF.type] = [ RDF::OT.Dataset, RDF::OT.OrderedDataset ] - @data.each do |predicate,values| - [values].flatten.each do |value| - URI.valid?(value) ? value = "<#{value}>" : value = "\"#{value}\"" - ntriples << "<#{@data["uri"]}> <#{predicate}> #{value} .\n" #\n" - end - end - @parameters.each_with_index do |parameter,i| - p_node = "_:parameter"+ i.to_s - ntriples << "<#{@data["uri"]}> <#{RDF::OT.parameters}> #{p_node} .\n" - ntriples << "#{p_node} <#{RDF.type}> <#{RDF::OT.Parameter}> .\n" - parameter.each { |k,v| ntriples << "#{p_node} <#{k}> \"#{v.to_s.tr('"', '\'')}\" .\n" } - end - @data["features"].each_with_index do |feature,i| - ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}> .\n" - ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> \"#{i}\"^^ .\n" # sorting at dataset service does not work without type information - end - @data["compounds"].each_with_index do |compound,i| - ntriples << "<#{compound.uri}> <#{RDF.type}> <#{RDF::OT.Compound}> .\n" - if defined? @neighbors and neighbors.include? compound - ntriples << "<#{compound.uri}> <#{RDF.type}> <#{RDF::OT.Neighbor}> .\n" - end - - ntriples << "<#{compound.uri}> <#{RDF::OLO.index}> \"#{i}\"^^ .\n" # sorting at dataset service does not work without type information - data_entry_node = "_:dataentry"+ i.to_s - ntriples << "<#{@data["uri"]}> <#{RDF::OT.dataEntry}> #{data_entry_node} .\n" - ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> .\n" - ntriples << "#{data_entry_node} <#{RDF::OLO.index}> \"#{i}\"^^ .\n" # sorting at dataset service does not work without type information - ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound.uri}> .\n" - @data["data_entries"][i].each_with_index do |value,j| - value_node = data_entry_node+ "_value"+ j.to_s - ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} .\n" - ntriples << "#{value_node} <#{RDF::OT.feature}> <#{@data["features"][j].uri}> .\n" - ntriples << "#{value_node} <#{RDF::OT.value}> \"#{value}\" .\n" - end unless @data["data_entries"][i].nil? 
- end - ntriples - - end - # Methods for for validation service # create a new dataset with the specified compounds and features @@ -350,32 +202,32 @@ module OpenTox # @param dataset [OpenTox::Dataset] dataset that should be mapped to this dataset (fully loaded) # @param compound_index [Fixnum], corresponding to dataset def compound_index( dataset, compound_index ) - compound_uri = dataset.compounds[compound_index]#.uri - self_indices = compound_indices(compound_uri) + compound_inchi = dataset.compounds[compound_index].inchi + self_indices = compound_indices(compound_inchi) if self_indices==nil nil else - dataset_indices = dataset.compound_indices(compound_uri) + dataset_indices = dataset.compound_indices(compound_inchi) if self_indices.size==1 self_indices.first elsif self_indices.size==dataset_indices.size # we do assume that the order is preseverd (i.e., the nth occurences in both datasets are mapped to each other)! self_indices[dataset_indices.index(compound_index)] else - raise "cannot map compound #{compound_uri} from dataset #{dataset.uri} to dataset #{uri}, "+ + raise "cannot map compound #{compound_inchi} from dataset #{dataset.id} to dataset #{self.id}, "+ "compound occurs #{dataset_indices.size} times and #{self_indices.size} times" end end end # returns the inidices of the compound in the dataset - # @param compound_uri [String] + # @param compound_inchi [String] # @return [Array] compound index (position) of the compound in the dataset, array-size is 1 unless multiple occurences - def compound_indices( compound_uri ) - unless defined?(@cmp_indices) and @cmp_indices.has_key?(compound_uri) + def compound_indices( compound_inchi ) + unless defined?(@cmp_indices) and @cmp_indices.has_key?(compound_inchi) @cmp_indices = {} compounds().size.times do |i| - c = @data["compounds"][i]#.uri + c = self.compounds[i].inchi if @cmp_indices[c]==nil @cmp_indices[c] = [i] else @@ -383,7 +235,7 @@ module OpenTox end end end - @cmp_indices[compound_uri] + @cmp_indices[compound_inchi] end # returns compound feature value using the compound-index and the feature_uri @@ -392,6 +244,84 @@ module OpenTox col = @data["features"].collect{|f| f.uri}.index feature_uri @data["data_entries"][compound_index] ? @data["data_entries"][compound_index][col] : nil end - end + def from_table table + + # features + feature_names = table.shift.collect{|f| f.strip} + self.warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size + compound_format = feature_names.shift.strip + bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i + ignored_feature_indices = [] + numeric = [] + feature_names.each_with_index do |f,i| + values = table.collect{|row| val=row[i+1].to_s.strip; val.blank? ? nil : val }.uniq.compact + types = values.collect{|v| v.numeric? ? true : false}.uniq + metadata = {"title" => f} + if values.size == 0 # empty feature + elsif values.size > 5 and types.size == 1 and types.first == true # 5 max classes + metadata["numeric"] = true + numeric[i] = true + else + metadata["nominal"] = true + metadata["string"] = true + metadata["accept_values"] = values + numeric[i] = false + end + feature = OpenTox::Feature.find_or_create_by metadata + self.feature_ids << feature.id unless feature.nil? 
+ end + + # compounds and values + r = -1 + table.each_with_index do |values,j| + compound = values.shift + begin + case compound_format + when /SMILES/i + c = OpenTox::Compound.from_smiles(compound) + if c.inchi.empty? + self.warnings << "Cannot parse #{compound_format} compound '#{compound.strip}' at position #{j+2}, all entries are ignored." + next + else + inchi = c.inchi + end + when /InChI/i + # TODO validate inchi + inchi = compound + else + raise "wrong compound format" #should be checked above + end + rescue + self.warnings << "Cannot parse #{compound_format} compound '#{compound}' at position #{j+2}, all entries are ignored." + next + end + + r += 1 + self.inchis << inchi + unless values.size == self.feature_ids.size + self.warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{self.feature_ids.size}), all entries are ignored." + next + end + + self.data_entries << [] + values.each_with_index do |v,i| + if v.blank? + self.data_entries.last << nil + self.warnings << "Empty value for compound '#{compound}' (row #{r+2}) and feature '#{feature_names[i]}' (column #{i+2})." + next + elsif numeric[i] + self.data_entries.last << v.to_f + else + self.data_entries.last << v.strip + end + end + end + self.inchis.duplicates.each do |inchi| + positions = [] + self.inchis.each_with_index{|c,i| positions << i+1 if !c.blank? and c == inchi} + self.warnings << "Duplicate compound #{inchi} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments." + end + end + end end diff --git a/lib/feature.rb b/lib/feature.rb index 5d3d962..43cf7e9 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -2,13 +2,18 @@ module OpenTox class Feature + field :string, type: Boolean, default: false + field :nominal, type: Boolean, default: false + field :numeric, type: Boolean, default: false + field :accept_values, type: Array + # Find out feature type # Classification takes precedence # @return [String] Feature type def feature_type - if self[RDF.type].include?(RDF::OT.NominalFeature) + if nominal "classification" - elsif self[RDF.type].include?(RDF::OT.NumericFeature) + elsif numeric "regression" else "unknown" @@ -18,9 +23,9 @@ module OpenTox # Get accept values # # @return[Array] Accept values - def accept_values - self[RDF::OT.acceptValue] ? self[RDF::OT.acceptValue].sort : nil - end + #def accept_values + #self[RDF::OT.acceptValue] ? self[RDF::OT.acceptValue].sort : nil + #end # Create value map # @param [OpenTox::Feature] Feature diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb index 9f3373d..6358705 100644 --- a/lib/opentox-client.rb +++ b/lib/opentox-client.rb @@ -9,8 +9,7 @@ require 'yaml' require 'json' require 'logger' require "securerandom" -require 'mongo' -require 'bson' +require 'mongoid' default_config = File.join(ENV["HOME"],".opentox","config","default.rb") client_config = File.join(ENV["HOME"],".opentox","config","opentox-client.rb") @@ -18,6 +17,9 @@ client_config = File.join(ENV["HOME"],".opentox","config","opentox-client.rb") puts "Could not find configuration files #{default_config} or #{client_config}" unless File.exist? default_config or File.exist? client_config require default_config if File.exist? default_config require client_config if File.exist? 
client_config +# TODO switch to production +ENV["MONGOID_ENV"] = "development" +Mongoid.load!("#{ENV['HOME']}/.opentox/config/mongoid.yml") # define constants and global variables #RDF::OT = RDF::Vocabulary.new 'http://www.opentox.org/api/1.2#' @@ -28,7 +30,8 @@ require client_config if File.exist? client_config #RDF::ISA = RDF::Vocabulary.new "http://onto.toxbank.net/isa/" #RDF::OWL = RDF::Vocabulary.new "http://www.w3.org/2002/07/owl#" -CLASSES = ["Compound", "Feature", "Dataset", "Validation", "Task", "Investigation"] +#CLASSES = ["Compound", "Feature", "Dataset", "Validation", "Task", "Investigation"] +CLASSES = ["Feature", "Dataset", "Validation", "Task", "Investigation"] #RDF_FORMATS = [:rdfxml,:ntriples,:turtle] # Regular expressions for parsing classification data @@ -57,3 +60,12 @@ FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-m # unauthorized_error "Failed to authenticate user \"#{$aa[:user]}\"." unless OpenTox::Authorization.is_token_valid(OpenTox::RestClientWrapper.subjectid) #end +# defaults to stderr, may be changed to file output (e.g in opentox-service) +$logger = OTLogger.new(STDERR) +$logger.level = Logger::DEBUG +#Mongo::Logger.logger = $logger +Mongo::Logger.level = Logger::WARN +#$mongo = Mongo::Client.new($mongodb[:uri]) +Mongoid.logger.level = Logger::WARN +Mongoid.logger = $logger +#Moped.logger = $logger diff --git a/lib/opentox.rb b/lib/opentox.rb index 9be6078..96cefe0 100644 --- a/lib/opentox.rb +++ b/lib/opentox.rb @@ -1,172 +1,41 @@ -# defaults to stderr, may be changed to file output (e.g in opentox-service) -$logger = OTLogger.new(STDERR) -$logger.level = Logger::DEBUG -Mongo::Logger.logger = $logger -Mongo::Logger.logger.level = Logger::WARN -$mongo = Mongo::Client.new($mongodb[:uri]) - module OpenTox # Ruby interface - attr_accessor :data - - # Create a new OpenTox object - # @param id [optional,String] ID - # @return [OpenTox] OpenTox object - def initialize - @data = {} - @data["type"] = type - end - - def created_at - @data["_id"].generation_time - end - - # Object metadata (lazy loading) - # @return [Hash] Object metadata - def metadata - get if exists? - @data - end - - # Metadata values - # @param predicate [String] Predicate URI - # @return [Array, String] Predicate value(s) - def [](predicate) - predicate = predicate.to_s - return nil if @data[predicate].nil? - @data[predicate].size == 1 ? @data[predicate].first : @data[predicate] - end - - # Set a metadata entry - # @param predicate [String] Predicate URI - # @param values [Array, String] Predicate value(s) - def []=(predicate,values) - predicate = predicate.to_s - values.is_a?(Array) ? @data[predicate] = [values].flatten : @data[predicate] = values - end - - def id - @data["_id"] - end - - def exists? - nr_items = $mongo[collection].find(:_id => @data["_id"]).count - nr_items > 0 ? true : false - end - - # Get object from webservice - # @param [String,optional] mime_type - def get - resource_not_found_error("#{@data[:type]} with ID #{@data["_id"]} not found.") unless exists? 
- @data = $mongo[collection].find(:_id => @data["_id"]).first - end - - def save - @data["_id"] = $mongo[collection].insert_one(@data).inserted_id - end - - # partial update - def update metadata - $mongo[collection].find(:_id => @data["_id"]).find_one_and_replace('$set' => metadata) - end - - # Save object at webservice (replace or create object) - def put - #@data.delete("_id") # to enable updates - $mongo[collection].find(:_id => @data["_id"]).find_one_and_replace(@data, :upsert => true) - end - - # Delete object at webservice - def delete - $mongo[collection].find(:_id => @data["_id"]).find_one_and_delete - end - - # @return [String] converts OpenTox object into html document (by first converting it to a string) - def to_html - @data.to_json.to_html - end - - def type - self.class.to_s.split('::').last - end - - def collection - type.downcase - end - - # short access for metadata keys title, description and type - [ :title , :description ].each do |method| - send :define_method, method do - self[method] - end - send :define_method, "#{method}=" do |value| - self[method] = value - end - end - - # define class methods within module - def self.included(base) - base.extend(ClassMethods) - end - - module ClassMethods - def subjectid - RestClientWrapper.subjectid - end - def subjectid=(subjectid) - RestClientWrapper.subjectid = subjectid - end - end - # create default OpenTox classes with class methods - # (defined in opentox-client.rb) + # create default OpenTox classes (defined in opentox-client.rb) + # provides Mongoid's query and persistence methods + # http://mongoid.org/en/mongoid/docs/persistence.html + # http://mongoid.org/en/mongoid/docs/querying.html CLASSES.each do |klass| c = Class.new do include OpenTox + include Mongoid::Document + include Mongoid::Timestamps + store_in collection: klass.downcase.pluralize - def self.all - $mongo[collection].find.collect do |data| - f = self.new - f.data = data - f - end - end + field :title, type: String + field :description, type: String - def self.find_id id - self.find(:_id => id) + # TODO check if needed + def self.subjectid + RestClientWrapper.subjectid end - - #@example fetching a model - # OpenTox::Model.find() -> model-object - def self.find metadata - f = self.new - items = $mongo[collection].find metadata - items.count > 0 ? f.data = items.first : f = nil - f + def self.subjectid=(subjectid) + RestClientWrapper.subjectid = subjectid end + end + OpenTox.const_set klass,c + end - def self.create metadata - object = self.new - object.data = metadata - object.save - object.get - object - end + def type + self.class.to_s.split('::').last + end - def self.find_or_create metadata - search = metadata - search.delete("_id") - ids = $mongo[collection].find(search).distinct(:_id) - ids.empty? ? self.create(metadata) : self.find_id(ids.first) - end + # Serialisation - private - def self.collection - self.to_s.split('::').last.downcase - end - end - OpenTox.const_set klass,c + # @return [String] converts OpenTox object into html document (by first converting it to a string) + def to_html + self.to_json.to_html end end diff --git a/lib/overwrite.rb b/lib/overwrite.rb index 692e239..4dafe8d 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -134,6 +134,7 @@ end module Kernel +=begin # overwrite backtick operator to catch system errors # Override raises an error if _cmd_ returns a non-zero exit status. CH: I do not understand this comment # Returns stdout if _cmd_ succeeds. Note that these are simply concatenated; STDERR is not inline. 
CH: I do not understand this comment @@ -148,6 +149,7 @@ module Kernel rescue internal_server_error $!.message end +=end # @return [String] uri of task result, if task fails, an error according to task is raised def wait_for_task uri diff --git a/lib/task.rb b/lib/task.rb index f7e4c6f..55d024d 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -1,17 +1,30 @@ DEFAULT_TASK_MAX_DURATION = 36000 module OpenTox + # TODO: fix error reports + # TODO: fix field names and overwrite accessors # Class for handling asynchronous tasks class Task + field :creator, type: String + field :percentageCompleted, type: Float + field :error_code, type: Integer # workaround name, cannot overwrite accessors in current mongoid version + field :finished, type: Time # workaround name, cannot overwrite accessors in current mongoid version + # TODO + field :result_object, type: String + field :report, type: String + field :pid, type: Integer + field :observer_pid, type: Integer + def self.run(description, creator=nil) task = Task.new task[:description] = description.to_s task[:creator] = creator.to_s task[:percentageCompleted] = 0 - task[:code] = 202 + task[:error_code] = 202 task.save + pid = fork do begin task.completed yield @@ -19,8 +32,7 @@ module OpenTox # wrap non-opentox-errors first e = OpenTox::Error.new(500,e.message,nil,e.backtrace) unless e.is_a?(OpenTox::Error) $logger.error "error in task #{task.id} created by #{creator}" # creator is not logged because error is logged when thrown - task.update(:errorReport => e.metadata, :code => e.http_code, :finished_at => Time.now) - task.get + task.update(:report => e.metadata, :error_code => e.http_code, :finished => Time.now) task.kill end end @@ -50,13 +62,11 @@ module OpenTox def cancel kill - update(:code => 503, :finished_at => Time.now) - get + update_attributes(:error_code => 503, :finished => Time.now) end def completed(result) - update(:code => 200, :finished_at => Time.now, :percentageCompleted => 100, :result => result) - get + update_attributes(:error_code => 200, :finished => Time.now, :percentageCompleted => 100, :result_object => result) end # waits for a task, unless time exceeds or state is no longer running @@ -73,6 +83,22 @@ module OpenTox end + def error_report + OpenTox::Task.find(id).report + end + + def code + OpenTox::Task.find(id).error_code + end + + def result + OpenTox::Task.find(id).result_object + end + + def finished_at + OpenTox::Task.find(id).finished + end + def running? code == 202 end @@ -104,14 +130,4 @@ module OpenTox end end - [:code, :description, :creator, :finished_at, :percentageCompleted, :result, :errorReport].each do |method| - define_method method do - $mongo[:task].find(:_id => self.id).distinct(method).first - end - end - - def error_report - self.errorReport - end - end -- cgit v1.2.3
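
Illustrative usage of the reworked API after this change — a minimal sketch, assuming the opentox-client library with this patch applied, a working OpenBabel installation, and a Mongoid session configured in ~/.opentox/config/mongoid.yml; the CSV file name is hypothetical:

  require 'opentox-client'

  # Compounds are now identified by InChI and converted locally with OpenBabel,
  # instead of calling a compound webservice.
  compound = OpenTox::Compound.from_smiles "c1ccccc1"
  puts compound.inchi
  puts compound.inchikey

  # Datasets are Mongoid documents; upload parses a CSV table into the
  # inchis, feature_ids and data_entries fields and saves the document.
  dataset = OpenTox::Dataset.new
  dataset.upload "training_data.csv"          # hypothetical file name
  puts dataset.compounds.size
  puts dataset.features.collect{|f| f.title}
  puts dataset.warnings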