summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2015-07-18 08:52:39 +0200
committerChristoph Helma <helma@in-silico.ch>2015-07-18 08:52:39 +0200
commit1c74a684b8b2992d7e6fce5f1a4677d397ec4dd6 (patch)
tree7b23f87020a8b5cf1a56f2108e7563d90912dcc5
parenta8e52efbfd977c7ecfc98df10960686a05ed8afd (diff)
dataset tests pass
-rw-r--r--lib/compound.rb55
-rw-r--r--lib/dataset.rb334
-rw-r--r--lib/feature.rb15
-rw-r--r--lib/opentox-client.rb18
-rw-r--r--lib/opentox.rb179
-rw-r--r--lib/overwrite.rb2
-rw-r--r--lib/task.rb50
-rw-r--r--opentox-client.gemspec9
8 files changed, 254 insertions, 408 deletions
diff --git a/lib/compound.rb b/lib/compound.rb
index 82ea94e..b588c75 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -1,17 +1,15 @@
CACTUS_URI="http://cactus.nci.nih.gov/chemical/structure/"
+require 'openbabel'
module OpenTox
# Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure).
class Compound
- def initialize uri
- @data = {}
- @data["uri"] = uri
- end
+ attr_reader :inchi
- def ==(c)
- @data["uri"] == c.uri
+ def initialize inchi
+ @inchi = inchi
end
# Create a compound from smiles string
@@ -20,21 +18,21 @@ module OpenTox
# @param [String] smiles Smiles string
# @return [OpenTox::Compound] Compound
def self.from_smiles smiles
- Compound.new RestClientWrapper.post(service_uri, smiles, {:content_type => 'chemical/x-daylight-smiles'})
+ OpenTox::Compound.new obconversion(smiles,"smi","inchi")
end
# Create a compound from inchi string
# @param inchi [String] smiles InChI string
# @return [OpenTox::Compound] Compound
def self.from_inchi inchi
- Compound.new RestClientWrapper.post(service_uri, inchi, {:content_type => 'chemical/x-inchi'})
+ OpenTox::Compound.new inchi
end
# Create a compound from sdf string
# @param sdf [String] smiles SDF string
# @return [OpenTox::Compound] Compound
def self.from_sdf sdf
- Compound.new RestClientWrapper.post(service_uri, sdf, {:content_type => 'chemical/x-mdl-sdfile'})
+ OpenTox::Compound.new obconversion(sdf,"sdf","inchi")
end
# Create a compound from name. Relies on an external service for name lookups.
@@ -43,32 +41,25 @@ module OpenTox
# @param name [String] can be also an InChI/InChiKey, CAS number, etc
# @return [OpenTox::Compound] Compound
def self.from_name name
- @inchi = RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi")
- Compound.new RestClientWrapper.post(service_uri, @inchi, {:content_type => 'chemical/x-inchi'})
- end
-
- # Get InChI
- # @return [String] InChI string
- def inchi
- @inchi ||= RestClientWrapper.get(@data["uri"],{},{:accept => 'chemical/x-inchi'}).chomp
+ OpenTox::Compound.new RestClientWrapper.get File.join(CACTUS_URI,URI.escape(name),"stdinchi")
end
# Get InChIKey
# @return [String] InChI string
def inchikey
- @inchikey ||= RestClientWrapper.get(@data["uri"],{},{:accept => 'chemical/x-inchikey'}).chomp
+ obconversion(@inchi,"inchi","inchikey")
end
# Get (canonical) smiles
# @return [String] Smiles string
def smiles
- @smiles ||= RestClientWrapper.get(@data["uri"],{},{:accept => 'chemical/x-daylight-smiles'}).chomp
+ obconversion(@inchi,"inchi","smi") # "can" gives nonn-canonical smiles??
end
# Get sdf
# @return [String] SDF string
def sdf
- RestClientWrapper.get(@data["uri"],{},{:accept => 'chemical/x-mdl-sdfile'}).chomp
+ obconversion(@inchi,"inchi","sdf")
end
# Get gif image
@@ -82,14 +73,16 @@ module OpenTox
# image = compound.png
# @return [image/png] Image data
def png
- RestClientWrapper.get(File.join @data["uri"], "image")
+ obconversion(@inchi,"inchi","_png2")
end
+=begin
# Get URI of compound image
# @return [String] Compound image URI
def image_uri
File.join @data["uri"], "image"
end
+=end
# Get all known compound names. Relies on an external service for name lookups.
# @example
@@ -116,5 +109,25 @@ module OpenTox
uri = "http://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json"
@chemblid = JSON.parse(RestClientWrapper.get(uri))["compounds"].first["chemblId"]
end
+
+ private
+
+ def self.obconversion(identifier,input_format,output_format,option=nil)
+ obconversion = OpenBabel::OBConversion.new
+ obconversion.set_options(option, OpenBabel::OBConversion::OUTOPTIONS) if option
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_and_out_formats input_format, output_format
+ obconversion.read_string obmol, identifier
+ case output_format
+ when /smi|can|inchi/
+ obconversion.write_string(obmol).gsub(/\s/,'').chomp
+ else
+ obconversion.write_string(obmol)
+ end
+ end
+
+ def obconversion(identifier,input_format,output_format,option=nil)
+ self.class.obconversion(identifier,input_format,output_format,option=nil)
+ end
end
end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 1c938da..bcbacb2 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -4,95 +4,48 @@ module OpenTox
# Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
class Dataset
-
- def initialize uri=nil
- super uri
- @data["features"] ||= []
- @data["compounds"] ||= []
- @data["data_entries"] ||= []
+ include Mongoid::Document
+
+ field :feature_ids, type: Array
+ field :inchis, type: Array
+ field :data_entries, type: Array
+ field :warnings, type: Array
+ field :source, type: String
+
+ def initialize
+ super
+ self.feature_ids = []
+ self.inchis = []
+ self.data_entries = []
+ self.warnings = []
end
- def data_entries
- @data["data_entries"]
+ # Readers
+
+ def compounds
+ inchis.collect{|i| OpenTox::Compound.new i}
end
- [:features, :compounds, :data_entries].each do |method|
- send :define_method, "#{method}=" do |value|
- @data[method.to_s] = value.collect{|v| v.uri}
- end
- send :define_method, "#{method}<<" do |value|
- @data[method.to_s] << value.uri
- end
+ def features
+ self.feature_ids.collect{|id| OpenTox::Feature.find(id)}
end
- # Get data (lazy loading from dataset service)
- # overrides {OpenTox#metadata} to only load the metadata instead of the whole dataset
- # @return [Hash] the metadata
- def metadata force_update=false
- if @data.empty? or force_update
- uri = File.join(@data["uri"],"metadata")
- #begin
- @data = JSON.parse RestClientWrapper.get(uri,{},{:accept => "application/json"})
- #parse_ntriples RestClientWrapper.get(uri,{},{:accept => "text/plain"})
- #rescue # fall back to rdfxml
- #parse_rdfxml RestClientWrapper.get(uri,{},{:accept => "application/rdf+xml"})
- #end
- #@data = @rdf.to_hash[RDF::URI.new(@data["uri"])].inject({}) { |h, (predicate, values)| h[predicate] = values.collect{|v| v.to_s}; h }
- end
- @data
+ # Writers
+
+ def compounds=(compounds)
+ self.inchis = compounds.collect{|c| c.inchi}
end
- # @return [Array] feature objects (NOT uris)
- def features force_update=false
- if @data["features"].empty? or force_update
- uri = File.join(@data["uri"],"features")
- begin
- uris = JSON.parse RestClientWrapper.get(uri,{},{:accept => "application/json"}) # ordered datasets return ordered features
- rescue
- uris = []
- end
- @data["features"] = uris#.collect{|uri| Feature.new(uri)}
- end
- @data["features"].collect{|uri| Feature.new uri}
+ def add_compound(compound)
+ self.inchis << compound.id
end
- # @return [Array] compound objects (NOT uris)
- def compounds force_update=false
- if @data["compounds"].empty? or force_update
- uri = File.join(@data["uri"],"compounds")
- begin
- uris = JSON.parse RestClientWrapper.get(uri,{},{:accept => "application/json"}) # ordered datasets return ordered compounds
- rescue
- uris = []
- end
- @data["compounds"] = uris
- end
- @data["compounds"].collect{|uri| Compound.new(uri)}
+ def features=(features)
+ self.feature_ids = features.collect{|f| f.id}
end
- # @return [Array] with two dimensions,
- # first index: compounds, second index: features, values: compound feature values
- def data_entries force_update=false
- if @data["data_entries"].empty? or force_update
- sparql = "SELECT ?cidx ?fidx ?value FROM <#{uri}> WHERE {
- ?data_entry <#{RDF::OLO.index}> ?cidx ;
- <#{RDF::OT.values}> ?v .
- ?v <#{RDF::OT.feature}> ?f;
- <#{RDF::OT.value}> ?value .
- ?f <#{RDF::OLO.index}> ?fidx.
- } ORDER BY ?fidx ?cidx"
- RestClientWrapper.get(service_uri,{:query => sparql},{:accept => "text/uri-list"}).split("\n").each do |row|
- r,c,v = row.split("\t")
- @data["data_entries"][r.to_i] ||= []
- # adjust value class depending on feature type, StringFeature takes precedence over NumericFeature
- if features[c.to_i][RDF.type].include? RDF::OT.NumericFeature and ! features[c.to_i][RDF.type].include? RDF::OT.StringFeature
- v = v.to_f if v
- end
- @data["data_entries"][r.to_i][c.to_i] = v if v
- end
- # TODO: fallbacks for external and unordered datasets
- end
- @data["data_entries"]
+ def add_feature(feature)
+ self.feature_ids << feature.id
end
# Find data entry values for a given compound and feature
@@ -100,8 +53,8 @@ module OpenTox
# @param feature [OpenTox::Feature] OpenTox Feature object
# @return [Array] Data entry values
def values(compound, feature)
- rows = (0 ... compounds.length).select { |r| compounds[r].uri == compound.uri }
- col = features.collect{|f| f.uri}.index feature.uri
+ rows = (0 ... inchis.length).select { |r| inchis[r].uri == compound.uri }
+ col = feature_ids.collect{|f| f.uri}.index feature.uri
rows.collect{|row| data_entries[row][col]}
end
@@ -122,7 +75,7 @@ module OpenTox
end
# for prediction result datasets
- # assumes that there are features with title prediction and confidence
+ # assumes that there are feature_ids with title prediction and confidence
# @return [Array] of Hashes with keys { :compound, :value ,:confidence } (compound value is object not uri)
def predictions
predictions = []
@@ -149,18 +102,17 @@ module OpenTox
end
# Adding data methods
- # (Alternatively, you can directly change @data["features"] and @data["compounds"])
+ # (Alternatively, you can directly change @data["feature_ids"] and @data["compounds"])
# Create a dataset from file (csv,sdf,...)
# @param filename [String]
# @return [String] dataset uri
def upload filename, wait=true
- uri = RestClientWrapper.put(@data["uri"], {:file => File.new(filename)})
- wait_for_task uri if URI.task?(uri) and wait
- compounds true
- features true
- metadata true
- @data["uri"]
+ self.title = File.basename(filename)
+ self.source = filename
+ table = CSV.read filename, :skip_blanks => true
+ from_table table
+ save
end
# @param compound [OpenTox::Compound]
@@ -182,6 +134,7 @@ module OpenTox
end
end
+
# TODO: remove? might be dangerous if feature ordering is incorrect
# MG: I would not remove this because add_data_entry is very slow (4 times searching in arrays)
# CH: do you have measurements? compound and feature arrays are not that big, I suspect that feature search/creation is the time critical step
@@ -193,11 +146,11 @@ module OpenTox
# d << [ Compound.new("c1ccccc1"), feature-value-a, feature-value-b ]
def << row
compound = row.shift # removes the compound from the array
- bad_request_error "Dataset features are empty." unless @data["features"]
- bad_request_error "Row size '#{row.size}' does not match features size '#{@data["features"].size}'." unless row.size == @data["features"].size
+ bad_request_error "Dataset features are empty." unless feature_ids
+ bad_request_error "Row size '#{row.size}' does not match features size '#{feature_ids.size}'." unless row.size == feature_ids.size
bad_request_error "First column is not a OpenTox::Compound" unless compound.class == OpenTox::Compound
- @data["compounds"] << compound.uri
- @data["data_entries"] << row
+ self.inchis << compound.inchi
+ self.data_entries << row
end
# Serialisation
@@ -213,107 +166,6 @@ module OpenTox
end
end
-=begin
- RDF_FORMATS.each do |format|
-
- # redefine rdf parse methods for all formats e.g. parse_rdfxml
- send :define_method, "parse_#{format}".to_sym do |rdf|
- # TODO: parse ordered dataset
- # TODO: parse data entries
- # TODO: parse metadata
- @rdf = RDF::Graph.new
- RDF::Reader.for(format).new(rdf) do |reader|
- reader.each_statement{ |statement| @rdf << statement }
- end
- query = RDF::Query.new({ :uri => { RDF.type => RDF::OT.Compound } })
- @data["compounds"] = query.execute(@rdf).collect { |solution| OpenTox::Compound.new solution.uri }
- query = RDF::Query.new({ :uri => { RDF.type => RDF::OT.Feature } })
- @data["features"] = query.execute(@rdf).collect { |solution| OpenTox::Feature.new solution.uri }
- @data["compounds"].each_with_index do |c,i|
- @data["features"].each_with_index do |f,j|
- end
- end
- end
-
-
- # redefine rdf serialization methods
- send :define_method, "to_#{format}".to_sym do
- @data[RDF.type] = [RDF::OT.Dataset, RDF::OT.OrderedDataset]
- create_rdf
- @data["features"].each_with_index do |feature,i|
- @rdf << [RDF::URI.new(feature.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Feature)]
- @rdf << [RDF::URI.new(feature.uri), RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]
- end
- @data["compounds"].each_with_index do |compound,i|
- @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Compound)]
- if defined? @neighbors and neighbors.include? compound
- @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.Neighbor)]
- end
-
- @rdf << [RDF::URI.new(compound.uri), RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]
- data_entry_node = RDF::Node.new
- @rdf << [RDF::URI.new(@data["uri"]), RDF::URI.new(RDF::OT.dataEntry), data_entry_node]
- @rdf << [data_entry_node, RDF::URI.new(RDF.type), RDF::URI.new(RDF::OT.DataEntry)]
- @rdf << [data_entry_node, RDF::URI.new(RDF::OLO.index), RDF::Literal.new(i)]
- @rdf << [data_entry_node, RDF::URI.new(RDF::OT.compound), RDF::URI.new(compound.uri)]
- @data["data_entries"][i].each_with_index do |value,j|
- value_node = RDF::Node.new
- @rdf << [data_entry_node, RDF::URI.new(RDF::OT.values), value_node]
- @rdf << [value_node, RDF::URI.new(RDF::OT.feature), RDF::URI.new(@data["features"][j].uri)]
- @rdf << [value_node, RDF::URI.new(RDF::OT.value), RDF::Literal.new(value)]
- end
- end
- RDF::Writer.for(format).buffer do |writer|
- writer << @rdf
- end
- end
-
- end
-=end
-
-# TODO: fix bug that affects data_entry positions # DG: who wrotes this comment ?
- def to_ntriples # redefined string version for better performance
- ntriples = ""
- @data[RDF.type] = [ RDF::OT.Dataset, RDF::OT.OrderedDataset ]
- @data.each do |predicate,values|
- [values].flatten.each do |value|
- URI.valid?(value) ? value = "<#{value}>" : value = "\"#{value}\""
- ntriples << "<#{@data["uri"]}> <#{predicate}> #{value} .\n" #\n"
- end
- end
- @parameters.each_with_index do |parameter,i|
- p_node = "_:parameter"+ i.to_s
- ntriples << "<#{@data["uri"]}> <#{RDF::OT.parameters}> #{p_node} .\n"
- ntriples << "#{p_node} <#{RDF.type}> <#{RDF::OT.Parameter}> .\n"
- parameter.each { |k,v| ntriples << "#{p_node} <#{k}> \"#{v.to_s.tr('"', '\'')}\" .\n" }
- end
- @data["features"].each_with_index do |feature,i|
- ntriples << "<#{feature.uri}> <#{RDF.type}> <#{RDF::OT.Feature}> .\n"
- ntriples << "<#{feature.uri}> <#{RDF::OLO.index}> \"#{i}\"^^<http://www.w3.org/2001/XMLSchema#integer> .\n" # sorting at dataset service does not work without type information
- end
- @data["compounds"].each_with_index do |compound,i|
- ntriples << "<#{compound.uri}> <#{RDF.type}> <#{RDF::OT.Compound}> .\n"
- if defined? @neighbors and neighbors.include? compound
- ntriples << "<#{compound.uri}> <#{RDF.type}> <#{RDF::OT.Neighbor}> .\n"
- end
-
- ntriples << "<#{compound.uri}> <#{RDF::OLO.index}> \"#{i}\"^^<http://www.w3.org/2001/XMLSchema#integer> .\n" # sorting at dataset service does not work without type information
- data_entry_node = "_:dataentry"+ i.to_s
- ntriples << "<#{@data["uri"]}> <#{RDF::OT.dataEntry}> #{data_entry_node} .\n"
- ntriples << "#{data_entry_node} <#{RDF.type}> <#{RDF::OT.DataEntry}> .\n"
- ntriples << "#{data_entry_node} <#{RDF::OLO.index}> \"#{i}\"^^<http://www.w3.org/2001/XMLSchema#integer> .\n" # sorting at dataset service does not work without type information
- ntriples << "#{data_entry_node} <#{RDF::OT.compound}> <#{compound.uri}> .\n"
- @data["data_entries"][i].each_with_index do |value,j|
- value_node = data_entry_node+ "_value"+ j.to_s
- ntriples << "#{data_entry_node} <#{RDF::OT.values}> #{value_node} .\n"
- ntriples << "#{value_node} <#{RDF::OT.feature}> <#{@data["features"][j].uri}> .\n"
- ntriples << "#{value_node} <#{RDF::OT.value}> \"#{value}\" .\n"
- end unless @data["data_entries"][i].nil?
- end
- ntriples
-
- end
-
# Methods for for validation service
# create a new dataset with the specified compounds and features
@@ -350,32 +202,32 @@ module OpenTox
# @param dataset [OpenTox::Dataset] dataset that should be mapped to this dataset (fully loaded)
# @param compound_index [Fixnum], corresponding to dataset
def compound_index( dataset, compound_index )
- compound_uri = dataset.compounds[compound_index]#.uri
- self_indices = compound_indices(compound_uri)
+ compound_inchi = dataset.compounds[compound_index].inchi
+ self_indices = compound_indices(compound_inchi)
if self_indices==nil
nil
else
- dataset_indices = dataset.compound_indices(compound_uri)
+ dataset_indices = dataset.compound_indices(compound_inchi)
if self_indices.size==1
self_indices.first
elsif self_indices.size==dataset_indices.size
# we do assume that the order is preseverd (i.e., the nth occurences in both datasets are mapped to each other)!
self_indices[dataset_indices.index(compound_index)]
else
- raise "cannot map compound #{compound_uri} from dataset #{dataset.uri} to dataset #{uri}, "+
+ raise "cannot map compound #{compound_inchi} from dataset #{dataset.id} to dataset #{self.id}, "+
"compound occurs #{dataset_indices.size} times and #{self_indices.size} times"
end
end
end
# returns the inidices of the compound in the dataset
- # @param compound_uri [String]
+ # @param compound_inchi [String]
# @return [Array] compound index (position) of the compound in the dataset, array-size is 1 unless multiple occurences
- def compound_indices( compound_uri )
- unless defined?(@cmp_indices) and @cmp_indices.has_key?(compound_uri)
+ def compound_indices( compound_inchi )
+ unless defined?(@cmp_indices) and @cmp_indices.has_key?(compound_inchi)
@cmp_indices = {}
compounds().size.times do |i|
- c = @data["compounds"][i]#.uri
+ c = self.compounds[i].inchi
if @cmp_indices[c]==nil
@cmp_indices[c] = [i]
else
@@ -383,7 +235,7 @@ module OpenTox
end
end
end
- @cmp_indices[compound_uri]
+ @cmp_indices[compound_inchi]
end
# returns compound feature value using the compound-index and the feature_uri
@@ -392,6 +244,84 @@ module OpenTox
col = @data["features"].collect{|f| f.uri}.index feature_uri
@data["data_entries"][compound_index] ? @data["data_entries"][compound_index][col] : nil
end
- end
+ def from_table table
+
+ # features
+ feature_names = table.shift.collect{|f| f.strip}
+ self.warnings << "Duplicate features in table header." unless feature_names.size == feature_names.uniq.size
+ compound_format = feature_names.shift.strip
+ bad_request_error "#{compound_format} is not a supported compound format. Accepted formats: SMILES, InChI." unless compound_format =~ /SMILES|InChI/i
+ ignored_feature_indices = []
+ numeric = []
+ feature_names.each_with_index do |f,i|
+ values = table.collect{|row| val=row[i+1].to_s.strip; val.blank? ? nil : val }.uniq.compact
+ types = values.collect{|v| v.numeric? ? true : false}.uniq
+ metadata = {"title" => f}
+ if values.size == 0 # empty feature
+ elsif values.size > 5 and types.size == 1 and types.first == true # 5 max classes
+ metadata["numeric"] = true
+ numeric[i] = true
+ else
+ metadata["nominal"] = true
+ metadata["string"] = true
+ metadata["accept_values"] = values
+ numeric[i] = false
+ end
+ feature = OpenTox::Feature.find_or_create_by metadata
+ self.feature_ids << feature.id unless feature.nil?
+ end
+
+ # compounds and values
+ r = -1
+ table.each_with_index do |values,j|
+ compound = values.shift
+ begin
+ case compound_format
+ when /SMILES/i
+ c = OpenTox::Compound.from_smiles(compound)
+ if c.inchi.empty?
+ self.warnings << "Cannot parse #{compound_format} compound '#{compound.strip}' at position #{j+2}, all entries are ignored."
+ next
+ else
+ inchi = c.inchi
+ end
+ when /InChI/i
+ # TODO validate inchi
+ inchi = compound
+ else
+ raise "wrong compound format" #should be checked above
+ end
+ rescue
+ self.warnings << "Cannot parse #{compound_format} compound '#{compound}' at position #{j+2}, all entries are ignored."
+ next
+ end
+
+ r += 1
+ self.inchis << inchi
+ unless values.size == self.feature_ids.size
+ self.warnings << "Number of values at position #{j+2} (#{values.size}) is different than header size (#{self.feature_ids.size}), all entries are ignored."
+ next
+ end
+
+ self.data_entries << []
+ values.each_with_index do |v,i|
+ if v.blank?
+ self.data_entries.last << nil
+ self.warnings << "Empty value for compound '#{compound}' (row #{r+2}) and feature '#{feature_names[i]}' (column #{i+2})."
+ next
+ elsif numeric[i]
+ self.data_entries.last << v.to_f
+ else
+ self.data_entries.last << v.strip
+ end
+ end
+ end
+ self.inchis.duplicates.each do |inchi|
+ positions = []
+ self.inchis.each_with_index{|c,i| positions << i+1 if !c.blank? and c == inchi}
+ self.warnings << "Duplicate compound #{inchi} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments."
+ end
+ end
+ end
end
diff --git a/lib/feature.rb b/lib/feature.rb
index 5d3d962..43cf7e9 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -2,13 +2,18 @@ module OpenTox
class Feature
+ field :string, type: Boolean, default: false
+ field :nominal, type: Boolean, default: false
+ field :numeric, type: Boolean, default: false
+ field :accept_values, type: Array
+
# Find out feature type
# Classification takes precedence
# @return [String] Feature type
def feature_type
- if self[RDF.type].include?(RDF::OT.NominalFeature)
+ if nominal
"classification"
- elsif self[RDF.type].include?(RDF::OT.NumericFeature)
+ elsif numeric
"regression"
else
"unknown"
@@ -18,9 +23,9 @@ module OpenTox
# Get accept values
#
# @return[Array] Accept values
- def accept_values
- self[RDF::OT.acceptValue] ? self[RDF::OT.acceptValue].sort : nil
- end
+ #def accept_values
+ #self[RDF::OT.acceptValue] ? self[RDF::OT.acceptValue].sort : nil
+ #end
# Create value map
# @param [OpenTox::Feature] Feature
diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb
index 9f3373d..6358705 100644
--- a/lib/opentox-client.rb
+++ b/lib/opentox-client.rb
@@ -9,8 +9,7 @@ require 'yaml'
require 'json'
require 'logger'
require "securerandom"
-require 'mongo'
-require 'bson'
+require 'mongoid'
default_config = File.join(ENV["HOME"],".opentox","config","default.rb")
client_config = File.join(ENV["HOME"],".opentox","config","opentox-client.rb")
@@ -18,6 +17,9 @@ client_config = File.join(ENV["HOME"],".opentox","config","opentox-client.rb")
puts "Could not find configuration files #{default_config} or #{client_config}" unless File.exist? default_config or File.exist? client_config
require default_config if File.exist? default_config
require client_config if File.exist? client_config
+# TODO switch to production
+ENV["MONGOID_ENV"] = "development"
+Mongoid.load!("#{ENV['HOME']}/.opentox/config/mongoid.yml")
# define constants and global variables
#RDF::OT = RDF::Vocabulary.new 'http://www.opentox.org/api/1.2#'
@@ -28,7 +30,8 @@ require client_config if File.exist? client_config
#RDF::ISA = RDF::Vocabulary.new "http://onto.toxbank.net/isa/"
#RDF::OWL = RDF::Vocabulary.new "http://www.w3.org/2002/07/owl#"
-CLASSES = ["Compound", "Feature", "Dataset", "Validation", "Task", "Investigation"]
+#CLASSES = ["Compound", "Feature", "Dataset", "Validation", "Task", "Investigation"]
+CLASSES = ["Feature", "Dataset", "Validation", "Task", "Investigation"]
#RDF_FORMATS = [:rdfxml,:ntriples,:turtle]
# Regular expressions for parsing classification data
@@ -57,3 +60,12 @@ FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-m
# unauthorized_error "Failed to authenticate user \"#{$aa[:user]}\"." unless OpenTox::Authorization.is_token_valid(OpenTox::RestClientWrapper.subjectid)
#end
+# defaults to stderr, may be changed to file output (e.g in opentox-service)
+$logger = OTLogger.new(STDERR)
+$logger.level = Logger::DEBUG
+#Mongo::Logger.logger = $logger
+Mongo::Logger.level = Logger::WARN
+#$mongo = Mongo::Client.new($mongodb[:uri])
+Mongoid.logger.level = Logger::WARN
+Mongoid.logger = $logger
+#Moped.logger = $logger
diff --git a/lib/opentox.rb b/lib/opentox.rb
index 9be6078..96cefe0 100644
--- a/lib/opentox.rb
+++ b/lib/opentox.rb
@@ -1,172 +1,41 @@
-# defaults to stderr, may be changed to file output (e.g in opentox-service)
-$logger = OTLogger.new(STDERR)
-$logger.level = Logger::DEBUG
-Mongo::Logger.logger = $logger
-Mongo::Logger.logger.level = Logger::WARN
-$mongo = Mongo::Client.new($mongodb[:uri])
-
module OpenTox
# Ruby interface
- attr_accessor :data
-
- # Create a new OpenTox object
- # @param id [optional,String] ID
- # @return [OpenTox] OpenTox object
- def initialize
- @data = {}
- @data["type"] = type
- end
-
- def created_at
- @data["_id"].generation_time
- end
-
- # Object metadata (lazy loading)
- # @return [Hash] Object metadata
- def metadata
- get if exists?
- @data
- end
-
- # Metadata values
- # @param predicate [String] Predicate URI
- # @return [Array, String] Predicate value(s)
- def [](predicate)
- predicate = predicate.to_s
- return nil if @data[predicate].nil?
- @data[predicate].size == 1 ? @data[predicate].first : @data[predicate]
- end
-
- # Set a metadata entry
- # @param predicate [String] Predicate URI
- # @param values [Array, String] Predicate value(s)
- def []=(predicate,values)
- predicate = predicate.to_s
- values.is_a?(Array) ? @data[predicate] = [values].flatten : @data[predicate] = values
- end
-
- def id
- @data["_id"]
- end
-
- def exists?
- nr_items = $mongo[collection].find(:_id => @data["_id"]).count
- nr_items > 0 ? true : false
- end
-
- # Get object from webservice
- # @param [String,optional] mime_type
- def get
- resource_not_found_error("#{@data[:type]} with ID #{@data["_id"]} not found.") unless exists?
- @data = $mongo[collection].find(:_id => @data["_id"]).first
- end
-
- def save
- @data["_id"] = $mongo[collection].insert_one(@data).inserted_id
- end
-
- # partial update
- def update metadata
- $mongo[collection].find(:_id => @data["_id"]).find_one_and_replace('$set' => metadata)
- end
-
- # Save object at webservice (replace or create object)
- def put
- #@data.delete("_id") # to enable updates
- $mongo[collection].find(:_id => @data["_id"]).find_one_and_replace(@data, :upsert => true)
- end
-
- # Delete object at webservice
- def delete
- $mongo[collection].find(:_id => @data["_id"]).find_one_and_delete
- end
-
- # @return [String] converts OpenTox object into html document (by first converting it to a string)
- def to_html
- @data.to_json.to_html
- end
-
- def type
- self.class.to_s.split('::').last
- end
-
- def collection
- type.downcase
- end
-
- # short access for metadata keys title, description and type
- [ :title , :description ].each do |method|
- send :define_method, method do
- self[method]
- end
- send :define_method, "#{method}=" do |value|
- self[method] = value
- end
- end
-
- # define class methods within module
- def self.included(base)
- base.extend(ClassMethods)
- end
-
- module ClassMethods
- def subjectid
- RestClientWrapper.subjectid
- end
- def subjectid=(subjectid)
- RestClientWrapper.subjectid = subjectid
- end
- end
- # create default OpenTox classes with class methods
- # (defined in opentox-client.rb)
+ # create default OpenTox classes (defined in opentox-client.rb)
+ # provides Mongoid's query and persistence methods
+ # http://mongoid.org/en/mongoid/docs/persistence.html
+ # http://mongoid.org/en/mongoid/docs/querying.html
CLASSES.each do |klass|
c = Class.new do
include OpenTox
+ include Mongoid::Document
+ include Mongoid::Timestamps
+ store_in collection: klass.downcase.pluralize
- def self.all
- $mongo[collection].find.collect do |data|
- f = self.new
- f.data = data
- f
- end
- end
+ field :title, type: String
+ field :description, type: String
- def self.find_id id
- self.find(:_id => id)
+ # TODO check if needed
+ def self.subjectid
+ RestClientWrapper.subjectid
end
-
- #@example fetching a model
- # OpenTox::Model.find(<model-id>) -> model-object
- def self.find metadata
- f = self.new
- items = $mongo[collection].find metadata
- items.count > 0 ? f.data = items.first : f = nil
- f
+ def self.subjectid=(subjectid)
+ RestClientWrapper.subjectid = subjectid
end
+ end
+ OpenTox.const_set klass,c
+ end
- def self.create metadata
- object = self.new
- object.data = metadata
- object.save
- object.get
- object
- end
+ def type
+ self.class.to_s.split('::').last
+ end
- def self.find_or_create metadata
- search = metadata
- search.delete("_id")
- ids = $mongo[collection].find(search).distinct(:_id)
- ids.empty? ? self.create(metadata) : self.find_id(ids.first)
- end
+ # Serialisation
- private
- def self.collection
- self.to_s.split('::').last.downcase
- end
- end
- OpenTox.const_set klass,c
+ # @return [String] converts OpenTox object into html document (by first converting it to a string)
+ def to_html
+ self.to_json.to_html
end
end
diff --git a/lib/overwrite.rb b/lib/overwrite.rb
index 692e239..4dafe8d 100644
--- a/lib/overwrite.rb
+++ b/lib/overwrite.rb
@@ -134,6 +134,7 @@ end
module Kernel
+=begin
# overwrite backtick operator to catch system errors
# Override raises an error if _cmd_ returns a non-zero exit status. CH: I do not understand this comment
# Returns stdout if _cmd_ succeeds. Note that these are simply concatenated; STDERR is not inline. CH: I do not understand this comment
@@ -148,6 +149,7 @@ module Kernel
rescue
internal_server_error $!.message
end
+=end
# @return [String] uri of task result, if task fails, an error according to task is raised
def wait_for_task uri
diff --git a/lib/task.rb b/lib/task.rb
index f7e4c6f..55d024d 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -1,17 +1,30 @@
DEFAULT_TASK_MAX_DURATION = 36000
module OpenTox
+ # TODO: fix error reports
+ # TODO: fix field names and overwrite accessors
# Class for handling asynchronous tasks
class Task
+ field :creator, type: String
+ field :percentageCompleted, type: Float
+ field :error_code, type: Integer # workaround name, cannot overwrite accessors in current mongoid version
+ field :finished, type: Time # workaround name, cannot overwrite accessors in current mongoid version
+ # TODO
+ field :result_object, type: String
+ field :report, type: String
+ field :pid, type: Integer
+ field :observer_pid, type: Integer
+
def self.run(description, creator=nil)
task = Task.new
task[:description] = description.to_s
task[:creator] = creator.to_s
task[:percentageCompleted] = 0
- task[:code] = 202
+ task[:error_code] = 202
task.save
+
pid = fork do
begin
task.completed yield
@@ -19,8 +32,7 @@ module OpenTox
# wrap non-opentox-errors first
e = OpenTox::Error.new(500,e.message,nil,e.backtrace) unless e.is_a?(OpenTox::Error)
$logger.error "error in task #{task.id} created by #{creator}" # creator is not logged because error is logged when thrown
- task.update(:errorReport => e.metadata, :code => e.http_code, :finished_at => Time.now)
- task.get
+ task.update(:report => e.metadata, :error_code => e.http_code, :finished => Time.now)
task.kill
end
end
@@ -50,13 +62,11 @@ module OpenTox
def cancel
kill
- update(:code => 503, :finished_at => Time.now)
- get
+ update_attributes(:error_code => 503, :finished => Time.now)
end
def completed(result)
- update(:code => 200, :finished_at => Time.now, :percentageCompleted => 100, :result => result)
- get
+ update_attributes(:error_code => 200, :finished => Time.now, :percentageCompleted => 100, :result_object => result)
end
# waits for a task, unless time exceeds or state is no longer running
@@ -73,6 +83,22 @@ module OpenTox
end
+ def error_report
+ OpenTox::Task.find(id).report
+ end
+
+ def code
+ OpenTox::Task.find(id).error_code
+ end
+
+ def result
+ OpenTox::Task.find(id).result_object
+ end
+
+ def finished_at
+ OpenTox::Task.find(id).finished
+ end
+
def running?
code == 202
end
@@ -104,14 +130,4 @@ module OpenTox
end
end
- [:code, :description, :creator, :finished_at, :percentageCompleted, :result, :errorReport].each do |method|
- define_method method do
- $mongo[:task].find(:_id => self.id).distinct(method).first
- end
- end
-
- def error_report
- self.errorReport
- end
-
end
diff --git a/opentox-client.gemspec b/opentox-client.gemspec
index 91ee0ae..3bba11c 100644
--- a/opentox-client.gemspec
+++ b/opentox-client.gemspec
@@ -26,10 +26,9 @@ Gem::Specification.new do |s|
#s.add_runtime_dependency 'rdf-turtle'
s.add_runtime_dependency "open4"
s.add_runtime_dependency "openbabel"
- s.add_runtime_dependency "mongo"
- s.add_runtime_dependency "bson"
-
+ s.add_runtime_dependency "mongoid", '~> 5.0beta'
+
# external requirements
- ["libraptor-dev"].each{|r| s.requirements << r}
- s.post_install_message = "Please check the version of your libraptor library, if installation of rdf.rb fails"
+ #["libraptor-dev"].each{|r| s.requirements << r}
+ #s.post_install_message = "Please check the version of your libraptor library, if installation of rdf.rb fails"
end