summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 2
-rw-r--r-- lib/compound.rb 12
-rw-r--r-- lib/dataset.rb 42
-rw-r--r-- lib/opentox.rb 4
4 files changed, 51 insertions, 9 deletions
diff --git a/.gitignore b/.gitignore
index c9daf01..8f883a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ Gemfile.lock
.bundle
pkg/*
*~
+.yardoc/
+doc/
diff --git a/lib/compound.rb b/lib/compound.rb
index d33c48e..89bf840 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -91,14 +91,18 @@ module OpenTox
RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names").split("\n")
end
+ # @return [String] PubChem Compound Identifier (CID), derived via REST call to PubChem
def cid
pug_uri = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
@cid ||= RestClientWrapper.post(File.join(pug_uri, "compound", "inchi", "cids", "TXT"),{:inchi => inchi}).strip
end
+ # @todo
def chebi
+ raise_internal_error "not yet implemented"
end
+ # @return [String] ChEMBL database compound id, derived via REST call to ChEMBL
def chemblid
# https://www.ebi.ac.uk/chembldb/ws#individualCompoundByInChiKey
uri = "http://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json"
@@ -190,11 +194,11 @@ module OpenTox
# Keys: matching smarts, values: number of non-unique hits, or 1
# @param [Array] smarts_array Array with Smarts strings
# @param use_hits [Boolean] Whether non-unique hits or 1 should be produced
- # @return [Array] Array with matching Smarts strings
- # @example {
+ # @return [Hash] Hash with matching Smarts as keys, nr-of-hits/1 as values
+ # @example
# compound = Compound.from_name("Benzene")
- # compound.match(['cc','cN']) # returns { 'cc' => 12, 'cN' => 0 }
- # }
+ # compound.match_hits(['cc','cN'],true) # returns { 'cc' => 12 }, 'cN' is not included because it does not match
+ # compound.match_hits(['cc','cN'],false) # returns { 'cc' => 1 }
def match_hits(smarts_array, use_hits=true)
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 19d05c9..9efae20 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -15,7 +15,8 @@ module OpenTox
end
# Get data (lazy loading from dataset service)
-
+ # overrides {OpenTox#metadata} to only load the metadata instead of the whole dataset
+ # @return [Hash] the metadata
def metadata force_update=false
if @metadata.empty? or force_update
uri = File.join(@uri,"metadata")
@@ -29,6 +30,7 @@ module OpenTox
@metadata
end
+ # @return [Array] feature objects (NOT uris)
def features force_update=false
if @features.empty? or force_update
uri = File.join(@uri,"features")
@@ -38,6 +40,7 @@ module OpenTox
@features
end
+ # @return [Array] compound objects (NOT uris)
def compounds force_update=false
if @compounds.empty? or force_update
uri = File.join(@uri,"compounds")
@@ -47,6 +50,8 @@ module OpenTox
@compounds
end
+ # @return [Array] with two dimensions,
+ # first index: compounds, second index: features, values: compound feature values
def data_entries force_update=false
if @data_entries.empty? or force_update
sparql = "SELECT ?cidx ?fidx ?value FROM <#{uri}> WHERE {
@@ -105,6 +110,9 @@ module OpenTox
compounds.select{|f| f.uri == uri}.first
end
+ # for prediction result datasets
+ # assumes that there are features with title prediction and confidence
+ # @return [Array] of Hashes with keys { :compound, :value, :confidence } (the :compound value is an object, not a uri)
def predictions
predictions = []
prediction_feature = nil
@@ -128,15 +136,23 @@ module OpenTox
predictions
end
- # Adding data (@features and @compounds are also writable)
+ # Adding data methods
+ # (Alternatively, you can directly change @features and @compounds)
+ # Create a dataset from file (csv,sdf,...)
+ # @param filename [String]
+ # @return [String] dataset uri
def upload filename, wait=true
uri = RestClientWrapper.put(@uri, {:file => File.new(filename)}, {:subjectid => @subjectid})
wait_for_task uri if URI.task?(uri) and wait
metadata true
@uri
end
-
+
+ # @param compound [OpenTox::Compound]
+ # @param feature [OpenTox::Feature]
+ # @param value [Object] (will be converted to String)
+ # @return [Array] data_entries
def add_data_entry compound, feature, value
@compounds << compound unless @compounds.collect{|c| c.uri}.include?(compound.uri)
row = @compounds.collect{|c| c.uri}.index(compound.uri)
@@ -152,8 +168,15 @@ module OpenTox
end
# TODO: remove? might be dangerous if feature ordering is incorrect
+ # MG: I would not remove this because add_data_entry is very slow (4 times searching in arrays)
+ # @param row [Array]
+ # @example
+ # d = Dataset.new
+ # d.features << Feature.new(a)
+ # d.features << Feature.new(b)
+ # d << [ Compound.new("c1ccccc1"), feature-value-a, feature-value-b ]
def << row
- compound = row.shift
+ compound = row.shift # removes the compound from the array
bad_request_error "Dataset features are empty." unless @features
bad_request_error "Row size '#{row.size}' does not match features size '#{@features.size}'." unless row.size == @features.size
bad_request_error "First column is not a OpenTox::Compound" unless compound.class == OpenTox::Compound
@@ -163,6 +186,8 @@ module OpenTox
# Serialisation
+ # converts dataset to csv format including compound smiles as first column, other column headers are feature titles
+ # @return [String]
def to_csv
CSV.generate do |csv|
csv << ["SMILES"] + features.collect{|f| f.title}
@@ -277,6 +302,11 @@ module OpenTox
# Methods for validation service
+ # create a new dataset with the specified compounds and features
+ # @param compound_indices [Array] compound indices (integers)
+ # @param feats [Array] features objects
+ # @param metadata [Hash]
+ # @return [OpenTox::Dataset]
def split( compound_indices, feats, metadata, subjectid=nil)
bad_request_error "Dataset.split : Please give compounds as indices" if compound_indices.size==0 or !compound_indices[0].is_a?(Fixnum)
@@ -328,6 +358,9 @@ module OpenTox
@index_map[dataset.uri][compound_index]
end
+ # returns the indices of the compound in the dataset
+ # @param compound [OpenTox::Compound]
+ # @return [Array] compound indices (positions) of the compound in the dataset; array size is 1 unless the compound occurs multiple times
def compound_indices( compound )
unless defined?(@cmp_indices) and @cmp_indices.has_key?(compound)
@cmp_indices = {}
@@ -343,6 +376,7 @@ module OpenTox
@cmp_indices[compound]
end
+ # returns compound feature value using the compound-index and the feature_uri
def data_entry_value(compound_index, feature_uri)
data_entries(true) if @data_entries.empty?
col = @features.collect{|f| f.uri}.index feature_uri
diff --git a/lib/opentox.rb b/lib/opentox.rb
index 0e44426..95dca04 100644
--- a/lib/opentox.rb
+++ b/lib/opentox.rb
@@ -167,15 +167,17 @@ module OpenTox
end
end
+ # @return [String] the object serialized as a Turtle string
def to_turtle # redefined to use prefixes (not supported by RDF::Writer)
prefixes = {:rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}
['OT', 'DC', 'XSD', 'OLO'].each{|p| prefixes[p.downcase.to_sym] = eval("RDF::#{p}.to_s") }
create_rdf
- RDF::Writer.for(:turtle).buffer(:prefixes => prefixes) do |writer|
+ RDF::Turtle::Writer.for(:turtle).buffer(:prefixes => prefixes) do |writer|
writer << @rdf
end
end
+ # @return [String] the OpenTox object as an HTML document (produced by first converting it to a Turtle string)
def to_html
to_turtle.to_html
end