summaryrefslogtreecommitdiff
path: root/lib/dataset.rb
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2010-10-22 17:45:19 +0200
committerChristoph Helma <helma@in-silico.ch>2010-10-22 17:45:19 +0200
commitd6811507c1c1339cc4fe7cdb429b9b34b97dc422 (patch)
tree9cf87802db0b03e078077a52ecedd0994bbbd0e4 /lib/dataset.rb
parent3dd19c461d0b205ff504a85785f0c6e55114cd4e (diff)
new API with support for external services (initial version)
Diffstat (limited to 'lib/dataset.rb')
-rw-r--r--lib/dataset.rb273
1 files changed, 244 insertions, 29 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 2eb2206..7c8ce24 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -1,35 +1,253 @@
module OpenTox
+ # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
+ #
+ # Examples:
+ # require "opentox-ruby-api-wrapper"
+ #
+ # # Creating datasets
+ #
+ # # create an empty dataset
+ # dataset = OpenTox::Dataset.new
+ # # create an empty dataset with URI
+ # # this does not load data from the dataset service - use one of the load_* methods
+ # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1")
+ # # create a new dataset and save it to obtain a URI
+ # dataset = OpenTox::Dataset.create
+ # # create a new dataset from yaml representation
+ # dataset = OpenTox::Dataset.from_yaml(yaml_string)
+ # # create a new dataset from CSV string
+ # csv_string = "SMILES, Toxicity\nc1ccccc1N, true"
+ # dataset = OpenTox::Dataset.from_csv(csv_string)
+ #
+ # # Loading data
+ # # Datasets created with OpenTox::Dataset.new(uri) are empty by default
+ # # Invoking one of the following functions will load data into the object
+ #
+ # # create an empty dataset with URI
+ # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1")
+ # # loads (and returns) only metadata
+ # dataset.load_metadata
+ # # loads (and returns) only compounds
+ # dataset.load_compounds
+ # # loads (and returns) only features
+ # dataset.load_features
+ # # load all data from URI
+ # dataset.load_all
+ #
+ # # Getting dataset representations
+ #
+ # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1")
+ # dataset.load_all
+ # # OWL-DL (RDF/XML)
+ # dataset.rdfxml
+ # # OWL-DL (Ntriples)
+ # dataset.ntriples
+ # # YAML
+ # dataset.yaml
+ # # CSV
+ # dataset.csv
+ #
+ # # Modifying datasets
+ #
+ # # insert a statement (compound_uri,feature_uri,value)
+ # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true
+ #
+ #
+ # # Saving datasets
+ # # save dataset at dataset service
+ # dataset.save
+ #
+ # # Deleting datasets
+ # # delete dataset (also at dataset service)
+ # dataset.delete
class Dataset
- attr_accessor :uri, :title, :creator, :data, :features, :compounds
+ include OtObject
- def initialize( owl=nil )
- @data = {}
- @features = []
+ attr_reader :features, :compounds, :data_entries, :metadata
+ attr_writer :metadata
+
+ # Create dataset with optional URI
+ def initialize(uri=nil)
+ super uri
+ @features = {}
@compounds = []
-
+ @data_entries = {}
+ end
+
+ # Create and save an empty dataset (assigns URI to dataset)
+ def self.create(uri=CONFIG[:services]["opentox-dataset"])
+ dataset = Dataset.new
+ dataset.uri = RestClientWrapper.post(uri,{}).to_s.chomp
+ dataset
+ end
+
+ # Get all datasets from a service
+# def self.all(uri=CONFIG[:services]["opentox-dataset"])
+# RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)}
+# end
+
+ # Create a dataset from YAML string
+ def self.from_yaml(yaml)
+ dataset = Dataset.create
+ dataset.copy YAML.load(yaml)
+ dataset
+ end
+
+ # Create dataset from CSV string (format specification: http://toxcreate.org/help)
+ # - loads data_entries, compounds, features
+ # - sets metadata (warnings) for parser errors
+ # - you will have to set remaining metadata manually
+ def self.from_csv(csv)
+ dataset = Dataset.create
+ Parser::Spreadsheet.new(dataset).load_csv(csv)
+ dataset
+ end
+
+ # Create dataset from Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
+ # - loads data_entries, compounds, features
+ # - sets metadata (warnings) for parser errors
+ # - you will have to set remaining metadata manually
+ def self.from_spreadsheet(book)
+ dataset = Dataset.create
+ Parser::Spreadsheet.new(dataset).load_excel(book)
+ dataset
+ end
+
+ # Load and return metadata of a Dataset object
+ def load_metadata
+ #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
+ #add_metadata YAML.load(RestClientWrapper.get(File.join(@uri,"metadata"), :accept => "application/x-yaml"))
+ #else
+ add_metadata Parser::Owl::Dataset.new(@uri).metadata
+ #end
+ self.uri = @uri if @uri # keep uri
+ @metadata
+ end
+
+ # Load all data (metadata, data_entries, compounds and features) from URI
+ def load_all
+ if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
+ copy YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml"))
+ else
+ parser = Parser::Owl::Dataset.new(@uri)
+ copy parser.load_uri
+ end
+ end
+
+ # Load and return all compound URIs
+ def load_compounds
+ RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri|
+ @compounds << compound_uri.chomp
+ end
+ @compounds.uniq!
+ end
+
+ # Load all feature URIs
+ def load_features
+ RestClientWrapper.get(File.join(uri,"features"),:accept=> "text/uri-list").to_s.each_line do |feature_uri|
+ @features[feature_uri.chomp] = Feature.new(feature_uri.chomp).load_metadata
+ end
+ @features
+ end
+
+ # Get YAML representation
+ def yaml
+ self.to_yaml
+ end
+
+ # Get Excel representation, returns a Spreadsheet::Workbook which can be written with the 'spreadsheet' gem (data_entries only, metadata will be discarded)
+ def excel
+ Serializer::Spreadsheets.new(self).excel
+ end
+
+ # Get CSV string representation (data_entries only, metadata will be discarded)
+ def csv
+ Serializer::Spreadsheets.new(self).csv
+ end
+
+ # Get OWL-DL in ntriples format
+ def ntriples
+ s = Serializer::Owl.new
+ s.add_dataset(self)
+ s.ntriples
+ end
+
+ # Get OWL-DL in RDF/XML format
+ def rdfxml
+ s = Serializer::Owl.new
+ s.add_dataset(self)
+ s.rdfxml
+ end
+
+ # Insert a statement (compound_uri,feature_uri,value)
+ def add (compound,feature,value)
+ @compounds << compound unless @compounds.include? compound
+ @features[feature] = {} unless @features[feature]
+ @data_entries[compound] = {} unless @data_entries[compound]
+ @data_entries[compound][feature] = [] unless @data_entries[compound][feature]
+ @data_entries[compound][feature] << value
+ end
+
+ # Add metadata (hash with predicate_uri => value)
+ def add_metadata(metadata)
+ metadata.each { |k,v| @metadata[k] = v }
+ end
+
+ # Copy a dataset (rewrites URI)
+ def copy(dataset)
+ @metadata = dataset.metadata
+ @data_entries = dataset.data_entries
+ @compounds = dataset.compounds
+ @features = dataset.features
+ if @uri
+ self.uri = @uri
+ else
+ @uri = dataset.metadata[XSD.anyUri]
+ end
+ end
+
+ # save dataset (overwrites existing dataset)
+ def save
+ # TODO: rewrite feature URIs ??
+ # create dataset if uri empty
+ @compounds.uniq!
+ RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
+ end
+
+ # Delete dataset at the dataset service
+ def delete
+ RestClientWrapper.delete @uri
+ end
+ end
+end
+
+ #########################################################
+ # kept for backward compatibility, may have to be fixed #
+ #########################################################
+
+=begin
+ def from_owl(owl)
# creates dataset object from Opentox::Owl object
# use Dataset.find( <uri> ) to load dataset from rdf-supporting datasetservice
# note: does not load all feature values, as this is time consuming
- if owl
- raise "invalid param" unless owl.is_a?(OpenTox::Owl)
- @title = owl.get("title")
- @creator = owl.get("creator")
- @uri = owl.uri
- # when loading a dataset from owl, only compound- and feature-uris are loaded
- owl.load_dataset(@compounds, @features)
- # all features are marked as dirty
- # as soon as a feature-value is requested all values for this feature are loaded from the rdf
- @dirty_features = @features.dclone
- @owl = owl
- end
+ raise "invalid param" unless owl.is_a?(OpenTox::Owl)
+ @metadata[DC.title] = owl.get("title")
+ @metadata[DC.creator] = owl.get("creator")
+ @metadata[XSD.anyUri] = owl.uri
+ # when loading a dataset from owl, only compound- and feature-uris are loaded
+ owl.load_dataset(@compounds, @features)
+ # all features are marked as dirty
+ # as soon as a feature-value is requested all values for this feature are loaded from the rdf
+ @dirty_features = @features.dclone
+ @owl = owl
end
def self.find(uri, accept_header=nil)
unless accept_header
- if (@@config[:yaml_hosts].include?(URI.parse(uri).host))
+ if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host))
accept_header = 'application/x-yaml'
else
accept_header = "application/rdf+xml"
@@ -38,8 +256,10 @@ module OpenTox
case accept_header
when "application/x-yaml"
+ LOGGER.debug "DATASET: "+ uri
+ LOGGER.debug RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
- d.uri = uri unless d.uri
+ #d.uri = @metadata[XSD.anyUri] unless d.uri
when "application/rdf+xml"
owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset")
d = Dataset.new(owl)
@@ -48,7 +268,7 @@ module OpenTox
end
d
end
-
+
# converts a dataset represented in owl to yaml
# (uses a temporary dataset)
# note: to_yaml is overwritten, loads complete owl dataset values
@@ -108,7 +328,7 @@ module OpenTox
raise "predicted class value is an array\n"+
"value "+v.to_s+"\n"+
"value-class "+v.class.to_s+"\n"+
- "dataset "+@uri.to_s+"\n"+
+ "dataset "+self.uri.to_s+"\n"+
"compound "+compound.to_s+"\n"+
"feature "+feature.to_s+"\n"
else
@@ -130,7 +350,7 @@ module OpenTox
raise "predicted regression value is an array\n"+
"value "+v.to_s+"\n"+
"value-class "+v.class.to_s+"\n"+
- "dataset "+@uri.to_s+"\n"+
+ "dataset "+self.uri.to_s+"\n"+
"compound "+compound.to_s+"\n"+
"feature "+feature.to_s+"\n"
else
@@ -181,7 +401,7 @@ module OpenTox
raise "value is not an array\n"+
"value "+v.to_s+"\n"+
"value-class "+v.class.to_s+"\n"+
- "dataset "+@uri.to_s+"\n"+
+ "dataset "+self.uri.to_s+"\n"+
"compound "+compound.to_s+"\n"+
"feature "+feature.to_s+"\n"
end
@@ -216,11 +436,6 @@ module OpenTox
super - ["@owl"]
end
- # saves (changes) as new dataset in dataset service
- # returns uri
- # uses to yaml method (which is overwritten)
- def save
- OpenTox::RestClientWrapper.post(@@config[:services]["opentox-dataset"],{:content_type => "application/x-yaml"},self.to_yaml).strip
- end
end
end
+=end