summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2010-11-11 10:42:48 +0100
committerChristoph Helma <helma@in-silico.ch>2010-11-11 10:42:48 +0100
commit91c95f8dc8f60a8f0029b970ef881eecee28401b (patch)
tree405b4da936104c12a1938b25aa6766b5a526b94e /lib
parentb93002b4ea50ff7e357da08abd10577347ce2d5f (diff)
Documentation and API fixes for serializer and parser
Diffstat (limited to 'lib')
-rw-r--r--lib/dataset.rb10
-rw-r--r--lib/opentox.rb2
-rw-r--r--lib/parser.rb45
-rw-r--r--lib/serializer.rb37
4 files changed, 76 insertions, 18 deletions
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 05b2ed3..6e270e9 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -77,7 +77,7 @@ module OpenTox
parser.load_csv(csv)
end
- # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help))
+ # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
# - loads data_entries, compounds, features
# - sets metadata (warnings) for parser errors
# - you will have to set remaining metadata manually
@@ -87,13 +87,13 @@ module OpenTox
save unless @uri # get a uri for creating features
parser = Parser::Spreadsheets.new
parser.dataset = self
- parser.load_excel(book)
+ parser.load_spreadsheet(book)
end
# Load and return only metadata of a Dataset object
# @return [Hash] Metadata of the dataset
def load_metadata
- add_metadata Parser::Owl::Dataset.new(@uri).metadata
+ add_metadata Parser::Owl::Dataset.new(@uri).load_metadata
self.uri = @uri if @uri # keep uri
@metadata
end
@@ -147,8 +147,8 @@ module OpenTox
# Get Excel representation
# @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded))
- def to_xls
- Serializer::Spreadsheets.new(self).to_xls
+ def to_spreadsheet
+ Serializer::Spreadsheets.new(self).to_spreadsheet
end
# Get CSV string representation (data_entries only, metadata will be discarded)
diff --git a/lib/opentox.rb b/lib/opentox.rb
index 7e1deec..3b7fa65 100644
--- a/lib/opentox.rb
+++ b/lib/opentox.rb
@@ -26,7 +26,7 @@ module OpenTox
# Load (and return) metadata from object URI
# @return [Hash] Metadata
def load_metadata
- @metadata = Parser::Owl::Generic.new(@uri).metadata
+ @metadata = Parser::Owl::Generic.new(@uri).load_metadata
@metadata
end
diff --git a/lib/parser.rb b/lib/parser.rb
index 8c173f9..4d8e729 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -3,6 +3,8 @@ require 'roo'
class String
+ # Split RDF statement into triples
+ # @return [Array] Array with [subject,predicate,object]
def to_triple
self.chomp.split(' ',3).collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
end
@@ -11,16 +13,23 @@ end
module OpenTox
+ # Parser for various input formats
module Parser
+ # OWL-DL parser
module Owl
+ # Create a new OWL-DL parser
+ # @param uri URI of OpenTox object
+ # @return [OpenTox::Parser::Owl] OWL-DL parser
def initialize(uri)
@uri = uri
@metadata = {}
end
- def metadata
+ # Read metadata from opentox service
+ # @return [Hash] Object metadata
+ def load_metadata
if @dataset
uri = File.join(@uri,"metadata")
@@ -47,21 +56,37 @@ module OpenTox
@metadata
end
+ # Generic parser for all OpenTox classes
class Generic
include Owl
end
+ # OWL-DL parser for datasets
class Dataset
include Owl
attr_writer :uri
+ # Create a new OWL-DL dataset parser
+ # @param uri Dataset URI
+ # @return [OpenTox::Parser::Owl::Dataset] OWL-DL parser
def initialize(uri)
super uri
@dataset = ::OpenTox::Dataset.new(@uri)
end
+ # Read data from dataset service. Files can be parsed by setting #uri to a filename (after initialization with a real URI)
+ # @example Read data from an external service
+ # parser = OpenTox::Parser::Owl::Dataset.new "http://wwbservices.in-silico.ch/dataset/1"
+ # dataset = parser.load_uri
+ # @example Create dataset from RDF/XML file
+ # dataset = OpenTox::Dataset.create
+ # parser = OpenTox::Parser::Owl::Dataset.new dataset.uri
+ # parser.uri = "dataset.rdfxml" # insert your input file
+ # dataset = parser.load_uri
+ # dataset.save
+ # @return [Hash] Internal dataset representation
def load_uri
data = {}
feature_values = {}
@@ -95,6 +120,8 @@ module OpenTox
@dataset
end
+ # Read only features from a dataset service.
+ # @return [Hash] Internal features representation
def load_features
uri = File.join(@uri,"features")
statements = []
@@ -117,16 +144,12 @@ module OpenTox
end
+ # Parser for getting spreadsheet data into a dataset
class Spreadsheets
- # TODO: expand for multiple columns
attr_accessor :dataset
- def initialize
-
- # TODO: fix 2 datasets created
- #@dataset = Dataset.create
- #@dataset.save # get uri
+ def initialize
@data = []
@features = []
@feature_types = {}
@@ -137,7 +160,10 @@ module OpenTox
@duplicates = {}
end
- def load_excel(book)
+ # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
+ # @param [Excel] book Excel workbook object (created with roo gem)
+ # @return [OpenTox::Dataset] Dataset object with Excel data
+ def load_spreadsheet(book)
book.default_sheet = 0
add_features book.row(1)
2.upto(book.last_row) { |i| add_values book.row(i) }
@@ -145,6 +171,9 @@ module OpenTox
@dataset
end
+ # Load CSV string (format specification: http://toxcreate.org/help)
+ # @param [String] csv CSV representation of the dataset
+ # @return [OpenTox::Dataset] Dataset object with CSV data
def load_csv(csv)
row = 0
input = csv.split("\n")
diff --git a/lib/serializer.rb b/lib/serializer.rb
index 3a9cb60..31aa0d1 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -3,9 +3,10 @@ require 'yajl'
module OpenTox
+ # Serializer for various output formats
module Serializer
- # modelled according to to http://n2.talis.com/wiki/RDF_JSON_Specification
+ # OWL-DL Serializer, modelled according to http://n2.talis.com/wiki/RDF_JSON_Specification
class Owl
attr_accessor :object
@@ -60,15 +61,21 @@ module OpenTox
@objects = Set.new
end
+ # Add a compound
+ # @param [String] uri Compound URI
def add_compound(uri)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Compound }] }
end
+ # Add a feature
+ # @param [String] uri Feature URI
def add_feature(uri,metadata)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }
add_metadata uri, metadata
end
+ # Add a dataset
+ # @param [OpenTox::Dataset] dataset Dataset object (its #uri is used as the dataset URI)
def add_dataset(dataset)
@dataset = dataset.uri
@@ -89,6 +96,8 @@ module OpenTox
end
+ # Add an algorithm
+ # @param [String] uri Algorithm URI
def add_algorithm(uri,metadata)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
LOGGER.debug @object[uri]
@@ -96,12 +105,16 @@ module OpenTox
LOGGER.debug @object[uri]
end
+ # Add a model
+ # @param [String] uri Model URI
def add_model(uri,metadata,parameters)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] }
add_metadata uri, metadata
add_parameters uri, parameters
end
+ # Add metadata
+ # @param [Hash] metadata
def add_metadata(uri,metadata)
id = 0
metadata.each do |u,v|
@@ -123,6 +136,10 @@ module OpenTox
end
end
+ # Add a data entry
+ # @param [String] compound Compound URI
+ # @param [String] feature Feature URI
+ # @param [Boolean,Float] value Feature value
def add_data_entry(compound,feature,value)
add_compound(compound) unless @object[compound]
add_feature(feature,{}) unless @object[feature]
@@ -158,11 +175,11 @@ module OpenTox
# Serializers
+ # Convert to N-Triples
+ # @return [text/plain] Object OWL-DL in N-Triples format
def to_ntriples
- #rdf_types
@triples = Set.new
- #LOGGER.debug @object.to_yaml
@object.each do |s,entry|
s = url(s) if type(s) == "uri"
entry.each do |p,objects|
@@ -183,11 +200,16 @@ module OpenTox
@triples.sort.collect{ |s| s.join(' ').concat(" .") }.join("\n")+"\n"
end
+ # Convert to RDF/XML
+ # @return [text/plain] Object OWL-DL in RDF/XML format
def to_rdfxml
Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path}
`rapper -i ntriples -o rdfxml #{@path} 2>/dev/null`
end
+ # Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification
+ # (Ambit services use a different JSON representation)
+ # @return [text/plain] Object OWL-DL in JSON format
def to_json
#rdf_types
Yajl::Encoder.encode(@object)
@@ -250,8 +272,11 @@ module OpenTox
end
+ # Serializer for spreadsheet formats
class Spreadsheets # to avoid nameclash with Spreadsheet gem
+ # Create a new spreadsheet serializer
+ # @param [OpenTox::Dataset] dataset Dataset object
def initialize(dataset)
@rows = []
@rows << ["SMILES"]
@@ -272,11 +297,15 @@ module OpenTox
end
end
+ # Convert to CSV string
+ # @return [String] CSV string
def to_csv
@rows.collect{|r| r.join(", ")}.join("\n")
end
- def to_xls
+ # Convert to spreadsheet workbook
+ # @return [Spreadsheet::Workbook] Workbook object (use the spreadsheet gem to write a file)
+ def to_spreadsheet
Spreadsheet.client_encoding = 'UTF-8'
book = Spreadsheet::Workbook.new
sheet = book.create_worksheet(:name => '')