From 91c95f8dc8f60a8f0029b970ef881eecee28401b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 11 Nov 2010 10:42:48 +0100 Subject: Documentation and API fixes for serializer and parser --- lib/dataset.rb | 10 +++++----- lib/opentox.rb | 2 +- lib/parser.rb | 45 +++++++++++++++++++++++++++++++++++++-------- lib/serializer.rb | 37 +++++++++++++++++++++++++++++++++---- 4 files changed, 76 insertions(+), 18 deletions(-) diff --git a/lib/dataset.rb b/lib/dataset.rb index 05b2ed3..6e270e9 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -77,7 +77,7 @@ module OpenTox parser.load_csv(csv) end - # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)) + # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually @@ -87,13 +87,13 @@ module OpenTox save unless @uri # get a uri for creating features parser = Parser::Spreadsheets.new parser.dataset = self - parser.load_excel(book) + parser.load_spreadsheet(book) end # Load and return only metadata of a Dataset object # @return [Hash] Metadata of the dataset def load_metadata - add_metadata Parser::Owl::Dataset.new(@uri).metadata + add_metadata Parser::Owl::Dataset.new(@uri).load_metadata self.uri = @uri if @uri # keep uri @metadata end @@ -147,8 +147,8 @@ module OpenTox # Get Excel representation # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded)) - def to_xls - Serializer::Spreadsheets.new(self).to_xls + def to_spreadsheet + Serializer::Spreadsheets.new(self).to_spreadsheet end # Get CSV string representation (data_entries only, metadata will be discarded) diff --git a/lib/opentox.rb b/lib/opentox.rb index 7e1deec..3b7fa65 100644 --- 
a/lib/opentox.rb +++ b/lib/opentox.rb @@ -26,7 +26,7 @@ module OpenTox # Load (and return) metadata from object URI # @return [Hash] Metadata def load_metadata - @metadata = Parser::Owl::Generic.new(@uri).metadata + @metadata = Parser::Owl::Generic.new(@uri).load_metadata @metadata end diff --git a/lib/parser.rb b/lib/parser.rb index 8c173f9..4d8e729 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -3,6 +3,8 @@ require 'roo' class String + # Split RDF statement into triples + # @return [Array] Array with [subject,predicate,object] def to_triple self.chomp.split(' ',3).collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')} end @@ -11,16 +13,23 @@ end module OpenTox + # Parser for various input formats module Parser + # OWL-DL parser module Owl + # Create a new OWL-DL parser + # @param uri URI of OpenTox object + # @return [OpenTox::Parser::Owl] OWL-DL parser def initialize(uri) @uri = uri @metadata = {} end - def metadata + # Read metadata from opentox service + # @return [Hash] Object metadata + def load_metadata if @dataset uri = File.join(@uri,"metadata") @@ -47,21 +56,37 @@ module OpenTox @metadata end + # Generic parser for all OpenTox classes class Generic include Owl end + # OWL-DL parser for datasets class Dataset include Owl attr_writer :uri + # Create a new OWL-DL dataset parser + # @param uri Dataset URI + # @return [OpenTox::Parser::Owl::Dataset] OWL-DL parser def initialize(uri) super uri @dataset = ::OpenTox::Dataset.new(@uri) end + # Read data from dataset service. 
Files can be parsed by setting #uri to a filename (after initialization with a real URI) + # @example Read data from an external service + # parser = OpenTox::Parser::Owl::Dataset.new "http://webservices.in-silico.ch/dataset/1" + # dataset = parser.load_uri + # @example Create dataset from RDF/XML file + # dataset = OpenTox::Dataset.create + # parser = OpenTox::Parser::Owl::Dataset.new dataset.uri + # parser.uri = "dataset.rdfxml" # insert your input file + # dataset = parser.load_uri + # dataset.save + # @return [Hash] Internal dataset representation def load_uri data = {} feature_values = {} @@ -95,6 +120,8 @@ module OpenTox @dataset end + # Read only features from a dataset service. + # @return [Hash] Internal features representation def load_features uri = File.join(@uri,"features") statements = [] @@ -117,16 +144,12 @@ module OpenTox end + # Parser for getting spreadsheet data into a dataset class Spreadsheets - # TODO: expand for multiple columns attr_accessor :dataset - def initialize - - # TODO: fix 2 datasets created - #@dataset = Dataset.create - #@dataset.save # get uri + def initialize @data = [] @features = [] @feature_types = {} @@ -137,7 +160,10 @@ module OpenTox @duplicates = {} end - def load_excel(book) + # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help) + # @param [Excel] book Excel workbook object (created with roo gem) + # @return [OpenTox::Dataset] Dataset object with Excel data + def load_spreadsheet(book) book.default_sheet = 0 add_features book.row(1) 2.upto(book.last_row) { |i| add_values book.row(i) } @@ -145,6 +171,9 @@ module OpenTox @dataset end + # Load CSV string (format specification: http://toxcreate.org/help) + # @param [String] csv CSV representation of the dataset + # @return [OpenTox::Dataset] Dataset object with CSV data def load_csv(csv) row = 0 input = csv.split("\n") diff --git a/lib/serializer.rb b/lib/serializer.rb index 3a9cb60..31aa0d1 
100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -3,9 +3,10 @@ require 'yajl' module OpenTox + # Serializer for various output formats module Serializer - # modelled according to to http://n2.talis.com/wiki/RDF_JSON_Specification + # OWL-DL Serializer, modelled according to http://n2.talis.com/wiki/RDF_JSON_Specification class Owl attr_accessor :object @@ -60,15 +61,21 @@ module OpenTox @objects = Set.new end + # Add a compound + # @param [String] uri Compound URI def add_compound(uri) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Compound }] } end + # Add a feature + # @param [String] uri Feature URI def add_feature(uri,metadata) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } add_metadata uri, metadata end + # Add a dataset + # @param [OpenTox::Dataset] dataset Dataset object def add_dataset(dataset) @dataset = dataset.uri @@ -89,6 +96,8 @@ module OpenTox end + # Add an algorithm + # @param [String] uri Algorithm URI def add_algorithm(uri,metadata) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } LOGGER.debug @object[uri] @@ -96,12 +105,16 @@ module OpenTox LOGGER.debug @object[uri] end + # Add a model + # @param [String] uri Model URI def add_model(uri,metadata,parameters) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] } add_metadata uri, metadata add_parameters uri, parameters end + # Add metadata + # @param [Hash] metadata def add_metadata(uri,metadata) id = 0 metadata.each do |u,v| @@ -123,6 +136,10 @@ module OpenTox end end + # Add a data entry + # @param [String] compound Compound URI + # @param [String] feature Feature URI + # @param [Boolean,Float] value Feature value def add_data_entry(compound,feature,value) add_compound(compound) unless @object[compound] add_feature(feature,{}) unless @object[feature] @@ -158,11 +175,11 @@ module OpenTox # Serializers + # Convert to N-Triples + # @return [text/plain] Object OWL-DL in N-Triples format def to_ntriples 
- #rdf_types @triples = Set.new - #LOGGER.debug @object.to_yaml @object.each do |s,entry| s = url(s) if type(s) == "uri" entry.each do |p,objects| @@ -183,11 +200,16 @@ module OpenTox @triples.sort.collect{ |s| s.join(' ').concat(" .") }.join("\n")+"\n" end + # Convert to RDF/XML + # @return [text/plain] Object OWL-DL in RDF/XML format def to_rdfxml Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path} `rapper -i ntriples -o rdfxml #{@path} 2>/dev/null` end + # Convert to JSON as specified in http://n2.talis.com/wiki/RDF_JSON_Specification + # (Ambit services use a different JSON representation) + # @return [text/plain] Object OWL-DL in JSON format def to_json #rdf_types Yajl::Encoder.encode(@object) @@ -250,8 +272,11 @@ module OpenTox end + # Serializer for spreadsheet formats class Spreadsheets # to avoid nameclash with Spreadsheet gem + # Create a new spreadsheet serializer + # @param [OpenTox::Dataset] dataset Dataset object def initialize(dataset) @rows = [] @rows << ["SMILES"] @@ -272,11 +297,15 @@ module OpenTox end end + # Convert to CSV string + # @return [String] CSV string def to_csv @rows.collect{|r| r.join(", ")}.join("\n") end - def to_xls + # Convert to spreadsheet workbook + # @return [Spreadsheet::Workbook] Workbook object (use the spreadsheet gem to write a file) + def to_spreadsheet Spreadsheet.client_encoding = 'UTF-8' book = Spreadsheet::Workbook.new sheet = book.create_worksheet(:name => '') -- cgit v1.2.3