From d6811507c1c1339cc4fe7cdb429b9b34b97dc422 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 22 Oct 2010 17:45:19 +0200 Subject: new API with support for external services (initial version) --- lib/dataset.rb | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 244 insertions(+), 29 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 2eb2206..7c8ce24 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -1,35 +1,253 @@ module OpenTox + # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset). + # + # Examples: + # require "opentox-ruby-api-wrapper" + # + # # Creating datasets + # + # # create an empty dataset + # dataset = OpenTox::Dataset.new + # # create an empty dataset with URI + # # this does not load data from the dataset service - use one of the load_* methods + # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") + # # create new dataset and sav it to obtain a URI + # dataset = OpenTox::Dataset.create + # # create a new dataset from yaml representation + # dataset = OpenTox::Dataset.from_yaml + # # create a new dataset from CSV string + # csv_string = "SMILES, Toxicity\nc1ccccc1N, true" + # dataset = OpenTox::Dataset.from_csv(csv_string) + # + # # Loading data + # # Datasets created with OpenTox::Dataset.new(uri) are empty by default + # # Invoking one of the following functions will load data into the object + # + # # create an empty dataset with URI + # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") + # # loads (and returns) only metadata + # dataset.load_metadata + # # loads (and returns) only compounds + # dataset.load_compounds + # # loads (and returns) only features + # dataset.load_features + # # load all data from URI + # dataset.load_all + # + # # Getting dataset representations + # + # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") + # 
dataset.load_all + # # OWL-DL (RDF/XML) + # dataset.rdfxml + # # OWL-DL (Ntriples) + # dataset.ntriples + # # YAML + # dataset.yaml + # # CSV + # dataset.csv + # + # # Modifying datasets + # + # # insert a statement (compound_uri,feature_uri,value) + # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true + # + # + # # Saving datasets + # # save dataset at dataset service + # dataset.save + # + # # Deleting datasets + # # delete dataset (also at dataset service) + # dataset.delete class Dataset - attr_accessor :uri, :title, :creator, :data, :features, :compounds + include OtObject - def initialize( owl=nil ) - @data = {} - @features = [] + attr_reader :features, :compounds, :data_entries, :metadata + attr_writer :metadata + + # Create dataset with optional URI + def initialize(uri=nil) + super uri + @features = {} @compounds = [] - + @data_entries = {} + end + + # Create and save an empty dataset (assigns URI to dataset) + def self.create(uri=CONFIG[:services]["opentox-dataset"]) + dataset = Dataset.new + dataset.uri = RestClientWrapper.post(uri,{}).to_s.chomp + dataset + end + + # Get all datasets from a service +# def self.all(uri=CONFIG[:services]["opentox-dataset"]) +# RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} +# end + + # Create a dataset from YAML string + def self.from_yaml(yaml) + dataset = Dataset.create + dataset.copy YAML.load(yaml) + dataset + end + + # Create dataset from CSV string (format specification: http://toxcreate.org/help) + # - loads data_entries, compounds, features + # - sets metadata (warnings) for parser errors + # - you will have to set remaining metadata manually + def self.from_csv(csv) + dataset = Dataset.create + Parser::Spreadsheet.new(dataset).load_csv(csv) + dataset + end + + # Create dataset from Spreadsheet book (created with roo gem 
http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)) + # - loads data_entries, compounds, features + # - sets metadata (warnings) for parser errors + # - you will have to set remaining metadata manually + def self.from_spreadsheet(book) + dataset = Dataset.create + Parser::Spreadsheet.new(dataset).load_excel(book) + dataset + end + + # Load and return metadata of a Dataset object + def load_metadata + #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) + #add_metadata YAML.load(RestClientWrapper.get(File.join(@uri,"metadata"), :accept => "application/x-yaml")) + #else + add_metadata Parser::Owl::Dataset.new(@uri).metadata + #end + self.uri = @uri if @uri # keep uri + @metadata + end + + # Load all data (metadata, data_entries, compounds and features) from URI + def load_all + if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) + copy YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml")) + else + parser = Parser::Owl::Dataset.new(@uri) + copy parser.load_uri + end + end + + # Load and return all compound URIs + def load_compounds + RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri| + @compounds << compound_uri.chomp + end + @compounds.uniq! 
+ end + + # Load all feature URIs + def load_features + RestClientWrapper.get(File.join(uri,"features"),:accept=> "text/uri-list").to_s.each_line do |feature_uri| + @features[feature_uri.chomp] = Feature.new(feature_uri.chomp).load_metadata + end + @features + end + + # Get YAML representation + def yaml + self.to_yaml + end + + # Get Excel representation, returns a Spreadsheet::Workbook which can be written with the 'spreadsheet' gem (data_entries only, metadata will ) + def excel + Serializer::Spreadsheets.new(self).excel + end + + # Get CSV string representation (data_entries only, metadata will be discarded) + def csv + Serializer::Spreadsheets.new(self).csv + end + + # Get OWL-DL in ntriples format + def ntriples + s = Serializer::Owl.new + s.add_dataset(self) + s.ntriples + end + + # Get OWL-DL in RDF/XML format + def rdfxml + s = Serializer::Owl.new + s.add_dataset(self) + s.rdfxml + end + + # Insert a statement (compound_uri,feature_uri,value) + def add (compound,feature,value) + @compounds << compound unless @compounds.include? compound + @features[feature] = {} unless @features[feature] + @data_entries[compound] = {} unless @data_entries[compound] + @data_entries[compound][feature] = [] unless @data_entries[compound][feature] + @data_entries[compound][feature] << value + end + + # Add metadata (hash with predicate_uri => value) + def add_metadata(metadata) + metadata.each { |k,v| @metadata[k] = v } + end + + # Copy a dataset (rewrites URI) + def copy(dataset) + @metadata = dataset.metadata + @data_entries = dataset.data_entries + @compounds = dataset.compounds + @features = dataset.features + if @uri + self.uri = @uri + else + @uri = dataset.metadata[XSD.anyUri] + end + end + + # save dataset (overwrites existing dataset) + def save + # TODO: rewrite feature URI's ?? + # create dataset if uri empty + @compounds.uniq! 
+ RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + end + + # Delete dataset at the dataset service + def delete + RestClientWrapper.delete @uri + end + end +end + + ######################################################### + # kept for backward compatibility, may have to be fixed # + ######################################################### + +=begin + def from_owl(owl) # creates dataset object from Opentox::Owl object # use Dataset.find( ) to load dataset from rdf-supporting datasetservice # note: does not load all feature values, as this is time consuming - if owl - raise "invalid param" unless owl.is_a?(OpenTox::Owl) - @title = owl.get("title") - @creator = owl.get("creator") - @uri = owl.uri - # when loading a dataset from owl, only compound- and feature-uris are loaded - owl.load_dataset(@compounds, @features) - # all features are marked as dirty - # as soon as a feature-value is requested all values for this feature are loaded from the rdf - @dirty_features = @features.dclone - @owl = owl - end + raise "invalid param" unless owl.is_a?(OpenTox::Owl) + @metadata[DC.title] = owl.get("title") + @metadata[DC.creator] = owl.get("creator") + @metadata[XSD.anyUri] = owl.uri + # when loading a dataset from owl, only compound- and feature-uris are loaded + owl.load_dataset(@compounds, @features) + # all features are marked as dirty + # as soon as a feature-value is requested all values for this feature are loaded from the rdf + @dirty_features = @features.dclone + @owl = owl end def self.find(uri, accept_header=nil) unless accept_header - if (@@config[:yaml_hosts].include?(URI.parse(uri).host)) + if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) accept_header = 'application/x-yaml' else accept_header = "application/rdf+xml" @@ -38,8 +256,10 @@ module OpenTox case accept_header when "application/x-yaml" + LOGGER.debug "DATASET: "+ uri + LOGGER.debug RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s d = 
YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s - d.uri = uri unless d.uri + #d.uri = @metadata[XSD.anyUri] unless d.uri when "application/rdf+xml" owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset") d = Dataset.new(owl) @@ -48,7 +268,7 @@ module OpenTox end d end - + # converts a dataset represented in owl to yaml # (uses a temporary dataset) # note: to_yaml is overwritten, loads complete owl dataset values @@ -108,7 +328,7 @@ module OpenTox raise "predicted class value is an array\n"+ "value "+v.to_s+"\n"+ "value-class "+v.class.to_s+"\n"+ - "dataset "+@uri.to_s+"\n"+ + "dataset "+self.uri.to_s+"\n"+ "compound "+compound.to_s+"\n"+ "feature "+feature.to_s+"\n" else @@ -130,7 +350,7 @@ module OpenTox raise "predicted regression value is an array\n"+ "value "+v.to_s+"\n"+ "value-class "+v.class.to_s+"\n"+ - "dataset "+@uri.to_s+"\n"+ + "dataset "+self.uri.to_s+"\n"+ "compound "+compound.to_s+"\n"+ "feature "+feature.to_s+"\n" else @@ -181,7 +401,7 @@ module OpenTox raise "value is not an array\n"+ "value "+v.to_s+"\n"+ "value-class "+v.class.to_s+"\n"+ - "dataset "+@uri.to_s+"\n"+ + "dataset "+self.uri.to_s+"\n"+ "compound "+compound.to_s+"\n"+ "feature "+feature.to_s+"\n" end @@ -216,11 +436,6 @@ module OpenTox super - ["@owl"] end - # saves (changes) as new dataset in dataset service - # returns uri - # uses to yaml method (which is overwritten) - def save - OpenTox::RestClientWrapper.post(@@config[:services]["opentox-dataset"],{:content_type => "application/x-yaml"},self.to_yaml).strip - end end end +=end -- cgit v1.2.3 From b93002b4ea50ff7e357da08abd10577347ce2d5f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 11 Nov 2010 09:31:27 +0100 Subject: first steps towards version 2.0, yard documentation started, passes compound, dataset, feature, algorithm, fminer tests --- lib/dataset.rb | 482 ++++++++++++++++++--------------------------------------- 1 file changed, 148 insertions(+), 334 deletions(-) (limited to 
'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 7c8ce24..05b2ed3 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -1,74 +1,19 @@ module OpenTox # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset). - # - # Examples: - # require "opentox-ruby-api-wrapper" - # - # # Creating datasets - # - # # create an empty dataset - # dataset = OpenTox::Dataset.new - # # create an empty dataset with URI - # # this does not load data from the dataset service - use one of the load_* methods - # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") - # # create new dataset and sav it to obtain a URI - # dataset = OpenTox::Dataset.create - # # create a new dataset from yaml representation - # dataset = OpenTox::Dataset.from_yaml - # # create a new dataset from CSV string - # csv_string = "SMILES, Toxicity\nc1ccccc1N, true" - # dataset = OpenTox::Dataset.from_csv(csv_string) - # - # # Loading data - # # Datasets created with OpenTox::Dataset.new(uri) are empty by default - # # Invoking one of the following functions will load data into the object - # - # # create an empty dataset with URI - # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") - # # loads (and returns) only metadata - # dataset.load_metadata - # # loads (and returns) only compounds - # dataset.load_compounds - # # loads (and returns) only features - # dataset.load_features - # # load all data from URI - # dataset.load_all - # - # # Getting dataset representations - # - # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") - # dataset.load_all - # # OWL-DL (RDF/XML) - # dataset.rdfxml - # # OWL-DL (Ntriples) - # dataset.ntriples - # # YAML - # dataset.yaml - # # CSV - # dataset.csv - # - # # Modifying datasets - # - # # insert a statement (compound_uri,feature_uri,value) - # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", 
"http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true - # - # - # # Saving datasets - # # save dataset at dataset service - # dataset.save - # - # # Deleting datasets - # # delete dataset (also at dataset service) - # dataset.delete class Dataset - include OtObject + include OpenTox attr_reader :features, :compounds, :data_entries, :metadata - attr_writer :metadata - # Create dataset with optional URI + # Create dataset with optional URI. Does not load data into the dataset - you will need to execute one of the load_* methods to pull data from a service or to insert it from other representations. + # @example Create an empty dataset + # dataset = OpenTox::Dataset.new + # @example Create an empty dataset with URI + # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") + # @param [optional, String] uri Dataset URI + # @return [OpenTox::Dataset] Dataset object def initialize(uri=nil) super uri @features = {} @@ -76,52 +21,79 @@ module OpenTox @data_entries = {} end - # Create and save an empty dataset (assigns URI to dataset) + # Create an empty dataset and save it at the dataset service (assigns URI to dataset) + # @example Create new dataset and save it to obtain a URI + # dataset = OpenTox::Dataset.create + # @param [optional, String] uri Dataset URI + # @return [OpenTox::Dataset] Dataset object def self.create(uri=CONFIG[:services]["opentox-dataset"]) dataset = Dataset.new - dataset.uri = RestClientWrapper.post(uri,{}).to_s.chomp + dataset.save + dataset + end + + # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading. 
+ # @param [String] uri Dataset URI + # @return [OpenTox::Dataset] Dataset object with all data + def self.find(uri) + dataset = Dataset.new(uri) + dataset.load_all dataset end # Get all datasets from a service -# def self.all(uri=CONFIG[:services]["opentox-dataset"]) -# RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} -# end + # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration + # @return [Array] Array of dataset object with all data + def self.all(uri=CONFIG[:services]["opentox-dataset"]) + RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} + end - # Create a dataset from YAML string - def self.from_yaml(yaml) - dataset = Dataset.create - dataset.copy YAML.load(yaml) - dataset + # Load YAML representation into the dataset + # @param [String] yaml YAML representation of the dataset + # @return [OpenTox::Dataset] Dataset object with YAML data + def load_yaml(yaml) + copy YAML.load(yaml) + end + + # Load RDF/XML representation from a file + # @param [String] file File with RDF/XML representation of the dataset + # @return [OpenTox::Dataset] Dataset object with RDF/XML data + def load_rdfxml_file(file) + parser = Parser::Owl::Dataset.new @uri + parser.uri = file.path + copy parser.load_uri end - # Create dataset from CSV string (format specification: http://toxcreate.org/help) + # Load CSV string (format specification: http://toxcreate.org/help) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually - def self.from_csv(csv) - dataset = Dataset.create - Parser::Spreadsheet.new(dataset).load_csv(csv) - dataset + # @param [String] csv CSV representation of the dataset + # @return [OpenTox::Dataset] Dataset object with CSV data + def load_csv(csv) + save unless @uri # get a uri for creating features + parser = Parser::Spreadsheets.new + 
parser.dataset = self + parser.load_csv(csv) end - # Create dataset from Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)) + # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually - def self.from_spreadsheet(book) - dataset = Dataset.create - Parser::Spreadsheet.new(dataset).load_excel(book) - dataset + # @param [Excel] book Excel workbook object (created with roo gem) + # @return [OpenTox::Dataset] Dataset object with Excel data + def load_spreadsheet(book) + save unless @uri # get a uri for creating features + parser = Parser::Spreadsheets.new + parser.dataset = self + parser.load_excel(book) end - # Load and return metadata of a Dataset object + # Load and return only metadata of a Dataset object + # @return [Hash] Metadata of the dataset def load_metadata - #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - #add_metadata YAML.load(RestClientWrapper.get(File.join(@uri,"metadata"), :accept => "application/x-yaml")) - #else - add_metadata Parser::Owl::Dataset.new(@uri).metadata - #end + add_metadata Parser::Owl::Dataset.new(@uri).metadata self.uri = @uri if @uri # keep uri @metadata end @@ -136,7 +108,8 @@ module OpenTox end end - # Load and return all compound URIs + # Load and return only compound URIs from the dataset service + # @return [Array] Compound URIs in the dataset def load_compounds RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri| @compounds << compound_uri.chomp @@ -144,44 +117,75 @@ module OpenTox @compounds.uniq! 
end - # Load all feature URIs + # Load and return only features from the dataset service + # @return [Hash] Features of the dataset def load_features - RestClientWrapper.get(File.join(uri,"features"),:accept=> "text/uri-list").to_s.each_line do |feature_uri| - @features[feature_uri.chomp] = Feature.new(feature_uri.chomp).load_metadata - end + parser = Parser::Owl::Dataset.new(@uri) + @features = parser.load_features @features end - # Get YAML representation - def yaml - self.to_yaml + # Detect feature type(s) in the dataset + # @return [String] `classification", "regression", "mixed" or unknown` + def feature_type + feature_types = @features.collect{|f,metadata| metadata[OT.isA]}.uniq + LOGGER.debug "FEATURES" + LOGGER.debug feature_types.inspect + if feature_types.size > 1 + "mixed" + else + case feature_types.first + when /NominalFeature/ + "classification" + when /NumericFeature/ + "regression" + else + "unknown" + end + end end - # Get Excel representation, returns a Spreadsheet::Workbook which can be written with the 'spreadsheet' gem (data_entries only, metadata will ) - def excel - Serializer::Spreadsheets.new(self).excel + # Get Excel representation + # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded)) + def to_xls + Serializer::Spreadsheets.new(self).to_xls end # Get CSV string representation (data_entries only, metadata will be discarded) - def csv - Serializer::Spreadsheets.new(self).csv + # @return [String] CSV representation + def to_csv + Serializer::Spreadsheets.new(self).to_csv end # Get OWL-DL in ntriples format - def ntriples + # @return [String] N-Triples representation + def to_ntriples s = Serializer::Owl.new s.add_dataset(self) - s.ntriples + s.to_ntriples end # Get OWL-DL in RDF/XML format - def rdfxml + # @return [String] RDF/XML representation + def to_rdfxml s = Serializer::Owl.new s.add_dataset(self) - s.rdfxml + s.to_rdfxml + end + + # Get name 
(DC.title) of a feature + # @param [String] feature Feature URI + # @return [String] Feture title + def feature_name(feature) + @features[feature][DC.title] end # Insert a statement (compound_uri,feature_uri,value) + # @example Insert a statement (compound_uri,feature_uri,value) + # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true + # @param [String] compound Compound URI + # @param [String] feature Compound URI + # @param [Boolean,Float] value Feature value def add (compound,feature,value) @compounds << compound unless @compounds.include? compound @features[feature] = {} unless @features[feature] @@ -190,252 +194,62 @@ module OpenTox @data_entries[compound][feature] << value end - # Add metadata (hash with predicate_uri => value) + # Add/modify metadata, existing entries will be overwritten + # @example + # dataset.add_metadata({DC.title => "any_title", DC.creator => "my_email"}) + # @param [Hash] metadata Hash mapping predicate_uris to values def add_metadata(metadata) metadata.each { |k,v| @metadata[k] = v } end - # Copy a dataset (rewrites URI) - def copy(dataset) - @metadata = dataset.metadata - @data_entries = dataset.data_entries - @compounds = dataset.compounds - @features = dataset.features - if @uri - self.uri = @uri - else - @uri = dataset.metadata[XSD.anyUri] - end + # Add a feature + # @param [String] feature Feature URI + # @param [Hash] metadata Hash with feature metadata + def add_feature(feature,metadata={}) + @features[feature] = metadata end - # save dataset (overwrites existing dataset) + # Add/modify metadata for a feature + # @param [String] feature Feature URI + # @param [Hash] metadata Hash with feature metadata + def add_feature_metadata(feature,metadata) + metadata.each { |k,v| @features[feature][k] = v } + end + + # Save dataset at the dataset service + # - creates a new dataset if uri is not set + # - 
overwrites dataset if uri exists + # @return [String] Dataset URI def save # TODO: rewrite feature URI's ?? - # create dataset if uri empty @compounds.uniq! - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + if @uri + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + else + # create dataset if uri is empty + self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + end + @uri end # Delete dataset at the dataset service def delete RestClientWrapper.delete @uri end - end -end - - ######################################################### - # kept for backward compatibility, may have to be fixed # - ######################################################### - -=begin - def from_owl(owl) - # creates dataset object from Opentox::Owl object - # use Dataset.find( ) to load dataset from rdf-supporting datasetservice - # note: does not load all feature values, as this is time consuming - raise "invalid param" unless owl.is_a?(OpenTox::Owl) - @metadata[DC.title] = owl.get("title") - @metadata[DC.creator] = owl.get("creator") - @metadata[XSD.anyUri] = owl.uri - # when loading a dataset from owl, only compound- and feature-uris are loaded - owl.load_dataset(@compounds, @features) - # all features are marked as dirty - # as soon as a feature-value is requested all values for this feature are loaded from the rdf - @dirty_features = @features.dclone - @owl = owl - end - - def self.find(uri, accept_header=nil) - - unless accept_header - if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) - accept_header = 'application/x-yaml' - else - accept_header = "application/rdf+xml" - end - end - - case accept_header - when "application/x-yaml" - LOGGER.debug "DATASET: "+ uri - LOGGER.debug RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s - d = YAML.load 
RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s - #d.uri = @metadata[XSD.anyUri] unless d.uri - when "application/rdf+xml" - owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset") - d = Dataset.new(owl) - else - raise "cannot get datset with accept header: "+accept_header.to_s - end - d - end - # converts a dataset represented in owl to yaml - # (uses a temporary dataset) - # note: to_yaml is overwritten, loads complete owl dataset values - def self.owl_to_yaml( owl_data, uri) - owl = OpenTox::Owl.from_data(owl_data, uri, "Dataset") - d = Dataset.new(owl) - d.to_yaml - end - - # creates a new dataset, using only those compounsd specified in new_compounds - # returns uri of new dataset - def create_new_dataset( new_compounds, new_features, new_title, new_creator ) - - LOGGER.debug "create new dataset with "+new_compounds.size.to_s+"/"+compounds.size.to_s+" compounds" - raise "no new compounds selected" unless new_compounds and new_compounds.size>0 - - # load require features - if ((defined? @dirty_features) && (@dirty_features & new_features).size > 0) - (@dirty_features & new_features).each{|f| load_feature_values(f)} - end - - dataset = OpenTox::Dataset.new - dataset.title = new_title - dataset.creator = new_creator - dataset.features = new_features - dataset.compounds = new_compounds - - # Copy dataset data for compounds and features - # PENDING: why storing feature values in an array? - new_compounds.each do |c| - data_c = [] - raise "no data for compound '"+c.to_s+"'" if @data[c]==nil - @data[c].each do |d| - m = {} - new_features.each do |f| - m[f] = d[f] - end - data_c << m - end - dataset.data[c] = data_c - end - return dataset.save - end - - # returns classification value - def get_predicted_class(compound, feature) - v = get_value(compound, feature) - if v.is_a?(Hash) - k = v.keys.grep(/classification/).first - unless k.empty? 
- #if v.has_key?(:classification) - return v[k] - else - return "no classification key" - end - elsif v.is_a?(Array) - raise "predicted class value is an array\n"+ - "value "+v.to_s+"\n"+ - "value-class "+v.class.to_s+"\n"+ - "dataset "+self.uri.to_s+"\n"+ - "compound "+compound.to_s+"\n"+ - "feature "+feature.to_s+"\n" - else - return v - end - end - - # returns regression value - def get_predicted_regression(compound, feature) - v = get_value(compound, feature) - if v.is_a?(Hash) - k = v.keys.grep(/regression/).first - unless k.empty? - return v[k] - else - return "no regression key" - end - elsif v.is_a?(Array) - raise "predicted regression value is an array\n"+ - "value "+v.to_s+"\n"+ - "value-class "+v.class.to_s+"\n"+ - "dataset "+self.uri.to_s+"\n"+ - "compound "+compound.to_s+"\n"+ - "feature "+feature.to_s+"\n" - else - return v - end - end - - # returns prediction confidence if available - def get_prediction_confidence(compound, feature) - v = get_value(compound, feature) - if v.is_a?(Hash) - k = v.keys.grep(/confidence/).first - unless k.empty? - #if v.has_key?(:confidence) - return v[k].abs - #return v["http://ot-dev.in-silico.ch/model/lazar#confidence"].abs - else - # PENDING: return nil isntead of raising an exception - raise "no confidence key" - end - else - LOGGER.warn "no confidence for compound: "+compound.to_s+", feature: "+feature.to_s - return 1 - end - end - - # return compound-feature value - def get_value(compound, feature) - if (defined? @dirty_features) && @dirty_features.include?(feature) - load_feature_values(feature) - end - - v = @data[compound] - return nil if v == nil # missing values for all features - if v.is_a?(Array) - # PENDING: why using an array here? 
- v.each do |e| - if e.is_a?(Hash) - if e.has_key?(feature) - return e[feature] - end - else - raise "invalid internal value type" - end - end - return nil #missing value - else - raise "value is not an array\n"+ - "value "+v.to_s+"\n"+ - "value-class "+v.class.to_s+"\n"+ - "dataset "+self.uri.to_s+"\n"+ - "compound "+compound.to_s+"\n"+ - "feature "+feature.to_s+"\n" - end - end - - # loads specified feature and removes dirty-flag, loads all features if feature is nil - def load_feature_values(feature=nil) - if feature - raise "feature already loaded" unless @dirty_features.include?(feature) - @owl.load_dataset_feature_values(@compounds, @data, [feature]) - @dirty_features.delete(feature) + private + # Copy a dataset (rewrites URI) + def copy(dataset) + @metadata = dataset.metadata + @data_entries = dataset.data_entries + @compounds = dataset.compounds + @features = dataset.features + if @uri + self.uri = @uri else - @data = {} unless @data - @owl.load_dataset_feature_values(@compounds, @data, @dirty_features) - @dirty_features.clear + @uri = dataset.metadata[XSD.anyURI] end end - - # overwrite to yaml: - # in case dataset is loaded from owl: - # * load all values - def to_yaml - # loads all features - if ((defined? 
@dirty_features) && @dirty_features.size > 0) - load_feature_values - end - super - end - - # * remove @owl from yaml, not necessary - def to_yaml_properties - super - ["@owl"] - end - end end -=end -- cgit v1.2.3 From 91c95f8dc8f60a8f0029b970ef881eecee28401b Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 11 Nov 2010 10:42:48 +0100 Subject: Documentation and API fixes for serializer and parser --- lib/dataset.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 05b2ed3..6e270e9 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -77,7 +77,7 @@ module OpenTox parser.load_csv(csv) end - # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)) + # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually @@ -87,13 +87,13 @@ module OpenTox save unless @uri # get a uri for creating features parser = Parser::Spreadsheets.new parser.dataset = self - parser.load_excel(book) + parser.load_spreadsheet(book) end # Load and return only metadata of a Dataset object # @return [Hash] Metadata of the dataset def load_metadata - add_metadata Parser::Owl::Dataset.new(@uri).metadata + add_metadata Parser::Owl::Dataset.new(@uri).load_metadata self.uri = @uri if @uri # keep uri @metadata end @@ -147,8 +147,8 @@ module OpenTox # Get Excel representation # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded)) - def to_xls - Serializer::Spreadsheets.new(self).to_xls + def to_spreadsheet + Serializer::Spreadsheets.new(self).to_spreadsheet end # Get CSV string representation (data_entries only, metadata will be discarded) 
-- cgit v1.2.3 From f8552611c2dbe25d76474f51e4e895bf9c2b5c5e Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 19 Nov 2010 16:53:21 +0100 Subject: lazar predictions for toxcreate working --- lib/dataset.rb | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 6 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 6e270e9..4737ea1 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -43,7 +43,7 @@ module OpenTox # Get all datasets from a service # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration - # @return [Array] Array of dataset object with all data + # @return [Array] Array of dataset object without data (use one of the load_* methods to pull data from the server) def self.all(uri=CONFIG[:services]["opentox-dataset"]) RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} end @@ -55,6 +55,10 @@ module OpenTox copy YAML.load(yaml) end + def load_rdfxml(rdfxml) + load_rdfxml_file Tempfile.open("ot-rdfxml"){|f| f.write(rdfxml)}.path + end + # Load RDF/XML representation from a file # @param [String] file File with RDF/XML representation of the dataset # @return [OpenTox::Dataset] Dataset object with RDF/XML data @@ -129,8 +133,6 @@ module OpenTox # @return [String] `classification", "regression", "mixed" or unknown` def feature_type feature_types = @features.collect{|f,metadata| metadata[OT.isA]}.uniq - LOGGER.debug "FEATURES" - LOGGER.debug feature_types.inspect if feature_types.size > 1 "mixed" else @@ -145,12 +147,18 @@ module OpenTox end end - # Get Excel representation + # Get Spreadsheet representation # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded)) def to_spreadsheet Serializer::Spreadsheets.new(self).to_spreadsheet end + # Get Excel representation (alias for to_spreadsheet) + # 
@return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will be discarded) + def to_xls + to_spreadsheet + end + # Get CSV string representation (data_entries only, metadata will be discarded) # @return [String] CSV representation def to_csv @@ -180,6 +188,10 @@ module OpenTox @features[feature][DC.title] end + def title + @metadata[DC.title] + end + # Insert a statement (compound_uri,feature_uri,value) # @example Insert a statement (compound_uri,feature_uri,value) # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true @@ -224,11 +236,18 @@ module OpenTox # TODO: rewrite feature URI's ?? @compounds.uniq! if @uri - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + else + File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path } + task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list"}).to_s.chomp + #task_uri = `curl -X POST -H "Accept:text/uri-list" -F "file=@#{@path};type=application/rdf+xml" http://apps.ideaconsult.net:8080/ambit2/dataset` + Task.find(task_uri).wait_for_completion + self.uri = RestClientWrapper.get(task_uri,:accept => 'text/uri-list') + end else # create dataset if uri is empty self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) end @uri end @@ -252,4 +271,45 @@ module OpenTox end end end + + # Class with special methods for lazar prediction datasets + class LazarPrediction < Dataset + + # Find a prediction dataset and load all data.
+ # @param [String] uri Prediction dataset URI + # @return [OpenTox::Dataset] Prediction dataset object with all data + def self.find(uri) + prediction = LazarPrediction.new(uri) + prediction.load_all + prediction + end + + def value(compound) + @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first + end + + def confidence(compound) + feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first + @features[feature_uri][OT.confidence] + end + + def descriptors(compound) + @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/descriptor/)}.compact if @data_entries[compound.uri] + end + + def measured_activities(compound) + source = @metadata[OT.hasSource] + @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact + end + + def neighbors(compound) + @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact + end + +# def errors(compound) +# features = @data_entries[compound.uri].keys +# features.collect{|f| @features[f][OT.error]}.join(" ") if features +# end + + end end -- cgit v1.2.3 From 7067bd44d5c97618ec6a968bbdfe6d6bda12a1cd Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Wed, 24 Nov 2010 13:13:40 +0100 Subject: opentox-ruby-api-wrapper renamed to opentox-ruby --- lib/dataset.rb | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 4737ea1..c5704ae 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -32,6 +32,21 @@ module OpenTox dataset end + # Create dataset from CSV file (format specification: http://toxcreate.org/help) + # - loads data_entries, compounds, features + # - sets metadata (warnings) for parser errors + # - you will have to set remaining metadata manually + # @param [String] file CSV file path + # @return [OpenTox::Dataset] Dataset object with CSV data + def self.create_from_csv_file(file) + dataset = 
Dataset.create + parser = Parser::Spreadsheets.new + parser.dataset = dataset + parser.load_csv(File.open(file).read) + dataset.save + dataset + end + # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading. # @param [String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object with all data @@ -299,7 +314,7 @@ module OpenTox def measured_activities(compound) source = @metadata[OT.hasSource] - @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact + @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten end def neighbors(compound) -- cgit v1.2.3 From c4504c72ffb2920de65399a2dc0a2c29fe04a52d Mon Sep 17 00:00:00 2001 From: mr Date: Thu, 9 Dec 2010 10:46:13 +0100 Subject: A&A implementation --- lib/dataset.rb | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index c5704ae..bbd8b8b 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -6,6 +6,7 @@ module OpenTox include OpenTox attr_reader :features, :compounds, :data_entries, :metadata + attr_accessor :token_id # Create dataset with optional URI. Does not load data into the dataset - you will need to execute one of the load_* methods to pull data from a service or to insert it from other representations. 
# @example Create an empty dataset @@ -14,8 +15,9 @@ module OpenTox # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") # @param [optional, String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object - def initialize(uri=nil) + def initialize(uri=nil,token_id=nil) super uri + @token_id = token_id @features = {} @compounds = [] @data_entries = {} @@ -26,8 +28,9 @@ module OpenTox # dataset = OpenTox::Dataset.create # @param [optional, String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object - def self.create(uri=CONFIG[:services]["opentox-dataset"]) + def self.create(uri=CONFIG[:services]["opentox-dataset"], token_id=nil) dataset = Dataset.new + dataset.token_id = token_id if token_id dataset.save dataset end @@ -252,7 +255,7 @@ module OpenTox @compounds.uniq! if @uri if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml", :token_id => @token_id},self.to_yaml) else File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path } task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list"}).to_s.chomp @@ -262,7 +265,7 @@ module OpenTox end else # create dataset if uri is empty - self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp + self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:token_id => @token_id}).to_s.chomp end @uri end @@ -279,6 +282,7 @@ module OpenTox @data_entries = dataset.data_entries @compounds = dataset.compounds @features = dataset.features + @token_id = dataset.token_id if @uri self.uri = @uri else -- cgit v1.2.3 From 4c2470353a1e3b69b4260d0052c9c48137ef76d3 Mon Sep 17 00:00:00 2001 From: mr Date: Tue, 14 Dec 2010 12:30:20 +0100 Subject: remove token_id from tables in database --- lib/dataset.rb | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 
deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index bbd8b8b..b7feeec 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -6,7 +6,6 @@ module OpenTox include OpenTox attr_reader :features, :compounds, :data_entries, :metadata - attr_accessor :token_id # Create dataset with optional URI. Does not load data into the dataset - you will need to execute one of the load_* methods to pull data from a service or to insert it from other representations. # @example Create an empty dataset @@ -15,9 +14,8 @@ module OpenTox # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") # @param [optional, String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object - def initialize(uri=nil,token_id=nil) + def initialize(uri=nil) super uri - @token_id = token_id @features = {} @compounds = [] @data_entries = {} @@ -30,8 +28,7 @@ module OpenTox # @return [OpenTox::Dataset] Dataset object def self.create(uri=CONFIG[:services]["opentox-dataset"], token_id=nil) dataset = Dataset.new - dataset.token_id = token_id if token_id - dataset.save + dataset.save(token_id) dataset end @@ -250,22 +247,22 @@ module OpenTox # - creates a new dataset if uri is not set # - overwrites dataset if uri exists # @return [String] Dataset URI - def save + def save(token_id=nil) # TODO: rewrite feature URI's ?? @compounds.uniq! 
if @uri if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml", :token_id => @token_id},self.to_yaml) + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml", :token_id => token_id},self.to_yaml) else File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path } - task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list"}).to_s.chomp + task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list" , :token_id => token_id}).to_s.chomp #task_uri = `curl -X POST -H "Accept:text/uri-list" -F "file=@#{@path};type=application/rdf+xml" http://apps.ideaconsult.net:8080/ambit2/dataset` Task.find(task_uri).wait_for_completion self.uri = RestClientWrapper.get(task_uri,:accept => 'text/uri-list') end else # create dataset if uri is empty - self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:token_id => @token_id}).to_s.chomp + self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:token_id => token_id}).to_s.chomp end @uri end @@ -282,7 +279,6 @@ module OpenTox @data_entries = dataset.data_entries @compounds = dataset.compounds @features = dataset.features - @token_id = dataset.token_id if @uri self.uri = @uri else -- cgit v1.2.3 From 2fb2f4cd34f499f8c9def5e4091cb5998794c595 Mon Sep 17 00:00:00 2001 From: mr Date: Tue, 14 Dec 2010 16:39:01 +0100 Subject: rename token_id to subjectid --- lib/dataset.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index b7feeec..7c70c9d 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -26,9 +26,9 @@ module OpenTox # dataset = OpenTox::Dataset.create # @param [optional, String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object - def self.create(uri=CONFIG[:services]["opentox-dataset"], token_id=nil) + def 
self.create(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil) dataset = Dataset.new - dataset.save(token_id) + dataset.save(subjectid) dataset end @@ -247,22 +247,22 @@ module OpenTox # - creates a new dataset if uri is not set # - overwrites dataset if uri exists # @return [String] Dataset URI - def save(token_id=nil) + def save(subjectid=nil) # TODO: rewrite feature URI's ?? @compounds.uniq! if @uri if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml", :token_id => token_id},self.to_yaml) + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml", :subjectid => subjectid},self.to_yaml) else File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path } - task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list" , :token_id => token_id}).to_s.chomp + task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list" , :subjectid => subjectid}).to_s.chomp #task_uri = `curl -X POST -H "Accept:text/uri-list" -F "file=@#{@path};type=application/rdf+xml" http://apps.ideaconsult.net:8080/ambit2/dataset` Task.find(task_uri).wait_for_completion self.uri = RestClientWrapper.get(task_uri,:accept => 'text/uri-list') end else # create dataset if uri is empty - self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:token_id => token_id}).to_s.chomp + self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:subjectid => subjectid}).to_s.chomp end @uri end -- cgit v1.2.3 From b22110ae0d8e902d700e0a3dc629ebfde1edfe10 Mon Sep 17 00:00:00 2001 From: mr Date: Thu, 16 Dec 2010 10:59:46 +0100 Subject: A&A --- lib/dataset.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 7c70c9d..aba7754 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -38,12 +38,12 @@ module OpenTox # - you will have to 
set remaining metadata manually # @param [String] file CSV file path # @return [OpenTox::Dataset] Dataset object with CSV data - def self.create_from_csv_file(file) - dataset = Dataset.create + def self.create_from_csv_file(file, subjectid=nil) + dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) parser = Parser::Spreadsheets.new parser.dataset = dataset parser.load_csv(File.open(file).read) - dataset.save + dataset.save(subjectid) dataset end @@ -89,8 +89,8 @@ module OpenTox # - you will have to set remaining metadata manually # @param [String] csv CSV representation of the dataset # @return [OpenTox::Dataset] Dataset object with CSV data - def load_csv(csv) - save unless @uri # get a uri for creating features + def load_csv(csv, subjectid=nil) + save(subjectid) unless @uri # get a uri for creating features parser = Parser::Spreadsheets.new parser.dataset = self parser.load_csv(csv) @@ -102,8 +102,8 @@ module OpenTox # - you will have to set remaining metadata manually # @param [Excel] book Excel workbook object (created with roo gem) # @return [OpenTox::Dataset] Dataset object with Excel data - def load_spreadsheet(book) - save unless @uri # get a uri for creating features + def load_spreadsheet(book, subjectid=nil) + save(subjectid) unless @uri # get a uri for creating features parser = Parser::Spreadsheets.new parser.dataset = self parser.load_spreadsheet(book) @@ -268,8 +268,8 @@ module OpenTox end # Delete dataset at the dataset service - def delete - RestClientWrapper.delete @uri + def delete(subjectid=nil) + RestClientWrapper.delete(@uri, :subjectid => subjectid) end private -- cgit v1.2.3 From 4c089275d34ba42014e1add97a41ccf351790260 Mon Sep 17 00:00:00 2001 From: mr Date: Wed, 5 Jan 2011 10:30:54 +0100 Subject: Authorization for GET requests --- lib/dataset.rb | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 
aba7754..52b41a7 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -14,7 +14,7 @@ module OpenTox # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") # @param [optional, String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object - def initialize(uri=nil) + def initialize(uri=nil,subjectid=nil) super uri @features = {} @compounds = [] @@ -27,7 +27,7 @@ module OpenTox # @param [optional, String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object def self.create(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil) - dataset = Dataset.new + dataset = Dataset.new(nil,subjectid) dataset.save(subjectid) dataset end @@ -50,17 +50,17 @@ module OpenTox # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading. # @param [String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object with all data - def self.find(uri) - dataset = Dataset.new(uri) - dataset.load_all + def self.find(uri, subjectid=nil) + dataset = Dataset.new(uri, subjectid) + dataset.load_all(subjectid) dataset end # Get all datasets from a service # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration # @return [Array] Array of dataset object without data (use one of the load_* methods to pull data from the server) - def self.all(uri=CONFIG[:services]["opentox-dataset"]) - RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} + def self.all(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil) + RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u)} end # Load YAML representation into the dataset @@ -118,9 +118,9 @@ module OpenTox end # Load all data (metadata, data_entries, compounds and features) from URI - def load_all + def load_all(subjectid=nil) if 
(CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - copy YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml")) + copy YAML.load(RestClientWrapper.get(@uri, {:accept => "application/x-yaml", :subjectid => subjectid})) else parser = Parser::Owl::Dataset.new(@uri) copy parser.load_uri @@ -129,8 +129,8 @@ module OpenTox # Load and return only compound URIs from the dataset service # @return [Array] Compound URIs in the dataset - def load_compounds - RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri| + def load_compounds(subjectid=nil) + RestClientWrapper.get(File.join(uri,"compounds"),{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri| @compounds << compound_uri.chomp end @compounds.uniq! @@ -258,7 +258,7 @@ module OpenTox task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list" , :subjectid => subjectid}).to_s.chomp #task_uri = `curl -X POST -H "Accept:text/uri-list" -F "file=@#{@path};type=application/rdf+xml" http://apps.ideaconsult.net:8080/ambit2/dataset` Task.find(task_uri).wait_for_completion - self.uri = RestClientWrapper.get(task_uri,:accept => 'text/uri-list') + self.uri = RestClientWrapper.get(task_uri,{:accept => 'text/uri-list', :subjectid => subjectid}) end else # create dataset if uri is empty @@ -293,9 +293,9 @@ module OpenTox # Find a prediction dataset and load all data. 
# @param [String] uri Prediction dataset URI # @return [OpenTox::Dataset] Prediction dataset object with all data - def self.find(uri) - prediction = LazarPrediction.new(uri) - prediction.load_all + def self.find(uri, subjectid=nil) + prediction = LazarPrediction.new(uri, subjectid) + prediction.load_all(subjectid) prediction end -- cgit v1.2.3 From 2aafed7543287c420a5aa2e751b8c74ad771d14c Mon Sep 17 00:00:00 2001 From: mr Date: Thu, 13 Jan 2011 12:01:19 +0100 Subject: A&A for GET requests --- lib/dataset.rb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 52b41a7..a85c2b5 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -60,7 +60,7 @@ module OpenTox # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration # @return [Array] Array of dataset object without data (use one of the load_* methods to pull data from the server) def self.all(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil) - RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u)} + RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u, subjectid)} end # Load YAML representation into the dataset @@ -77,10 +77,10 @@ module OpenTox # Load RDF/XML representation from a file # @param [String] file File with RDF/XML representation of the dataset # @return [OpenTox::Dataset] Dataset object with RDF/XML data - def load_rdfxml_file(file) - parser = Parser::Owl::Dataset.new @uri + def load_rdfxml_file(file, subjectid=nil) + parser = Parser::Owl::Dataset.new @uri, subjectid parser.uri = file.path - copy parser.load_uri + copy parser.load_uri(subjectid) end # Load CSV string (format specification: http://toxcreate.org/help) @@ -111,8 +111,8 @@ module OpenTox # Load and return only metadata of a Dataset object # @return 
[Hash] Metadata of the dataset - def load_metadata - add_metadata Parser::Owl::Dataset.new(@uri).load_metadata + def load_metadata(subjectid=nil) + add_metadata Parser::Owl::Dataset.new(@uri, subjectid).load_metadata(subjectid) self.uri = @uri if @uri # keep uri @metadata end @@ -122,8 +122,8 @@ module OpenTox if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) copy YAML.load(RestClientWrapper.get(@uri, {:accept => "application/x-yaml", :subjectid => subjectid})) else - parser = Parser::Owl::Dataset.new(@uri) - copy parser.load_uri + parser = Parser::Owl::Dataset.new(@uri, subjectid) + copy parser.load_uri(subjectid) end end @@ -138,9 +138,9 @@ module OpenTox # Load and return only features from the dataset service # @return [Hash] Features of the dataset - def load_features - parser = Parser::Owl::Dataset.new(@uri) - @features = parser.load_features + def load_features(subjectid=nil) + parser = Parser::Owl::Dataset.new(@uri, subjectid) + @features = parser.load_features(subjectid) @features end -- cgit v1.2.3 From f2ca545448ab8a6f654309f23cfce9416b2e9856 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Thu, 13 Jan 2011 14:02:58 +0100 Subject: find methods for algorithm and model, split method for dataset, feature_type method for model and feature, perform single predicitons in resuce block, add to-html.rb, fix handling of rest-client-wrapper --- lib/dataset.rb | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index aba7754..d45c821 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -46,7 +46,7 @@ module OpenTox dataset.save(subjectid) dataset end - + # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading. 
# @param [String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object with all data @@ -242,6 +242,37 @@ module OpenTox def add_feature_metadata(feature,metadata) metadata.each { |k,v| @features[feature][k] = v } end + + # Add a new compound + # @param [String] compound Compound URI + def add_compound (compound) + @compounds << compound unless @compounds.include? compound + end + + # Creates a new dataset, by splitting the current dataset, i.e. using only a subset of compounds and features + # @param [Array] compounds List of compound URIs + # @param [Array] features List of feature URIs + # @param [Hash] metadata Hash containing the metadata for the new dataset + # @return [OpenTox::Dataset] newly created dataset, already saved + def split( compounds, features, metadata) + LOGGER.debug "split dataset using "+compounds.size.to_s+"/"+@compounds.size.to_s+" compounds" + raise "no new compounds selected" unless compounds and compounds.size>0 + dataset = OpenTox::Dataset.create + if features.size==0 + compounds.each{ |c| dataset.add_compound(c) } + else + compounds.each do |c| + features.each do |f| + @data_entries[c][f].each do |v| + dataset.add(c,f,v) + end + end + end + end + dataset.add_metadata(metadata) + dataset.save + dataset + end # Save dataset at the dataset service # - creates a new dataset if uri is not set -- cgit v1.2.3 From 23d96df630689d122c023d76ec1d40d7688d2c96 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 19 Jan 2011 15:59:12 +0100 Subject: extend authorization and rdf serialization for validation --- lib/dataset.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index d45c821..ae86f5f 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -253,11 +253,12 @@ module OpenTox # @param [Array] compounds List of compound URIs # @param [Array] features List of feature URIs # @param [Hash] metadata Hash containing the metadata for the new dataset + # @param 
[String] subjectid # @return [OpenTox::Dataset] newly created dataset, already saved - def split( compounds, features, metadata) + def split( compounds, features, metadata, subjectid=nil) LOGGER.debug "split dataset using "+compounds.size.to_s+"/"+@compounds.size.to_s+" compounds" raise "no new compounds selected" unless compounds and compounds.size>0 - dataset = OpenTox::Dataset.create + dataset = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid) if features.size==0 compounds.each{ |c| dataset.add_compound(c) } else @@ -270,7 +271,7 @@ module OpenTox end end dataset.add_metadata(metadata) - dataset.save + dataset.save(subjectid) dataset end -- cgit v1.2.3 From e1a067953dd9139b01aaebe42ff158a944240540 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 28 Jan 2011 12:20:08 +0100 Subject: extend whitelisting, get feature_type from algorithm --- lib/dataset.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 640e3da..9c20968 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -51,6 +51,7 @@ module OpenTox # @param [String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object with all data def self.find(uri, subjectid=nil) + return nil unless uri dataset = Dataset.new(uri, subjectid) dataset.load_all(subjectid) dataset -- cgit v1.2.3 From 70aee6e9dfece2760fc6d616e7151f41cc7625bf Mon Sep 17 00:00:00 2001 From: mguetlein Date: Wed, 2 Feb 2011 17:11:24 +0100 Subject: resclient wrapper: headers <-> payload, error report from rdf --- lib/dataset.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 9c20968..a4716dc 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -285,7 +285,7 @@ module OpenTox @compounds.uniq! 
if @uri if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml", :subjectid => subjectid},self.to_yaml) + RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid}) else File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path } task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list" , :subjectid => subjectid}).to_s.chomp -- cgit v1.2.3 From 9848197e9e0830c569ae2addcd404c59c0a53180 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 7 Feb 2011 10:07:34 +0100 Subject: add Dataset.exist as find loads all data --- lib/dataset.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index a4716dc..02b89cb 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -56,6 +56,19 @@ module OpenTox dataset.load_all(subjectid) dataset end + + # replaces find as exist check, takes not as long, does NOT raise an un-authorized exception + # @param [String] uri Dataset URI + # @return [Boolean] true if dataset exists and user has get rights, false else + def self.exist?(uri, subjectid=nil) + return false unless uri + dataset = Dataset.new(uri, subjectid) + begin + dataset.load_metadata( subjectid ).size > 0 + rescue + false + end + end # Get all datasets from a service # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration -- cgit v1.2.3 From 3c7e4de0e3f4c9bbf8df55c88f155b40f575b3ab Mon Sep 17 00:00:00 2001 From: mguetlein Date: Fri, 11 Feb 2011 10:51:01 +0100 Subject: fix read from rdf file --- lib/dataset.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 02b89cb..a843cea 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -85,7 +85,12 @@ module OpenTox end def load_rdfxml(rdfxml) - load_rdfxml_file 
Tempfile.open("ot-rdfxml"){|f| f.write(rdfxml)}.path + raise "rdfxml data is empty" if rdfxml.to_s.size==0 + file = Tempfile.new("ot-rdfxml") + file.puts rdfxml + file.close + load_rdfxml_file file + file.delete end # Load RDF/XML representation from a file -- cgit v1.2.3 From 8921d20b9d399274b0674794301ff3567ac7c816 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 14 Feb 2011 18:01:42 +0100 Subject: handle nil values in split --- lib/dataset.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index a843cea..a0f99b1 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -237,7 +237,7 @@ module OpenTox @features[feature] = {} unless @features[feature] @data_entries[compound] = {} unless @data_entries[compound] @data_entries[compound][feature] = [] unless @data_entries[compound][feature] - @data_entries[compound][feature] << value + @data_entries[compound][feature] << value unless value end # Add/modify metadata, existing entries will be overwritten @@ -283,8 +283,12 @@ module OpenTox else compounds.each do |c| features.each do |f| - @data_entries[c][f].each do |v| - dataset.add(c,f,v) + unless @data_entries[c][f] + dataset.add(c,f,nil) + else + @data_entries[c][f].each do |v| + dataset.add(c,f,v) + end end end end -- cgit v1.2.3 From 267b691017202c2fccf69dbeecfd4ed524a73fc2 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 14 Feb 2011 18:28:12 +0100 Subject: fix: handle nil values in split --- lib/dataset.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index a0f99b1..efab0a3 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -237,7 +237,7 @@ module OpenTox @features[feature] = {} unless @features[feature] @data_entries[compound] = {} unless @data_entries[compound] @data_entries[compound][feature] = [] unless @data_entries[compound][feature] - @data_entries[compound][feature] << value unless 
value + @data_entries[compound][feature] << value if value end # Add/modify metadata, existing entries will be overwritten -- cgit v1.2.3 From 80d49f60ac55cc2fb1c7974752e1e947fa3f3f70 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 21 Feb 2011 13:52:00 +0100 Subject: clean uri now works for https, rdf parsing issues: /features and ?feature_uris, missing subjectid --- lib/dataset.rb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index efab0a3..3f530e6 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -141,8 +141,10 @@ module OpenTox if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) copy YAML.load(RestClientWrapper.get(@uri, {:accept => "application/x-yaml", :subjectid => subjectid})) else + puts "loading all.." parser = Parser::Owl::Dataset.new(@uri, subjectid) copy parser.load_uri(subjectid) + puts "..done" end end -- cgit v1.2.3 From 53dec3e3b1a59760ac9440749d159edc7ac09359 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 21 Feb 2011 14:10:44 +0100 Subject: removing debug msg --- lib/dataset.rb | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index 3f530e6..efab0a3 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -141,10 +141,8 @@ module OpenTox if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) copy YAML.load(RestClientWrapper.get(@uri, {:accept => "application/x-yaml", :subjectid => subjectid})) else - puts "loading all.." 
parser = Parser::Owl::Dataset.new(@uri, subjectid) copy parser.load_uri(subjectid) - puts "..done" end end -- cgit v1.2.3 From 9bc9d1c5c11aa64d410200cc21d07acc39cc3019 Mon Sep 17 00:00:00 2001 From: mguetlein Date: Mon, 21 Feb 2011 15:44:24 +0100 Subject: fix for Dataset#add false values, fix for parsing compounds without values --- lib/dataset.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/dataset.rb') diff --git a/lib/dataset.rb b/lib/dataset.rb index efab0a3..2c47502 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -237,7 +237,7 @@ module OpenTox @features[feature] = {} unless @features[feature] @data_entries[compound] = {} unless @data_entries[compound] @data_entries[compound][feature] = [] unless @data_entries[compound][feature] - @data_entries[compound][feature] << value if value + @data_entries[compound][feature] << value if value!=nil end # Add/modify metadata, existing entries will be overwritten -- cgit v1.2.3