From d6811507c1c1339cc4fe7cdb429b9b34b97dc422 Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 22 Oct 2010 17:45:19 +0200 Subject: new API with support for external services (initial version) --- Rakefile | 2 + lib/algorithm.rb | 41 ++- lib/compound.rb | 184 ++++++++----- lib/dataset.rb | 273 ++++++++++++++++-- lib/environment.rb | 20 +- lib/feature.rb | 7 + lib/features.rb | 19 -- lib/model.rb | 16 +- lib/opentox-ruby-api-wrapper.rb | 4 +- lib/opentox.rb | 79 ++++++ lib/owl-serializer.rb | 65 ----- lib/owl.rb | 593 ---------------------------------------- lib/parser.rb | 191 +++++++++++++ lib/serializer.rb | 297 ++++++++++++++++++++ lib/task.rb | 8 +- lib/validation.rb | 4 +- 16 files changed, 981 insertions(+), 822 deletions(-) create mode 100644 lib/feature.rb delete mode 100644 lib/features.rb create mode 100644 lib/opentox.rb delete mode 100644 lib/owl-serializer.rb delete mode 100644 lib/owl.rb create mode 100644 lib/parser.rb create mode 100644 lib/serializer.rb diff --git a/Rakefile b/Rakefile index 3846bd1..8dd1088 100644 --- a/Rakefile +++ b/Rakefile @@ -27,6 +27,8 @@ begin "google-spreadsheet-ruby", "tmail", "rinruby", + "rdf", + "rdf-raptor", "rjb" ].each { |dep| gem.add_dependency dep } [ "dm-core", diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 4d9156a..e1d369a 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -1,27 +1,22 @@ - module OpenTox + module Algorithm - - - class Generic - - attr_accessor :uri, :title, :date - - def self.find(uri) - owl = OpenTox::Owl.from_uri(uri, "Algorithm") - return self.new(owl) - end - + + include OtObject + + class Generic + include Algorithm + #include OtObject protected - def initialize(owl) - @title = owl.get("title") - @date = owl.get("date") - @uri = owl.uri - end +# def initialize(owl) +# @title = owl.get("title") +# @date = owl.get("date") +# @uri = owl.uri +# end end - class Fminer + class Fminer < Generic def self.create_feature_dataset(params) LOGGER.debug File.basename(__FILE__) + ": 
creating feature dataset" @@ -30,7 +25,7 @@ module OpenTox end def self.uri - File.join(@@config[:services]["opentox-algorithm"], "fminer") + File.join(CONFIG[:services]["opentox-algorithm"], "fminer") end end @@ -39,13 +34,13 @@ module OpenTox def self.create_model(params) LOGGER.debug params LOGGER.debug File.basename(__FILE__) + ": creating model" - LOGGER.debug File.join(@@config[:services]["opentox-algorithm"], "lazar") - resource = RestClient::Resource.new(File.join(@@config[:services]["opentox-algorithm"], "lazar"), :content_type => "application/x-yaml") - @uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(@@config[:services]["opentox-algorithm"], "fminer")).body.chomp + LOGGER.debug File.join(CONFIG[:services]["opentox-algorithm"], "lazar") + resource = RestClient::Resource.new(File.join(CONFIG[:services]["opentox-algorithm"], "lazar"), :content_type => "application/x-yaml") + @uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(CONFIG[:services]["opentox-algorithm"], "fminer")).body.chomp end def self.uri - File.join(@@config[:services]["opentox-algorithm"], "lazar") + File.join(CONFIG[:services]["opentox-algorithm"], "lazar") end end diff --git a/lib/compound.rb b/lib/compound.rb index 49c166f..699e4c1 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -3,82 +3,120 @@ module OpenTox - class Compound #< OpenTox - - attr_reader :inchi, :uri - - # Initialize with :uri => uri, :smiles => smiles or :name => name (name can be also an InChI/InChiKey, CAS number, etc) - def initialize(params) - if params[:smiles] - @inchi = smiles2inchi(params[:smiles]) - @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi)) - elsif params[:inchi] - @inchi = params[:inchi] - @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi)) - elsif 
params[:sdf] - @inchi = sdf2inchi(params[:sdf]) - @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi)) - elsif params[:name] - # paranoid URI encoding to keep SMILES charges and brackets - @inchi = RestClient.get("#{@@cactus_uri}#{URI.encode(params[:name], Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))}/stdinchi").body.chomp - # this was too hard for me to debug and leads to additional errors (ch) - #@inchi = RestClientWrapper.get("#{@@cactus_uri}#{URI.encode(params[:name], Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))}/stdinchi").chomp - @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi)) - elsif params[:uri] - @uri = params[:uri] - case params[:uri] - when /ambit/ # Ambit does not deliver InChIs reliably - smiles = RestClientWrapper.get @uri, :accept => 'chemical/x-daylight-smiles' - @inchi = obconversion(smiles,'smi','inchi') - when /InChI/ # shortcut for IST services - @inchi = params[:uri].sub(/^.*InChI/, 'InChI') - else - @inchi = RestClientWrapper.get @uri, :accept => 'chemical/x-inchi' - end - end - end + # Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure). 
+ # + # Examples: + # require "opentox-ruby-api-wrapper" + # + # # Creating compounds + # + # # from smiles string + # compound = OpenTox::Compound.from_smiles("c1ccccc1") + # # from name + # compound = OpenTox::Compound.from_name("Benzene") + # # from uri + # compound = OpenTox::Compound.new("http://webservices.in-silico.ch/compound/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H") + # + # # Getting compound representations + # + # # get InChI + # inchi = compound.inchi + # # get all compound names + # names = compound.names + # # get png image + # image = compound.png + # # get uri + # uri = compound.uri + # + # # SMARTS matching + # + # # match a smarts string + # compound.match?("cN") # returns false + # # match an array of smarts strings + # compound.match(['cc','cN']) # returns ['cc'] + class Compound + + attr_accessor :inchi, :uri + + # Create compound with optional uri + def initialize(uri=nil) + @uri = uri + case @uri + when /InChI/ # shortcut for IST services + @inchi = @uri.sub(/^.*InChI/, 'InChI') + else + @inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri + end + end - # Get the (canonical) smiles + # Create a compound from smiles string + def self.from_smiles(smiles) + c = Compound.new + c.inchi = Compound.smiles2inchi(smiles) + c.uri = File.join(CONFIG[:services]["opentox-compound"],URI.escape(c.inchi)) + c + end + + # Create a compound from inchi string + def self.from_inchi(inchi) + c = Compound.new + c.inchi = inchi + c.uri = File.join(CONFIG[:services]["opentox-compound"],URI.escape(c.inchi)) + c + end + + # Create a compound from sdf string + def self.from_sdf(sdf) + c = Compound.new + c.inchi = Compound.sdf2inchi(sdf) + c.uri = File.join(CONFIG[:services]["opentox-compound"],URI.escape(c.inchi)) + c + end + + # Create a compound from name (name can be also an InChI/InChiKey, CAS number, etc) + def self.from_name(name) + c = Compound.new + # paranoid URI encoding to keep SMILES charges and brackets + c.inchi = 
RestClientWrapper.get("#{@@cactus_uri}#{URI.encode(name, Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))}/stdinchi").to_s.chomp + c.uri = File.join(CONFIG[:services]["opentox-compound"],URI.escape(c.inchi)) + c + end + + # Get (canonical) smiles def smiles - obconversion(@inchi,'inchi','can') + Compound.obconversion(@inchi,'inchi','can') end + # Get sdf def sdf - obconversion(@inchi,'inchi','sdf') + Compound.obconversion(@inchi,'inchi','sdf') end + # Get gif image def gif RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/image") end + # Get png image def png RestClientWrapper.get(File.join @uri, "image") end + # Get URI of compound image + def image_uri + File.join @uri, "image" + end + + # Get all known compound names def names begin - RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names") + RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names").split("\n") rescue "not available" end end - def display_smarts_uri(activating, deactivating, highlight = nil) - LOGGER.debug activating.to_yaml unless activating.nil? - activating_smarts = URI.encode "\"#{activating.join("\"/\"")}\"" - deactivating_smarts = URI.encode "\"#{deactivating.join("\"/\"")}\"" - if highlight.nil? 
- File.join @@config[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts) - else - File.join @@config[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts), "highlight", URI.encode(highlight) - end - end - - def image_uri - File.join @uri, "image" - end - - # Matchs a smarts string + # Match a smarts string def match?(smarts) obconversion = OpenBabel::OBConversion.new obmol = OpenBabel::OBMol.new @@ -89,30 +127,42 @@ module OpenTox smarts_pattern.match(obmol) end - # Match an array of smarts features, returns matching features + # Match an array of smarts strings, returns array with matching smarts def match(smarts_array) smarts_array.collect{|s| s if match?(s)}.compact end - # AM - # Match an array of smarts features, returns (0)1 for (non)matching features at each pos - def match_all(smarts_array) - smarts_array.collect{|s| match?(s) ? 1 : 0 } - end + # Get URI of compound image with highlighted fragments + def matching_smarts_image_uri(activating, deactivating, highlight = nil) + activating_smarts = URI.encode "\"#{activating.join("\"/\"")}\"" + deactivating_smarts = URI.encode "\"#{deactivating.join("\"/\"")}\"" + if highlight.nil? 
+ File.join CONFIG[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts) + else + File.join CONFIG[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts), "highlight", URI.encode(highlight) + end + end + + + private - def sdf2inchi(sdf) - obconversion(sdf,'sdf','inchi') + # Convert sdf to inchi + def self.sdf2inchi(sdf) + Compound.obconversion(sdf,'sdf','inchi') end - def smiles2inchi(smiles) - obconversion(smiles,'smi','inchi') + # Convert smiles to inchi + def self.smiles2inchi(smiles) + Compound.obconversion(smiles,'smi','inchi') end - def smiles2cansmi(smiles) - obconversion(smiles,'smi','can') + # Convert smiles to canonical smiles + def self.smiles2cansmi(smiles) + Compound.obconversion(smiles,'smi','can') end - def obconversion(identifier,input_format,output_format) + # Convert identifier from OpenBabel input_format to OpenBabel output_format + def self.obconversion(identifier,input_format,output_format) obconversion = OpenBabel::OBConversion.new obmol = OpenBabel::OBMol.new obconversion.set_in_and_out_formats input_format, output_format diff --git a/lib/dataset.rb b/lib/dataset.rb index 2eb2206..7c8ce24 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -1,35 +1,253 @@ module OpenTox + # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset). 
+ # + # Examples: + # require "opentox-ruby-api-wrapper" + # + # # Creating datasets + # + # # create an empty dataset + # dataset = OpenTox::Dataset.new + # # create an empty dataset with URI + # # this does not load data from the dataset service - use one of the load_* methods + # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1") + # # create new dataset and save it to obtain a URI + # dataset = OpenTox::Dataset.create + # # create a new dataset from yaml representation + # dataset = OpenTox::Dataset.from_yaml + # # create a new dataset from CSV string + # csv_string = "SMILES, Toxicity\nc1ccccc1N, true" + # dataset = OpenTox::Dataset.from_csv(csv_string) + # + # # Loading data + # # Datasets created with OpenTox::Dataset.new(uri) are empty by default + # # Invoking one of the following functions will load data into the object + # + # # create an empty dataset with URI + # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1") + # # loads (and returns) only metadata + # dataset.load_metadata + # # loads (and returns) only compounds + # dataset.load_compounds + # # loads (and returns) only features + # dataset.load_features + # # load all data from URI + # dataset.load_all + # + # # Getting dataset representations + # + # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1") + # dataset.load_all + # # OWL-DL (RDF/XML) + # dataset.rdfxml + # # OWL-DL (Ntriples) + # dataset.ntriples + # # YAML + # dataset.yaml + # # CSV + # dataset.csv + # + # # Modifying datasets + # + # # insert a statement (compound_uri,feature_uri,value) + # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true + # + # + # # Saving datasets + # # save dataset at dataset service + # dataset.save + # + # # Deleting datasets + # # delete dataset (also at dataset service) + # dataset.delete class 
Dataset - attr_accessor :uri, :title, :creator, :data, :features, :compounds + include OtObject - def initialize( owl=nil ) - @data = {} - @features = [] + attr_reader :features, :compounds, :data_entries, :metadata + attr_writer :metadata + + # Create dataset with optional URI + def initialize(uri=nil) + super uri + @features = {} @compounds = [] - + @data_entries = {} + end + + # Create and save an empty dataset (assigns URI to dataset) + def self.create(uri=CONFIG[:services]["opentox-dataset"]) + dataset = Dataset.new + dataset.uri = RestClientWrapper.post(uri,{}).to_s.chomp + dataset + end + + # Get all datasets from a service +# def self.all(uri=CONFIG[:services]["opentox-dataset"]) +# RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} +# end + + # Create a dataset from YAML string + def self.from_yaml(yaml) + dataset = Dataset.create + dataset.copy YAML.load(yaml) + dataset + end + + # Create dataset from CSV string (format specification: http://toxcreate.org/help) + # - loads data_entries, compounds, features + # - sets metadata (warnings) for parser errors + # - you will have to set remaining metadata manually + def self.from_csv(csv) + dataset = Dataset.create + Parser::Spreadsheet.new(dataset).load_csv(csv) + dataset + end + + # Create dataset from Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)) + # - loads data_entries, compounds, features + # - sets metadata (warnings) for parser errors + # - you will have to set remaining metadata manually + def self.from_spreadsheet(book) + dataset = Dataset.create + Parser::Spreadsheet.new(dataset).load_excel(book) + dataset + end + + # Load and return metadata of a Dataset object + def load_metadata + #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) + #add_metadata YAML.load(RestClientWrapper.get(File.join(@uri,"metadata"), :accept => "application/x-yaml")) + #else + add_metadata 
Parser::Owl::Dataset.new(@uri).metadata + #end + self.uri = @uri if @uri # keep uri + @metadata + end + + # Load all data (metadata, data_entries, compounds and features) from URI + def load_all + if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) + copy YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml")) + else + parser = Parser::Owl::Dataset.new(@uri) + copy parser.load_uri + end + end + + # Load and return all compound URIs + def load_compounds + RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri| + @compounds << compound_uri.chomp + end + @compounds.uniq! + end + + # Load all feature URIs + def load_features + RestClientWrapper.get(File.join(uri,"features"),:accept=> "text/uri-list").to_s.each_line do |feature_uri| + @features[feature_uri.chomp] = Feature.new(feature_uri.chomp).load_metadata + end + @features + end + + # Get YAML representation + def yaml + self.to_yaml + end + + # Get Excel representation, returns a Spreadsheet::Workbook which can be written with the 'spreadsheet' gem (data_entries only, metadata will ) + def excel + Serializer::Spreadsheets.new(self).excel + end + + # Get CSV string representation (data_entries only, metadata will be discarded) + def csv + Serializer::Spreadsheets.new(self).csv + end + + # Get OWL-DL in ntriples format + def ntriples + s = Serializer::Owl.new + s.add_dataset(self) + s.ntriples + end + + # Get OWL-DL in RDF/XML format + def rdfxml + s = Serializer::Owl.new + s.add_dataset(self) + s.rdfxml + end + + # Insert a statement (compound_uri,feature_uri,value) + def add (compound,feature,value) + @compounds << compound unless @compounds.include? 
compound + @features[feature] = {} unless @features[feature] + @data_entries[compound] = {} unless @data_entries[compound] + @data_entries[compound][feature] = [] unless @data_entries[compound][feature] + @data_entries[compound][feature] << value + end + + # Add metadata (hash with predicate_uri => value) + def add_metadata(metadata) + metadata.each { |k,v| @metadata[k] = v } + end + + # Copy a dataset (rewrites URI) + def copy(dataset) + @metadata = dataset.metadata + @data_entries = dataset.data_entries + @compounds = dataset.compounds + @features = dataset.features + if @uri + self.uri = @uri + else + @uri = dataset.metadata[XSD.anyUri] + end + end + + # save dataset (overwrites existing dataset) + def save + # TODO: rewrite feature URI's ?? + # create dataset if uri empty + @compounds.uniq! + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + end + + # Delete dataset at the dataset service + def delete + RestClientWrapper.delete @uri + end + end +end + + ######################################################### + # kept for backward compatibility, may have to be fixed # + ######################################################### + +=begin + def from_owl(owl) # creates dataset object from Opentox::Owl object # use Dataset.find( ) to load dataset from rdf-supporting datasetservice # note: does not load all feature values, as this is time consuming - if owl - raise "invalid param" unless owl.is_a?(OpenTox::Owl) - @title = owl.get("title") - @creator = owl.get("creator") - @uri = owl.uri - # when loading a dataset from owl, only compound- and feature-uris are loaded - owl.load_dataset(@compounds, @features) - # all features are marked as dirty - # as soon as a feature-value is requested all values for this feature are loaded from the rdf - @dirty_features = @features.dclone - @owl = owl - end + raise "invalid param" unless owl.is_a?(OpenTox::Owl) + @metadata[DC.title] = owl.get("title") + @metadata[DC.creator] = owl.get("creator") + 
@metadata[XSD.anyUri] = owl.uri + # when loading a dataset from owl, only compound- and feature-uris are loaded + owl.load_dataset(@compounds, @features) + # all features are marked as dirty + # as soon as a feature-value is requested all values for this feature are loaded from the rdf + @dirty_features = @features.dclone + @owl = owl end def self.find(uri, accept_header=nil) unless accept_header - if (@@config[:yaml_hosts].include?(URI.parse(uri).host)) + if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) accept_header = 'application/x-yaml' else accept_header = "application/rdf+xml" @@ -38,8 +256,10 @@ module OpenTox case accept_header when "application/x-yaml" + LOGGER.debug "DATASET: "+ uri + LOGGER.debug RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s - d.uri = uri unless d.uri + #d.uri = @metadata[XSD.anyUri] unless d.uri when "application/rdf+xml" owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset") d = Dataset.new(owl) @@ -48,7 +268,7 @@ module OpenTox end d end - + # converts a dataset represented in owl to yaml # (uses a temporary dataset) # note: to_yaml is overwritten, loads complete owl dataset values @@ -108,7 +328,7 @@ module OpenTox raise "predicted class value is an array\n"+ "value "+v.to_s+"\n"+ "value-class "+v.class.to_s+"\n"+ - "dataset "+@uri.to_s+"\n"+ + "dataset "+self.uri.to_s+"\n"+ "compound "+compound.to_s+"\n"+ "feature "+feature.to_s+"\n" else @@ -130,7 +350,7 @@ module OpenTox raise "predicted regression value is an array\n"+ "value "+v.to_s+"\n"+ "value-class "+v.class.to_s+"\n"+ - "dataset "+@uri.to_s+"\n"+ + "dataset "+self.uri.to_s+"\n"+ "compound "+compound.to_s+"\n"+ "feature "+feature.to_s+"\n" else @@ -181,7 +401,7 @@ module OpenTox raise "value is not an array\n"+ "value "+v.to_s+"\n"+ "value-class "+v.class.to_s+"\n"+ - "dataset "+@uri.to_s+"\n"+ + "dataset "+self.uri.to_s+"\n"+ "compound 
"+compound.to_s+"\n"+ "feature "+feature.to_s+"\n" end @@ -216,11 +436,6 @@ module OpenTox super - ["@owl"] end - # saves (changes) as new dataset in dataset service - # returns uri - # uses to yaml method (which is overwritten) - def save - OpenTox::RestClientWrapper.post(@@config[:services]["opentox-dataset"],{:content_type => "application/x-yaml"},self.to_yaml).strip - end end end +=end diff --git a/lib/environment.rb b/lib/environment.rb index 0c62113..b16b62f 100644 --- a/lib/environment.rb +++ b/lib/environment.rb @@ -12,8 +12,8 @@ TMP_DIR = File.join(basedir, "tmp") LOG_DIR = File.join(basedir, "log") if File.exist?(config_file) - @@config = YAML.load_file(config_file) - raise "could not load config, config file: "+config_file.to_s unless @@config + CONFIG = YAML.load_file(config_file) + raise "could not load config, config file: "+config_file.to_s unless CONFIG else FileUtils.mkdir_p TMP_DIR FileUtils.mkdir_p LOG_DIR @@ -24,20 +24,20 @@ else end # database -if @@config[:database] +if CONFIG[:database] ['dm-core', 'dm-serializer', 'dm-timestamps', 'dm-types', 'dm-migrations', 'dm-validations' ].each{|lib| require lib } - case @@config[:database][:adapter] + case CONFIG[:database][:adapter] when /sqlite/i db_dir = File.join(basedir, "db") FileUtils.mkdir_p db_dir DataMapper::setup(:default, "sqlite3://#{db_dir}/opentox.sqlite3") else DataMapper.setup(:default, { - :adapter => @@config[:database][:adapter], - :database => @@config[:database][:database], - :username => @@config[:database][:username], - :password => @@config[:database][:password], - :host => @@config[:database][:host]}) + :adapter => CONFIG[:database][:adapter], + :database => CONFIG[:database][:database], + :username => CONFIG[:database][:username], + :password => CONFIG[:database][:password], + :host => CONFIG[:database][:host]}) end end @@ -48,7 +48,7 @@ logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log" #LOGGER = MyLogger.new(logfile,'daily') # daily rotation LOGGER = MyLogger.new(logfile) # no 
rotation LOGGER.formatter = Logger::Formatter.new #this is neccessary to restore the formating in case active-record is loaded -if @@config[:logger] and @@config[:logger] == "debug" +if CONFIG[:logger] and CONFIG[:logger] == "debug" LOGGER.level = Logger::DEBUG else LOGGER.level = Logger::WARN diff --git a/lib/feature.rb b/lib/feature.rb new file mode 100644 index 0000000..9616135 --- /dev/null +++ b/lib/feature.rb @@ -0,0 +1,7 @@ +module OpenTox + + class Feature + include OtObject + end + +end diff --git a/lib/features.rb b/lib/features.rb deleted file mode 100644 index 0fa1cf0..0000000 --- a/lib/features.rb +++ /dev/null @@ -1,19 +0,0 @@ -# CH: should go into validation service -# - not a complete OT object -# - only used twice -# - what about ./validation/validation/validation_service.rb:241: value = OpenTox::Feature.new(:uri => a.uri).value(prediction_feature).to_s -module OpenTox - - module Feature - - def self.domain( feature_uri ) - #TODO - if feature_uri =~ /ambit/ - return nil - else - return ["true", "false"] - end - end - - end -end diff --git a/lib/model.rb b/lib/model.rb index 3ecd61c..d0d6703 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -84,8 +84,8 @@ module OpenTox def initialize @source = "http://github.com/helma/opentox-model" - @algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar") - #@independent_variables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative") + @algorithm = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") + #@independent_variables = File.join(CONFIG[:services]["opentox-algorithm"],"fminer#BBRC_representative") @features = [] @effects = {} @activities = {} @@ -95,12 +95,12 @@ module OpenTox def save @features.uniq! 
- resource = RestClient::Resource.new(@@config[:services]["opentox-model"]) + resource = RestClient::Resource.new(CONFIG[:services]["opentox-model"]) resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s end def self.find_all - RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n") + RestClientWrapper.get(CONFIG[:services]["opentox-model"]).chomp.split("\n") end def self.predict(compound_uri,model_uri) @@ -115,8 +115,8 @@ module OpenTox def initialize @source = "http://github.com/helma/opentox-model" - @algorithm = File.join(@@config[:services]["opentox-algorithm"],"property_lazar") - #@independent_variables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative") + @algorithm = File.join(CONFIG[:services]["opentox-algorithm"],"property_lazar") + #@independent_variables = File.join(CONFIG[:services]["opentox-algorithm"],"fminer#BBRC_representative") @features = [] #@effects = {} @activities = {} @@ -126,12 +126,12 @@ module OpenTox def save @features.uniq! 
- resource = RestClient::Resource.new(@@config[:services]["opentox-model"]) + resource = RestClient::Resource.new(CONFIG[:services]["opentox-model"]) resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s end def self.find_all - RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n") + RestClientWrapper.get(CONFIG[:services]["opentox-model"]).chomp.split("\n") end def self.predict(compound_uri,model_uri) diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb index d9db4ac..2749899 100644 --- a/lib/opentox-ruby-api-wrapper.rb +++ b/lib/opentox-ruby-api-wrapper.rb @@ -1,4 +1,4 @@ -['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'redland', 'rdf/redland', 'rdf/redland/util', 'environment'].each do |lib| +['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'environment'].each do |lib| require lib end @@ -8,6 +8,6 @@ rescue LoadError puts "Please install Openbabel with 'rake openbabel:install' in the compound component" end -['owl-serializer', 'compound','dataset','algorithm','model','task','validation','utils','features', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib| +['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','utils','feature', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib| require lib end diff --git a/lib/opentox.rb b/lib/opentox.rb new file mode 100644 index 0000000..453ca66 --- /dev/null +++ b/lib/opentox.rb @@ -0,0 +1,79 @@ +module OpenTox + + # Generic OpenTox class + module OtObject + + attr_reader :uri + attr_accessor :metadata + + # Initialize OpenTox object with optional uri + def initialize(uri=nil) + @metadata = {} + self.uri = uri if uri + end + + # Set URI + def uri=(uri) + @uri = uri + @metadata[XSD.anyUri] = uri + end + + # Get title + def title + load_metadata unless @metadata[DC.title] + @metadata[DC.title] + end + + # Set title + def 
title=(title) + @metadata[DC.title] = title + end + + # Get all objects from a service + def self.all(uri) + #def OtObject.all(uri) + RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.split(/\n/) + end + + # Load metadata from URI + def load_metadata + #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) + # TODO: fix metadata retrie + #@metadata = YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml")) + #else + @metadata = Parser::Owl::Generic.new(@uri).metadata + #end + @metadata + #Parser::Owl::Generic.new(@uri).metadata + end + + end + + module Owl + + class Namespace + + def initialize(uri) + @uri = uri + end + + def [](property) + @uri+property.to_s + end + + def method_missing(property) + @uri+property.to_s + end + + end + end + +end +# +# OWL Namespaces +RDF = OpenTox::Owl::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' +OWL = OpenTox::Owl::Namespace.new 'http://www.w3.org/2002/07/owl#' +DC = OpenTox::Owl::Namespace.new 'http://purl.org/dc/elements/1.1/' +OT = OpenTox::Owl::Namespace.new 'http://www.opentox.org/api/1.1#' +XSD = OpenTox::Owl::Namespace.new 'http://www.w3.org/2001/XMLSchema#' + diff --git a/lib/owl-serializer.rb b/lib/owl-serializer.rb deleted file mode 100644 index 8965bf2..0000000 --- a/lib/owl-serializer.rb +++ /dev/null @@ -1,65 +0,0 @@ -require 'rdf' -require 'rdf/raptor' -require 'rdf/ntriples' - -# RDF namespaces -include RDF -OT = RDF::Vocabulary.new 'http://www.opentox.org/api/1.1#' - -module OpenTox - - class OwlSerializer - - def initialize(klass,uri) - - @model = RDF::Graph.new(uri) - - @triples = [] - @triples << [ OT[klass], RDF.type, OWL.Class ] - @triples << [ RDF::URI.new(uri), RDF.type, OT[klass] ] - - @classes = [ OT[klass] ] - @object_properties = [] - @annotation_properties = [] - @objects = [ uri ] - - end - - def self.create(klass, uri) - OpenTox::OwlSerializer.new(klass,uri) - end - - def rdf - @triples.each { |statement| @model << statement } - RDF::Writer.for(:rdfxml).buffer 
do |writer| - writer << @model - end - end - - def object_property(subject,predicate,object,object_class) - s = [ RDF::URI.new(subject), predicate, RDF::URI.new(object) ] # - @triples << s unless @triples.include? s - unless @object_properties.include? predicate - @triples << [ predicate, RDF.type, OWL.ObjectProperty ] - @object_properties << predicate - end - unless @objects.include? object - @triples << [ RDF::URI.new(object), RDF.type, object_class ] - @objects << object - end - unless @classes.include? object_class - @triples << [ object_class, RDF.type, OWL.Class ] - @classes << object_class - end - end - - def annotation_property(subject, predicate, value, datatype) - s = [ RDF::URI.new(subject), predicate, RDF::Literal.new(value, :datatype => datatype) ] - @triples << s unless @triples.include? s - unless @annotation_properties.include? predicate - @triples << [ predicate, RDF.type, OWL.AnnotationProperty ] - @annotation_properties << predicate - end - end - end -end diff --git a/lib/owl.rb b/lib/owl.rb deleted file mode 100644 index f4128ee..0000000 --- a/lib/owl.rb +++ /dev/null @@ -1,593 +0,0 @@ -require 'rdf' -require 'rdf/ntriples' -require 'rdf/raptor' -include RDF -# RDF namespaces -#RDF = Redland::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' -OWL = Redland::Namespace.new 'http://www.w3.org/2002/07/owl#' -DC = Redland::Namespace.new 'http://purl.org/dc/elements/1.1/' -OT = Redland::Namespace.new 'http://www.opentox.org/api/1.1#' -#OT = Redland::Namespace.new 'http://ortona.informatik.uni-freiburg.de/opentox.owl#' -XML = Redland::Namespace.new 'http://www.w3.org/2001/XMLSchema#' - -# overriding literal to give nice access to datatype -# and to access the stored value as correct ruby type -class Redland::Literal - - def self.create(value, type) - raise "literal datatype may not be nil" unless type - type = parse_datatype_uri(value) if OpenTox::Owl::PARSE_LITERAL_TYPE==type - - if type.is_a?(Redland::Uri) - 
Redland::Literal.new(value.to_s,nil,type) - else - Redland::Literal.new(value.to_s,nil,Redland::Uri.new(type.to_s)) - end - end - - # the literal node of the ruby swig api provdides the 'value' of a literal but not the 'datatype' - # found solution in mailing list - def datatype - uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node) - return Redland.librdf_uri_to_string(uri) if uri - end - - # gets value of literal, value class is se according to literal datatype - def get_value - Redland::Literal.parse_value( self.value, self.datatype ) - end - - private - # parses value according to datatype uri - def self.parse_value(string_value, datatype_uri) - - if (datatype_uri==nil || datatype_uri.size==0) - LOGGER.warn("empty datatype for literal with value: '"+string_value+"'") - return string_value - end - case datatype_uri - when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s - return string_value - when OpenTox::Owl::LITERAL_DATATYPE_URI.to_s - return string_value #PENDING uri as string? - when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s - return string_value.to_f - when OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s - return string_value.to_f - when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s - return string_value.upcase=="TRUE" - when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s - return Time.parse(string_value) - when OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s - return Time.parse(string_value) - when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s - return string_value.to_i - else - raise "unknown literal datatype: '"+datatype_uri.to_s+"' (value is "+string_value+ - "), please specify new OpenTox::Owl::LITERAL_DATATYPE" - end - end - - # parse datatype uri accoring to value class - def self.parse_datatype_uri(value) - if value==nil - raise "illegal datatype: value is nil" - elsif value.is_a?(String) - # PENDING: uri check too slow? 
- if OpenTox::Utils.is_uri?(value) - return OpenTox::Owl::LITERAL_DATATYPE_URI - else - return OpenTox::Owl::LITERAL_DATATYPE_STRING - end - elsif value.is_a?(Float) - return OpenTox::Owl::LITERAL_DATATYPE_FLOAT - elsif value.is_a?(TrueClass) or value.is_a?(FalseClass) - return OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN - elsif value.is_a?(Integer) - return OpenTox::Owl::LITERAL_DATATYPE_INTEGER - elsif value.is_a?(DateTime) - return OpenTox::Owl::LITERAL_DATATYPE_DATETIME - elsif value.is_a?(Time) - return OpenTox::Owl::LITERAL_DATATYPE_DATETIME - else - raise "illegal datatype: "+value.class.to_s+" "+value.to_s - end - end -end - -module OpenTox - - class Owl - - # to get correct owl-dl, properties and objects have to be typed - # i.e. the following triple is insufficient: - # ModelXY,ot:algorithm,AlgorithmXY - # further needed: - # ot:algorithm,rdf:type,owl:ObjectProperty - # AlgorithmXY,rdf:type,ot:Algorithm - # ot:Algorithm,rdf:type,owl:Class - # - # therefore OpentoxOwl needs info about the opentox-ontology - # the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES - - # contains all owl:ObjectProperty as keys, and the respective classes as value - # some object properties link to objects from different classes (e.g. 
"values can be "Tuple", or "FeatureValue") - # in this case, use set_object_property() (instead of set()) and specify class manually - OBJECT_PROPERTY_CLASS = {} - [ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"} - [ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"} - [ "trainingDataset", "testTargetDataset", "predictionDataset", - "testDataset", "dataset" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Dataset"} - [ "feature", "dependentVariables", "independentVariables", - "predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"} - [ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"} - [ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"} - [ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"} - [ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"} - [ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"} - [ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"} - [ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"} - [ "confusionMatrixCell" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrixCell"} - [ "regressionStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "RegressionStatistics"} - [ "validation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Validation"} - [ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"} - [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"} - - # literals point to primitive values (not to other resources) - # the literal datatype is encoded via uri: - LITERAL_DATATYPE_STRING = XML["string"].uri - LITERAL_DATATYPE_URI = XML["anyURI"].uri - LITERAL_DATATYPE_FLOAT = XML["float"].uri - LITERAL_DATATYPE_DOUBLE = XML["double"].uri - LITERAL_DATATYPE_DATE = XML["date"].uri - LITERAL_DATATYPE_BOOLEAN = XML["boolean"].uri - LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri - 
LITERAL_DATATYPE_INTEGER = XML["integer"].uri - - # list all literals (to distinguish from objectProperties) as keys, datatype as values - # (do not add dc-identifier, deprecated, object are identified via name=uri) - LITERAL_TYPES = {} - [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue", - "classValue", "reportType", "confusionMatrixActual", - "confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING } - [ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE } - [ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate", - "areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall", - "percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect", - "percentIncorrect", "percentUnpredicted", "realRuntime", - "percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare", - "targetVarianceActual", "targetVariancePredicted", "sumSquaredError", - "sampleCorrelationCoefficient" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DOUBLE } - [ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives", - "numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted", - "randomSeed", "numFolds", "confusionMatrixValue", - "crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER } - [ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI } - [ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN } - # some literals can have different types, parse from ruby type - PARSE_LITERAL_TYPE = "PARSE_LITERAL_TYPE" - [ "value" ].each{ |l| LITERAL_TYPES[l] = PARSE_LITERAL_TYPE } - - # constants for often used redland-resources - OWL_TYPE_LITERAL = OWL["AnnotationProperty"] - OWL_TYPE_CLASS = OWL["Class"] - OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"] - RDF_TYPE = RDF['type'] - - # store redland:resources (=nodes) to: - # * separate namespaces (OT from RDF and DC) - # * 
save time, as generating resources is timeconsuming in redland - @@nodes = {} - [ "type", "about"].each{ |l| @@nodes[l] = RDF[l] } - [ "title", "creator", "date", "format" ].each{ |l| @@nodes[l] = DC[l] } - - def node(property) - raise "can only create node for non-empty-string, but given "+property.class.to_s+" (value: "+ - property.to_s+")" unless property.is_a?(String) and property.size>0 - raise "dc[identifier] deprecated, use owl.uri" if property=="identifier" - @@nodes[property] = OT[property] unless @@nodes.has_key?(property) - return @@nodes[property] - end - - # ot_class is the class of the object as string, e.g. "Model","Dataset", ... - # root_node is the root-object node in the rdf - # uri the uri of the object - attr_accessor :ot_class, :root_node, :uri, :model, :triples - - private - def initialize - @triples = [] - @model = Redland::Model.new Redland::MemoryStore.new - #@triples = "" - end - - # build new owl object - # ot_class is the class of this object, should be a string like "Model", "Task", ... - # uri is name and identifier of this object - public - def self.create( ot_class, uri ) - - owl = OpenTox::Owl.new - owl.ot_class = ot_class - owl.root_node = Redland::Resource.new(uri.to_s.strip) - owl.set("type",owl.ot_class) - owl.uri = uri - owl - end - - # loads owl from data - def self.from_data(data, base_uri, ot_class) - - owl = OpenTox::Owl.new - parser = Redland::Parser.new - - begin - parser.parse_string_into_model(owl.model, data, base_uri) - - # now loading root_node and uri - owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |s,p,o| - #LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s - is_root = true - owl.model.find(nil, nil, s) do |ss,pp,oo| - is_root = false - break - end - if is_root - # handle error if root is already set - raise "cannot derieve root object from rdf, more than one object specified" if owl.uri - raise "illegal root node type, no uri specified\n"+data.to_s if s.blank? 
- #store root note and uri - owl.uri = s.uri.to_s - owl.root_node = s - end - end - - # handle error if no root node was found - unless owl.root_node - types = [] - owl.model.find(nil, RDF_TYPE, nil){ |s,p,o| types << o.to_s } - raise "root node for class '"+owl.node(ot_class).to_s+"' not found (available type nodes: "+types.inspect+")" - end - raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri) - owl.ot_class = ot_class - owl - rescue => e - RestClientWrapper.raise_uri_error(e.message, base_uri) - end - end - - def self.from_uri(uri, ot_class) - return from_data(RestClientWrapper.get(uri,:accept => "application/rdf+xml").to_s, uri, ot_class) - end - - def rdf - #@model.to_string - #stdin, stdout, stderr = Open3.popen3('rapper -I test.org -i ntriples -o rdfxml -') - #stdin.puts @triples - #stdout - #File.open("/tmp/d","w+") {|f| f.puts @triples} - #`rapper -i ntriples -o rdfxml /tmp/d` - #@triples - #output = RDF::Writer.for(:rdfxml).buffer do |writer| - RDF::Writer.for(:rdfxml).buffer do |writer| - @triples.each do |statement| - begin - writer << statement - rescue => e - LOGGER.error e - LOGGER.info statement.inspect - end - end - end - #output - end - - # returns the first object for subject:root_node and property - # (sufficient for accessing simple, root-node properties) - def get( property ) - raise "uri is no prop, use owl.uri instead" if property=="uri" - return get_value( @model.object( @root_node, node(property.to_s)) ) - end - - # returns an array of objects (not only the first one) that fit for the property - # accepts array of properties to access not-root-node vaules - # i.e. 
validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ] - # returns an array of all confusionMatrixValues - def get_nested( property_array ) - n = [ @root_node ] - property_array.each do |p| - new_nodes = [] - n.each do |nn| - @model.find( nn, node(p), nil ) do |sub,pred,obj| - new_nodes << obj - end - end - n = new_nodes - end - return n.collect{|nn| get_value( nn )} - end - - private - # returns node-value - def get_value( node ) - return nil unless node - if node.is_a?(Redland::Literal) - return node.get_value - elsif node.blank? - return nil - else - return node.uri.to_s - end - end - - public - # sets values of current_node (by default root_node) - # - # note: this does not delete existing triples - # * there can be several triples for the same subject and predicate - # ( e.g. after set("description","bla1") and set("description","bla2") - # both descriptions are in the model, - # but the get("description") will give you only one object (by chance) - # * this does not matter in pratice (only dataset uses this -> load_dataset-methods) - # * identical values appear only once in rdf - def set(predicate, object, current_node=@root_node ) - - pred = predicate.to_s - raise "uri is no prop, cannot set uri" if pred=="uri" - raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier" - if (object.is_a?(Redland::Node) and object.blank?) 
or nil==object or object.to_s.size==0 - # set only not-nil values - LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'" - return - end - - if pred=="type" - # predicate is type, set class of current node - set_type(object, current_node) - elsif LITERAL_TYPES.has_key?(pred) - # predicate is literal - set_literal(pred,object,LITERAL_TYPES[pred],current_node) - elsif OBJECT_PROPERTY_CLASS.has_key?(pred) - # predicte is objectProperty, object is another resource - set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node) - else - raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES" - end - end - - # example-triples for setting rdf-type to model: - # model_xy,rdf:type,ot:Model - # ot:Model,rdf:type,owl:Class - def set_type(ot_class, current_node=@root_node) - #@triples += "#{ot_class.to_s} #{RDF_TYPE.to_s} #{current_node.to_s}" - #@triples << "#{current_node} #{RDF_TYPE} #{node(ot_class).to_s}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - #@triples << "#{node(ot_class).to_s} #{RDF_TYPE} #{OWL_TYPE_CLASS}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - add current_node, RDF_TYPE, node(ot_class) - add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS - end - - # example-triples for setting description of a model: - # model_xy,ot:description,bla..bla^^xml:string - # ot:description,rdf:type,owl:Literal - def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node) - #@triples += "#{current_node} #{node(literal_name)} #{Redland::Literal.create(literal_value, literal_datatype)}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - #TODO: add datatype - #@triples << "#{current_node} #{node(literal_name)} \"#{literal_value}\".\n".gsub(/\[/,'<').gsub(/\]/,'>') - #@triples << "#{node(literal_name)} #{RDF_TYPE} #{OWL_TYPE_LITERAL}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - add current_node, node(literal_name), Redland::Literal.create(literal_value, literal_datatype) - add node(literal_name), 
RDF_TYPE, OWL_TYPE_LITERAL - end - - # example-triples for setting algorithm property of a model: - # model_xy,ot:algorithm,algorihtm_xy - # ot:algorithm,rdf:type,owl:ObjectProperty - # algorihtm_xy,rdf:type,ot:Algorithm - # ot:Algorithm,rdf:type,owl:Class - def set_object_property(property, object, object_class, current_node=@root_node) - object_node = Redland::Resource.new(object) - #@triples << "#{current_node} #{node(property)} #{object_node}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - #@triples << "#{node(property)} #{RDF_TYPE} #{OWL_TYPE_OBJECT_PROPERTY}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - #@triples << "#{object_node} #{RDF_TYPE} #{node(object_class)}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - #@triples << "#{node(object_class)} #{RDF_TYPE} #{OWL_TYPE_CLASS}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - add current_node, node(property), object_node - add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY - add object_node, RDF_TYPE, node(object_class) - add node(object_class), RDF_TYPE, OWL_TYPE_CLASS - end - - def add(s,p,o) - #@triples << "#{s} #{p} #{o}.\n".gsub(/\[/,'<').gsub(/\]/,'>') - @triples << [RDF::URI.new(s.to_s.sub(/\[/,'').sub(/\]/,'')),RDF::URI.new(p.to_s.sub(/\[/,'').sub(/\]/,'')),o.to_s.sub(/\[/,'').sub(/\]/,'')] - #@model.add s,p,o - end - - # this is (a recursiv method) to set nested-data via hashes (not only simple properties) - # example (for a dataset) - # { :description => "bla", - # :dataEntry => { :compound => "compound_uri", - # :values => [ { :class => "FeatureValue" - # :feature => "feat1", - # :value => 42 }, - # { :class => "FeatureValue" - # :feature => "feat2", - # :value => 123 } ] } } - def set_data(hash, current_node=@root_node) - - hash.each do |k,v| - if v.is_a?(Hash) - # value is again a hash - prop = k.to_s - - # :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS - object_class = v.has_key?(:class) ? 
v.delete(:class) : OBJECT_PROPERTY_CLASS[prop] - raise "hash key must be a object-property, please add '"+prop.to_s+ - "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class - - # the new node is a class node, to specify the uri of the resource use key :uri - if v[:uri] - # identifier is either a specified uri - class_node = Redland::Resource.new(v.delete(:uri)) - else - # or a new uri, make up internal uri with increment - class_node = new_class_node(object_class,current_node) - end - set_object_property(prop,class_node,object_class,current_node) - # recursivly call set_data method with new node - set_data(v,class_node) - elsif v.is_a?(Array) - # value is an array, each array element is added with current key as predicate - v.each do |value| - set_data( { k => value }, current_node ) - end - else - # neither hash nor array, call simple set-method - set( k, v, current_node ) - end - end - end - - # create a new (internal class) node with unique, uri-like name - def new_class_node(name, current_node=@root_node) - # to avoid anonymous nodes, make up uris for sub-objects - # use counter to make sure each uri is unique - # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ... 
- count = 1 - while (true) - res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) ) - match = false - @model.find(nil, nil, res) do |s,p,o| - match = true - break - end - if match - count += 1 - else - break - end - end - return res - end - - # for "backwards-compatiblity" - # better use directly: - # set_data( { "parameters" => [ { "title" => , "paramScope" => , "paramValue" => } ] ) - def parameters=(params) - - converted_params = [] - params.each do |name, settings| - converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] } - end - set_data( :parameters => converted_params ) - end - - # PENDING move to dataset.rb - # this is for dataset.to_owl - # adds feautre value for a single compound - def add_data_entries(compound_uri,features) - - data_entry = { :compound => compound_uri } - if features - feature_values = [] - features.each do |f| - f.each do |feature_uri,value| - if value.is_a?(Hash) - complex_values = [] - value.each do |uri,v| - complex_values << { :feature => uri, :value => v } - end - feature_values << { :class => "Tuple", :feature => feature_uri, :complexValue => complex_values } - else - feature_values << { :class => "FeatureValue", :feature => feature_uri, :value => value } - end - end - end - data_entry[:values] = feature_values - end - set_data( :dataEntry => data_entry ) - end - - # PENDING move to dataset.rb - # feature values are not loaded for performance reasons - # loading compounds and features into arrays that are given as params - def load_dataset( compounds, features ) - - @model.subjects(RDF_TYPE, node('Compound')).each do |compound| - compounds << get_value(compound) - end - - @model.subjects(RDF_TYPE, node('Feature')).each do |feature| - feature_value_found=false - @model.find(nil, node("feature"), feature) do |potential_feature_value,p,o| - @model.find(nil, node("values"), potential_feature_value) do |s,p,o| - feature_value_found=true - break - end - break 
if feature_value_found - end - features << get_value(feature) if feature_value_found - end - LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s - end - - # PENDING move to dataset.rb - # loading feature values for the specified feature - # if feature is nil, all feature values are loaded - # - # general remark on the rdf loading (found out with some testing): - # the search methods (subjects/find) are fast, the time consuming parts is creating resources, - # which cannot be avoided in general - def load_dataset_feature_values( compounds, data, feature_uris ) - - raise "no feature-uri array" unless feature_uris.is_a?(Array) - - # values are stored in the data-hash, hash has a key for each compound - compounds.each{|c| data[c] = [] unless data[c]} - - count = 0 - - feature_uris.each do |feature_uri| - LOGGER.debug("load feature values for feature: "+feature_uri ) - feature_node = Redland::Resource.new(feature_uri) - - # search for all feature_value_node with property 'ot_feature' and the feature we are looking for - @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o| - - # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound' - value_nodes = @model.subjects(node('values'),feature_value_node) - if value_nodes.size>0 - raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1 - value_node = value_nodes[0] - - compound_uri = get_value( @model.object(value_node, node('compound')) ) - unless compound_uri - LOGGER.warn "'compound' missing for data-entry of feature "+feature_uri.to_s+ - ", value: "+@model.object(feature_value_node,node("value")).to_s - next - end - - value_node_type = @model.object(feature_value_node, RDF_TYPE) - if (value_node_type == node('FeatureValue')) - value_literal = @model.object( feature_value_node, node('value')) - raise "plain feature value no literal: "+value_literal.to_s unless 
value_literal.is_a?(Redland::Literal) - data[compound_uri] << {feature_uri => value_literal.get_value } - elsif (value_node_type == node('Tuple')) - complex_values = {} - @model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value| - complex_value_type = @model.object(complex_value, RDF_TYPE) - raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue') - complex_feature_uri = get_value(@model.object( complex_value, node('feature'))) - complex_value = @model.object( complex_value, node('value')) - raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal) - complex_values[ complex_feature_uri ] = complex_value.get_value - end - data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0 - end - count += 1 - LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0) - end - end - LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s - end - end - end -end diff --git a/lib/parser.rb b/lib/parser.rb new file mode 100644 index 0000000..e623bf5 --- /dev/null +++ b/lib/parser.rb @@ -0,0 +1,191 @@ +require 'spreadsheet' +require 'roo' +module OpenTox + + module Parser + + module Owl + + def initialize(uri) + @uri = uri + @metadata = {} + end + + def metadata + # TODO: load parameters + if @dataset + uri = File.join(@uri,"metadata") + else + uri = @uri + end + statements = [] + `rapper -i rdfxml -o ntriples #{uri}`.each_line do |line| + triple = line.chomp.split('> ') + statements << triple.collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')} + end + statements.each do |triple| + @metadata[triple[1]] = triple[2].split('^^').first if triple[0] == @uri and triple[1] != RDF['type'] + end + @metadata + end + + class Generic + include Owl + end + + class Dataset + + include Owl + + def initialize(uri) + super uri + @dataset = ::OpenTox::Dataset.new(@uri) + end + + def load_uri + data = {} + 
feature_values = {} + feature = {} + other_statements = {} + ntriples = `rapper -i rdfxml -o ntriples #{@uri}` + ntriples.each_line do |line| + triple = line.chomp.split(' ',3) + triple = triple[0..2].collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')} + case triple[1] # Ambit namespaces are case insensitive + when /#{OT.values}/i + data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]] + data[triple[0]][:values] << triple[2] + when /#{OT.value}/i + feature_values[triple[0]] = triple[2] + when /#{OT.compound}/i + data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]] + data[triple[0]][:compound] = triple[2] + when /#{OT.feature}/i + feature[triple[0]] = triple[2] + else + end + end + data.each do |id,entry| + entry[:values].each do |value_id| + value = feature_values[value_id].split(/\^\^/).first # remove XSD.type + @dataset.add entry[:compound],feature[value_id],value + end + end + load_features + @dataset.metadata = metadata + @dataset + end + + def load_features + @dataset.features.keys.each do |feature| + @dataset.features[feature] = Parser::Owl::Generic.new(feature).metadata + end + end + end + + end + + class Spreadsheet + + def initialize(dataset) + @dataset = dataset + @format_errors = "" + @smiles_errors = [] + @activity_errors = [] + @duplicates = {} + @nr_compounds = 0 + @data = [] + @activities = [] + @type = "classification" + end + + def load_excel(book) + book.default_sheet = 0 + 1.upto(book.last_row) do |row| + if row == 1 + @feature = File.join(@dataset.uri,"feature",book.cell(row,2)) + else + add( book.cell(row,1), book.cell(row,2), row ) # smiles, activity + end + end + parse + end + + def load_csv(csv) + row = 0 + csv.each_line do |line| + row += 1 + raise "Invalid CSV format at line #{row}: #{line.chomp}" unless line.chomp.match(/^.+[,;].*$/) # check CSV format + items = line.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes + if row == 1 + @feature = File.join(@dataset.uri,"feature",items[1]) + else + 
add(items[0], items[1], row) + end + end + parse + end + + def parse + + # create dataset + @data.each do |items| + case @type + when "classification" + case items[1].to_s + when TRUE_REGEXP + @dataset.add(items[0], @feature, true ) + when FALSE_REGEXP + @dataset.add(items[0], @feature, false) + end + when "regression" + if items[1].to_f == 0 + @activity_errors << "Row #{items[2]}: Zero values not allowed for regression datasets - entry ignored." + else + @dataset.add items[0], @feature, items[1].to_f + end + end + end + + warnings = '' + warnings += "

Incorrect Smiles structures (ignored):

" + @smiles_errors.join("
") unless @smiles_errors.empty? + warnings += "

Irregular activities (ignored):

" + @activity_errors.join("
") unless @activity_errors.empty? + duplicate_warnings = '' + @duplicates.each {|inchi,lines| duplicate_warnings << "

#{lines.join('
')}

" if lines.size > 1 } + warnings += "

Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from independent experiments):

" + duplicate_warnings unless duplicate_warnings.empty? + + @dataset.metadata[OT.Warnings] = warnings + + @dataset + + end + + def add(smiles, act, row) + compound = Compound.from_smiles(smiles) + if compound.nil? or compound.inchi.nil? or compound.inchi == "" + @smiles_errors << "Row #{row}: " + [smiles,act].join(", ") + return false + end + unless numeric?(act) or classification?(act) + @activity_errors << "Row #{row}: " + [smiles,act].join(", ") + return false + end + @duplicates[compound.inchi] = [] unless @duplicates[compound.inchi] + @duplicates[compound.inchi] << "Row #{row}: " + [smiles, act].join(", ") + @type = "regression" unless classification?(act) + # TODO: set OT.NumericalFeature, ... + @nr_compounds += 1 + @data << [ compound.uri, act , row ] + end + + def numeric?(object) + true if Float(object) rescue false + end + + def classification?(object) + !object.to_s.strip.match(TRUE_REGEXP).nil? or !object.to_s.strip.match(FALSE_REGEXP).nil? + end + + end + end +end diff --git a/lib/serializer.rb b/lib/serializer.rb new file mode 100644 index 0000000..3def252 --- /dev/null +++ b/lib/serializer.rb @@ -0,0 +1,297 @@ +require 'spreadsheet' +require 'yajl' + +module OpenTox + + module Serializer + + # modelled according to to http://n2.talis.com/wiki/RDF_JSON_Specification + class Owl + + attr_accessor :object + + def initialize + + @object = { + # this should come from opntox.owl + OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Dataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.DataEntry => { RDF["type"] => [{ "type" => "uri", "value" 
=> OWL['Class'] }] } , + OT.FeatureValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } , + + OT.compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.dataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.acceptValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + #XSD.anyUri => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , + + DC.title => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + DC.identifier => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + DC.contributor => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + DC.creator => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + + OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , + + #Untyped Individual: http://localhost/algorithm + } + + @data_entries = {} + @values_id = 0 + @parameter_id = 0 + + @classes = 
Set.new + @object_properties = Set.new + @annotation_properties = Set.new + @datatype_properties = Set.new + + @objects = Set.new + end + + def add_compound(uri) + #@classes << OT.Compound unless @classes.include? OT.Compound + @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Compound }] } + end + + def add_feature(uri,metadata) + #@classes << OT.Feature unless @classes.include? OT.Feature + #@classes << OT.NominalFeature unless @classes.include? OT.NominalFeature + #@classes << OT.NumericFeature unless @classes.include? OT.NumericFeature + #@classes << OT.StringFeature unless @classes.include? OT.StringFeature + @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } + add_metadata uri, metadata + end + + def add_dataset(dataset) + + @dataset = dataset.uri + + @object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] } + + add_metadata dataset.uri, dataset.metadata + + dataset.compounds.each { |compound| add_compound compound } + + dataset.features.each { |feature,metadata| add_feature feature,metadata } + + dataset.data_entries.each do |compound,entry| + entry.each do |feature,values| + values.each { |value| add_data_entry compound,feature,value } + end + end + + end + + def add_algorithm(uri,metadata,parameters) + @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + add_metadata uri, metadata + add_parameters uri, parameters + #metadata.each { |u,v| @object[uri][u] = [{"type" => type(v), "value" => v }] } + end + + def add_model(uri,metadata) + end + + def add_metadata(uri,metadata) + #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] } + metadata.each do |u,v| + @object[uri][u] = [{"type" => type(v), "value" => v }] + end + end + + def add_parameters(uri,parameters) + #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] } + @object[uri][OT.parameters] = [] unless @object[uri][OT.parameters] + parameters.each do |p| + 
parameter = "_:parameter#{@parameter_id}" + @parameter_id += 1 + @object[uri][OT.parameters] << {"type" => "bnode", "value" => parameter} + @object[parameter] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter }] } + add_metadata parameter, p + end + end + + def add_data_entry(compound,feature,value) + add_compound(compound) unless @object[compound] + add_feature(feature,{}) unless @object[feature] + unless data_entry = @data_entries[compound] + data_entry = "_:dataentry#{@data_entries.size}" + @data_entries[compound] = data_entry + @object[@dataset][OT.dataEntry] = [] unless @object[@dataset][OT.dataEntry] + @object[@dataset][OT.dataEntry] << {"type" => "bnode", "value" => data_entry} + @object[data_entry] = { + RDF["type"] => [{ "type" => "uri", "value" => OT.DataEntry }], + OT.compound => [{ "type" => "uri", "value" => compound }], + OT.values => [], + } + end + values = "_:values#{@values_id}" + @values_id += 1 + @object[data_entry][OT.values] << {"type" => "bnode", "value" => values} + case type(value) + when "uri" + v = [{ "type" => "uri", "value" => value}] + when "literal" + v = [{ "type" => "literal", "value" => value, "datatype" => datatype(value) }] + else + raise "Illegal type #{type(value)} for #{value}." 
+ end + @object[values] = { + RDF["type"] => [{ "type" => "uri", "value" => OT.FeatureValue }], + OT.feature => [{ "type" => "uri", "value" => feature }], + OT.value => v + } + @object[feature][RDF["type"]] << { "type" => "uri", "value" => featuretype(value) } + end + + # Serializers + + def ntriples + + #rdf_types + @triples = Set.new + @object.each do |s,entry| + s = url(s) if type(s) == "uri" + entry.each do |p,objects| + p = url(p) + objects.each do |o| + case o["type"] + when "uri" + o = url(o["value"]) + when "literal" + o = literal(o["value"],datatype(o["value"])) + when "bnode" + o = o["value"] + end + @triples << [s,p,o] + end + end + end + @triples.sort.collect{ |s| s.join(' ').concat(" .") }.join("\n")+"\n" + end + + def rdfxml + Tempfile.open("owl-serializer"){|f| f.write(ntriples); @path = f.path} + `rapper -i ntriples -o rdfxml #{@path}` + end + + def json + #rdf_types + Yajl::Encoder.encode(@object) + end + + # Helpers for type detection + private + + def datatype(value) + if value.is_a? TrueClass or value.is_a? FalseClass + XSD.boolean + elsif value.is_a? Float + XSD.float + else + XSD.string + end + end + + def featuretype(value) + if value.is_a? TrueClass or value.is_a? FalseClass + datatype = OT.NominalFeature + elsif value.is_a? 
Float + datatype = OT.NumericFeature + else + datatype = OT.StringFeature + end + end + + def type(value) + begin + uri = URI.parse(value) + if uri.class == URI::HTTP or uri.class == URI::HTTPS + "uri" + elsif value.match(/^_/) + "bnode" + else + "literal" + end + rescue + "literal" + end + end + + def literal(value,type) + # concat and << are faster string concatenation operators than + + '"'.concat(value.to_s).concat('"^^<').concat(type).concat('>') + end + + def url(uri) + # concat and << are faster string concatenation operators than + + '<'.concat(uri).concat('>') + end + + def rdf_types + @classes.each { |c| @object[c] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } } + @object_properties.each { |p| @object[p] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['ObjectProperty'] }] } } + @annotation_properties.each { |a| @object[a] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['AnnotationProperty'] }] } } + @datatype_properties.each { |d| @object[d] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['DatatypeProperty'] }] } } + end + + end + + class Spreadsheets # to avoid name clash with Spreadsheet gem + + def initialize(dataset) + @rows = [] + @rows << ["SMILES"] + features = dataset.features.keys + @rows.first << features + @rows.first.flatten! 
+ dataset.data_entries.each do |compound,entries| + smiles = Compound.new(compound).smiles + row = Array.new(@rows.first.size) + row[0] = smiles + entries.each do |feature, values| + i = features.index(feature)+1 + values.each do |value| + row[i] = value #TODO overwrites duplicated values + end + end + @rows << row + end + end + + def csv + @rows.collect{|r| r.join(", ")}.join("\n") + end + + def excel + Spreadsheet.client_encoding = 'UTF-8' + book = Spreadsheet::Workbook.new + sheet = book.create_worksheet(:name => '') + sheet.column(0).width = 100 + i = 0 + @rows.each do |row| + row.each do |c| + sheet.row(i).push c + end + i+=1 + end + book + end + + end + + + end +end diff --git a/lib/task.rb b/lib/task.rb index 1ab3893..50f0347 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -16,7 +16,7 @@ module OpenTox # create is private now, use OpenTox::Task.as_task def self.create( params ) - task_uri = RestClientWrapper.post(@@config[:services]["opentox-task"], params, nil, false).to_s + task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s Task.find(task_uri.chomp) end @@ -36,7 +36,7 @@ module OpenTox def reload( accept_header=nil ) unless accept_header - if (@@config[:yaml_hosts].include?(URI.parse(uri).host)) + if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) accept_header = "application/x-yaml" else accept_header = 'application/rdf+xml' @@ -99,7 +99,7 @@ module OpenTox # waits for a task, unless time exceeds or state is no longer running def wait_for_completion(dur=0.3) - if (@uri.match(@@config[:services]["opentox-task"])) + if (@uri.match(CONFIG[:services]["opentox-task"])) due_to_time = (@due_to_time.is_a?(Time) ? @due_to_time : Time.parse(@due_to_time)) running_time = due_to_time - (@date.is_a?(Time) ? 
@date : Time.parse(@date)) else @@ -144,7 +144,7 @@ module OpenTox #return yield nil params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description } - task = OpenTox::Task.create(params) + task = ::OpenTox::Task.create(params) task_pid = Spork.spork(:logger => LOGGER) do LOGGER.debug "Task #{task.uri} started #{Time.now}" $self_task = task diff --git a/lib/validation.rb b/lib/validation.rb index 89a2a0c..340332a 100644 --- a/lib/validation.rb +++ b/lib/validation.rb @@ -4,11 +4,11 @@ module OpenTox attr_accessor :uri def initialize(params) - @uri = OpenTox::RestClientWrapper.post(File.join(@@config[:services]["opentox-validation"],"/crossvalidation"),params,nil,false) + @uri = OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/crossvalidation"),params,nil,false) end def self.crossvalidation(params) - params[:uri] = File.join(@@config[:services]['opentox-validation'], "crossvalidation") + params[:uri] = File.join(CONFIG[:services]['opentox-validation'], "crossvalidation") params[:num_folds] = 10 unless params[:num_folds] params[:random_seed] = 2 unless params[:random_seed] params[:stratified] = false unless params[:stratified] -- cgit v1.2.3