summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Rakefile2
-rw-r--r--lib/algorithm.rb41
-rw-r--r--lib/compound.rb184
-rw-r--r--lib/dataset.rb273
-rw-r--r--lib/environment.rb20
-rw-r--r--lib/feature.rb7
-rw-r--r--lib/features.rb19
-rw-r--r--lib/model.rb16
-rw-r--r--lib/opentox-ruby-api-wrapper.rb4
-rw-r--r--lib/opentox.rb79
-rw-r--r--lib/owl-serializer.rb65
-rw-r--r--lib/owl.rb593
-rw-r--r--lib/parser.rb191
-rw-r--r--lib/serializer.rb297
-rw-r--r--lib/task.rb8
-rw-r--r--lib/validation.rb4
16 files changed, 981 insertions, 822 deletions
diff --git a/Rakefile b/Rakefile
index 3846bd1..8dd1088 100644
--- a/Rakefile
+++ b/Rakefile
@@ -27,6 +27,8 @@ begin
"google-spreadsheet-ruby",
"tmail",
"rinruby",
+ "rdf",
+ "rdf-raptor",
"rjb"
].each { |dep| gem.add_dependency dep }
[ "dm-core",
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 4d9156a..e1d369a 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -1,27 +1,22 @@
-
module OpenTox
+
module Algorithm
-
-
- class Generic
-
- attr_accessor :uri, :title, :date
-
- def self.find(uri)
- owl = OpenTox::Owl.from_uri(uri, "Algorithm")
- return self.new(owl)
- end
-
+
+ include OtObject
+
+ class Generic
+ include Algorithm
+ #include OtObject
protected
- def initialize(owl)
- @title = owl.get("title")
- @date = owl.get("date")
- @uri = owl.uri
- end
+# def initialize(owl)
+# @title = owl.get("title")
+# @date = owl.get("date")
+# @uri = owl.uri
+# end
end
- class Fminer
+ class Fminer < Generic
def self.create_feature_dataset(params)
LOGGER.debug File.basename(__FILE__) + ": creating feature dataset"
@@ -30,7 +25,7 @@ module OpenTox
end
def self.uri
- File.join(@@config[:services]["opentox-algorithm"], "fminer")
+ File.join(CONFIG[:services]["opentox-algorithm"], "fminer")
end
end
@@ -39,13 +34,13 @@ module OpenTox
def self.create_model(params)
LOGGER.debug params
LOGGER.debug File.basename(__FILE__) + ": creating model"
- LOGGER.debug File.join(@@config[:services]["opentox-algorithm"], "lazar")
- resource = RestClient::Resource.new(File.join(@@config[:services]["opentox-algorithm"], "lazar"), :content_type => "application/x-yaml")
- @uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(@@config[:services]["opentox-algorithm"], "fminer")).body.chomp
+ LOGGER.debug File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
+ resource = RestClient::Resource.new(File.join(CONFIG[:services]["opentox-algorithm"], "lazar"), :content_type => "application/x-yaml")
+ @uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(CONFIG[:services]["opentox-algorithm"], "fminer")).body.chomp
end
def self.uri
- File.join(@@config[:services]["opentox-algorithm"], "lazar")
+ File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
end
end
diff --git a/lib/compound.rb b/lib/compound.rb
index 49c166f..699e4c1 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -3,82 +3,120 @@
module OpenTox
- class Compound #< OpenTox
-
- attr_reader :inchi, :uri
-
- # Initialize with <tt>:uri => uri</tt>, <tt>:smiles => smiles</tt> or <tt>:name => name</tt> (name can be also an InChI/InChiKey, CAS number, etc)
- def initialize(params)
- if params[:smiles]
- @inchi = smiles2inchi(params[:smiles])
- @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi))
- elsif params[:inchi]
- @inchi = params[:inchi]
- @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi))
- elsif params[:sdf]
- @inchi = sdf2inchi(params[:sdf])
- @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi))
- elsif params[:name]
- # paranoid URI encoding to keep SMILES charges and brackets
- @inchi = RestClient.get("#{@@cactus_uri}#{URI.encode(params[:name], Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))}/stdinchi").body.chomp
- # this was too hard for me to debug and leads to additional errors (ch)
- #@inchi = RestClientWrapper.get("#{@@cactus_uri}#{URI.encode(params[:name], Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))}/stdinchi").chomp
- @uri = File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi))
- elsif params[:uri]
- @uri = params[:uri]
- case params[:uri]
- when /ambit/ # Ambit does not deliver InChIs reliably
- smiles = RestClientWrapper.get @uri, :accept => 'chemical/x-daylight-smiles'
- @inchi = obconversion(smiles,'smi','inchi')
- when /InChI/ # shortcut for IST services
- @inchi = params[:uri].sub(/^.*InChI/, 'InChI')
- else
- @inchi = RestClientWrapper.get @uri, :accept => 'chemical/x-inchi'
- end
- end
- end
+ # Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure).
+ #
+ # Examples:
+ # require "opentox-ruby-api-wrapper"
+ #
+ # # Creating compounds
+ #
+ # # from smiles string
+ # compound = OpenTox::Compound.from_smiles("c1ccccc1")
+ # # from name
+ # compound = OpenTox::Compound.from_name("Benzene")
+ # # from uri
+ # compound = OpenTox::Compound.new("http://webservices.in-silico.ch/compound/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H")
+ #
+ # # Getting compound representations
+ #
+ # # get InChI
+ # inchi = compound.inchi
+ # # get all compound names
+ # names = compound.names
+ # # get png image
+ # image = compound.png
+ # # get uri
+ # uri = compound.uri
+ #
+ # # SMARTS matching
+ #
+ # # match a smarts string
+ # compound.match?("cN") # returns false
+ # # match an array of smarts strings
+ # compound.match(['cc','cN']) # returns ['cc']
+ class Compound
+
+ attr_accessor :inchi, :uri
+
+ # Create compound with optional uri
+ def initialize(uri=nil)
+ @uri = uri
+ case @uri
+ when /InChI/ # shortcut for IST services
+ @inchi = @uri.sub(/^.*InChI/, 'InChI')
+ else
+ @inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri
+ end
+ end
- # Get the (canonical) smiles
+ # Create a compound from smiles string
+ def self.from_smiles(smiles)
+ c = Compound.new
+ c.inchi = Compound.smiles2inchi(smiles)
+ c.uri = File.join(CONFIG[:services]["opentox-compound"],URI.escape(c.inchi))
+ c
+ end
+
+ # Create a compound from inchi string
+ def self.from_inchi(inchi)
+ c = Compound.new
+ c.inchi = inchi
+ c.uri = File.join(CONFIG[:services]["opentox-compound"],URI.escape(c.inchi))
+ c
+ end
+
+ # Create a compound from sdf string
+ def self.from_sdf(sdf)
+ c = Compound.new
+ c.inchi = Compound.sdf2inchi(sdf)
+ c.uri = File.join(CONFIG[:services]["opentox-compound"],URI.escape(c.inchi))
+ c
+ end
+
+ # Create a compound from name (name can be also an InChI/InChiKey, CAS number, etc)
+ def self.from_name(name)
+ c = Compound.new
+ # paranoid URI encoding to keep SMILES charges and brackets
+ c.inchi = RestClientWrapper.get("#{@@cactus_uri}#{URI.encode(name, Regexp.new("[^#{URI::PATTERN::UNRESERVED}]"))}/stdinchi").to_s.chomp
+ c.uri = File.join(CONFIG[:services]["opentox-compound"],URI.escape(c.inchi))
+ c
+ end
+
+ # Get (canonical) smiles
def smiles
- obconversion(@inchi,'inchi','can')
+ Compound.obconversion(@inchi,'inchi','can')
end
+ # Get sdf
def sdf
- obconversion(@inchi,'inchi','sdf')
+ Compound.obconversion(@inchi,'inchi','sdf')
end
+ # Get gif image
def gif
RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/image")
end
+ # Get png image
def png
RestClientWrapper.get(File.join @uri, "image")
end
+ # Get URI of compound image
+ def image_uri
+ File.join @uri, "image"
+ end
+
+ # Get all known compound names
def names
begin
- RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names")
+ RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names").split("\n")
rescue
"not available"
end
end
- def display_smarts_uri(activating, deactivating, highlight = nil)
- LOGGER.debug activating.to_yaml unless activating.nil?
- activating_smarts = URI.encode "\"#{activating.join("\"/\"")}\""
- deactivating_smarts = URI.encode "\"#{deactivating.join("\"/\"")}\""
- if highlight.nil?
- File.join @@config[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts)
- else
- File.join @@config[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts), "highlight", URI.encode(highlight)
- end
- end
-
- def image_uri
- File.join @uri, "image"
- end
-
- # Matchs a smarts string
+ # Match a smarts string
def match?(smarts)
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
@@ -89,30 +127,42 @@ module OpenTox
smarts_pattern.match(obmol)
end
- # Match an array of smarts features, returns matching features
+ # Match an array of smarts strings, returns array with matching smarts
def match(smarts_array)
smarts_array.collect{|s| s if match?(s)}.compact
end
- # AM
- # Match an array of smarts features, returns (0)1 for (non)matching features at each pos
- def match_all(smarts_array)
- smarts_array.collect{|s| match?(s) ? 1 : 0 }
- end
+ # Get URI of compound image with highlighted fragments
+ def matching_smarts_image_uri(activating, deactivating, highlight = nil)
+ activating_smarts = URI.encode "\"#{activating.join("\"/\"")}\""
+ deactivating_smarts = URI.encode "\"#{deactivating.join("\"/\"")}\""
+ if highlight.nil?
+ File.join CONFIG[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts)
+ else
+ File.join CONFIG[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts), "highlight", URI.encode(highlight)
+ end
+ end
+
+
+ private
- def sdf2inchi(sdf)
- obconversion(sdf,'sdf','inchi')
+ # Convert sdf to inchi
+ def self.sdf2inchi(sdf)
+ Compound.obconversion(sdf,'sdf','inchi')
end
- def smiles2inchi(smiles)
- obconversion(smiles,'smi','inchi')
+ # Convert smiles to inchi
+ def self.smiles2inchi(smiles)
+ Compound.obconversion(smiles,'smi','inchi')
end
- def smiles2cansmi(smiles)
- obconversion(smiles,'smi','can')
+ # Convert smiles to canonical smiles
+ def self.smiles2cansmi(smiles)
+ Compound.obconversion(smiles,'smi','can')
end
- def obconversion(identifier,input_format,output_format)
+ # Convert identifier from OpenBabel input_format to OpenBabel output_format
+ def self.obconversion(identifier,input_format,output_format)
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
obconversion.set_in_and_out_formats input_format, output_format
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 2eb2206..7c8ce24 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -1,35 +1,253 @@
module OpenTox
+ # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
+ #
+ # Examples:
+ # require "opentox-ruby-api-wrapper"
+ #
+ # # Creating datasets
+ #
+ # # create an empty dataset
+ # dataset = OpenTox::Dataset.new
+ # # create an empty dataset with URI
+ # # this does not load data from the dataset service - use one of the load_* methods
+ # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1")
+ # create new dataset and save it to obtain a URI
+ # dataset = OpenTox::Dataset.create
+ # # create a new dataset from yaml representation
+ # dataset = OpenTox::Dataset.from_yaml
+ # # create a new dataset from CSV string
+ # csv_string = "SMILES, Toxicity\nc1ccccc1N, true"
+ # dataset = OpenTox::Dataset.from_csv(csv_string)
+ #
+ # # Loading data
+ # # Datasets created with OpenTox::Dataset.new(uri) are empty by default
+ # # Invoking one of the following functions will load data into the object
+ #
+ # # create an empty dataset with URI
+ # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1")
+ # # loads (and returns) only metadata
+ # dataset.load_metadata
+ # # loads (and returns) only compounds
+ # dataset.load_compounds
+ # # loads (and returns) only features
+ # dataset.load_features
+ # # load all data from URI
+ # dataset.load_all
+ #
+ # # Getting dataset representations
+ #
+ # dataset = OpenTox::Dataset.new("http://webservices.in-silico.ch/dataset/1")
+ # dataset.load_all
+ # # OWL-DL (RDF/XML)
+ # dataset.rdfxml
+ # # OWL-DL (Ntriples)
+ # dataset.ntriples
+ # # YAML
+ # dataset.yaml
+ # # CSV
+ # dataset.csv
+ #
+ # # Modifying datasets
+ #
+ # # insert a statement (compound_uri,feature_uri,value)
+ # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true
+ #
+ #
+ # # Saving datasets
+ # # save dataset at dataset service
+ # dataset.save
+ #
+ # # Deleting datasets
+ # # delete dataset (also at dataset service)
+ # dataset.delete
class Dataset
- attr_accessor :uri, :title, :creator, :data, :features, :compounds
+ include OtObject
- def initialize( owl=nil )
- @data = {}
- @features = []
+ attr_reader :features, :compounds, :data_entries, :metadata
+ attr_writer :metadata
+
+ # Create dataset with optional URI
+ def initialize(uri=nil)
+ super uri
+ @features = {}
@compounds = []
-
+ @data_entries = {}
+ end
+
+ # Create and save an empty dataset (assigns URI to dataset)
+ def self.create(uri=CONFIG[:services]["opentox-dataset"])
+ dataset = Dataset.new
+ dataset.uri = RestClientWrapper.post(uri,{}).to_s.chomp
+ dataset
+ end
+
+ # Get all datasets from a service
+# def self.all(uri=CONFIG[:services]["opentox-dataset"])
+# RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)}
+# end
+
+ # Create a dataset from YAML string
+ def self.from_yaml(yaml)
+ dataset = Dataset.create
+ dataset.copy YAML.load(yaml)
+ dataset
+ end
+
+ # Create dataset from CSV string (format specification: http://toxcreate.org/help)
+ # - loads data_entries, compounds, features
+ # - sets metadata (warnings) for parser errors
+ # - you will have to set remaining metadata manually
+ def self.from_csv(csv)
+ dataset = Dataset.create
+ Parser::Spreadsheet.new(dataset).load_csv(csv)
+ dataset
+ end
+
+ # Create dataset from Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
+ # - loads data_entries, compounds, features
+ # - sets metadata (warnings) for parser errors
+ # - you will have to set remaining metadata manually
+ def self.from_spreadsheet(book)
+ dataset = Dataset.create
+ Parser::Spreadsheet.new(dataset).load_excel(book)
+ dataset
+ end
+
+ # Load and return metadata of a Dataset object
+ def load_metadata
+ #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
+ #add_metadata YAML.load(RestClientWrapper.get(File.join(@uri,"metadata"), :accept => "application/x-yaml"))
+ #else
+ add_metadata Parser::Owl::Dataset.new(@uri).metadata
+ #end
+ self.uri = @uri if @uri # keep uri
+ @metadata
+ end
+
+ # Load all data (metadata, data_entries, compounds and features) from URI
+ def load_all
+ if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
+ copy YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml"))
+ else
+ parser = Parser::Owl::Dataset.new(@uri)
+ copy parser.load_uri
+ end
+ end
+
+ # Load and return all compound URIs
+ def load_compounds
+ RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri|
+ @compounds << compound_uri.chomp
+ end
+ @compounds.uniq!
+ end
+
+ # Load all feature URIs
+ def load_features
+ RestClientWrapper.get(File.join(uri,"features"),:accept=> "text/uri-list").to_s.each_line do |feature_uri|
+ @features[feature_uri.chomp] = Feature.new(feature_uri.chomp).load_metadata
+ end
+ @features
+ end
+
+ # Get YAML representation
+ def yaml
+ self.to_yaml
+ end
+
+ # Get Excel representation, returns a Spreadsheet::Workbook which can be written with the 'spreadsheet' gem (data_entries only, metadata will be discarded)
+ def excel
+ Serializer::Spreadsheets.new(self).excel
+ end
+
+ # Get CSV string representation (data_entries only, metadata will be discarded)
+ def csv
+ Serializer::Spreadsheets.new(self).csv
+ end
+
+ # Get OWL-DL in ntriples format
+ def ntriples
+ s = Serializer::Owl.new
+ s.add_dataset(self)
+ s.ntriples
+ end
+
+ # Get OWL-DL in RDF/XML format
+ def rdfxml
+ s = Serializer::Owl.new
+ s.add_dataset(self)
+ s.rdfxml
+ end
+
+ # Insert a statement (compound_uri,feature_uri,value)
+ def add (compound,feature,value)
+ @compounds << compound unless @compounds.include? compound
+ @features[feature] = {} unless @features[feature]
+ @data_entries[compound] = {} unless @data_entries[compound]
+ @data_entries[compound][feature] = [] unless @data_entries[compound][feature]
+ @data_entries[compound][feature] << value
+ end
+
+ # Add metadata (hash with predicate_uri => value)
+ def add_metadata(metadata)
+ metadata.each { |k,v| @metadata[k] = v }
+ end
+
+ # Copy a dataset (rewrites URI)
+ def copy(dataset)
+ @metadata = dataset.metadata
+ @data_entries = dataset.data_entries
+ @compounds = dataset.compounds
+ @features = dataset.features
+ if @uri
+ self.uri = @uri
+ else
+ @uri = dataset.metadata[XSD.anyUri]
+ end
+ end
+
+ # save dataset (overwrites existing dataset)
+ def save
+ # TODO: rewrite feature URI's ??
+ # create dataset if uri empty
+ @compounds.uniq!
+ RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
+ end
+
+ # Delete dataset at the dataset service
+ def delete
+ RestClientWrapper.delete @uri
+ end
+ end
+end
+
+ #########################################################
+ # kept for backward compatibility, may have to be fixed #
+ #########################################################
+
+=begin
+ def from_owl(owl)
# creates dataset object from Opentox::Owl object
# use Dataset.find( <uri> ) to load dataset from rdf-supporting datasetservice
# note: does not load all feature values, as this is time consuming
- if owl
- raise "invalid param" unless owl.is_a?(OpenTox::Owl)
- @title = owl.get("title")
- @creator = owl.get("creator")
- @uri = owl.uri
- # when loading a dataset from owl, only compound- and feature-uris are loaded
- owl.load_dataset(@compounds, @features)
- # all features are marked as dirty
- # as soon as a feature-value is requested all values for this feature are loaded from the rdf
- @dirty_features = @features.dclone
- @owl = owl
- end
+ raise "invalid param" unless owl.is_a?(OpenTox::Owl)
+ @metadata[DC.title] = owl.get("title")
+ @metadata[DC.creator] = owl.get("creator")
+ @metadata[XSD.anyUri] = owl.uri
+ # when loading a dataset from owl, only compound- and feature-uris are loaded
+ owl.load_dataset(@compounds, @features)
+ # all features are marked as dirty
+ # as soon as a feature-value is requested all values for this feature are loaded from the rdf
+ @dirty_features = @features.dclone
+ @owl = owl
end
def self.find(uri, accept_header=nil)
unless accept_header
- if (@@config[:yaml_hosts].include?(URI.parse(uri).host))
+ if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host))
accept_header = 'application/x-yaml'
else
accept_header = "application/rdf+xml"
@@ -38,8 +256,10 @@ module OpenTox
case accept_header
when "application/x-yaml"
+ LOGGER.debug "DATASET: "+ uri
+ LOGGER.debug RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
- d.uri = uri unless d.uri
+ #d.uri = @metadata[XSD.anyUri] unless d.uri
when "application/rdf+xml"
owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset")
d = Dataset.new(owl)
@@ -48,7 +268,7 @@ module OpenTox
end
d
end
-
+
# converts a dataset represented in owl to yaml
# (uses a temporary dataset)
# note: to_yaml is overwritten, loads complete owl dataset values
@@ -108,7 +328,7 @@ module OpenTox
raise "predicted class value is an array\n"+
"value "+v.to_s+"\n"+
"value-class "+v.class.to_s+"\n"+
- "dataset "+@uri.to_s+"\n"+
+ "dataset "+self.uri.to_s+"\n"+
"compound "+compound.to_s+"\n"+
"feature "+feature.to_s+"\n"
else
@@ -130,7 +350,7 @@ module OpenTox
raise "predicted regression value is an array\n"+
"value "+v.to_s+"\n"+
"value-class "+v.class.to_s+"\n"+
- "dataset "+@uri.to_s+"\n"+
+ "dataset "+self.uri.to_s+"\n"+
"compound "+compound.to_s+"\n"+
"feature "+feature.to_s+"\n"
else
@@ -181,7 +401,7 @@ module OpenTox
raise "value is not an array\n"+
"value "+v.to_s+"\n"+
"value-class "+v.class.to_s+"\n"+
- "dataset "+@uri.to_s+"\n"+
+ "dataset "+self.uri.to_s+"\n"+
"compound "+compound.to_s+"\n"+
"feature "+feature.to_s+"\n"
end
@@ -216,11 +436,6 @@ module OpenTox
super - ["@owl"]
end
- # saves (changes) as new dataset in dataset service
- # returns uri
- # uses to yaml method (which is overwritten)
- def save
- OpenTox::RestClientWrapper.post(@@config[:services]["opentox-dataset"],{:content_type => "application/x-yaml"},self.to_yaml).strip
- end
end
end
+=end
diff --git a/lib/environment.rb b/lib/environment.rb
index 0c62113..b16b62f 100644
--- a/lib/environment.rb
+++ b/lib/environment.rb
@@ -12,8 +12,8 @@ TMP_DIR = File.join(basedir, "tmp")
LOG_DIR = File.join(basedir, "log")
if File.exist?(config_file)
- @@config = YAML.load_file(config_file)
- raise "could not load config, config file: "+config_file.to_s unless @@config
+ CONFIG = YAML.load_file(config_file)
+ raise "could not load config, config file: "+config_file.to_s unless CONFIG
else
FileUtils.mkdir_p TMP_DIR
FileUtils.mkdir_p LOG_DIR
@@ -24,20 +24,20 @@ else
end
# database
-if @@config[:database]
+if CONFIG[:database]
['dm-core', 'dm-serializer', 'dm-timestamps', 'dm-types', 'dm-migrations', 'dm-validations' ].each{|lib| require lib }
- case @@config[:database][:adapter]
+ case CONFIG[:database][:adapter]
when /sqlite/i
db_dir = File.join(basedir, "db")
FileUtils.mkdir_p db_dir
DataMapper::setup(:default, "sqlite3://#{db_dir}/opentox.sqlite3")
else
DataMapper.setup(:default, {
- :adapter => @@config[:database][:adapter],
- :database => @@config[:database][:database],
- :username => @@config[:database][:username],
- :password => @@config[:database][:password],
- :host => @@config[:database][:host]})
+ :adapter => CONFIG[:database][:adapter],
+ :database => CONFIG[:database][:database],
+ :username => CONFIG[:database][:username],
+ :password => CONFIG[:database][:password],
+ :host => CONFIG[:database][:host]})
end
end
@@ -48,7 +48,7 @@ logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
#LOGGER = MyLogger.new(logfile,'daily') # daily rotation
LOGGER = MyLogger.new(logfile) # no rotation
LOGGER.formatter = Logger::Formatter.new #this is neccessary to restore the formating in case active-record is loaded
-if @@config[:logger] and @@config[:logger] == "debug"
+if CONFIG[:logger] and CONFIG[:logger] == "debug"
LOGGER.level = Logger::DEBUG
else
LOGGER.level = Logger::WARN
diff --git a/lib/feature.rb b/lib/feature.rb
new file mode 100644
index 0000000..9616135
--- /dev/null
+++ b/lib/feature.rb
@@ -0,0 +1,7 @@
+module OpenTox
+
+ class Feature
+ include OtObject
+ end
+
+end
diff --git a/lib/features.rb b/lib/features.rb
deleted file mode 100644
index 0fa1cf0..0000000
--- a/lib/features.rb
+++ /dev/null
@@ -1,19 +0,0 @@
-# CH: should go into validation service
-# - not a complete OT object
-# - only used twice
-# - what about ./validation/validation/validation_service.rb:241: value = OpenTox::Feature.new(:uri => a.uri).value(prediction_feature).to_s
-module OpenTox
-
- module Feature
-
- def self.domain( feature_uri )
- #TODO
- if feature_uri =~ /ambit/
- return nil
- else
- return ["true", "false"]
- end
- end
-
- end
-end
diff --git a/lib/model.rb b/lib/model.rb
index 3ecd61c..d0d6703 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -84,8 +84,8 @@ module OpenTox
def initialize
@source = "http://github.com/helma/opentox-model"
- @algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar")
- #@independent_variables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative")
+ @algorithm = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
+ #@independent_variables = File.join(CONFIG[:services]["opentox-algorithm"],"fminer#BBRC_representative")
@features = []
@effects = {}
@activities = {}
@@ -95,12 +95,12 @@ module OpenTox
def save
@features.uniq!
- resource = RestClient::Resource.new(@@config[:services]["opentox-model"])
+ resource = RestClient::Resource.new(CONFIG[:services]["opentox-model"])
resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s
end
def self.find_all
- RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n")
+ RestClientWrapper.get(CONFIG[:services]["opentox-model"]).chomp.split("\n")
end
def self.predict(compound_uri,model_uri)
@@ -115,8 +115,8 @@ module OpenTox
def initialize
@source = "http://github.com/helma/opentox-model"
- @algorithm = File.join(@@config[:services]["opentox-algorithm"],"property_lazar")
- #@independent_variables = File.join(@@config[:services]["opentox-algorithm"],"fminer#BBRC_representative")
+ @algorithm = File.join(CONFIG[:services]["opentox-algorithm"],"property_lazar")
+ #@independent_variables = File.join(CONFIG[:services]["opentox-algorithm"],"fminer#BBRC_representative")
@features = []
#@effects = {}
@activities = {}
@@ -126,12 +126,12 @@ module OpenTox
def save
@features.uniq!
- resource = RestClient::Resource.new(@@config[:services]["opentox-model"])
+ resource = RestClient::Resource.new(CONFIG[:services]["opentox-model"])
resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s
end
def self.find_all
- RestClientWrapper.get(@@config[:services]["opentox-model"]).chomp.split("\n")
+ RestClientWrapper.get(CONFIG[:services]["opentox-model"]).chomp.split("\n")
end
def self.predict(compound_uri,model_uri)
diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb
index d9db4ac..2749899 100644
--- a/lib/opentox-ruby-api-wrapper.rb
+++ b/lib/opentox-ruby-api-wrapper.rb
@@ -1,4 +1,4 @@
-['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'redland', 'rdf/redland', 'rdf/redland/util', 'environment'].each do |lib|
+['rubygems', 'sinatra', 'sinatra/url_for', 'rest_client', 'yaml', 'cgi', 'spork', 'environment'].each do |lib|
require lib
end
@@ -8,6 +8,6 @@ rescue LoadError
puts "Please install Openbabel with 'rake openbabel:install' in the compound component"
end
-['owl-serializer', 'compound','dataset','algorithm','model','task','validation','utils','features', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib|
+['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','utils','feature', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib|
require lib
end
diff --git a/lib/opentox.rb b/lib/opentox.rb
new file mode 100644
index 0000000..453ca66
--- /dev/null
+++ b/lib/opentox.rb
@@ -0,0 +1,79 @@
+module OpenTox
+
+ # Generic OpenTox class
+ module OtObject
+
+ attr_reader :uri
+ attr_accessor :metadata
+
+ # Initialize OpenTox object with optional uri
+ def initialize(uri=nil)
+ @metadata = {}
+ self.uri = uri if uri
+ end
+
+ # Set URI
+ def uri=(uri)
+ @uri = uri
+ @metadata[XSD.anyUri] = uri
+ end
+
+ # Get title
+ def title
+ load_metadata unless @metadata[DC.title]
+ @metadata[DC.title]
+ end
+
+ # Set title
+ def title=(title)
+ @metadata[DC.title] = title
+ end
+
+ # Get all objects from a service
+ def self.all(uri)
+ #def OtObject.all(uri)
+ RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.split(/\n/)
+ end
+
+ # Load metadata from URI
+ def load_metadata
+ #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
+ # TODO: fix metadata retrieval
+ #@metadata = YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml"))
+ #else
+ @metadata = Parser::Owl::Generic.new(@uri).metadata
+ #end
+ @metadata
+ #Parser::Owl::Generic.new(@uri).metadata
+ end
+
+ end
+
+ module Owl
+
+ class Namespace
+
+ def initialize(uri)
+ @uri = uri
+ end
+
+ def [](property)
+ @uri+property.to_s
+ end
+
+ def method_missing(property)
+ @uri+property.to_s
+ end
+
+ end
+ end
+
+end
+#
+# OWL Namespaces
+RDF = OpenTox::Owl::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+OWL = OpenTox::Owl::Namespace.new 'http://www.w3.org/2002/07/owl#'
+DC = OpenTox::Owl::Namespace.new 'http://purl.org/dc/elements/1.1/'
+OT = OpenTox::Owl::Namespace.new 'http://www.opentox.org/api/1.1#'
+XSD = OpenTox::Owl::Namespace.new 'http://www.w3.org/2001/XMLSchema#'
+
diff --git a/lib/owl-serializer.rb b/lib/owl-serializer.rb
deleted file mode 100644
index 8965bf2..0000000
--- a/lib/owl-serializer.rb
+++ /dev/null
@@ -1,65 +0,0 @@
-require 'rdf'
-require 'rdf/raptor'
-require 'rdf/ntriples'
-
-# RDF namespaces
-include RDF
-OT = RDF::Vocabulary.new 'http://www.opentox.org/api/1.1#'
-
-module OpenTox
-
- class OwlSerializer
-
- def initialize(klass,uri)
-
- @model = RDF::Graph.new(uri)
-
- @triples = []
- @triples << [ OT[klass], RDF.type, OWL.Class ]
- @triples << [ RDF::URI.new(uri), RDF.type, OT[klass] ]
-
- @classes = [ OT[klass] ]
- @object_properties = []
- @annotation_properties = []
- @objects = [ uri ]
-
- end
-
- def self.create(klass, uri)
- OpenTox::OwlSerializer.new(klass,uri)
- end
-
- def rdf
- @triples.each { |statement| @model << statement }
- RDF::Writer.for(:rdfxml).buffer do |writer|
- writer << @model
- end
- end
-
- def object_property(subject,predicate,object,object_class)
- s = [ RDF::URI.new(subject), predicate, RDF::URI.new(object) ] #
- @triples << s unless @triples.include? s
- unless @object_properties.include? predicate
- @triples << [ predicate, RDF.type, OWL.ObjectProperty ]
- @object_properties << predicate
- end
- unless @objects.include? object
- @triples << [ RDF::URI.new(object), RDF.type, object_class ]
- @objects << object
- end
- unless @classes.include? object_class
- @triples << [ object_class, RDF.type, OWL.Class ]
- @classes << object_class
- end
- end
-
- def annotation_property(subject, predicate, value, datatype)
- s = [ RDF::URI.new(subject), predicate, RDF::Literal.new(value, :datatype => datatype) ]
- @triples << s unless @triples.include? s
- unless @annotation_properties.include? predicate
- @triples << [ predicate, RDF.type, OWL.AnnotationProperty ]
- @annotation_properties << predicate
- end
- end
- end
-end
diff --git a/lib/owl.rb b/lib/owl.rb
deleted file mode 100644
index f4128ee..0000000
--- a/lib/owl.rb
+++ /dev/null
@@ -1,593 +0,0 @@
-require 'rdf'
-require 'rdf/ntriples'
-require 'rdf/raptor'
-include RDF
-# RDF namespaces
-#RDF = Redland::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
-OWL = Redland::Namespace.new 'http://www.w3.org/2002/07/owl#'
-DC = Redland::Namespace.new 'http://purl.org/dc/elements/1.1/'
-OT = Redland::Namespace.new 'http://www.opentox.org/api/1.1#'
-#OT = Redland::Namespace.new 'http://ortona.informatik.uni-freiburg.de/opentox.owl#'
-XML = Redland::Namespace.new 'http://www.w3.org/2001/XMLSchema#'
-
-# overriding literal to give nice access to datatype
-# and to access the stored value as correct ruby type
-class Redland::Literal
-
- def self.create(value, type)
- raise "literal datatype may not be nil" unless type
- type = parse_datatype_uri(value) if OpenTox::Owl::PARSE_LITERAL_TYPE==type
-
- if type.is_a?(Redland::Uri)
- Redland::Literal.new(value.to_s,nil,type)
- else
- Redland::Literal.new(value.to_s,nil,Redland::Uri.new(type.to_s))
- end
- end
-
- # the literal node of the ruby swig api provdides the 'value' of a literal but not the 'datatype'
- # found solution in mailing list
- def datatype
- uri = Redland.librdf_node_get_literal_value_datatype_uri(self.node)
- return Redland.librdf_uri_to_string(uri) if uri
- end
-
- # gets value of literal, value class is se according to literal datatype
- def get_value
- Redland::Literal.parse_value( self.value, self.datatype )
- end
-
- private
- # parses value according to datatype uri
- def self.parse_value(string_value, datatype_uri)
-
- if (datatype_uri==nil || datatype_uri.size==0)
- LOGGER.warn("empty datatype for literal with value: '"+string_value+"'")
- return string_value
- end
- case datatype_uri
- when OpenTox::Owl::LITERAL_DATATYPE_STRING.to_s
- return string_value
- when OpenTox::Owl::LITERAL_DATATYPE_URI.to_s
- return string_value #PENDING uri as string?
- when OpenTox::Owl::LITERAL_DATATYPE_FLOAT.to_s
- return string_value.to_f
- when OpenTox::Owl::LITERAL_DATATYPE_DOUBLE.to_s
- return string_value.to_f
- when OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN.to_s
- return string_value.upcase=="TRUE"
- when OpenTox::Owl::LITERAL_DATATYPE_DATE.to_s
- return Time.parse(string_value)
- when OpenTox::Owl::LITERAL_DATATYPE_DATETIME.to_s
- return Time.parse(string_value)
- when OpenTox::Owl::LITERAL_DATATYPE_INTEGER.to_s
- return string_value.to_i
- else
- raise "unknown literal datatype: '"+datatype_uri.to_s+"' (value is "+string_value+
- "), please specify new OpenTox::Owl::LITERAL_DATATYPE"
- end
- end
-
- # parse datatype uri accoring to value class
- def self.parse_datatype_uri(value)
- if value==nil
- raise "illegal datatype: value is nil"
- elsif value.is_a?(String)
- # PENDING: uri check too slow?
- if OpenTox::Utils.is_uri?(value)
- return OpenTox::Owl::LITERAL_DATATYPE_URI
- else
- return OpenTox::Owl::LITERAL_DATATYPE_STRING
- end
- elsif value.is_a?(Float)
- return OpenTox::Owl::LITERAL_DATATYPE_FLOAT
- elsif value.is_a?(TrueClass) or value.is_a?(FalseClass)
- return OpenTox::Owl::LITERAL_DATATYPE_BOOLEAN
- elsif value.is_a?(Integer)
- return OpenTox::Owl::LITERAL_DATATYPE_INTEGER
- elsif value.is_a?(DateTime)
- return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
- elsif value.is_a?(Time)
- return OpenTox::Owl::LITERAL_DATATYPE_DATETIME
- else
- raise "illegal datatype: "+value.class.to_s+" "+value.to_s
- end
- end
-end
-
-module OpenTox
-
- class Owl
-
- # to get correct owl-dl, properties and objects have to be typed
- # i.e. the following triple is insufficient:
- # ModelXY,ot:algorithm,AlgorithmXY
- # further needed:
- # ot:algorithm,rdf:type,owl:ObjectProperty
- # AlgorithmXY,rdf:type,ot:Algorithm
- # ot:Algorithm,rdf:type,owl:Class
- #
- # therefore OpentoxOwl needs info about the opentox-ontology
- # the info is stored in OBJECT_PROPERTY_CLASS and LITERAL_TYPES
-
- # contains all owl:ObjectProperty as keys, and the respective classes as value
- # some object properties link to objects from different classes (e.g. "values can be "Tuple", or "FeatureValue")
- # in this case, use set_object_property() (instead of set()) and specify class manually
- OBJECT_PROPERTY_CLASS = {}
- [ "model" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Model"}
- [ "algorithm" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Algorithm"}
- [ "trainingDataset", "testTargetDataset", "predictionDataset",
- "testDataset", "dataset" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Dataset"}
- [ "feature", "dependentVariables", "independentVariables",
- "predictedVariables", "predictionFeature" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Feature"}
- [ "parameters" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Parameter"}
- [ "compound" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Compound"}
- [ "dataEntry" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "DataEntry"}
- [ "complexValue" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "FeatureValue"}
- [ "classificationStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassificationStatistics"}
- [ "classValueStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ClassValueStatistics"}
- [ "confusionMatrix" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrix"}
- [ "confusionMatrixCell" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "ConfusionMatrixCell"}
- [ "regressionStatistics" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "RegressionStatistics"}
- [ "validation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Validation"}
- [ "crossvalidationInfo" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "CrossvalidationInfo"}
- [ "crossvalidation" ].each{ |c| OBJECT_PROPERTY_CLASS[c] = "Crossvalidation"}
-
- # literals point to primitive values (not to other resources)
- # the literal datatype is encoded via uri:
- LITERAL_DATATYPE_STRING = XML["string"].uri
- LITERAL_DATATYPE_URI = XML["anyURI"].uri
- LITERAL_DATATYPE_FLOAT = XML["float"].uri
- LITERAL_DATATYPE_DOUBLE = XML["double"].uri
- LITERAL_DATATYPE_DATE = XML["date"].uri
- LITERAL_DATATYPE_BOOLEAN = XML["boolean"].uri
- LITERAL_DATATYPE_DATETIME = XML["dateTime"].uri
- LITERAL_DATATYPE_INTEGER = XML["integer"].uri
-
- # list all literals (to distinguish from objectProperties) as keys, datatype as values
- # (do not add dc-identifier, deprecated, object are identified via name=uri)
- LITERAL_TYPES = {}
- [ "title", "creator", "format", "description", "hasStatus", "paramScope", "paramValue",
- "classValue", "reportType", "confusionMatrixActual",
- "confusionMatrixPredicted" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_STRING }
- [ "date", "due_to_time" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DATE }
- [ "percentageCompleted", "truePositiveRate", "fMeasure", "falseNegativeRate",
- "areaUnderRoc", "falsePositiveRate", "trueNegativeRate", "precision", "recall",
- "percentCorrect", "percentIncorrect", "weightedAreaUnderRoc", "numCorrect",
- "percentIncorrect", "percentUnpredicted", "realRuntime",
- "percentWithoutClass", "rootMeanSquaredError", "meanAbsoluteError", "rSquare",
- "targetVarianceActual", "targetVariancePredicted", "sumSquaredError",
- "sampleCorrelationCoefficient" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_DOUBLE }
- [ "numTrueNegatives", "numWithoutClass", "numFalseNegatives", "numTruePositives",
- "numFalsePositives", "numIncorrect", "numInstances", "numUnpredicted",
- "randomSeed", "numFolds", "confusionMatrixValue",
- "crossvalidationFold" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_INTEGER }
- [ "resultURI" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_URI }
- [ "stratified" ].each{ |l| LITERAL_TYPES[l] = LITERAL_DATATYPE_BOOLEAN }
- # some literals can have different types, parse from ruby type
- PARSE_LITERAL_TYPE = "PARSE_LITERAL_TYPE"
- [ "value" ].each{ |l| LITERAL_TYPES[l] = PARSE_LITERAL_TYPE }
-
- # constants for often used redland-resources
- OWL_TYPE_LITERAL = OWL["AnnotationProperty"]
- OWL_TYPE_CLASS = OWL["Class"]
- OWL_TYPE_OBJECT_PROPERTY = OWL["ObjectProperty"]
- RDF_TYPE = RDF['type']
-
- # store redland:resources (=nodes) to:
- # * separate namespaces (OT from RDF and DC)
- # * save time, as generating resources is timeconsuming in redland
- @@nodes = {}
- [ "type", "about"].each{ |l| @@nodes[l] = RDF[l] }
- [ "title", "creator", "date", "format" ].each{ |l| @@nodes[l] = DC[l] }
-
- def node(property)
- raise "can only create node for non-empty-string, but given "+property.class.to_s+" (value: "+
- property.to_s+")" unless property.is_a?(String) and property.size>0
- raise "dc[identifier] deprecated, use owl.uri" if property=="identifier"
- @@nodes[property] = OT[property] unless @@nodes.has_key?(property)
- return @@nodes[property]
- end
-
- # ot_class is the class of the object as string, e.g. "Model","Dataset", ...
- # root_node is the root-object node in the rdf
- # uri the uri of the object
- attr_accessor :ot_class, :root_node, :uri, :model, :triples
-
- private
- def initialize
- @triples = []
- @model = Redland::Model.new Redland::MemoryStore.new
- #@triples = ""
- end
-
- # build new owl object
- # ot_class is the class of this object, should be a string like "Model", "Task", ...
- # uri is name and identifier of this object
- public
- def self.create( ot_class, uri )
-
- owl = OpenTox::Owl.new
- owl.ot_class = ot_class
- owl.root_node = Redland::Resource.new(uri.to_s.strip)
- owl.set("type",owl.ot_class)
- owl.uri = uri
- owl
- end
-
- # loads owl from data
- def self.from_data(data, base_uri, ot_class)
-
- owl = OpenTox::Owl.new
- parser = Redland::Parser.new
-
- begin
- parser.parse_string_into_model(owl.model, data, base_uri)
-
- # now loading root_node and uri
- owl.model.find(nil, RDF_TYPE, owl.node(ot_class)) do |s,p,o|
- #LOGGER.debug "about statements "+s.to_s+" . "+p.to_s+" -> "+o.to_s
- is_root = true
- owl.model.find(nil, nil, s) do |ss,pp,oo|
- is_root = false
- break
- end
- if is_root
- # handle error if root is already set
- raise "cannot derieve root object from rdf, more than one object specified" if owl.uri
- raise "illegal root node type, no uri specified\n"+data.to_s if s.blank?
- #store root note and uri
- owl.uri = s.uri.to_s
- owl.root_node = s
- end
- end
-
- # handle error if no root node was found
- unless owl.root_node
- types = []
- owl.model.find(nil, RDF_TYPE, nil){ |s,p,o| types << o.to_s }
- raise "root node for class '"+owl.node(ot_class).to_s+"' not found (available type nodes: "+types.inspect+")"
- end
- raise "no uri in rdf: '"+owl.uri+"'" unless owl.uri and Utils.is_uri?(owl.uri)
- owl.ot_class = ot_class
- owl
- rescue => e
- RestClientWrapper.raise_uri_error(e.message, base_uri)
- end
- end
-
- def self.from_uri(uri, ot_class)
- return from_data(RestClientWrapper.get(uri,:accept => "application/rdf+xml").to_s, uri, ot_class)
- end
-
- def rdf
- #@model.to_string
- #stdin, stdout, stderr = Open3.popen3('rapper -I test.org -i ntriples -o rdfxml -')
- #stdin.puts @triples
- #stdout
- #File.open("/tmp/d","w+") {|f| f.puts @triples}
- #`rapper -i ntriples -o rdfxml /tmp/d`
- #@triples
- #output = RDF::Writer.for(:rdfxml).buffer do |writer|
- RDF::Writer.for(:rdfxml).buffer do |writer|
- @triples.each do |statement|
- begin
- writer << statement
- rescue => e
- LOGGER.error e
- LOGGER.info statement.inspect
- end
- end
- end
- #output
- end
-
- # returns the first object for subject:root_node and property
- # (sufficient for accessing simple, root-node properties)
- def get( property )
- raise "uri is no prop, use owl.uri instead" if property=="uri"
- return get_value( @model.object( @root_node, node(property.to_s)) )
- end
-
- # returns an array of objects (not only the first one) that fit for the property
- # accepts array of properties to access not-root-node vaules
- # i.e. validation_owl.get_nested( [ "confusionMatrix", "confusionMatrixCell", "confusionMatrixValue" ]
- # returns an array of all confusionMatrixValues
- def get_nested( property_array )
- n = [ @root_node ]
- property_array.each do |p|
- new_nodes = []
- n.each do |nn|
- @model.find( nn, node(p), nil ) do |sub,pred,obj|
- new_nodes << obj
- end
- end
- n = new_nodes
- end
- return n.collect{|nn| get_value( nn )}
- end
-
- private
- # returns node-value
- def get_value( node )
- return nil unless node
- if node.is_a?(Redland::Literal)
- return node.get_value
- elsif node.blank?
- return nil
- else
- return node.uri.to_s
- end
- end
-
- public
- # sets values of current_node (by default root_node)
- #
- # note: this does not delete existing triples
- # * there can be several triples for the same subject and predicate
- # ( e.g. after set("description","bla1") and set("description","bla2")
- # both descriptions are in the model,
- # but the get("description") will give you only one object (by chance)
- # * this does not matter in pratice (only dataset uses this -> load_dataset-methods)
- # * identical values appear only once in rdf
- def set(predicate, object, current_node=@root_node )
-
- pred = predicate.to_s
- raise "uri is no prop, cannot set uri" if pred=="uri"
- raise "dc[identifier] deprecated, use owl.uri" if pred=="identifier"
- if (object.is_a?(Redland::Node) and object.blank?) or nil==object or object.to_s.size==0
- # set only not-nil values
- LOGGER.warn "skipping (not setting) empty value in rdf for property: '"+pred+"'"
- return
- end
-
- if pred=="type"
- # predicate is type, set class of current node
- set_type(object, current_node)
- elsif LITERAL_TYPES.has_key?(pred)
- # predicate is literal
- set_literal(pred,object,LITERAL_TYPES[pred],current_node)
- elsif OBJECT_PROPERTY_CLASS.has_key?(pred)
- # predicte is objectProperty, object is another resource
- set_object_property(pred,object,OBJECT_PROPERTY_CLASS[pred],current_node)
- else
- raise "unkonwn rdf-property, please add: '"+pred+"' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or OpenTox::OWL.LITERAL_TYPES"
- end
- end
-
- # example-triples for setting rdf-type to model:
- # model_xy,rdf:type,ot:Model
- # ot:Model,rdf:type,owl:Class
- def set_type(ot_class, current_node=@root_node)
- #@triples += "#{ot_class.to_s} #{RDF_TYPE.to_s} #{current_node.to_s}"
- #@triples << "#{current_node} #{RDF_TYPE} #{node(ot_class).to_s}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- #@triples << "#{node(ot_class).to_s} #{RDF_TYPE} #{OWL_TYPE_CLASS}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- add current_node, RDF_TYPE, node(ot_class)
- add node(ot_class), RDF_TYPE, OWL_TYPE_CLASS
- end
-
- # example-triples for setting description of a model:
- # model_xy,ot:description,bla..bla^^xml:string
- # ot:description,rdf:type,owl:Literal
- def set_literal(literal_name, literal_value, literal_datatype, current_node=@root_node)
- #@triples += "#{current_node} #{node(literal_name)} #{Redland::Literal.create(literal_value, literal_datatype)}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- #TODO: add datatype
- #@triples << "#{current_node} #{node(literal_name)} \"#{literal_value}\".\n".gsub(/\[/,'<').gsub(/\]/,'>')
- #@triples << "#{node(literal_name)} #{RDF_TYPE} #{OWL_TYPE_LITERAL}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- add current_node, node(literal_name), Redland::Literal.create(literal_value, literal_datatype)
- add node(literal_name), RDF_TYPE, OWL_TYPE_LITERAL
- end
-
- # example-triples for setting algorithm property of a model:
- # model_xy,ot:algorithm,algorihtm_xy
- # ot:algorithm,rdf:type,owl:ObjectProperty
- # algorihtm_xy,rdf:type,ot:Algorithm
- # ot:Algorithm,rdf:type,owl:Class
- def set_object_property(property, object, object_class, current_node=@root_node)
- object_node = Redland::Resource.new(object)
- #@triples << "#{current_node} #{node(property)} #{object_node}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- #@triples << "#{node(property)} #{RDF_TYPE} #{OWL_TYPE_OBJECT_PROPERTY}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- #@triples << "#{object_node} #{RDF_TYPE} #{node(object_class)}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- #@triples << "#{node(object_class)} #{RDF_TYPE} #{OWL_TYPE_CLASS}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- add current_node, node(property), object_node
- add node(property), RDF_TYPE, OWL_TYPE_OBJECT_PROPERTY
- add object_node, RDF_TYPE, node(object_class)
- add node(object_class), RDF_TYPE, OWL_TYPE_CLASS
- end
-
- def add(s,p,o)
- #@triples << "#{s} #{p} #{o}.\n".gsub(/\[/,'<').gsub(/\]/,'>')
- @triples << [RDF::URI.new(s.to_s.sub(/\[/,'').sub(/\]/,'')),RDF::URI.new(p.to_s.sub(/\[/,'').sub(/\]/,'')),o.to_s.sub(/\[/,'').sub(/\]/,'')]
- #@model.add s,p,o
- end
-
- # this is (a recursiv method) to set nested-data via hashes (not only simple properties)
- # example (for a dataset)
- # { :description => "bla",
- # :dataEntry => { :compound => "compound_uri",
- # :values => [ { :class => "FeatureValue"
- # :feature => "feat1",
- # :value => 42 },
- # { :class => "FeatureValue"
- # :feature => "feat2",
- # :value => 123 } ] } }
- def set_data(hash, current_node=@root_node)
-
- hash.each do |k,v|
- if v.is_a?(Hash)
- # value is again a hash
- prop = k.to_s
-
- # :class is a special key to specify the class value, if not defined in OBJECT_PROPERTY_CLASS
- object_class = v.has_key?(:class) ? v.delete(:class) : OBJECT_PROPERTY_CLASS[prop]
- raise "hash key must be a object-property, please add '"+prop.to_s+
- "' to OpenTox::OWL.OBJECT_PROPERTY_CLASS or specify :class value" unless object_class
-
- # the new node is a class node, to specify the uri of the resource use key :uri
- if v[:uri]
- # identifier is either a specified uri
- class_node = Redland::Resource.new(v.delete(:uri))
- else
- # or a new uri, make up internal uri with increment
- class_node = new_class_node(object_class,current_node)
- end
- set_object_property(prop,class_node,object_class,current_node)
- # recursivly call set_data method with new node
- set_data(v,class_node)
- elsif v.is_a?(Array)
- # value is an array, each array element is added with current key as predicate
- v.each do |value|
- set_data( { k => value }, current_node )
- end
- else
- # neither hash nor array, call simple set-method
- set( k, v, current_node )
- end
- end
- end
-
- # create a new (internal class) node with unique, uri-like name
- def new_class_node(name, current_node=@root_node)
- # to avoid anonymous nodes, make up uris for sub-objects
- # use counter to make sure each uri is unique
- # for example we will get ../confusion_matrix_cell/1, ../confusion_matrix_cell/2, ...
- count = 1
- while (true)
- res = Redland::Resource.new( File.join(current_node.uri.to_s,name.to_s,count.to_s) )
- match = false
- @model.find(nil, nil, res) do |s,p,o|
- match = true
- break
- end
- if match
- count += 1
- else
- break
- end
- end
- return res
- end
-
- # for "backwards-compatiblity"
- # better use directly:
- # set_data( { "parameters" => [ { "title" => <t>, "paramScope" => <s>, "paramValue" => <v> } ] )
- def parameters=(params)
-
- converted_params = []
- params.each do |name, settings|
- converted_params << { :title => name, :paramScope => settings[:scope], :paramValue => settings[:value] }
- end
- set_data( :parameters => converted_params )
- end
-
- # PENDING move to dataset.rb
- # this is for dataset.to_owl
- # adds feautre value for a single compound
- def add_data_entries(compound_uri,features)
-
- data_entry = { :compound => compound_uri }
- if features
- feature_values = []
- features.each do |f|
- f.each do |feature_uri,value|
- if value.is_a?(Hash)
- complex_values = []
- value.each do |uri,v|
- complex_values << { :feature => uri, :value => v }
- end
- feature_values << { :class => "Tuple", :feature => feature_uri, :complexValue => complex_values }
- else
- feature_values << { :class => "FeatureValue", :feature => feature_uri, :value => value }
- end
- end
- end
- data_entry[:values] = feature_values
- end
- set_data( :dataEntry => data_entry )
- end
-
- # PENDING move to dataset.rb
- # feature values are not loaded for performance reasons
- # loading compounds and features into arrays that are given as params
- def load_dataset( compounds, features )
-
- @model.subjects(RDF_TYPE, node('Compound')).each do |compound|
- compounds << get_value(compound)
- end
-
- @model.subjects(RDF_TYPE, node('Feature')).each do |feature|
- feature_value_found=false
- @model.find(nil, node("feature"), feature) do |potential_feature_value,p,o|
- @model.find(nil, node("values"), potential_feature_value) do |s,p,o|
- feature_value_found=true
- break
- end
- break if feature_value_found
- end
- features << get_value(feature) if feature_value_found
- end
- LOGGER.debug "loaded "+compounds.size.to_s+" compounds and "+features.size.to_s+" features from dataset "+uri.to_s
- end
-
- # PENDING move to dataset.rb
- # loading feature values for the specified feature
- # if feature is nil, all feature values are loaded
- #
- # general remark on the rdf loading (found out with some testing):
- # the search methods (subjects/find) are fast, the time consuming parts is creating resources,
- # which cannot be avoided in general
- def load_dataset_feature_values( compounds, data, feature_uris )
-
- raise "no feature-uri array" unless feature_uris.is_a?(Array)
-
- # values are stored in the data-hash, hash has a key for each compound
- compounds.each{|c| data[c] = [] unless data[c]}
-
- count = 0
-
- feature_uris.each do |feature_uri|
- LOGGER.debug("load feature values for feature: "+feature_uri )
- feature_node = Redland::Resource.new(feature_uri)
-
- # search for all feature_value_node with property 'ot_feature' and the feature we are looking for
- @model.find(nil, node('feature'), feature_node) do |feature_value_node,p,o|
-
- # get compound_uri by "backtracking" to values node (property is 'values'), then get compound_node via 'compound'
- value_nodes = @model.subjects(node('values'),feature_value_node)
- if value_nodes.size>0
- raise "more than one value node "+value_nodes.size.to_s if value_nodes.size>1
- value_node = value_nodes[0]
-
- compound_uri = get_value( @model.object(value_node, node('compound')) )
- unless compound_uri
- LOGGER.warn "'compound' missing for data-entry of feature "+feature_uri.to_s+
- ", value: "+@model.object(feature_value_node,node("value")).to_s
- next
- end
-
- value_node_type = @model.object(feature_value_node, RDF_TYPE)
- if (value_node_type == node('FeatureValue'))
- value_literal = @model.object( feature_value_node, node('value'))
- raise "plain feature value no literal: "+value_literal.to_s unless value_literal.is_a?(Redland::Literal)
- data[compound_uri] << {feature_uri => value_literal.get_value }
- elsif (value_node_type == node('Tuple'))
- complex_values = {}
- @model.find(feature_value_node,node('complexValue'),nil) do |p,s,complex_value|
- complex_value_type = @model.object(complex_value, RDF_TYPE)
- raise "complex feature value no feature value: "+complex_value.to_s unless complex_value_type==node('FeatureValue')
- complex_feature_uri = get_value(@model.object( complex_value, node('feature')))
- complex_value = @model.object( complex_value, node('value'))
- raise "complex value no literal: "+complex_value.to_s unless complex_value.is_a?(Redland::Literal)
- complex_values[ complex_feature_uri ] = complex_value.get_value
- end
- data[compound_uri] << { feature_uri => complex_values } if complex_values.size>0
- end
- count += 1
- LOGGER.debug "loading feature values ("+count.to_s+")" if (count%1000 == 0)
- end
- end
- LOGGER.debug "loaded "+count.to_s+" feature values for feature "+feature_node.to_s
- end
- end
- end
-end
diff --git a/lib/parser.rb b/lib/parser.rb
new file mode 100644
index 0000000..e623bf5
--- /dev/null
+++ b/lib/parser.rb
@@ -0,0 +1,191 @@
+require 'spreadsheet'
+require 'roo'
+module OpenTox
+
+ module Parser
+
+ module Owl
+
+      def initialize(uri) # uri: address of the RDF resource this parser reads
+        @uri = uri
+        @metadata = {} # filled by #metadata: predicate => object value
+      end
+
+      # Runs the external `rapper` tool on @uri (or on @uri/metadata when @dataset is set)
+      # to convert its RDF/XML to N-Triples and stores predicate => object for every
+      # statement whose subject is @uri, leaving out rdf:type. Returns the @metadata hash.
+      def metadata
+        # TODO: load parameters
+        uri = @dataset ? File.join(@uri,"metadata") : @uri
+        # argv form of IO.popen: no shell is involved, so characters such as ; & ? in the
+        # URI are handed to rapper verbatim instead of being interpreted as shell syntax
+        ntriples = IO.popen(['rapper', '-i', 'rdfxml', '-o', 'ntriples', uri.to_s]) { |io| io.read }
+        ntriples.each_line do |line|
+          # split on "> ", cut the trailing " ." and remove <, > and " from each term
+          triple = line.chomp.split('> ').collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
+          next unless triple[0] == @uri && triple[1] != RDF['type']
+          @metadata[triple[1]] = triple[2].split('^^').first # keep the value, drop ^^datatype
+        end
+        @metadata
+      end
+
+      class Generic # parser for any RDF resource; all of its behaviour comes from the Owl mixin
+        include Owl
+      end
+
+ class Dataset
+
+ include Owl
+
+        def initialize(uri) # uri: address of the dataset's RDF representation
+          super uri # Owl#initialize stores @uri and an empty @metadata
+          @dataset = ::OpenTox::Dataset.new(@uri) # object that load_uri fills and returns
+        end
+
+        # Converts the RDF/XML at @uri to N-Triples with `rapper`, adds every compound/feature/value to @dataset and returns it.
+        def load_uri
+          data = {} # data entry node => {:compound => compound uri, :values => [value node ids]}
+          feature_values = {} # value node id => literal, possibly followed by ^^datatype
+          feature = {} # value node id => feature uri
+          # argv form of IO.popen: no shell is involved, so @uri cannot inject shell syntax
+          ntriples = IO.popen(['rapper', '-i', 'rdfxml', '-o', 'ntriples', @uri.to_s]) { |io| io.read }
+          ntriples.each_line do |line|
+            triple = line.chomp.split(' ',3)[0..2].collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
+            case triple[1] # Ambit namespaces are case insensitive
+            when /#{OT.values}/i
+              data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]]
+              data[triple[0]][:values] << triple[2]
+            when /#{OT.value}/i
+              feature_values[triple[0]] = triple[2]
+            when /#{OT.compound}/i
+              data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]]
+              data[triple[0]][:compound] = triple[2]
+            when /#{OT.feature}/i
+              feature[triple[0]] = triple[2]
+            end
+          end
+          data.each do |id,entry|
+            entry[:values].each do |value_id|
+              next unless feature_values[value_id] # no ot:value literal recorded for this node: skip it instead of calling split on nil
+              value = feature_values[value_id].split(/\^\^/).first # remove XSD.type
+              @dataset.add entry[:compound],feature[value_id],value
+            end
+          end
+          load_features
+          @dataset.metadata = metadata
+          @dataset
+        end
+
+        # Replaces every entry of @dataset.features with the metadata parsed from that feature's own URI.
+        def load_features
+          feature_uris = @dataset.features.keys # key list is taken before any value is reassigned
+          feature_uris.each { |feature_uri| @dataset.features[feature_uri] = Parser::Owl::Generic.new(feature_uri).metadata }
+        end
+ end
+
+ end
+
+ class Spreadsheet
+
+      def initialize(dataset) # dataset: object that #parse fills through dataset.add and then returns
+        @dataset = dataset
+        @format_errors = "" # not read or written by the other methods of this class
+        @smiles_errors = [] # "Row n: smiles, activity" for rows that #add rejected because no compound/InChI was obtained
+        @activity_errors = [] # rows rejected by #add for their activity, plus zero values reported by #parse
+        @duplicates = {} # InChI => row descriptions; #parse warns when a key holds more than one row
+        @nr_compounds = 0 # incremented by #add for every accepted row
+        @data = [] # accepted rows as [compound uri, activity, row number]
+        @activities = [] # not read or written by the other methods of this class
+        @type = "classification" # #add switches this to "regression" when an activity is not a classification value
+      end
+
+      def load_excel(book) # book: object answering default_sheet=, last_row and cell(row, column); returns the result of #parse
+        book.default_sheet = 0
+        1.upto(book.last_row) do |row|
+          if row == 1 # first row: the cell in column 2 names the feature
+            @feature = File.join(@dataset.uri,"feature",book.cell(row,2))
+          else # every other row is handed to #add together with its row number
+            add( book.cell(row,1), book.cell(row,2), row ) # smiles, activity
+          end
+        end
+        parse
+      end
+
+      def load_csv(csv) # csv: text answering each_line; raises RuntimeError on a malformed line; returns the result of #parse
+        row = 0 # 1-based line counter used in error messages and passed to #add
+        csv.each_line do |line|
+          row += 1
+          raise "Invalid CSV format at line #{row}: #{line.chomp}" unless line.chomp.match(/^.+[,;].*$/) # check CSV format: some text followed by a , or ;
+          items = line.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes, then split on , or ; with surrounding whitespace
+          if row == 1 # first line: the second column names the feature
+            @feature = File.join(@dataset.uri,"feature",items[1])
+          else # data line: first column is passed as smiles, second as activity
+            add(items[0], items[1], row)
+          end
+        end
+        parse
+      end
+
+      def parse # copies the rows collected by #add into @dataset, stores the warnings under OT.Warnings and returns @dataset
+
+        # create dataset
+        @data.each do |items| # items is [compound uri, activity, row number] as stored by #add
+          case @type
+          when "classification"
+            case items[1].to_s # activities matching neither pattern are not added
+            when TRUE_REGEXP
+              @dataset.add(items[0], @feature, true )
+            when FALSE_REGEXP
+              @dataset.add(items[0], @feature, false)
+            end
+          when "regression"
+            if items[1].to_f == 0 # to_f also yields 0.0 for non-numeric text, so such activities are reported here too
+              @activity_errors << "Row #{items[2]}: Zero values not allowed for regression datasets - entry ignored."
+            else
+              @dataset.add items[0], @feature, items[1].to_f
+            end
+          end
+        end
+
+        warnings = '' # HTML fragment assembled from the error lists gathered by #add and by the loop above
+        warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @smiles_errors.join("<br/>") unless @smiles_errors.empty?
+        warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
+        duplicate_warnings = ''
+        @duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 } # only InChIs recorded for more than one row
+        warnings += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?
+
+        @dataset.metadata[OT.Warnings] = warnings
+
+        @dataset
+
+      end
+
+      def add(smiles, act, row) # validates one row; returns false when it is rejected, otherwise the result of appending it to @data
+        compound = Compound.from_smiles(smiles)
+        if compound.nil? or compound.inchi.nil? or compound.inchi == "" # no compound or no InChI: record the row as a SMILES error
+          @smiles_errors << "Row #{row}: " + [smiles,act].join(", ")
+          return false
+        end
+        unless numeric?(act) or classification?(act) # activity must parse as a number or match TRUE_REGEXP/FALSE_REGEXP
+          @activity_errors << "Row #{row}: " + [smiles,act].join(", ")
+          return false
+        end
+        @duplicates[compound.inchi] = [] unless @duplicates[compound.inchi]
+        @duplicates[compound.inchi] << "Row #{row}: " + [smiles, act].join(", ") # rows are grouped by InChI; #parse reports groups with more than one row
+        @type = "regression" unless classification?(act) # one non-classification activity makes #parse treat all rows as regression
+        # TODO: set OT.NumericalFeature, ...
+        @nr_compounds += 1
+        @data << [ compound.uri, act , row ]
+      end
+
+      def numeric?(object) # true when Kernel#Float accepts object, false when the conversion raises
+        !!Float(object) rescue false
+      end
+
+      def classification?(object) # true when the stripped string form of object matches TRUE_REGEXP or FALSE_REGEXP
+        [TRUE_REGEXP, FALSE_REGEXP].any? { |pattern| !object.to_s.strip.match(pattern).nil? }
+      end
+
+ end
+ end
+end
diff --git a/lib/serializer.rb b/lib/serializer.rb
new file mode 100644
index 0000000..3def252
--- /dev/null
+++ b/lib/serializer.rb
@@ -0,0 +1,297 @@
+require 'spreadsheet'
+require 'yajl'
+
+module OpenTox
+
+ module Serializer
+
+ # modelled according to http://n2.talis.com/wiki/RDF_JSON_Specification
+ class Owl
+
+ attr_accessor :object
+
+ def initialize
+
+ @object = {
+ # this should come from opntox.owl
+ OT.Compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.Feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.NominalFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.NumericFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.StringFeature => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.Dataset => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.DataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.FeatureValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.Algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+ OT.Parameter => { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } ,
+
+ OT.compound => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.feature => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.dataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.acceptValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ #XSD.anyUri => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+ OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
+
+ DC.title => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ DC.identifier => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ DC.contributor => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ DC.creator => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+
+ OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+ OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+ OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+ OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
+
+ #Untyped Individual: http://localhost/algorithm
+ }
+
+ @data_entries = {}
+ @values_id = 0
+ @parameter_id = 0
+
+ @classes = Set.new
+ @object_properties = Set.new
+ @annotation_properties = Set.new
+ @datatype_properties = Set.new
+
+ @objects = Set.new
+ end
+
+ def add_compound(uri)
+ #@classes << OT.Compound unless @classes.include? OT.Compound
+ @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Compound }] }
+ end
+
+ def add_feature(uri,metadata)
+ #@classes << OT.Feature unless @classes.include? OT.Feature
+ #@classes << OT.NominalFeature unless @classes.include? OT.NominalFeature
+ #@classes << OT.NumericFeature unless @classes.include? OT.NumericFeature
+ #@classes << OT.StringFeature unless @classes.include? OT.StringFeature
+ @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }
+ add_metadata uri, metadata
+ end
+
+ def add_dataset(dataset)
+
+ @dataset = dataset.uri
+
+ @object[dataset.uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Dataset }] }
+
+ add_metadata dataset.uri, dataset.metadata
+
+ dataset.compounds.each { |compound| add_compound compound }
+
+ dataset.features.each { |feature,metadata| add_feature feature,metadata }
+
+ dataset.data_entries.each do |compound,entry|
+ entry.each do |feature,values|
+ values.each { |value| add_data_entry compound,feature,value }
+ end
+ end
+
+ end
+
+ def add_algorithm(uri,metadata,parameters)
+ @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+ add_metadata uri, metadata
+ add_parameters uri, parameters
+ #metadata.each { |u,v| @object[uri][u] = [{"type" => type(v), "value" => v }] }
+ end
+
+ # Placeholder: the body is empty, so nothing is added to @object and nil is returned.
+ def add_model(uri,metadata)
+ end
+
+ def add_metadata(uri,metadata)
+ #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] }
+ metadata.each do |u,v|
+ @object[uri][u] = [{"type" => type(v), "value" => v }]
+ end
+ end
+
+ def add_parameters(uri,parameters)
+ #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] }
+ @object[uri][OT.parameters] = [] unless @object[uri][OT.parameters]
+ parameters.each do |p|
+ parameter = "_:parameter#{@parameter_id}"
+ @parameter_id += 1
+ @object[uri][OT.parameters] << {"type" => "bnode", "value" => parameter}
+ @object[parameter] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter }] }
+ add_metadata parameter, p
+ end
+ end
+
+ def add_data_entry(compound,feature,value)
+ add_compound(compound) unless @object[compound]
+ add_feature(feature,{}) unless @object[feature]
+ unless data_entry = @data_entries[compound]
+ data_entry = "_:dataentry#{@data_entries.size}"
+ @data_entries[compound] = data_entry
+ @object[@dataset][OT.dataEntry] = [] unless @object[@dataset][OT.dataEntry]
+ @object[@dataset][OT.dataEntry] << {"type" => "bnode", "value" => data_entry}
+ @object[data_entry] = {
+ RDF["type"] => [{ "type" => "uri", "value" => OT.DataEntry }],
+ OT.compound => [{ "type" => "uri", "value" => compound }],
+ OT.values => [],
+ }
+ end
+ values = "_:values#{@values_id}"
+ @values_id += 1
+ @object[data_entry][OT.values] << {"type" => "bnode", "value" => values}
+ case type(value)
+ when "uri"
+ v = [{ "type" => "uri", "value" => value}]
+ when "literal"
+ v = [{ "type" => "literal", "value" => value, "datatype" => datatype(value) }]
+ else
+ raise "Illegal type #{type(value)} for #{value}."
+ end
+ @object[values] = {
+ RDF["type"] => [{ "type" => "uri", "value" => OT.FeatureValue }],
+ OT.feature => [{ "type" => "uri", "value" => feature }],
+ OT.value => v
+ }
+ @object[feature][RDF["type"]] << { "type" => "uri", "value" => featuretype(value) }
+ end
+
+ # Serializers
+
+ def ntriples
+
+ #rdf_types
+ @triples = Set.new
+ @object.each do |s,entry|
+ s = url(s) if type(s) == "uri"
+ entry.each do |p,objects|
+ p = url(p)
+ objects.each do |o|
+ case o["type"]
+ when "uri"
+ o = url(o["value"])
+ when "literal"
+ o = literal(o["value"],datatype(o["value"]))
+ when "bnode"
+ o = o["value"]
+ end
+ @triples << [s,p,o]
+ end
+ end
+ end
+ @triples.sort.collect{ |s| s.join(' ').concat(" .") }.join("\n")+"\n"
+ end
+
+ def rdfxml
+ Tempfile.open("owl-serializer"){|f| f.write(ntriples); @path = f.path}
+ `rapper -i ntriples -o rdfxml #{@path}`
+ end
+
+ # Encode the @object hash as a JSON string with Yajl::Encoder.
+ def json
+ #rdf_types
+ Yajl::Encoder.encode(@object)
+ end
+
+ # Helpers for type detection
+ private
+
+ def datatype(value)
+ if value.is_a? TrueClass or value.is_a? FalseClass
+ XSD.boolean
+ elsif value.is_a? Float
+ XSD.float
+ else
+ XSD.string
+ end
+ end
+
+ def featuretype(value)
+ if value.is_a? TrueClass or value.is_a? FalseClass
+ datatype = OT.NominalFeature
+ elsif value.is_a? Float
+ datatype = OT.NumericFeature
+ else
+ datatype = OT.StringFeature
+ end
+ end
+
+ def type(value)
+ begin
+ uri = URI.parse(value)
+ if uri.class == URI::HTTP or uri.class == URI::HTTPS
+ "uri"
+ elsif value.match(/^_/)
+ "bnode"
+ else
+ "literal"
+ end
+ rescue
+ "literal"
+ end
+ end
+
+ def literal(value,type)
+ # concat and << are faster string concatination operators than +
+ '"'.concat(value.to_s).concat('"^^<').concat(type).concat('>')
+ end
+
+ def url(uri)
+ # concat and << are faster string concatination operators than +
+ '<'.concat(uri).concat('>')
+ end
+
+ def rdf_types
+ @classes.each { |c| @object[c] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['Class'] }] } }
+ @object_properties.each { |p| @object[p] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['ObjectProperty'] }] } }
+ @annotation_properties.each { |a| @object[a] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['AnnotationProperty'] }] } }
+ @datatype_properties.each { |d| @object[d] = { RDF["type"] => [{ "type" => "uri", "value" => OWL['DatatypeProperty'] }] } }
+ end
+
+ end
+
+ class Spreadsheets # to avoid nameclash with Spreadsheet gem
+
+ def initialize(dataset)
+ @rows = []
+ @rows << ["SMILES"]
+ features = dataset.features.keys
+ @rows.first << features
+ @rows.first.flatten!
+ dataset.data_entries.each do |compound,entries|
+ smiles = Compound.new(compound).smiles
+ row = Array.new(@rows.first.size)
+ row[0] = smiles
+ entries.each do |feature, values|
+ i = features.index(feature)+1
+ values.each do |value|
+ row[i] = value #TODO overwrites duplicated values
+ end
+ end
+ @rows << row
+ end
+ end
+
+ def csv
+ @rows.collect{|r| r.join(", ")}.join("\n")
+ end
+
+ def excel
+ Spreadsheet.client_encoding = 'UTF-8'
+ book = Spreadsheet::Workbook.new
+ sheet = book.create_worksheet(:name => '')
+ sheet.column(0).width = 100
+ i = 0
+ @rows.each do |row|
+ row.each do |c|
+ sheet.row(i).push c
+ end
+ i+=1
+ end
+ book
+ end
+
+ end
+
+
+ end
+end
diff --git a/lib/task.rb b/lib/task.rb
index 1ab3893..50f0347 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -16,7 +16,7 @@ module OpenTox
# create is private now, use OpenTox::Task.as_task
def self.create( params )
- task_uri = RestClientWrapper.post(@@config[:services]["opentox-task"], params, nil, false).to_s
+ task_uri = RestClientWrapper.post(CONFIG[:services]["opentox-task"], params, nil, false).to_s
Task.find(task_uri.chomp)
end
@@ -36,7 +36,7 @@ module OpenTox
def reload( accept_header=nil )
unless accept_header
- if (@@config[:yaml_hosts].include?(URI.parse(uri).host))
+ if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host))
accept_header = "application/x-yaml"
else
accept_header = 'application/rdf+xml'
@@ -99,7 +99,7 @@ module OpenTox
# waits for a task, unless time exceeds or state is no longer running
def wait_for_completion(dur=0.3)
- if (@uri.match(@@config[:services]["opentox-task"]))
+ if (@uri.match(CONFIG[:services]["opentox-task"]))
due_to_time = (@due_to_time.is_a?(Time) ? @due_to_time : Time.parse(@due_to_time))
running_time = due_to_time - (@date.is_a?(Time) ? @date : Time.parse(@date))
else
@@ -144,7 +144,7 @@ module OpenTox
#return yield nil
params = {:title=>title, :creator=>creator, :max_duration=>max_duration, :description=>description }
- task = OpenTox::Task.create(params)
+ task = ::OpenTox::Task.create(params)
task_pid = Spork.spork(:logger => LOGGER) do
LOGGER.debug "Task #{task.uri} started #{Time.now}"
$self_task = task
diff --git a/lib/validation.rb b/lib/validation.rb
index 89a2a0c..340332a 100644
--- a/lib/validation.rb
+++ b/lib/validation.rb
@@ -4,11 +4,11 @@ module OpenTox
attr_accessor :uri
def initialize(params)
- @uri = OpenTox::RestClientWrapper.post(File.join(@@config[:services]["opentox-validation"],"/crossvalidation"),params,nil,false)
+ @uri = OpenTox::RestClientWrapper.post(File.join(CONFIG[:services]["opentox-validation"],"/crossvalidation"),params,nil,false)
end
def self.crossvalidation(params)
- params[:uri] = File.join(@@config[:services]['opentox-validation'], "crossvalidation")
+ params[:uri] = File.join(CONFIG[:services]['opentox-validation'], "crossvalidation")
params[:num_folds] = 10 unless params[:num_folds]
params[:random_seed] = 2 unless params[:random_seed]
params[:stratified] = false unless params[:stratified]