summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2010-11-11 09:31:27 +0100
committerChristoph Helma <helma@in-silico.ch>2010-11-11 09:31:27 +0100
commitb93002b4ea50ff7e357da08abd10577347ce2d5f (patch)
tree840f1b8865032ce59917d8c5a3d6b2e499d19126 /lib
parentd6811507c1c1339cc4fe7cdb429b9b34b97dc422 (diff)
first steps towards version 2.0, yard documentation started, passes compound, dataset, feature, algorithm, fminer tests
Diffstat (limited to 'lib')
-rw-r--r--lib/algorithm.rb127
-rw-r--r--lib/compound.rb105
-rw-r--r--lib/dataset.rb482
-rw-r--r--lib/environment.rb29
-rw-r--r--lib/feature.rb4
-rw-r--r--lib/model.rb485
-rw-r--r--lib/opentox-ruby-api-wrapper.rb2
-rw-r--r--lib/opentox.rb106
-rw-r--r--lib/overwrite.rb22
-rw-r--r--lib/parser.rb208
-rw-r--r--lib/rest_client_wrapper.rb6
-rw-r--r--lib/serializer.rb69
-rw-r--r--lib/task.rb6
-rw-r--r--lib/utils.rb50
14 files changed, 940 insertions, 761 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index e1d369a..711f63b 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -1,77 +1,122 @@
module OpenTox
+ # Wrapper for OpenTox Algorithms
module Algorithm
- include OtObject
+ include OpenTox
+ # Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters
+ def run(params=nil)
+ RestClientWrapper.post(@uri, params)
+ end
+
+ # Get OWL-DL representation in RDF/XML format
+ # @return [application/rdf+xml] RDF/XML representation
+ def to_rdfxml
+ s = Serializer::Owl.new
+ s.add_algorithm(@uri,@metadata)
+ s.to_rdfxml
+ end
+
+ # Generic Algorithm class, should work with all OpenTox webservices
class Generic
include Algorithm
- #include OtObject
- protected
-# def initialize(owl)
-# @title = owl.get("title")
-# @date = owl.get("date")
-# @uri = owl.uri
-# end
-
end
- class Fminer < Generic
+ module Fminer
+ include Algorithm
- def self.create_feature_dataset(params)
- LOGGER.debug File.basename(__FILE__) + ": creating feature dataset"
- resource = RestClient::Resource.new(params[:feature_generation_uri])
- resource.post :dataset_uri => params[:dataset_uri], :feature_uri => params[:feature_uri]
+ class BBRC
+ include Fminer
+ # Initialize bbrc algorithm
+ def initialize
+ super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
+ load_metadata
+ end
+ end
+
+ class LAST
+ include Fminer
+ # Initialize last algorithm
+ def initialize
+ super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
+ load_metadata
+ end
end
- def self.uri
- File.join(CONFIG[:services]["opentox-algorithm"], "fminer")
- end
end
- class Lazar
-
- def self.create_model(params)
- LOGGER.debug params
- LOGGER.debug File.basename(__FILE__) + ": creating model"
- LOGGER.debug File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
- resource = RestClient::Resource.new(File.join(CONFIG[:services]["opentox-algorithm"], "lazar"), :content_type => "application/x-yaml")
- @uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(CONFIG[:services]["opentox-algorithm"], "fminer")).body.chomp
- end
+ # Create lazar prediction model
+ class Lazar
+ include Algorithm
+ # Initialize lazar algorithm
+ def initialize
+ super File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
+ load_metadata
+ end
+ end
- def self.uri
- File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
- end
+ # Utility methods without dedicated webservices
- end
+ module Similarity
+ include Algorithm
- class Similarity
- def self.weighted_tanimoto(fp_a,fp_b,p)
- common_features = fp_a & fp_b
- all_features = (fp_a + fp_b).uniq
+ # Tanimoto similarity
+ #
+ # @param [Array] features_a Features of first compound
+ # @param [Array] features_b Features of second compound
+ # @param [optional, Hash] weights Weights for all features
+ # @return [Float] (Weighted) Tanimoto similarity
+ def self.tanimoto(features_a,features_b,weights=nil)
+ common_features = features_a & features_b
+ all_features = (features_a + features_b).uniq
common_p_sum = 0.0
if common_features.size > 0
- common_features.each{|f| common_p_sum += OpenTox::Utils.gauss(p[f])}
- all_p_sum = 0.0
- all_features.each{|f| all_p_sum += OpenTox::Utils.gauss(p[f])}
- common_p_sum/all_p_sum
+ if weights
+ common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}
+ all_p_sum = 0.0
+ all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}
+ common_p_sum/all_p_sum
+ else
+ common_features.to_f/all_features
+ end
else
0.0
end
end
- def self.euclidean(prop_a,prop_b)
+
+ # Euclidean similarity
+ def self.euclidean(prop_a,prop_b,weights=nil)
common_properties = prop_a.keys & prop_b.keys
if common_properties.size > 1
dist_sum = 0
common_properties.each do |p|
- dist_sum += (prop_a[p] - prop_b[p])**2
+ if weights
+ dist_sum += ( (prop_a[p] - prop_b[p]) * Algorithm.gauss(weights[p]) )**2
+ else
+ dist_sum += (prop_a[p] - prop_b[p])**2
+ end
end
1/(1+Math.sqrt(dist_sum))
else
- nil
+ 0.0
end
end
end
+
+ # Gauss kernel
+ def self.gauss(sim, sigma = 0.3)
+ x = 1.0 - sim
+ Math.exp(-(x*x)/(2*sigma*sigma))
+ end
+
+ # Median of an array
+ def self.median(array)
+ return nil if array.empty?
+ array.sort!
+ m_pos = array.size / 2
+ return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2
+ end
end
end
diff --git a/lib/compound.rb b/lib/compound.rb
index 699e4c1..6834860 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -4,41 +4,15 @@
module OpenTox
# Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure).
- #
- # Examples:
- # require "opentox-ruby-api-wrapper"
- #
- # # Creating compounds
- #
- # # from smiles string
- # compound = OpenTox::Compound.from_smiles("c1ccccc1")
- # # from name
- # compound = OpenTox::Compound.from_name("Benzene")
- # # from uri
- # compound = OpenTox::Compound.new("http://webservices.in-silico.ch/compound/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"")
- #
- # # Getting compound representations
- #
- # # get InChI
- # inchi = compound.inchi
- # # get all compound names
- # names = compound.names
- # # get png image
- # image = compound.png
- # # get uri
- # uri = compound.uri
- #
- # # SMARTS matching
- #
- # # match a smarts string
- # compound.match?("cN") # returns false
- # # match an array of smarts strings
- # compound.match(['cc','cN']) # returns ['cc']
class Compound
attr_accessor :inchi, :uri
# Create compound with optional uri
+ # @example
+ # compound = OpenTox::Compound.new("http://webservices.in-silico.ch/compound/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H")
+ # @param [optional, String] uri Compound URI
+ # @return [OpenTox::Compound] Compound
def initialize(uri=nil)
@uri = uri
case @uri
@@ -50,6 +24,10 @@ module OpenTox
end
# Create a compound from smiles string
+ # @example
+ # compound = OpenTox::Compound.from_smiles("c1ccccc1")
+ # @param [String] smiles Smiles string
+ # @return [OpenTox::Compound] Compound
def self.from_smiles(smiles)
c = Compound.new
c.inchi = Compound.smiles2inchi(smiles)
@@ -58,6 +36,8 @@ module OpenTox
end
# Create a compound from inchi string
+ # @param [String] inchi InChI string
+ # @return [OpenTox::Compound] Compound
def self.from_inchi(inchi)
c = Compound.new
c.inchi = inchi
@@ -66,6 +46,8 @@ module OpenTox
end
# Create a compound from sdf string
+ # @param [String] sdf SDF string
+ # @return [OpenTox::Compound] Compound
def self.from_sdf(sdf)
c = Compound.new
c.inchi = Compound.sdf2inchi(sdf)
@@ -73,7 +55,11 @@ module OpenTox
c
end
- # Create a compound from name (name can be also an InChI/InChiKey, CAS number, etc)
+ # Create a compound from name. Relies on an external service for name lookups.
+ # @example
+ # compound = OpenTox::Compound.from_name("Benzene")
+ # @param [String] name name can be also an InChI/InChiKey, CAS number, etc
+ # @return [OpenTox::Compound] Compound
def self.from_name(name)
c = Compound.new
# paranoid URI encoding to keep SMILES charges and brackets
@@ -83,32 +69,42 @@ module OpenTox
end
# Get (canonical) smiles
- def smiles
+ # @return [String] Smiles string
+ def to_smiles
Compound.obconversion(@inchi,'inchi','can')
end
# Get sdf
- def sdf
+ # @return [String] SDF string
+ def to_sdf
Compound.obconversion(@inchi,'inchi','sdf')
end
# Get gif image
- def gif
+ # @return [image/gif] Image data
+ def to_gif
RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/image")
end
# Get png image
- def png
+ # @example
+ # image = compound.to_png
+ # @return [image/png] Image data
+ def to_png
RestClientWrapper.get(File.join @uri, "image")
end
# Get URI of compound image
- def image_uri
+ # @return [String] Compound image URI
+ def to_image_uri
File.join @uri, "image"
end
- # Get all known compound names
- def names
+ # Get all known compound names. Relies on an external service for name lookups.
+ # @example
+ # names = compound.to_names
+ # @return [Array] Compound names
+ def to_names
begin
RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names").split("\n")
rescue
@@ -117,6 +113,10 @@ module OpenTox
end
# Match a smarts string
+ # @example
+ # compound = OpenTox::Compound.from_name("Benzene")
+ # compound.match?("cN") # returns false
+ # @param [String] smarts Smarts string
def match?(smarts)
obconversion = OpenBabel::OBConversion.new
obmol = OpenBabel::OBMol.new
@@ -128,19 +128,34 @@ module OpenTox
end
# Match an array of smarts strings, returns array with matching smarts
+ # @example
+ # compound = OpenTox::Compound.from_name("Benzene")
+ # compound.match(['cc','cN']) # returns ['cc']
+ # @param [Array] smarts_array Array with Smarts strings
+ # @return [Array] Array with matching Smarts strings
def match(smarts_array)
- smarts_array.collect{|s| s if match?(s)}.compact
+ # avoid recreation of OpenBabel objects
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_format('inchi')
+ obconversion.read_string(obmol,@inchi)
+ smarts_pattern = OpenBabel::OBSmartsPattern.new
+ smarts_array.collect do |smarts|
+ smarts_pattern.init(smarts)
+ smarts if smarts_pattern.match(obmol)
+ end.compact
+ #smarts_array.collect { |s| s if match?(s)}.compact
end
# Get URI of compound image with highlighted fragments
- def matching_smarts_image_uri(activating, deactivating, highlight = nil)
+ #
+ # @param [Array] activating Array with activating Smarts strings
+ # @param [Array] deactivating Array with deactivating Smarts strings
+ # @return [String] URI for compound image with highlighted fragments
+ def matching_smarts_image_uri(activating, deactivating)
activating_smarts = URI.encode "\"#{activating.join("\"/\"")}\""
deactivating_smarts = URI.encode "\"#{deactivating.join("\"/\"")}\""
- if highlight.nil?
- File.join CONFIG[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts)
- else
- File.join CONFIG[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts), "highlight", URI.encode(highlight)
- end
+ File.join @uri, "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts)
end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 7c8ce24..05b2ed3 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -1,74 +1,19 @@
module OpenTox
# Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset).
- #
- # Examples:
- # require "opentox-ruby-api-wrapper"
- #
- # # Creating datasets
- #
- # # create an empty dataset
- # dataset = OpenTox::Dataset.new
- # # create an empty dataset with URI
- # # this does not load data from the dataset service - use one of the load_* methods
- # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1")
- # # create new dataset and sav it to obtain a URI
- # dataset = OpenTox::Dataset.create
- # # create a new dataset from yaml representation
- # dataset = OpenTox::Dataset.from_yaml
- # # create a new dataset from CSV string
- # csv_string = "SMILES, Toxicity\nc1ccccc1N, true"
- # dataset = OpenTox::Dataset.from_csv(csv_string)
- #
- # # Loading data
- # # Datasets created with OpenTox::Dataset.new(uri) are empty by default
- # # Invoking one of the following functions will load data into the object
- #
- # # create an empty dataset with URI
- # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1")
- # # loads (and returns) only metadata
- # dataset.load_metadata
- # # loads (and returns) only compounds
- # dataset.load_compounds
- # # loads (and returns) only features
- # dataset.load_features
- # # load all data from URI
- # dataset.load_all
- #
- # # Getting dataset representations
- #
- # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1")
- # dataset.load_all
- # # OWL-DL (RDF/XML)
- # dataset.rdfxml
- # # OWL-DL (Ntriples)
- # dataset.ntriples
- # # YAML
- # dataset.yaml
- # # CSV
- # dataset.csv
- #
- # # Modifying datasets
- #
- # # insert a statement (compound_uri,feature_uri,value)
- # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true
- #
- #
- # # Saving datasets
- # # save dataset at dataset service
- # dataset.save
- #
- # # Deleting datasets
- # # delete dataset (also at dataset service)
- # dataset.delete
class Dataset
- include OtObject
+ include OpenTox
attr_reader :features, :compounds, :data_entries, :metadata
- attr_writer :metadata
- # Create dataset with optional URI
+ # Create dataset with optional URI. Does not load data into the dataset - you will need to execute one of the load_* methods to pull data from a service or to insert it from other representations.
+ # @example Create an empty dataset
+ # dataset = OpenTox::Dataset.new
+ # @example Create an empty dataset with URI
+ # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1")
+ # @param [optional, String] uri Dataset URI
+ # @return [OpenTox::Dataset] Dataset object
def initialize(uri=nil)
super uri
@features = {}
@@ -76,52 +21,79 @@ module OpenTox
@data_entries = {}
end
- # Create and save an empty dataset (assigns URI to dataset)
+ # Create an empty dataset and save it at the dataset service (assigns URI to dataset)
+ # @example Create new dataset and save it to obtain a URI
+ # dataset = OpenTox::Dataset.create
+ # @param [optional, String] uri Dataset URI
+ # @return [OpenTox::Dataset] Dataset object
def self.create(uri=CONFIG[:services]["opentox-dataset"])
dataset = Dataset.new
- dataset.uri = RestClientWrapper.post(uri,{}).to_s.chomp
+ dataset.save
+ dataset
+ end
+
+ # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading.
+ # @param [String] uri Dataset URI
+ # @return [OpenTox::Dataset] Dataset object with all data
+ def self.find(uri)
+ dataset = Dataset.new(uri)
+ dataset.load_all
dataset
end
# Get all datasets from a service
-# def self.all(uri=CONFIG[:services]["opentox-dataset"])
-# RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)}
-# end
+ # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration
+ # @return [Array] Array of dataset objects (metadata and data are not loaded)
+ def self.all(uri=CONFIG[:services]["opentox-dataset"])
+ RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)}
+ end
- # Create a dataset from YAML string
- def self.from_yaml(yaml)
- dataset = Dataset.create
- dataset.copy YAML.load(yaml)
- dataset
+ # Load YAML representation into the dataset
+ # @param [String] yaml YAML representation of the dataset
+ # @return [OpenTox::Dataset] Dataset object with YAML data
+ def load_yaml(yaml)
+ copy YAML.load(yaml)
+ end
+
+ # Load RDF/XML representation from a file
+ # @param [String] file File with RDF/XML representation of the dataset
+ # @return [OpenTox::Dataset] Dataset object with RDF/XML data
+ def load_rdfxml_file(file)
+ parser = Parser::Owl::Dataset.new @uri
+ parser.uri = file.path
+ copy parser.load_uri
end
- # Create dataset from CSV string (format specification: http://toxcreate.org/help)
+ # Load CSV string (format specification: http://toxcreate.org/help)
# - loads data_entries, compounds, features
# - sets metadata (warnings) for parser errors
# - you will have to set remaining metadata manually
- def self.from_csv(csv)
- dataset = Dataset.create
- Parser::Spreadsheet.new(dataset).load_csv(csv)
- dataset
+ # @param [String] csv CSV representation of the dataset
+ # @return [OpenTox::Dataset] Dataset object with CSV data
+ def load_csv(csv)
+ save unless @uri # get a uri for creating features
+ parser = Parser::Spreadsheets.new
+ parser.dataset = self
+ parser.load_csv(csv)
end
- # Create dataset from Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help))
+ # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help))
# - loads data_entries, compounds, features
# - sets metadata (warnings) for parser errors
# - you will have to set remaining metadata manually
- def self.from_spreadsheet(book)
- dataset = Dataset.create
- Parser::Spreadsheet.new(dataset).load_excel(book)
- dataset
+ # @param [Excel] book Excel workbook object (created with roo gem)
+ # @return [OpenTox::Dataset] Dataset object with Excel data
+ def load_spreadsheet(book)
+ save unless @uri # get a uri for creating features
+ parser = Parser::Spreadsheets.new
+ parser.dataset = self
+ parser.load_excel(book)
end
- # Load and return metadata of a Dataset object
+ # Load and return only metadata of a Dataset object
+ # @return [Hash] Metadata of the dataset
def load_metadata
- #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
- #add_metadata YAML.load(RestClientWrapper.get(File.join(@uri,"metadata"), :accept => "application/x-yaml"))
- #else
- add_metadata Parser::Owl::Dataset.new(@uri).metadata
- #end
+ add_metadata Parser::Owl::Dataset.new(@uri).metadata
self.uri = @uri if @uri # keep uri
@metadata
end
@@ -136,7 +108,8 @@ module OpenTox
end
end
- # Load and return all compound URIs
+ # Load and return only compound URIs from the dataset service
+ # @return [Array] Compound URIs in the dataset
def load_compounds
RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri|
@compounds << compound_uri.chomp
@@ -144,44 +117,75 @@ module OpenTox
@compounds.uniq!
end
- # Load all feature URIs
+ # Load and return only features from the dataset service
+ # @return [Hash] Features of the dataset
def load_features
- RestClientWrapper.get(File.join(uri,"features"),:accept=> "text/uri-list").to_s.each_line do |feature_uri|
- @features[feature_uri.chomp] = Feature.new(feature_uri.chomp).load_metadata
- end
+ parser = Parser::Owl::Dataset.new(@uri)
+ @features = parser.load_features
@features
end
- # Get YAML representation
- def yaml
- self.to_yaml
+ # Detect feature type(s) in the dataset
+ # @return [String] "classification", "regression", "mixed" or "unknown"
+ def feature_type
+ feature_types = @features.collect{|f,metadata| metadata[OT.isA]}.uniq
+ LOGGER.debug "FEATURES"
+ LOGGER.debug feature_types.inspect
+ if feature_types.size > 1
+ "mixed"
+ else
+ case feature_types.first
+ when /NominalFeature/
+ "classification"
+ when /NumericFeature/
+ "regression"
+ else
+ "unknown"
+ end
+ end
end
- # Get Excel representation, returns a Spreadsheet::Workbook which can be written with the 'spreadsheet' gem (data_entries only, metadata will )
- def excel
- Serializer::Spreadsheets.new(self).excel
+ # Get Excel representation
+ # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will be discarded)
+ def to_xls
+ Serializer::Spreadsheets.new(self).to_xls
end
# Get CSV string representation (data_entries only, metadata will be discarded)
- def csv
- Serializer::Spreadsheets.new(self).csv
+ # @return [String] CSV representation
+ def to_csv
+ Serializer::Spreadsheets.new(self).to_csv
end
# Get OWL-DL in ntriples format
- def ntriples
+ # @return [String] N-Triples representation
+ def to_ntriples
s = Serializer::Owl.new
s.add_dataset(self)
- s.ntriples
+ s.to_ntriples
end
# Get OWL-DL in RDF/XML format
- def rdfxml
+ # @return [String] RDF/XML representation
+ def to_rdfxml
s = Serializer::Owl.new
s.add_dataset(self)
- s.rdfxml
+ s.to_rdfxml
+ end
+
+ # Get name (DC.title) of a feature
+ # @param [String] feature Feature URI
+ # @return [String] Feature title
+ def feature_name(feature)
+ @features[feature][DC.title]
end
# Insert a statement (compound_uri,feature_uri,value)
+ # @example Insert a statement (compound_uri,feature_uri,value)
+ # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true
+ # @param [String] compound Compound URI
+ # @param [String] feature Feature URI
+ # @param [Boolean,Float] value Feature value
def add (compound,feature,value)
@compounds << compound unless @compounds.include? compound
@features[feature] = {} unless @features[feature]
@@ -190,252 +194,62 @@ module OpenTox
@data_entries[compound][feature] << value
end
- # Add metadata (hash with predicate_uri => value)
+ # Add/modify metadata, existing entries will be overwritten
+ # @example
+ # dataset.add_metadata({DC.title => "any_title", DC.creator => "my_email"})
+ # @param [Hash] metadata Hash mapping predicate_uris to values
def add_metadata(metadata)
metadata.each { |k,v| @metadata[k] = v }
end
- # Copy a dataset (rewrites URI)
- def copy(dataset)
- @metadata = dataset.metadata
- @data_entries = dataset.data_entries
- @compounds = dataset.compounds
- @features = dataset.features
- if @uri
- self.uri = @uri
- else
- @uri = dataset.metadata[XSD.anyUri]
- end
+ # Add a feature
+ # @param [String] feature Feature URI
+ # @param [Hash] metadata Hash with feature metadata
+ def add_feature(feature,metadata={})
+ @features[feature] = metadata
end
- # save dataset (overwrites existing dataset)
+ # Add/modify metadata for a feature
+ # @param [String] feature Feature URI
+ # @param [Hash] metadata Hash with feature metadata
+ def add_feature_metadata(feature,metadata)
+ metadata.each { |k,v| @features[feature][k] = v }
+ end
+
+ # Save dataset at the dataset service
+ # - creates a new dataset if uri is not set
+ # - overwrites dataset if uri exists
+ # @return [String] Dataset URI
def save
# TODO: rewrite feature URI's ??
- # create dataset if uri empty
@compounds.uniq!
- RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
+ if @uri
+ RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
+ else
+ # create dataset if uri is empty
+ self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp
+ RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
+ end
+ @uri
end
# Delete dataset at the dataset service
def delete
RestClientWrapper.delete @uri
end
- end
-end
-
- #########################################################
- # kept for backward compatibility, may have to be fixed #
- #########################################################
-
-=begin
- def from_owl(owl)
- # creates dataset object from Opentox::Owl object
- # use Dataset.find( <uri> ) to load dataset from rdf-supporting datasetservice
- # note: does not load all feature values, as this is time consuming
- raise "invalid param" unless owl.is_a?(OpenTox::Owl)
- @metadata[DC.title] = owl.get("title")
- @metadata[DC.creator] = owl.get("creator")
- @metadata[XSD.anyUri] = owl.uri
- # when loading a dataset from owl, only compound- and feature-uris are loaded
- owl.load_dataset(@compounds, @features)
- # all features are marked as dirty
- # as soon as a feature-value is requested all values for this feature are loaded from the rdf
- @dirty_features = @features.dclone
- @owl = owl
- end
-
- def self.find(uri, accept_header=nil)
-
- unless accept_header
- if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host))
- accept_header = 'application/x-yaml'
- else
- accept_header = "application/rdf+xml"
- end
- end
-
- case accept_header
- when "application/x-yaml"
- LOGGER.debug "DATASET: "+ uri
- LOGGER.debug RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
- d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
- #d.uri = @metadata[XSD.anyUri] unless d.uri
- when "application/rdf+xml"
- owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset")
- d = Dataset.new(owl)
- else
- raise "cannot get datset with accept header: "+accept_header.to_s
- end
- d
- end
- # converts a dataset represented in owl to yaml
- # (uses a temporary dataset)
- # note: to_yaml is overwritten, loads complete owl dataset values
- def self.owl_to_yaml( owl_data, uri)
- owl = OpenTox::Owl.from_data(owl_data, uri, "Dataset")
- d = Dataset.new(owl)
- d.to_yaml
- end
-
- # creates a new dataset, using only those compounsd specified in new_compounds
- # returns uri of new dataset
- def create_new_dataset( new_compounds, new_features, new_title, new_creator )
-
- LOGGER.debug "create new dataset with "+new_compounds.size.to_s+"/"+compounds.size.to_s+" compounds"
- raise "no new compounds selected" unless new_compounds and new_compounds.size>0
-
- # load require features
- if ((defined? @dirty_features) && (@dirty_features & new_features).size > 0)
- (@dirty_features & new_features).each{|f| load_feature_values(f)}
- end
-
- dataset = OpenTox::Dataset.new
- dataset.title = new_title
- dataset.creator = new_creator
- dataset.features = new_features
- dataset.compounds = new_compounds
-
- # Copy dataset data for compounds and features
- # PENDING: why storing feature values in an array?
- new_compounds.each do |c|
- data_c = []
- raise "no data for compound '"+c.to_s+"'" if @data[c]==nil
- @data[c].each do |d|
- m = {}
- new_features.each do |f|
- m[f] = d[f]
- end
- data_c << m
- end
- dataset.data[c] = data_c
- end
- return dataset.save
- end
-
- # returns classification value
- def get_predicted_class(compound, feature)
- v = get_value(compound, feature)
- if v.is_a?(Hash)
- k = v.keys.grep(/classification/).first
- unless k.empty?
- #if v.has_key?(:classification)
- return v[k]
- else
- return "no classification key"
- end
- elsif v.is_a?(Array)
- raise "predicted class value is an array\n"+
- "value "+v.to_s+"\n"+
- "value-class "+v.class.to_s+"\n"+
- "dataset "+self.uri.to_s+"\n"+
- "compound "+compound.to_s+"\n"+
- "feature "+feature.to_s+"\n"
- else
- return v
- end
- end
-
- # returns regression value
- def get_predicted_regression(compound, feature)
- v = get_value(compound, feature)
- if v.is_a?(Hash)
- k = v.keys.grep(/regression/).first
- unless k.empty?
- return v[k]
- else
- return "no regression key"
- end
- elsif v.is_a?(Array)
- raise "predicted regression value is an array\n"+
- "value "+v.to_s+"\n"+
- "value-class "+v.class.to_s+"\n"+
- "dataset "+self.uri.to_s+"\n"+
- "compound "+compound.to_s+"\n"+
- "feature "+feature.to_s+"\n"
- else
- return v
- end
- end
-
- # returns prediction confidence if available
- def get_prediction_confidence(compound, feature)
- v = get_value(compound, feature)
- if v.is_a?(Hash)
- k = v.keys.grep(/confidence/).first
- unless k.empty?
- #if v.has_key?(:confidence)
- return v[k].abs
- #return v["http://ot-dev.in-silico.ch/model/lazar#confidence"].abs
- else
- # PENDING: return nil isntead of raising an exception
- raise "no confidence key"
- end
- else
- LOGGER.warn "no confidence for compound: "+compound.to_s+", feature: "+feature.to_s
- return 1
- end
- end
-
- # return compound-feature value
- def get_value(compound, feature)
- if (defined? @dirty_features) && @dirty_features.include?(feature)
- load_feature_values(feature)
- end
-
- v = @data[compound]
- return nil if v == nil # missing values for all features
- if v.is_a?(Array)
- # PENDING: why using an array here?
- v.each do |e|
- if e.is_a?(Hash)
- if e.has_key?(feature)
- return e[feature]
- end
- else
- raise "invalid internal value type"
- end
- end
- return nil #missing value
- else
- raise "value is not an array\n"+
- "value "+v.to_s+"\n"+
- "value-class "+v.class.to_s+"\n"+
- "dataset "+self.uri.to_s+"\n"+
- "compound "+compound.to_s+"\n"+
- "feature "+feature.to_s+"\n"
- end
- end
-
- # loads specified feature and removes dirty-flag, loads all features if feature is nil
- def load_feature_values(feature=nil)
- if feature
- raise "feature already loaded" unless @dirty_features.include?(feature)
- @owl.load_dataset_feature_values(@compounds, @data, [feature])
- @dirty_features.delete(feature)
+ private
+ # Copy a dataset (rewrites URI)
+ def copy(dataset)
+ @metadata = dataset.metadata
+ @data_entries = dataset.data_entries
+ @compounds = dataset.compounds
+ @features = dataset.features
+ if @uri
+ self.uri = @uri
else
- @data = {} unless @data
- @owl.load_dataset_feature_values(@compounds, @data, @dirty_features)
- @dirty_features.clear
+ @uri = dataset.metadata[XSD.anyURI]
end
end
-
- # overwrite to yaml:
- # in case dataset is loaded from owl:
- # * load all values
- def to_yaml
- # loads all features
- if ((defined? @dirty_features) && @dirty_features.size > 0)
- load_feature_values
- end
- super
- end
-
- # * remove @owl from yaml, not necessary
- def to_yaml_properties
- super - ["@owl"]
- end
-
end
end
-=end
diff --git a/lib/environment.rb b/lib/environment.rb
index b16b62f..d66b062 100644
--- a/lib/environment.rb
+++ b/lib/environment.rb
@@ -61,3 +61,32 @@ FALSE_REGEXP = /^(false|inactive|0|0.0)$/i
# Task durations
DEFAULT_TASK_MAX_DURATION = 36000
EXTERNAL_TASK_MAX_DURATION = 36000
+
+# OWL Namespaces
+class OwlNamespace
+
+ def initialize(uri)
+ @uri = uri
+ end
+
+ def [](property)
+ @uri+property.to_s
+ end
+
+ def type # for RDF.type
+ "#{@uri}type"
+ end
+
+ def method_missing(property)
+ @uri+property.to_s
+ end
+
+end
+
+RDF = OwlNamespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+OWL = OwlNamespace.new 'http://www.w3.org/2002/07/owl#'
+DC = OwlNamespace.new 'http://purl.org/dc/elements/1.1/'
+OT = OwlNamespace.new 'http://www.opentox.org/api/1.1#'
+OTA = OwlNamespace.new 'http://www.opentox.org/algorithmTypes.owl#'
+XSD = OwlNamespace.new 'http://www.w3.org/2001/XMLSchema#'
+
diff --git a/lib/feature.rb b/lib/feature.rb
index 9616135..13d97a2 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -1,7 +1,5 @@
module OpenTox
-
class Feature
- include OtObject
+ include OpenTox
end
-
end
diff --git a/lib/model.rb b/lib/model.rb
index d0d6703..63013cb 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -1,143 +1,410 @@
module OpenTox
+
module Model
+ include OpenTox
+
+ def run(params)
+ if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
+ accept = 'application/x-yaml'
+ else
+ accept = 'application/rdf+xml'
+ end
+ begin
+ params[:acccept] = accept
+ #TODO fix: REstClientWrapper does not accept accept header
+ #RestClientWrapper.post(@uri,params)#,{:accept => accept})
+ `curl -X POST -H "Accept:#{accept}" #{params.collect{|k,v| "-d #{k}=#{v}"}.join(" ")} #{@uri}`.to_s.chomp
+ rescue => e
+ LOGGER.error "Failed to run #{@uri} with #{params.inspect} (#{e.inspect})"
+ raise "Failed to run #{@uri} with #{params.inspect}"
+ end
+ end
+
+=begin
+ def classification?
+ #TODO replace with request to ontology server
+ if @metadata[DC.title] =~ /(?i)classification/
+ return true
+ elsif @metadata[DC.title] =~ /(?i)regression/
+ return false
+ elsif @uri =~/ntua/ and @metadata[DC.title] =~ /mlr/
+ return false
+ elsif @uri =~/tu-muenchen/ and @metadata[DC.title] =~ /regression|M5P|GaussP/
+ return false
+ elsif @uri =~/ambit2/ and @metadata[DC.title] =~ /pKa/ || @metadata[DC.title] =~ /Regression|Caco/
+ return false
+ elsif @uri =~/majority/
+ return (@uri =~ /class/) != nil
+ else
+ raise "unknown model, uri:'"+@uri+"' title:'"+@metadata[DC.title]+"'"
+ end
+ end
+=end
+
class Generic
+ include Model
+ end
+
+ class Lazar
+
+ include Model
+
+ #attr_accessor :prediction_type, :feature_type, :features, :effects, :activities, :p_values, :fingerprints, :parameters
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :parameters, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm
+
+ def initialize(uri=nil)
+
+ if uri
+ super uri
+ else
+ super CONFIG[:services]["opentox-model"]
+ end
+
+ # TODO: fix metadata, add parameters
+ @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
+
+ @features = []
+ @effects = {}
+ @activities = {}
+ @p_values = {}
+ @fingerprints = {}
+
+ @feature_calculation_algorithm = "substructure_match"
+ @similarity_algorithm = "weighted_tanimoto"
+ @prediction_algorithm = "weighted_majority_vote"
- MODEL_ATTRIBS = [:uri, :title, :creator, :date, :format, :predictedVariables, :independentVariables, :dependentVariables, :trainingDataset, :algorithm]
- MODEL_ATTRIBS.each{ |a| attr_accessor(a) }
+ @min_sim = 0.3
+
+ end
def self.find(uri)
- owl = OpenTox::Owl.from_uri(uri, "Model")
- return self.new(owl)
- end
-
- def self.to_rdf(model)
- owl = OpenTox::Owl.create 'Model', model.uri
- (MODEL_ATTRIBS - [:uri]).each do |a|
- owl.set(a.to_s,model.send(a.to_s))
+ YAML.load RestClientWrapper.get(uri,:content_type => 'application/x-yaml')
+ end
+
+ def self.create_from_dataset(dataset_uri,feature_dataset_uri,prediction_feature=nil)
+ training_activities = OpenTox::Dataset.find(dataset_uri)
+ training_features = OpenTox::Dataset.find(feature_dataset_uri)
+ unless prediction_feature # try to read prediction_feature from dataset
+ raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
+ prediction_feature = training_activities.features.keys.first
+ params[:prediction_feature] = prediction_feature
+ end
+ lazar = Lazar.new
+ training_features = OpenTox::Dataset.new(feature_dataset_uri)
+ case training_features.feature_type
+ when "classification"
+ lazar.similarity_algorithm = "weighted_tanimoto"
+ when "regression"
+ lazar.similarity_algorithm = "weighted_euclid"
end
- owl.rdf
end
-
- protected
- def initialize(owl)
- MODEL_ATTRIBS.each do |a|
- self.send("#{a.to_s}=".to_sym, owl.get(a.to_s)) unless a==:uri
+
+ def self.create(dataset_uri,prediction_feature=nil,feature_generation_uri=File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"),params=nil)
+
+ training_activities = OpenTox::Dataset.find(dataset_uri)
+
+ unless prediction_feature # try to read prediction_feature from dataset
+ raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1
+ prediction_feature = training_activities.features.keys.first
+ params[:prediction_feature] = prediction_feature
end
- @uri = owl.uri
- if ENV['RACK_ENV'] =~ /test|debug/
- begin
- raise "uri invalid" unless Utils.is_uri?(@uri)
- raise "no predicted variables" unless @predictedVariables and @predictedVariables.size>0
- rescue => ex
- RestClientWrapper.raise_uri_error "invalid model: '"+ex.message+"'\n"+self.to_yaml+"\n",@uri.to_s
+
+ lazar = Lazar.new
+ params[:feature_generation_uri] = feature_generation_uri
+ feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s
+ training_features = OpenTox::Dataset.find(feature_dataset_uri)
+ raise "Dataset #{feature_dataset_uri} not found or empty." if training_features.nil?
+
+ # sorted features for index lookups
+ lazar.features = training_features.features.sort if training_features.feature_type == "regression"
+
+ training_features.data_entries.each do |compound,entry|
+ lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound]
+ entry.keys.each do |feature|
+ case training_features.feature_type
+ when "fminer"
+ # fingerprints are sets
+ smarts = training_features.features[feature][OT.smarts]
+ lazar.fingerprints[compound] << smarts
+ unless lazar.features.include? smarts
+ lazar.features << smarts
+ lazar.p_values[smarts] = training_features.features[feature][OT.p_value]
+ lazar.effects[smarts] = training_features.features[feature][OT.effect]
+ end
+ when "classification"
+ # fingerprints are sets
+ if entry[feature].flatten.size == 1
+ lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP)
+ lazar.features << feature unless lazar.features.include? feature
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ end
+ when "regression"
+ # fingerprints are arrays
+ if entry[feature].flatten.size == 1
+ lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first
+ else
+ LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}"
+ end
+ end
+ end
+
+ lazar.activities[compound] = [] unless lazar.activities[compound]
+ training_activities.data_entries[compound][params[:prediction_feature]].each do |value|
+ case value.to_s
+ when "true"
+ lazar.activities[compound] << true
+ when "false"
+ lazar.activities[compound] << false
+ else
+ lazar.activities[compound] << value.to_f
+ lazar.prediction_type = "regression"
+ end
end
- LOGGER.warn "model has no dependent variable" unless @dependentVariables and @dependentVariables.size>0
- LOGGER.warn "model has no algorithm" unless @algorithm and @algorithm.size>0
- LOGGER.warn "model has no indenpendent variables" unless @independentVariables
end
+
+ if feature_generation_uri.match(/fminer/)
+ lazar.feature_calculation_algorithm = "substructure_match"
+ else
+ halt 404, "External feature generation services not yet supported"
+ end
+
+ lazar.metadata[OT.dependentVariables] = params[:prediction_feature]
+ lazar.metadata[OT.trainingDataset] = dataset_uri
+ lazar.metadata[OT.featureDataset] = feature_dataset_uri
+
+ lazar.parameters = {
+ "dataset_uri" => dataset_uri,
+ "prediction_feature" => prediction_feature,
+ "feature_generation_uri" => feature_generation_uri
+ }
+
+ model_uri = lazar.save
+ LOGGER.info model_uri + " created #{Time.now}"
+ model_uri
end
- end
-
- class PredictionModel < Generic
-
- def self.build( algorithm_uri, algorithm_params )
-
- LOGGER.debug "Build model, algorithm_uri:"+algorithm_uri.to_s+", algorithm_parms: "+algorithm_params.inspect.to_s
- uri = OpenTox::RestClientWrapper.post(algorithm_uri,algorithm_params).to_s
- LOGGER.debug "Build model done: "+uri.to_s
- RestClientWrapper.raise_uri_error("Invalid build model result: '"+uri.to_s+"'", algorithm_uri, algorithm_params ) unless Utils.model_uri?(uri)
- return PredictionModel.find(uri)
- end
-
- def predict_dataset( dataset_uri )
-
- LOGGER.debug "Predict dataset: "+dataset_uri.to_s+" with model "+@uri.to_s
- uri = RestClientWrapper.post(@uri, {:accept => "text/uri-list", :dataset_uri=>dataset_uri})
- RestClientWrapper.raise_uri_error("Prediciton result no dataset uri: "+uri.to_s, @uri, {:dataset_uri=>dataset_uri} ) unless Utils.dataset_uri?(uri)
- uri
- end
-
- def classification?
- #HACK replace with request to ontology server
- if @title =~ /(?i)classification/
- return true
- elsif @title =~ /(?i)regression/
- return false
- elsif @uri =~/ntua/ and @title =~ /mlr/
- return false
- elsif @uri =~/tu-muenchen/ and @title =~ /regression|M5P|GaussP/
- return false
- elsif @uri =~/ambit2/ and @title =~ /pKa/ || @title =~ /Regression|Caco/
- return false
- elsif @uri =~/majority/
- return (@uri =~ /class/) != nil
+
+ def predict_dataset(dataset_uri)
+ @prediction_dataset = Dataset.create
+ @prediction_dataset.add_metadata({
+ OT.hasSource => @lazar.uri,
+ DC.creator => @lazar.uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] ))
+ })
+ @prediction_dataset.add_parameters({"dataset_uri" => dataset_uri})
+ Dataset.new(dataset_uri).load_compounds.each do |compound_uri|
+ predict(compound_uri,false)
+ end
+ @prediction_dataset.save
+ @prediction_dataset.uri
+ end
+
+ def predict(compound_uri,verbose=false)
+
+ @compound = Compound.new compound_uri
+
+ unless @prediction_dataset
+ @prediction_dataset = Dataset.create
+ @prediction_dataset.add_metadata( {
+ OT.hasSource => @lazar.uri,
+ DC.creator => @lazar.uri,
+ DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] ))
+ } )
+ @prediction_dataset.add_parameters( {"compound_uri" => compound_uri} )
+ end
+
+ neighbors
+ eval @prediction_algorithm
+
+ if @prediction
+
+ feature_uri = File.join( @prediction_dataset.uri, "feature", @prediction_dataset.compounds.size)
+ @prediction_dataset.add @compound.uri, feature_uri, @prediction
+
+ feature_metadata = @prediction_dataset.metadata
+ feature_metadata[DC.title] = File.basename(@metadata[OT.dependentVariables])
+ feature_metadata[OT.prediction] = @prediction
+ feature_metadata[OT.confidence] = @confidence
+ @prediction_dataset.add_feature(feature_uri, feature_metadata)
+
+ if verbose
+ if @compound_features
+ @compound_features.each do |feature|
+ @prediction_dataset.add @compound.uri, feature, true
+ end
+ end
+ n = 0
+ @neighbors.sort{|a,b| a[:similarity] <=> b[:similarity]}.each do |neighbor|
+ neighbor_uri = File.join( @prediction_dataset.uri, "feature/neighbor", n )
+ @prediction_dataset.add @compound.uri, neighbor_uri, true
+ @prediction_dataset.add_feature(neighbor, {
+ OT.compound => neighbor[:compound],
+ OT.similarity => neighbor[:similarity],
+ OT.activity => neighbor[:activity]
+ })
+ n+=1
+ end
+ end
+ end
+ @prediction_dataset.save
+ @prediction_dataset.uri
+ end
+
+ def weighted_majority_vote
+ conf = 0.0
+ @neighbors.each do |neighbor|
+ case neighbor[:activity].to_s
+ when 'true'
+ conf += OpenTox::Algorithm.gauss(neighbor[:similarity])
+ when 'false'
+ conf -= OpenTox::Algorithm.gauss(neighbor[:similarity])
+ end
+ end
+ if conf > 0.0
+ @prediction = true
+ elsif conf < 0.0
+ @prediction = false
else
- raise "unknown model, uri:'"+@uri.to_s+"' title:'"+@title.to_s+"'"
+ @prediction = nil
end
+ @confidence = conf/@neighbors.size if @neighbors.size > 0
end
- end
-
- class Lazar < Generic
-
- attr_accessor :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features
-
- def initialize
- @source = "http://github.com/helma/opentox-model"
- @algorithm = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
- #@independent_variables = File.join(CONFIG[:services]["opentox-algorithm"],"fminer#BBRC_representative")
- @features = []
- @effects = {}
- @activities = {}
- @p_values = {}
- @fingerprints = {}
+
+ def local_svm_regression
+ sims = @neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
+ conf = sims.inject{|sum,x| sum + x }
+ acts = @neighbors.collect do |n|
+ act = n[:activity]
+ # TODO: check this in model creation
+ raise "0 values not allowed in training dataset. log10 is calculated internally." if act.to_f == 0
+ Math.log10(act.to_f)
+ end # activities of neighbors for supervised learning
+
+ neighbor_matches = @neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ if neighbor_matches.size == 0
+ raise "No neighbors found"
+ else
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = []
+ # lower triangle
+ (0..(i-1)).each do |j|
+ sim = OpenTox::Algorithm.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], @lazar.p_values)
+ gram_matrix[i] << OpenTox::Algorithm.gauss(sim)
+ end
+ # diagonal element
+ gram_matrix[i][i] = 1.0
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = OpenTox::Algorithm.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], @lazar.p_values) # double calculation?
+ gram_matrix[i] << OpenTox::Algorithm.gauss(sim)
+ end
+ end
+
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.size
+ @r.y = acts
+ @r.sims = sims
+
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ @r.eval "p<-predict(model,sims)[1,1]"
+ @prediction = 10**(@r.p.to_f)
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ @r.quit # free R
+ end
+ @confidence = conf/@neighbors.size if @neighbors.size > 0
+
end
- def save
- @features.uniq!
- resource = RestClient::Resource.new(CONFIG[:services]["opentox-model"])
- resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s
+ def neighbors
+
+ @compound_features = eval(@feature_calculation_algorithm) if @feature_calculation_algorithm
+
+ @neighbors = {}
+ @activities.each do |training_compound,activities|
+ @training_compound = training_compound
+ sim = eval(@similarity_algorithm)
+ if sim > @min_sim
+ activities.each do |act|
+ @neighbors << {
+ :compound => @training_compound,
+ :similarity => sim,
+ :features => @fingerprints[@training_compound],
+ :activity => act
+ }
+ end
+ end
+ end
+
end
- def self.find_all
- RestClientWrapper.get(CONFIG[:services]["opentox-model"]).chomp.split("\n")
+ def tanimoto
+ OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound])
end
- def self.predict(compound_uri,model_uri)
- #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'})
- `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}`
+ def weighted_tanimoto
+ OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound],@p_values)
end
- end
-
- class PropertyLazar < Generic
-
- attr_accessor :feature_dataset_uri, :properties, :features, :activities#, :effects, :p_values
-
- def initialize
- @source = "http://github.com/helma/opentox-model"
- @algorithm = File.join(CONFIG[:services]["opentox-algorithm"],"property_lazar")
- #@independent_variables = File.join(CONFIG[:services]["opentox-algorithm"],"fminer#BBRC_representative")
- @features = []
- #@effects = {}
- @activities = {}
- #@p_values = {}
- @properties = {}
+
+ def euclid
+ OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound])
+ end
+
+ def weighted_euclid
+ OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound],@p_values)
+ end
+
+ def substructure_match
+ @compound.match(@features)
+ end
+
+ def database_search
+ #TODO add features method to dataset
+ Dataset.new(@metadata[OT.featureDataset]).features(@compound.uri)
+ end
+
+ def database_activity(compound_uri)
+ prediction = OpenTox::Dataset.new
+ # find database activities
+ if @activities[compound_uri]
+ @activities[compound_uri].each { |act| prediction.add compound_uri, @metadata[OT.dependentVariables], act }
+ prediction.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
+ prediction
+ else
+ nil
+ end
end
def save
- @features.uniq!
- resource = RestClient::Resource.new(CONFIG[:services]["opentox-model"])
- resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s
+ RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
end
- def self.find_all
- RestClientWrapper.get(CONFIG[:services]["opentox-model"]).chomp.split("\n")
+ def self.all
+ RestClientWrapper.get(CONFIG[:services]["opentox-model"]).to_s.split("\n")
end
- def self.predict(compound_uri,model_uri)
- #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'})
- `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}`
+ def delete
+ RestClientWrapper.delete @uri unless @uri == CONFIG[:services]["opentox-model"]
end
+
end
end
end
diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb
index 2749899..9dc1372 100644
--- a/lib/opentox-ruby-api-wrapper.rb
+++ b/lib/opentox-ruby-api-wrapper.rb
@@ -8,6 +8,6 @@ rescue LoadError
puts "Please install Openbabel with 'rake openbabel:install' in the compound component"
end
-['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','utils','feature', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib|
+['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib|
require lib
end
diff --git a/lib/opentox.rb b/lib/opentox.rb
index 453ca66..7e1deec 100644
--- a/lib/opentox.rb
+++ b/lib/opentox.rb
@@ -1,79 +1,49 @@
module OpenTox
- # Generic OpenTox class
- module OtObject
-
- attr_reader :uri
- attr_accessor :metadata
-
- # Initialize OpenTox object with optional uri
- def initialize(uri=nil)
- @metadata = {}
- self.uri = uri if uri
- end
-
- # Set URI
- def uri=(uri)
- @uri = uri
- @metadata[XSD.anyUri] = uri
- end
-
- # Get title
- def title
- load_metadata unless @metadata[DC.title]
- @metadata[DC.title]
- end
-
- # Set title
- def title=(title)
- @metadata[DC.title] = title
- end
-
- # Get all objects from a service
- def self.all(uri)
- #def OtObject.all(uri)
- RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.split(/\n/)
- end
-
- # Load metadata from URI
- def load_metadata
- #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
- # TODO: fix metadata retrie
- #@metadata = YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml"))
- #else
- @metadata = Parser::Owl::Generic.new(@uri).metadata
- #end
- @metadata
- #Parser::Owl::Generic.new(@uri).metadata
- end
-
+ attr_reader :uri
+ attr_accessor :metadata, :parameters
+
+ # Initialize OpenTox object with optional uri
+ # @param [optional, String] URI
+ def initialize(uri=nil)
+ @metadata = {}
+ self.uri = uri if uri
end
- module Owl
-
- class Namespace
-
- def initialize(uri)
- @uri = uri
- end
+ # Set URI
+ # @param [String] URI
+ def uri=(uri)
+ @uri = uri
+ @metadata[XSD.anyURI] = uri
+ end
- def [](property)
- @uri+property.to_s
- end
+ # Get all objects from a service
+ # @return [Array] List of available URIs
+ def self.all(uri)
+ RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.split(/\n/)
+ end
- def method_missing(property)
- @uri+property.to_s
- end
+ # Load (and return) metadata from object URI
+ # @return [Hash] Metadata
+ def load_metadata
+ @metadata = Parser::Owl::Generic.new(@uri).metadata
+ @metadata
+ end
- end
+ # Load parameters from URI
+ #def load_parameters
+ #@parameters = Parser::Owl::Generic.new(@uri).parameters
+ #@parameters
+ #end
+
+ # Get OWL-DL representation in RDF/XML format
+ # @return [application/rdf+xml] RDF/XML representation
+ def to_rdfxml
+ s = Serializer::Owl.new
+ s.add_metadata(@uri,@metadata)
+ #s.add_parameters(@uri,@parameters) if @parameters
+ s.to_rdfxml
end
end
-#
-# OWL Namespaces
-RDF = OpenTox::Owl::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
-OWL = OpenTox::Owl::Namespace.new 'http://www.w3.org/2002/07/owl#'
-DC = OpenTox::Owl::Namespace.new 'http://purl.org/dc/elements/1.1/'
-OT = OpenTox::Owl::Namespace.new 'http://www.opentox.org/api/1.1#'
-XSD = OpenTox::Owl::Namespace.new 'http://www.w3.org/2001/XMLSchema#'
diff --git a/lib/overwrite.rb b/lib/overwrite.rb
index 1d0161b..2e4c396 100644
--- a/lib/overwrite.rb
+++ b/lib/overwrite.rb
@@ -12,3 +12,25 @@ class Sinatra::Base
end
end
+class String
+ def task_uri?
+ self.uri? && !self.match(/task/).nil?
+ end
+
+ def dataset_uri?
+ self.uri? && !self.match(/dataset/).nil?
+ end
+
+ def self.model_uri?
+ self.uri? && !self.match(/model/).nil?
+ end
+
+ def uri?
+ begin
+ u = URI::parse(self)
+ return (u.scheme!=nil and u.host!=nil)
+ rescue URI::InvalidURIError
+ return false
+ end
+ end
+end
diff --git a/lib/parser.rb b/lib/parser.rb
index e623bf5..8c173f9 100644
--- a/lib/parser.rb
+++ b/lib/parser.rb
@@ -1,5 +1,14 @@
require 'spreadsheet'
require 'roo'
+
+class String
+
+ def to_triple
+ self.chomp.split(' ',3).collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
+ end
+
+end
+
module OpenTox
module Parser
@@ -12,19 +21,28 @@ module OpenTox
end
def metadata
- # TODO: load parameters
+
if @dataset
uri = File.join(@uri,"metadata")
else
uri = @uri
end
+
statements = []
- `rapper -i rdfxml -o ntriples #{uri}`.each_line do |line|
- triple = line.chomp.split('> ')
- statements << triple.collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
- end
- statements.each do |triple|
+ parameter_ids = []
+ `rapper -i rdfxml -o ntriples #{uri} 2>/dev/null`.each_line do |line|
+ triple = line.to_triple
@metadata[triple[1]] = triple[2].split('^^').first if triple[0] == @uri and triple[1] != RDF['type']
+ statements << triple
+ parameter_ids << triple[2] if triple[1] == OT.parameters
+ end
+ unless parameter_ids.empty?
+ @metadata[OT.parameters] = []
+ parameter_ids.each do |p|
+ parameter = {}
+ statements.each{ |t| parameter[t[1]] = t[2] if t[0] == p and t[1] != RDF['type']}
+ @metadata[OT.parameters] << parameter
+ end
end
@metadata
end
@@ -37,6 +55,8 @@ module OpenTox
include Owl
+ attr_writer :uri
+
def initialize(uri)
super uri
@dataset = ::OpenTox::Dataset.new(@uri)
@@ -47,11 +67,10 @@ module OpenTox
feature_values = {}
feature = {}
other_statements = {}
- ntriples = `rapper -i rdfxml -o ntriples #{@uri}`
- ntriples.each_line do |line|
+ `rapper -i rdfxml -o ntriples #{@uri} 2>/dev/null`.each_line do |line|
triple = line.chomp.split(' ',3)
triple = triple[0..2].collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}
- case triple[1] # Ambit namespaces are case insensitive
+ case triple[1]
when /#{OT.values}/i
data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]]
data[triple[0]][:values] << triple[2]
@@ -77,76 +96,84 @@ module OpenTox
end
def load_features
- @dataset.features.keys.each do |feature|
- @dataset.features[feature] = Parser::Owl::Generic.new(feature).metadata
+ uri = File.join(@uri,"features")
+ statements = []
+ features = Set.new
+ `rapper -i rdfxml -o ntriples #{uri} 2>/dev/null`.each_line do |line|
+ triple = line.chomp.split('> ').collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}[0..2]
+ statements << triple
+ features << triple[0] if triple[1] == RDF['type'] and triple[2] == OT.Feature
+ end
+ statements.each do |triple|
+ if features.include? triple[0]
+ @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
+ @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
+ end
end
+ @dataset.features
end
+
end
end
- class Spreadsheet
+ class Spreadsheets
+ # TODO: expand for multiple columns
+
+ attr_accessor :dataset
+ def initialize
+
+ # TODO: fix 2 datasets created
+ #@dataset = Dataset.create
+ #@dataset.save # get uri
+
+ @data = []
+ @features = []
+ @feature_types = {}
- def initialize(dataset)
- @dataset = dataset
@format_errors = ""
@smiles_errors = []
@activity_errors = []
@duplicates = {}
- @nr_compounds = 0
- @data = []
- @activities = []
- @type = "classification"
end
def load_excel(book)
book.default_sheet = 0
- 1.upto(book.last_row) do |row|
- if row == 1
- @feature = File.join(@dataset.uri,"feature",book.cell(row,2))
- else
- add( book.cell(row,1), book.cell(row,2), row ) # smiles, activity
- end
- end
- parse
+ add_features book.row(1)
+ 2.upto(book.last_row) { |i| add_values book.row(i) }
+ warnings
+ @dataset
end
def load_csv(csv)
row = 0
- csv.each_line do |line|
- row += 1
- raise "Invalid CSV format at line #{row}: #{line.chomp}" unless line.chomp.match(/^.+[,;].*$/) # check CSV format
- items = line.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes
- if row == 1
- @feature = File.join(@dataset.uri,"feature",items[1])
- else
- add(items[0], items[1], row)
- end
- end
- parse
+ input = csv.split("\n")
+ add_features split_row(input.shift)
+ input.each { |row| add_values split_row(row) }
+ warnings
+ @dataset
end
- def parse
+ private
- # create dataset
- @data.each do |items|
- case @type
- when "classification"
- case items[1].to_s
- when TRUE_REGEXP
- @dataset.add(items[0], @feature, true )
- when FALSE_REGEXP
- @dataset.add(items[0], @feature, false)
- end
- when "regression"
- if items[1].to_f == 0
- @activity_errors << "Row #{items[2]}: Zero values not allowed for regression datasets - entry ignored."
- else
- @dataset.add items[0], @feature, items[1].to_f
- end
+ def warnings
+
+ info = ''
+ @feature_types.each do |feature,types|
+ if types.uniq.size > 1
+ type = OT.NumericFeature
+ else
+ type = types.first
end
+ @dataset.add_feature_metadata(feature,{OT.isA => type})
+ info += "\"#{@dataset.feature_name(feature)}\" detected as #{type.split('#').last}."
+
+ # TODO: rewrite feature values
+ # TODO if value.to_f == 0 @activity_errors << "#{smiles} Zero values not allowed for regression datasets - entry ignored."
end
+ @dataset.metadata[OT.Info] = info
+
warnings = ''
warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @smiles_errors.join("<br/>") unless @smiles_errors.empty?
warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
@@ -156,34 +183,75 @@ module OpenTox
@dataset.metadata[OT.Warnings] = warnings
- @dataset
+ end
+ def add_features(row)
+ row.shift # get rid of smiles entry
+ row.each do |feature_name|
+ feature_uri = File.join(@dataset.uri,"feature",URI.encode(feature_name))
+ @feature_types[feature_uri] = []
+ @features << feature_uri
+ @dataset.add_feature(feature_uri,{DC.title => feature_name})
+ end
end
- def add(smiles, act, row)
+ def add_values(row)
+
+ smiles = row.shift
compound = Compound.from_smiles(smiles)
if compound.nil? or compound.inchi.nil? or compound.inchi == ""
- @smiles_errors << "Row #{row}: " + [smiles,act].join(", ")
- return false
- end
- unless numeric?(act) or classification?(act)
- @activity_errors << "Row #{row}: " + [smiles,act].join(", ")
+ @smiles_errors << smiles+", "+row.join(", ")
return false
end
@duplicates[compound.inchi] = [] unless @duplicates[compound.inchi]
- @duplicates[compound.inchi] << "Row #{row}: " + [smiles, act].join(", ")
- @type = "regression" unless classification?(act)
- # TODO: set OT.NumericalFeature, ...
- @nr_compounds += 1
- @data << [ compound.uri, act , row ]
+ @duplicates[compound.inchi] << smiles+", "+row.join(", ")
+
+ row.each_index do |i|
+ value = row[i]
+ feature = @features[i]
+ type = feature_type(value)
+
+ @feature_types[feature] << type
+
+ case type
+ when OT.NominalFeature
+ case value.to_s
+ when TRUE_REGEXP
+ @dataset.add(compound.uri, feature, true )
+ when FALSE_REGEXP
+ @dataset.add(compound.uri, feature, false )
+ end
+ when OT.NumericFeature
+ @dataset.add compound.uri, feature, value.to_f
+ when OT.StringFeature
+ # TODO: insert ??
+ @dataset.add compound.uri, feature, value.to_s
+ @activity_errors << smiles+", "+row.join(", ")
+ #return false
+ end
+ end
+ end
+
+ def numeric?(value)
+ true if Float(value) rescue false
end
- def numeric?(object)
- true if Float(object) rescue false
+ def classification?(value)
+ !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil?
+ end
+
+ def feature_type(value)
+ if classification? value
+ return OT.NominalFeature
+ elsif numeric? value
+ return OT.NumericFeature
+ else
+ return OT.StringFeature
+ end
end
- def classification?(object)
- !object.to_s.strip.match(TRUE_REGEXP).nil? or !object.to_s.strip.match(FALSE_REGEXP).nil?
+ def split_row(row)
+ row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes
end
end
diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb
index 82836d9..49549b5 100644
--- a/lib/rest_client_wrapper.rb
+++ b/lib/rest_client_wrapper.rb
@@ -1,5 +1,3 @@
-
-
module OpenTox
#PENDING: implement ot error api, move to own file
@@ -60,7 +58,7 @@ module OpenTox
def self.execute( rest_call, uri, headers, payload=nil, wait=true )
do_halt 400,"uri is null",uri,headers,payload unless uri
- do_halt 400,"not a uri",uri,headers,payload unless Utils.is_uri?(uri)
+ do_halt 400,"not a uri",uri,headers,payload unless uri.to_s.uri?
do_halt 400,"headers are no hash",uri,headers,payload unless headers==nil or headers.is_a?(Hash)
do_halt 400,"nil headers for post not allowed, use {}",uri,headers,payload if rest_call=="post" and headers==nil
headers.each{ |k,v| headers.delete(k) if v==nil } if headers #remove keys with empty values, as this can cause problems
@@ -115,7 +113,7 @@ module OpenTox
when /text\//
raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and
res.split("\n").size > 1 #if uri list contains more then one uri, its not a task
- task = OpenTox::Task.find(res.to_s) if Utils.task_uri?(res)
+ task = OpenTox::Task.find(res.to_s) if res.to_s.uri?
else
raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s
end
diff --git a/lib/serializer.rb b/lib/serializer.rb
index 3def252..3a9cb60 100644
--- a/lib/serializer.rb
+++ b/lib/serializer.rb
@@ -30,7 +30,6 @@ module OpenTox
OT.dataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.acceptValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
- #XSD.anyUri => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } ,
@@ -38,14 +37,15 @@ module OpenTox
DC.identifier => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
DC.contributor => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
DC.creator => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ DC.description => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ OT.Warnings => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
+ XSD.anyURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } ,
OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } ,
-
- #Untyped Individual: http://localhost/algorithm
}
@data_entries = {}
@@ -61,15 +61,10 @@ module OpenTox
end
def add_compound(uri)
- #@classes << OT.Compound unless @classes.include? OT.Compound
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Compound }] }
end
def add_feature(uri,metadata)
- #@classes << OT.Feature unless @classes.include? OT.Feature
- #@classes << OT.NominalFeature unless @classes.include? OT.NominalFeature
- #@classes << OT.NumericFeature unless @classes.include? OT.NumericFeature
- #@classes << OT.StringFeature unless @classes.include? OT.StringFeature
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] }
add_metadata uri, metadata
end
@@ -94,32 +89,37 @@ module OpenTox
end
- def add_algorithm(uri,metadata,parameters)
+ def add_algorithm(uri,metadata)
@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] }
+ LOGGER.debug @object[uri]
add_metadata uri, metadata
- add_parameters uri, parameters
- #metadata.each { |u,v| @object[uri][u] = [{"type" => type(v), "value" => v }] }
+ LOGGER.debug @object[uri]
end
- def add_model(uri,metadata)
+ def add_model(uri,metadata,parameters)
+ @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] }
+ add_metadata uri, metadata
+ add_parameters uri, parameters
end
def add_metadata(uri,metadata)
- #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] }
+ id = 0
metadata.each do |u,v|
- @object[uri][u] = [{"type" => type(v), "value" => v }]
- end
- end
-
- def add_parameters(uri,parameters)
- #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] }
- @object[uri][OT.parameters] = [] unless @object[uri][OT.parameters]
- parameters.each do |p|
- parameter = "_:parameter#{@parameter_id}"
- @parameter_id += 1
- @object[uri][OT.parameters] << {"type" => "bnode", "value" => parameter}
- @object[parameter] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter }] }
- add_metadata parameter, p
+ if v.is_a? String
+ @object[uri] = {} unless @object[uri]
+ @object[uri][u] = [{"type" => type(v), "value" => v }]
+ elsif v.is_a? Array and u == OT.parameters
+ @object[uri][u] = [] unless @object[uri][u]
+ v.each do |value|
+ id+=1
+ genid = "_:genid#{id}"
+ @object[uri][u] << {"type" => "bnode", "value" => genid}
+ @object[genid] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter}] }
+ value.each do |name,entry|
+ @object[genid][name] = [{"type" => type(entry), "value" => entry }]
+ end
+ end
+ end
end
end
@@ -158,10 +158,11 @@ module OpenTox
# Serializers
- def ntriples
+ def to_ntriples
#rdf_types
@triples = Set.new
+ #LOGGER.debug @object.to_yaml
@object.each do |s,entry|
s = url(s) if type(s) == "uri"
entry.each do |p,objects|
@@ -182,12 +183,12 @@ module OpenTox
@triples.sort.collect{ |s| s.join(' ').concat(" .") }.join("\n")+"\n"
end
- def rdfxml
- Tempfile.open("owl-serializer"){|f| f.write(ntriples); @path = f.path}
- `rapper -i ntriples -o rdfxml #{@path}`
+ def to_rdfxml
+ Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path}
+ `rapper -i ntriples -o rdfxml #{@path} 2>/dev/null`
end
- def json
+ def to_json
#rdf_types
Yajl::Encoder.encode(@object)
end
@@ -258,7 +259,7 @@ module OpenTox
@rows.first << features
@rows.first.flatten!
dataset.data_entries.each do |compound,entries|
- smiles = Compound.new(compound).smiles
+ smiles = Compound.new(compound).to_smiles
row = Array.new(@rows.first.size)
row[0] = smiles
entries.each do |feature, values|
@@ -271,11 +272,11 @@ module OpenTox
end
end
- def csv
+ def to_csv
@rows.collect{|r| r.join(", ")}.join("\n")
end
- def excel
+ def to_xls
Spreadsheet.client_encoding = 'UTF-8'
book = Spreadsheet::Workbook.new
sheet = book.create_worksheet(:name => '')
diff --git a/lib/task.rb b/lib/task.rb
index 50f0347..96ee719 100644
--- a/lib/task.rb
+++ b/lib/task.rb
@@ -3,6 +3,7 @@ $self_task=nil
module OpenTox
class Task
+ attr_accessor :uri, :date, :title, :creator, :description, :hasStatus, :percentageCompleted, :resultURI, :due_to_time, :http_code
# due_to_time is only set in local tasks
TASK_ATTRIBS = [ :uri, :date, :title, :creator, :description, :hasStatus, :percentageCompleted, :resultURI, :due_to_time ]
@@ -124,14 +125,14 @@ module OpenTox
def check_state
begin
raise "illegal task state, task is completed, resultURI is no URI: '"+@resultURI.to_s+
- "'" unless @resultURI and Utils.is_uri?(@resultURI) if completed?
+ "'" unless @resultURI and @resultURI.to_s.uri? if completed?
if @http_code == 202
raise "illegal task state, code is 202, but hasStatus is not Running: '"+@hasStatus+"'" unless running?
elsif @http_code == 201
raise "illegal task state, code is 201, but hasStatus is not Completed: '"+@hasStatus+"'" unless completed?
raise "illegal task state, code is 201, resultURI is no task-URI: '"+@resultURI.to_s+
- "'" unless @resultURI and Utils.task_uri?(@resultURI)
+ "'" unless @resultURI and @resultURI.to_s.uri?
end
rescue => ex
RestClientWrapper.raise_uri_error(ex.message, @uri)
@@ -171,6 +172,7 @@ module OpenTox
LOGGER.debug "Started task: "+task.uri.to_s
task.uri
end
+
end
end
diff --git a/lib/utils.rb b/lib/utils.rb
deleted file mode 100644
index a0e0cbe..0000000
--- a/lib/utils.rb
+++ /dev/null
@@ -1,50 +0,0 @@
-module OpenTox
- module Utils
- # gauss kernel
- def self.gauss(sim, sigma = 0.3)
- x = 1.0 - sim
- Math.exp(-(x*x)/(2*sigma*sigma))
- end
-
- def self.task_uri?(uri)
- is_uri?(uri) && uri.to_s =~ /task/
- end
-
- def self.dataset_uri?(uri)
- is_uri?(uri) && uri.to_s =~ /dataset/
- end
-
- def self.model_uri?(uri)
- is_uri?(uri) && uri.to_s =~ /model/
- end
-
-
- def self.is_uri?(uri)
- return false if uri==nil || uri.to_s.size==0
- begin
- u = URI::parse(uri)
- return (u.scheme!=nil and u.host!=nil)
- rescue URI::InvalidURIError
- return false
- end
- end
-
- def self.median(array)
- return nil if array.empty?
- array.sort!
- m_pos = array.size / 2
- return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2
- end
-
- end
-
-# ['rubygems', 'rest_client'].each do |r|
-# require r
-# end
-# ["bla", "google.de", "http://google.de"].each do |u|
-# puts u+"? "+Utils.is_uri?(u).to_s
-# end
-
-
-end
-