From b93002b4ea50ff7e357da08abd10577347ce2d5f Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Thu, 11 Nov 2010 09:31:27 +0100 Subject: first steps towards version 2.0, yard documentation started, passes compound, dataset, feature, algorithm, fminer tests --- Rakefile | 8 +- lib/algorithm.rb | 127 +++++++---- lib/compound.rb | 105 +++++---- lib/dataset.rb | 482 ++++++++++++--------------------------- lib/environment.rb | 29 +++ lib/feature.rb | 4 +- lib/model.rb | 485 +++++++++++++++++++++++++++++++--------- lib/opentox-ruby-api-wrapper.rb | 2 +- lib/opentox.rb | 106 ++++----- lib/overwrite.rb | 22 ++ lib/parser.rb | 208 +++++++++++------ lib/rest_client_wrapper.rb | 6 +- lib/serializer.rb | 69 +++--- lib/task.rb | 6 +- lib/utils.rb | 50 ----- 15 files changed, 944 insertions(+), 765 deletions(-) delete mode 100644 lib/utils.rb diff --git a/Rakefile b/Rakefile index 8dd1088..18f24bd 100644 --- a/Rakefile +++ b/Rakefile @@ -21,14 +21,14 @@ begin "rack-flash", "nokogiri", "rubyzip", - "builder", + #"builder", "roo", "spreadsheet", "google-spreadsheet-ruby", "tmail", "rinruby", - "rdf", - "rdf-raptor", + #"rdf", + #"rdf-raptor", "rjb" ].each { |dep| gem.add_dependency dep } [ "dm-core", @@ -42,7 +42,7 @@ begin gem.add_dependency "haml", ">=3" ['cucumber','jeweler'].each { |dep| gem.add_development_dependency dep } gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] - gem.files.include %w(lib/tasks/owl.rb, lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/utils.rb, lib/validation.rb, lib/templates/*) + gem.files.include %w(lib/tasks/owl.rb, lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*) # gem is a Gem::Specification... 
see http://www.rubygems.org/read/chapter/20 for additional settings end Jeweler::GemcutterTasks.new diff --git a/lib/algorithm.rb b/lib/algorithm.rb index e1d369a..711f63b 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -1,77 +1,122 @@ module OpenTox + # Wrapper for OpenTox Algorithms module Algorithm - include OtObject + include OpenTox + # Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters + def run(params=nil) + RestClientWrapper.post(@uri, params) + end + + # Get OWL-DL representation in RDF/XML format + # @return [application/rdf+xml] RDF/XML representation + def to_rdfxml + s = Serializer::Owl.new + s.add_algorithm(@uri,@metadata) + s.to_rdfxml + end + + # Generic Algorithm class, should work with all OpenTox webservices class Generic include Algorithm - #include OtObject - protected -# def initialize(owl) -# @title = owl.get("title") -# @date = owl.get("date") -# @uri = owl.uri -# end - end - class Fminer < Generic + module Fminer + include Algorithm - def self.create_feature_dataset(params) - LOGGER.debug File.basename(__FILE__) + ": creating feature dataset" - resource = RestClient::Resource.new(params[:feature_generation_uri]) - resource.post :dataset_uri => params[:dataset_uri], :feature_uri => params[:feature_uri] + class BBRC + include Fminer + # Initialize bbrc algorithm + def initialize + super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc") + load_metadata + end + end + + class LAST + include Fminer + # Initialize last algorithm + def initialize + super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last") + load_metadata + end end - def self.uri - File.join(CONFIG[:services]["opentox-algorithm"], "fminer") - end end - class Lazar - - def self.create_model(params) - LOGGER.debug params - LOGGER.debug File.basename(__FILE__) + ": creating model" - LOGGER.debug File.join(CONFIG[:services]["opentox-algorithm"], "lazar") - resource = RestClient::Resource.new(File.join(CONFIG[:services]["opentox-algorithm"], "lazar"), :content_type => "application/x-yaml") - @uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(CONFIG[:services]["opentox-algorithm"], "fminer")).body.chomp - end + # Create lazar prediction model + class Lazar + include Algorithm + # Initialize lazar algorithm + def initialize + super File.join(CONFIG[:services]["opentox-algorithm"], "lazar") + load_metadata + end + end - def self.uri - File.join(CONFIG[:services]["opentox-algorithm"], "lazar") - end + # Utility methods without dedicated webservices - end + module Similarity + include Algorithm - class Similarity - def self.weighted_tanimoto(fp_a,fp_b,p) - common_features = fp_a & fp_b - all_features = (fp_a + fp_b).uniq + # Tanimoto similarity + # + # @param [Array] features_a Features of first compound + # @param [Array] features_b Features of second compound + # @param [optional, Hash] weights Weights for all features + # @return [Float] (Wighted) tanimoto similarity + def self.tanimoto(features_a,features_b,weights=nil) + common_features = features_a & features_b + all_features = (features_a + features_b).uniq common_p_sum = 0.0 if common_features.size > 0 - common_features.each{|f| common_p_sum += OpenTox::Utils.gauss(p[f])} - all_p_sum = 0.0 - all_features.each{|f| all_p_sum += OpenTox::Utils.gauss(p[f])} - common_p_sum/all_p_sum + if weights + common_features.each{|f| common_p_sum += 
Algorithm.gauss(weights[f])} + all_p_sum = 0.0 + all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} + common_p_sum/all_p_sum + else + common_features.to_f/all_features + end else 0.0 end end - def self.euclidean(prop_a,prop_b) + + # Euclidean similarity + def self.euclidean(prop_a,prop_b,weights=nil) common_properties = prop_a.keys & prop_b.keys if common_properties.size > 1 dist_sum = 0 common_properties.each do |p| - dist_sum += (prop_a[p] - prop_b[p])**2 + if weights + dist_sum += ( (prop_a[p] - prop_b[p]) * Algorithm.gauss(weights[p]) )**2 + else + dist_sum += (prop_a[p] - prop_b[p])**2 + end end 1/(1+Math.sqrt(dist_sum)) else - nil + 0.0 end end end + + # Gauss kernel + def self.gauss(sim, sigma = 0.3) + x = 1.0 - sim + Math.exp(-(x*x)/(2*sigma*sigma)) + end + + # Median of an array + def self.median(array) + return nil if array.empty? + array.sort! + m_pos = array.size / 2 + return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2 + end end end diff --git a/lib/compound.rb b/lib/compound.rb index 699e4c1..6834860 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -4,41 +4,15 @@ module OpenTox # Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure). - # - # Examples: - # require "opentox-ruby-api-wrapper" - # - # # Creating compounds - # - # # from smiles string - # compound = OpenTox::Compound.from_smiles("c1ccccc1") - # # from name - # compound = OpenTox::Compound.from_name("Benzene") - # # from uri - # compound = OpenTox::Compound.new("http://webservices.in-silico.ch/compound/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"") - # - # # Getting compound representations - # - # # get InChI - # inchi = compound.inchi - # # get all compound names - # names = compound.names - # # get png image - # image = compound.png - # # get uri - # uri = compound.uri - # - # # SMARTS matching - # - # # match a smarts string - # compound.match?("cN") # returns false - # # match an array of smarts strings - # compound.match(['cc','cN']) # returns ['cc'] class Compound attr_accessor :inchi, :uri # Create compound with optional uri + # @example + # compound = OpenTox::Compound.new("http://webservices.in-silico.ch/compound/InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"") + # @param [optional, String] uri Compound URI + # @return [OpenTox::Compound] Compound def initialize(uri=nil) @uri = uri case @uri @@ -50,6 +24,10 @@ module OpenTox end # Create a compound from smiles string + # @example + # compound = OpenTox::Compound.from_smiles("c1ccccc1") + # @param [String] smiles Smiles string + # @return [OpenTox::Compound] Compound def self.from_smiles(smiles) c = Compound.new c.inchi = Compound.smiles2inchi(smiles) @@ -58,6 +36,8 @@ module OpenTox end # Create a compound from inchi string + # @param [String] smiles InChI string + # @return [OpenTox::Compound] Compound def self.from_inchi(inchi) c = Compound.new c.inchi = inchi @@ -66,6 +46,8 @@ module OpenTox end # Create a compound from sdf string + # @param [String] smiles SDF string + # @return [OpenTox::Compound] Compound def self.from_sdf(sdf) c = Compound.new c.inchi = Compound.sdf2inchi(sdf) @@ -73,7 +55,11 @@ module OpenTox c end - # Create a compound from name (name can be also an InChI/InChiKey, CAS number, etc) + # Create a compound from name. Relies on an external service for name lookups. 
+ # @example + # compound = OpenTox::Compound.from_name("Benzene") + # @param [String] name name can be also an InChI/InChiKey, CAS number, etc + # @return [OpenTox::Compound] Compound def self.from_name(name) c = Compound.new # paranoid URI encoding to keep SMILES charges and brackets @@ -83,32 +69,42 @@ module OpenTox end # Get (canonical) smiles - def smiles + # @return [String] Smiles string + def to_smiles Compound.obconversion(@inchi,'inchi','can') end # Get sdf - def sdf + # @return [String] SDF string + def to_sdf Compound.obconversion(@inchi,'inchi','sdf') end # Get gif image - def gif + # @return [image/gif] Image data + def to_gif RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/image") end # Get png image - def png + # @example + # image = compound.to_png + # @return [image/png] Image data + def to_png RestClientWrapper.get(File.join @uri, "image") end # Get URI of compound image - def image_uri + # @return [String] Compound image URI + def to_image_uri File.join @uri, "image" end - # Get all known compound names - def names + # Get all known compound names. Relies on an external service for name lookups. + # @example + # names = compound.to_names + # @return [String] Compound names + def to_names begin RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names").split("\n") rescue @@ -117,6 +113,10 @@ module OpenTox end # Match a smarts string + # @example + # compound = OpenTox::Compound.from_name("Benzene") + # compound.match?("cN") # returns false + # @param [String] smarts Smarts string def match?(smarts) obconversion = OpenBabel::OBConversion.new obmol = OpenBabel::OBMol.new @@ -128,19 +128,34 @@ module OpenTox end # Match an array of smarts strings, returns array with matching smarts + # @example + # compound = OpenTox::Compound.from_name("Benzene") + # compound.match(['cc','cN']) # returns ['cc'] + # @param [Array] smarts_array Array with Smarts strings + # @return [Array] Array with matching Smarts strings def match(smarts_array) - smarts_array.collect{|s| s if match?(s)}.compact + # avoid recreation of OpenBabel objects + obconversion = OpenBabel::OBConversion.new + obmol = OpenBabel::OBMol.new + obconversion.set_in_format('inchi') + obconversion.read_string(obmol,@inchi) + smarts_pattern = OpenBabel::OBSmartsPattern.new + smarts_array.collect do |smarts| + smarts_pattern.init(smarts) + smarts if smarts_pattern.match(obmol) + end.compact + #smarts_array.collect { |s| s if match?(s)}.compact end # Get URI of compound image with highlighted fragments - def matching_smarts_image_uri(activating, deactivating, highlight = nil) + # + # @param [Array] activating Array with activating Smarts strings + # @param [Array] deactivating Array with deactivating Smarts strings + # @return [String] URI for compound image with highlighted fragments + def matching_smarts_image_uri(activating, deactivating) activating_smarts = URI.encode "\"#{activating.join("\"/\"")}\"" deactivating_smarts = URI.encode "\"#{deactivating.join("\"/\"")}\"" - if highlight.nil? 
- File.join CONFIG[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts) - else - File.join CONFIG[:services]["opentox-compound"], "smiles", URI.encode(smiles), "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts), "highlight", URI.encode(highlight) - end + File.join @uri, "smarts/activating", URI.encode(activating_smarts),"deactivating", URI.encode(deactivating_smarts) end diff --git a/lib/dataset.rb b/lib/dataset.rb index 7c8ce24..05b2ed3 100644 --- a/lib/dataset.rb +++ b/lib/dataset.rb @@ -1,74 +1,19 @@ module OpenTox # Ruby wrapper for OpenTox Dataset Webservices (http://opentox.org/dev/apis/api-1.2/dataset). - # - # Examples: - # require "opentox-ruby-api-wrapper" - # - # # Creating datasets - # - # # create an empty dataset - # dataset = OpenTox::Dataset.new - # # create an empty dataset with URI - # # this does not load data from the dataset service - use one of the load_* methods - # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") - # # create new dataset and sav it to obtain a URI - # dataset = OpenTox::Dataset.create - # # create a new dataset from yaml representation - # dataset = OpenTox::Dataset.from_yaml - # # create a new dataset from CSV string - # csv_string = "SMILES, Toxicity\nc1ccccc1N, true" - # dataset = OpenTox::Dataset.from_csv(csv_string) - # - # # Loading data - # # Datasets created with OpenTox::Dataset.new(uri) are empty by default - # # Invoking one of the following functions will load data into the object - # - # # create an empty dataset with URI - # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") - # # loads (and returns) only metadata - # dataset.load_metadata - # # loads (and returns) only compounds - # dataset.load_compounds - # # loads (and returns) only features - # dataset.load_features - # # load all data from URI - # dataset.load_all - # - # # Getting dataset representations - # - # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") - # dataset.load_all - # # OWL-DL (RDF/XML) - # dataset.rdfxml - # # OWL-DL (Ntriples) - # dataset.ntriples - # # YAML - # dataset.yaml - # # CSV - # dataset.csv - # - # # Modifying datasets - # - # # insert a statement (compound_uri,feature_uri,value) - # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true - # - # - # # Saving datasets - # # save dataset at dataset service - # dataset.save - # - # # Deleting datasets - # # delete dataset (also at dataset service) - # dataset.delete class Dataset - include OtObject + include OpenTox attr_reader :features, :compounds, :data_entries, :metadata - attr_writer :metadata - # Create dataset with optional URI + # Create dataset with optional URI. Does not load data into the dataset - you will need to execute one of the load_* methods to pull data from a service or to insert it from other representations. 
+ # @example Create an empty dataset + # dataset = OpenTox::Dataset.new + # @example Create an empty dataset with URI + # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") + # @param [optional, String] uri Dataset URI + # @return [OpenTox::Dataset] Dataset object def initialize(uri=nil) super uri @features = {} @@ -76,52 +21,79 @@ module OpenTox @data_entries = {} end - # Create and save an empty dataset (assigns URI to dataset) + # Create an empty dataset and save it at the dataset service (assigns URI to dataset) + # @example Create new dataset and save it to obtain a URI + # dataset = OpenTox::Dataset.create + # @param [optional, String] uri Dataset URI + # @return [OpenTox::Dataset] Dataset object def self.create(uri=CONFIG[:services]["opentox-dataset"]) dataset = Dataset.new - dataset.uri = RestClientWrapper.post(uri,{}).to_s.chomp + dataset.save + dataset + end + + # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading. + # @param [String] uri Dataset URI + # @return [OpenTox::Dataset] Dataset object with all data + def self.find(uri) + dataset = Dataset.new(uri) + dataset.load_all dataset end # Get all datasets from a service -# def self.all(uri=CONFIG[:services]["opentox-dataset"]) -# RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} -# end + # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration + # @return [Array] Array of dataset object with all data + def self.all(uri=CONFIG[:services]["opentox-dataset"]) + RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} + end - # Create a dataset from YAML string - def self.from_yaml(yaml) - dataset = Dataset.create - dataset.copy YAML.load(yaml) - dataset + # Load YAML representation into the dataset + # @param [String] yaml YAML representation of the dataset + # @return [OpenTox::Dataset] Dataset object with YAML data + def load_yaml(yaml) + copy YAML.load(yaml) + end + + # Load RDF/XML representation from a file + # @param [String] file File with RDF/XML representation of the dataset + # @return [OpenTox::Dataset] Dataset object with RDF/XML data + def load_rdfxml_file(file) + parser = Parser::Owl::Dataset.new @uri + parser.uri = file.path + copy parser.load_uri end - # Create dataset from CSV string (format specification: http://toxcreate.org/help) + # Load CSV string (format specification: http://toxcreate.org/help) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually - def self.from_csv(csv) - dataset = Dataset.create - Parser::Spreadsheet.new(dataset).load_csv(csv) - dataset + # @param [String] csv CSV representation of the dataset + # @return [OpenTox::Dataset] Dataset object with CSV data + def load_csv(csv) + save unless @uri # get a uri for creating features + parser = Parser::Spreadsheets.new + parser.dataset = self + parser.load_csv(csv) end - # Create dataset from Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)) + # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually - def 
self.from_spreadsheet(book) - dataset = Dataset.create - Parser::Spreadsheet.new(dataset).load_excel(book) - dataset + # @param [Excel] book Excel workbook object (created with roo gem) + # @return [OpenTox::Dataset] Dataset object with Excel data + def load_spreadsheet(book) + save unless @uri # get a uri for creating features + parser = Parser::Spreadsheets.new + parser.dataset = self + parser.load_excel(book) end - # Load and return metadata of a Dataset object + # Load and return only metadata of a Dataset object + # @return [Hash] Metadata of the dataset def load_metadata - #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - #add_metadata YAML.load(RestClientWrapper.get(File.join(@uri,"metadata"), :accept => "application/x-yaml")) - #else - add_metadata Parser::Owl::Dataset.new(@uri).metadata - #end + add_metadata Parser::Owl::Dataset.new(@uri).metadata self.uri = @uri if @uri # keep uri @metadata end @@ -136,7 +108,8 @@ module OpenTox end end - # Load and return all compound URIs + # Load and return only compound URIs from the dataset service + # @return [Array] Compound URIs in the dataset def load_compounds RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri| @compounds << compound_uri.chomp @@ -144,44 +117,75 @@ module OpenTox @compounds.uniq! end - # Load all feature URIs + # Load and return only features from the dataset service + # @return [Hash] Features of the dataset def load_features - RestClientWrapper.get(File.join(uri,"features"),:accept=> "text/uri-list").to_s.each_line do |feature_uri| - @features[feature_uri.chomp] = Feature.new(feature_uri.chomp).load_metadata - end + parser = Parser::Owl::Dataset.new(@uri) + @features = parser.load_features @features end - # Get YAML representation - def yaml - self.to_yaml + # Detect feature type(s) in the dataset + # @return [String] `classification", "regression", "mixed" or unknown` + def feature_type + feature_types = @features.collect{|f,metadata| metadata[OT.isA]}.uniq + LOGGER.debug "FEATURES" + LOGGER.debug feature_types.inspect + if feature_types.size > 1 + "mixed" + else + case feature_types.first + when /NominalFeature/ + "classification" + when /NumericFeature/ + "regression" + else + "unknown" + end + end end - # Get Excel representation, returns a Spreadsheet::Workbook which can be written with the 'spreadsheet' gem (data_entries only, metadata will ) - def excel - Serializer::Spreadsheets.new(self).excel + # Get Excel representation + # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded)) + def to_xls + Serializer::Spreadsheets.new(self).to_xls end # Get CSV string representation (data_entries only, metadata will be discarded) - def csv - Serializer::Spreadsheets.new(self).csv + # @return [String] CSV representation + def to_csv + Serializer::Spreadsheets.new(self).to_csv end # Get OWL-DL in ntriples format - def ntriples + # @return [String] N-Triples representation + def to_ntriples s = Serializer::Owl.new s.add_dataset(self) - s.ntriples + s.to_ntriples end # Get OWL-DL in RDF/XML format - def rdfxml + # @return [String] RDF/XML representation + def to_rdfxml s = Serializer::Owl.new s.add_dataset(self) - s.rdfxml + s.to_rdfxml + end + + # Get name (DC.title) of a feature + # @param [String] feature Feature URI + # @return [String] Feture title + def feature_name(feature) + @features[feature][DC.title] end # Insert a statement (compound_uri,feature_uri,value) + # 
@example Insert a statement (compound_uri,feature_uri,value) + # dataset.add "http://webservices.in-silico.ch/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9", "http://webservices.in-silico.ch/dataset/1/feature/hamster_carcinogenicity", true + # @param [String] compound Compound URI + # @param [String] feature Compound URI + # @param [Boolean,Float] value Feature value def add (compound,feature,value) @compounds << compound unless @compounds.include? compound @features[feature] = {} unless @features[feature] @@ -190,252 +194,62 @@ module OpenTox @data_entries[compound][feature] << value end - # Add metadata (hash with predicate_uri => value) + # Add/modify metadata, existing entries will be overwritten + # @example + # dataset.add_metadata({DC.title => "any_title", DC.creator => "my_email"}) + # @param [Hash] metadata Hash mapping predicate_uris to values def add_metadata(metadata) metadata.each { |k,v| @metadata[k] = v } end - # Copy a dataset (rewrites URI) - def copy(dataset) - @metadata = dataset.metadata - @data_entries = dataset.data_entries - @compounds = dataset.compounds - @features = dataset.features - if @uri - self.uri = @uri - else - @uri = dataset.metadata[XSD.anyUri] - end + # Add a feature + # @param [String] feature Feature URI + # @param [Hash] metadata Hash with feature metadata + def add_feature(feature,metadata={}) + @features[feature] = metadata end - # save dataset (overwrites existing dataset) + # Add/modify metadata for a feature + # @param [String] feature Feature URI + # @param [Hash] metadata Hash with feature metadata + def add_feature_metadata(feature,metadata) + metadata.each { |k,v| @features[feature][k] = v } + end + + # Save dataset at the dataset service + # - creates a new dataset if uri is not set + # - overwrites dataset if uri exists + # @return [String] Dataset URI def save # TODO: rewrite feature URI's ?? - # create dataset if uri empty @compounds.uniq! 
- RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + if @uri + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + else + # create dataset if uri is empty + self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + end + @uri end # Delete dataset at the dataset service def delete RestClientWrapper.delete @uri end - end -end - - ######################################################### - # kept for backward compatibility, may have to be fixed # - ######################################################### - -=begin - def from_owl(owl) - # creates dataset object from Opentox::Owl object - # use Dataset.find( ) to load dataset from rdf-supporting datasetservice - # note: does not load all feature values, as this is time consuming - raise "invalid param" unless owl.is_a?(OpenTox::Owl) - @metadata[DC.title] = owl.get("title") - @metadata[DC.creator] = owl.get("creator") - @metadata[XSD.anyUri] = owl.uri - # when loading a dataset from owl, only compound- and feature-uris are loaded - owl.load_dataset(@compounds, @features) - # all features are marked as dirty - # as soon as a feature-value is requested all values for this feature are loaded from the rdf - @dirty_features = @features.dclone - @owl = owl - end - - def self.find(uri, accept_header=nil) - - unless accept_header - if (CONFIG[:yaml_hosts].include?(URI.parse(uri).host)) - accept_header = 'application/x-yaml' - else - accept_header = "application/rdf+xml" - end - end - - case accept_header - when "application/x-yaml" - LOGGER.debug "DATASET: "+ uri - LOGGER.debug RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s - d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s - #d.uri = @metadata[XSD.anyUri] unless d.uri - when "application/rdf+xml" - owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset") - d = Dataset.new(owl) - else - raise "cannot get datset with accept header: "+accept_header.to_s - end - d - end - # converts a dataset represented in owl to yaml - # (uses a temporary dataset) - # note: to_yaml is overwritten, loads complete owl dataset values - def self.owl_to_yaml( owl_data, uri) - owl = OpenTox::Owl.from_data(owl_data, uri, "Dataset") - d = Dataset.new(owl) - d.to_yaml - end - - # creates a new dataset, using only those compounsd specified in new_compounds - # returns uri of new dataset - def create_new_dataset( new_compounds, new_features, new_title, new_creator ) - - LOGGER.debug "create new dataset with "+new_compounds.size.to_s+"/"+compounds.size.to_s+" compounds" - raise "no new compounds selected" unless new_compounds and new_compounds.size>0 - - # load require features - if ((defined? @dirty_features) && (@dirty_features & new_features).size > 0) - (@dirty_features & new_features).each{|f| load_feature_values(f)} - end - - dataset = OpenTox::Dataset.new - dataset.title = new_title - dataset.creator = new_creator - dataset.features = new_features - dataset.compounds = new_compounds - - # Copy dataset data for compounds and features - # PENDING: why storing feature values in an array? 
- new_compounds.each do |c| - data_c = [] - raise "no data for compound '"+c.to_s+"'" if @data[c]==nil - @data[c].each do |d| - m = {} - new_features.each do |f| - m[f] = d[f] - end - data_c << m - end - dataset.data[c] = data_c - end - return dataset.save - end - - # returns classification value - def get_predicted_class(compound, feature) - v = get_value(compound, feature) - if v.is_a?(Hash) - k = v.keys.grep(/classification/).first - unless k.empty? - #if v.has_key?(:classification) - return v[k] - else - return "no classification key" - end - elsif v.is_a?(Array) - raise "predicted class value is an array\n"+ - "value "+v.to_s+"\n"+ - "value-class "+v.class.to_s+"\n"+ - "dataset "+self.uri.to_s+"\n"+ - "compound "+compound.to_s+"\n"+ - "feature "+feature.to_s+"\n" - else - return v - end - end - - # returns regression value - def get_predicted_regression(compound, feature) - v = get_value(compound, feature) - if v.is_a?(Hash) - k = v.keys.grep(/regression/).first - unless k.empty? - return v[k] - else - return "no regression key" - end - elsif v.is_a?(Array) - raise "predicted regression value is an array\n"+ - "value "+v.to_s+"\n"+ - "value-class "+v.class.to_s+"\n"+ - "dataset "+self.uri.to_s+"\n"+ - "compound "+compound.to_s+"\n"+ - "feature "+feature.to_s+"\n" - else - return v - end - end - - # returns prediction confidence if available - def get_prediction_confidence(compound, feature) - v = get_value(compound, feature) - if v.is_a?(Hash) - k = v.keys.grep(/confidence/).first - unless k.empty? - #if v.has_key?(:confidence) - return v[k].abs - #return v["http://ot-dev.in-silico.ch/model/lazar#confidence"].abs - else - # PENDING: return nil isntead of raising an exception - raise "no confidence key" - end - else - LOGGER.warn "no confidence for compound: "+compound.to_s+", feature: "+feature.to_s - return 1 - end - end - - # return compound-feature value - def get_value(compound, feature) - if (defined? @dirty_features) && @dirty_features.include?(feature) - load_feature_values(feature) - end - - v = @data[compound] - return nil if v == nil # missing values for all features - if v.is_a?(Array) - # PENDING: why using an array here? - v.each do |e| - if e.is_a?(Hash) - if e.has_key?(feature) - return e[feature] - end - else - raise "invalid internal value type" - end - end - return nil #missing value - else - raise "value is not an array\n"+ - "value "+v.to_s+"\n"+ - "value-class "+v.class.to_s+"\n"+ - "dataset "+self.uri.to_s+"\n"+ - "compound "+compound.to_s+"\n"+ - "feature "+feature.to_s+"\n" - end - end - - # loads specified feature and removes dirty-flag, loads all features if feature is nil - def load_feature_values(feature=nil) - if feature - raise "feature already loaded" unless @dirty_features.include?(feature) - @owl.load_dataset_feature_values(@compounds, @data, [feature]) - @dirty_features.delete(feature) + private + # Copy a dataset (rewrites URI) + def copy(dataset) + @metadata = dataset.metadata + @data_entries = dataset.data_entries + @compounds = dataset.compounds + @features = dataset.features + if @uri + self.uri = @uri else - @data = {} unless @data - @owl.load_dataset_feature_values(@compounds, @data, @dirty_features) - @dirty_features.clear + @uri = dataset.metadata[XSD.anyURI] end end - - # overwrite to yaml: - # in case dataset is loaded from owl: - # * load all values - def to_yaml - # loads all features - if ((defined? 
@dirty_features) && @dirty_features.size > 0) - load_feature_values - end - super - end - - # * remove @owl from yaml, not necessary - def to_yaml_properties - super - ["@owl"] - end - end end -=end diff --git a/lib/environment.rb b/lib/environment.rb index b16b62f..d66b062 100644 --- a/lib/environment.rb +++ b/lib/environment.rb @@ -61,3 +61,32 @@ FALSE_REGEXP = /^(false|inactive|0|0.0)$/i # Task durations DEFAULT_TASK_MAX_DURATION = 36000 EXTERNAL_TASK_MAX_DURATION = 36000 + +# OWL Namespaces +class OwlNamespace + + def initialize(uri) + @uri = uri + end + + def [](property) + @uri+property.to_s + end + + def type # for RDF.type + "#{@uri}type" + end + + def method_missing(property) + @uri+property.to_s + end + +end + +RDF = OwlNamespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' +OWL = OwlNamespace.new 'http://www.w3.org/2002/07/owl#' +DC = OwlNamespace.new 'http://purl.org/dc/elements/1.1/' +OT = OwlNamespace.new 'http://www.opentox.org/api/1.1#' +OTA = OwlNamespace.new 'http://www.opentox.org/algorithmTypes.owl#' +XSD = OwlNamespace.new 'http://www.w3.org/2001/XMLSchema#' + diff --git a/lib/feature.rb b/lib/feature.rb index 9616135..13d97a2 100644 --- a/lib/feature.rb +++ b/lib/feature.rb @@ -1,7 +1,5 @@ module OpenTox - class Feature - include OtObject + include OpenTox end - end diff --git a/lib/model.rb b/lib/model.rb index d0d6703..63013cb 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -1,143 +1,410 @@ module OpenTox + module Model + include OpenTox + + def run(params) + if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) + accept = 'application/x-yaml' + else + accept = 'application/rdf+xml' + end + begin + params[:acccept] = accept + #TODO fix: REstClientWrapper does not accept accept header + #RestClientWrapper.post(@uri,params)#,{:accept => accept}) + `curl -X POST -H "Accept:#{accept}" #{params.collect{|k,v| "-d #{k}=#{v}"}.join(" ")} #{@uri}`.to_s.chomp + rescue => e + LOGGER.error "Failed to run #{@uri} with #{params.inspect} (#{e.inspect})" + raise "Failed to run #{@uri} with #{params.inspect}" + end + end + +=begin + def classification? 
+ #TODO replace with request to ontology server + if @metadata[DC.title] =~ /(?i)classification/ + return true + elsif @metadata[DC.title] =~ /(?i)regression/ + return false + elsif @uri =~/ntua/ and @metadata[DC.title] =~ /mlr/ + return false + elsif @uri =~/tu-muenchen/ and @metadata[DC.title] =~ /regression|M5P|GaussP/ + return false + elsif @uri =~/ambit2/ and @metadata[DC.title] =~ /pKa/ || @metadata[DC.title] =~ /Regression|Caco/ + return false + elsif @uri =~/majority/ + return (@uri =~ /class/) != nil + else + raise "unknown model, uri:'"+@uri+"' title:'"+@metadata[DC.title]+"'" + end + end +=end + class Generic + include Model + end + + class Lazar + + include Model + + #attr_accessor :prediction_type, :feature_type, :features, :effects, :activities, :p_values, :fingerprints, :parameters + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :parameters, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm + + def initialize(uri=nil) + + if uri + super uri + else + super CONFIG[:services]["opentox-model"] + end + + # TODO: fix metadata, add parameters + @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") + + @features = [] + @effects = {} + @activities = {} + @p_values = {} + @fingerprints = {} + + @feature_calculation_algorithm = "substructure_match" + @similarity_algorithm = "weighted_tanimoto" + @prediction_algorithm = "weighted_majority_vote" - MODEL_ATTRIBS = [:uri, :title, :creator, :date, :format, :predictedVariables, :independentVariables, :dependentVariables, :trainingDataset, :algorithm] - MODEL_ATTRIBS.each{ |a| attr_accessor(a) } + @min_sim = 0.3 + + end def self.find(uri) - owl = OpenTox::Owl.from_uri(uri, "Model") - return self.new(owl) - end - - def self.to_rdf(model) - owl = OpenTox::Owl.create 'Model', model.uri - (MODEL_ATTRIBS - [:uri]).each do |a| - owl.set(a.to_s,model.send(a.to_s)) + YAML.load RestClientWrapper.get(uri,:content_type => 'application/x-yaml') + end + + def self.create_from_dataset(dataset_uri,feature_dataset_uri,prediction_feature=nil) + training_activities = OpenTox::Dataset.find(dataset_uri) + training_features = OpenTox::Dataset.find(feature_dataset_uri) + unless prediction_feature # try to read prediction_feature from dataset + raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." unless training_activities.features.size == 1 + prediction_feature = training_activities.features.keys.first + params[:prediction_feature] = prediction_feature + end + lazar = Lazar.new + training_features = OpenTox::Dataset.new(feature_dataset_uri) + case training_features.feature_type + when "classification" + lazar.similarity_algorithm = "weighted_tanimoto" + when "regression" + lazar.similarity_algorithm = "weighted_euclid" end - owl.rdf end - - protected - def initialize(owl) - MODEL_ATTRIBS.each do |a| - self.send("#{a.to_s}=".to_sym, owl.get(a.to_s)) unless a==:uri + + def self.create(dataset_uri,prediction_feature=nil,feature_generation_uri=File.join(CONFIG[:services]["opentox-algorithm"],"fminer/bbrc"),params=nil) + + training_activities = OpenTox::Dataset.find(dataset_uri) + + unless prediction_feature # try to read prediction_feature from dataset + raise "#{training_activities.features.size} features in dataset #{dataset_uri}. Please provide a prediction_feature parameter." 
unless training_activities.features.size == 1 + prediction_feature = training_activities.features.keys.first + params[:prediction_feature] = prediction_feature end - @uri = owl.uri - if ENV['RACK_ENV'] =~ /test|debug/ - begin - raise "uri invalid" unless Utils.is_uri?(@uri) - raise "no predicted variables" unless @predictedVariables and @predictedVariables.size>0 - rescue => ex - RestClientWrapper.raise_uri_error "invalid model: '"+ex.message+"'\n"+self.to_yaml+"\n",@uri.to_s + + lazar = Lazar.new + params[:feature_generation_uri] = feature_generation_uri + feature_dataset_uri = OpenTox::Algorithm::Generic.new(feature_generation_uri).run(params).to_s + training_features = OpenTox::Dataset.find(feature_dataset_uri) + raise "Dataset #{feature_dataset_uri} not found or empty." if training_features.nil? + + # sorted features for index lookups + lazar.features = training_features.features.sort if training_features.feature_type == "regression" + + training_features.data_entries.each do |compound,entry| + lazar.fingerprints[compound] = [] unless lazar.fingerprints[compound] + entry.keys.each do |feature| + case training_features.feature_type + when "fminer" + # fingerprints are sets + smarts = training_features.features[feature][OT.smarts] + lazar.fingerprints[compound] << smarts + unless lazar.features.include? smarts + lazar.features << smarts + lazar.p_values[smarts] = training_features.features[feature][OT.p_value] + lazar.effects[smarts] = training_features.features[feature][OT.effect] + end + when "classification" + # fingerprints are sets + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound] << feature if entry[feature].flatten.first.match(TRUE_REGEXP) + lazar.features << feature unless lazar.features.include? feature + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end + when "regression" + # fingerprints are arrays + if entry[feature].flatten.size == 1 + lazar.fingerprints[compound][lazar.features.index(feature)] = entry[feature].flatten.first + else + LOGGER.warn "More than one entry (#{entry[feature].inspect}) for compound #{compound}, feature #{feature}" + end + end + end + + lazar.activities[compound] = [] unless lazar.activities[compound] + training_activities.data_entries[compound][params[:prediction_feature]].each do |value| + case value.to_s + when "true" + lazar.activities[compound] << true + when "false" + lazar.activities[compound] << false + else + lazar.activities[compound] << value.to_f + lazar.prediction_type = "regression" + end end - LOGGER.warn "model has no dependent variable" unless @dependentVariables and @dependentVariables.size>0 - LOGGER.warn "model has no algorithm" unless @algorithm and @algorithm.size>0 - LOGGER.warn "model has no indenpendent variables" unless @independentVariables end + + if feature_generation_uri.match(/fminer/) + lazar.feature_calculation_algorithm = "substructure_match" + else + halt 404, "External feature generation services not yet supported" + end + + lazar.metadata[OT.dependentVariables] = params[:prediction_feature] + lazar.metadata[OT.trainingDataset] = dataset_uri + lazar.metadata[OT.featureDataset] = feature_dataset_uri + + lazar.parameters = { + "dataset_uri" => dataset_uri, + "prediction_feature" => prediction_feature, + "feature_generation_uri" => feature_generation_uri + } + + model_uri = lazar.save + LOGGER.info model_uri + " created #{Time.now}" + model_uri end - end - - class PredictionModel < Generic - - def self.build( algorithm_uri, 
algorithm_params ) - - LOGGER.debug "Build model, algorithm_uri:"+algorithm_uri.to_s+", algorithm_parms: "+algorithm_params.inspect.to_s - uri = OpenTox::RestClientWrapper.post(algorithm_uri,algorithm_params).to_s - LOGGER.debug "Build model done: "+uri.to_s - RestClientWrapper.raise_uri_error("Invalid build model result: '"+uri.to_s+"'", algorithm_uri, algorithm_params ) unless Utils.model_uri?(uri) - return PredictionModel.find(uri) - end - - def predict_dataset( dataset_uri ) - - LOGGER.debug "Predict dataset: "+dataset_uri.to_s+" with model "+@uri.to_s - uri = RestClientWrapper.post(@uri, {:accept => "text/uri-list", :dataset_uri=>dataset_uri}) - RestClientWrapper.raise_uri_error("Prediciton result no dataset uri: "+uri.to_s, @uri, {:dataset_uri=>dataset_uri} ) unless Utils.dataset_uri?(uri) - uri - end - - def classification? - #HACK replace with request to ontology server - if @title =~ /(?i)classification/ - return true - elsif @title =~ /(?i)regression/ - return false - elsif @uri =~/ntua/ and @title =~ /mlr/ - return false - elsif @uri =~/tu-muenchen/ and @title =~ /regression|M5P|GaussP/ - return false - elsif @uri =~/ambit2/ and @title =~ /pKa/ || @title =~ /Regression|Caco/ - return false - elsif @uri =~/majority/ - return (@uri =~ /class/) != nil + + def predict_dataset(dataset_uri) + @prediction_dataset = Dataset.create + @prediction_dataset.add_metadata({ + OT.hasSource => @lazar.uri, + DC.creator => @lazar.uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )) + }) + @prediction_dataset.add_parameters({"dataset_uri" => dataset_uri}) + Dataset.new(dataset_uri).load_compounds.each do |compound_uri| + predict(compound_uri,false) + end + @prediction_dataset.save + @prediction_dataset.uri + end + + def predict(compound_uri,verbose=false) + + @compound = Compound.new compound_uri + + unless @prediction_dataset + @prediction_dataset = Dataset.create + @prediction_dataset.add_metadata( { + OT.hasSource => @lazar.uri, + DC.creator => @lazar.uri, + DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )) + } ) + @prediction_dataset.add_parameters( {"compound_uri" => compound_uri} ) + end + + neighbors + eval @prediction_algorithm + + if @prediction + + feature_uri = File.join( @prediction_dataset.uri, "feature", @prediction_dataset.compounds.size) + @prediction_dataset.add @compound.uri, feature_uri, @prediction + + feature_metadata = @prediction_dataset.metadata + feature_metadata[DC.title] = File.basename(@metadata[OT.dependentVariables]) + feature_metadata[OT.prediction] = @prediction + feature_metadata[OT.confidence] = @confidence + @prediction_dataset.add_feature(feature_uri, feature_metadata) + + if verbose + if @compound_features + @compound_features.each do |feature| + @prediction_dataset.add @compound.uri, feature, true + end + end + n = 0 + @neighbors.sort{|a,b| a[:similarity] <=> b[:similarity]}.each do |neighbor| + neighbor_uri = File.join( @prediction_dataset.uri, "feature/neighbor", n ) + @prediction_dataset.add @compound.uri, neighbor_uri, true + @prediction_dataset.add_feature(neighbor, { + OT.compound => neighbor[:compound], + OT.similarity => neighbor[:similarity], + OT.activity => neighbor[:activity] + }) + n+=1 + end + end + end + @prediction_dataset.save + @prediction_dataset.uri + end + + def weighted_majority_vote + conf = 0.0 + @neighbors.each do |neighbor| + case neighbor[:activity].to_s + when 'true' + conf += OpenTox::Algorithm.gauss(neighbor[:similarity]) + when 'false' + conf -= 
OpenTox::Algorithm.gauss(neighbor[:similarity]) + end + end + if conf > 0.0 + @prediction = true + elsif conf < 0.0 + @prediction = false else - raise "unknown model, uri:'"+@uri.to_s+"' title:'"+@title.to_s+"'" + @prediction = nil end + @confidence = conf/@neighbors.size if @neighbors.size > 0 end - end - - class Lazar < Generic - - attr_accessor :feature_dataset_uri, :effects, :activities, :p_values, :fingerprints, :features - - def initialize - @source = "http://github.com/helma/opentox-model" - @algorithm = File.join(CONFIG[:services]["opentox-algorithm"],"lazar") - #@independent_variables = File.join(CONFIG[:services]["opentox-algorithm"],"fminer#BBRC_representative") - @features = [] - @effects = {} - @activities = {} - @p_values = {} - @fingerprints = {} + + def local_svm_regression + sims = @neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors + conf = sims.inject{|sum,x| sum + x } + acts = @neighbors.collect do |n| + act = n[:activity] + # TODO: check this in model creation + raise "0 values not allowed in training dataset. log10 is calculated internally." if act.to_f == 0 + Math.log10(act.to_f) + end # activities of neighbors for supervised learning + + neighbor_matches = @neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found" + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] + # lower triangle + (0..(i-1)).each do |j| + sim = OpenTox::Algorithm.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], @lazar.p_values) + gram_matrix[i] << OpenTox::Algorithm.gauss(sim) + end + # diagonal element + gram_matrix[i][i] = 1.0 + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = OpenTox::Algorithm.weighted_tanimoto(neighbor_matches[i], neighbor_matches[j], @lazar.p_values) # double calculation? + gram_matrix[i] << OpenTox::Algorithm.gauss(sim) + end + end + + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims + + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + @r.eval "p<-predict(model,sims)[1,1]" + @prediction = 10**(@r.p.to_f) + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + @r.quit # free R + end + @confidence = conf/@neighbors.size if @neighbors.size > 0 + end - def save - @features.uniq! 
- resource = RestClient::Resource.new(CONFIG[:services]["opentox-model"]) - resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s + def neighbors + + @compound_features = eval(@feature_calculation_algorithm) if @feature_calculation_algorithm + + @neighbors = {} + @activities.each do |training_compound,activities| + @training_compound = training_compound + sim = eval(@similarity_algorithm) + if sim > @min_sim + activities.each do |act| + @neighbors << { + :compound => @training_compound, + :similarity => sim, + :features => @fingerprints[@training_compound], + :activity => act + } + end + end + end + end - def self.find_all - RestClientWrapper.get(CONFIG[:services]["opentox-model"]).chomp.split("\n") + def tanimoto + OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound]) end - def self.predict(compound_uri,model_uri) - #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'}) - `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}` + def weighted_tanimoto + OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound],@p_values) end - end - - class PropertyLazar < Generic - - attr_accessor :feature_dataset_uri, :properties, :features, :activities#, :effects, :p_values - - def initialize - @source = "http://github.com/helma/opentox-model" - @algorithm = File.join(CONFIG[:services]["opentox-algorithm"],"property_lazar") - #@independent_variables = File.join(CONFIG[:services]["opentox-algorithm"],"fminer#BBRC_representative") - @features = [] - #@effects = {} - @activities = {} - #@p_values = {} - @properties = {} + + def euclid + OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound]) + end + + def weighted_euclid + OpenTox::Algorithm.tanimoto(@compound_features,@fingerprints[@training_compound],@p_values) + end + + def substructure_match + @compound.match(@features) + end + + def database_search + #TODO add features method to dataset + Dataset.new(@metadata[OT.featureDataset]).features(@compound.uri) + end + + def database_activity(compound_uri) + prediction = OpenTox::Dataset.new + # find database activities + if @activities[compound_uri] + @activities[compound_uri].each { |act| prediction.add compound_uri, @metadata[OT.dependentVariables], act } + prediction.add_metadata(OT.hasSource => @metadata[OT.trainingDataset]) + prediction + else + nil + end end def save - @features.uniq! 
- resource = RestClient::Resource.new(CONFIG[:services]["opentox-model"]) - resource.post(self.to_yaml, :content_type => "application/x-yaml").chomp.to_s + RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) end - def self.find_all - RestClientWrapper.get(CONFIG[:services]["opentox-model"]).chomp.split("\n") + def self.all + RestClientWrapper.get(CONFIG[:services]["opentox-model"]).to_s.split("\n") end - def self.predict(compound_uri,model_uri) - #RestClientWrapper.post(model_uri,{:compound_uri => compound_uri, :accept => 'application/x-yaml'}) - `curl -X POST -d 'compound_uri=#{compound_uri}' -H 'Accept:application/x-yaml' #{model_uri}` + def delete + RestClientWrapper.delete @uri unless @uri == CONFIG[:services]["opentox-model"] end + end end end diff --git a/lib/opentox-ruby-api-wrapper.rb b/lib/opentox-ruby-api-wrapper.rb index 2749899..9dc1372 100644 --- a/lib/opentox-ruby-api-wrapper.rb +++ b/lib/opentox-ruby-api-wrapper.rb @@ -8,6 +8,6 @@ rescue LoadError puts "Please install Openbabel with 'rake openbabel:install' in the compound component" end -['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','utils','feature', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib| +['opentox', 'compound','dataset', 'parser','serializer', 'algorithm','model','task','validation','feature', 'ot-logger', 'overwrite', 'rest_client_wrapper'].each do |lib| require lib end diff --git a/lib/opentox.rb b/lib/opentox.rb index 453ca66..7e1deec 100644 --- a/lib/opentox.rb +++ b/lib/opentox.rb @@ -1,79 +1,49 @@ module OpenTox - # Generic OpenTox class - module OtObject - - attr_reader :uri - attr_accessor :metadata - - # Initialize OpenTox object with optional uri - def initialize(uri=nil) - @metadata = {} - self.uri = uri if uri - end - - # Set URI - def uri=(uri) - @uri = uri - @metadata[XSD.anyUri] = uri - end - - # Get title - def title - load_metadata unless @metadata[DC.title] - @metadata[DC.title] - end - - # Set title - def title=(title) - @metadata[DC.title] = title - end - - # Get all objects from a service - def self.all(uri) - #def OtObject.all(uri) - RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.split(/\n/) - end - - # Load metadata from URI - def load_metadata - #if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - # TODO: fix metadata retrie - #@metadata = YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml")) - #else - @metadata = Parser::Owl::Generic.new(@uri).metadata - #end - @metadata - #Parser::Owl::Generic.new(@uri).metadata - end - + attr_reader :uri + attr_accessor :metadata, :parameters + + # Initialize OpenTox object with optional uri + # @param [optional, String] URI + def initialize(uri=nil) + @metadata = {} + self.uri = uri if uri end - module Owl - - class Namespace - - def initialize(uri) - @uri = uri - end + # Set URI + # @param [String] URI + def uri=(uri) + @uri = uri + @metadata[XSD.anyURI] = uri + end - def [](property) - @uri+property.to_s - end + # Get all objects from a service + # @return [Array] List of available URIs + def self.all(uri) + RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.split(/\n/) + end - def method_missing(property) - @uri+property.to_s - end + # Load (and return) metadata from object URI + # @return [Hash] Metadata + def load_metadata + @metadata = Parser::Owl::Generic.new(@uri).metadata + @metadata + end - end + # Load parameters from URI + #def load_parameters + #@parameters = Parser::Owl::Generic.new(@uri).parameters 
+ #@parameters + #end + + # Get OWL-DL representation in RDF/XML format + # @return [application/rdf+xml] RDF/XML representation + def to_rdfxml + s = Serializer::Owl.new + s.add_metadata(@uri,@metadata) + #s.add_parameters(@uri,@parameters) if @parameters + s.to_rdfxml end end -# -# OWL Namespaces -RDF = OpenTox::Owl::Namespace.new 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' -OWL = OpenTox::Owl::Namespace.new 'http://www.w3.org/2002/07/owl#' -DC = OpenTox::Owl::Namespace.new 'http://purl.org/dc/elements/1.1/' -OT = OpenTox::Owl::Namespace.new 'http://www.opentox.org/api/1.1#' -XSD = OpenTox::Owl::Namespace.new 'http://www.w3.org/2001/XMLSchema#' diff --git a/lib/overwrite.rb b/lib/overwrite.rb index 1d0161b..2e4c396 100644 --- a/lib/overwrite.rb +++ b/lib/overwrite.rb @@ -12,3 +12,25 @@ class Sinatra::Base end end +class String + def task_uri? + self.uri? && !self.match(/task/).nil? + end + + def dataset_uri? + self.uri? && !self.match(/dataset/).nil? + end + + def self.model_uri? + self.uri? && !self.match(/model/).nil? + end + + def uri? + begin + u = URI::parse(self) + return (u.scheme!=nil and u.host!=nil) + rescue URI::InvalidURIError + return false + end + end +end diff --git a/lib/parser.rb b/lib/parser.rb index e623bf5..8c173f9 100644 --- a/lib/parser.rb +++ b/lib/parser.rb @@ -1,5 +1,14 @@ require 'spreadsheet' require 'roo' + +class String + + def to_triple + self.chomp.split(' ',3).collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')} + end + +end + module OpenTox module Parser @@ -12,19 +21,28 @@ module OpenTox end def metadata - # TODO: load parameters + if @dataset uri = File.join(@uri,"metadata") else uri = @uri end + statements = [] - `rapper -i rdfxml -o ntriples #{uri}`.each_line do |line| - triple = line.chomp.split('> ') - statements << triple.collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')} - end - statements.each do |triple| + parameter_ids = [] + `rapper -i rdfxml -o ntriples #{uri} 2>/dev/null`.each_line do |line| + triple = line.to_triple @metadata[triple[1]] = triple[2].split('^^').first if triple[0] == @uri and triple[1] != RDF['type'] + statements << triple + parameter_ids << triple[2] if triple[1] == OT.parameters + end + unless parameter_ids.empty? 
+ @metadata[OT.parameters] = [] + parameter_ids.each do |p| + parameter = {} + statements.each{ |t| parameter[t[1]] = t[2] if t[0] == p and t[1] != RDF['type']} + @metadata[OT.parameters] << parameter + end end @metadata end @@ -37,6 +55,8 @@ module OpenTox include Owl + attr_writer :uri + def initialize(uri) super uri @dataset = ::OpenTox::Dataset.new(@uri) @@ -47,11 +67,10 @@ module OpenTox feature_values = {} feature = {} other_statements = {} - ntriples = `rapper -i rdfxml -o ntriples #{@uri}` - ntriples.each_line do |line| + `rapper -i rdfxml -o ntriples #{@uri} 2>/dev/null`.each_line do |line| triple = line.chomp.split(' ',3) triple = triple[0..2].collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')} - case triple[1] # Ambit namespaces are case insensitive + case triple[1] when /#{OT.values}/i data[triple[0]] = {:compound => "", :values => []} unless data[triple[0]] data[triple[0]][:values] << triple[2] @@ -77,76 +96,84 @@ module OpenTox end def load_features - @dataset.features.keys.each do |feature| - @dataset.features[feature] = Parser::Owl::Generic.new(feature).metadata + uri = File.join(@uri,"features") + statements = [] + features = Set.new + `rapper -i rdfxml -o ntriples #{uri} 2>/dev/null`.each_line do |line| + triple = line.chomp.split('> ').collect{|i| i.sub(/\s+.$/,'').gsub(/[<>"]/,'')}[0..2] + statements << triple + features << triple[0] if triple[1] == RDF['type'] and triple[2] == OT.Feature + end + statements.each do |triple| + if features.include? triple[0] + @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]] + @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first + end end + @dataset.features end + end end - class Spreadsheet + class Spreadsheets + # TODO: expand for multiple columns + + attr_accessor :dataset + def initialize + + # TODO: fix 2 datasets created + #@dataset = Dataset.create + #@dataset.save # get uri + + @data = [] + @features = [] + @feature_types = {} - def initialize(dataset) - @dataset = dataset @format_errors = "" @smiles_errors = [] @activity_errors = [] @duplicates = {} - @nr_compounds = 0 - @data = [] - @activities = [] - @type = "classification" end def load_excel(book) book.default_sheet = 0 - 1.upto(book.last_row) do |row| - if row == 1 - @feature = File.join(@dataset.uri,"feature",book.cell(row,2)) - else - add( book.cell(row,1), book.cell(row,2), row ) # smiles, activity - end - end - parse + add_features book.row(1) + 2.upto(book.last_row) { |i| add_values book.row(i) } + warnings + @dataset end def load_csv(csv) row = 0 - csv.each_line do |line| - row += 1 - raise "Invalid CSV format at line #{row}: #{line.chomp}" unless line.chomp.match(/^.+[,;].*$/) # check CSV format - items = line.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes - if row == 1 - @feature = File.join(@dataset.uri,"feature",items[1]) - else - add(items[0], items[1], row) - end - end - parse + input = csv.split("\n") + add_features split_row(input.shift) + input.each { |row| add_values split_row(row) } + warnings + @dataset end - def parse + private - # create dataset - @data.each do |items| - case @type - when "classification" - case items[1].to_s - when TRUE_REGEXP - @dataset.add(items[0], @feature, true ) - when FALSE_REGEXP - @dataset.add(items[0], @feature, false) - end - when "regression" - if items[1].to_f == 0 - @activity_errors << "Row #{items[2]}: Zero values not allowed for regression datasets - entry ignored." 
- else - @dataset.add items[0], @feature, items[1].to_f - end + def warnings + + info = '' + @feature_types.each do |feature,types| + if types.uniq.size > 1 + type = OT.NumericFeature + else + type = types.first end + @dataset.add_feature_metadata(feature,{OT.isA => type}) + info += "\"#{@dataset.feature_name(feature)}\" detected as #{type.split('#').last}." + + # TODO: rewrite feature values + # TODO if value.to_f == 0 @activity_errors << "#{smiles} Zero values not allowed for regression datasets - entry ignored." end + @dataset.metadata[OT.Info] = info + warnings = '' warnings += "
<p>Incorrect Smiles structures (ignored):</p>" + @smiles_errors.join("<br/>") unless @smiles_errors.empty?
+        warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>
") unless @activity_errors.empty? @@ -156,34 +183,75 @@ module OpenTox @dataset.metadata[OT.Warnings] = warnings - @dataset + end + def add_features(row) + row.shift # get rid of smiles entry + row.each do |feature_name| + feature_uri = File.join(@dataset.uri,"feature",URI.encode(feature_name)) + @feature_types[feature_uri] = [] + @features << feature_uri + @dataset.add_feature(feature_uri,{DC.title => feature_name}) + end end - def add(smiles, act, row) + def add_values(row) + + smiles = row.shift compound = Compound.from_smiles(smiles) if compound.nil? or compound.inchi.nil? or compound.inchi == "" - @smiles_errors << "Row #{row}: " + [smiles,act].join(", ") - return false - end - unless numeric?(act) or classification?(act) - @activity_errors << "Row #{row}: " + [smiles,act].join(", ") + @smiles_errors << smiles+", "+row.join(", ") return false end @duplicates[compound.inchi] = [] unless @duplicates[compound.inchi] - @duplicates[compound.inchi] << "Row #{row}: " + [smiles, act].join(", ") - @type = "regression" unless classification?(act) - # TODO: set OT.NumericalFeature, ... - @nr_compounds += 1 - @data << [ compound.uri, act , row ] + @duplicates[compound.inchi] << smiles+", "+row.join(", ") + + row.each_index do |i| + value = row[i] + feature = @features[i] + type = feature_type(value) + + @feature_types[feature] << type + + case type + when OT.NominalFeature + case value.to_s + when TRUE_REGEXP + @dataset.add(compound.uri, feature, true ) + when FALSE_REGEXP + @dataset.add(compound.uri, feature, false ) + end + when OT.NumericFeature + @dataset.add compound.uri, feature, value.to_f + when OT.StringFeature + # TODO: insert ?? + @dataset.add compound.uri, feature, value.to_s + @activity_errors << smiles+", "+row.join(", ") + #return false + end + end + end + + def numeric?(value) + true if Float(value) rescue false end - def numeric?(object) - true if Float(object) rescue false + def classification?(value) + !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil? + end + + def feature_type(value) + if classification? value + return OT.NominalFeature + elsif numeric? value + return OT.NumericFeature + else + return OT.StringFeature + end end - def classification?(object) - !object.to_s.strip.match(TRUE_REGEXP).nil? or !object.to_s.strip.match(FALSE_REGEXP).nil? + def split_row(row) + row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes end end diff --git a/lib/rest_client_wrapper.rb b/lib/rest_client_wrapper.rb index 82836d9..49549b5 100644 --- a/lib/rest_client_wrapper.rb +++ b/lib/rest_client_wrapper.rb @@ -1,5 +1,3 @@ - - module OpenTox #PENDING: implement ot error api, move to own file @@ -60,7 +58,7 @@ module OpenTox def self.execute( rest_call, uri, headers, payload=nil, wait=true ) do_halt 400,"uri is null",uri,headers,payload unless uri - do_halt 400,"not a uri",uri,headers,payload unless Utils.is_uri?(uri) + do_halt 400,"not a uri",uri,headers,payload unless uri.to_s.uri? 
do_halt 400,"headers are no hash",uri,headers,payload unless headers==nil or headers.is_a?(Hash) do_halt 400,"nil headers for post not allowed, use {}",uri,headers,payload if rest_call=="post" and headers==nil headers.each{ |k,v| headers.delete(k) if v==nil } if headers #remove keys with empty values, as this can cause problems @@ -115,7 +113,7 @@ module OpenTox when /text\// raise "uri list has more than one entry, should be a task" if res.content_type=~/text\/uri-list/ and res.split("\n").size > 1 #if uri list contains more then one uri, its not a task - task = OpenTox::Task.find(res.to_s) if Utils.task_uri?(res) + task = OpenTox::Task.find(res.to_s) if res.to_s.uri? else raise "unknown content-type for task: '"+res.content_type.to_s+"'" #+"' content: "+res[0..200].to_s end diff --git a/lib/serializer.rb b/lib/serializer.rb index 3def252..3a9cb60 100644 --- a/lib/serializer.rb +++ b/lib/serializer.rb @@ -30,7 +30,6 @@ module OpenTox OT.dataEntry => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.acceptValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.values => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , - #XSD.anyUri => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.algorithm => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , OT.parameters => { RDF["type"] => [{ "type" => "uri", "value" => OWL.ObjectProperty }] } , @@ -38,14 +37,15 @@ module OpenTox DC.identifier => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , DC.contributor => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , DC.creator => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + DC.description => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.isA => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + OT.Warnings => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , + XSD.anyURI => { RDF["type"] => [{ "type" => "uri", "value" => OWL.AnnotationProperty }] } , OT.hasSource => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.value => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.paramScope => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , OT.paramValue => { RDF["type"] => [{ "type" => "uri", "value" => OWL.DatatypeProperty }] } , - - #Untyped Individual: http://localhost/algorithm } @data_entries = {} @@ -61,15 +61,10 @@ module OpenTox end def add_compound(uri) - #@classes << OT.Compound unless @classes.include? OT.Compound @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Compound }] } end def add_feature(uri,metadata) - #@classes << OT.Feature unless @classes.include? OT.Feature - #@classes << OT.NominalFeature unless @classes.include? OT.NominalFeature - #@classes << OT.NumericFeature unless @classes.include? OT.NumericFeature - #@classes << OT.StringFeature unless @classes.include? 
OT.StringFeature @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Feature }] } add_metadata uri, metadata end @@ -94,32 +89,37 @@ module OpenTox end - def add_algorithm(uri,metadata,parameters) + def add_algorithm(uri,metadata) @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Algorithm }] } + LOGGER.debug @object[uri] add_metadata uri, metadata - add_parameters uri, parameters - #metadata.each { |u,v| @object[uri][u] = [{"type" => type(v), "value" => v }] } + LOGGER.debug @object[uri] end - def add_model(uri,metadata) + def add_model(uri,metadata,parameters) + @object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Model }] } + add_metadata uri, metadata + add_parameters uri, parameters end def add_metadata(uri,metadata) - #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] } + id = 0 metadata.each do |u,v| - @object[uri][u] = [{"type" => type(v), "value" => v }] - end - end - - def add_parameters(uri,parameters) - #@object[uri] = { RDF["type"] => [{ "type" => "uri", "value" => OT[type] }] } - @object[uri][OT.parameters] = [] unless @object[uri][OT.parameters] - parameters.each do |p| - parameter = "_:parameter#{@parameter_id}" - @parameter_id += 1 - @object[uri][OT.parameters] << {"type" => "bnode", "value" => parameter} - @object[parameter] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter }] } - add_metadata parameter, p + if v.is_a? String + @object[uri] = {} unless @object[uri] + @object[uri][u] = [{"type" => type(v), "value" => v }] + elsif v.is_a? Array and u == OT.parameters + @object[uri][u] = [] unless @object[uri][u] + v.each do |value| + id+=1 + genid = "_:genid#{id}" + @object[uri][u] << {"type" => "bnode", "value" => genid} + @object[genid] = { RDF["type"] => [{ "type" => "uri", "value" => OT.Parameter}] } + value.each do |name,entry| + @object[genid][name] = [{"type" => type(entry), "value" => entry }] + end + end + end end end @@ -158,10 +158,11 @@ module OpenTox # Serializers - def ntriples + def to_ntriples #rdf_types @triples = Set.new + #LOGGER.debug @object.to_yaml @object.each do |s,entry| s = url(s) if type(s) == "uri" entry.each do |p,objects| @@ -182,12 +183,12 @@ module OpenTox @triples.sort.collect{ |s| s.join(' ').concat(" .") }.join("\n")+"\n" end - def rdfxml - Tempfile.open("owl-serializer"){|f| f.write(ntriples); @path = f.path} - `rapper -i ntriples -o rdfxml #{@path}` + def to_rdfxml + Tempfile.open("owl-serializer"){|f| f.write(self.to_ntriples); @path = f.path} + `rapper -i ntriples -o rdfxml #{@path} 2>/dev/null` end - def json + def to_json #rdf_types Yajl::Encoder.encode(@object) end @@ -258,7 +259,7 @@ module OpenTox @rows.first << features @rows.first.flatten! 
dataset.data_entries.each do |compound,entries| - smiles = Compound.new(compound).smiles + smiles = Compound.new(compound).to_smiles row = Array.new(@rows.first.size) row[0] = smiles entries.each do |feature, values| @@ -271,11 +272,11 @@ module OpenTox end end - def csv + def to_csv @rows.collect{|r| r.join(", ")}.join("\n") end - def excel + def to_xls Spreadsheet.client_encoding = 'UTF-8' book = Spreadsheet::Workbook.new sheet = book.create_worksheet(:name => '') diff --git a/lib/task.rb b/lib/task.rb index 50f0347..96ee719 100644 --- a/lib/task.rb +++ b/lib/task.rb @@ -3,6 +3,7 @@ $self_task=nil module OpenTox class Task + attr_accessor :uri, :date, :title, :creator, :description, :hasStatus, :percentageCompleted, :resultURI, :due_to_time, :http_code # due_to_time is only set in local tasks TASK_ATTRIBS = [ :uri, :date, :title, :creator, :description, :hasStatus, :percentageCompleted, :resultURI, :due_to_time ] @@ -124,14 +125,14 @@ module OpenTox def check_state begin raise "illegal task state, task is completed, resultURI is no URI: '"+@resultURI.to_s+ - "'" unless @resultURI and Utils.is_uri?(@resultURI) if completed? + "'" unless @resultURI and @resultURI.to_s.uri? if completed? if @http_code == 202 raise "illegal task state, code is 202, but hasStatus is not Running: '"+@hasStatus+"'" unless running? elsif @http_code == 201 raise "illegal task state, code is 201, but hasStatus is not Completed: '"+@hasStatus+"'" unless completed? raise "illegal task state, code is 201, resultURI is no task-URI: '"+@resultURI.to_s+ - "'" unless @resultURI and Utils.task_uri?(@resultURI) + "'" unless @resultURI and @resultURI.to_s.uri? end rescue => ex RestClientWrapper.raise_uri_error(ex.message, @uri) @@ -171,6 +172,7 @@ module OpenTox LOGGER.debug "Started task: "+task.uri.to_s task.uri end + end end diff --git a/lib/utils.rb b/lib/utils.rb deleted file mode 100644 index a0e0cbe..0000000 --- a/lib/utils.rb +++ /dev/null @@ -1,50 +0,0 @@ -module OpenTox - module Utils - # gauss kernel - def self.gauss(sim, sigma = 0.3) - x = 1.0 - sim - Math.exp(-(x*x)/(2*sigma*sigma)) - end - - def self.task_uri?(uri) - is_uri?(uri) && uri.to_s =~ /task/ - end - - def self.dataset_uri?(uri) - is_uri?(uri) && uri.to_s =~ /dataset/ - end - - def self.model_uri?(uri) - is_uri?(uri) && uri.to_s =~ /model/ - end - - - def self.is_uri?(uri) - return false if uri==nil || uri.to_s.size==0 - begin - u = URI::parse(uri) - return (u.scheme!=nil and u.host!=nil) - rescue URI::InvalidURIError - return false - end - end - - def self.median(array) - return nil if array.empty? - array.sort! - m_pos = array.size / 2 - return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2 - end - - end - -# ['rubygems', 'rest_client'].each do |r| -# require r -# end -# ["bla", "google.de", "http://google.de"].each do |u| -# puts u+"? "+Utils.is_uri?(u).to_s -# end - - -end - -- cgit v1.2.3
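
Note: the patch replaces the deleted OpenTox::Utils.is_uri?/task_uri? helpers with methods defined directly on String in lib/overwrite.rb, so callers such as rest_client_wrapper.rb and task.rb can simply write uri.to_s.uri?. A minimal standalone sketch of that idiom (standard library only; the sample URIs are made up):

    require 'uri'

    class String
      # a string counts as a URI if it parses and has both a scheme and a host
      def uri?
        u = URI.parse(self)
        !u.scheme.nil? && !u.host.nil?
      rescue URI::InvalidURIError
        false
      end

      # URIs whose path mentions "task" are treated as task URIs
      def task_uri?
        uri? && !match(/task/).nil?
      end
    end

    puts "http://localhost/task/1".task_uri?  # => true
    puts "no uri at all".uri?                 # => false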
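
parser.rb now shells out to rapper for N-Triples and splits each line with the new String#to_triple helper. The following standalone check shows what that helper does to a typical N-Triples line (the example triple itself is invented):

    class String
      # drop the trailing " ." and the <>, " delimiters; keep subject, predicate, object
      def to_triple
        chomp.split(' ', 3).collect { |i| i.sub(/\s+.$/, '').gsub(/[<>"]/, '') }
      end
    end

    line = '<http://example.org/dataset/1> <http://purl.org/dc/elements/1.1/title> "Hamster carcinogenicity" .'
    p line.to_triple
    # => ["http://example.org/dataset/1", "http://purl.org/dc/elements/1.1/title", "Hamster carcinogenicity"]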
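
The new Parser::Spreadsheets class expects the first CSV row to name the features and every following row to start with a SMILES string; split_row strips quotes and accepts either comma or semicolon separators. A quick sketch of that splitting (the sample row is invented):

    def split_row(row)
      row.chomp.gsub(/["']/, '').split(/\s*[,;]\s*/)  # remove quotes, split on , or ;
    end

    p split_row(%q{"c1ccccc1"; "true"; "0.45"})
    # => ["c1ccccc1", "true", "0.45"]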
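
Feature types are now detected per value and collected in @feature_types: values matching the true/false regexps become nominal, values that parse as Float become numeric, everything else is treated as a string feature. The sketch below isolates that logic; the TRUE_REGEXP/FALSE_REGEXP patterns are assumptions (the real constants are defined elsewhere in the library) and plain strings stand in for the OT.* classes:

    # assumed stand-ins for the library's TRUE_REGEXP / FALSE_REGEXP constants
    TRUE_REGEXP  = /^(true|active|1|1\.0)$/i
    FALSE_REGEXP = /^(false|inactive|0|0\.0)$/i

    def numeric?(value)
      true if Float(value) rescue false
    end

    def classification?(value)
      !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil?
    end

    # nominal beats numeric beats string, mirroring feature_type in parser.rb
    def feature_type(value)
      if classification? value
        "OT.NominalFeature"
      elsif numeric? value
        "OT.NumericFeature"
      else
        "OT.StringFeature"
      end
    end

    puts feature_type("active")    # => OT.NominalFeature
    puts feature_type("0.45")      # => OT.NumericFeature
    puts feature_type("mutagenic") # => OT.StringFeature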