summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2015-08-10 09:48:57 +0200
committerChristoph Helma <helma@in-silico.ch>2015-08-10 09:48:57 +0200
commit23ecfc6fa5ae4913e5cd17b7d58432d1f88d780c (patch)
tree83d78aed2b9fbaa85400be96acfa5ace56537d1c /lib
parentef76c077fd39d31fc795b842c32575f6afb9fdb2 (diff)
transfer to new git project started
Diffstat (limited to 'lib')
-rw-r--r--lib/authorization.rb378
-rw-r--r--lib/compound.rb40
-rw-r--r--lib/dataset.rb247
-rw-r--r--lib/descriptor.rb253
-rw-r--r--lib/error.rb79
-rw-r--r--lib/feature.rb70
-rw-r--r--lib/format-conversion.rb406
-rw-r--r--lib/lazar.rb46
-rw-r--r--lib/model.rb56
-rw-r--r--lib/opentox-client.rb52
-rw-r--r--lib/otlogger.rb47
-rw-r--r--lib/policy.rb354
-rw-r--r--lib/task.rb142
-rw-r--r--lib/templates/default_guest_policy.xml53
-rw-r--r--lib/templates/default_policy.xml53
-rw-r--r--lib/unique_descriptors.rb120
-rw-r--r--lib/validation.rb348
17 files changed, 612 insertions, 2132 deletions
diff --git a/lib/authorization.rb b/lib/authorization.rb
deleted file mode 100644
index b530815..0000000
--- a/lib/authorization.rb
+++ /dev/null
@@ -1,378 +0,0 @@
-module OpenTox
-
- if defined?($aa) and $aa.has_key?(:uri) and !$aa[:uri].nil?
- AA = $aa[:uri]
- else
- AA = "https://opensso.in-silico.ch" #if not set in .opentox/conf/[SERVICE].rb
- end
-
- #Module for Authorization and Authentication
- #@example Authentication
- # require "opentox-client"
- # OpenTox::Authorization::AA = "https://opensso.in-silico.ch" #if not set in .opentox/conf/[SERVICE].rb
- # OpenTox::Authorization.authenticate("username", "password")
- # puts OpenTox::Authorization.authorize("http://example.uri/testpath/", "GET")
- #@see http://www.opentox.org/dev/apis/api-1.2/AA OpenTox A&A API 1.2 specification
-
- module Authorization
-
- #Helper Class to create and send default policies out of xml templates
- #@example Creating a default policy to a URI
- # aa=OpenTox::Authorization::Helper.new
- # xml=aa.get_xml('http://uri....')
- # OpenTox::Authorization.create_policy(xml)
-
- class Helper
- attr_accessor :user, :policy
-
- #Generates an AuthorizationHelper object - requires subjectid
- # @param [String] subjectid
- def initialize
- @user = Authorization.get_user
- @policy = Policies.new()
- end
-
- #Cleans Policies of AuthorizationHelper object and loads default xml file into policy attribute
- #set uri and user, returns Policyfile(XML) for open-sso
- # @param uri [String] URI to create a policy for
- def get_xml(uri)
- @policy.drop_policies
- @policy.load_default_policy(@user, uri)
- return @policy.to_xml
- end
-
- #Loads and sends Policyfile(XML) to open-sso server
- # @param uri [String] URI to create a policy for
- def send(uri)
- xml = get_xml(uri)
- ret = false
- ret = Authorization.create_policy(xml)
- $logger.warn "Create policy on openSSO failed for URI: #{uri} subjectid: #{RestClientWrapper.subjectid}. Will try again." if !ret
- ret = Authorization.create_policy(xml) if !ret
- $logger.debug "Policy send with subjectid: #{RestClientWrapper.subjectid}"
- $logger.error "Not created Policy is: #{xml}" if !ret
- ret
- end
- end
-
- #Returns the open-sso server set in the config file .opentox/config/[environment].yaml
- # @return [String, nil] the openSSO server URI or nil
- def self.server
- return AA
- end
-
- #Authentication against OpenSSO. Returns token. Requires Username and Password.
- # @param user [String] Username
- # @param pw [String] Password
- # @return [Boolean] true if successful
- def self.authenticate(user, pw)
- begin
- res = RestClientWrapper.post("#{AA}/auth/authenticate",{:username=>user, :password => pw},{:subjectid => ""}).sub("token.id=","").sub("\n","")
- if is_token_valid(res)
- RestClientWrapper.subjectid = res
- return true
- else
- bad_request_error "Authentication failed #{res.inspect}"
- end
- rescue
- bad_request_error "Authentication failed #{res.inspect}"
- end
- end
-
- #Logout on opensso. Make token invalid. Requires token
- # @param [String] subjectid the subjectid
- # @return [Boolean] true if logout is OK
- def self.logout(subjectid=RestClientWrapper.subjectid)
- begin
- out = RestClientWrapper.post("#{AA}/auth/logout", :subjectid => subjectid)
- return true unless is_token_valid(subjectid)
- rescue
- return false
- end
- return false
- end
-
- #Authorization against OpenSSO for a URI with request-method (action) [GET/POST/PUT/DELETE]
- # @param [String] uri URI to request
- # @param [String] action request method
- # @param [String] subjectid
- # @return [Boolean, nil] returns true, false or nil (if authorization-request fails).
- def self.authorize(uri, action, subjectid=RestClientWrapper.subjectid)
- return true if RestClientWrapper.post("#{AA}/auth/authorize",{:subjectid => subjectid, :uri => uri, :action => action})== "boolean=true\n"
- return false
- end
-
- #Checks if a token is a valid token
- # @param [String]subjectid subjectid from openSSO session
- # @return [Boolean] subjectid is valid or not.
- def self.is_token_valid(subjectid=RestClientWrapper.subjectid)
- begin
- return true if RestClientWrapper.post("#{AA}/auth/isTokenValid",:tokenid => subjectid) == "boolean=true\n"
- rescue #do rescue because openSSO throws 401
- return false
- end
- return false
- end
-
- #Returns array with all policies of the token owner
- # @param [String]subjectid requires subjectid
- # @return [Array, nil] returns an Array of policy names or nil if request fails
- def self.list_policies
- begin
- out = RestClientWrapper.get("#{AA}/pol",nil)
- return out.split("\n")
- rescue
- return nil
- end
- end
-
- #Returns a policy in xml-format
- # @param policy [String] policyname
- # @param subjectid [String]
- # @return [String] XML of the policy
- def self.list_policy(policy)
- begin
- return RestClientWrapper.get("#{AA}/pol",nil,{:id => policy})
- rescue
- return nil
- end
- end
-
- # Lists policies alongside with affected uris
- # @param [String] subjectid
- # @return [Hash] keys: all policies of the subjectid owner, values: uris affected by those policies
- def self.list_policies_uris
- names = list_policies
- policies = {}
- names.each do |n|
- policies[n] = list_policy_uris n
- end
- policies
- end
-
- # Lists policies alongside with affected uris
- # @param [String] subjectid
- # @return [Hash] keys: all policies of the subjectid owner, values: uris affected by those policies
- def self.list_policy_uris( policy )
- p = OpenTox::Policies.new
- p.load_xml( list_policy(policy) )
- p.uris
- end
-
- #Returns the owner (who created the first policy) of an URI
- # @param uri [String] URI
- # @param subjectid [String] subjectid
- # return [String, nil]owner,nil returns owner of the URI
- def self.get_uri_owner(uri)
- begin
- return RestClientWrapper.get("#{AA}/pol",nil,{:uri => uri}).sub("\n","")
- rescue
- return nil
- end
- end
-
- #Returns true or false if owner (who created the first policy) of an URI
- # @param uri [String] URI
- # @param subjectid [String]
- # return [Boolean]true,false status of ownership of the URI
- def self.uri_owner?(uri)
- get_uri_owner(uri) == get_user
- end
-
- #Checks if a policy exists to a URI. Requires URI and token.
- # @param uri [String] URI
- # @param subjectid [String]
- # return [Boolean]
- def self.uri_has_policy(uri)
- owner = get_uri_owner(uri)
- return true if owner and owner != "null"
- false
- end
-
- #List all policynames for a URI. Requires URI and token.
- # @param uri [String] URI
- # @param subjectid [String]
- # return [Array, nil] returns an Array of policy names or nil if request fails
- def self.list_uri_policies(uri)
- begin
- out = RestClientWrapper.get("#{AA}/pol",nil,{:uri => uri, :polnames => true})
- policies = []; notfirstline = false
- out.split("\n").each do |line|
- policies << line if notfirstline
- notfirstline = true
- end
- return policies
- rescue
- return nil
- end
- end
-
- #Sends a policy in xml-format to opensso server. Requires policy-xml and token.
- # @param policy [String] XML string of a policy
- # @param subjectid [String]
- # return [Boolean] returns true if policy is created
- def self.create_policy(policy)
- begin
- $logger.debug "OpenTox::Authorization.create_policy policy: #{policy[168,43]} with token: #{RestClientWrapper.subjectid} ."
- return true if RestClientWrapper.post("#{AA}/Pol/opensso-pol",policy, {:content_type => "application/xml"})
- rescue
- return false
- end
- end
-
- #Deletes a policy
- # @param policy [String] policyname
- # @param subjectid [String]
- # @return [Boolean,nil]
- def self.delete_policy(policy)
- begin
- $logger.debug "OpenTox::Authorization.delete_policy policy: #{policy} with token: #{RestClientWrapper.subjectid}"
- return true if RestClientWrapper.delete("#{AA}/pol",nil, {:id => policy})
- rescue
- return nil
- end
- end
-
- #Returns array of the LDAP-Groups of an user
- # @param [String]subjectid
- # @return [Array] gives array of LDAP groups of a user
- def self.list_user_groups(user)
- begin
- out = RestClientWrapper.post("#{AA}/opensso/identity/read", {:name => user, :admin => RestClientWrapper.subjectid, :attributes_names => "group"})
- grps = []
- out.split("\n").each do |line|
- grps << line.sub("identitydetails.group=","") if line.include?("identitydetails.group=")
- end
- return grps
- rescue
- []
- end
- end
-
- #Returns the owner (user id) of a token
- # @param [String]subjectid optional (normally only used for testing)
- # @return [String]user
- def self.get_user subjectid=RestClientWrapper.subjectid
- begin
- out = RestClientWrapper.post("#{AA}/opensso/identity/attributes", {:subjectid => subjectid, :attributes_names => "uid"})
- user = ""; check = false
- out.split("\n").each do |line|
- if check
- user = line.sub("userdetails.attribute.value=","") if line.include?("userdetails.attribute.value=")
- check = false
- end
- check = true if line.include?("userdetails.attribute.name=uid")
- end
- return user
- rescue
- nil
- end
- end
-
- #Send default policy with Authorization::Helper class
- # @param uri [String] URI
- # @param subjectid [String]
- def self.send_policy(uri)
- aa = Authorization::Helper.new
- ret = aa.send(uri)
- $logger.debug "OpenTox::Authorization send policy for URI: #{uri} | subjectid: #{RestClientWrapper.subjectid} - policy created: #{ret}"
- ret
- end
-
- #Deletes all policies of an URI
- # @param uri [String] URI
- # @param subjectid [String]
- # @return [Boolean]
- def self.delete_policies_from_uri(uri)
- policies = list_uri_policies(uri)
- if policies
- policies.each do |policy|
- ret = delete_policy(policy)
- $logger.debug "OpenTox::Authorization delete policy: #{policy} - with result: #{ret}"
- end
- end
- return true
- end
-
- # Checks (if subjectid is valid) if a policy exist and create default policy if not
- # @param [String] uri
- # @param [String] subjectid
- # @return [Boolean] true if policy checked/created successfully (or no uri/subjectid given), false else
- def self.check_policy(uri)
- return true unless uri and RestClientWrapper.subjectid
- unless OpenTox::Authorization.is_token_valid(RestClientWrapper.subjectid)
- $logger.error "OpenTox::Authorization.check_policy, subjectid NOT valid: #{RestClientWrapper.subjectid}"
- return false
- end
-
- if !uri_has_policy(uri)
- # if no policy exists, create a policy, return result of send policy
- send_policy(uri)
- else
- # if policy exists check for POST rights
- if authorize(uri, "POST")
- true
- else
- $logger.error "OpenTox::Authorization.check_policy, already exists, but no POST-authorization with subjectid: #{RestClientWrapper.subjectid}"
- false
- end
- end
- true
- end
-
- class << self
- alias :token_valid? :is_token_valid
- end
-
- # Check Authorization for a resource (identified via URI) with method and subjectid.
- # @param uri [String] URI
- # @param request_method [String] GET, POST, PUT, DELETE
- # @param subjectid [String]
- # @return [Boolean] true if access granted, else otherwise
- def self.authorized?(uri, request_method)
- return true unless $aa[:uri]
- request_method = request_method.to_sym if request_method
- if $aa[:free_request].include?(request_method)
- true
- elsif OpenTox::Authorization.free_uri?(uri, request_method)
- true
- elsif $aa[:authenticate_request].include?(request_method)
- ret = OpenTox::Authorization.is_token_valid(RestClientWrapper.subjectid)
- $logger.debug "authorized? >>#{ret}<< (token is in/valid), method: #{request_method}, URI: #{uri}, subjectid: #{RestClientWrapper.subjectid}" unless ret
- ret
- elsif OpenTox::Authorization.authorize_exception?(uri, request_method)
- ret = OpenTox::Authorization.is_token_valid(RestClientWrapper.subjectid)
- $logger.debug "authorized? >>#{ret}<< (uris is authorize exception, token is in/valid), method: #{request_method}, URI: #{uri}, subjectid: #{RestClientWrapper.subjectid}" unless ret
- ret
- elsif $aa[:authorize_request].include?(request_method)
- ret = OpenTox::Authorization.authorize(uri, request_method)
- $logger.debug "authorized? >>#{ret}<< (uri (not) authorized), method: #{request_method}, URI: #{uri}, subjectid: #{RestClientWrapper.subjectid}" unless ret
- ret
- else
- $logger.error "invalid request/uri method: #{request_method}, URI: #{uri}, subjectid: #{RestClientWrapper.subjectid}"
- false
- end
- end
-
- private
- # extend class methods
- class << self
- # methods: free_uri and authorize_exception
- # @return [Boolean] checks if uri-method pair is included in $aa[:free_uri] or $aa[:authorize_exception]
- [:free_uri, :authorize_exception].each do |method|
- define_method "#{method}?".to_sym do |uri, request_method|
- if $aa["#{method}s".to_sym]
- $aa["#{method}s".to_sym].each do |request_methods, uris|
- if request_methods and uris and request_methods.include?(request_method.to_sym)
- uris.each do |u|
- return true if u.match uri
- end
- end
- end
- end
- return false
- end
- end
- end
- end
-end
diff --git a/lib/compound.rb b/lib/compound.rb
index 4e29938..3ba1670 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -11,32 +11,6 @@ module OpenTox
class Compound
include OpenTox
- # OpenBabel FP4 fingerprints
- # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
- fp4 = FingerprintSmarts.all
- unless fp4
- fp4 = []
- File.open(File.join(File.dirname(__FILE__),"SMARTS_InteLigand.txt")).each do |l|
- l.strip!
- unless l.empty? or l.match /^#/
- name,smarts = l.split(': ')
- fp4 << OpenTox::FingerprintSmarts.find_or_create_by(:name => name, :smarts => smarts) unless smarts.nil?
- end
- end
- end
- FP4 = fp4
-
- # TODO investigate other types of fingerprints (MACCS)
- # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
- # http://www.dalkescientific.com/writings/diary/archive/2008/06/26/fingerprint_background.html
- # OpenBabel MNA http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html#multilevel-neighborhoods-of-atoms-mna
- # Morgan ECFP, FCFP
- # http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/fingerprint/CircularFingerprinter.html
- # http://www.rdkit.org/docs/GettingStartedInPython.html
- # Chemfp
- # https://chemfp.readthedocs.org/en/latest/using-tools.html
- # CACTVS/PubChem
-
field :inchi, type: String
attr_readonly :inchi
field :smiles, type: String
@@ -48,21 +22,17 @@ module OpenTox
field :sdf_id, type: BSON::ObjectId
field :fp4, type: Array
field :fp4_size, type: Integer
- #belongs_to :dataset
- #belongs_to :data_entry
-
- #def == compound
- #self.inchi == compound.inchi
- #end
+ # Overwrites standard Mongoid method to create fingerprints before database insertion
def self.find_or_create_by params
compound = self.find_or_initialize_by params
- unless compound.fp4
+ unless compound.fp4 and !compound.fp4.empty?
compound.fp4_size = 0
compound.fp4 = []
- Algorithm::Descriptor.smarts_match(compound, FP4.collect{|f| f.smarts}).each_with_index do |m,i|
+ fingerprint = FingerprintSmarts.fingerprint
+ Algorithm::Descriptor.smarts_match(compound, fingerprint).each_with_index do |m,i|
if m > 0
- compound.fp4 << FP4[i].id
+ compound.fp4 << fingerprint[i].id
compound.fp4_size += 1
end
end
diff --git a/lib/dataset.rb b/lib/dataset.rb
index 509e897..0237adf 100644
--- a/lib/dataset.rb
+++ b/lib/dataset.rb
@@ -3,39 +3,19 @@ require 'tempfile'
module OpenTox
- class LazarPrediction < Dataset
- field :creator, type: String
- field :prediction_feature_id, type: String
-
- def prediction_feature
- Feature.find prediction_feature_id
- end
-
- end
-
- class DescriptorDataset < Dataset
- field :feature_calculation_algorithm, type: String
- end
-
- class FminerDataset < DescriptorDataset
- field :training_algorithm, type: String
- field :training_dataset_id, type: BSON::ObjectId
- field :training_feature_id, type: BSON::ObjectId
- field :training_parameters, type: Hash
- end
-
class Dataset
- include Mongoid::Document
attr_writer :data_entries
# associations like has_many, belongs_to deteriorate performance
field :feature_ids, type: Array, default: []
field :compound_ids, type: Array, default: []
- field :data_entries_id, type: BSON::ObjectId
+ field :data_entries_id, type: BSON::ObjectId, default: []
field :source, type: String
field :warnings, type: Array, default: []
+ # Save all data including data_entries
+ # Should be used instead of save
def save_all
dump = Marshal.dump(@data_entries)
file = Mongo::Grid::File.new(dump, :filename => "#{self.id.to_s}.data_entries")
@@ -46,74 +26,32 @@ module OpenTox
# Readers
+ # Get all compounds
def compounds
@compounds ||= self.compound_ids.collect{|id| OpenTox::Compound.find id}
@compounds
end
+ # Get all features
def features
@features ||= self.feature_ids.collect{|id| OpenTox::Feature.find(id)}
@features
end
- def fill_nil_with n
- (0 .. compound_ids.size-1).each do |i|
- @data_entries[i] ||= []
- (0 .. feature_ids.size-1).each do |j|
- @data_entries[i][j] ||= n
- end
- end
- end
-
- def [](row,col)
- @data_entries[row,col]
- end
-
- def []=(row,col,v)
- @data_entries ||= []
- @data_entries[row] ||= []
- @data_entries[row][col] = v
- end
-
- def correlation_plot training_dataset
- R.assign "features", data_entries
- R.assign "activities", training_dataset.data_entries.collect{|de| de.first}
- R.eval "featurePlot(features,activities)"
- end
-
- def density_plot
- R.assign "acts", data_entries.collect{|r| r.first }#.compact
- R.eval "plot(density(log(acts),na.rm= TRUE), main='log(#{features.first.name})')"
- # TODO kill Rserve plots
- end
-
- # merge dataset (i.e. append features)
- def +(dataset)
- bad_request_error "Dataset merge failed because the argument is not a OpenTox::Dataset but a #{dataset.class}" unless dataset.is_a? Dataset
- bad_request_error "Dataset merge failed because compounds are unequal in datasets #{self.id} and #{dataset.id}" unless compound_ids == dataset.compound_ids
- self.feature_ids ||= []
- self.feature_ids = self.feature_ids + dataset.feature_ids
- @data_entries ||= Array.new(compound_ids.size){[]}
- @data_entries.each_with_index do |row,i|
- @data_entries[i] = row + dataset.fingerprint(compounds[i])
- end
- self
-
- end
-
- def fingerprint(compound)
- i = compound_ids.index(compound.id)
- i.nil? ? nil : data_entries[i]
- end
-
+ # Get all data_entries
def data_entries
unless @data_entries
t = Time.now
- @data_entries = Marshal.load($gridfs.find_one(_id: data_entries_id).data)
- bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
- bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
- bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries..first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
- $logger.debug "Retrieving data: #{Time.now-t}"
+ data_entry_file = $gridfs.find_one(_id: data_entries_id)
+ if data_entry_file.nil?
+ @data_entries = []
+ else
+ @data_entries = Marshal.load(data_entry_file.data)
+ bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
+ bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
+ bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries..first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
+ $logger.debug "Retrieving data: #{Time.now-t}"
+ end
end
@data_entries
end
@@ -130,50 +68,21 @@ module OpenTox
# Writers
+ # Set compounds
def compounds=(compounds)
self.compound_ids = compounds.collect{|c| c.id}
end
- def add_compound compound
- self.compound_ids << compound.id
- end
-
+ # Set features
def features=(features)
self.feature_ids = features.collect{|f| f.id}
end
- def add_feature feature
- self.feature_ids << feature.id
- end
-
- def self.create compounds, features, warnings=[], source=nil
- dataset = Dataset.new(:warnings => warnings)
- dataset.compounds = compounds
- dataset.features = features
- dataset
- end
-
- # for prediction result datasets
- # assumes that there are feature_ids with title prediction and confidence
- # @return [Array] of Hashes with keys { :compound, :value ,:confidence } (compound value is object not uri)
- # TODO
- #def predictions
- #end
-
- # Serialisation
-
- # converts dataset to csv format including compound smiles as first column, other column headers are feature titles
- # @return [String]
- def to_csv(inchi=false)
- CSV.generate() do |csv| #{:force_quotes=>true}
- csv << [inchi ? "InChI" : "SMILES"] + features.collect{|f| f.title}
- compounds.each_with_index do |c,i|
- csv << [inchi ? c.inchi : c.smiles] + data_entries[i]
- end
- end
- end
+ # Dataset operations
- # split dataset into n folds
+ # Split a dataset into n folds
+ # @param [Integer] number of folds
+ # @return [Array] Array with folds [training_dataset,test_dataset]
def folds n
len = self.compound_ids.size
indices = (0..len-1).to_a.shuffle
@@ -199,9 +108,36 @@ module OpenTox
chunks
end
+ # Diagnostics
+
+ def correlation_plot training_dataset
+ # TODO: create/store svg
+ R.assign "features", data_entries
+ R.assign "activities", training_dataset.data_entries.collect{|de| de.first}
+ R.eval "featurePlot(features,activities)"
+ end
+
+ def density_plot
+ # TODO: create/store svg
+ R.assign "acts", data_entries.collect{|r| r.first }#.compact
+ R.eval "plot(density(log(acts),na.rm= TRUE), main='log(#{features.first.name})')"
+ end
+
+ # Serialisation
+
+ # converts dataset to csv format including compound smiles as first column, other column headers are feature titles
+ # @return [String]
+ def to_csv(inchi=false)
+ CSV.generate() do |csv| #{:force_quotes=>true}
+ csv << [inchi ? "InChI" : "SMILES"] + features.collect{|f| f.title}
+ compounds.each_with_index do |c,i|
+ csv << [inchi ? c.inchi : c.smiles] + data_entries[i]
+ end
+ end
+ end
+
- # Adding data methods
- # (Alternatively, you can directly change @data["feature_ids"] and @data["compounds"])
+ # Parsers
# Create a dataset from file (csv,sdf,...)
# @param filename [String]
@@ -210,6 +146,8 @@ module OpenTox
#def self.from_sdf_file
#end
+ # Create a dataset from CSV file
+ # TODO: document structure
def self.from_csv_file file, source=nil, bioassay=true
source ||= file
table = CSV.read file, :skip_blanks => true
@@ -222,8 +160,6 @@ module OpenTox
# does a lot of guesswork in order to determine feature types
def parse_table table, bioassay=true
- # TODO: remove empty entries + write tests
-
time = Time.now
# features
@@ -277,24 +213,21 @@ module OpenTox
table.each_with_index do |vals,i|
ct = Time.now
identifier = vals.shift
- #if vals.compact.empty?
- #warnings << "No values for compound at position #{i+2}, all entries are ignored."
- #@data_entries.pop
- #next
- #end
+ warnings << "No feature values for compound at position #{i+2}." if vals.compact.empty?
begin
+ # TODO parse inchi and catch openbabel errors (and segfaults) in compound.rb
case compound_format
when /SMILES/i
compound = OpenTox::Compound.from_smiles(identifier)
if compound.inchi.empty?
- warnings << "Cannot parse #{compound_format} compound '#{compound.strip}' at position #{i+2}, all entries are ignored."
+ warnings << "Cannot parse #{compound_format} compound '#{identifier}' at position #{i+2}, all entries are ignored."
next
end
when /InChI/i
compound = OpenTox::Compound.from_inchi(identifier)
end
rescue
- warnings << "Cannot parse #{compound_format} compound '#{compound}' at position #{i+2}, all entries are ignored."
+ warnings << "Cannot parse #{compound_format} compound '#{identifier}' at position #{i+2}, all entries are ignored."
next
end
compound_time += Time.now-ct
@@ -330,5 +263,71 @@ module OpenTox
$logger.debug "Saving: #{Time.now-time}"
end
+
+=begin
+ # TODO remove
+
+ # Create a dataset with compounds and features
+ def self.create compounds, features, warnings=[], source=nil
+ dataset = Dataset.new(:warnings => warnings)
+ dataset.compounds = compounds
+ dataset.features = features
+ dataset
+ end
+ # merge dataset (i.e. append features)
+ def +(dataset)
+ bad_request_error "Dataset merge failed because the argument is not a OpenTox::Dataset but a #{dataset.class}" unless dataset.is_a? Dataset
+ bad_request_error "Dataset merge failed because compounds are unequal in datasets #{self.id} and #{dataset.id}" unless compound_ids == dataset.compound_ids
+ self.feature_ids ||= []
+ self.feature_ids = self.feature_ids + dataset.feature_ids
+ @data_entries ||= Array.new(compound_ids.size){[]}
+ @data_entries.each_with_index do |row,i|
+ @data_entries[i] = row + dataset.fingerprint(compounds[i])
+ end
+ self
+
+ end
+
+ def fingerprint(compound)
+ i = compound_ids.index(compound.id)
+ i.nil? ? nil : data_entries[i]
+ end
+=end
+
+ private
+
+ def fill_nil_with n
+ (0 .. compound_ids.size-1).each do |i|
+ @data_entries[i] ||= []
+ (0 .. feature_ids.size-1).each do |j|
+ @data_entries[i][j] ||= n
+ end
+ end
+ end
+ end
+
+ # Dataset for lazar predictions
+ class LazarPrediction < Dataset
+ field :creator, type: String
+ field :prediction_feature_id, type: String
+
+ def prediction_feature
+ Feature.find prediction_feature_id
+ end
+
+ end
+
+ # Dataset for descriptors (physchem)
+ class DescriptorDataset < Dataset
+ field :feature_calculation_algorithm, type: String
end
+
+ # Dataset for fminer descriptors
+ class FminerDataset < DescriptorDataset
+ field :training_algorithm, type: String
+ field :training_dataset_id, type: BSON::ObjectId
+ field :training_feature_id, type: BSON::ObjectId
+ field :training_parameters, type: Hash
+ end
+
end
diff --git a/lib/descriptor.rb b/lib/descriptor.rb
new file mode 100644
index 0000000..68bc7a2
--- /dev/null
+++ b/lib/descriptor.rb
@@ -0,0 +1,253 @@
+require 'digest/md5'
+ENV["JAVA_HOME"] ||= "/usr/lib/jvm/java-7-openjdk"
+BABEL_3D_CACHE_DIR = File.join(File.dirname(__FILE__),"..",'/babel_3d_cache')
+# TODO store descriptors in mongodb
+
+module OpenTox
+
+ module Algorithm
+ class Descriptor
+ include OpenTox
+
+ JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
+ CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
+ JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
+ LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
+ JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")
+
+ obexclude = ["cansmi","cansmiNS","formula","InChI","InChIKey","s","smarts","title"]
+ OBDESCRIPTORS = Hash[OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
+ name,description = d.split(/\s+/,2)
+ ["Openbabel."+name,description] unless obexclude.include? name
+ end.compact.sort{|a,b| a[0] <=> b[0]}]
+
+ cdk_desc = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`)
+ CDKDESCRIPTORS = Hash[cdk_desc.collect { |d| ["Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,''), d[:description]] }.sort{|a,b| a[0] <=> b[0]}]
+ CDKDESCRIPTOR_VALUES = cdk_desc.collect { |d| prefix="Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,''); d[:names].collect{ |name| prefix+"."+name } }.flatten
+
+ # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
+ joelibexclude = ["MoleculeHashcode","GlobalTopologicalChargeIndex"]
+ # strip Joelib messages from stdout
+ JOELIBDESCRIPTORS = Hash[YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptorInfo | sed '0,/---/d'`).collect do |d|
+ name = d[:java_class].sub(/^joelib2.feature.types./,'')
+ # impossible to obtain meaningful descriptions from JOELIb, see java/JoelibDescriptors.java
+ ["Joelib."+name, "no description available"] unless joelibexclude.include? name
+ end.compact.sort{|a,b| a[0] <=> b[0]}]
+
+ DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS))
+ DESCRIPTOR_VALUES = OBDESCRIPTORS.keys + CDKDESCRIPTOR_VALUES + JOELIBDESCRIPTORS.keys
+
+ require_relative "unique_descriptors.rb"
+
+ def self.description descriptor
+ lib = descriptor.split('.').first
+ case lib
+ when "Openbabel"
+ OBDESCRIPTORS[descriptor]
+ when "Cdk"
+ name = descriptor.split('.')[0..-2].join('.')
+ CDKDESCRIPTORS[name]
+ when "Joelib"
+ JOELIBDESCRIPTORS[descriptor]
+ when "lookup"
+ "Read feature values from a dataset"
+ end
+ end
+
+ def self.smarts_match compounds, smarts_features, count=false
+ bad_request_error "Compounds for smarts_match are empty" unless compounds
+ bad_request_error "Smarts features for smarts_match are empty" unless smarts_features
+ parse compounds
+ @count = count
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_format('inchi')
+ smarts_pattern = OpenBabel::OBSmartsPattern.new
+ smarts_features = [smarts_features] if smarts_features.is_a?(Feature)
+ @smarts = smarts_features.collect{|f| f.smarts}
+ @physchem_descriptors = nil
+ @data_entries = Array.new(@compounds.size){Array.new(@smarts.size,false)}
+ @compounds.each_with_index do |compound,c|
+ # TODO OpenBabel may segfault here
+ # catch inchi errors in compound.rb
+ # eg. at line 249 of rat_feature_dataset
+ # which worked with opentox-client
+ # (but no smarts_match)
+ p "'#{compound.inchi}'"
+ obconversion.read_string(obmol,compound.inchi)
+ @smarts.each_with_index do |smart,s|
+ smarts_pattern.init(smart)
+ if smarts_pattern.match(obmol)
+ count ? value = smarts_pattern.get_map_list.to_a.size : value = 1
+ else
+ value = 0
+ end
+ @data_entries[c][s] = value
+ end
+ end
+ serialize
+ end
+
+ def self.smarts_count compounds, smarts
+ smarts_match compounds,smarts,true
+ end
+
+ def self.serialize
+ case @input_class
+ when "OpenTox::Compound"
+ if @with_names and @physchem_descriptors
+ [@physchem_descriptors,@data_entries.first]
+ else
+ @data_entries.first
+ end
+ when "Array"
+ if @with_names and @physchem_descriptors
+ [@physchem_descriptors,@data_entries.first]
+ else
+ @data_entries
+ end
+ when "OpenTox::Dataset"
+ dataset = OpenTox::DescriptorDataset.new(:compound_ids => @compounds.collect{|c| c.id})
+ if @smarts
+ dataset.feature_ids = @smarts.collect{|smart| Smarts.find_or_create_by(:smarts => smart).id}
+ @count ? algo = "count" : algo = "match"
+ dataset.feature_calculation_algorithm = "#{self}.smarts_#{algo}"
+
+ elsif @physchem_descriptors
+ dataset.feature_ids = @physchem_descriptors.collect{|d| PhysChemDescriptor.find_or_create_by(:name => d, :creator => __FILE__).id}
+ dataset.data_entries = @data_entries
+ dataset.feature_calculation_algorithm = "#{self}.physchem"
+ #TODO params?
+ end
+ dataset.save_all
+ dataset
+ end
+ end
+
+ def self.physchem compounds, descriptors=UNIQUEDESCRIPTORS, with_names=false
+ parse compounds
+ @data_entries = Array.new(@compounds.size){[]}
+ @descriptors = descriptors
+ @smarts = nil
+ @physchem_descriptors = [] # CDK may return more than one result per descriptor, they are stored as separate features
+ @with_names = with_names
+ des = {}
+ @descriptors.each do |d|
+ lib, descriptor = d.split(".",2)
+ lib = lib.downcase.to_sym
+ des[lib] ||= []
+ des[lib] << descriptor
+ end
+ des.each do |lib,descriptors|
+ send(lib, descriptors)
+ end
+ serialize
+ end
+
+ def self.openbabel descriptors
+ $logger.debug "compute #{descriptors.size} openbabel descriptors for #{@compounds.size} compounds"
+ obdescriptors = descriptors.collect{|d| OpenBabel::OBDescriptor.find_type d}
+ obmol = OpenBabel::OBMol.new
+ obconversion = OpenBabel::OBConversion.new
+ obconversion.set_in_format 'inchi'
+ last_feature_idx = @physchem_descriptors.size
+ @compounds.each_with_index do |compound,c|
+ obconversion.read_string obmol, compound.inchi
+ obdescriptors.each_with_index do |descriptor,d|
+ @data_entries[c][d+last_feature_idx] = fix_value(descriptor.predict(obmol))
+ end
+ end
+ @physchem_descriptors += descriptors.collect{|d| "Openbabel.#{d}"}
+ end
+
+ def self.java_descriptors descriptors, lib
+ $logger.debug "compute #{descriptors.size} cdk descriptors for #{@compounds.size} compounds"
+ sdf = sdf_3d
+ # use java system call (rjb blocks within tasks)
+ # use Tempfiles to avoid "Argument list too long" error
+ case lib
+ when "cdk"
+ run_cmd "java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{sdf} #{descriptors.join(" ")}"
+ when "joelib"
+ run_cmd "java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{sdf} #{descriptors.join(' ')}"
+ end
+ last_feature_idx = @physchem_descriptors.size
+ YAML.load_file("#{sdf}#{lib}.yaml").each_with_index do |calculation,i|
+ $logger.error "Descriptor calculation failed for compound #{compounds[i].inchi}." if calculation.empty?
+ # CDK Descriptors may calculate multiple values, they are stored in separate features
+ @physchem_descriptors += calculation.keys if i == 0
+ calculation.keys.each_with_index do |name,j|
+ @data_entries[i][j+last_feature_idx] = fix_value(calculation[name])
+ end
+ end
+ FileUtils.rm "#{sdf}#{lib}.yaml"
+ end
+
+ def self.cdk descriptors
+ java_descriptors descriptors, "cdk"
+ end
+
+ def self.joelib descriptors
+ java_descriptors descriptors, "joelib"
+ end
+
+ def self.lookup compounds, features, dataset
+ parse compounds
+ fingerprint = []
+ compounds.each do |compound|
+ fingerprint << []
+ features.each do |feature|
+ end
+ end
+ end
+
+ def self.run_cmd cmd
+ cmd = "#{cmd} 2>&1"
+ $logger.debug "running external cmd: '#{cmd}'"
+ p = IO.popen(cmd) do |io|
+ while line = io.gets
+ $logger.debug "> #{line.chomp}"
+ end
+ io.close
+ raise "external cmd failed '#{cmd}' (see log file for error msg)" unless $?.to_i == 0
+ end
+ end
+
+ def self.sdf_3d
+ # TODO check if 3d sdfs are stored in GridFS
+ sdf = ""
+ @compounds.each do |compound|
+ sdf << compound.sdf
+ end
+ sdf_file = "/tmp/#{SecureRandom.uuid}.sdf"
+ File.open(sdf_file,"w+"){|f| f.print sdf}
+ sdf_file
+ end
+
+ def self.parse compounds
+ @input_class = compounds.class.to_s
+ case @input_class
+ when "OpenTox::Compound"
+ @compounds = [compounds]
+ when "Array"
+ @compounds = compounds
+ when "OpenTox::Dataset"
+ @compounds = compounds.compounds
+ else
+ bad_request_error "Cannot calculate descriptors for #{compounds.class} objects."
+ end
+ end
+
+ def self.fix_value val
+ val = val.first if val.is_a? Array and val.size == 1
+ val = nil if val == "NaN"
+ if val.numeric?
+ val = Float(val)
+ val = nil if val.nan? or val.infinite?
+ end
+ val
+ end
+ private_class_method :sdf_3d, :fix_value, :parse, :run_cmd, :serialize
+ end
+ end
+end
diff --git a/lib/error.rb b/lib/error.rb
index 12e22ff..8fe8a1e 100644
--- a/lib/error.rb
+++ b/lib/error.rb
@@ -1,36 +1,19 @@
-require 'open4'
-
-# add additional fields to Exception class to format errors according to OT-API
module OpenToxError
- attr_accessor :http_code, :uri, :error_cause, :metadata
- def initialize(message=nil, uri=nil, cause=nil)
+ attr_accessor :http_code, :message, :cause
+ def initialize message=nil
message = message.to_s.gsub(/\A"|"\Z/, '') if message # remove quotes
- @error_cause = cause ? OpenToxError::cut_backtrace(cause) : short_backtrace
-
super message
- @uri = uri.to_s.sub(%r{//.*:.*@},'//') # remove credentials from uri
- @http_code ||= 500
- @metadata = {
- :type => "ErrorReport",
- :actor => @uri,
- :message => message.to_s,
- :statusCode => @http_code,
- :errorCode => self.class.to_s,
- :errorCause => @error_cause,
- }
- $logger.error("\n"+JSON.pretty_generate(@metadata))
- end
-
-=begin
- # this method defines what is used for to_yaml (override to skip large @rdf graph)
- def encode_with coder
- @rdf.each do |statement|
- coder[statement.predicate.fragment.to_s] = statement.object.to_s
- end
+ @http_code ||= 500
+ @message = message.to_s
+ @cause = cut_backtrace(caller)
+ $logger.error("\n"+JSON.pretty_generate({
+ :http_code => @http_code,
+ :message => @message,
+ :cause => @cause
+ }))
end
-=end
- def self.cut_backtrace(trace)
+ def cut_backtrace(trace)
if trace.is_a?(Array)
cut_index = trace.find_index{|line| line.match(/sinatra|minitest/)}
cut_index ||= trace.size
@@ -41,34 +24,6 @@ module OpenToxError
trace
end
end
-
- def short_backtrace
- backtrace = caller.collect{|line| line unless line =~ /#{File.dirname(__FILE__)}/}.compact
- OpenToxError::cut_backtrace(backtrace)
- end
-
-=begin
- RDF_FORMATS.each do |format|
- # rdf serialization methods for all formats e.g. to_rdfxml
- send :define_method, "to_#{format}".to_sym do
- RDF::Writer.for(format).buffer do |writer|
- @rdf.each{|statement| writer << statement} if @rdf
- end
- end
- end
-
- def to_turtle # redefine to use prefixes (not supported by RDF::Writer)
- prefixes = {:rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}
- ['OT', 'DC', 'XSD', 'OLO'].each{|p| prefixes[p.downcase.to_sym] = eval("RDF::#{p}.to_s") }
- RDF::Turtle::Writer.for(:turtle).buffer(:prefixes => prefixes) do |writer|
- @rdf.each{|statement| writer << statement} if @rdf
- end
- end
-=end
-
- def to_json
- @metadata.to_json
- end
end
@@ -76,7 +31,7 @@ class RuntimeError
include OpenToxError
end
-# clutters log file with library errors
+# clutters log file with library errors
#class NoMethodError
#include OpenToxError
#end
@@ -86,9 +41,9 @@ module OpenTox
class Error < RuntimeError
include OpenToxError
- def initialize(code, message=nil, uri=nil, cause=nil)
+ def initialize(code, message=nil)
@http_code = code
- super message, uri, cause
+ super message
end
end
@@ -96,15 +51,15 @@ module OpenTox
RestClientWrapper.known_errors.each do |error|
# create error classes
c = Class.new Error do
- define_method :initialize do |message=nil, uri=nil, cause=nil|
- super error[:code], message, uri, cause
+ define_method :initialize do |message=nil|
+ super error[:code], message
end
end
OpenTox.const_set error[:class],c
# define global methods for raising errors, eg. bad_request_error
Object.send(:define_method, error[:method]) do |message,uri=nil,cause=nil|
- raise c.new(message, uri, cause)
+ raise c.new(message)
end
end
diff --git a/lib/feature.rb b/lib/feature.rb
index 005d78f..9deb199 100644
--- a/lib/feature.rb
+++ b/lib/feature.rb
@@ -1,17 +1,16 @@
module OpenTox
+ # Basic feature class
class Feature
field :name, as: :title, type: String
field :nominal, type: Boolean
field :numeric, type: Boolean
field :measured, type: Boolean
- field :calculated, type: Boolean
- field :supervised, type: Boolean
- field :source, as: :title, type: String
- #belongs_to :dataset
end
+ # Feature for categorical variables
class NominalFeature < Feature
+ # TODO check if accept_values are still needed
field :accept_values, type: Array
def initialize params
super params
@@ -19,6 +18,7 @@ module OpenTox
end
end
+ # Feature for quantitative variables
class NumericFeature < Feature
def initialize params
super params
@@ -26,43 +26,69 @@ module OpenTox
end
end
+ # Feature for SMARTS fragments
class Smarts < NominalFeature
field :smarts, type: String
- #field :name, as: :smarts, type: String # causes warnings
- field :algorithm, type: String, default: "OpenTox::Algorithm::Descriptors.smarts_match"
- field :parameters, type: Hash, default: {:count => false}
- def initialize params
- super params
- nominal = true
- end
end
+ # Feature for supervised fragments from Fminer algorithm
class FminerSmarts < Smarts
- field :pValue, type: Float
+ field :p_value, type: Float
+ # TODO check if effect is used
field :effect, type: String
field :dataset_id
- def initialize params
- super params
- supervised = true
- end
end
+ # Feature for database fingerprints
+ # needs count for efficient retrieval (see compound.rb)
class FingerprintSmarts < Smarts
field :count, type: Integer
+ def self.fingerprint
+ @@fp4 ||= OpenTox::FingerprintSmarts.all
+ unless @@fp4.size == 306
+ @@fp4 = []
+ # OpenBabel FP4 fingerprints
+ # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
+ # TODO investigate other types of fingerprints (MACCS)
+ # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
+ # http://www.dalkescientific.com/writings/diary/archive/2008/06/26/fingerprint_background.html
+ # OpenBabel MNA http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html#multilevel-neighborhoods-of-atoms-mna
+ # Morgan ECFP, FCFP
+ # http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/fingerprint/CircularFingerprinter.html
+ # http://www.rdkit.org/docs/GettingStartedInPython.html
+ # Chemfp
+ # https://chemfp.readthedocs.org/en/latest/using-tools.html
+ # CACTVS/PubChem
+
+ File.open(File.join(File.dirname(__FILE__),"SMARTS_InteLigand.txt")).each do |l|
+ l.strip!
+ unless l.empty? or l.match /^#/
+ name,smarts = l.split(': ')
+ @@fp4 << OpenTox::FingerprintSmarts.find_or_create_by(:name => name, :smarts => smarts) unless smarts.nil?
+ end
+ end
+ end
+ @@fp4
+ end
end
+ # Feature for physico-chemical descriptors
+ class PhysChemDescriptor < NumericFeature
+ field :algorithm, type: String, default: "OpenTox::Algorithm::Descriptor.physchem" # presumably names the method that calculates the descriptor - confirm
+ field :parameters, type: Hash # no default is declared for this field
+ field :creator, type: String # no default is declared for this field
+ end
+
+ # Feature for categorical bioassay results
class NominalBioAssay < NominalFeature
+ # TODO: needed? move to dataset?
field :description, type: String
end
+ # Feature for quantitative bioassay results
class NumericBioAssay < NumericFeature
+ # TODO: needed? move to dataset?
field :description, type: String
end
- class PhysChemDescriptor < NumericFeature
- field :algorithm, type: String, default: "OpenTox::Algorithm::Descriptor.physchem"
- field :parameters, type: Hash
- field :creator, type: String
- end
-
end
diff --git a/lib/format-conversion.rb b/lib/format-conversion.rb
deleted file mode 100644
index 7563b94..0000000
--- a/lib/format-conversion.rb
+++ /dev/null
@@ -1,406 +0,0 @@
-# defaults to stderr, may be changed to file output (e.g in opentox-service)
-$logger = OTLogger.new(STDERR)
-$logger.level = Logger::DEBUG
-
-module OpenTox
-
- # Ruby interface
-
- attr_accessor :data
-
- # Create a new OpenTox object
- # @param uri [optional,String] URI
- # @return [OpenTox] OpenTox object
- def initialize uri=nil
- @data = {}
- if uri
- @data[:uri] = uri.to_s.chomp
- get
- else
- @data[:uuid] = SecureRandom.uuid
- @data[:uri] = File.join(service_uri, @data[:uuid])
- end
- end
-
- # Object metadata (lazy loading)
- # @return [Hash] Object metadata
- def metadata force_update=false
- get #if (@metadata.nil? or @metadata.empty? or force_update) and URI.accessible? @uri
- @data
- end
-
- # Metadata values
- # @param predicate [String] Predicate URI
- # @return [Array, String] Predicate value(s)
- def [](predicate)
- predicate = predicate.to_s
- return nil if metadata[predicate].nil?
- metadata[predicate].size == 1 ? metadata[predicate].first : metadata[predicate]
- end
-
- # Set a metadata entry
- # @param predicate [String] Predicate URI
- # @param values [Array, String] Predicate value(s)
- def []=(predicate,values)
- predicate = predicate.to_s
- @data[predicate] = [values].flatten
- end
-
-=begin
- # Object parameters (lazy loading)
- # {http://opentox.org/dev/apis/api-1.2/interfaces OpenTox API}
- # @return [Hash] Object parameters
- def parameters force_update=false
- if (@parameters.empty? or force_update) and URI.accessible? @uri
- get #if @rdf.empty? or force_update
- params = {}
- query = RDF::Query.new({
- :parameter => {
- RDF.type => RDF::OT.Parameter,
- :property => :value,
- }
- })
- query.execute(@rdf).each do |solution|
- params[solution.parameter] = {} unless params[solution.parameter]
- params[solution.parameter][solution.property] = solution.value
- end
- @parameters = params.values
- end
- @parameters
- end
-
- # Parameter value
- # @param [String] title
- # @return [String] value
- def parameter_value title
- @parameters.collect{|p| p[RDF::OT.paramValue] if p[RDF::DC.title] == title}.compact.first
- end
-=end
-
- # Get object from webservice
- # @param [String,optional] mime_type
- def get mime_type="application/json"
- bad_request_error "Mime type #{mime_type} is not supported. Please use 'application/json' (default), 'text/plain' (ntriples) or mime_type == 'application/rdf+xml'." unless mime_type == "application/json" or mime_type == "text/plain" or mime_type == "application/rdf+xml"
- p @data[:uri]
- response = RestClientWrapper.get(@data[:uri],{},{:accept => mime_type})
- if URI.task?(response)
- uri = wait_for_task response
- response = RestClientWrapper.get(uri,{},{:accept => mime_type})
- p uri
- end
- case mime_type
- when 'application/json'
- p response
- @data = JSON.parse(response) if response
- when "text/plain"
- parse_ntriples response
- when "application/rdf+xml"
- parse_rdfxml response
- end
- end
-
-=begin
- # Post object to webservice (append to object), rarely useful and deprecated
- # @deprecated
- def post wait=true, mime_type="text/plain"
- bad_request_error "Mime type #{mime_type} is not supported. Please use 'text/plain' (default) or 'application/rdf+xml'." unless mime_type == "text/plain" or mime_type == "application/rdf+xml"
- case mime_type
- when 'text/plain'
- body = self.to_ntriples
- when 'application/rdf+xml'
- body = self.to_rdfxml
- end
- #Authorization.check_policy(@uri) if $aa[:uri]
- uri = RestClientWrapper.post @uri.to_s, body, { :content_type => mime_type}
- wait ? wait_for_task(uri) : uri
- end
-=end
-
- # Save object at webservice (replace or create object)
- def put wait=true, mime_type="application/json"
- bad_request_error "Mime type #{mime_type} is not supported. Please use 'application/json' (default)." unless mime_type == "application/json" or mime_type == "text/plain" or mime_type == "application/rdf+xml"
- @data[:created_at] = DateTime.now unless URI.accessible? @data[:uri]
- #@metadata[RDF::DC.modified] = DateTime.now
- @data[:uri] ? @data[:uri] = uri.to_s.chomp : @data[:uri] = File.join(service_uri, SecureRandom.uuid)
- case mime_type
- when 'text/plain'
- body = self.to_ntriples
- when 'application/rdf+xml'
- body = self.to_rdfxml
- when 'application/json'
- body = self.to_json
- end
- uri = RestClientWrapper.put @data[:uri], body, { :content_type => mime_type}
- wait ? wait_for_task(uri) : uri
- end
-
- # Delete object at webservice
- def delete
- RestClientWrapper.delete(@data[:uri])
- #Authorization.delete_policies_from_uri(@data[:uri]) if $aa[:uri]
- end
-
- def service_uri
- self.class.service_uri
- end
-
- def create_rdf
- #$logger.debug "#{eval("RDF::OT."+self.class.to_s.split('::').last)}\n"
- @rdf = RDF::Graph.new
- # DG: since model is no self.class anymore
- @metadata[RDF.type] ||= (eval("RDF::OT."+self.class.to_s.split('::').last) =~ /Lazar|Generic/) ? RDF::URI.new(RDF::OT.Model) : RDF::URI.new(eval("RDF::OT."+self.class.to_s.split('::').last))
- #@metadata[RDF.type] ||= RDF::URI.new(eval("RDF::OT."+self.class.to_s.split('::').last))
- @metadata[RDF::DC.date] ||= DateTime.now
- # DG: uri in object should be in brackets, otherwise query for uri-list ignores the object.
- # see: http://www.w3.org/TR/rdf-testcases/#sec-uri-encoding
- @metadata.each do |predicate,values|
- [values].flatten.each{ |value| @rdf << [RDF::URI.new(@data[:uri]), predicate, (URI.valid?(value) ? RDF::URI.new(value) : value)] unless value.nil? }
- end
- @parameters.each do |parameter|
- p_node = RDF::Node.new
- @rdf << [RDF::URI.new(@data[:uri]), RDF::OT.parameters, p_node]
- @rdf << [p_node, RDF.type, RDF::OT.Parameter]
- parameter.each { |k,v| @rdf << [p_node, k, v] unless v.nil?}
- end
- end
-
- # as defined in opentox-client.rb
- RDF_FORMATS.each do |format|
-
- # rdf parse methods for all formats e.g. parse_rdfxml
- send :define_method, "parse_#{format}".to_sym do |rdf|
- @rdf = RDF::Graph.new
- RDF::Reader.for(format).new(rdf) do |reader|
- reader.each_statement{ |statement| @rdf << statement }
- end
- # return values as plain strings instead of RDF objects
- @metadata = @rdf.to_hash[RDF::URI.new(@data[:uri])].inject({}) { |h, (predicate, values)| h[predicate] = values.collect{|v| v.to_s}; h }
- end
-
-=begin
- # rdf serialization methods for all formats e.g. to_rdfxml
- send :define_method, "to_#{format}".to_sym do
- create_rdf
- # if encoding is used iteration is necessary
- # see: http://rubydoc.info/github/ruby-rdf/rdf/RDF/NTriples/Writer
- RDF::Writer.for(format).buffer(:encoding => Encoding::ASCII) do |writer|
- @rdf.each_statement do |statement|
- writer << statement
- end
- end
- end
-=end
- end
-
- # @return [String] converts object to turtle-string
- def to_turtle # redefined to use prefixes (not supported by RDF::Writer)
- prefixes = {:rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}
- ['OT', 'DC', 'XSD', 'OLO'].each{|p| prefixes[p.downcase.to_sym] = eval("RDF::#{p}.to_s") }
- create_rdf
- RDF::Turtle::Writer.for(:turtle).buffer(:prefixes => prefixes) do |writer|
- writer << @rdf
- end
- end
-
- def to_json
- @data.to_json
- end
-
- # @return [String] converts OpenTox object into html document (by first converting it to a string)
- def to_html
- to_turtle.to_html
- end
-
- # short access for metadata keys title, description and type
- [ :title , :description , :type , :uri, :uuid ].each do |method|
- send :define_method, method do
- self.data[method]
- end
- send :define_method, "#{method}=" do |value|
- self.data[method] = value
- end
- end
-
- # define class methods within module
- def self.included(base)
- base.extend(ClassMethods)
- end
-
- module ClassMethods
- def service_uri
- service = self.to_s.split('::')[1].downcase
- eval("$#{service}[:uri]")
- rescue
- bad_request_error "$#{service}[:uri] variable not set. Please set $#{service}[:uri] or use an explicit uri as first constructor argument "
- end
- def subjectid
- RestClientWrapper.subjectid
- end
- def subjectid=(subjectid)
- RestClientWrapper.subjectid = subjectid
- end
- end
-
- # create default OpenTox classes with class methods
- # (defined in opentox-client.rb)
- CLASSES.each do |klass|
- c = Class.new do
- include OpenTox
-
- def self.all
- uris = RestClientWrapper.get(service_uri, {},{:accept => 'text/uri-list'}).split("\n").compact
- uris.collect{|uri| self.new(uri)}
- end
-
- #@example fetching a model
- # OpenTox::Model.find(<model-uri>) -> model-object
- def self.find uri
- URI.accessible?(uri) ? self.new(uri) : nil
- end
-
- def self.create metadata
- object = self.new
- object.data = metadata
- object.put
- object
- end
-
- def self.find_or_create metadata
- uris = RestClientWrapper.get(service_uri,{:query => @data},{:accept => "text/uri-list"}).split("\n")
- uris.empty? ? self.create(@data) : self.new(uris.first)
- end
- end
- OpenTox.const_set klass,c
- end
-
-end
-
-# from overwrite.rb
-class String
-
- # encloses URI in text with with link tag
- # @return [String] new text with marked links
- def link_urls
- self.gsub(/(?i)http(s?):\/\/[^\r\n\s']*/, '<a href="\0">\0</a>')
- end
-
- # produces a html page for making web services browser friendly
- # format of text (=string params) is preserved (e.g. line breaks)
- # urls are marked as links
- #
- # @param related_links [optional,String] uri on related resources
- # @param description [optional,String] general info
- # @param png_image [optional,String] imagename
- # @return [String] html page
- def to_html(related_links=nil, description=nil, png_image=nil )
-
- # TODO add title as parameter
- title = nil #$sinatra.to($sinatra.request.env['PATH_INFO'], :full) if $sinatra
- html = "<html><body>"
- html << "<title>"+title+"</title>" if title
- #html += "<img src=\""+OT_LOGO+"\"><\/img><body>"
-
- html << "<h3>Description</h3><pre><p>"+description.link_urls+"</p></pre>" if description
- html << "<h3>Related links</h3><pre><p>"+related_links.link_urls+"</p></pre>" if related_links
- html << "<h3>Content</h3>" if description || related_links
- html << "<pre><p style=\"padding:15px; border:10px solid \#C5C1E4\">"
- html << "<img src=\"data:image/png;base64,#{Base64.encode64(png_image)}\">\n" if png_image
- html << self.link_urls
- html << "</p></pre></body></html>"
- html
- end
-
- def uri?
- URI.valid?(self)
- end
-end
-
-module Kernel
-
-=begin
- # overwrite backtick operator to catch system errors
- # Override raises an error if _cmd_ returns a non-zero exit status. CH: I do not understand this comment
- # Returns stdout if _cmd_ succeeds. Note that these are simply concatenated; STDERR is not inline. CH: I do not understand this comment
- def ` cmd
- stdout, stderr = ''
- status = Open4::popen4(cmd) do |pid, stdin_stream, stdout_stream, stderr_stream|
- stdout = stdout_stream.read
- stderr = stderr_stream.read
- end
- internal_server_error "`" + cmd + "` failed.\n" + stdout + stderr unless status.success?
- return stdout
- rescue
- internal_server_error $!.message
- end
-=end
-
- # @return [String] uri of task result, if task fails, an error according to task is raised
- def wait_for_task uri
- if URI.task?(uri)
- t = OpenTox::Task.new uri
- t.wait
- unless t.completed?
- error = OpenTox::RestClientWrapper.known_errors.select{|error| error[:code] == t.code}.first
- error_method = error ? error[:method] : :internal_server_error
- report = t.error_report
- error_message = report ? report[:message] : $!.message
- error_cause = report ? report[:errorCause] : nil
- Object.send(error_method,error_message,t.uri,error_cause)
- end
- uri = t.resultURI
- end
- uri
- end
-
-
-end
-module URI
-
- def self.compound? uri
- uri =~ /compound/ and URI.valid? uri
- end
-
- def self.task? uri
- uri =~ /task/ and URI.valid? uri
- end
-
- def self.dataset? uri
- uri =~ /dataset/ and URI.accessible? uri
- end
-
- def self.model? uri
- uri =~ /model/ and URI.accessible? uri
- end
-
- def self.ssl? uri
- URI.parse(uri).instance_of? URI::HTTPS
- end
-
- # @return [Boolean] checks if resource exists by making a HEAD-request
- def self.accessible?(uri)
- parsed_uri = URI.parse(uri + (OpenTox::RestClientWrapper.subjectid ? "?subjectid=#{CGI.escape OpenTox::RestClientWrapper.subjectid}" : ""))
- http_code = URI.task?(uri) ? 600 : 400
- http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
- unless (URI.ssl? uri) == true
- http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
- request = Net::HTTP::Head.new(parsed_uri.request_uri)
- http.request(request).code.to_i < http_code
- else
- http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
- http.use_ssl = true
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
- request = Net::HTTP::Head.new(parsed_uri.request_uri)
- http.request(request).code.to_i < http_code
- end
- rescue
- false
- end
-
- def self.valid? uri
- u = URI.parse(uri)
- u.scheme!=nil and u.host!=nil
- rescue URI::InvalidURIError
- false
- end
-
-end
diff --git a/lib/lazar.rb b/lib/lazar.rb
new file mode 100644
index 0000000..8831ba2
--- /dev/null
+++ b/lib/lazar.rb
@@ -0,0 +1,46 @@
+require 'rubygems'
+require "bundler/setup"
+require "rest-client"
+require 'yaml'
+require 'json'
+require 'logger'
+require 'mongoid'
+require 'rserve'
+
+# Mongo setup
+# TODO retrieve correct environment from Rack/Sinatra
+ENV["MONGOID_ENV"] ||= "development" # only applies when MONGOID_ENV is not already set
+# TODO remove config files, change default via ENV or directly in Mongoid class
+Mongoid.load!("#{ENV['HOME']}/.opentox/config/mongoid.yml")
+# TODO get Mongo::Client from Mongoid
+$mongo = Mongo::Client.new('mongodb://127.0.0.1:27017/opentox') # hard-coded URI, not read from mongoid.yml; created before the Mongo logger is configured below
+# TODO same for GridFS
+$gridfs = $mongo.database.fs
+
+# R setup
+R = Rserve::Connection.new # created at load time; presumably needs a running Rserve instance - confirm
+
+# Logger setup
+$logger = Logger.new STDOUT # STDERR did not work on my development machine (CH)
+$logger.level = Logger::DEBUG
+Mongo::Logger.logger = $logger
+Mongo::Logger.level = Logger::WARN # NOTE(review): the Mongo logger is $logger itself, so this may lower $logger to WARN and hide debug output - confirm
+#Mongoid.logger = $logger
+
+# OpenTox classes and includes
+CLASSES = ["Feature","Compound", "Dataset", "Validation", "CrossValidation"]# Algorithm and Models are modules
+
+[ # be aware of the require sequence as it affects class/method overwrites
+ "overwrite.rb",
+ "rest-client-wrapper.rb",
+ "error.rb",
+ "opentox.rb",
+ "feature.rb",
+ "compound.rb",
+ "dataset.rb",
+ "descriptor.rb",
+ #"algorithm.rb",
+ #"model.rb",
+ #"validation.rb"
+].each{ |f| require_relative f }
+
diff --git a/lib/model.rb b/lib/model.rb
deleted file mode 100644
index 2b90a46..0000000
--- a/lib/model.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-module OpenTox
-
- module Model
-
- def feature_type
- unless @feature_type
- bad_request_error "Cannot determine feature type, dependent variable missing in model #{@uri}" unless metadata[RDF::OT.dependentVariables]
- @feature_type = OpenTox::Feature.new( metadata[RDF::OT.dependentVariables][0]).feature_type
- end
- @feature_type
- end
-
- def predicted_variable
- load_predicted_variables unless defined? @predicted_variable
- @predicted_variable
- end
-
- def predicted_confidence
- load_predicted_variables unless defined? @predicted_confidence
- @predicted_confidence
- end
-
- private
- def load_predicted_variables
- metadata[RDF::OT.predictedVariables].each do |f|
- feat = OpenTox::Feature.new( f)
- if feat.title =~ /confidence/
- @predicted_confidence = f
- else
- @predicted_variable = f unless @predicted_variable
- end
- end
- end
-
- class Generic
- include OpenTox
- include OpenTox::Algorithm
- include Model
-
- def self.find uri
- URI.accessible?(uri) ? self.new(uri) : nil
- end
-
- def predict params
- run params
- end
- end
-
- class Lazar < Generic
-
- def self.create params
- Lazar.new(File.join($algorithm[:uri], "lazar")).run params
- end
- end
- end
-end
diff --git a/lib/opentox-client.rb b/lib/opentox-client.rb
deleted file mode 100644
index e1e27c9..0000000
--- a/lib/opentox-client.rb
+++ /dev/null
@@ -1,52 +0,0 @@
-require 'rubygems'
-require "bundler/setup"
-require "rest-client"
-require 'yaml'
-require 'json'
-require 'logger'
-require 'mongoid'
-require 'rserve'
-
-# TODO store development/test, validation, production in separate databases
-ENV["MONGOID_ENV"] ||= "development"
-Mongoid.load!("#{ENV['HOME']}/.opentox/config/mongoid.yml")
-R = Rserve::Connection.new
-
-CLASSES = ["Feature","Compound", "Dataset", "Validation", "CrossValidation"]#, "Task", "Investigation"]
-#CLASSES = ["Feature", "Dataset", "Validation", "Task", "Investigation"]
-
-# Regular expressions for parsing classification data
-#TRUE_REGEXP = /^(true|active|1|1.0|tox|activating|carcinogen|mutagenic)$/i
-#FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating|non-carcinogen|non-mutagenic)$/i
-
-[
- "overwrite.rb",
- "rest-client-wrapper.rb",
- "error.rb",
- #"authorization.rb",
- #"policy.rb",
- #"otlogger.rb",
- "opentox.rb",
- #"task.rb",
- "feature.rb",
- "compound.rb",
- #"data_entry.rb",
- "dataset.rb",
- #"algorithm.rb",
- #"model.rb",
- #"validation.rb"
-].each{ |f| require_relative f }
-
-#if defined?($aa) and $aa[:uri]
-# OpenTox::Authorization.authenticate($aa[:user],$aa[:password])
-# unauthorized_error "Failed to authenticate user \"#{$aa[:user]}\"." unless OpenTox::Authorization.is_token_valid(OpenTox::RestClientWrapper.subjectid)
-#end
-
-# defaults to stderr, may be changed to file output (e.g in opentox-service)
-$logger = Logger.new STDOUT #OTLogger.new(STDOUT) # STDERR did not work on my development machine (CH)
-$logger.level = Logger::DEBUG
-#Mongo::Logger.logger = $logger
-Mongo::Logger.level = Logger::WARN
-$mongo = Mongo::Client.new('mongodb://127.0.0.1:27017/opentox')
-$gridfs = $mongo.database.fs
-#Mongoid.logger = $logger
diff --git a/lib/otlogger.rb b/lib/otlogger.rb
deleted file mode 100644
index 0f0caa4..0000000
--- a/lib/otlogger.rb
+++ /dev/null
@@ -1,47 +0,0 @@
-
-# extend logger to add current source file, line-number and source location where the log command is called
-class OTLogger < Logger
-
- def pwd
- path = Dir.pwd.to_s
- index = path.rindex(/\//)
- return path if index==nil
- path[(index+1)..-1]
- end
-
- def trace()
- lines = caller(0)
- n = 2
- line = lines[n]
-
- while (line =~ /error.rb/ or line =~ /create/ or line =~ /#{File.basename(__FILE__)}/)
- n += 1
- line = lines[n]
- end
-
- index = line.rindex(/\/.*\.rb/)
- return line if index==nil
- line[index..-1]
- end
-
- def format(msg)
- pwd.ljust(18)+" :: "+msg.to_s+" :: "+trace
- end
-
- def debug(msg)
- super format(msg)
- end
-
- def info(msg)
- super format(msg)
- end
-
- def warn(msg)
- super format(msg)
- end
-
- def error(msg)
- super format(msg)
- end
-
-end
diff --git a/lib/policy.rb b/lib/policy.rb
deleted file mode 100644
index e5676ba..0000000
--- a/lib/policy.rb
+++ /dev/null
@@ -1,354 +0,0 @@
-module OpenTox
- require "rexml/document"
-
- #Module for policy-processing
- # @see also http://www.opentox.org/dev/apis/api-1.2/AA for opentox API specs
- # Class Policies corresponds to <policies> container of an xml-policy-file
- class Policies
-
- #Hash for policy objects see {Policy Policy}
- attr_accessor :policies, :name
-
- def initialize()
- @policies = {}
- end
-
- #create new policy instance with name
- # @param [String]name of the policy
- def new_policy(name)
- @policies[name] = Policy.new(name)
- end
-
- #drop a specific policy in a policies instance
- # @param [String]name of the policy
- # @return [Boolean]
- def drop_policy(name)
- return true if @policies.delete(name)
- end
-
- #drop all policies in a policies instance
- def drop_policies
- @policies.each do |name, policy|
- drop_policy(name)
- end
- return true
- end
-
- # @return [Array] set of arrays affected by policies
- def uris
- @policies.collect{ |k,v| v.uri }.flatten.uniq
- end
-
- #list all policy names in a policies instance
- # @return [Array]
- def names
- out = []
- @policies.each do |name, policy|
- out << name
- end
- return out
- end
-
- # Loads a default policy template in a policies instance
- # @param [String]user username in LDAP string of user policy: 'uid=<user>,ou=people,dc=opentox,dc=org'
- # @param [String]uri URI
- # @param [String]group groupname in LDAP string of group policy: 'cn=<group>,ou=groups,dc=opentox,dc=org'
- def load_default_policy(user, uri, group="member")
- template = case user
- when "guest", "anonymous" then "default_guest_policy"
- else "default_policy"
- end
- xml = get_xml_template(template)
- self.load_xml(xml)
- datestring = Time.now.strftime("%Y-%m-%d-%H-%M-%S-x") + rand(1000).to_s
-
- @policies["policy_user"].name = "policy_user_#{user}_#{datestring}"
- @policies["policy_user"].rule.uri = uri
- @policies["policy_user"].rule.name = "rule_user_#{user}_#{datestring}"
- @policies["policy_user"].subject.name = "subject_user_#{user}_#{datestring}"
- @policies["policy_user"].subject.value = "uid=#{user},ou=people,dc=opentox,dc=org"
- @policies["policy_user"].subject_group = "subjects_user_#{user}_#{datestring}"
-
- @policies["policy_group"].name = "policy_group_#{group}_#{datestring}"
- @policies["policy_group"].rule.uri = uri
- @policies["policy_group"].rule.name = "rule_group_#{group}_#{datestring}"
- @policies["policy_group"].subject.name = "subject_group_#{group}_#{datestring}"
- @policies["policy_group"].subject.value = "cn=#{group},ou=groups,dc=opentox,dc=org"
- @policies["policy_group"].subject_group = "subjects_#{group}_#{datestring}"
- return true
- end
-
- def get_xml_template(template)
- File.read(File.join(File.dirname(__FILE__), "templates/#{template}.xml"))
- end
-
- #loads a xml template
- def load_xml(xml)
- rexml = REXML::Document.new(xml)
- rexml.elements.each("Policies/Policy") do |pol| #Policies
- policy_name = pol.attributes["name"]
- new_policy(policy_name)
- #@policies[policy_name] = Policy.new(policy_name)
- rexml.elements.each("Policies/Policy[@name='#{policy_name}']/Rule") do |r| #Rules
- rule_name = r.attributes["name"]
- uri = rexml.elements["Policies/Policy[@name='#{policy_name}']/Rule[@name='#{rule_name}']/ResourceName"].attributes["name"]
- @policies[policy_name].rule.name = rule_name
- @policies[policy_name].uri = uri
- rexml.elements.each("Policies/Policy[@name='#{policy_name}']/Rule[@name='#{rule_name}']/AttributeValuePair") do |attribute_pairs|
- action=nil; value=nil;
- attribute_pairs.each_element do |elem|
- action = elem.attributes["name"] if elem.attributes["name"]
- value = elem.text if elem.text
- end
- if action and value
- case action
- when "GET"
- @policies[policy_name].rule.get = value
- when "POST"
- @policies[policy_name].rule.post = value
- when "PUT"
- @policies[policy_name].rule.put = value
- when "DELETE"
- @policies[policy_name].rule.delete = value
- end
- end
- end
- end
- rexml.elements.each("Policies/Policy[@name='#{policy_name}']/Subjects") do |subjects| #Subjects
- @policies[policy_name].subject_group = subjects.attributes["name"]
- rexml.elements.each("Policies/Policy[@name='#{policy_name}']/Subjects[@name='#{@policies[policy_name].subject_group}']/Subject") do |s| #Subject
- subject_name = s.attributes["name"]
- subject_type = s.attributes["type"]
- subject_value = rexml.elements["Policies/Policy[@name='#{policy_name}']/Subjects[@name='#{@policies[policy_name].subject_group}']/Subject[@name='#{subject_name}']/AttributeValuePair/Value"].text
- if subject_name and subject_type and subject_value
- @policies[policy_name].subject.name = subject_name
- @policies[policy_name].type = subject_type
- @policies[policy_name].value = subject_value
- end
- end
- end
- end
- end
-
- #generates xml from policies instance
- def to_xml
- doc = REXML::Document.new()
- doc << REXML::DocType.new("Policies", "PUBLIC \"-//Sun Java System Access Manager7.1 2006Q3\n Admin CLI DTD//EN\" \"jar://com/sun/identity/policy/policyAdmin.dtd\"")
- doc.add_element(REXML::Element.new("Policies"))
-
- @policies.each do |name, pol|
- policy = REXML::Element.new("Policy")
- policy.attributes["name"] = pol.name
- policy.attributes["referralPolicy"] = false
- policy.attributes["active"] = true
- rule = @policies[name].rule
- out_rule = REXML::Element.new("Rule")
- out_rule.attributes["name"] = rule.name
- servicename = REXML::Element.new("ServiceName")
- servicename.attributes["name"]="iPlanetAMWebAgentService"
- out_rule.add_element(servicename)
- rescourcename = REXML::Element.new("ResourceName")
- rescourcename.attributes["name"] = rule.uri
- out_rule.add_element(rescourcename)
-
- ["get","post","delete","put"].each do |act|
- if rule.method(act).call
- attribute = REXML::Element.new("Attribute")
- attribute.attributes["name"] = act.upcase
- attributevaluepair = REXML::Element.new("AttributeValuePair")
- attributevaluepair.add_element(attribute)
- attributevalue = REXML::Element.new("Value")
- attributevaluepair.add_element(attributevalue)
- attributevalue.add_text REXML::Text.new(rule.method(act).call)
- out_rule.add_element(attributevaluepair)
- end
- end
- policy.add_element(out_rule)
-
- subjects = REXML::Element.new("Subjects")
- subjects.attributes["name"] = pol.subject_group
- subjects.attributes["description"] = ""
- subj = @policies[name].subject.name
- subject = REXML::Element.new("Subject")
- subject.attributes["name"] = pol.subject.name
- subject.attributes["type"] = pol.subject.type
- subject.attributes["includeType"] = "inclusive"
- attributevaluepair = REXML::Element.new("AttributeValuePair")
- attribute = REXML::Element.new("Attribute")
- attribute.attributes["name"] = "Values"
- attributevaluepair.add_element(attribute)
- attributevalue = REXML::Element.new("Value")
- attributevalue.add_text REXML::Text.new(pol.subject.value)
- attributevaluepair.add_element(attributevalue)
- subject.add_element(attributevaluepair)
- subjects.add_element(subject)
- policy.add_element(subjects)
- doc.root.add_element(policy)
- end
- out = ""
- doc.write(out, 2)
- return out
- end
-
- end
-
- #single policy in a {Policies Policies} instance
- class Policy
-
- attr_accessor :name, :rule, :subject_group, :subject, :value, :type, :uri, :group, :user
-
- def initialize(name)
- @name = name
- @rule = Rule.new("#{name}_rule", nil)
- @subject_group = "#{name}_subjects"
- @subject = Subject.new("#{name}_subject", nil, nil)
- end
-
- # Subject type LDAPUsers or LDAPGroups
- # @return [String]
- def type
- @subject.type
- end
-
- # Set subject type <LDAPUsers, LDAPGroups>
- # @param type [String] the subjecttype
- def type=(type)
- @subject.type = type
- end
-
- # returns LDAP Distinguished Name (DN) e.g. uid=username,ou=people,dc=opentox,dc=org or cn=membergroup,ou=groups,dc=opentox,dc=org
- def value
- @subject.value
- end
-
- # sets LDAP Distinguished Name (DN) for policy e.g.
- # @param value [String] LDAPString
- def value=(value)
- @subject.value = value
- end
-
- # uri affected by policy
- # @return [String] uri affected by policy
- def uri
- @rule.uri
- end
-
- # sets uri affected by policy
- # @param uri [String] set URI
- def uri=(uri)
- @rule.uri = uri
- end
-
- # Get the groupname from within the LDAP Distinguished Name (DN)
- def group
- return false if !value && type != "LDAPGroups"
- value.split(",").each{|part| return part.gsub("cn=","") if part.match("cn=")}
- end
-
- # Get the username from within the LDAP Distinguished Name (DN)
- def user
- return false if !value && type != "LDAPUsers"
- value.split(",").each{|part| return part.gsub("uid=","") if part.match("uid=")}
- end
-
- # helper method sets value and type to opentox LDAP Distinguished Name (DN) of a user
- # @param username [String] set a username into LDAP DN
- def set_ot_user(username)
- self.value = "uid=#{username},ou=people,dc=opentox,dc=org"
- self.type = "LDAPUsers"
- true
- end
-
- # @param groupname [String] Username set a groupname into LDAP DN
- def set_ot_group(groupname)
- self.value = "cn=#{groupname},ou=groups,dc=opentox,dc=org"
- self.type = "LDAPGroups"
- true
- end
-
- # policyrule
- # sets the permission for REST actions (GET, POST, PUT, DELETE) of a specific URI to allow/deny/nil
- class Rule
-
- attr_accessor :name, :uri, :get, :post, :put, :delete, :read, :readwrite
-
- def initialize(name, uri)
- @name = name
- @uri = uri
- end
-
- #Set Rule attribute for request-method GET
- # @param value [String] (allow,deny,nil)
- def get=(value)
- @get = check_value(value, @get)
- end
-
- #Set Rule attribute for request-method POST
- # @param [String]value (allow,deny,nil)
- def post=(value)
- @post = check_value(value, @post)
- end
-
- #Set Rule attribute for request-method DELETE
- # @param [String]value (allow,deny,nil)
- def delete=(value)
- @delete = check_value(value, @delete)
- end
-
- #Set Rule attribute for request-method PUT
- # @param [String]value (allow,deny,nil)
- def put=(value)
- @put = check_value(value, @put)
- end
-
- # read getter method
- def read
- return true if @get == "allow" && (@put == "deny" || !@put) && (@post == "deny" || !@post)
- end
-
- # readwrite getter method
- def readwrite
- return true if @get == "allow" && @put == "allow" && @post == "allow"
- end
-
- # Set(true case) or remove read(GET=allow) permissions.
- # @param [Boolean]value (true,false)
- def read=(value)
- if value
- @get = "allow"; @put = nil; @post = nil
- else
- @get = nil; @put = nil; @post = nil
- end
- end
-
- # Set(true case) or remove readwrite(GET=allow,POST=allow,PUT=allow) permissions.
- # @param [Boolean]value (true,false)
- def readwrite=(value)
- if value
- @get = "allow"; @put = "allow"; @post = "allow"
- else
- @get = nil; @put = nil; @post = nil
- end
- end
-
- private
- #checks if value is allow, deny or nil. returns old value if not valid.
- def check_value(new_value, old_value)
- return (new_value=="allow" || new_value=="deny" || new_value==nil) ? new_value : old_value
- end
- end
-
- # Subject of a policy
- # name(subjectname), type('LDAPUsers' or 'LDAPGroups'), value(LDAP DN e.G.:'uid=guest,ou=people,dc=opentox,dc=org')
- class Subject
-
- attr_accessor :name, :type, :value
-
- def initialize(name, type, value)
- @name = name
- @type = type
- @value = value
- end
- end
- end
-end
diff --git a/lib/task.rb b/lib/task.rb
deleted file mode 100644
index cd2dd92..0000000
--- a/lib/task.rb
+++ /dev/null
@@ -1,142 +0,0 @@
-# TODO: task seems to run twice, see fminser tests
-# TODO: do we need tasks for internal use
-DEFAULT_TASK_MAX_DURATION = 36000
-module OpenTox
- # TODO: fix error reports
- # TODO: fix field names and overwrite accessors
-
- # Class for handling asynchronous tasks
- class Task
- include Mongoid::Document
- include Mongoid::Timestamps
-
- field :creator, type: String
- field :percentageCompleted, type: Float
- field :error_code, type: Integer # workaround name, cannot overwrite accessors in current mongoid version
- field :finished, type: Time # workaround name, cannot overwrite accessors in current mongoid version
- # TODO
- field :result_type, type: String
- field :result_id, type: BSON::ObjectId
- field :report, type: String
- field :pid, type: Integer
- field :observer_pid, type: Integer
-
- def self.run(description, creator=nil)
-
- task = Task.new
- task[:description] = description.to_s
- task[:creator] = creator.to_s
- task[:percentageCompleted] = 0
- task[:error_code] = 202
- task.save
-
- pid = fork do
- begin
- task.completed yield
- rescue => e
- # wrap non-opentox-errors first
- e = OpenTox::Error.new(500,e.message,nil,e.backtrace) unless e.is_a?(OpenTox::Error)
- $logger.error "error in task #{task.id} created by #{creator}" # creator is not logged because error is logged when thrown
- task.update(:report => e.metadata, :error_code => e.http_code, :finished => Time.now)
- task.kill
- end
- end
- Process.detach(pid)
- task[:pid] = pid
-
- # watch if task has been cancelled
- observer_pid = fork do
- task.wait
- begin
- Process.kill(9,task[:pid]) if task.cancelled?
- rescue
- $logger.warn "Could not kill process of task #{task.id}, pid: #{task[:pid]}"
- end
- end
- Process.detach(observer_pid)
- task[:observer_pid] = observer_pid
- task
-
- end
-
- def kill
- Process.kill(9,task[:pid])
- Process.kill(9,task[:observer_pid])
- rescue # no need to raise an exception if processes are not running
- end
-
- def cancel
- kill
- update_attributes(:error_code => 503, :finished => Time.now)
- end
-
- def completed(result)
- update_attributes(:error_code => 200, :finished => Time.now, :percentageCompleted => 100, :result_type => result.type, :result_id => result.id)
- end
-
- # waits for a task, unless time exceeds or state is no longer running
- def wait
- start_time = Time.new
- due_to_time = start_time + DEFAULT_TASK_MAX_DURATION
- dur = 0.2
- while running?
- sleep dur
- dur = [[(Time.new - start_time)/20.0,0.3].max,300.0].min
- request_timeout_error "max wait time exceeded ("+DEFAULT_TASK_MAX_DURATION.to_s+"sec), task: '"+id.to_s+"'" if (Time.new > due_to_time)
- end
- end
-
- end
-
- def error_report
- OpenTox::Task.find(id).report
- end
-
- def code
- OpenTox::Task.find(id).error_code
- end
-
- def result
- c = OpenTox::Task.find(id).result_type.downcase.to_sym
- rid = OpenTox::Task.find(id).result_id
- p c, rid
- p $mongo[collection].all
- $mongo[collection].find(rid).first
- end
-
- def finished_at
- OpenTox::Task.find(id).finished
- end
-
- def running?
- code == 202
- end
-
- def cancelled?
- code == 503
- end
-
- def completed?
- code == 200
- end
-
- def error?
- code >= 400 and code != 503
- end
-
- # Check status of a task
- # @return [String] Status
- def status
- case code
- when 202
- "Running"
- when 200
- "Completed"
- when 503
- "Cancelled"
- else
- "Error"
- end
- end
-
-end
diff --git a/lib/templates/default_guest_policy.xml b/lib/templates/default_guest_policy.xml
deleted file mode 100644
index a778070..0000000
--- a/lib/templates/default_guest_policy.xml
+++ /dev/null
@@ -1,53 +0,0 @@
-<!DOCTYPE Policies PUBLIC "-//Sun Java System Access Manager7.1 2006Q3
- Admin CLI DTD//EN" "jar://com/sun/identity/policy/policyAdmin.dtd">
-
-<Policies>
-<Policy name="policy_user" referralPolicy="false" active="true">
- <Rule name="rule_user">
- <ServiceName name="iPlanetAMWebAgentService" />
- <ResourceName name="uri"/>
- <AttributeValuePair>
- <Attribute name="GET" />
- <Value>allow</Value>
- </AttributeValuePair>
- <AttributeValuePair>
- <Attribute name="POST" />
- <Value>allow</Value>
- </AttributeValuePair>
- <AttributeValuePair>
- <Attribute name="PUT" />
- <Value>allow</Value>
- </AttributeValuePair>
- <AttributeValuePair>
- <Attribute name="DELETE" />
- <Value>allow</Value>
- </AttributeValuePair>
- </Rule>
- <Subjects name="subjects_user" description="">
- <Subject name="subject_user" type="LDAPUsers" includeType="inclusive">
- <AttributeValuePair>
- <Attribute name="Values"/>
- <Value>uid=guest,ou=people,dc=opentox,dc=org</Value>
- </AttributeValuePair>
- </Subject>
- </Subjects>
-</Policy>
-<Policy name="policy_group" referralPolicy="false" active="true">
- <Rule name="rule_group">
- <ServiceName name="iPlanetAMWebAgentService" />
- <ResourceName name="uri"/>
- <AttributeValuePair>
- <Attribute name="GET" />
- <Value>allow</Value>
- </AttributeValuePair>
- </Rule>
- <Subjects name="subjects_group" description="">
- <Subject name="subject_group" type="LDAPGroups" includeType="inclusive">
- <AttributeValuePair>
- <Attribute name="Values"/>
- <Value>cn=member,ou=groups,dc=opentox,dc=org</Value>
- </AttributeValuePair>
- </Subject>
- </Subjects>
-</Policy>
-</Policies>
diff --git a/lib/templates/default_policy.xml b/lib/templates/default_policy.xml
deleted file mode 100644
index a778070..0000000
--- a/lib/templates/default_policy.xml
+++ /dev/null
@@ -1,53 +0,0 @@
-<!DOCTYPE Policies PUBLIC "-//Sun Java System Access Manager7.1 2006Q3
- Admin CLI DTD//EN" "jar://com/sun/identity/policy/policyAdmin.dtd">
-
-<Policies>
-<Policy name="policy_user" referralPolicy="false" active="true">
- <Rule name="rule_user">
- <ServiceName name="iPlanetAMWebAgentService" />
- <ResourceName name="uri"/>
- <AttributeValuePair>
- <Attribute name="GET" />
- <Value>allow</Value>
- </AttributeValuePair>
- <AttributeValuePair>
- <Attribute name="POST" />
- <Value>allow</Value>
- </AttributeValuePair>
- <AttributeValuePair>
- <Attribute name="PUT" />
- <Value>allow</Value>
- </AttributeValuePair>
- <AttributeValuePair>
- <Attribute name="DELETE" />
- <Value>allow</Value>
- </AttributeValuePair>
- </Rule>
- <Subjects name="subjects_user" description="">
- <Subject name="subject_user" type="LDAPUsers" includeType="inclusive">
- <AttributeValuePair>
- <Attribute name="Values"/>
- <Value>uid=guest,ou=people,dc=opentox,dc=org</Value>
- </AttributeValuePair>
- </Subject>
- </Subjects>
-</Policy>
-<Policy name="policy_group" referralPolicy="false" active="true">
- <Rule name="rule_group">
- <ServiceName name="iPlanetAMWebAgentService" />
- <ResourceName name="uri"/>
- <AttributeValuePair>
- <Attribute name="GET" />
- <Value>allow</Value>
- </AttributeValuePair>
- </Rule>
- <Subjects name="subjects_group" description="">
- <Subject name="subject_group" type="LDAPGroups" includeType="inclusive">
- <AttributeValuePair>
- <Attribute name="Values"/>
- <Value>cn=member,ou=groups,dc=opentox,dc=org</Value>
- </AttributeValuePair>
- </Subject>
- </Subjects>
-</Policy>
-</Policies>
diff --git a/lib/unique_descriptors.rb b/lib/unique_descriptors.rb
new file mode 100644
index 0000000..676f34a
--- /dev/null
+++ b/lib/unique_descriptors.rb
@@ -0,0 +1,120 @@
+# set of non redundant descriptors, faster algorithms are preferred
+# TODO:
+# select logP algorithm
+# select l5 algorithm
+# use smarts matcher for atom counts
+# check correlations
+UNIQUEDESCRIPTORS = [
+ "Openbabel.abonds", #Number of aromatic bonds
+ "Openbabel.atoms", #Number of atoms
+ "Openbabel.bonds", #Number of bonds
+ "Openbabel.dbonds", #Number of double bonds
+ "Openbabel.HBA1", #Number of Hydrogen Bond Acceptors 1 (JoelLib)
+ "Openbabel.HBA2", #Number of Hydrogen Bond Acceptors 2 (JoelLib)
+ "Openbabel.HBD", #Number of Hydrogen Bond Donors (JoelLib)
+ "Openbabel.L5", #Lipinski Rule of Five
+ "Openbabel.logP", #octanol/water partition coefficient
+ "Openbabel.MP", #Melting point
+ "Openbabel.MR", #molar refractivity
+ "Openbabel.MW", #Molecular Weight filter
+ "Openbabel.nF", #Number of Fluorine Atoms
+ "Openbabel.sbonds", #Number of single bonds
+ "Openbabel.tbonds", #Number of triple bonds
+ "Openbabel.TPSA", #topological polar surface area
+ "Cdk.ALOGP", #Calculates atom additive logP and molar refractivity values as described by Ghose and Crippen.
+ "Cdk.APol", #Descriptor that calculates the sum of the atomic polarizabilities (including implicit hydrogens).
+ "Cdk.AcidicGroupCount", #Returns the number of acidic groups.
+ "Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
+ #"Cdk.AromaticAtomsCount", #Descriptor based on the number of aromatic atoms of a molecule.
+ #"Cdk.AromaticBondsCount", #Descriptor based on the number of aromatic bonds of a molecule.
+ #"Cdk.AtomCount", #Descriptor based on the number of atoms of a certain element type.
+ "Cdk.AutocorrelationCharge", #The Moreau-Broto autocorrelation descriptors using partial charges
+ "Cdk.AutocorrelationMass", #The Moreau-Broto autocorrelation descriptors using atomic weight
+ "Cdk.AutocorrelationPolarizability", #The Moreau-Broto autocorrelation descriptors using polarizability
+ "Cdk.BCUT", #Eigenvalue based descriptor noted for its utility in chemical diversity described by Pearlman et al.
+ "Cdk.BPol", #Descriptor that calculates the sum of the absolute value of the difference between atomic polarizabilities of all bonded atoms in the molecule (including implicit hydrogens).
+ "Cdk.BasicGroupCount", #Returns the number of basic groups.
+ #"Cdk.BondCount", #Descriptor based on the number of bonds of a certain bond order.
+ "Cdk.CPSA", #A variety of descriptors combining surface area and partial charge information
+ "Cdk.CarbonTypes", #Characterizes the carbon connectivity in terms of hybridization
+ "Cdk.ChiChain", #Evaluates the Kier & Hall Chi chain indices of orders 3,4,5 and 6
+ "Cdk.ChiCluster", #Evaluates the Kier & Hall Chi cluster indices of orders 3,4,5,6 and 7
+ "Cdk.ChiPathCluster", #Evaluates the Kier & Hall Chi path cluster indices of orders 4,5 and 6
+ "Cdk.ChiPath", #Evaluates the Kier & Hall Chi path indices of orders 0,1,2,3,4,5,6 and 7
+ "Cdk.EccentricConnectivityIndex", #A topological descriptor combining distance and adjacency information.
+ "Cdk.FMF", #Descriptor characterizing molecular complexity in terms of its Murcko framework
+ "Cdk.FragmentComplexity", #Class that returns the complexity of a system. The complexity is defined as @cdk.cite{Nilakantan06}
+ "Cdk.GravitationalIndex", #Descriptor characterizing the mass distribution of the molecule.
+ #"Cdk.HBondAcceptorCount", #Descriptor that calculates the number of hydrogen bond acceptors.
+ #"Cdk.HBondDonorCount", #Descriptor that calculates the number of hydrogen bond donors.
+ "Cdk.HybridizationRatio", #Characterizes molecular complexity in terms of carbon hybridization states.
+ "Cdk.IPMolecularLearning", #Descriptor that evaluates the ionization potential.
+ "Cdk.KappaShapeIndices", #Descriptor that calculates Kier and Hall kappa molecular shape indices.
+ "Cdk.KierHallSmarts", #Counts the number of occurrences of the E-state fragments
+ "Cdk.LargestChain", #Returns the number of atoms in the largest chain
+ "Cdk.LargestPiSystem", #Returns the number of atoms in the largest pi chain
+ "Cdk.LengthOverBreadth", #Calculates the ratio of length to breadth.
+ "Cdk.LongestAliphaticChain", #Returns the number of atoms in the longest aliphatic chain
+ "Cdk.MDE", #Evaluate molecular distance edge descriptors for C, N and O
+ "Cdk.MannholdLogP", #Descriptor that calculates the LogP based on a simple equation using the number of carbons and hetero atoms.
+ "Cdk.MomentOfInertia", #Descriptor that calculates the principal moments of inertia and ratios of the principal moments. Also calculates the radius of gyration.
+ "Cdk.PetitjeanNumber", #Descriptor that calculates the Petitjean Number of a molecule.
+ "Cdk.PetitjeanShapeIndex", #The topological and geometric shape indices described by Petitjean and Bath et al. respectively. Both measure the anisotropy in a molecule.
+ "Cdk.RotatableBondsCount", #Descriptor that calculates the number of nonrotatable bonds on a molecule.
+ #"Cdk.RuleOfFive", #This Class contains a method that returns the number of failures of Lipinski's Rule Of Five.
+ #"Cdk.TPSA", #Calculation of topological polar surface area based on fragment contributions.
+ "Cdk.VABC", #Describes the volume of a molecule.
+ "Cdk.VAdjMa", #Descriptor that calculates the vertex adjacency information of a molecule.
+ "Cdk.WHIM", #Holistic descriptors described by Todeschini et al.
+ #"Cdk.Weight", #Descriptor based on the weight of atoms of a certain element type. If no element is specified, the returned value is the Molecular Weight
+ "Cdk.WeightedPath", #The weighted path (molecular ID) descriptors described by Randic. They characterize molecular branching.
+ "Cdk.WienerNumbers", #This class calculates Wiener path number and Wiener polarity number.
+ "Cdk.XLogP", #Prediction of logP based on the atom-type method called XLogP.
+ "Cdk.ZagrebIndex", #The sum of the squared atom degrees of all heavy atoms.
+ "Joelib.count.NumberOfS", #no description available
+ "Joelib.count.NumberOfP", #no description available
+ "Joelib.count.NumberOfO", #no description available
+ "Joelib.count.NumberOfN", #no description available
+ #"Joelib.count.AromaticBonds", #no description available
+ "Joelib.count.NumberOfI", #no description available
+ "Joelib.count.NumberOfF", #no description available
+ "Joelib.count.NumberOfC", #no description available
+ "Joelib.count.NumberOfB", #no description available
+ "Joelib.count.HydrophobicGroups", #no description available
+ #"Joelib.KierShape3", #no description available
+ #"Joelib.KierShape2", #no description available
+ #"Joelib.KierShape1", #no description available
+ #"Joelib.count.AcidicGroups", #no description available
+ "Joelib.count.AliphaticOHGroups", #no description available
+ #"Joelib.count.NumberOfAtoms", #no description available
+ "Joelib.TopologicalRadius", #no description available
+ "Joelib.GeometricalShapeCoefficient", #no description available
+ #"Joelib.MolecularWeight", #no description available
+ "Joelib.FractionRotatableBonds", #no description available
+ #"Joelib.count.HBD2", #no description available
+ #"Joelib.count.HBD1", #no description available
+ "Joelib.LogP", #no description available
+ "Joelib.GraphShapeCoefficient", #no description available
+ "Joelib.count.BasicGroups", #no description available
+ #"Joelib.count.RotatableBonds", #no description available
+ "Joelib.count.HeavyBonds", #no description available
+ "Joelib.PolarSurfaceArea", #no description available
+ #"Joelib.ZagrebIndex1", #no description available
+ "Joelib.GeometricalRadius", #no description available
+ "Joelib.count.SO2Groups", #no description available
+ "Joelib.count.AromaticOHGroups", #no description available
+ "Joelib.GeometricalDiameter", #no description available
+ #"Joelib.MolarRefractivity", #no description available
+ "Joelib.count.NumberOfCl", #no description available
+ "Joelib.count.OSOGroups", #no description available
+ "Joelib.count.NumberOfBr", #no description available
+ "Joelib.count.NO2Groups", #no description available
+ "Joelib.count.HeteroCycles", #no description available
+ #"Joelib.count.HBA2", #no description available
+ #"Joelib.count.HBA1", #no description available
+ #"Joelib.count.NumberOfBonds", #no description available
+ "Joelib.count.SOGroups", #no description available
+ "Joelib.TopologicalDiameter", #no description available
+ "Joelib.count.NumberOfHal", #no description available
+
+].sort
diff --git a/lib/validation.rb b/lib/validation.rb
deleted file mode 100644
index deba1e3..0000000
--- a/lib/validation.rb
+++ /dev/null
@@ -1,348 +0,0 @@
-require "yaml"
-
-module OldOpenTox
- attr_accessor :metadata, :uri
-
- def initialize(uri=nil)
- @metadata = {}
- self.uri = uri if uri
- end
-
- # loads metadata via yaml
- def load_metadata
- yaml = OpenTox::RestClientWrapper.get(uri,nil,{:accept => "application/x-yaml"})
- @metadata = YAML.load(yaml)
- end
-
- def delete
- OpenTox::RestClientWrapper.delete @uri.to_s
- end
-end
-
-module OpenTox
-
- class Validation
- include OldOpenTox
-
- # find validation, raises error if not found
- # @param [String] uri
- # @return [OpenTox::Validation]
- def self.find( uri )
- val = Validation.new(uri)
- val.load_metadata
- val
- end
-
- # returns a filtered list of validation uris
- # @param params [Hash,optional] validation-params to filter the uris (could be model, training_dataset, ..)
- # @return [Array]
- def self.list( params={} )
- filter_string = ""
- params.each do |k,v|
- filter_string += (filter_string.length==0 ? "?" : "&")
- v = v.to_s.gsub(/;/, "%3b") if v.to_s =~ /;/
- filter_string += k.to_s+"="+v.to_s
- end
- (OpenTox::RestClientWrapper.get($validation[:uri]+filter_string).split("\n"))
- end
-
- # creates a training test split validation, waits until it finishes, may take some time
- # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,split_ratio(0.67),random_seed(1))
- # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [OpenTox::Validation]
- def self.create_training_test_split( params, waiting_task=nil )
- uri = OpenTox::RestClientWrapper.post( File.join($validation[:uri],"training_test_split"),
- params,{:content_type => "text/uri-list"},waiting_task )
- Validation.new(wait_for_task(uri))
- end
-
- # creates a training test validation, waits until it finishes, may take some time
- # @param [Hash] params (required:algorithm_uri,training_dataset_uri,prediction_feature,test_dataset_uri,optional:algorithm_params)
- # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [OpenTox::Validation]
- def self.create_training_test_validation( params, waiting_task=nil )
- uri = OpenTox::RestClientWrapper.post( File.join($validation[:uri],"training_test_validation"),
- params,{:content_type => "text/uri-list"},waiting_task )
- Validation.new(wait_for_task(uri))
- end
-
- # creates a bootstrapping validation, waits until it finishes, may take some time
- # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,random_seed(1))
- # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [OpenTox::Validation]
- def self.create_bootstrapping_validation( params, waiting_task=nil )
- uri = OpenTox::RestClientWrapper.post( File.join($validation[:uri],"bootstrapping"),
- params,{:content_type => "text/uri-list"},waiting_task )
- Validation.new(wait_for_task(uri))
- end
-
- # looks for report for this validation, creates a report if no report is found
- # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [String] report uri
- def find_or_create_report( waiting_task=nil )
- @report = ValidationReport.find_for_validation(@uri) unless @report
- @report = ValidationReport.create(@uri, waiting_task) unless @report
- @report.uri
- end
-
- # creates a validation object from crossvaldiation statistics, raise error if not found
- # (as crossvaldiation statistics are returned as an average valdidation over all folds)
- # @param crossvalidation_uri [String] crossvalidation uri
- # @return [OpenTox::Validation]
- def self.from_cv_statistics( crossvalidation_uri )
- find( File.join(crossvalidation_uri, 'statistics') )
- end
-
- # returns confusion matrix as array, predicted values are in rows
- # @example
- # [[nil,"active","moderate","inactive"],["active",1,3,99],["moderate",4,2,8],["inactive",3,8,6]]
- # -> 99 inactive compounds have been predicted as active
- def confusion_matrix
- raise "no classification statistics, probably a regression valdiation" unless @metadata[RDF::OT.classificationStatistics]
- matrix = @metadata[RDF::OT.classificationStatistics][RDF::OT.confusionMatrix][RDF::OT.confusionMatrixCell]
- values = matrix.collect{|cell| cell[RDF::OT.confusionMatrixPredicted]}.uniq
- table = [[nil]+values]
- values.each do |c|
- table << [c]
- values.each do |r|
- matrix.each do |cell|
- if cell[RDF::OT.confusionMatrixPredicted]==c and cell[RDF::OT.confusionMatrixActual]==r
- table[-1] << cell[RDF::OT.confusionMatrixValue].to_f
- break
- end
- end
- end
- end
- table
- end
-
- # filters the validation-predictions and returns validation-metadata with filtered statistics
- # @param min_confidence [Float] predictions with confidence < min_confidence are filtered out
- # @param min_num_predictions [Integer] optional, additional param to min_confidence, the top min_num_predictions are selected, even if confidence to low
- # @param max_num_predictions [Integer] returns the top max_num_predictions (with the highest confidence), not compatible to min_confidence
- # return [Hash] metadata
- def filter_metadata( min_confidence, min_num_predictions=nil, max_num_predictions=nil )
- conf = min_confidence ? "min_confidence=#{min_confidence}" : nil
- min = min_num_predictions ? "min_num_predictions=#{min_num_predictions}" : nil
- max = max_num_predictions ? "max_num_predictions=#{max_num_predictions}" : nil
- YAML.load(OpenTox::RestClientWrapper.get("#{@uri}?#{[conf,min,max].compact.join("&")}",nil,{:accept => "application/x-yaml"}))
- end
-
- # returns probability-distribution for a given prediction
- # it takes all predictions into account that have a confidence value that is >= confidence and that have the same predicted value
- # (minimum 12 predictions with the hightest confidence are selected (even if the confidence is lower than the given param)
- #
- # @param confidence [Float] confidence value (between 0 and 1)
- # @param prediction [String] predicted value
- # @return [Hash] see example
- # @example
- # Example 1:
- # validation.probabilities(0.3,"active")
- # -> { :min_confidence=>0.32, :num_predictions=>20, :probs=>{"active"=>0.7, "moderate"=>0.25 "inactive"=>0.05 } }
- # there have been 20 "active" predictions with confidence >= 0.3, 70 percent of them beeing correct
- #
- # Example 2:
- # validation.probabilities(0.8,"active")
- # -> { :min_confidence=>0.45, :num_predictions=>12, :probs=>{"active"=>0.9, "moderate"=>0.1 "inactive"=>0 } }
- # the given confidence value was to high (i.e. <12 predictions with confidence value >= 0.8)
- # the top 12 "active" predictions have a min_confidence of 0.45, 90 percent of them beeing correct
- #
- def probabilities( confidence, prediction )
- YAML.load(OpenTox::RestClientWrapper.get(@uri+"/probabilities?prediction="+prediction.to_s+"&confidence="+confidence.to_s,nil,
- {:accept => "application/x-yaml"}))
- end
- end
-
- class Crossvalidation
- include OldOpenTox
-
- attr_reader :report
-
- # find crossvalidation, raises error if not found
- # @param [String] uri
- # @return [OpenTox::Crossvalidation]
- def self.find( uri )
- cv = Crossvalidation.new(uri)
- cv.load_metadata
- cv
- end
-
- # returns a filtered list of crossvalidation uris
- # @param params [Hash,optional] crossvalidation-params to filter the uris (could be algorithm, dataset, ..)
- # @return [Array]
- def self.list( params={} )
- filter_string = ""
- params.each do |k,v|
- filter_string += (filter_string.length==0 ? "?" : "&")
- v = v.to_s.gsub(/;/, "%3b") if v.to_s =~ /;/
- filter_string += k.to_s+"="+v.to_s
- end
- (OpenTox::RestClientWrapper.get(File.join($validation[:uri],"crossvalidation")+filter_string).split("\n"))
- end
-
- # creates a crossvalidations, waits until it finishes, may take some time
- # @param [Hash] params (required:algorithm_uri,dataset_uri,prediction_feature, optional:algorithm_params,num_folds(10),random_seed(1),stratified(false))
- # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [OpenTox::Crossvalidation]
- def self.create( params, waiting_task=nil )
- uri = OpenTox::RestClientWrapper.post( File.join($validation[:uri],"crossvalidation"),
- params,{:content_type => "text/uri-list"},waiting_task )
- uri = wait_for_task(uri)
- Crossvalidation.new(uri)
- end
-
- # looks for report for this crossvalidation, creates a report if no report is found
- # @param [OpenTox::Task,optional] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [String] report uri
- def find_or_create_report( waiting_task=nil )
- @report = CrossvalidationReport.find_for_crossvalidation(@uri) unless @report
- @report = CrossvalidationReport.create(@uri, waiting_task) unless @report
- @report.uri
- end
-
- # loads metadata via yaml from crossvalidation object
- # fields (like for example the validations) can be acces via validation.metadata[RDF::OT.validation]
- def load_metadata
- @metadata = YAML.load(OpenTox::RestClientWrapper.get(uri,nil,{:accept => "application/x-yaml"}))
- end
-
- # returns a Validation object containing the statistics of the crossavlidation
- def statistics
- Validation.from_cv_statistics( @uri )
- end
-
- # documentation see OpenTox::Validation.probabilities
- def probabilities( confidence, prediction )
- YAML.load(OpenTox::RestClientWrapper.get(@uri+"/statistics/probabilities?prediction="+prediction.to_s+"&confidence="+confidence.to_s,nil,
- {:accept => "application/x-yaml"}))
- end
-
- end
-
- class ValidationReport
- include OldOpenTox
-
- # finds ValidationReport via uri, raises error if not found
- # @param [String] uri
- # @return [OpenTox::ValidationReport]
- def self.find( uri )
- OpenTox::RestClientWrapper.get(uri)
- rep = ValidationReport.new(uri)
- rep.load_metadata
- rep
- end
-
- # finds ValidationReport for a particular validation
- # @param validation_uri [String] crossvalidation uri
- # @return [OpenTox::ValidationReport] nil if no report found
- def self.find_for_validation( validation_uri )
- uris = RestClientWrapper.get(File.join($validation[:uri],
- "/report/validation?validation="+validation_uri)).chomp.split("\n")
- uris.size==0 ? nil : ValidationReport.new(uris[-1])
- end
-
- # creates a validation report via validation
- # @param validation_uri [String] validation uri
- # @param params [Hash] params addiditonal possible
- # (min_confidence, params={}, min_num_predictions, max_num_predictions)
- # @param waiting_task [OpenTox::Task,optional] (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [OpenTox::ValidationReport]
- def self.create( validation_uri, params={}, waiting_task=nil )
- params = {} if params==nil
- bad_request_error "params is no hash" unless params.is_a?(Hash)
- params[:validation_uris] = validation_uri
- uri = RestClientWrapper.post(File.join($validation[:uri],"/report/validation"),
- params, {}, waiting_task )
- uri = wait_for_task(uri)
- ValidationReport.new(uri)
- end
-
- end
-
- class CrossvalidationReport
- include OldOpenTox
-
- # finds CrossvalidationReport via uri, raises error if not found
- # @param [String] uri
- # @return [OpenTox::CrossvalidationReport]
- def self.find( uri )
- OpenTox::RestClientWrapper.get(uri)
- rep = CrossvalidationReport.new(uri)
- rep.load_metadata
- rep
- end
-
- # finds CrossvalidationReport for a particular crossvalidation
- # @param crossvalidation_uri [String] crossvalidation uri
- # @return [OpenTox::CrossvalidationReport] nil if no report found
- def self.find_for_crossvalidation( crossvalidation_uri )
- uris = RestClientWrapper.get(File.join($validation[:uri],
- "/report/crossvalidation?crossvalidation="+crossvalidation_uri)).chomp.split("\n")
- uris.size==0 ? nil : CrossvalidationReport.new(uris[-1])
- end
-
- # creates a crossvalidation report via crossvalidation
- # @param crossvalidation_uri [String] crossvalidation uri
- # @param waiting_task [OpenTox::Task,optional] (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [OpenTox::CrossvalidationReport]
- def self.create( crossvalidation_uri, waiting_task=nil )
- uri = RestClientWrapper.post(File.join($validation[:uri],"/report/crossvalidation"),
- { :validation_uris => crossvalidation_uri }, {}, waiting_task )
- uri = wait_for_task(uri)
- CrossvalidationReport.new(uri)
- end
- end
-
-
- class AlgorithmComparisonReport
- include OldOpenTox
-
- # finds AlgorithmComparisonReport via uri, raises error if not found
- # @param [String] uri
- # @return [OpenTox::CrossvalidationReport]
- def self.find( uri )
- OpenTox::RestClientWrapper.get(uri)
- rep = AlgorithmComparisonReport.new(uri)
- rep.load_metadata
- rep
- end
-
- # finds AlgorithmComparisonReport for a particular crossvalidation
- # @param crossvalidation_uri [String] crossvalidation uri
- # @return [OpenTox::AlgorithmComparisonReport] nil if no report found
- def self.find_for_crossvalidation( crossvalidation_uri )
- uris = RestClientWrapper.get(File.join($validation[:uri],
- "/report/algorithm_comparison?crossvalidation="+crossvalidation_uri)).chomp.split("\n")
- uris.size==0 ? nil : AlgorithmComparisonReport.new(uris[-1])
- end
-
- # creates a algorithm comparison report via crossvalidation uris
- # @param crossvalidation_uri_hash [Hash] crossvalidation uri_hash, see example
- # @param params [Hash] params addiditonal possible
- # (ttest_significance, ttest_attributes, min_confidence, min_num_predictions, max_num_predictions)
- # @param waiting_task [OpenTox::Task,optional] (can be a OpenTox::Subtask as well), progress is updated accordingly
- # @return [OpenTox::AlgorithmComparisonReport]
- # example for hash:
- # { :lazar-bbrc => [ http://host/validation/crossvalidation/x1, http://host/validation/crossvalidation/x2 ],
- # :lazar-last => [ http://host/validation/crossvalidation/xy, http://host/validation/crossvalidation/xy ] }
- def self.create( crossvalidation_uri_hash, params={}, waiting_task=nil )
- identifier = []
- validation_uris = []
- crossvalidation_uri_hash.each do |id, uris|
- uris.each do |uri|
- identifier << id
- validation_uris << uri
- end
- end
- params = {} if params==nil
- raise OpenTox::BadRequestError.new "params is no hash" unless params.is_a?(Hash)
- params[:validation_uris] = validation_uris.join(",")
- params[:identifier] = identifier.join(",")
- uri = RestClientWrapper.post(File.join($validation[:uri],"/report/algorithm_comparison"), params, {}, waiting_task )
- uri = wait_for_task(uri)
- AlgorithmComparisonReport.new(uri)
- end
- end
-
-end
-