path: root/lib/algorithm.rb
diff options
Diffstat (limited to 'lib/algorithm.rb')
1 files changed, 230 insertions, 47 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index d7b57af..af8dfaf 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -1,82 +1,265 @@
+# R integration
+# workaround to initialize R non-interactively (former rinruby versions did this by default)
+# avoids compiling R with X
+R = nil
+require "rinruby"
module OpenTox
+ # Wrapper for OpenTox Algorithms
module Algorithm
+ include OpenTox
+ # Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters
+ # @param [optional,Hash] params Algorithm parameters
+ # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
+ # @return [String] URI of new resource (dataset, model, ...)
+ def run(params=nil, waiting_task=nil)
+, params, {:accept => 'text/uri-list'}, waiting_task).to_s
+ end
- class Generic
- attr_accessor :uri, :title, :date
- def self.find(uri)
- owl = OpenTox::Owl.from_uri(uri, "Algorithm")
- return
- end
+ # Get OWL-DL representation in RDF/XML format
+ # @return [application/rdf+xml] RDF/XML representation
+ def to_rdfxml
+ s =
+ s.add_algorithm(@uri,@metadata)
+ s.to_rdfxml
+ end
+ # Generic Algorithm class, should work with all OpenTox webservices
+ class Generic
+ include Algorithm
- protected
- def initialize(owl)
- @title = owl.get("title")
- @date = owl.get("date")
- @uri = owl.uri
+ # Find Generic Opentox Algorithm via URI, and loads metadata, could raise NotFound/NotAuthorized error
+ # @param [String] uri Algorithm URI
+ # @return [OpenTox::Algorithm::Generic] Algorithm instance
+ def self.find(uri, subjectid=nil)
+ return nil unless uri
+ alg =
+ alg.load_metadata( subjectid )
+ raise "cannot load algorithm metadata" if alg.metadata==nil or alg.metadata.size==0
+ alg
- class Fminer
+ # Fminer algorithms
+ module Fminer
+ include Algorithm
+ # Backbone Refinement Class mining (BBRC)
+ class BBRC
+ include Fminer
+ # Initialize bbrc algorithm
+ def initialize
+ super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
+ load_metadata
+ end
+ end
- def self.create_feature_dataset(params)
- LOGGER.debug File.basename(__FILE__) + ": creating feature dataset"
- resource =[:feature_generation_uri], :user => @@users[:users].keys[0], :password => @@users[:users].values[0])
- :dataset_uri => params[:dataset_uri], :feature_uri => params[:feature_uri]
+ # LAtent STructure Pattern Mining (LAST)
+ class LAST
+ include Fminer
+ # Initialize last algorithm
+ def initialize
+ super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
+ load_metadata
+ end
- def self.uri
- File.join(@@config[:services]["opentox-algorithm"], "fminer")
- end
- class Lazar
- def self.create_model(params)
- LOGGER.debug params
- LOGGER.debug File.basename(__FILE__) + ": creating model"
- LOGGER.debug File.join(@@config[:services]["opentox-algorithm"], "lazar")
- resource =[:services]["opentox-algorithm"], "lazar"), :user => @@users[:users].keys[0], :password => @@users[:users].values[0], :content_type => "application/x-yaml")
- @uri = => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(@@config[:services]["opentox-algorithm"], "fminer")).body.chomp
- end
+ # Create lazar prediction model
+ class Lazar
+ include Algorithm
+ # Initialize lazar algorithm
+ def initialize
+ super File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
+ load_metadata
+ end
+ end
- def self.uri
- File.join(@@config[:services]["opentox-algorithm"], "lazar")
- end
+ # Utility methods without dedicated webservices
- end
+ # Similarity calculations
+ module Similarity
+ include Algorithm
- class Similarity
- def self.weighted_tanimoto(fp_a,fp_b,p)
- common_features = fp_a & fp_b
- all_features = (fp_a + fp_b).uniq
+ # Tanimoto similarity
+ # @param [Array] features_a Features of first compound
+ # @param [Array] features_b Features of second compound
+ # @param [optional, Hash] weights Weights for all features
+ # @return [Float] (Weighted) tanimoto similarity
+ def self.tanimoto(features_a,features_b,weights=nil)
+ common_features = features_a & features_b
+ all_features = (features_a + features_b).uniq
common_p_sum = 0.0
if common_features.size > 0
- common_features.each{|f| common_p_sum += OpenTox::Utils.gauss(p[f])}
- all_p_sum = 0.0
- all_features.each{|f| all_p_sum += OpenTox::Utils.gauss(p[f])}
- common_p_sum/all_p_sum
+ if weights
+ common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}
+ all_p_sum = 0.0
+ all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}
+ common_p_sum/all_p_sum
+ else
+ common_features.to_f/all_features
+ end
- def self.euclidean(prop_a,prop_b)
- common_properties = prop_a.keys & prop_b.keys
+ # Euclidean similarity
+ # @param [Hash] properties_a Properties of first compound
+ # @param [Hash] properties_b Properties of second compound
+ # @param [optional, Hash] weights Weights for all properties
+ # @return [Float] (Weighted) euclidean similarity
+ def self.euclidean(properties_a,properties_b,weights=nil)
+ common_properties = properties_a.keys & properties_b.keys
if common_properties.size > 1
dist_sum = 0
common_properties.each do |p|
- dist_sum += (prop_a[p] - prop_b[p])**2
+ if weights
+ dist_sum += ( (properties_a[p] - properties_b[p]) * Algorithm.gauss(weights[p]) )**2
+ else
+ dist_sum += (properties_a[p] - properties_b[p])**2
+ end
- nil
+ 0.0
+ end
+ end
+ end
+ module Neighbors
+ # Classification with majority vote from neighbors weighted by similarity
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
+ # @param [optional] params Ignored (only for compatibility with local_svm_regression)
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ def self.weighted_majority_vote(neighbors,params={})
+ conf = 0.0
+ confidence = 0.0
+ neighbors.each do |neighbor|
+ case neighbor[:activity].to_s
+ when 'true'
+ conf += Algorithm.gauss(neighbor[:similarity])
+ when 'false'
+ conf -= Algorithm.gauss(neighbor[:similarity])
+ end
+ end
+ if conf > 0.0
+ prediction = true
+ elsif conf < 0.0
+ prediction = false
+ else
+ prediction = nil
+ end
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence.abs}
+ end
+ # Local support vector regression from neighbors
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ def self.local_svm_regression(neighbors,params )
+ sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
+ conf = sims.inject{|sum,x| sum + x }
+ acts = neighbors.collect do |n|
+ act = n[:activity]
+ Math.log10(act.to_f)
+ end # activities of neighbors for supervised learning
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ if neighbor_matches.size == 0
+ raise "No neighbors found"
+ else
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = [] unless gram_matrix[i]
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+ gram_matrix[i][j] = Algorithm.gauss(sim)
+ gram_matrix[j] = [] unless gram_matrix[j]
+ gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+ end
+ gram_matrix[i][i] = 1.0
+ end
+ LOGGER.debug gram_matrix.to_yaml
+ @r =,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.size
+ @r.y = acts
+ @r.sims = sims
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ @r.eval "p<-predict(model,sims)[1,1]"
+ prediction = 10**(@r.p.to_f)
+ LOGGER.debug "Prediction is: '" + @prediction.to_s + "'."
+ @r.quit # free R
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
+ end
+ end
+ module Substructure
+ include Algorithm
+ # Substructure matching
+ # @param [OpenTox::Compound] compound Compound
+ # @param [Array] features Array with Smarts strings
+ # @return [Array] Array with matching Smarts
+ def self.match(compound,features)
+ compound.match(features)
+ end
+ end
+ module Dataset
+ include Algorithm
+ # API should match Substructure.match
+ def features(dataset_uri,compound_uri)
+ # Gauss kernel
+ # @return [Float]
+ def self.gauss(x, sigma = 0.3)
+ d = 1.0 - x
+ Math.exp(-(d*d)/(2*sigma*sigma))
+ end
+ # Median of an array
+ # @param [Array] Array with values
+ # @return [Float] Median
+ def self.median(array)
+ return nil if array.empty?
+ array.sort!
+ m_pos = array.size / 2
+ return array.size % 2 == 1 ? array[m_pos] : (array[m_pos-1] + array[m_pos])/2
+ end