summaryrefslogtreecommitdiff
path: root/lib/algorithm.rb
diff options
context:
space:
mode:
authorChristoph Helma <helma@in-silico.ch>2010-11-19 16:53:21 +0100
committerChristoph Helma <helma@in-silico.ch>2010-11-19 16:53:21 +0100
commitf8552611c2dbe25d76474f51e4e895bf9c2b5c5e (patch)
treeda145cd1d69adc4cdb8d299f0cea2e0810b88eaf /lib/algorithm.rb
parent91c95f8dc8f60a8f0029b970ef881eecee28401b (diff)
lazar predictions for toxcreate working
Diffstat (limited to 'lib/algorithm.rb')
-rw-r--r--lib/algorithm.rb154
1 files changed, 142 insertions, 12 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 711f63b..a6fa4a7 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -1,3 +1,9 @@
+# R integration
+# workaround to initialize R non-interactively (former rinruby versions did this by default)
+# avoids compiling R with X
+R = nil
+require "rinruby"
+
module OpenTox
# Wrapper for OpenTox Algorithms
@@ -6,8 +12,10 @@ module OpenTox
include OpenTox
# Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters
+ # @param [optional,Hash] params Algorithm parameters
+ # @return [String] URI of new resource (dataset, model, ...)
def run(params=nil)
- RestClientWrapper.post(@uri, params)
+ RestClientWrapper.post(@uri, params).to_s
end
# Get OWL-DL representation in RDF/XML format
@@ -23,9 +31,11 @@ module OpenTox
include Algorithm
end
+ # Fminer algorithms (https://github.com/amaunz/fminer2)
module Fminer
include Algorithm
+ # Backbone Refinement Class mining (http://bbrc.maunz.de/)
class BBRC
include Fminer
# Initialize bbrc algorithm
@@ -35,6 +45,7 @@ module OpenTox
end
end
+ # LAtent STructure Pattern Mining (http://last-pm.maunz.de)
class LAST
include Fminer
# Initialize last algorithm
@@ -58,15 +69,15 @@ module OpenTox
# Utility methods without dedicated webservices
+ # Similarity calculations
module Similarity
include Algorithm
# Tanimoto similarity
- #
# @param [Array] features_a Features of first compound
# @param [Array] features_b Features of second compound
# @param [optional, Hash] weights Weights for all features
- # @return [Float] (Wighted) tanimoto similarity
+ # @return [Float] (Weighted) tanimoto similarity
def self.tanimoto(features_a,features_b,weights=nil)
common_features = features_a & features_b
all_features = (features_a + features_b).uniq
@@ -86,15 +97,19 @@ module OpenTox
end
# Euclidean similarity
- def self.euclidean(prop_a,prop_b,weights=nil)
- common_properties = prop_a.keys & prop_b.keys
+ # @param [Hash] properties_a Properties of first compound
+ # @param [Hash] properties_b Properties of second compound
+ # @param [optional, Hash] weights Weights for all properties
+ # @return [Float] (Weighted) euclidean similarity
+ def self.euclidean(properties_a,properties_b,weights=nil)
+ common_properties = properties_a.keys & properties_b.keys
if common_properties.size > 1
dist_sum = 0
common_properties.each do |p|
if weights
- dist_sum += ( (prop_a[p] - prop_b[p]) * Algorithm.gauss(weights[p]) )**2
+ dist_sum += ( (properties_a[p] - properties_b[p]) * Algorithm.gauss(weights[p]) )**2
else
- dist_sum += (prop_a[p] - prop_b[p])**2
+ dist_sum += (properties_a[p] - properties_b[p])**2
end
end
1/(1+Math.sqrt(dist_sum))
@@ -103,14 +118,129 @@ module OpenTox
end
end
end
+
+ module Neighbors
+
+ # Similarity-weighted majority vote over the (binary) activities of the neighbors
+ # @param [Array] neighbors Array of hashes with keys `:similarity, :activity`
+ # @param [optional] params Ignored (only for compatibility with local_svm_regression)
+ # @return [Hash] Hash with keys `:prediction` (true, false, or nil on a tie) and `:confidence` (non-negative)
+ def self.weighted_majority_vote(neighbors,params={})
+   # signed vote total: 'true' activities add their gauss weight, 'false' ones subtract it,
+   # any other activity value leaves the total untouched
+   vote = neighbors.inject(0.0) do |sum, neighbor|
+     case neighbor[:activity].to_s
+     when 'true' then sum + Algorithm.gauss(neighbor[:similarity])
+     when 'false' then sum - Algorithm.gauss(neighbor[:similarity])
+     else sum
+     end
+   end
+   prediction =
+     if vote > 0.0
+       true
+     elsif vote < 0.0
+       false
+     end
+   # mean vote per neighbor; stays at 0.0 when there are no neighbors
+   confidence = neighbors.size > 0 ? vote/neighbors.size : 0.0
+   {:prediction => prediction, :confidence => confidence.abs}
+ end
+
+ # Local support vector regression from neighbors
+ #
+ # Fits a nu-SVR model (R package "kernlab", driven through RinRuby) on the log10 activities
+ # of the neighbors, using a precomputed kernel matrix of gauss-transformed pairwise neighbor
+ # similarities, and predicts the query from its similarities to the support vectors.
+ # The confidence is the mean similarity between the query and its neighbors.
+ # @param [Array] neighbors Array of hashes with keys `:similarity, :activity, :features`
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ # @raise [RuntimeError] "No neighbors found" if neighbors is empty
+ def self.local_svm_regression(neighbors,params )
+   raise "No neighbors found" if neighbors.size == 0
+
+   sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
+   conf = sims.inject{|sum,x| sum + x }
+   acts = neighbors.collect{ |n| Math.log10(n[:activity].to_f) } # log10 activities of neighbors for supervised learning
+   neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
+
+   # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+   gram_matrix = []
+   (0..(neighbor_matches.length-1)).each do |i|
+     gram_matrix[i] = [] unless gram_matrix[i]
+     # upper triangle
+     ((i+1)..(neighbor_matches.length-1)).each do |j|
+       # NOTE(review): eval runs whatever string params[:similarity_algorithm] contains and relies on
+       # the local names neighbor_matches, i, j and params - only pass trusted values, do not rename these locals
+       sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+       gram_matrix[i][j] = Algorithm.gauss(sim)
+       gram_matrix[j] = [] unless gram_matrix[j]
+       gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+     end
+     gram_matrix[i][i] = 1.0
+   end
+
+   LOGGER.debug gram_matrix.to_yaml
+
+   # one R instance per call, held in a local instead of a module-level instance variable:
+   # global R instance leads to Socket errors after a large number of requests
+   r = RinRuby.new(false,false)
+   begin
+     r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+     LOGGER.debug "Setting R data ..."
+     # set data
+     r.gram_matrix = gram_matrix.flatten
+     r.n = neighbor_matches.size
+     r.y = acts
+     r.sims = sims
+
+     LOGGER.debug "Preparing R data ..."
+     # prepare data
+     r.eval "y<-as.vector(y)"
+     r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+     r.eval "sims<-as.vector(sims)"
+
+     # model + support vectors
+     LOGGER.debug "Creating SVM model ..."
+     r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
+     r.eval "sv<-as.vector(SVindex(model))"
+     r.eval "sims<-sims[sv]"
+     r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+     LOGGER.debug "Predicting ..."
+     r.eval "p<-predict(model,sims)[1,1]"
+     prediction = 10**(r.p.to_f) # the model was trained on log10 activities
+     LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+   ensure
+     r.quit # free R, also when one of the steps above raised
+   end
+
+   {:prediction => prediction, :confidence => conf/neighbors.size}
+ end
+
+ end
+
+ # Substructure matching utilities
+ module Substructure
+   include Algorithm
+   # Match Smarts patterns against a compound by delegating to the compound's own `match`
+   # @param [OpenTox::Compound] compound Compound to search in
+   # @param [Array] features Array with Smarts strings
+   # @return [Array] Array with matching Smarts
+   def self.match(compound,features)
+     matching_smarts = compound.match(features)
+     matching_smarts
+   end
+ end
+
+ # Dataset based feature lookup (placeholder, nothing is implemented yet)
+ module Dataset
+   include Algorithm
+   # Empty stub that always returns nil. API should match Substructure.match
+   # NOTE(review): this is an instance method while Substructure.match is a module method - confirm which is intended
+   # @param dataset_uri presumably the URI of a dataset - unused so far, TODO confirm
+   # @param compound_uri presumably the URI of a compound - unused so far, TODO confirm
+   # @return [nil]
+   def features(dataset_uri,compound_uri); end
+ end
- # Gauss kernel
- def self.gauss(sim, sigma = 0.3)
- x = 1.0 - sim
- Math.exp(-(x*x)/(2*sigma*sigma))
- end
+ # Gauss kernel: exp(-(1-x)^2 / (2*sigma^2)), which evaluates to 1.0 for x = 1
+ # @param [Numeric] x Value to transform
+ # @param [optional, Numeric] sigma Width of the kernel (default 0.3)
+ # @return [Float]
+ def self.gauss(x, sigma = 0.3)
+   distance = 1.0 - x
+   exponent = -(distance*distance)/(2*sigma*sigma)
+   Math.exp(exponent)
+ end
# Median of an array
+ # @param [Array] array Array with values
+ # @return [Float] Median
def self.median(array)
return nil if array.empty?
array.sort!