summary | refs | log | tree | commit | diff
path: root/lib/algorithm.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/algorithm.rb')
-rw-r--r-- lib/algorithm.rb 154
1 files changed, 108 insertions, 46 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index abf10d4..5b41cbf 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -164,65 +164,127 @@ module OpenTox
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
- def self.local_svm_regression(neighbors,params )
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors
- conf = sims.inject{|sum,x| sum + x }
+ def self.local_svm_regression(neighbors, params) # SVM regression over the neighbors; model fitting is delegated to local_svm with type "nu-svr"
+ take_logs=true # activities are log10-transformed unless a negative activity is found below
+ neighbors.each do |n|
+ if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) # a negative activity cannot be log-transformed
+ take_logs = false
+ end
+ end
acts = neighbors.collect do |n|
act = n[:activity]
- Math.log10(act.to_f)
+ take_logs ? Math.log10(act.to_f) : act.to_f # NOTE(review): an activity of 0 or nil (nil.to_f == 0.0) still takes the log path and yields -Infinity - confirm intended
end # activities of neighbors for supervised learning
- neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- if neighbor_matches.size == 0
- raise "No neighbors found"
- else
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = [] unless gram_matrix[i]
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
- gram_matrix[i][j] = Algorithm.gauss(sim)
- gram_matrix[j] = [] unless gram_matrix[j]
- gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
- end
- gram_matrix[i][i] = 1.0
- end
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors
+ begin
+ prediction = local_svm(neighbors, acts, sims, "nu-svr", params)
+ prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) # undo the log10 transform applied to the activities
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ rescue Exception => e # NOTE(review): also catches SystemExit/Interrupt; the error is only logged and prediction stays nil
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ end
- #LOGGER.debug gram_matrix.to_yaml
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.size
- @r.y = acts
- @r.sims = sims
+ conf = sims.inject{|sum,x| sum + x } # sum of the Algorithm.gauss-transformed similarities (nil for an empty neighbor list)
+ confidence = conf/neighbors.size if neighbors.size > 0 # mean transformed similarity; left nil when there are no neighbors
+ {:prediction => prediction, :confidence => confidence}
+
+ end
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- @r.eval "p<-predict(model,sims)[1,1]"
- prediction = 10**(@r.p.to_f)
+ # Local support vector classification from neighbors; model fitting is delegated to local_svm with type "C-bsvc".
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Hash] Hash with keys `:prediction` (nil if the SVM call raised) and `:confidence` (mean transformed similarity, nil without neighbors)
+ def self.local_svm_classification(neighbors, params)
+ acts = neighbors.collect do |n|
+ n[:activity]
+ end # activities of neighbors for supervised learning
+ acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} # only a literal true counts as the positive class; everything else becomes 0.0
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors
+ begin
+ prediction = local_svm(neighbors, acts_f, sims, "C-bsvc", params) # no space before "(": with one, Ruby >= 1.9 rejects the multi-argument call as a syntax error
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- @r.quit # free R
+ rescue Exception => e # NOTE(review): also catches SystemExit/Interrupt; the error is only logged and prediction stays nil
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
end
+
+ conf = sims.inject{|sum,x| sum + x } # sum of the Algorithm.gauss-transformed similarities (nil for an empty neighbor list)
confidence = conf/neighbors.size if neighbors.size > 0
{:prediction => prediction, :confidence => confidence}
end
+
+ # Local support vector prediction from neighbors.
+ # Not to be called directly (use local_svm_regression or local_svm_classification).
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Array] acts, activities for neighbors.
+ # @param [Array] sims, similarities for neighbors.
+ # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
+ # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @return [Numeric, Boolean, nil] The R value for "nu-svr", true/false for "C-bsvc", nil if the R session raised.
+ def self.local_svm(neighbors, acts, sims, type, params)
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ if neighbor_matches.size == 0
+ raise "No neighbors found." # RuntimeError; raised before any R session is started
+ else
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = [] unless gram_matrix[i]
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") # NOTE(review): eval of a params-supplied method name - :similarity_algorithm must never carry untrusted input
+ gram_matrix[i][j] = Algorithm.gauss(sim)
+ gram_matrix[j] = [] unless gram_matrix[j]
+ gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
+ end
+ gram_matrix[i][i] = 1.0
+ end
+
+ #LOGGER.debug gram_matrix.to_yaml
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ begin # everything that talks to R sits inside this block so the ensure below always releases the R instance
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.size
+ @r.y = acts
+ @r.sims = sims
+
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]" # keep only the query similarities to the support vectors
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ if type == "nu-svr"
+ @r.eval "p<-predict(model,sims)[1,1]"
+ elsif type == "C-bsvc"
+ @r.eval "p<-predict(model,sims)"
+ end
+ if type == "nu-svr"
+ prediction = @r.p
+ elsif type == "C-bsvc"
+ prediction = (@r.p.to_f == 1.0 ? true : false) # map the R class label back to a boolean
+ end
+ rescue Exception => e # NOTE(review): also catches SystemExit/Interrupt; the error is only logged and prediction stays nil
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ ensure
+ @r.quit # free R, also when one of the R calls above raised
+ end
+ end
+ prediction
+ end
+
end
module Substructure