diff options
Diffstat (limited to 'lib/algorithm.rb')
-rw-r--r-- | lib/algorithm.rb | 154 |
1 files changed, 108 insertions, 46 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index abf10d4..5b41cbf 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -164,65 +164,127 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors,params ) - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values between query and neighbors - conf = sims.inject{|sum,x| sum + x } + def self.local_svm_regression(neighbors, params) + take_logs=true + neighbors.each do |n| + if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) + take_logs = false + end + end acts = neighbors.collect do |n| act = n[:activity] - Math.log10(act.to_f) + take_logs ? Math.log10(act.to_f) : act.to_f end # activities of neighbors for supervised learning - neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found" - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] unless gram_matrix[i] - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - gram_matrix[i][j] = Algorithm.gauss(sim) - gram_matrix[j] = [] unless gram_matrix[j] - gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - end - gram_matrix[i][i] = 1.0 - end + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + begin + prediction = local_svm(neighbors, acts, sims, "nu-svr", params) + prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + + end - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - @r.eval "p<-predict(model,sims)[1,1]" - prediction = 10**(@r.p.to_f) + # Local support vector classification from neighbors + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Hash] Hash with keys `:prediction, :confidence` + def self.local_svm_classification(neighbors, params) + acts = neighbors.collect do |n| + act = n[:activity] + end # activities of neighbors for supervised learning + acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + begin + prediction = local_svm (neighbors, acts_f, sims, "C-bsvc", params) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end + + conf = sims.inject{|sum,x| sum + x } confidence = conf/neighbors.size if neighbors.size > 0 {:prediction => prediction, :confidence => confidence} end + + # Local support vector prediction from neighbors. + # Not to be called directly (use local_svm_regression or local_svm_classification. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] sims, similarities for neighbors. + # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def self.local_svm(neighbors, acts, sims, type, params) + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found." + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + end + gram_matrix[i][i] = 1.0 + end + + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,sims)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,sims)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + prediction = (@r.p.to_f == 1.0 ? true : false) + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + + end + prediction + end + end module Substructure |