diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-07-04 11:05:34 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-07-04 11:05:34 +0200 |
commit | ebb9427120e8100d94435851a66ae76dc6d5a22c (patch) | |
tree | ebbbc43f848fc54e09b48238313f13d4c51a56d0 | |
parent | 50d35c614cc0fb2cfb6f44f3c8711a1a0cd97d8d (diff) |
MLR integration finished
-rw-r--r-- | Rakefile | 1 | ||||
-rw-r--r-- | lib/algorithm.rb | 215 | ||||
-rw-r--r-- | lib/model.rb | 4 | ||||
-rw-r--r-- | lib/opentox-ruby.rb | 2 |
4 files changed, 129 insertions, 93 deletions
@@ -43,6 +43,7 @@ begin gem.add_dependency "dm-validations", "=1.1.0" gem.add_dependency "dm-sqlite-adapter", "=1.1.0" gem.add_dependency "ruby-plot", "=0.5.0" + gem.add_dependency "gsl", "=1.14.7" gem.add_development_dependency 'jeweler' gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore'] diff --git a/lib/algorithm.rb b/lib/algorithm.rb index af05376..bfa79d3 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -208,6 +208,75 @@ module OpenTox module Neighbors + # Local multi-linear regression (MLR) prediction from neighbors. + # Uses propositionalized setting. + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @return [Numeric] A prediction value. + def self.local_mlr_prop(neighbors, params, props) + + take_logs=true + + neighbors.each do |n| + if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) + take_logs = false + end + end + + acts = neighbors.collect do |n| + act = n[:activity] + take_logs ? Math.log10(act.to_f) : act.to_f + end # activities of neighbors for supervised learning + + + begin + + LOGGER.debug "Local MLR (Propositionalization / GSL)." + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. + n_prop_x_size = n_prop[0].size + n_prop_y_size = n_prop.size + + n_prop.flatten! + y_x_rel = n_prop_y_size.to_f / n_prop_x_size + repeat_factor = (1/y_x_rel).ceil + n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp + acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp + + if n_prop.size == 0 + raise "No neighbors found." + else + begin + LOGGER.debug "Setting GSL data ..." + # set data + prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] + y = GSL::Vector[acts] + q_prop = GSL::Vector[q_prop] + + # model + support vectors + LOGGER.debug "Creating MLR model ..." + work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) + c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) + LOGGER.debug "Predicting ..." + prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + end + + prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + end + # Classification with majority vote from neighbors weighted by similarity # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` # @param [optional] params Ignored (only for compatibility with local_svm_regression) @@ -318,67 +387,67 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) - LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." - neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches - gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel - if neighbor_matches.size == 0 - raise "No neighbors found." - else - # gram matrix - (0..(neighbor_matches.length-1)).each do |i| - gram_matrix[i] = [] unless gram_matrix[i] - # upper triangle - ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") - gram_matrix[i][j] = Algorithm.gauss(sim) - gram_matrix[j] = [] unless gram_matrix[j] - gram_matrix[j][i] = gram_matrix[i][j] # lower triangle - end - gram_matrix[i][i] = 1.0 + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." + neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found." + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle end + gram_matrix[i][i] = 1.0 + end - #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests - @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed - LOGGER.debug "Setting R data ..." - # set data - @r.gram_matrix = gram_matrix.flatten - @r.n = neighbor_matches.size - @r.y = acts - @r.sims = sims + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims - begin - LOGGER.debug "Preparing R data ..." - # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" - @r.eval "sims<-as.vector(sims)" - - # model + support vectors - LOGGER.debug "Creating SVM model ..." - @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" - @r.eval "sv<-as.vector(SVindex(model))" - @r.eval "sims<-sims[sv]" - @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" - LOGGER.debug "Predicting ..." - if type == "nu-svr" - @r.eval "p<-predict(model,sims)[1,1]" - elsif type == "C-bsvc" - @r.eval "p<-predict(model,sims)" - end - if type == "nu-svr" - prediction = @r.p - elsif type == "C-bsvc" - #prediction = (@r.p.to_f == 1.0 ? true : false) - prediction = @r.p - end - @r.quit # free R - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,sims)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,sims)" end - + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + #prediction = (@r.p.to_f == 1.0 ? true : false) + prediction = @r.p + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" end - prediction + + end + prediction end # Local support vector prediction from neighbors. @@ -442,41 +511,7 @@ module OpenTox prediction end - # Local multi-linear regression (MLR) prediction from neighbors. - # Uses propositionalized setting. - # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` - # @param [Array] acts, activities for neighbors. - # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] - # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required - # @return [Numeric] A prediction value. - def local_mlr_prop - LOGGER.debug "Local MLR (Propositionalization / GSL)." - n_prop = props[0] # is a matrix, i.e. two nested Arrays. - q_prop = props[1] # is an Array. - - if n_prop.size == 0 - raise "No neighbors found." - else - begin - LOGGER.debug "Setting GSL data ..." - # set data - prop_matrix = GSL::Matrix[n_prop] - n_prop_x_size = n_prop.size - n_prop_y_size = n_prop[0].size - y = GSL::Vector[acts] - q_prop = GSL::Vector[q_prop] - # model + support vectors - LOGGER.debug "Creating MLR model ..." - work = GSL::MultiFit::Workspace.alloc(n_prop_y_size,n_prop_x_size) - [c, cov, chisq, status] = GSL::MultiFit::linear(prop_matrix, y, work) - LOGGER.debug "Predicting ..." - prediction = GSL::Multifit::linear_est(q_prop, c, cov) - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" - end - end - prediction end module Substructure diff --git a/lib/model.rb b/lib/model.rb index 5eec366..ea6fd08 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -253,7 +253,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - if @prop_kernel && @prediction_algorithm.include?("svm") + if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) props = get_props else props = nil @@ -276,7 +276,7 @@ module OpenTox else # AM: no balancing or regression LOGGER.info "LAZAR: Unbalanced." neighbors - if @prop_kernel && @prediction_algorithm.include?("svm") + if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") ) props = get_props else props = nil diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb index ae05cb2..1fa2a86 100644 --- a/lib/opentox-ruby.rb +++ b/lib/opentox-ruby.rb @@ -1,4 +1,4 @@ -['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib| +['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib| require lib end |