summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2011-07-04 11:05:34 +0200
committer Andreas Maunz <andreas@maunz.de> 2011-07-04 11:05:34 +0200
commit ebb9427120e8100d94435851a66ae76dc6d5a22c (patch)
tree ebbbc43f848fc54e09b48238313f13d4c51a56d0
parent 50d35c614cc0fb2cfb6f44f3c8711a1a0cd97d8d (diff)
MLR integration finished
-rw-r--r-- Rakefile 1
-rw-r--r-- lib/algorithm.rb 215
-rw-r--r-- lib/model.rb 4
-rw-r--r-- lib/opentox-ruby.rb 2
4 files changed, 129 insertions, 93 deletions
diff --git a/Rakefile b/Rakefile
index 834e0a3..bd22c16 100644
--- a/Rakefile
+++ b/Rakefile
@@ -43,6 +43,7 @@ begin
gem.add_dependency "dm-validations", "=1.1.0"
gem.add_dependency "dm-sqlite-adapter", "=1.1.0"
gem.add_dependency "ruby-plot", "=0.5.0"
+ gem.add_dependency "gsl", "=1.14.7"
gem.add_development_dependency 'jeweler'
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index af05376..bfa79d3 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -208,6 +208,75 @@ module OpenTox
module Neighbors
+ # Local multi-linear regression (MLR) prediction from neighbors.
+ # Uses propositionalized setting.
+ # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
+ # @param [Hash] params Currently not read by this method
+ # @param [Array] props, propositionalization of neighbors and query structure e.g. [ two-nested-Arrays_for_n, Array_for_q ]
+ # @return [Hash] Hash with keys `:prediction, :confidence`
+ def self.local_mlr_prop(neighbors, params, props)
+
+ take_logs=true
+
+ neighbors.each do |n|
+ if (! n[:activity].nil?) && (n[:activity].to_f < 0.0)
+ take_logs = false
+ end
+ end
+
+ acts = neighbors.collect do |n|
+ act = n[:activity]
+ take_logs ? Math.log10(act.to_f) : act.to_f
+ end # activities of neighbors for supervised learning
+
+
+ begin
+
+ LOGGER.debug "Local MLR (Propositionalization / GSL)."
+ n_prop = props[0] # is a matrix, i.e. two nested Arrays.
+ q_prop = props[1] # is an Array.
+ n_prop_x_size = n_prop[0].size
+ n_prop_y_size = n_prop.size
+
+ n_prop.flatten!
+ y_x_rel = n_prop_y_size.to_f / n_prop_x_size
+ repeat_factor = (1/y_x_rel).ceil
+ n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp
+ acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp
+
+ if n_prop.size == 0
+ raise "No neighbors found."
+ else
+ begin
+ LOGGER.debug "Setting GSL data ..."
+ # set data
+ prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size]
+ y = GSL::Vector[acts]
+ q_prop = GSL::Vector[q_prop]
+
+ # model + support vectors
+ LOGGER.debug "Creating MLR model ..."
+ work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size)
+ c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work)
+ LOGGER.debug "Predicting ..."
+ prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0]
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ end
+ end
+
+ prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ end
+
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
+ end
+
# Classification with majority vote from neighbors weighted by similarity
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
# @param [optional] params Ignored (only for compatibility with local_svm_regression)
@@ -318,67 +387,67 @@ module OpenTox
# @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
# @return [Numeric] A prediction value.
def self.local_svm(neighbors, acts, sims, type, params)
- LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)."
- neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
- gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
- if neighbor_matches.size == 0
- raise "No neighbors found."
- else
- # gram matrix
- (0..(neighbor_matches.length-1)).each do |i|
- gram_matrix[i] = [] unless gram_matrix[i]
- # upper triangle
- ((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
- gram_matrix[i][j] = Algorithm.gauss(sim)
- gram_matrix[j] = [] unless gram_matrix[j]
- gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
- end
- gram_matrix[i][i] = 1.0
+ LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)."
+ neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
+ gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
+ if neighbor_matches.size == 0
+ raise "No neighbors found."
+ else
+ # gram matrix
+ (0..(neighbor_matches.length-1)).each do |i|
+ gram_matrix[i] = [] unless gram_matrix[i]
+ # upper triangle
+ ((i+1)..(neighbor_matches.length-1)).each do |j|
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+ gram_matrix[i][j] = Algorithm.gauss(sim)
+ gram_matrix[j] = [] unless gram_matrix[j]
+ gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
end
+ gram_matrix[i][i] = 1.0
+ end
- #LOGGER.debug gram_matrix.to_yaml
- @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
- @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
- LOGGER.debug "Setting R data ..."
- # set data
- @r.gram_matrix = gram_matrix.flatten
- @r.n = neighbor_matches.size
- @r.y = acts
- @r.sims = sims
+ #LOGGER.debug gram_matrix.to_yaml
+ @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+ @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+ LOGGER.debug "Setting R data ..."
+ # set data
+ @r.gram_matrix = gram_matrix.flatten
+ @r.n = neighbor_matches.size
+ @r.y = acts
+ @r.sims = sims
- begin
- LOGGER.debug "Preparing R data ..."
- # prepare data
- @r.eval "y<-as.vector(y)"
- @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
- @r.eval "sims<-as.vector(sims)"
-
- # model + support vectors
- LOGGER.debug "Creating SVM model ..."
- @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)"
- @r.eval "sv<-as.vector(SVindex(model))"
- @r.eval "sims<-sims[sv]"
- @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
- LOGGER.debug "Predicting ..."
- if type == "nu-svr"
- @r.eval "p<-predict(model,sims)[1,1]"
- elsif type == "C-bsvc"
- @r.eval "p<-predict(model,sims)"
- end
- if type == "nu-svr"
- prediction = @r.p
- elsif type == "C-bsvc"
- #prediction = (@r.p.to_f == 1.0 ? true : false)
- prediction = @r.p
- end
- @r.quit # free R
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ begin
+ LOGGER.debug "Preparing R data ..."
+ # prepare data
+ @r.eval "y<-as.vector(y)"
+ @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
+ @r.eval "sims<-as.vector(sims)"
+
+ # model + support vectors
+ LOGGER.debug "Creating SVM model ..."
+ @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"#{type}\", nu=0.5)"
+ @r.eval "sv<-as.vector(SVindex(model))"
+ @r.eval "sims<-sims[sv]"
+ @r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
+ LOGGER.debug "Predicting ..."
+ if type == "nu-svr"
+ @r.eval "p<-predict(model,sims)[1,1]"
+ elsif type == "C-bsvc"
+ @r.eval "p<-predict(model,sims)"
end
-
+ if type == "nu-svr"
+ prediction = @r.p
+ elsif type == "C-bsvc"
+ #prediction = (@r.p.to_f == 1.0 ? true : false)
+ prediction = @r.p
+ end
+ @r.quit # free R
+ rescue Exception => e
+ LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
end
- prediction
+
+ end
+ prediction
end
# Local support vector prediction from neighbors.
@@ -442,41 +511,7 @@ module OpenTox
prediction
end
- # Local multi-linear regression (MLR) prediction from neighbors.
- # Uses propositionalized setting.
- # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
- # @param [Array] acts, activities for neighbors.
- # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
- # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
- # @return [Numeric] A prediction value.
- def local_mlr_prop
- LOGGER.debug "Local MLR (Propositionalization / GSL)."
- n_prop = props[0] # is a matrix, i.e. two nested Arrays.
- q_prop = props[1] # is an Array.
-
- if n_prop.size == 0
- raise "No neighbors found."
- else
- begin
- LOGGER.debug "Setting GSL data ..."
- # set data
- prop_matrix = GSL::Matrix[n_prop]
- n_prop_x_size = n_prop.size
- n_prop_y_size = n_prop[0].size
- y = GSL::Vector[acts]
- q_prop = GSL::Vector[q_prop]
- # model + support vectors
- LOGGER.debug "Creating MLR model ..."
- work = GSL::MultiFit::Workspace.alloc(n_prop_y_size,n_prop_x_size)
- [c, cov, chisq, status] = GSL::MultiFit::linear(prop_matrix, y, work)
- LOGGER.debug "Predicting ..."
- prediction = GSL::Multifit::linear_est(q_prop, c, cov)
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
- end
- end
- prediction
end
module Substructure
diff --git a/lib/model.rb b/lib/model.rb
index 5eec366..ea6fd08 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -253,7 +253,7 @@ module OpenTox
(i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
- if @prop_kernel && @prediction_algorithm.include?("svm")
+ if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") )
props = get_props
else
props = nil
@@ -276,7 +276,7 @@ module OpenTox
else # AM: no balancing or regression
LOGGER.info "LAZAR: Unbalanced."
neighbors
- if @prop_kernel && @prediction_algorithm.include?("svm")
+ if @prop_kernel && ( @prediction_algorithm.include?("svm") || @prediction_algorithm.include?("local_mlr_prop") )
props = get_props
else
props = nil
diff --git a/lib/opentox-ruby.rb b/lib/opentox-ruby.rb
index ae05cb2..1fa2a86 100644
--- a/lib/opentox-ruby.rb
+++ b/lib/opentox-ruby.rb
@@ -1,4 +1,4 @@
-['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib|
+['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib|
require lib
end