diff options
authorAndreas Maunz <>2011-07-06 11:38:53 +0200
committerAndreas Maunz <>2011-07-06 11:38:53 +0200
commit324471e8455eb4a9256bd25aa3d33b6eb78e62ed (patch)
parent12951ac52d8dcf81aaa9fa7a882da912c91cce22 (diff)
Sigmoidal function for normality
1 files changed, 74 insertions, 80 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index d5e9caf..bdb10f7 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -204,22 +204,13 @@ module OpenTox
# @return [Hash] Hash with keys `:prediction, :confidence`
def self.local_mlr_prop(neighbors, params, props)
- acts = neighbors.collect do |n|
- act = n[:activity]
- act.to_f
- end # activities of neighbors for supervised learning
+ raise "No neighbors found." unless neighbors.size>0
- min,max = acts.minmax
- offset = 1.0 - min # offset to min element
- offset = -1.0 * offset if offset>0.0
- div_offset = max - offset # dynamic range
- acts = acts.collect { |a| a - offset } # everything >1, starting at 1
- acts = acts.collect { |a| a / div_offset } # scale to unit length
- acts = acts.collect { |a| Math.log10 a } # everything >1, then take log10
+ acts = neighbors.collect do |n|
+ act = n[:activity]
+ act.to_f
+ end # activities of neighbors for supervised learning
LOGGER.debug "Local MLR (Propositionalization / GSL)."
n_prop = props[0] # is a matrix, i.e. two nested Arrays.
@@ -233,37 +224,31 @@ module OpenTox
n_prop_tmp = ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp
acts_tmp = ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp
- if n_prop.size == 0
- raise "No neighbors found."
- else
- begin
- LOGGER.debug "Setting GSL data ..."
- # set data
- prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size]
- y = GSL::Vector[acts]
- q_prop = GSL::Vector[q_prop]
+ LOGGER.debug "Setting GSL data ..."
+ # set data
+ prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size]
+ y = GSL::Vector[acts]
+ q_prop = GSL::Vector[q_prop]
+ # model + support vectors
+ LOGGER.debug "Creating MLR model ..."
+ work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size)
+ c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work)
+ LOGGER.debug "Predicting ..."
+ prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0]
+ LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
- # model + support vectors
- LOGGER.debug "Creating MLR model ..."
- work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size)
- c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work)
- LOGGER.debug "Predicting ..."
- prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0]
- rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
- end
- end
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
- prediction = div_offset * (10**(prediction.to_f)) + offset # reverse transformation
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ LOGGER.debug "#{e.class}: #{e.message}"
+ puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
- conf = sims.inject{|sum,x| sum + x }
- confidence = conf/neighbors.size if neighbors.size > 0
- {:prediction => prediction, :confidence => confidence}
# Classification with majority vote from neighbors weighted by similarity
@@ -313,39 +298,24 @@ module OpenTox
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
def self.local_svm_regression(neighbors, params, props=nil)
- acts = neighbors.collect do |n|
- act = n[:activity]
- act.to_f
- end # activities of neighbors for supervised learning
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ raise "No neighbors found." unless neighbors.size>0
+ acts = neighbors.collect{ |n| n[:activity].to_f }
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) }
offset = 1.0 - acts.minmax[0] # offset to min element
offset = -1.0 * offset if offset>0.0
- puts "OFFSET MV"
- acts = acts.collect { |a| a - offset } # slide
- puts acts.to_yaml
- puts "OFFSET LOG"
- acts = acts.collect { |a| Math.log10 a } # everything >1, then take log10
- puts acts.to_yaml
- div_offset = acts.minmax[1] # dynamic range
- puts "OFFSET DIV"
- acts = acts.collect { |a| a / div_offset } # scale
- puts acts.to_yaml
- prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params))
- prediction = (10**(div_offset*prediction.to_f))+offset
- LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ inverter =
+ prediction = (props.nil? ? local_svm(neighbors, inverter.values, sims, "nu-svr", params) : local_svm_prop(props, inverter.values, "nu-svr", params))
+ prediction = inverter.back_transform([prediction])
+ LOGGER.debug "Prediction is: '" + prediction[0].to_s + "'."
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size
+ {:prediction => prediction, :confidence => confidence}
rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ LOGGER.debug "#{e.class}: #{e.message}"
+ puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- conf = sims.inject{|sum,x| sum + x }
- confidence = conf/neighbors.size if neighbors.size > 0
- {:prediction => prediction, :confidence => confidence}
@@ -355,22 +325,21 @@ module OpenTox
# @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
# @return [Hash] Hash with keys `:prediction, :confidence`
def self.local_svm_classification(neighbors, params, props=nil)
- acts = neighbors.collect do |n|
- act = n[:activity]
- end # activities of neighbors for supervised learning
-# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0}
- acts_f = acts
- sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
+ raise "No neighbors found." unless neighbors.size>0
+ acts = neighbors.collect { |n| act = n[:activity] }
+ acts_f = acts
+ sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params))
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
+ conf = sims.inject{|sum,x| sum + x }
+ confidence = conf/neighbors.size if neighbors.size > 0
+ {:prediction => prediction, :confidence => confidence}
rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ LOGGER.debug "#{e.class}: #{e.message}"
+ puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- conf = sims.inject{|sum,x| sum + x }
- confidence = conf/neighbors.size if neighbors.size > 0
- {:prediction => prediction, :confidence => confidence}
@@ -442,7 +411,8 @@ module OpenTox
@r.quit # free R
rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ LOGGER.debug "#{e.class}: #{e.message}"
+ puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
@@ -504,7 +474,8 @@ module OpenTox
@r.quit # free R
rescue Exception => e
- LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+ LOGGER.debug "#{e.class}: #{e.message}"
+ puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
@@ -530,6 +501,29 @@ module OpenTox
def features(dataset_uri,compound_uri)
+ module Transform # groups value-transformation helpers; currently holds only Inverter
+ include Algorithm # mixes the Algorithm module into Transform
+ class Inverter # to improve normality conditions on a vector: shifts the values, then takes their reciprocal; back_transform undoes both steps
+ attr_accessor :values # after initialize this holds the TRANSFORMED values, not the raw input
+ def initialize(values) # values: collection of numbers (must respond to size, minmax and collect); raises RuntimeError when empty
+ @values=values
+ raise "Cannot transform, values empty." if @values.size==0
+ @offset = 1.0 - @values.minmax[0] # 1.0 minus the smallest input value
+ @offset = -1.0 * @offset if @offset>0.0 # min < 1.0: flip the sign so the shift below moves the minimum to exactly 1.0. NOTE(review): for min > 1.0 the offset stays negative and the minimum ends up at 2*min - 1 -- confirm this is intended
+ @values = @values.collect { |v| v - @offset } # slide so that every value is >= 1.0 (keeps the reciprocal below away from division by zero)
+ @values = @values.collect { |v| 1 / v } # invert: plain reciprocal 1/v (the original note called this a "sigmoidal function")
+ end
+ def back_transform(values) # values: collection of numbers on the transformed scale; returns a new array on the original scale
+ values = values.collect { |v| 1 / v } # undo the reciprocal (NOTE: Integer elements would be integer-divided here; presumably callers pass Floats -- verify)
+ values = values.collect { |v| v + @offset } # undo the shift applied in initialize; this array is the return value
+ end
+ end
+ end
# Gauss kernel
# @return [Float]