diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-07-06 11:38:53 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-07-06 11:38:53 +0200 |
commit | 324471e8455eb4a9256bd25aa3d33b6eb78e62ed (patch) | |
tree | eeea19d5e5a6cd91f18e329567f9057576980207 | |
parent | 12951ac52d8dcf81aaa9fa7a882da912c91cce22 (diff) |
Sigmoidal function for normality
-rw-r--r-- | lib/algorithm.rb | 154 |
1 files changed, 74 insertions, 80 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index d5e9caf..bdb10f7 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -204,22 +204,13 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_mlr_prop(neighbors, params, props) - - acts = neighbors.collect do |n| - act = n[:activity] - act.to_f - end # activities of neighbors for supervised learning - - + raise "No neighbors found." unless neighbors.size>0 begin - min,max = acts.minmax - offset = 1.0 - min # offset to min element - offset = -1.0 * offset if offset>0.0 - div_offset = max - offset # dynamic range - acts = acts.collect { |a| a - offset } # everything >1, starting at 1 - acts = acts.collect { |a| a / div_offset } # scale to unit length - acts = acts.collect { |a| Math.log10 a } # everything >1, then take log10 + acts = neighbors.collect do |n| + act = n[:activity] + act.to_f + end # activities of neighbors for supervised learning LOGGER.debug "Local MLR (Propositionalization / GSL)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. @@ -233,37 +224,31 @@ module OpenTox n_prop_tmp = Array.new ; repeat_factor.times { n_prop_tmp.concat n_prop } ; n_prop = n_prop_tmp acts_tmp = Array.new ; repeat_factor.times { acts_tmp.concat acts } ; acts = acts_tmp - if n_prop.size == 0 - raise "No neighbors found." - else - begin - LOGGER.debug "Setting GSL data ..." - # set data - prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] - y = GSL::Vector[acts] - q_prop = GSL::Vector[q_prop] + LOGGER.debug "Setting GSL data ..." + # set data + prop_matrix = GSL::Matrix[n_prop, n_prop_y_size * repeat_factor, n_prop_x_size] + y = GSL::Vector[acts] + q_prop = GSL::Vector[q_prop] + + # model + support vectors + LOGGER.debug "Creating MLR model ..." + work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) + c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) + LOGGER.debug "Predicting ..." + prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] + LOGGER.debug "Prediction is: '" + prediction.to_s + "'." - # model + support vectors - LOGGER.debug "Creating MLR model ..." - work = GSL::MultiFit::Workspace.alloc(n_prop_y_size * repeat_factor, n_prop_x_size) - c, cov, chisq, status = GSL::MultiFit::linear(prop_matrix, y, work) - LOGGER.debug "Predicting ..." - prediction = GSL::MultiFit::linear_est(q_prop, c, cov)[0] - rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" - end - end + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} - prediction = div_offset * (10**(prediction.to_f)) + offset # reverse transformation - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} + end # Classification with majority vote from neighbors weighted by similarity @@ -313,39 +298,24 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` def self.local_svm_regression(neighbors, params, props=nil) - acts = neighbors.collect do |n| - act = n[:activity] - act.to_f - end # activities of neighbors for supervised learning - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + raise "No neighbors found." unless neighbors.size>0 begin + acts = neighbors.collect{ |n| n[:activity].to_f } + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } offset = 1.0 - acts.minmax[0] # offset to min element offset = -1.0 * offset if offset>0.0 - - puts "OFFSET MV" - acts = acts.collect { |a| a - offset } # slide - puts acts.to_yaml - - puts "OFFSET LOG" - acts = acts.collect { |a| Math.log10 a } # everything >1, then take log10 - puts acts.to_yaml - - div_offset = acts.minmax[1] # dynamic range - puts "OFFSET DIV" - acts = acts.collect { |a| a / div_offset } # scale - puts acts.to_yaml - - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) - prediction = (10**(div_offset*prediction.to_f))+offset - LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + inverter = OpenTox::Algorithm::Transform::Inverter.new(acts) + prediction = (props.nil? ? local_svm(neighbors, inverter.values, sims, "nu-svr", params) : local_svm_prop(props, inverter.values, "nu-svr", params)) + prediction = inverter.back_transform([prediction]) + LOGGER.debug "Prediction is: '" + prediction[0].to_s + "'." + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size + {:prediction => prediction, :confidence => confidence} rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} end @@ -355,22 +325,21 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Hash] Hash with keys `:prediction, :confidence` def self.local_svm_classification(neighbors, params, props=nil) - acts = neighbors.collect do |n| - act = n[:activity] - end # activities of neighbors for supervised learning -# acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} - acts_f = acts - sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors + + raise "No neighbors found." unless neighbors.size>0 begin + acts = neighbors.collect { |n| act = n[:activity] } + acts_f = acts + sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." + conf = sims.inject{|sum,x| sum + x } + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end - - conf = sims.inject{|sum,x| sum + x } - confidence = conf/neighbors.size if neighbors.size > 0 - {:prediction => prediction, :confidence => confidence} end @@ -442,7 +411,8 @@ module OpenTox end @r.quit # free R rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end @@ -504,7 +474,8 @@ module OpenTox end @r.quit # free R rescue Exception => e - LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + LOGGER.debug "#{e.class}: #{e.message}" + puts "Backtrace:\n\t#{e.backtrace.join("\n\t")}" end end prediction @@ -530,6 +501,29 @@ module OpenTox def features(dataset_uri,compound_uri) end end + + module Transform + include Algorithm + + class Inverter # to improve normality conditions on a vector + attr_accessor :values + + def initialize(values) + @values=values + raise "Cannot transform, values empty." if @values.size==0 + @offset = 1.0 - @values.minmax[0] + @offset = -1.0 * @offset if @offset>0.0 + @values = @values.collect { |v| v - @offset } # slide >1 + @values = @values.collect { |v| 1 / v } # invert using sigmoidal function + end + + def back_transform(values) + values = values.collect { |v| 1 / v } + values = values.collect { |v| v + @offset } + end + + end + end # Gauss kernel # @return [Float] |