diff options
-rw-r--r-- | lib/algorithm.rb | 90 | ||||
-rw-r--r-- | lib/model.rb | 127 |
2 files changed, 159 insertions, 58 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 96b9df1..2652695 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -138,7 +138,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` # @param [optional] params Ignored (only for compatibility with local_svm_regression) # @return [Hash] Hash with keys `:prediction, :confidence` - def self.weighted_majority_vote(neighbors,params={}) + def self.weighted_majority_vote(neighbors,params={}, props=nil) conf = 0.0 confidence = 0.0 neighbors.each do |neighbor| @@ -164,7 +164,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors, params) + def self.local_svm_regression(neighbors, params, props=nil) take_logs=true neighbors.each do |n| if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) @@ -178,7 +178,7 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = local_svm(neighbors, acts, sims, "nu-svr", params) + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e @@ -194,15 +194,16 @@ module OpenTox # Local support vector classification from neighbors # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_classification(neighbors, params) + def self.local_svm_classification(neighbors, params, props=nil) acts = neighbors.collect do |n| act = n[:activity] end # activities of neighbors for supervised learning acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = local_svm(neighbors, acts_f, sims, "C-bsvc", params) + prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" @@ -216,14 +217,17 @@ module OpenTox # Local support vector prediction from neighbors. - # Not to be called directly (use local_svm_regression or local_svm_classification. + # Uses pre-defined Kernel Matrix. + # Not to be called directly (use local_svm_regression or local_svm_classification). # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Array] acts, activities for neighbors. # @param [Array] sims, similarities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 @@ -285,6 +289,80 @@ module OpenTox prediction end + # Local support vector prediction from neighbors. + # Uses propositionalized setting. + # Not to be called directly (use local_svm_regression or local_svm_classification). + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def self.local_svm_prop(props, acts, type, params) + + LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. + + #neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + #gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if n_prop.size == 0 + raise "No neighbors found." + else + # gram matrix + #(0..(neighbor_matches.length-1)).each do |i| + # gram_matrix[i] = [] unless gram_matrix[i] + # # upper triangle + # ((i+1)..(neighbor_matches.length-1)).each do |j| + # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + # gram_matrix[i][j] = Algorithm.gauss(sim) + # gram_matrix[j] = [] unless gram_matrix[j] + # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + # end + # gram_matrix[i][i] = 1.0 + #end + + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.n_prop = n_prop.flatten + @r.n_prop_x_size = n_prop.size + @r.n_prop_y_size = n_prop[0].size + @r.y = acts + @r.q_prop = q_prop + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-matrix(y)" + @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)" + @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,q_prop)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,q_prop)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + prediction = (@r.p.to_f == 1.0 ? true : false) + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + end + prediction + end + + end module Substructure diff --git a/lib/model.rb b/lib/model.rb index f0fd46b..4321646 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Model include Algorithm - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel def initialize(uri=nil) @@ -114,6 +114,7 @@ module OpenTox @prediction_algorithm = "Neighbors.weighted_majority_vote" @min_sim = 0.3 + @prop_kernel = false end @@ -236,17 +237,22 @@ module OpenTox neighbors_best=nil begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs + prediction_best=prediction + neighbors_best=@neighbors + end + position = position + lr_size end - position = position + lr_size - end rescue Exception => e LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end @@ -255,10 +261,15 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # regression case: no balancing + else # AM: no balancing LOGGER.info "LAZAR: Unbalanced." neighbors - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end value_feature_uri = File.join( @uri, "predicted", "value") @@ -266,7 +277,7 @@ module OpenTox prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? - + @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] @@ -333,54 +344,66 @@ module OpenTox @prediction_dataset end - # Find neighbors and store them as object variable - def neighbors_balanced(s, l, start, offset) - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - - @neighbors = [] - begin - #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - this_neighbor = { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - @neighbors << this_neighbor + # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) + # Same for the vector describing the query compound + def get_props + matrix = Array.new + begin + @neighbors.each do |n| + n = n[:compound] + row = [] + @features.each do |f| + if ! @fingerprints[n].nil? + row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) + else + row << 0.0 end end + matrix << row + end + row = [] + @features.each do |f| + row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) end rescue Exception => e - LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message + LOGGER.debug "get_props failed with '" + $! + "'" end - + [ matrix, row ] end + # Find neighbors and store them as object variable, access only a subset of compounds for that. + def neighbors_balanced(s, l, start, offset) + @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm + @neighbors = [] + [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset + training_features = @fingerprints[training_compound] + add_neighbor training_features, training_compound + end + + end - # Find neighbors and store them as object variable + # Find neighbors and store them as object variable, access all compounds for that. def neighbors - - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - - @neighbors = [] - @fingerprints.each do |training_compound,training_features| - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - end + @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm + @neighbors = [] + @fingerprints.each do |training_compound,training_features| # AM: access all compounds + add_neighbor training_features, training_compound + end + end + + # Adds a neighbor to @neighbors if it passes the similarity threshold. + def add_neighbor(training_features, training_compound) + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + if sim > @min_sim + @activities[training_compound].each do |act| + @neighbors << { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } end - end + end end # Find database activities and store them in @prediction_dataset |