summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author mguetlein <martin.guetlein@gmail.com> 2011-05-27 10:06:24 +0200
committer mguetlein <martin.guetlein@gmail.com> 2011-05-27 10:06:24 +0200
commit d962f052f9b68a78fd9fb21e07369abc714bfa3b (patch)
tree 2dc2c8c12bb07ffd5bf7c23ec5fce5389b0fb3c7 /lib
parent af426336f15e1f4b58c87bf09571721bb42a388f (diff)
parent 3e027274fbdb731b18f587ce356ec29cda59aaf6 (diff)
Merge branch 'development' of github.com:opentox/opentox-ruby into development
Diffstat (limited to 'lib')
-rw-r--r-- lib/algorithm.rb 90
-rw-r--r-- lib/model.rb 127
2 files changed, 159 insertions, 58 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 96b9df1..2652695 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -138,7 +138,7 @@ module OpenTox
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
# @param [optional] params Ignored (only for compatibility with local_svm_regression)
# @return [Hash] Hash with keys `:prediction, :confidence`
- def self.weighted_majority_vote(neighbors,params={})
+ def self.weighted_majority_vote(neighbors,params={}, props=nil)
conf = 0.0
confidence = 0.0
neighbors.each do |neighbor|
@@ -164,7 +164,7 @@ module OpenTox
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
- def self.local_svm_regression(neighbors, params)
+ def self.local_svm_regression(neighbors, params, props=nil)
take_logs=true
neighbors.each do |n|
if (! n[:activity].nil?) && (n[:activity].to_f < 0.0)
@@ -178,7 +178,7 @@ module OpenTox
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
begin
- prediction = local_svm(neighbors, acts, sims, "nu-svr", params)
+ prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params))
prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f)
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
rescue Exception => e
@@ -194,15 +194,16 @@ module OpenTox
# Local support vector classification from neighbors
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @param [Array] props, optional propositionalization of neighbors and query structure, ordered as [ two-nested-Arrays_for_n, Array_for_q ]; if nil, local_svm (weighted Tanimoto kernel) is used instead of local_svm_prop
# @return [Hash] Hash with keys `:prediction, :confidence`
- def self.local_svm_classification(neighbors, params)
+ def self.local_svm_classification(neighbors, params, props=nil)
acts = neighbors.collect do |n|
act = n[:activity]
end # activities of neighbors for supervised learning
acts_f = acts.collect {|v| v == true ? 1.0 : 0.0}
sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors
begin
- prediction = local_svm(neighbors, acts_f, sims, "C-bsvc", params)
+ prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params))
LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
rescue Exception => e
LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
@@ -216,14 +217,17 @@ module OpenTox
# Local support vector prediction from neighbors.
- # Not to be called directly (use local_svm_regression or local_svm_classification.
+ # Uses pre-defined Kernel Matrix.
+ # Not to be called directly (use local_svm_regression or local_svm_classification).
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Array] acts, activities for neighbors.
# @param [Array] sims, similarities for neighbors.
# @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
+ # @note This method takes no props argument; the propositionalized variant is local_svm_prop, which expects props ordered as [ two-nested-Arrays_for_n, Array_for_q ]
# @return [Numeric] A prediction value.
def self.local_svm(neighbors, acts, sims, type, params)
+ LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)."
neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches
gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel
if neighbor_matches.size == 0
@@ -285,6 +289,80 @@ module OpenTox
prediction
end
+ # Local support vector prediction from neighbors.
+ # Uses propositionalized setting: a kernlab SVM is trained in R on the neighbors' feature matrix.
+ # Not to be called directly (use local_svm_regression or local_svm_classification).
+ # @param [Array] props, propositionalization of neighbors and query structure, ordered as [ two-nested-Arrays_for_n, Array_for_q ]
+ # @param [Array] acts, activities for neighbors.
+ # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification).
+ # @param [Hash] params Not read by this method.
+ # @return [Numeric, Boolean, nil] A prediction value (true/false for "C-bsvc"); nil if model building or prediction failed (the error is only logged).
+ # @raise [RuntimeError] if the neighbor matrix is empty.
+ def self.local_svm_prop(props, acts, type, params)
+
+   LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)."
+   n_prop = props[0] # is a matrix, i.e. two nested Arrays.
+   q_prop = props[1] # is an Array.
+   raise "No neighbors found." if n_prop.size == 0
+
+   prediction = nil # stays nil when the R calls in the begin block below fail
+   @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests
+   @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
+   LOGGER.debug "Setting R data ..."
+   # set data
+   @r.n_prop = n_prop.flatten # the matrix is passed flattened and rebuilt row-wise in R below
+   @r.n_prop_x_size = n_prop.size
+   @r.n_prop_y_size = n_prop[0].size
+   @r.y = acts
+   @r.q_prop = q_prop
+
+   begin
+     LOGGER.debug "Preparing R data ..."
+     # prepare data
+     @r.eval "y<-matrix(y)"
+     @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)"
+     @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)"
+
+     # model + support vectors
+     LOGGER.debug "Creating SVM model ..."
+     @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)"
+     LOGGER.debug "Predicting ..."
+     if type == "nu-svr"
+       @r.eval "p<-predict(model,q_prop)[1,1]"
+       prediction = @r.p
+     elsif type == "C-bsvc"
+       @r.eval "p<-predict(model,q_prop)"
+       prediction = (@r.p.to_f == 1.0)
+     end
+   rescue Exception => e
+     LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}"
+   ensure
+     @r.quit # free R, also when model building or prediction failed
+   end
+   prediction
+ end
+
+
end
module Substructure
diff --git a/lib/model.rb b/lib/model.rb
index f0fd46b..4321646 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -91,7 +91,7 @@ module OpenTox
include Model
include Algorithm
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel
def initialize(uri=nil)
@@ -114,6 +114,7 @@ module OpenTox
@prediction_algorithm = "Neighbors.weighted_majority_vote"
@min_sim = 0.3
+ @prop_kernel = false
end
@@ -236,17 +237,22 @@ module OpenTox
neighbors_best=nil
begin
- for i in 1..modulo[0] do
- (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
- LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
- neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
- if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
- prediction_best=prediction
- neighbors_best=@neighbors
+ for i in 1..modulo[0] do
+ (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction
+ LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}."
+ neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part
+ if @prop_kernel && @prediction_algorithm.include?("svm")
+ props = get_props
+ else
+ props = nil
+ end
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
+ if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs
+ prediction_best=prediction
+ neighbors_best=@neighbors
+ end
+ position = position + lr_size
end
- position = position + lr_size
- end
rescue Exception => e
LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message
end
@@ -255,10 +261,15 @@ module OpenTox
@neighbors=neighbors_best
### END AM balanced predictions
- else # regression case: no balancing
+ else # AM: no balancing
LOGGER.info "LAZAR: Unbalanced."
neighbors
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
+ if @prop_kernel && @prediction_algorithm.include?("svm")
+ props = get_props
+ else
+ props = nil
+ end
+ prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)")
end
value_feature_uri = File.join( @uri, "predicted", "value")
@@ -266,7 +277,7 @@ module OpenTox
prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]}
prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil?
-
+
@prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables]
@prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri]
@@ -333,54 +344,66 @@ module OpenTox
@prediction_dataset
end
- # Find neighbors and store them as object variable
- def neighbors_balanced(s, l, start, offset)
- @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
-
- @neighbors = []
- begin
- #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH
- [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset
- training_features = @fingerprints[training_compound]
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- this_neighbor = {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
- @neighbors << this_neighbor
+ # Calculate the propositionalization matrix aka instantiation matrix (one row per neighbor, one column per feature)
+ # Same for the vector describing the query compound
+ # An entry is the feature's p-value if the feature is in the neighbor's fingerprint
+ # (for the query: if @compound.match returns a non-empty result for it), 0.0 otherwise.
+ # @return [Array] [ matrix, row ], i.e. [ two-nested-Arrays for the neighbors, Array for the query compound ]
+ def get_props
+ matrix = Array.new
+ begin
+ @neighbors.each do |n|
+ n = n[:compound]
+ row = []
+ @features.each do |f|
+ if ! @fingerprints[n].nil?
+ row << (@fingerprints[n].include?(f) ? @p_values[f] : 0.0) # present feature => its p-value, same convention as the query row below
+ else
+ row << 0.0
end
end
+ matrix << row
+ end
+ row = []
+ @features.each do |f|
+ row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f])
end
rescue Exception => e
- LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message
+ LOGGER.debug "get_props failed with '#{e.message}'"
end
-
+ [ matrix, row ]
end
+ # Collect neighbors into the object variable @neighbors, looking only at a subset of the
+ # training compounds: the slice l[start, offset] together with all of s.
+ def neighbors_balanced(s, l, start, offset)
+   if @feature_calculation_algorithm
+     @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)")
+   end
+   @neighbors = []
+   balanced_subset = [ l[start, offset ] , s ].flatten # AM: access only a balanced subset
+   balanced_subset.each do |candidate|
+     add_neighbor @fingerprints[candidate], candidate
+   end
+ end
- # Find neighbors and store them as object variable
+ # Find neighbors among all training compounds in @fingerprints and store them in the object variable @neighbors.
def neighbors
-
- @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
-
- @neighbors = []
- @fingerprints.each do |training_compound,training_features|
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
- if sim > @min_sim
- @activities[training_compound].each do |act|
- @neighbors << {
- :compound => training_compound,
- :similarity => sim,
- :features => training_features,
- :activity => act
- }
- end
+ @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
+ @neighbors = [] # start from an empty list; add_neighbor appends to it
+ @fingerprints.each do |training_compound,training_features| # AM: access all compounds
+ add_neighbor training_features, training_compound
+ end
end
+
+ # Adds one entry per activity of training_compound to @neighbors if the similarity between @compound_features and training_features exceeds @min_sim.
+ def add_neighbor(training_features, training_compound)
+ sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") # the evaluated string names training_features, so that parameter must keep its name
+ if sim > @min_sim
+ @activities[training_compound].each do |act| # one neighbor entry per activity value
+ @neighbors << {
+ :compound => training_compound,
+ :similarity => sim,
+ :features => training_features,
+ :activity => act
+ }
end
- end
+ end
end
# Find database activities and store them in @prediction_dataset