From b7a03a18ce90d664d89d6a414512aa03a6dddcc4 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 08:51:56 +0200 Subject: Add_neighbor function --- lib/model.rb | 59 ++++++++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/model.rb b/lib/model.rb index 139aed8..f5e0410 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -338,54 +338,39 @@ module OpenTox @prediction_dataset end - # Find neighbors and store them as object variable + # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - @neighbors = [] - begin - #@fingerprints.each do |training_compound,training_features| # AM: this is original by CH [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset training_features = @fingerprints[training_compound] - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - this_neighbor = { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - @neighbors << this_neighbor - end - end - end - rescue Exception => e - LOGGER.error "BLAZAR failed in neighbors: "+e.class.to_s+": "+e.message + add_neighbor training_features end end - - # Find neighbors and store them as object variable + # Find neighbors and store them as object variable. def neighbors - - @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm - - @neighbors = [] - @fingerprints.each do |training_compound,training_features| - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") - if sim > @min_sim - @activities[training_compound].each do |act| - @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act - } - end + @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm + @neighbors = [] + @fingerprints.each do |training_compound,training_features| # AM: access all compounds + add_neighbor training_features + end + end + + # Adds a neighbor to @neighbors if it passes the similarity threshold. + def add_neighbor(training_features) + sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + if sim > @min_sim + @activities[training_compound].each do |act| + @neighbors << { + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act + } end - end + end end # Find database activities and store them in @prediction_dataset -- cgit v1.2.3 From baffedfc7543cfc8a90fc185fc91f2748ce94528 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 10:24:13 +0200 Subject: Fixed add_neighbor --- lib/model.rb | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/model.rb b/lib/model.rb index f5e0410..3d27706 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -338,28 +338,45 @@ module OpenTox @prediction_dataset end + # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) +# def get_prop_matrix +# matrix = Array.new +# begin +# @neighbors.each do |n| +# row = [] +# @features.each do |f| +# row << @fingerprints[n].include?(f) ? 0.0 : @p_values[f] +# end +# matrix << row +# end +# rescue Exception => e +# LOGGER.debug "get_prop_matrix failed with '" + $! + "'" +# end +# matrix +# end + # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset training_features = @fingerprints[training_compound] - add_neighbor training_features + add_neighbor training_features, training_compound end end - # Find neighbors and store them as object variable. + # Find neighbors and store them as object variable, access all compounds for that. def neighbors @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features + add_neighbor training_features, training_compound end end # Adds a neighbor to @neighbors if it passes the similarity threshold. - def add_neighbor(training_features) + def add_neighbor(training_features, training_compound) sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") if sim > @min_sim @activities[training_compound].each do |act| -- cgit v1.2.3 From b6ba84a077db9f6c708807f059e501333f7303b1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 12:18:08 +0200 Subject: 1st v --- lib/model.rb | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/model.rb b/lib/model.rb index 3d27706..bebf5d3 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -215,6 +215,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + prop_matrix = get_prop_matrix prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -228,10 +229,11 @@ module OpenTox prediction=prediction_best @neighbors=neighbors_best - ### END AM balanced predictions + ### END AM balanced predictions else # regression case: no balancing neighbors + prop_matrix = get_prop_matrix prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end @@ -339,21 +341,26 @@ module OpenTox end # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) -# def get_prop_matrix -# matrix = Array.new -# begin -# @neighbors.each do |n| -# row = [] -# @features.each do |f| -# row << @fingerprints[n].include?(f) ? 0.0 : @p_values[f] -# end -# matrix << row -# end -# rescue Exception => e -# LOGGER.debug "get_prop_matrix failed with '" + $! + "'" -# end -# matrix -# end + def get_prop_matrix + matrix = Array.new + begin + @neighbors.each do |n| + n = n[:compound] + row = [] + @features.each do |f| + if ! @fingerprints[n].nil? + row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) + else + row << 0.0 + end + end + matrix << row + end + rescue Exception => e + LOGGER.debug "get_prop_matrix failed with '" + $! + "'" + end + matrix + end # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) -- cgit v1.2.3 From d012b9e8da641c342c455a1384ddf3b14f5b5c35 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 12:38:04 +0200 Subject: 2nd v --- lib/model.rb | 87 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/model.rb b/lib/model.rb index bebf5d3..f4df8ea 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -177,7 +177,7 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -211,33 +211,33 @@ module OpenTox neighbors_best=nil begin - for i in 1..modulo[0] do - (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction - LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." - neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - prop_matrix = get_prop_matrix - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") - if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs - prediction_best=prediction - neighbors_best=@neighbors + for i in 1..modulo[0] do + (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction + LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." + neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part + props = get_props + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs + prediction_best=prediction + neighbors_best=@neighbors + end + position = position + lr_size end - position = position + lr_size - end rescue Exception => e LOGGER.error "BLAZAR failed in prediction: "+e.class.to_s+": "+e.message end prediction=prediction_best @neighbors=neighbors_best - ### END AM balanced predictions + ### END AM balanced predictions else # regression case: no balancing neighbors - prop_matrix = get_prop_matrix + props = get_props prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") end - - # TODO: reasonable feature name + + # TODO: reasonable feature name #prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s) value_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"value") confidence_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),"confidence") @@ -245,7 +245,7 @@ module OpenTox prediction_feature_uris = {value_feature_uri => prediction[:prediction], confidence_feature_uri => prediction[:confidence]} #prediction_feature_uris[value_feature_uri] = "No similar compounds in training dataset." if @neighbors.size == 0 or prediction[:prediction].nil? prediction_feature_uris[value_feature_uri] = nil if @neighbors.size == 0 or prediction[:prediction].nil? - + #@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] @@ -275,10 +275,10 @@ module OpenTox DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )), # TODO: factor information to value }) - #OT.prediction => prediction[:prediction], - #OT.confidence => prediction[:confidence], - #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] - @prediction_dataset.add @compound.uri, prediction_feature_uri, value + #OT.prediction => prediction[:prediction], + #OT.confidence => prediction[:confidence], + #OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}] + @prediction_dataset.add @compound.uri, prediction_feature_uri, value end if verbose @@ -341,34 +341,39 @@ module OpenTox end # Calculate the propositionalization matrix aka instantiation matrix (0/1 entries for features) - def get_prop_matrix + # Same for the vector describing the query compound + def get_props matrix = Array.new begin - @neighbors.each do |n| - n = n[:compound] + @neighbors.each do |n| + n = n[:compound] + row = [] + @features.each do |f| + if ! @fingerprints[n].nil? + row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) + else + row << 0.0 + end + end + matrix << row + end row = [] @features.each do |f| - if ! @fingerprints[n].nil? - row << (@fingerprints[n].include?(f) ? 0.0 : @p_values[f]) - else - row << 0.0 - end + row << (@compound.match([f]).size == 0 ? 0.0 : @p_values[f]) end - matrix << row - end rescue Exception => e - LOGGER.debug "get_prop_matrix failed with '" + $! + "'" + LOGGER.debug "get_props failed with '" + $! + "'" end - matrix + [ matrix, row ] end # Find neighbors and store them as object variable, access only a subset of compounds for that. def neighbors_balanced(s, l, start, offset) @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset - training_features = @fingerprints[training_compound] - add_neighbor training_features, training_compound + [ l[start, offset ] , s ].flatten.each do |training_compound| # AM: access only a balanced subset + training_features = @fingerprints[training_compound] + add_neighbor training_features, training_compound end end @@ -378,7 +383,7 @@ module OpenTox @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features, training_compound + add_neighbor training_features, training_compound end end @@ -388,10 +393,10 @@ module OpenTox if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { - :compound => training_compound, - :similarity => sim, - :features => training_features, - :activity => act + :compound => training_compound, + :similarity => sim, + :features => training_features, + :activity => act } end end -- cgit v1.2.3 From 3f209f75a2abe2b8a89df3afcb3f54ec8329a5e1 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 14:16:34 +0200 Subject: 3rd v --- lib/algorithm.rb | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 96b9df1..280ed82 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -178,7 +178,7 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = local_svm(neighbors, acts, sims, "nu-svr", params) + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(neighbors, acts, sims, "nu-svr", params, props)) prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e @@ -194,15 +194,16 @@ module OpenTox # Local support vector classification from neighbors # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_classification(neighbors, params) + def self.local_svm_classification(neighbors, params, props=nil) acts = neighbors.collect do |n| act = n[:activity] end # activities of neighbors for supervised learning acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = local_svm(neighbors, acts_f, sims, "C-bsvc", params) + prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(neighbors, acts_f, sims, "C-bsvc", params, props)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" @@ -216,14 +217,16 @@ module OpenTox # Local support vector prediction from neighbors. - # Not to be called directly (use local_svm_regression or local_svm_classification. + # Uses pre-defined Kernel Matrix. + # Not to be called directly (use local_svm_regression or local_svm_classification). # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Array] acts, activities for neighbors. # @param [Array] sims, similarities for neighbors. # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. - def self.local_svm(neighbors, acts, sims, type, params) + def self.local_svm(neighbors, acts, sims, type, params, props=nil) neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 @@ -285,6 +288,78 @@ module OpenTox prediction end + # Local support vector prediction from neighbors. + # Uses propositionalized setting. + # Not to be called directly (use local_svm_regression or local_svm_classification). + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Array] acts, activities for neighbors. + # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] + # @param [String] type, one of "nu-svr" (regression) or "C-bsvc" (classification). + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Numeric] A prediction value. + def self.local_svm_prop(props, acts, type, params) + + n_prop = props[0] # is a matrix, i.e. two nested Arrays. + q_prop = props[1] # is an Array. + + #neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches + #gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if n_prop.size == 0 + raise "No neighbors found." + else + # gram matrix + #(0..(neighbor_matches.length-1)).each do |i| + # gram_matrix[i] = [] unless gram_matrix[i] + # # upper triangle + # ((i+1)..(neighbor_matches.length-1)).each do |j| + # sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + # gram_matrix[i][j] = Algorithm.gauss(sim) + # gram_matrix[j] = [] unless gram_matrix[j] + # gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + # end + # gram_matrix[i][i] = 1.0 + #end + + #LOGGER.debug gram_matrix.to_yaml + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.n_prop = n_prop.flatten + @r.n = n_prop.size + @r.y = acts + @r.q_prop = q_prop + + begin + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "prop_matrix<-matrix(n_prop,n,n)" + @r.eval "q_prop<-as.vector(q_prop)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(prop_matrix, y, type=\"#{type}\", nu=0.5)" + LOGGER.debug "Predicting ..." + if type == "nu-svr" + @r.eval "p<-predict(model,q_prop)[1,1]" + elsif type == "C-bsvc" + @r.eval "p<-predict(model,q_prop)" + end + if type == "nu-svr" + prediction = @r.p + elsif type == "C-bsvc" + prediction = (@r.p.to_f == 1.0 ? true : false) + end + @r.quit # free R + rescue Exception => e + LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" + end + end + prediction + end + + end module Substructure -- cgit v1.2.3 From ca6dd87b7c80611c4f4e4716f68fe6633ce1066b Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 15:04:50 +0200 Subject: 4th v --- lib/algorithm.rb | 4 ++-- lib/model.rb | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 280ed82..2f722c1 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -178,7 +178,7 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(neighbors, acts, sims, "nu-svr", params, props)) + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, sims, "nu-svr", params)) prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e @@ -203,7 +203,7 @@ module OpenTox acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(neighbors, acts_f, sims, "C-bsvc", params, props)) + prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, sims, "C-bsvc", params)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" diff --git a/lib/model.rb b/lib/model.rb index f4df8ea..6a4602f 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -176,8 +176,7 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - - if metadata[RDF.type] == [OTA.ClassificationLazySingleTarget] + if metadata[RDF.type].include?([OTA.ClassificationLazySingleTarget][0]) # AM: searching in metadata for classification # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -231,7 +230,7 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # regression case: no balancing + else # no balancing as before neighbors props = get_props prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") -- cgit v1.2.3 From 2b12d07bec101df8c10b7ab5aff1491b0997a6c7 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Wed, 25 May 2011 17:10:14 +0200 Subject: 6th v --- lib/algorithm.rb | 17 +++++++++-------- lib/model.rb | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 2f722c1..e089184 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -138,7 +138,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` # @param [optional] params Ignored (only for compatibility with local_svm_regression) # @return [Hash] Hash with keys `:prediction, :confidence` - def self.weighted_majority_vote(neighbors,params={}) + def self.weighted_majority_vote(neighbors,params={}, props=nil) conf = 0.0 confidence = 0.0 neighbors.each do |neighbor| @@ -178,7 +178,7 @@ module OpenTox sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, sims, "nu-svr", params)) + prediction = (props.nil? ? local_svm(neighbors, acts, sims, "nu-svr", params) : local_svm_prop(props, acts, "nu-svr", params)) prediction = (take_logs ? 10**(prediction.to_f) : prediction.to_f) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e @@ -203,7 +203,7 @@ module OpenTox acts_f = acts.collect {|v| v == true ? 1.0 : 0.0} sims = neighbors.collect{ |n| Algorithm.gauss(n[:similarity]) } # similarity values btwn q and nbors begin - prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, sims, "C-bsvc", params)) + prediction = (props.nil? ? local_svm(neighbors, acts_f, sims, "C-bsvc", params) : local_svm_prop(props, acts_f, "C-bsvc", params)) LOGGER.debug "Prediction is: '" + prediction.to_s + "'." rescue Exception => e LOGGER.debug "#{e.class}: #{e.message} #{e.backtrace}" @@ -226,7 +226,7 @@ module OpenTox # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. - def self.local_svm(neighbors, acts, sims, type, params, props=nil) + def self.local_svm(neighbors, acts, sims, type, params) neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 @@ -326,16 +326,17 @@ module OpenTox LOGGER.debug "Setting R data ..." # set data @r.n_prop = n_prop.flatten - @r.n = n_prop.size + @r.n_prop_x_size = n_prop.size + @r.n_prop_y_size = n_prop[0].size @r.y = acts @r.q_prop = q_prop begin LOGGER.debug "Preparing R data ..." # prepare data - @r.eval "y<-as.vector(y)" - @r.eval "prop_matrix<-matrix(n_prop,n,n)" - @r.eval "q_prop<-as.vector(q_prop)" + @r.eval "y<-matrix(y)" + @r.eval "prop_matrix<-matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=TRUE)" + @r.eval "q_prop<-matrix(q_prop, 1, n_prop_y_size, byrow=TRUE)" # model + support vectors LOGGER.debug "Creating SVM model ..." diff --git a/lib/model.rb b/lib/model.rb index 6a4602f..1a5aa37 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -215,7 +215,7 @@ module OpenTox LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part props = get_props - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction neighbors_best=@neighbors @@ -233,7 +233,7 @@ module OpenTox else # no balancing as before neighbors props = get_props - prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})") + prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end # TODO: reasonable feature name -- cgit v1.2.3 From 77c885b7394aa11ba5e59eb60884205332efa31a Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 08:38:21 +0200 Subject: 7th v --- lib/algorithm.rb | 2 ++ lib/model.rb | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/algorithm.rb b/lib/algorithm.rb index e089184..91e075a 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -227,6 +227,7 @@ module OpenTox # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ] # @return [Numeric] A prediction value. def self.local_svm(neighbors, acts, sims, type, params) + LOGGER.debug "Local SVM (Weighted Tanimoto Kernel)." neighbor_matches = neighbors.collect{ |n| n[:features] } # URIs of matches gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel if neighbor_matches.size == 0 @@ -299,6 +300,7 @@ module OpenTox # @return [Numeric] A prediction value. def self.local_svm_prop(props, acts, type, params) + LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)." n_prop = props[0] # is a matrix, i.e. two nested Arrays. q_prop = props[1] # is an Array. diff --git a/lib/model.rb b/lib/model.rb index 1a5aa37..921335c 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -214,7 +214,7 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - props = get_props + (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -232,7 +232,7 @@ module OpenTox else # no balancing as before neighbors - props = get_props + (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end -- cgit v1.2.3 From 065fdeb351f68d0445b66516ccf8e7cfcc7e2a1f Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 12:22:07 +0200 Subject: Fixed prediction type switching --- lib/model.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/model.rb b/lib/model.rb index 921335c..d63eef2 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -176,7 +176,10 @@ module OpenTox return @prediction_dataset if database_activity(subjectid) - if metadata[RDF.type].include?([OTA.ClassificationLazySingleTarget][0]) # AM: searching in metadata for classification + load_metadata(subjectid) + case OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type + when "classification" + # AM: Balancing, see http://www.maunz.de/wordpress/opentox/2011/balanced-lazar l = Array.new # larger s = Array.new # smaller fraction @@ -231,6 +234,7 @@ module OpenTox ### END AM balanced predictions else # no balancing as before + LOGGER.info "LAZAR: Unbalanced." neighbors (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") -- cgit v1.2.3 From 3922c8e5fcb9fbe6ddedab9f70e114717ff33a60 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 26 May 2011 14:28:19 +0200 Subject: 8th v --- lib/algorithm.rb | 2 +- lib/model.rb | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 91e075a..2652695 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -164,7 +164,7 @@ module OpenTox # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required # @return [Hash] Hash with keys `:prediction, :confidence` - def self.local_svm_regression(neighbors, params) + def self.local_svm_regression(neighbors, params, props=nil) take_logs=true neighbors.each do |n| if (! n[:activity].nil?) && (n[:activity].to_f < 0.0) diff --git a/lib/model.rb b/lib/model.rb index 7c2ef58..28c05a9 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -69,7 +69,7 @@ module OpenTox include Model include Algorithm - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel def initialize(uri=nil) @@ -92,6 +92,7 @@ module OpenTox @prediction_algorithm = "Neighbors.weighted_majority_vote" @min_sim = 0.3 + @prop_kernel = false end @@ -219,7 +220,11 @@ module OpenTox (i == modulo[0]) && (slack>0) ? lr_size = s.size + slack : lr_size = s.size + addon # determine fraction LOGGER.info "BLAZAR: Neighbors round #{i}: #{position} + #{lr_size}." neighbors_balanced(s, l, position, lr_size) # get ratio fraction of larger part - (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") if prediction_best.nil? || prediction[:confidence].abs > prediction_best[:confidence].abs prediction_best=prediction @@ -235,10 +240,14 @@ module OpenTox @neighbors=neighbors_best ### END AM balanced predictions - else # regression case: no balancing + else # AM: no balancing LOGGER.info "LAZAR: Unbalanced." neighbors - (@prediction_algorithm.include? "svm" and params[:prop_kernel] == "true") ? props = get_props : props = nil + if @prop_kernel && @prediction_algorithm.include?("svm") + props = get_props + else + props = nil + end prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values}, props)") end -- cgit v1.2.3