From f8552611c2dbe25d76474f51e4e895bf9c2b5c5e Mon Sep 17 00:00:00 2001 From: Christoph Helma Date: Fri, 19 Nov 2010 16:53:21 +0100 Subject: lazar predictions for toxcreate working --- lib/algorithm.rb | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 142 insertions(+), 12 deletions(-) (limited to 'lib/algorithm.rb') diff --git a/lib/algorithm.rb b/lib/algorithm.rb index 711f63b..a6fa4a7 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -1,3 +1,9 @@ +# R integration +# workaround to initialize R non-interactively (former rinruby versions did this by default) +# avoids compiling R with X +R = nil +require "rinruby" + module OpenTox # Wrapper for OpenTox Algorithms @@ -6,8 +12,10 @@ module OpenTox include OpenTox # Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters + # @param [optional,Hash] params Algorithm parameters + # @return [String] URI of new resource (dataset, model, ...) def run(params=nil) - RestClientWrapper.post(@uri, params) + RestClientWrapper.post(@uri, params).to_s end # Get OWL-DL representation in RDF/XML format @@ -23,9 +31,11 @@ module OpenTox include Algorithm end + # Fminer algorithms (https://github.com/amaunz/fminer2) module Fminer include Algorithm + # Backbone Refinement Class mining (http://bbrc.maunz.de/) class BBRC include Fminer # Initialize bbrc algorithm @@ -35,6 +45,7 @@ module OpenTox end end + # LAtent STructure Pattern Mining (http://last-pm.maunz.de) class LAST include Fminer # Initialize last algorithm @@ -58,15 +69,15 @@ module OpenTox # Utility methods without dedicated webservices + # Similarity calculations module Similarity include Algorithm # Tanimoto similarity - # # @param [Array] features_a Features of first compound # @param [Array] features_b Features of second compound # @param [optional, Hash] weights Weights for all features - # @return [Float] (Wighted) tanimoto similarity + # @return [Float] (Weighted) tanimoto similarity def self.tanimoto(features_a,features_b,weights=nil) common_features = features_a & features_b all_features = (features_a + features_b).uniq @@ -86,15 +97,19 @@ module OpenTox end # Euclidean similarity - def self.euclidean(prop_a,prop_b,weights=nil) - common_properties = prop_a.keys & prop_b.keys + # @param [Hash] properties_a Properties of first compound + # @param [Hash] properties_b Properties of second compound + # @param [optional, Hash] weights Weights for all properties + # @return [Float] (Weighted) euclidean similarity + def self.euclidean(properties_a,properties_b,weights=nil) + common_properties = properties_a.keys & properties_b.keys if common_properties.size > 1 dist_sum = 0 common_properties.each do |p| if weights - dist_sum += ( (prop_a[p] - prop_b[p]) * Algorithm.gauss(weights[p]) )**2 + dist_sum += ( (properties_a[p] - properties_b[p]) * Algorithm.gauss(weights[p]) )**2 else - dist_sum += (prop_a[p] - prop_b[p])**2 + dist_sum += (properties_a[p] - properties_b[p])**2 end end 1/(1+Math.sqrt(dist_sum)) @@ -103,14 +118,129 @@ module OpenTox end end end + + module Neighbors + + # Classification with majority vote from neighbors weighted by similarity + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity` + # @param [optional] params Ignored (only for compatibility with local_svm_regression) + # @return [Hash] Hash with keys `:prediction, :confidence` + def self.weighted_majority_vote(neighbors,params={}) + conf = 0.0 + confidence = 0.0 + neighbors.each do |neighbor| + case neighbor[:activity].to_s + when 'true' + conf += Algorithm.gauss(neighbor[:similarity]) + when 'false' + conf -= Algorithm.gauss(neighbor[:similarity]) + end + end + if conf > 0.0 + prediction = true + elsif conf < 0.0 + prediction = false + else + prediction = nil + end + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence.abs} + end + + # Local support vector regression from neighbors + # @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features` + # @param [Hash] params Keys `:similarity_algorithm,:p_values` are required + # @return [Hash] Hash with keys `:prediction, :confidence` + def self.local_svm_regression(neighbors,params ) + sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors + conf = sims.inject{|sum,x| sum + x } + acts = neighbors.collect do |n| + act = n[:activity] + Math.log10(act.to_f) + end # activities of neighbors for supervised learning + + neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches + gram_matrix = [] # square matrix of similarities between neighbors; implements weighted tanimoto kernel + if neighbor_matches.size == 0 + raise "No neighbors found" + else + # gram matrix + (0..(neighbor_matches.length-1)).each do |i| + gram_matrix[i] = [] unless gram_matrix[i] + # upper triangle + ((i+1)..(neighbor_matches.length-1)).each do |j| + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + gram_matrix[i][j] = Algorithm.gauss(sim) + gram_matrix[j] = [] unless gram_matrix[j] + gram_matrix[j][i] = gram_matrix[i][j] # lower triangle + end + gram_matrix[i][i] = 1.0 + end + + LOGGER.debug gram_matrix.to_yaml + + @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r.eval "library('kernlab')" # this requires R package "kernlab" to be installed + LOGGER.debug "Setting R data ..." + # set data + @r.gram_matrix = gram_matrix.flatten + @r.n = neighbor_matches.size + @r.y = acts + @r.sims = sims + + LOGGER.debug "Preparing R data ..." + # prepare data + @r.eval "y<-as.vector(y)" + @r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))" + @r.eval "sims<-as.vector(sims)" + + # model + support vectors + LOGGER.debug "Creating SVM model ..." + @r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)" + @r.eval "sv<-as.vector(SVindex(model))" + @r.eval "sims<-sims[sv]" + @r.eval "sims<-as.kernelMatrix(matrix(sims,1))" + LOGGER.debug "Predicting ..." + @r.eval "p<-predict(model,sims)[1,1]" + prediction = 10**(@r.p.to_f) + LOGGER.debug "Prediction is: '" + @prediction.to_s + "'." + @r.quit # free R + end + confidence = conf/neighbors.size if neighbors.size > 0 + {:prediction => prediction, :confidence => confidence} + + end + + end + + module Substructure + include Algorithm + # Substructure matching + # @param [OpenTox::Compound] compound Compound + # @param [Array] features Array with Smarts strings + # @return [Array] Array with matching Smarts + def self.match(compound,features) + compound.match(features) + end + end + + module Dataset + include Algorithm + # API should match Substructure.match + def features(dataset_uri,compound_uri) + end + end - # Gauss kernel - def self.gauss(sim, sigma = 0.3) - x = 1.0 - sim - Math.exp(-(x*x)/(2*sigma*sigma)) - end + # Gauss kernel + # @return [Float] + def self.gauss(x, sigma = 0.3) + d = 1.0 - x + Math.exp(-(d*d)/(2*sigma*sigma)) + end # Median of an array + # @param [Array] Array with values + # @return [Float] Median def self.median(array) return nil if array.empty? array.sort! -- cgit v1.2.3