diff options
author | dv <dv@dv.de> | 2011-07-20 13:27:04 +0200 |
---|---|---|
committer | dv <dv@dv.de> | 2011-07-20 13:27:04 +0200 |
commit | 4fa97d6bec952fdb3ceb6175c6b39b51aab45f4b (patch) | |
tree | 90a791ce511ee390423f99655272033599ff942a | |
parent | 5a86ca682633dc6865729a75be0046dff2460684 (diff) | |
parent | 62930c5b40a1ed0e4f170d70c2284a004b3d0d55 (diff) |
Merge branch 'support' into development
-rw-r--r-- | lib/algorithm.rb | 48 | ||||
-rw-r--r-- | lib/compound.rb | 29 | ||||
-rw-r--r-- | lib/model.rb | 26 |
3 files changed, 91 insertions, 12 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index cfca069..56ab94c 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -153,19 +153,34 @@ module OpenTox # @param [Array] features_a Features of first compound # @param [Array] features_b Features of second compound # @param [optional, Hash] weights Weights for all features + # @param [optional, Hash] params Keys: `:training_compound, :compound, :fingerprints, :nr_hits, :compound_features_hits` are required # @return [Float] (Weighted) tanimoto similarity - def self.tanimoto(features_a,features_b,weights=nil) + def self.tanimoto(features_a,features_b,weights=nil,params=nil) common_features = features_a & features_b all_features = (features_a + features_b).uniq - common_p_sum = 0.0 + #LOGGER.debug "dv --------------- common: #{common_features}, all: #{all_features}" if common_features.size > 0 if weights - common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} - all_p_sum = 0.0 - all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} + LOGGER.debug "nr_hits: #{params[:nr_hits]}" + if params[:nr_hits] == "true" + params[:weights] = weights + params[:mode] = "min" + params[:features] = common_features + common_p_sum = Algorithm.p_sum_support(params) + params[:mode] = "max" + params[:features] = all_features + all_p_sum = Algorithm.p_sum_support(params) + else + common_p_sum = 0.0 + common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])} + all_p_sum = 0.0 + all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])} + end + LOGGER.debug "common_p_sum: #{common_p_sum}, all_p_sum: #{all_p_sum}, c/a: #{common_p_sum/all_p_sum}" common_p_sum/all_p_sum else - common_features.to_f/all_features + LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}" + (common_features.size/all_features.size).to_f end else 0.0 @@ -839,6 +854,27 @@ module OpenTox max end + # Returns Support value of an fingerprint + # @param [String] smiles of feature + # @param [Hash] params Keys: `fingerprints:, compound:, nr_hits:` are required + # return [Numeric] Support value + def self.support(feature,params) + params[:fingerprints][params[:training_compound]][feature] + end + + # Returns Support value of an fingerprint + # @param [Hash] params Keys: `:compound_features_hits, :weights, :fingerprints, :features, :compound, :nr_hits:, :mode` are required + # return [Numeric] Support value + def self.p_sum_support(params) + p_sum = 0.0 + params[:features].each{|f| + compound_hits = params[:compound_features_hits][f] + neighbor_hits = Algorithm.support(f,params) + p_sum += eval "(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))" + } + p_sum + end + # Adds variance, mean and standard deviation calculation to Array class module Variance def sum(&blk) diff --git a/lib/compound.rb b/lib/compound.rb index d374b02..616db2c 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -164,6 +164,35 @@ module OpenTox #smarts_array.collect { |s| s if match?(s)}.compact end + # Match_hits an array of smarts strings, returns hash with matching smarts as key and number of non-unique hits as value + # @example + # compound = OpenTox::Compound.from_name("Benzene") + # compound.match(['cc','cN']) # returns ['cc'] + # @param [Array] smarts_array Array with Smarts strings + # @return [Hash] Hash with matching smarts as key and number of non-unique hits as value + def match_hits(smarts_array) + # avoid recreation of OpenBabel objects + obconversion = OpenBabel::OBConversion.new + obmol = OpenBabel::OBMol.new + obconversion.set_in_format('inchi') + obconversion.read_string(obmol,@inchi) + smarts_pattern = OpenBabel::OBSmartsPattern.new + smarts_hits = {} + #LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}" + smarts_array.collect do |smarts| + #LOGGER.debug "dv ----------- all smarts #{smarts}" + smarts_pattern.init(smarts) + if smarts_pattern.match(obmol) + hits = smarts_pattern.get_map_list + smarts_hits[smarts] = hits.size + end + end + LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}" + return smarts_hits + #smarts_array.collect { |s| s if match?(s)}.compact + end + + # Get URI of compound image with highlighted fragments # # @param [Array] activating Array with activating Smarts strings diff --git a/lib/model.rb b/lib/model.rb index 13212ee..4cbe95a 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -91,7 +91,7 @@ module OpenTox include Algorithm include Model - attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :transform + attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform def initialize(uri=nil) @@ -113,7 +113,8 @@ module OpenTox @feature_calculation_algorithm = "Substructure.match" @similarity_algorithm = "Similarity.tanimoto" @prediction_algorithm = "Neighbors.weighted_majority_vote" - + + @nr_hits = false @min_sim = 0.3 @prop_kernel = false @transform = { "class" => "NOP" } @@ -220,7 +221,8 @@ module OpenTox :fingerprints => @fingerprints, :similarity_algorithm => @similarity_algorithm, :prop_kernel => @prop_kernel, - :value_map => @value_map, + :value_map => @value_map, + :nr_hits => @nr_hits, :transform => @transform } ) ") value_feature_uri = File.join( @uri, "predicted", "value") @@ -301,14 +303,26 @@ module OpenTox def neighbors @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm @neighbors = [] - @fingerprints.each do |training_compound,training_features| # AM: access all compounds - add_neighbor training_features, training_compound + @fingerprints.keys.each do |training_compound| # AM: access all compounds + add_neighbor @fingerprints[training_compound].keys, training_compound end end # Adds a neighbor to @neighbors if it passes the similarity threshold. def add_neighbor(training_features, training_compound) - sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)") + compound_features_hits = {} + if @nr_hits == "true" + compound_features_hits = @compound.match_hits(@compound_features) #OpenTox::Compound.new(training_compound).match_hits(@compound_features) + LOGGER.debug "dv ------------ compound_features_hits: #{@compound_features_hits}" + end + params = {} + params[:training_compound] = training_compound + params[:compound] = @compound.uri #query compound + params[:fingerprints] = @fingerprints + params[:nr_hits] = nr_hits + params[:compound_features_hits] = compound_features_hits + + sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params )") if sim > @min_sim @activities[training_compound].each do |act| @neighbors << { |