summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordv <dv@dv.de>2011-07-20 13:27:04 +0200
committerdv <dv@dv.de>2011-07-20 13:27:04 +0200
commit4fa97d6bec952fdb3ceb6175c6b39b51aab45f4b (patch)
tree90a791ce511ee390423f99655272033599ff942a
parent5a86ca682633dc6865729a75be0046dff2460684 (diff)
parent62930c5b40a1ed0e4f170d70c2284a004b3d0d55 (diff)
Merge branch 'support' into development
-rw-r--r--lib/algorithm.rb48
-rw-r--r--lib/compound.rb29
-rw-r--r--lib/model.rb26
3 files changed, 91 insertions, 12 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index cfca069..56ab94c 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -153,19 +153,34 @@ module OpenTox
# @param [Array] features_a Features of first compound
# @param [Array] features_b Features of second compound
# @param [optional, Hash] weights Weights for all features
+ # @param [optional, Hash] params Keys: `:training_compound, :compound, :fingerprints, :nr_hits, :compound_features_hits` are required
# @return [Float] (Weighted) tanimoto similarity
- def self.tanimoto(features_a,features_b,weights=nil)
+ def self.tanimoto(features_a,features_b,weights=nil,params=nil)
common_features = features_a & features_b
all_features = (features_a + features_b).uniq
- common_p_sum = 0.0
+ #LOGGER.debug "dv --------------- common: #{common_features}, all: #{all_features}"
if common_features.size > 0
if weights
- common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}
- all_p_sum = 0.0
- all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}
+ LOGGER.debug "nr_hits: #{params[:nr_hits]}"
+ if params[:nr_hits] == "true"
+ params[:weights] = weights
+ params[:mode] = "min"
+ params[:features] = common_features
+ common_p_sum = Algorithm.p_sum_support(params)
+ params[:mode] = "max"
+ params[:features] = all_features
+ all_p_sum = Algorithm.p_sum_support(params)
+ else
+ common_p_sum = 0.0
+ common_features.each{|f| common_p_sum += Algorithm.gauss(weights[f])}
+ all_p_sum = 0.0
+ all_features.each{|f| all_p_sum += Algorithm.gauss(weights[f])}
+ end
+ LOGGER.debug "common_p_sum: #{common_p_sum}, all_p_sum: #{all_p_sum}, c/a: #{common_p_sum/all_p_sum}"
common_p_sum/all_p_sum
else
- common_features.to_f/all_features
+ LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}"
+ (common_features.size/all_features.size).to_f
end
else
0.0
@@ -839,6 +854,27 @@ module OpenTox
max
end
+ # Looks up the value stored for a feature in the fingerprint of the training compound named by params[:training_compound]
+ # @param [String] feature Key of the feature inside the fingerprint (presumably a SMARTS/SMILES string - confirm against callers)
+ # @param [Hash] params Keys: `:fingerprints, :training_compound` are read here
+ # @return [Numeric] Support value (presumably a hit count; nil when the feature is absent, assuming plain Hashes - verify)
+ def self.support(feature,params)
+ params[:fingerprints][params[:training_compound]][feature]
+ end
+
+ # Sums, over params[:features], the gauss-transformed weight of each feature multiplied by the min or max (per params[:mode]) of its non-nil query-compound and training-compound hit values
+ # @param [Hash] params Keys: `:compound_features_hits, :weights, :fingerprints, :training_compound, :features, :mode` are read; `:mode` is interpolated into an eval'd string as a method name ("min"/"max" in the visible caller), so it must never come from untrusted input
+ # @return [Numeric] Accumulated sum, 0.0 when params[:features] is empty
+ def self.p_sum_support(params)
+ p_sum = 0.0
+ params[:features].each{|f|
+ compound_hits = params[:compound_features_hits][f]
+ neighbor_hits = Algorithm.support(f,params)
+ p_sum += eval "(Algorithm.gauss(params[:weights][f]) * ([compound_hits, neighbor_hits].compact.#{params[:mode]}))"
+ }
+ p_sum
+ end
+
# Adds variance, mean and standard deviation calculation to Array class
module Variance
def sum(&blk)
diff --git a/lib/compound.rb b/lib/compound.rb
index d374b02..616db2c 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -164,6 +164,35 @@ module OpenTox
#smarts_array.collect { |s| s if match?(s)}.compact
end
+ # Matches an array of smarts strings against this compound (parsed from @inchi); returns a hash with each matching smarts as key and its number of non-unique hits as value
+ # @example
+ # compound = OpenTox::Compound.from_name("Benzene")
+ # compound.match_hits(['cc','cN']) # returns a Hash such as {'cc' => n}, n being the size of the map list for 'cc'
+ # @param [Array] smarts_array Array with Smarts strings
+ # @return [Hash] Hash with matching smarts as key and number of non-unique hits as value; smarts that do not match are omitted
+ def match_hits(smarts_array)
+ # avoid recreation of OpenBabel objects
+ obconversion = OpenBabel::OBConversion.new
+ obmol = OpenBabel::OBMol.new
+ obconversion.set_in_format('inchi')
+ obconversion.read_string(obmol,@inchi)
+ smarts_pattern = OpenBabel::OBSmartsPattern.new
+ smarts_hits = {}
+ #LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}"
+ smarts_array.collect do |smarts|
+ #LOGGER.debug "dv ----------- all smarts #{smarts}"
+ smarts_pattern.init(smarts)
+ if smarts_pattern.match(obmol)
+ hits = smarts_pattern.get_map_list
+ smarts_hits[smarts] = hits.size
+ end
+ end
+ LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}"
+ return smarts_hits
+ #smarts_array.collect { |s| s if match?(s)}.compact
+ end
+
+
# Get URI of compound image with highlighted fragments
#
# @param [Array] activating Array with activating Smarts strings
diff --git a/lib/model.rb b/lib/model.rb
index 13212ee..4cbe95a 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -91,7 +91,7 @@ module OpenTox
include Algorithm
include Model
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :transform
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform
def initialize(uri=nil)
@@ -113,7 +113,8 @@ module OpenTox
@feature_calculation_algorithm = "Substructure.match"
@similarity_algorithm = "Similarity.tanimoto"
@prediction_algorithm = "Neighbors.weighted_majority_vote"
-
+
+ @nr_hits = false
@min_sim = 0.3
@prop_kernel = false
@transform = { "class" => "NOP" }
@@ -220,7 +221,8 @@ module OpenTox
:fingerprints => @fingerprints,
:similarity_algorithm => @similarity_algorithm,
:prop_kernel => @prop_kernel,
- :value_map => @value_map,
+ :value_map => @value_map,
+ :nr_hits => @nr_hits,
:transform => @transform } ) ")
value_feature_uri = File.join( @uri, "predicted", "value")
@@ -301,14 +303,26 @@ module OpenTox
def neighbors
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
@neighbors = []
- @fingerprints.each do |training_compound,training_features| # AM: access all compounds
- add_neighbor training_features, training_compound
+ @fingerprints.keys.each do |training_compound| # AM: access all compounds
+ add_neighbor @fingerprints[training_compound].keys, training_compound
end
end
# Adds a neighbor to @neighbors if it passes the similarity threshold.
def add_neighbor(training_features, training_compound)
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
+ compound_features_hits = {}
+ if @nr_hits == "true"
+ compound_features_hits = @compound.match_hits(@compound_features) #OpenTox::Compound.new(training_compound).match_hits(@compound_features)
+ LOGGER.debug "dv ------------ compound_features_hits: #{@compound_features_hits}"
+ end
+ params = {}
+ params[:training_compound] = training_compound
+ params[:compound] = @compound.uri #query compound
+ params[:fingerprints] = @fingerprints
+ params[:nr_hits] = nr_hits
+ params[:compound_features_hits] = compound_features_hits
+
+ sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params )")
if sim > @min_sim
@activities[training_compound].each do |act|
@neighbors << {