summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2011-07-21 12:57:19 +0200
committer Andreas Maunz <andreas@maunz.de> 2011-07-21 12:57:19 +0200
commit 0885bc30c65fa594e63d2b465fa9d96a4a3b7114 (patch)
tree cb6f49ae93462653964348c1d19b9a8eb3aab524
parent b1e6b8aaeaed95797cd67e13567ac72344e89707 (diff)
parent cd536b197b7668f7be37f8b7340aa3f9e8c417b9 (diff)
Merge branch 'hits' into development
-rw-r--r-- lib/algorithm.rb 36
-rw-r--r-- lib/compound.rb 2
-rw-r--r-- lib/model.rb 2
3 files changed, 30 insertions, 10 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index df010e1..22768cc 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -180,7 +180,7 @@ module OpenTox
common_p_sum/all_p_sum
else
#LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}"
- (common_features.size/all_features.size).to_f
+ common_features.size.to_f/all_features.size.to_f
end
else
0.0
@@ -237,6 +237,10 @@ module OpenTox
end
+ # Multi-linear regression weighted by similarity.
+ # Objective Feature Selection, Principal Components Analysis, Scaling of Axes.
+ # @param [Hash] params Keys `:n_prop, :q_prop, :sims, :acts` are required
+ # @return [Numeric] A prediction value.
def self.mlr(params)
# GSL matrix operations:
@@ -290,8 +294,6 @@ module OpenTox
confidence_sum = 0.0
confidence = 0.0
prediction = nil
- positive_map_value= nil
- negative_map_value= nil
params[:neighbors].each do |neighbor|
neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f
@@ -387,10 +389,18 @@ module OpenTox
else
# gram matrix
(0..(neighbor_matches.length-1)).each do |i|
+ neighbor_i_hits = params[:fingerprints][params[:neighbors][i][:compound]]
gram_matrix[i] = [] unless gram_matrix[i]
# upper triangle
((i+1)..(neighbor_matches.length-1)).each do |j|
- sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
+ neighbor_j_hits= params[:fingerprints][params[:neighbors][j][:compound]]
+ sim_params = {}
+ if params[:nr_hits]
+ sim_params[:nr_hits] = true
+ sim_params[:compound_features_hits] = neighbor_i_hits
+ sim_params[:training_compound_features_hits] = neighbor_j_hits
+ end
+ sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values], sim_params)")
gram_matrix[i][j] = Algorithm.gauss(sim)
gram_matrix[j] = [] unless gram_matrix[j]
gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
@@ -554,7 +564,7 @@ module OpenTox
row = []
params[:features].each do |f|
if ! params[:fingerprints][n].nil?
- row << (params[:fingerprints][n].include?(f) ? params[:p_values][f] : 0.0)
+ row << (params[:fingerprints][n].include?(f) ? (params[:p_values][f] * params[:fingerprints][n][f]) : 0.0)
else
row << 0.0
end
@@ -563,7 +573,12 @@ module OpenTox
end
row = []
params[:features].each do |f|
- row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f])
+ if params[:nr_hits]
+ compound_feature_hits = params[:compound].match_hits([f])
+ row << (compound_feature_hits.size == 0 ? 0.0 : (params[:p_values][f] * compound_feature_hits[f]))
+ else
+ row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f])
+ end
end
rescue Exception => e
LOGGER.debug "get_props failed with '" + $! + "'"
@@ -711,7 +726,7 @@ module OpenTox
raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2
@data_matrix_selected = nil
(0..@data_matrix.size2-1).each { |i|
- if !Algorithm::isnull_or_singular?(@data_matrix.col(i).to_a)
+ if !Algorithm::zero_variance?(@data_matrix.col(i).to_a)
if @data_matrix_selected.nil?
@data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1)
@data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i)
@@ -796,6 +811,13 @@ module OpenTox
(nr_zeroes == array.size-1) || # remove singular feature
(nr_zeroes == 0) # also remove feature present everywhere
end
+
+ # For symbolic features
+ # @param [Array] Array to test, must indicate non-occurrence with 0.
+ # @return [Boolean] Whether the feature has variance zero.
+ def self.zero_variance?(array)
+ return (array.to_scale.variance_sample == 0.0)
+ end
# Median of an array
# @param [Array] Array with values
diff --git a/lib/compound.rb b/lib/compound.rb
index 616db2c..e7b4da0 100644
--- a/lib/compound.rb
+++ b/lib/compound.rb
@@ -187,7 +187,7 @@ module OpenTox
smarts_hits[smarts] = hits.size
end
end
- LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}"
+ #LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}"
return smarts_hits
#smarts_array.collect { |s| s if match?(s)}.compact
end
diff --git a/lib/model.rb b/lib/model.rb
index a8b33c6..fe7f895 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -318,8 +318,6 @@ module OpenTox
#LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}"
end
params = {}
- params[:training_compound] = training_compound
- params[:compound] = @compound.uri #query compound
params[:nr_hits] = @nr_hits
params[:compound_features_hits] = compound_features_hits
params[:training_compound_features_hits] = training_compound_features_hits