diff options
author | Andreas Maunz <andreas@maunz.de> | 2011-07-21 12:57:19 +0200 |
---|---|---|
committer | Andreas Maunz <andreas@maunz.de> | 2011-07-21 12:57:19 +0200 |
commit | 0885bc30c65fa594e63d2b465fa9d96a4a3b7114 (patch) | |
tree | cb6f49ae93462653964348c1d19b9a8eb3aab524 | |
parent | b1e6b8aaeaed95797cd67e13567ac72344e89707 (diff) | |
parent | cd536b197b7668f7be37f8b7340aa3f9e8c417b9 (diff) |
Merge branch 'hits' into development
-rw-r--r-- | lib/algorithm.rb | 36 | ||||
-rw-r--r-- | lib/compound.rb | 2 | ||||
-rw-r--r-- | lib/model.rb | 2 |
3 files changed, 30 insertions, 10 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb index df010e1..22768cc 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -180,7 +180,7 @@ module OpenTox common_p_sum/all_p_sum else #LOGGER.debug "common_features : #{common_features}, all_features: #{all_features}, c/a: #{(common_features.size/all_features.size).to_f}" - (common_features.size/all_features.size).to_f + common_features.size.to_f/all_features.size.to_f end else 0.0 @@ -237,6 +237,10 @@ module OpenTox end + # Multi-linear regression weighted by similarity. + # Objective Feature Selection, Principal Components Analysis, Scaling of Axes. + # @param [Hash] params Keys `:n_prop, :q_prop, :sims, :acts` are required + # @return [Numeric] A prediction value. def self.mlr(params) # GSL matrix operations: @@ -290,8 +294,6 @@ module OpenTox confidence_sum = 0.0 confidence = 0.0 prediction = nil - positive_map_value= nil - negative_map_value= nil params[:neighbors].each do |neighbor| neighbor_weight = Algorithm.gauss(neighbor[:similarity]).to_f @@ -387,10 +389,18 @@ module OpenTox else # gram matrix (0..(neighbor_matches.length-1)).each do |i| + neighbor_i_hits = params[:fingerprints][params[:neighbors][i][:compound]] gram_matrix[i] = [] unless gram_matrix[i] # upper triangle ((i+1)..(neighbor_matches.length-1)).each do |j| - sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])") + neighbor_j_hits= params[:fingerprints][params[:neighbors][j][:compound]] + sim_params = {} + if params[:nr_hits] + sim_params[:nr_hits] = true + sim_params[:compound_features_hits] = neighbor_i_hits + sim_params[:training_compound_features_hits] = neighbor_j_hits + end + sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values], sim_params)") gram_matrix[i][j] = Algorithm.gauss(sim) gram_matrix[j] = [] unless gram_matrix[j] gram_matrix[j][i] = gram_matrix[i][j] # lower triangle @@ -554,7 +564,7 @@ module OpenTox row = [] params[:features].each do |f| if ! params[:fingerprints][n].nil? - row << (params[:fingerprints][n].include?(f) ? params[:p_values][f] : 0.0) + row << (params[:fingerprints][n].include?(f) ? (params[:p_values][f] * params[:fingerprints][n][f]) : 0.0) else row << 0.0 end @@ -563,7 +573,12 @@ module OpenTox end row = [] params[:features].each do |f| - row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f]) + if params[:nr_hits] + compound_feature_hits = params[:compound].match_hits([f]) + row << (compound_feature_hits.size == 0 ? 0.0 : (params[:p_values][f] * compound_feature_hits[f])) + else + row << (params[:compound].match([f]).size == 0 ? 0.0 : params[:p_values][f]) + end end rescue Exception => e LOGGER.debug "get_props failed with '" + $! + "'" @@ -711,7 +726,7 @@ module OpenTox raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2 @data_matrix_selected = nil (0..@data_matrix.size2-1).each { |i| - if !Algorithm::isnull_or_singular?(@data_matrix.col(i).to_a) + if !Algorithm::zero_variance?(@data_matrix.col(i).to_a) if @data_matrix_selected.nil? @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1) @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i) @@ -796,6 +811,13 @@ module OpenTox (nr_zeroes == array.size-1) || # remove singular feature (nr_zeroes == 0) # also remove feature present everywhere end + + # For symbolic features + # @param [Array] Array to test, must indicate non-occurrence with 0. + # @return [Boolean] Whether the feature has variance zero. + def self.zero_variance?(array) + return (array.to_scale.variance_sample == 0.0) + end # Median of an array # @param [Array] Array with values diff --git a/lib/compound.rb b/lib/compound.rb index 616db2c..e7b4da0 100644 --- a/lib/compound.rb +++ b/lib/compound.rb @@ -187,7 +187,7 @@ module OpenTox smarts_hits[smarts] = hits.size end end - LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}" + #LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}" return smarts_hits #smarts_array.collect { |s| s if match?(s)}.compact end diff --git a/lib/model.rb b/lib/model.rb index a8b33c6..fe7f895 100644 --- a/lib/model.rb +++ b/lib/model.rb @@ -318,8 +318,6 @@ module OpenTox #LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}" end params = {} - params[:training_compound] = training_compound - params[:compound] = @compound.uri #query compound params[:nr_hits] = @nr_hits params[:compound_features_hits] = compound_features_hits params[:training_compound_features_hits] = training_compound_features_hits |