summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andreas Maunz <andreas@maunz.de> 2011-12-09 13:32:36 +0100
committer Andreas Maunz <andreas@maunz.de> 2011-12-09 13:32:36 +0100
commit 427aff8780e2039ebe3149a8bb282315c5f0de13 (patch)
tree 98d4bbe29be7d4a6bebe71b9bf94aed9da91273c
parent 86db0cb63b83fdba2151776e44f77f03def290bd (diff)
fixed max_perc_neighbors ON + weighting
-rw-r--r-- lib/algorithm.rb 12
-rw-r--r-- lib/model.rb 2
2 files changed, 9 insertions, 5 deletions
diff --git a/lib/algorithm.rb b/lib/algorithm.rb
index 49c1a5d..2229e75 100644
--- a/lib/algorithm.rb
+++ b/lib/algorithm.rb
@@ -372,6 +372,7 @@ module OpenTox
n_prop = params[:n_prop].collect
q_prop = params[:q_prop].collect
acts = params[:acts].collect
+ sims = params[:sims].collect
maxcols = params[:maxcols]
nr_cases, nr_features = get_sizes n_prop
@@ -410,7 +411,7 @@ module OpenTox
@r.eval "suppressPackageStartupMessages(library(\"robustbase\"))"
@r.eval "outlier_threshold = 0.999"
- # outlier removal -- changes cases; adjust acts accordingly (stop if query is outlier)
+ # outlier removal -- changes cases; adjust acts and sims accordingly (stop if query is outlier)
outliers = []
begin
LOGGER.debug "Outliers..."
@@ -431,17 +432,20 @@ module OpenTox
if (outliers.include?(-1))
raise "Query is an outlier."
end
- temp_dm = []; temp_acts = []
+ temp_dm = []; temp_acts = []; temp_sims = []
data_matrix.to_a.each_with_index { |elem, idx| temp_dm << elem unless outliers.include? idx }
nr_cases, nr_features = get_sizes temp_dm
data_matrix = GSL::Matrix.alloc(temp_dm.flatten, nr_cases, nr_features)
acts.each_with_index { |elem, idx| temp_acts << elem unless outliers.include? idx }
acts = temp_acts # same nr_features
+ sims.each_with_index { |elem, idx| temp_sims << elem unless outliers.include? idx }
+ sims = temp_sims # same nr_features
@r.eval 'fstr <- "y ~ ."'
@r.x = data_matrix.to_a.flatten
@r.y = acts.to_a.flatten
+ @r.w = sims.to_a.flatten
@r.q = query_matrix.to_a.flatten
@r.eval "x <- matrix(x, #{nr_cases}, #{nr_features}, byrow=T)"
@@ -449,7 +453,7 @@ module OpenTox
@r.eval 'idx = rep(T,dim(x)[2])'
- # optimize selection of training instances -- changes features; adjust query accordingly
+ # optimize selection features; adjust query accordingly
begin
LOGGER.debug "Best subset..."
@r.eval 'suppressPackageStartupMessages(library("leaps"))'
@@ -475,7 +479,7 @@ module OpenTox
@r.eval 'suppressPackageStartupMessages(library("MASS"))'
@r.eval 'df <- df[,idx]'
- @r.eval 'fit <- rlm( as.formula(fstr), data=df, psi = psi.bisquare, method="MM")'
+ @r.eval 'fit <- rlm( as.formula(fstr), data=df, psi = psi.bisquare, method="MM", weights=w, wt.method="case")'
@r.eval 'q <- q[idx[2:length(idx)]]'
@r.eval 'q <- data.frame( matrix( q, 1, length(q) ) )'
diff --git a/lib/model.rb b/lib/model.rb
index 3c25007..01b2335 100644
--- a/lib/model.rb
+++ b/lib/model.rb
@@ -394,7 +394,7 @@ module OpenTox
params[:training_compound_features_hits] = training_compound_features_hits
sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)")
- if sim > @min_sim
+ if sim > @min_sim || @max_perc_neighbors
@activities[training_compound].each do |act|
@neighbors << {
:compound => training_compound,