From c741ae32778e5fa0bed25d0ff913e33a46465792 Mon Sep 17 00:00:00 2001 From: Andreas Maunz Date: Thu, 12 Jul 2012 16:17:10 +0200 Subject: Weight evaluation prepared --- lib/algorithm.rb | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/lib/algorithm.rb b/lib/algorithm.rb index e2798b3..78fc447 100644 --- a/lib/algorithm.rb +++ b/lib/algorithm.rb @@ -427,7 +427,7 @@ module OpenTox prediction = acts[0] else #LOGGER.debug gram_matrix.to_yaml - @r = RinRuby.new(false,false) # global R instance leads to Socket errors after a large number of requests + @r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests @r.eval "suppressPackageStartupMessages(library('caret'))" # requires R packages "caret" and "kernlab" @r.eval "suppressPackageStartupMessages(library('doMC'))" # requires R packages "multicore" @r.eval "registerDoMC()" # switch on parallel processing @@ -447,7 +447,14 @@ module OpenTox # prepare data LOGGER.debug "Preparing R data ..." - @r.eval "if (class(y) == 'character') { y = factor(y); suppressPackageStartupMessages(library('class')) }" # For classification + @r.eval <<-EOR + weights=NULL + if (class(y) == 'character') { + y = factor(y) + suppressPackageStartupMessages(library('class')) + #weights=unlist(as.list(prop.table(table(y)))) + } + EOR @r.eval <<-EOR rem = nearZeroVar(prop_matrix) @@ -465,7 +472,17 @@ module OpenTox # model + support vectors LOGGER.debug "Creating R SVM model ..." train_success = @r.eval <<-EOR - model = train(prop_matrix,y,method="svmradial",tuneLength=8,trControl=trainControl(method="LGOCV",number=10),preProcess=c("center", "scale")) + # AM: TODO: evaluate class weight effect by altering: + # AM: comment in 'weights' above run and class.weights=weights vs. class.weights=1-weights + # AM: vs + # AM: comment out 'weights' above (status quo), thereby disabling weights + model = train(prop_matrix,y, + method="svmradial", + preProcess=c("center", "scale"), + class.weights=weights, + trControl=trainControl(method="LGOCV",number=10), + tuneLength=8 + ) perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared ) EOR -- cgit v1.2.3