# Filter-based feature selection with caret.
#
# Reproducible version of an interactive session. Pipeline:
#   1. read the descriptor matrix and the outcome vector (header-less CSVs
#      in the current working directory),
#   2. drop near-zero-variance descriptors,
#   3. drop highly correlated descriptors,
#   4. score the remaining descriptors with caret::filterVarImp(),
#   5. keep descriptors whose score in the first column exceeds a cutoff,
#   6. write the retained names to "tmp" and the selection to "tmp.csv".
#
# Requires the caret package (install once with install.packages("caret")).

library(caret)

# Score threshold applied to the first importance column.
importance_cutoff <- 0.6

# Remove the columns at positions `idx` from `df`.
#
# Negative indexing with an empty index vector selects *zero* columns
# (-integer(0) is integer(0)), so the empty case must be handled explicitly;
# nearZeroVar() and findCorrelation() both return integer(0) when nothing
# needs to be removed.
drop_columns <- function(df, idx) {
  if (length(idx) == 0) {
    return(df)
  }
  df[, -idx, drop = FALSE]
}

# ---- Load data ----

independent_variables <- read.csv("independent-variables", header = FALSE)
# Only the first column of the dependent-variables file is used as outcome.
dependent_variables <- read.csv("dependent-variables", header = FALSE)[, 1]

stopifnot(nrow(independent_variables) == length(dependent_variables))

# ---- Remove uninformative and redundant descriptors ----

near_zero_var <- nearZeroVar(independent_variables)
non_zero_var <- drop_columns(independent_variables, near_zero_var)

# The correlation matrix is computed only after the near-zero-variance
# columns have been removed (the session computed cor() on non_zero_var).
correlation <- cor(non_zero_var)
correlated <- findCorrelation(correlation)
non_correlated <- drop_columns(non_zero_var, correlated)

# Space-separated names of the retained descriptors, as written by
# sink("tmp"); cat(names(non_correlated)); sink() in the session.
cat(names(non_correlated), file = "tmp")

# ---- Variable importance ----

# The outcome is converted to a factor before scoring.
outcome <- as.factor(dependent_variables)

importance <- filterVarImp(x = non_correlated, y = outcome, nonpara = TRUE)
selected <- subset(importance, importance[, 1] > importance_cutoff)

# NOTE(review): caret documents `nonpara` as affecting numeric outcomes;
# with a factor outcome both runs may give identical scores -- confirm.
importanceF <- filterVarImp(x = non_correlated, y = outcome, nonpara = FALSE)
# Select from importanceF (the session mistakenly reused `importance` here,
# which made the comparison below trivially identical).
selectedF <- subset(importanceF, importanceF[, 1] > importance_cutoff)

# Compare the two selections by descriptor name; `==` on data frames fails
# when the selections differ in size.
message("selected (nonpara = TRUE):  ", nrow(selected))
message("selected (nonpara = FALSE): ", nrow(selectedF))
message(
  "identical selections: ",
  identical(rownames(selected), rownames(selectedF))
)

# ---- Output ----

# write.table() rather than write.csv(): write.csv() ignores col.names
# (with a warning), so the header could not be suppressed there.
write.table(selected, "tmp.csv", sep = ",", col.names = FALSE)