diff options
author | Christoph Helma <helma@in-silico.ch> | 2021-06-25 12:44:49 +0200 |
---|---|---|
committer | Christoph Helma <helma@in-silico.ch> | 2021-06-25 12:44:49 +0200 |
commit | fbeb14b83a03a2ebff3ec2f0e52b57bb6954d454 (patch) | |
tree | b49c334a318fdf4ab565e0cdeb56cd2b5d29fa95 /models/mutagenicity-cdk/.Rhistory | |
parent | 83591831c6e36c36d87159acba6afdfedab95522 (diff) |
model validations and pa predictions added (mutagenicity-paper)
Diffstat (limited to 'models/mutagenicity-cdk/.Rhistory')
-rw-r--r-- | models/mutagenicity-cdk/.Rhistory | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/models/mutagenicity-cdk/.Rhistory b/models/mutagenicity-cdk/.Rhistory new file mode 100644 index 0000000..4de9cdf --- /dev/null +++ b/models/mutagenicity-cdk/.Rhistory @@ -0,0 +1,138 @@ +independent_variables = read.csv("/home/ch/src/lazar/models/mutagenicity-cdk/independent-variables",header=F) +independent_variables[1,] +independent_variables[,1] +near_zero_var <- nearZeroVar(independent_variables) +library(caret) +install.packages('caret') +q() +library(caret) +independent_variables = read.csv("independent-variables",header=F) +independent_variables +near_zero_var <- nearZeroVar(independent_variables) +near_zero_var +near_zero_var <- nearZeroVar(independent_variables,allowParallel=T) +near_zero_var +independent_variables.class +class(independent_variables) +names(independent_variables) +non_zero_var = subset(independent_variables,select = -near_zero_var +non_zero_var +class(non_zero_var) +names(non_zero_var) +correlation = cor(non_zero_var) +correlation +correlated = findCorrelation(correlated) +ls() +correlated = findCorrelation(correlation) +correlated +print(correlated) +?findCorrelation +correlation = cor(independent_variables) +correlated = findCorrelation(correlation) +correlation +?cor +correlation = cor(non_zero_var) +correlated = findCorrelation(correlation) +correlated +class(correlated) +names(correlated) +near_zero_var +class(near_zero_var) +names(non_zero_var) +subset(non_zero_var,-correlated) +subset(non_zero_var,select=-correlated) +non_correlated = subset(non_zero_var,select=-correlated) +names(non_correlated) +names(non_correlated)[1..10] +names(non_correlated)[1:10] +names[independent_variables][0..10] +names[independent_variables][0:10] +names(independent_variables)[0:10] +names(independent_variables)[1:10] +?read.csv +sink(tmp) +sink("tmp") +names(non_correlated) +sink() +cat(names(non_correlated)) + +sink("tmp") +cat(names(non_correlated)) +sink() +near_zero_var +correlated +q() +library(caret) +independent_variables = 
read.csv("independent-variables",header=F) +dependent_variables = read.csv("dependent-variables",header=F) +dependent_variables +dependent_variables[,1] +?varImp +importance = varImp(x=independent_variables,y=dependent_variables) +importance = varImp(x=independent_variables,y=dependent_variables[,1]) +importance = fiterVarImp(x=independent_variables,y=dependent_variables[,1]) +importance = filterVarImp(x=independent_variables,y=dependent_variables[,1]) +importance = filterVarImp(x=independent_variables,y=as.factor(dependent_variables[,1])) +importance +importance = varImp(x=independent_variables,y=dependent_variables[,1],useModel=F) +importance = filterVarImp(x=independent_variables,y=as.factor(dependent_variables[,1])) +near_zero_var = nearZeroVar(independent_variables) +non_zero_var = subset(independent_variables,select = -near_zero_var) +correlation = cor(non_zero_var) +correlated = findCorrelation(correlation) +non_correlated = subset(non_zero_var,select=-correlated) +importance = filterVarImp(x=non_correlated,y=as.factor(dependent_variables[,1])) +importance +importance$X0 +class(importance) +importance[with(importance,order(X0)] +importance[with(importance,order(X0))] +arrange(importance,X0) +importance[order(importance$X1)] +names(importance) +importance$X0 +importance[order(importance$X0)] +importance[order(importance$X0),] +importance[order(importance$X0),] +length(importance$X0) +importance[importance[,1] %in% c(0.6,1)] +importance[importance$X0 %in% c(0.6,1),] +importance[importance$X0 %in% c(0.6:1),] +importance[importance$X0 %in% c(0.6:1)] +importance[,importance$X0 %in% c(0.6:1)] +subset(importance,importance$X0 > 0.6) +selected = subset(importance,importance$X0 > 0.6) +selected[order(selected$X0),] +length(selected) +length(selected$X0) +selected = subset(importance,importance$X0 > 0.55) +length(selected$X0) +q() +names(importance) +importance = subset(importance,-c(2)) +importance = subset(importance,select=-c(2)) +importance +selected = 
subset(importance,importance[,1] > 0.55) +length(selected$X0) +cat(selected) +selected +dependent_variables = read.csv("dependent-variables",header=F) +dependent_variables = read.csv("dependent-variables",header=F)[,1] +dependent_variables +importance = filterVarImp(x=non_correlated,y=as.factor(dependent_variables[,1]),nonpara=T) +library(caret) +importance = filterVarImp(x=non_correlated,y=as.factor(dependent_variables[,1]),nonpara=T) +importance = filterVarImp(x=non_correlated,y=as.factor(dependent_variables),nonpara=T) +selected = subset(importance,importance[,1] > 0.55) +length(selected) +selected +length(selected$X0) +selected = subset(importance,importance[,1] > 0.6) +length(selected$X0) +importanceF = filterVarImp(x=non_correlated,y=as.factor(dependent_variables),nonpara=F) +selectedF = subset(importance,importance[,1] > 0.6) +length(selectedF$X0) +selected == selectedF +write.csv(selected,"tmp.csv",col.names=F) +write.table(selected,"tmp.csv",sep=",",col.names=F) +q() |