summary | refs | log | tree | commit | diff
path: root/models/mutagenicity-cdk/.Rhistory
blob: 4de9cdf7b8b3b81422860d8768de98064a7c3b03 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# ---- Session 1: install caret and take a first look at the descriptors ----
# Cleaned-up replay of the first recorded session. As recorded, nearZeroVar()
# was called before library(caret), and library(caret) before
# install.packages('caret'), so neither call could succeed. The steps are
# reordered here so the block runs from top to bottom.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
library(caret)

# Relative path, as used by the later sessions in this history. The recorded
# command used the absolute path
# /home/ch/src/lazar/models/mutagenicity-cdk/independent-variables.
# NOTE(review): assumes the working directory is that model directory -- confirm.
independent_variables <- read.csv("independent-variables", header = FALSE)
independent_variables[1, ]  # first row
independent_variables[, 1]  # first column

# Indices of the columns caret flags as having (near) zero variance.
near_zero_var <- nearZeroVar(independent_variables)
q()
# ---- Session 2: drop near-zero-variance and highly correlated columns ----
# Cleaned-up replay of the second recorded session. Only the commands that
# form the working pipeline are kept. Removed as recorded mistakes:
#   * an unclosed subset( call (parse error),
#   * findCorrelation(correlated) before `correlated` existed,
#   * cor() on the unfiltered data,
#   * invalid indexing such as [1..10], names[x] and a 0-based range,
#   * sink(tmp) with an undefined symbol,
#   * interactive help lookups.
library(caret)

independent_variables <- read.csv("independent-variables", header = FALSE)
class(independent_variables)
names(independent_variables)[1:10]

# Indices of the columns caret flags as having (near) zero variance.
near_zero_var <- nearZeroVar(independent_variables, allowParallel = TRUE)
near_zero_var
class(near_zero_var)

# Guard the empty case: a negated zero-length index selects NO columns, so
# subset(select = -integer(0)) would silently discard every column.
non_zero_var <- if (length(near_zero_var) > 0) {
  subset(independent_variables, select = -near_zero_var)
} else {
  independent_variables
}
class(non_zero_var)
names(non_zero_var)

# Pairwise correlations of the remaining columns, then the indices that
# findCorrelation() suggests removing at its default cutoff.
correlation <- cor(non_zero_var)
correlated <- findCorrelation(correlation)
correlated
class(correlated)

# Same empty-index guard as above.
non_correlated <- if (length(correlated) > 0) {
  subset(non_zero_var, select = -correlated)
} else {
  non_zero_var
}
names(non_correlated)[1:10]

# Write the surviving column names, space separated, to the file "tmp".
# cat(file = ) produces the same file content as the recorded
# sink("tmp"); cat(...); sink() sequence without diverting console output.
cat(names(non_correlated), file = "tmp")

near_zero_var
correlated
q()
# ---- Session 3: filter-based variable importance and threshold selection ----
# Cleaned-up replay of the third recorded session. Removed as recorded
# mistakes:
#   * varImp() calls with x=/y= arguments, abandoned for filterVarImp(),
#   * the misspelled fiterVarImp(),
#   * a call with unbalanced parentheses,
#   * row ordering without the trailing comma,
#   * arrange() with no package providing it loaded,
#   * `%in% c(0.6:1)` used as a range test (0.6:1 is just 0.6).
library(caret)

independent_variables <- read.csv("independent-variables", header = FALSE)
dependent_variables <- read.csv("dependent-variables", header = FALSE)
dependent_variables[, 1]

# Indices of the columns caret flags as having (near) zero variance.
near_zero_var <- nearZeroVar(independent_variables)

# Guard the empty case: a negated zero-length index selects NO columns, so
# subset(select = -integer(0)) would silently discard every column.
non_zero_var <- if (length(near_zero_var) > 0) {
  subset(independent_variables, select = -near_zero_var)
} else {
  independent_variables
}

correlation <- cor(non_zero_var)
correlated <- findCorrelation(correlation)
non_correlated <- if (length(correlated) > 0) {
  subset(non_zero_var, select = -correlated)
} else {
  non_zero_var
}

# The response is converted to a factor, as in the recorded call that worked.
importance <- filterVarImp(
  x = non_correlated,
  y = as.factor(dependent_variables[, 1])
)
class(importance)
names(importance)

# The first column is addressed by position rather than by the name X0 used
# in the recorded session, matching how the following session addresses it.
importance[order(importance[, 1]), ]
nrow(importance)

selected <- subset(importance, importance[, 1] > 0.6)
selected[order(selected[, 1]), ]
# nrow() counts selected variables; length() on a data frame counts columns.
nrow(selected)

# Lower threshold, as in the final recorded selection of this session.
selected <- subset(importance, importance[, 1] > 0.55)
nrow(selected)
q()
# ---- Session 4: recompute importance, compare nonpara settings, export ----
# Cleaned-up replay of the fourth recorded session. Fixes relative to the
# recorded commands:
#   * library(caret) is loaded before filterVarImp() is called,
#   * dependent_variables is read as a vector once and no longer indexed
#     with [, 1] afterwards,
#   * selectedF is derived from importanceF (it was derived from
#     `importance`, which made the comparison with `selected` vacuous),
#   * write.csv(..., col.names = F) is dropped: write.csv() ignores
#     col.names with a warning, and the write.table() call that followed
#     overwrote the same file anyway.
# Also dropped, because the recomputation below supersedes them: the initial
# trimming of the old `importance` object and the intermediate 0.55 selections.
library(caret)

# `non_correlated` is not created in this session; the recorded commands use
# it without defining it.
# NOTE(review): presumably restored from a saved workspace -- confirm.
stopifnot(exists("non_correlated"))

dependent_variables <- read.csv("dependent-variables", header = FALSE)[, 1]
dependent_variables

importance <- filterVarImp(
  x = non_correlated,
  y = as.factor(dependent_variables),
  nonpara = TRUE
)
selected <- subset(importance, importance[, 1] > 0.6)
selected
nrow(selected)

# Same computation with nonpara = FALSE, for comparison.
# NOTE(review): caret documents nonpara in terms of regression-style
# relationships; with a factor response it may have no effect -- confirm.
importanceF <- filterVarImp(
  x = non_correlated,
  y = as.factor(dependent_variables),
  nonpara = FALSE
)
selectedF <- subset(importanceF, importanceF[, 1] > 0.6)
nrow(selectedF)

# all.equal() instead of `==`: the two selections may now differ in size,
# and `==` on data frames of different dimensions is an error.
isTRUE(all.equal(selected, selectedF))

# Comma separated, row names kept, no header line.
write.table(selected, "tmp.csv", sep = ",", col.names = FALSE)
q()