1
|
|
#' Fit a sNPLS model
|
2
|
|
#'
|
3
|
|
#' @description Fits a N-PLS regression model imposing sparsity on \code{wj} and \code{wk} matrices
|
4
|
|
#' @param XN A three-way array containing the predictors.
|
5
|
|
#' @param Y A matrix containing the response.
|
6
|
|
#' @param ncomp Number of components in the projection
|
7
|
|
#' @param threshold_j Threshold value on Wj. Scaled between [0, 1)
|
8
|
|
#' @param threshold_k Threshold value on Wk. scaled between [0, 1)
|
9
|
|
#' @param keepJ Number of variables to keep for each component, ignored if threshold_j is provided
|
10
|
|
#' @param keepK Number of 'times' to keep for each component, ignored if threshold_k is provided
|
11
|
|
#' @param scale.X Perform unit variance scaling on X?
|
12
|
|
#' @param center.X Perform mean centering on X?
|
13
|
|
#' @param scale.Y Perform unit variance scaling on Y?
|
14
|
|
#' @param center.Y Perform mean centering on Y?
|
15
|
|
#' @param conver Convergence criterion
|
16
|
|
#' @param max.iteration Maximum number of iterations
|
17
|
|
#' @param silent Show output?
|
18
|
|
#' @param method Select between L1 penalization (sNPLS), variable selection with Selectivity Ratio (sNPLS-SR) or variable selection with VIP (sNPLS-VIP)
|
19
|
|
#' @return A fitted sNPLS model
|
20
|
|
#' @references C. A. Andersson and R. Bro. The N-way Toolbox for MATLAB Chemometrics & Intelligent Laboratory Systems. 52 (1):1-4, 2000.
|
21
|
|
#' @references Hervas, D. Prats-Montalban, J. M., Garcia-Cañaveras, J. C., Lahoz, A., & Ferrer, A. (2019). Sparse N-way partial least squares by L1-penalization. Chemometrics and Intelligent Laboratory Systems, 185, 85-91.
|
22
|
|
#' @examples
|
23
|
|
#' X_npls<-array(rpois(7500, 10), dim=c(50, 50, 3))
|
24
|
|
#'
|
25
|
|
#' Y_npls <- matrix(2+0.4*X_npls[,5,1]+0.7*X_npls[,10,1]-0.9*X_npls[,15,1]+
|
26
|
|
#' 0.6*X_npls[,20,1]- 0.5*X_npls[,25,1]+rnorm(50), ncol=1)
|
27
|
|
#' #Discrete thresholding
|
28
|
|
#' fit <- sNPLS(X_npls, Y_npls, ncomp=3, keepJ = rep(2,3) , keepK = rep(1,3))
|
29
|
|
#' #Continuous thresholding
|
30
|
|
#' fit2 <- sNPLS(X_npls, Y_npls, ncomp=3, threshold_j=0.5, threshold_k=0.5)
|
31
|
|
#' #USe sNPLS-SR method
|
32
|
|
#' fit3 <- sNPLS(X_npls, Y_npls, ncomp=3, threshold_j=0.5, threshold_k=0.5, method="sNPLS-SR")
|
33
|
|
#' @importFrom stats sd
|
34
|
|
#' @export
|
35
|
|
sNPLS <- function(XN, Y, ncomp = 2, threshold_j=0.5, threshold_k=0.5, keepJ = NULL, keepK = NULL, scale.X=TRUE, center.X=TRUE,
|
36
|
|
scale.Y=TRUE, center.Y=TRUE, conver = 1e-16, max.iteration = 10000, silent = F, method="sNPLS"){
|
37
|
|
|
38
|
0
|
mynorm <- function(x) sqrt(sum(diag(crossprod(x))))
|
39
|
0
|
thresholding <- function(x, nj) {
|
40
|
0
|
ifelse(abs(x) > abs(x[order(abs(x))][nj]),
|
41
|
0
|
(abs(x) - abs(x[order(abs(x))][nj])) *
|
42
|
0
|
sign(x), 0)
|
43
|
|
}
|
44
|
0
|
rel_thresholding <- function(x, j_rel){
|
45
|
0
|
ifelse(abs(x)-max(abs(x))*j_rel <= 0, 0, sign(x)*(abs(x)-max(abs(x))*j_rel))
|
46
|
|
}
|
47
|
0
|
if(!method %in% c("sNPLS", "sNPLS-SR", "sNPLS-VIP")) stop("'method' not recognized")
|
48
|
0
|
if (length(dim(Y)) == 3) Y <- unfold3w(Y)
|
49
|
0
|
if (length(dim(XN)) != 3) stop("'XN' is not a three-way array")
|
50
|
0
|
if (!is.null(rownames(XN))){
|
51
|
0
|
y.names <- x.names <- rownames(XN)
|
52
|
|
} else {
|
53
|
0
|
y.names <- x.names <- 1:dim(XN)[1]
|
54
|
|
}
|
55
|
0
|
if (!is.null(colnames(XN))){
|
56
|
0
|
var.names <- colnames(XN)
|
57
|
|
} else {
|
58
|
0
|
var.names <- paste("X.", 1:dim(XN)[2], sep = "")
|
59
|
|
}
|
60
|
0
|
if (!is.null(dimnames(XN)[[3]])){
|
61
|
0
|
x3d.names <- dimnames(XN)[[3]]
|
62
|
|
} else {
|
63
|
0
|
x3d.names <- paste("Z.", 1:dim(XN)[3], sep = "")
|
64
|
|
}
|
65
|
0
|
if (!is.null(colnames(Y))){
|
66
|
0
|
yvar.names <- colnames(Y)
|
67
|
|
} else {
|
68
|
0
|
yvar.names <- paste("Y.", 1:dim(Y)[2], sep = "")
|
69
|
|
}
|
70
|
0
|
if(!center.X) center.X <- rep(0, ncol(XN)*dim(XN)[3])
|
71
|
0
|
if(!center.Y) center.Y <- rep(0, ncol(Y))
|
72
|
0
|
if(!scale.X) scale.X <- rep(1, ncol(XN)*dim(XN)[3])
|
73
|
0
|
if(!scale.Y) scale.Y <- rep(1, ncol(Y))
|
74
|
0
|
if(is.null(keepJ) | is.null(keepK) | method!="sNPLS"){
|
75
|
0
|
cont_thresholding <- TRUE
|
76
|
0
|
message(paste("Using continuous thresholding (", method, ")", sep=""))
|
77
|
|
} else {
|
78
|
0
|
cont_thresholding <- FALSE
|
79
|
0
|
message("Using discrete L1-thresholding")
|
80
|
|
}
|
81
|
0
|
if(length(threshold_j) == 1 & ncomp > 1) threshold_j <- rep(threshold_j, ncomp)
|
82
|
0
|
if(length(threshold_k) == 1 & ncomp > 1) threshold_k <- rep(threshold_k, ncomp)
|
83
|
|
|
84
|
|
# Matrices initialization
|
85
|
0
|
U <- Q <- X <- P <- NULL
|
86
|
0
|
if(method == "sNPLS"){
|
87
|
0
|
WsupraJ <- WsupraK <- Tm <- NULL
|
88
|
|
} else{
|
89
|
0
|
WsupraJ <- matrix(nrow=ncol(XN), ncol=ncomp)
|
90
|
0
|
WsupraK <- matrix(nrow=dim(XN)[3], ncol=ncomp)
|
91
|
0
|
Tm <- matrix(nrow=nrow(XN), ncol=ncomp)
|
92
|
|
}
|
93
|
0
|
Yorig <- Y
|
94
|
0
|
Y <- scale(Y, center = center.Y, scale = scale.Y)
|
95
|
0
|
y_center <- attr(Y, "scaled:center")
|
96
|
0
|
y_scale <- attr(Y, "scaled:scale")
|
97
|
0
|
B <- matrix(0, ncol = ncomp, nrow = ncomp)
|
98
|
0
|
Gu <- vector("list", ncomp)
|
99
|
0
|
S <- svd(Y)$d
|
100
|
0
|
u <- Y[, S == max(S)] #Column with the highest variance
|
101
|
|
|
102
|
|
# Unfolding of XN en 2-D
|
103
|
0
|
X <- unfold3w(XN)
|
104
|
|
#Check for zero variance columns and fix them with some noise
|
105
|
0
|
if(any(apply(X, 2, sd)==0)){
|
106
|
0
|
X[,apply(X, 2, sd)==0] <- apply(X[,apply(X, 2, sd)==0, drop=FALSE], 2, function(x) jitter(x))
|
107
|
|
}
|
108
|
|
|
109
|
|
# Center and scale
|
110
|
0
|
Xd <- scale(X, center = center.X, scale = scale.X)
|
111
|
0
|
x_center <- attr(Xd, "scaled:center")
|
112
|
0
|
x_scale <- attr(Xd, "scaled:scale")
|
113
|
|
|
114
|
|
# Main loop for each component
|
115
|
0
|
for (f in 1:ncomp) {
|
116
|
0
|
if(!cont_thresholding){
|
117
|
0
|
nj <- ncol(XN) - keepJ[f]
|
118
|
0
|
nk <- dim(XN)[3] - keepK[f]
|
119
|
|
}
|
120
|
0
|
if(method %in% c("sNPLS-VIP", "sNPLS-SR")){
|
121
|
0
|
nj <- ncol(XN)
|
122
|
0
|
nk <- dim(XN)[3]
|
123
|
0
|
wselj <- rep(1, nj)
|
124
|
0
|
wselk <- rep(1, nk)
|
125
|
|
}
|
126
|
0
|
it = 1
|
127
|
0
|
while (it < max.iteration) {
|
128
|
0
|
Zrow <- crossprod(u, Xd)
|
129
|
0
|
Z <- matrix(Zrow, nrow = dim(XN)[2], ncol = dim(XN)[3])
|
130
|
0
|
svd.z <- svd(Z)
|
131
|
0
|
wsupraj <- svd.z$u[, 1]
|
132
|
|
# L1 penalization for wsupraj
|
133
|
0
|
if(method == "sNPLS"){
|
134
|
0
|
if(cont_thresholding){
|
135
|
0
|
wsupraj <- rel_thresholding(wsupraj, threshold_j[f])
|
136
|
|
} else {
|
137
|
0
|
if (nj != 0) {
|
138
|
0
|
wsupraj <- thresholding(wsupraj, nj)
|
139
|
|
}
|
140
|
|
}
|
141
|
|
}
|
142
|
|
|
143
|
|
##########
|
144
|
0
|
wsuprak <- svd.z$v[, 1]
|
145
|
|
# L1 penalization for wsuprak
|
146
|
0
|
if(method == "sNPLS"){
|
147
|
0
|
if(cont_thresholding){
|
148
|
0
|
wsuprak <- rel_thresholding(wsuprak, threshold_k[f])
|
149
|
|
} else {
|
150
|
0
|
if (nk != 0) {
|
151
|
0
|
wsuprak <- thresholding(wsuprak, nk)
|
152
|
|
}
|
153
|
|
}
|
154
|
|
}
|
155
|
|
|
156
|
0
|
if(method %in% c("sNPLS-VIP", "sNPLS-SR")){
|
157
|
0
|
W <- kronecker(wsuprak, wsupraj) * kronecker(wselk, wselj)
|
158
|
0
|
tf <- Xd %*% W
|
159
|
0
|
qf <- crossprod(Y, tf)/mynorm(crossprod(Y, tf))
|
160
|
0
|
uf <- Y %*% qf
|
161
|
0
|
Tm[,f] <- tf
|
162
|
0
|
Q <- cbind(Q, qf)
|
163
|
0
|
WsupraJ[,f] <- wsupraj
|
164
|
0
|
WsupraK[,f] <- wsuprak
|
165
|
|
}
|
166
|
|
|
167
|
0
|
if(method == "sNPLS-VIP"){
|
168
|
|
#VIPj
|
169
|
0
|
SCE <- sum(sum(Tm[,1:f, drop=FALSE] %*% t(Q[,1:f, drop=FALSE])^2))
|
170
|
0
|
VIPj <- sqrt(nrow(WsupraJ)*((WsupraJ[,f, drop=FALSE]^2)*SCE)/sum(SCE))
|
171
|
0
|
VIPj_01 <- (VIPj-min(VIPj))/(max(VIPj)-min(VIPj))
|
172
|
|
|
173
|
|
#VIPk
|
174
|
0
|
SCE <- sum(sum(Tm[,1:f, drop=FALSE] %*% t(Q[,1:f, drop=FALSE])^2))
|
175
|
0
|
VIPk <- sqrt(nrow(WsupraK)*((WsupraK[,f, drop=FALSE]^2)*SCE)/sum(SCE))
|
176
|
0
|
VIPk_01 <- (VIPk-min(VIPk))/(max(VIPk)-min(VIPk))
|
177
|
|
|
178
|
0
|
wselk <- as.numeric(VIPk_01 > threshold_k[f])
|
179
|
0
|
wselj <- as.numeric(VIPj_01 > threshold_j[f])
|
180
|
|
}
|
181
|
|
|
182
|
0
|
if(method == "sNPLS-SR"){
|
183
|
0
|
TM <- MASS::ginv(crossprod(Tm[,1:f, drop=FALSE])) %*% t(Tm[,1:f, drop=FALSE])
|
184
|
0
|
WkM <- MASS::ginv(crossprod(WsupraK[,1:f, drop=FALSE])) %*% t(WsupraK[,1:f, drop=FALSE])
|
185
|
0
|
WjM <- MASS::ginv(crossprod(WsupraJ[,1:f, drop=FALSE])) %*% t(WsupraJ[,1:f, drop=FALSE])
|
186
|
0
|
Gu[[f]] <- TM %*% X %*% kronecker(t(WkM), t(WjM))
|
187
|
|
#SR
|
188
|
0
|
P[[f]] = t(as.matrix(Gu[[f]]) %*% t(kronecker(WsupraK[,1:f, drop=FALSE], WsupraJ[,1:f, drop=FALSE])))
|
189
|
0
|
Xres <- Xd - Tm[,1:f, drop=FALSE] %*% t(P[[f]])
|
190
|
0
|
xpred <- Tm[,1:f, drop=FALSE] %*% t(P[[f]])
|
191
|
0
|
SSexp <- xpred^2
|
192
|
0
|
SSres <- (Xd-xpred)^2
|
193
|
0
|
SSexp_cube <- array(SSexp, dim=c(nrow(SSexp), nj, nk))
|
194
|
0
|
SSres_cube <- array(SSres, dim=c(nrow(SSexp), nj, nk))
|
195
|
0
|
SR_k <- numeric(nk)
|
196
|
0
|
for(k in 1:nk){
|
197
|
0
|
SR_k[k] <- sum(SSexp_cube[,,k])/sum(SSres_cube[,,k])
|
198
|
|
}
|
199
|
0
|
SR_k_01 <- (SR_k-min(SR_k))/(max(SR_k)-min(SR_k))
|
200
|
0
|
SR_j <- numeric(nj)
|
201
|
0
|
for(j in 1:nj){
|
202
|
0
|
SR_j[j] <- sum(SSexp_cube[,j,])/sum(SSres_cube[,j,])
|
203
|
|
}
|
204
|
0
|
SR_j_01 <- (SR_j-min(SR_j))/(max(SR_j)-min(SR_j))
|
205
|
0
|
wselj <- rep(1, nj)
|
206
|
0
|
wselk <- rep(1, nk)
|
207
|
0
|
PFcalck <- SR_k_01
|
208
|
0
|
wselk <- as.numeric(PFcalck > threshold_k[f])
|
209
|
0
|
PFcalcj <- SR_j_01
|
210
|
0
|
wselj <- as.numeric(PFcalcj > threshold_j[f])
|
211
|
|
}
|
212
|
|
|
213
|
|
##########
|
214
|
0
|
if(method == "sNPLS"){
|
215
|
0
|
tf <- Xd %*% kronecker(wsuprak, wsupraj)
|
216
|
0
|
qf <- crossprod(Y, tf)/mynorm(crossprod(Y, tf))
|
217
|
0
|
uf <- Y %*% qf
|
218
|
|
}
|
219
|
0
|
if (sum((uf - u)^2) < conver) {
|
220
|
0
|
if (!silent) {
|
221
|
0
|
cat(paste("Component number ", f, "\n"))
|
222
|
0
|
cat(paste("Number of iterations: ", it, "\n"))
|
223
|
|
}
|
224
|
0
|
it <- max.iteration
|
225
|
0
|
if(method == "sNPLS"){
|
226
|
0
|
Tm <- cbind(Tm, tf)
|
227
|
0
|
WsupraJ <- cbind(WsupraJ, wsupraj)
|
228
|
0
|
WsupraK <- cbind(WsupraK, wsuprak)
|
229
|
0
|
bf <- MASS::ginv(crossprod(Tm)) %*% t(Tm) %*% uf
|
230
|
0
|
TM <- MASS::ginv(crossprod(Tm)) %*% t(Tm)
|
231
|
0
|
Q <- cbind(Q, qf)
|
232
|
|
} else {
|
233
|
0
|
Tm[,f] <- tf
|
234
|
0
|
WsupraJ[,f] <- wsupraj*wselj
|
235
|
0
|
WsupraK[,f] <- wsuprak*wselk
|
236
|
0
|
bf <- MASS::ginv(crossprod(Tm[,1:f, drop=FALSE])) %*% t(Tm[,1:f, drop=FALSE]) %*% uf
|
237
|
|
}
|
238
|
0
|
B[1:length(bf), f] <- bf
|
239
|
0
|
U <- cbind(U, uf)
|
240
|
0
|
if(method == "sNPLS-VIP"){
|
241
|
0
|
TM <- MASS::ginv(crossprod(Tm[,1:f, drop=FALSE])) %*% t(Tm[,1:f, drop=FALSE])
|
242
|
0
|
WkM <- MASS::ginv(crossprod(WsupraK[,1:f, drop=FALSE])) %*% t(WsupraK[,1:f, drop=FALSE])
|
243
|
0
|
WjM <- MASS::ginv(crossprod(WsupraJ[,1:f, drop=FALSE])) %*% t(WsupraJ[,1:f, drop=FALSE])
|
244
|
0
|
Gu[[f]] <- TM %*% X %*% kronecker(t(WkM), t(WjM))
|
245
|
0
|
P[[f]] = t(as.matrix(Gu[[f]]) %*% t(kronecker(WsupraK[,1:f, drop=FALSE], WsupraJ[,1:f, drop=FALSE])))
|
246
|
|
}
|
247
|
0
|
if(method == "sNPLS"){
|
248
|
0
|
TM <- MASS::ginv(crossprod(Tm)) %*% t(Tm)
|
249
|
0
|
WkM <- MASS::ginv(crossprod(WsupraK)) %*% t(WsupraK)
|
250
|
0
|
WjM <- MASS::ginv(crossprod(WsupraJ)) %*% t(WsupraJ)
|
251
|
0
|
Gu[[f]] <- TM %*% X %*% kronecker(t(WkM), t(WjM))
|
252
|
0
|
P[[f]] = t(as.matrix(Gu[[f]]) %*% t(kronecker(WsupraK, WsupraJ)))
|
253
|
|
}
|
254
|
|
|
255
|
0
|
if(method == "sNPLS"){
|
256
|
0
|
Y <- Y - Tm %*% bf %*% t(qf)
|
257
|
|
} else {
|
258
|
0
|
Y <- Y - Tm[,1:f, drop=FALSE] %*% bf %*% t(qf)
|
259
|
|
}
|
260
|
0
|
S <- svd(Y)$d
|
261
|
0
|
u <- Y[, S == max(S)]
|
262
|
|
} else {
|
263
|
0
|
u <- uf
|
264
|
0
|
it <- it + 1
|
265
|
|
}
|
266
|
|
}
|
267
|
|
}
|
268
|
0
|
Yadjsc <- Tm %*% B %*% t(Q)
|
269
|
0
|
Yadj <- Yadjsc * y_scale + y_center
|
270
|
0
|
SqrdE <- sum((Yorig - Yadj)^2)
|
271
|
0
|
rownames(WsupraJ) <- var.names
|
272
|
0
|
rownames(WsupraK) <- x3d.names
|
273
|
0
|
rownames(Q) <- yvar.names
|
274
|
0
|
rownames(Tm) <- rownames(U) <- x.names
|
275
|
0
|
colnames(Tm) <- colnames(WsupraJ) <- colnames(WsupraK) <- colnames(B) <-
|
276
|
0
|
colnames(U) <- colnames(Q) <- names(Gu) <- names(P) <- paste("Comp.", 1:ncomp)
|
277
|
0
|
output <- list(T = Tm, Wj = WsupraJ, Wk = WsupraK, B = B, U = U, Q = Q, P = P,
|
278
|
0
|
Gu = Gu, ncomp = ncomp, Xd=Xd, Yadj = Yadj, SqrdE = SqrdE,
|
279
|
0
|
Standarization = list(ScaleX = x_scale, CenterX = x_center,
|
280
|
0
|
ScaleY = y_scale, CenterY = y_center),
|
281
|
0
|
Method = method)
|
282
|
0
|
class(output)<-"sNPLS"
|
283
|
0
|
return(output)
|
284
|
|
}
|
285
|
|
|
286
|
|
#' R-matrix from a sNPLS model fit
|
287
|
|
#'
|
288
|
|
#' @description Builds the R-matrix from a sNPLS model fit
|
289
|
|
#' @param x A sNPLS model obtained from \code{sNPLS}
|
290
|
|
#' @return Returns the R-matrix of the model, needed to compute the coefficients
|
291
|
|
Rmatrix<-function(x) {
|
292
|
0
|
WsupraK <- x$Wk
|
293
|
0
|
WsupraJ <- x$Wj
|
294
|
0
|
R <- matrix(nrow = dim(x$Wj)[1] * dim(x$Wk)[1], ncol = x$ncomp)
|
295
|
0
|
ncomp <- x$ncomp
|
296
|
0
|
kroneckers<-sapply(1:x$ncomp, function(x) kronecker(WsupraK[, x], WsupraJ[, x]))
|
297
|
0
|
tkroneckers<-apply(kroneckers, 2, function(x) t(x))
|
298
|
0
|
R[,1] <- kroneckers[,1]
|
299
|
0
|
if(ncomp>1){
|
300
|
0
|
for(i in 2:ncomp){
|
301
|
0
|
pi <- pi0 <- Matrix::Matrix(diag(dim(R)[1]), sparse=TRUE)
|
302
|
0
|
for (j in 1:(i - 1)) {
|
303
|
0
|
pi <- Matrix::Matrix(pi %*% pi0 - kroneckers[,j] %*% t(tkroneckers[,j]), sparse=TRUE)
|
304
|
|
}
|
305
|
0
|
w <- kroneckers[, i]
|
306
|
0
|
pi <- pi %*% w
|
307
|
0
|
R[, i] <- Matrix::as.matrix(pi)
|
308
|
|
}
|
309
|
|
}
|
310
|
0
|
return(R)
|
311
|
|
}
|
312
|
|
|
313
|
|
#' Unfolding of three-way arrays
|
314
|
|
#'
|
315
|
|
#' @description Unfolds a three-way array into a matrix
|
316
|
|
#' @param x A three-way array
|
317
|
|
#' @return Returns a matrix with dimensions \code{dim(x)[1] x dim(x)[2]*dim(x([3]))}
|
318
|
|
unfold3w <- function(x) {
|
319
|
0
|
dim(x) <- c(dim(x)[1], dim(x)[2] * dim(x)[3])
|
320
|
0
|
return(x)
|
321
|
|
}
|
322
|
|
|
323
|
|
#' Cross-validation for a sNPLS model
|
324
|
|
#'
|
325
|
|
#' @description Performs cross-validation for a sNPLS model
|
326
|
|
#' @param X_npls A three-way array containing the predictors.
|
327
|
|
#' @param Y_npls A matrix containing the response.
|
328
|
|
#' @param ncomp A vector with the different number of components to test
|
329
|
|
#' @param samples Number of samples for performing random search in continuous thresholding
|
330
|
|
#' @param keepJ A vector with the different number of selected variables to test for discrete thresholding
|
331
|
|
#' @param keepK A vector with the different number of selected 'times' to test for discrete thresholding
|
332
|
|
#' @param nfold Number of folds for the cross-validation
|
333
|
|
#' @param parallel Should the computations be performed in parallel? Set up strategy first with \code{future::plan()}
|
334
|
|
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
|
335
|
|
#' @param ... Further arguments passed to sNPLS
|
336
|
|
#' @return A list with the best parameters for the model and the CV error
|
337
|
|
#' @examples
|
338
|
|
#' \dontrun{
|
339
|
|
#' X_npls<-array(rpois(7500, 10), dim=c(50, 50, 3))
|
340
|
|
#'
|
341
|
|
#' Y_npls<-matrix(2+0.4*X_npls[,5,1]+0.7*X_npls[,10,1]-0.9*X_npls[,15,1]+
|
342
|
|
#' 0.6*X_npls[,20,1]- 0.5*X_npls[,25,1]+rnorm(50), ncol=1)
|
343
|
|
#' #Grid search for discrete thresholding
|
344
|
|
#' cv1<- cv_snpls(X_npls, Y_npls, ncomp=1:2, keepJ = 1:3, keepK = 1:2, parallel = FALSE)
|
345
|
|
#' #Random search for continuous thresholding
|
346
|
|
#' cv2<- cv_snpls(X_npls, Y_npls, ncomp=1:2, samples=20, parallel = FALSE)
|
347
|
|
#' }
|
348
|
|
#' @importFrom stats runif
|
349
|
|
#' @export
|
350
|
|
cv_snpls <- function(X_npls, Y_npls, ncomp = 1:3, samples=20,
|
351
|
|
keepJ = NULL, keepK = NULL, nfold = 10, parallel = TRUE, method="sNPLS", ...) {
|
352
|
|
|
353
|
0
|
if(parallel) message("Your parallel configuration is ", attr(future::plan(), "class")[3])
|
354
|
0
|
if(!method %in% c("sNPLS", "sNPLS-SR", "sNPLS-VIP")) stop("'method' not recognized")
|
355
|
0
|
if(length(dim(Y_npls)) == 3) Y_npls <- unfold3w(Y_npls)
|
356
|
0
|
top <- ceiling(dim(X_npls)[1]/nfold)
|
357
|
0
|
foldid <- sample(rep(1:nfold, top), dim(X_npls)[1], replace = F)
|
358
|
0
|
if(is.null(keepJ) | is.null(keepK)){
|
359
|
0
|
cont_thresholding <- TRUE
|
360
|
0
|
message("Using continuous thresholding")
|
361
|
|
} else {
|
362
|
0
|
cont_thresholding <- FALSE
|
363
|
0
|
message("Using discrete thresholding")
|
364
|
|
}
|
365
|
0
|
if(cont_thresholding){
|
366
|
0
|
search.grid <- expand.grid(list(ncomp = ncomp, threshold_j=runif(samples),
|
367
|
0
|
threshold_k=runif(samples)))
|
368
|
|
} else {
|
369
|
0
|
search.grid <- expand.grid(list(ncomp = ncomp, keepJ = keepJ,
|
370
|
0
|
keepK = keepK))
|
371
|
|
}
|
372
|
0
|
SqrdE <- numeric()
|
373
|
0
|
applied_fun <- function(y) {
|
374
|
0
|
sapply(1:nfold, function(x) {
|
375
|
0
|
suppressMessages(tryCatch(do.call(cv_fit, c(list(xtrain = X_npls[x != foldid, , ],
|
376
|
0
|
ytrain = Y_npls[x != foldid, , drop = FALSE],
|
377
|
0
|
xval = X_npls[x == foldid, , ],
|
378
|
0
|
yval = Y_npls[x == foldid, , drop = FALSE],
|
379
|
0
|
ncomp = y["ncomp"], method=method),
|
380
|
0
|
list(threshold_j=y["threshold_j"])[cont_thresholding],
|
381
|
0
|
list(threshold_k=y["threshold_k"])[cont_thresholding],
|
382
|
0
|
list(keepJ=rep(y["keepJ"], y["ncomp"]))[!cont_thresholding],
|
383
|
0
|
list(keepK=rep(y["keepK"], y["ncomp"]))[!cont_thresholding], ...)),
|
384
|
0
|
error=function(x) NA))
|
385
|
|
})
|
386
|
|
}
|
387
|
0
|
if (parallel) {
|
388
|
0
|
cv_res <- future.apply::future_apply(search.grid, 1, applied_fun, future.seed=TRUE)
|
389
|
|
} else {
|
390
|
0
|
cv_res <- pbapply::pbapply(search.grid, 1, applied_fun)
|
391
|
|
}
|
392
|
0
|
cv_mean <- apply(cv_res, 2, function(x) mean(x, na.rm = TRUE))
|
393
|
0
|
cv_se <- apply(cv_res, 2, function(x) sd(x, na.rm=TRUE)/sqrt(nfold))
|
394
|
0
|
best_model <- search.grid[which.min(cv_mean), ]
|
395
|
0
|
output <- list(best_parameters = best_model, cv_mean = cv_mean,
|
396
|
0
|
cv_se = cv_se, cv_grid = search.grid)
|
397
|
0
|
class(output)<-"cvsNPLS"
|
398
|
0
|
return(output)
|
399
|
|
}
|
400
|
|
|
401
|
|
#' Internal function for \code{cv_snpls}
|
402
|
|
#'
|
403
|
|
#' @param xtrain A three-way training array
|
404
|
|
#' @param ytrain A response training matrix
|
405
|
|
#' @param xval A three-way test array
|
406
|
|
#' @param yval A response test matrix
|
407
|
|
#' @param ncomp Number of components for the sNPLS model
|
408
|
|
#' @param threshold_j Threshold value on Wj. Scaled between [0, 1)
|
409
|
|
#' @param threshold_k Threshold value on Wk. Scaled between [0, 1)
|
410
|
|
#' @param keepJ Number of variables to keep for each component, ignored if threshold_j is provided
|
411
|
|
#' @param keepK Number of 'times' to keep for each component, ignored if threshold_k is provided
|
412
|
|
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
|
413
|
|
#' @param ... Further arguments passed to sNPLS
|
414
|
|
#' @return Returns the CV mean squared error
|
415
|
|
#' @importFrom stats predict
|
416
|
|
#' @export
|
417
|
|
cv_fit <- function(xtrain, ytrain, xval, yval, ncomp, threshold_j=NULL, threshold_k=NULL, keepJ=NULL, keepK=NULL, method, ...) {
|
418
|
0
|
fit <- sNPLS(XN = xtrain, Y = ytrain, ncomp = ncomp, keepJ=keepJ, keepK=keepK, threshold_j = threshold_j,
|
419
|
0
|
threshold_k = threshold_k, silent = TRUE, method=method, ...)
|
420
|
0
|
Y_pred <- predict(fit, xval)
|
421
|
0
|
CVE <- sqrt(mean((Y_pred - yval)^2))
|
422
|
0
|
return(CVE)
|
423
|
|
}
|
424
|
|
|
425
|
|
#' Plots for sNPLS model fits
|
426
|
|
#'
|
427
|
|
#' @description Different plots for sNPLS model fits
|
428
|
|
#' @param x A sNPLS model fit
|
429
|
|
#' @param comps Vector with the components to plot. It can be of length \code{ncomp} for types "time" and "variables" and of length 2 otherwise.
|
430
|
|
#' @param type The type of plot. One of those: "T", "U", "Wj", "Wk", "time" or "variables"
|
431
|
|
#' @param labels Should rownames be added as labels to the plot?
|
432
|
|
#' @param group Vector with categorical variable defining groups (optional)
|
433
|
|
#' @param ... Not used
|
434
|
|
#' @return A plot of the type specified in the \code{type} parameter
|
435
|
|
#' @export
|
436
|
|
plot.sNPLS <- function(x, type = "T", comps = c(1, 2), labels=TRUE, group=NULL, ...) {
|
437
|
0
|
if (type == "T")
|
438
|
0
|
p<-plot_T(x, comps = comps, labels=labels, group=group)
|
439
|
0
|
if (type == "U")
|
440
|
0
|
p<-plot_U(x, comps = comps, labels=labels, group=group)
|
441
|
0
|
if (type == "Wj")
|
442
|
0
|
p<-plot_Wj(x, comps = comps, labels=labels)
|
443
|
0
|
if (type == "Wk")
|
444
|
0
|
p<-plot_Wk(x, comps = comps, labels=labels)
|
445
|
0
|
if (type == "time")
|
446
|
0
|
p<-plot_time(x, comps = comps)
|
447
|
0
|
if (type == "variables")
|
448
|
0
|
p<-plot_variables(x, comps = comps)
|
449
|
0
|
p
|
450
|
|
}
|
451
|
|
|
452
|
|
#' Internal function for \code{plot.sNPLS}
|
453
|
|
#'
|
454
|
|
#' @param x A sNPLS model fit
|
455
|
|
#' @param comps A vector of length two with the components to plot
|
456
|
|
#' @param labels Should rownames be added as labels to the plot?
|
457
|
|
#' @param group Vector with categorical variable defining groups
|
458
|
|
#' @return A plot of the T matrix of a sNPLS model fit
|
459
|
|
plot_T <- function(x, comps, labels, group=NULL){
|
460
|
0
|
df <- data.frame(x$T)[comps]
|
461
|
0
|
if(!is.null(group)) df <- data.frame(df, group=as.factor(group))
|
462
|
0
|
names(df)[1:2] <- paste("Comp.", comps, sep="")
|
463
|
0
|
p1 <- ggplot2::ggplot(df, ggplot2::aes_string(x=names(df)[1], y=names(df)[2])) +
|
464
|
0
|
ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::geom_vline(xintercept = 0,
|
465
|
0
|
lty=2) +
|
466
|
0
|
ggplot2::geom_hline(yintercept = 0, lty=2) + ggplot2::xlab(names(df)[1]) +
|
467
|
0
|
ggplot2::ylab(names(df)[2]) + ggplot2::theme(axis.text=ggplot2::element_text(size=12),
|
468
|
0
|
axis.title=ggplot2::element_text(size=12,face="bold"))
|
469
|
0
|
if(labels) p1 <- p1 + ggrepel::geom_text_repel(ggplot2::aes(label=rownames(df)))
|
470
|
0
|
if(!is.null(group)) p1 <- p1 + ggplot2::geom_point(ggplot2::aes(color=group))
|
471
|
0
|
p1
|
472
|
|
}
|
473
|
|
|
474
|
|
#' Internal function for \code{plot.sNPLS}
|
475
|
|
#'
|
476
|
|
#' @param x A sNPLS model fit
|
477
|
|
#' @param comps A vector of length two with the components to plot
|
478
|
|
#' @param labels Should rownames be added as labels to the plot?
|
479
|
|
#' @param group Vector with categorical variable defining groups
|
480
|
|
#' @return A plot of the U matrix of a sNPLS model fit
|
481
|
|
plot_U <- function(x, comps, labels, group=NULL){
|
482
|
0
|
df <- data.frame(x$U)[comps]
|
483
|
0
|
if(!is.null(group)) df <- data.frame(df, group=as.factor(group))
|
484
|
0
|
names(df)[1:2] <- paste("Comp.", comps, sep="")
|
485
|
0
|
p1 <- ggplot2::ggplot(df, ggplot2::aes_string(x=names(df)[1], y=names(df)[2])) +
|
486
|
0
|
ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::geom_vline(xintercept = 0,
|
487
|
0
|
lty=2) +
|
488
|
0
|
ggplot2::geom_hline(yintercept = 0, lty=2) + ggplot2::xlab(names(df)[1]) +
|
489
|
0
|
ggplot2::ylab(names(df)[2]) + ggplot2::theme(axis.text=ggplot2::element_text(size=12),
|
490
|
0
|
axis.title=ggplot2::element_text(size=12,face="bold"))
|
491
|
0
|
if(labels) p1 <- p1 + ggrepel::geom_text_repel(ggplot2::aes(label=rownames(df)))
|
492
|
0
|
if(!is.null(group)) p1 <- p1 + ggplot2::geom_point(ggplot2::aes(color=group))
|
493
|
0
|
p1
|
494
|
|
}
|
495
|
|
|
496
|
|
#' Internal function for \code{plot.sNPLS}
|
497
|
|
#'
|
498
|
|
#' @param x A sNPLS model fit
|
499
|
|
#' @param comps A vector of length two with the components to plot
|
500
|
|
#' @param labels Should rownames be added as labels to the plot?
|
501
|
|
#' @return A plot of Wj coefficients
|
502
|
|
plot_Wj <- function(x, comps, labels){
|
503
|
0
|
df <- data.frame(x$Wj)[comps]
|
504
|
0
|
names(df) <- paste("Comp.", comps, sep="")
|
505
|
0
|
var_names_zero <- rownames(df)
|
506
|
0
|
var_names_zero[rowSums(df)==0]<- ""
|
507
|
0
|
p1 <- ggplot2::ggplot(df, ggplot2::aes_string(x=names(df)[1], y=names(df)[2])) +
|
508
|
0
|
ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::geom_vline(xintercept = 0,
|
509
|
0
|
lty=2) +
|
510
|
0
|
ggplot2::geom_hline(yintercept = 0, lty=2) + ggplot2::xlab(names(df)[1]) +
|
511
|
0
|
ggplot2::ylab(names(df)[2]) + ggplot2::theme(axis.text=ggplot2::element_text(size=12),
|
512
|
0
|
axis.title=ggplot2::element_text(size=12,face="bold"))
|
513
|
0
|
if(labels) p1 <- p1 + ggrepel::geom_text_repel(ggplot2::aes(label=var_names_zero), size=3)
|
514
|
0
|
p1
|
515
|
|
}
|
516
|
|
|
517
|
|
#' Internal function for \code{plot.sNPLS}
|
518
|
|
#'
|
519
|
|
#' @param x A sNPLS model fit
|
520
|
|
#' @param comps A vector of length two with the components to plot
|
521
|
|
#' @param labels Should rownames be added as labels to the plot?
|
522
|
|
#' @return A plot of the Wk coefficients
|
523
|
|
plot_Wk <- function(x, comps, labels){
|
524
|
0
|
df <- data.frame(x$Wk)[comps]
|
525
|
0
|
names(df) <- paste("Comp.", comps, sep="")
|
526
|
0
|
var_names_zero <- rownames(df)
|
527
|
0
|
var_names_zero[rowSums(df)==0]<- ""
|
528
|
0
|
p1 <- ggplot2::ggplot(df, ggplot2::aes_string(x=names(df)[1], y=names(df)[2])) +
|
529
|
0
|
ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::geom_vline(xintercept = 0,
|
530
|
0
|
lty=2) +
|
531
|
0
|
ggplot2::geom_hline(yintercept = 0, lty=2) + ggplot2::xlab(names(df)[1]) +
|
532
|
0
|
ggplot2::ylab(names(df)[2]) + ggplot2::theme(axis.text=ggplot2::element_text(size=12),
|
533
|
0
|
axis.title=ggplot2::element_text(size=12,face="bold"))
|
534
|
0
|
if(labels) p1 <- p1 + ggrepel::geom_text_repel(ggplot2::aes(label=var_names_zero), size=4)
|
535
|
0
|
p1
|
536
|
|
}
|
537
|
|
|
538
|
|
#' Internal function for \code{plot.sNPLS}
|
539
|
|
#'
|
540
|
|
#' @param x A sNPLS model fit
|
541
|
|
#' @param comps A vector with the components to plot
|
542
|
|
#' @return A plot of Wk coefficients for each component
|
543
|
|
plot_time <- function(x, comps){
|
544
|
0
|
df <- clickR::forge(data.frame(row=1:nrow(x$Wk), x$Wk), affixes=as.character(comps),
|
545
|
0
|
var.name="Component")
|
546
|
0
|
ggplot2::ggplot(df, ggplot2::aes_string(x="row", y="Comp..", color="Component")) +
|
547
|
0
|
ggplot2::geom_line(lwd=1.05) + ggplot2::theme_bw() +
|
548
|
0
|
ggplot2::geom_hline(yintercept = 0, alpha=0.2) +
|
549
|
0
|
ggplot2::xlab("Time (index)") + ggplot2::ylab("Wk") +
|
550
|
0
|
ggplot2::theme(axis.text = ggplot2::element_text(size=12),
|
551
|
0
|
axis.title = ggplot2::element_text(size=12,face="bold"))
|
552
|
|
}
|
553
|
|
|
554
|
|
#' Internal function for \code{plot.sNPLS}
|
555
|
|
#'
|
556
|
|
#' @param x A sNPLS model fit
|
557
|
|
#' @param comps A vector with the components to plot
|
558
|
|
#' @return A plot of Wj coefficients for each component
|
559
|
|
plot_variables <- function(x, comps){
|
560
|
0
|
df <- clickR::forge(data.frame(row=1:nrow(x$Wj), x$Wj), affixes=as.character(comps),
|
561
|
0
|
var.name="Component")
|
562
|
0
|
ggplot2::ggplot(df, ggplot2::aes_string(x="row", y="Comp..", color="Component")) +
|
563
|
0
|
ggplot2::geom_line(lwd=1.05) + ggplot2::theme_bw() +
|
564
|
0
|
ggplot2::geom_hline(yintercept = 0, alpha=0.2) +
|
565
|
0
|
ggplot2::xlab("Variable (index)") + ggplot2::ylab("Wj") +
|
566
|
0
|
ggplot2::theme(axis.text = ggplot2::element_text(size=12),
|
567
|
0
|
axis.title=ggplot2::element_text(size=12,face="bold"))
|
568
|
|
}
|
569
|
|
|
570
|
|
#' Predict for sNPLS models
|
571
|
|
#'
|
572
|
|
#' @description Predict function for sNPLS models
|
573
|
|
#' @param object A sNPLS model fit
|
574
|
|
#' @param newX A three-way array containing the new data
|
575
|
|
#' @param rescale Should the prediction be rescaled to the original scale?
|
576
|
|
#' @param ... Further arguments passed to \code{predict}
|
577
|
|
#' @return A matrix with the predictions
|
578
|
|
#' @export
|
579
|
|
predict.sNPLS <- function(object, newX, rescale = TRUE, ...) {
|
580
|
0
|
newX <- unfold3w(newX)
|
581
|
|
# Centrado y escalado
|
582
|
|
#Xstd <- t((t(newX) - object$Standarization$CenterX)/object$Standarization$ScaleX)
|
583
|
|
#Xstd <- sweep(sweep(newX, 2, object$Standarization$CenterX), 2, object$Standarization$ScaleX, "/")
|
584
|
0
|
Xstd <- scale(newX, center=object$Standarization$CenterX, scale=object$Standarization$ScaleX)
|
585
|
0
|
R <- Rmatrix(object)
|
586
|
0
|
Bnpls <- R %*% object$B %*% t(object$Q)
|
587
|
0
|
Yval <- Xstd %*% Bnpls
|
588
|
0
|
if (rescale) {
|
589
|
0
|
Yval <- Yval * object$Standarization$ScaleY + object$Standarization$CenterY
|
590
|
|
}
|
591
|
0
|
return(Yval)
|
592
|
|
}
|
593
|
|
|
594
|
|
#' Fitted method for sNPLS models
|
595
|
|
#'
|
596
|
|
#' @description Fitted method for sNPLS models
|
597
|
|
#' @param object A sNPLS model fit
|
598
|
|
#' @param ... Further arguments passed to \code{fitted}
|
599
|
|
#' @return Fitted values for the sNPLS model
|
600
|
|
#' @export
|
601
|
|
fitted.sNPLS <- function(object, ...){
|
602
|
0
|
return(object$Yadj)
|
603
|
|
}
|
604
|
|
|
605
|
|
#' Plot cross validation results for sNPLS objects
|
606
|
|
#'
|
607
|
|
#' @description Plot function for visualization of cross validation results for sNPLS models
|
608
|
|
#' @param x A cv_sNPLS object
|
609
|
|
#' @param ... Not used
|
610
|
|
#' @return A facet plot with the results of the cross validation
|
611
|
|
#' @export
|
612
|
|
plot.cvsNPLS <- function(x, ...) {
|
613
|
0
|
cont_thresholding <- names(x$cv_grid)[2] == "threshold_j"
|
614
|
0
|
df_grid <- data.frame(threshold_j=x$cv_grid[,2], threshold_k=x$cv_grid[,3], CVE=x$cv_mean, Ncomp=paste("Ncomp =", x$cv_grid$ncomp, sep=" "))
|
615
|
0
|
if(!cont_thresholding) names(df_grid)[c(1, 2)] <- c("KeepJ", "KeepK")
|
616
|
0
|
if(cont_thresholding){
|
617
|
0
|
ggplot2::ggplot(df_grid, ggplot2::aes_string(x="threshold_j", y="CVE"))+ggplot2::geom_point()+ggplot2::geom_smooth()+ggplot2::facet_grid(cut(threshold_k,10) ~ Ncomp)+
|
618
|
0
|
ggplot2::theme_bw()
|
619
|
|
} else {
|
620
|
0
|
ggplot2::ggplot(df_grid, ggplot2::aes_string(x="KeepJ", y="CVE"))+ggplot2::geom_point()+ggplot2::geom_line()+ggplot2::facet_grid(KeepK ~ Ncomp)+
|
621
|
0
|
ggplot2::theme_bw()
|
622
|
|
}
|
623
|
|
}
|
624
|
|
|
625
|
|
#' Coefficients from a sNPLS model
|
626
|
|
#'
|
627
|
|
#' @description Extract coefficients from a sNPLS model
|
628
|
|
#' @param object A sNPLS model fit
|
629
|
|
#' @param as.matrix Should the coefficients be presented as matrix or vector?
|
630
|
|
#' @param ... Further arguments passed to \code{coef}
|
631
|
|
#' @return A matrix (or vector) of coefficients
|
632
|
|
#' @export
|
633
|
|
coef.sNPLS <- function(object, as.matrix = FALSE, ...) {
|
634
|
0
|
R <- Rmatrix(object)
|
635
|
0
|
Bnpls <- R %*% object$B %*% t(object$Q)
|
636
|
0
|
colnames(Bnpls) <- paste("Estimate", colnames(Bnpls))
|
637
|
0
|
if (as.matrix){
|
638
|
0
|
dim(Bnpls) <- c(dim(object$Wj)[1], dim(object$Wk)[1], dim(Bnpls)[2])
|
639
|
0
|
rownames(Bnpls) <- rownames(object$Wj)
|
640
|
0
|
colnames(Bnpls) <- rownames(object$Wk)
|
641
|
|
}
|
642
|
0
|
return(Bnpls)
|
643
|
|
}
|
644
|
|
|
645
|
|
#' Repeated cross-validation for sNPLS models
|
646
|
|
#'
|
647
|
|
#' @description Performs repeated cross-validatiodn and represents results in a plot
|
648
|
|
#' @param X_npls A three-way array containing the predictors.
|
649
|
|
#' @param Y_npls A matrix containing the response.
|
650
|
|
#' @param ncomp A vector with the different number of components to test
|
651
|
|
#' @param samples Number of samples for performing random search in continuous thresholding
|
652
|
|
#' @param keepJ A vector with the different number of selected variables to test in discrete thresholding
|
653
|
|
#' @param keepK A vector with the different number of selected 'times' to test in discrete thresholding
|
654
|
|
#' @param nfold Number of folds for the cross-validation
|
655
|
|
#' @param times Number of repetitions of the cross-validation
|
656
|
|
#' @param parallel Should the computations be performed in parallel? Set up strategy first with \code{future::plan()}
|
657
|
|
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
|
658
|
|
#' @param ... Further arguments passed to cv_snpls
|
659
|
|
#' @return A density plot with the results of the cross-validation and an (invisible) \code{data.frame} with these results
|
660
|
|
#' @importFrom stats var
|
661
|
|
#' @export
|
662
|
|
repeat_cv <- function(X_npls, Y_npls, ncomp = 1:3, samples=20, keepJ=NULL, keepK=NULL, nfold = 10, times=30, parallel = TRUE, method="sNPLS", ...){
|
663
|
0
|
if(!method %in% c("sNPLS", "sNPLS-SR", "sNPLS-VIP")) stop("'method' not recognized")
|
664
|
0
|
if(parallel) message("Your parallel configuration is ", attr(future::plan(), "class")[3])
|
665
|
0
|
if(is.null(keepJ) | is.null(keepK)){
|
666
|
0
|
cont_thresholding <- TRUE
|
667
|
0
|
message("Using continuous thresholding")
|
668
|
|
} else {
|
669
|
0
|
cont_thresholding <- FALSE
|
670
|
0
|
message("Using discrete thresholding")
|
671
|
|
}
|
672
|
0
|
if(parallel){
|
673
|
0
|
rep_cv<-future.apply::future_sapply(1:times, function(x) suppressMessages(cv_snpls(X_npls, Y_npls, ncomp=ncomp, parallel = FALSE, nfold = nfold, samples=samples, keepJ=keepJ, keepK=keepK, method=method, ...)), future.seed=TRUE)
|
674
|
|
} else {
|
675
|
0
|
rep_cv<-pbapply::pbreplicate(times, suppressMessages(cv_snpls(X_npls, Y_npls, ncomp=ncomp, parallel = FALSE, nfold = nfold, samples=samples, keepJ=keepJ, keepK=keepK, method=method, ...)))
|
676
|
|
}
|
677
|
0
|
resdata<-data.frame(ncomp=sapply(rep_cv[1,], function(x) x[[1]]), threshold_j=sapply(rep_cv[1,], function(x) x[[2]]),
|
678
|
0
|
threshold_k=sapply(rep_cv[1,], function(x) x[[3]]))
|
679
|
0
|
if(!cont_thresholding) names(resdata)[c(2, 3)] <- c("keepJ", "keepK")
|
680
|
0
|
class(resdata)<-c("repeatcv", "data.frame")
|
681
|
0
|
return(resdata)
|
682
|
|
}
|
683
|
|
|
684
|
|
#' Density plot for repeat_cv results
|
685
|
|
#'
|
686
|
|
#' @description Plots a grid of slices from the 3-D kernel denity estimates of the repeat_cv function
|
687
|
|
#' @param x A repeatcv object
|
688
|
|
#' @param ... Further arguments passed to plot
|
689
|
|
#' @return A grid of slices from a 3-D density plot of the results of the repeated cross-validation
|
690
|
|
#' @importFrom grDevices colorRampPalette
|
691
|
|
#' @importFrom stats ftable density setNames
|
692
|
|
#' @export
|
693
|
|
plot.repeatcv <- function(x, ...){
|
694
|
0
|
x.old <- x
|
695
|
0
|
x<-x[,sapply(x, function(x) var(x)>0), drop=FALSE]
|
696
|
0
|
if(ncol(x) < ncol(x.old)) warning(paste("\n", colnames(x.old)[!colnames(x.old) %in% colnames(x)], "is constant at", x.old[1,colnames(x.old)[!colnames(x.old) %in% colnames(x)]]))
|
697
|
0
|
if(ncol(x) == 1){
|
698
|
0
|
densities <- density(x[,1])
|
699
|
0
|
df_grid <- setNames(data.frame(densities$x, densities$y), c(colnames(x), "density"))
|
700
|
0
|
p <- ggplot2::ggplot(x, ggplot2::aes_string(x=colnames(x)))+ggplot2::geom_density(color="gray", fill="gray", alpha=0.3)+ggplot2::theme_classic()
|
701
|
|
} else{
|
702
|
0
|
H.pi <- ks::Hpi(x)
|
703
|
0
|
fhat <- ks::kde(x, H=H.pi, compute.cont=TRUE, gridsize = rep(151, ncol(x)))
|
704
|
0
|
if(ncol(x) == 3){
|
705
|
0
|
ncomp_values <- sapply(sort(unique(fhat$x[,1])), function(x) which.min(abs(fhat$eval.points[[1]]-x)))
|
706
|
0
|
positions <- as.data.frame(fhat$x)
|
707
|
0
|
positions$Ncomp <- factor(positions$ncomp)
|
708
|
0
|
df_grid <- setNames(expand.grid(fhat$eval.points[[2]], fhat$eval.points[[3]]), names(positions)[c(2,3)])
|
709
|
0
|
combl <- nrow(df_grid)
|
710
|
0
|
df_grid <- df_grid[rep(1:nrow(df_grid), length(ncomp_values)),]
|
711
|
0
|
df_grid$density <- unlist(lapply(ncomp_values, function(x) as.numeric(matrix(fhat$estimate[x,,], ncol=1))))
|
712
|
0
|
df_grid$Ncomp <- factor(rep(sort(unique(positions$ncomp)), each=combl))
|
713
|
0
|
p <- ggplot2::ggplot(df_grid, ggplot2::aes_string(names(df_grid)[1], names(df_grid)[2], fill="density"))+ggplot2::geom_raster()+
|
714
|
0
|
ggplot2::scale_fill_gradientn(colours =colorRampPalette(c("white", "blue", "red"))(10))+ggplot2::theme_classic()+
|
715
|
0
|
ggplot2::geom_count(inherit.aes = FALSE, ggplot2::aes_string(x=names(df_grid)[1], y=names(df_grid)[2]), data=positions) +ggplot2::facet_grid(~Ncomp)
|
716
|
0
|
if(names(df_grid)[1] == "threshold_j"){
|
717
|
0
|
p <- p + ggplot2::scale_x_continuous(limits=c(0, 1)) + ggplot2::scale_y_continuous(limits=c(0, 1))
|
718
|
|
} else {
|
719
|
0
|
p <- p + ggplot2::scale_x_continuous(breaks=if(round(diff(range(df_grid$keepJ)))<=10) round(max(0, min(df_grid$keepJ)):max(df_grid$keepJ)) else round(seq(max(0, min(df_grid$keepJ)), max(df_grid$keepJ), by= ceiling(max(df_grid$keepJ)/20)*2)))+
|
720
|
0
|
ggplot2::scale_y_continuous(breaks=if(round(diff(range(df_grid$keepK)))<=10) round(max(0, min(df_grid$keepK)):max(df_grid$keepK)) else round(seq(max(0, min(df_grid$keepK)), max(df_grid$keepK), by= ceiling(max(df_grid$keepK)/20)*2)))+
|
721
|
0
|
ggplot2::scale_size_area(breaks=if(length(sort(unique(as.numeric(ftable(positions))))[-1])<=7) sort(unique(as.numeric(ftable(positions))))[-1] else (1:max(as.numeric(ftable(positions))))[round(seq(1, max(as.numeric(ftable(positions))), length.out = 7))],
|
722
|
0
|
labels=if(length(sort(unique(as.numeric(ftable(positions))))[-1])<=7) sort(unique(as.numeric(ftable(positions))))[-1] else (1:max(as.numeric(ftable(positions))))[round(seq(1, max(as.numeric(ftable(positions))), length.out = 7))])
|
723
|
|
}
|
724
|
|
} else {
|
725
|
0
|
positions <- as.data.frame(fhat$x)
|
726
|
0
|
df_grid <- expand.grid(V1=fhat$eval.points[[1]], V2=fhat$eval.points[[2]])
|
727
|
0
|
names(df_grid)<-colnames(positions)
|
728
|
0
|
df_grid$density <- as.numeric(matrix(fhat$estimate, ncol=1))
|
729
|
0
|
p <- ggplot2::ggplot(df_grid, ggplot2::aes_string(colnames(df_grid)[1], colnames(df_grid)[2], fill="density"))+ggplot2::geom_raster()+
|
730
|
0
|
ggplot2::scale_fill_gradientn(colours =colorRampPalette(c("white", "blue", "red"))(10))+ggplot2::theme_classic()+
|
731
|
0
|
ggplot2::geom_count(inherit.aes = FALSE, ggplot2::aes_string(x=colnames(df_grid)[1], y=colnames(df_grid)[2]), data=positions)
|
732
|
0
|
if("threshold_j" %in% names(df_grid) | "threshold_k" %in% names(df_grid)){
|
733
|
0
|
p <- p + ggplot2::scale_x_continuous(limits=c(0, 1)) + ggplot2::scale_y_continuous(limits=c(0, 1))
|
734
|
|
} else {
|
735
|
0
|
p <- p + ggplot2::scale_x_continuous(breaks=if(round(diff(range(df_grid[,1])))<=10) round(max(0, min(df_grid[,1])):max(df_grid[,1])) else round(seq(max(0, min(df_grid[,1])), max(df_grid[,1]), by= ceiling(max(df_grid[,1])/20)*2)))+
|
736
|
0
|
ggplot2::scale_y_continuous(breaks=if(round(diff(range(df_grid[,2])))<=10) round(max(0, min(df_grid[,2])):max(df_grid[,2])) else round(seq(max(0, min(df_grid[,2])), max(df_grid[,2]), by= ceiling(max(df_grid[,2])/20)*2)))+
|
737
|
0
|
ggplot2::scale_size_continuous(breaks=if(length(sort(unique(as.numeric(ftable(positions))))[-1])<=7) sort(unique(as.numeric(ftable(positions))))[-1] else (1:max(as.numeric(ftable(positions))))[round(seq(1, max(as.numeric(ftable(positions))), length.out = 7))],
|
738
|
0
|
labels=if(length(sort(unique(as.numeric(ftable(positions))))[-1])<=7) sort(unique(as.numeric(ftable(positions))))[-1] else (1:max(as.numeric(ftable(positions))))[round(seq(1, max(as.numeric(ftable(positions))), length.out = 7))])
|
739
|
|
}
|
740
|
|
}
|
741
|
|
}
|
742
|
0
|
print(p)
|
743
|
0
|
data.frame(x.old[1, !colnames(x.old) %in% colnames(x), drop=FALSE], df_grid[which.max(df_grid$density),])
|
744
|
|
}
|
745
|
|
|
746
|
|
#' Summary for sNPLS models
|
747
|
|
#'
|
748
|
|
#' @description Summary of a sNPLS model fit
|
749
|
|
#' @param object A sNPLS object
|
750
|
|
#' @param ... Further arguments passed to summary.default
|
751
|
|
#' @return A summary inclunding number of components, squared error and coefficients of the fitted model
|
752
|
|
#' @importFrom stats coef
|
753
|
|
#' @export
|
754
|
|
summary.sNPLS <- function(object, ...){
|
755
|
0
|
cat(object$Method, "model with", object$ncomp, "components and squared error of", round(object$SqrdE,3), "\n", "\n")
|
756
|
0
|
cat("Coefficients: \n")
|
757
|
0
|
round(coef(object, as.matrix=TRUE), 3)
|
758
|
|
}
|
759
|
|
|
760
|
|
#' Compute Selectivity Ratio for a sNPLS model
|
761
|
|
#'
|
762
|
|
#' @description Estimates Selectivity Ratio for the different components of a sNPLS model fit
|
763
|
|
#' @param model A sNPLS model
|
764
|
|
#' @return A list of data.frames, each of them including the computed Selectivity Ratios for each variable
|
765
|
|
#' @export
|
766
|
|
SR <- function(model){
|
767
|
0
|
output <- lapply(1:model$ncomp, function(f){
|
768
|
0
|
Xres <- model$Xd - model$T[,1:f, drop=FALSE] %*% t(model$P[[f]])
|
769
|
0
|
Xpred <- model$T[,1:f, drop=FALSE] %*% t(model$P[[f]])
|
770
|
0
|
SSexp <- Xpred^2
|
771
|
0
|
SSres <- (model$Xd-Xpred)^2
|
772
|
0
|
SSexp_cube <- array(SSexp, dim=c(nrow(SSexp), nrow(model$Wj), nrow(model$Wk)))
|
773
|
0
|
SSres_cube <- array(SSres, dim=c(nrow(SSexp), nrow(model$Wj), nrow(model$Wk)))
|
774
|
0
|
SR_k <- sapply(1:nrow(model$Wk), function(x) sum(SSexp_cube[,,x])/sum(SSres_cube[,,x]))
|
775
|
0
|
SR_j <- sapply(1:nrow(model$Wj), function(x) sum(SSexp_cube[,x,])/sum(SSres_cube[,x,]))
|
776
|
0
|
list(SR_k=round(SR_k,3),
|
777
|
0
|
SR_j=round(SR_j,3))
|
778
|
|
})
|
779
|
0
|
SR_j <- do.call("cbind", sapply(output, function(x) x[2]))
|
780
|
0
|
SR_k <- do.call("cbind", sapply(output, function(x) x[1]))
|
781
|
0
|
rownames(SR_j) <- rownames(model$Wj)
|
782
|
0
|
rownames(SR_k) <- rownames(model$Wk)
|
783
|
0
|
colnames(SR_j) <- colnames(SR_k) <- paste("Component ", 1:model$ncomp, sep="")
|
784
|
0
|
list(SR_j=SR_j, SR_k=SR_k)
|
785
|
|
}
|