David-Hervas / sNPLS
1
#' Fit a sNPLS model
2
#'
3
#' @description Fits a N-PLS regression model imposing sparsity on \code{wj} and \code{wk} matrices
4
#' @param XN A three-way array containing the predictors.
5
#' @param Y A matrix containing the response.
6
#' @param ncomp Number of components in the projection
7
#' @param threshold_j Threshold value on Wj. Scaled between [0, 1)
8
#' @param threshold_k Threshold value on Wk. scaled between [0, 1)
9
#' @param keepJ Number of variables to keep for each component, ignored if threshold_j is provided
10
#' @param keepK Number of 'times' to keep for each component, ignored if threshold_k is provided
11
#' @param scale.X Perform unit variance scaling on X?
12
#' @param center.X Perform mean centering on X?
13
#' @param scale.Y Perform unit variance scaling on Y?
14
#' @param center.Y Perform mean centering on Y?
15
#' @param conver Convergence criterion
16
#' @param max.iteration Maximum number of iterations
17
#' @param silent Show output?
18
#' @param method Select between L1 penalization (sNPLS), variable selection with Selectivity Ratio (sNPLS-SR) or variable selection with VIP (sNPLS-VIP)
19
#' @return A fitted sNPLS model
20
#' @references C. A. Andersson and R. Bro. The N-way Toolbox for MATLAB Chemometrics & Intelligent Laboratory Systems. 52 (1):1-4, 2000.
21
#' @references Hervas, D. Prats-Montalban, J. M., Garcia-Cañaveras, J. C., Lahoz, A., & Ferrer, A. (2019). Sparse N-way partial least squares by L1-penalization. Chemometrics and Intelligent Laboratory Systems, 185, 85-91.
22
#' @examples
23
#' X_npls<-array(rpois(7500, 10), dim=c(50, 50, 3))
24
#'
25
#' Y_npls <- matrix(2+0.4*X_npls[,5,1]+0.7*X_npls[,10,1]-0.9*X_npls[,15,1]+
26
#' 0.6*X_npls[,20,1]- 0.5*X_npls[,25,1]+rnorm(50), ncol=1)
27
#' #Discrete thresholding
28
#' fit <- sNPLS(X_npls, Y_npls, ncomp=3, keepJ = rep(2,3) , keepK = rep(1,3))
29
#' #Continuous thresholding
30
#' fit2 <- sNPLS(X_npls, Y_npls, ncomp=3, threshold_j=0.5, threshold_k=0.5)
31
#' #USe sNPLS-SR method
32
#' fit3 <- sNPLS(X_npls, Y_npls, ncomp=3, threshold_j=0.5, threshold_k=0.5, method="sNPLS-SR")
33
#' @importFrom stats sd
34
#' @export
35
sNPLS <- function(XN, Y, ncomp = 2, threshold_j=0.5, threshold_k=0.5, keepJ = NULL, keepK = NULL, scale.X=TRUE, center.X=TRUE,
36
         scale.Y=TRUE, center.Y=TRUE, conver = 1e-16, max.iteration = 10000, silent = F, method="sNPLS"){
37

38 0
  mynorm <- function(x) sqrt(sum(diag(crossprod(x))))
39 0
  thresholding <- function(x, nj) {
40 0
    ifelse(abs(x) > abs(x[order(abs(x))][nj]),
41 0
           (abs(x) - abs(x[order(abs(x))][nj])) *
42 0
             sign(x), 0)
43
  }
44 0
  rel_thresholding <- function(x, j_rel){
45 0
    ifelse(abs(x)-max(abs(x))*j_rel <= 0, 0, sign(x)*(abs(x)-max(abs(x))*j_rel))
46
  }
47 0
  if(!method %in% c("sNPLS", "sNPLS-SR", "sNPLS-VIP")) stop("'method' not recognized")
48 0
  if (length(dim(Y)) == 3) Y <- unfold3w(Y)
49 0
  if (length(dim(XN)) != 3) stop("'XN' is not a three-way array")
50 0
  if (!is.null(rownames(XN))){
51 0
    y.names <- x.names <- rownames(XN)
52
  } else {
53 0
    y.names <- x.names <- 1:dim(XN)[1]
54
  }
55 0
  if (!is.null(colnames(XN))){
56 0
    var.names <- colnames(XN)
57
  } else {
58 0
    var.names <- paste("X.", 1:dim(XN)[2], sep = "")
59
  }
60 0
  if (!is.null(dimnames(XN)[[3]])){
61 0
    x3d.names <- dimnames(XN)[[3]]
62
  } else {
63 0
    x3d.names <- paste("Z.", 1:dim(XN)[3], sep = "")
64
  }
65 0
  if (!is.null(colnames(Y))){
66 0
    yvar.names <- colnames(Y)
67
  } else {
68 0
    yvar.names <- paste("Y.", 1:dim(Y)[2], sep = "")
69
  }
70 0
  if(!center.X) center.X <- rep(0, ncol(XN)*dim(XN)[3])
71 0
  if(!center.Y) center.Y <- rep(0, ncol(Y))
72 0
  if(!scale.X) scale.X <- rep(1, ncol(XN)*dim(XN)[3])
73 0
  if(!scale.Y) scale.Y <- rep(1, ncol(Y))
74 0
  if(is.null(keepJ) | is.null(keepK) | method!="sNPLS"){
75 0
    cont_thresholding <- TRUE
76 0
    message(paste("Using continuous thresholding (", method, ")", sep=""))
77
  } else {
78 0
    cont_thresholding <- FALSE
79 0
    message("Using discrete L1-thresholding")
80
  }
81 0
  if(length(threshold_j) == 1 & ncomp > 1) threshold_j <- rep(threshold_j, ncomp)
82 0
  if(length(threshold_k) == 1 & ncomp > 1) threshold_k <- rep(threshold_k, ncomp)
83

84
  # Matrices initialization
85 0
  U <- Q <- X <- P <- NULL
86 0
  if(method == "sNPLS"){
87 0
    WsupraJ <- WsupraK <- Tm <- NULL
88
  } else{
89 0
    WsupraJ <- matrix(nrow=ncol(XN), ncol=ncomp)
90 0
    WsupraK <- matrix(nrow=dim(XN)[3], ncol=ncomp)
91 0
    Tm <- matrix(nrow=nrow(XN), ncol=ncomp)
92
  }
93 0
  Yorig <- Y
94 0
  Y <- scale(Y, center = center.Y, scale = scale.Y)
95 0
  y_center <- attr(Y, "scaled:center")
96 0
  y_scale <- attr(Y, "scaled:scale")
97 0
  B <- matrix(0, ncol = ncomp, nrow = ncomp)
98 0
  Gu <- vector("list", ncomp)
99 0
  S <- svd(Y)$d
100 0
  u <- Y[, S == max(S)]  #Column with the highest variance
101

102
  # Unfolding of XN en 2-D
103 0
  X <- unfold3w(XN)
104
  #Check for zero variance columns and fix them with some noise
105 0
  if(any(apply(X, 2, sd)==0)){
106 0
    X[,apply(X, 2, sd)==0] <- apply(X[,apply(X, 2, sd)==0, drop=FALSE], 2, function(x) jitter(x))
107
  }
108

109
  # Center and scale
110 0
  Xd <- scale(X, center = center.X, scale = scale.X)
111 0
  x_center <- attr(Xd, "scaled:center")
112 0
  x_scale <- attr(Xd, "scaled:scale")
113

114
  # Main loop for each component
115 0
  for (f in 1:ncomp) {
116 0
    if(!cont_thresholding){
117 0
      nj <- ncol(XN) - keepJ[f]
118 0
      nk <- dim(XN)[3] - keepK[f]
119
    }
120 0
    if(method %in% c("sNPLS-VIP", "sNPLS-SR")){
121 0
      nj <- ncol(XN)
122 0
      nk <- dim(XN)[3]
123 0
      wselj <- rep(1, nj)
124 0
      wselk <- rep(1, nk)
125
    }
126 0
    it = 1
127 0
    while (it < max.iteration) {
128 0
      Zrow <- crossprod(u, Xd)
129 0
      Z <- matrix(Zrow, nrow = dim(XN)[2], ncol = dim(XN)[3])
130 0
      svd.z <- svd(Z)
131 0
      wsupraj <- svd.z$u[, 1]
132
      # L1 penalization for wsupraj
133 0
      if(method == "sNPLS"){
134 0
        if(cont_thresholding){
135 0
          wsupraj <- rel_thresholding(wsupraj, threshold_j[f])
136
        } else {
137 0
          if (nj != 0) {
138 0
            wsupraj <- thresholding(wsupraj, nj)
139
          }
140
        }
141
      }
142

143
      ##########
144 0
      wsuprak <- svd.z$v[, 1]
145
      # L1 penalization for wsuprak
146 0
      if(method == "sNPLS"){
147 0
        if(cont_thresholding){
148 0
          wsuprak <- rel_thresholding(wsuprak, threshold_k[f])
149
        } else {
150 0
          if (nk != 0) {
151 0
            wsuprak <- thresholding(wsuprak, nk)
152
          }
153
        }
154
      }
155

156 0
      if(method %in% c("sNPLS-VIP", "sNPLS-SR")){
157 0
        W <- kronecker(wsuprak, wsupraj) *  kronecker(wselk, wselj)
158 0
        tf <- Xd %*% W
159 0
        qf <- crossprod(Y, tf)/mynorm(crossprod(Y, tf))
160 0
        uf <- Y %*% qf
161 0
        Tm[,f] <- tf
162 0
        Q <- cbind(Q, qf)
163 0
        WsupraJ[,f] <- wsupraj
164 0
        WsupraK[,f] <- wsuprak
165
      }
166

167 0
      if(method == "sNPLS-VIP"){
168
        #VIPj
169 0
        SCE <- sum(sum(Tm[,1:f, drop=FALSE] %*% t(Q[,1:f, drop=FALSE])^2))
170 0
        VIPj <- sqrt(nrow(WsupraJ)*((WsupraJ[,f, drop=FALSE]^2)*SCE)/sum(SCE))
171 0
        VIPj_01 <- (VIPj-min(VIPj))/(max(VIPj)-min(VIPj))
172

173
        #VIPk
174 0
        SCE <- sum(sum(Tm[,1:f, drop=FALSE] %*% t(Q[,1:f, drop=FALSE])^2))
175 0
        VIPk <- sqrt(nrow(WsupraK)*((WsupraK[,f, drop=FALSE]^2)*SCE)/sum(SCE))
176 0
        VIPk_01 <- (VIPk-min(VIPk))/(max(VIPk)-min(VIPk))
177

178 0
        wselk <- as.numeric(VIPk_01 > threshold_k[f])
179 0
        wselj <- as.numeric(VIPj_01 > threshold_j[f])
180
      }
181

182 0
      if(method == "sNPLS-SR"){
183 0
        TM <- MASS::ginv(crossprod(Tm[,1:f, drop=FALSE])) %*% t(Tm[,1:f, drop=FALSE])
184 0
        WkM <- MASS::ginv(crossprod(WsupraK[,1:f, drop=FALSE])) %*% t(WsupraK[,1:f, drop=FALSE])
185 0
        WjM <- MASS::ginv(crossprod(WsupraJ[,1:f, drop=FALSE])) %*% t(WsupraJ[,1:f, drop=FALSE])
186 0
        Gu[[f]] <- TM %*% X %*% kronecker(t(WkM), t(WjM))
187
        #SR
188 0
        P[[f]] = t(as.matrix(Gu[[f]]) %*% t(kronecker(WsupraK[,1:f, drop=FALSE], WsupraJ[,1:f, drop=FALSE])))
189 0
        Xres <- Xd - Tm[,1:f, drop=FALSE] %*% t(P[[f]])
190 0
        xpred <- Tm[,1:f, drop=FALSE] %*% t(P[[f]])
191 0
        SSexp <- xpred^2
192 0
        SSres <- (Xd-xpred)^2
193 0
        SSexp_cube <- array(SSexp, dim=c(nrow(SSexp), nj, nk))
194 0
        SSres_cube <- array(SSres, dim=c(nrow(SSexp), nj, nk))
195 0
        SR_k <- numeric(nk)
196 0
        for(k in 1:nk){
197 0
          SR_k[k] <- sum(SSexp_cube[,,k])/sum(SSres_cube[,,k])
198
        }
199 0
        SR_k_01 <- (SR_k-min(SR_k))/(max(SR_k)-min(SR_k))
200 0
        SR_j <- numeric(nj)
201 0
        for(j in 1:nj){
202 0
          SR_j[j] <- sum(SSexp_cube[,j,])/sum(SSres_cube[,j,])
203
        }
204 0
        SR_j_01 <- (SR_j-min(SR_j))/(max(SR_j)-min(SR_j))
205 0
        wselj <- rep(1, nj)
206 0
        wselk <- rep(1, nk)
207 0
        PFcalck <- SR_k_01
208 0
        wselk <- as.numeric(PFcalck > threshold_k[f])
209 0
        PFcalcj <- SR_j_01
210 0
        wselj <- as.numeric(PFcalcj > threshold_j[f])
211
      }
212

213
      ##########
214 0
      if(method == "sNPLS"){
215 0
        tf <- Xd %*% kronecker(wsuprak, wsupraj)
216 0
        qf <- crossprod(Y, tf)/mynorm(crossprod(Y, tf))
217 0
        uf <- Y %*% qf
218
      }
219 0
      if (sum((uf - u)^2) < conver) {
220 0
        if (!silent) {
221 0
          cat(paste("Component number ", f, "\n"))
222 0
          cat(paste("Number of iterations: ", it, "\n"))
223
        }
224 0
        it <- max.iteration
225 0
        if(method == "sNPLS"){
226 0
          Tm <- cbind(Tm, tf)
227 0
          WsupraJ <- cbind(WsupraJ, wsupraj)
228 0
          WsupraK <- cbind(WsupraK, wsuprak)
229 0
          bf <- MASS::ginv(crossprod(Tm)) %*% t(Tm) %*% uf
230 0
          TM <- MASS::ginv(crossprod(Tm)) %*% t(Tm)
231 0
          Q <- cbind(Q, qf)
232
        } else {
233 0
          Tm[,f] <- tf
234 0
          WsupraJ[,f] <- wsupraj*wselj
235 0
          WsupraK[,f] <- wsuprak*wselk
236 0
          bf <- MASS::ginv(crossprod(Tm[,1:f, drop=FALSE])) %*% t(Tm[,1:f, drop=FALSE]) %*% uf
237
        }
238 0
        B[1:length(bf), f] <- bf
239 0
        U <- cbind(U, uf)
240 0
        if(method == "sNPLS-VIP"){
241 0
          TM <- MASS::ginv(crossprod(Tm[,1:f, drop=FALSE])) %*% t(Tm[,1:f, drop=FALSE])
242 0
          WkM <- MASS::ginv(crossprod(WsupraK[,1:f, drop=FALSE])) %*% t(WsupraK[,1:f, drop=FALSE])
243 0
          WjM <- MASS::ginv(crossprod(WsupraJ[,1:f, drop=FALSE])) %*% t(WsupraJ[,1:f, drop=FALSE])
244 0
          Gu[[f]] <- TM %*% X %*% kronecker(t(WkM), t(WjM))
245 0
          P[[f]] = t(as.matrix(Gu[[f]]) %*% t(kronecker(WsupraK[,1:f, drop=FALSE], WsupraJ[,1:f, drop=FALSE])))
246
        }
247 0
        if(method == "sNPLS"){
248 0
          TM <- MASS::ginv(crossprod(Tm)) %*% t(Tm)
249 0
          WkM <- MASS::ginv(crossprod(WsupraK)) %*% t(WsupraK)
250 0
          WjM <- MASS::ginv(crossprod(WsupraJ)) %*% t(WsupraJ)
251 0
          Gu[[f]] <- TM %*% X %*% kronecker(t(WkM), t(WjM))
252 0
          P[[f]] = t(as.matrix(Gu[[f]]) %*% t(kronecker(WsupraK, WsupraJ)))
253
        }
254

255 0
        if(method == "sNPLS"){
256 0
          Y <- Y - Tm %*% bf %*% t(qf)
257
        } else {
258 0
          Y <- Y - Tm[,1:f, drop=FALSE] %*% bf %*% t(qf)
259
        }
260 0
        S <- svd(Y)$d
261 0
        u <- Y[, S == max(S)]
262
      } else {
263 0
        u <- uf
264 0
        it <- it + 1
265
      }
266
    }
267
  }
268 0
  Yadjsc <- Tm %*% B %*% t(Q)
269 0
  Yadj <- Yadjsc * y_scale + y_center
270 0
  SqrdE <- sum((Yorig - Yadj)^2)
271 0
  rownames(WsupraJ) <- var.names
272 0
  rownames(WsupraK) <- x3d.names
273 0
  rownames(Q) <- yvar.names
274 0
  rownames(Tm) <- rownames(U) <- x.names
275 0
  colnames(Tm) <- colnames(WsupraJ) <- colnames(WsupraK) <- colnames(B) <-
276 0
    colnames(U) <- colnames(Q) <- names(Gu) <- names(P) <- paste("Comp.", 1:ncomp)
277 0
  output <- list(T = Tm, Wj = WsupraJ, Wk = WsupraK, B = B, U = U, Q = Q, P = P,
278 0
                 Gu = Gu, ncomp = ncomp, Xd=Xd, Yorig = Yorig, Yadj = Yadj, SqrdE = SqrdE,
279 0
                 Standarization = list(ScaleX = x_scale, CenterX = x_center,
280 0
                                       ScaleY = y_scale, CenterY = y_center),
281 0
                 Method = method)
282 0
  class(output)<-"sNPLS"
283 0
  return(output)
284
}
285

286
#' R-matrix from a sNPLS model fit
287
#'
288
#' @description Builds the R-matrix from a sNPLS model fit
289
#' @param x A sNPLS model obtained from \code{sNPLS}
290
#' @return Returns the R-matrix of the model, needed to compute the coefficients
291
Rmatrix<-function(x) {
292 0
  WsupraK <- x$Wk
293 0
  WsupraJ <- x$Wj
294 0
  R <- matrix(nrow = dim(x$Wj)[1] * dim(x$Wk)[1], ncol = x$ncomp)
295 0
  ncomp <- x$ncomp
296 0
  kroneckers<-sapply(1:x$ncomp, function(x) kronecker(WsupraK[, x], WsupraJ[, x]))
297 0
  tkroneckers<-apply(kroneckers, 2, function(x) t(x))
298 0
  R[,1] <- kroneckers[,1]
299 0
  if(ncomp>1){
300 0
    for(i in 2:ncomp){
301 0
      pi <- pi0 <- Matrix::Matrix(diag(dim(R)[1]), sparse=TRUE)
302 0
      for (j in 1:(i - 1)) {
303 0
        pi <- Matrix::Matrix(pi %*% pi0 - kroneckers[,j] %*% t(tkroneckers[,j]), sparse=TRUE)
304
      }
305 0
      w <- kroneckers[, i]
306 0
      pi <- pi %*% w
307 0
      R[, i] <- Matrix::as.matrix(pi)
308
    }
309
  }
310 0
  return(R)
311
}
312

313
#' Unfolding of three-way arrays
314
#'
315
#' @description Unfolds a three-way array into a matrix
316
#' @param x A three-way array
317
#' @return Returns a matrix with dimensions \code{dim(x)[1] x dim(x)[2]*dim(x([3]))}
318
unfold3w <- function(x) {
319 0
    dim(x) <- c(dim(x)[1], dim(x)[2] * dim(x)[3])
320 0
    return(x)
321
}
322

323
#' Cross-validation for a sNPLS model
324
#'
325
#' @description Performs cross-validation for a sNPLS model
326
#' @param X_npls A three-way array containing the predictors.
327
#' @param Y_npls A matrix containing the response.
328
#' @param ncomp A vector with the different number of components to test
329
#' @param samples Number of samples for performing random search in continuous thresholding
330
#' @param keepJ A vector with the different number of selected variables to test for discrete thresholding
331
#' @param keepK A vector with the different number of selected 'times' to test for discrete thresholding
332
#' @param nfold Number of folds for the cross-validation
333
#' @param parallel Should the computations be performed in parallel? Set up strategy first with \code{future::plan()}
334
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
335
#' @param metric Select between RMSE or AUC (for 0/1 response)
336
#' @param ... Further arguments passed to sNPLS
337
#' @return A list with the best parameters for the model and the CV error
338
#' @examples
339
#' \dontrun{
340
#' X_npls<-array(rpois(7500, 10), dim=c(50, 50, 3))
341
#'
342
#' Y_npls<-matrix(2+0.4*X_npls[,5,1]+0.7*X_npls[,10,1]-0.9*X_npls[,15,1]+
343
#' 0.6*X_npls[,20,1]- 0.5*X_npls[,25,1]+rnorm(50), ncol=1)
344
#' #Grid search for discrete thresholding
345
#' cv1<- cv_snpls(X_npls, Y_npls, ncomp=1:2, keepJ = 1:3, keepK = 1:2, parallel = FALSE)
346
#' #Random search for continuous thresholding
347
#' cv2<- cv_snpls(X_npls, Y_npls, ncomp=1:2, samples=20, parallel = FALSE)
348
#' }
349
#' @importFrom stats runif
350
#' @export
351
cv_snpls <- function(X_npls, Y_npls, ncomp = 1:3, samples=20,
352
                     keepJ = NULL, keepK = NULL, nfold = 10, parallel = TRUE,  method="sNPLS", metric="RMSE", ...) {
353

354 0
  if(parallel) message("Your parallel configuration is ", attr(future::plan(), "class")[3])
355 0
  if(!method %in% c("sNPLS", "sNPLS-SR", "sNPLS-VIP")) stop("'method' not recognized")
356 0
  if(length(dim(Y_npls)) == 3) Y_npls <- unfold3w(Y_npls)
357 0
  top <- ceiling(dim(X_npls)[1]/nfold)
358 0
  foldid <- sample(rep(1:nfold, top), dim(X_npls)[1], replace = F)
359 0
  if(is.null(keepJ) | is.null(keepK)){
360 0
    cont_thresholding <- TRUE
361 0
    message("Using continuous thresholding")
362
  } else {
363 0
    cont_thresholding <- FALSE
364 0
    message("Using discrete thresholding")
365
  }
366 0
  if(cont_thresholding){
367 0
    search.grid <- expand.grid(list(ncomp = ncomp, threshold_j=runif(samples),
368 0
                                    threshold_k=runif(samples)))
369
  } else {
370 0
    search.grid <- expand.grid(list(ncomp = ncomp, keepJ = keepJ,
371 0
                                    keepK = keepK))
372
  }
373 0
  SqrdE <- numeric()
374 0
  applied_fun <- function(y) {
375 0
    sapply(1:nfold, function(x) {
376 0
      suppressMessages(tryCatch(do.call(cv_fit, c(list(xtrain = X_npls[x != foldid, , ],
377 0
                                      ytrain = Y_npls[x != foldid, , drop = FALSE],
378 0
                                      xval = X_npls[x == foldid, , ],
379 0
                                      yval = Y_npls[x == foldid, , drop = FALSE],
380 0
                                      ncomp = y["ncomp"], method=method, metric=metric),
381 0
                                 list(threshold_j=y["threshold_j"])[cont_thresholding],
382 0
                                 list(threshold_k=y["threshold_k"])[cont_thresholding],
383 0
                                 list(keepJ=rep(y["keepJ"], y["ncomp"]))[!cont_thresholding],
384 0
                                 list(keepK=rep(y["keepK"], y["ncomp"]))[!cont_thresholding], ...)),
385 0
               error=function(x) NA))
386
    })
387
  }
388 0
  if (parallel) {
389 0
    cv_res <- future.apply::future_apply(search.grid, 1, applied_fun, future.seed=TRUE)
390
  } else {
391 0
    cv_res <- pbapply::pbapply(search.grid, 1, applied_fun)
392
  }
393 0
  cv_mean <- apply(cv_res, 2, function(x) mean(x, na.rm = TRUE))
394 0
  cv_se <- apply(cv_res, 2, function(x) sd(x, na.rm=TRUE)/sqrt(nfold))
395 0
  if(metric == "RMSE"){
396 0
    best_model <- search.grid[which.min(cv_mean), ]
397
  } else{
398 0
    best_model <- search.grid[which.max(cv_mean), ]
399
  }
400 0
  output <- list(best_parameters = best_model, cv_mean = cv_mean,
401 0
                 cv_se = cv_se, cv_grid = search.grid)
402 0
  class(output)<-"cvsNPLS"
403 0
  return(output)
404
}
405

406
#' Internal function for \code{cv_snpls}
407
#'
408
#' @param xtrain A three-way training array
409
#' @param ytrain A response training matrix
410
#' @param xval A three-way test array
411
#' @param yval A response test matrix
412
#' @param ncomp Number of components for the sNPLS model
413
#' @param threshold_j Threshold value on Wj. Scaled between [0, 1)
414
#' @param threshold_k Threshold value on Wk. Scaled between [0, 1)
415
#' @param keepJ Number of variables to keep for each component, ignored if threshold_j is provided
416
#' @param keepK Number of 'times' to keep for each component, ignored if threshold_k is provided
417
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
418
#' @param metric Performance metric (RMSE or AUC)
419
#' @param ... Further arguments passed to sNPLS
420
#' @return Returns the CV root mean squared error or AUC
421
#' @importFrom stats predict
422
#' @export
423
cv_fit <- function(xtrain, ytrain, xval, yval, ncomp, threshold_j=NULL, threshold_k=NULL, keepJ=NULL, keepK=NULL,  method, metric, ...) {
424 0
  fit <- sNPLS(XN = xtrain, Y = ytrain, ncomp = ncomp, keepJ=keepJ, keepK=keepK, threshold_j = threshold_j,
425 0
                 threshold_k = threshold_k, silent = TRUE, method=method, ...)
426 0
  Y_pred <- predict(fit, xval)
427 0
  if(!metric %in% c("RMSE", "AUC")) stop("Invalid metric for cross-validation")
428 0
  if(metric == "RMSE"){
429 0
    CVE <- sqrt(mean((Y_pred - yval)^2))
430
  } else{
431 0
    CVE <- as.numeric(pROC::roc(yval ~ Y_pred[,1])$auc)
432
  }
433 0
  return(CVE)
434
}
435

436
#' Plots for sNPLS model fits
437
#'
438
#' @description Different plots for sNPLS model fits
439
#' @param x A sNPLS model fit
440
#' @param comps Vector with the components to plot. It can be of length \code{ncomp} for types "time" and "variables" and of length 2 otherwise.
441
#' @param type The type of plot. One of those: "T", "U", "Wj", "Wk", "time" or "variables"
442
#' @param labels Should rownames be added as labels to the plot?
443
#' @param group Vector with categorical variable defining groups (optional)
444
#' @param ... Not used
445
#' @return A plot of the type specified in the \code{type} parameter
446
#' @export
447
plot.sNPLS <- function(x, type = "T", comps = c(1, 2), labels=TRUE, group=NULL, ...) {
448 0
  if (type == "T")
449 0
    p<-plot_T(x, comps = comps, labels=labels, group=group)
450 0
  if (type == "U")
451 0
    p<-plot_U(x, comps = comps, labels=labels, group=group)
452 0
  if (type == "Wj")
453 0
    p<-plot_Wj(x, comps = comps, labels=labels)
454 0
  if (type == "Wk")
455 0
    p<-plot_Wk(x, comps = comps, labels=labels)
456 0
  if (type == "time")
457 0
    p<-plot_time(x, comps = comps)
458 0
  if (type == "variables")
459 0
    p<-plot_variables(x, comps = comps)
460 0
  p
461
}
462

463
#' Internal function for \code{plot.sNPLS}
464
#'
465
#' @param x A sNPLS model fit
466
#' @param comps A vector of length two with the components to plot
467
#' @param labels Should rownames be added as labels to the plot?
468
#' @param group Vector with categorical variable defining groups
469
#' @return A plot of the T matrix of a sNPLS model fit
470
plot_T <- function(x, comps, labels, group=NULL){
471 0
  df <- data.frame(x$T)[comps]
472 0
  if(!is.null(group)) df <- data.frame(df, group=as.factor(group))
473 0
  names(df)[1:2] <- paste("Comp.", comps, sep="")
474 0
  p1 <- ggplot2::ggplot(df, ggplot2::aes_string(x=names(df)[1], y=names(df)[2])) +
475 0
    ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::geom_vline(xintercept = 0,
476 0
                                                                      lty=2) +
477 0
    ggplot2::geom_hline(yintercept = 0, lty=2) + ggplot2::xlab(names(df)[1]) +
478 0
    ggplot2::ylab(names(df)[2]) + ggplot2::theme(axis.text=ggplot2::element_text(size=12),
479 0
                                                 axis.title=ggplot2::element_text(size=12,face="bold"))
480 0
  if(labels) p1 <- p1 + ggrepel::geom_text_repel(ggplot2::aes(label=rownames(df)))
481 0
  if(!is.null(group)) p1 <- p1 + ggplot2::geom_point(ggplot2::aes(color=group))
482 0
  p1
483
}
484

485
#' Internal function for \code{plot.sNPLS}
486
#'
487
#' @param x A sNPLS model fit
488
#' @param comps A vector of length two with the components to plot
489
#' @param labels Should rownames be added as labels to the plot?
490
#' @param group Vector with categorical variable defining groups
491
#' @return A plot of the U matrix of a sNPLS model fit
492
plot_U <- function(x, comps, labels, group=NULL){
493 0
  df <- data.frame(x$U)[comps]
494 0
  if(!is.null(group)) df <- data.frame(df, group=as.factor(group))
495 0
  names(df)[1:2] <- paste("Comp.", comps, sep="")
496 0
  p1 <- ggplot2::ggplot(df, ggplot2::aes_string(x=names(df)[1], y=names(df)[2])) +
497 0
    ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::geom_vline(xintercept = 0,
498 0
                                                                      lty=2) +
499 0
    ggplot2::geom_hline(yintercept = 0, lty=2) + ggplot2::xlab(names(df)[1]) +
500 0
    ggplot2::ylab(names(df)[2]) + ggplot2::theme(axis.text=ggplot2::element_text(size=12),
501 0
                                                 axis.title=ggplot2::element_text(size=12,face="bold"))
502 0
  if(labels) p1 <- p1 + ggrepel::geom_text_repel(ggplot2::aes(label=rownames(df)))
503 0
  if(!is.null(group)) p1 <- p1 + ggplot2::geom_point(ggplot2::aes(color=group))
504 0
  p1
505
}
506

507
#' Internal function for \code{plot.sNPLS}
508
#'
509
#' @param x A sNPLS model fit
510
#' @param comps A vector of length two with the components to plot
511
#' @param labels Should rownames be added as labels to the plot?
512
#' @return A plot of Wj coefficients
513
plot_Wj <- function(x, comps, labels){
514 0
  df <- data.frame(x$Wj)[comps]
515 0
  names(df) <- paste("Comp.", comps, sep="")
516 0
  var_names_zero <- rownames(df)
517 0
  var_names_zero[rowSums(df)==0]<- ""
518 0
  p1 <- ggplot2::ggplot(df, ggplot2::aes_string(x=names(df)[1], y=names(df)[2])) +
519 0
    ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::geom_vline(xintercept = 0,
520 0
                                                                      lty=2) +
521 0
    ggplot2::geom_hline(yintercept = 0, lty=2) + ggplot2::xlab(names(df)[1]) +
522 0
    ggplot2::ylab(names(df)[2]) + ggplot2::theme(axis.text=ggplot2::element_text(size=12),
523 0
                                                 axis.title=ggplot2::element_text(size=12,face="bold"))
524 0
  if(labels) p1 <- p1 + ggrepel::geom_text_repel(ggplot2::aes(label=var_names_zero), size=3)
525 0
  p1
526
}
527

528
#' Internal function for \code{plot.sNPLS}
529
#'
530
#' @param x A sNPLS model fit
531
#' @param comps A vector of length two with the components to plot
532
#' @param labels Should rownames be added as labels to the plot?
533
#' @return A plot of the Wk coefficients
534
plot_Wk <- function(x, comps, labels){
535 0
  df <- data.frame(x$Wk)[comps]
536 0
  names(df) <- paste("Comp.", comps, sep="")
537 0
  var_names_zero <- rownames(df)
538 0
  var_names_zero[rowSums(df)==0]<- ""
539 0
  p1 <- ggplot2::ggplot(df, ggplot2::aes_string(x=names(df)[1], y=names(df)[2])) +
540 0
    ggplot2::geom_point() + ggplot2::theme_bw() + ggplot2::geom_vline(xintercept = 0,
541 0
                                                                      lty=2) +
542 0
    ggplot2::geom_hline(yintercept = 0, lty=2) + ggplot2::xlab(names(df)[1]) +
543 0
    ggplot2::ylab(names(df)[2]) + ggplot2::theme(axis.text=ggplot2::element_text(size=12),
544 0
                                                 axis.title=ggplot2::element_text(size=12,face="bold"))
545 0
  if(labels) p1 <- p1 + ggrepel::geom_text_repel(ggplot2::aes(label=var_names_zero), size=4)
546 0
  p1
547
}
548

549
#' Internal function for \code{plot.sNPLS}
550
#'
551
#' @param x A sNPLS model fit
552
#' @param comps A vector with the components to plot
553
#' @return A plot of Wk coefficients for each component
554
plot_time <- function(x, comps){
555 0
  df <- clickR::forge(data.frame(row=1:nrow(x$Wk), x$Wk), affixes=as.character(comps),
556 0
                      var.name="Component")
557 0
  ggplot2::ggplot(df, ggplot2::aes_string(x="row", y="Comp..", color="Component")) +
558 0
    ggplot2::geom_line(lwd=1.05) + ggplot2::theme_bw() +
559 0
    ggplot2::geom_hline(yintercept = 0, alpha=0.2) +
560 0
    ggplot2::xlab("Time (index)") + ggplot2::ylab("Wk") +
561 0
    ggplot2::theme(axis.text = ggplot2::element_text(size=12),
562 0
                   axis.title = ggplot2::element_text(size=12,face="bold"))
563
}
564

565
#' Internal function for \code{plot.sNPLS}
566
#'
567
#' @param x A sNPLS model fit
568
#' @param comps A vector with the components to plot
569
#' @return A plot of Wj coefficients for each component
570
plot_variables <- function(x, comps){
571 0
  df <- clickR::forge(data.frame(row=1:nrow(x$Wj), x$Wj), affixes=as.character(comps),
572 0
                      var.name="Component")
573 0
  ggplot2::ggplot(df, ggplot2::aes_string(x="row", y="Comp..", color="Component")) +
574 0
    ggplot2::geom_line(lwd=1.05) + ggplot2::theme_bw() +
575 0
    ggplot2::geom_hline(yintercept = 0, alpha=0.2) +
576 0
    ggplot2::xlab("Variable (index)") + ggplot2::ylab("Wj") +
577 0
    ggplot2::theme(axis.text = ggplot2::element_text(size=12),
578 0
                   axis.title=ggplot2::element_text(size=12,face="bold"))
579
}
580

581
#' Predict for sNPLS models
582
#'
583
#' @description Predict function for sNPLS models
584
#' @param object A sNPLS model fit
585
#' @param newX A three-way array containing the new data
586
#' @param rescale Should the prediction be rescaled to the original scale?
587
#' @param ... Further arguments passed to \code{predict}
588
#' @return A matrix with the predictions
589
#' @export
590
predict.sNPLS <- function(object, newX, rescale = TRUE, ...) {
591 0
  newX <- unfold3w(newX)
592
  # Centrado y escalado
593
  #Xstd <- t((t(newX) - object$Standarization$CenterX)/object$Standarization$ScaleX)
594
  #Xstd <- sweep(sweep(newX, 2, object$Standarization$CenterX), 2, object$Standarization$ScaleX, "/")
595 0
  Xstd <- scale(newX, center=object$Standarization$CenterX, scale=object$Standarization$ScaleX)
596 0
  R <- Rmatrix(object)
597 0
  Bnpls <- R %*% object$B %*% t(object$Q)
598 0
  Yval <- Xstd %*% Bnpls
599 0
  if (rescale) {
600 0
    Yval <- Yval * object$Standarization$ScaleY + object$Standarization$CenterY
601
  }
602 0
  return(Yval)
603
}
604

605
#' Fitted method for sNPLS models
606
#'
607
#' @description Fitted method for sNPLS models
608
#' @param object A sNPLS model fit
609
#' @param ... Further arguments passed to \code{fitted}
610
#' @return Fitted values for the sNPLS model
611
#' @export
612
fitted.sNPLS <- function(object, ...){
613 0
  return(object$Yadj)
614
}
615

616
#' Plot cross validation results for sNPLS objects
617
#'
618
#' @description Plot function for visualization of cross validation results for sNPLS models
619
#' @param x A cv_sNPLS object
620
#' @param ... Not used
621
#' @return A facet plot with the results of the cross validation
622
#' @export
623
plot.cvsNPLS <- function(x, ...) {
624 0
  cont_thresholding <- names(x$cv_grid)[2] == "threshold_j"
625 0
  df_grid <- data.frame(threshold_j=x$cv_grid[,2], threshold_k=x$cv_grid[,3], CVE=x$cv_mean, Ncomp=paste("Ncomp =", x$cv_grid$ncomp, sep=" "))
626 0
  if(!cont_thresholding) names(df_grid)[c(1, 2)] <- c("KeepJ", "KeepK")
627 0
  if(cont_thresholding){
628 0
    ggplot2::ggplot(df_grid, ggplot2::aes_string(x="threshold_j", y="CVE"))+ggplot2::geom_point()+ggplot2::geom_smooth()+ggplot2::facet_grid(cut(threshold_k,10) ~ Ncomp)+
629 0
      ggplot2::theme_bw()
630
  } else {
631 0
      ggplot2::ggplot(df_grid, ggplot2::aes_string(x="KeepJ", y="CVE"))+ggplot2::geom_point()+ggplot2::geom_line()+ggplot2::facet_grid(KeepK ~ Ncomp)+
632 0
        ggplot2::theme_bw()
633
  }
634
}
635

636
#' Coefficients from a sNPLS model
637
#'
638
#' @description Extract coefficients from a sNPLS model
639
#' @param object A sNPLS model fit
640
#' @param as.matrix Should the coefficients be presented as matrix or vector?
641
#' @param ... Further arguments passed to \code{coef}
642
#' @return A matrix (or vector) of coefficients
643
#' @export
644
coef.sNPLS <- function(object, as.matrix = FALSE, ...) {
645 0
  R <- Rmatrix(object)
646 0
  Bnpls <- R %*% object$B %*% t(object$Q)
647 0
  colnames(Bnpls) <- paste("Estimate", colnames(Bnpls))
648 0
  if (as.matrix){
649 0
    dim(Bnpls) <- c(dim(object$Wj)[1], dim(object$Wk)[1], dim(Bnpls)[2])
650 0
    rownames(Bnpls) <- rownames(object$Wj)
651 0
    colnames(Bnpls) <- rownames(object$Wk)
652
  }
653 0
  return(Bnpls)
654
}
655

656
#' Repeated cross-validation for sNPLS models
657
#'
658
#' @description Performs repeated cross-validatiodn and represents results in a plot
659
#' @param X_npls A three-way array containing the predictors.
660
#' @param Y_npls A matrix containing the response.
661
#' @param ncomp A vector with the different number of components to test
662
#' @param samples Number of samples for performing random search in continuous thresholding
663
#' @param keepJ A vector with the different number of selected variables to test in discrete thresholding
664
#' @param keepK A vector with the different number of selected 'times' to test in discrete thresholding
665
#' @param nfold Number of folds for the cross-validation
666
#' @param times Number of repetitions of the cross-validation
667
#' @param parallel Should the computations be performed in parallel? Set up strategy first with \code{future::plan()}
668
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
669
#' @param metric Select between RMSE or AUC (for 0/1 response)
670
#' @param ... Further arguments passed to cv_snpls
671
#' @return A density plot with the results of the cross-validation and an (invisible) \code{data.frame} with these results
672
#' @importFrom stats var
673
#' @export
674
repeat_cv <- function(X_npls, Y_npls, ncomp = 1:3, samples=20, keepJ=NULL, keepK=NULL, nfold = 10, times=30, parallel = TRUE, method="sNPLS", metric="RMSE", ...){
675 0
  if(!method %in% c("sNPLS", "sNPLS-SR", "sNPLS-VIP")) stop("'method' not recognized")
676 0
  if(parallel) message("Your parallel configuration is ", attr(future::plan(), "class")[3])
677 0
  if(is.null(keepJ) | is.null(keepK)){
678 0
    cont_thresholding <- TRUE
679 0
    message("Using continuous thresholding")
680
  } else {
681 0
    cont_thresholding <- FALSE
682 0
    message("Using discrete thresholding")
683
  }
684 0
  if(parallel){
685 0
    rep_cv<-future.apply::future_sapply(1:times, function(x) suppressMessages(cv_snpls(X_npls, Y_npls, ncomp=ncomp, parallel = FALSE, nfold = nfold, samples=samples, keepJ=keepJ, keepK=keepK, method=method, metric=metric, ...)), future.seed=TRUE)
686
  } else {
687 0
    rep_cv<-pbapply::pbreplicate(times, suppressMessages(cv_snpls(X_npls, Y_npls, ncomp=ncomp, parallel = FALSE, nfold = nfold, samples=samples, keepJ=keepJ, keepK=keepK, method=method, metric=metric, ...)))
688
  }
689 0
  resdata<-data.frame(ncomp=sapply(rep_cv[1,], function(x) x[[1]]), threshold_j=sapply(rep_cv[1,], function(x) x[[2]]),
690 0
                      threshold_k=sapply(rep_cv[1,], function(x) x[[3]]))
691 0
  if(!cont_thresholding) names(resdata)[c(2, 3)] <- c("keepJ", "keepK")
692 0
  class(resdata)<-c("repeatcv", "data.frame")
693 0
  return(resdata)
694
}
695

696
#' Density plot for repeat_cv results
697
#'
698
#' @description Plots a grid of slices from the 3-D kernel denity estimates of the repeat_cv function
699
#' @param x A repeatcv object
700
#' @param ... Further arguments passed to plot
701
#' @return A grid of slices from a 3-D density plot of the results of the repeated cross-validation
702
#' @importFrom grDevices colorRampPalette
703
#' @importFrom stats ftable density setNames
704
#' @export
705
plot.repeatcv <- function(x, ...){
706 0
  x.old <- x
707 0
  x<-x[,sapply(x, function(x) var(x)>0), drop=FALSE]
708 0
  if(ncol(x) < ncol(x.old)) warning(paste("\n", colnames(x.old)[!colnames(x.old) %in% colnames(x)], "is constant at", x.old[1,colnames(x.old)[!colnames(x.old) %in% colnames(x)]]))
709 0
  if(ncol(x) == 1){
710 0
    densities <- density(x[,1])
711 0
    df_grid <- setNames(data.frame(densities$x, densities$y), c(colnames(x), "density"))
712 0
    p <- ggplot2::ggplot(x, ggplot2::aes_string(x=colnames(x)))+ggplot2::geom_density(color="gray", fill="gray", alpha=0.3)+ggplot2::theme_classic()
713
  } else{
714 0
    H.pi <- ks::Hpi(x)
715 0
    fhat <- ks::kde(x, H=H.pi, compute.cont=TRUE, gridsize = rep(151, ncol(x)))
716 0
    if(ncol(x) == 3){
717 0
      ncomp_values <- sapply(sort(unique(fhat$x[,1])), function(x) which.min(abs(fhat$eval.points[[1]]-x)))
718 0
      positions <- as.data.frame(fhat$x)
719 0
      positions$Ncomp <- factor(positions$ncomp)
720 0
      df_grid <- setNames(expand.grid(fhat$eval.points[[2]], fhat$eval.points[[3]]), names(positions)[c(2,3)])
721 0
      combl <- nrow(df_grid)
722 0
      df_grid <- df_grid[rep(1:nrow(df_grid), length(ncomp_values)),]
723 0
      df_grid$density <- unlist(lapply(ncomp_values, function(x) as.numeric(matrix(fhat$estimate[x,,], ncol=1))))
724 0
      df_grid$Ncomp <- factor(rep(sort(unique(positions$ncomp)), each=combl))
725 0
      p <- ggplot2::ggplot(df_grid, ggplot2::aes_string(names(df_grid)[1], names(df_grid)[2], fill="density"))+ggplot2::geom_raster()+
726 0
        ggplot2::scale_fill_gradientn(colours =colorRampPalette(c("white", "blue", "red"))(10))+ggplot2::theme_classic()+
727 0
        ggplot2::geom_count(inherit.aes = FALSE, ggplot2::aes_string(x=names(df_grid)[1], y=names(df_grid)[2]), data=positions) +ggplot2::facet_grid(~Ncomp)
728 0
      if(names(df_grid)[1] == "threshold_j"){
729 0
        p <- p + ggplot2::scale_x_continuous(limits=c(0, 1)) + ggplot2::scale_y_continuous(limits=c(0, 1))
730
      } else {
731 0
        p <- p + ggplot2::scale_x_continuous(breaks=if(round(diff(range(df_grid$keepJ)))<=10) round(max(0, min(df_grid$keepJ)):max(df_grid$keepJ)) else round(seq(max(0, min(df_grid$keepJ)), max(df_grid$keepJ), by= ceiling(max(df_grid$keepJ)/20)*2)))+
732 0
          ggplot2::scale_y_continuous(breaks=if(round(diff(range(df_grid$keepK)))<=10) round(max(0, min(df_grid$keepK)):max(df_grid$keepK)) else round(seq(max(0, min(df_grid$keepK)), max(df_grid$keepK), by= ceiling(max(df_grid$keepK)/20)*2)))+
733 0
          ggplot2::scale_size_area(breaks=if(length(sort(unique(as.numeric(ftable(positions))))[-1])<=7) sort(unique(as.numeric(ftable(positions))))[-1] else (1:max(as.numeric(ftable(positions))))[round(seq(1, max(as.numeric(ftable(positions))), length.out = 7))],
734 0
                                   labels=if(length(sort(unique(as.numeric(ftable(positions))))[-1])<=7) sort(unique(as.numeric(ftable(positions))))[-1] else (1:max(as.numeric(ftable(positions))))[round(seq(1, max(as.numeric(ftable(positions))), length.out = 7))])
735
      }
736
    } else {
737 0
      positions <- as.data.frame(fhat$x)
738 0
      df_grid <- expand.grid(V1=fhat$eval.points[[1]], V2=fhat$eval.points[[2]])
739 0
      names(df_grid)<-colnames(positions)
740 0
      df_grid$density <- as.numeric(matrix(fhat$estimate, ncol=1))
741 0
      p <- ggplot2::ggplot(df_grid, ggplot2::aes_string(colnames(df_grid)[1], colnames(df_grid)[2], fill="density"))+ggplot2::geom_raster()+
742 0
        ggplot2::scale_fill_gradientn(colours =colorRampPalette(c("white", "blue", "red"))(10))+ggplot2::theme_classic()+
743 0
        ggplot2::geom_count(inherit.aes = FALSE, ggplot2::aes_string(x=colnames(df_grid)[1], y=colnames(df_grid)[2]), data=positions)
744 0
      if("threshold_j" %in% names(df_grid) | "threshold_k" %in% names(df_grid)){
745 0
        p <- p + ggplot2::scale_x_continuous(limits=c(0, 1)) + ggplot2::scale_y_continuous(limits=c(0, 1))
746
      } else {
747 0
        p <- p + ggplot2::scale_x_continuous(breaks=if(round(diff(range(df_grid[,1])))<=10) round(max(0, min(df_grid[,1])):max(df_grid[,1])) else round(seq(max(0, min(df_grid[,1])), max(df_grid[,1]), by= ceiling(max(df_grid[,1])/20)*2)))+
748 0
          ggplot2::scale_y_continuous(breaks=if(round(diff(range(df_grid[,2])))<=10) round(max(0, min(df_grid[,2])):max(df_grid[,2])) else round(seq(max(0, min(df_grid[,2])), max(df_grid[,2]), by= ceiling(max(df_grid[,2])/20)*2)))+
749 0
          ggplot2::scale_size_continuous(breaks=if(length(sort(unique(as.numeric(ftable(positions))))[-1])<=7) sort(unique(as.numeric(ftable(positions))))[-1] else (1:max(as.numeric(ftable(positions))))[round(seq(1, max(as.numeric(ftable(positions))), length.out = 7))],
750 0
                                         labels=if(length(sort(unique(as.numeric(ftable(positions))))[-1])<=7) sort(unique(as.numeric(ftable(positions))))[-1] else (1:max(as.numeric(ftable(positions))))[round(seq(1, max(as.numeric(ftable(positions))), length.out = 7))])
751
      }
752
    }
753
  }
754 0
  print(p)
755 0
  data.frame(x.old[1, !colnames(x.old) %in% colnames(x), drop=FALSE], df_grid[which.max(df_grid$density),])
756
}
757

758
#' Summary for sNPLS models
759
#'
760
#' @description Summary of a sNPLS model fit
761
#' @param object A sNPLS object
762
#' @param ... Further arguments passed to summary.default
763
#' @return A summary inclunding number of components, squared error and coefficients of the fitted model
764
#' @importFrom stats coef
765
#' @export
766
summary.sNPLS <- function(object, ...){
767 0
  cat(object$Method, "model with", object$ncomp, "components and squared error of", round(object$SqrdE,3), "\n", "\n")
768 0
  cat("Coefficients: \n")
769 0
  round(coef(object, as.matrix=TRUE), 3)
770
}
771

772
#' AUC for sNPLS-DA model
773
#'
774
#' @description AUC for a sNPLS-DA model
775
#' @param object A sNPLS object
776
#' @return The area under the ROC curve for the model
777
#' @export
778
auroc <- function(object){
779 0
  as.numeric(pROC::roc(object$Yorig[,1] ~ object$Yadj[,1])$auc)
780
}
781

782
#' Compute Selectivity Ratio for a sNPLS model
783
#'
784
#' @description Estimates Selectivity Ratio for the different components of a sNPLS model fit
785
#' @param model A sNPLS model
786
#' @return A list of data.frames, each of them including the computed Selectivity Ratios for each variable
787
#' @export
788
SR <- function(model){
789 0
  output <- lapply(1:model$ncomp, function(f){
790 0
    Xres <- model$Xd - model$T[,1:f, drop=FALSE] %*% t(model$P[[f]])
791 0
    Xpred <- model$T[,1:f, drop=FALSE] %*% t(model$P[[f]])
792 0
    SSexp <- Xpred^2
793 0
    SSres <- (model$Xd-Xpred)^2
794 0
    SSexp_cube <- array(SSexp, dim=c(nrow(SSexp), nrow(model$Wj), nrow(model$Wk)))
795 0
    SSres_cube <- array(SSres, dim=c(nrow(SSexp), nrow(model$Wj), nrow(model$Wk)))
796 0
    SR_k <- sapply(1:nrow(model$Wk), function(x) sum(SSexp_cube[,,x])/sum(SSres_cube[,,x]))
797 0
    SR_j <- sapply(1:nrow(model$Wj), function(x) sum(SSexp_cube[,x,])/sum(SSres_cube[,x,]))
798 0
    list(SR_k=round(SR_k,3),
799 0
         SR_j=round(SR_j,3))
800
  })
801 0
  SR_j <- do.call("cbind", sapply(output, function(x) x[2]))
802 0
  SR_k <- do.call("cbind", sapply(output, function(x) x[1]))
803 0
  rownames(SR_j) <- rownames(model$Wj)
804 0
  rownames(SR_k) <- rownames(model$Wk)
805 0
  colnames(SR_j) <- colnames(SR_k) <- paste("Component ", 1:model$ncomp, sep="")
806 0
  list(SR_j=SR_j, SR_k=SR_k)
807
}

Read our documentation on viewing source code .

Loading