David-Hervas / sNPLS
Showing 1 of 7 files from the diff.
Other files ignored by Codecov
DESCRIPTION has changed.
man/cv_snpls.Rd has changed.
NAMESPACE has changed.
man/repeat_cv.Rd has changed.
man/cv_fit.Rd has changed.

@@ -275,7 +275,7 @@
Loading
275 275
  colnames(Tm) <- colnames(WsupraJ) <- colnames(WsupraK) <- colnames(B) <-
276 276
    colnames(U) <- colnames(Q) <- names(Gu) <- names(P) <- paste("Comp.", 1:ncomp)
277 277
  output <- list(T = Tm, Wj = WsupraJ, Wk = WsupraK, B = B, U = U, Q = Q, P = P,
278 -
                 Gu = Gu, ncomp = ncomp, Xd=Xd, Yadj = Yadj, SqrdE = SqrdE,
278 +
                 Gu = Gu, ncomp = ncomp, Xd=Xd, Yorig = Yorig, Yadj = Yadj, SqrdE = SqrdE,
279 279
                 Standarization = list(ScaleX = x_scale, CenterX = x_center,
280 280
                                       ScaleY = y_scale, CenterY = y_center),
281 281
                 Method = method)
@@ -332,6 +332,7 @@
Loading
332 332
#' @param nfold Number of folds for the cross-validation
333 333
#' @param parallel Should the computations be performed in parallel? Set up strategy first with \code{future::plan()}
334 334
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
335 +
#' @param metric Select between RMSE or AUC (for 0/1 response)
335 336
#' @param ... Further arguments passed to sNPLS
336 337
#' @return A list with the best parameters for the model and the CV error
337 338
#' @examples
@@ -348,7 +349,7 @@
Loading
348 349
#' @importFrom stats runif
349 350
#' @export
350 351
cv_snpls <- function(X_npls, Y_npls, ncomp = 1:3, samples=20,
351 -
                     keepJ = NULL, keepK = NULL, nfold = 10, parallel = TRUE,  method="sNPLS", ...) {
352 +
                     keepJ = NULL, keepK = NULL, nfold = 10, parallel = TRUE,  method="sNPLS", metric="RMSE", ...) {
352 353
353 354
  if(parallel) message("Your parallel configuration is ", attr(future::plan(), "class")[3])
354 355
  if(!method %in% c("sNPLS", "sNPLS-SR", "sNPLS-VIP")) stop("'method' not recognized")
@@ -376,7 +377,7 @@
Loading
376 377
                                      ytrain = Y_npls[x != foldid, , drop = FALSE],
377 378
                                      xval = X_npls[x == foldid, , ],
378 379
                                      yval = Y_npls[x == foldid, , drop = FALSE],
379 -
                                      ncomp = y["ncomp"], method=method),
380 +
                                      ncomp = y["ncomp"], method=method, metric=metric),
380 381
                                 list(threshold_j=y["threshold_j"])[cont_thresholding],
381 382
                                 list(threshold_k=y["threshold_k"])[cont_thresholding],
382 383
                                 list(keepJ=rep(y["keepJ"], y["ncomp"]))[!cont_thresholding],
@@ -391,7 +392,11 @@
Loading
391 392
  }
392 393
  cv_mean <- apply(cv_res, 2, function(x) mean(x, na.rm = TRUE))
393 394
  cv_se <- apply(cv_res, 2, function(x) sd(x, na.rm=TRUE)/sqrt(nfold))
394 -
  best_model <- search.grid[which.min(cv_mean), ]
395 +
  if(metric == "RMSE"){
396 +
    best_model <- search.grid[which.min(cv_mean), ]
397 +
  } else{
398 +
    best_model <- search.grid[which.max(cv_mean), ]
399 +
  }
395 400
  output <- list(best_parameters = best_model, cv_mean = cv_mean,
396 401
                 cv_se = cv_se, cv_grid = search.grid)
397 402
  class(output)<-"cvsNPLS"
@@ -410,15 +415,21 @@
Loading
#' @param keepJ Number of variables to keep for each component, ignored if threshold_j is provided
#' @param keepK Number of 'times' to keep for each component, ignored if threshold_k is provided
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
#' @param metric Performance metric (RMSE or AUC)
#' @param ... Further arguments passed to sNPLS
#' @return Returns the CV root mean squared error or AUC
#' @importFrom stats predict
#' @export
cv_fit <- function(xtrain, ytrain, xval, yval, ncomp, threshold_j = NULL,
                   threshold_k = NULL, keepJ = NULL, keepK = NULL, method,
                   metric, ...) {
  # Fail fast: validate the metric BEFORE the expensive model fit, so an
  # invalid value errors immediately instead of after fitting a whole fold.
  if (!metric %in% c("RMSE", "AUC")) stop("Invalid metric for cross-validation")
  # Fit the sparse NPLS model on the training fold only.
  fit <- sNPLS(XN = xtrain, Y = ytrain, ncomp = ncomp, keepJ = keepJ,
               keepK = keepK, threshold_j = threshold_j,
               threshold_k = threshold_k, silent = TRUE, method = method, ...)
  # Predict the held-out fold with the fitted model.
  Y_pred <- predict(fit, xval)
  if (metric == "RMSE") {
    # Root mean squared prediction error on the validation fold.
    CVE <- sqrt(mean((Y_pred - yval)^2))
  } else {
    # AUC for a 0/1 response: observed labels vs first prediction column.
    # NOTE(review): assumes yval is a single-column 0/1 matrix — confirm
    # with callers (cv_snpls passes Y_npls[x == foldid, , drop = FALSE]).
    CVE <- as.numeric(pROC::roc(yval ~ Y_pred[, 1])$auc)
  }
  return(CVE)
}
424 435
@@ -655,11 +666,12 @@
Loading
655 666
#' @param times Number of repetitions of the cross-validation
656 667
#' @param parallel Should the computations be performed in parallel? Set up strategy first with \code{future::plan()}
657 668
#' @param method Select between sNPLS, sNPLS-SR or sNPLS-VIP
669 +
#' @param metric Select between RMSE or AUC (for 0/1 response)
658 670
#' @param ... Further arguments passed to cv_snpls
659 671
#' @return A density plot with the results of the cross-validation and an (invisible) \code{data.frame} with these results
660 672
#' @importFrom stats var
661 673
#' @export
662 -
repeat_cv <- function(X_npls, Y_npls, ncomp = 1:3, samples=20, keepJ=NULL, keepK=NULL, nfold = 10, times=30, parallel = TRUE, method="sNPLS", ...){
674 +
repeat_cv <- function(X_npls, Y_npls, ncomp = 1:3, samples=20, keepJ=NULL, keepK=NULL, nfold = 10, times=30, parallel = TRUE, method="sNPLS", metric="RMSE", ...){
663 675
  if(!method %in% c("sNPLS", "sNPLS-SR", "sNPLS-VIP")) stop("'method' not recognized")
664 676
  if(parallel) message("Your parallel configuration is ", attr(future::plan(), "class")[3])
665 677
  if(is.null(keepJ) | is.null(keepK)){
@@ -670,9 +682,9 @@
Loading
670 682
    message("Using discrete thresholding")
671 683
  }
672 684
  if(parallel){
673 -
    rep_cv<-future.apply::future_sapply(1:times, function(x) suppressMessages(cv_snpls(X_npls, Y_npls, ncomp=ncomp, parallel = FALSE, nfold = nfold, samples=samples, keepJ=keepJ, keepK=keepK, method=method, ...)), future.seed=TRUE)
685 +
    rep_cv<-future.apply::future_sapply(1:times, function(x) suppressMessages(cv_snpls(X_npls, Y_npls, ncomp=ncomp, parallel = FALSE, nfold = nfold, samples=samples, keepJ=keepJ, keepK=keepK, method=method, metric=metric, ...)), future.seed=TRUE)
674 686
  } else {
675 -
    rep_cv<-pbapply::pbreplicate(times, suppressMessages(cv_snpls(X_npls, Y_npls, ncomp=ncomp, parallel = FALSE, nfold = nfold, samples=samples, keepJ=keepJ, keepK=keepK, method=method, ...)))
687 +
    rep_cv<-pbapply::pbreplicate(times, suppressMessages(cv_snpls(X_npls, Y_npls, ncomp=ncomp, parallel = FALSE, nfold = nfold, samples=samples, keepJ=keepJ, keepK=keepK, method=method, metric=metric, ...)))
676 688
  }
677 689
  resdata<-data.frame(ncomp=sapply(rep_cv[1,], function(x) x[[1]]), threshold_j=sapply(rep_cv[1,], function(x) x[[2]]),
678 690
                      threshold_k=sapply(rep_cv[1,], function(x) x[[3]]))
@@ -757,6 +769,16 @@
Loading
757 769
  round(coef(object, as.matrix=TRUE), 3)
758 770
}
759 771
#' AUC for sNPLS-DA model
#'
#' @description AUC for a sNPLS-DA model
#' @param object A sNPLS object
#' @return The area under the ROC curve for the model
#' @export
auroc <- function(object){
  # In-sample ROC curve of the model's fitted values (Yadj) against the
  # original 0/1 response (Yorig); return its AUC as a plain numeric.
  roc_curve <- pROC::roc(object$Yorig[, 1] ~ object$Yadj[, 1])
  as.numeric(roc_curve$auc)
}
781 +
760 782
#' Compute Selectivity Ratio for a sNPLS model
761 783
#'
762 784
#' @description Estimates Selectivity Ratio for the different components of a sNPLS model fit
Files Coverage
R/sNPLS_fit.R 0.00%
Project Totals (1 files) 0.00%
Sunburst
The inner-most circle is the entire project; moving away from the center are folders and, finally, a single file. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, proceeding outward through folders and, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading