1
######################################
2
# R Source code file for least squares strong hierarchy
3
# this is where most of the work is being done
4
# not exported
5
# Author: Sahir Bhatnagar
6
# Created: 2016
7
# Updated: April 6, 2018
8
#####################################
9

10

11
lspath <- function(x,
12
                   y,
13
                   e,
14
                   basis,
15
                   center.x,
16
                   center.e,
17
                   expand,
18
                   group,
19
                   group.penalty,
20
                   weights, # observation weights currently not being used
21
                   nlambda,
22
                   thresh,
23
                   fdev,
24
                   maxit,
25
                   verbose,
26
                   alpha,
27
                   nobs,
28
                   nvars,
29
                   vp, # penalty.factor
30
                   we, # we,wj,wje are subsets of vp
31
                   wj,
32
                   wje,
33
                   flmin,
34
                   vnames,
35
                   ne, # dfmax
36
                   ulam) {
37

38
  # Basis Expansion and Design Matrix ---------------------------------------
39

40 6
  expansion <- design_sail(
41 6
    x = x, e = e, expand = expand, group = group, basis = basis, nvars = nvars,
42 6
    vnames = vnames, center.x = center.x, center.e = center.e
43
  )
44

45
  # y <- drop(scale(y, center = TRUE, scale = FALSE))
46 6
  Phi_j_list <- expansion$Phi_j_list
47 6
  Phi_j <- expansion$Phi_j
48 6
  XE_Phi_j_list <- expansion$XE_Phi_j_list
49 6
  XE_Phi_j <- expansion$XE_Phi_j
50 6
  main_effect_names <- expansion$main_effect_names
51 6
  interaction_names <- expansion$interaction_names
52 6
  ncols <- expansion$ncols
53
  # group_list <- split(group, group)
54

55
  # group membership
56 6
  if (expand) {
57 6
    group <- rep(seq_len(nvars), each = ncols)
58
  }
59

60
  # this is used for the predict function
61 6
  design <- expansion$design
62

63 6
  nulldev <- as.numeric(crossprod(y - mean(y)))
64

65
  # Initialize -------------------------------------------------------------
66
  # the initial values here dont matter, since at Lambda_max everything is 0
67 6
  b0 <- mean(y)
68 6
  betaE <- 0
69 6
  theta <- split(stats::setNames(rep(0, length(main_effect_names)), main_effect_names), group)
70 6
  gamma <- rep(0, nvars)
71 6
  theta_next <- theta
72 6
  R.star <- y - b0
73

74
  # update this at the end once betaE and theta are updated. x_tilde is used for gamma update
75 6
  x_tilde <- matrix(0, nrow = nobs, ncol = nvars)
76 6
  add_back <- rep(0, nobs)
77

78 6
  Theta_init <- c(b0, betaE, do.call(c, theta), gamma)
79

80
  # Lambda Sequence ---------------------------------------------------------
81
  # browser()
82 6
  if (is.null(ulam)) {
83
    # R1 <- R2 <- y - b0 # this is used as the starting residual for Gamma and Theta update
84 6
    term1 <- (1 / we) * (crossprod(e, R.star))
85 6
    term2 <- (1 / wj) * sapply(Phi_j_list, function(i) l2norm(crossprod(i, R.star)))
86 6
    lambda_max <- (1 / (nobs * (1 - alpha))) * max(term1[term1 != Inf], max(term2[term2 != Inf]))
87 6
    lambdas <- rev(exp(seq(log(flmin * lambda_max), log(lambda_max), length.out = nlambda)))
88 6
    lambdaNames <- paste0("s", seq_along(lambdas))
89
  } else {
90 0
    lambdas <- ulam
91 0
    lambdaNames <- paste0("s", seq_along(lambdas))
92
    # not sure what to do yet, need to think about cv.sail and supplying the same lambda.sequence
93
    # or when using adaptive lasso?
94
  }
95

96

97
  # for all the x_tilde in zero_x_tilde, return the following matrix with 0 for each coefficient
98
  # this is like a place holder.
99

100 6
  coef_zero_gamma_matrix <- matrix(
101 6
    data = 0, nrow = nvars, ncol = 1,
102 6
    dimnames = ifelse(expand, list(vnames), list(paste0("V", seq(nvars))))
103
  )
104

105

106
  # Objects to store results ------------------------------------------------
107

108 6
  a0 <- stats::setNames(rep(0, nlambda), lambdaNames)
109

110 6
  environ <- stats::setNames(rep(0, nlambda), lambdaNames)
111

112 6
  betaMat <- matrix(
113 6
    nrow = length(main_effect_names), ncol = nlambda,
114 6
    dimnames = list(
115 6
      main_effect_names,
116 6
      lambdaNames
117
    )
118
  )
119

120 6
  if (expand) {
121 6
    gammaMat <- matrix(
122 6
      nrow = nvars, ncol = nlambda,
123 6
      dimnames = list(
124 6
        c(paste0(vnames, "E")),
125 6
        lambdaNames
126
      )
127
    )
128
  } else {
129 6
    gammaMat <- matrix(
130 6
      nrow = nvars, ncol = nlambda,
131 6
      dimnames = list(
132 6
        c(paste0("V", seq(nvars))),
133 6
        lambdaNames
134
      )
135
    )
136
  }
137

138 6
  alphaMat <- matrix(
139 6
    nrow = length(c(main_effect_names)),
140 6
    ncol = nlambda,
141 6
    dimnames = list(
142 6
      paste(main_effect_names, "E", sep = ":"),
143 6
      lambdaNames
144
    )
145
  )
146

147 6
  converged <- stats::setNames(rep(FALSE, nlambda), lambdaNames)
148

149 6
  outPrint <- matrix(NA,
150 6
    nrow = nlambda, ncol = 5,
151 6
    dimnames = list(
152 6
      lambdaNames,
153 6
      c(
154 6
        "dfBeta", "dfAlpha", "dfEnviron", "deviance",
155 6
        "percentDev"
156
      )
157
    )
158
  )
159

160 6
  active <- vector("list", length = nlambda)
161
  # browser()
162

163
  # Lambda Loop Start -------------------------------------------------------
164

165 6
  lambdas[1] <- .Machine$double.xmax
166 6
  for (LAMBDA in lambdas) {
167 6
    lambdaIndex <- which(LAMBDA == lambdas)
168

169 6
    if (verbose >= 1) {
170 0
      message(sprintf("Index: %g, lambda: %0.4f", lambdaIndex, if (lambdaIndex==1) lambda_max else LAMBDA))
171
    }
172

173
    # store likelihood values at each iteration in a matrix Q
174
    # rows: iteration number
175 6
    Q <- vector("numeric", length = maxit + 1)
176

177
    # store the value of the likelihood at the 0th iteration
178 6
    Q[1] <- (1 / (2 * nobs)) * crossprod(R.star)
179

180
    # iteration counter
181 6
    m <- 1
182

183

184
    # un-comment if we dont want warm starts for not converged lambdas
185

186
    # if (lambdaIndex > 1) {
187
    #   if (!converged[lambdaIndex - 1]) {
188
    #     b0 <- mean(y)
189
    #     theta <- split(setNames(rep(0, length(main_effect_names)), main_effect_names), group)
190
    #     betaE <- 0
191
    #     gamma <- rep(0, nvars)
192
    #     # R.star <- y - b0
193
    #     b0_next <- b0 ;
194
    #     theta_next <- theta
195
    #   }
196
    # }
197

198
    # While loop for convergence at a given Lambda value ----------------------
199

200 6
    while (!converged[lambdaIndex] && m < maxit) {
201

202
      # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
203
      # update gamma (interaction parameter)
204
      # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
205

206 6
      R <- R.star + add_back
207

208
      # indices of the x_tilde matrices that have all 0 columns
209 6
      zero_x_tilde <- dim(check_col_0(x_tilde))[2]
210

211 6
      gamma_next <- if (zero_x_tilde == 0) {
212 6
        drop(coef_zero_gamma_matrix)
213
      } else {
214 6
        coef(glmnet::glmnet(
215 6
          x = x_tilde,
216 6
          y = R,
217
          # thresh = 1e-12,
218 6
          penalty.factor = wje,
219 6
          lambda = c(.Machine$double.xmax, LAMBDA * alpha),
220 6
          standardize = F, intercept = F
221 6
        ))[-1, 2]
222
      }
223

224 6
      Delta <- rowSums(sweep(x_tilde, 2, (gamma - gamma_next), FUN = "*"))
225

226 6
      R.star <- R.star + Delta
227

228
      # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
229
      # update theta (main effect parameters)
230
      # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
231

232 6
      x_tilde_2 <- lapply(
233 6
        seq_along(Phi_j_list),
234 6
        function(i) Phi_j_list[[i]] + gamma_next[i] * betaE * XE_Phi_j_list[[i]]
235
      )
236

237
      # converged_theta <- FALSE
238
      # k <- 1
239
      # while (!converged_theta && k < maxit){
240
      # browser()
241

242 6
      if (any(wj == 0)) {
243 6
        for (j in seq_len(nvars)) {
244 6
          R <- R.star + x_tilde_2[[j]] %*% theta_next[[j]]
245 6
          if (wj[j] != 0) {
246 6
            theta_next_j <- switch(group.penalty,
247 6
              gglasso = coef(gglasso::gglasso(
248 6
                x = x_tilde_2[[j]],
249 6
                y = R,
250
                # eps = 1e-12,
251 6
                maxit = 100000,
252 6
                group = if (expand) rep(1, ncols) else rep(1, ncols[j]),
253 6
                pf = wj[j],
254 6
                lambda = LAMBDA * (1 - alpha),
255 6
                intercept = F
256 6
              ))[-1, ],
257 6
              grMCP = grpreg::grpreg(
258 6
                X = x_tilde_2[[j]],
259 6
                y = R,
260 6
                group = if (expand) rep(1, ncols) else rep(1, ncols[j]),
261 6
                penalty = "grMCP",
262 6
                family = "gaussian",
263 6
                group.multiplier = as.vector(wj[j]),
264 6
                lambda = LAMBDA * (1 - alpha),
265 6
                intercept = T
266 6
              )$beta[-1, ],
267 6
              grSCAD = grpreg::grpreg(
268 6
                X = x_tilde_2[[j]],
269 6
                y = R,
270 6
                group = if (expand) rep(1, ncols) else rep(1, ncols[j]),
271 6
                penalty = "grSCAD",
272 6
                family = "gaussian",
273 6
                group.multiplier = as.vector(wj[j]),
274 6
                lambda = LAMBDA * (1 - alpha),
275 6
                intercept = T
276 6
              )$beta[-1, ]
277
            )
278
          } else {
279 6
            theta_next_j <- stats::lm.fit(x_tilde_2[[j]], R)$coef
280
          }
281

282 6
          Delta <- x_tilde_2[[j]] %*% (theta_next[[j]] - theta_next_j)
283

284 6
          theta_next[[j]] <- theta_next_j
285

286 6
          R.star <- R.star + Delta
287
        }
288
      } else {
289 6
        for (j in seq_len(nvars)) {
290 6
          R <- R.star + x_tilde_2[[j]] %*% theta_next[[j]]
291 6
          theta_next_j <- switch(group.penalty,
292 6
            gglasso = coef(gglasso::gglasso(
293 6
              x = x_tilde_2[[j]],
294 6
              y = R,
295
              # eps = 1e-12,
296 6
              group = if (expand) rep(1, ncols) else rep(1, ncols[j]),
297 6
              pf = wj[j],
298 6
              lambda = LAMBDA * (1 - alpha),
299 6
              intercept = F
300 6
            ))[-1, ],
301 6
            grMCP = grpreg::grpreg(
302 6
              X = x_tilde_2[[j]],
303 6
              y = R,
304 6
              group = if (expand) rep(1, ncols) else rep(1, ncols[j]),
305 6
              penalty = "gel",
306 6
              family = "gaussian",
307 6
              group.multiplier = as.vector(wj[j]),
308 6
              lambda = LAMBDA * (1 - alpha),
309 6
              intercept = T
310 6
            )$beta[-1, ],
311 6
            grSCAD = grpreg::grpreg(
312 6
              X = x_tilde_2[[j]],
313 6
              y = R,
314 6
              group = if (expand) rep(1, ncols) else rep(1, ncols[j]),
315 6
              penalty = "grSCAD",
316 6
              family = "gaussian",
317 6
              group.multiplier = as.vector(wj[j]),
318 6
              lambda = LAMBDA * (1 - alpha),
319 6
              intercept = T
320 6
            )$beta[-1, ]
321
          )
322

323 6
          Delta <- x_tilde_2[[j]] %*% (theta_next[[j]] - theta_next_j)
324

325 6
          theta_next[[j]] <- theta_next_j
326

327 6
          R.star <- R.star + Delta
328
        }
329
      }
330

331
      # used to check convergence
332 6
      theta_next_vec <- do.call(c, theta_next)
333

334
      # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
335
      # update betaE
336
      # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
337

338
      # this can be used for betaE, b0 and gamma update!
339 6
      Phi_tilde_theta <- do.call(
340 6
        cbind,
341 6
        lapply(
342 6
          seq_along(XE_Phi_j_list),
343 6
          function(i) XE_Phi_j_list[[i]] %*% theta_next[[i]]
344
        )
345
      )
346

347 6
      gamma_Phi_tilde_theta_sum <- rowSums(sweep(Phi_tilde_theta, 2, gamma_next, FUN = "*"))
348

349 6
      x_tilde_E <- e + gamma_Phi_tilde_theta_sum
350

351 6
      R <- R.star + betaE * x_tilde_E
352

353 6
      if (we != 0) {
354 6
        betaE_next <- SoftThreshold(
355 6
          x = (1 / (nobs * we)) * sum(x_tilde_E * R),
356 6
          lambda = LAMBDA * (1 - alpha)
357
        )
358
      } else {
359 6
        betaE_next <- sum(x_tilde_E * R) / sum(x_tilde_E^2)
360
      }
361

362 6
      Delta <- (betaE - betaE_next) * x_tilde_E
363

364 6
      R.star <- R.star + Delta
365

366
      # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
367
      # update beta0
368
      # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
369

370 6
      R <- R.star + b0
371 6
      b0_next <- mean(R)
372

373
      # used for gamma update
374 6
      x_tilde <- betaE_next * Phi_tilde_theta
375 6
      add_back <- rowSums(sweep(x_tilde, 2, gamma_next, FUN = "*"))
376

377 6
      Delta <- (b0 - b0_next)
378

379 6
      R.star <- R.star + Delta
380

381 6
      Q[m + 1] <- Q_theta(
382 6
        R = R.star, nobs = nobs, lambda = LAMBDA, alpha = alpha,
383 6
        we = we, wj = wj, wje = wje, betaE = betaE_next,
384 6
        theta_list = theta_next, gamma = gamma_next
385
      )
386

387 6
      Theta_next <- c(b0_next, betaE_next, theta_next_vec, gamma_next)
388

389 6
      criterion <- abs(Q[m] - Q[m + 1]) / abs(Q[m])
390
      # criterion <- l2norm(Theta_next - Theta_init)
391 6
      converged[lambdaIndex] <- criterion < thresh
392 6
      converged[lambdaIndex] <- if (is.na(converged[lambdaIndex])) FALSE else converged[lambdaIndex]
393 6
      if (verbose >= 2) {
394 0
        message(sprintf(
395 0
          "Iteration: %f, Criterion: %f", m, criterion
396
        ))
397
      }
398

399 6
      b0 <- b0_next
400 6
      betaE <- betaE_next
401 6
      theta <- theta_next
402 6
      gamma <- gamma_next
403 6
      Theta_init <- Theta_next
404

405 6
      m <- m + 1
406
    }
407

408

409
    # Store Results -----------------------------------------------------------
410

411 6
    a0[lambdaIndex] <- b0_next
412 6
    environ[lambdaIndex] <- betaE_next
413 6
    betaMat[, lambdaIndex] <- theta_next_vec
414 6
    gammaMat[, lambdaIndex] <- gamma_next
415 6
    alphaMat[, lambdaIndex] <- do.call(c, lapply(seq_along(theta_next), function(i) betaE_next * gamma_next[i] * theta_next[[i]]))
416

417 6
    active[[lambdaIndex]] <- c(
418 6
      unique(gsub("\\_\\d*", "", names(which(abs(betaMat[, lambdaIndex]) > 0)))),
419 6
      unique(gsub("\\_\\d*", "", names(which(abs(alphaMat[, lambdaIndex]) > 0)))),
420 6
      if (abs(environ[lambdaIndex]) > 0) "E"
421
    )
422

423 6
    deviance <- crossprod(R.star)
424 6
    devRatio <- 1 - deviance / nulldev
425 6
    dfbeta <- sum(abs(betaMat[, lambdaIndex]) > 0) / ifelse(expand, ncols, 1)
426 6
    dfalpha <- sum(abs(alphaMat[, lambdaIndex]) > 0) / ifelse(expand, ncols, 1)
427 6
    dfenviron <- sum(abs(environ[lambdaIndex]) > 0)
428

429

430 6
    outPrint[lambdaIndex, ] <- c(
431 6
      if (dfbeta == 0) 0 else dfbeta,
432 6
      if (dfalpha == 0) 0 else dfalpha,
433 6
      if (dfenviron == 0) 0 else dfenviron,
434 6
      deviance, devRatio
435
    )
436

437

438
    # dfmax
439 6
    if (sum(outPrint[lambdaIndex, c("dfBeta", "dfAlpha", "dfEnviron")]) > ne) break
440

441
    # dev.off()
442
    # par(mfrow=c(3,1), mai = c(0.2,0.2,0.2,0.2))
443
    # matplot(t(betaMat), type = "l")
444
    # matplot(t(gammaMat), type = "l")
445
    # matplot(t(alphaMat), type = "l")
446
    # browser()
447
    # devianceDiff <- outPrint[lambdaIndex,"deviance"] - outPrint[lambdaIndex-1,"deviance"]
448 6
    devianceDiff <- (outPrint[lambdaIndex, "percentDev"] - outPrint[lambdaIndex - 1, "percentDev"]) /
449 6
      outPrint[lambdaIndex - 1, "percentDev"]
450 6
    if (length(devianceDiff) != 0 && !is.na(devianceDiff) && devRatio > 1e-3) {
451 6
      if (devianceDiff < fdev | outPrint[lambdaIndex, "percentDev"] > 0.999) break
452
    }
453
    # if (outPrint[LAMBDA,"percentDev"] > 0.999) break #}
454
  }
455

456 6
  beta_final <- methods::as(betaMat, "dgCMatrix")
457 6
  alpha_final <- methods::as(alphaMat, "dgCMatrix")
458 6
  gamma_final <- methods::as(gammaMat, "dgCMatrix") # used for KKT check
459

460

461
  # browser()
462

463 0
  if (all(!converged)) warning("The algorithm did not converge for all values of lambda.\n
464 0
                               Try changing the value of alpha and the convergence threshold.")
465

466 6
  lambdas[1] <- lambda_max
467

468 6
  out <- list(
469 6
    a0 = a0[converged],
470 6
    beta = beta_final[, converged, drop = FALSE],
471 6
    alpha = alpha_final[, converged, drop = FALSE],
472 6
    gamma = gamma_final[, converged, drop = FALSE],
473 6
    bE = environ[converged],
474 6
    active = active[converged],
475 6
    lambda = lambdas[converged],
476 6
    lambda2 = alpha,
477 6
    dfbeta = outPrint[converged, "dfBeta"],
478 6
    dfalpha = outPrint[converged, "dfAlpha"],
479 6
    dfenviron = outPrint[converged, "dfEnviron"],
480 6
    dev.ratio = outPrint[converged, "percentDev"],
481 6
    converged = converged,
482 6
    nlambda = sum(converged),
483 6
    design = design,
484
    # we = we,
485
    # wj = wj,
486
    # wje = wje,
487
    # Phi_j_list = Phi_j_list,
488
    # XE_Phi_j_list = XE_Phi_j_list,
489
    # Phi_j = Phi_j,
490
    # XE_Phi_j = XE_Phi_j,
491
    # x = x,
492
    # e = e,
493
    # y = y,
494 6
    nobs = nobs,
495 6
    nvars = nvars,
496 6
    vnames = vnames,
497 6
    ncols = ncols,
498 6
    center.x = center.x,
499 6
    center.e = center.e,
500 6
    basis = basis,
501 6
    expand = expand,
502 6
    group = group,
503 6
    interaction.names = interaction_names,
504 6
    main.effect.names = main_effect_names
505
  )
506 6
  class(out) <- "lspath"
507 6
  return(out)
508
}

Read our documentation on viewing source code .

Loading