1
|
|
#' Estimation of Lambda Sequence for Linear Mixed Model with Lasso Penalty
|
2
|
|
#'
|
3
|
|
#' @description \code{lambdalasso} estimates a decreasing sequence of tuning
|
4
|
|
#' parameters
|
5
|
|
#'
|
6
|
|
#' @seealso \code{\link{ggmix}}
|
7
|
|
#' @param ggmix_object A ggmix_object object of class \code{lowrank} or
|
8
|
|
#' \code{fullrank}
|
9
|
|
#' @inheritParams ggmix
|
10
|
|
#' @param scale_x should the columns of x be scaled - default is FALSE
|
11
|
|
#' @param center_y should y be mean centered - default is FALSE.
|
12
|
|
#' @param tol.kkt KKT tolerance. Currently ignored
|
13
|
|
#' @param ... Extra parameters. Currently ignored.
|
14
|
|
#' @note This function isn't meant to be called directly by the user.
|
15
|
|
#' @return A decreasing sequence of tuning parameters
|
16
|
4
|
lambdalasso <- function(ggmix_object, ...) UseMethod("lambdalasso")
|
17
|
|
|
18
|
|
#' @rdname lambdalasso
|
19
|
|
lambdalasso.default <- function(ggmix_object, ...) {
|
20
|
0
|
stop(strwrap("This function should be used with a ggmix object of class
|
21
|
0
|
lowrank or fullrank"))
|
22
|
|
}
|
23
|
|
|
24
|
|
#' @rdname lambdalasso
|
25
|
|
lambdalasso.fullrank <- function(ggmix_object,
|
26
|
|
...,
|
27
|
|
penalty.factor,
|
28
|
|
lambda_min_ratio,
|
29
|
|
epsilon = 1e-14,
|
30
|
|
tol.kkt = 1e-9,
|
31
|
|
eta_init = 0.5,
|
32
|
|
nlambda = 100, scale_x = F, center_y = F) {
|
33
|
4
|
utx <- ggmix_object[["x"]]
|
34
|
4
|
uty <- ggmix_object[["y"]]
|
35
|
4
|
eigenvalues <- ggmix_object[["D"]]
|
36
|
|
|
37
|
4
|
np <- dim(utx)
|
38
|
4
|
n <- np[[1]]
|
39
|
|
|
40
|
|
# assuming the first column is the intercept, so we subtract 1
|
41
|
4
|
p <- np[[2]] - 1
|
42
|
|
|
43
|
|
# weights
|
44
|
4
|
di <- 1 + eta_init * (eigenvalues - 1)
|
45
|
|
|
46
|
|
# initial value for beta0
|
47
|
4
|
beta0_init <- (sum(utx[, 1] * uty / di)) / (sum(utx[, 1]^2 / di))
|
48
|
|
|
49
|
|
# this includes all other betas which are 0 by definition
|
50
|
4
|
beta_init <- as.matrix(c(beta0_init, rep(0, p)))
|
51
|
|
|
52
|
|
# sum is faster
|
53
|
|
# microbenchmark::microbenchmark(
|
54
|
|
# mat = drop((t(utx0) %*% di_inverse %*% uty) / (t(utx0) %*% di_inverse %*% utx0)),
|
55
|
|
# sum = (sum(utx0 * uty / di)) / (sum(utx0 ^ 2 / di)), times = 1000
|
56
|
|
# )
|
57
|
|
|
58
|
|
# closed form for sigma^2
|
59
|
4
|
sigma2_init <- (1 / n) * sum((uty - utx %*% beta_init)^2 / di)
|
60
|
|
|
61
|
|
# sum version is faster
|
62
|
|
# mb <- microbenchmark(
|
63
|
|
# mat = (1 / n) * t(uty - beta0_init * utx0) %*% di_inverse %*% (uty - beta0_init * utx0),
|
64
|
|
# sum = (1 / n) * sum ((uty - beta0_init * utx0)^2 / (1 + eta_init * (eigenvalues - 1))),
|
65
|
|
# times = 1000)
|
66
|
|
# ggplot2::autoplot(mb)
|
67
|
|
|
68
|
|
# iteration counter
|
69
|
4
|
k <- 0
|
70
|
|
|
71
|
|
# to enter while loop
|
72
|
4
|
converged <- FALSE
|
73
|
|
|
74
|
4
|
while (!converged) {
|
75
|
4
|
Theta_init <- c(beta_init, eta_init, sigma2_init)
|
76
|
|
|
77
|
|
# fit eta
|
78
|
4
|
eta_next <- stats::optim(
|
79
|
4
|
par = eta_init,
|
80
|
4
|
fn = fn_eta_lasso_fullrank,
|
81
|
4
|
gr = gr_eta_lasso_fullrank,
|
82
|
4
|
method = "L-BFGS-B",
|
83
|
4
|
control = list(fnscale = 1),
|
84
|
4
|
lower = .01,
|
85
|
4
|
upper = .99,
|
86
|
4
|
sigma2 = sigma2_init,
|
87
|
4
|
beta = beta_init,
|
88
|
4
|
eigenvalues = eigenvalues,
|
89
|
4
|
x = utx,
|
90
|
4
|
y = uty,
|
91
|
4
|
nt = n
|
92
|
4
|
)$par
|
93
|
|
|
94
|
|
# weights
|
95
|
4
|
di <- 1 + eta_next * (eigenvalues - 1)
|
96
|
|
|
97
|
|
# next value for beta0
|
98
|
4
|
beta0_next <- (sum(utx[, 1] * uty / di)) / (sum(utx[, 1]^2 / di))
|
99
|
|
|
100
|
4
|
beta_next <- as.matrix(c(beta0_next, rep(0, p)))
|
101
|
|
|
102
|
|
# closed form for sigma^2
|
103
|
4
|
sigma2_next <- (1 / n) * sum((uty - utx %*% beta_next)^2 / di)
|
104
|
|
|
105
|
4
|
k <- k + 1
|
106
|
|
|
107
|
4
|
Theta_next <- c(beta_next, eta_next, sigma2_next)
|
108
|
|
|
109
|
4
|
converged <- crossprod(Theta_next - Theta_init) < epsilon
|
110
|
|
|
111
|
|
# message(sprintf("l2 norm squared of Theta_k+1 - Theta_k: %f \n log-lik: %f",
|
112
|
|
# crossprod(Theta_next - Theta_init),
|
113
|
|
# log_lik(eta = eta_next, sigma2 = sigma2_next, beta = beta_next,
|
114
|
|
# eigenvalues = eigenvalues,
|
115
|
|
# x = utx, y = uty, nt = n)))
|
116
|
|
|
117
|
4
|
beta0_init <- beta0_next
|
118
|
4
|
beta_init <- beta_next
|
119
|
4
|
eta_init <- eta_next
|
120
|
4
|
sigma2_init <- sigma2_next
|
121
|
|
}
|
122
|
|
|
123
|
|
# eta_next
|
124
|
|
# sigma2_next
|
125
|
4
|
di <- 1 + eta_next * (eigenvalues - 1)
|
126
|
4
|
wi <- (1 / sigma2_next) * (1 / di)
|
127
|
0
|
if (any(wi < 0)) stop("weights are negative")
|
128
|
|
|
129
|
|
# scale the weights to sum to nvars
|
130
|
|
# wi_scaled <- as.vector(wi) / sum(as.vector(wi)) * n
|
131
|
|
|
132
|
|
# wi_scaled <- as.vector(wi) * n
|
133
|
|
|
134
|
|
# lambda.max <- max(abs(colSums((wi * utx[,-1]) * drop(uty - utx %*% beta_next))))
|
135
|
|
|
136
|
|
# this gives the same answer (see paper for details)
|
137
|
|
# we divide by sum(wi) here and not in glmnet because the sequence is determined
|
138
|
|
# on the log scale
|
139
|
|
# lambda.max <- max(abs(colSums(((1 / sum(wi_scaled)) * (wi_scaled * utx[,-1]) * drop(uty - utx %*% beta_next)))))
|
140
|
|
# browser()
|
141
|
4
|
lambdas <- (1 / penalty.factor) *
|
142
|
4
|
abs(colSums(((1 / sum(wi)) * (wi * utx[, -1]) *
|
143
|
4
|
drop(uty - utx %*% beta_next))))
|
144
|
|
# need to check for Inf, in case some penalty factors are 0
|
145
|
4
|
lambda.max <- max(lambdas[lambdas != Inf])
|
146
|
|
# lambda.max <- lambda.max * sum(wi)
|
147
|
|
# (utx[,-1, drop = F]) %>% dim
|
148
|
|
# a <- colSums(utx[,-1, drop = F]^2 * wi)
|
149
|
|
# b <- colSums(sweep(utx[,-1, drop = F]^2, MARGIN = 1, wi, '*'))
|
150
|
|
# all(a == b)
|
151
|
|
|
152
|
|
# kkt <- kkt_check(eta = eta_next, sigma2 = sigma2_next, beta = beta_next,
|
153
|
|
# eigenvalues = eigenvalues, x = utx, y = uty, nt = n,
|
154
|
|
# lambda = lambda.max, tol.kkt = tol.kkt)
|
155
|
|
# message(kkt)
|
156
|
4
|
out <- list(
|
157
|
4
|
sequence = rev(exp(seq(log(lambda_min_ratio * lambda.max),
|
158
|
4
|
log(lambda.max),
|
159
|
4
|
length.out = nlambda
|
160
|
|
))),
|
161
|
4
|
eta = eta_next, sigma2 = sigma2_next, beta0 = beta0_next
|
162
|
|
)
|
163
|
|
|
164
|
4
|
return(out)
|
165
|
|
}
|
166
|
|
|
167
|
|
|
168
|
|
# still need to create lambdalasso.lowrank, lambdagglasso.fullrank, lambdagglasso.lowrank
|