-
Notifications
You must be signed in to change notification settings - Fork 1
/
use_case_lasso.Rmd
137 lines (123 loc) · 4.61 KB
/
use_case_lasso.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
---
title: "Lasso"
---
The [least absolute shrinkage and selection operator (LASSO)](https://en.wikipedia.org/wiki/Lasso_(statistics))
shrinks the regression coefficients by adding a $l_1-norm$ penalty term to the optimization problem.
More information about LASSO can be found in e.g., [Tibshirani (1996)](#TIBSHIRANI).
For the ordinary least squares problem with $l_1$ regularization this leads to the following optimization
problem
$$
\underset{\beta}{\text{minimize}} ~~ \frac{1}{2} || y - X \beta ||_2^2 + \lambda ||\beta||_1
$$
with $X \in \mathbb{R}^{m \times n}$, $y \in \mathbb{R}^{m}$, $\beta \in \mathbb{R}^n$
and $0 < \lambda \in \mathbb{R}$.
The problem can be rewritten into a quadratic optimization problem
$$
\begin{array}{rl}
\underset{(\beta, \gamma, t)}{\text{minimize}} &
\frac{1}{2} \gamma^\top \gamma + \lambda \boldsymbol{1}^\top t \\
\text{subject to} &
y - X \beta = \gamma \\
& -t \leq \beta \leq t
\end{array}
$$
with $\gamma \in \mathbb{R}^n$, $t \in \mathbb{R}^n$
Alternatively the problem can be written as a second order cone problem
$$
\begin{array}{rl}
\underset{(\beta, t, u)}{\text{minimize}} &
\frac{1}{2} u + \lambda \boldsymbol{1}^\top t \\
\text{subject to} &
||(1 - u, ~ 2 y - 2 X \beta)||_2 \leq 1 + u \\
& -t \leq \beta \leq t
\end{array}
$$
with $u \in \mathbb{R}$.
</br>
In **R** package [**glmnet**](https://cran.r-project.org/package=glmnet) is typically the method of choice to obtain the LASSO estimate.
```{r, use_case_lasso_glmnet, message = FALSE}
library(glmnet)
data(QuickStartExample)
lambda <- 1
mo <- glmnet(x, y, family = "gaussian", alpha = 1, lambda = lambda,
intercept = FALSE, standardize = FALSE)
glmnet_beta <- setNames(as.vector(coef(mo)), rownames(coef(mo)))
round(glmnet_beta, 4)
```
## Estimation QP
```{r, message = FALSE}
library(slam)
Sys.setenv(ROI_LOAD_PLUGINS = FALSE)
library(ROI)
library(ROI.plugin.qpoases)
library(ROI.plugin.ecos)
dbind <- function(...) {
.dbind <- function(x, y) {
A <- simple_triplet_zero_matrix(NROW(x), NCOL(y))
B <- simple_triplet_zero_matrix(NROW(y), NCOL(x))
rbind(cbind(x, A), cbind(B, y))
}
Reduce(.dbind, list(...))
}
```
```{r}
qp_lasso <- function(x, y, lambda) {
stzm <- simple_triplet_zero_matrix
stdm <- simple_triplet_diag_matrix
m <- NROW(x); n <- NCOL(x)
Q0 <- dbind(stzm(n), stdm(1, m), stzm(n))
a0 <- c(b = double(n), g = double(m), t = lambda * rep(1, n))
op <- OP(objective = Q_objective(Q = Q0, L = a0))
## y - X %*% beta = gamma <=> X %*% beta + gamma = y
A1 <- cbind(x, stdm(1, m), stzm(m, n))
LC1 <- L_constraint(A1, eq(m), y)
## -t <= beta <=> 0 <= beta + t
A2 <- cbind(stdm(1, n), stzm(n, m), stdm(1, n))
LC2 <- L_constraint(A2, geq(n), double(n))
## beta <= t <=> beta - t <= 0
A3 <- cbind(stdm(1, n), stzm(n, m), stdm(-1, n))
LC3 <- L_constraint(A3, leq(n), double(n))
constraints(op) <- rbind(LC1, LC2, LC3)
bounds(op) <- V_bound(ld = -Inf, nobj = ncol(Q0))
op
}
op <- qp_lasso(x, y, 0)
(qp0 <- ROI_solve(op, "qpoases"))
op <- qp_lasso(x, y, lambda * NROW(x))
(qp1 <- ROI_solve(op, "qpoases"))
```
## Estimation CP
```{r}
cp_lasso <- function(x, y, lambda) {
stm <- simple_triplet_matrix
stzm <- simple_triplet_zero_matrix
stdm <- simple_triplet_diag_matrix
m <- NROW(x); n <- NCOL(x); nobj <- 2 * n + 1
op <- OP(c(beta = double(n), t = lambda * rep(1, n), u = 0.5))
## ||(1 - u, 2 y - 2 X %*% beta)||_2 <= 1 + u
A1 <- rbind(stm(1, nobj, -1), stm(1, nobj, 1), cbind(2 * x, stzm(m, n + 1)))
LC1 <- C_constraint(A1, K_soc(m + 2), rhs = c(1, 1, 2 * y))
## -t <= z <=> 0 <= z + t
A2 <- cbind(stdm(1, n), stdm(1, n), stzm(n, 1))
## z <= t <=> z - t <= 0
A3 <- cbind(stdm(1, n), stdm(-1, n), stzm(n, 1))
LC2 <- L_constraint(rbind(-A2, A3), leq(2 * n), double(2 * n))
constraints(op) <- rbind(LC2, LC1)
bounds(op) <- V_bound(ld = -Inf, nobj = nobj)
op
}
op <- cp_lasso(x, y, 0)
(cp0 <- ROI_solve(op, "ecos"))
op <- cp_lasso(x, y, lambda * NROW(x))
(cp1 <- ROI_solve(op, "ecos"))
```
## Comparison
```{r}
n <- ncol(x)
cbind(lm = coef(lm.fit(x, y)), qp = head(solution(qp0), n),
cp = head(solution(cp0), n))
cbind(lm = round(glmnet_beta, 4), qp = round(c(0, head(solution(qp1), n)), 4),
cp = round(c(0, head(solution(cp1), n)), 4))
```
# References
* Tibshirani, Robert (1996), Regression Shrinkage and Selection Via the Lasso. Journal of the Royal Statistical Society: Series B (Methodological), 58: 267-288. URL [`doi:10.1111/j.2517-6161.1996.tb02080.x`](https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/j.2517-6161.1996.tb02080.x) <a name = "TIBSHIRANI"></a>