// lasso.cpp
/*
Copyright [2011] [Aapo Kyrola, Joseph Bradley, Danny Bickson, Carlos Guestrin / Carnegie Mellon University]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Optimization problem
// \arg \min_x ||Ax - y||_2^2 + \lambda ||x||_1
//
#include "common.h"
// Problem definition
shotgun_data * lassoprob;
// Major optimization: always keep the vector 'Ax' up to date.
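// Maintaining Ax incrementally means each coordinate update touches only the
// nonzeros of one column, so a full sweep over all features costs O(nnz(A))
// rather than O(nx * nnz(A)) per iteration.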
void initialize_feature(int feat_idx) {
    sparse_array& col = (*lassoprob->A_cols)[feat_idx];
    feature& feat = lassoprob->feature_consts[feat_idx];
    lassoprob->x[feat_idx] = 0.0;

    // Precompute covariance of a feature
    feat.covar = 0;
    for(int i=0; i<col.length(); i++) {
        feat.covar += sqr(col.values[i]);
    }
    feat.covar *= 2;

    // Precompute (Ay)_i
    feat.Ay_i = 0.0;
    for(int i=0; i<col.length(); i++) {
        feat.Ay_i += col.values[i]*lassoprob->y[col.idxs[i]];
    }
    feat.Ay_i *= 2;
}
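// The constants precomputed above are, per column A_i:
//   covar = 2 * ||A_i||_2^2   (curvature of the quadratic term in x_i)
//   Ay_i  = 2 * A_i^T y       (linear term; also used for lambda_max below)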
void initialize() {
    // resize(), not reserve(): initialize_feature() indexes into
    // feature_consts, which is undefined behavior on a merely reserved vector.
    lassoprob->feature_consts.resize(lassoprob->nx);
    lassoprob->x.resize(lassoprob->nx);
    lassoprob->Ax.resize(lassoprob->ny);
    // 'omp for' outside a parallel region runs sequentially;
    // 'omp parallel for' is what the parallel initialization intends.
    #pragma omp parallel for
    for(int i=0; i<lassoprob->nx; i++) {
        initialize_feature(i);
    }
}
// Branchless variant of the soft-threshold operator (unused; kept for reference).
valuetype_t soft_thresholdO(valuetype_t _lambda, valuetype_t shootDiff) {
    return (shootDiff > _lambda) * (_lambda - shootDiff) +
           (shootDiff < -_lambda) * (-_lambda - shootDiff);
}
valuetype_t soft_threshold(valuetype_t _lambda, valuetype_t shootDiff) {
    if (shootDiff > _lambda) return _lambda - shootDiff;
    if (shootDiff < -_lambda) return -_lambda - shootDiff;
    return 0;
}
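// Note the sign convention: this returns -sign(d) * max(|d| - lambda, 0),
// the NEGATED usual soft-threshold, matching the sign of S_j in shoot().
// Example: soft_threshold(1.0, 2.5) == -1.5, soft_threshold(1.0, -2.5) == 1.5,
// soft_threshold(1.0, 0.5) == 0.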
double shoot(int x_i, valuetype_t lambda) {
    feature& feat = lassoprob->feature_consts[x_i];
    valuetype_t oldvalue = lassoprob->x[x_i];
    if (feat.covar == 0.0) return 0.0; // Zero-column

    // Compute dot product A'_i*(Ax)
    valuetype_t AtAxj = 0.0;
    sparse_array& col = (*lassoprob->A_cols)[x_i];
    int len = col.length();
    for(int i=0; i<len; i++) {
        AtAxj += col.values[i] * lassoprob->Ax[col.idxs[i]];
    }
    valuetype_t S_j = 2 * AtAxj - feat.covar * oldvalue - feat.Ay_i;
    valuetype_t newvalue = soft_threshold(lambda, S_j) / feat.covar;
    valuetype_t delta = (newvalue - oldvalue);

    // Update Ax
    if (delta != 0.0) {
        for(int i=0; i<len; i++) {
            lassoprob->Ax.add(col.idxs[i], col.values[i] * delta);
        }
        lassoprob->x[x_i] = newvalue;
    }
    return std::abs(delta);
}
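// Derivation of the update: with feature i's own contribution removed,
//   S_j = 2*A_i^T(Ax) - covar*x_i - Ay_i = 2*A_i^T(Ax - A_i*x_i - y),
// so the one-dimensional subproblem in t = x_i is
//   (covar/2)*t^2 + S_j*t + lambda*|t| + const,
// whose minimizer is exactly t = soft_threshold(lambda, S_j) / covar.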
// Find the smallest lambda such that, if used for regularization,
// the optimum has all weights zero.
valuetype_t compute_max_lambda() {
    valuetype_t maxlambda = 0.0;
    for(int i=0; i<lassoprob->nx; i++) {
        maxlambda = std::max(maxlambda, std::abs(lassoprob->feature_consts[i].Ay_i));
    }
    return maxlambda;
}
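// Why this works: at x = 0 the subgradient optimality condition for
// coordinate i is |2*A_i^T y| = |Ay_i| <= lambda, so any
// lambda >= max_i |Ay_i| keeps the all-zero solution optimal.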
valuetype_t get_term_threshold(int k, int K, double delta_threshold) {
    // Looser termination thresholds for intermediate steps on the
    // regularization path; the final step (k == 0) uses the strict threshold.
    return (k == 0 ? delta_threshold : (delta_threshold + k*(delta_threshold*50)/K));
}
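// Example: with K = 10 and delta_threshold = 1e-5, the first path step
// (k = 10) terminates at 1e-5 + 10*(5e-4)/10 = 5.1e-4, while the final
// step (k = 0) uses the strict 1e-5.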
valuetype_t compute_objective(valuetype_t _lambda, std::vector<valuetype_t>& x, double & l0x, valuetype_t * l1x = NULL, valuetype_t * l2err = NULL) {
    double least_sqr = 0;
    for(int i=0; i<lassoprob->ny; i++) {
        least_sqr += (lassoprob->Ax[i]-lassoprob->y[i])*(lassoprob->Ax[i]-lassoprob->y[i]);
    }

    // L1 penalty; l0x counts the number of zero weights
    double penalty = 0.0;
    for(int i=0; i<lassoprob->nx; i++) {
        penalty += std::abs(lassoprob->x[i]);
        l0x += (lassoprob->x[i] == 0);
    }
    if (l1x != NULL) *l1x = penalty;
    if (l2err != NULL) *l2err = least_sqr;
    return penalty * _lambda + least_sqr;
}
void main_optimization_loop(double lambda, int regpathlength, double threshold, int maxiter, int verbose) {
    // Empirically found heuristic for deciding how many steps
    // to take on the regularization path
    int regularization_path_length = (regpathlength <= 0 ? 1+(int)(lassoprob->nx/2000) : regpathlength);

    omp_set_lock(lassoprob->lock);
    mexPrintf("regularization_path_length = %d\n", regularization_path_length);
    omp_unset_lock(lassoprob->lock);

    valuetype_t lambda_max = compute_max_lambda();
    valuetype_t lambda_min = lambda;
    valuetype_t alpha = pow(lambda_max/lambda_min, 1.0/(1.0*regularization_path_length));
    int regularization_path_step = regularization_path_length;

    double delta_threshold = threshold;
    long long int num_of_shoots = 0;
    int counter = 0;
    int iterations = 0;
    double *delta = new double[lassoprob->nx];

    do {
        ++iterations;
        if (iterations >= maxiter && maxiter > 0) {
            omp_set_lock(lassoprob->lock);
            mexPrintf("Exceeded max iterations: %d\n", maxiter);
            omp_unset_lock(lassoprob->lock);
            delete[] delta; // avoid leaking on early exit
            return;
        }
        double maxChange = 0;
        lambda = lambda_min * pow(alpha, regularization_path_step);

        // Parallel loop
        if (lassoprob->parallel) {
            #pragma omp parallel for
            for(int i=0; i<lassoprob->nx; i++)
                delta[i] = shoot(i, lambda);
        } else {
            for(int i=0; i<lassoprob->nx; i++)
                delta[i] = shoot(i, lambda);
        }
        maxChange = 0;
        // TODO: use OpenMP reductions (although - not much to gain)
        for(int i=0; i<lassoprob->nx; i++)
            maxChange = (maxChange < delta[i] ? delta[i] : maxChange);
        num_of_shoots += lassoprob->nx;
        counter++;

        // Convergence check.
        // We use a simple trick to converge faster for the intermediate sub-optimization problems
        // on the regularization path. This is important because we do not care about accuracy for
        // the intermediate problems; we just want a good warm start for the next round.
        bool converged = (maxChange <= get_term_threshold(regularization_path_step, regularization_path_length, delta_threshold));
        if (converged || counter > std::min(100, (100-regularization_path_step)*2)) {
            counter = 0;
            regularization_path_step--;
        }
        if (verbose) {
            double l1x = 0, l2err = 0, l0x = 0;
            valuetype_t obj = compute_objective(lambda, lassoprob->x, l0x, &l1x, &l2err);
            omp_set_lock(lassoprob->lock);
            mexPrintf("Objective: %g L1 : %g L2err: %g L0 %d\n", obj, l1x, l2err, (int)l0x);
            omp_unset_lock(lassoprob->lock);
        }
    } while (regularization_path_step >= 0);
    delete[] delta;

    omp_set_lock(lassoprob->lock);
    mexPrintf("Num of shoots = %lld\n", num_of_shoots);
    omp_unset_lock(lassoprob->lock);
}
double solveLasso(shotgun_data * probdef, double lambda, int regpathlength, double threshold, int maxiter, int verbose) {
    lassoprob = probdef;
    initialize();
    main_optimization_loop(lambda, regpathlength, threshold, maxiter, verbose);
    return 0;
}
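// Minimal usage sketch (hypothetical driver; assumes the caller populates the
// shotgun_data fields used above -- A_cols, y, nx, ny, parallel, lock -- as
// defined in common.h):
//
//   shotgun_data prob;
//   /* fill prob.A_cols (column-major sparse A), prob.y, prob.nx, prob.ny;
//      set prob.parallel and initialize prob.lock */
//   solveLasso(&prob, /*lambda=*/0.5, /*regpathlength=*/0,
//              /*threshold=*/1e-5, /*maxiter=*/100, /*verbose=*/1);
//   // the solution is left in prob.x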