/*!
Iterative algorithms for solving the finite-dimensional subproblem with constraint.
*/

use nalgebra::DVector;
use numeric_literals::replace_float_literals;
use itertools::izip;
//use std::iter::zip;
use std::cmp::Ordering::*;

use alg_tools::iterate::{
    AlgIteratorFactory,
    AlgIteratorState,
};
use alg_tools::nalgebra_support::ToNalgebraRealField;
use alg_tools::norms::{Dist, L1};
use alg_tools::nanleast::NaNLeast;

use crate::types::*;
use super::{
    InnerMethod,
    InnerSettings
};
use super::nonneg::nonneg_soft_thresholding;
use super::l1squared_unconstrained::l1squared_prox;
|
/// Return the maximum of `dist` and the distance of the interval `[lb, ub]` to zero.
#[replace_float_literals(F::cast_from(literal))]
pub(super) fn max_interval_dist_to_zero<F : Float>(dist : F, lb : F, ub : F) -> F {
    if lb < 0.0 {
        if ub > 0.0 {
            dist
        } else {
            dist.max(-ub)
        }
    } else /* lb ≥ 0.0 */ {
        dist.max(lb)
    }
}
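
// A small sanity check of the interval-distance helper above; added as an illustrative
// sketch, assuming the crate's `Float` trait is implemented for `f64` (as the other
// routines here suggest).
#[test]
fn max_interval_dist_to_zero_examples() {
    // The interval [-1, 2] contains zero, so only the running `dist` matters.
    assert_eq!(max_interval_dist_to_zero(0.5, -1.0, 2.0), 0.5);
    // The interval [-3, -2] has distance 2 to zero, which exceeds `dist`.
    assert_eq!(max_interval_dist_to_zero(0.5, -3.0, -2.0), 2.0);
    // The interval [1, 4] has distance 1 to zero, which exceeds `dist`.
    assert_eq!(max_interval_dist_to_zero(0.5, 1.0, 4.0), 1.0);
}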
|
/// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x)$ at $x$.
#[replace_float_literals(F::cast_from(literal))]
fn min_subdifferential<F : Float + nalgebra::RealField>(
    y : &DVector<F>,
    x : &DVector<F>,
    g : &DVector<F>,
    λ : F,
    β : F
) -> F {
    let mut val = 0.0;
    let tmp = β*y.dist(x, L1);
    for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
        let (mut lb, mut ub) = (-g_i, -g_i);
        match x_i.partial_cmp(y_i) {
            Some(Greater) => { lb += tmp; ub += tmp },
            Some(Less) => { lb -= tmp; ub -= tmp },
            Some(Equal) => { lb -= tmp; ub += tmp },
            None => {},
        }
        match x_i.partial_cmp(&0.0) {
            Some(Greater) => { lb += λ; ub += λ },
            // Less should not happen
            Some(Less|Equal) => { lb = F::NEG_INFINITY; ub += λ },
            None => {},
        };
        val = max_interval_dist_to_zero(val, lb, ub);
    }
    val
}
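
// An illustrative scalar sanity check (added): with y = 1, g = 0, λ = 0 and β = 1 the
// objective reduces to x ↦ ½(x−1)² on x ≥ 0, so at x = 2 the minimal subgradient is 1.
// Assumes the crate's `Float` trait is implemented for `f64`.
#[test]
fn min_subdifferential_scalar_example() {
    let y = DVector::from_vec(vec![1.0]);
    let x = DVector::from_vec(vec![2.0]);
    let g = DVector::from_vec(vec![0.0]);
    assert_eq!(min_subdifferential(&y, &x, &g, 0.0, 1.0), 1.0);
}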
|
#[replace_float_literals(F::cast_from(literal))]
fn lbd_soft_thresholding<F : Float>(v : F, λ : F, b : F) -> F
{
    match (b >= 0.0, v >= b) {
        (true, false) => b,
        (true, true) => b.max(v - λ), // soft-to-b from above
        (false, true) => super::unconstrained::soft_thresholding(v, λ),
        (false, false) => 0.0.min(b.max(v - λ)), // soft-to-0 with lower bound
    }
}
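
// A quick illustration (added) of the case analysis above for `f64` inputs, assuming
// the crate's `Float` trait is implemented for `f64`. The (b < 0, v ≥ b) case defers
// to `soft_thresholding` and is not exercised here.
#[test]
fn lbd_soft_thresholding_examples() {
    // b ≥ 0 and v below the bound: clamp to b.
    assert_eq!(lbd_soft_thresholding(0.5, 1.0, 2.0), 2.0);
    // b ≥ 0 and v above the bound: soft-threshold towards zero, but not below b.
    assert_eq!(lbd_soft_thresholding(5.0, 1.0, 2.0), 4.0);
    assert_eq!(lbd_soft_thresholding(2.5, 1.0, 2.0), 2.0);
    // b < 0 and v below the bound: the result stays at b.
    assert_eq!(lbd_soft_thresholding(-3.0, 1.0, -2.0), -2.0);
}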
|
/// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2 + δ_{≥0}(x)$.
///
/// To derive an algorithm for this, we can use
/// $prox_f(x) = y + prox_{f_0}(x - y)$ for
/// $f_0(z)=\frac{β}{2}\norm{z}_1^2 + δ_{≥-y}(z)$.
|
/// Now, the optimality conditions for $w = prox_{f_0}(x)$ are
/// $$\tag{*}
///     x ∈ w + β\norm{w}_1\sign w + N_{≥ -y}(w).
/// $$
/// If we know $\norm{w}_1$, then this is easily solved by lower-bounded soft-thresholding.
/// We find this by sorting the elements by the distance to the 'locked' lower-bounded
/// soft-thresholding target ($0$ or $-y_i$).
/// Then we loop over this sorted vector, increasing our estimate of $\norm{w}_1$ as we decide
/// that the soft-thresholding parameter $β\norm{w}_1$ has to be such that the passed elements
/// will reach their locked value (after which they cannot change anymore for a larger
/// soft-thresholding parameter). This has to be slightly more fine-grained to account
/// for the case that $-y_i<0$ and $x_i < -y_i$.
///
/// Indeed, denoting by $x'$ and $w'$ the subvectors of elements such that $w_i ≠ 0$ and
/// $w_i > -y_i$, we can calculate, by applying $⟨\cdot, \sign w'⟩$ to the corresponding
/// lines of (*), that
/// $$
///     \norm{x'}_1 = \norm{w'}_1 + β m \norm{w}_1,
/// $$
/// where $m$ is the number of such elements.
/// Having a value for $t = \norm{w}_1-\norm{w'}_1$, we can then calculate
/// $$
///     \norm{x'}_1 - t = (1+β m)\norm{w}_1,
/// $$
/// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}_1$.
/// Since we do not actually know the unlocked elements, but just loop over all the possibilities
/// for them, we have to check that $λ$ is above the current lower bound for this parameter
/// (`shift` in the code), and below a value that would cause changes in the locked set
/// (`max_shift` in the code).
#[replace_float_literals(F::cast_from(literal))]
pub(super) fn l1squared_nonneg_prox<F : Float + nalgebra::RealField>(
    sorted : &mut Vec<(F, F, F, Option<(F, F)>)>,
    x : &mut DVector<F>,
    y : &DVector<F>,
    β : F
) {
    // nalgebra double-definition bullshit workaround
    //let max = alg_tools::NumTraitsFloat::max;
    let abs = alg_tools::NumTraitsFloat::abs;

    *x -= y;

    for (az_x_i, &x_i, &y_i) in izip!(sorted.iter_mut(), x.iter(), y.iter()) {
        // The first component of each az_x_i contains the distance of x_i to the
        // soft-thresholding limit. If it is negative, the limit is always reached.
        // The second component contains the absolute value of the result for that component
        // w_i of the solution, if the soft-thresholding limit is reached.
        // This is stored here due to the sorting, although it could otherwise be computed directly.
        // Likewise, the third component contains the absolute value of x_i.
        // The fourth component contains an initial lower bound.
        let a_i = abs(x_i);
        let b = -y_i;
        *az_x_i = match (b >= 0.0, x_i >= b) {
            (true, false) => (x_i-b, b, a_i, None), // w_i=b, so sorting element negative!
            (true, true) => (x_i-b, b, a_i, None), // soft-to-b from above
            (false, true) => (a_i, 0.0, a_i, None), // soft-to-0
            (false, false) => (a_i, 0.0, a_i, Some((b, b-x_i))), // soft-to-0 with initial limit
        };
    }
    sorted.as_mut_slice()
          .sort_unstable_by(|(a, _, _, _), (b, _, _, _)| NaNLeast(*a).cmp(&NaNLeast(*b)));

    let mut nwlow = 0.0;
    let mut shift = 0.0;
    // This main loop is over different combinations of elements of the solution locked
    // to the soft-thresholding lower bound (`0` or `-y_i`), in the sorted order of locking.
    for (i, az_x_i) in izip!(0.., sorted.iter()) {
        // This `'attempt` loop is over different combinations of elements locked to the
        // lower bound (`-y_i ≤ 0`). It calculates `max_shift` as the maximum shift that
        // can be done until the locking would change (or become non-strictly-complementary).
        // If the main rule (*) gives an estimate of `λ` that stays below `max_shift`, it is
        // accepted. Otherwise `shift` is updated to `max_shift`, and we attempt again,
        // with the non-locking set that participates in the calculation of `λ` then including
        // the elements that are no longer locked to the lower bound.
        'attempt: loop {
            let mut nwthis = 0.0; // contribution to ‖w‖ from elements whose locking
                                  // soft-thresholding parameter equals `shift`
            let mut nxmore = 0.0; // ‖x'‖ for those elements thought not to be locked to
                                  // either the soft-thresholding limit or the lower bound
            let mut nwlbd = 0.0;  // contribution to ‖w‖ from those elements locked to their
                                  // lower bound
            let mut m = 0;
            let mut max_shift = F::INFINITY; // maximal shift for which our estimate of the set of
                                             // unlocked elements is valid.
            let mut max_shift_from_lbd = false; // Whether max_shift comes from the next element
                                                // or from a lower bound being reached.
            for az_x_j in sorted[i as usize..].iter() {
                if az_x_j.0 <= shift {
                    nwthis += az_x_j.1;
                } else {
                    match az_x_j.3 {
                        Some((l, s)) if shift < s => {
                            if max_shift > s {
                                max_shift_from_lbd = true;
                                max_shift = s;
                            }
                            nwlbd += -l;
                        },
                        _ => {
                            nxmore += az_x_j.2;
                            if m == 0 && max_shift > az_x_j.0 {
                                max_shift = az_x_j.0;
                                max_shift_from_lbd = false;
                            }
                            m += 1;
                        }
                    }
                }
            }

            // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ - (‖w‖-‖w'‖) = (1 + β m)‖w‖.
            let tmp = β*(nxmore - (nwlow + nwthis + nwlbd))/(1.0 + β*F::cast_from(m));
            if tmp > max_shift {
                if max_shift_from_lbd {
                    shift = max_shift;
                    continue 'attempt;
                } else {
                    break 'attempt
                }
            } else if tmp < shift {
                // TODO: this should probably be an assert!(false)
                break 'attempt;
            } else {
                // success
                x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, tmp, -y_i));
                return
            }
        }
        shift = az_x_i.0;
        nwlow += az_x_i.1;
    }
    // TODO: this fallback has not been verified to be correct
    x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, shift, -y_i));
}
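
// Illustrative usage sketch (added, not part of the original code): how the prox routine
// above is meant to be called. The names `y0`, `x0` and `scratch` and the value β = 0.5
// are made up for this example; `scratch` is workspace of the same length as the vector,
// and on entry the vector holds the prox argument, which is overwritten with the result.
// Assumes the crate's `Float` trait is implemented for `f64`.
#[allow(dead_code)]
fn l1squared_nonneg_prox_usage_sketch() {
    let y0 = DVector::from_vec(vec![1.0, 0.5, 0.0]);
    let mut x0 = DVector::from_vec(vec![0.3, -0.2, 2.0]);
    let mut scratch = vec![(0.0, 0.0, 0.0, None); x0.len()];
    // Compute x0 ← prox_{(β/2)‖·−y0‖₁² + δ_{≥0}}(x0) with β = 0.5.
    l1squared_nonneg_prox(&mut scratch, &mut x0, &y0, 0.5);
    // x0 now holds (an approximation of) the prox point.
}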
|
/// Proximal point method implementation of [`l1squared_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the forward (gradient) step, while the
/// proximal step handles the $β$-weighted term and the nonnegativity constraint.
|
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_nonneg_pp<F, I>(
    y : &DVector<F::MixedType>,
    g : &DVector<F::MixedType>,
    λ_ : F,
    β_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    θ_ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let λ = λ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    let mut τ = τ_.to_nalgebra_mixed();
    let θ = θ_.to_nalgebra_mixed();
    let mut tmp = std::iter::repeat((0.0, 0.0, 0.0, None)).take(x.len()).collect();
    let mut iters = 0;

    iterator.iterate(|state| {
        // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}}(x^k - τ(λ𝟙 - g))
        x.apply(|x_i| *x_i -= τ*λ);
        x.axpy(τ, g, 1.0);
        l1squared_nonneg_prox(&mut tmp, x, y, τ*β);

        iters += 1;
        // This gives O(1/N^2) rates due to monotonicity of function values.
        // Higher acceleration does not seem to be numerically stable.
        τ += θ;

        // The following would give O(1/N^3) rates due to monotonicity of function values,
        // but this higher acceleration does not seem to be numerically stable.
        //τ += F::cast_from(iters).to_nalgebra_mixed()*θ;

        state.if_verbose(|| {
            F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
        })
    });

    iters
}
|
/// PDPS implementation of [`l1squared_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
/// The parameter `θ` is used to rescale the operator (the identity) of the PDPS model.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_nonneg_pdps<F, I>(
    y : &DVector<F::MixedType>,
    g : &DVector<F::MixedType>,
    λ_ : F,
    β_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    σ_ : F,
    θ_ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let λ = λ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let θ = θ_.to_nalgebra_mixed();
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;

    iterator.iterate(|state| {
        // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ(θ w^k - g))
        x.axpy(-τ*θ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        l1squared_prox(&mut tmp, x, y, τ*β);

        // Dual step: w^{k+1} = proj_{(-∞,λ]}(w^k + σθ(2x^{k+1}-x^k))
        w.axpy(2.0*σ*θ, x, 1.0);
        w.axpy(-σ*θ, &xprev, 1.0);
        w.apply(|w_i| *w_i = w_i.min(λ));
        xprev.copy_from(x);

        iters += 1;

        state.if_verbose(|| {
            F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
        })
    });

    iters
}
|
/// Alternative PDPS implementation of [`l1squared_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
/// The parameter `θ` is used to rescale the operator (the identity) of the PDPS model.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_nonneg_pdps_alt<F, I>(
    y : &DVector<F::MixedType>,
    g : &DVector<F::MixedType>,
    λ_ : F,
    β_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    σ_ : F,
    θ_ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let θ = θ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    let βdivθ = β / θ;
    let σθ = σ*θ;
    let τθ = τ*θ;
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;

    iterator.iterate(|state| {
        // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k - g))
        x.axpy(-τθ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ));

        // Dual step: w^{k+1} = prox_{σ[(β/2)‖./θ-y‖₁²]^*}(q) for q = w^k + σθ(2x^{k+1}-x^k)
        //          = q - σ prox_{(β/(2σ))‖./θ-y‖₁²}(q/σ)
        //          = q - (σθ) prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ))
        //          = σθ(q/(σθ) - prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ)))
        w /= σθ;
        w.axpy(2.0, x, 1.0);
        w.axpy(-1.0, &xprev, 1.0);
        xprev.copy_from(&w); // use xprev as temporary variable
        l1squared_prox(&mut tmp, &mut xprev, y, βdivθ/σθ);
        w -= &xprev;
        w *= σθ;
        xprev.copy_from(x);

        iters += 1;

        state.if_verbose(|| {
            F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
        })
    });

    iters
}
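
// A generic restatement (added sketch, not used by the algorithms above) of the Moreau
// identity employed in the dual step of `l1squared_nonneg_pdps_alt`:
// prox_{σ h^*}(q) = q − σ prox_{h/σ}(q/σ). The helper name and the closure-based
// interface are made up for illustration; `prox_scaled` is assumed to overwrite its
// argument v with prox_{h/σ}(v).
#[allow(dead_code)]
fn prox_conjugate_sketch(
    q : &DVector<f64>,
    σ : f64,
    prox_scaled : impl FnOnce(&mut DVector<f64>),
) -> DVector<f64> {
    let mut v = q.clone() / σ;  // v = q/σ
    prox_scaled(&mut v);        // v = prox_{h/σ}(q/σ)
    q.clone() - v * σ           // q − σ prox_{h/σ}(q/σ)
}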
|
/// This function applies an iterative method for the solution of the problem
/// <div>$$
///     \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x).
/// $$</div>
///
/// This function returns the number of iterations taken.
#[replace_float_literals(F::cast_from(literal))]
pub fn l1squared_nonneg<F, I>(
    y : &DVector<F::MixedType>,
    g : &DVector<F::MixedType>,
    λ : F,
    β : F,
    x : &mut DVector<F::MixedType>,
    inner : &InnerSettings<F>,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    match inner.method {
        InnerMethod::PDPS | InnerMethod::Default => {
            let inner_θ = 0.001;
            // Estimate of ‖K‖ for K = θ\Id.
            let normest = inner_θ;
            let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
            l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator)
        },
        InnerMethod::FB /*| InnerMethod::Default*/ => {
            // The gradient ∇[x ↦ λ∑x - g^⊤ x] = λ𝟙 - g is constant, so its Lipschitz
            // factor is 0 and FB is just a proximal point method with no constraints on τ.
            // We “accelerate” it by adding to τ the constant θ on each iteration.
            // Exponential growth does not seem stable.
            let inner_τ = inner.fb_τ0;
            let inner_θ = inner_τ;
            l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator)
        },
        _ => unimplemented!("{:?}", inner.method),
    }
}