| |
1 /*! |
| |
2 Iterative algorithms for solving the finite-dimensional subproblem with constraint. |
| |
3 */ |
| |
4 |
| |
5 use nalgebra::DVector; |
| |
6 use numeric_literals::replace_float_literals; |
| |
7 use itertools::izip; |
| |
8 //use std::iter::zip; |
| |
9 use std::cmp::Ordering::*; |
| |
10 |
| |
11 use alg_tools::iterate::{ |
| |
12 AlgIteratorFactory, |
| |
13 AlgIteratorState, |
| |
14 }; |
| |
15 use alg_tools::nalgebra_support::ToNalgebraRealField; |
| |
16 use alg_tools::norms::{Dist, L1}; |
| |
17 use alg_tools::nanleast::NaNLeast; |
| |
18 |
| |
19 use crate::types::*; |
| |
20 use super::{ |
| |
21 InnerMethod, |
| |
22 InnerSettings |
| |
23 }; |
| |
24 use super::nonneg::nonneg_soft_thresholding; |
| |
25 use super::l1squared_unconstrained::l1squared_prox; |
| |
26 |
| |
27 /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero. |
| |
28 #[replace_float_literals(F::cast_from(literal))] |
| |
29 pub(super) fn max_interval_dist_to_zero<F : Float>(dist : F, lb : F, ub : F) -> F { |
| |
30 if lb < 0.0 { |
| |
31 if ub > 0.0 { |
| |
32 dist |
| |
33 } else { |
| |
34 dist.max(-ub) |
| |
35 } |
| |
36 } else /* ub ≥ 0.0*/ { |
| |
37 dist.max(lb) |
| |
38 } |
| |
39 } |
| |
40 |
| |
41 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥}(x)$ at $x$. |
| |
42 /// |
| |
43 /// `v` will be modified and cannot be trusted to contain useful values afterwards. |
| |
44 #[replace_float_literals(F::cast_from(literal))] |
| |
45 fn min_subdifferential<F : Float + nalgebra::RealField>( |
| |
46 y : &DVector<F>, |
| |
47 x : &DVector<F>, |
| |
48 g : &DVector<F>, |
| |
49 λ : F, |
| |
50 β : F |
| |
51 ) -> F { |
| |
52 let mut val = 0.0; |
| |
53 let tmp = β*y.dist(x, L1); |
| |
54 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { |
| |
55 let (mut lb, mut ub) = (-g_i, -g_i); |
| |
56 match x_i.partial_cmp(y_i) { |
| |
57 Some(Greater) => { lb += tmp; ub += tmp }, |
| |
58 Some(Less) => { lb -= tmp; ub -= tmp }, |
| |
59 Some(Equal) => { lb -= tmp; ub += tmp }, |
| |
60 None => {}, |
| |
61 } |
| |
62 match x_i.partial_cmp(&0.0) { |
| |
63 Some(Greater) => { lb += λ; ub += λ }, |
| |
64 // Less should not happen |
| |
65 Some(Less|Equal) => { lb = F::NEG_INFINITY; ub += λ }, |
| |
66 None => {}, |
| |
67 }; |
| |
68 val = max_interval_dist_to_zero(val, lb, ub); |
| |
69 } |
| |
70 val |
| |
71 } |
| |
72 |
| |
73 #[replace_float_literals(F::cast_from(literal))] |
| |
74 fn lbd_soft_thresholding<F : Float>(v : F, λ : F, b : F) -> F |
| |
75 { |
| |
76 match (b >= 0.0, v >= b) { |
| |
77 (true, false) => b, |
| |
78 (true, true) => b.max(v - λ), // soft-to-b from above |
| |
79 (false, true) => super::unconstrained::soft_thresholding(v, λ), |
| |
80 (false, false) => 0.0.min(b.max(v - λ)), // soft-to-0 with lower bound |
| |
81 } |
| |
82 } |
| |
83 |
| |
84 /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2 + δ_{≥0}(x)$. |
| |
85 /// |
| |
86 /// To derive an algorithm for this, we can use |
| |
87 /// $prox_f(x) = prox_{f_0}(x - y) - y$ for |
| |
88 /// $f_0(z)=\frac{β}{2}\norm{z}_1^2 + δ_{≥-y}(z)$. |
| |
89 /// Now, the optimality conditions for $w = prox_{f_0}(x)$ are |
| |
90 /// $$\tag{*} |
| |
91 /// x ∈ w + β\norm{w}_1\sign w + N_{≥ -y}(w). |
| |
92 /// $$ |
| |
93 /// If we know $\norm{w}_1$, then this is easily solved by lower-bounded soft-thresholding. |
| |
94 /// We find this by sorting the elements by the distance to the 'locked' lower-bounded |
| |
95 /// soft-thresholding target ($0$ or $-y_i$). |
| |
96 /// Then we loop over this sorted vector, increasing our estimate of $\norm{w}_1$ as we decide |
| |
97 /// that the soft-thresholding parameter `β\norm{w}_1` has to be such that the passed elements |
| |
98 /// will reach their locked value (after which they cannot change anymore, for a larger |
| |
99 /// soft-thresholding parameter. This has to be slightly more fine-grained for account |
| |
100 /// for the case that $-y_i<0$ and $x_i < -y_i$. |
| |
101 /// |
| |
102 /// Indeed, we denote by x' and w' the subset of elements such that w_i ≠ 0 and w_i > -y_i, |
| |
103 /// we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding lines of (*) that |
| |
104 /// $$ |
| |
105 /// \norm{x'} = \norm{w'} + β \norm{w}_1 m. |
| |
106 /// $$ |
| |
107 /// Having a value for $t = \norm{w}-\norm{w'}$, we can then calculate |
| |
108 /// $$ |
| |
109 /// \norm{x'} - t = (1+β m)\norm{w}_1, |
| |
110 /// $$ |
| |
111 /// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}_1$. |
| |
112 /// Since we do not actually know the unlocked elements, but just loop over all the possibilities |
| |
113 /// for them, we have to check that $λ$ is above the current lower bound for this parameter |
| |
114 /// (`shift` in the code), and below a value that would cause changes in the locked set |
| |
115 /// (`max_shift` in the code). |
| |
116 #[replace_float_literals(F::cast_from(literal))] |
| |
117 pub(super) fn l1squared_nonneg_prox<F :Float + nalgebra::RealField>( |
| |
118 sorted : &mut Vec<(F, F, F, Option<(F, F)>)>, |
| |
119 x : &mut DVector<F>, |
| |
120 y : &DVector<F>, |
| |
121 β : F |
| |
122 ) { |
| |
123 // nalgebra double-definition bullshit workaround |
| |
124 //let max = alg_tools::NumTraitsFloat::max; |
| |
125 let abs = alg_tools::NumTraitsFloat::abs; |
| |
126 |
| |
127 *x -= y; |
| |
128 |
| |
129 for (az_x_i, &x_i, &y_i) in izip!(sorted.iter_mut(), x.iter(), y.iter()) { |
| |
130 // The first component of each az_x_i contains the distance of x_i to the |
| |
131 // soft-thresholding limit. If it is negative, it is always reached. |
| |
132 // The second component contains the absolute value of the result for that component |
| |
133 // w_i of the solution, if the soft-thresholding limit is reached. |
| |
134 // This is stored here due to the sorting, although could otherwise be computed directly. |
| |
135 // Likewise the third component contains the absolute value of x_i. |
| |
136 // The fourth component contains an initial lower bound. |
| |
137 let a_i = abs(x_i); |
| |
138 let b = -y_i; |
| |
139 *az_x_i = match (b >= 0.0, x_i >= b) { |
| |
140 (true, false) => (x_i-b, b, a_i, None), // w_i=b, so sorting element negative! |
| |
141 (true, true) => (x_i-b, b, a_i, None), // soft-to-b from above |
| |
142 (false, true) => (a_i, 0.0, a_i, None), // soft-to-0 |
| |
143 (false, false) => (a_i, 0.0, a_i, Some((b, b-x_i))), // soft-to-0 with initial limit |
| |
144 }; |
| |
145 } |
| |
146 sorted.as_mut_slice() |
| |
147 .sort_unstable_by(|(a, _, _, _), (b, _, _, _)| NaNLeast(*a).cmp(&NaNLeast(*b))); |
| |
148 |
| |
149 let mut nwlow = 0.0; |
| |
150 let mut shift = 0.0; |
| |
151 // This main loop is over different combinations of elements of the solution locked |
| |
152 // to the soft-thresholding lower bound (`0` or `-y_i`), in the sorted order of locking. |
| |
153 for (i, az_x_i) in izip!(0.., sorted.iter()) { |
| |
154 // This 'attempt is over different combinations of elements locked to the |
| |
155 // lower bound (`-y_i ≤ 0`). It calculates `max_shift` as the maximum shift that |
| |
156 // can be done until the locking would change (or become non-strictly-complementary). |
| |
157 // If the main rule (*) gives an estimate of `λ` that stays below `max_shift`, it is |
| |
158 // accepted. Otherwise `shift` is updated to `max_shift`, and we attempt again, |
| |
159 // with the non-locking set that participates in the calculation of `λ` then including |
| |
160 // the elements that are no longer locked to the lower bound. |
| |
161 'attempt: loop { |
| |
162 let mut nwthis = 0.0; // contribution to ‖w‖ from elements with locking |
| |
163 //soft-thresholding parameter = `shift` |
| |
164 let mut nxmore = 0.0; // ‖x'‖ for those elements through to not be locked to |
| |
165 // neither the soft-thresholding limit, nor the lower bound |
| |
166 let mut nwlbd = 0.0; // contribution to ‖w‖ from those elements locked to their |
| |
167 // lower bound |
| |
168 let mut m = 0; |
| |
169 let mut max_shift = F::INFINITY; // maximal shift for which our estimate of the set of |
| |
170 // unlocked elements is valid. |
| |
171 let mut max_shift_from_lbd = false; // Whether max_shift comes from the next element |
| |
172 // or from a lower bound being reached. |
| |
173 for az_x_j in sorted[i as usize..].iter() { |
| |
174 if az_x_j.0 <= shift { |
| |
175 nwthis += az_x_j.1; |
| |
176 } else { |
| |
177 match az_x_j.3 { |
| |
178 Some((l, s)) if shift < s => { |
| |
179 if max_shift > s { |
| |
180 max_shift_from_lbd = true; |
| |
181 max_shift = s; |
| |
182 } |
| |
183 nwlbd += -l; |
| |
184 }, |
| |
185 _ => { |
| |
186 nxmore += az_x_j.2; |
| |
187 if m == 0 && max_shift > az_x_j.0 { |
| |
188 max_shift = az_x_j.0; |
| |
189 max_shift_from_lbd = false; |
| |
190 } |
| |
191 m += 1; |
| |
192 } |
| |
193 } |
| |
194 } |
| |
195 } |
| |
196 |
| |
197 // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ - (‖w‖-‖w'‖)= (1 + β m)‖w‖. |
| |
198 let tmp = β*(nxmore - (nwlow + nwthis + nwlbd))/(1.0 + β*F::cast_from(m)); |
| |
199 if tmp > max_shift { |
| |
200 if max_shift_from_lbd { |
| |
201 shift = max_shift; |
| |
202 continue 'attempt; |
| |
203 } else { |
| |
204 break 'attempt |
| |
205 } |
| |
206 } else if tmp < shift { |
| |
207 // TODO: this should probably be an assert!(false) |
| |
208 break 'attempt; |
| |
209 } else { |
| |
210 // success |
| |
211 x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, tmp, -y_i)); |
| |
212 return |
| |
213 } |
| |
214 } |
| |
215 shift = az_x_i.0; |
| |
216 nwlow += az_x_i.1; |
| |
217 } |
| |
218 // TODO: this fallback has not been verified to be correct |
| |
219 x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, shift, -y_i)); |
| |
220 } |
| |
221 |
| |
222 /// Proximal point method implementation of [`l1squared_nonneg`]. |
| |
223 /// For detailed documentation of the inputs and outputs, refer to there. |
| |
224 /// |
| |
225 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
| |
226 /// for potential performance improvements. |
| |
227 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| |
228 pub fn l1squared_nonneg_pp<F, I>( |
| |
229 y : &DVector<F::MixedType>, |
| |
230 g : &DVector<F::MixedType>, |
| |
231 λ_ : F, |
| |
232 β_ : F, |
| |
233 x : &mut DVector<F::MixedType>, |
| |
234 τ_ : F, |
| |
235 θ_ : F, |
| |
236 iterator : I |
| |
237 ) -> usize |
| |
238 where F : Float + ToNalgebraRealField, |
| |
239 I : AlgIteratorFactory<F> |
| |
240 { |
| |
241 let λ = λ_.to_nalgebra_mixed(); |
| |
242 let β = β_.to_nalgebra_mixed(); |
| |
243 let mut τ = τ_.to_nalgebra_mixed(); |
| |
244 let θ = θ_.to_nalgebra_mixed(); |
| |
245 let mut tmp = std::iter::repeat((0.0, 0.0, 0.0, None)).take(x.len()).collect(); |
| |
246 let mut iters = 0; |
| |
247 |
| |
248 iterator.iterate(|state| { |
| |
249 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g)) |
| |
250 x.apply(|x_i| *x_i -= τ*λ); |
| |
251 x.axpy(τ, g, 1.0); |
| |
252 l1squared_nonneg_prox(&mut tmp, x, y, τ*β); |
| |
253 |
| |
254 iters += 1; |
| |
255 // This gives O(1/N^2) rates due to monotonicity of function values. |
| |
256 // Higher acceleration does not seem to be numerically stable. |
| |
257 τ += θ; |
| |
258 |
| |
259 // This gives O(1/N^3) rates due to monotonicity of function values. |
| |
260 // Higher acceleration does not seem to be numerically stable. |
| |
261 //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ; |
| |
262 |
| |
263 state.if_verbose(|| { |
| |
264 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) |
| |
265 }) |
| |
266 }); |
| |
267 |
| |
268 iters |
| |
269 } |
| |
270 |
| |
271 /// PDPS implementation of [`l1squared_nonneg`]. |
| |
272 /// For detailed documentation of the inputs and outputs, refer to there. |
| |
273 /// |
| |
274 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
| |
275 /// for potential performance improvements. |
| |
276 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. |
| |
277 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| |
278 pub fn l1squared_nonneg_pdps<F, I>( |
| |
279 y : &DVector<F::MixedType>, |
| |
280 g : &DVector<F::MixedType>, |
| |
281 λ_ : F, |
| |
282 β_ : F, |
| |
283 x : &mut DVector<F::MixedType>, |
| |
284 τ_ : F, |
| |
285 σ_ : F, |
| |
286 θ_ : F, |
| |
287 iterator : I |
| |
288 ) -> usize |
| |
289 where F : Float + ToNalgebraRealField, |
| |
290 I : AlgIteratorFactory<F> |
| |
291 { |
| |
292 let λ = λ_.to_nalgebra_mixed(); |
| |
293 let β = β_.to_nalgebra_mixed(); |
| |
294 let τ = τ_.to_nalgebra_mixed(); |
| |
295 let σ = σ_.to_nalgebra_mixed(); |
| |
296 let θ = θ_.to_nalgebra_mixed(); |
| |
297 let mut w = DVector::zeros(x.len()); |
| |
298 let mut tmp = DVector::zeros(x.len()); |
| |
299 let mut xprev = x.clone(); |
| |
300 let mut iters = 0; |
| |
301 |
| |
302 iterator.iterate(|state| { |
| |
303 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ (w^k - g)) |
| |
304 x.axpy(-τ*θ, &w, 1.0); |
| |
305 x.axpy(τ, g, 1.0); |
| |
306 l1squared_prox(&mut tmp, x, y, τ*β); |
| |
307 |
| |
308 // Dual step: w^{k+1} = proj_{[-∞,λ]}(w^k + σ(2x^{k+1}-x^k)) |
| |
309 w.axpy(2.0*σ*θ, x, 1.0); |
| |
310 w.axpy(-σ*θ, &xprev, 1.0); |
| |
311 w.apply(|w_i| *w_i = w_i.min(λ)); |
| |
312 xprev.copy_from(x); |
| |
313 |
| |
314 iters +=1; |
| |
315 |
| |
316 state.if_verbose(|| { |
| |
317 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) |
| |
318 }) |
| |
319 }); |
| |
320 |
| |
321 iters |
| |
322 } |
| |
323 |
| |
324 /// Alternative PDPS implementation of [`l1squared_nonneg`]. |
| |
325 /// For detailed documentation of the inputs and outputs, refer to there. |
| |
326 /// |
| |
327 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
| |
328 /// for potential performance improvements. |
| |
329 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. |
| |
330 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| |
331 pub fn l1squared_nonneg_pdps_alt<F, I>( |
| |
332 y : &DVector<F::MixedType>, |
| |
333 g : &DVector<F::MixedType>, |
| |
334 λ_ : F, |
| |
335 β_ : F, |
| |
336 x : &mut DVector<F::MixedType>, |
| |
337 τ_ : F, |
| |
338 σ_ : F, |
| |
339 θ_ : F, |
| |
340 iterator : I |
| |
341 ) -> usize |
| |
342 where F : Float + ToNalgebraRealField, |
| |
343 I : AlgIteratorFactory<F> |
| |
344 { |
| |
345 let λ = λ_.to_nalgebra_mixed(); |
| |
346 let τ = τ_.to_nalgebra_mixed(); |
| |
347 let σ = σ_.to_nalgebra_mixed(); |
| |
348 let θ = θ_.to_nalgebra_mixed(); |
| |
349 let β = β_.to_nalgebra_mixed(); |
| |
350 let βdivθ = β / θ; |
| |
351 let σθ = σ*θ; |
| |
352 let τθ = τ*θ; |
| |
353 let mut w = DVector::zeros(x.len()); |
| |
354 let mut tmp = DVector::zeros(x.len()); |
| |
355 let mut xprev = x.clone(); |
| |
356 let mut iters = 0; |
| |
357 |
| |
358 iterator.iterate(|state| { |
| |
359 // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g)) |
| |
360 x.axpy(-τθ, &w, 1.0); |
| |
361 x.axpy(τ, g, 1.0); |
| |
362 x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ)); |
| |
363 |
| |
364 // Dual step: w^{k+1} = prox_{σ[(β/2)‖./θ-y‖₁²]^*}(q) for q = w^k + σθ(2x^{k+1}-x^k) |
| |
365 // = q - σ prox_{(β/(2σ))‖./θ-y‖₁²}(q/σ) |
| |
366 // = q - (σθ) prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ)) |
| |
367 // = σθ(q/(σθ) - prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ)) |
| |
368 w /= σθ; |
| |
369 w.axpy(2.0, x, 1.0); |
| |
370 w.axpy(-1.0, &xprev, 1.0); |
| |
371 xprev.copy_from(&w); // use xprev as temporary variable |
| |
372 l1squared_prox(&mut tmp, &mut xprev, y, βdivθ/σθ); |
| |
373 w -= &xprev; |
| |
374 w *= σθ; |
| |
375 xprev.copy_from(x); |
| |
376 |
| |
377 iters +=1; |
| |
378 |
| |
379 state.if_verbose(|| { |
| |
380 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) |
| |
381 }) |
| |
382 }); |
| |
383 |
| |
384 iters |
| |
385 } |
| |
386 |
| |
387 |
| |
388 /// This function applies an iterative method for the solution of the problem |
| |
389 /// <div>$$ |
| |
390 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x). |
| |
391 /// $$</div> |
| |
392 /// |
| |
393 /// This function returns the number of iterations taken. |
| |
394 #[replace_float_literals(F::cast_from(literal))] |
| |
395 pub fn l1squared_nonneg<F, I>( |
| |
396 y : &DVector<F::MixedType>, |
| |
397 g : &DVector<F::MixedType>, |
| |
398 λ : F, |
| |
399 β : F, |
| |
400 x : &mut DVector<F::MixedType>, |
| |
401 inner : &InnerSettings<F>, |
| |
402 iterator : I |
| |
403 ) -> usize |
| |
404 where F : Float + ToNalgebraRealField, |
| |
405 I : AlgIteratorFactory<F> |
| |
406 { |
| |
407 match inner.method { |
| |
408 InnerMethod::PDPS | InnerMethod::Default => { |
| |
409 let inner_θ = 0.001; |
| |
410 // Estimate of ‖K‖ for K=θ\Id. |
| |
411 let normest = inner_θ; |
| |
412 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); |
| |
413 l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) |
| |
414 }, |
| |
415 InnerMethod::FB /*| InnerMethod::Default*/ => { |
| |
416 // The Lipschitz factor of ∇[x ↦ g^⊤ x + λ∑x]=g - λ𝟙 is FB is just a proximal point |
| |
417 // method with on constraints on τ. We “accelerate” it by adding to τ the constant θ |
| |
418 // on each iteration. Exponential growth does not seem stable. |
| |
419 let inner_τ = inner.fb_τ0; |
| |
420 let inner_θ = inner_τ; |
| |
421 l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator) |
| |
422 }, |
| |
423 _ => unimplemented!("{:?}", inner.method), |
| |
424 } |
| |
425 } |