src/subproblem/l1squared_nonneg.rs

branch
dev
changeset 34
efa60bc4f743
child 39
6316d68b58af
equal deleted inserted replaced
33:aec67cdd6b14 34:efa60bc4f743
1 /*!
2 Iterative algorithms for solving the finite-dimensional subproblem with constraint.
3 */
4
5 use nalgebra::DVector;
6 use numeric_literals::replace_float_literals;
7 use itertools::izip;
8 //use std::iter::zip;
9 use std::cmp::Ordering::*;
10
11 use alg_tools::iterate::{
12 AlgIteratorFactory,
13 AlgIteratorState,
14 };
15 use alg_tools::nalgebra_support::ToNalgebraRealField;
16 use alg_tools::norms::{Dist, L1};
17 use alg_tools::nanleast::NaNLeast;
18
19 use crate::types::*;
20 use super::{
21 InnerMethod,
22 InnerSettings
23 };
24 use super::nonneg::nonneg_soft_thresholding;
25 use super::l1squared_unconstrained::l1squared_prox;
26
27 /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero.
28 #[replace_float_literals(F::cast_from(literal))]
29 pub(super) fn max_interval_dist_to_zero<F : Float>(dist : F, lb : F, ub : F) -> F {
30 if lb < 0.0 {
31 if ub > 0.0 {
32 dist
33 } else {
34 dist.max(-ub)
35 }
36 } else /* ub ≥ 0.0*/ {
37 dist.max(lb)
38 }
39 }
40
41 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥}(x)$ at $x$.
42 ///
43 /// `v` will be modified and cannot be trusted to contain useful values afterwards.
44 #[replace_float_literals(F::cast_from(literal))]
45 fn min_subdifferential<F : Float + nalgebra::RealField>(
46 y : &DVector<F>,
47 x : &DVector<F>,
48 g : &DVector<F>,
49 λ : F,
50 β : F
51 ) -> F {
52 let mut val = 0.0;
53 let tmp = β*y.dist(x, L1);
54 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
55 let (mut lb, mut ub) = (-g_i, -g_i);
56 match x_i.partial_cmp(y_i) {
57 Some(Greater) => { lb += tmp; ub += tmp },
58 Some(Less) => { lb -= tmp; ub -= tmp },
59 Some(Equal) => { lb -= tmp; ub += tmp },
60 None => {},
61 }
62 match x_i.partial_cmp(&0.0) {
63 Some(Greater) => { lb += λ; ub += λ },
64 // Less should not happen
65 Some(Less|Equal) => { lb = F::NEG_INFINITY; ub += λ },
66 None => {},
67 };
68 val = max_interval_dist_to_zero(val, lb, ub);
69 }
70 val
71 }
72
73 #[replace_float_literals(F::cast_from(literal))]
74 fn lbd_soft_thresholding<F : Float>(v : F, λ : F, b : F) -> F
75 {
76 match (b >= 0.0, v >= b) {
77 (true, false) => b,
78 (true, true) => b.max(v - λ), // soft-to-b from above
79 (false, true) => super::unconstrained::soft_thresholding(v, λ),
80 (false, false) => 0.0.min(b.max(v - λ)), // soft-to-0 with lower bound
81 }
82 }
83
84 /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2 + δ_{≥0}(x)$.
85 ///
86 /// To derive an algorithm for this, we can use
87 /// $prox_f(x) = prox_{f_0}(x - y) - y$ for
88 /// $f_0(z)=\frac{β}{2}\norm{z}_1^2 + δ_{≥-y}(z)$.
89 /// Now, the optimality conditions for $w = prox_{f_0}(x)$ are
90 /// $$\tag{*}
91 /// x ∈ w + β\norm{w}_1\sign w + N_{≥ -y}(w).
92 /// $$
93 /// If we know $\norm{w}_1$, then this is easily solved by lower-bounded soft-thresholding.
94 /// We find this by sorting the elements by the distance to the 'locked' lower-bounded
95 /// soft-thresholding target ($0$ or $-y_i$).
96 /// Then we loop over this sorted vector, increasing our estimate of $\norm{w}_1$ as we decide
97 /// that the soft-thresholding parameter `β\norm{w}_1` has to be such that the passed elements
98 /// will reach their locked value (after which they cannot change anymore, for a larger
99 /// soft-thresholding parameter. This has to be slightly more fine-grained for account
100 /// for the case that $-y_i<0$ and $x_i < -y_i$.
101 ///
102 /// Indeed, we denote by x' and w' the subset of elements such that w_i ≠ 0 and w_i > -y_i,
103 /// we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding lines of (*) that
104 /// $$
105 /// \norm{x'} = \norm{w'} + β \norm{w}_1 m.
106 /// $$
107 /// Having a value for $t = \norm{w}-\norm{w'}$, we can then calculate
108 /// $$
109 /// \norm{x'} - t = (1+β m)\norm{w}_1,
110 /// $$
111 /// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}_1$.
112 /// Since we do not actually know the unlocked elements, but just loop over all the possibilities
113 /// for them, we have to check that $λ$ is above the current lower bound for this parameter
114 /// (`shift` in the code), and below a value that would cause changes in the locked set
115 /// (`max_shift` in the code).
116 #[replace_float_literals(F::cast_from(literal))]
117 pub(super) fn l1squared_nonneg_prox<F :Float + nalgebra::RealField>(
118 sorted : &mut Vec<(F, F, F, Option<(F, F)>)>,
119 x : &mut DVector<F>,
120 y : &DVector<F>,
121 β : F
122 ) {
123 // nalgebra double-definition bullshit workaround
124 //let max = alg_tools::NumTraitsFloat::max;
125 let abs = alg_tools::NumTraitsFloat::abs;
126
127 *x -= y;
128
129 for (az_x_i, &x_i, &y_i) in izip!(sorted.iter_mut(), x.iter(), y.iter()) {
130 // The first component of each az_x_i contains the distance of x_i to the
131 // soft-thresholding limit. If it is negative, it is always reached.
132 // The second component contains the absolute value of the result for that component
133 // w_i of the solution, if the soft-thresholding limit is reached.
134 // This is stored here due to the sorting, although could otherwise be computed directly.
135 // Likewise the third component contains the absolute value of x_i.
136 // The fourth component contains an initial lower bound.
137 let a_i = abs(x_i);
138 let b = -y_i;
139 *az_x_i = match (b >= 0.0, x_i >= b) {
140 (true, false) => (x_i-b, b, a_i, None), // w_i=b, so sorting element negative!
141 (true, true) => (x_i-b, b, a_i, None), // soft-to-b from above
142 (false, true) => (a_i, 0.0, a_i, None), // soft-to-0
143 (false, false) => (a_i, 0.0, a_i, Some((b, b-x_i))), // soft-to-0 with initial limit
144 };
145 }
146 sorted.as_mut_slice()
147 .sort_unstable_by(|(a, _, _, _), (b, _, _, _)| NaNLeast(*a).cmp(&NaNLeast(*b)));
148
149 let mut nwlow = 0.0;
150 let mut shift = 0.0;
151 // This main loop is over different combinations of elements of the solution locked
152 // to the soft-thresholding lower bound (`0` or `-y_i`), in the sorted order of locking.
153 for (i, az_x_i) in izip!(0.., sorted.iter()) {
154 // This 'attempt is over different combinations of elements locked to the
155 // lower bound (`-y_i ≤ 0`). It calculates `max_shift` as the maximum shift that
156 // can be done until the locking would change (or become non-strictly-complementary).
157 // If the main rule (*) gives an estimate of `λ` that stays below `max_shift`, it is
158 // accepted. Otherwise `shift` is updated to `max_shift`, and we attempt again,
159 // with the non-locking set that participates in the calculation of `λ` then including
160 // the elements that are no longer locked to the lower bound.
161 'attempt: loop {
162 let mut nwthis = 0.0; // contribution to ‖w‖ from elements with locking
163 //soft-thresholding parameter = `shift`
164 let mut nxmore = 0.0; // ‖x'‖ for those elements through to not be locked to
165 // neither the soft-thresholding limit, nor the lower bound
166 let mut nwlbd = 0.0; // contribution to ‖w‖ from those elements locked to their
167 // lower bound
168 let mut m = 0;
169 let mut max_shift = F::INFINITY; // maximal shift for which our estimate of the set of
170 // unlocked elements is valid.
171 let mut max_shift_from_lbd = false; // Whether max_shift comes from the next element
172 // or from a lower bound being reached.
173 for az_x_j in sorted[i as usize..].iter() {
174 if az_x_j.0 <= shift {
175 nwthis += az_x_j.1;
176 } else {
177 match az_x_j.3 {
178 Some((l, s)) if shift < s => {
179 if max_shift > s {
180 max_shift_from_lbd = true;
181 max_shift = s;
182 }
183 nwlbd += -l;
184 },
185 _ => {
186 nxmore += az_x_j.2;
187 if m == 0 && max_shift > az_x_j.0 {
188 max_shift = az_x_j.0;
189 max_shift_from_lbd = false;
190 }
191 m += 1;
192 }
193 }
194 }
195 }
196
197 // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ - (‖w‖-‖w'‖)= (1 + β m)‖w‖.
198 let tmp = β*(nxmore - (nwlow + nwthis + nwlbd))/(1.0 + β*F::cast_from(m));
199 if tmp > max_shift {
200 if max_shift_from_lbd {
201 shift = max_shift;
202 continue 'attempt;
203 } else {
204 break 'attempt
205 }
206 } else if tmp < shift {
207 // TODO: this should probably be an assert!(false)
208 break 'attempt;
209 } else {
210 // success
211 x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, tmp, -y_i));
212 return
213 }
214 }
215 shift = az_x_i.0;
216 nwlow += az_x_i.1;
217 }
218 // TODO: this fallback has not been verified to be correct
219 x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, shift, -y_i));
220 }
221
222 /// Proximal point method implementation of [`l1squared_nonneg`].
223 /// For detailed documentation of the inputs and outputs, refer to there.
224 ///
225 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
226 /// for potential performance improvements.
227 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
228 pub fn l1squared_nonneg_pp<F, I>(
229 y : &DVector<F::MixedType>,
230 g : &DVector<F::MixedType>,
231 λ_ : F,
232 β_ : F,
233 x : &mut DVector<F::MixedType>,
234 τ_ : F,
235 θ_ : F,
236 iterator : I
237 ) -> usize
238 where F : Float + ToNalgebraRealField,
239 I : AlgIteratorFactory<F>
240 {
241 let λ = λ_.to_nalgebra_mixed();
242 let β = β_.to_nalgebra_mixed();
243 let mut τ = τ_.to_nalgebra_mixed();
244 let θ = θ_.to_nalgebra_mixed();
245 let mut tmp = std::iter::repeat((0.0, 0.0, 0.0, None)).take(x.len()).collect();
246 let mut iters = 0;
247
248 iterator.iterate(|state| {
249 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g))
250 x.apply(|x_i| *x_i -= τ*λ);
251 x.axpy(τ, g, 1.0);
252 l1squared_nonneg_prox(&mut tmp, x, y, τ*β);
253
254 iters += 1;
255 // This gives O(1/N^2) rates due to monotonicity of function values.
256 // Higher acceleration does not seem to be numerically stable.
257 τ += θ;
258
259 // This gives O(1/N^3) rates due to monotonicity of function values.
260 // Higher acceleration does not seem to be numerically stable.
261 //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ;
262
263 state.if_verbose(|| {
264 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
265 })
266 });
267
268 iters
269 }
270
271 /// PDPS implementation of [`l1squared_nonneg`].
272 /// For detailed documentation of the inputs and outputs, refer to there.
273 ///
274 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
275 /// for potential performance improvements.
276 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model.
277 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
278 pub fn l1squared_nonneg_pdps<F, I>(
279 y : &DVector<F::MixedType>,
280 g : &DVector<F::MixedType>,
281 λ_ : F,
282 β_ : F,
283 x : &mut DVector<F::MixedType>,
284 τ_ : F,
285 σ_ : F,
286 θ_ : F,
287 iterator : I
288 ) -> usize
289 where F : Float + ToNalgebraRealField,
290 I : AlgIteratorFactory<F>
291 {
292 let λ = λ_.to_nalgebra_mixed();
293 let β = β_.to_nalgebra_mixed();
294 let τ = τ_.to_nalgebra_mixed();
295 let σ = σ_.to_nalgebra_mixed();
296 let θ = θ_.to_nalgebra_mixed();
297 let mut w = DVector::zeros(x.len());
298 let mut tmp = DVector::zeros(x.len());
299 let mut xprev = x.clone();
300 let mut iters = 0;
301
302 iterator.iterate(|state| {
303 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ (w^k - g))
304 x.axpy(-τ*θ, &w, 1.0);
305 x.axpy(τ, g, 1.0);
306 l1squared_prox(&mut tmp, x, y, τ*β);
307
308 // Dual step: w^{k+1} = proj_{[-∞,λ]}(w^k + σ(2x^{k+1}-x^k))
309 w.axpy(2.0*σ*θ, x, 1.0);
310 w.axpy(-σ*θ, &xprev, 1.0);
311 w.apply(|w_i| *w_i = w_i.min(λ));
312 xprev.copy_from(x);
313
314 iters +=1;
315
316 state.if_verbose(|| {
317 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
318 })
319 });
320
321 iters
322 }
323
324 /// Alternative PDPS implementation of [`l1squared_nonneg`].
325 /// For detailed documentation of the inputs and outputs, refer to there.
326 ///
327 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
328 /// for potential performance improvements.
329 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model.
330 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
331 pub fn l1squared_nonneg_pdps_alt<F, I>(
332 y : &DVector<F::MixedType>,
333 g : &DVector<F::MixedType>,
334 λ_ : F,
335 β_ : F,
336 x : &mut DVector<F::MixedType>,
337 τ_ : F,
338 σ_ : F,
339 θ_ : F,
340 iterator : I
341 ) -> usize
342 where F : Float + ToNalgebraRealField,
343 I : AlgIteratorFactory<F>
344 {
345 let λ = λ_.to_nalgebra_mixed();
346 let τ = τ_.to_nalgebra_mixed();
347 let σ = σ_.to_nalgebra_mixed();
348 let θ = θ_.to_nalgebra_mixed();
349 let β = β_.to_nalgebra_mixed();
350 let βdivθ = β / θ;
351 let σθ = σ*θ;
352 let τθ = τ*θ;
353 let mut w = DVector::zeros(x.len());
354 let mut tmp = DVector::zeros(x.len());
355 let mut xprev = x.clone();
356 let mut iters = 0;
357
358 iterator.iterate(|state| {
359 // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g))
360 x.axpy(-τθ, &w, 1.0);
361 x.axpy(τ, g, 1.0);
362 x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ));
363
364 // Dual step: w^{k+1} = prox_{σ[(β/2)‖./θ-y‖₁²]^*}(q) for q = w^k + σθ(2x^{k+1}-x^k)
365 // = q - σ prox_{(β/(2σ))‖./θ-y‖₁²}(q/σ)
366 // = q - (σθ) prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ))
367 // = σθ(q/(σθ) - prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ))
368 w /= σθ;
369 w.axpy(2.0, x, 1.0);
370 w.axpy(-1.0, &xprev, 1.0);
371 xprev.copy_from(&w); // use xprev as temporary variable
372 l1squared_prox(&mut tmp, &mut xprev, y, βdivθ/σθ);
373 w -= &xprev;
374 w *= σθ;
375 xprev.copy_from(x);
376
377 iters +=1;
378
379 state.if_verbose(|| {
380 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
381 })
382 });
383
384 iters
385 }
386
387
388 /// This function applies an iterative method for the solution of the problem
389 /// <div>$$
390 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x).
391 /// $$</div>
392 ///
393 /// This function returns the number of iterations taken.
394 #[replace_float_literals(F::cast_from(literal))]
395 pub fn l1squared_nonneg<F, I>(
396 y : &DVector<F::MixedType>,
397 g : &DVector<F::MixedType>,
398 λ : F,
399 β : F,
400 x : &mut DVector<F::MixedType>,
401 inner : &InnerSettings<F>,
402 iterator : I
403 ) -> usize
404 where F : Float + ToNalgebraRealField,
405 I : AlgIteratorFactory<F>
406 {
407 match inner.method {
408 InnerMethod::PDPS | InnerMethod::Default => {
409 let inner_θ = 0.001;
410 // Estimate of ‖K‖ for K=θ\Id.
411 let normest = inner_θ;
412 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
413 l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator)
414 },
415 InnerMethod::FB /*| InnerMethod::Default*/ => {
416 // The Lipschitz factor of ∇[x ↦ g^⊤ x + λ∑x]=g - λ𝟙 is FB is just a proximal point
417 // method with on constraints on τ. We “accelerate” it by adding to τ the constant θ
418 // on each iteration. Exponential growth does not seem stable.
419 let inner_τ = inner.fb_τ0;
420 let inner_θ = inner_τ;
421 l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator)
422 },
423 _ => unimplemented!("{:?}", inner.method),
424 }
425 }

mercurial