src/subproblem/l1squared_unconstrained.rs

changeset 52
f0e8704d3f0e
parent 42
6a7365d73e4c
child 54
b3312eee105c
equal deleted inserted replaced
31:6105b5cd8d89 52:f0e8704d3f0e
1 /*!
2 Iterative algorithms for solving the finite-dimensional subproblem without constraints.
3 */
4
5 use nalgebra::DVector;
6 use numeric_literals::replace_float_literals;
7 use itertools::izip;
8 use std::cmp::Ordering::*;
9
10 use std::iter::zip;
11 use alg_tools::iterate::{
12 AlgIteratorFactory,
13 AlgIteratorState,
14 };
15 use alg_tools::nalgebra_support::ToNalgebraRealField;
16 use alg_tools::nanleast::NaNLeast;
17 use alg_tools::norms::{Dist, L1};
18
19 use crate::types::*;
20 use super::{
21 InnerMethod,
22 InnerSettings
23 };
24 use super::unconstrained::soft_thresholding;
25 use super::l1squared_nonneg::max_interval_dist_to_zero;
26
27 /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$.
28 ///
29 /// To derive an algorithm for this, we can assume that $y=0$, as
30 /// $prox_f(x) = prox_{f_0}(x - y) - y$ for $f_0=\frac{β}{2}\norm{x}_1^2$.
31 /// Now, the optimality conditions for $w = prox_f(x)$ are
32 /// $$\tag{*}
33 /// 0 ∈ w-x + β\norm{w}_1\sign w.
34 /// $$
35 /// Clearly then $w = \soft_{β\norm{w}_1}(x)$.
36 /// Thus the components of $x$ with smallest absolute value will be zeroed out.
37 /// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components
38 /// of $x$, and by $m$ their count, multipying the corresponding lines of (*) by $\sign x'$,
39 /// we obtain
40 /// $$
41 /// \norm{x'}_1 = (1+βm)\norm{w'}_1.
42 /// $$
43 /// That is, $\norm{w}_1=\norm{w'}_1=\norm{x'}_1/(1+βm)$.
44 /// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest
45 /// elements, we can easily calculate what $\norm{w}_1$ should be for that choice, and
46 /// then easily calculate $w = \soft_{β\norm{w}_1}(x)$. We just have to verify that
47 /// the resulting $w$ has the same norm. There's a shortcut to this, as we work
48 /// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the
49 /// condition of soft-thresholding to remain non-zero: $|x_i|>τ\norm{x'}/(1+τm)$.
50 /// Clearly, if this condition fails for x_i, it will fail for all the components
51 /// already exluced. While, if it holds, it will hold for all components not excluded.
52 #[replace_float_literals(F::cast_from(literal))]
53 pub(super) fn l1squared_prox<F :Float + nalgebra::RealField>(
54 sorted_abs : &mut DVector<F>,
55 x : &mut DVector<F>,
56 y : &DVector<F>,
57 β : F
58 ) {
59 sorted_abs.copy_from(x);
60 sorted_abs.axpy(-1.0, y, 1.0);
61 sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i));
62 sorted_abs.as_mut_slice().sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b)));
63
64 let mut n = sorted_abs.sum();
65 for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) {
66 // test first
67 let tmp = β*n/(1.0 + β*F::cast_from(m));
68 if *az_i <= tmp {
69 // Fail
70 n -= *az_i;
71 } else {
72 // Success
73 x.zip_apply(y, |x_i, y_i| *x_i = y_i + soft_thresholding(*x_i-y_i, tmp));
74 return
75 }
76 }
77 // m = 0 should always work, but x is zero.
78 x.fill(0.0);
79 }
80
81 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$.
82 ///
83 /// `v` will be modified and cannot be trusted to contain useful values afterwards.
84 #[replace_float_literals(F::cast_from(literal))]
85 fn min_subdifferential<F : Float + nalgebra::RealField>(
86 y : &DVector<F>,
87 x : &DVector<F>,
88 g : &DVector<F>,
89 λ : F,
90 β : F
91 ) -> F {
92 let mut val = 0.0;
93 let tmp = β*y.dist(x, L1);
94 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
95 let (mut lb, mut ub) = (-g_i, -g_i);
96 match x_i.partial_cmp(y_i) {
97 Some(Greater) => { lb += tmp; ub += tmp },
98 Some(Less) => { lb -= tmp; ub -= tmp },
99 Some(Equal) => { lb -= tmp; ub += tmp },
100 None => {},
101 }
102 match x_i.partial_cmp(&0.0) {
103 Some(Greater) => { lb += λ; ub += λ },
104 Some(Less) => { lb -= λ; ub -= λ },
105 Some(Equal) => { lb -= λ; ub += λ },
106 None => {},
107 };
108 val = max_interval_dist_to_zero(val, lb, ub);
109 }
110 val
111 }
112
113
114 /// PDPS implementation of [`l1squared_unconstrained`].
115 /// For detailed documentation of the inputs and outputs, refer to there.
116 ///
117 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
118 /// for potential performance improvements.
119 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
120 pub fn l1squared_unconstrained_pdps<F, I>(
121 y : &DVector<F::MixedType>,
122 g : &DVector<F::MixedType>,
123 λ_ : F,
124 β_ : F,
125 x : &mut DVector<F::MixedType>,
126 τ_ : F,
127 σ_ : F,
128 iterator : I
129 ) -> usize
130 where F : Float + ToNalgebraRealField,
131 I : AlgIteratorFactory<F>
132 {
133 let λ = λ_.to_nalgebra_mixed();
134 let β = β_.to_nalgebra_mixed();
135 let τ = τ_.to_nalgebra_mixed();
136 let σ = σ_.to_nalgebra_mixed();
137 let mut w = DVector::zeros(x.len());
138 let mut tmp = DVector::zeros(x.len());
139 let mut xprev = x.clone();
140 let mut iters = 0;
141
142 iterator.iterate(|state| {
143 // Primal step: x^{k+1} = prox_{τ|.-y|_1^2}(x^k - τ (w^k - g))
144 x.axpy(-τ, &w, 1.0);
145 x.axpy(τ, g, 1.0);
146 l1squared_prox(&mut tmp, x, y, τ*β);
147
148 // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k))
149 w.axpy(2.0*σ, x, 1.0);
150 w.axpy(-σ, &xprev, 1.0);
151 w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ));
152 xprev.copy_from(x);
153
154 iters +=1;
155
156 state.if_verbose(|| {
157 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
158 })
159 });
160
161 iters
162 }
163
164 /// Alternative PDPS implementation of [`l1squared_unconstrained`].
165 /// For detailed documentation of the inputs and outputs, refer to there.
166 ///
167 /// By not dualising the 1-norm, this should produce more sparse solutions than
168 /// [`l1squared_unconstrained_pdps`].
169 ///
170 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
171 /// for potential performance improvements.
172 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model.
173 /// We rewrite
174 /// <div>$$
175 /// \begin{split}
176 /// & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ \\
177 /// & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁
178 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w).
179 /// \end{split}
180 /// $$</div>
181 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
182 pub fn l1squared_unconstrained_pdps_alt<F, I>(
183 y : &DVector<F::MixedType>,
184 g : &DVector<F::MixedType>,
185 λ_ : F,
186 β_ : F,
187 x : &mut DVector<F::MixedType>,
188 τ_ : F,
189 σ_ : F,
190 θ_ : F,
191 iterator : I
192 ) -> usize
193 where F : Float + ToNalgebraRealField,
194 I : AlgIteratorFactory<F>
195 {
196 let λ = λ_.to_nalgebra_mixed();
197 let τ = τ_.to_nalgebra_mixed();
198 let σ = σ_.to_nalgebra_mixed();
199 let θ = θ_.to_nalgebra_mixed();
200 let β = β_.to_nalgebra_mixed();
201 let σθ = σ*θ;
202 let τθ = τ*θ;
203 let mut w = DVector::zeros(x.len());
204 let mut tmp = DVector::zeros(x.len());
205 let mut xprev = x.clone();
206 let mut iters = 0;
207
208 iterator.iterate(|state| {
209 // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g))
210 x.axpy(-τθ, &w, 1.0);
211 x.axpy(τ, g, 1.0);
212 x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ*λ));
213
214 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
215 // we compute w^{k+1} = prox_{σg^*}(q) for
216 // = q - σ prox_{g/σ}(q/σ)
217 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)
218 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ))
219 // where q/σ = w^k/σ + (2x^{k+1}-x^k),
220 w /= σ;
221 w.axpy(2.0, x, 1.0);
222 w.axpy(-1.0, &xprev, 1.0);
223 xprev.copy_from(&w); // use xprev as temporary variable
224 l1squared_prox(&mut tmp, &mut xprev, y, β/σθ);
225 w -= &xprev;
226 w *= σ;
227 xprev.copy_from(x);
228
229 iters += 1;
230
231 state.if_verbose(|| {
232 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
233 })
234 });
235
236 iters
237 }
238
239
240 /// This function applies an iterative method for the solution of the problem
241 /// <div>$$
242 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁.
243 /// $$</div>
244 /// Only PDPS is supported.
245 ///
246 /// This function returns the number of iterations taken.
247 #[replace_float_literals(F::cast_from(literal))]
248 pub fn l1squared_unconstrained<F, I>(
249 y : &DVector<F::MixedType>,
250 g : &DVector<F::MixedType>,
251 λ : F,
252 β : F,
253 x : &mut DVector<F::MixedType>,
254 inner : &InnerSettings<F>,
255 iterator : I
256 ) -> usize
257 where F : Float + ToNalgebraRealField,
258 I : AlgIteratorFactory<F>
259 {
260 // Estimate of ‖K‖ for K=θ Id.
261 let inner_θ = 1.0;
262 let normest = inner_θ;
263
264 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
265
266 match inner.method {
267 InnerMethod::PDPS =>
268 l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator),
269 other => unimplemented!("${other:?} is unimplemented"),
270 }
271 }

mercurial