| |
1 /*! |
| |
2 Iterative algorithms for solving the finite-dimensional subproblem without constraints. |
| |
3 */ |
| |
4 |
| |
5 use nalgebra::DVector; |
| |
6 use numeric_literals::replace_float_literals; |
| |
7 use itertools::izip; |
| |
8 use std::cmp::Ordering::*; |
| |
9 |
| |
10 use std::iter::zip; |
| |
11 use alg_tools::iterate::{ |
| |
12 AlgIteratorFactory, |
| |
13 AlgIteratorState, |
| |
14 }; |
| |
15 use alg_tools::nalgebra_support::ToNalgebraRealField; |
| |
16 use alg_tools::nanleast::NaNLeast; |
| |
17 use alg_tools::norms::{Dist, L1}; |
| |
18 |
| |
19 use crate::types::*; |
| |
20 use super::{ |
| |
21 InnerMethod, |
| |
22 InnerSettings |
| |
23 }; |
| |
24 use super::unconstrained::soft_thresholding; |
| |
25 use super::l1squared_nonneg::max_interval_dist_to_zero; |
| |
26 |
| |
27 /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$. |
| |
28 /// |
| |
29 /// To derive an algorithm for this, we can assume that $y=0$, as |
| |
30 /// $prox_f(x) = prox_{f_0}(x - y) - y$ for $f_0=\frac{β}{2}\norm{x}_1^2$. |
| |
31 /// Now, the optimality conditions for $w = prox_f(x)$ are |
| |
32 /// $$\tag{*} |
| |
33 /// 0 ∈ w-x + β\norm{w}_1\sign w. |
| |
34 /// $$ |
| |
35 /// Clearly then $w = \soft_{β\norm{w}_1}(x)$. |
| |
36 /// Thus the components of $x$ with smallest absolute value will be zeroed out. |
| |
37 /// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components |
| |
38 /// of $x$, and by $m$ their count, multipying the corresponding lines of (*) by $\sign x'$, |
| |
39 /// we obtain |
| |
40 /// $$ |
| |
41 /// \norm{x'}_1 = (1+βm)\norm{w'}_1. |
| |
42 /// $$ |
| |
43 /// That is, $\norm{w}_1=\norm{w'}_1=\norm{x'}_1/(1+βm)$. |
| |
44 /// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest |
| |
45 /// elements, we can easily calculate what $\norm{w}_1$ should be for that choice, and |
| |
46 /// then easily calculate $w = \soft_{β\norm{w}_1}(x)$. We just have to verify that |
| |
47 /// the resulting $w$ has the same norm. There's a shortcut to this, as we work |
| |
48 /// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the |
| |
49 /// condition of soft-thresholding to remain non-zero: $|x_i|>τ\norm{x'}/(1+τm)$. |
| |
50 /// Clearly, if this condition fails for x_i, it will fail for all the components |
| |
51 /// already exluced. While, if it holds, it will hold for all components not excluded. |
| |
52 #[replace_float_literals(F::cast_from(literal))] |
| |
53 pub(super) fn l1squared_prox<F :Float + nalgebra::RealField>( |
| |
54 sorted_abs : &mut DVector<F>, |
| |
55 x : &mut DVector<F>, |
| |
56 y : &DVector<F>, |
| |
57 β : F |
| |
58 ) { |
| |
59 sorted_abs.copy_from(x); |
| |
60 sorted_abs.axpy(-1.0, y, 1.0); |
| |
61 sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i)); |
| |
62 sorted_abs.as_mut_slice().sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b))); |
| |
63 |
| |
64 let mut n = sorted_abs.sum(); |
| |
65 for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) { |
| |
66 // test first |
| |
67 let tmp = β*n/(1.0 + β*F::cast_from(m)); |
| |
68 if *az_i <= tmp { |
| |
69 // Fail |
| |
70 n -= *az_i; |
| |
71 } else { |
| |
72 // Success |
| |
73 x.zip_apply(y, |x_i, y_i| *x_i = y_i + soft_thresholding(*x_i-y_i, tmp)); |
| |
74 return |
| |
75 } |
| |
76 } |
| |
77 // m = 0 should always work, but x is zero. |
| |
78 x.fill(0.0); |
| |
79 } |
| |
80 |
| |
81 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$. |
| |
82 /// |
| |
83 /// `v` will be modified and cannot be trusted to contain useful values afterwards. |
| |
84 #[replace_float_literals(F::cast_from(literal))] |
| |
85 fn min_subdifferential<F : Float + nalgebra::RealField>( |
| |
86 y : &DVector<F>, |
| |
87 x : &DVector<F>, |
| |
88 g : &DVector<F>, |
| |
89 λ : F, |
| |
90 β : F |
| |
91 ) -> F { |
| |
92 let mut val = 0.0; |
| |
93 let tmp = β*y.dist(x, L1); |
| |
94 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { |
| |
95 let (mut lb, mut ub) = (-g_i, -g_i); |
| |
96 match x_i.partial_cmp(y_i) { |
| |
97 Some(Greater) => { lb += tmp; ub += tmp }, |
| |
98 Some(Less) => { lb -= tmp; ub -= tmp }, |
| |
99 Some(Equal) => { lb -= tmp; ub += tmp }, |
| |
100 None => {}, |
| |
101 } |
| |
102 match x_i.partial_cmp(&0.0) { |
| |
103 Some(Greater) => { lb += λ; ub += λ }, |
| |
104 Some(Less) => { lb -= λ; ub -= λ }, |
| |
105 Some(Equal) => { lb -= λ; ub += λ }, |
| |
106 None => {}, |
| |
107 }; |
| |
108 val = max_interval_dist_to_zero(val, lb, ub); |
| |
109 } |
| |
110 val |
| |
111 } |
| |
112 |
| |
113 |
| |
114 /// PDPS implementation of [`l1squared_unconstrained`]. |
| |
115 /// For detailed documentation of the inputs and outputs, refer to there. |
| |
116 /// |
| |
117 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
| |
118 /// for potential performance improvements. |
| |
119 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| |
120 pub fn l1squared_unconstrained_pdps<F, I>( |
| |
121 y : &DVector<F::MixedType>, |
| |
122 g : &DVector<F::MixedType>, |
| |
123 λ_ : F, |
| |
124 β_ : F, |
| |
125 x : &mut DVector<F::MixedType>, |
| |
126 τ_ : F, |
| |
127 σ_ : F, |
| |
128 iterator : I |
| |
129 ) -> usize |
| |
130 where F : Float + ToNalgebraRealField, |
| |
131 I : AlgIteratorFactory<F> |
| |
132 { |
| |
133 let λ = λ_.to_nalgebra_mixed(); |
| |
134 let β = β_.to_nalgebra_mixed(); |
| |
135 let τ = τ_.to_nalgebra_mixed(); |
| |
136 let σ = σ_.to_nalgebra_mixed(); |
| |
137 let mut w = DVector::zeros(x.len()); |
| |
138 let mut tmp = DVector::zeros(x.len()); |
| |
139 let mut xprev = x.clone(); |
| |
140 let mut iters = 0; |
| |
141 |
| |
142 iterator.iterate(|state| { |
| |
143 // Primal step: x^{k+1} = prox_{τ|.-y|_1^2}(x^k - τ (w^k - g)) |
| |
144 x.axpy(-τ, &w, 1.0); |
| |
145 x.axpy(τ, g, 1.0); |
| |
146 l1squared_prox(&mut tmp, x, y, τ*β); |
| |
147 |
| |
148 // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k)) |
| |
149 w.axpy(2.0*σ, x, 1.0); |
| |
150 w.axpy(-σ, &xprev, 1.0); |
| |
151 w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ)); |
| |
152 xprev.copy_from(x); |
| |
153 |
| |
154 iters +=1; |
| |
155 |
| |
156 state.if_verbose(|| { |
| |
157 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) |
| |
158 }) |
| |
159 }); |
| |
160 |
| |
161 iters |
| |
162 } |
| |
163 |
| |
164 /// Alternative PDPS implementation of [`l1squared_unconstrained`]. |
| |
165 /// For detailed documentation of the inputs and outputs, refer to there. |
| |
166 /// |
| |
167 /// By not dualising the 1-norm, this should produce more sparse solutions than |
| |
168 /// [`l1squared_unconstrained_pdps`]. |
| |
169 /// |
| |
170 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
| |
171 /// for potential performance improvements. |
| |
172 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. |
| |
173 /// We rewrite |
| |
174 /// <div>$$ |
| |
175 /// \begin{split} |
| |
176 /// & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ \\ |
| |
177 /// & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁ |
| |
178 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). |
| |
179 /// \end{split} |
| |
180 /// $$</div> |
| |
181 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| |
182 pub fn l1squared_unconstrained_pdps_alt<F, I>( |
| |
183 y : &DVector<F::MixedType>, |
| |
184 g : &DVector<F::MixedType>, |
| |
185 λ_ : F, |
| |
186 β_ : F, |
| |
187 x : &mut DVector<F::MixedType>, |
| |
188 τ_ : F, |
| |
189 σ_ : F, |
| |
190 θ_ : F, |
| |
191 iterator : I |
| |
192 ) -> usize |
| |
193 where F : Float + ToNalgebraRealField, |
| |
194 I : AlgIteratorFactory<F> |
| |
195 { |
| |
196 let λ = λ_.to_nalgebra_mixed(); |
| |
197 let τ = τ_.to_nalgebra_mixed(); |
| |
198 let σ = σ_.to_nalgebra_mixed(); |
| |
199 let θ = θ_.to_nalgebra_mixed(); |
| |
200 let β = β_.to_nalgebra_mixed(); |
| |
201 let σθ = σ*θ; |
| |
202 let τθ = τ*θ; |
| |
203 let mut w = DVector::zeros(x.len()); |
| |
204 let mut tmp = DVector::zeros(x.len()); |
| |
205 let mut xprev = x.clone(); |
| |
206 let mut iters = 0; |
| |
207 |
| |
208 iterator.iterate(|state| { |
| |
209 // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g)) |
| |
210 x.axpy(-τθ, &w, 1.0); |
| |
211 x.axpy(τ, g, 1.0); |
| |
212 x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ*λ)); |
| |
213 |
| |
214 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), |
| |
215 // we compute w^{k+1} = prox_{σg^*}(q) for |
| |
216 // = q - σ prox_{g/σ}(q/σ) |
| |
217 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) |
| |
218 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) |
| |
219 // where q/σ = w^k/σ + (2x^{k+1}-x^k), |
| |
220 w /= σ; |
| |
221 w.axpy(2.0, x, 1.0); |
| |
222 w.axpy(-1.0, &xprev, 1.0); |
| |
223 xprev.copy_from(&w); // use xprev as temporary variable |
| |
224 l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); |
| |
225 w -= &xprev; |
| |
226 w *= σ; |
| |
227 xprev.copy_from(x); |
| |
228 |
| |
229 iters += 1; |
| |
230 |
| |
231 state.if_verbose(|| { |
| |
232 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) |
| |
233 }) |
| |
234 }); |
| |
235 |
| |
236 iters |
| |
237 } |
| |
238 |
| |
239 |
| |
240 /// This function applies an iterative method for the solution of the problem |
| |
241 /// <div>$$ |
| |
242 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁. |
| |
243 /// $$</div> |
| |
244 /// Only PDPS is supported. |
| |
245 /// |
| |
246 /// This function returns the number of iterations taken. |
| |
247 #[replace_float_literals(F::cast_from(literal))] |
| |
248 pub fn l1squared_unconstrained<F, I>( |
| |
249 y : &DVector<F::MixedType>, |
| |
250 g : &DVector<F::MixedType>, |
| |
251 λ : F, |
| |
252 β : F, |
| |
253 x : &mut DVector<F::MixedType>, |
| |
254 inner : &InnerSettings<F>, |
| |
255 iterator : I |
| |
256 ) -> usize |
| |
257 where F : Float + ToNalgebraRealField, |
| |
258 I : AlgIteratorFactory<F> |
| |
259 { |
| |
260 // Estimate of ‖K‖ for K=θ Id. |
| |
261 let inner_θ = 1.0; |
| |
262 let normest = inner_θ; |
| |
263 |
| |
264 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); |
| |
265 |
| |
266 match inner.method { |
| |
267 InnerMethod::PDPS => |
| |
268 l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator), |
| |
269 other => unimplemented!("${other:?} is unimplemented"), |
| |
270 } |
| |
271 } |