| 1 /*! |
1 /*! |
| 2 Iterative algorithms for solving the finite-dimensional subproblem without constraints. |
2 Iterative algorithms for solving the finite-dimensional subproblem without constraints. |
| 3 */ |
3 */ |
| 4 |
4 |
| |
5 use itertools::izip; |
| 5 use nalgebra::DVector; |
6 use nalgebra::DVector; |
| 6 use numeric_literals::replace_float_literals; |
7 use numeric_literals::replace_float_literals; |
| 7 use itertools::izip; |
|
| 8 use std::cmp::Ordering::*; |
8 use std::cmp::Ordering::*; |
| 9 |
9 |
| 10 use std::iter::zip; |
10 use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState}; |
| 11 use alg_tools::iterate::{ |
|
| 12 AlgIteratorFactory, |
|
| 13 AlgIteratorState, |
|
| 14 }; |
|
| 15 use alg_tools::nalgebra_support::ToNalgebraRealField; |
11 use alg_tools::nalgebra_support::ToNalgebraRealField; |
| 16 use alg_tools::nanleast::NaNLeast; |
12 use alg_tools::nanleast::NaNLeast; |
| 17 use alg_tools::norms::{Dist, L1}; |
13 use alg_tools::norms::{Dist, L1}; |
| 18 |
14 use std::iter::zip; |
| |
15 |
| |
16 use super::l1squared_nonneg::max_interval_dist_to_zero; |
| |
17 use super::unconstrained::soft_thresholding; |
| |
18 use super::{InnerMethod, InnerSettings}; |
| 19 use crate::types::*; |
19 use crate::types::*; |
| 20 use super::{ |
20 |
| 21 InnerMethod, |
21 /// Calculate $\prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$. |
| 22 InnerSettings |
|
| 23 }; |
|
| 24 use super::unconstrained::soft_thresholding; |
|
| 25 use super::l1squared_nonneg::max_interval_dist_to_zero; |
|
| 26 |
|
| 27 /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$. |
|
| 28 /// |
22 /// |
| 29 /// To derive an algorithm for this, we can assume that $y=0$, as |
23 /// To derive an algorithm for this, we can assume that $y=0$, as |
| 30 /// $prox_f(x) = prox_{f_0}(x - y) - y$ for $f_0=\frac{β}{2}\norm{x}_1^2$. |
24 /// $\prox\_f(x) = \prox\_{f_0}(x - y) - y$ for $f\_0=\frac{β}{2}\norm{x}\_1^2$. |
| 31 /// Now, the optimality conditions for $w = prox_f(x)$ are |
25 /// Now, the optimality conditions for $w = \prox\_f(x)$ are |
| 32 /// $$\tag{*} |
26 /// $$ |
| 33 /// 0 ∈ w-x + β\norm{w}_1\sign w. |
27 /// 0 ∈ w-x + β\norm{w}\_1\sign w. |
| 34 /// $$ |
28 /// $$ |
| 35 /// Clearly then $w = \soft_{β\norm{w}_1}(x)$. |
29 /// Clearly then $w = \soft\_{β\norm{w}\_1}(x)$. |
| 36 /// Thus the components of $x$ with smallest absolute value will be zeroed out. |
30 /// Thus the components of $x$ with smallest absolute value will be zeroed out. |
| 37 /// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components |
31 /// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components |
| 38 /// of $x$, and by $m$ their count, multipying the corresponding lines of (*) by $\sign x'$, |
32 /// of $x$, and by $m$ their count, multipying the corresponding lines of (*) by $\sign x'$, |
| 39 /// we obtain |
33 /// we obtain |
| 40 /// $$ |
34 /// $$ |
| 41 /// \norm{x'}_1 = (1+βm)\norm{w'}_1. |
35 /// \norm{x'}\_1 = (1+βm)\norm{w'}\_1. |
| 42 /// $$ |
36 /// $$ |
| 43 /// That is, $\norm{w}_1=\norm{w'}_1=\norm{x'}_1/(1+βm)$. |
37 /// That is, $\norm{w}\_1=\norm{w'}\_1=\norm{x'}\_1/(1+βm)$. |
| 44 /// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest |
38 /// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest |
| 45 /// elements, we can easily calculate what $\norm{w}_1$ should be for that choice, and |
39 /// elements, we can easily calculate what $\norm{w}\_1$ should be for that choice, and |
| 46 /// then easily calculate $w = \soft_{β\norm{w}_1}(x)$. We just have to verify that |
40 /// then easily calculate $w = \soft_{β\norm{w}\_1}(x)$. We just have to verify that |
| 47 /// the resulting $w$ has the same norm. There's a shortcut to this, as we work |
41 /// the resulting $w$ has the same norm. There's a shortcut to this, as we work |
| 48 /// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the |
42 /// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the |
| 49 /// condition of soft-thresholding to remain non-zero: $|x_i|>τ\norm{x'}/(1+τm)$. |
43 /// condition of soft-thresholding to remain non-zero: $|x\_i|>τ\norm{x'}/(1+τm)$. |
| 50 /// Clearly, if this condition fails for x_i, it will fail for all the components |
44 /// Clearly, if this condition fails for $x\_i$, it will fail for all the components |
| 51 /// already exluced. While, if it holds, it will hold for all components not excluded. |
45 /// already exluced. While, if it holds, it will hold for all components not excluded. |
| 52 #[replace_float_literals(F::cast_from(literal))] |
46 #[replace_float_literals(F::cast_from(literal))] |
| 53 pub(super) fn l1squared_prox<F :Float + nalgebra::RealField>( |
47 pub(super) fn l1squared_prox<F: Float + nalgebra::RealField>( |
| 54 sorted_abs : &mut DVector<F>, |
48 sorted_abs: &mut DVector<F>, |
| 55 x : &mut DVector<F>, |
49 x: &mut DVector<F>, |
| 56 y : &DVector<F>, |
50 y: &DVector<F>, |
| 57 β : F |
51 β: F, |
| 58 ) { |
52 ) { |
| 59 sorted_abs.copy_from(x); |
53 sorted_abs.copy_from(x); |
| 60 sorted_abs.axpy(-1.0, y, 1.0); |
54 sorted_abs.axpy(-1.0, y, 1.0); |
| 61 sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i)); |
55 sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i)); |
| 62 sorted_abs.as_mut_slice().sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b))); |
56 sorted_abs |
| |
57 .as_mut_slice() |
| |
58 .sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b))); |
| 63 |
59 |
| 64 let mut n = sorted_abs.sum(); |
60 let mut n = sorted_abs.sum(); |
| 65 for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) { |
61 for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) { |
| 66 // test first |
62 // test first |
| 67 let tmp = β*n/(1.0 + β*F::cast_from(m)); |
63 let tmp = β * n / (1.0 + β * F::cast_from(m)); |
| 68 if *az_i <= tmp { |
64 if *az_i <= tmp { |
| 69 // Fail |
65 // Fail |
| 70 n -= *az_i; |
66 n -= *az_i; |
| 71 } else { |
67 } else { |
| 72 // Success |
68 // Success |
| 73 x.zip_apply(y, |x_i, y_i| *x_i = y_i + soft_thresholding(*x_i-y_i, tmp)); |
69 x.zip_apply(y, |x_i, y_i| { |
| 74 return |
70 *x_i = y_i + soft_thresholding(*x_i - y_i, tmp) |
| |
71 }); |
| |
72 return; |
| 75 } |
73 } |
| 76 } |
74 } |
| 77 // m = 0 should always work, but x is zero. |
75 // m = 0 should always work, but x is zero. |
| 78 x.fill(0.0); |
76 x.fill(0.0); |
| 79 } |
77 } |
| 80 |
78 |
| 81 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$. |
79 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$. |
| 82 /// |
80 /// |
| 83 /// `v` will be modified and cannot be trusted to contain useful values afterwards. |
81 /// `v` will be modified and cannot be trusted to contain useful values afterwards. |
| 84 #[replace_float_literals(F::cast_from(literal))] |
82 #[replace_float_literals(F::cast_from(literal))] |
| 85 fn min_subdifferential<F : Float + nalgebra::RealField>( |
83 fn min_subdifferential<F: Float + nalgebra::RealField>( |
| 86 y : &DVector<F>, |
84 y: &DVector<F>, |
| 87 x : &DVector<F>, |
85 x: &DVector<F>, |
| 88 g : &DVector<F>, |
86 g: &DVector<F>, |
| 89 λ : F, |
87 λ: F, |
| 90 β : F |
88 β: F, |
| 91 ) -> F { |
89 ) -> F { |
| 92 let mut val = 0.0; |
90 let mut val = 0.0; |
| 93 let tmp = β*y.dist(x, L1); |
91 let tmp = β * y.dist(x, L1); |
| 94 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { |
92 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { |
| 95 let (mut lb, mut ub) = (-g_i, -g_i); |
93 let (mut lb, mut ub) = (-g_i, -g_i); |
| 96 match x_i.partial_cmp(y_i) { |
94 match x_i.partial_cmp(y_i) { |
| 97 Some(Greater) => { lb += tmp; ub += tmp }, |
95 Some(Greater) => { |
| 98 Some(Less) => { lb -= tmp; ub -= tmp }, |
96 lb += tmp; |
| 99 Some(Equal) => { lb -= tmp; ub += tmp }, |
97 ub += tmp |
| 100 None => {}, |
98 } |
| |
99 Some(Less) => { |
| |
100 lb -= tmp; |
| |
101 ub -= tmp |
| |
102 } |
| |
103 Some(Equal) => { |
| |
104 lb -= tmp; |
| |
105 ub += tmp |
| |
106 } |
| |
107 None => {} |
| 101 } |
108 } |
| 102 match x_i.partial_cmp(&0.0) { |
109 match x_i.partial_cmp(&0.0) { |
| 103 Some(Greater) => { lb += λ; ub += λ }, |
110 Some(Greater) => { |
| 104 Some(Less) => { lb -= λ; ub -= λ }, |
111 lb += λ; |
| 105 Some(Equal) => { lb -= λ; ub += λ }, |
112 ub += λ |
| 106 None => {}, |
113 } |
| |
114 Some(Less) => { |
| |
115 lb -= λ; |
| |
116 ub -= λ |
| |
117 } |
| |
118 Some(Equal) => { |
| |
119 lb -= λ; |
| |
120 ub += λ |
| |
121 } |
| |
122 None => {} |
| 107 }; |
123 }; |
| 108 val = max_interval_dist_to_zero(val, lb, ub); |
124 val = max_interval_dist_to_zero(val, lb, ub); |
| 109 } |
125 } |
| 110 val |
126 val |
| 111 } |
127 } |
| 112 |
128 |
| 113 |
|
| 114 /// PDPS implementation of [`l1squared_unconstrained`]. |
129 /// PDPS implementation of [`l1squared_unconstrained`]. |
| 115 /// For detailed documentation of the inputs and outputs, refer to there. |
130 /// For detailed documentation of the inputs and outputs, refer to there. |
| 116 /// |
131 /// |
| 117 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
132 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
| 118 /// for potential performance improvements. |
133 /// for potential performance improvements. |
| 119 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
134 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| 120 pub fn l1squared_unconstrained_pdps<F, I>( |
135 pub fn l1squared_unconstrained_pdps<F, I>( |
| 121 y : &DVector<F::MixedType>, |
136 y: &DVector<F::MixedType>, |
| 122 g : &DVector<F::MixedType>, |
137 g: &DVector<F::MixedType>, |
| 123 λ_ : F, |
138 λ_: F, |
| 124 β_ : F, |
139 β_: F, |
| 125 x : &mut DVector<F::MixedType>, |
140 x: &mut DVector<F::MixedType>, |
| 126 τ_ : F, |
141 τ_: F, |
| 127 σ_ : F, |
142 σ_: F, |
| 128 iterator : I |
143 iterator: I, |
| 129 ) -> usize |
144 ) -> usize |
| 130 where F : Float + ToNalgebraRealField, |
145 where |
| 131 I : AlgIteratorFactory<F> |
146 F: Float + ToNalgebraRealField, |
| |
147 I: AlgIteratorFactory<F>, |
| 132 { |
148 { |
| 133 let λ = λ_.to_nalgebra_mixed(); |
149 let λ = λ_.to_nalgebra_mixed(); |
| 134 let β = β_.to_nalgebra_mixed(); |
150 let β = β_.to_nalgebra_mixed(); |
| 135 let τ = τ_.to_nalgebra_mixed(); |
151 let τ = τ_.to_nalgebra_mixed(); |
| 136 let σ = σ_.to_nalgebra_mixed(); |
152 let σ = σ_.to_nalgebra_mixed(); |
| 178 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). |
192 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). |
| 179 /// \end{split} |
193 /// \end{split} |
| 180 /// $$</div> |
194 /// $$</div> |
| 181 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
195 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| 182 pub fn l1squared_unconstrained_pdps_alt<F, I>( |
196 pub fn l1squared_unconstrained_pdps_alt<F, I>( |
| 183 y : &DVector<F::MixedType>, |
197 y: &DVector<F::MixedType>, |
| 184 g : &DVector<F::MixedType>, |
198 g: &DVector<F::MixedType>, |
| 185 λ_ : F, |
199 λ_: F, |
| 186 β_ : F, |
200 β_: F, |
| 187 x : &mut DVector<F::MixedType>, |
201 x: &mut DVector<F::MixedType>, |
| 188 τ_ : F, |
202 τ_: F, |
| 189 σ_ : F, |
203 σ_: F, |
| 190 θ_ : F, |
204 θ_: F, |
| 191 iterator : I |
205 iterator: I, |
| 192 ) -> usize |
206 ) -> usize |
| 193 where F : Float + ToNalgebraRealField, |
207 where |
| 194 I : AlgIteratorFactory<F> |
208 F: Float + ToNalgebraRealField, |
| |
209 I: AlgIteratorFactory<F>, |
| 195 { |
210 { |
| 196 let λ = λ_.to_nalgebra_mixed(); |
211 let λ = λ_.to_nalgebra_mixed(); |
| 197 let τ = τ_.to_nalgebra_mixed(); |
212 let τ = τ_.to_nalgebra_mixed(); |
| 198 let σ = σ_.to_nalgebra_mixed(); |
213 let σ = σ_.to_nalgebra_mixed(); |
| 199 let θ = θ_.to_nalgebra_mixed(); |
214 let θ = θ_.to_nalgebra_mixed(); |
| 200 let β = β_.to_nalgebra_mixed(); |
215 let β = β_.to_nalgebra_mixed(); |
| 201 let σθ = σ*θ; |
216 let σθ = σ * θ; |
| 202 let τθ = τ*θ; |
217 let τθ = τ * θ; |
| 203 let mut w = DVector::zeros(x.len()); |
218 let mut w = DVector::zeros(x.len()); |
| 204 let mut tmp = DVector::zeros(x.len()); |
219 let mut tmp = DVector::zeros(x.len()); |
| 205 let mut xprev = x.clone(); |
220 let mut xprev = x.clone(); |
| 206 let mut iters = 0; |
221 let mut iters = 0; |
| 207 |
222 |
| 208 iterator.iterate(|state| { |
223 iterator.iterate(|state| { |
| 209 // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g)) |
224 // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g)) |
| 210 x.axpy(-τθ, &w, 1.0); |
225 x.axpy(-τθ, &w, 1.0); |
| 211 x.axpy(τ, g, 1.0); |
226 x.axpy(τ, g, 1.0); |
| 212 x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ*λ)); |
227 x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ * λ)); |
| 213 |
228 |
| 214 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), |
229 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), |
| 215 // we compute w^{k+1} = prox_{σg^*}(q) for |
230 // we compute w^{k+1} = prox_{σg^*}(q) for |
| 216 // = q - σ prox_{g/σ}(q/σ) |
231 // = q - σ prox_{g/σ}(q/σ) |
| 217 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) |
232 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) |
| 218 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) |
233 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) |
| 219 // where q/σ = w^k/σ + (2x^{k+1}-x^k), |
234 // where q/σ = w^k/σ + (2x^{k+1}-x^k), |
| 220 w /= σ; |
235 w /= σ; |
| 221 w.axpy(2.0, x, 1.0); |
236 w.axpy(2.0, x, 1.0); |
| 222 w.axpy(-1.0, &xprev, 1.0); |
237 w.axpy(-1.0, &xprev, 1.0); |
| 223 xprev.copy_from(&w); // use xprev as temporary variable |
238 xprev.copy_from(&w); // use xprev as temporary variable |
| 224 l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); |
239 l1squared_prox(&mut tmp, &mut xprev, y, β / σθ); |
| 225 w -= &xprev; |
240 w -= &xprev; |
| 226 w *= σ; |
241 w *= σ; |
| 227 xprev.copy_from(x); |
242 xprev.copy_from(x); |
| 228 |
243 |
| 229 iters += 1; |
244 iters += 1; |
| 230 |
245 |
| 231 state.if_verbose(|| { |
246 state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) |
| 232 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) |
|
| 233 }) |
|
| 234 }); |
247 }); |
| 235 |
248 |
| 236 iters |
249 iters |
| 237 } |
250 } |
| 238 |
|
| 239 |
251 |
| 240 /// This function applies an iterative method for the solution of the problem |
252 /// This function applies an iterative method for the solution of the problem |
| 241 /// <div>$$ |
253 /// <div>$$ |
| 242 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁. |
254 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁. |
| 243 /// $$</div> |
255 /// $$</div> |
| 244 /// Only PDPS is supported. |
256 /// Only PDPS is supported. |
| 245 /// |
257 /// |
| 246 /// This function returns the number of iterations taken. |
258 /// This function returns the number of iterations taken. |
| 247 #[replace_float_literals(F::cast_from(literal))] |
259 #[replace_float_literals(F::cast_from(literal))] |
| 248 pub fn l1squared_unconstrained<F, I>( |
260 pub fn l1squared_unconstrained<F, I>( |
| 249 y : &DVector<F::MixedType>, |
261 y: &DVector<F::MixedType>, |
| 250 g : &DVector<F::MixedType>, |
262 g: &DVector<F::MixedType>, |
| 251 λ : F, |
263 λ: F, |
| 252 β : F, |
264 β: F, |
| 253 x : &mut DVector<F::MixedType>, |
265 x: &mut DVector<F::MixedType>, |
| 254 inner : &InnerSettings<F>, |
266 inner: &InnerSettings<F>, |
| 255 iterator : I |
267 iterator: I, |
| 256 ) -> usize |
268 ) -> usize |
| 257 where F : Float + ToNalgebraRealField, |
269 where |
| 258 I : AlgIteratorFactory<F> |
270 F: Float + ToNalgebraRealField, |
| |
271 I: AlgIteratorFactory<F>, |
| 259 { |
272 { |
| 260 // Estimate of ‖K‖ for K=θ Id. |
273 // Estimate of ‖K‖ for K=θ Id. |
| 261 let inner_θ = 1.0; |
274 let inner_θ = 1.0; |
| 262 let normest = inner_θ; |
275 let normest = inner_θ; |
| 263 |
276 |
| 264 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); |
277 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); |
| 265 |
278 |
| 266 match inner.method { |
279 match inner.method { |
| 267 InnerMethod::PDPS => |
280 InnerMethod::PDPS => { |
| 268 l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator), |
281 l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) |
| |
282 } |
| 269 other => unimplemented!("${other:?} is unimplemented"), |
283 other => unimplemented!("${other:?} is unimplemented"), |
| 270 } |
284 } |
| 271 } |
285 } |