src/subproblem/l1squared_unconstrained.rs

changeset 54
b3312eee105c
parent 42
6a7365d73e4c
equal deleted inserted replaced
53:92cae2e8f598 54:b3312eee105c
1 /*! 1 /*!
2 Iterative algorithms for solving the finite-dimensional subproblem without constraints. 2 Iterative algorithms for solving the finite-dimensional subproblem without constraints.
3 */ 3 */
4 4
5 use itertools::izip;
5 use nalgebra::DVector; 6 use nalgebra::DVector;
6 use numeric_literals::replace_float_literals; 7 use numeric_literals::replace_float_literals;
7 use itertools::izip;
8 use std::cmp::Ordering::*; 8 use std::cmp::Ordering::*;
9 9
10 use std::iter::zip; 10 use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState};
11 use alg_tools::iterate::{
12 AlgIteratorFactory,
13 AlgIteratorState,
14 };
15 use alg_tools::nalgebra_support::ToNalgebraRealField; 11 use alg_tools::nalgebra_support::ToNalgebraRealField;
16 use alg_tools::nanleast::NaNLeast; 12 use alg_tools::nanleast::NaNLeast;
17 use alg_tools::norms::{Dist, L1}; 13 use alg_tools::norms::{Dist, L1};
18 14 use std::iter::zip;
15
16 use super::l1squared_nonneg::max_interval_dist_to_zero;
17 use super::unconstrained::soft_thresholding;
18 use super::{InnerMethod, InnerSettings};
19 use crate::types::*; 19 use crate::types::*;
20 use super::{ 20
21 InnerMethod, 21 /// Calculate $\prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$.
22 InnerSettings
23 };
24 use super::unconstrained::soft_thresholding;
25 use super::l1squared_nonneg::max_interval_dist_to_zero;
26
27 /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$.
28 /// 22 ///
29 /// To derive an algorithm for this, we can assume that $y=0$, as 23 /// To derive an algorithm for this, we can assume that $y=0$, as
30 /// $prox_f(x) = prox_{f_0}(x - y) - y$ for $f_0=\frac{β}{2}\norm{x}_1^2$. 24 /// $\prox\_f(x) = \prox\_{f_0}(x - y) - y$ for $f\_0=\frac{β}{2}\norm{x}\_1^2$.
31 /// Now, the optimality conditions for $w = prox_f(x)$ are 25 /// Now, the optimality conditions for $w = \prox\_f(x)$ are
32 /// $$\tag{*} 26 /// $$
33 /// 0 ∈ w-x + β\norm{w}_1\sign w. 27 /// 0 ∈ w-x + β\norm{w}\_1\sign w.
34 /// $$ 28 /// $$
35 /// Clearly then $w = \soft_{β\norm{w}_1}(x)$. 29 /// Clearly then $w = \soft\_{β\norm{w}\_1}(x)$.
36 /// Thus the components of $x$ with smallest absolute value will be zeroed out. 30 /// Thus the components of $x$ with smallest absolute value will be zeroed out.
37 /// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components 31 /// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components
38 /// of $x$, and by $m$ their count, multipying the corresponding lines of (*) by $\sign x'$, 32 /// of $x$, and by $m$ their count, multipying the corresponding lines of (*) by $\sign x'$,
39 /// we obtain 33 /// we obtain
40 /// $$ 34 /// $$
41 /// \norm{x'}_1 = (1+βm)\norm{w'}_1. 35 /// \norm{x'}\_1 = (1+βm)\norm{w'}\_1.
42 /// $$ 36 /// $$
43 /// That is, $\norm{w}_1=\norm{w'}_1=\norm{x'}_1/(1+βm)$. 37 /// That is, $\norm{w}\_1=\norm{w'}\_1=\norm{x'}\_1/(1+βm)$.
44 /// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest 38 /// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest
45 /// elements, we can easily calculate what $\norm{w}_1$ should be for that choice, and 39 /// elements, we can easily calculate what $\norm{w}\_1$ should be for that choice, and
46 /// then easily calculate $w = \soft_{β\norm{w}_1}(x)$. We just have to verify that 40 /// then easily calculate $w = \soft_{β\norm{w}\_1}(x)$. We just have to verify that
47 /// the resulting $w$ has the same norm. There's a shortcut to this, as we work 41 /// the resulting $w$ has the same norm. There's a shortcut to this, as we work
48 /// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the 42 /// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the
49 /// condition of soft-thresholding to remain non-zero: $|x_i|>τ\norm{x'}/(1+τm)$. 43 /// condition of soft-thresholding to remain non-zero: $|x\_i|>τ\norm{x'}/(1+τm)$.
50 /// Clearly, if this condition fails for x_i, it will fail for all the components 44 /// Clearly, if this condition fails for $x\_i$, it will fail for all the components
51 /// already exluced. While, if it holds, it will hold for all components not excluded. 45 /// already exluced. While, if it holds, it will hold for all components not excluded.
52 #[replace_float_literals(F::cast_from(literal))] 46 #[replace_float_literals(F::cast_from(literal))]
53 pub(super) fn l1squared_prox<F :Float + nalgebra::RealField>( 47 pub(super) fn l1squared_prox<F: Float + nalgebra::RealField>(
54 sorted_abs : &mut DVector<F>, 48 sorted_abs: &mut DVector<F>,
55 x : &mut DVector<F>, 49 x: &mut DVector<F>,
56 y : &DVector<F>, 50 y: &DVector<F>,
57 β : F 51 β: F,
58 ) { 52 ) {
59 sorted_abs.copy_from(x); 53 sorted_abs.copy_from(x);
60 sorted_abs.axpy(-1.0, y, 1.0); 54 sorted_abs.axpy(-1.0, y, 1.0);
61 sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i)); 55 sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i));
62 sorted_abs.as_mut_slice().sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b))); 56 sorted_abs
57 .as_mut_slice()
58 .sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b)));
63 59
64 let mut n = sorted_abs.sum(); 60 let mut n = sorted_abs.sum();
65 for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) { 61 for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) {
66 // test first 62 // test first
67 let tmp = β*n/(1.0 + β*F::cast_from(m)); 63 let tmp = β * n / (1.0 + β * F::cast_from(m));
68 if *az_i <= tmp { 64 if *az_i <= tmp {
69 // Fail 65 // Fail
70 n -= *az_i; 66 n -= *az_i;
71 } else { 67 } else {
72 // Success 68 // Success
73 x.zip_apply(y, |x_i, y_i| *x_i = y_i + soft_thresholding(*x_i-y_i, tmp)); 69 x.zip_apply(y, |x_i, y_i| {
74 return 70 *x_i = y_i + soft_thresholding(*x_i - y_i, tmp)
71 });
72 return;
75 } 73 }
76 } 74 }
77 // m = 0 should always work, but x is zero. 75 // m = 0 should always work, but x is zero.
78 x.fill(0.0); 76 x.fill(0.0);
79 } 77 }
80 78
81 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$. 79 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$.
82 /// 80 ///
83 /// `v` will be modified and cannot be trusted to contain useful values afterwards. 81 /// `v` will be modified and cannot be trusted to contain useful values afterwards.
84 #[replace_float_literals(F::cast_from(literal))] 82 #[replace_float_literals(F::cast_from(literal))]
85 fn min_subdifferential<F : Float + nalgebra::RealField>( 83 fn min_subdifferential<F: Float + nalgebra::RealField>(
86 y : &DVector<F>, 84 y: &DVector<F>,
87 x : &DVector<F>, 85 x: &DVector<F>,
88 g : &DVector<F>, 86 g: &DVector<F>,
89 λ : F, 87 λ: F,
90 β : F 88 β: F,
91 ) -> F { 89 ) -> F {
92 let mut val = 0.0; 90 let mut val = 0.0;
93 let tmp = β*y.dist(x, L1); 91 let tmp = β * y.dist(x, L1);
94 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { 92 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
95 let (mut lb, mut ub) = (-g_i, -g_i); 93 let (mut lb, mut ub) = (-g_i, -g_i);
96 match x_i.partial_cmp(y_i) { 94 match x_i.partial_cmp(y_i) {
97 Some(Greater) => { lb += tmp; ub += tmp }, 95 Some(Greater) => {
98 Some(Less) => { lb -= tmp; ub -= tmp }, 96 lb += tmp;
99 Some(Equal) => { lb -= tmp; ub += tmp }, 97 ub += tmp
100 None => {}, 98 }
99 Some(Less) => {
100 lb -= tmp;
101 ub -= tmp
102 }
103 Some(Equal) => {
104 lb -= tmp;
105 ub += tmp
106 }
107 None => {}
101 } 108 }
102 match x_i.partial_cmp(&0.0) { 109 match x_i.partial_cmp(&0.0) {
103 Some(Greater) => { lb += λ; ub += λ }, 110 Some(Greater) => {
104 Some(Less) => { lb -= λ; ub -= λ }, 111 lb += λ;
105 Some(Equal) => { lb -= λ; ub += λ }, 112 ub += λ
106 None => {}, 113 }
114 Some(Less) => {
115 lb -= λ;
116 ub -= λ
117 }
118 Some(Equal) => {
119 lb -= λ;
120 ub += λ
121 }
122 None => {}
107 }; 123 };
108 val = max_interval_dist_to_zero(val, lb, ub); 124 val = max_interval_dist_to_zero(val, lb, ub);
109 } 125 }
110 val 126 val
111 } 127 }
112 128
113
114 /// PDPS implementation of [`l1squared_unconstrained`]. 129 /// PDPS implementation of [`l1squared_unconstrained`].
115 /// For detailed documentation of the inputs and outputs, refer to there. 130 /// For detailed documentation of the inputs and outputs, refer to there.
116 /// 131 ///
117 /// The `λ` component of the model is handled in the proximal step instead of the gradient step 132 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
118 /// for potential performance improvements. 133 /// for potential performance improvements.
119 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] 134 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
120 pub fn l1squared_unconstrained_pdps<F, I>( 135 pub fn l1squared_unconstrained_pdps<F, I>(
121 y : &DVector<F::MixedType>, 136 y: &DVector<F::MixedType>,
122 g : &DVector<F::MixedType>, 137 g: &DVector<F::MixedType>,
123 λ_ : F, 138 λ_: F,
124 β_ : F, 139 β_: F,
125 x : &mut DVector<F::MixedType>, 140 x: &mut DVector<F::MixedType>,
126 τ_ : F, 141 τ_: F,
127 σ_ : F, 142 σ_: F,
128 iterator : I 143 iterator: I,
129 ) -> usize 144 ) -> usize
130 where F : Float + ToNalgebraRealField, 145 where
131 I : AlgIteratorFactory<F> 146 F: Float + ToNalgebraRealField,
147 I: AlgIteratorFactory<F>,
132 { 148 {
133 let λ = λ_.to_nalgebra_mixed(); 149 let λ = λ_.to_nalgebra_mixed();
134 let β = β_.to_nalgebra_mixed(); 150 let β = β_.to_nalgebra_mixed();
135 let τ = τ_.to_nalgebra_mixed(); 151 let τ = τ_.to_nalgebra_mixed();
136 let σ = σ_.to_nalgebra_mixed(); 152 let σ = σ_.to_nalgebra_mixed();
141 157
142 iterator.iterate(|state| { 158 iterator.iterate(|state| {
143 // Primal step: x^{k+1} = prox_{τ|.-y|_1^2}(x^k - τ (w^k - g)) 159 // Primal step: x^{k+1} = prox_{τ|.-y|_1^2}(x^k - τ (w^k - g))
144 x.axpy(-τ, &w, 1.0); 160 x.axpy(-τ, &w, 1.0);
145 x.axpy(τ, g, 1.0); 161 x.axpy(τ, g, 1.0);
146 l1squared_prox(&mut tmp, x, y, τ*β); 162 l1squared_prox(&mut tmp, x, y, τ * β);
147 163
148 // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k)) 164 // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k))
149 w.axpy(2.0*σ, x, 1.0); 165 w.axpy(2.0 * σ, x, 1.0);
150 w.axpy(-σ, &xprev, 1.0); 166 w.axpy(-σ, &xprev, 1.0);
151 w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ)); 167 w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ));
152 xprev.copy_from(x); 168 xprev.copy_from(x);
153 169
154 iters +=1; 170 iters += 1;
155 171
156 state.if_verbose(|| { 172 state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
157 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
158 })
159 }); 173 });
160 174
161 iters 175 iters
162 } 176 }
163 177
178 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). 192 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w).
179 /// \end{split} 193 /// \end{split}
180 /// $$</div> 194 /// $$</div>
181 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] 195 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
182 pub fn l1squared_unconstrained_pdps_alt<F, I>( 196 pub fn l1squared_unconstrained_pdps_alt<F, I>(
183 y : &DVector<F::MixedType>, 197 y: &DVector<F::MixedType>,
184 g : &DVector<F::MixedType>, 198 g: &DVector<F::MixedType>,
185 λ_ : F, 199 λ_: F,
186 β_ : F, 200 β_: F,
187 x : &mut DVector<F::MixedType>, 201 x: &mut DVector<F::MixedType>,
188 τ_ : F, 202 τ_: F,
189 σ_ : F, 203 σ_: F,
190 θ_ : F, 204 θ_: F,
191 iterator : I 205 iterator: I,
192 ) -> usize 206 ) -> usize
193 where F : Float + ToNalgebraRealField, 207 where
194 I : AlgIteratorFactory<F> 208 F: Float + ToNalgebraRealField,
209 I: AlgIteratorFactory<F>,
195 { 210 {
196 let λ = λ_.to_nalgebra_mixed(); 211 let λ = λ_.to_nalgebra_mixed();
197 let τ = τ_.to_nalgebra_mixed(); 212 let τ = τ_.to_nalgebra_mixed();
198 let σ = σ_.to_nalgebra_mixed(); 213 let σ = σ_.to_nalgebra_mixed();
199 let θ = θ_.to_nalgebra_mixed(); 214 let θ = θ_.to_nalgebra_mixed();
200 let β = β_.to_nalgebra_mixed(); 215 let β = β_.to_nalgebra_mixed();
201 let σθ = σ*θ; 216 let σθ = σ * θ;
202 let τθ = τ*θ; 217 let τθ = τ * θ;
203 let mut w = DVector::zeros(x.len()); 218 let mut w = DVector::zeros(x.len());
204 let mut tmp = DVector::zeros(x.len()); 219 let mut tmp = DVector::zeros(x.len());
205 let mut xprev = x.clone(); 220 let mut xprev = x.clone();
206 let mut iters = 0; 221 let mut iters = 0;
207 222
208 iterator.iterate(|state| { 223 iterator.iterate(|state| {
209 // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g)) 224 // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g))
210 x.axpy(-τθ, &w, 1.0); 225 x.axpy(-τθ, &w, 1.0);
211 x.axpy(τ, g, 1.0); 226 x.axpy(τ, g, 1.0);
212 x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ*λ)); 227 x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ * λ));
213 228
214 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), 229 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
215 // we compute w^{k+1} = prox_{σg^*}(q) for 230 // we compute w^{k+1} = prox_{σg^*}(q) for
216 // = q - σ prox_{g/σ}(q/σ) 231 // = q - σ prox_{g/σ}(q/σ)
217 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) 232 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)
218 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) 233 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ))
219 // where q/σ = w^k/σ + (2x^{k+1}-x^k), 234 // where q/σ = w^k/σ + (2x^{k+1}-x^k),
220 w /= σ; 235 w /= σ;
221 w.axpy(2.0, x, 1.0); 236 w.axpy(2.0, x, 1.0);
222 w.axpy(-1.0, &xprev, 1.0); 237 w.axpy(-1.0, &xprev, 1.0);
223 xprev.copy_from(&w); // use xprev as temporary variable 238 xprev.copy_from(&w); // use xprev as temporary variable
224 l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); 239 l1squared_prox(&mut tmp, &mut xprev, y, β / σθ);
225 w -= &xprev; 240 w -= &xprev;
226 w *= σ; 241 w *= σ;
227 xprev.copy_from(x); 242 xprev.copy_from(x);
228 243
229 iters += 1; 244 iters += 1;
230 245
231 state.if_verbose(|| { 246 state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
232 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
233 })
234 }); 247 });
235 248
236 iters 249 iters
237 } 250 }
238
239 251
240 /// This function applies an iterative method for the solution of the problem 252 /// This function applies an iterative method for the solution of the problem
241 /// <div>$$ 253 /// <div>$$
242 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁. 254 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁.
243 /// $$</div> 255 /// $$</div>
244 /// Only PDPS is supported. 256 /// Only PDPS is supported.
245 /// 257 ///
246 /// This function returns the number of iterations taken. 258 /// This function returns the number of iterations taken.
247 #[replace_float_literals(F::cast_from(literal))] 259 #[replace_float_literals(F::cast_from(literal))]
248 pub fn l1squared_unconstrained<F, I>( 260 pub fn l1squared_unconstrained<F, I>(
249 y : &DVector<F::MixedType>, 261 y: &DVector<F::MixedType>,
250 g : &DVector<F::MixedType>, 262 g: &DVector<F::MixedType>,
251 λ : F, 263 λ: F,
252 β : F, 264 β: F,
253 x : &mut DVector<F::MixedType>, 265 x: &mut DVector<F::MixedType>,
254 inner : &InnerSettings<F>, 266 inner: &InnerSettings<F>,
255 iterator : I 267 iterator: I,
256 ) -> usize 268 ) -> usize
257 where F : Float + ToNalgebraRealField, 269 where
258 I : AlgIteratorFactory<F> 270 F: Float + ToNalgebraRealField,
271 I: AlgIteratorFactory<F>,
259 { 272 {
260 // Estimate of ‖K‖ for K=θ Id. 273 // Estimate of ‖K‖ for K=θ Id.
261 let inner_θ = 1.0; 274 let inner_θ = 1.0;
262 let normest = inner_θ; 275 let normest = inner_θ;
263 276
264 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); 277 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
265 278
266 match inner.method { 279 match inner.method {
267 InnerMethod::PDPS => 280 InnerMethod::PDPS => {
268 l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator), 281 l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator)
282 }
269 other => unimplemented!("${other:?} is unimplemented"), 283 other => unimplemented!("${other:?} is unimplemented"),
270 } 284 }
271 } 285 }

mercurial