src/subproblem/l1squared_nonneg.rs

branch
dev
changeset 63
7a8a55fd41c0
parent 42
6a7365d73e4c
child 64
d3be4f7ffd60
equal deleted inserted replaced
61:4f468d35fa29 63:7a8a55fd41c0
1 /*! 1 /*!
2 Iterative algorithms for solving the finite-dimensional subproblem with constraint. 2 Iterative algorithms for solving the finite-dimensional subproblem with constraint.
3 */ 3 */
4 4
5 use itertools::izip;
5 use nalgebra::DVector; 6 use nalgebra::DVector;
6 use numeric_literals::replace_float_literals; 7 use numeric_literals::replace_float_literals;
7 use itertools::izip;
8 //use std::iter::zip; 8 //use std::iter::zip;
9 use std::cmp::Ordering::*; 9 use std::cmp::Ordering::*;
10 10
11 use alg_tools::iterate::{ 11 use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState};
12 AlgIteratorFactory,
13 AlgIteratorState,
14 };
15 use alg_tools::nalgebra_support::ToNalgebraRealField; 12 use alg_tools::nalgebra_support::ToNalgebraRealField;
16 use alg_tools::norms::{Dist, L1}; 13 use alg_tools::norms::{Dist, L1};
17 use alg_tools::nanleast::NaNLeast; 14
18 15 use super::l1squared_unconstrained::l1squared_prox;
16 use super::nonneg::nonneg_soft_thresholding;
17 use super::{InnerMethod, InnerSettings};
19 use crate::types::*; 18 use crate::types::*;
20 use super::{
21 InnerMethod,
22 InnerSettings
23 };
24 use super::nonneg::nonneg_soft_thresholding;
25 use super::l1squared_unconstrained::l1squared_prox;
26 19
27 /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero. 20 /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero.
28 #[replace_float_literals(F::cast_from(literal))] 21 #[replace_float_literals(F::cast_from(literal))]
29 pub(super) fn max_interval_dist_to_zero<F : Float>(dist : F, lb : F, ub : F) -> F { 22 pub(super) fn max_interval_dist_to_zero<F: Float>(dist: F, lb: F, ub: F) -> F {
30 if lb < 0.0 { 23 if lb < 0.0 {
31 if ub > 0.0 { 24 if ub > 0.0 {
32 dist 25 dist
33 } else { 26 } else {
34 dist.max(-ub) 27 dist.max(-ub)
35 } 28 }
36 } else /* ub ≥ 0.0*/ { 29 } else
30 /* ub ≥ 0.0*/
31 {
37 dist.max(lb) 32 dist.max(lb)
38 } 33 }
39 } 34 }
40 35
41 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥}(x)$ at $x$. 36 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥0}(x)$ at $x$.
42 /// 37 ///
43 /// `v` will be modified and cannot be trusted to contain useful values afterwards. 38 /// `v` will be modified and cannot be trusted to contain useful values afterwards.
44 #[replace_float_literals(F::cast_from(literal))] 39 #[replace_float_literals(F::cast_from(literal))]
45 fn min_subdifferential<F : Float + nalgebra::RealField>( 40 fn min_subdifferential<F: Float + nalgebra::RealField>(
46 y : &DVector<F>, 41 y: &DVector<F>,
47 x : &DVector<F>, 42 x: &DVector<F>,
48 g : &DVector<F>, 43 g: &DVector<F>,
49 λ : F, 44 λ: F,
50 β : F 45 β: F,
51 ) -> F { 46 ) -> F {
52 let mut val = 0.0; 47 let mut val = 0.0;
53 let tmp = β*y.dist(x, L1); 48 let tmp = β * y.dist(x, L1);
54 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { 49 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
55 let (mut lb, mut ub) = (-g_i, -g_i); 50 let (mut lb, mut ub) = (-g_i, -g_i);
56 match x_i.partial_cmp(y_i) { 51 match x_i.partial_cmp(y_i) {
57 Some(Greater) => { lb += tmp; ub += tmp }, 52 Some(Greater) => {
58 Some(Less) => { lb -= tmp; ub -= tmp }, 53 lb += tmp;
59 Some(Equal) => { lb -= tmp; ub += tmp }, 54 ub += tmp
60 None => {}, 55 }
56 Some(Less) => {
57 lb -= tmp;
58 ub -= tmp
59 }
60 Some(Equal) => {
61 lb -= tmp;
62 ub += tmp
63 }
64 None => {}
61 } 65 }
62 match x_i.partial_cmp(&0.0) { 66 match x_i.partial_cmp(&0.0) {
63 Some(Greater) => { lb += λ; ub += λ }, 67 Some(Greater) => {
68 lb += λ;
69 ub += λ
70 }
64 // Less should not happen 71 // Less should not happen
65 Some(Less|Equal) => { lb = F::NEG_INFINITY; ub += λ }, 72 Some(Less | Equal) => {
66 None => {}, 73 lb = F::NEG_INFINITY;
74 ub += λ
75 }
76 None => {}
67 }; 77 };
68 val = max_interval_dist_to_zero(val, lb, ub); 78 val = max_interval_dist_to_zero(val, lb, ub);
69 } 79 }
70 val 80 val
71 } 81 }
72 82
83 // #[replace_float_literals(F::cast_from(literal))]
84 // fn lbd_soft_thresholding<F: Float>(v: F, λ: F, b: F) -> F {
85 // match (b >= 0.0, v >= b) {
86 // (true, false) => b,
87 // (true, true) => b.max(v - λ), // soft-to-b from above
88 // (false, true) => super::unconstrained::soft_thresholding(v, λ),
89 // (false, false) => 0.0.min(b.max(v + λ)), // soft-to-0 with lower bound
90 // }
91 // }
92
93 /// Calculates $\prox\_f(x)$ for $f(x)=λ\abs{x-y} + δ\_{≥0}(x)$.
94 /// This is the first output. The second output is the first output $-y$, i..e, the prox
95 /// of $f\_0$ for the shift function $f\_0(z) = f(z+y) = λ\abs{z} + δ\_{≥-y}(z)$
96 /// satisfying
97 /// $$
98 /// \prox_f(x) = \prox\_{f\_0}(x - y) + y,
99 /// $$
100 /// which is also used internally.
101 /// The third output indicates whether the output is “locked” to either $0$ (resp. $-y$ in
102 /// the reformulation), or $y$ ($0$ in the reformulation).
103 /// The fourth output indicates the next highest $λ$ where such locking would happen.
73 #[replace_float_literals(F::cast_from(literal))] 104 #[replace_float_literals(F::cast_from(literal))]
74 fn lbd_soft_thresholding<F : Float>(v : F, λ : F, b : F) -> F 105 #[inline]
75 { 106 fn shifted_nonneg_soft_thresholding<F: Float>(x: F, y: F, λ: F) -> (F, F, bool, Option<F>) {
76 match (b >= 0.0, v >= b) { 107 let z = x - y; // The shift to f_0
77 (true, false) => b, 108 if -y >= 0.0 {
78 (true, true) => b.max(v - λ), // soft-to-b from above 109 if x > λ {
79 (false, true) => super::unconstrained::soft_thresholding(v, λ), 110 (x - λ, z - λ, false, Some(x))
80 (false, false) => 0.0.min(b.max(v - λ)), // soft-to-0 with lower bound 111 } else {
112 (0.0, -y, true, None)
113 }
114 } else if z < 0.0 {
115 if λ < -x {
116 (0.0, -y, true, Some(-x))
117 } else if z + λ < 0.0 {
118 (x + λ, z + λ, false, Some(-z))
119 } else {
120 (y, 0.0, true, None)
121 }
122 } else if z > λ {
123 (x - λ, z - λ, false, Some(z))
124 } else {
125 (y, 0.0, true, None)
81 } 126 }
82 } 127 }
83 128
84 /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2 + δ_{≥0}(x)$. 129 /// Calculate $\prox\_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}\_1\^2 + δ\_{≥0}(x)$.
85 /// 130 ///
86 /// To derive an algorithm for this, we can use 131 /// To derive an algorithm for this, we can use
87 /// $prox_f(x) = prox_{f_0}(x - y) - y$ for 132 /// $$
88 /// $f_0(z)=\frac{β}{2}\norm{z}_1^2 + δ_{≥-y}(z)$. 133 /// \prox_f(x) = \prox\_{f\_0}(x - y) + y
89 /// Now, the optimality conditions for $w = prox_{f_0}(x)$ are 134 /// \quad\text{for}\quad
135 /// f\_0(z)=\frac{β}{2}\norm{z}\_1\^2 + δ\_{≥-y}(z).
136 /// $$
137 /// Now, the optimality conditions for $w = \prox\_{f\_0}(x)$ are
90 /// $$\tag{*} 138 /// $$\tag{*}
91 /// x ∈ w + β\norm{w}_1\sign w + N_{≥ -y}(w). 139 /// x ∈ w + β\norm{w}\_1\sign w + N\_{≥ -y}(w).
92 /// $$ 140 /// $$
93 /// If we know $\norm{w}_1$, then this is easily solved by lower-bounded soft-thresholding. 141 /// If we know $\norm{w}\_1$, then this is easily solved by lower-bounded soft-thresholding.
94 /// We find this by sorting the elements by the distance to the 'locked' lower-bounded 142 /// We find this by sorting the elements by the distance to the 'locked' lower-bounded
95 /// soft-thresholding target ($0$ or $-y_i$). 143 /// soft-thresholding target ($0$ or $-y_i$).
96 /// Then we loop over this sorted vector, increasing our estimate of $\norm{w}_1$ as we decide 144 /// Then we loop over this sorted vector, increasing our estimate of $\norm{w}\_1$ as we decide
97 /// that the soft-thresholding parameter `β\norm{w}_1` has to be such that the passed elements 145 /// that the soft-thresholding parameter $β\norm{w}\_1$ has to be such that the passed elements
98 /// will reach their locked value (after which they cannot change anymore, for a larger 146 /// will reach their locked value (after which they cannot change anymore, for a larger
99 /// soft-thresholding parameter. This has to be slightly more fine-grained for account 147 /// soft-thresholding parameter. This has to be slightly more fine-grained for account
100 /// for the case that $-y_i<0$ and $x_i < -y_i$. 148 /// for the case that $-y\_i<0$ and $x\_i < -y\_i$.
101 /// 149 ///
102 /// Indeed, we denote by x' and w' the subset of elements such that w_i ≠ 0 and w_i > -y_i, 150 /// Indeed, denoting by $x'$ and $w'$ the subset of elements such that $w\_i ≠ 0$ and
103 /// we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding lines of (*) that 151 /// $w\_i > -y\_i$, we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding
104 /// $$ 152 /// lines of (*) that
105 /// \norm{x'} = \norm{w'} + β \norm{w}_1 m. 153 /// $$
106 /// $$ 154 /// \norm{x'} = \norm{w'} + β \norm{w}\_1 m,
107 /// Having a value for $t = \norm{w}-\norm{w'}$, we can then calculate 155 /// $$
108 /// $$ 156 /// where $m$ is the number of unlocked components.
109 /// \norm{x'} - t = (1+β m)\norm{w}_1, 157 /// We can now calculate the mass $t = \norm{w}-\norm{w'}$ of locked components, and so obtain
110 /// $$ 158 /// $$
111 /// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}_1$. 159 /// \norm{x'} + t = (1+β m)\norm{w}\_1,
160 /// $$
161 /// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}\_1$.
112 /// Since we do not actually know the unlocked elements, but just loop over all the possibilities 162 /// Since we do not actually know the unlocked elements, but just loop over all the possibilities
113 /// for them, we have to check that $λ$ is above the current lower bound for this parameter 163 /// for them, we have to check that $λ$ is above the current lower bound for this parameter
114 /// (`shift` in the code), and below a value that would cause changes in the locked set 164 /// (`shift` in the code), and below a value that would cause changes in the locked set
115 /// (`max_shift` in the code). 165 /// (`max_shift` in the code).
116 #[replace_float_literals(F::cast_from(literal))] 166 #[replace_float_literals(F::cast_from(literal))]
117 pub(super) fn l1squared_nonneg_prox<F :Float + nalgebra::RealField>( 167 pub fn l1squared_nonneg_prox<F: Float + nalgebra::RealField>(
118 sorted : &mut Vec<(F, F, F, Option<(F, F)>)>, 168 x: &mut DVector<F>,
119 x : &mut DVector<F>, 169 y: &DVector<F>,
120 y : &DVector<F>, 170 β: F,
121 β : F
122 ) { 171 ) {
123 // nalgebra double-definition bullshit workaround 172 // nalgebra double-definition bullshit workaround
124 //let max = alg_tools::NumTraitsFloat::max;
125 let abs = alg_tools::NumTraitsFloat::abs; 173 let abs = alg_tools::NumTraitsFloat::abs;
126 174 let min = alg_tools::NumTraitsFloat::min;
127 *x -= y; 175
128 176 let mut λ = 0.0;
129 for (az_x_i, &x_i, &y_i) in izip!(sorted.iter_mut(), x.iter(), y.iter()) { 177 loop {
130 // The first component of each az_x_i contains the distance of x_i to the 178 let mut w_locked = 0.0;
131 // soft-thresholding limit. If it is negative, it is always reached. 179 let mut n_unlocked = 0; // m
132 // The second component contains the absolute value of the result for that component 180 let mut x_prime = 0.0;
133 // w_i of the solution, if the soft-thresholding limit is reached. 181 let mut max_shift = F::INFINITY;
134 // This is stored here due to the sorting, although could otherwise be computed directly. 182 for (&x_i, &y_i) in izip!(x.iter(), y.iter()) {
135 // Likewise the third component contains the absolute value of x_i. 183 let (_, a_shifted, locked, next_lock) = shifted_nonneg_soft_thresholding(x_i, y_i, λ);
136 // The fourth component contains an initial lower bound. 184
137 let a_i = abs(x_i); 185 if let Some(t) = next_lock {
138 let b = -y_i; 186 max_shift = min(max_shift, t);
139 *az_x_i = match (b >= 0.0, x_i >= b) { 187 assert!(max_shift > λ);
140 (true, false) => (x_i-b, b, a_i, None), // w_i=b, so sorting element negative! 188 }
141 (true, true) => (x_i-b, b, a_i, None), // soft-to-b from above 189 if locked {
142 (false, true) => (a_i, 0.0, a_i, None), // soft-to-0 190 w_locked += abs(a_shifted);
143 (false, false) => (a_i, 0.0, a_i, Some((b, b-x_i))), // soft-to-0 with initial limit 191 } else {
144 }; 192 n_unlocked += 1;
193 x_prime += abs(x_i - y_i);
194 }
195 }
196
197 // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ + (‖w‖-‖w'‖)= (1 + β m)‖w‖.
198 let λ_new = (x_prime + w_locked) / (1.0 / β + F::cast_from(n_unlocked));
199
200 if λ_new > max_shift {
201 λ = max_shift;
202 } else {
203 assert!(λ_new >= λ);
204 // success
205 x.zip_apply(y, |x_i, y_i| {
206 let (a, _, _, _) = shifted_nonneg_soft_thresholding(*x_i, y_i, λ_new);
207 //*x_i = y_i + lbd_soft_thresholding(*x_i, λ_new, -y_i)
208 *x_i = a;
209 });
210 return;
211 }
145 } 212 }
146 sorted.as_mut_slice()
147 .sort_unstable_by(|(a, _, _, _), (b, _, _, _)| NaNLeast(*a).cmp(&NaNLeast(*b)));
148
149 let mut nwlow = 0.0;
150 let mut shift = 0.0;
151 // This main loop is over different combinations of elements of the solution locked
152 // to the soft-thresholding lower bound (`0` or `-y_i`), in the sorted order of locking.
153 for (i, az_x_i) in izip!(0.., sorted.iter()) {
154 // This 'attempt is over different combinations of elements locked to the
155 // lower bound (`-y_i ≤ 0`). It calculates `max_shift` as the maximum shift that
156 // can be done until the locking would change (or become non-strictly-complementary).
157 // If the main rule (*) gives an estimate of `λ` that stays below `max_shift`, it is
158 // accepted. Otherwise `shift` is updated to `max_shift`, and we attempt again,
159 // with the non-locking set that participates in the calculation of `λ` then including
160 // the elements that are no longer locked to the lower bound.
161 'attempt: loop {
162 let mut nwthis = 0.0; // contribution to ‖w‖ from elements with locking
163 //soft-thresholding parameter = `shift`
164 let mut nxmore = 0.0; // ‖x'‖ for those elements through to not be locked to
165 // neither the soft-thresholding limit, nor the lower bound
166 let mut nwlbd = 0.0; // contribution to ‖w‖ from those elements locked to their
167 // lower bound
168 let mut m = 0;
169 let mut max_shift = F::INFINITY; // maximal shift for which our estimate of the set of
170 // unlocked elements is valid.
171 let mut max_shift_from_lbd = false; // Whether max_shift comes from the next element
172 // or from a lower bound being reached.
173 for az_x_j in sorted[i as usize..].iter() {
174 if az_x_j.0 <= shift {
175 nwthis += az_x_j.1;
176 } else {
177 match az_x_j.3 {
178 Some((l, s)) if shift < s => {
179 if max_shift > s {
180 max_shift_from_lbd = true;
181 max_shift = s;
182 }
183 nwlbd += -l;
184 },
185 _ => {
186 nxmore += az_x_j.2;
187 if m == 0 && max_shift > az_x_j.0 {
188 max_shift = az_x_j.0;
189 max_shift_from_lbd = false;
190 }
191 m += 1;
192 }
193 }
194 }
195 }
196
197 // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ - (‖w‖-‖w'‖)= (1 + β m)‖w‖.
198 let tmp = β*(nxmore - (nwlow + nwthis + nwlbd))/(1.0 + β*F::cast_from(m));
199 if tmp > max_shift {
200 if max_shift_from_lbd {
201 shift = max_shift;
202 continue 'attempt;
203 } else {
204 break 'attempt
205 }
206 } else if tmp < shift {
207 // TODO: this should probably be an assert!(false)
208 break 'attempt;
209 } else {
210 // success
211 x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, tmp, -y_i));
212 return
213 }
214 }
215 shift = az_x_i.0;
216 nwlow += az_x_i.1;
217 }
218 // TODO: this fallback has not been verified to be correct
219 x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, shift, -y_i));
220 } 213 }
221 214
222 /// Proximal point method implementation of [`l1squared_nonneg`]. 215 /// Proximal point method implementation of [`l1squared_nonneg`].
223 /// For detailed documentation of the inputs and outputs, refer to there. 216 /// For detailed documentation of the inputs and outputs, refer to there.
224 /// 217 ///
225 /// The `λ` component of the model is handled in the proximal step instead of the gradient step 218 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
226 /// for potential performance improvements. 219 /// for potential performance improvements.
227 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] 220 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
228 pub fn l1squared_nonneg_pp<F, I>( 221 pub fn l1squared_nonneg_pp<F, I>(
229 y : &DVector<F::MixedType>, 222 y: &DVector<F::MixedType>,
230 g : &DVector<F::MixedType>, 223 g: &DVector<F::MixedType>,
231 λ_ : F, 224 λ_: F,
232 β_ : F, 225 β_: F,
233 x : &mut DVector<F::MixedType>, 226 x: &mut DVector<F::MixedType>,
234 τ_ : F, 227 τ_: F,
235 θ_ : F, 228 θ_: F,
236 iterator : I 229 iterator: I,
237 ) -> usize 230 ) -> usize
238 where F : Float + ToNalgebraRealField, 231 where
239 I : AlgIteratorFactory<F> 232 F: Float + ToNalgebraRealField,
233 I: AlgIteratorFactory<F>,
240 { 234 {
241 let λ = λ_.to_nalgebra_mixed(); 235 let λ = λ_.to_nalgebra_mixed();
242 let β = β_.to_nalgebra_mixed(); 236 let β = β_.to_nalgebra_mixed();
243 let mut τ = τ_.to_nalgebra_mixed(); 237 let mut τ = τ_.to_nalgebra_mixed();
244 let θ = θ_.to_nalgebra_mixed(); 238 let θ = θ_.to_nalgebra_mixed();
245 let mut tmp = std::iter::repeat((0.0, 0.0, 0.0, None)).take(x.len()).collect();
246 let mut iters = 0; 239 let mut iters = 0;
247 240
248 iterator.iterate(|state| { 241 iterator.iterate(|state| {
249 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g)) 242 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g))
250 x.apply(|x_i| *x_i -= τ*λ); 243 x.apply(|x_i| *x_i -= τ * λ);
251 x.axpy(τ, g, 1.0); 244 x.axpy(τ, g, 1.0);
252 l1squared_nonneg_prox(&mut tmp, x, y, τ*β); 245 l1squared_nonneg_prox(x, y, τ * β);
253 246
254 iters += 1; 247 iters += 1;
255 // This gives O(1/N^2) rates due to monotonicity of function values. 248 // This gives O(1/N^2) rates due to monotonicity of function values.
256 // Higher acceleration does not seem to be numerically stable. 249 // Higher acceleration does not seem to be numerically stable.
257 τ += θ; 250 τ += θ;
258 251
259 // This gives O(1/N^3) rates due to monotonicity of function values. 252 // This gives O(1/N^3) rates due to monotonicity of function values.
260 // Higher acceleration does not seem to be numerically stable. 253 // Higher acceleration does not seem to be numerically stable.
261 //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ; 254 //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ;
262 255
263 state.if_verbose(|| { 256 state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
264 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
265 })
266 }); 257 });
267 258
268 iters 259 iters
269 } 260 }
270 261
274 /// The `λ` component of the model is handled in the proximal step instead of the gradient step 265 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
275 /// for potential performance improvements. 266 /// for potential performance improvements.
276 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. 267 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model.
277 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] 268 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
278 pub fn l1squared_nonneg_pdps<F, I>( 269 pub fn l1squared_nonneg_pdps<F, I>(
279 y : &DVector<F::MixedType>, 270 y: &DVector<F::MixedType>,
280 g : &DVector<F::MixedType>, 271 g: &DVector<F::MixedType>,
281 λ_ : F, 272 λ_: F,
282 β_ : F, 273 β_: F,
283 x : &mut DVector<F::MixedType>, 274 x: &mut DVector<F::MixedType>,
284 τ_ : F, 275 τ_: F,
285 σ_ : F, 276 σ_: F,
286 θ_ : F, 277 θ_: F,
287 iterator : I 278 iterator: I,
288 ) -> usize 279 ) -> usize
289 where F : Float + ToNalgebraRealField, 280 where
290 I : AlgIteratorFactory<F> 281 F: Float + ToNalgebraRealField,
282 I: AlgIteratorFactory<F>,
291 { 283 {
292 let λ = λ_.to_nalgebra_mixed(); 284 let λ = λ_.to_nalgebra_mixed();
293 let β = β_.to_nalgebra_mixed(); 285 let β = β_.to_nalgebra_mixed();
294 let τ = τ_.to_nalgebra_mixed(); 286 let τ = τ_.to_nalgebra_mixed();
295 let σ = σ_.to_nalgebra_mixed(); 287 let σ = σ_.to_nalgebra_mixed();
299 let mut xprev = x.clone(); 291 let mut xprev = x.clone();
300 let mut iters = 0; 292 let mut iters = 0;
301 293
302 iterator.iterate(|state| { 294 iterator.iterate(|state| {
303 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ (w^k - g)) 295 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ (w^k - g))
304 x.axpy(-τ*θ, &w, 1.0); 296 x.axpy(-τ * θ, &w, 1.0);
305 x.axpy(τ, g, 1.0); 297 x.axpy(τ, g, 1.0);
306 l1squared_prox(&mut tmp, x, y, τ*β); 298 l1squared_prox(&mut tmp, x, y, τ * β);
307 299
308 // Dual step: w^{k+1} = proj_{[-∞,λ]}(w^k + σ(2x^{k+1}-x^k)) 300 // Dual step: w^{k+1} = proj_{[-∞,λ]}(w^k + σ(2x^{k+1}-x^k))
309 w.axpy(2.0*σ*θ, x, 1.0); 301 w.axpy(2.0 * σ * θ, x, 1.0);
310 w.axpy(-σ*θ, &xprev, 1.0); 302 w.axpy(-σ * θ, &xprev, 1.0);
311 w.apply(|w_i| *w_i = w_i.min(λ)); 303 w.apply(|w_i| *w_i = w_i.min(λ));
312 xprev.copy_from(x); 304 xprev.copy_from(x);
313 305
314 iters +=1; 306 iters += 1;
315 307
316 state.if_verbose(|| { 308 state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
317 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
318 })
319 }); 309 });
320 310
321 iters 311 iters
322 } 312 }
323 313
338 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). 328 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w).
339 /// \end{split} 329 /// \end{split}
340 /// $$</div> 330 /// $$</div>
341 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] 331 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
342 pub fn l1squared_nonneg_pdps_alt<F, I>( 332 pub fn l1squared_nonneg_pdps_alt<F, I>(
343 y : &DVector<F::MixedType>, 333 y: &DVector<F::MixedType>,
344 g : &DVector<F::MixedType>, 334 g: &DVector<F::MixedType>,
345 λ_ : F, 335 λ_: F,
346 β_ : F, 336 β_: F,
347 x : &mut DVector<F::MixedType>, 337 x: &mut DVector<F::MixedType>,
348 τ_ : F, 338 τ_: F,
349 σ_ : F, 339 σ_: F,
350 θ_ : F, 340 θ_: F,
351 iterator : I 341 iterator: I,
352 ) -> usize 342 ) -> usize
353 where F : Float + ToNalgebraRealField, 343 where
354 I : AlgIteratorFactory<F> 344 F: Float + ToNalgebraRealField,
345 I: AlgIteratorFactory<F>,
355 { 346 {
356 let λ = λ_.to_nalgebra_mixed(); 347 let λ = λ_.to_nalgebra_mixed();
357 let τ = τ_.to_nalgebra_mixed(); 348 let τ = τ_.to_nalgebra_mixed();
358 let σ = σ_.to_nalgebra_mixed(); 349 let σ = σ_.to_nalgebra_mixed();
359 let θ = θ_.to_nalgebra_mixed(); 350 let θ = θ_.to_nalgebra_mixed();
360 let β = β_.to_nalgebra_mixed(); 351 let β = β_.to_nalgebra_mixed();
361 let σθ = σ*θ; 352 let σθ = σ * θ;
362 let τθ = τ*θ; 353 let τθ = τ * θ;
363 let mut w = DVector::zeros(x.len()); 354 let mut w = DVector::zeros(x.len());
364 let mut tmp = DVector::zeros(x.len()); 355 let mut tmp = DVector::zeros(x.len());
365 let mut xprev = x.clone(); 356 let mut xprev = x.clone();
366 let mut iters = 0; 357 let mut iters = 0;
367 358
368 iterator.iterate(|state| { 359 iterator.iterate(|state| {
369 // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g)) 360 // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g))
370 x.axpy(-τθ, &w, 1.0); 361 x.axpy(-τθ, &w, 1.0);
371 x.axpy(τ, g, 1.0); 362 x.axpy(τ, g, 1.0);
372 x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ)); 363 x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ * λ));
373 364
374 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), 365 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
375 // we compute w^{k+1} = prox_{σg^*}(q) for 366 // we compute w^{k+1} = prox_{σg^*}(q) for
376 // = q - σ prox_{g/σ}(q/σ) 367 // = q - σ prox_{g/σ}(q/σ)
377 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) 368 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)
378 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) 369 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ))
379 // where q/σ = w^k/σ + (2x^{k+1}-x^k), 370 // where q/σ = w^k/σ + (2x^{k+1}-x^k),
380 w /= σ; 371 w /= σ;
381 w.axpy(2.0, x, 1.0); 372 w.axpy(2.0, x, 1.0);
382 w.axpy(-1.0, &xprev, 1.0); 373 w.axpy(-1.0, &xprev, 1.0);
383 xprev.copy_from(&w); // use xprev as temporary variable 374 xprev.copy_from(&w); // use xprev as temporary variable
384 l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); 375 l1squared_prox(&mut tmp, &mut xprev, y, β / σθ);
385 w -= &xprev; 376 w -= &xprev;
386 w *= σ; 377 w *= σ;
387 xprev.copy_from(x); 378 xprev.copy_from(x);
388 379
389 iters += 1; 380 iters += 1;
390 381
391 state.if_verbose(|| { 382 state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
392 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
393 })
394 }); 383 });
395 384
396 iters 385 iters
397 } 386 }
398
399 387
400 /// This function applies an iterative method for the solution of the problem 388 /// This function applies an iterative method for the solution of the problem
401 /// <div>$$ 389 /// <div>$$
402 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x). 390 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x).
403 /// $$</div> 391 /// $$</div>
404 /// 392 ///
405 /// This function returns the number of iterations taken. 393 /// This function returns the number of iterations taken.
406 #[replace_float_literals(F::cast_from(literal))] 394 #[replace_float_literals(F::cast_from(literal))]
407 pub fn l1squared_nonneg<F, I>( 395 pub fn l1squared_nonneg<F, I>(
408 y : &DVector<F::MixedType>, 396 y: &DVector<F::MixedType>,
409 g : &DVector<F::MixedType>, 397 g: &DVector<F::MixedType>,
410 λ : F, 398 λ: F,
411 β : F, 399 β: F,
412 x : &mut DVector<F::MixedType>, 400 x: &mut DVector<F::MixedType>,
413 inner : &InnerSettings<F>, 401 inner: &InnerSettings<F>,
414 iterator : I 402 iterator: I,
415 ) -> usize 403 ) -> usize
416 where F : Float + ToNalgebraRealField, 404 where
417 I : AlgIteratorFactory<F> 405 F: Float + ToNalgebraRealField,
406 I: AlgIteratorFactory<F>,
418 { 407 {
419 match inner.method { 408 match inner.method {
420 InnerMethod::PDPS => { 409 InnerMethod::PDPS => {
421 let inner_θ = 1.0; 410 let inner_θ = 1.0;
422 // Estimate of ‖K‖ for K=θ\Id. 411 // Estimate of ‖K‖ for K=θ\Id.
423 let normest = inner_θ; 412 let normest = inner_θ;
424 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); 413 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
425 l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) 414 l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator)
426 }, 415 }
427 InnerMethod::FB => { 416 InnerMethod::PP | InnerMethod::FB => {
428 // The Lipschitz factor of ∇[x ↦ g^⊤ x + λ∑x]=g - λ𝟙 is FB is just a proximal point 417 let inner_τ = inner.pp_τ.0;
429 // method with on constraints on τ. We “accelerate” it by adding to τ the constant θ 418 let inner_θ = inner.pp_τ.1;
430 // on each iteration. Exponential growth does not seem stable.
431 let inner_τ = inner.fb_τ0;
432 let inner_θ = inner_τ;
433 l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator) 419 l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator)
434 }, 420 }
435 other => unimplemented!("${other:?} is unimplemented"), 421 other => unimplemented!("${other:?} is unimplemented"),
436 } 422 }
437 } 423 }

mercurial