| 1 /*! |
1 /*! |
| 2 Iterative algorithms for solving the finite-dimensional subproblem with constraint. |
2 Iterative algorithms for solving the finite-dimensional subproblem with constraint. |
| 3 */ |
3 */ |
| 4 |
4 |
| |
5 use itertools::izip; |
| 5 use nalgebra::DVector; |
6 use nalgebra::DVector; |
| 6 use numeric_literals::replace_float_literals; |
7 use numeric_literals::replace_float_literals; |
| 7 use itertools::izip; |
|
| 8 //use std::iter::zip; |
8 //use std::iter::zip; |
| 9 use std::cmp::Ordering::*; |
9 use std::cmp::Ordering::*; |
| 10 |
10 |
| 11 use alg_tools::iterate::{ |
11 use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState}; |
| 12 AlgIteratorFactory, |
|
| 13 AlgIteratorState, |
|
| 14 }; |
|
| 15 use alg_tools::nalgebra_support::ToNalgebraRealField; |
12 use alg_tools::nalgebra_support::ToNalgebraRealField; |
| 16 use alg_tools::norms::{Dist, L1}; |
13 use alg_tools::norms::{Dist, L1}; |
| 17 use alg_tools::nanleast::NaNLeast; |
14 |
| 18 |
15 use super::l1squared_unconstrained::l1squared_prox; |
| |
16 use super::nonneg::nonneg_soft_thresholding; |
| |
17 use super::{InnerMethod, InnerSettings}; |
| 19 use crate::types::*; |
18 use crate::types::*; |
| 20 use super::{ |
|
| 21 InnerMethod, |
|
| 22 InnerSettings |
|
| 23 }; |
|
| 24 use super::nonneg::nonneg_soft_thresholding; |
|
| 25 use super::l1squared_unconstrained::l1squared_prox; |
|
| 26 |
19 |
| 27 /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero. |
20 /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero. |
| 28 #[replace_float_literals(F::cast_from(literal))] |
21 #[replace_float_literals(F::cast_from(literal))] |
| 29 pub(super) fn max_interval_dist_to_zero<F : Float>(dist : F, lb : F, ub : F) -> F { |
22 pub(super) fn max_interval_dist_to_zero<F: Float>(dist: F, lb: F, ub: F) -> F { |
| 30 if lb < 0.0 { |
23 if lb < 0.0 { |
| 31 if ub > 0.0 { |
24 if ub > 0.0 { |
| 32 dist |
25 dist |
| 33 } else { |
26 } else { |
| 34 dist.max(-ub) |
27 dist.max(-ub) |
| 35 } |
28 } |
| 36 } else /* ub ≥ 0.0*/ { |
29 } else |
| |
30 /* ub ≥ 0.0*/ |
| |
31 { |
| 37 dist.max(lb) |
32 dist.max(lb) |
| 38 } |
33 } |
| 39 } |
34 } |
| 40 |
35 |
| 41 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥}(x)$ at $x$. |
36 /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥0}(x)$ at $x$. |
| 42 /// |
37 /// |
| 43 /// `v` will be modified and cannot be trusted to contain useful values afterwards. |
38 /// `v` will be modified and cannot be trusted to contain useful values afterwards. |
| 44 #[replace_float_literals(F::cast_from(literal))] |
39 #[replace_float_literals(F::cast_from(literal))] |
| 45 fn min_subdifferential<F : Float + nalgebra::RealField>( |
40 fn min_subdifferential<F: Float + nalgebra::RealField>( |
| 46 y : &DVector<F>, |
41 y: &DVector<F>, |
| 47 x : &DVector<F>, |
42 x: &DVector<F>, |
| 48 g : &DVector<F>, |
43 g: &DVector<F>, |
| 49 λ : F, |
44 λ: F, |
| 50 β : F |
45 β: F, |
| 51 ) -> F { |
46 ) -> F { |
| 52 let mut val = 0.0; |
47 let mut val = 0.0; |
| 53 let tmp = β*y.dist(x, L1); |
48 let tmp = β * y.dist(x, L1); |
| 54 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { |
49 for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { |
| 55 let (mut lb, mut ub) = (-g_i, -g_i); |
50 let (mut lb, mut ub) = (-g_i, -g_i); |
| 56 match x_i.partial_cmp(y_i) { |
51 match x_i.partial_cmp(y_i) { |
| 57 Some(Greater) => { lb += tmp; ub += tmp }, |
52 Some(Greater) => { |
| 58 Some(Less) => { lb -= tmp; ub -= tmp }, |
53 lb += tmp; |
| 59 Some(Equal) => { lb -= tmp; ub += tmp }, |
54 ub += tmp |
| 60 None => {}, |
55 } |
| |
56 Some(Less) => { |
| |
57 lb -= tmp; |
| |
58 ub -= tmp |
| |
59 } |
| |
60 Some(Equal) => { |
| |
61 lb -= tmp; |
| |
62 ub += tmp |
| |
63 } |
| |
64 None => {} |
| 61 } |
65 } |
| 62 match x_i.partial_cmp(&0.0) { |
66 match x_i.partial_cmp(&0.0) { |
| 63 Some(Greater) => { lb += λ; ub += λ }, |
67 Some(Greater) => { |
| |
68 lb += λ; |
| |
69 ub += λ |
| |
70 } |
| 64 // Less should not happen |
71 // Less should not happen |
| 65 Some(Less|Equal) => { lb = F::NEG_INFINITY; ub += λ }, |
72 Some(Less | Equal) => { |
| 66 None => {}, |
73 lb = F::NEG_INFINITY; |
| |
74 ub += λ |
| |
75 } |
| |
76 None => {} |
| 67 }; |
77 }; |
| 68 val = max_interval_dist_to_zero(val, lb, ub); |
78 val = max_interval_dist_to_zero(val, lb, ub); |
| 69 } |
79 } |
| 70 val |
80 val |
| 71 } |
81 } |
| 72 |
82 |
| |
83 // #[replace_float_literals(F::cast_from(literal))] |
| |
84 // fn lbd_soft_thresholding<F: Float>(v: F, λ: F, b: F) -> F { |
| |
85 // match (b >= 0.0, v >= b) { |
| |
86 // (true, false) => b, |
| |
87 // (true, true) => b.max(v - λ), // soft-to-b from above |
| |
88 // (false, true) => super::unconstrained::soft_thresholding(v, λ), |
| |
89 // (false, false) => 0.0.min(b.max(v + λ)), // soft-to-0 with lower bound |
| |
90 // } |
| |
91 // } |
| |
92 |
| |
93 /// Calculates $\prox\_f(x)$ for $f(x)=λ\abs{x-y} + δ\_{≥0}(x)$. |
| |
94 /// This is the first output. The second output is the first output $-y$, i..e, the prox |
| |
95 /// of $f\_0$ for the shift function $f\_0(z) = f(z+y) = λ\abs{z} + δ\_{≥-y}(z)$ |
| |
96 /// satisfying |
| |
97 /// $$ |
| |
98 /// \prox_f(x) = \prox\_{f\_0}(x - y) + y, |
| |
99 /// $$ |
| |
100 /// which is also used internally. |
| |
101 /// The third output indicates whether the output is “locked” to either $0$ (resp. $-y$ in |
| |
102 /// the reformulation), or $y$ ($0$ in the reformulation). |
| |
103 /// The fourth output indicates the next highest $λ$ where such locking would happen. |
| 73 #[replace_float_literals(F::cast_from(literal))] |
104 #[replace_float_literals(F::cast_from(literal))] |
| 74 fn lbd_soft_thresholding<F : Float>(v : F, λ : F, b : F) -> F |
105 #[inline] |
| 75 { |
106 fn shifted_nonneg_soft_thresholding<F: Float>(x: F, y: F, λ: F) -> (F, F, bool, Option<F>) { |
| 76 match (b >= 0.0, v >= b) { |
107 let z = x - y; // The shift to f_0 |
| 77 (true, false) => b, |
108 if -y >= 0.0 { |
| 78 (true, true) => b.max(v - λ), // soft-to-b from above |
109 if x > λ { |
| 79 (false, true) => super::unconstrained::soft_thresholding(v, λ), |
110 (x - λ, z - λ, false, Some(x)) |
| 80 (false, false) => 0.0.min(b.max(v - λ)), // soft-to-0 with lower bound |
111 } else { |
| |
112 (0.0, -y, true, None) |
| |
113 } |
| |
114 } else if z < 0.0 { |
| |
115 if λ < -x { |
| |
116 (0.0, -y, true, Some(-x)) |
| |
117 } else if z + λ < 0.0 { |
| |
118 (x + λ, z + λ, false, Some(-z)) |
| |
119 } else { |
| |
120 (y, 0.0, true, None) |
| |
121 } |
| |
122 } else if z > λ { |
| |
123 (x - λ, z - λ, false, Some(z)) |
| |
124 } else { |
| |
125 (y, 0.0, true, None) |
| 81 } |
126 } |
| 82 } |
127 } |
| 83 |
128 |
| 84 /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2 + δ_{≥0}(x)$. |
129 /// Calculate $\prox\_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}\_1\^2 + δ\_{≥0}(x)$. |
| 85 /// |
130 /// |
| 86 /// To derive an algorithm for this, we can use |
131 /// To derive an algorithm for this, we can use |
| 87 /// $prox_f(x) = prox_{f_0}(x - y) - y$ for |
132 /// $$ |
| 88 /// $f_0(z)=\frac{β}{2}\norm{z}_1^2 + δ_{≥-y}(z)$. |
133 /// \prox_f(x) = \prox\_{f\_0}(x - y) + y |
| 89 /// Now, the optimality conditions for $w = prox_{f_0}(x)$ are |
134 /// \quad\text{for}\quad |
| |
135 /// f\_0(z)=\frac{β}{2}\norm{z}\_1\^2 + δ\_{≥-y}(z). |
| |
136 /// $$ |
| |
137 /// Now, the optimality conditions for $w = \prox\_{f\_0}(x)$ are |
| 90 /// $$\tag{*} |
138 /// $$\tag{*} |
| 91 /// x ∈ w + β\norm{w}_1\sign w + N_{≥ -y}(w). |
139 /// x ∈ w + β\norm{w}\_1\sign w + N\_{≥ -y}(w). |
| 92 /// $$ |
140 /// $$ |
| 93 /// If we know $\norm{w}_1$, then this is easily solved by lower-bounded soft-thresholding. |
141 /// If we know $\norm{w}\_1$, then this is easily solved by lower-bounded soft-thresholding. |
| 94 /// We find this by sorting the elements by the distance to the 'locked' lower-bounded |
142 /// We find this by sorting the elements by the distance to the 'locked' lower-bounded |
| 95 /// soft-thresholding target ($0$ or $-y_i$). |
143 /// soft-thresholding target ($0$ or $-y_i$). |
| 96 /// Then we loop over this sorted vector, increasing our estimate of $\norm{w}_1$ as we decide |
144 /// Then we loop over this sorted vector, increasing our estimate of $\norm{w}\_1$ as we decide |
| 97 /// that the soft-thresholding parameter `β\norm{w}_1` has to be such that the passed elements |
145 /// that the soft-thresholding parameter $β\norm{w}\_1$ has to be such that the passed elements |
| 98 /// will reach their locked value (after which they cannot change anymore, for a larger |
146 /// will reach their locked value (after which they cannot change anymore, for a larger |
| 99 /// soft-thresholding parameter. This has to be slightly more fine-grained for account |
147 /// soft-thresholding parameter. This has to be slightly more fine-grained for account |
| 100 /// for the case that $-y_i<0$ and $x_i < -y_i$. |
148 /// for the case that $-y\_i<0$ and $x\_i < -y\_i$. |
| 101 /// |
149 /// |
| 102 /// Indeed, we denote by x' and w' the subset of elements such that w_i ≠ 0 and w_i > -y_i, |
150 /// Indeed, denoting by $x'$ and $w'$ the subset of elements such that $w\_i ≠ 0$ and |
| 103 /// we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding lines of (*) that |
151 /// $w\_i > -y\_i$, we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding |
| 104 /// $$ |
152 /// lines of (*) that |
| 105 /// \norm{x'} = \norm{w'} + β \norm{w}_1 m. |
153 /// $$ |
| 106 /// $$ |
154 /// \norm{x'} = \norm{w'} + β \norm{w}\_1 m, |
| 107 /// Having a value for $t = \norm{w}-\norm{w'}$, we can then calculate |
155 /// $$ |
| 108 /// $$ |
156 /// where $m$ is the number of unlocked components. |
| 109 /// \norm{x'} - t = (1+β m)\norm{w}_1, |
157 /// We can now calculate the mass $t = \norm{w}-\norm{w'}$ of locked components, and so obtain |
| 110 /// $$ |
158 /// $$ |
| 111 /// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}_1$. |
159 /// \norm{x'} + t = (1+β m)\norm{w}\_1, |
| |
160 /// $$ |
| |
161 /// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}\_1$. |
| 112 /// Since we do not actually know the unlocked elements, but just loop over all the possibilities |
162 /// Since we do not actually know the unlocked elements, but just loop over all the possibilities |
| 113 /// for them, we have to check that $λ$ is above the current lower bound for this parameter |
163 /// for them, we have to check that $λ$ is above the current lower bound for this parameter |
| 114 /// (`shift` in the code), and below a value that would cause changes in the locked set |
164 /// (`shift` in the code), and below a value that would cause changes in the locked set |
| 115 /// (`max_shift` in the code). |
165 /// (`max_shift` in the code). |
| 116 #[replace_float_literals(F::cast_from(literal))] |
166 #[replace_float_literals(F::cast_from(literal))] |
| 117 pub(super) fn l1squared_nonneg_prox<F :Float + nalgebra::RealField>( |
167 pub fn l1squared_nonneg_prox<F: Float + nalgebra::RealField>( |
| 118 sorted : &mut Vec<(F, F, F, Option<(F, F)>)>, |
168 x: &mut DVector<F>, |
| 119 x : &mut DVector<F>, |
169 y: &DVector<F>, |
| 120 y : &DVector<F>, |
170 β: F, |
| 121 β : F |
|
| 122 ) { |
171 ) { |
| 123 // nalgebra double-definition bullshit workaround |
172 // nalgebra double-definition bullshit workaround |
| 124 //let max = alg_tools::NumTraitsFloat::max; |
|
| 125 let abs = alg_tools::NumTraitsFloat::abs; |
173 let abs = alg_tools::NumTraitsFloat::abs; |
| 126 |
174 let min = alg_tools::NumTraitsFloat::min; |
| 127 *x -= y; |
175 |
| 128 |
176 let mut λ = 0.0; |
| 129 for (az_x_i, &x_i, &y_i) in izip!(sorted.iter_mut(), x.iter(), y.iter()) { |
177 loop { |
| 130 // The first component of each az_x_i contains the distance of x_i to the |
178 let mut w_locked = 0.0; |
| 131 // soft-thresholding limit. If it is negative, it is always reached. |
179 let mut n_unlocked = 0; // m |
| 132 // The second component contains the absolute value of the result for that component |
180 let mut x_prime = 0.0; |
| 133 // w_i of the solution, if the soft-thresholding limit is reached. |
181 let mut max_shift = F::INFINITY; |
| 134 // This is stored here due to the sorting, although could otherwise be computed directly. |
182 for (&x_i, &y_i) in izip!(x.iter(), y.iter()) { |
| 135 // Likewise the third component contains the absolute value of x_i. |
183 let (_, a_shifted, locked, next_lock) = shifted_nonneg_soft_thresholding(x_i, y_i, λ); |
| 136 // The fourth component contains an initial lower bound. |
184 |
| 137 let a_i = abs(x_i); |
185 if let Some(t) = next_lock { |
| 138 let b = -y_i; |
186 max_shift = min(max_shift, t); |
| 139 *az_x_i = match (b >= 0.0, x_i >= b) { |
187 assert!(max_shift > λ); |
| 140 (true, false) => (x_i-b, b, a_i, None), // w_i=b, so sorting element negative! |
188 } |
| 141 (true, true) => (x_i-b, b, a_i, None), // soft-to-b from above |
189 if locked { |
| 142 (false, true) => (a_i, 0.0, a_i, None), // soft-to-0 |
190 w_locked += abs(a_shifted); |
| 143 (false, false) => (a_i, 0.0, a_i, Some((b, b-x_i))), // soft-to-0 with initial limit |
191 } else { |
| 144 }; |
192 n_unlocked += 1; |
| |
193 x_prime += abs(x_i - y_i); |
| |
194 } |
| |
195 } |
| |
196 |
| |
197 // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ + (‖w‖-‖w'‖)= (1 + β m)‖w‖. |
| |
198 let λ_new = (x_prime + w_locked) / (1.0 / β + F::cast_from(n_unlocked)); |
| |
199 |
| |
200 if λ_new > max_shift { |
| |
201 λ = max_shift; |
| |
202 } else { |
| |
203 assert!(λ_new >= λ); |
| |
204 // success |
| |
205 x.zip_apply(y, |x_i, y_i| { |
| |
206 let (a, _, _, _) = shifted_nonneg_soft_thresholding(*x_i, y_i, λ_new); |
| |
207 //*x_i = y_i + lbd_soft_thresholding(*x_i, λ_new, -y_i) |
| |
208 *x_i = a; |
| |
209 }); |
| |
210 return; |
| |
211 } |
| 145 } |
212 } |
| 146 sorted.as_mut_slice() |
|
| 147 .sort_unstable_by(|(a, _, _, _), (b, _, _, _)| NaNLeast(*a).cmp(&NaNLeast(*b))); |
|
| 148 |
|
| 149 let mut nwlow = 0.0; |
|
| 150 let mut shift = 0.0; |
|
| 151 // This main loop is over different combinations of elements of the solution locked |
|
| 152 // to the soft-thresholding lower bound (`0` or `-y_i`), in the sorted order of locking. |
|
| 153 for (i, az_x_i) in izip!(0.., sorted.iter()) { |
|
| 154 // This 'attempt is over different combinations of elements locked to the |
|
| 155 // lower bound (`-y_i ≤ 0`). It calculates `max_shift` as the maximum shift that |
|
| 156 // can be done until the locking would change (or become non-strictly-complementary). |
|
| 157 // If the main rule (*) gives an estimate of `λ` that stays below `max_shift`, it is |
|
| 158 // accepted. Otherwise `shift` is updated to `max_shift`, and we attempt again, |
|
| 159 // with the non-locking set that participates in the calculation of `λ` then including |
|
| 160 // the elements that are no longer locked to the lower bound. |
|
| 161 'attempt: loop { |
|
| 162 let mut nwthis = 0.0; // contribution to ‖w‖ from elements with locking |
|
| 163 //soft-thresholding parameter = `shift` |
|
| 164 let mut nxmore = 0.0; // ‖x'‖ for those elements through to not be locked to |
|
| 165 // neither the soft-thresholding limit, nor the lower bound |
|
| 166 let mut nwlbd = 0.0; // contribution to ‖w‖ from those elements locked to their |
|
| 167 // lower bound |
|
| 168 let mut m = 0; |
|
| 169 let mut max_shift = F::INFINITY; // maximal shift for which our estimate of the set of |
|
| 170 // unlocked elements is valid. |
|
| 171 let mut max_shift_from_lbd = false; // Whether max_shift comes from the next element |
|
| 172 // or from a lower bound being reached. |
|
| 173 for az_x_j in sorted[i as usize..].iter() { |
|
| 174 if az_x_j.0 <= shift { |
|
| 175 nwthis += az_x_j.1; |
|
| 176 } else { |
|
| 177 match az_x_j.3 { |
|
| 178 Some((l, s)) if shift < s => { |
|
| 179 if max_shift > s { |
|
| 180 max_shift_from_lbd = true; |
|
| 181 max_shift = s; |
|
| 182 } |
|
| 183 nwlbd += -l; |
|
| 184 }, |
|
| 185 _ => { |
|
| 186 nxmore += az_x_j.2; |
|
| 187 if m == 0 && max_shift > az_x_j.0 { |
|
| 188 max_shift = az_x_j.0; |
|
| 189 max_shift_from_lbd = false; |
|
| 190 } |
|
| 191 m += 1; |
|
| 192 } |
|
| 193 } |
|
| 194 } |
|
| 195 } |
|
| 196 |
|
| 197 // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ - (‖w‖-‖w'‖)= (1 + β m)‖w‖. |
|
| 198 let tmp = β*(nxmore - (nwlow + nwthis + nwlbd))/(1.0 + β*F::cast_from(m)); |
|
| 199 if tmp > max_shift { |
|
| 200 if max_shift_from_lbd { |
|
| 201 shift = max_shift; |
|
| 202 continue 'attempt; |
|
| 203 } else { |
|
| 204 break 'attempt |
|
| 205 } |
|
| 206 } else if tmp < shift { |
|
| 207 // TODO: this should probably be an assert!(false) |
|
| 208 break 'attempt; |
|
| 209 } else { |
|
| 210 // success |
|
| 211 x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, tmp, -y_i)); |
|
| 212 return |
|
| 213 } |
|
| 214 } |
|
| 215 shift = az_x_i.0; |
|
| 216 nwlow += az_x_i.1; |
|
| 217 } |
|
| 218 // TODO: this fallback has not been verified to be correct |
|
| 219 x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, shift, -y_i)); |
|
| 220 } |
213 } |
| 221 |
214 |
| 222 /// Proximal point method implementation of [`l1squared_nonneg`]. |
215 /// Proximal point method implementation of [`l1squared_nonneg`]. |
| 223 /// For detailed documentation of the inputs and outputs, refer to there. |
216 /// For detailed documentation of the inputs and outputs, refer to there. |
| 224 /// |
217 /// |
| 225 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
218 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
| 226 /// for potential performance improvements. |
219 /// for potential performance improvements. |
| 227 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
220 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| 228 pub fn l1squared_nonneg_pp<F, I>( |
221 pub fn l1squared_nonneg_pp<F, I>( |
| 229 y : &DVector<F::MixedType>, |
222 y: &DVector<F::MixedType>, |
| 230 g : &DVector<F::MixedType>, |
223 g: &DVector<F::MixedType>, |
| 231 λ_ : F, |
224 λ_: F, |
| 232 β_ : F, |
225 β_: F, |
| 233 x : &mut DVector<F::MixedType>, |
226 x: &mut DVector<F::MixedType>, |
| 234 τ_ : F, |
227 τ_: F, |
| 235 θ_ : F, |
228 θ_: F, |
| 236 iterator : I |
229 iterator: I, |
| 237 ) -> usize |
230 ) -> usize |
| 238 where F : Float + ToNalgebraRealField, |
231 where |
| 239 I : AlgIteratorFactory<F> |
232 F: Float + ToNalgebraRealField, |
| |
233 I: AlgIteratorFactory<F>, |
| 240 { |
234 { |
| 241 let λ = λ_.to_nalgebra_mixed(); |
235 let λ = λ_.to_nalgebra_mixed(); |
| 242 let β = β_.to_nalgebra_mixed(); |
236 let β = β_.to_nalgebra_mixed(); |
| 243 let mut τ = τ_.to_nalgebra_mixed(); |
237 let mut τ = τ_.to_nalgebra_mixed(); |
| 244 let θ = θ_.to_nalgebra_mixed(); |
238 let θ = θ_.to_nalgebra_mixed(); |
| 245 let mut tmp = std::iter::repeat((0.0, 0.0, 0.0, None)).take(x.len()).collect(); |
|
| 246 let mut iters = 0; |
239 let mut iters = 0; |
| 247 |
240 |
| 248 iterator.iterate(|state| { |
241 iterator.iterate(|state| { |
| 249 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g)) |
242 // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g)) |
| 250 x.apply(|x_i| *x_i -= τ*λ); |
243 x.apply(|x_i| *x_i -= τ * λ); |
| 251 x.axpy(τ, g, 1.0); |
244 x.axpy(τ, g, 1.0); |
| 252 l1squared_nonneg_prox(&mut tmp, x, y, τ*β); |
245 l1squared_nonneg_prox(x, y, τ * β); |
| 253 |
246 |
| 254 iters += 1; |
247 iters += 1; |
| 255 // This gives O(1/N^2) rates due to monotonicity of function values. |
248 // This gives O(1/N^2) rates due to monotonicity of function values. |
| 256 // Higher acceleration does not seem to be numerically stable. |
249 // Higher acceleration does not seem to be numerically stable. |
| 257 τ += θ; |
250 τ += θ; |
| 258 |
251 |
| 259 // This gives O(1/N^3) rates due to monotonicity of function values. |
252 // This gives O(1/N^3) rates due to monotonicity of function values. |
| 260 // Higher acceleration does not seem to be numerically stable. |
253 // Higher acceleration does not seem to be numerically stable. |
| 261 //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ; |
254 //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ; |
| 262 |
255 |
| 263 state.if_verbose(|| { |
256 state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) |
| 264 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) |
|
| 265 }) |
|
| 266 }); |
257 }); |
| 267 |
258 |
| 268 iters |
259 iters |
| 269 } |
260 } |
| 270 |
261 |
| 338 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). |
328 /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). |
| 339 /// \end{split} |
329 /// \end{split} |
| 340 /// $$</div> |
330 /// $$</div> |
| 341 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
331 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| 342 pub fn l1squared_nonneg_pdps_alt<F, I>( |
332 pub fn l1squared_nonneg_pdps_alt<F, I>( |
| 343 y : &DVector<F::MixedType>, |
333 y: &DVector<F::MixedType>, |
| 344 g : &DVector<F::MixedType>, |
334 g: &DVector<F::MixedType>, |
| 345 λ_ : F, |
335 λ_: F, |
| 346 β_ : F, |
336 β_: F, |
| 347 x : &mut DVector<F::MixedType>, |
337 x: &mut DVector<F::MixedType>, |
| 348 τ_ : F, |
338 τ_: F, |
| 349 σ_ : F, |
339 σ_: F, |
| 350 θ_ : F, |
340 θ_: F, |
| 351 iterator : I |
341 iterator: I, |
| 352 ) -> usize |
342 ) -> usize |
| 353 where F : Float + ToNalgebraRealField, |
343 where |
| 354 I : AlgIteratorFactory<F> |
344 F: Float + ToNalgebraRealField, |
| |
345 I: AlgIteratorFactory<F>, |
| 355 { |
346 { |
| 356 let λ = λ_.to_nalgebra_mixed(); |
347 let λ = λ_.to_nalgebra_mixed(); |
| 357 let τ = τ_.to_nalgebra_mixed(); |
348 let τ = τ_.to_nalgebra_mixed(); |
| 358 let σ = σ_.to_nalgebra_mixed(); |
349 let σ = σ_.to_nalgebra_mixed(); |
| 359 let θ = θ_.to_nalgebra_mixed(); |
350 let θ = θ_.to_nalgebra_mixed(); |
| 360 let β = β_.to_nalgebra_mixed(); |
351 let β = β_.to_nalgebra_mixed(); |
| 361 let σθ = σ*θ; |
352 let σθ = σ * θ; |
| 362 let τθ = τ*θ; |
353 let τθ = τ * θ; |
| 363 let mut w = DVector::zeros(x.len()); |
354 let mut w = DVector::zeros(x.len()); |
| 364 let mut tmp = DVector::zeros(x.len()); |
355 let mut tmp = DVector::zeros(x.len()); |
| 365 let mut xprev = x.clone(); |
356 let mut xprev = x.clone(); |
| 366 let mut iters = 0; |
357 let mut iters = 0; |
| 367 |
358 |
| 368 iterator.iterate(|state| { |
359 iterator.iterate(|state| { |
| 369 // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g)) |
360 // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g)) |
| 370 x.axpy(-τθ, &w, 1.0); |
361 x.axpy(-τθ, &w, 1.0); |
| 371 x.axpy(τ, g, 1.0); |
362 x.axpy(τ, g, 1.0); |
| 372 x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ)); |
363 x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ * λ)); |
| 373 |
364 |
| 374 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), |
365 // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), |
| 375 // we compute w^{k+1} = prox_{σg^*}(q) for |
366 // we compute w^{k+1} = prox_{σg^*}(q) for |
| 376 // = q - σ prox_{g/σ}(q/σ) |
367 // = q - σ prox_{g/σ}(q/σ) |
| 377 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) |
368 // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) |
| 378 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) |
369 // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) |
| 379 // where q/σ = w^k/σ + (2x^{k+1}-x^k), |
370 // where q/σ = w^k/σ + (2x^{k+1}-x^k), |
| 380 w /= σ; |
371 w /= σ; |
| 381 w.axpy(2.0, x, 1.0); |
372 w.axpy(2.0, x, 1.0); |
| 382 w.axpy(-1.0, &xprev, 1.0); |
373 w.axpy(-1.0, &xprev, 1.0); |
| 383 xprev.copy_from(&w); // use xprev as temporary variable |
374 xprev.copy_from(&w); // use xprev as temporary variable |
| 384 l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); |
375 l1squared_prox(&mut tmp, &mut xprev, y, β / σθ); |
| 385 w -= &xprev; |
376 w -= &xprev; |
| 386 w *= σ; |
377 w *= σ; |
| 387 xprev.copy_from(x); |
378 xprev.copy_from(x); |
| 388 |
379 |
| 389 iters += 1; |
380 iters += 1; |
| 390 |
381 |
| 391 state.if_verbose(|| { |
382 state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) |
| 392 F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) |
|
| 393 }) |
|
| 394 }); |
383 }); |
| 395 |
384 |
| 396 iters |
385 iters |
| 397 } |
386 } |
| 398 |
|
| 399 |
387 |
| 400 /// This function applies an iterative method for the solution of the problem |
388 /// This function applies an iterative method for the solution of the problem |
| 401 /// <div>$$ |
389 /// <div>$$ |
| 402 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x). |
390 /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x). |
| 403 /// $$</div> |
391 /// $$</div> |
| 404 /// |
392 /// |
| 405 /// This function returns the number of iterations taken. |
393 /// This function returns the number of iterations taken. |
| 406 #[replace_float_literals(F::cast_from(literal))] |
394 #[replace_float_literals(F::cast_from(literal))] |
| 407 pub fn l1squared_nonneg<F, I>( |
395 pub fn l1squared_nonneg<F, I>( |
| 408 y : &DVector<F::MixedType>, |
396 y: &DVector<F::MixedType>, |
| 409 g : &DVector<F::MixedType>, |
397 g: &DVector<F::MixedType>, |
| 410 λ : F, |
398 λ: F, |
| 411 β : F, |
399 β: F, |
| 412 x : &mut DVector<F::MixedType>, |
400 x: &mut DVector<F::MixedType>, |
| 413 inner : &InnerSettings<F>, |
401 inner: &InnerSettings<F>, |
| 414 iterator : I |
402 iterator: I, |
| 415 ) -> usize |
403 ) -> usize |
| 416 where F : Float + ToNalgebraRealField, |
404 where |
| 417 I : AlgIteratorFactory<F> |
405 F: Float + ToNalgebraRealField, |
| |
406 I: AlgIteratorFactory<F>, |
| 418 { |
407 { |
| 419 match inner.method { |
408 match inner.method { |
| 420 InnerMethod::PDPS => { |
409 InnerMethod::PDPS => { |
| 421 let inner_θ = 1.0; |
410 let inner_θ = 1.0; |
| 422 // Estimate of ‖K‖ for K=θ\Id. |
411 // Estimate of ‖K‖ for K=θ\Id. |
| 423 let normest = inner_θ; |
412 let normest = inner_θ; |
| 424 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); |
413 let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); |
| 425 l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) |
414 l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) |
| 426 }, |
415 } |
| 427 InnerMethod::FB => { |
416 InnerMethod::PP | InnerMethod::FB => { |
| 428 // The Lipschitz factor of ∇[x ↦ g^⊤ x + λ∑x]=g - λ𝟙 is FB is just a proximal point |
417 let inner_τ = inner.pp_τ.0; |
| 429 // method with on constraints on τ. We “accelerate” it by adding to τ the constant θ |
418 let inner_θ = inner.pp_τ.1; |
| 430 // on each iteration. Exponential growth does not seem stable. |
|
| 431 let inner_τ = inner.fb_τ0; |
|
| 432 let inner_θ = inner_τ; |
|
| 433 l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator) |
419 l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator) |
| 434 }, |
420 } |
| 435 other => unimplemented!("${other:?} is unimplemented"), |
421 other => unimplemented!("${other:?} is unimplemented"), |
| 436 } |
422 } |
| 437 } |
423 } |