Tue, 08 Apr 2025 13:31:39 -0500
Bump alg_tools version requirement
| 34 | 1 | /*! |
| 2 | Iterative algorithms for solving the finite-dimensional subproblem with constraint. | |
| 3 | */ | |
| 4 | ||
| 5 | use nalgebra::DVector; | |
| 6 | use numeric_literals::replace_float_literals; | |
| 7 | use itertools::izip; | |
| 8 | //use std::iter::zip; | |
| 9 | use std::cmp::Ordering::*; | |
| 10 | ||
| 11 | use alg_tools::iterate::{ | |
| 12 | AlgIteratorFactory, | |
| 13 | AlgIteratorState, | |
| 14 | }; | |
| 15 | use alg_tools::nalgebra_support::ToNalgebraRealField; | |
| 16 | use alg_tools::norms::{Dist, L1}; | |
| 17 | use alg_tools::nanleast::NaNLeast; | |
| 18 | ||
| 19 | use crate::types::*; | |
| 20 | use super::{ | |
| 21 | InnerMethod, | |
| 22 | InnerSettings | |
| 23 | }; | |
| 24 | use super::nonneg::nonneg_soft_thresholding; | |
| 25 | use super::l1squared_unconstrained::l1squared_prox; | |
| 26 | ||
| 27 | /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero. | |
| 28 | #[replace_float_literals(F::cast_from(literal))] | |
| 29 | pub(super) fn max_interval_dist_to_zero<F : Float>(dist : F, lb : F, ub : F) -> F { | |
| 30 | if lb < 0.0 { | |
| 31 | if ub > 0.0 { | |
| 32 | dist | |
| 33 | } else { | |
| 34 | dist.max(-ub) | |
| 35 | } | |
| 36 | } else /* ub ≥ 0.0*/ { | |
| 37 | dist.max(lb) | |
| 38 | } | |
| 39 | } | |
| 40 | ||
| 41 | /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥}(x)$ at $x$. | |
| 42 | /// | |
| 43 | /// `v` will be modified and cannot be trusted to contain useful values afterwards. | |
| 44 | #[replace_float_literals(F::cast_from(literal))] | |
| 45 | fn min_subdifferential<F : Float + nalgebra::RealField>( | |
| 46 | y : &DVector<F>, | |
| 47 | x : &DVector<F>, | |
| 48 | g : &DVector<F>, | |
| 49 | λ : F, | |
| 50 | β : F | |
| 51 | ) -> F { | |
| 52 | let mut val = 0.0; | |
| 53 | let tmp = β*y.dist(x, L1); | |
| 54 | for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { | |
| 55 | let (mut lb, mut ub) = (-g_i, -g_i); | |
| 56 | match x_i.partial_cmp(y_i) { | |
| 57 | Some(Greater) => { lb += tmp; ub += tmp }, | |
| 58 | Some(Less) => { lb -= tmp; ub -= tmp }, | |
| 59 | Some(Equal) => { lb -= tmp; ub += tmp }, | |
| 60 | None => {}, | |
| 61 | } | |
| 62 | match x_i.partial_cmp(&0.0) { | |
| 63 | Some(Greater) => { lb += λ; ub += λ }, | |
| 64 | // Less should not happen | |
| 65 | Some(Less|Equal) => { lb = F::NEG_INFINITY; ub += λ }, | |
| 66 | None => {}, | |
| 67 | }; | |
| 68 | val = max_interval_dist_to_zero(val, lb, ub); | |
| 69 | } | |
| 70 | val | |
| 71 | } | |
| 72 | ||
| 73 | #[replace_float_literals(F::cast_from(literal))] | |
| 74 | fn lbd_soft_thresholding<F : Float>(v : F, λ : F, b : F) -> F | |
| 75 | { | |
| 76 | match (b >= 0.0, v >= b) { | |
| 77 | (true, false) => b, | |
| 78 | (true, true) => b.max(v - λ), // soft-to-b from above | |
| 79 | (false, true) => super::unconstrained::soft_thresholding(v, λ), | |
| 80 | (false, false) => 0.0.min(b.max(v - λ)), // soft-to-0 with lower bound | |
| 81 | } | |
| 82 | } | |
| 83 | ||
| 84 | /// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2 + δ_{≥0}(x)$. | |
| 85 | /// | |
| 86 | /// To derive an algorithm for this, we can use | |
| 87 | /// $prox_f(x) = prox_{f_0}(x - y) - y$ for | |
| 88 | /// $f_0(z)=\frac{β}{2}\norm{z}_1^2 + δ_{≥-y}(z)$. | |
| 89 | /// Now, the optimality conditions for $w = prox_{f_0}(x)$ are | |
| 90 | /// $$\tag{*} | |
| 91 | /// x ∈ w + β\norm{w}_1\sign w + N_{≥ -y}(w). | |
| 92 | /// $$ | |
| 93 | /// If we know $\norm{w}_1$, then this is easily solved by lower-bounded soft-thresholding. | |
| 94 | /// We find this by sorting the elements by the distance to the 'locked' lower-bounded | |
| 95 | /// soft-thresholding target ($0$ or $-y_i$). | |
| 96 | /// Then we loop over this sorted vector, increasing our estimate of $\norm{w}_1$ as we decide | |
| 97 | /// that the soft-thresholding parameter `β\norm{w}_1` has to be such that the passed elements | |
| 98 | /// will reach their locked value (after which they cannot change anymore, for a larger | |
| 99 | /// soft-thresholding parameter. This has to be slightly more fine-grained for account | |
| 100 | /// for the case that $-y_i<0$ and $x_i < -y_i$. | |
| 101 | /// | |
| 102 | /// Indeed, we denote by x' and w' the subset of elements such that w_i ≠ 0 and w_i > -y_i, | |
| 103 | /// we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding lines of (*) that | |
| 104 | /// $$ | |
| 105 | /// \norm{x'} = \norm{w'} + β \norm{w}_1 m. | |
| 106 | /// $$ | |
| 107 | /// Having a value for $t = \norm{w}-\norm{w'}$, we can then calculate | |
| 108 | /// $$ | |
| 109 | /// \norm{x'} - t = (1+β m)\norm{w}_1, | |
| 110 | /// $$ | |
| 111 | /// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}_1$. | |
| 112 | /// Since we do not actually know the unlocked elements, but just loop over all the possibilities | |
| 113 | /// for them, we have to check that $λ$ is above the current lower bound for this parameter | |
| 114 | /// (`shift` in the code), and below a value that would cause changes in the locked set | |
| 115 | /// (`max_shift` in the code). | |
| 116 | #[replace_float_literals(F::cast_from(literal))] | |
| 117 | pub(super) fn l1squared_nonneg_prox<F :Float + nalgebra::RealField>( | |
| 118 | sorted : &mut Vec<(F, F, F, Option<(F, F)>)>, | |
| 119 | x : &mut DVector<F>, | |
| 120 | y : &DVector<F>, | |
| 121 | β : F | |
| 122 | ) { | |
| 123 | // nalgebra double-definition bullshit workaround | |
| 124 | //let max = alg_tools::NumTraitsFloat::max; | |
| 125 | let abs = alg_tools::NumTraitsFloat::abs; | |
| 126 | ||
| 127 | *x -= y; | |
| 128 | ||
| 129 | for (az_x_i, &x_i, &y_i) in izip!(sorted.iter_mut(), x.iter(), y.iter()) { | |
| 130 | // The first component of each az_x_i contains the distance of x_i to the | |
| 131 | // soft-thresholding limit. If it is negative, it is always reached. | |
| 132 | // The second component contains the absolute value of the result for that component | |
| 133 | // w_i of the solution, if the soft-thresholding limit is reached. | |
| 134 | // This is stored here due to the sorting, although could otherwise be computed directly. | |
| 135 | // Likewise the third component contains the absolute value of x_i. | |
| 136 | // The fourth component contains an initial lower bound. | |
| 137 | let a_i = abs(x_i); | |
| 138 | let b = -y_i; | |
| 139 | *az_x_i = match (b >= 0.0, x_i >= b) { | |
| 140 | (true, false) => (x_i-b, b, a_i, None), // w_i=b, so sorting element negative! | |
| 141 | (true, true) => (x_i-b, b, a_i, None), // soft-to-b from above | |
| 142 | (false, true) => (a_i, 0.0, a_i, None), // soft-to-0 | |
| 143 | (false, false) => (a_i, 0.0, a_i, Some((b, b-x_i))), // soft-to-0 with initial limit | |
| 144 | }; | |
| 145 | } | |
| 146 | sorted.as_mut_slice() | |
| 147 | .sort_unstable_by(|(a, _, _, _), (b, _, _, _)| NaNLeast(*a).cmp(&NaNLeast(*b))); | |
| 148 | ||
| 149 | let mut nwlow = 0.0; | |
| 150 | let mut shift = 0.0; | |
| 151 | // This main loop is over different combinations of elements of the solution locked | |
| 152 | // to the soft-thresholding lower bound (`0` or `-y_i`), in the sorted order of locking. | |
| 153 | for (i, az_x_i) in izip!(0.., sorted.iter()) { | |
| 154 | // This 'attempt is over different combinations of elements locked to the | |
| 155 | // lower bound (`-y_i ≤ 0`). It calculates `max_shift` as the maximum shift that | |
| 156 | // can be done until the locking would change (or become non-strictly-complementary). | |
| 157 | // If the main rule (*) gives an estimate of `λ` that stays below `max_shift`, it is | |
| 158 | // accepted. Otherwise `shift` is updated to `max_shift`, and we attempt again, | |
| 159 | // with the non-locking set that participates in the calculation of `λ` then including | |
| 160 | // the elements that are no longer locked to the lower bound. | |
| 161 | 'attempt: loop { | |
| 162 | let mut nwthis = 0.0; // contribution to ‖w‖ from elements with locking | |
| 163 | //soft-thresholding parameter = `shift` | |
| 164 | let mut nxmore = 0.0; // ‖x'‖ for those elements through to not be locked to | |
| 165 | // neither the soft-thresholding limit, nor the lower bound | |
| 166 | let mut nwlbd = 0.0; // contribution to ‖w‖ from those elements locked to their | |
| 167 | // lower bound | |
| 168 | let mut m = 0; | |
| 169 | let mut max_shift = F::INFINITY; // maximal shift for which our estimate of the set of | |
| 170 | // unlocked elements is valid. | |
| 171 | let mut max_shift_from_lbd = false; // Whether max_shift comes from the next element | |
| 172 | // or from a lower bound being reached. | |
| 173 | for az_x_j in sorted[i as usize..].iter() { | |
| 174 | if az_x_j.0 <= shift { | |
| 175 | nwthis += az_x_j.1; | |
| 176 | } else { | |
| 177 | match az_x_j.3 { | |
| 178 | Some((l, s)) if shift < s => { | |
| 179 | if max_shift > s { | |
| 180 | max_shift_from_lbd = true; | |
| 181 | max_shift = s; | |
| 182 | } | |
| 183 | nwlbd += -l; | |
| 184 | }, | |
| 185 | _ => { | |
| 186 | nxmore += az_x_j.2; | |
| 187 | if m == 0 && max_shift > az_x_j.0 { | |
| 188 | max_shift = az_x_j.0; | |
| 189 | max_shift_from_lbd = false; | |
| 190 | } | |
| 191 | m += 1; | |
| 192 | } | |
| 193 | } | |
| 194 | } | |
| 195 | } | |
| 196 | ||
| 197 | // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ - (‖w‖-‖w'‖)= (1 + β m)‖w‖. | |
| 198 | let tmp = β*(nxmore - (nwlow + nwthis + nwlbd))/(1.0 + β*F::cast_from(m)); | |
| 199 | if tmp > max_shift { | |
| 200 | if max_shift_from_lbd { | |
| 201 | shift = max_shift; | |
| 202 | continue 'attempt; | |
| 203 | } else { | |
| 204 | break 'attempt | |
| 205 | } | |
| 206 | } else if tmp < shift { | |
| 207 | // TODO: this should probably be an assert!(false) | |
| 208 | break 'attempt; | |
| 209 | } else { | |
| 210 | // success | |
| 211 | x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, tmp, -y_i)); | |
| 212 | return | |
| 213 | } | |
| 214 | } | |
| 215 | shift = az_x_i.0; | |
| 216 | nwlow += az_x_i.1; | |
| 217 | } | |
| 218 | // TODO: this fallback has not been verified to be correct | |
| 219 | x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, shift, -y_i)); | |
| 220 | } | |
| 221 | ||
| 222 | /// Proximal point method implementation of [`l1squared_nonneg`]. | |
| 223 | /// For detailed documentation of the inputs and outputs, refer to there. | |
| 224 | /// | |
| 225 | /// The `λ` component of the model is handled in the proximal step instead of the gradient step | |
| 226 | /// for potential performance improvements. | |
| 227 | #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] | |
| 228 | pub fn l1squared_nonneg_pp<F, I>( | |
| 229 | y : &DVector<F::MixedType>, | |
| 230 | g : &DVector<F::MixedType>, | |
| 231 | λ_ : F, | |
| 232 | β_ : F, | |
| 233 | x : &mut DVector<F::MixedType>, | |
| 234 | τ_ : F, | |
| 235 | θ_ : F, | |
| 236 | iterator : I | |
| 237 | ) -> usize | |
| 238 | where F : Float + ToNalgebraRealField, | |
| 239 | I : AlgIteratorFactory<F> | |
| 240 | { | |
| 241 | let λ = λ_.to_nalgebra_mixed(); | |
| 242 | let β = β_.to_nalgebra_mixed(); | |
| 243 | let mut τ = τ_.to_nalgebra_mixed(); | |
| 244 | let θ = θ_.to_nalgebra_mixed(); | |
| 245 | let mut tmp = std::iter::repeat((0.0, 0.0, 0.0, None)).take(x.len()).collect(); | |
| 246 | let mut iters = 0; | |
| 247 | ||
| 248 | iterator.iterate(|state| { | |
| 249 | // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g)) | |
| 250 | x.apply(|x_i| *x_i -= τ*λ); | |
| 251 | x.axpy(τ, g, 1.0); | |
| 252 | l1squared_nonneg_prox(&mut tmp, x, y, τ*β); | |
| 253 | ||
| 254 | iters += 1; | |
| 255 | // This gives O(1/N^2) rates due to monotonicity of function values. | |
| 256 | // Higher acceleration does not seem to be numerically stable. | |
| 257 | τ += θ; | |
| 258 | ||
| 259 | // This gives O(1/N^3) rates due to monotonicity of function values. | |
| 260 | // Higher acceleration does not seem to be numerically stable. | |
| 261 | //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ; | |
| 262 | ||
| 263 | state.if_verbose(|| { | |
| 264 | F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) | |
| 265 | }) | |
| 266 | }); | |
| 267 | ||
| 268 | iters | |
| 269 | } | |
| 270 | ||
| 271 | /// PDPS implementation of [`l1squared_nonneg`]. | |
| 272 | /// For detailed documentation of the inputs and outputs, refer to there. | |
| 273 | /// | |
| 274 | /// The `λ` component of the model is handled in the proximal step instead of the gradient step | |
| 275 | /// for potential performance improvements. | |
| 276 | /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. | |
| 277 | #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] | |
| 278 | pub fn l1squared_nonneg_pdps<F, I>( | |
| 279 | y : &DVector<F::MixedType>, | |
| 280 | g : &DVector<F::MixedType>, | |
| 281 | λ_ : F, | |
| 282 | β_ : F, | |
| 283 | x : &mut DVector<F::MixedType>, | |
| 284 | τ_ : F, | |
| 285 | σ_ : F, | |
| 286 | θ_ : F, | |
| 287 | iterator : I | |
| 288 | ) -> usize | |
| 289 | where F : Float + ToNalgebraRealField, | |
| 290 | I : AlgIteratorFactory<F> | |
| 291 | { | |
| 292 | let λ = λ_.to_nalgebra_mixed(); | |
| 293 | let β = β_.to_nalgebra_mixed(); | |
| 294 | let τ = τ_.to_nalgebra_mixed(); | |
| 295 | let σ = σ_.to_nalgebra_mixed(); | |
| 296 | let θ = θ_.to_nalgebra_mixed(); | |
| 297 | let mut w = DVector::zeros(x.len()); | |
| 298 | let mut tmp = DVector::zeros(x.len()); | |
| 299 | let mut xprev = x.clone(); | |
| 300 | let mut iters = 0; | |
| 301 | ||
| 302 | iterator.iterate(|state| { | |
| 303 | // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ (w^k - g)) | |
| 304 | x.axpy(-τ*θ, &w, 1.0); | |
| 305 | x.axpy(τ, g, 1.0); | |
| 306 | l1squared_prox(&mut tmp, x, y, τ*β); | |
| 307 | ||
| 308 | // Dual step: w^{k+1} = proj_{[-∞,λ]}(w^k + σ(2x^{k+1}-x^k)) | |
| 309 | w.axpy(2.0*σ*θ, x, 1.0); | |
| 310 | w.axpy(-σ*θ, &xprev, 1.0); | |
| 311 | w.apply(|w_i| *w_i = w_i.min(λ)); | |
| 312 | xprev.copy_from(x); | |
| 313 | ||
| 314 | iters +=1; | |
| 315 | ||
| 316 | state.if_verbose(|| { | |
| 317 | F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) | |
| 318 | }) | |
| 319 | }); | |
| 320 | ||
| 321 | iters | |
| 322 | } | |
| 323 | ||
| 324 | /// Alternative PDPS implementation of [`l1squared_nonneg`]. | |
| 325 | /// For detailed documentation of the inputs and outputs, refer to there. | |
| 326 | /// | |
|
42
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
327 | /// By not dualising the 1-norm, this should produce more sparse solutions than |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
328 | /// [`l1squared_nonneg_pdps`]. |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
329 | /// |
| 34 | 330 | /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
| 331 | /// for potential performance improvements. | |
| 332 | /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. | |
|
42
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
333 | /// We rewrite |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
334 | /// <div>$$ |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
335 | /// \begin{split} |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
336 | /// & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x) \\ |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
337 | /// & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x) |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
338 | /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
339 | /// \end{split} |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
340 | /// $$</div> |
| 34 | 341 | #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
| 342 | pub fn l1squared_nonneg_pdps_alt<F, I>( | |
| 343 | y : &DVector<F::MixedType>, | |
| 344 | g : &DVector<F::MixedType>, | |
| 345 | λ_ : F, | |
| 346 | β_ : F, | |
| 347 | x : &mut DVector<F::MixedType>, | |
| 348 | τ_ : F, | |
| 349 | σ_ : F, | |
| 350 | θ_ : F, | |
| 351 | iterator : I | |
| 352 | ) -> usize | |
| 353 | where F : Float + ToNalgebraRealField, | |
| 354 | I : AlgIteratorFactory<F> | |
| 355 | { | |
| 356 | let λ = λ_.to_nalgebra_mixed(); | |
| 357 | let τ = τ_.to_nalgebra_mixed(); | |
| 358 | let σ = σ_.to_nalgebra_mixed(); | |
| 359 | let θ = θ_.to_nalgebra_mixed(); | |
| 360 | let β = β_.to_nalgebra_mixed(); | |
| 361 | let σθ = σ*θ; | |
| 362 | let τθ = τ*θ; | |
| 363 | let mut w = DVector::zeros(x.len()); | |
| 364 | let mut tmp = DVector::zeros(x.len()); | |
| 365 | let mut xprev = x.clone(); | |
| 366 | let mut iters = 0; | |
| 367 | ||
| 368 | iterator.iterate(|state| { | |
| 369 | // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g)) | |
| 370 | x.axpy(-τθ, &w, 1.0); | |
| 371 | x.axpy(τ, g, 1.0); | |
| 372 | x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ)); | |
| 373 | ||
|
42
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
374 | // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
375 | // we compute w^{k+1} = prox_{σg^*}(q) for |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
376 | // = q - σ prox_{g/σ}(q/σ) |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
377 | // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
378 | // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
379 | // where q/σ = w^k/σ + (2x^{k+1}-x^k), |
|
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
380 | w /= σ; |
| 34 | 381 | w.axpy(2.0, x, 1.0); |
| 382 | w.axpy(-1.0, &xprev, 1.0); | |
| 383 | xprev.copy_from(&w); // use xprev as temporary variable | |
|
42
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
384 | l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); |
| 34 | 385 | w -= &xprev; |
|
42
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
386 | w *= σ; |
| 34 | 387 | xprev.copy_from(x); |
| 388 | ||
|
42
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
389 | iters += 1; |
| 34 | 390 | |
| 391 | state.if_verbose(|| { | |
| 392 | F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) | |
| 393 | }) | |
| 394 | }); | |
| 395 | ||
| 396 | iters | |
| 397 | } | |
| 398 | ||
| 399 | ||
| 400 | /// This function applies an iterative method for the solution of the problem | |
| 401 | /// <div>$$ | |
| 402 | /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x). | |
| 403 | /// $$</div> | |
| 404 | /// | |
| 405 | /// This function returns the number of iterations taken. | |
| 406 | #[replace_float_literals(F::cast_from(literal))] | |
| 407 | pub fn l1squared_nonneg<F, I>( | |
| 408 | y : &DVector<F::MixedType>, | |
| 409 | g : &DVector<F::MixedType>, | |
| 410 | λ : F, | |
| 411 | β : F, | |
| 412 | x : &mut DVector<F::MixedType>, | |
| 413 | inner : &InnerSettings<F>, | |
| 414 | iterator : I | |
| 415 | ) -> usize | |
| 416 | where F : Float + ToNalgebraRealField, | |
| 417 | I : AlgIteratorFactory<F> | |
| 418 | { | |
| 419 | match inner.method { | |
|
39
6316d68b58af
Merging adjustments, parameter tuning, etc.
Tuomo Valkonen <tuomov@iki.fi>
parents:
34
diff
changeset
|
420 | InnerMethod::PDPS => { |
|
42
6a7365d73e4c
Fixes to Radon norm prox term inner algorithm
Tuomo Valkonen <tuomov@iki.fi>
parents:
39
diff
changeset
|
421 | let inner_θ = 1.0; |
| 34 | 422 | // Estimate of ‖K‖ for K=θ\Id. |
| 423 | let normest = inner_θ; | |
| 424 | let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); | |
| 425 | l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) | |
| 426 | }, | |
|
39
6316d68b58af
Merging adjustments, parameter tuning, etc.
Tuomo Valkonen <tuomov@iki.fi>
parents:
34
diff
changeset
|
427 | InnerMethod::FB => { |
| 34 | 428 | // The Lipschitz factor of ∇[x ↦ g^⊤ x + λ∑x]=g - λ𝟙 is FB is just a proximal point |
| 429 | // method with on constraints on τ. We “accelerate” it by adding to τ the constant θ | |
| 430 | // on each iteration. Exponential growth does not seem stable. | |
| 431 | let inner_τ = inner.fb_τ0; | |
| 432 | let inner_θ = inner_τ; | |
| 433 | l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator) | |
| 434 | }, | |
|
39
6316d68b58af
Merging adjustments, parameter tuning, etc.
Tuomo Valkonen <tuomov@iki.fi>
parents:
34
diff
changeset
|
435 | other => unimplemented!("${other:?} is unimplemented"), |
| 34 | 436 | } |
| 437 | } |