diff -r 4f468d35fa29 -r 7a8a55fd41c0 src/subproblem/l1squared_nonneg.rs --- a/src/subproblem/l1squared_nonneg.rs Thu Feb 26 11:38:43 2026 -0500 +++ b/src/subproblem/l1squared_nonneg.rs Thu Feb 26 11:36:22 2026 -0500 @@ -2,221 +2,214 @@ Iterative algorithms for solving the finite-dimensional subproblem with constraint. */ +use itertools::izip; use nalgebra::DVector; use numeric_literals::replace_float_literals; -use itertools::izip; //use std::iter::zip; use std::cmp::Ordering::*; -use alg_tools::iterate::{ - AlgIteratorFactory, - AlgIteratorState, -}; +use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState}; use alg_tools::nalgebra_support::ToNalgebraRealField; use alg_tools::norms::{Dist, L1}; -use alg_tools::nanleast::NaNLeast; +use super::l1squared_unconstrained::l1squared_prox; +use super::nonneg::nonneg_soft_thresholding; +use super::{InnerMethod, InnerSettings}; use crate::types::*; -use super::{ - InnerMethod, - InnerSettings -}; -use super::nonneg::nonneg_soft_thresholding; -use super::l1squared_unconstrained::l1squared_prox; /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero. #[replace_float_literals(F::cast_from(literal))] -pub(super) fn max_interval_dist_to_zero(dist : F, lb : F, ub : F) -> F { +pub(super) fn max_interval_dist_to_zero(dist: F, lb: F, ub: F) -> F { if lb < 0.0 { if ub > 0.0 { dist } else { dist.max(-ub) } - } else /* ub ≥ 0.0*/ { + } else + /* ub ≥ 0.0*/ + { dist.max(lb) } } -/// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥}(x)$ at $x$. +/// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥0}(x)$ at $x$. /// /// `v` will be modified and cannot be trusted to contain useful values afterwards. #[replace_float_literals(F::cast_from(literal))] -fn min_subdifferential( - y : &DVector, - x : &DVector, - g : &DVector, - λ : F, - β : F +fn min_subdifferential( + y: &DVector, + x: &DVector, + g: &DVector, + λ: F, + β: F, ) -> F { let mut val = 0.0; - let tmp = β*y.dist(x, L1); + let tmp = β * y.dist(x, L1); for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { let (mut lb, mut ub) = (-g_i, -g_i); match x_i.partial_cmp(y_i) { - Some(Greater) => { lb += tmp; ub += tmp }, - Some(Less) => { lb -= tmp; ub -= tmp }, - Some(Equal) => { lb -= tmp; ub += tmp }, - None => {}, + Some(Greater) => { + lb += tmp; + ub += tmp + } + Some(Less) => { + lb -= tmp; + ub -= tmp + } + Some(Equal) => { + lb -= tmp; + ub += tmp + } + None => {} } match x_i.partial_cmp(&0.0) { - Some(Greater) => { lb += λ; ub += λ }, + Some(Greater) => { + lb += λ; + ub += λ + } // Less should not happen - Some(Less|Equal) => { lb = F::NEG_INFINITY; ub += λ }, - None => {}, + Some(Less | Equal) => { + lb = F::NEG_INFINITY; + ub += λ + } + None => {} }; val = max_interval_dist_to_zero(val, lb, ub); } val } +// #[replace_float_literals(F::cast_from(literal))] +// fn lbd_soft_thresholding(v: F, λ: F, b: F) -> F { +// match (b >= 0.0, v >= b) { +// (true, false) => b, +// (true, true) => b.max(v - λ), // soft-to-b from above +// (false, true) => super::unconstrained::soft_thresholding(v, λ), +// (false, false) => 0.0.min(b.max(v + λ)), // soft-to-0 with lower bound +// } +// } + +/// Calculates $\prox\_f(x)$ for $f(x)=λ\abs{x-y} + δ\_{≥0}(x)$. +/// This is the first output. The second output is the first output $-y$, i..e, the prox +/// of $f\_0$ for the shift function $f\_0(z) = f(z+y) = λ\abs{z} + δ\_{≥-y}(z)$ +/// satisfying +/// $$ +/// \prox_f(x) = \prox\_{f\_0}(x - y) + y, +/// $$ +/// which is also used internally. +/// The third output indicates whether the output is “locked” to either $0$ (resp. $-y$ in +/// the reformulation), or $y$ ($0$ in the reformulation). +/// The fourth output indicates the next highest $λ$ where such locking would happen. #[replace_float_literals(F::cast_from(literal))] -fn lbd_soft_thresholding(v : F, λ : F, b : F) -> F -{ - match (b >= 0.0, v >= b) { - (true, false) => b, - (true, true) => b.max(v - λ), // soft-to-b from above - (false, true) => super::unconstrained::soft_thresholding(v, λ), - (false, false) => 0.0.min(b.max(v - λ)), // soft-to-0 with lower bound +#[inline] +fn shifted_nonneg_soft_thresholding(x: F, y: F, λ: F) -> (F, F, bool, Option) { + let z = x - y; // The shift to f_0 + if -y >= 0.0 { + if x > λ { + (x - λ, z - λ, false, Some(x)) + } else { + (0.0, -y, true, None) + } + } else if z < 0.0 { + if λ < -x { + (0.0, -y, true, Some(-x)) + } else if z + λ < 0.0 { + (x + λ, z + λ, false, Some(-z)) + } else { + (y, 0.0, true, None) + } + } else if z > λ { + (x - λ, z - λ, false, Some(z)) + } else { + (y, 0.0, true, None) } } -/// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2 + δ_{≥0}(x)$. +/// Calculate $\prox\_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}\_1\^2 + δ\_{≥0}(x)$. /// /// To derive an algorithm for this, we can use -/// $prox_f(x) = prox_{f_0}(x - y) - y$ for -/// $f_0(z)=\frac{β}{2}\norm{z}_1^2 + δ_{≥-y}(z)$. -/// Now, the optimality conditions for $w = prox_{f_0}(x)$ are +/// $$ +/// \prox_f(x) = \prox\_{f\_0}(x - y) + y +/// \quad\text{for}\quad +/// f\_0(z)=\frac{β}{2}\norm{z}\_1\^2 + δ\_{≥-y}(z). +/// $$ +/// Now, the optimality conditions for $w = \prox\_{f\_0}(x)$ are /// $$\tag{*} -/// x ∈ w + β\norm{w}_1\sign w + N_{≥ -y}(w). +/// x ∈ w + β\norm{w}\_1\sign w + N\_{≥ -y}(w). /// $$ -/// If we know $\norm{w}_1$, then this is easily solved by lower-bounded soft-thresholding. +/// If we know $\norm{w}\_1$, then this is easily solved by lower-bounded soft-thresholding. /// We find this by sorting the elements by the distance to the 'locked' lower-bounded /// soft-thresholding target ($0$ or $-y_i$). -/// Then we loop over this sorted vector, increasing our estimate of $\norm{w}_1$ as we decide -/// that the soft-thresholding parameter `β\norm{w}_1` has to be such that the passed elements +/// Then we loop over this sorted vector, increasing our estimate of $\norm{w}\_1$ as we decide +/// that the soft-thresholding parameter $β\norm{w}\_1$ has to be such that the passed elements /// will reach their locked value (after which they cannot change anymore, for a larger /// soft-thresholding parameter. This has to be slightly more fine-grained for account -/// for the case that $-y_i<0$ and $x_i < -y_i$. +/// for the case that $-y\_i<0$ and $x\_i < -y\_i$. /// -/// Indeed, we denote by x' and w' the subset of elements such that w_i ≠ 0 and w_i > -y_i, -/// we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding lines of (*) that +/// Indeed, denoting by $x'$ and $w'$ the subset of elements such that $w\_i ≠ 0$ and +/// $w\_i > -y\_i$, we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding +/// lines of (*) that /// $$ -/// \norm{x'} = \norm{w'} + β \norm{w}_1 m. -/// $$ -/// Having a value for $t = \norm{w}-\norm{w'}$, we can then calculate +/// \norm{x'} = \norm{w'} + β \norm{w}\_1 m, /// $$ -/// \norm{x'} - t = (1+β m)\norm{w}_1, +/// where $m$ is the number of unlocked components. +/// We can now calculate the mass $t = \norm{w}-\norm{w'}$ of locked components, and so obtain /// $$ -/// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}_1$. +/// \norm{x'} + t = (1+β m)\norm{w}\_1, +/// $$ +/// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}\_1$. /// Since we do not actually know the unlocked elements, but just loop over all the possibilities /// for them, we have to check that $λ$ is above the current lower bound for this parameter /// (`shift` in the code), and below a value that would cause changes in the locked set /// (`max_shift` in the code). #[replace_float_literals(F::cast_from(literal))] -pub(super) fn l1squared_nonneg_prox( - sorted : &mut Vec<(F, F, F, Option<(F, F)>)>, - x : &mut DVector, - y : &DVector, - β : F +pub fn l1squared_nonneg_prox( + x: &mut DVector, + y: &DVector, + β: F, ) { // nalgebra double-definition bullshit workaround - //let max = alg_tools::NumTraitsFloat::max; let abs = alg_tools::NumTraitsFloat::abs; - - *x -= y; - - for (az_x_i, &x_i, &y_i) in izip!(sorted.iter_mut(), x.iter(), y.iter()) { - // The first component of each az_x_i contains the distance of x_i to the - // soft-thresholding limit. If it is negative, it is always reached. - // The second component contains the absolute value of the result for that component - // w_i of the solution, if the soft-thresholding limit is reached. - // This is stored here due to the sorting, although could otherwise be computed directly. - // Likewise the third component contains the absolute value of x_i. - // The fourth component contains an initial lower bound. - let a_i = abs(x_i); - let b = -y_i; - *az_x_i = match (b >= 0.0, x_i >= b) { - (true, false) => (x_i-b, b, a_i, None), // w_i=b, so sorting element negative! - (true, true) => (x_i-b, b, a_i, None), // soft-to-b from above - (false, true) => (a_i, 0.0, a_i, None), // soft-to-0 - (false, false) => (a_i, 0.0, a_i, Some((b, b-x_i))), // soft-to-0 with initial limit - }; - } - sorted.as_mut_slice() - .sort_unstable_by(|(a, _, _, _), (b, _, _, _)| NaNLeast(*a).cmp(&NaNLeast(*b))); + let min = alg_tools::NumTraitsFloat::min; - let mut nwlow = 0.0; - let mut shift = 0.0; - // This main loop is over different combinations of elements of the solution locked - // to the soft-thresholding lower bound (`0` or `-y_i`), in the sorted order of locking. - for (i, az_x_i) in izip!(0.., sorted.iter()) { - // This 'attempt is over different combinations of elements locked to the - // lower bound (`-y_i ≤ 0`). It calculates `max_shift` as the maximum shift that - // can be done until the locking would change (or become non-strictly-complementary). - // If the main rule (*) gives an estimate of `λ` that stays below `max_shift`, it is - // accepted. Otherwise `shift` is updated to `max_shift`, and we attempt again, - // with the non-locking set that participates in the calculation of `λ` then including - // the elements that are no longer locked to the lower bound. - 'attempt: loop { - let mut nwthis = 0.0; // contribution to ‖w‖ from elements with locking - //soft-thresholding parameter = `shift` - let mut nxmore = 0.0; // ‖x'‖ for those elements through to not be locked to - // neither the soft-thresholding limit, nor the lower bound - let mut nwlbd = 0.0; // contribution to ‖w‖ from those elements locked to their - // lower bound - let mut m = 0; - let mut max_shift = F::INFINITY; // maximal shift for which our estimate of the set of - // unlocked elements is valid. - let mut max_shift_from_lbd = false; // Whether max_shift comes from the next element - // or from a lower bound being reached. - for az_x_j in sorted[i as usize..].iter() { - if az_x_j.0 <= shift { - nwthis += az_x_j.1; - } else { - match az_x_j.3 { - Some((l, s)) if shift < s => { - if max_shift > s { - max_shift_from_lbd = true; - max_shift = s; - } - nwlbd += -l; - }, - _ => { - nxmore += az_x_j.2; - if m == 0 && max_shift > az_x_j.0 { - max_shift = az_x_j.0; - max_shift_from_lbd = false; - } - m += 1; - } - } - } + let mut λ = 0.0; + loop { + let mut w_locked = 0.0; + let mut n_unlocked = 0; // m + let mut x_prime = 0.0; + let mut max_shift = F::INFINITY; + for (&x_i, &y_i) in izip!(x.iter(), y.iter()) { + let (_, a_shifted, locked, next_lock) = shifted_nonneg_soft_thresholding(x_i, y_i, λ); + + if let Some(t) = next_lock { + max_shift = min(max_shift, t); + assert!(max_shift > λ); } - - // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ - (‖w‖-‖w'‖)= (1 + β m)‖w‖. - let tmp = β*(nxmore - (nwlow + nwthis + nwlbd))/(1.0 + β*F::cast_from(m)); - if tmp > max_shift { - if max_shift_from_lbd { - shift = max_shift; - continue 'attempt; - } else { - break 'attempt - } - } else if tmp < shift { - // TODO: this should probably be an assert!(false) - break 'attempt; + if locked { + w_locked += abs(a_shifted); } else { - // success - x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, tmp, -y_i)); - return + n_unlocked += 1; + x_prime += abs(x_i - y_i); } } - shift = az_x_i.0; - nwlow += az_x_i.1; + + // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ + (‖w‖-‖w'‖)= (1 + β m)‖w‖. + let λ_new = (x_prime + w_locked) / (1.0 / β + F::cast_from(n_unlocked)); + + if λ_new > max_shift { + λ = max_shift; + } else { + assert!(λ_new >= λ); + // success + x.zip_apply(y, |x_i, y_i| { + let (a, _, _, _) = shifted_nonneg_soft_thresholding(*x_i, y_i, λ_new); + //*x_i = y_i + lbd_soft_thresholding(*x_i, λ_new, -y_i) + *x_i = a; + }); + return; + } } - // TODO: this fallback has not been verified to be correct - x.zip_apply(y, |x_i, y_i| *x_i = y_i + lbd_soft_thresholding(*x_i, shift, -y_i)); } /// Proximal point method implementation of [`l1squared_nonneg`]. @@ -226,31 +219,31 @@ /// for potential performance improvements. #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn l1squared_nonneg_pp( - y : &DVector, - g : &DVector, - λ_ : F, - β_ : F, - x : &mut DVector, - τ_ : F, - θ_ : F, - iterator : I + y: &DVector, + g: &DVector, + λ_: F, + β_: F, + x: &mut DVector, + τ_: F, + θ_: F, + iterator: I, ) -> usize -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory +where + F: Float + ToNalgebraRealField, + I: AlgIteratorFactory, { let λ = λ_.to_nalgebra_mixed(); let β = β_.to_nalgebra_mixed(); let mut τ = τ_.to_nalgebra_mixed(); let θ = θ_.to_nalgebra_mixed(); - let mut tmp = std::iter::repeat((0.0, 0.0, 0.0, None)).take(x.len()).collect(); let mut iters = 0; iterator.iterate(|state| { // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g)) - x.apply(|x_i| *x_i -= τ*λ); + x.apply(|x_i| *x_i -= τ * λ); x.axpy(τ, g, 1.0); - l1squared_nonneg_prox(&mut tmp, x, y, τ*β); - + l1squared_nonneg_prox(x, y, τ * β); + iters += 1; // This gives O(1/N^2) rates due to monotonicity of function values. // Higher acceleration does not seem to be numerically stable. @@ -260,9 +253,7 @@ // Higher acceleration does not seem to be numerically stable. //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ; - state.if_verbose(|| { - F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) - }) + state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) }); iters @@ -276,18 +267,19 @@ /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn l1squared_nonneg_pdps( - y : &DVector, - g : &DVector, - λ_ : F, - β_ : F, - x : &mut DVector, - τ_ : F, - σ_ : F, - θ_ : F, - iterator : I + y: &DVector, + g: &DVector, + λ_: F, + β_: F, + x: &mut DVector, + τ_: F, + σ_: F, + θ_: F, + iterator: I, ) -> usize -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory +where + F: Float + ToNalgebraRealField, + I: AlgIteratorFactory, { let λ = λ_.to_nalgebra_mixed(); let β = β_.to_nalgebra_mixed(); @@ -301,21 +293,19 @@ iterator.iterate(|state| { // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ (w^k - g)) - x.axpy(-τ*θ, &w, 1.0); + x.axpy(-τ * θ, &w, 1.0); x.axpy(τ, g, 1.0); - l1squared_prox(&mut tmp, x, y, τ*β); - + l1squared_prox(&mut tmp, x, y, τ * β); + // Dual step: w^{k+1} = proj_{[-∞,λ]}(w^k + σ(2x^{k+1}-x^k)) - w.axpy(2.0*σ*θ, x, 1.0); - w.axpy(-σ*θ, &xprev, 1.0); + w.axpy(2.0 * σ * θ, x, 1.0); + w.axpy(-σ * θ, &xprev, 1.0); w.apply(|w_i| *w_i = w_i.min(λ)); xprev.copy_from(x); - - iters +=1; - state.if_verbose(|| { - F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) - }) + iters += 1; + + state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) }); iters @@ -340,26 +330,27 @@ /// $$ #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn l1squared_nonneg_pdps_alt( - y : &DVector, - g : &DVector, - λ_ : F, - β_ : F, - x : &mut DVector, - τ_ : F, - σ_ : F, - θ_ : F, - iterator : I + y: &DVector, + g: &DVector, + λ_: F, + β_: F, + x: &mut DVector, + τ_: F, + σ_: F, + θ_: F, + iterator: I, ) -> usize -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory +where + F: Float + ToNalgebraRealField, + I: AlgIteratorFactory, { let λ = λ_.to_nalgebra_mixed(); let τ = τ_.to_nalgebra_mixed(); let σ = σ_.to_nalgebra_mixed(); let θ = θ_.to_nalgebra_mixed(); let β = β_.to_nalgebra_mixed(); - let σθ = σ*θ; - let τθ = τ*θ; + let σθ = σ * θ; + let τθ = τ * θ; let mut w = DVector::zeros(x.len()); let mut tmp = DVector::zeros(x.len()); let mut xprev = x.clone(); @@ -369,8 +360,8 @@ // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g)) x.axpy(-τθ, &w, 1.0); x.axpy(τ, g, 1.0); - x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ)); - + x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ * λ)); + // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), // we compute w^{k+1} = prox_{σg^*}(q) for // = q - σ prox_{g/σ}(q/σ) @@ -381,22 +372,19 @@ w.axpy(2.0, x, 1.0); w.axpy(-1.0, &xprev, 1.0); xprev.copy_from(&w); // use xprev as temporary variable - l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); + l1squared_prox(&mut tmp, &mut xprev, y, β / σθ); w -= &xprev; w *= σ; xprev.copy_from(x); - + iters += 1; - state.if_verbose(|| { - F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) - }) + state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) }); iters } - /// This function applies an iterative method for the solution of the problem ///
$$ /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x). @@ -405,16 +393,17 @@ /// This function returns the number of iterations taken. #[replace_float_literals(F::cast_from(literal))] pub fn l1squared_nonneg( - y : &DVector, - g : &DVector, - λ : F, - β : F, - x : &mut DVector, - inner : &InnerSettings, - iterator : I + y: &DVector, + g: &DVector, + λ: F, + β: F, + x: &mut DVector, + inner: &InnerSettings, + iterator: I, ) -> usize -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory +where + F: Float + ToNalgebraRealField, + I: AlgIteratorFactory, { match inner.method { InnerMethod::PDPS => { @@ -423,15 +412,12 @@ let normest = inner_θ; let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) - }, - InnerMethod::FB => { - // The Lipschitz factor of ∇[x ↦ g^⊤ x + λ∑x]=g - λ𝟙 is FB is just a proximal point - // method with on constraints on τ. We “accelerate” it by adding to τ the constant θ - // on each iteration. Exponential growth does not seem stable. - let inner_τ = inner.fb_τ0; - let inner_θ = inner_τ; + } + InnerMethod::PP | InnerMethod::FB => { + let inner_τ = inner.pp_τ.0; + let inner_θ = inner.pp_τ.1; l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator) - }, + } other => unimplemented!("${other:?} is unimplemented"), } }