Mon, 13 Apr 2026 22:29:26 -0500
Automatic transport disabling after sufficient failures, for efficiency
/*! Iterative algorithms for solving the finite-dimensional subproblem with constraint. */ use itertools::izip; use nalgebra::DVector; use numeric_literals::replace_float_literals; //use std::iter::zip; use std::cmp::Ordering::*; use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState}; use alg_tools::nalgebra_support::ToNalgebraRealField; use alg_tools::norms::{Dist, L1}; use super::l1squared_unconstrained::l1squared_prox; use super::nonneg::nonneg_soft_thresholding; use super::{InnerMethod, InnerSettings}; use crate::types::*; /// Return maximum of `dist` and distnce of inteval `[lb, ub]` to zero. #[replace_float_literals(F::cast_from(literal))] pub(super) fn max_interval_dist_to_zero<F: Float>(dist: F, lb: F, ub: F) -> F { if lb < 0.0 { if ub > 0.0 { dist } else { dist.max(-ub) } } else /* ub ≥ 0.0*/ { dist.max(lb) } } /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥0}(x)$ at $x$. /// /// `v` will be modified and cannot be trusted to contain useful values afterwards. 
#[replace_float_literals(F::cast_from(literal))]
fn min_subdifferential<F: Float + nalgebra::RealField>(
    y: &DVector<F>,
    x: &DVector<F>,
    g: &DVector<F>,
    λ: F,
    β: F,
) -> F {
    // Running ∞-norm of the minimal subdifferential over the components
    // processed so far.
    let mut val = 0.0;
    // β‖x-y‖₁: the common magnitude of the subgradient of (β/2)|x-y|₁² in
    // every component (its sign per component depends on sign(x_i - y_i)).
    let tmp = β * y.dist(x, L1);
    for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
        // Build the subdifferential interval [lb, ub] for component i,
        // starting from the gradient -g_i of the linear term -g^⊤x.
        let (mut lb, mut ub) = (-g_i, -g_i);
        // Contribution of (β/2)|x-y|₁²: ±β‖x-y‖₁ away from the kink, the whole
        // interval [-β‖x-y‖₁, β‖x-y‖₁] at x_i = y_i.
        match x_i.partial_cmp(y_i) {
            Some(Greater) => { lb += tmp; ub += tmp }
            Some(Less) => { lb -= tmp; ub -= tmp }
            Some(Equal) => { lb -= tmp; ub += tmp }
            None => {}
        }
        // Contribution of λ‖x‖₁ + δ_{≥0}(x): λ for x_i > 0; at x_i ≤ 0 the
        // normal cone of the nonnegativity constraint extends the lower bound
        // to -∞.
        match x_i.partial_cmp(&0.0) {
            Some(Greater) => { lb += λ; ub += λ }
            // Less should not happen
            Some(Less | Equal) => { lb = F::NEG_INFINITY; ub += λ }
            None => {}
        };
        // Accumulate the per-component distance of [lb, ub] to zero.
        val = max_interval_dist_to_zero(val, lb, ub);
    }
    val
}

// #[replace_float_literals(F::cast_from(literal))]
// fn lbd_soft_thresholding<F: Float>(v: F, λ: F, b: F) -> F {
//     match (b >= 0.0, v >= b) {
//         (true, false) => b,
//         (true, true) => b.max(v - λ), // soft-to-b from above
//         (false, true) => super::unconstrained::soft_thresholding(v, λ),
//         (false, false) => 0.0.min(b.max(v + λ)), // soft-to-0 with lower bound
//     }
// }

/// Calculates $\prox\_f(x)$ for $f(x)=λ\abs{x-y} + δ\_{≥0}(x)$.
/// This is the first output. The second output is the first output $-y$, i.e., the prox
/// of $f\_0$ for the shift function $f\_0(z) = f(z+y) = λ\abs{z} + δ\_{≥-y}(z)$
/// satisfying
/// $$
/// \prox_f(x) = \prox\_{f\_0}(x - y) + y,
/// $$
/// which is also used internally.
/// The third output indicates whether the output is “locked” to either $0$ (resp. $-y$ in
/// the reformulation), or $y$ ($0$ in the reformulation).
/// The fourth output indicates the next highest $λ$ where such locking would happen.
#[replace_float_literals(F::cast_from(literal))]
#[inline]
fn shifted_nonneg_soft_thresholding<F: Float>(x: F, y: F, λ: F) -> (F, F, bool, Option<F>) {
    let z = x - y; // The shift to f_0
    if -y >= 0.0 {
        // The lower bound -y of the shifted problem is nonnegative, so the
        // prox either soft-thresholds x towards zero or locks at the bound.
        if x > λ {
            // Unlocked: plain soft-thresholding; locks at 0 once λ reaches x.
            (x - λ, z - λ, false, Some(x))
        } else {
            // Locked at 0 (-y in the shifted problem); no further locking.
            (0.0, -y, true, None)
        }
    } else if z < 0.0 {
        // Lower bound -y < 0 and z below the kink of λ|z|.
        if λ < -x {
            // Locked at 0, but unlocks/relocks differently once λ grows past -x.
            (0.0, -y, true, Some(-x))
        } else if z + λ < 0.0 {
            // Unlocked: soft-thresholding from below; locks at y once λ ≥ -z.
            (x + λ, z + λ, false, Some(-z))
        } else {
            // Locked at y (0 in the shifted problem); no further locking.
            (y, 0.0, true, None)
        }
    } else if z > λ {
        // Above the kink and outside the threshold: unlocked soft-thresholding;
        // locks at y once λ reaches z.
        (x - λ, z - λ, false, Some(z))
    } else {
        // Within the threshold: locked at y (0 shifted); no further locking.
        (y, 0.0, true, None)
    }
}

/// Calculate $\prox\_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}\_1\^2 + δ\_{≥0}(x)$.
///
/// To derive an algorithm for this, we can use
/// $$
/// \prox_f(x) = \prox\_{f\_0}(x - y) + y
/// \quad\text{for}\quad
/// f\_0(z)=\frac{β}{2}\norm{z}\_1\^2 + δ\_{≥-y}(z).
/// $$
/// Now, the optimality conditions for $w = \prox\_{f\_0}(x)$ are
/// $$\tag{*}
/// x ∈ w + β\norm{w}\_1\sign w + N\_{≥ -y}(w).
/// $$
/// If we know $\norm{w}\_1$, then this is easily solved by lower-bounded soft-thresholding.
/// We find this by sorting the elements by the distance to the 'locked' lower-bounded
/// soft-thresholding target ($0$ or $-y_i$).
/// Then we loop over this sorted vector, increasing our estimate of $\norm{w}\_1$ as we decide
/// that the soft-thresholding parameter $β\norm{w}\_1$ has to be such that the passed elements
/// will reach their locked value (after which they cannot change anymore for a larger
/// soft-thresholding parameter). This has to be slightly more fine-grained to account
/// for the case that $-y\_i<0$ and $x\_i < -y\_i$.
///
/// Indeed, denoting by $x'$ and $w'$ the subset of elements such that $w\_i ≠ 0$ and
/// $w\_i > -y\_i$, we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding
/// lines of (*) that
/// $$
/// \norm{x'} = \norm{w'} + β \norm{w}\_1 m,
/// $$
/// where $m$ is the number of unlocked components.
/// We can now calculate the mass $t = \norm{w}-\norm{w'}$ of locked components, and so obtain
/// $$
/// \norm{x'} + t = (1+β m)\norm{w}\_1,
/// $$
/// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}\_1$.
/// Since we do not actually know the unlocked elements, but just loop over all the possibilities
/// for them, we have to check that $λ$ is above the current lower bound for this parameter
/// (`shift` in the code), and below a value that would cause changes in the locked set
/// (`max_shift` in the code).
#[replace_float_literals(F::cast_from(literal))]
pub fn l1squared_nonneg_prox<F: Float + nalgebra::RealField>(
    x: &mut DVector<F>,
    y: &DVector<F>,
    β: F,
) {
    // nalgebra double-definition bullshit workaround
    let abs = alg_tools::NumTraitsFloat::abs;
    let min = alg_tools::NumTraitsFloat::min;
    // Current lower bound on the soft-thresholding parameter β‖w‖₁
    // (`shift` in the doc comment above).
    let mut λ = 0.0;
    loop {
        // Mass of locked components, ‖w‖ - ‖w'‖ (t in the doc comment).
        let mut w_locked = 0.0;
        let mut n_unlocked = 0; // m
        // ‖x'‖: accumulated mass of the unlocked components.
        let mut x_prime = 0.0;
        // Smallest parameter value at which the locked set would change.
        let mut max_shift = F::INFINITY;
        for (&x_i, &y_i) in izip!(x.iter(), y.iter()) {
            let (_, a_shifted, locked, next_lock) = shifted_nonneg_soft_thresholding(x_i, y_i, λ);
            if let Some(t) = next_lock {
                max_shift = min(max_shift, t);
                // The next locking threshold must lie strictly above the
                // current parameter, otherwise the iteration cannot progress.
                assert!(max_shift > λ);
            }
            if locked {
                w_locked += abs(a_shifted);
            } else {
                n_unlocked += 1;
                x_prime += abs(x_i - y_i);
            }
        }
        // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ + (‖w‖-‖w'‖)= (1 + β m)‖w‖.
        let λ_new = (x_prime + w_locked) / (1.0 / β + F::cast_from(n_unlocked));
        if λ_new > max_shift {
            // The candidate parameter would alter the locked set: advance the
            // lower bound to the next locking threshold and re-classify.
            λ = max_shift;
        } else {
            assert!(λ_new >= λ);
            // success
            x.zip_apply(y, |x_i, y_i| {
                let (a, _, _, _) = shifted_nonneg_soft_thresholding(*x_i, y_i, λ_new);
                //*x_i = y_i + lbd_soft_thresholding(*x_i, λ_new, -y_i)
                *x_i = a;
            });
            return;
        }
    }
}

/// Proximal point method implementation of [`l1squared_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn l1squared_nonneg_pp<F, I>( y: &DVector<F::MixedType>, g: &DVector<F::MixedType>, λ_: F, β_: F, x: &mut DVector<F::MixedType>, τ_: F, θ_: F, iterator: I, ) -> usize where F: Float + ToNalgebraRealField, I: AlgIteratorFactory<F>, { let λ = λ_.to_nalgebra_mixed(); let β = β_.to_nalgebra_mixed(); let mut τ = τ_.to_nalgebra_mixed(); let θ = θ_.to_nalgebra_mixed(); let mut iters = 0; iterator.iterate(|state| { // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}+}(x^k - τ(λ𝟙^⊤-g)) x.apply(|x_i| *x_i -= τ * λ); x.axpy(τ, g, 1.0); l1squared_nonneg_prox(x, y, τ * β); iters += 1; // This gives O(1/N^2) rates due to monotonicity of function values. // Higher acceleration does not seem to be numerically stable. τ += θ; // This gives O(1/N^3) rates due to monotonicity of function values. // Higher acceleration does not seem to be numerically stable. //τ + = F::cast_from(iters).to_nalgebra_mixed()*θ; state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) }); iters } /// PDPS implementation of [`l1squared_nonneg`]. /// For detailed documentation of the inputs and outputs, refer to there. /// /// The `λ` component of the model is handled in the proximal step instead of the gradient step /// for potential performance improvements. /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. 
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_nonneg_pdps<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ_: F,
    β_: F,
    x: &mut DVector<F::MixedType>,
    τ_: F,
    σ_: F,
    θ_: F,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    let λ = λ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let θ = θ_.to_nalgebra_mixed();
    // Dual variable, temporary prox workspace, and the previous primal iterate
    // (needed for the over-relaxed dual update 2x^{k+1}-x^k).
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;
    iterator.iterate(|state| {
        // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ (w^k - g))
        // NOTE(review): the operator is scaled by θ, so w enters as θw here;
        // the comment above omits the θ factor.
        x.axpy(-τ * θ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        // l1squared_prox writes the prox result back into its second argument
        // (x); `tmp` is scratch space.
        l1squared_prox(&mut tmp, x, y, τ * β);
        // Dual step: w^{k+1} = proj_{[-∞,λ]}(w^k + σ(2x^{k+1}-x^k))
        // (again with the θ-scaled operator: σθ in place of σ).
        w.axpy(2.0 * σ * θ, x, 1.0);
        w.axpy(-σ * θ, &xprev, 1.0);
        w.apply(|w_i| *w_i = w_i.min(λ));
        xprev.copy_from(x);
        iters += 1;
        // Report the ∞-norm of the minimal subdifferential when verbose.
        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
    });
    iters
}

/// Alternative PDPS implementation of [`l1squared_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// By not dualising the 1-norm, this should produce more sparse solutions than
/// [`l1squared_nonneg_pdps`].
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
/// The parameter `θ` is used to rescale the operator (identity) of the PDPS model.
/// We rewrite
/// <div>$$
/// \begin{split}
/// & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x) \\
/// & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x)
/// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w).
/// \end{split}
/// $$</div>
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_nonneg_pdps_alt<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ_: F,
    β_: F,
    x: &mut DVector<F::MixedType>,
    τ_: F,
    σ_: F,
    θ_: F,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let θ = θ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    // Precomputed products used by both primal and dual steps.
    let σθ = σ * θ;
    let τθ = τ * θ;
    // Dual variable, prox scratch space, and the previous primal iterate.
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;
    iterator.iterate(|state| {
        // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g))
        x.axpy(-τθ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        // Componentwise prox of τλ‖.‖₁ + δ_{≥0} (the 1-norm is not dualised).
        x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ * λ));
        // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
        // we compute w^{k+1} = prox_{σg^*}(q) for
        //           = q - σ prox_{g/σ}(q/σ)
        //           = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)
        //           = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ))
        // where q/σ = w^k/σ + (2x^{k+1}-x^k),
        w /= σ;
        w.axpy(2.0, x, 1.0);
        w.axpy(-1.0, &xprev, 1.0);
        xprev.copy_from(&w); // use xprev as temporary variable
        // l1squared_prox writes its result into its second argument (xprev).
        l1squared_prox(&mut tmp, &mut xprev, y, β / σθ);
        w -= &xprev;
        w *= σ;
        xprev.copy_from(x);
        iters += 1;
        // Report the ∞-norm of the minimal subdifferential when verbose.
        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
    });
    iters
}

/// This function applies an iterative method for the solution of the problem
/// <div>$$
/// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x).
/// $$</div>
///
/// This function returns the number of iterations taken.
#[replace_float_literals(F::cast_from(literal))] pub fn l1squared_nonneg<F, I>( y: &DVector<F::MixedType>, g: &DVector<F::MixedType>, λ: F, β: F, x: &mut DVector<F::MixedType>, inner: &InnerSettings<F>, iterator: I, ) -> usize where F: Float + ToNalgebraRealField, I: AlgIteratorFactory<F>, { match inner.method { InnerMethod::PDPS => { let inner_θ = 1.0; // Estimate of ‖K‖ for K=θ\Id. let normest = inner_θ; let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) } InnerMethod::PP | InnerMethod::FB => { let inner_τ = inner.pp_τ.0; let inner_θ = inner.pp_τ.1; l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator) } other => unimplemented!("${other:?} is unimplemented"), } }