src/subproblem/l1squared_nonneg.rs

Mon, 13 Apr 2026 22:29:26 -0500

author
Tuomo Valkonen <tuomov@iki.fi>
date
Mon, 13 Apr 2026 22:29:26 -0500
branch
dev
changeset 68
00d0881f89a6
parent 63
7a8a55fd41c0
child 64
d3be4f7ffd60
permissions
-rw-r--r--

Automatic transport disabling after sufficient failures, for efficiency

/*!
Iterative algorithms for solving the finite-dimensional subproblem with constraint.
*/

use itertools::izip;
use nalgebra::DVector;
use numeric_literals::replace_float_literals;
//use std::iter::zip;
use std::cmp::Ordering::*;

use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState};
use alg_tools::nalgebra_support::ToNalgebraRealField;
use alg_tools::norms::{Dist, L1};

use super::l1squared_unconstrained::l1squared_prox;
use super::nonneg::nonneg_soft_thresholding;
use super::{InnerMethod, InnerSettings};
use crate::types::*;

/// Return the maximum of `dist` and the distance of the interval `[lb, ub]` to zero.
///
/// Assumes `lb ≤ ub`. If the interval contains zero, its distance to zero is zero,
/// so `dist` is returned unchanged.
#[replace_float_literals(F::cast_from(literal))]
pub(super) fn max_interval_dist_to_zero<F: Float>(dist: F, lb: F, ub: F) -> F {
    if lb < 0.0 {
        if ub > 0.0 {
            // 0 ∈ [lb, ub]: the interval's distance to zero is 0.
            dist
        } else {
            // Entire interval non-positive: closest point to zero is ub.
            dist.max(-ub)
        }
    } else {
        // lb ≥ 0 (so, with lb ≤ ub, the whole interval is non-negative):
        // closest point to zero is lb.
        dist.max(lb)
    }
}

/// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁ +δ_{≥0}(x)$ at $x$.
///
/// For each component, the subdifferential is an interval `[lb, ub]`; the result is the
/// largest (over components) distance of that interval to zero.
#[replace_float_literals(F::cast_from(literal))]
fn min_subdifferential<F: Float + nalgebra::RealField>(
    y: &DVector<F>,
    x: &DVector<F>,
    g: &DVector<F>,
    λ: F,
    β: F,
) -> F {
    // Scalar factor β‖x-y‖₁ multiplying the sign of each component of x-y.
    let w = β * y.dist(x, L1);
    let mut worst = 0.0;
    for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
        // Contribution of (β/2)|x-y|₁² - g^⊤x: an interval around -g_i determined
        // by the sign (or sign interval, at the kink) of x_i - y_i.
        let (mut lo, mut hi) = match x_i.partial_cmp(y_i) {
            Some(Greater) => (w - g_i, w - g_i),
            Some(Less) => (-w - g_i, -w - g_i),
            Some(Equal) => (-w - g_i, w - g_i),
            None => (-g_i, -g_i),
        };
        // Contribution of λ‖x‖₁ + δ_{≥0}(x).
        match x_i.partial_cmp(&0.0) {
            Some(Greater) => {
                lo += λ;
                hi += λ
            }
            // Less should not happen due to the constraint x ≥ 0; it is folded
            // into the boundary case, where the normal cone opens the interval
            // downwards to -∞.
            Some(Less | Equal) => {
                lo = F::NEG_INFINITY;
                hi += λ
            }
            None => {}
        };
        worst = max_interval_dist_to_zero(worst, lo, hi);
    }
    worst
}

// #[replace_float_literals(F::cast_from(literal))]
// fn lbd_soft_thresholding<F: Float>(v: F, λ: F, b: F) -> F {
//     match (b >= 0.0, v >= b) {
//         (true, false) => b,
//         (true, true) => b.max(v - λ), // soft-to-b from above
//         (false, true) => super::unconstrained::soft_thresholding(v, λ),
//         (false, false) => 0.0.min(b.max(v + λ)), // soft-to-0 with lower bound
//     }
// }

/// Calculates $\prox\_f(x)$ for $f(x)=λ\abs{x-y} + δ\_{≥0}(x)$.
/// This is the first output. The second output is the first output $-y$, i.e., the prox
/// of $f\_0$ for the shift function $f\_0(z) = f(z+y) = λ\abs{z} + δ\_{≥-y}(z)$
/// satisfying
/// $$
///     \prox_f(x) = \prox\_{f\_0}(x - y) + y,
/// $$
/// which is also used internally.
/// The third output indicates whether the output is “locked” to either $0$ (resp. $-y$ in
/// the reformulation), or $y$ ($0$ in the reformulation).
/// The fourth output indicates the next highest $λ$ where such locking would happen.
#[replace_float_literals(F::cast_from(literal))]
#[inline]
fn shifted_nonneg_soft_thresholding<F: Float>(x: F, y: F, λ: F) -> (F, F, bool, Option<F>) {
    let z = x - y; // The shift to f_0
    if -y >= 0.0 {
        // y ≤ 0: the kink of |x-y| is at or below the constraint x ≥ 0,
        // so the prox reduces to max(0, x - λ).
        if x > λ {
            // Unlocked; would lock to 0 once λ reaches x.
            (x - λ, z - λ, false, Some(x))
        } else {
            // Locked to 0 (-y in the shifted formulation); stays so for any larger λ.
            (0.0, -y, true, None)
        }
    } else if z < 0.0 {
        // y > 0 and x < y: soft-threshold upwards by λ, clamped to [0, y].
        if λ < -x {
            // x + λ < 0: locked to the constraint 0.
            // NOTE(review): already locked, yet the fourth output reports -x —
            // presumably the λ where the active branch changes; confirm against
            // the max_shift logic in l1squared_nonneg_prox.
            (0.0, -y, true, Some(-x))
        } else if z + λ < 0.0 {
            // Unlocked at x + λ; would lock to y once λ reaches y - x = -z.
            (x + λ, z + λ, false, Some(-z))
        } else {
            // Reached the kink: locked to y (0 in the shifted formulation).
            (y, 0.0, true, None)
        }
    } else if z > λ {
        // y > 0 and x - y > λ: soft-threshold downwards; would lock to y at λ = z.
        (x - λ, z - λ, false, Some(z))
    } else {
        // y > 0 and 0 ≤ x - y ≤ λ: locked to the kink y.
        (y, 0.0, true, None)
    }
}

/// Calculate $\prox\_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}\_1\^2 + δ\_{≥0}(x)$.
///
/// To derive an algorithm for this, we can use
/// $$
///     \prox_f(x) = \prox\_{f\_0}(x - y) + y
///     \quad\text{for}\quad
///     f\_0(z)=\frac{β}{2}\norm{z}\_1\^2 + δ\_{≥-y}(z).
/// $$
/// Now, the optimality conditions for $w = \prox\_{f\_0}(x)$ are
/// $$\tag{*}
///     x ∈ w + β\norm{w}\_1\sign w + N\_{≥ -y}(w).
/// $$
/// If we know $\norm{w}\_1$, then this is easily solved by lower-bounded soft-thresholding.
/// We find this by sorting the elements by the distance to the 'locked' lower-bounded
/// soft-thresholding target ($0$ or $-y_i$).
/// Then we loop over this sorted vector, increasing our estimate of $\norm{w}\_1$ as we decide
/// that the soft-thresholding parameter $β\norm{w}\_1$ has to be such that the passed elements
/// will reach their locked value (after which they cannot change anymore, for a larger
/// soft-thresholding parameter). This has to be slightly more fine-grained to account
/// for the case that $-y\_i<0$ and $x\_i < -y\_i$.
///
/// Indeed, denoting by $x'$ and $w'$ the subset of elements such that $w\_i ≠ 0$ and
/// $w\_i > -y\_i$, we can calculate by applying $⟨\cdot, \sign w'⟩$ to the corresponding
/// lines of (*) that
/// $$
///     \norm{x'} = \norm{w'} + β \norm{w}\_1 m,
/// $$
/// where $m$ is the number of unlocked components.
/// We can now calculate the mass $t = \norm{w}-\norm{w'}$ of locked components, and so obtain
/// $$
///     \norm{x'} + t = (1+β m)\norm{w}\_1,
/// $$
/// from where we can calculate the soft-thresholding parameter $λ=β\norm{w}\_1$.
/// Since we do not actually know the unlocked elements, but just loop over all the possibilities
/// for them, we have to check that $λ$ is above the current lower bound for this parameter
/// (`shift` in the code), and below a value that would cause changes in the locked set
/// (`max_shift` in the code).
#[replace_float_literals(F::cast_from(literal))]
pub fn l1squared_nonneg_prox<F: Float + nalgebra::RealField>(
    x: &mut DVector<F>,
    y: &DVector<F>,
    β: F,
) {
    // nalgebra double-definition bullshit workaround: select the num-traits
    // versions of abs/min explicitly.
    let abs = alg_tools::NumTraitsFloat::abs;
    let min = alg_tools::NumTraitsFloat::min;

    // Current lower bound for the soft-thresholding parameter λ = β‖w‖₁,
    // increased on every pass of the loop until consistent.
    let mut λ = 0.0;
    loop {
        let mut w_locked = 0.0; // mass t of locked components, ‖w‖ - ‖w'‖
        let mut n_unlocked = 0; // m
        let mut x_prime = 0.0; // ‖x'‖: mass of the unlocked components of x - y
        let mut max_shift = F::INFINITY; // smallest λ at which the locked set would change
        for (&x_i, &y_i) in izip!(x.iter(), y.iter()) {
            let (_, a_shifted, locked, next_lock) = shifted_nonneg_soft_thresholding(x_i, y_i, λ);

            if let Some(t) = next_lock {
                max_shift = min(max_shift, t);
                // The current λ must be strictly below any upcoming locking point,
                // otherwise the loop could not make progress.
                assert!(max_shift > λ);
            }
            if locked {
                w_locked += abs(a_shifted);
            } else {
                n_unlocked += 1;
                x_prime += abs(x_i - y_i);
            }
        }

        // We need ‖x'‖ = ‖w'‖ + β m ‖w‖, i.e. ‖x'‖ + (‖w‖-‖w'‖)= (1 + β m)‖w‖.
        let λ_new = (x_prime + w_locked) / (1.0 / β + F::cast_from(n_unlocked));

        if λ_new > max_shift {
            // The candidate λ would change the locked set; advance only to the
            // first locking point and re-evaluate the sets on the next pass.
            λ = max_shift;
        } else {
            // λ is monotonically increasing across passes; otherwise the
            // fixed-point reasoning above would be invalid.
            assert!(λ_new >= λ);
            // success: apply the lower-bounded soft-thresholding with the final λ.
            x.zip_apply(y, |x_i, y_i| {
                let (a, _, _, _) = shifted_nonneg_soft_thresholding(*x_i, y_i, λ_new);
                //*x_i = y_i + lbd_soft_thresholding(*x_i, λ_new, -y_i)
                *x_i = a;
            });
            return;
        }
    }
}

/// Proximal point method implementation of [`l1squared_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_nonneg_pp<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ_: F,
    β_: F,
    x: &mut DVector<F::MixedType>,
    τ_: F,
    θ_: F,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    // Convert parameters to nalgebra's mixed scalar type once, up front.
    let (λ, β, θ) = (
        λ_.to_nalgebra_mixed(),
        β_.to_nalgebra_mixed(),
        θ_.to_nalgebra_mixed(),
    );
    let mut τ = τ_.to_nalgebra_mixed();
    let mut count = 0;

    iterator.iterate(|state| {
        // Proximal point step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2+δ_{≥0}}(x^k - τ(λ𝟙 - g))
        x.apply(|x_i| *x_i -= τ * λ);
        x.axpy(τ, g, 1.0);
        l1squared_nonneg_prox(x, y, τ * β);

        count += 1;
        // Linearly growing step lengths give O(1/N^2) rates due to monotonicity of
        // function values. A quadratically growing step length (adding the iteration
        // count times θ) would give O(1/N^3) rates but did not seem numerically stable.
        τ += θ;

        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
    });

    count
}

/// PDPS implementation of [`l1squared_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
/// The parameter `θ` is used to rescale the operator (identity) of the PDPS model.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_nonneg_pdps<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ_: F,
    β_: F,
    x: &mut DVector<F::MixedType>,
    τ_: F,
    σ_: F,
    θ_: F,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    let λ = λ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let θ = θ_.to_nalgebra_mixed();
    // w: dual variable; tmp: scratch space for the ℓ₁²-prox.
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;

    iterator.iterate(|state| {
        // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ(θ w^k - g))
        x.axpy(-τ * θ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        // NOTE(review): l1squared_prox appears to update x in place, using tmp as
        // scratch (x is used as the new iterate below) — confirm against its
        // definition in l1squared_unconstrained.
        l1squared_prox(&mut tmp, x, y, τ * β);

        // Dual step: w^{k+1} = proj_{[-∞,λ]}(w^k + σθ(2x^{k+1}-x^k)).
        // The projection onto (-∞, λ] is the prox of the conjugate of
        // λ‖·‖₁ + δ_{≥0}, handling both of those model components dually.
        w.axpy(2.0 * σ * θ, x, 1.0);
        w.axpy(-σ * θ, &xprev, 1.0);
        w.apply(|w_i| *w_i = w_i.min(λ));
        xprev.copy_from(x);

        iters += 1;

        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
    });

    iters
}

/// Alternative PDPS implementation of [`l1squared_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// By not dualising the 1-norm, this should produce more sparse solutions than
/// [`l1squared_nonneg_pdps`].
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
/// The parameter `θ` is used to rescale the operator (identity) of the PDPS model.
/// We rewrite
/// <div>$$
///     \begin{split}
///     & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x) \\
///     & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x)
///      - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w).
///     \end{split}
/// $$</div>
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_nonneg_pdps_alt<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ_: F,
    β_: F,
    x: &mut DVector<F::MixedType>,
    τ_: F,
    σ_: F,
    θ_: F,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let θ = θ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    // Precomputed products used in both the primal and the dual step.
    let σθ = σ * θ;
    let τθ = τ * θ;
    // w: dual variable; tmp: scratch space for the ℓ₁²-prox.
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;

    iterator.iterate(|state| {
        // Primal step: x^{k+1} = nonnegsoft_τλ(x^k - τ(θ w^k -g))
        x.axpy(-τθ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ * λ));

        // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
        // we compute w^{k+1} = prox_{σg^*}(q) for
        //                    = q - σ prox_{g/σ}(q/σ)
        //                    = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)
        //                    = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ))
        // where q/σ = w^k/σ + (2x^{k+1}-x^k),
        w /= σ;
        w.axpy(2.0, x, 1.0);
        w.axpy(-1.0, &xprev, 1.0);
        xprev.copy_from(&w); // use xprev as temporary variable
        // NOTE(review): l1squared_prox appears to write the prox into xprev in
        // place with tmp as scratch (w -= xprev below consumes the result) —
        // confirm against its definition in l1squared_unconstrained.
        l1squared_prox(&mut tmp, &mut xprev, y, β / σθ);
        w -= &xprev;
        w *= σ;
        xprev.copy_from(x); // restore xprev to its role as previous iterate

        iters += 1;

        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
    });

    iters
}

/// This function applies an iterative method for the solution of the problem
/// <div>$$
///     \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x).
/// $$</div>
///
/// The concrete algorithm (PDPS or proximal point) is selected by `inner.method`,
/// with step length parameters also taken from `inner`. The initial iterate is
/// read from `x`, and the solution is written back into it.
///
/// This function returns the number of iterations taken.
#[replace_float_literals(F::cast_from(literal))]
pub fn l1squared_nonneg<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ: F,
    β: F,
    x: &mut DVector<F::MixedType>,
    inner: &InnerSettings<F>,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    match inner.method {
        InnerMethod::PDPS => {
            let inner_θ = 1.0;
            // Estimate of ‖K‖ for K=θ\Id.
            let normest = inner_θ;
            let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
            l1squared_nonneg_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator)
        }
        InnerMethod::PP | InnerMethod::FB => {
            let inner_τ = inner.pp_τ.0;
            let inner_θ = inner.pp_τ.1;
            l1squared_nonneg_pp(y, g, λ, β, x, inner_τ, inner_θ, iterator)
        }
        // Fixed: the format string contained a stray shell-style `$` that would
        // have been printed literally in the panic message.
        other => unimplemented!("{other:?} is unimplemented"),
    }
}

mercurial