src/subproblem/l1squared_unconstrained.rs

/*!
Iterative algorithms for solving the finite-dimensional subproblem without constraints.
*/

use itertools::izip;
use nalgebra::DVector;
use numeric_literals::replace_float_literals;
use std::cmp::Ordering::*;

use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState};
use alg_tools::nalgebra_support::ToNalgebraRealField;
use alg_tools::nanleast::NaNLeast;
use alg_tools::norms::{Dist, L1};
use std::iter::zip;

use super::l1squared_nonneg::max_interval_dist_to_zero;
use super::unconstrained::soft_thresholding;
use super::{InnerMethod, InnerSettings};
use crate::types::*;

/// Calculate $\prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$.
///
/// To derive an algorithm for this, we can assume that $y=0$, as
/// $\prox\_f(x) = \prox\_{f_0}(x - y) - y$ for $f\_0=\frac{β}{2}\norm{x}\_1^2$.
/// Now, the optimality conditions for $w = \prox\_f(x)$ are
/// $$
///     0 ∈ w-x + β\norm{w}\_1\sign w.
/// $$
/// Clearly then $w = \soft\_{β\norm{w}\_1}(x)$.
/// Thus the components of $x$ with smallest absolute value will be zeroed out.
/// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components
/// of $x$, and by $m$ their count,  multipying the corresponding lines of (*) by $\sign x'$,
/// we obtain
/// $$
///     \norm{x'}\_1 = (1+βm)\norm{w'}\_1.
/// $$
/// That is, $\norm{w}\_1=\norm{w'}\_1=\norm{x'}\_1/(1+βm)$.
/// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest
/// elements, we can easily calculate what $\norm{w}\_1$ should be for that choice, and
/// then easily calculate $w = \soft_{β\norm{w}\_1}(x)$. We just have to verify that
/// the resulting $w$ has the same norm. There's a shortcut to this, as we work
/// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the
/// condition of soft-thresholding to remain non-zero: $|x\_i|>τ\norm{x'}/(1+τm)$.
/// Clearly, if this condition fails for $x\_i$, it will fail for all the components
/// already exluced. While, if it holds, it will hold for all components not excluded.
#[replace_float_literals(F::cast_from(literal))]
pub(super) fn l1squared_prox<F: Float + nalgebra::RealField>(
    sorted_abs: &mut DVector<F>,
    x: &mut DVector<F>,
    y: &DVector<F>,
    β: F,
) {
    sorted_abs.copy_from(x);
    sorted_abs.axpy(-1.0, y, 1.0);
    sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i));
    sorted_abs
        .as_mut_slice()
        .sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b)));

    let mut n = sorted_abs.sum();
    for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) {
        // test first
        let tmp = β * n / (1.0 + β * F::cast_from(m));
        if *az_i <= tmp {
            // Fail
            n -= *az_i;
        } else {
            // Success
            x.zip_apply(y, |x_i, y_i| {
                *x_i = y_i + soft_thresholding(*x_i - y_i, tmp)
            });
            return;
        }
    }
    // m = 0 should always work, but x is zero.
    x.fill(0.0);
}

/// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$.
///
/// `v` will be modified and cannot be trusted to contain useful values afterwards.
#[replace_float_literals(F::cast_from(literal))]
fn min_subdifferential<F: Float + nalgebra::RealField>(
    y: &DVector<F>,
    x: &DVector<F>,
    g: &DVector<F>,
    λ: F,
    β: F,
) -> F {
    let mut val = 0.0;
    let tmp = β * y.dist(x, L1);
    for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
        let (mut lb, mut ub) = (-g_i, -g_i);
        match x_i.partial_cmp(y_i) {
            Some(Greater) => {
                lb += tmp;
                ub += tmp
            }
            Some(Less) => {
                lb -= tmp;
                ub -= tmp
            }
            Some(Equal) => {
                lb -= tmp;
                ub += tmp
            }
            None => {}
        }
        match x_i.partial_cmp(&0.0) {
            Some(Greater) => {
                lb += λ;
                ub += λ
            }
            Some(Less) => {
                lb -= λ;
                ub -= λ
            }
            Some(Equal) => {
                lb -= λ;
                ub += λ
            }
            None => {}
        };
        val = max_interval_dist_to_zero(val, lb, ub);
    }
    val
}

/// PDPS implementation of [`l1squared_unconstrained`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_unconstrained_pdps<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ_: F,
    β_: F,
    x: &mut DVector<F::MixedType>,
    τ_: F,
    σ_: F,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    let λ = λ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;

    iterator.iterate(|state| {
        // Primal step: x^{k+1} = prox_{τ|.-y|_1^2}(x^k - τ (w^k - g))
        x.axpy(-τ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        l1squared_prox(&mut tmp, x, y, τ * β);

        // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k))
        w.axpy(2.0 * σ, x, 1.0);
        w.axpy(-σ, &xprev, 1.0);
        w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ));
        xprev.copy_from(x);

        iters += 1;

        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
    });

    iters
}

/// Alternative PDPS implementation of [`l1squared_unconstrained`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// By not dualising the 1-norm, this should produce more sparse solutions than
/// [`l1squared_unconstrained_pdps`].
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
/// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model.
/// We rewrite
/// <div>$$
///     \begin{split}
///     & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ \\
///     & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁
///      - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w).
///     \end{split}
/// $$</div>
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_unconstrained_pdps_alt<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ_: F,
    β_: F,
    x: &mut DVector<F::MixedType>,
    τ_: F,
    σ_: F,
    θ_: F,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let θ = θ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    let σθ = σ * θ;
    let τθ = τ * θ;
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;

    iterator.iterate(|state| {
        // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g))
        x.axpy(-τθ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ * λ));

        // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
        // we compute w^{k+1} = prox_{σg^*}(q) for
        //                    = q - σ prox_{g/σ}(q/σ)
        //                    = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)
        //                    = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ))
        // where q/σ = w^k/σ + (2x^{k+1}-x^k),
        w /= σ;
        w.axpy(2.0, x, 1.0);
        w.axpy(-1.0, &xprev, 1.0);
        xprev.copy_from(&w); // use xprev as temporary variable
        l1squared_prox(&mut tmp, &mut xprev, y, β / σθ);
        w -= &xprev;
        w *= σ;
        xprev.copy_from(x);

        iters += 1;

        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
    });

    iters
}

/// This function applies an iterative method for the solution of the problem
/// <div>$$
///     \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁.
/// $$</div>
/// Only PDPS is supported.
///
/// This function returns the number of iterations taken.
#[replace_float_literals(F::cast_from(literal))]
pub fn l1squared_unconstrained<F, I>(
    y: &DVector<F::MixedType>,
    g: &DVector<F::MixedType>,
    λ: F,
    β: F,
    x: &mut DVector<F::MixedType>,
    inner: &InnerSettings<F>,
    iterator: I,
) -> usize
where
    F: Float + ToNalgebraRealField,
    I: AlgIteratorFactory<F>,
{
    // Estimate of ‖K‖ for K=θ Id.
    let inner_θ = 1.0;
    let normest = inner_θ;

    let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);

    match inner.method {
        InnerMethod::PDPS => {
            l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator)
        }
        other => unimplemented!("${other:?} is unimplemented"),
    }
}