src/subproblem.rs

//! Iterative algorithms for solving finite-dimensional subproblems.

use serde::{Serialize, Deserialize};
use nalgebra::{DVector, DMatrix};
use numeric_literals::replace_float_literals;
use itertools::{izip, Itertools};
use colored::Colorize;

use alg_tools::iter::Mappable;
use alg_tools::error::NumericalError;
use alg_tools::iterate::{
    AlgIteratorFactory,
    AlgIteratorState,
    AlgIteratorOptions,
    Verbose,
    Step,
};
use alg_tools::linops::{GEMV, Adjointable, AXPY};
use alg_tools::nalgebra_support::ToNalgebraRealField;
use alg_tools::norms::{Projection, Linfinity};
use alg_tools::euclidean::Euclidean;

use crate::types::*;

/// Method for solving finite-dimensional subproblems
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
#[allow(dead_code)]
pub enum InnerMethod {
    /// Forward-backward
    FB,
    /// Semismooth Newton
    SSN,
}

/// Settings for the solution of finite-dimensional subproblems
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
pub struct InnerSettings<F : Float> {
    /// Method
    pub method : InnerMethod,
    /// Proportional step length (∈ [0, 1) for `InnerMethod::FB`).
    pub τ0 : F,
    /// Fraction of `tolerance` given to inner algorithm
    pub tolerance_mult : F,
    /// Iterator options
    #[serde(flatten)]
    pub iterator_options : AlgIteratorOptions,
}

#[replace_float_literals(F::cast_from(literal))]
impl<F : Float> Default for InnerSettings<F> {
    fn default() -> Self {
        InnerSettings {
            τ0 : 0.99,
            iterator_options : AlgIteratorOptions {
                // max_iter cannot be very small, as initially FB needs many iterations, although
                // on later invocations even one or two tends to be enough
                max_iter : 2000,
                // verbose_iter affects testing of sufficient convergence, so we set it to
                // a small value…
                verbose_iter : Verbose::Every(1),
                // … but don't print out anything
                quiet : true,
                .. Default::default()
            },
            method : InnerMethod::FB,
            tolerance_mult : 0.01,
        }
    }
}

/// Compute the proximal operator of $x \mapsto x + \delta\_{[0, \infty)}$, i.e.,
/// the non-negativity contrained soft-thresholding operator.
#[inline]
#[replace_float_literals(F::cast_from(literal))]
fn nonneg_soft_thresholding<F : Float>(v : F, λ : F) -> F {
    (v - λ).max(0.0)
}

/// Forward-backward splitting implementation of [`quadratic_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_nonneg_fb<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let mut xprev = x.clone();
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    let mut v = DVector::zeros(x.len());
    let mut iters = 0;

    assert!(λ > 0.0);

    iterator.iterate(|state| {
        // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$
        v.copy_from(g);             // v = g
        v.axpy(1.0, x, τ);          // v = x + τ*g
        v.sygemv(-τ, mA, x, 1.0);   // v = [x + τg]- τAx
        let backup = state.if_verbose(|| {
            xprev.copy_from(x)
        });
        // Calculate the proximal map
        x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| {
            *x_i = nonneg_soft_thresholding(v_i, τλ);
        });

        iters +=1;

        backup.map(|_| {
            // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$.
            // We return the minimal ∞-norm over all subderivatives.
            v.copy_from(g);                  // d = g
            mA.gemv(&mut v, 1.0, x, -1.0);   // d =  Ax - g
            let mut val = 0.0;
            for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
                let d = v_i + λ;
                if x_i > 0.0 || d < 0.0 {
                    val = val.max(d.abs());
                }
            }
            F::from_nalgebra_mixed(val)
        })
    });

    iters
}

/// Semismooth Newton implementation of [`quadratic_nonneg`].
///
/// For detailed documentation of the inputs, refer to there.
/// This function returns the number of iterations taken if there was no inversion failure,
///
/// ## Method derivation
///
/// **The below may look like garbage. Sorry, but rustdoc is obsolete rubbish
/// that doesn't directly support by-now standard-in-markdown LaTeX math. Instead it
/// forces one into unreliable KaTeX autorender postprocessing  andescape hell and that
/// it doesn't even process correctly.**
///
/// <p>
/// For the objective
/// $$
///     J(x) = \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x),
/// $$
/// we have the optimality condition
/// $$
///     x - \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x - τ[Ax-g^⊤]) = 0,
/// $$
/// which we write as
/// $$
///     x - [G ∘ F](x)=0
/// $$
/// for
/// $$
///     G(x) = \mathop{\mathrm{prox}}_{λ{\vec 1}^⊤ + δ_{≥ 0}}
///     \quad\text{and}\quad
///     F(x) = x - τ Ax + τ g^⊤
/// $$
/// We can use Newton derivative chain rule to compute
/// $D_N[G ∘ F](x) = D_N G(F(x)) D_N F(x)$, where
/// $D_N F(x) = \mathop{\mathrm{Id}} - τ A$,
/// and $[D_N G(F(x))]_i = 1$ for inactive coordinates and $=0$ for active coordinates.
/// </p>
///
/// <p>
/// The method itself involves solving $D_N[Id - G ∘ F](x^k) s^k = - [Id - G ∘ F](x^k)$ and
/// updating $x^{k+1} = x^k + s^k$. Consequently
/// $$
///     s^k - D_N G(F(x^k)) [s^k - τ As^k] = - x^k + [G ∘ F](x^k)
/// $$
/// For $𝒜$ the set of active coordinates and $ℐ$ the set of inactive coordinates, this
/// expands as
/// $$
///     [τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜
/// $$
/// and
/// $$
///     s^k_𝒜 = - x^k_𝒜 + [G ∘ F](x^k)_𝒜.
/// $$
/// Thus on $𝒜$ the update $[x^k + s^k]_𝒜 = [G ∘ F](x^k)_𝒜$ is just the forward-backward update.
/// </p>
///
/// <p>
/// We need to detect stopping by a subdifferential and return $x$ satisfying $x ≥ 0$,
/// which is in general not true for the SSN. We therefore use that $[G ∘ F](x^k)$ is a valid
/// forward-backward step.
/// </p>
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_nonneg_ssn<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> Result<usize, NumericalError>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let n = x.len();
    let mut xprev = x.clone();
    let mut v = DVector::zeros(n);
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n));
    let mut s = DVector::zeros(0);
    let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0));
    let mut iters = 0;

    assert!(λ > 0.0);

    let res = iterator.iterate_fallible(|state| {
        // 1. Perform delayed SSN-update based on previously computed step on active
        // coordinates. The step is delayed to the beginning of the loop because
        // the SSN step may violate constraints, so we arrange `x` to contain at the
        // end of the loop the valid FB step that forms part of the SSN step
        let mut si = s.iter();
        for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) {
            if ast {
                *x_i = *xprev_i + *si.next().unwrap()
            }
            *xprev_i = *x_i;
        }

        //xprev.copy_from(x);

        // 2. Calculate FB step.
        // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$
        x.axpy(τ, g, 1.0);               // x = x⁻ + τ*g
        x.sygemv(-τ, mA, &xprev, 1.0);   // x = [x⁻ + τg]- τAx⁻
        // 2.2. Calculate prox and set of active coordinates at the same time
        let mut act_changed = false;
        let mut n_inact = 0;
        for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) {
            if *x_i > τλ {
                *x_i -= τλ;
                if !*ast {
                    act_changed = true;
                    *ast = true;
                }
                n_inact += 1;
            } else {
                *x_i = 0.0;
                if *ast {
                    act_changed = true;
                    *ast = false;
                }
            }
        }

        // *** x now contains forward-backward step ***

        // 3. Solve SSN step `s`.
        // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed.
        if act_changed {
            let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter());
            let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| {
                //(i_inact && j_inact).then_some(mAij * τ)
                (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ
            });
            let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr);
            decomp = nalgebra::linalg::LU::new(mat);
        }

        // 3.2 Solve `s` = $s_ℐ^k$ from
        // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$.
        // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`,
        // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ  - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
        // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`.
        s = if n_inact > 0 {
            // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ  - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
            let inactfilt = inact.iter().copied();
            let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt);
            let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| {
                // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜
                let actfilt = inact.iter().copied().map(std::ops::Not::not);
                let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt);
                let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| {
                    mAij * (x_j - xprev_j)
                }).sum();
                // Subtract it from [x-prev]_i
                //x_i - xprev_i - τ * actpart
                (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above
            });
            let mut rhs = DVector::from_iterator(n_inact, rhs_constr);
            assert_eq!(rhs.len(), n_inact);
            // Solve the system
            if !decomp.solve_mut(&mut rhs) {
                return Step::Failure(NumericalError(
                    "Failed to solve linear system for subproblem SSN."
                ))
            }
            rhs
        } else {
            DVector::zeros(0)
        };

        iters += 1;

        // 4. Report solution quality
        state.if_verbose(|| {
            // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added.
            // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$.
            // We return the minimal ∞-norm over all subderivatives.
            v.copy_from(g);                  // d = g
            mA.gemv(&mut v, 1.0, x, -1.0);   // d =  Ax - g
            let mut val = 0.0;
            for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
                let d = v_i + λ;
                if x_i > 0.0 || d < 0.0 {
                    val = val.max(d.abs());
                }
            }
            F::from_nalgebra_mixed(val)
        })
    });

    res.map(|_| iters)
}

/// This function applies an iterative method for the solution of the quadratic non-negativity
/// constrained problem
/// <div>$$
///     \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x).
/// $$</div>
/// Semismooth Newton or forward-backward are supported based on the setting in `method`.
/// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation.
/// The constant $c$ does not need to be provided. The step length parameter is `τ` while
/// `x` contains the initial iterate and on return the final one. The `iterator` controls
/// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some
/// subdifferential of the objective to zero.
///
/// Interior point methods could offer a further alternative, for example, the one in:
///
///  * Valkonen T. - _A method for weighted projections to the positive definite
///    cone_, <https://doi.org/10.1080/02331934.2014.929680>.
///
/// This function returns the number of iterations taken.
pub fn quadratic_nonneg<F, I>(
    method : InnerMethod,
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ : F,
    x : &mut DVector<F::MixedType>,
    τ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    
    match method {
        InnerMethod::FB =>
            quadratic_nonneg_fb(mA, g, λ, x, τ, iterator),
        InnerMethod::SSN =>
            quadratic_nonneg_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| {
                println!("{}", format!("{e}. Using FB fallback.").red());
                let ins = InnerSettings::<F>::default();
                quadratic_nonneg_fb(mA, g, λ, x, τ, ins.iterator_options)
            })
    }
}

/// PDPSimplementation of [`l1_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
#[replace_float_literals(F::cast_from(literal))]
pub fn l1_nonneg_pdps<F, I, A, Y>(
    mA : &A,
    b : &Y,
    λ : F,
    x : &mut DVector<F>,
    τ : F,
    σ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>,
      Y : Euclidean<F, Output = Y> + Projection<F, Linfinity> + AXPY<F>,
      A : GEMV<F, DVector<F>, Codomain=Y>
          + Adjointable<DVector<F>, Y>,
      for<'a> A::Adjoint<'a> : GEMV<F, Y, Codomain=DVector<F>>
{
    let mAt = mA.adjoint();
    let mut xprev = x.clone();
    let τλ = τ * λ;
    let mut v = DVector::zeros(x.len());
    let mut y = b.similar_origin();
    let mut iters = 0;

    assert!(λ > 0.0);

    iterator.iterate(|state| {
        // Replace `x` with $x - τA^*y
        xprev.copy_from(x);
        mAt.gemv(x, -τ, &y, 1.0);
        // Calculate the proximal map
        x.iter_mut().for_each(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τλ));
        // Calculate v = 2x - xprev
        v.copy_from(x);
        v.axpy(-1.0, &xprev, 2.0);
        // Calculate y = y + σ(A v - b)
        y.axpy(-σ, b, 1.0);
        mA.gemv(&mut y, σ, &v, 1.0);
        y.proj_ball_mut(1.0, Linfinity);

        iters +=1;

        state.if_verbose(|| {
            // The subdifferential of the objective is $A^* ∂\|.\|(Ax - g) + λ + ∂δ_{≥ 0}(x)$.
            // We return the minimal ∞-norm over all subderivatives.
            v.copy_from(b);                  // d = g
            mA.gemv(&mut ỹ, 1.0, x, -1.0);   // d =  Ax - b
            let mut val = 0.0;
            izip!(ỹ.iter(), x.iter()).for_each(|(&ỹ_i, &x_i)| {
                use std::cmp::Ordering::*;
                let tmp = match ỹ_i.partial_cmp(&0.0) {
                    None => F::INFINITY,
                    Some(c) => match (c, x_i > 0.0) {
                        (Greater, true) => 1.0 + λ,
                        (Greater, false) | (Equal, false) => 0.0,
                        (Less, true) => -1.0 + λ,
                        (Less, false) => 0.0.min(-1.0 + λ),
                        (Equal, true) => 0.0.max(-1.0 + λ), // since λ > 0
                    }
                };
                val = val.max(tmp);
            });
            val
        })
    });

    iters
}

/// Method for L1 weight optimisation
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
#[allow(dead_code)]
pub enum L1InnerMethod {
    /// PDPS
    PDPS,
}

/// This function applies an iterative method for the solution of the L1 non-negativity
/// constrained problem
/// <div>$$
///     \min_{x ∈ ℝ^n} \|Ax - b\|_1 + λ{\vec 1}^⊤ x + δ_{≥ 0}(x).
/// $$</div>
///
/// This function returns the number of iterations taken.
pub fn l1_nonneg<F, I>(
    method : L1InnerMethod,
    mA : &DMatrix<F::MixedType>,
    b : &DVector<F::MixedType>,
    λ : F,
    σ : F,
    x : &mut DVector<F::MixedType>,
    τ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F> {

    match method {
        L1InnerMethod::PDPS => l1_nonneg_pdps(mA, b, λ, x, τ, σ, iterator)
    }
}