src/subproblem/unconstrained.rs

Sun, 11 Dec 2022 23:25:53 +0200

author
Tuomo Valkonen <tuomov@iki.fi>
date
Sun, 11 Dec 2022 23:25:53 +0200
changeset 24
d29d1fcf5423
permissions
-rw-r--r--

Support arbitrary regularisation terms; implement non-positivity-constrained regularisation.

* Fixes the conditional gradient methods that were incorrectly solving
positivity constrained subproblems although the infinite-dimensional
versions do not include such constraints.

* Introduces the `ExperimentV2` struct that has `regularisation` in place
of `α`. The `Experiment` struct is now deprecated.

* The L^2-squared experiments were switch to be unconstrained, as the
Franke-Wolfe implementations do not support constraints. (This would
be easy to add for the “fully corrective” variant, but is not
immediate for the “relaxed” variant.)

/*!
Iterative algorithms for solving the finite-dimensional subproblem without constraints.
*/

use nalgebra::{DVector, DMatrix};
use numeric_literals::replace_float_literals;
use itertools::{izip, Itertools};
use colored::Colorize;
use std::cmp::Ordering::*;

use alg_tools::iter::Mappable;
use alg_tools::error::NumericalError;
use alg_tools::iterate::{
    AlgIteratorFactory,
    AlgIteratorState,
    Step,
};
use alg_tools::linops::GEMV;
use alg_tools::nalgebra_support::ToNalgebraRealField;

use crate::types::*;
use super::{
    InnerMethod,
    InnerSettings
};

/// Compute the proximal operator of $x \mapsto |x|$, i.e., the soft-thresholding operator.
#[inline]
#[replace_float_literals(F::cast_from(literal))]
fn soft_thresholding<F : Float>(v : F, λ : F) -> F {
    if v > λ {
        v - λ
    } else if v < -λ {
        v + λ
    } else {
        0.0
    }
}

/// Returns the ∞-norm minimal subdifferential of $x ↦  x^⊤Ax - g^⊤ x + λ\|x\|₁$ at $x$.
///
/// `v` will be modified and cannot be trusted to contain useful values afterwards.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
fn min_subdifferential<F : Float + ToNalgebraRealField>(
    v : &mut DVector<F::MixedType>,
    mA : &DMatrix<F::MixedType>,
    x : &DVector<F::MixedType>,
    g : &DVector<F::MixedType>,
    λ : F::MixedType
) -> F {
    v.copy_from(g);
    mA.gemv(v, 1.0, x, -1.0);   // v =  Ax - g
    let mut val = 0.0;
    for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
        // The subdifferential at x is $Ax - g + λ ∂‖·‖₁(x)$.
        val = val.max(match x_i.partial_cmp(&0.0) {
            Some(Greater) => v_i + λ,
            Some(Less) => v_i - λ,
            Some(Equal) => soft_thresholding(v_i, λ),
            None => F::MixedType::nan(),
        })
    }
    F::from_nalgebra_mixed(val)
}


/// Forward-backward splitting implementation of [`quadratic_unconstrained`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_unconstrained_fb<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let mut xprev = x.clone();
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    let mut v = DVector::zeros(x.len());
    let mut iters = 0;

    iterator.iterate(|state| {
        // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$
        v.copy_from(g);             // v = g
        v.axpy(1.0, x, τ);          // v = x + τ*g
        v.sygemv(-τ, mA, x, 1.0);   // v = [x + τg]- τAx
        let backup = state.if_verbose(|| {
            xprev.copy_from(x)
        });
        // Calculate the proximal map
        x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| {
            *x_i = soft_thresholding(v_i, τλ);
        });

        iters +=1;

        backup.map(|_| {
            min_subdifferential(&mut v, mA, x, g, λ)
        })
    });

    iters
}

/// Semismooth Newton implementation of [`quadratic_unconstrained`].
///
/// For detailed documentation of the inputs, refer to there.
/// This function returns the number of iterations taken if there was no inversion failure,
///
/// For method derivarion, see the documentation for [`super::nonneg::quadratic_nonneg`].
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_unconstrained_ssn<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> Result<usize, NumericalError>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let n = x.len();
    let mut xprev = x.clone();
    let mut v = DVector::zeros(n);
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n));
    let mut s = DVector::zeros(0);
    let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0));
    let mut iters = 0;

    let res = iterator.iterate_fallible(|state| {
        // 1. Perform delayed SSN-update based on previously computed step on active
        // coordinates. The step is delayed to the beginning of the loop because
        // the SSN step may violate constraints, so we arrange `x` to contain at the
        // end of the loop the valid FB step that forms part of the SSN step
        let mut si = s.iter();
        for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) {
            if ast {
                *x_i = *xprev_i + *si.next().unwrap()
            }
            *xprev_i = *x_i;
        }

        //xprev.copy_from(x);

        // 2. Calculate FB step.
        // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$
        x.axpy(τ, g, 1.0);               // x = x⁻ + τ*g
        x.sygemv(-τ, mA, &xprev, 1.0);   // x = [x⁻ + τg]- τAx⁻
        // 2.2. Calculate prox and set of active coordinates at the same time
        let mut act_changed = false;
        let mut n_inact = 0;
        for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) {
            if *x_i > τλ {
                *x_i -= τλ;
                if !*ast {
                    act_changed = true;
                    *ast = true;
                }
                n_inact += 1;
            } else if *x_i < -τλ {
                *x_i += τλ;
                if !*ast {
                    act_changed = true;
                    *ast = true;
                }
                n_inact += 1;
            } else {
                *x_i = 0.0;
                if *ast {
                    act_changed = true;
                    *ast = false;
                }
            }
        }

        // *** x now contains forward-backward step ***

        // 3. Solve SSN step `s`.
        // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed.
        if act_changed {
            let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter());
            let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| {
                //(i_inact && j_inact).then_some(mAij * τ)
                (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ
            });
            let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr);
            decomp = nalgebra::linalg::LU::new(mat);
        }

        // 3.2 Solve `s` = $s_ℐ^k$ from
        // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$.
        // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`,
        // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ  - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
        // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`.
        s = if n_inact > 0 {
            // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ  - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
            let inactfilt = inact.iter().copied();
            let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt);
            let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| {
                // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜
                let actfilt = inact.iter().copied().map(std::ops::Not::not);
                let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt);
                let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| {
                    mAij * (x_j - xprev_j)
                }).sum();
                // Subtract it from [x-prev]_i
                //x_i - xprev_i - τ * actpart
                (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above
            });
            let mut rhs = DVector::from_iterator(n_inact, rhs_constr);
            assert_eq!(rhs.len(), n_inact);
            // Solve the system
            if !decomp.solve_mut(&mut rhs) {
                return Step::Failure(NumericalError(
                    "Failed to solve linear system for subproblem SSN."
                ))
            }
            rhs
        } else {
            DVector::zeros(0)
        };

        iters += 1;

        // 4. Report solution quality
        state.if_verbose(|| {
            // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added.
            min_subdifferential(&mut v, mA, x, g, λ)
        })
    });

    res.map(|_| iters)
}

/// This function applies an iterative method for the solution of the problem
/// <div>$$
///     \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ\|x\|₁ + c.
/// $$</div>
/// Semismooth Newton or forward-backward are supported based on the setting in `method`.
/// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation.
/// The constant $c$ does not need to be provided. The step length parameter is `τ` while
/// `x` contains the initial iterate and on return the final one. The `iterator` controls
/// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some
/// subdifferential of the objective to zero.
///
/// This function returns the number of iterations taken.
pub fn quadratic_unconstrained<F, I>(
    method : InnerMethod,
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ : F,
    x : &mut DVector<F::MixedType>,
    τ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    
    match method {
        InnerMethod::FB =>
            quadratic_unconstrained_fb(mA, g, λ, x, τ, iterator),
        InnerMethod::SSN =>
            quadratic_unconstrained_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| {
                println!("{}", format!("{e}. Using FB fallback.").red());
                let ins = InnerSettings::<F>::default();
                quadratic_unconstrained_fb(mA, g, λ, x, τ, ins.iterator_options)
            })
    }
}

mercurial