diff -r 000000000000 -r eb3c7813b67a src/subproblem.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/subproblem.rs Thu Dec 01 23:07:35 2022 +0200 @@ -0,0 +1,373 @@ +//! Iterative algorithms for solving finite-dimensional subproblems. + +use serde::{Serialize, Deserialize}; +use nalgebra::{DVector, DMatrix}; +use numeric_literals::replace_float_literals; +use itertools::{izip, Itertools}; +use colored::Colorize; + +use alg_tools::iter::Mappable; +use alg_tools::error::NumericalError; +use alg_tools::iterate::{ + AlgIteratorFactory, + AlgIteratorState, + AlgIteratorOptions, + Verbose, + Step, +}; +use alg_tools::linops::GEMV; +use alg_tools::nalgebra_support::ToNalgebraRealField; + +use crate::types::*; + +/// Method for solving finite-dimensional subproblems +#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] +#[allow(dead_code)] +pub enum InnerMethod { + /// Forward-backward + FB, + /// Semismooth Newton + SSN, +} + +/// Settings for the solution of finite-dimensional subproblems +#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] +pub struct InnerSettings { + /// Method + pub method : InnerMethod, + /// Proportional step length (∈ [0, 1) for `InnerMethod::FB`). + pub τ0 : F, + /// Fraction of `tolerance` given to inner algorithm + pub tolerance_mult : F, + /// Iterator options + #[serde(flatten)] + pub iterator_options : AlgIteratorOptions, +} + +#[replace_float_literals(F::cast_from(literal))] +impl Default for InnerSettings { + fn default() -> Self { + InnerSettings { + τ0 : 0.99, + iterator_options : AlgIteratorOptions { + // max_iter cannot be very small, as initially FB needs many iterations, although + // on later invocations even one or two tends to be enough + max_iter : 2000, + // verbose_iter affects testing of sufficient convergence, so we set it to + // a small value… + verbose_iter : Verbose::Every(1), + // … but don't print out anything + quiet : true, + .. Default::default() + }, + method : InnerMethod::FB, + tolerance_mult : 0.01, + } + } +} + +/// Compute the proximal operator of $x \mapsto x + \delta\_{[0, \infty)}$, i.e., +/// the non-negativity contrained soft-thresholding operator. +#[inline] +#[replace_float_literals(F::cast_from(literal))] +fn nonneg_soft_thresholding(v : F, λ : F) -> F { + (v - λ).max(0.0) +} + +/// Forward-backward splitting implementation of [`quadratic_nonneg`]. +/// For detailed documentation of the inputs and outputs, refer to there. +/// +/// The `λ` component of the model is handled in the proximal step instead of the gradient step +/// for potential performance improvements. +#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] +pub fn quadratic_nonneg_fb( + mA : &DMatrix, + g : &DVector, + //c_ : F, + λ_ : F, + x : &mut DVector, + τ_ : F, + iterator : I +) -> usize +where F : Float + ToNalgebraRealField, + I : AlgIteratorFactory +{ + let mut xprev = x.clone(); + //let c = c_.to_nalgebra_mixed(); + let λ = λ_.to_nalgebra_mixed(); + let τ = τ_.to_nalgebra_mixed(); + let τλ = τ * λ; + let mut v = DVector::zeros(x.len()); + let mut iters = 0; + + iterator.iterate(|state| { + // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$ + v.copy_from(g); // v = g + v.axpy(1.0, x, τ); // v = x + τ*g + v.sygemv(-τ, mA, x, 1.0); // v = [x + τg]- τAx + let backup = state.if_verbose(|| { + xprev.copy_from(x) + }); + // Calculate the proximal map + x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| { + *x_i = nonneg_soft_thresholding(v_i, τλ); + }); + + iters +=1; + + backup.map(|_| { + // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$. + // We return the minimal ∞-norm over all subderivatives. + v.copy_from(g); // d = g + mA.gemv(&mut v, 1.0, x, -1.0); // d = Ax - g + let mut val = 0.0; + for (&v_i, &x_i) in izip!(v.iter(), x.iter()) { + let d = v_i + λ; + if x_i > 0.0 || d < 0.0 { + val = val.max(d.abs()); + } + } + F::from_nalgebra_mixed(val) + }) + }); + + iters +} + +/// Semismooth Newton implementation of [`quadratic_nonneg`]. +/// +/// For detailed documentation of the inputs, refer to there. +/// This function returns the number of iterations taken if there was no inversion failure, +/// +/// ## Method derivation +/// +/// **The below may look like garbage. Sorry, but rustdoc is obsolete rubbish +/// that doesn't directly support by-now standard-in-markdown LaTeX math. Instead it +/// forces one into unreliable KaTeX autorender postprocessing andescape hell and that +/// it doesn't even process correctly.** +/// +///

+/// For the objective +/// $$ +/// J(x) = \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x), +/// $$ +/// we have the optimality condition +/// $$ +/// x - \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x - τ[Ax-g^⊤]) = 0, +/// $$ +/// which we write as +/// $$ +/// x - [G ∘ F](x)=0 +/// $$ +/// for +/// $$ +/// G(x) = \mathop{\mathrm{prox}}_{λ{\vec 1}^⊤ + δ_{≥ 0}} +/// \quad\text{and}\quad +/// F(x) = x - τ Ax + τ g^⊤ +/// $$ +/// We can use Newton derivative chain rule to compute +/// $D_N[G ∘ F](x) = D_N G(F(x)) D_N F(x)$, where +/// $D_N F(x) = \mathop{\mathrm{Id}} - τ A$, +/// and $[D_N G(F(x))]_i = 1$ for inactive coordinates and $=0$ for active coordinates. +///

+/// +///

+/// The method itself involves solving $D_N[Id - G ∘ F](x^k) s^k = - [Id - G ∘ F](x^k)$ and +/// updating $x^{k+1} = x^k + s^k$. Consequently +/// $$ +/// s^k - D_N G(F(x^k)) [s^k - τ As^k] = - x^k + [G ∘ F](x^k) +/// $$ +/// For $𝒜$ the set of active coordinates and $ℐ$ the set of inactive coordinates, this +/// expands as +/// $$ +/// [τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜 +/// $$ +/// and +/// $$ +/// s^k_𝒜 = - x^k_𝒜 + [G ∘ F](x^k)_𝒜. +/// $$ +/// Thus on $𝒜$ the update $[x^k + s^k]_𝒜 = [G ∘ F](x^k)_𝒜$ is just the forward-backward update. +///

+/// +///

+/// We need to detect stopping by a subdifferential and return $x$ satisfying $x ≥ 0$, +/// which is in general not true for the SSN. We therefore use that $[G ∘ F](x^k)$ is a valid +/// forward-backward step. +///

+#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] +pub fn quadratic_nonneg_ssn( + mA : &DMatrix, + g : &DVector, + //c_ : F, + λ_ : F, + x : &mut DVector, + τ_ : F, + iterator : I +) -> Result +where F : Float + ToNalgebraRealField, + I : AlgIteratorFactory +{ + let n = x.len(); + let mut xprev = x.clone(); + let mut v = DVector::zeros(n); + //let c = c_.to_nalgebra_mixed(); + let λ = λ_.to_nalgebra_mixed(); + let τ = τ_.to_nalgebra_mixed(); + let τλ = τ * λ; + let mut inact : Vec = Vec::from_iter(std::iter::repeat(false).take(n)); + let mut s = DVector::zeros(0); + let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0)); + let mut iters = 0; + + let res = iterator.iterate_fallible(|state| { + // 1. Perform delayed SSN-update based on previously computed step on active + // coordinates. The step is delayed to the beginning of the loop because + // the SSN step may violate constraints, so we arrange `x` to contain at the + // end of the loop the valid FB step that forms part of the SSN step + let mut si = s.iter(); + for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) { + if ast { + *x_i = *xprev_i + *si.next().unwrap() + } + *xprev_i = *x_i; + } + + //xprev.copy_from(x); + + // 2. Calculate FB step. + // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$ + x.axpy(τ, g, 1.0); // x = x⁻ + τ*g + x.sygemv(-τ, mA, &xprev, 1.0); // x = [x⁻ + τg]- τAx⁻ + // 2.2. Calculate prox and set of active coordinates at the same time + let mut act_changed = false; + let mut n_inact = 0; + for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) { + if *x_i > τλ { + *x_i -= τλ; + if !*ast { + act_changed = true; + *ast = true; + } + n_inact += 1; + } else { + *x_i = 0.0; + if *ast { + act_changed = true; + *ast = false; + } + } + } + + // *** x now contains forward-backward step *** + + // 3. Solve SSN step `s`. + // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed. + if act_changed { + let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter()); + let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| { + //(i_inact && j_inact).then_some(mAij * τ) + (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ + }); + let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr); + decomp = nalgebra::linalg::LU::new(mat); + } + + // 3.2 Solve `s` = $s_ℐ^k$ from + // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$. + // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`, + // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$ + // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`. + s = if n_inact > 0 { + // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$ + let inactfilt = inact.iter().copied(); + let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt); + let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| { + // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜 + let actfilt = inact.iter().copied().map(std::ops::Not::not); + let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt); + let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| { + mAij * (x_j - xprev_j) + }).sum(); + // Subtract it from [x-prev]_i + //x_i - xprev_i - τ * actpart + (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above + }); + let mut rhs = DVector::from_iterator(n_inact, rhs_constr); + assert_eq!(rhs.len(), n_inact); + // Solve the system + if !decomp.solve_mut(&mut rhs) { + return Step::Failure(NumericalError( + "Failed to solve linear system for subproblem SSN." + )) + } + rhs + } else { + DVector::zeros(0) + }; + + iters += 1; + + // 4. Report solution quality + state.if_verbose(|| { + // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added. + // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$. + // We return the minimal ∞-norm over all subderivatives. + v.copy_from(g); // d = g + mA.gemv(&mut v, 1.0, x, -1.0); // d = Ax - g + let mut val = 0.0; + for (&v_i, &x_i) in izip!(v.iter(), x.iter()) { + let d = v_i + λ; + if x_i > 0.0 || d < 0.0 { + val = val.max(d.abs()); + } + } + F::from_nalgebra_mixed(val) + }) + }); + + res.map(|_| iters) +} + +/// This function applies an iterative method for the solution of the quadratic non-negativity +/// constrained problem +///
$$ +/// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x). +/// $$
+/// Semismooth Newton or forward-backward are supported based on the setting in `method`. +/// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation. +/// The constant $c$ does not need to be provided. The step length parameter is `τ` while +/// `x` contains the initial iterate and on return the final one. The `iterator` controls +/// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some +/// subdifferential of the objective to zero. +/// +/// Interior point methods could offer a further alternative, for example, the one in: +/// +/// * Valkonen T. - _A method for weighted projections to the positive definite +/// cone_, . +/// +/// This function returns the number of iterations taken. +pub fn quadratic_nonneg( + method : InnerMethod, + mA : &DMatrix, + g : &DVector, + //c_ : F, + λ : F, + x : &mut DVector, + τ : F, + iterator : I +) -> usize +where F : Float + ToNalgebraRealField, + I : AlgIteratorFactory +{ + + match method { + InnerMethod::FB => + quadratic_nonneg_fb(mA, g, λ, x, τ, iterator), + InnerMethod::SSN => + quadratic_nonneg_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| { + println!("{}", format!("{e}. Using FB fallback.").red()); + let ins = InnerSettings::::default(); + quadratic_nonneg_fb(mA, g, λ, x, τ, ins.iterator_options) + }) + } +}