--- a/src/subproblem.rs Sun Dec 11 23:19:17 2022 +0200 +++ b/src/subproblem.rs Sun Dec 11 23:25:53 2022 +0200 @@ -1,25 +1,27 @@ -//! Iterative algorithms for solving finite-dimensional subproblems. +/*! +Iterative algorithms for solving the finite-dimensional subproblem. +*/ use serde::{Serialize, Deserialize}; -use nalgebra::{DVector, DMatrix}; use numeric_literals::replace_float_literals; -use itertools::{izip, Itertools}; -use colored::Colorize; - -use alg_tools::iter::Mappable; -use alg_tools::error::NumericalError; use alg_tools::iterate::{ - AlgIteratorFactory, - AlgIteratorState, AlgIteratorOptions, Verbose, - Step, + }; -use alg_tools::linops::GEMV; -use alg_tools::nalgebra_support::ToNalgebraRealField; use crate::types::*; +pub mod nonneg; +pub mod unconstrained; + +#[deprecated(since = "1.0.1", note = "Moved to submodule nonneg")] +pub use nonneg::{ + quadratic_nonneg, + quadratic_nonneg_ssn, + quadratic_nonneg_fb +}; + /// Method for solving finite-dimensional subproblems #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] #[allow(dead_code)] @@ -65,309 +67,3 @@ } } } - -/// Compute the proximal operator of $x \mapsto x + \delta\_{[0, \infty)}$, i.e., -/// the non-negativity contrained soft-thresholding operator. -#[inline] -#[replace_float_literals(F::cast_from(literal))] -fn nonneg_soft_thresholding<F : Float>(v : F, λ : F) -> F { - (v - λ).max(0.0) -} - -/// Forward-backward splitting implementation of [`quadratic_nonneg`]. -/// For detailed documentation of the inputs and outputs, refer to there. -/// -/// The `λ` component of the model is handled in the proximal step instead of the gradient step -/// for potential performance improvements. -#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] -pub fn quadratic_nonneg_fb<F, I>( - mA : &DMatrix<F::MixedType>, - g : &DVector<F::MixedType>, - //c_ : F, - λ_ : F, - x : &mut DVector<F::MixedType>, - τ_ : F, - iterator : I -) -> usize -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory<F> -{ - let mut xprev = x.clone(); - //let c = c_.to_nalgebra_mixed(); - let λ = λ_.to_nalgebra_mixed(); - let τ = τ_.to_nalgebra_mixed(); - let τλ = τ * λ; - let mut v = DVector::zeros(x.len()); - let mut iters = 0; - - iterator.iterate(|state| { - // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$ - v.copy_from(g); // v = g - v.axpy(1.0, x, τ); // v = x + τ*g - v.sygemv(-τ, mA, x, 1.0); // v = [x + τg]- τAx - let backup = state.if_verbose(|| { - xprev.copy_from(x) - }); - // Calculate the proximal map - x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| { - *x_i = nonneg_soft_thresholding(v_i, τλ); - }); - - iters +=1; - - backup.map(|_| { - // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$. - // We return the minimal ∞-norm over all subderivatives. - v.copy_from(g); // d = g - mA.gemv(&mut v, 1.0, x, -1.0); // d = Ax - g - let mut val = 0.0; - for (&v_i, &x_i) in izip!(v.iter(), x.iter()) { - let d = v_i + λ; - if x_i > 0.0 || d < 0.0 { - val = val.max(d.abs()); - } - } - F::from_nalgebra_mixed(val) - }) - }); - - iters -} - -/// Semismooth Newton implementation of [`quadratic_nonneg`]. -/// -/// For detailed documentation of the inputs, refer to there. -/// This function returns the number of iterations taken if there was no inversion failure, -/// -/// ## Method derivation -/// -/// **The below may look like garbage. Sorry, but rustdoc is obsolete rubbish -/// that doesn't directly support by-now standard-in-markdown LaTeX math. Instead it -/// forces one into unreliable KaTeX autorender postprocessing andescape hell and that -/// it doesn't even process correctly.** -/// -/// <p> -/// For the objective -/// $$ -/// J(x) = \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x), -/// $$ -/// we have the optimality condition -/// $$ -/// x - \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x - τ[Ax-g^⊤]) = 0, -/// $$ -/// which we write as -/// $$ -/// x - [G ∘ F](x)=0 -/// $$ -/// for -/// $$ -/// G(x) = \mathop{\mathrm{prox}}_{λ{\vec 1}^⊤ + δ_{≥ 0}} -/// \quad\text{and}\quad -/// F(x) = x - τ Ax + τ g^⊤ -/// $$ -/// We can use Newton derivative chain rule to compute -/// $D_N[G ∘ F](x) = D_N G(F(x)) D_N F(x)$, where -/// $D_N F(x) = \mathop{\mathrm{Id}} - τ A$, -/// and $[D_N G(F(x))]_i = 1$ for inactive coordinates and $=0$ for active coordinates. -/// </p> -/// -/// <p> -/// The method itself involves solving $D_N[Id - G ∘ F](x^k) s^k = - [Id - G ∘ F](x^k)$ and -/// updating $x^{k+1} = x^k + s^k$. Consequently -/// $$ -/// s^k - D_N G(F(x^k)) [s^k - τ As^k] = - x^k + [G ∘ F](x^k) -/// $$ -/// For $𝒜$ the set of active coordinates and $ℐ$ the set of inactive coordinates, this -/// expands as -/// $$ -/// [τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜 -/// $$ -/// and -/// $$ -/// s^k_𝒜 = - x^k_𝒜 + [G ∘ F](x^k)_𝒜. -/// $$ -/// Thus on $𝒜$ the update $[x^k + s^k]_𝒜 = [G ∘ F](x^k)_𝒜$ is just the forward-backward update. -/// </p> -/// -/// <p> -/// We need to detect stopping by a subdifferential and return $x$ satisfying $x ≥ 0$, -/// which is in general not true for the SSN. We therefore use that $[G ∘ F](x^k)$ is a valid -/// forward-backward step. -/// </p> -#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] -pub fn quadratic_nonneg_ssn<F, I>( - mA : &DMatrix<F::MixedType>, - g : &DVector<F::MixedType>, - //c_ : F, - λ_ : F, - x : &mut DVector<F::MixedType>, - τ_ : F, - iterator : I -) -> Result<usize, NumericalError> -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory<F> -{ - let n = x.len(); - let mut xprev = x.clone(); - let mut v = DVector::zeros(n); - //let c = c_.to_nalgebra_mixed(); - let λ = λ_.to_nalgebra_mixed(); - let τ = τ_.to_nalgebra_mixed(); - let τλ = τ * λ; - let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n)); - let mut s = DVector::zeros(0); - let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0)); - let mut iters = 0; - - let res = iterator.iterate_fallible(|state| { - // 1. Perform delayed SSN-update based on previously computed step on active - // coordinates. The step is delayed to the beginning of the loop because - // the SSN step may violate constraints, so we arrange `x` to contain at the - // end of the loop the valid FB step that forms part of the SSN step - let mut si = s.iter(); - for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) { - if ast { - *x_i = *xprev_i + *si.next().unwrap() - } - *xprev_i = *x_i; - } - - //xprev.copy_from(x); - - // 2. Calculate FB step. - // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$ - x.axpy(τ, g, 1.0); // x = x⁻ + τ*g - x.sygemv(-τ, mA, &xprev, 1.0); // x = [x⁻ + τg]- τAx⁻ - // 2.2. Calculate prox and set of active coordinates at the same time - let mut act_changed = false; - let mut n_inact = 0; - for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) { - if *x_i > τλ { - *x_i -= τλ; - if !*ast { - act_changed = true; - *ast = true; - } - n_inact += 1; - } else { - *x_i = 0.0; - if *ast { - act_changed = true; - *ast = false; - } - } - } - - // *** x now contains forward-backward step *** - - // 3. Solve SSN step `s`. - // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed. - if act_changed { - let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter()); - let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| { - //(i_inact && j_inact).then_some(mAij * τ) - (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ - }); - let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr); - decomp = nalgebra::linalg::LU::new(mat); - } - - // 3.2 Solve `s` = $s_ℐ^k$ from - // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$. - // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`, - // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$ - // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`. - s = if n_inact > 0 { - // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$ - let inactfilt = inact.iter().copied(); - let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt); - let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| { - // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜 - let actfilt = inact.iter().copied().map(std::ops::Not::not); - let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt); - let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| { - mAij * (x_j - xprev_j) - }).sum(); - // Subtract it from [x-prev]_i - //x_i - xprev_i - τ * actpart - (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above - }); - let mut rhs = DVector::from_iterator(n_inact, rhs_constr); - assert_eq!(rhs.len(), n_inact); - // Solve the system - if !decomp.solve_mut(&mut rhs) { - return Step::Failure(NumericalError( - "Failed to solve linear system for subproblem SSN." - )) - } - rhs - } else { - DVector::zeros(0) - }; - - iters += 1; - - // 4. Report solution quality - state.if_verbose(|| { - // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added. - // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$. - // We return the minimal ∞-norm over all subderivatives. - v.copy_from(g); // d = g - mA.gemv(&mut v, 1.0, x, -1.0); // d = Ax - g - let mut val = 0.0; - for (&v_i, &x_i) in izip!(v.iter(), x.iter()) { - let d = v_i + λ; - if x_i > 0.0 || d < 0.0 { - val = val.max(d.abs()); - } - } - F::from_nalgebra_mixed(val) - }) - }); - - res.map(|_| iters) -} - -/// This function applies an iterative method for the solution of the quadratic non-negativity -/// constrained problem -/// <div>$$ -/// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x). -/// $$</div> -/// Semismooth Newton or forward-backward are supported based on the setting in `method`. -/// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation. -/// The constant $c$ does not need to be provided. The step length parameter is `τ` while -/// `x` contains the initial iterate and on return the final one. The `iterator` controls -/// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some -/// subdifferential of the objective to zero. -/// -/// Interior point methods could offer a further alternative, for example, the one in: -/// -/// * Valkonen T. - _A method for weighted projections to the positive definite -/// cone_, <https://doi.org/10.1080/02331934.2014.929680>. -/// -/// This function returns the number of iterations taken. -pub fn quadratic_nonneg<F, I>( - method : InnerMethod, - mA : &DMatrix<F::MixedType>, - g : &DVector<F::MixedType>, - //c_ : F, - λ : F, - x : &mut DVector<F::MixedType>, - τ : F, - iterator : I -) -> usize -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory<F> -{ - - match method { - InnerMethod::FB => - quadratic_nonneg_fb(mA, g, λ, x, τ, iterator), - InnerMethod::SSN => - quadratic_nonneg_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| { - println!("{}", format!("{e}. Using FB fallback.").red()); - let ins = InnerSettings::<F>::default(); - quadratic_nonneg_fb(mA, g, λ, x, τ, ins.iterator_options) - }) - } -}