Tue, 21 Mar 2023 20:31:01 +0200
Implement non-negativity constraints for the conditional gradient methods
/*! Iterative algorithms for solving the finite-dimensional subproblem without constraints. */ use nalgebra::{DVector, DMatrix}; use numeric_literals::replace_float_literals; use itertools::{izip, Itertools}; use colored::Colorize; use std::cmp::Ordering::*; use alg_tools::iter::Mappable; use alg_tools::error::NumericalError; use alg_tools::iterate::{ AlgIteratorFactory, AlgIteratorState, Step, }; use alg_tools::linops::GEMV; use alg_tools::nalgebra_support::ToNalgebraRealField; use crate::types::*; use super::{ InnerMethod, InnerSettings }; /// Compute the proximal operator of $x \mapsto |x|$, i.e., the soft-thresholding operator. #[inline] #[replace_float_literals(F::cast_from(literal))] fn soft_thresholding<F : Float>(v : F, λ : F) -> F { if v > λ { v - λ } else if v < -λ { v + λ } else { 0.0 } } /// Returns the ∞-norm minimal subdifferential of $x ↦ x^⊤Ax - g^⊤ x + λ\|x\|₁$ at $x$. /// /// `v` will be modified and cannot be trusted to contain useful values afterwards. #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] fn min_subdifferential<F : Float + ToNalgebraRealField>( v : &mut DVector<F::MixedType>, mA : &DMatrix<F::MixedType>, x : &DVector<F::MixedType>, g : &DVector<F::MixedType>, λ : F::MixedType ) -> F { v.copy_from(g); mA.gemv(v, 1.0, x, -1.0); // v = Ax - g let mut val = 0.0; for (&v_i, &x_i) in izip!(v.iter(), x.iter()) { // The subdifferential at x is $Ax - g + λ ∂‖·‖₁(x)$. val = val.max(match x_i.partial_cmp(&0.0) { Some(Greater) => v_i + λ, Some(Less) => v_i - λ, Some(Equal) => soft_thresholding(v_i, λ), None => F::MixedType::nan(), }) } F::from_nalgebra_mixed(val) } /// Forward-backward splitting implementation of [`quadratic_unconstrained`]. /// For detailed documentation of the inputs and outputs, refer to there. /// /// The `λ` component of the model is handled in the proximal step instead of the gradient step /// for potential performance improvements. #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn quadratic_unconstrained_fb<F, I>( mA : &DMatrix<F::MixedType>, g : &DVector<F::MixedType>, //c_ : F, λ_ : F, x : &mut DVector<F::MixedType>, τ_ : F, iterator : I ) -> usize where F : Float + ToNalgebraRealField, I : AlgIteratorFactory<F> { let mut xprev = x.clone(); //let c = c_.to_nalgebra_mixed(); let λ = λ_.to_nalgebra_mixed(); let τ = τ_.to_nalgebra_mixed(); let τλ = τ * λ; let mut v = DVector::zeros(x.len()); let mut iters = 0; iterator.iterate(|state| { // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$ v.copy_from(g); // v = g v.axpy(1.0, x, τ); // v = x + τ*g v.sygemv(-τ, mA, x, 1.0); // v = [x + τg]- τAx let backup = state.if_verbose(|| { xprev.copy_from(x) }); // Calculate the proximal map x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| { *x_i = soft_thresholding(v_i, τλ); }); iters +=1; backup.map(|_| { min_subdifferential(&mut v, mA, x, g, λ) }) }); iters } /// Semismooth Newton implementation of [`quadratic_unconstrained`]. /// /// For detailed documentation of the inputs, refer to there. /// This function returns the number of iterations taken if there was no inversion failure, /// /// For method derivarion, see the documentation for [`super::nonneg::quadratic_nonneg`]. #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn quadratic_unconstrained_ssn<F, I>( mA : &DMatrix<F::MixedType>, g : &DVector<F::MixedType>, //c_ : F, λ_ : F, x : &mut DVector<F::MixedType>, τ_ : F, iterator : I ) -> Result<usize, NumericalError> where F : Float + ToNalgebraRealField, I : AlgIteratorFactory<F> { let n = x.len(); let mut xprev = x.clone(); let mut v = DVector::zeros(n); //let c = c_.to_nalgebra_mixed(); let λ = λ_.to_nalgebra_mixed(); let τ = τ_.to_nalgebra_mixed(); let τλ = τ * λ; let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n)); let mut s = DVector::zeros(0); let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0)); let mut iters = 0; let res = iterator.iterate_fallible(|state| { // 1. Perform delayed SSN-update based on previously computed step on active // coordinates. The step is delayed to the beginning of the loop because // the SSN step may violate constraints, so we arrange `x` to contain at the // end of the loop the valid FB step that forms part of the SSN step let mut si = s.iter(); for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) { if ast { *x_i = *xprev_i + *si.next().unwrap() } *xprev_i = *x_i; } //xprev.copy_from(x); // 2. Calculate FB step. // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$ x.axpy(τ, g, 1.0); // x = x⁻ + τ*g x.sygemv(-τ, mA, &xprev, 1.0); // x = [x⁻ + τg]- τAx⁻ // 2.2. Calculate prox and set of active coordinates at the same time let mut act_changed = false; let mut n_inact = 0; for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) { if *x_i > τλ { *x_i -= τλ; if !*ast { act_changed = true; *ast = true; } n_inact += 1; } else if *x_i < -τλ { *x_i += τλ; if !*ast { act_changed = true; *ast = true; } n_inact += 1; } else { *x_i = 0.0; if *ast { act_changed = true; *ast = false; } } } // *** x now contains forward-backward step *** // 3. Solve SSN step `s`. // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed. if act_changed { let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter()); let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| { //(i_inact && j_inact).then_some(mAij * τ) (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ }); let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr); decomp = nalgebra::linalg::LU::new(mat); } // 3.2 Solve `s` = $s_ℐ^k$ from // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$. // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`, // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$ // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`. s = if n_inact > 0 { // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$ let inactfilt = inact.iter().copied(); let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt); let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| { // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜 let actfilt = inact.iter().copied().map(std::ops::Not::not); let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt); let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| { mAij * (x_j - xprev_j) }).sum(); // Subtract it from [x-prev]_i //x_i - xprev_i - τ * actpart (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above }); let mut rhs = DVector::from_iterator(n_inact, rhs_constr); assert_eq!(rhs.len(), n_inact); // Solve the system if !decomp.solve_mut(&mut rhs) { return Step::Failure(NumericalError( "Failed to solve linear system for subproblem SSN." )) } rhs } else { DVector::zeros(0) }; iters += 1; // 4. Report solution quality state.if_verbose(|| { // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added. min_subdifferential(&mut v, mA, x, g, λ) }) }); res.map(|_| iters) } /// This function applies an iterative method for the solution of the problem /// <div>$$ /// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ\|x\|₁ + c. /// $$</div> /// Semismooth Newton or forward-backward are supported based on the setting in `method`. /// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation. /// The constant $c$ does not need to be provided. The step length parameter is `τ` while /// `x` contains the initial iterate and on return the final one. The `iterator` controls /// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some /// subdifferential of the objective to zero. /// /// This function returns the number of iterations taken. pub fn quadratic_unconstrained<F, I>( method : InnerMethod, mA : &DMatrix<F::MixedType>, g : &DVector<F::MixedType>, //c_ : F, λ : F, x : &mut DVector<F::MixedType>, τ : F, iterator : I ) -> usize where F : Float + ToNalgebraRealField, I : AlgIteratorFactory<F> { match method { InnerMethod::FB => quadratic_unconstrained_fb(mA, g, λ, x, τ, iterator), InnerMethod::SSN => quadratic_unconstrained_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| { println!("{}", format!("{e}. Using FB fallback.").red()); let ins = InnerSettings::<F>::default(); quadratic_unconstrained_fb(mA, g, λ, x, τ, ins.iterator_options) }) } }