src/subproblem/unconstrained.rs

changeset 24
d29d1fcf5423
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/subproblem/unconstrained.rs	Sun Dec 11 23:25:53 2022 +0200
@@ -0,0 +1,288 @@
+/*!
+Iterative algorithms for solving the finite-dimensional subproblem without constraints.
+*/
+
+use nalgebra::{DVector, DMatrix};
+use numeric_literals::replace_float_literals;
+use itertools::{izip, Itertools};
+use colored::Colorize;
+use std::cmp::Ordering::*;
+
+use alg_tools::iter::Mappable;
+use alg_tools::error::NumericalError;
+use alg_tools::iterate::{
+    AlgIteratorFactory,
+    AlgIteratorState,
+    Step,
+};
+use alg_tools::linops::GEMV;
+use alg_tools::nalgebra_support::ToNalgebraRealField;
+
+use crate::types::*;
+use super::{
+    InnerMethod,
+    InnerSettings
+};
+
+/// Compute the proximal operator of $x \mapsto |x|$, i.e., the soft-thresholding operator.
+#[inline]
+#[replace_float_literals(F::cast_from(literal))]
+fn soft_thresholding<F : Float>(v : F, λ : F) -> F {
+    if v > λ {
+        v - λ
+    } else if v < -λ {
+        v + λ
+    } else {
+        0.0
+    }
+}
+
+/// Returns the ∞-norm minimal subdifferential of $x ↦  x^⊤Ax - g^⊤ x + λ\|x\|₁$ at $x$.
+///
+/// `v` will be modified and cannot be trusted to contain useful values afterwards.
+#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
+fn min_subdifferential<F : Float + ToNalgebraRealField>(
+    v : &mut DVector<F::MixedType>,
+    mA : &DMatrix<F::MixedType>,
+    x : &DVector<F::MixedType>,
+    g : &DVector<F::MixedType>,
+    λ : F::MixedType
+) -> F {
+    v.copy_from(g);
+    mA.gemv(v, 1.0, x, -1.0);   // v =  Ax - g
+    let mut val = 0.0;
+    for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
+        // The subdifferential at x is $Ax - g + λ ∂‖·‖₁(x)$.
+        val = val.max(match x_i.partial_cmp(&0.0) {
+            Some(Greater) => v_i + λ,
+            Some(Less) => v_i - λ,
+            Some(Equal) => soft_thresholding(v_i, λ),
+            None => F::MixedType::nan(),
+        })
+    }
+    F::from_nalgebra_mixed(val)
+}
+
+
+/// Forward-backward splitting implementation of [`quadratic_unconstrained`].
+/// For detailed documentation of the inputs and outputs, refer to there.
+///
+/// The `λ` component of the model is handled in the proximal step instead of the gradient step
+/// for potential performance improvements.
+#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
+pub fn quadratic_unconstrained_fb<F, I>(
+    mA : &DMatrix<F::MixedType>,
+    g : &DVector<F::MixedType>,
+    //c_ : F,
+    λ_ : F,
+    x : &mut DVector<F::MixedType>,
+    τ_ : F,
+    iterator : I
+) -> usize
+where F : Float + ToNalgebraRealField,
+      I : AlgIteratorFactory<F>
+{
+    let mut xprev = x.clone();
+    //let c = c_.to_nalgebra_mixed();
+    let λ = λ_.to_nalgebra_mixed();
+    let τ = τ_.to_nalgebra_mixed();
+    let τλ = τ * λ;
+    let mut v = DVector::zeros(x.len());
+    let mut iters = 0;
+
+    iterator.iterate(|state| {
+        // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$
+        v.copy_from(g);             // v = g
+        v.axpy(1.0, x, τ);          // v = x + τ*g
+        v.sygemv(-τ, mA, x, 1.0);   // v = [x + τg]- τAx
+        let backup = state.if_verbose(|| {
+            xprev.copy_from(x)
+        });
+        // Calculate the proximal map
+        x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| {
+            *x_i = soft_thresholding(v_i, τλ);
+        });
+
+        iters +=1;
+
+        backup.map(|_| {
+            min_subdifferential(&mut v, mA, x, g, λ)
+        })
+    });
+
+    iters
+}
+
+/// Semismooth Newton implementation of [`quadratic_unconstrained`].
+///
+/// For detailed documentation of the inputs, refer to there.
+/// This function returns the number of iterations taken if there was no inversion failure,
+///
+/// For method derivarion, see the documentation for [`super::nonneg::quadratic_nonneg`].
+#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
+pub fn quadratic_unconstrained_ssn<F, I>(
+    mA : &DMatrix<F::MixedType>,
+    g : &DVector<F::MixedType>,
+    //c_ : F,
+    λ_ : F,
+    x : &mut DVector<F::MixedType>,
+    τ_ : F,
+    iterator : I
+) -> Result<usize, NumericalError>
+where F : Float + ToNalgebraRealField,
+      I : AlgIteratorFactory<F>
+{
+    let n = x.len();
+    let mut xprev = x.clone();
+    let mut v = DVector::zeros(n);
+    //let c = c_.to_nalgebra_mixed();
+    let λ = λ_.to_nalgebra_mixed();
+    let τ = τ_.to_nalgebra_mixed();
+    let τλ = τ * λ;
+    let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n));
+    let mut s = DVector::zeros(0);
+    let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0));
+    let mut iters = 0;
+
+    let res = iterator.iterate_fallible(|state| {
+        // 1. Perform delayed SSN-update based on previously computed step on active
+        // coordinates. The step is delayed to the beginning of the loop because
+        // the SSN step may violate constraints, so we arrange `x` to contain at the
+        // end of the loop the valid FB step that forms part of the SSN step
+        let mut si = s.iter();
+        for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) {
+            if ast {
+                *x_i = *xprev_i + *si.next().unwrap()
+            }
+            *xprev_i = *x_i;
+        }
+
+        //xprev.copy_from(x);
+
+        // 2. Calculate FB step.
+        // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$
+        x.axpy(τ, g, 1.0);               // x = x⁻ + τ*g
+        x.sygemv(-τ, mA, &xprev, 1.0);   // x = [x⁻ + τg]- τAx⁻
+        // 2.2. Calculate prox and set of active coordinates at the same time
+        let mut act_changed = false;
+        let mut n_inact = 0;
+        for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) {
+            if *x_i > τλ {
+                *x_i -= τλ;
+                if !*ast {
+                    act_changed = true;
+                    *ast = true;
+                }
+                n_inact += 1;
+            } else if *x_i < -τλ {
+                *x_i += τλ;
+                if !*ast {
+                    act_changed = true;
+                    *ast = true;
+                }
+                n_inact += 1;
+            } else {
+                *x_i = 0.0;
+                if *ast {
+                    act_changed = true;
+                    *ast = false;
+                }
+            }
+        }
+
+        // *** x now contains forward-backward step ***
+
+        // 3. Solve SSN step `s`.
+        // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed.
+        if act_changed {
+            let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter());
+            let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| {
+                //(i_inact && j_inact).then_some(mAij * τ)
+                (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ
+            });
+            let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr);
+            decomp = nalgebra::linalg::LU::new(mat);
+        }
+
+        // 3.2 Solve `s` = $s_ℐ^k$ from
+        // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$.
+        // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`,
+        // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ  - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
+        // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`.
+        s = if n_inact > 0 {
+            // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ  - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
+            let inactfilt = inact.iter().copied();
+            let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt);
+            let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| {
+                // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜
+                let actfilt = inact.iter().copied().map(std::ops::Not::not);
+                let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt);
+                let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| {
+                    mAij * (x_j - xprev_j)
+                }).sum();
+                // Subtract it from [x-prev]_i
+                //x_i - xprev_i - τ * actpart
+                (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above
+            });
+            let mut rhs = DVector::from_iterator(n_inact, rhs_constr);
+            assert_eq!(rhs.len(), n_inact);
+            // Solve the system
+            if !decomp.solve_mut(&mut rhs) {
+                return Step::Failure(NumericalError(
+                    "Failed to solve linear system for subproblem SSN."
+                ))
+            }
+            rhs
+        } else {
+            DVector::zeros(0)
+        };
+
+        iters += 1;
+
+        // 4. Report solution quality
+        state.if_verbose(|| {
+            // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added.
+            min_subdifferential(&mut v, mA, x, g, λ)
+        })
+    });
+
+    res.map(|_| iters)
+}
+
+/// This function applies an iterative method for the solution of the problem
+/// <div>$$
+///     \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ\|x\|₁ + c.
+/// $$</div>
+/// Semismooth Newton or forward-backward are supported based on the setting in `method`.
+/// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation.
+/// The constant $c$ does not need to be provided. The step length parameter is `τ` while
+/// `x` contains the initial iterate and on return the final one. The `iterator` controls
+/// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some
+/// subdifferential of the objective to zero.
+///
+/// This function returns the number of iterations taken.
+pub fn quadratic_unconstrained<F, I>(
+    method : InnerMethod,
+    mA : &DMatrix<F::MixedType>,
+    g : &DVector<F::MixedType>,
+    //c_ : F,
+    λ : F,
+    x : &mut DVector<F::MixedType>,
+    τ : F,
+    iterator : I
+) -> usize
+where F : Float + ToNalgebraRealField,
+      I : AlgIteratorFactory<F>
+{
+    
+    match method {
+        InnerMethod::FB =>
+            quadratic_unconstrained_fb(mA, g, λ, x, τ, iterator),
+        InnerMethod::SSN =>
+            quadratic_unconstrained_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| {
+                println!("{}", format!("{e}. Using FB fallback.").red());
+                let ins = InnerSettings::<F>::default();
+                quadratic_unconstrained_fb(mA, g, λ, x, τ, ins.iterator_options)
+            })
+    }
+}

mercurial