Tue, 18 Feb 2025 20:19:57 -0500
Add arXiv link of second paper to README
/*! Iterative algorithms for solving the finite-dimensional subproblem without constraints. */ use itertools::izip; use nalgebra::DVector; use numeric_literals::replace_float_literals; use std::cmp::Ordering::*; use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState}; use alg_tools::nalgebra_support::ToNalgebraRealField; use alg_tools::nanleast::NaNLeast; use alg_tools::norms::{Dist, L1}; use std::iter::zip; use super::l1squared_nonneg::max_interval_dist_to_zero; use super::unconstrained::soft_thresholding; use super::{InnerMethod, InnerSettings}; use crate::types::*; /// Calculate $\prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$. /// /// To derive an algorithm for this, we can assume that $y=0$, as /// $\prox\_f(x) = \prox\_{f_0}(x - y) - y$ for $f\_0=\frac{β}{2}\norm{x}\_1^2$. /// Now, the optimality conditions for $w = \prox\_f(x)$ are /// $$ /// 0 ∈ w-x + β\norm{w}\_1\sign w. /// $$ /// Clearly then $w = \soft\_{β\norm{w}\_1}(x)$. /// Thus the components of $x$ with smallest absolute value will be zeroed out. /// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components /// of $x$, and by $m$ their count, multipying the corresponding lines of (*) by $\sign x'$, /// we obtain /// $$ /// \norm{x'}\_1 = (1+βm)\norm{w'}\_1. /// $$ /// That is, $\norm{w}\_1=\norm{w'}\_1=\norm{x'}\_1/(1+βm)$. /// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest /// elements, we can easily calculate what $\norm{w}\_1$ should be for that choice, and /// then easily calculate $w = \soft_{β\norm{w}\_1}(x)$. We just have to verify that /// the resulting $w$ has the same norm. There's a shortcut to this, as we work /// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the /// condition of soft-thresholding to remain non-zero: $|x\_i|>τ\norm{x'}/(1+τm)$. /// Clearly, if this condition fails for $x\_i$, it will fail for all the components /// already exluced. While, if it holds, it will hold for all components not excluded. #[replace_float_literals(F::cast_from(literal))] pub(super) fn l1squared_prox<F: Float + nalgebra::RealField>( sorted_abs: &mut DVector<F>, x: &mut DVector<F>, y: &DVector<F>, β: F, ) { sorted_abs.copy_from(x); sorted_abs.axpy(-1.0, y, 1.0); sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i)); sorted_abs .as_mut_slice() .sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b))); let mut n = sorted_abs.sum(); for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) { // test first let tmp = β * n / (1.0 + β * F::cast_from(m)); if *az_i <= tmp { // Fail n -= *az_i; } else { // Success x.zip_apply(y, |x_i, y_i| { *x_i = y_i + soft_thresholding(*x_i - y_i, tmp) }); return; } } // m = 0 should always work, but x is zero. x.fill(0.0); } /// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$. /// /// `v` will be modified and cannot be trusted to contain useful values afterwards. #[replace_float_literals(F::cast_from(literal))] fn min_subdifferential<F: Float + nalgebra::RealField>( y: &DVector<F>, x: &DVector<F>, g: &DVector<F>, λ: F, β: F, ) -> F { let mut val = 0.0; let tmp = β * y.dist(x, L1); for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { let (mut lb, mut ub) = (-g_i, -g_i); match x_i.partial_cmp(y_i) { Some(Greater) => { lb += tmp; ub += tmp } Some(Less) => { lb -= tmp; ub -= tmp } Some(Equal) => { lb -= tmp; ub += tmp } None => {} } match x_i.partial_cmp(&0.0) { Some(Greater) => { lb += λ; ub += λ } Some(Less) => { lb -= λ; ub -= λ } Some(Equal) => { lb -= λ; ub += λ } None => {} }; val = max_interval_dist_to_zero(val, lb, ub); } val } /// PDPS implementation of [`l1squared_unconstrained`]. /// For detailed documentation of the inputs and outputs, refer to there. /// /// The `λ` component of the model is handled in the proximal step instead of the gradient step /// for potential performance improvements. #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn l1squared_unconstrained_pdps<F, I>( y: &DVector<F::MixedType>, g: &DVector<F::MixedType>, λ_: F, β_: F, x: &mut DVector<F::MixedType>, τ_: F, σ_: F, iterator: I, ) -> usize where F: Float + ToNalgebraRealField, I: AlgIteratorFactory<F>, { let λ = λ_.to_nalgebra_mixed(); let β = β_.to_nalgebra_mixed(); let τ = τ_.to_nalgebra_mixed(); let σ = σ_.to_nalgebra_mixed(); let mut w = DVector::zeros(x.len()); let mut tmp = DVector::zeros(x.len()); let mut xprev = x.clone(); let mut iters = 0; iterator.iterate(|state| { // Primal step: x^{k+1} = prox_{τ|.-y|_1^2}(x^k - τ (w^k - g)) x.axpy(-τ, &w, 1.0); x.axpy(τ, g, 1.0); l1squared_prox(&mut tmp, x, y, τ * β); // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k)) w.axpy(2.0 * σ, x, 1.0); w.axpy(-σ, &xprev, 1.0); w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ)); xprev.copy_from(x); iters += 1; state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) }); iters } /// Alternative PDPS implementation of [`l1squared_unconstrained`]. /// For detailed documentation of the inputs and outputs, refer to there. /// /// By not dualising the 1-norm, this should produce more sparse solutions than /// [`l1squared_unconstrained_pdps`]. /// /// The `λ` component of the model is handled in the proximal step instead of the gradient step /// for potential performance improvements. /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. /// We rewrite /// <div>$$ /// \begin{split} /// & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ \\ /// & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁ /// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). /// \end{split} /// $$</div> #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn l1squared_unconstrained_pdps_alt<F, I>( y: &DVector<F::MixedType>, g: &DVector<F::MixedType>, λ_: F, β_: F, x: &mut DVector<F::MixedType>, τ_: F, σ_: F, θ_: F, iterator: I, ) -> usize where F: Float + ToNalgebraRealField, I: AlgIteratorFactory<F>, { let λ = λ_.to_nalgebra_mixed(); let τ = τ_.to_nalgebra_mixed(); let σ = σ_.to_nalgebra_mixed(); let θ = θ_.to_nalgebra_mixed(); let β = β_.to_nalgebra_mixed(); let σθ = σ * θ; let τθ = τ * θ; let mut w = DVector::zeros(x.len()); let mut tmp = DVector::zeros(x.len()); let mut xprev = x.clone(); let mut iters = 0; iterator.iterate(|state| { // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g)) x.axpy(-τθ, &w, 1.0); x.axpy(τ, g, 1.0); x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ * λ)); // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), // we compute w^{k+1} = prox_{σg^*}(q) for // = q - σ prox_{g/σ}(q/σ) // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) // where q/σ = w^k/σ + (2x^{k+1}-x^k), w /= σ; w.axpy(2.0, x, 1.0); w.axpy(-1.0, &xprev, 1.0); xprev.copy_from(&w); // use xprev as temporary variable l1squared_prox(&mut tmp, &mut xprev, y, β / σθ); w -= &xprev; w *= σ; xprev.copy_from(x); iters += 1; state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))) }); iters } /// This function applies an iterative method for the solution of the problem /// <div>$$ /// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁. /// $$</div> /// Only PDPS is supported. /// /// This function returns the number of iterations taken. #[replace_float_literals(F::cast_from(literal))] pub fn l1squared_unconstrained<F, I>( y: &DVector<F::MixedType>, g: &DVector<F::MixedType>, λ: F, β: F, x: &mut DVector<F::MixedType>, inner: &InnerSettings<F>, iterator: I, ) -> usize where F: Float + ToNalgebraRealField, I: AlgIteratorFactory<F>, { // Estimate of ‖K‖ for K=θ Id. let inner_θ = 1.0; let normest = inner_θ; let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); match inner.method { InnerMethod::PDPS => { l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator) } other => unimplemented!("${other:?} is unimplemented"), } }