diff -r aec67cdd6b14 -r efa60bc4f743 src/subproblem/l1squared_unconstrained.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/subproblem/l1squared_unconstrained.rs Thu Aug 29 00:00:00 2024 -0500 @@ -0,0 +1,195 @@ +/*! +Iterative algorithms for solving the finite-dimensional subproblem without constraints. +*/ + +use nalgebra::DVector; +use numeric_literals::replace_float_literals; +use itertools::izip; +use std::cmp::Ordering::*; + +use std::iter::zip; +use alg_tools::iterate::{ + AlgIteratorFactory, + AlgIteratorState, +}; +use alg_tools::nalgebra_support::ToNalgebraRealField; +use alg_tools::nanleast::NaNLeast; +use alg_tools::norms::{Dist, L1}; + +use crate::types::*; +use super::{ + InnerMethod, + InnerSettings +}; +use super::unconstrained::soft_thresholding; +use super::l1squared_nonneg::max_interval_dist_to_zero; + +/// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$. +/// +/// To derive an algorithm for this, we can assume that $y=0$, as +/// $prox_f(x) = prox_{f_0}(x - y) - y$ for $f_0=\frac{β}{2}\norm{x}_1^2$. +/// Now, the optimality conditions for $w = prox_f(x)$ are +/// $$\tag{*} +/// 0 ∈ w-x + β\norm{w}_1\sign w. +/// $$ +/// Clearly then $w = \soft_{β\norm{w}_1}(x)$. +/// Thus the components of $x$ with smallest absolute value will be zeroed out. +/// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components +/// of $x$, and by $m$ their count, multipying the corresponding lines of (*) by $\sign x'$, +/// we obtain +/// $$ +/// \norm{x'}_1 = (1+βm)\norm{w'}_1. +/// $$ +/// That is, $\norm{w}_1=\norm{w'}_1=\norm{x'}_1/(1+βm)$. +/// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest +/// elements, we can easily calculate what $\norm{w}_1$ should be for that choice, and +/// then easily calculate $w = \soft_{β\norm{w}_1}(x)$. We just have to verify that +/// the resulting $w$ has the same norm. There's a shortcut to this, as we work +/// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the +/// condition of soft-thresholding to remain non-zero: $|x_i|>τ\norm{x'}/(1+τm)$. +/// Clearly, if this condition fails for x_i, it will fail for all the components +/// already exluced. While, if it holds, it will hold for all components not excluded. +#[replace_float_literals(F::cast_from(literal))] +pub(super) fn l1squared_prox( + sorted_abs : &mut DVector, + x : &mut DVector, + y : &DVector, + β : F +) { + sorted_abs.copy_from(x); + sorted_abs.axpy(-1.0, y, 1.0); + sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i)); + sorted_abs.as_mut_slice().sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b))); + + let mut n = sorted_abs.sum(); + for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) { + // test first + let tmp = β*n/(1.0 + β*F::cast_from(m)); + if *az_i <= tmp { + // Fail + n -= *az_i; + } else { + // Success + x.zip_apply(y, |x_i, y_i| *x_i = y_i + soft_thresholding(*x_i-y_i, tmp)); + return + } + } + // m = 0 should always work, but x is zero. + x.fill(0.0); +} + +/// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$. +/// +/// `v` will be modified and cannot be trusted to contain useful values afterwards. +#[replace_float_literals(F::cast_from(literal))] +fn min_subdifferential( + y : &DVector, + x : &DVector, + g : &DVector, + λ : F, + β : F +) -> F { + let mut val = 0.0; + let tmp = β*y.dist(x, L1); + for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) { + let (mut lb, mut ub) = (-g_i, -g_i); + match x_i.partial_cmp(y_i) { + Some(Greater) => { lb += tmp; ub += tmp }, + Some(Less) => { lb -= tmp; ub -= tmp }, + Some(Equal) => { lb -= tmp; ub += tmp }, + None => {}, + } + match x_i.partial_cmp(&0.0) { + Some(Greater) => { lb += λ; ub += λ }, + Some(Less) => { lb -= λ; ub -= λ }, + Some(Equal) => { lb -= λ; ub += λ }, + None => {}, + }; + val = max_interval_dist_to_zero(val, lb, ub); + } + val +} + + +/// PDPS implementation of [`l1squared_unconstrained`]. +/// For detailed documentation of the inputs and outputs, refer to there. +/// +/// The `λ` component of the model is handled in the proximal step instead of the gradient step +/// for potential performance improvements. +#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] +pub fn l1squared_unconstrained_pdps( + y : &DVector, + g : &DVector, + λ_ : F, + β_ : F, + x : &mut DVector, + τ_ : F, + σ_ : F, + iterator : I +) -> usize +where F : Float + ToNalgebraRealField, + I : AlgIteratorFactory +{ + let λ = λ_.to_nalgebra_mixed(); + let β = β_.to_nalgebra_mixed(); + let τ = τ_.to_nalgebra_mixed(); + let σ = σ_.to_nalgebra_mixed(); + let mut w = DVector::zeros(x.len()); + let mut tmp = DVector::zeros(x.len()); + let mut xprev = x.clone(); + let mut iters = 0; + + iterator.iterate(|state| { + // Primal step: x^{k+1} = prox_{τ|.-y|_1^2}(x^k - τ (w^k - g)) + x.axpy(-τ, &w, 1.0); + x.axpy(τ, g, 1.0); + l1squared_prox(&mut tmp, x, y, τ*β); + + // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k)) + w.axpy(2.0*σ, x, 1.0); + w.axpy(-σ, &xprev, 1.0); + w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ)); + xprev.copy_from(x); + + iters +=1; + + state.if_verbose(|| { + F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) + }) + }); + + iters +} + + +/// This function applies an iterative method for the solution of the problem +///

$$ +/// \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁. +/// $$

+/// Only PDPS is supported. +/// +/// This function returns the number of iterations taken. +#[replace_float_literals(F::cast_from(literal))] +pub fn l1squared_unconstrained( + y : &DVector, + g : &DVector, + λ : F, + β : F, + x : &mut DVector, + inner : &InnerSettings, + iterator : I +) -> usize +where F : Float + ToNalgebraRealField, + I : AlgIteratorFactory +{ + // Estimate of ‖K‖ for K=Id. + let normest = 1.0; + + let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); + + match inner.method { + InnerMethod::PDPS | InnerMethod::Default => + l1squared_unconstrained_pdps(y, g, λ, β, x, inner_τ, inner_σ, iterator), + _ => unimplemented!(), + } +}