src/subproblem/l1squared_unconstrained.rs

changeset 54
b3312eee105c
parent 42
6a7365d73e4c
--- a/src/subproblem/l1squared_unconstrained.rs	Mon Feb 17 14:10:45 2025 -0500
+++ b/src/subproblem/l1squared_unconstrained.rs	Mon Feb 17 14:10:52 2025 -0500
@@ -2,76 +2,74 @@
 Iterative algorithms for solving the finite-dimensional subproblem without constraints.
 */
 
+use itertools::izip;
 use nalgebra::DVector;
 use numeric_literals::replace_float_literals;
-use itertools::izip;
 use std::cmp::Ordering::*;
 
-use std::iter::zip;
-use alg_tools::iterate::{
-    AlgIteratorFactory,
-    AlgIteratorState,
-};
+use alg_tools::iterate::{AlgIteratorFactory, AlgIteratorState};
 use alg_tools::nalgebra_support::ToNalgebraRealField;
 use alg_tools::nanleast::NaNLeast;
 use alg_tools::norms::{Dist, L1};
+use std::iter::zip;
 
+use super::l1squared_nonneg::max_interval_dist_to_zero;
+use super::unconstrained::soft_thresholding;
+use super::{InnerMethod, InnerSettings};
 use crate::types::*;
-use super::{
-    InnerMethod,
-    InnerSettings
-};
-use super::unconstrained::soft_thresholding;
-use super::l1squared_nonneg::max_interval_dist_to_zero;
 
-/// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$.
+/// Calculate $\prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$.
 ///
 /// To derive an algorithm for this, we can assume that $y=0$, as
-/// $prox_f(x) = prox_{f_0}(x - y) - y$ for $f_0=\frac{β}{2}\norm{x}_1^2$.
-/// Now, the optimality conditions for $w = prox_f(x)$ are
-/// $$\tag{*}
-///     0 ∈ w-x + β\norm{w}_1\sign w.
+/// $\prox\_f(x) = \prox\_{f_0}(x - y) - y$ for $f\_0=\frac{β}{2}\norm{x}\_1^2$.
+/// Now, the optimality conditions for $w = \prox\_f(x)$ are
 /// $$
-/// Clearly then $w = \soft_{β\norm{w}_1}(x)$.
+///     0 ∈ w-x + β\norm{w}\_1\sign w.
+/// $$
+/// Clearly then $w = \soft\_{β\norm{w}\_1}(x)$.
 /// Thus the components of $x$ with smallest absolute value will be zeroed out.
 /// Denoting by $w'$ the non-zero components, and by $x'$ the corresponding components
 /// of $x$, and by $m$ their count,  multipying the corresponding lines of (*) by $\sign x'$,
 /// we obtain
 /// $$
-///     \norm{x'}_1 = (1+βm)\norm{w'}_1.
+///     \norm{x'}\_1 = (1+βm)\norm{w'}\_1.
 /// $$
-/// That is, $\norm{w}_1=\norm{w'}_1=\norm{x'}_1/(1+βm)$.
+/// That is, $\norm{w}\_1=\norm{w'}\_1=\norm{x'}\_1/(1+βm)$.
 /// Thus, sorting $x$ by absolute value, and sequentially in order eliminating the smallest
-/// elements, we can easily calculate what $\norm{w}_1$ should be for that choice, and
-/// then easily calculate $w = \soft_{β\norm{w}_1}(x)$. We just have to verify that
+/// elements, we can easily calculate what $\norm{w}\_1$ should be for that choice, and
+/// then easily calculate $w = \soft_{β\norm{w}\_1}(x)$. We just have to verify that
 /// the resulting $w$ has the same norm. There's a shortcut to this, as we work
 /// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the
-/// condition of soft-thresholding to remain non-zero: $|x_i|>τ\norm{x'}/(1+τm)$.
-/// Clearly, if this condition fails for x_i, it will fail for all the components
+/// condition of soft-thresholding to remain non-zero: $|x\_i|>τ\norm{x'}/(1+τm)$.
+/// Clearly, if this condition fails for $x\_i$, it will fail for all the components
 /// already exluced. While, if it holds, it will hold for all components not excluded.
 #[replace_float_literals(F::cast_from(literal))]
-pub(super) fn l1squared_prox<F :Float + nalgebra::RealField>(
-    sorted_abs : &mut DVector<F>,
-    x : &mut DVector<F>,
-    y : &DVector<F>,
-    β : F
+pub(super) fn l1squared_prox<F: Float + nalgebra::RealField>(
+    sorted_abs: &mut DVector<F>,
+    x: &mut DVector<F>,
+    y: &DVector<F>,
+    β: F,
 ) {
     sorted_abs.copy_from(x);
     sorted_abs.axpy(-1.0, y, 1.0);
     sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i));
-    sorted_abs.as_mut_slice().sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b)));
+    sorted_abs
+        .as_mut_slice()
+        .sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b)));
 
     let mut n = sorted_abs.sum();
     for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) {
         // test first
-        let tmp = β*n/(1.0 + β*F::cast_from(m));
+        let tmp = β * n / (1.0 + β * F::cast_from(m));
         if *az_i <= tmp {
             // Fail
             n -= *az_i;
         } else {
             // Success
-            x.zip_apply(y, |x_i, y_i| *x_i = y_i + soft_thresholding(*x_i-y_i, tmp));
-            return
+            x.zip_apply(y, |x_i, y_i| {
+                *x_i = y_i + soft_thresholding(*x_i - y_i, tmp)
+            });
+            return;
         }
     }
     // m = 0 should always work, but x is zero.
@@ -82,35 +80,52 @@
 ///
 /// `v` will be modified and cannot be trusted to contain useful values afterwards.
 #[replace_float_literals(F::cast_from(literal))]
-fn min_subdifferential<F : Float + nalgebra::RealField>(
-    y : &DVector<F>,
-    x : &DVector<F>,
-    g : &DVector<F>,
-    λ : F,
-    β : F
+fn min_subdifferential<F: Float + nalgebra::RealField>(
+    y: &DVector<F>,
+    x: &DVector<F>,
+    g: &DVector<F>,
+    λ: F,
+    β: F,
 ) -> F {
     let mut val = 0.0;
-    let tmp = β*y.dist(x, L1);
+    let tmp = β * y.dist(x, L1);
     for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
         let (mut lb, mut ub) = (-g_i, -g_i);
         match x_i.partial_cmp(y_i) {
-            Some(Greater) => { lb += tmp; ub += tmp },
-            Some(Less) => { lb -= tmp; ub -= tmp },
-            Some(Equal) => { lb -= tmp; ub += tmp },
-            None => {},
+            Some(Greater) => {
+                lb += tmp;
+                ub += tmp
+            }
+            Some(Less) => {
+                lb -= tmp;
+                ub -= tmp
+            }
+            Some(Equal) => {
+                lb -= tmp;
+                ub += tmp
+            }
+            None => {}
         }
         match x_i.partial_cmp(&0.0) {
-            Some(Greater) => { lb += λ; ub += λ },
-            Some(Less) => { lb -= λ; ub -= λ },
-            Some(Equal) => { lb -= λ; ub += λ },
-            None => {},
+            Some(Greater) => {
+                lb += λ;
+                ub += λ
+            }
+            Some(Less) => {
+                lb -= λ;
+                ub -= λ
+            }
+            Some(Equal) => {
+                lb -= λ;
+                ub += λ
+            }
+            None => {}
         };
         val = max_interval_dist_to_zero(val, lb, ub);
     }
     val
 }
 
-
 /// PDPS implementation of [`l1squared_unconstrained`].
 /// For detailed documentation of the inputs and outputs, refer to there.
 ///
@@ -118,17 +133,18 @@
 /// for potential performance improvements.
 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
 pub fn l1squared_unconstrained_pdps<F, I>(
-    y : &DVector<F::MixedType>,
-    g : &DVector<F::MixedType>,
-    λ_ : F,
-    β_ : F,
-    x : &mut DVector<F::MixedType>,
-    τ_ : F,
-    σ_ : F,
-    iterator : I
+    y: &DVector<F::MixedType>,
+    g: &DVector<F::MixedType>,
+    λ_: F,
+    β_: F,
+    x: &mut DVector<F::MixedType>,
+    τ_: F,
+    σ_: F,
+    iterator: I,
 ) -> usize
-where F : Float + ToNalgebraRealField,
-      I : AlgIteratorFactory<F>
+where
+    F: Float + ToNalgebraRealField,
+    I: AlgIteratorFactory<F>,
 {
     let λ = λ_.to_nalgebra_mixed();
     let β = β_.to_nalgebra_mixed();
@@ -143,19 +159,17 @@
         // Primal step: x^{k+1} = prox_{τ|.-y|_1^2}(x^k - τ (w^k - g))
         x.axpy(-τ, &w, 1.0);
         x.axpy(τ, g, 1.0);
-        l1squared_prox(&mut tmp, x, y, τ*β);
-        
+        l1squared_prox(&mut tmp, x, y, τ * β);
+
         // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k))
-        w.axpy(2.0*σ, x, 1.0);
+        w.axpy(2.0 * σ, x, 1.0);
         w.axpy(-σ, &xprev, 1.0);
         w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ));
         xprev.copy_from(x);
-        
-        iters +=1;
 
-        state.if_verbose(|| {
-            F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
-        })
+        iters += 1;
+
+        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
     });
 
     iters
@@ -180,26 +194,27 @@
 /// $$</div>
 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
 pub fn l1squared_unconstrained_pdps_alt<F, I>(
-    y : &DVector<F::MixedType>,
-    g : &DVector<F::MixedType>,
-    λ_ : F,
-    β_ : F,
-    x : &mut DVector<F::MixedType>,
-    τ_ : F,
-    σ_ : F,
-    θ_ : F,
-    iterator : I
+    y: &DVector<F::MixedType>,
+    g: &DVector<F::MixedType>,
+    λ_: F,
+    β_: F,
+    x: &mut DVector<F::MixedType>,
+    τ_: F,
+    σ_: F,
+    θ_: F,
+    iterator: I,
 ) -> usize
-where F : Float + ToNalgebraRealField,
-      I : AlgIteratorFactory<F>
+where
+    F: Float + ToNalgebraRealField,
+    I: AlgIteratorFactory<F>,
 {
     let λ = λ_.to_nalgebra_mixed();
     let τ = τ_.to_nalgebra_mixed();
     let σ = σ_.to_nalgebra_mixed();
     let θ = θ_.to_nalgebra_mixed();
     let β = β_.to_nalgebra_mixed();
-    let σθ = σ*θ;
-    let τθ = τ*θ;
+    let σθ = σ * θ;
+    let τθ = τ * θ;
     let mut w = DVector::zeros(x.len());
     let mut tmp = DVector::zeros(x.len());
     let mut xprev = x.clone();
@@ -209,8 +224,8 @@
         // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g))
         x.axpy(-τθ, &w, 1.0);
         x.axpy(τ, g, 1.0);
-        x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ*λ));
-        
+        x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ * λ));
+
         // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
         // we compute w^{k+1} = prox_{σg^*}(q) for
         //                    = q - σ prox_{g/σ}(q/σ)
@@ -221,22 +236,19 @@
         w.axpy(2.0, x, 1.0);
         w.axpy(-1.0, &xprev, 1.0);
         xprev.copy_from(&w); // use xprev as temporary variable
-        l1squared_prox(&mut tmp, &mut xprev, y, β/σθ);
+        l1squared_prox(&mut tmp, &mut xprev, y, β / σθ);
         w -= &xprev;
         w *= σ;
         xprev.copy_from(x);
-        
+
         iters += 1;
 
-        state.if_verbose(|| {
-            F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
-        })
+        state.if_verbose(|| F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)))
     });
 
     iters
 }
 
-
 /// This function applies an iterative method for the solution of the problem
 /// <div>$$
 ///     \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁.
@@ -246,16 +258,17 @@
 /// This function returns the number of iterations taken.
 #[replace_float_literals(F::cast_from(literal))]
 pub fn l1squared_unconstrained<F, I>(
-    y : &DVector<F::MixedType>,
-    g : &DVector<F::MixedType>,
-    λ : F,
-    β : F,
-    x : &mut DVector<F::MixedType>,
-    inner : &InnerSettings<F>,
-    iterator : I
+    y: &DVector<F::MixedType>,
+    g: &DVector<F::MixedType>,
+    λ: F,
+    β: F,
+    x: &mut DVector<F::MixedType>,
+    inner: &InnerSettings<F>,
+    iterator: I,
 ) -> usize
-where F : Float + ToNalgebraRealField,
-      I : AlgIteratorFactory<F>
+where
+    F: Float + ToNalgebraRealField,
+    I: AlgIteratorFactory<F>,
 {
     // Estimate of ‖K‖ for K=θ Id.
     let inner_θ = 1.0;
@@ -264,8 +277,9 @@
     let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
 
     match inner.method {
-        InnerMethod::PDPS =>
-            l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator),
+        InnerMethod::PDPS => {
+            l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator)
+        }
         other => unimplemented!("${other:?} is unimplemented"),
     }
 }

mercurial