Fixes to Radon norm prox term inner algorithm dev

Sun, 26 Jan 2025 11:58:02 -0500

author
Tuomo Valkonen <tuomov@iki.fi>
date
Sun, 26 Jan 2025 11:58:02 -0500
branch
dev
changeset 42
6a7365d73e4c
parent 41
b6bdb6cb4d44
child 43
aacd9af21b3a

Fixes to Radon norm prox term inner algorithm

src/subproblem/l1squared_nonneg.rs file | annotate | diff | comparison | revisions
src/subproblem/l1squared_unconstrained.rs file | annotate | diff | comparison | revisions
--- a/src/subproblem/l1squared_nonneg.rs	Sun Jan 26 11:29:57 2025 -0500
+++ b/src/subproblem/l1squared_nonneg.rs	Sun Jan 26 11:58:02 2025 -0500
@@ -324,9 +324,20 @@
 /// Alternative PDPS implementation of [`l1squared_nonneg`].
 /// For detailed documentation of the inputs and outputs, refer to there.
 ///
+/// By not dualising the 1-norm, this should produce more sparse solutions than
+/// [`l1squared_nonneg_pdps`].
+///
 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
 /// for potential performance improvements.
 /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model.
+/// We rewrite
+/// <div>$$
+///     \begin{split}
+///     & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x) \\
+///     & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x)
+///      - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w).
+///     \end{split}
+/// $$</div>
 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
 pub fn l1squared_nonneg_pdps_alt<F, I>(
     y : &DVector<F::MixedType>,
@@ -347,7 +358,6 @@
     let σ = σ_.to_nalgebra_mixed();
     let θ = θ_.to_nalgebra_mixed();
     let β = β_.to_nalgebra_mixed();
-    let βdivθ = β / θ;
     let σθ = σ*θ;
     let τθ = τ*θ;
     let mut w = DVector::zeros(x.len());
@@ -361,20 +371,22 @@
         x.axpy(τ, g, 1.0);
         x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ));
         
-        // Dual step: w^{k+1} = prox_{σ[(β/2)‖./θ-y‖₁²]^*}(q) for q = w^k + σθ(2x^{k+1}-x^k)
-        //                    = q - σ prox_{(β/(2σ))‖./θ-y‖₁²}(q/σ)
-        //                    = q - (σθ) prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ))
-        //                    = σθ(q/(σθ) - prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ))
-        w /= σθ;
+        // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
+        // we compute w^{k+1} = prox_{σg^*}(q) for
+        //                    = q - σ prox_{g/σ}(q/σ)
+        //                    = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)
+        //                    = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ))
+        // where q/σ = w^k/σ + (2x^{k+1}-x^k),
+        w /= σ;
         w.axpy(2.0, x, 1.0);
         w.axpy(-1.0, &xprev, 1.0);
         xprev.copy_from(&w); // use xprev as temporary variable
-        l1squared_prox(&mut tmp, &mut xprev, y, βdivθ/σθ);
+        l1squared_prox(&mut tmp, &mut xprev, y, β/σθ);
         w -= &xprev;
-        w *= σθ;
+        w *= σ;
         xprev.copy_from(x);
         
-        iters +=1;
+        iters += 1;
 
         state.if_verbose(|| {
             F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
@@ -406,7 +418,7 @@
 {
     match inner.method {
         InnerMethod::PDPS => {
-            let inner_θ = 0.001;
+            let inner_θ = 1.0;
             // Estimate of ‖K‖ for K=θ\Id.
             let normest = inner_θ;
             let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
--- a/src/subproblem/l1squared_unconstrained.rs	Sun Jan 26 11:29:57 2025 -0500
+++ b/src/subproblem/l1squared_unconstrained.rs	Sun Jan 26 11:58:02 2025 -0500
@@ -161,6 +161,81 @@
     iters
 }
 
+/// Alternative PDPS implementation of [`l1squared_unconstrained`].
+/// For detailed documentation of the inputs and outputs, refer to there.
+///
+/// By not dualising the 1-norm, this should produce more sparse solutions than
+/// [`l1squared_unconstrained_pdps`].
+///
+/// The `λ` component of the model is handled in the proximal step instead of the gradient step
+/// for potential performance improvements.
+/// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model.
+/// We rewrite
+/// <div>$$
+///     \begin{split}
+///     & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ \\
+///     & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁
+///      - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w).
+///     \end{split}
+/// $$</div>
+#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
+pub fn l1squared_unconstrained_pdps_alt<F, I>(
+    y : &DVector<F::MixedType>,
+    g : &DVector<F::MixedType>,
+    λ_ : F,
+    β_ : F,
+    x : &mut DVector<F::MixedType>,
+    τ_ : F,
+    σ_ : F,
+    θ_ : F,
+    iterator : I
+) -> usize
+where F : Float + ToNalgebraRealField,
+      I : AlgIteratorFactory<F>
+{
+    let λ = λ_.to_nalgebra_mixed();
+    let τ = τ_.to_nalgebra_mixed();
+    let σ = σ_.to_nalgebra_mixed();
+    let θ = θ_.to_nalgebra_mixed();
+    let β = β_.to_nalgebra_mixed();
+    let σθ = σ*θ;
+    let τθ = τ*θ;
+    let mut w = DVector::zeros(x.len());
+    let mut tmp = DVector::zeros(x.len());
+    let mut xprev = x.clone();
+    let mut iters = 0;
+
+    iterator.iterate(|state| {
+        // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g))
+        x.axpy(-τθ, &w, 1.0);
+        x.axpy(τ, g, 1.0);
+        x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ*λ));
+        
+        // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k),
+        // we compute w^{k+1} = prox_{σg^*}(q) for
+        //                    = q - σ prox_{g/σ}(q/σ)
+        //                    = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)
+        //                    = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ))
+        // where q/σ = w^k/σ + (2x^{k+1}-x^k),
+        w /= σ;
+        w.axpy(2.0, x, 1.0);
+        w.axpy(-1.0, &xprev, 1.0);
+        xprev.copy_from(&w); // use xprev as temporary variable
+        l1squared_prox(&mut tmp, &mut xprev, y, β/σθ);
+        w -= &xprev;
+        w *= σ;
+        xprev.copy_from(x);
+        
+        iters += 1;
+
+        state.if_verbose(|| {
+            F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
+        })
+    });
+
+    iters
+}
+
 
 /// This function applies an iterative method for the solution of the problem
 /// <div>$$
@@ -182,14 +257,15 @@
 where F : Float + ToNalgebraRealField,
       I : AlgIteratorFactory<F>
 {
-    // Estimate of ‖K‖ for K=Id.
-    let normest = 1.0;
+    // Estimate of ‖K‖ for K=θ Id.
+    let inner_θ = 1.0;
+    let normest = inner_θ;
 
     let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
 
     match inner.method {
         InnerMethod::PDPS =>
-            l1squared_unconstrained_pdps(y, g, λ, β, x, inner_τ, inner_σ, iterator),
+            l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator),
         other => unimplemented!("${other:?} is unimplemented"),
     }
 }

mercurial