Sun, 26 Jan 2025 11:58:02 -0500
Fixes to Radon norm prox term inner algorithm
src/subproblem/l1squared_nonneg.rs | file | annotate | diff | comparison | revisions | |
src/subproblem/l1squared_unconstrained.rs | file | annotate | diff | comparison | revisions |
--- a/src/subproblem/l1squared_nonneg.rs Sun Jan 26 11:29:57 2025 -0500 +++ b/src/subproblem/l1squared_nonneg.rs Sun Jan 26 11:58:02 2025 -0500 @@ -324,9 +324,20 @@ /// Alternative PDPS implementation of [`l1squared_nonneg`]. /// For detailed documentation of the inputs and outputs, refer to there. /// +/// By not dualising the 1-norm, this should produce more sparse solutions than +/// [`l1squared_nonneg_pdps`]. +/// /// The `λ` component of the model is handled in the proximal step instead of the gradient step /// for potential performance improvements. /// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. +/// We rewrite +/// <div>$$ +/// \begin{split} +/// & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x) \\ +/// & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁ + δ_{≥ 0}(x) +/// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). +/// \end{split} +/// $$</div> #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] pub fn l1squared_nonneg_pdps_alt<F, I>( y : &DVector<F::MixedType>, @@ -347,7 +358,6 @@ let σ = σ_.to_nalgebra_mixed(); let θ = θ_.to_nalgebra_mixed(); let β = β_.to_nalgebra_mixed(); - let βdivθ = β / θ; let σθ = σ*θ; let τθ = τ*θ; let mut w = DVector::zeros(x.len()); @@ -361,20 +371,22 @@ x.axpy(τ, g, 1.0); x.apply(|x_i| *x_i = nonneg_soft_thresholding(*x_i, τ*λ)); - // Dual step: w^{k+1} = prox_{σ[(β/2)‖./θ-y‖₁²]^*}(q) for q = w^k + σθ(2x^{k+1}-x^k) - // = q - σ prox_{(β/(2σ))‖./θ-y‖₁²}(q/σ) - // = q - (σθ) prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ)) - // = σθ(q/(σθ) - prox_{(β/(2σθ^2))‖.-y‖₁²}(q/(σθ)) - w /= σθ; + // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), + // we compute w^{k+1} = prox_{σg^*}(q) for + // = q - σ prox_{g/σ}(q/σ) + // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) + // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) + // where q/σ = w^k/σ + (2x^{k+1}-x^k), + w /= σ; w.axpy(2.0, x, 1.0); w.axpy(-1.0, &xprev, 1.0); xprev.copy_from(&w); // use xprev as temporary variable - l1squared_prox(&mut tmp, &mut xprev, y, βdivθ/σθ); + l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); w -= &xprev; - w *= σθ; + w *= σ; xprev.copy_from(x); - iters +=1; + iters += 1; state.if_verbose(|| { F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) @@ -406,7 +418,7 @@ { match inner.method { InnerMethod::PDPS => { - let inner_θ = 0.001; + let inner_θ = 1.0; // Estimate of ‖K‖ for K=θ\Id. let normest = inner_θ; let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);
--- a/src/subproblem/l1squared_unconstrained.rs Sun Jan 26 11:29:57 2025 -0500 +++ b/src/subproblem/l1squared_unconstrained.rs Sun Jan 26 11:58:02 2025 -0500 @@ -161,6 +161,81 @@ iters } +/// Alternative PDPS implementation of [`l1squared_unconstrained`]. +/// For detailed documentation of the inputs and outputs, refer to there. +/// +/// By not dualising the 1-norm, this should produce more sparse solutions than +/// [`l1squared_unconstrained_pdps`]. +/// +/// The `λ` component of the model is handled in the proximal step instead of the gradient step +/// for potential performance improvements. +/// The parameter `θ` is used to multiply the rescale the operator (identity) of the PDPS model. +/// We rewrite +/// <div>$$ +/// \begin{split} +/// & \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁ \\ +/// & = \min_{x ∈ ℝ^n} \max_{w} ⟨θ w, x⟩ - g^⊤ x + λ\|x\|₁ +/// - \left(x ↦ \frac{β}{2θ} |x-y|_1^2 \right)^*(w). +/// \end{split} +/// $$</div> +#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] +pub fn l1squared_unconstrained_pdps_alt<F, I>( + y : &DVector<F::MixedType>, + g : &DVector<F::MixedType>, + λ_ : F, + β_ : F, + x : &mut DVector<F::MixedType>, + τ_ : F, + σ_ : F, + θ_ : F, + iterator : I +) -> usize +where F : Float + ToNalgebraRealField, + I : AlgIteratorFactory<F> +{ + let λ = λ_.to_nalgebra_mixed(); + let τ = τ_.to_nalgebra_mixed(); + let σ = σ_.to_nalgebra_mixed(); + let θ = θ_.to_nalgebra_mixed(); + let β = β_.to_nalgebra_mixed(); + let σθ = σ*θ; + let τθ = τ*θ; + let mut w = DVector::zeros(x.len()); + let mut tmp = DVector::zeros(x.len()); + let mut xprev = x.clone(); + let mut iters = 0; + + iterator.iterate(|state| { + // Primal step: x^{k+1} = soft_τλ(x^k - τ(θ w^k -g)) + x.axpy(-τθ, &w, 1.0); + x.axpy(τ, g, 1.0); + x.apply(|x_i| *x_i = soft_thresholding(*x_i, τ*λ)); + + // Dual step: with g(x) = (β/(2θ))‖x-y‖₁² and q = w^k + σ(2x^{k+1}-x^k), + // we compute w^{k+1} = prox_{σg^*}(q) for + // = q - σ prox_{g/σ}(q/σ) + // = q - σ prox_{(β/(2θσ))‖.-y‖₁²}(q/σ) + // = σ(q/σ - prox_{(β/(2θσ))‖.-y‖₁²}(q/σ)) + // where q/σ = w^k/σ + (2x^{k+1}-x^k), + w /= σ; + w.axpy(2.0, x, 1.0); + w.axpy(-1.0, &xprev, 1.0); + xprev.copy_from(&w); // use xprev as temporary variable + l1squared_prox(&mut tmp, &mut xprev, y, β/σθ); + w -= &xprev; + w *= σ; + xprev.copy_from(x); + + iters += 1; + + state.if_verbose(|| { + F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β)) + }) + }); + + iters +} + /// This function applies an iterative method for the solution of the problem /// <div>$$ @@ -182,14 +257,15 @@ where F : Float + ToNalgebraRealField, I : AlgIteratorFactory<F> { - // Estimate of ‖K‖ for K=Id. - let normest = 1.0; + // Estimate of ‖K‖ for K=θ Id. + let inner_θ = 1.0; + let normest = inner_θ; let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest); match inner.method { InnerMethod::PDPS => - l1squared_unconstrained_pdps(y, g, λ, β, x, inner_τ, inner_σ, iterator), + l1squared_unconstrained_pdps_alt(y, g, λ, β, x, inner_τ, inner_σ, inner_θ, iterator), other => unimplemented!("${other:?} is unimplemented"), } }