Tue, 21 Mar 2023 20:31:01 +0200
Implement non-negativity constraints for the conditional gradient methods
/*! Solver for the point source localisation problem with primal-dual proximal splitting. This corresponds to the manuscript * Valkonen T. - _Proximal methods for point source localisation_, [arXiv:2212.02991](https://arxiv.org/abs/2212.02991). The main routine is [`pointsource_pdps`]. It is based on specilisatinn of [`generic_pointsource_fb_reg`] through relevant [`FBSpecialisation`] implementations. Both norm-2-squared and norm-1 data terms are supported. That is, implemented are solvers for <div> $$ \min_{μ ∈ ℳ(Ω)}~ F_0(Aμ - b) + α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ), $$ for both $F_0(y)=\frac{1}{2}\|y\|_2^2$ and $F_0(y)=\|y\|_1$ with the forward operator $A \in 𝕃(ℳ(Ω); ℝ^n)$. </div> ## Approach <p> The problem above can be written as $$ \min_μ \max_y G(μ) + ⟨y, Aμ-b⟩ - F_0^*(μ), $$ where $G(μ) = α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ)$. The Fenchel–Rockafellar optimality conditions, employing the predual in $ℳ(Ω)$, are $$ 0 ∈ A_*y + ∂G(μ) \quad\text{and}\quad Aμ - b ∈ ∂ F_0^*(y). $$ The solution of the first part is as for forward-backward, treated in the manuscript. This is the task of <code>generic_pointsource_fb</code>, where we use <code>FBSpecialisation</code> to replace the specific residual $Aμ-b$ by $y$. For $F_0(y)=\frac{1}{2}\|y\|_2^2$ the second part reads $y = Aμ -b$. For $F_0(y)=\|y\|_1$ the second part reads $y ∈ ∂\|·\|_1(Aμ - b)$. </p> Based on zero initialisation for $μ$, we use the [`Subdifferentiable`] trait to make an initialisation corresponding to the second part of the optimality conditions. In the algorithm itself, standard proximal steps are taking with respect to $F\_0^* + ⟨b, ·⟩$. */ use numeric_literals::replace_float_literals; use serde::{Serialize, Deserialize}; use nalgebra::DVector; use clap::ValueEnum; use alg_tools::iterate:: AlgIteratorFactory; use alg_tools::sets::Cube; use alg_tools::loc::Loc; use alg_tools::euclidean::Euclidean; use alg_tools::norms::{ L1, Linfinity, Projection, Norm, }; use alg_tools::bisection_tree::{ BTFN, PreBTFN, Bounds, BTNodeLookup, BTNode, BTSearch, P2Minimise, SupportGenerator, LocalAnalysis, }; use alg_tools::mapping::RealMapping; use alg_tools::nalgebra_support::ToNalgebraRealField; use alg_tools::linops::AXPY; use crate::types::*; use crate::measures::DiscreteMeasure; use crate::measures::merging::{ SpikeMerging, }; use crate::forward_model::ForwardModel; use crate::seminorms::{ DiscreteMeasureOp, Lipschitz }; use crate::plot::{ SeqPlotter, Plotting, PlotLookup }; use crate::fb::{ FBGenericConfig, FBSpecialisation, generic_pointsource_fb_reg, RegTerm, }; use crate::regularisation::NonnegRadonRegTerm; /// Acceleration #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, ValueEnum, Debug)] pub enum Acceleration { /// No acceleration #[clap(name = "none")] None, /// Partial acceleration, $ω = 1/\sqrt{1+σ}$ #[clap(name = "partial", help = "Partial acceleration, ω = 1/√(1+σ)")] Partial, /// Full acceleration, $ω = 1/\sqrt{1+2σ}$; no gap convergence guaranteed #[clap(name = "full", help = "Full acceleration, ω = 1/√(1+2σ); no gap convergence guaranteed")] Full } /// Settings for [`pointsource_pdps`]. #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] #[serde(default)] pub struct PDPSConfig<F : Float> { /// Primal step length scaling. We must have `τ0 * σ0 < 1`. pub τ0 : F, /// Dual step length scaling. We must have `τ0 * σ0 < 1`. pub σ0 : F, /// Accelerate if available pub acceleration : Acceleration, /// Generic parameters pub insertion : FBGenericConfig<F>, } #[replace_float_literals(F::cast_from(literal))] impl<F : Float> Default for PDPSConfig<F> { fn default() -> Self { let τ0 = 0.5; PDPSConfig { τ0, σ0 : 0.99/τ0, acceleration : Acceleration::Partial, insertion : Default::default() } } } /// Trait for subdifferentiable objects pub trait Subdifferentiable<F : Float, V, U=V> { /// Calculate some subdifferential at `x` fn some_subdifferential(&self, x : V) -> U; } /// Type for indicating norm-2-squared data fidelity. pub struct L2Squared; impl<F : Float, V : Euclidean<F>> Subdifferentiable<F, V> for L2Squared { fn some_subdifferential(&self, x : V) -> V { x } } impl<F : Float + nalgebra::RealField> Subdifferentiable<F, DVector<F>> for L1 { fn some_subdifferential(&self, mut x : DVector<F>) -> DVector<F> { // nalgebra sucks for providing second copies of the same stuff that's elsewhere as well. x.iter_mut() .for_each(|v| if *v != F::ZERO { *v = *v/<F as NumTraitsFloat>::abs(*v) }); x } } /// Specialisation of [`generic_pointsource_fb_reg`] to PDPS. pub struct PDPS< 'a, F : Float + ToNalgebraRealField, A : ForwardModel<Loc<F, N>, F>, D, const N : usize > { /// The data b : &'a A::Observable, /// The forward operator opA : &'a A, /// Primal step length τ : F, // Dual step length σ : F, /// Whether acceleration should be applied (if data term supports) acceleration : Acceleration, /// The dataterm. Only used by the type system. _dataterm : D, /// Previous dual iterate. y_prev : A::Observable, } /// Implementation of [`FBSpecialisation`] for μPDPS with norm-2-squared data fidelity. #[replace_float_literals(F::cast_from(literal))] impl< 'a, F : Float + ToNalgebraRealField, A : ForwardModel<Loc<F, N>, F>, const N : usize > FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L2Squared, N> where for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { fn update( &mut self, μ : &mut DiscreteMeasure<Loc<F, N>, F>, μ_base : &DiscreteMeasure<Loc<F, N>, F> ) -> (A::Observable, Option<F>) { let σ = self.σ; let τ = self.τ; let ω = match self.acceleration { Acceleration::None => 1.0, Acceleration::Partial => { let ω = 1.0 / (1.0 + σ).sqrt(); self.σ = σ * ω; self.τ = τ / ω; ω }, Acceleration::Full => { let ω = 1.0 / (1.0 + 2.0 * σ).sqrt(); self.σ = σ * ω; self.τ = τ / ω; ω }, }; μ.prune(); let mut y = self.b.clone(); self.opA.gemv(&mut y, 1.0 + ω, μ, -1.0); self.opA.gemv(&mut y, -ω, μ_base, 1.0); y.axpy(1.0 / (1.0 + σ), &self.y_prev, σ / (1.0 + σ)); self.y_prev.copy_from(&y); (y, Some(self.τ)) } fn calculate_fit( &self, μ : &DiscreteMeasure<Loc<F, N>, F>, _y : &A::Observable ) -> F { self.calculate_fit_simple(μ) } fn calculate_fit_simple( &self, μ : &DiscreteMeasure<Loc<F, N>, F>, ) -> F { let mut residual = self.b.clone(); self.opA.gemv(&mut residual, 1.0, μ, -1.0); residual.norm2_squared_div2() } } /// Implementation of [`FBSpecialisation`] for μPDPS with norm-1 data fidelity. #[replace_float_literals(F::cast_from(literal))] impl< 'a, F : Float + ToNalgebraRealField, A : ForwardModel<Loc<F, N>, F>, const N : usize > FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L1, N> where A::Observable : Projection<F, Linfinity> + Norm<F, L1>, for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { fn update( &mut self, μ : &mut DiscreteMeasure<Loc<F, N>, F>, μ_base : &DiscreteMeasure<Loc<F, N>, F> ) -> (A::Observable, Option<F>) { let σ = self.σ; μ.prune(); //let ȳ = self.opA.apply(μ) * 2.0 - self.opA.apply(μ_base); //*y = proj_{[-1,1]}(&self.y_prev + (ȳ - self.b) * σ) let mut y = self.y_prev.clone(); self.opA.gemv(&mut y, 2.0 * σ, μ, 1.0); self.opA.gemv(&mut y, -σ, μ_base, 1.0); y.axpy(-σ, self.b, 1.0); y.proj_ball_mut(1.0, Linfinity); self.y_prev.copy_from(&y); (y, None) } fn calculate_fit( &self, μ : &DiscreteMeasure<Loc<F, N>, F>, _y : &A::Observable ) -> F { self.calculate_fit_simple(μ) } fn calculate_fit_simple( &self, μ : &DiscreteMeasure<Loc<F, N>, F>, ) -> F { let mut residual = self.b.clone(); self.opA.gemv(&mut residual, 1.0, μ, -1.0); residual.norm(L1) } } /// Iteratively solve the pointsource localisation problem using primal-dual proximal splitting. /// /// The `dataterm` should be either [`L1`] for norm-1 data term or [`L2Squared`] for norm-2-squared. /// The settings in `config` have their [respective documentation](PDPSConfig). `opA` is the /// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight. /// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution /// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control /// as documented in [`alg_tools::iterate`]. /// /// For the mathematical formulation, see the [module level](self) documentation and the manuscript. /// /// Returns the final iterate. #[replace_float_literals(F::cast_from(literal))] pub fn pointsource_pdps_reg<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, Reg, const N : usize>( opA : &'a A, b : &'a A::Observable, reg : Reg, op𝒟 : &'a 𝒟, config : &PDPSConfig<F>, iterator : I, plotter : SeqPlotter<F, N>, dataterm : D, ) -> DiscreteMeasure<Loc<F, N>, F> where F : Float + ToNalgebraRealField, I : AlgIteratorFactory<IterInfo<F, N>>, for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + std::ops::Add<A::Observable, Output=A::Observable>, //+ std::ops::Mul<F, Output=A::Observable>, // <-- FIXME: compiler overflow A::Observable : std::ops::MulAssign<F>, GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> + Lipschitz<𝒟, FloatType=F>, BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, 𝒟::Codomain : RealMapping<F, N>, S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, PlotLookup : Plotting<N>, DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>, D : Subdifferentiable<F, A::Observable>, Reg : RegTerm<F, N> { let y = dataterm.some_subdifferential(-b); let l = opA.lipschitz_factor(&op𝒟).unwrap().sqrt(); let τ = config.τ0 / l; let σ = config.σ0 / l; let pdps = PDPS { b, opA, τ, σ, acceleration : config.acceleration, _dataterm : dataterm, y_prev : y.clone(), }; generic_pointsource_fb_reg( opA, reg, op𝒟, τ, &config.insertion, iterator, plotter, y, pdps ) } // // Deprecated interfaces // #[deprecated(note = "Use `pointsource_pdps_reg`")] pub fn pointsource_pdps<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, const N : usize>( opA : &'a A, b : &'a A::Observable, α : F, op𝒟 : &'a 𝒟, config : &PDPSConfig<F>, iterator : I, plotter : SeqPlotter<F, N>, dataterm : D, ) -> DiscreteMeasure<Loc<F, N>, F> where F : Float + ToNalgebraRealField, I : AlgIteratorFactory<IterInfo<F, N>>, for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + std::ops::Add<A::Observable, Output=A::Observable>, A::Observable : std::ops::MulAssign<F>, GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> + Lipschitz<𝒟, FloatType=F>, BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, 𝒟::Codomain : RealMapping<F, N>, S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, Cube<F, N>: P2Minimise<Loc<F, N>, F>, PlotLookup : Plotting<N>, DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>, D : Subdifferentiable<F, A::Observable> { pointsource_pdps_reg(opA, b, NonnegRadonRegTerm(α), op𝒟, config, iterator, plotter, dataterm) }