--- a/src/pdps.rs Tue Aug 01 10:25:09 2023 +0300 +++ b/src/pdps.rs Mon Feb 17 13:54:53 2025 -0500 @@ -6,8 +6,7 @@ * Valkonen T. - _Proximal methods for point source localisation_, [arXiv:2212.02991](https://arxiv.org/abs/2212.02991). -The main routine is [`pointsource_pdps`]. It is based on specilisatinn of -[`generic_pointsource_fb_reg`] through relevant [`FBSpecialisation`] implementations. +The main routine is [`pointsource_pdps_reg`]. Both norm-2-squared and norm-1 data terms are supported. That is, implemented are solvers for <div> $$ @@ -37,10 +36,6 @@ For $F_0(y)=\frac{1}{2}\|y\|_2^2$ the second part reads $y = Aμ -b$. For $F_0(y)=\|y\|_1$ the second part reads $y ∈ ∂\|·\|_1(Aμ - b)$. </p> - -Based on zero initialisation for $μ$, we use the [`Subdifferentiable`] trait to make an -initialisation corresponding to the second part of the optimality conditions. -In the algorithm itself, standard proximal steps are taking with respect to $F\_0^* + ⟨b, ·⟩$. */ use numeric_literals::replace_float_literals; @@ -48,37 +43,23 @@ use nalgebra::DVector; use clap::ValueEnum; -use alg_tools::iterate:: AlgIteratorFactory; -use alg_tools::sets::Cube; -use alg_tools::loc::Loc; +use alg_tools::iterate::AlgIteratorFactory; use alg_tools::euclidean::Euclidean; +use alg_tools::linops::Mapping; use alg_tools::norms::{ - L1, Linfinity, - Projection, Norm, + Linfinity, + Projection, }; -use alg_tools::bisection_tree::{ - BTFN, - PreBTFN, - Bounds, - BTNodeLookup, - BTNode, - BTSearch, - P2Minimise, - SupportGenerator, - LocalAnalysis, -}; -use alg_tools::mapping::RealMapping; +use alg_tools::mapping::{RealMapping, Instance}; use alg_tools::nalgebra_support::ToNalgebraRealField; use alg_tools::linops::AXPY; use crate::types::*; -use crate::measures::DiscreteMeasure; -use crate::measures::merging::{ - SpikeMerging, -}; -use crate::forward_model::ForwardModel; -use crate::seminorms::{ - DiscreteMeasureOp, Lipschitz +use crate::measures::{DiscreteMeasure, RNDM}; +use crate::measures::merging::SpikeMerging; +use crate::forward_model::{ + ForwardModel, + AdjointProductBoundedBy, }; use crate::plot::{ SeqPlotter, @@ -86,12 +67,21 @@ PlotLookup }; use crate::fb::{ + postprocess, + prune_with_stats +}; +pub use crate::prox_penalty::{ FBGenericConfig, - FBSpecialisation, - generic_pointsource_fb_reg, - RegTerm, + ProxPenalty }; -use crate::regularisation::NonnegRadonRegTerm; +use crate::regularisation::RegTerm; +use crate::dataterm::{ + DataTerm, + L2Squared, + L1 +}; +use crate::measures::merging::SpikeMergingMethod; + /// Acceleration #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, ValueEnum, Debug)] @@ -107,7 +97,30 @@ Full } -/// Settings for [`pointsource_pdps`]. +#[replace_float_literals(F::cast_from(literal))] +impl Acceleration { + /// PDPS parameter acceleration. Updates τ and σ and returns ω. + /// This uses dual strong convexity, not primal. + fn accelerate<F : Float>(self, τ : &mut F, σ : &mut F, γ : F) -> F { + match self { + Acceleration::None => 1.0, + Acceleration::Partial => { + let ω = 1.0 / (1.0 + γ * (*σ)).sqrt(); + *σ *= ω; + *τ /= ω; + ω + }, + Acceleration::Full => { + let ω = 1.0 / (1.0 + 2.0 * γ * (*σ)).sqrt(); + *σ *= ω; + *τ /= ω; + ω + }, + } + } +} + +/// Settings for [`pointsource_pdps_reg`]. #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] #[serde(default)] pub struct PDPSConfig<F : Float> { @@ -118,176 +131,77 @@ /// Accelerate if available pub acceleration : Acceleration, /// Generic parameters - pub insertion : FBGenericConfig<F>, + pub generic : FBGenericConfig<F>, } #[replace_float_literals(F::cast_from(literal))] impl<F : Float> Default for PDPSConfig<F> { fn default() -> Self { - let τ0 = 0.5; + let τ0 = 5.0; PDPSConfig { τ0, σ0 : 0.99/τ0, acceleration : Acceleration::Partial, - insertion : Default::default() + generic : FBGenericConfig { + merging : SpikeMergingMethod { enabled : true, ..Default::default() }, + .. Default::default() + }, } } } -/// Trait for subdifferentiable objects -pub trait Subdifferentiable<F : Float, V, U=V> { - /// Calculate some subdifferential at `x` - fn some_subdifferential(&self, x : V) -> U; +/// Trait for data terms for the PDPS +#[replace_float_literals(F::cast_from(literal))] +pub trait PDPSDataTerm<F : Float, V, const N : usize> : DataTerm<F, V, N> { + /// Calculate some subdifferential at `x` for the conjugate + fn some_subdifferential(&self, x : V) -> V; + + /// Factor of strong convexity of the conjugate + #[inline] + fn factor_of_strong_convexity(&self) -> F { + 0.0 + } + + /// Perform dual update + fn dual_update(&self, _y : &mut V, _y_prev : &V, _σ : F); } -/// Type for indicating norm-2-squared data fidelity. -pub struct L2Squared; -impl<F : Float, V : Euclidean<F>> Subdifferentiable<F, V> for L2Squared { +#[replace_float_literals(F::cast_from(literal))] +impl<F, V, const N : usize> PDPSDataTerm<F, V, N> +for L2Squared +where + F : Float, + V : Euclidean<F> + AXPY<F>, + for<'b> &'b V : Instance<V>, +{ fn some_subdifferential(&self, x : V) -> V { x } + + fn factor_of_strong_convexity(&self) -> F { + 1.0 + } + + #[inline] + fn dual_update(&self, y : &mut V, y_prev : &V, σ : F) { + y.axpy(1.0 / (1.0 + σ), y_prev, σ / (1.0 + σ)); + } } -impl<F : Float + nalgebra::RealField> Subdifferentiable<F, DVector<F>> for L1 { +#[replace_float_literals(F::cast_from(literal))] +impl<F : Float + nalgebra::RealField, const N : usize> +PDPSDataTerm<F, DVector<F>, N> +for L1 { fn some_subdifferential(&self, mut x : DVector<F>) -> DVector<F> { // nalgebra sucks for providing second copies of the same stuff that's elsewhere as well. x.iter_mut() .for_each(|v| if *v != F::ZERO { *v = *v/<F as NumTraitsFloat>::abs(*v) }); x } -} -/// Specialisation of [`generic_pointsource_fb_reg`] to PDPS. -pub struct PDPS< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel<Loc<F, N>, F>, - D, - const N : usize -> { - /// The data - b : &'a A::Observable, - /// The forward operator - opA : &'a A, - /// Primal step length - τ : F, - // Dual step length - σ : F, - /// Whether acceleration should be applied (if data term supports) - acceleration : Acceleration, - /// The dataterm. Only used by the type system. - _dataterm : D, - /// Previous dual iterate. - y_prev : A::Observable, -} - -/// Implementation of [`FBSpecialisation`] for μPDPS with norm-2-squared data fidelity. -#[replace_float_literals(F::cast_from(literal))] -impl< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel<Loc<F, N>, F>, - const N : usize -> FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L2Squared, N> -where for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { - - fn update( - &mut self, - μ : &mut DiscreteMeasure<Loc<F, N>, F>, - μ_base : &DiscreteMeasure<Loc<F, N>, F> - ) -> (A::Observable, Option<F>) { - let σ = self.σ; - let τ = self.τ; - let ω = match self.acceleration { - Acceleration::None => 1.0, - Acceleration::Partial => { - let ω = 1.0 / (1.0 + σ).sqrt(); - self.σ = σ * ω; - self.τ = τ / ω; - ω - }, - Acceleration::Full => { - let ω = 1.0 / (1.0 + 2.0 * σ).sqrt(); - self.σ = σ * ω; - self.τ = τ / ω; - ω - }, - }; - - μ.prune(); - - let mut y = self.b.clone(); - self.opA.gemv(&mut y, 1.0 + ω, μ, -1.0); - self.opA.gemv(&mut y, -ω, μ_base, 1.0); - y.axpy(1.0 / (1.0 + σ), &self.y_prev, σ / (1.0 + σ)); - self.y_prev.copy_from(&y); - - (y, Some(self.τ)) - } - - fn calculate_fit( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - _y : &A::Observable - ) -> F { - self.calculate_fit_simple(μ) - } - - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - ) -> F { - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - residual.norm2_squared_div2() - } -} - -/// Implementation of [`FBSpecialisation`] for μPDPS with norm-1 data fidelity. -#[replace_float_literals(F::cast_from(literal))] -impl< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel<Loc<F, N>, F>, - const N : usize -> FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L1, N> -where A::Observable : Projection<F, Linfinity> + Norm<F, L1>, - for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { - fn update( - &mut self, - μ : &mut DiscreteMeasure<Loc<F, N>, F>, - μ_base : &DiscreteMeasure<Loc<F, N>, F> - ) -> (A::Observable, Option<F>) { - let σ = self.σ; - - μ.prune(); - - //let ȳ = self.opA.apply(μ) * 2.0 - self.opA.apply(μ_base); - //*y = proj_{[-1,1]}(&self.y_prev + (ȳ - self.b) * σ) - let mut y = self.y_prev.clone(); - self.opA.gemv(&mut y, 2.0 * σ, μ, 1.0); - self.opA.gemv(&mut y, -σ, μ_base, 1.0); - y.axpy(-σ, self.b, 1.0); + #[inline] + fn dual_update(&self, y : &mut DVector<F>, y_prev : &DVector<F>, σ : F) { + y.axpy(1.0, y_prev, σ); y.proj_ball_mut(1.0, Linfinity); - self.y_prev.copy_from(&y); - - (y, None) - } - - fn calculate_fit( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - _y : &A::Observable - ) -> F { - self.calculate_fit_simple(μ) - } - - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - ) -> F { - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - residual.norm(L1) } } @@ -304,93 +218,106 @@ /// /// Returns the final iterate. #[replace_float_literals(F::cast_from(literal))] -pub fn pointsource_pdps_reg<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, Reg, const N : usize>( - opA : &'a A, - b : &'a A::Observable, +pub fn pointsource_pdps_reg<F, I, A, D, Reg, P, const N : usize>( + opA : &A, + b : &A::Observable, reg : Reg, - op𝒟 : &'a 𝒟, - config : &PDPSConfig<F>, + prox_penalty : &P, + pdpsconfig : &PDPSConfig<F>, iterator : I, - plotter : SeqPlotter<F, N>, + mut plotter : SeqPlotter<F, N>, dataterm : D, -) -> DiscreteMeasure<Loc<F, N>, F> -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory<IterInfo<F, N>>, - for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> - + std::ops::Add<A::Observable, Output=A::Observable>, - //+ std::ops::Mul<F, Output=A::Observable>, // <-- FIXME: compiler overflow - A::Observable : std::ops::MulAssign<F>, - GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, - A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> - + Lipschitz<𝒟, FloatType=F>, - BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, - G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, - 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, - 𝒟::Codomain : RealMapping<F, N>, - S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, - K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, - BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, - PlotLookup : Plotting<N>, - DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, - PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>, - D : Subdifferentiable<F, A::Observable>, - Reg : RegTerm<F, N> { +) -> RNDM<F, N> +where + F : Float + ToNalgebraRealField, + I : AlgIteratorFactory<IterInfo<F, N>>, + A : ForwardModel<RNDM<F, N>, F> + + AdjointProductBoundedBy<RNDM<F, N>, P, FloatType=F>, + A::PreadjointCodomain : RealMapping<F, N>, + for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + Instance<A::Observable>, + PlotLookup : Plotting<N>, + RNDM<F, N> : SpikeMerging<F>, + D : PDPSDataTerm<F, A::Observable, N>, + Reg : RegTerm<F, N>, + P : ProxPenalty<F, A::PreadjointCodomain, Reg, N>, +{ + + // Check parameters + assert!(pdpsconfig.τ0 > 0.0 && + pdpsconfig.σ0 > 0.0 && + pdpsconfig.τ0 * pdpsconfig.σ0 <= 1.0, + "Invalid step length parameters"); + + // Set up parameters + let config = &pdpsconfig.generic; + let l = opA.adjoint_product_bound(prox_penalty).unwrap().sqrt(); + let mut τ = pdpsconfig.τ0 / l; + let mut σ = pdpsconfig.σ0 / l; + let γ = dataterm.factor_of_strong_convexity(); + + // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled + // by τ compared to the conditional gradient approach. + let tolerance = config.tolerance * τ * reg.tolerance_scaling(); + let mut ε = tolerance.initial(); + + // Initialise iterates + let mut μ = DiscreteMeasure::new(); + let mut y = dataterm.some_subdifferential(-b); + let mut y_prev = y.clone(); + let full_stats = |μ : &RNDM<F, N>, ε, stats| IterInfo { + value : dataterm.calculate_fit_op(μ, opA, b) + reg.apply(μ), + n_spikes : μ.len(), + ε, + // postprocessing: config.postprocessing.then(|| μ.clone()), + .. stats + }; + let mut stats = IterInfo::new(); - let y = dataterm.some_subdifferential(-b); - let l = opA.lipschitz_factor(&op𝒟).unwrap().sqrt(); - let τ = config.τ0 / l; - let σ = config.σ0 / l; + // Run the algorithm + for state in iterator.iter_init(|| full_stats(&μ, ε, stats.clone())) { + // Calculate smooth part of surrogate model. + let mut τv = opA.preadjoint().apply(y * τ); + + // Save current base point + let μ_base = μ.clone(); + + // Insert and reweigh + let (maybe_d, _within_tolerances) = prox_penalty.insert_and_reweigh( + &mut μ, &mut τv, &μ_base, None, + τ, ε, + config, ®, &state, &mut stats + ); + + // Prune and possibly merge spikes + if config.merge_now(&state) { + stats.merged += prox_penalty.merge_spikes_no_fitness( + &mut μ, &mut τv, &μ_base, None, τ, ε, config, ®, + ); + } + stats.pruned += prune_with_stats(&mut μ); - let pdps = PDPS { - b, - opA, - τ, - σ, - acceleration : config.acceleration, - _dataterm : dataterm, - y_prev : y.clone(), - }; + // Update step length parameters + let ω = pdpsconfig.acceleration.accelerate(&mut τ, &mut σ, γ); + + // Do dual update + y = b.clone(); // y = b + opA.gemv(&mut y, 1.0 + ω, &μ, -1.0); // y = A[(1+ω)μ^{k+1}]-b + opA.gemv(&mut y, -ω, &μ_base, 1.0); // y = A[(1+ω)μ^{k+1} - ω μ^k]-b + dataterm.dual_update(&mut y, &y_prev, σ); + y_prev.copy_from(&y); - generic_pointsource_fb_reg( - opA, reg, op𝒟, τ, &config.insertion, iterator, plotter, y, pdps - ) + // Give statistics if requested + let iter = state.iteration(); + stats.this_iters += 1; + + state.if_verbose(|| { + plotter.plot_spikes(iter, maybe_d.as_ref(), Some(&τv), &μ); + full_stats(&μ, ε, std::mem::replace(&mut stats, IterInfo::new())) + }); + + ε = tolerance.update(ε, iter); + } + + postprocess(μ, config, dataterm, opA, b) } -// -// Deprecated interfaces -// - -#[deprecated(note = "Use `pointsource_pdps_reg`")] -pub fn pointsource_pdps<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, const N : usize>( - opA : &'a A, - b : &'a A::Observable, - α : F, - op𝒟 : &'a 𝒟, - config : &PDPSConfig<F>, - iterator : I, - plotter : SeqPlotter<F, N>, - dataterm : D, -) -> DiscreteMeasure<Loc<F, N>, F> -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory<IterInfo<F, N>>, - for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> - + std::ops::Add<A::Observable, Output=A::Observable>, - A::Observable : std::ops::MulAssign<F>, - GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, - A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> - + Lipschitz<𝒟, FloatType=F>, - BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, - G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, - 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, - 𝒟::Codomain : RealMapping<F, N>, - S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, - K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, - BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, - Cube<F, N>: P2Minimise<Loc<F, N>, F>, - PlotLookup : Plotting<N>, - DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, - PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>, - D : Subdifferentiable<F, A::Observable> { - - pointsource_pdps_reg(opA, b, NonnegRadonRegTerm(α), op𝒟, config, iterator, plotter, dataterm) -}