$$ @@ -37,10 +36,6 @@ For $F_0(y)=\frac{1}{2}\|y\|_2^2$ the second part reads $y = Aμ -b$. For $F_0(y)=\|y\|_1$ the second part reads $y ∈ ∂\|·\|_1(Aμ - b)$.

- -Based on zero initialisation for $μ$, we use the [`Subdifferentiable`] trait to make an -initialisation corresponding to the second part of the optimality conditions. -In the algorithm itself, standard proximal steps are taking with respect to $F\_0^* + ⟨b, ·⟩$. */ use numeric_literals::replace_float_literals; @@ -48,37 +43,23 @@ use nalgebra::DVector; use clap::ValueEnum; -use alg_tools::iterate:: AlgIteratorFactory; -use alg_tools::sets::Cube; -use alg_tools::loc::Loc; +use alg_tools::iterate::AlgIteratorFactory; use alg_tools::euclidean::Euclidean; +use alg_tools::linops::Mapping; use alg_tools::norms::{ - L1, Linfinity, - Projection, Norm, + Linfinity, + Projection, }; -use alg_tools::bisection_tree::{ - BTFN, - PreBTFN, - Bounds, - BTNodeLookup, - BTNode, - BTSearch, - P2Minimise, - SupportGenerator, - LocalAnalysis, -}; -use alg_tools::mapping::RealMapping; +use alg_tools::mapping::{RealMapping, Instance}; use alg_tools::nalgebra_support::ToNalgebraRealField; use alg_tools::linops::AXPY; use crate::types::*; -use crate::measures::DiscreteMeasure; -use crate::measures::merging::{ - SpikeMerging, -}; -use crate::forward_model::ForwardModel; -use crate::seminorms::{ - DiscreteMeasureOp, Lipschitz +use crate::measures::{DiscreteMeasure, RNDM}; +use crate::measures::merging::SpikeMerging; +use crate::forward_model::{ + ForwardModel, + AdjointProductBoundedBy, }; use crate::plot::{ SeqPlotter, @@ -86,12 +67,21 @@ PlotLookup }; use crate::fb::{ + postprocess, + prune_with_stats +}; +pub use crate::prox_penalty::{ FBGenericConfig, - FBSpecialisation, - generic_pointsource_fb_reg, - RegTerm, + ProxPenalty }; -use crate::regularisation::NonnegRadonRegTerm; +use crate::regularisation::RegTerm; +use crate::dataterm::{ + DataTerm, + L2Squared, + L1 +}; +use crate::measures::merging::SpikeMergingMethod; + /// Acceleration #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, ValueEnum, Debug)] @@ -107,7 +97,30 @@ Full } -/// Settings for [`pointsource_pdps`]. +#[replace_float_literals(F::cast_from(literal))] +impl Acceleration { + /// PDPS parameter acceleration. Updates τ and σ and returns ω. + /// This uses dual strong convexity, not primal. + fn accelerate(self, τ : &mut F, σ : &mut F, γ : F) -> F { + match self { + Acceleration::None => 1.0, + Acceleration::Partial => { + let ω = 1.0 / (1.0 + γ * (*σ)).sqrt(); + *σ *= ω; + *τ /= ω; + ω + }, + Acceleration::Full => { + let ω = 1.0 / (1.0 + 2.0 * γ * (*σ)).sqrt(); + *σ *= ω; + *τ /= ω; + ω + }, + } + } +} + +/// Settings for [`pointsource_pdps_reg`]. #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] #[serde(default)] pub struct PDPSConfig { @@ -118,176 +131,77 @@ /// Accelerate if available pub acceleration : Acceleration, /// Generic parameters - pub insertion : FBGenericConfig, + pub generic : FBGenericConfig, } #[replace_float_literals(F::cast_from(literal))] impl Default for PDPSConfig { fn default() -> Self { - let τ0 = 0.5; + let τ0 = 5.0; PDPSConfig { τ0, σ0 : 0.99/τ0, acceleration : Acceleration::Partial, - insertion : Default::default() + generic : FBGenericConfig { + merging : SpikeMergingMethod { enabled : true, ..Default::default() }, + .. Default::default() + }, } } } -/// Trait for subdifferentiable objects -pub trait Subdifferentiable { - /// Calculate some subdifferential at `x` - fn some_subdifferential(&self, x : V) -> U; +/// Trait for data terms for the PDPS +#[replace_float_literals(F::cast_from(literal))] +pub trait PDPSDataTerm : DataTerm { + /// Calculate some subdifferential at `x` for the conjugate + fn some_subdifferential(&self, x : V) -> V; + + /// Factor of strong convexity of the conjugate + #[inline] + fn factor_of_strong_convexity(&self) -> F { + 0.0 + } + + /// Perform dual update + fn dual_update(&self, _y : &mut V, _y_prev : &V, _σ : F); } -/// Type for indicating norm-2-squared data fidelity. -pub struct L2Squared; -impl> Subdifferentiable for L2Squared { +#[replace_float_literals(F::cast_from(literal))] +impl PDPSDataTerm +for L2Squared +where + F : Float, + V : Euclidean + AXPY, + for<'b> &'b V : Instance, +{ fn some_subdifferential(&self, x : V) -> V { x } + + fn factor_of_strong_convexity(&self) -> F { + 1.0 + } + + #[inline] + fn dual_update(&self, y : &mut V, y_prev : &V, σ : F) { + y.axpy(1.0 / (1.0 + σ), y_prev, σ / (1.0 + σ)); + } } -impl Subdifferentiable> for L1 { +#[replace_float_literals(F::cast_from(literal))] +impl +PDPSDataTerm, N> +for L1 { fn some_subdifferential(&self, mut x : DVector) -> DVector { // nalgebra sucks for providing second copies of the same stuff that's elsewhere as well. x.iter_mut() .for_each(|v| if *v != F::ZERO { *v = *v/::abs(*v) }); x } -} -/// Specialisation of [`generic_pointsource_fb_reg`] to PDPS. -pub struct PDPS< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel, F>, - D, - const N : usize -> { - /// The data - b : &'a A::Observable, - /// The forward operator - opA : &'a A, - /// Primal step length - τ : F, - // Dual step length - σ : F, - /// Whether acceleration should be applied (if data term supports) - acceleration : Acceleration, - /// The dataterm. Only used by the type system. - _dataterm : D, - /// Previous dual iterate. - y_prev : A::Observable, -} - -/// Implementation of [`FBSpecialisation`] for μPDPS with norm-2-squared data fidelity. -#[replace_float_literals(F::cast_from(literal))] -impl< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel, F>, - const N : usize -> FBSpecialisation for PDPS<'a, F, A, L2Squared, N> -where for<'b> &'b A::Observable : std::ops::Add { - - fn update( - &mut self, - μ : &mut DiscreteMeasure, F>, - μ_base : &DiscreteMeasure, F> - ) -> (A::Observable, Option) { - let σ = self.σ; - let τ = self.τ; - let ω = match self.acceleration { - Acceleration::None => 1.0, - Acceleration::Partial => { - let ω = 1.0 / (1.0 + σ).sqrt(); - self.σ = σ * ω; - self.τ = τ / ω; - ω - }, - Acceleration::Full => { - let ω = 1.0 / (1.0 + 2.0 * σ).sqrt(); - self.σ = σ * ω; - self.τ = τ / ω; - ω - }, - }; - - μ.prune(); - - let mut y = self.b.clone(); - self.opA.gemv(&mut y, 1.0 + ω, μ, -1.0); - self.opA.gemv(&mut y, -ω, μ_base, 1.0); - y.axpy(1.0 / (1.0 + σ), &self.y_prev, σ / (1.0 + σ)); - self.y_prev.copy_from(&y); - - (y, Some(self.τ)) - } - - fn calculate_fit( - &self, - μ : &DiscreteMeasure, F>, - _y : &A::Observable - ) -> F { - self.calculate_fit_simple(μ) - } - - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure, F>, - ) -> F { - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - residual.norm2_squared_div2() - } -} - -/// Implementation of [`FBSpecialisation`] for μPDPS with norm-1 data fidelity. -#[replace_float_literals(F::cast_from(literal))] -impl< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel, F>, - const N : usize -> FBSpecialisation for PDPS<'a, F, A, L1, N> -where A::Observable : Projection + Norm, - for<'b> &'b A::Observable : std::ops::Add { - fn update( - &mut self, - μ : &mut DiscreteMeasure, F>, - μ_base : &DiscreteMeasure, F> - ) -> (A::Observable, Option) { - let σ = self.σ; - - μ.prune(); - - //let ȳ = self.opA.apply(μ) * 2.0 - self.opA.apply(μ_base); - //*y = proj_{[-1,1]}(&self.y_prev + (ȳ - self.b) * σ) - let mut y = self.y_prev.clone(); - self.opA.gemv(&mut y, 2.0 * σ, μ, 1.0); - self.opA.gemv(&mut y, -σ, μ_base, 1.0); - y.axpy(-σ, self.b, 1.0); + #[inline] + fn dual_update(&self, y : &mut DVector, y_prev : &DVector, σ : F) { + y.axpy(1.0, y_prev, σ); y.proj_ball_mut(1.0, Linfinity); - self.y_prev.copy_from(&y); - - (y, None) - } - - fn calculate_fit( - &self, - μ : &DiscreteMeasure, F>, - _y : &A::Observable - ) -> F { - self.calculate_fit_simple(μ) - } - - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure, F>, - ) -> F { - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - residual.norm(L1) } } @@ -304,93 +218,106 @@ /// /// Returns the final iterate. #[replace_float_literals(F::cast_from(literal))] -pub fn pointsource_pdps_reg<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, Reg, const N : usize>( - opA : &'a A, - b : &'a A::Observable, +pub fn pointsource_pdps_reg( + opA : &A, + b : &A::Observable, reg : Reg, - op𝒟 : &'a 𝒟, - config : &PDPSConfig, + prox_penalty : &P, + pdpsconfig : &PDPSConfig, iterator : I, - plotter : SeqPlotter, + mut plotter : SeqPlotter, dataterm : D, -) -> DiscreteMeasure, F> -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory>, - for<'b> &'b A::Observable : std::ops::Neg - + std::ops::Add, - //+ std::ops::Mul, // <-- FIXME: compiler overflow - A::Observable : std::ops::MulAssign, - GA : SupportGenerator + Clone, - A : ForwardModel, F, PreadjointCodomain = BTFN> - + Lipschitz<𝒟, FloatType=F>, - BTA : BTSearch>, - G𝒟 : SupportGenerator + Clone, - 𝒟 : DiscreteMeasureOp, F, PreCodomain = PreBTFN>, - 𝒟::Codomain : RealMapping, - S: RealMapping + LocalAnalysis, N>, - K: RealMapping + LocalAnalysis, N>, - BTNodeLookup: BTNode, N>, - PlotLookup : Plotting, - DiscreteMeasure, F> : SpikeMerging, - PDPS<'a, F, A, D, N> : FBSpecialisation, - D : Subdifferentiable, - Reg : RegTerm { +) -> RNDM +where + F : Float + ToNalgebraRealField, + I : AlgIteratorFactory>, + A : ForwardModel, F> + + AdjointProductBoundedBy, P, FloatType=F>, + A::PreadjointCodomain : RealMapping, + for<'b> &'b A::Observable : std::ops::Neg + Instance, + PlotLookup : Plotting, + RNDM : SpikeMerging, + D : PDPSDataTerm, + Reg : RegTerm, + P : ProxPenalty, +{ + + // Check parameters + assert!(pdpsconfig.τ0 > 0.0 && + pdpsconfig.σ0 > 0.0 && + pdpsconfig.τ0 * pdpsconfig.σ0 <= 1.0, + "Invalid step length parameters"); + + // Set up parameters + let config = &pdpsconfig.generic; + let l = opA.adjoint_product_bound(prox_penalty).unwrap().sqrt(); + let mut τ = pdpsconfig.τ0 / l; + let mut σ = pdpsconfig.σ0 / l; + let γ = dataterm.factor_of_strong_convexity(); + + // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled + // by τ compared to the conditional gradient approach. + let tolerance = config.tolerance * τ * reg.tolerance_scaling(); + let mut ε = tolerance.initial(); + + // Initialise iterates + let mut μ = DiscreteMeasure::new(); + let mut y = dataterm.some_subdifferential(-b); + let mut y_prev = y.clone(); + let full_stats = |μ : &RNDM, ε, stats| IterInfo { + value : dataterm.calculate_fit_op(μ, opA, b) + reg.apply(μ), + n_spikes : μ.len(), + ε, + // postprocessing: config.postprocessing.then(|| μ.clone()), + .. stats + }; + let mut stats = IterInfo::new(); - let y = dataterm.some_subdifferential(-b); - let l = opA.lipschitz_factor(&op𝒟).unwrap().sqrt(); - let τ = config.τ0 / l; - let σ = config.σ0 / l; + // Run the algorithm + for state in iterator.iter_init(|| full_stats(&μ, ε, stats.clone())) { + // Calculate smooth part of surrogate model. + let mut τv = opA.preadjoint().apply(y * τ); + + // Save current base point + let μ_base = μ.clone(); + + // Insert and reweigh + let (maybe_d, _within_tolerances) = prox_penalty.insert_and_reweigh( + &mut μ, &mut τv, &μ_base, None, + τ, ε, + config, ®, &state, &mut stats + ); + + // Prune and possibly merge spikes + if config.merge_now(&state) { + stats.merged += prox_penalty.merge_spikes_no_fitness( + &mut μ, &mut τv, &μ_base, None, τ, ε, config, ®, + ); + } + stats.pruned += prune_with_stats(&mut μ); - let pdps = PDPS { - b, - opA, - τ, - σ, - acceleration : config.acceleration, - _dataterm : dataterm, - y_prev : y.clone(), - }; + // Update step length parameters + let ω = pdpsconfig.acceleration.accelerate(&mut τ, &mut σ, γ); + + // Do dual update + y = b.clone(); // y = b + opA.gemv(&mut y, 1.0 + ω, &μ, -1.0); // y = A[(1+ω)μ^{k+1}]-b + opA.gemv(&mut y, -ω, &μ_base, 1.0); // y = A[(1+ω)μ^{k+1} - ω μ^k]-b + dataterm.dual_update(&mut y, &y_prev, σ); + y_prev.copy_from(&y); - generic_pointsource_fb_reg( - opA, reg, op𝒟, τ, &config.insertion, iterator, plotter, y, pdps - ) + // Give statistics if requested + let iter = state.iteration(); + stats.this_iters += 1; + + state.if_verbose(|| { + plotter.plot_spikes(iter, maybe_d.as_ref(), Some(&τv), &μ); + full_stats(&μ, ε, std::mem::replace(&mut stats, IterInfo::new())) + }); + + ε = tolerance.update(ε, iter); + } + + postprocess(μ, config, dataterm, opA, b) } -// -// Deprecated interfaces -// - -#[deprecated(note = "Use `pointsource_pdps_reg`")] -pub fn pointsource_pdps<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, const N : usize>( - opA : &'a A, - b : &'a A::Observable, - α : F, - op𝒟 : &'a 𝒟, - config : &PDPSConfig, - iterator : I, - plotter : SeqPlotter, - dataterm : D, -) -> DiscreteMeasure, F> -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory>, - for<'b> &'b A::Observable : std::ops::Neg - + std::ops::Add, - A::Observable : std::ops::MulAssign, - GA : SupportGenerator + Clone, - A : ForwardModel, F, PreadjointCodomain = BTFN> - + Lipschitz<𝒟, FloatType=F>, - BTA : BTSearch>, - G𝒟 : SupportGenerator + Clone, - 𝒟 : DiscreteMeasureOp, F, PreCodomain = PreBTFN>, - 𝒟::Codomain : RealMapping, - S: RealMapping + LocalAnalysis, N>, - K: RealMapping + LocalAnalysis, N>, - BTNodeLookup: BTNode, N>, - Cube: P2Minimise, F>, - PlotLookup : Plotting, - DiscreteMeasure, F> : SpikeMerging, - PDPS<'a, F, A, D, N> : FBSpecialisation, - D : Subdifferentiable { - - pointsource_pdps_reg(opA, b, NonnegRadonRegTerm(α), op𝒟, config, iterator, plotter, dataterm) -}