src/pdps.rs

changeset 52
f0e8704d3f0e
parent 39
6316d68b58af
--- a/src/pdps.rs	Tue Aug 01 10:25:09 2023 +0300
+++ b/src/pdps.rs	Mon Feb 17 13:54:53 2025 -0500
@@ -6,8 +6,7 @@
  * Valkonen T. - _Proximal methods for point source localisation_,
    [arXiv:2212.02991](https://arxiv.org/abs/2212.02991).
 
-The main routine is [`pointsource_pdps`]. It is based on specilisatinn of
-[`generic_pointsource_fb_reg`] through relevant [`FBSpecialisation`] implementations.
+The main routine is [`pointsource_pdps_reg`].
 Both norm-2-squared and norm-1 data terms are supported. That is, implemented are solvers for
 <div>
 $$
@@ -37,10 +36,6 @@
 For $F_0(y)=\frac{1}{2}\|y\|_2^2$ the second part reads $y = Aμ -b$.
 For $F_0(y)=\|y\|_1$ the second part reads $y ∈ ∂\|·\|_1(Aμ - b)$.
 </p>
-
-Based on zero initialisation for $μ$, we use the [`Subdifferentiable`] trait to make an
-initialisation corresponding to the second part of the optimality conditions.
-In the algorithm itself, standard proximal steps are taking with respect to $F\_0^* + ⟨b, ·⟩$.
 */
 
 use numeric_literals::replace_float_literals;
@@ -48,37 +43,23 @@
 use nalgebra::DVector;
 use clap::ValueEnum;
 
-use alg_tools::iterate:: AlgIteratorFactory;
-use alg_tools::sets::Cube;
-use alg_tools::loc::Loc;
+use alg_tools::iterate::AlgIteratorFactory;
 use alg_tools::euclidean::Euclidean;
+use alg_tools::linops::Mapping;
 use alg_tools::norms::{
-    L1, Linfinity,
-    Projection, Norm,
+    Linfinity,
+    Projection,
 };
-use alg_tools::bisection_tree::{
-    BTFN,
-    PreBTFN,
-    Bounds,
-    BTNodeLookup,
-    BTNode,
-    BTSearch,
-    P2Minimise,
-    SupportGenerator,
-    LocalAnalysis,
-};
-use alg_tools::mapping::RealMapping;
+use alg_tools::mapping::{RealMapping, Instance};
 use alg_tools::nalgebra_support::ToNalgebraRealField;
 use alg_tools::linops::AXPY;
 
 use crate::types::*;
-use crate::measures::DiscreteMeasure;
-use crate::measures::merging::{
-    SpikeMerging,
-};
-use crate::forward_model::ForwardModel;
-use crate::seminorms::{
-    DiscreteMeasureOp, Lipschitz
+use crate::measures::{DiscreteMeasure, RNDM};
+use crate::measures::merging::SpikeMerging;
+use crate::forward_model::{
+    ForwardModel,
+    AdjointProductBoundedBy,
 };
 use crate::plot::{
     SeqPlotter,
@@ -86,12 +67,21 @@
     PlotLookup
 };
 use crate::fb::{
+    postprocess,
+    prune_with_stats
+};
+pub use crate::prox_penalty::{
     FBGenericConfig,
-    FBSpecialisation,
-    generic_pointsource_fb_reg,
-    RegTerm,
+    ProxPenalty
 };
-use crate::regularisation::NonnegRadonRegTerm;
+use crate::regularisation::RegTerm;
+use crate::dataterm::{
+    DataTerm,
+    L2Squared,
+    L1
+};
+use crate::measures::merging::SpikeMergingMethod;
+
 
 /// Acceleration
 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, ValueEnum, Debug)]
@@ -107,7 +97,30 @@
     Full
 }
 
-/// Settings for [`pointsource_pdps`].
+#[replace_float_literals(F::cast_from(literal))]
+impl Acceleration {
+    /// PDPS parameter acceleration. Updates τ and σ and returns ω.
+    /// This uses dual strong convexity, not primal.
+    fn accelerate<F : Float>(self, τ : &mut F, σ : &mut F, γ : F) -> F {
+        match self {
+            Acceleration::None => 1.0,
+            Acceleration::Partial => {
+                let ω = 1.0 / (1.0 + γ * (*σ)).sqrt();
+                *σ *= ω;
+                *τ /= ω;
+                ω
+            },
+            Acceleration::Full => {
+                let ω = 1.0 / (1.0 + 2.0 * γ * (*σ)).sqrt();
+                *σ *= ω;
+                *τ /= ω;
+                ω
+            },
+        }
+    }
+}
+
+/// Settings for [`pointsource_pdps_reg`].
 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
 #[serde(default)]
 pub struct PDPSConfig<F : Float> {
@@ -118,176 +131,77 @@
     /// Accelerate if available
     pub acceleration : Acceleration,
     /// Generic parameters
-    pub insertion : FBGenericConfig<F>,
+    pub generic : FBGenericConfig<F>,
 }
 
 #[replace_float_literals(F::cast_from(literal))]
 impl<F : Float> Default for PDPSConfig<F> {
     fn default() -> Self {
-        let τ0 = 0.5;
+        let τ0 = 5.0;
         PDPSConfig {
             τ0,
             σ0 : 0.99/τ0,
             acceleration : Acceleration::Partial,
-            insertion : Default::default()
+            generic : FBGenericConfig {
+                merging : SpikeMergingMethod { enabled : true, ..Default::default() },
+                .. Default::default()
+            },
         }
     }
 }
 
-/// Trait for subdifferentiable objects
-pub trait Subdifferentiable<F : Float, V, U=V> {
-    /// Calculate some subdifferential at `x`
-    fn some_subdifferential(&self, x : V) -> U;
+/// Trait for data terms for the PDPS
+#[replace_float_literals(F::cast_from(literal))]
+pub trait PDPSDataTerm<F : Float, V, const N : usize> : DataTerm<F, V, N> {
+    /// Calculate some subdifferential at `x` for the conjugate
+    fn some_subdifferential(&self, x : V) -> V;
+
+    /// Factor of strong convexity of the conjugate
+    #[inline]
+    fn factor_of_strong_convexity(&self) -> F {
+        0.0
+    }
+
+    /// Perform dual update
+    fn dual_update(&self, _y : &mut V, _y_prev : &V, _σ : F);
 }
 
-/// Type for indicating norm-2-squared data fidelity.
-pub struct L2Squared;
 
-impl<F : Float, V : Euclidean<F>> Subdifferentiable<F, V> for L2Squared {
+#[replace_float_literals(F::cast_from(literal))]
+impl<F, V, const N : usize> PDPSDataTerm<F, V, N>
+for L2Squared
+where
+    F : Float,
+    V :  Euclidean<F> + AXPY<F>,
+    for<'b> &'b V : Instance<V>,
+{
     fn some_subdifferential(&self, x : V) -> V { x }
+
+    fn factor_of_strong_convexity(&self) -> F {
+        1.0
+    }
+
+    #[inline]
+    fn dual_update(&self, y : &mut V, y_prev : &V, σ : F) {
+        y.axpy(1.0 / (1.0 + σ), y_prev, σ / (1.0 + σ));
+    }
 }
 
-impl<F : Float + nalgebra::RealField> Subdifferentiable<F, DVector<F>> for L1 {
+#[replace_float_literals(F::cast_from(literal))]
+impl<F : Float + nalgebra::RealField, const N : usize>
+PDPSDataTerm<F, DVector<F>, N>
+for L1 {
     fn some_subdifferential(&self, mut x : DVector<F>) -> DVector<F> {
         // nalgebra sucks for providing second copies of the same stuff that's elsewhere as well.
         x.iter_mut()
          .for_each(|v| if *v != F::ZERO { *v = *v/<F as NumTraitsFloat>::abs(*v) });
         x
     }
-}
 
-/// Specialisation of [`generic_pointsource_fb_reg`] to PDPS.
-pub struct PDPS<
-    'a,
-    F : Float + ToNalgebraRealField,
-    A : ForwardModel<Loc<F, N>, F>,
-    D,
-    const N : usize
-> {
-    /// The data
-    b : &'a A::Observable,
-    /// The forward operator
-    opA : &'a A,
-    /// Primal step length
-    τ : F,
-    // Dual step length
-    σ : F,
-    /// Whether acceleration should be applied (if data term supports)
-    acceleration : Acceleration,
-    /// The dataterm. Only used by the type system.
-    _dataterm : D,
-    /// Previous dual iterate.
-    y_prev : A::Observable,
-}
-
-/// Implementation of [`FBSpecialisation`] for μPDPS with norm-2-squared data fidelity.
-#[replace_float_literals(F::cast_from(literal))]
-impl<
-    'a,
-    F : Float + ToNalgebraRealField,
-    A : ForwardModel<Loc<F, N>, F>,
-    const N : usize
-> FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L2Squared, N>
-where for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> {
-
-    fn update(
-        &mut self,
-        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
-        μ_base : &DiscreteMeasure<Loc<F, N>, F>
-    ) -> (A::Observable, Option<F>) {
-        let σ = self.σ;
-        let τ = self.τ;
-        let ω = match self.acceleration {
-            Acceleration::None => 1.0,
-            Acceleration::Partial => {
-                let ω = 1.0 / (1.0 + σ).sqrt();
-                self.σ = σ * ω;
-                self.τ = τ / ω;
-                ω
-            },
-            Acceleration::Full => {
-                let ω = 1.0 / (1.0 + 2.0 * σ).sqrt();
-                self.σ = σ * ω;
-                self.τ = τ / ω;
-                ω
-            },
-        };
-
-        μ.prune();
-
-        let mut y = self.b.clone();
-        self.opA.gemv(&mut y, 1.0 + ω, μ, -1.0);
-        self.opA.gemv(&mut y, -ω, μ_base, 1.0);
-        y.axpy(1.0 / (1.0 + σ), &self.y_prev,  σ / (1.0 + σ));
-        self.y_prev.copy_from(&y);
-
-        (y, Some(self.τ))
-    }
-
-    fn calculate_fit(
-        &self,
-        μ : &DiscreteMeasure<Loc<F, N>, F>,
-        _y : &A::Observable
-    ) -> F {
-        self.calculate_fit_simple(μ)
-    }
-
-    fn calculate_fit_simple(
-        &self,
-        μ : &DiscreteMeasure<Loc<F, N>, F>,
-    ) -> F {
-        let mut residual = self.b.clone();
-        self.opA.gemv(&mut residual, 1.0, μ, -1.0);
-        residual.norm2_squared_div2()
-    }
-}
-
-/// Implementation of [`FBSpecialisation`] for μPDPS with norm-1 data fidelity.
-#[replace_float_literals(F::cast_from(literal))]
-impl<
-    'a,
-    F : Float + ToNalgebraRealField,
-    A : ForwardModel<Loc<F, N>, F>,
-    const N : usize
-> FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L1, N>
-where A::Observable : Projection<F, Linfinity> + Norm<F, L1>,
-      for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> {
-    fn update(
-        &mut self,
-        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
-        μ_base : &DiscreteMeasure<Loc<F, N>, F>
-    ) -> (A::Observable, Option<F>) {
-        let σ = self.σ;
-
-        μ.prune();
-
-        //let ȳ = self.opA.apply(μ) * 2.0 - self.opA.apply(μ_base);
-        //*y = proj_{[-1,1]}(&self.y_prev + (ȳ - self.b) * σ)
-        let mut y = self.y_prev.clone();
-        self.opA.gemv(&mut y, 2.0 * σ, μ, 1.0);
-        self.opA.gemv(&mut y, -σ, μ_base, 1.0);
-        y.axpy(-σ, self.b, 1.0);
+     #[inline]
+     fn dual_update(&self, y : &mut DVector<F>, y_prev : &DVector<F>, σ : F) {
+        y.axpy(1.0, y_prev, σ);
         y.proj_ball_mut(1.0, Linfinity);
-        self.y_prev.copy_from(&y);
-
-        (y, None)
-    }
-
-    fn calculate_fit(
-        &self,
-        μ : &DiscreteMeasure<Loc<F, N>, F>,
-        _y : &A::Observable
-    ) -> F {
-        self.calculate_fit_simple(μ)
-    }
-
-    fn calculate_fit_simple(
-        &self,
-        μ : &DiscreteMeasure<Loc<F, N>, F>,
-    ) -> F {
-        let mut residual = self.b.clone();
-        self.opA.gemv(&mut residual, 1.0, μ, -1.0);
-        residual.norm(L1)
     }
 }
 
@@ -304,93 +218,106 @@
 ///
 /// Returns the final iterate.
 #[replace_float_literals(F::cast_from(literal))]
-pub fn pointsource_pdps_reg<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, Reg, const N : usize>(
-    opA : &'a A,
-    b : &'a A::Observable,
+pub fn pointsource_pdps_reg<F, I, A, D, Reg, P, const N : usize>(
+    opA : &A,
+    b : &A::Observable,
     reg : Reg,
-    op𝒟 : &'a 𝒟,
-    config : &PDPSConfig<F>,
+    prox_penalty : &P,
+    pdpsconfig : &PDPSConfig<F>,
     iterator : I,
-    plotter : SeqPlotter<F, N>,
+    mut plotter : SeqPlotter<F, N>,
     dataterm : D,
-) -> DiscreteMeasure<Loc<F, N>, F>
-where F : Float + ToNalgebraRealField,
-      I : AlgIteratorFactory<IterInfo<F, N>>,
-      for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>
-                                  + std::ops::Add<A::Observable, Output=A::Observable>,
-                                  //+ std::ops::Mul<F, Output=A::Observable>, // <-- FIXME: compiler overflow
-      A::Observable : std::ops::MulAssign<F>,
-      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
-      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>
-          + Lipschitz<𝒟, FloatType=F>,
-      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
-      G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone,
-      𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>,
-      𝒟::Codomain : RealMapping<F, N>,
-      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
-      K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
-      BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
-      PlotLookup : Plotting<N>,
-      DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>,
-      PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>,
-      D : Subdifferentiable<F, A::Observable>,
-      Reg : RegTerm<F, N> {
+) -> RNDM<F, N>
+where
+    F : Float + ToNalgebraRealField,
+    I : AlgIteratorFactory<IterInfo<F, N>>,
+    A : ForwardModel<RNDM<F, N>, F>
+        + AdjointProductBoundedBy<RNDM<F, N>, P, FloatType=F>,
+    A::PreadjointCodomain : RealMapping<F, N>,
+    for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + Instance<A::Observable>,
+    PlotLookup : Plotting<N>,
+    RNDM<F, N> : SpikeMerging<F>,
+    D : PDPSDataTerm<F, A::Observable, N>,
+    Reg : RegTerm<F, N>,
+    P : ProxPenalty<F, A::PreadjointCodomain, Reg, N>,
+{
+
+    // Check parameters
+    assert!(pdpsconfig.τ0 > 0.0 &&
+            pdpsconfig.σ0 > 0.0 &&
+            pdpsconfig.τ0 * pdpsconfig.σ0 <= 1.0,
+            "Invalid step length parameters");
+
+    // Set up parameters
+    let config = &pdpsconfig.generic;
+    let l = opA.adjoint_product_bound(prox_penalty).unwrap().sqrt();
+    let mut τ = pdpsconfig.τ0 / l;
+    let mut σ = pdpsconfig.σ0 / l;
+    let γ = dataterm.factor_of_strong_convexity();
+
+    // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled
+    // by τ compared to the conditional gradient approach.
+    let tolerance = config.tolerance * τ * reg.tolerance_scaling();
+    let mut ε = tolerance.initial();
+
+    // Initialise iterates
+    let mut μ = DiscreteMeasure::new();
+    let mut y = dataterm.some_subdifferential(-b);
+    let mut y_prev = y.clone();
+    let full_stats = |μ : &RNDM<F, N>, ε, stats| IterInfo {
+        value : dataterm.calculate_fit_op(μ, opA, b) + reg.apply(μ),
+        n_spikes : μ.len(),
+        ε,
+        // postprocessing: config.postprocessing.then(|| μ.clone()),
+        .. stats
+    };
+    let mut stats = IterInfo::new();
 
-    let y = dataterm.some_subdifferential(-b);
-    let l = opA.lipschitz_factor(&op𝒟).unwrap().sqrt();
-    let τ = config.τ0 / l;
-    let σ = config.σ0 / l;
+    // Run the algorithm
+    for state in iterator.iter_init(|| full_stats(&μ, ε, stats.clone())) {
+        // Calculate smooth part of surrogate model.
+        let mut τv = opA.preadjoint().apply(y * τ);
+
+        // Save current base point
+        let μ_base = μ.clone();
+        
+        // Insert and reweigh
+        let (maybe_d, _within_tolerances) = prox_penalty.insert_and_reweigh(
+            &mut μ, &mut τv, &μ_base, None,
+            τ, ε,
+            config, &reg, &state, &mut stats
+        );
+
+        // Prune and possibly merge spikes
+        if config.merge_now(&state) {
+            stats.merged += prox_penalty.merge_spikes_no_fitness(
+                &mut μ, &mut τv, &μ_base, None, τ, ε, config, &reg,
+            );
+        }
+        stats.pruned += prune_with_stats(&mut μ);
 
-    let pdps = PDPS {
-        b,
-        opA,
-        τ,
-        σ,
-        acceleration : config.acceleration,
-        _dataterm : dataterm,
-        y_prev : y.clone(),
-    };
+        // Update step length parameters
+        let ω = pdpsconfig.acceleration.accelerate(&mut τ, &mut σ, γ);
+
+        // Do dual update
+        y = b.clone();                          // y = b
+        opA.gemv(&mut y, 1.0 + ω, &μ, -1.0);    // y = A[(1+ω)μ^{k+1}]-b
+        opA.gemv(&mut y, -ω, &μ_base, 1.0);     // y = A[(1+ω)μ^{k+1} - ω μ^k]-b
+        dataterm.dual_update(&mut y, &y_prev, σ);
+        y_prev.copy_from(&y);
 
-    generic_pointsource_fb_reg(
-        opA, reg, op𝒟, τ, &config.insertion, iterator, plotter, y, pdps
-    )
+        // Give statistics if requested
+        let iter = state.iteration();
+        stats.this_iters += 1;
+
+        state.if_verbose(|| {
+            plotter.plot_spikes(iter, maybe_d.as_ref(), Some(&τv), &μ);
+            full_stats(&μ, ε, std::mem::replace(&mut stats, IterInfo::new()))
+        });
+
+        ε = tolerance.update(ε, iter);
+    }
+
+    postprocess(μ, config, dataterm, opA, b)
 }
 
-//
-// Deprecated interfaces
-//
-
-#[deprecated(note = "Use `pointsource_pdps_reg`")]
-pub fn pointsource_pdps<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, const N : usize>(
-    opA : &'a A,
-    b : &'a A::Observable,
-    α : F,
-    op𝒟 : &'a 𝒟,
-    config : &PDPSConfig<F>,
-    iterator : I,
-    plotter : SeqPlotter<F, N>,
-    dataterm : D,
-) -> DiscreteMeasure<Loc<F, N>, F>
-where F : Float + ToNalgebraRealField,
-      I : AlgIteratorFactory<IterInfo<F, N>>,
-      for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>
-                                  + std::ops::Add<A::Observable, Output=A::Observable>,
-      A::Observable : std::ops::MulAssign<F>,
-      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
-      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>
-          + Lipschitz<𝒟, FloatType=F>,
-      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
-      G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone,
-      𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>,
-      𝒟::Codomain : RealMapping<F, N>,
-      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
-      K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
-      BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
-      Cube<F, N>: P2Minimise<Loc<F, N>, F>,
-      PlotLookup : Plotting<N>,
-      DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>,
-      PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>,
-      D : Subdifferentiable<F, A::Observable> {
-
-    pointsource_pdps_reg(opA, b, NonnegRadonRegTerm(α), op𝒟, config, iterator, plotter, dataterm)
-}

mercurial