src/pdps.rs

Sun, 11 Dec 2022 23:25:53 +0200

author
Tuomo Valkonen <tuomov@iki.fi>
date
Sun, 11 Dec 2022 23:25:53 +0200
changeset 24
d29d1fcf5423
parent 13
bdc57366d4f5
permissions
-rw-r--r--

Support arbitrary regularisation terms; implement non-positivity-constrained regularisation.

* Fixes the conditional gradient methods that were incorrectly solving
positivity constrained subproblems although the infinite-dimensional
versions do not include such constraints.

* Introduces the `ExperimentV2` struct that has `regularisation` in place
of `α`. The `Experiment` struct is now deprecated.

* The L^2-squared experiments were switch to be unconstrained, as the
Franke-Wolfe implementations do not support constraints. (This would
be easy to add for the “fully corrective” variant, but is not
immediate for the “relaxed” variant.)

/*!
Solver for the point source localisation problem with primal-dual proximal splitting.

This corresponds to the manuscript

 * Valkonen T. - _Proximal methods for point source localisation_,
   [arXiv:2212.02991](https://arxiv.org/abs/2212.02991).

The main routine is [`pointsource_pdps`]. It is based on specilisatinn of
[`generic_pointsource_fb_reg`] through relevant [`FBSpecialisation`] implementations.
Both norm-2-squared and norm-1 data terms are supported. That is, implemented are solvers for
<div>
$$
    \min_{μ ∈ ℳ(Ω)}~ F_0(Aμ - b) + α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ),
$$
for both $F_0(y)=\frac{1}{2}\|y\|_2^2$ and  $F_0(y)=\|y\|_1$ with the forward operator
$A \in 𝕃(ℳ(Ω); ℝ^n)$.
</div>

## Approach

<p>
The problem above can be written as
$$
    \min_μ \max_y G(μ) + ⟨y, Aμ-b⟩ - F_0^*(μ),
$$
where $G(μ) = α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ)$.
The Fenchel–Rockafellar optimality conditions, employing the predual in $ℳ(Ω)$, are
$$
    0 ∈ A_*y + ∂G(μ)
    \quad\text{and}\quad
    Aμ - b ∈ ∂ F_0^*(y).
$$
The solution of the first part is as for forward-backward, treated in the manuscript.
This is the task of <code>generic_pointsource_fb</code>, where we use <code>FBSpecialisation</code>
to replace the specific residual $Aμ-b$ by $y$.
For $F_0(y)=\frac{1}{2}\|y\|_2^2$ the second part reads $y = Aμ -b$.
For $F_0(y)=\|y\|_1$ the second part reads $y ∈ ∂\|·\|_1(Aμ - b)$.
</p>

Based on zero initialisation for $μ$, we use the [`Subdifferentiable`] trait to make an
initialisation corresponding to the second part of the optimality conditions.
In the algorithm itself, standard proximal steps are taking with respect to $F\_0^* + ⟨b, ·⟩$.
*/

use numeric_literals::replace_float_literals;
use serde::{Serialize, Deserialize};
use nalgebra::DVector;
use clap::ValueEnum;

use alg_tools::iterate:: AlgIteratorFactory;
use alg_tools::sets::Cube;
use alg_tools::loc::Loc;
use alg_tools::euclidean::Euclidean;
use alg_tools::norms::{
    L1, Linfinity,
    Projection, Norm,
};
use alg_tools::bisection_tree::{
    BTFN,
    PreBTFN,
    Bounds,
    BTNodeLookup,
    BTNode,
    BTSearch,
    P2Minimise,
    SupportGenerator,
    LocalAnalysis,
};
use alg_tools::mapping::RealMapping;
use alg_tools::nalgebra_support::ToNalgebraRealField;
use alg_tools::linops::AXPY;

use crate::types::*;
use crate::measures::DiscreteMeasure;
use crate::measures::merging::{
    SpikeMerging,
};
use crate::forward_model::ForwardModel;
use crate::seminorms::{
    DiscreteMeasureOp, Lipschitz
};
use crate::plot::{
    SeqPlotter,
    Plotting,
    PlotLookup
};
use crate::fb::{
    FBGenericConfig,
    FBSpecialisation,
    generic_pointsource_fb_reg,
    RegTerm,
};
use crate::regularisation::NonnegRadonRegTerm;

/// Acceleration
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, ValueEnum, Debug)]
pub enum Acceleration {
    /// No acceleration
    #[clap(name = "none")]
    None,
    /// Partial acceleration, $ω = 1/\sqrt{1+σ}$
    #[clap(name = "partial", help = "Partial acceleration, ω = 1/√(1+σ)")]
    Partial,
    /// Full acceleration, $ω = 1/\sqrt{1+2σ}$; no gap convergence guaranteed
    #[clap(name = "full", help = "Full acceleration, ω = 1/√(1+2σ); no gap convergence guaranteed")]
    Full
}

/// Settings for [`pointsource_pdps`].
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
#[serde(default)]
pub struct PDPSConfig<F : Float> {
    /// Primal step length scaling. We must have `τ0 * σ0 < 1`.
    pub τ0 : F,
    /// Dual step length scaling. We must have `τ0 * σ0 < 1`.
    pub σ0 : F,
    /// Accelerate if available
    pub acceleration : Acceleration,
    /// Generic parameters
    pub insertion : FBGenericConfig<F>,
}

#[replace_float_literals(F::cast_from(literal))]
impl<F : Float> Default for PDPSConfig<F> {
    fn default() -> Self {
        let τ0 = 0.5;
        PDPSConfig {
            τ0,
            σ0 : 0.99/τ0,
            acceleration : Acceleration::Partial,
            insertion : Default::default()
        }
    }
}

/// Trait for subdifferentiable objects
pub trait Subdifferentiable<F : Float, V, U=V> {
    /// Calculate some subdifferential at `x`
    fn some_subdifferential(&self, x : V) -> U;
}

/// Type for indicating norm-2-squared data fidelity.
pub struct L2Squared;

impl<F : Float, V : Euclidean<F>> Subdifferentiable<F, V> for L2Squared {
    fn some_subdifferential(&self, x : V) -> V { x }
}

impl<F : Float + nalgebra::RealField> Subdifferentiable<F, DVector<F>> for L1 {
    fn some_subdifferential(&self, mut x : DVector<F>) -> DVector<F> {
        // nalgebra sucks for providing second copies of the same stuff that's elsewhere as well.
        x.iter_mut()
         .for_each(|v| if *v != F::ZERO { *v = *v/<F as NumTraitsFloat>::abs(*v) });
        x
    }
}

/// Specialisation of [`generic_pointsource_fb_reg`] to PDPS.
pub struct PDPS<
    'a,
    F : Float + ToNalgebraRealField,
    A : ForwardModel<Loc<F, N>, F>,
    D,
    const N : usize
> {
    /// The data
    b : &'a A::Observable,
    /// The forward operator
    opA : &'a A,
    /// Primal step length
    τ : F,
    // Dual step length
    σ : F,
    /// Whether acceleration should be applied (if data term supports)
    acceleration : Acceleration,
    /// The dataterm. Only used by the type system.
    _dataterm : D,
    /// Previous dual iterate.
    y_prev : A::Observable,
}

/// Implementation of [`FBSpecialisation`] for μPDPS with norm-2-squared data fidelity.
#[replace_float_literals(F::cast_from(literal))]
impl<
    'a,
    F : Float + ToNalgebraRealField,
    A : ForwardModel<Loc<F, N>, F>,
    const N : usize
> FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L2Squared, N>
where for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> {

    fn update(
        &mut self,
        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
        μ_base : &DiscreteMeasure<Loc<F, N>, F>
    ) -> (A::Observable, Option<F>) {
        let σ = self.σ;
        let τ = self.τ;
        let ω = match self.acceleration {
            Acceleration::None => 1.0,
            Acceleration::Partial => {
                let ω = 1.0 / (1.0 + σ).sqrt();
                self.σ = σ * ω;
                self.τ = τ / ω;
                ω
            },
            Acceleration::Full => {
                let ω = 1.0 / (1.0 + 2.0 * σ).sqrt();
                self.σ = σ * ω;
                self.τ = τ / ω;
                ω
            },
        };

        μ.prune();

        let mut y = self.b.clone();
        self.opA.gemv(&mut y, 1.0 + ω, μ, -1.0);
        self.opA.gemv(&mut y, -ω, μ_base, 1.0);
        y.axpy(1.0 / (1.0 + σ), &self.y_prev,  σ / (1.0 + σ));
        self.y_prev.copy_from(&y);

        (y, Some(self.τ))
    }

    fn calculate_fit(
        &self,
        μ : &DiscreteMeasure<Loc<F, N>, F>,
        _y : &A::Observable
    ) -> F {
        self.calculate_fit_simple(μ)
    }

    fn calculate_fit_simple(
        &self,
        μ : &DiscreteMeasure<Loc<F, N>, F>,
    ) -> F {
        let mut residual = self.b.clone();
        self.opA.gemv(&mut residual, 1.0, μ, -1.0);
        residual.norm2_squared_div2()
    }
}

/// Implementation of [`FBSpecialisation`] for μPDPS with norm-1 data fidelity.
#[replace_float_literals(F::cast_from(literal))]
impl<
    'a,
    F : Float + ToNalgebraRealField,
    A : ForwardModel<Loc<F, N>, F>,
    const N : usize
> FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L1, N>
where A::Observable : Projection<F, Linfinity> + Norm<F, L1>,
      for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> {
    fn update(
        &mut self,
        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
        μ_base : &DiscreteMeasure<Loc<F, N>, F>
    ) -> (A::Observable, Option<F>) {
        let σ = self.σ;

        μ.prune();

        //let ȳ = self.opA.apply(μ) * 2.0 - self.opA.apply(μ_base);
        //*y = proj_{[-1,1]}(&self.y_prev + (ȳ - self.b) * σ)
        let mut y = self.y_prev.clone();
        self.opA.gemv(&mut y, 2.0 * σ, μ, 1.0);
        self.opA.gemv(&mut y, -σ, μ_base, 1.0);
        y.axpy(-σ, self.b, 1.0);
        y.proj_ball_mut(1.0, Linfinity);
        self.y_prev.copy_from(&y);

        (y, None)
    }

    fn calculate_fit(
        &self,
        μ : &DiscreteMeasure<Loc<F, N>, F>,
        _y : &A::Observable
    ) -> F {
        self.calculate_fit_simple(μ)
    }

    fn calculate_fit_simple(
        &self,
        μ : &DiscreteMeasure<Loc<F, N>, F>,
    ) -> F {
        let mut residual = self.b.clone();
        self.opA.gemv(&mut residual, 1.0, μ, -1.0);
        residual.norm(L1)
    }
}

/// Iteratively solve the pointsource localisation problem using primal-dual proximal splitting.
///
/// The `dataterm` should be either [`L1`] for norm-1 data term or [`L2Squared`] for norm-2-squared.
/// The settings in `config` have their [respective documentation](PDPSConfig). `opA` is the
/// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight.
/// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution
/// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control
/// as documented in [`alg_tools::iterate`].
///
/// For the mathematical formulation, see the [module level](self) documentation and the manuscript.
///
/// Returns the final iterate.
#[replace_float_literals(F::cast_from(literal))]
pub fn pointsource_pdps_reg<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, Reg, const N : usize>(
    opA : &'a A,
    b : &'a A::Observable,
    reg : Reg,
    op𝒟 : &'a 𝒟,
    config : &PDPSConfig<F>,
    iterator : I,
    plotter : SeqPlotter<F, N>,
    dataterm : D,
) -> DiscreteMeasure<Loc<F, N>, F>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<IterInfo<F, N>>,
      for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>
                                  + std::ops::Add<A::Observable, Output=A::Observable>,
                                  //+ std::ops::Mul<F, Output=A::Observable>, // <-- FIXME: compiler overflow
      A::Observable : std::ops::MulAssign<F>,
      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>
          + Lipschitz<𝒟, FloatType=F>,
      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
      G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone,
      𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>,
      𝒟::Codomain : RealMapping<F, N>,
      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
      K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
      BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
      PlotLookup : Plotting<N>,
      DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>,
      PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>,
      D : Subdifferentiable<F, A::Observable>,
      Reg : RegTerm<F, N> {

    let y = dataterm.some_subdifferential(-b);
    let l = opA.lipschitz_factor(&op𝒟).unwrap().sqrt();
    let τ = config.τ0 / l;
    let σ = config.σ0 / l;

    let pdps = PDPS {
        b,
        opA,
        τ,
        σ,
        acceleration : config.acceleration,
        _dataterm : dataterm,
        y_prev : y.clone(),
    };

    generic_pointsource_fb_reg(
        opA, reg, op𝒟, τ, &config.insertion, iterator, plotter, y, pdps
    )
}

//
// Deprecated interfaces
//

#[deprecated(note = "Use `pointsource_pdps_reg`")]
pub fn pointsource_pdps<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, const N : usize>(
    opA : &'a A,
    b : &'a A::Observable,
    α : F,
    op𝒟 : &'a 𝒟,
    config : &PDPSConfig<F>,
    iterator : I,
    plotter : SeqPlotter<F, N>,
    dataterm : D,
) -> DiscreteMeasure<Loc<F, N>, F>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<IterInfo<F, N>>,
      for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>
                                  + std::ops::Add<A::Observable, Output=A::Observable>,
      A::Observable : std::ops::MulAssign<F>,
      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>
          + Lipschitz<𝒟, FloatType=F>,
      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
      G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone,
      𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>,
      𝒟::Codomain : RealMapping<F, N>,
      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
      K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
      BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
      Cube<F, N>: P2Minimise<Loc<F, N>, F>,
      PlotLookup : Plotting<N>,
      DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>,
      PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>,
      D : Subdifferentiable<F, A::Observable> {

    pointsource_pdps_reg(opA, b, NonnegRadonRegTerm(α), op𝒟, config, iterator, plotter, dataterm)
}

mercurial