// Mercurial > repos > pointsource_algs / file revision

/*!
Solver for the point source localisation problem using a conditional gradient method.

We implement two variants, the “fully corrective” method from

  * Pieper K., Walter D. _Linear convergence of accelerated conditional gradient algorithms
    in spaces of measures_, DOI: [10.1051/cocv/2021042](https://doi.org/10.1051/cocv/2021042),
    arXiv: [1904.09218](https://doi.org/10.48550/arXiv.1904.09218).

and what we call the “relaxed” method from

  * Bredies K., Pikkarainen H. _Inverse problems in spaces of measures_,
    DOI: [10.1051/cocv/2011205](https://doi.org/10.1051/cocv/2011205).
*/

use numeric_literals::replace_float_literals;
use serde::{Serialize, Deserialize};
//use colored::Colorize;

use alg_tools::iterate::{
    AlgIteratorFactory,
    AlgIteratorState,
    AlgIteratorOptions,
    ValueIteratorFactory,
};
use alg_tools::euclidean::Euclidean;
use alg_tools::norms::Norm;
use alg_tools::linops::Apply;
use alg_tools::sets::Cube;
use alg_tools::loc::Loc;
use alg_tools::bisection_tree::{
    BTFN,
    Bounds,
    BTNodeLookup,
    BTNode,
    BTSearch,
    P2Minimise,
    SupportGenerator,
    LocalAnalysis,
};
use alg_tools::mapping::RealMapping;
use alg_tools::nalgebra_support::ToNalgebraRealField;

use crate::types::*;
use crate::measures::{
    DiscreteMeasure,
    DeltaMeasure,
    Radon,
};
use crate::measures::merging::{
    SpikeMergingMethod,
    SpikeMerging,
};
use crate::forward_model::ForwardModel;
#[allow(unused_imports)] // Used in documentation
use crate::subproblem::{
    unconstrained::quadratic_unconstrained,
    nonneg::quadratic_nonneg,
    InnerSettings,
    InnerMethod,
};
use crate::tolerance::Tolerance;
use crate::plot::{
    SeqPlotter,
    Plotting,
    PlotLookup
};
use crate::regularisation::{
    NonnegRadonRegTerm,
    RadonRegTerm,
};
use crate::fb::RegTerm;

/// Settings for [`pointsource_fw_reg`] (and the deprecated [`pointsource_fw`]).
///
/// Due to `#[serde(default)]`, fields missing from deserialised input are filled in from
/// the [`Default`] implementation of this struct.
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
#[serde(default)]
pub struct FWConfig<F : Float> {
    /// Tolerance for branch-and-bound new spike location discovery.
    ///
    /// The solver multiplies this by the regulariser's `tolerance_scaling()` before use.
    pub tolerance : Tolerance<F>,
    /// Inner problem solution configuration. Has to have `method` set to [`InnerMethod::FB`]
    /// as the conditional gradient subproblems' optimality conditions do not in general have an
    /// invertible Newton derivative for SSN.
    ///
    /// The field `tolerance_mult` scales the current tolerance to give the stopping target
    /// of the inner solver in the fully corrective variant.
    pub inner : InnerSettings<F>,
    /// Variant of the conditional gradient method
    pub variant : FWVariant,
    /// Settings for branch and bound refinement when looking for predual maxima.
    ///
    /// The fields `tolerance_mult` (times the current tolerance) and `max_steps` are passed
    /// to [`RegTermFW::find_insertion`].
    pub refinement : RefinementSettings<F>,
    /// Spike merging heuristic, applied after each weight optimisation step.
    pub merging : SpikeMergingMethod<F>,
}

/// Conditional gradient method variant; see also [`FWConfig`].
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
#[allow(dead_code)]
pub enum FWVariant {
    /// The “fully corrective” method: Algorithm 2 of Walter-Pieper.
    ///
    /// A zero-mass spike is inserted at the newly found point, after which the weights
    /// are optimised by the inner method until its stopping target is reached.
    FullyCorrective,
    /// The “relaxed” method of Bredies–Pikkarainen.
    ///
    /// The new spike is inserted with [`RegTermFW::relaxed_insert`], and the inner weight
    /// optimisation is run with `max_iter = 1` overriding
    /// `FWConfig.inner.iterator_options.max_iter`. The configuration itself is not modified.
    Relaxed,
}

impl<F : Float> Default for FWConfig<F> {
    /// Returns the default configuration: the [`FWVariant::FullyCorrective`] variant,
    /// with every other setting taken from the [`Default`] implementation of its own type.
    fn default() -> Self {
        let variant = FWVariant::FullyCorrective;
        // Fields are listed in declaration order.
        Self {
            tolerance : Default::default(),
            inner : Default::default(),
            variant,
            refinement : Default::default(),
            merging : Default::default(),
        }
    }
}

/// Helper struct holding data precomputed for the finite-dimensional subproblem solver
/// [`WeightOptim::optimise_weights`].
///
/// The pre-initialisation is done by [`WeightOptim::prepare_optimise_weights`].
pub struct FindimData<F : Float> {
    /// ‖A‖^2; the implementations in this file compute it as the square of
    /// the bound returned by `opnorm_bound()` of the forward operator.
    opAnorm_squared : F,
    /// Bound $M_0$ from the Bredies–Pikkarainen article; the implementations in this file
    /// compute it as $\|b\|_2^2/(2α)$.
    m0 : F
}

/// Trait for finite dimensional weight optimisation.
///
/// The type parameters are the floating point type `F`, the forward operator type `A`,
/// the iterator factory type `I` of the inner method, and the dimension `N` of the domain.
pub trait WeightOptim<
    F : Float + ToNalgebraRealField,
    A : ForwardModel<Loc<F, N>, F>,
    I : AlgIteratorFactory<F>,
    const N : usize
> {

    /// Return a pre-initialisation struct for [`Self::optimise_weights`].
    ///
    /// The parameter `opA` is the forward operator $A$, and `b` is the data.
    fn prepare_optimise_weights(&self, opA : &A, b : &A::Observable) -> FindimData<F>;

    /// Solve the finite-dimensional weight optimisation problem for the 2-norm-squared data fidelity
    /// point source localisation problem.
    ///
    /// That is, we minimise
    /// <div>$$
    ///     μ ↦ \frac{1}{2}\|Aμ-b\|_w^2 + G(μ)
    /// $$</div>
    /// only with respect to the weights of $μ$. Here $G$ is a regulariser modelled by `Self`.
    ///
    /// The parameter `μ` is the discrete measure whose weights are to be optimised; it is
    /// updated in place. The `opA` parameter is the forward operator $A$, while `b` is as in
    /// the objective above. The method parameters are set in `inner` (see [`InnerSettings`]),
    /// while `iterator` is used to iterate the steps of the method. The parameter
    /// `findim_data` should be prepared using [`Self::prepare_optimise_weights`].
    ///
    /// Returns the number of iterations taken by the method configured in `inner`.
    fn optimise_weights<'a>(
        &self,
        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
        opA : &'a A,
        b : &A::Observable,
        findim_data : &FindimData<F>,
        inner : &InnerSettings<F>,
        iterator : I
    ) -> usize;
}

/// Trait for regularisation terms supported by [`pointsource_fw_reg`].
///
/// Besides the insertion routines below, an implementor has to be a [`RegTerm`], support
/// [`WeightOptim`], and be applicable to a discrete measure to yield the value of the
/// regularisation term.
pub trait RegTermFW<
    F : Float + ToNalgebraRealField,
    A : ForwardModel<Loc<F, N>, F>,
    I : AlgIteratorFactory<F>,
    const N : usize
> : RegTerm<F, N>
    + WeightOptim<F, A, I, N>
    + for<'a> Apply<&'a DiscreteMeasure<Loc<F, N>, F>, Output = F> {

    /// Returns $(x, g(x))$ for $x$ a new point to be inserted into $μ$, as determined by
    /// the regulariser.
    ///
    /// When called from [`pointsource_fw_reg`], the function is $g = -A\_\*(Aμ-b)$, i.e.,
    /// the negated preadjoint of $A$ applied to the residual.
    ///
    /// The parameters `refinement_tolerance` and `max_steps` are passed to relevant
    /// [`BTFN`] minimisation and maximisation routines.
    fn find_insertion(
        &self,
        g : &mut A::PreadjointCodomain,
        refinement_tolerance : F,
        max_steps : usize
    ) -> (Loc<F, N>, F);

    /// Insert point `ξ` into `μ` for the relaxed algorithm from Bredies–Pikkarainen.
    ///
    /// The parameters `g`, `ξ`, and `v_ξ` are the function and the point-value pair of
    /// [`Self::find_insertion`], `opA` is the forward operator $A$, and `findim_data` is
    /// the result of [`WeightOptim::prepare_optimise_weights`]. The measure `μ` is
    /// updated in place.
    fn relaxed_insert<'a>(
        &self,
        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
        g : &A::PreadjointCodomain,
        opA : &'a A,
        ξ : Loc<F, N>,
        v_ξ : F,
        findim_data : &FindimData<F>
    );
}

#[replace_float_literals(F::cast_from(literal))]
impl<F : Float + ToNalgebraRealField, A, I, const N : usize> WeightOptim<F, A, I, N>
for RadonRegTerm<F>
where I : AlgIteratorFactory<F>,
      A : ForwardModel<Loc<F, N>, F> {

    /// Precomputes the squared operator norm bound of `opA` and the bound
    /// $M_0 = \|b\|_2^2/(2α)$.
    fn prepare_optimise_weights(&self, opA : &A, b : &A::Observable) -> FindimData<F> {
        let opnorm = opA.opnorm_bound();
        let α = self.α();
        FindimData{
            opAnorm_squared : opnorm.powi(2),
            m0 : b.norm2_squared() / (2.0 * α),
        }
    }

    /// Optimises the weights of `μ` in place with [`quadratic_unconstrained`], returning
    /// the number of iterations that the inner method took.
    fn optimise_weights<'a>(
        &self,
        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
        opA : &'a A,
        b : &A::Observable,
        findim_data : &FindimData<F>,
        inner : &InnerSettings<F>,
        iterator : I
    ) -> usize {

        // Step length of the inner method. The available `findim_data.opAnorm_squared`
        // estimates the norm of $A$ from ℳ(Ω) to ℝ^n, which is a good estimate for the
        // matrix norm from ℝ^m to ℝ^n when ℝ^m is equipped with the 1-norm. The inner
        // method needs the 2-norm. Since ‖x‖_1 ≤ √m ‖x‖_2, we have
        //      ‖A‖_{2,2} ≤ √m ‖A‖_{1,2}.
        // As we are interested in ‖A_*A‖, the squared estimate is scaled by m itself,
        // where m is the number of spikes.
        let n_spikes = F::cast_from(μ.len());
        let inner_τ = inner.τ0 / (findim_data.opAnorm_squared * n_spikes);

        // Quadratic model of the data term with respect to the masses of μ,
        // with the current masses as the starting point.
        let (quad_matrix, quad_vector) = opA.findim_quadratic_model(&μ, b);
        let mut masses = μ.masses_dvector();

        let iters = quadratic_unconstrained(inner.method, &quad_matrix, &quad_vector,
                                            self.α(), &mut masses, inner_τ, iterator);

        // Write the optimised masses back into μ.
        μ.set_masses_dvector(&masses);

        iters
    }
}

#[replace_float_literals(F::cast_from(literal))]
impl<F : Float + ToNalgebraRealField, A, I, S, GA, BTA, const N : usize> RegTermFW<F, A, I, N>
for RadonRegTerm<F>
where Cube<F, N> : P2Minimise<Loc<F, N>, F>,
      I : AlgIteratorFactory<F>,
      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>,
      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>> {

    /// Returns the point where `g` is largest in absolute value, along with the (signed)
    /// value of `g` there: the minimiser is returned when the minimum is negative and
    /// strictly exceeds the maximum in magnitude, and the maximiser otherwise.
    fn find_insertion(
        &self,
        g : &mut A::PreadjointCodomain,
        refinement_tolerance : F,
        max_steps : usize
    ) -> (Loc<F, N>, F) {
        // The maximisation is run before the minimisation, as `g` is passed mutably to both.
        let (x_hi, v_hi) = g.maximise(refinement_tolerance, max_steps);
        let (x_lo, v_lo) = g.minimise(refinement_tolerance, max_steps);
        let minimum_dominates = v_lo < 0.0 && -v_lo > v_hi;
        if minimum_dominates {
            (x_lo, v_lo)
        } else {
            (x_hi, v_hi)
        }
    }

    /// Replaces `μ` by the combination $(1-s)μ + sδ$, where $δ$ is a spike at `ξ`, and
    /// the step length $s$ is at most one.
    fn relaxed_insert<'a>(
        &self,
        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
        g : &A::PreadjointCodomain,
        opA : &'a A,
        ξ : Loc<F, N>,
        v_ξ : F,
        findim_data : &FindimData<F>
    ) {
        let α = self.0;
        let m0 = findim_data.m0;

        // Linear up to the bound m0, quadratic beyond it.
        let φ = |t : F| {
            if t <= m0 {
                α * t
            } else {
                α / (2.0 * m0) * (t*t + m0 * m0)
            }
        };

        // Mass of the candidate spike: zero unless |v_ξ| exceeds α.
        let mass = if v_ξ.abs() <= α { 0.0 } else { m0 / α * v_ξ };
        let δ = DeltaMeasure { x : ξ, α : mass };

        // Difference of μ and the candidate spike, both as paired with g,
        // and as mapped through the forward operator.
        let pairing_diff = μ.apply(g) - δ.apply(g);
        let image_diff = opA.apply(&*μ) - opA.apply(&δ);
        let denominator = image_diff.norm2_squared();

        // Step length, capped at one; a zero denominator yields the full step.
        let s = if denominator == 0.0 {
            1.0
        } else {
            let numerator = α * μ.norm(Radon) - φ(mass.abs()) - pairing_diff;
            1.0.min(numerator / denominator)
        };

        *μ *= 1.0 - s;
        *μ += δ * s;
    }
}

#[replace_float_literals(F::cast_from(literal))]
impl<F : Float + ToNalgebraRealField, A, I, const N : usize> WeightOptim<F, A, I, N>
for NonnegRadonRegTerm<F>
where I : AlgIteratorFactory<F>,
      A : ForwardModel<Loc<F, N>, F> {

    /// Precomputes the squared operator norm bound of `opA` and the bound
    /// $M_0 = \|b\|_2^2/(2α)$.
    fn prepare_optimise_weights(&self, opA : &A, b : &A::Observable) -> FindimData<F> {
        let opnorm = opA.opnorm_bound();
        let α = self.α();
        FindimData{
            opAnorm_squared : opnorm.powi(2),
            m0 : b.norm2_squared() / (2.0 * α),
        }
    }

    /// Optimises the weights of `μ` in place with [`quadratic_nonneg`], returning
    /// the number of iterations that the inner method took.
    fn optimise_weights<'a>(
        &self,
        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
        opA : &'a A,
        b : &A::Observable,
        findim_data : &FindimData<F>,
        inner : &InnerSettings<F>,
        iterator : I
    ) -> usize {

        // Step length of the inner method. The available `findim_data.opAnorm_squared`
        // estimates the norm of $A$ from ℳ(Ω) to ℝ^n, which is a good estimate for the
        // matrix norm from ℝ^m to ℝ^n when ℝ^m is equipped with the 1-norm. The inner
        // method needs the 2-norm. Since ‖x‖_1 ≤ √m ‖x‖_2, we have
        //      ‖A‖_{2,2} ≤ √m ‖A‖_{1,2}.
        // As we are interested in ‖A_*A‖, the squared estimate is scaled by m itself,
        // where m is the number of spikes.
        let n_spikes = F::cast_from(μ.len());
        let inner_τ = inner.τ0 / (findim_data.opAnorm_squared * n_spikes);

        // Quadratic model of the data term with respect to the masses of μ,
        // with the current masses as the starting point.
        let (quad_matrix, quad_vector) = opA.findim_quadratic_model(&μ, b);
        let mut masses = μ.masses_dvector();

        let iters = quadratic_nonneg(inner.method, &quad_matrix, &quad_vector,
                                     self.α(), &mut masses, inner_τ, iterator);

        // Write the optimised masses back into μ.
        μ.set_masses_dvector(&masses);

        iters
    }
}

#[replace_float_literals(F::cast_from(literal))]
impl<F : Float + ToNalgebraRealField, A, I, S, GA, BTA, const N : usize> RegTermFW<F, A, I, N>
for NonnegRadonRegTerm<F>
where Cube<F, N> : P2Minimise<Loc<F, N>, F>,
      I : AlgIteratorFactory<F>,
      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>,
      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>> {

    /// Returns the maximiser of `g` along with the maximum value; with the
    /// non-negativity constraint, the minimum of `g` is not considered.
    fn find_insertion(
        &self,
        g : &mut A::PreadjointCodomain,
        refinement_tolerance : F,
        max_steps : usize
    ) -> (Loc<F, N>, F) {
        let maximiser_and_value = g.maximise(refinement_tolerance, max_steps);
        maximiser_and_value
    }


    /// Replaces `μ` by the combination $(1-s)μ + sδ$, where $δ$ is a spike at `ξ`, and
    /// the step length $s$ is at most one.
    ///
    /// The computation is the same as that of the corresponding method of `RadonRegTerm`.
    fn relaxed_insert<'a>(
        &self,
        μ : &mut DiscreteMeasure<Loc<F, N>, F>,
        g : &A::PreadjointCodomain,
        opA : &'a A,
        ξ : Loc<F, N>,
        v_ξ : F,
        findim_data : &FindimData<F>
    ) {
        let α = self.0;
        let m0 = findim_data.m0;

        // Linear up to the bound m0, quadratic beyond it.
        let φ = |t : F| {
            if t <= m0 {
                α * t
            } else {
                α / (2.0 * m0) * (t*t + m0 * m0)
            }
        };

        // Mass of the candidate spike: zero unless |v_ξ| exceeds α.
        let mass = if v_ξ.abs() <= α { 0.0 } else { m0 / α * v_ξ };
        let δ = DeltaMeasure { x : ξ, α : mass };

        // Difference of μ and the candidate spike, both as paired with g,
        // and as mapped through the forward operator.
        let pairing_diff = μ.apply(g) - δ.apply(g);
        let image_diff = opA.apply(&*μ) - opA.apply(&δ);
        let denominator = image_diff.norm2_squared();

        // Step length, capped at one; a zero denominator yields the full step.
        let s = if denominator == 0.0 {
            1.0
        } else {
            let numerator = α * μ.norm(Radon) - φ(mass.abs()) - pairing_diff;
            1.0.min(numerator / denominator)
        };

        *μ *= 1.0 - s;
        *μ += δ * s;
    }
}


/// Solve point source localisation problem using a conditional gradient method
/// for the 2-norm-squared data fidelity, i.e., the problem
/// <div>$$
///     \min_μ \frac{1}{2}\|Aμ-b\|_w^2 + G(μ),
/// $$</div>
/// where $G$ is the regularisation term modelled by `reg`.
///
/// The `opA` parameter is the forward operator $A$, while `b` is as in the
/// objective above. The method parameters are set in `config` (see [`FWConfig`]), while
/// `iterator` is used to iterate the steps of the method, and `plotter` may be used to
/// save intermediate iteration states as images.
///
/// Returns the final iterate $μ$.
#[replace_float_literals(F::cast_from(literal))]
pub fn pointsource_fw_reg<'a, F, I, A, GA, BTA, S, Reg, const N : usize>(
    opA : &'a A,
    b : &A::Observable,
    reg : Reg,
    //domain : Cube<F, N>,
    config : &FWConfig<F>,
    iterator : I,
    mut plotter : SeqPlotter<F, N>,
) -> DiscreteMeasure<Loc<F, N>, F>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<IterInfo<F, N>>,
      for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>,
                                  //+ std::ops::Mul<F, Output=A::Observable>,  <-- FIXME: compiler overflow
      A::Observable : std::ops::MulAssign<F>,
      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>,
      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
      BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
      Cube<F, N>: P2Minimise<Loc<F, N>, F>,
      PlotLookup : Plotting<N>,
      DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>,
      Reg : RegTermFW<F, A, ValueIteratorFactory<F, AlgIteratorOptions>, N> {

    // Set up parameters
    // The configured tolerance is multiplied by the scaling factor of the regulariser;
    // presumably this is α for the regularisers of this file (TODO: confirm against the
    // `RegTerm` implementations).
    let tolerance = config.tolerance * reg.tolerance_scaling();
    let mut ε = tolerance.initial();
    let findim_data = reg.prepare_optimise_weights(opA, b);

    // Initialise operators
    let preadjA = opA.preadjoint();

    // Initialise iterates: the zero measure, whose residual Aμ-b is -b.
    let mut μ = DiscreteMeasure::new();
    let mut residual = -b;

    // Statistics accumulated between two verbose iterations; see `state.if_verbose` below.
    let mut inner_iters = 0;
    let mut this_iters = 0;
    let mut pruned = 0;
    let mut merged = 0;

    // Run the algorithm
    iterator.iterate(|state| {
        // Derive the tolerances of this iteration from the current ε, and then update ε
        // for the next iteration. The value used on this iteration is reported as `ε_prev`.
        let inner_tolerance = ε * config.inner.tolerance_mult;
        let refinement_tolerance = ε * config.refinement.tolerance_mult;
        let ε_prev = ε;
        ε = tolerance.update(ε, state.iteration());

        // Calculate smooth part of surrogate model: g = -A_*(Aμ-b).
        //
        // Using `std::mem::replace` here is not ideal, and expects that `empty_observable`
        // has no significant overhead. For some reason Rust doesn't allow us simply moving
        // the residual and replacing it below before the end of this closure.
        let r = std::mem::replace(&mut residual, opA.empty_observable());
        let mut g = -preadjA.apply(r);

        // Find the point to insert, along with the value of g at it, as determined by
        // the regulariser.
        let (ξ, v_ξ) = reg.find_insertion(&mut g, refinement_tolerance,
                                          config.refinement.max_steps);

        // Insert the new point according to the variant, and form the iterator options
        // for the subsequent weight optimisation.
        let inner_it = match config.variant {
            FWVariant::FullyCorrective => {
                // No point in optimising the weight here: the finite-dimensional algorithm is fast.
                μ += DeltaMeasure { x : ξ, α : 0.0 };
                config.inner.iterator_options.stop_target(inner_tolerance)
            },
            FWVariant::Relaxed => {
                // Perform a relaxed initialisation of μ
                reg.relaxed_insert(&mut μ, &g, opA, ξ, v_ξ, &findim_data);
                // Only a single inner iteration is taken.
                // The stop_target is only needed for the type system.
                AlgIteratorOptions{ max_iter : 1, .. config.inner.iterator_options}.stop_target(0.0)
            }
        };

        inner_iters += reg.optimise_weights(&mut μ, opA, b, &findim_data, &config.inner, inner_it);

        // Merge spikes and update residual for next step and `if_verbose` below.
        // The assertion guards the unsigned subtraction after it against underflow:
        // the number of spikes is expected not to decrease in merging.
        let n_before_merge = μ.len();
        residual = μ.merge_spikes_fitness(config.merging,
                                         |μ̃| opA.apply(μ̃) - b,
                                          A::Observable::norm2_squared);
        assert!(μ.len() >= n_before_merge);
        merged += μ.len() - n_before_merge;


        // Prune points with zero mass
        let n_before_prune = μ.len();
        μ.prune();
        debug_assert!(μ.len() <= n_before_prune);
        pruned += n_before_prune - μ.len();

        this_iters +=1;

        // On verbose iterations, plot the current state, and report the function value
        // along with the statistics accumulated since the previous report. The
        // statistics are reset after they have been stored in the report.
        state.if_verbose(|| {
            plotter.plot_spikes(
                format!("iter {} start", state.iteration()), &g,
                "".to_string(), None::<&A::PreadjointCodomain>,
                None, &μ
            );
            let res = IterInfo {
                value : residual.norm2_squared_div2() + reg.apply(&μ),
                n_spikes : μ.len(),
                inner_iters,
                this_iters,
                merged,
                pruned,
                ε : ε_prev,
                postprocessing : None,
            };
            inner_iters = 0;
            this_iters = 0;
            merged = 0;
            pruned = 0;
            res
        })
    });

    // Return final iterate
    μ
}

//
// Deprecated interface
//

/// Solve the point source localisation problem with the conditional gradient method
/// of [`pointsource_fw_reg`], using the regularisation term `NonnegRadonRegTerm(α)`.
///
/// The remaining parameters are passed on to [`pointsource_fw_reg`] unchanged.
#[deprecated(note = "Use `pointsource_fw_reg`")]
pub fn pointsource_fw<'a, F, I, A, GA, BTA, S, const N : usize>(
    opA : &'a A,
    b : &A::Observable,
    α : F,
    //domain : Cube<F, N>,
    config : &FWConfig<F>,
    iterator : I,
    plotter : SeqPlotter<F, N>,
) -> DiscreteMeasure<Loc<F, N>, F>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<IterInfo<F, N>>,
      for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>,
                                  //+ std::ops::Mul<F, Output=A::Observable>,  <-- FIXME: compiler overflow
      A::Observable : std::ops::MulAssign<F>,
      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>,
      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
      BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
      Cube<F, N>: P2Minimise<Loc<F, N>, F>,
      PlotLookup : Plotting<N>,
      DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F> {

    // The deprecated interface only knows the regularisation parameter.
    let reg = NonnegRadonRegTerm(α);
    pointsource_fw_reg(opA, b, reg, config, iterator, plotter)
}

/// Optimise the weights of `μ` with [`WeightOptim::optimise_weights`] of the
/// regularisation term `NonnegRadonRegTerm(α)`.
///
/// The remaining parameters are passed on unchanged. Returns the number of iterations
/// reported by the trait method.
#[deprecated(note = "Use `WeightOptim::optimise_weights`")]
pub fn optimise_weights<'a, F, A, I, const N : usize>(
    μ : &mut DiscreteMeasure<Loc<F, N>, F>,
    opA : &'a A,
    b : &A::Observable,
    α : F,
    findim_data : &FindimData<F>,
    inner : &InnerSettings<F>,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>,
      A : ForwardModel<Loc<F, N>, F>
{
    // The deprecated interface only knows the regularisation parameter.
    let reg = NonnegRadonRegTerm(α);
    reg.optimise_weights(μ, opA, b, findim_data, inner, iterator)
}