src/sliding_pdps.rs

branch
dev
changeset 35
b087e3eab191
child 36
fb911f72e698
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sliding_pdps.rs	Tue Dec 31 09:25:45 2024 -0500
@@ -0,0 +1,384 @@
+/*!
+Solver for the point source localisation problem using a sliding
+primal-dual proximal splitting method.
+*/
+
+use numeric_literals::replace_float_literals;
+use serde::{Serialize, Deserialize};
+//use colored::Colorize;
+//use nalgebra::{DVector, DMatrix};
+use std::iter::Iterator;
+
+use alg_tools::iterate::AlgIteratorFactory;
+use alg_tools::euclidean::Euclidean;
+use alg_tools::sets::Cube;
+use alg_tools::loc::Loc;
+use alg_tools::mapping::{Mapping, DifferentiableRealMapping, Instance};
+use alg_tools::norms::Norm;
+use alg_tools::direct_product::Pair;
+use alg_tools::bisection_tree::{
+    BTFN,
+    PreBTFN,
+    Bounds,
+    BTNodeLookup,
+    BTNode,
+    BTSearch,
+    P2Minimise,
+    SupportGenerator,
+    LocalAnalysis,
+    //Bounded,
+};
+use alg_tools::mapping::RealMapping;
+use alg_tools::nalgebra_support::ToNalgebraRealField;
+use alg_tools::linops::{
+    BoundedLinear, AXPY, GEMV, Adjointable, IdOp,
+};
+use alg_tools::convex::{Conjugable, Prox};
+use alg_tools::norms::{L2, Linfinity, PairNorm};
+
+use crate::types::*;
+use crate::measures::{DiscreteMeasure, Radon, RNDM};
+use crate::measures::merging::SpikeMerging;
+use crate::forward_model::{
+    ForwardModel,
+    AdjointProductPairBoundedBy,
+    LipschitzValues,
+};
+// use crate::transport::TransportLipschitz;
+use crate::seminorms::DiscreteMeasureOp;
+//use crate::tolerance::Tolerance;
+use crate::plot::{
+    SeqPlotter,
+    Plotting,
+    PlotLookup
+};
+use crate::fb::*;
+use crate::regularisation::SlidingRegTerm;
+// use crate::dataterm::L2Squared;
+use crate::sliding_fb::{
+    TransportConfig,
+    TransportStepLength,
+    initial_transport,
+    aposteriori_transport,
+};
+use crate::dataterm::{calculate_residual, calculate_residual2};
+
+/// Settings for [`pointsource_sliding_pdps_pair`].
+#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
+#[serde(default)]
+pub struct SlidingPDPSConfig<F : Float> {
+    /// Primal step length scaling.
+    pub τ0 : F,
+    /// Primal step length scaling.
+    pub σp0 : F,
+    /// Dual step length scaling.
+    pub σd0 : F,
+    /// Transport parameters
+    pub transport : TransportConfig<F>,
+    /// Generic parameters
+    pub insertion : FBGenericConfig<F>,
+}
+
+#[replace_float_literals(F::cast_from(literal))]
+impl<F : Float> Default for SlidingPDPSConfig<F> {
+    fn default() -> Self {
+        let τ0 = 0.99;
+        SlidingPDPSConfig {
+            τ0,
+            σd0 : 0.1,
+            σp0 : 0.99,
+            transport : Default::default(),
+            insertion : Default::default()
+        }
+    }
+}
+
+type MeasureZ<F, Z, const N : usize> = Pair<RNDM<F, N>, Z>;
+
+/// Iteratively solve the pointsource localisation with an additional variable
+/// using sliding primal-dual proximal splitting
+///
+/// The parametrisation is as for [`crate::forward_pdps::pointsource_forward_pdps_pair`].
+#[replace_float_literals(F::cast_from(literal))]
+pub fn pointsource_sliding_pdps_pair<
+    'a, F, I, A, GA, 𝒟, BTA, BT𝒟, G𝒟, S, K, Reg, Z, R, Y, /*KOpM, */ KOpZ, H, const N : usize
+>(
+    opA : &'a A,
+    b : &A::Observable,
+    reg : Reg,
+    op𝒟 : &'a 𝒟,
+    config : &SlidingPDPSConfig<F>,
+    iterator : I,
+    mut plotter : SeqPlotter<F, N>,
+    //opKμ : KOpM,
+    opKz : &KOpZ,
+    fnR : &R,
+    fnH : &H,
+    mut z : Z,
+    mut y : Y,
+) -> MeasureZ<F, Z, N>
+where
+    F : Float + ToNalgebraRealField,
+    I : AlgIteratorFactory<IterInfo<F, N>>,
+    for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + Instance<A::Observable>,
+    for<'b> A::Preadjoint<'b> : LipschitzValues<FloatType=F>,
+    BTFN<F, GA, BTA, N> : DifferentiableRealMapping<F, N>,
+    GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
+    A : ForwardModel<
+            MeasureZ<F, Z, N>,
+            F,
+            PairNorm<Radon, L2, L2>,
+            PreadjointCodomain = Pair<BTFN<F, GA, BTA, N>, Z>,
+        >
+        + AdjointProductPairBoundedBy<MeasureZ<F, Z, N>, 𝒟, IdOp<Z>, FloatType=F>,
+    BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
+    G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone,
+    𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>,
+                                        Codomain = BTFN<F, G𝒟, BT𝒟, N>>,
+    BT𝒟 : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
+    S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>
+        + DifferentiableRealMapping<F, N>,
+    K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
+        //+ Differentiable<Loc<F, N>, Derivative=Loc<F,N>>,
+    BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
+    Cube<F, N>: P2Minimise<Loc<F, N>, F>,
+    PlotLookup : Plotting<N>,
+    RNDM<F, N> : SpikeMerging<F>,
+    Reg : SlidingRegTerm<F, N>,
+    // KOpM : Linear<RNDM<F, N>, Codomain=Y>
+    //     + GEMV<F, RNDM<F, N>>
+    //     + Preadjointable<
+    //         RNDM<F, N>, Y,
+    //         PreadjointCodomain = BTFN<F, GA, BTA, N>,
+    //     >
+    //     + TransportLipschitz<L2Squared, FloatType=F>
+    //     + AdjointProductBoundedBy<RNDM<F, N>, 𝒟, FloatType=F>,
+    // for<'b> KOpM::Preadjoint<'b> : GEMV<F, Y>,
+    // Since Z is Hilbert, we may just as well use adjoints for K_z.
+    KOpZ : BoundedLinear<Z, L2, L2, F, Codomain=Y>
+        + GEMV<F, Z>
+        + Adjointable<Z, Y, AdjointCodomain = Z>,
+    for<'b> KOpZ::Adjoint<'b> : GEMV<F, Y>,
+    Y : AXPY<F> + Euclidean<F, Output=Y> + Clone + ClosedAdd,
+    for<'b> &'b Y : Instance<Y>,
+    Z : AXPY<F, Owned=Z> + Euclidean<F, Output=Z> + Clone + Norm<F, L2>,
+    for<'b> &'b Z : Instance<Z>,
+    R : Prox<Z, Codomain=F>,
+    H : Conjugable<Y, F, Codomain=F>,
+    for<'b> H::Conjugate<'b> : Prox<Y>,
+{
+
+    // Check parameters
+    assert!(config.τ0 > 0.0 &&
+            config.τ0 < 1.0 &&
+            config.σp0 > 0.0 &&
+            config.σp0 < 1.0 &&
+            config.σd0 > 0.0 &&
+            config.σp0 * config.σd0 <= 1.0,
+            "Invalid step length parameters");
+    config.transport.check();
+
+    // Initialise iterates
+    let mut μ = DiscreteMeasure::new();
+    let mut γ1 = DiscreteMeasure::new();
+    let mut residual = calculate_residual(Pair(&μ, &z), opA, b);
+    let zero_z = z.similar_origin();
+
+    // Set up parameters
+    let op𝒟norm = op𝒟.opnorm_bound(Radon, Linfinity);
+    // TODO: maybe this PairNorm doesn't make sense here?
+    let opAnorm = opA.opnorm_bound(PairNorm(Radon, L2, L2), L2);
+    let bigθ = 0.0; //opKμ.transport_lipschitz_factor(L2Squared);
+    let bigM = 0.0; //opKμ.adjoint_product_bound(&op𝒟).unwrap().sqrt();
+    let nKz = opKz.opnorm_bound(L2, L2);
+    let ℓ = 0.0;
+    let opIdZ = IdOp::new();
+    let (l, l_z) = opA.adjoint_product_pair_bound(&op𝒟, &opIdZ).unwrap();
+    // We need to satisfy
+    //
+    //     τσ_dM(1-σ_p L_z)/(1 - τ L) + [σ_p L_z + σ_pσ_d‖K_z‖^2] < 1
+    //                                  ^^^^^^^^^^^^^^^^^^^^^^^^^
+    // with 1 > σ_p L_z and 1 > τ L.
+    //
+    // To do so, we first solve σ_p and σ_d from standard PDPS step length condition
+    // ^^^^^ < 1. then we solve τ from  the rest.
+    let σ_d = config.σd0 / nKz;
+    let σ_p = config.σp0 / (l_z + config.σd0 * nKz);
+    // Observe that = 1 - ^^^^^^^^^^^^^^^^^^^^^ = 1 - σ_{p,0}
+    // We get the condition τσ_d M (1-σ_p L_z) < (1-σ_{p,0})*(1-τ L)
+    // ⟺ τ [ σ_d M (1-σ_p L_z) + (1-σ_{p,0}) L ] < (1-σ_{p,0})
+    let φ = 1.0 - config.σp0;
+    let a = 1.0 - σ_p * l_z;
+    let τ = config.τ0 * φ / ( σ_d * bigM * a + φ * l );
+    let ψ = 1.0 - τ * l;
+    let β = σ_p * config.σd0 * nKz / a; // σ_p * σ_d * (nKz * nK_z) / a;
+    assert!(β < 1.0);
+    // Now we need κ‖K_μ(π_♯^1 - π_♯^0)γ‖^2 ≤ (1/θ - τ[ℓ_v + ℓ]) ∫ c_2 dγ for κ defined as:
+    let κ = σ_d * ψ / ((1.0 - β) * ψ - τ * σ_d * bigM);
+    //  The factor two in the manuscript disappears due to the definition of 𝚹 being
+    // for ‖x-y‖₂² instead of c_2(x, y)=‖x-y‖₂²/2.
+    let calculate_θ = |ℓ_v, max_transport| {
+        config.transport.θ0 / (τ*(ℓ + ℓ_v) + κ * bigθ * max_transport)
+    };
+    let mut θ_or_adaptive = match opA.preadjoint().value_diff_unit_lipschitz_factor() {
+        // We only estimate w (the uniform Lipschitz for of v), if we also estimate ℓ_v
+        // (the uniform Lipschitz factor of ∇v).
+        // We assume that the residual is decreasing.
+        Some(ℓ_v0) => TransportStepLength::AdaptiveMax{
+            l: ℓ_v0 * b.norm2(),
+            max_transport : 0.0,
+            g : calculate_θ
+        },
+        None => TransportStepLength::FullyAdaptive{
+            l : 0.0,
+            max_transport : 0.0,
+            g : calculate_θ
+        },
+    };
+    // Acceleration is not currently supported
+    // let γ = dataterm.factor_of_strong_convexity();
+    let ω = 1.0;
+
+    // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled
+    // by τ compared to the conditional gradient approach.
+    let tolerance = config.insertion.tolerance * τ * reg.tolerance_scaling();
+    let mut ε = tolerance.initial();
+
+    let starH = fnH.conjugate();
+
+    // Statistics
+    let full_stats = |residual : &A::Observable, μ : &RNDM<F, N>, z : &Z, ε, stats| IterInfo {
+        value : residual.norm2_squared_div2() + fnR.apply(z)
+                + reg.apply(μ) + fnH.apply(/* opKμ.apply(μ) + */ opKz.apply(z)),
+        n_spikes : μ.len(),
+        ε,
+        // postprocessing: config.insertion.postprocessing.then(|| μ.clone()),
+        .. stats
+    };
+    let mut stats = IterInfo::new();
+
+    // Run the algorithm
+    for state in iterator.iter_init(|| full_stats(&residual, &μ, &z, ε, stats.clone())) {
+        // Calculate initial transport
+        let Pair(v, _) = opA.preadjoint().apply(&residual);
+        //opKμ.preadjoint().apply_add(&mut v, y);
+        let z_base = z.clone();
+        // We want to proceed as in Example 4.12 but with v and v̆ as in §5.
+        // With A(ν, z) = A_μ ν + A_z z, following Example 5.1, we have
+        // P_ℳ[F'(ν, z) + Ξ(ν, z, y)]= A_ν^*[A_ν ν + A_z z] + K_μ ν = A_ν^*A(ν, z) + K_μ ν,
+        // where A_ν^* becomes a multiplier.
+        // This is much easier with K_μ = 0, which is the only reason why are enforcing it.
+        // TODO: Write a version of initial_transport that can deal with K_μ ≠ 0.
+ 
+        let (μ_base_masses, mut μ_base_minus_γ0) = initial_transport(
+            &mut γ1, &mut μ, |ν| opA.apply(Pair(ν, &z)),
+            ε, τ, &mut θ_or_adaptive, opAnorm,
+            v, &config.transport,
+        );
+
+        // Solve finite-dimensional subproblem several times until the dual variable for the
+        // regularisation term conforms to the assumptions made for the transport above.
+        let (d, _within_tolerances, Pair(τv̆, τz̆)) = 'adapt_transport: loop {
+            // Calculate τv̆ = τA_*(A[μ_transported + μ_transported_base]-b)
+            let residual_μ̆ = calculate_residual2(Pair(&γ1, &z),
+                                                 Pair(&μ_base_minus_γ0, &zero_z),
+                                                 opA, b);
+            let Pair(τv̆, τz) = opA.preadjoint().apply(residual_μ̆ * τ);
+            // opKμ.preadjoint().gemv(&mut τv̆, τ, y, 1.0);
+
+            // Construct μ^{k+1} by solving finite-dimensional subproblems and insert new spikes.
+            let (d, within_tolerances) = insert_and_reweigh(
+                &mut μ, &τv̆, &γ1, Some(&μ_base_minus_γ0),
+                op𝒟, op𝒟norm,
+                τ, ε, &config.insertion,
+                &reg, &state, &mut stats,
+            );
+
+            // A posteriori transport adaptation.
+            // TODO: this does not properly treat v^{k+1} - v̆^k that depends on z^{k+1}!
+            if aposteriori_transport(
+                &mut γ1, &mut μ, &mut μ_base_minus_γ0, &μ_base_masses,
+                ε, &config.transport
+            ) {
+                break 'adapt_transport (d, within_tolerances, Pair(τv̆, τz))
+            }
+        };
+
+        stats.untransported_fraction = Some({
+            assert_eq!(μ_base_masses.len(), γ1.len());
+            let (a, b) = stats.untransported_fraction.unwrap_or((0.0, 0.0));
+            let source = μ_base_masses.iter().map(|v| v.abs()).sum();
+            (a + μ_base_minus_γ0.norm(Radon), b + source)
+        });
+        stats.transport_error = Some({
+            assert_eq!(μ_base_masses.len(), γ1.len());
+            let (a, b) = stats.transport_error.unwrap_or((0.0, 0.0));
+            (a + μ.dist_matching(&γ1), b + γ1.norm(Radon))
+        });
+
+        // // Merge spikes.
+        // // This expects the prune below to prune γ.
+        // // TODO: This may not work correctly in all cases.
+        // let ins = &config.insertion;
+        // if ins.merge_now(&state) {
+        //     if let SpikeMergingMethod::None = ins.merging {
+        //     } else {
+        //         stats.merged += μ.merge_spikes(ins.merging, |μ_candidate| {
+        //             let ν = μ_candidate.sub_matching(&γ1)-&μ_base_minus_γ0;
+        //             let mut d = &τv̆ + op𝒟.preapply(ν);
+        //             reg.verify_merge_candidate(&mut d, μ_candidate, τ, ε, ins)
+        //         });
+        //     }
+        // }
+
+        // Prune spikes with zero weight. To maintain correct ordering between μ and γ1, also the
+        // latter needs to be pruned when μ is.
+        // TODO: This could do with a two-vector Vec::retain to avoid copies.
+        let μ_new = DiscreteMeasure::from_iter(μ.iter_spikes().filter(|δ| δ.α != F::ZERO).cloned());
+        if μ_new.len() != μ.len() {
+            let mut μ_iter = μ.iter_spikes();
+            γ1.prune_by(|_| μ_iter.next().unwrap().α != F::ZERO);
+            stats.pruned += μ.len() - μ_new.len();
+            μ = μ_new;
+        }
+
+        // Do z variable primal update
+        z.axpy(-σ_p/τ, τz̆, 1.0); // TODO: simplify nasty factors
+        opKz.adjoint().gemv(&mut z, -σ_p, &y, 1.0);
+        z = fnR.prox(σ_p, z);
+        // Do dual update
+        // opKμ.gemv(&mut y, σ_d*(1.0 + ω), &μ, 1.0);    // y = y + σ_d K[(1+ω)(μ,z)^{k+1}]
+        opKz.gemv(&mut y, σ_d*(1.0 + ω), &z, 1.0);
+        // opKμ.gemv(&mut y, -σ_d*ω, μ_base, 1.0);// y = y + σ_d K[(1+ω)(μ,z)^{k+1} - ω (μ,z)^k]-b
+        opKz.gemv(&mut y, -σ_d*ω, z_base, 1.0);// y = y + σ_d K[(1+ω)(μ,z)^{k+1} - ω (μ,z)^k]-b
+        y = starH.prox(σ_d, y);
+
+        // Update residual
+        residual = calculate_residual(Pair(&μ, &z), opA, b);
+
+        // Update step length parameters
+        // let ω = pdpsconfig.acceleration.accelerate(&mut τ, &mut σ, γ);
+
+        // Give statistics if requested
+        let iter = state.iteration();
+        stats.this_iters += 1;
+
+        state.if_verbose(|| {
+            plotter.plot_spikes(iter, Some(&d), Some(&τv̆), &μ);
+            full_stats(&residual, &μ, &z, ε, std::mem::replace(&mut stats, IterInfo::new()))
+        });
+
+        // Update main tolerance for next iteration
+        ε = tolerance.update(ε, iter);
+    }
+
+    let fit = |μ̃ : &RNDM<F, N>| {
+        (opA.apply(Pair(μ̃, &z))-b).norm2_squared_div2()
+        //+ fnR.apply(z) + reg.apply(μ)
+        + fnH.apply(/* opKμ.apply(&μ̃) + */ opKz.apply(&z))
+    };
+
+    μ.merge_spikes_fitness(config.insertion.merging, fit, |&v| v);
+    μ.prune();
+    Pair(μ, z)
+}

mercurial