Mon, 06 Jan 2025 11:32:57 -0500
Factor fix
/*! Solver for the point source localisation problem using a sliding primal-dual proximal splitting method. */ use numeric_literals::replace_float_literals; use serde::{Serialize, Deserialize}; //use colored::Colorize; //use nalgebra::{DVector, DMatrix}; use std::iter::Iterator; use alg_tools::iterate::AlgIteratorFactory; use alg_tools::euclidean::Euclidean; use alg_tools::sets::Cube; use alg_tools::loc::Loc; use alg_tools::mapping::{Mapping, DifferentiableRealMapping, Instance}; use alg_tools::norms::Norm; use alg_tools::direct_product::Pair; use alg_tools::bisection_tree::{ BTFN, PreBTFN, Bounds, BTNodeLookup, BTNode, BTSearch, P2Minimise, SupportGenerator, LocalAnalysis, //Bounded, }; use alg_tools::mapping::RealMapping; use alg_tools::nalgebra_support::ToNalgebraRealField; use alg_tools::linops::{ BoundedLinear, AXPY, GEMV, Adjointable, IdOp, }; use alg_tools::convex::{Conjugable, Prox}; use alg_tools::norms::{L2, Linfinity, PairNorm}; use crate::types::*; use crate::measures::{DiscreteMeasure, Radon, RNDM}; use crate::measures::merging::SpikeMerging; use crate::forward_model::{ ForwardModel, AdjointProductPairBoundedBy, LipschitzValues, }; // use crate::transport::TransportLipschitz; use crate::seminorms::DiscreteMeasureOp; //use crate::tolerance::Tolerance; use crate::plot::{ SeqPlotter, Plotting, PlotLookup }; use crate::fb::*; use crate::regularisation::SlidingRegTerm; // use crate::dataterm::L2Squared; use crate::sliding_fb::{ TransportConfig, TransportStepLength, initial_transport, aposteriori_transport, }; use crate::dataterm::{calculate_residual, calculate_residual2}; /// Settings for [`pointsource_sliding_pdps_pair`]. #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] #[serde(default)] pub struct SlidingPDPSConfig<F : Float> { /// Primal step length scaling. pub τ0 : F, /// Primal step length scaling. pub σp0 : F, /// Dual step length scaling. pub σd0 : F, /// Transport parameters pub transport : TransportConfig<F>, /// Generic parameters pub insertion : FBGenericConfig<F>, } #[replace_float_literals(F::cast_from(literal))] impl<F : Float> Default for SlidingPDPSConfig<F> { fn default() -> Self { let τ0 = 0.99; SlidingPDPSConfig { τ0, σd0 : 0.1, σp0 : 0.99, transport : Default::default(), insertion : Default::default() } } } type MeasureZ<F, Z, const N : usize> = Pair<RNDM<F, N>, Z>; /// Iteratively solve the pointsource localisation with an additional variable /// using sliding primal-dual proximal splitting /// /// The parametrisation is as for [`crate::forward_pdps::pointsource_forward_pdps_pair`]. #[replace_float_literals(F::cast_from(literal))] pub fn pointsource_sliding_pdps_pair< 'a, F, I, A, GA, 𝒟, BTA, BT𝒟, G𝒟, S, K, Reg, Z, R, Y, /*KOpM, */ KOpZ, H, const N : usize >( opA : &'a A, b : &A::Observable, reg : Reg, op𝒟 : &'a 𝒟, config : &SlidingPDPSConfig<F>, iterator : I, mut plotter : SeqPlotter<F, N>, //opKμ : KOpM, opKz : &KOpZ, fnR : &R, fnH : &H, mut z : Z, mut y : Y, ) -> MeasureZ<F, Z, N> where F : Float + ToNalgebraRealField, I : AlgIteratorFactory<IterInfo<F, N>>, for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + Instance<A::Observable>, for<'b> A::Preadjoint<'b> : LipschitzValues<FloatType=F>, BTFN<F, GA, BTA, N> : DifferentiableRealMapping<F, N>, GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, A : ForwardModel< MeasureZ<F, Z, N>, F, PairNorm<Radon, L2, L2>, PreadjointCodomain = Pair<BTFN<F, GA, BTA, N>, Z>, > + AdjointProductPairBoundedBy<MeasureZ<F, Z, N>, 𝒟, IdOp<Z>, FloatType=F>, BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>, Codomain = BTFN<F, G𝒟, BT𝒟, N>>, BT𝒟 : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N> + DifferentiableRealMapping<F, N>, K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, //+ Differentiable<Loc<F, N>, Derivative=Loc<F,N>>, BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, Cube<F, N>: P2Minimise<Loc<F, N>, F>, PlotLookup : Plotting<N>, RNDM<F, N> : SpikeMerging<F>, Reg : SlidingRegTerm<F, N>, // KOpM : Linear<RNDM<F, N>, Codomain=Y> // + GEMV<F, RNDM<F, N>> // + Preadjointable< // RNDM<F, N>, Y, // PreadjointCodomain = BTFN<F, GA, BTA, N>, // > // + TransportLipschitz<L2Squared, FloatType=F> // + AdjointProductBoundedBy<RNDM<F, N>, 𝒟, FloatType=F>, // for<'b> KOpM::Preadjoint<'b> : GEMV<F, Y>, // Since Z is Hilbert, we may just as well use adjoints for K_z. KOpZ : BoundedLinear<Z, L2, L2, F, Codomain=Y> + GEMV<F, Z> + Adjointable<Z, Y, AdjointCodomain = Z>, for<'b> KOpZ::Adjoint<'b> : GEMV<F, Y>, Y : AXPY<F> + Euclidean<F, Output=Y> + Clone + ClosedAdd, for<'b> &'b Y : Instance<Y>, Z : AXPY<F, Owned=Z> + Euclidean<F, Output=Z> + Clone + Norm<F, L2>, for<'b> &'b Z : Instance<Z>, R : Prox<Z, Codomain=F>, H : Conjugable<Y, F, Codomain=F>, for<'b> H::Conjugate<'b> : Prox<Y>, { // Check parameters assert!(config.τ0 > 0.0 && config.τ0 < 1.0 && config.σp0 > 0.0 && config.σp0 < 1.0 && config.σd0 > 0.0 && config.σp0 * config.σd0 <= 1.0, "Invalid step length parameters"); config.transport.check(); // Initialise iterates let mut μ = DiscreteMeasure::new(); let mut γ1 = DiscreteMeasure::new(); let mut residual = calculate_residual(Pair(&μ, &z), opA, b); let zero_z = z.similar_origin(); // Set up parameters let op𝒟norm = op𝒟.opnorm_bound(Radon, Linfinity); // TODO: maybe this PairNorm doesn't make sense here? let opAnorm = opA.opnorm_bound(PairNorm(Radon, L2, L2), L2); let bigθ = 0.0; //opKμ.transport_lipschitz_factor(L2Squared); let bigM = 0.0; //opKμ.adjoint_product_bound(&op𝒟).unwrap().sqrt(); let nKz = opKz.opnorm_bound(L2, L2); let ℓ = 0.0; let opIdZ = IdOp::new(); let (l, l_z) = opA.adjoint_product_pair_bound(&op𝒟, &opIdZ).unwrap(); // We need to satisfy // // τσ_dM(1-σ_p L_z)/(1 - τ L) + [σ_p L_z + σ_pσ_d‖K_z‖^2] < 1 // ^^^^^^^^^^^^^^^^^^^^^^^^^ // with 1 > σ_p L_z and 1 > τ L. // // To do so, we first solve σ_p and σ_d from standard PDPS step length condition // ^^^^^ < 1. then we solve τ from the rest. let σ_d = config.σd0 / nKz; let σ_p = config.σp0 / (l_z + config.σd0 * nKz); // Observe that = 1 - ^^^^^^^^^^^^^^^^^^^^^ = 1 - σ_{p,0} // We get the condition τσ_d M (1-σ_p L_z) < (1-σ_{p,0})*(1-τ L) // ⟺ τ [ σ_d M (1-σ_p L_z) + (1-σ_{p,0}) L ] < (1-σ_{p,0}) let φ = 1.0 - config.σp0; let a = 1.0 - σ_p * l_z; let τ = config.τ0 * φ / ( σ_d * bigM * a + φ * l ); let ψ = 1.0 - τ * l; let β = σ_p * config.σd0 * nKz / a; // σ_p * σ_d * (nKz * nK_z) / a; assert!(β < 1.0); // Now we need κ‖K_μ(π_♯^1 - π_♯^0)γ‖^2 ≤ (1/θ - τ[ℓ_v + ℓ]) ∫ c_2 dγ for κ defined as: let κ = τ * σ_d * ψ / ((1.0 - β) * ψ - τ * σ_d * bigM); // The factor two in the manuscript disappears due to the definition of 𝚹 being // for ‖x-y‖₂² instead of c_2(x, y)=‖x-y‖₂²/2. let calculate_θ = |ℓ_v, max_transport| { config.transport.θ0 / (τ*(ℓ + ℓ_v) + κ * bigθ * max_transport) }; let mut θ_or_adaptive = match opA.preadjoint().value_diff_unit_lipschitz_factor() { // We only estimate w (the uniform Lipschitz for of v), if we also estimate ℓ_v // (the uniform Lipschitz factor of ∇v). // We assume that the residual is decreasing. Some(ℓ_v0) => TransportStepLength::AdaptiveMax{ l: ℓ_v0 * b.norm2(), max_transport : 0.0, g : calculate_θ }, None => TransportStepLength::FullyAdaptive{ l : 0.0, max_transport : 0.0, g : calculate_θ }, }; // Acceleration is not currently supported // let γ = dataterm.factor_of_strong_convexity(); let ω = 1.0; // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled // by τ compared to the conditional gradient approach. let tolerance = config.insertion.tolerance * τ * reg.tolerance_scaling(); let mut ε = tolerance.initial(); let starH = fnH.conjugate(); // Statistics let full_stats = |residual : &A::Observable, μ : &RNDM<F, N>, z : &Z, ε, stats| IterInfo { value : residual.norm2_squared_div2() + fnR.apply(z) + reg.apply(μ) + fnH.apply(/* opKμ.apply(μ) + */ opKz.apply(z)), n_spikes : μ.len(), ε, // postprocessing: config.insertion.postprocessing.then(|| μ.clone()), .. stats }; let mut stats = IterInfo::new(); // Run the algorithm for state in iterator.iter_init(|| full_stats(&residual, &μ, &z, ε, stats.clone())) { // Calculate initial transport let Pair(v, _) = opA.preadjoint().apply(&residual); //opKμ.preadjoint().apply_add(&mut v, y); let z_base = z.clone(); // We want to proceed as in Example 4.12 but with v and v̆ as in §5. // With A(ν, z) = A_μ ν + A_z z, following Example 5.1, we have // P_ℳ[F'(ν, z) + Ξ(ν, z, y)]= A_ν^*[A_ν ν + A_z z] + K_μ ν = A_ν^*A(ν, z) + K_μ ν, // where A_ν^* becomes a multiplier. // This is much easier with K_μ = 0, which is the only reason why are enforcing it. // TODO: Write a version of initial_transport that can deal with K_μ ≠ 0. let (μ_base_masses, mut μ_base_minus_γ0) = initial_transport( &mut γ1, &mut μ, |ν| opA.apply(Pair(ν, &z)), ε, τ, &mut θ_or_adaptive, opAnorm, v, &config.transport, ); // Solve finite-dimensional subproblem several times until the dual variable for the // regularisation term conforms to the assumptions made for the transport above. let (d, _within_tolerances, Pair(τv̆, τz̆)) = 'adapt_transport: loop { // Calculate τv̆ = τA_*(A[μ_transported + μ_transported_base]-b) let residual_μ̆ = calculate_residual2(Pair(&γ1, &z), Pair(&μ_base_minus_γ0, &zero_z), opA, b); let Pair(τv̆, τz) = opA.preadjoint().apply(residual_μ̆ * τ); // opKμ.preadjoint().gemv(&mut τv̆, τ, y, 1.0); // Construct μ^{k+1} by solving finite-dimensional subproblems and insert new spikes. let (d, within_tolerances) = insert_and_reweigh( &mut μ, &τv̆, &γ1, Some(&μ_base_minus_γ0), op𝒟, op𝒟norm, τ, ε, &config.insertion, ®, &state, &mut stats, ); // A posteriori transport adaptation. // TODO: this does not properly treat v^{k+1} - v̆^k that depends on z^{k+1}! if aposteriori_transport( &mut γ1, &mut μ, &mut μ_base_minus_γ0, &μ_base_masses, ε, &config.transport ) { break 'adapt_transport (d, within_tolerances, Pair(τv̆, τz)) } }; stats.untransported_fraction = Some({ assert_eq!(μ_base_masses.len(), γ1.len()); let (a, b) = stats.untransported_fraction.unwrap_or((0.0, 0.0)); let source = μ_base_masses.iter().map(|v| v.abs()).sum(); (a + μ_base_minus_γ0.norm(Radon), b + source) }); stats.transport_error = Some({ assert_eq!(μ_base_masses.len(), γ1.len()); let (a, b) = stats.transport_error.unwrap_or((0.0, 0.0)); (a + μ.dist_matching(&γ1), b + γ1.norm(Radon)) }); // // Merge spikes. // // This expects the prune below to prune γ. // // TODO: This may not work correctly in all cases. // let ins = &config.insertion; // if ins.merge_now(&state) { // if let SpikeMergingMethod::None = ins.merging { // } else { // stats.merged += μ.merge_spikes(ins.merging, |μ_candidate| { // let ν = μ_candidate.sub_matching(&γ1)-&μ_base_minus_γ0; // let mut d = &τv̆ + op𝒟.preapply(ν); // reg.verify_merge_candidate(&mut d, μ_candidate, τ, ε, ins) // }); // } // } // Prune spikes with zero weight. To maintain correct ordering between μ and γ1, also the // latter needs to be pruned when μ is. // TODO: This could do with a two-vector Vec::retain to avoid copies. let μ_new = DiscreteMeasure::from_iter(μ.iter_spikes().filter(|δ| δ.α != F::ZERO).cloned()); if μ_new.len() != μ.len() { let mut μ_iter = μ.iter_spikes(); γ1.prune_by(|_| μ_iter.next().unwrap().α != F::ZERO); stats.pruned += μ.len() - μ_new.len(); μ = μ_new; } // Do z variable primal update z.axpy(-σ_p/τ, τz̆, 1.0); // TODO: simplify nasty factors opKz.adjoint().gemv(&mut z, -σ_p, &y, 1.0); z = fnR.prox(σ_p, z); // Do dual update // opKμ.gemv(&mut y, σ_d*(1.0 + ω), &μ, 1.0); // y = y + σ_d K[(1+ω)(μ,z)^{k+1}] opKz.gemv(&mut y, σ_d*(1.0 + ω), &z, 1.0); // opKμ.gemv(&mut y, -σ_d*ω, μ_base, 1.0);// y = y + σ_d K[(1+ω)(μ,z)^{k+1} - ω (μ,z)^k]-b opKz.gemv(&mut y, -σ_d*ω, z_base, 1.0);// y = y + σ_d K[(1+ω)(μ,z)^{k+1} - ω (μ,z)^k]-b y = starH.prox(σ_d, y); // Update residual residual = calculate_residual(Pair(&μ, &z), opA, b); // Update step length parameters // let ω = pdpsconfig.acceleration.accelerate(&mut τ, &mut σ, γ); // Give statistics if requested let iter = state.iteration(); stats.this_iters += 1; state.if_verbose(|| { plotter.plot_spikes(iter, Some(&d), Some(&τv̆), &μ); full_stats(&residual, &μ, &z, ε, std::mem::replace(&mut stats, IterInfo::new())) }); // Update main tolerance for next iteration ε = tolerance.update(ε, iter); } let fit = |μ̃ : &RNDM<F, N>| { (opA.apply(Pair(μ̃, &z))-b).norm2_squared_div2() //+ fnR.apply(z) + reg.apply(μ) + fnH.apply(/* opKμ.apply(&μ̃) + */ opKz.apply(&z)) }; μ.merge_spikes_fitness(config.insertion.merging, fit, |&v| v); μ.prune(); Pair(μ, z) }