|
/*!
Solver for the point source localisation problem using a sliding
primal-dual proximal splitting method.
*/
|
5 |
|
6 use numeric_literals::replace_float_literals; |
|
7 use serde::{Deserialize, Serialize}; |
|
8 //use colored::Colorize; |
|
9 //use nalgebra::{DVector, DMatrix}; |
|
10 use std::iter::Iterator; |
|
11 |
|
12 use alg_tools::convex::{Conjugable, Prox}; |
|
13 use alg_tools::direct_product::Pair; |
|
14 use alg_tools::euclidean::Euclidean; |
|
15 use alg_tools::iterate::AlgIteratorFactory; |
|
16 use alg_tools::linops::{Adjointable, BoundedLinear, IdOp, AXPY, GEMV}; |
|
17 use alg_tools::mapping::{DifferentiableRealMapping, Instance, Mapping}; |
|
18 use alg_tools::nalgebra_support::ToNalgebraRealField; |
|
19 use alg_tools::norms::{Dist, Norm}; |
|
20 use alg_tools::norms::{PairNorm, L2}; |
|
21 |
|
22 use crate::forward_model::{AdjointProductPairBoundedBy, BoundedCurvature, ForwardModel}; |
|
23 use crate::measures::merging::SpikeMerging; |
|
24 use crate::measures::{DiscreteMeasure, Radon, RNDM}; |
|
25 use crate::types::*; |
|
26 // use crate::transport::TransportLipschitz; |
|
27 //use crate::tolerance::Tolerance; |
|
28 use crate::fb::*; |
|
29 use crate::plot::{PlotLookup, Plotting, SeqPlotter}; |
|
30 use crate::regularisation::SlidingRegTerm; |
|
31 // use crate::dataterm::L2Squared; |
|
32 use crate::dataterm::{calculate_residual, calculate_residual2}; |
|
33 use crate::sliding_fb::{ |
|
34 aposteriori_transport, initial_transport, TransportConfig, TransportStepLength, |
|
35 }; |
|
36 |
|
37 /// Settings for [`pointsource_sliding_pdps_pair`]. |
|
38 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] |
|
39 #[serde(default)] |
|
40 pub struct SlidingPDPSConfig<F: Float> { |
|
41 /// Primal step length scaling. |
|
42 pub τ0: F, |
|
43 /// Primal step length scaling. |
|
44 pub σp0: F, |
|
45 /// Dual step length scaling. |
|
46 pub σd0: F, |
|
47 /// Transport parameters |
|
48 pub transport: TransportConfig<F>, |
|
49 /// Generic parameters |
|
50 pub insertion: FBGenericConfig<F>, |
|
51 } |
|
52 |
|
53 #[replace_float_literals(F::cast_from(literal))] |
|
54 impl<F: Float> Default for SlidingPDPSConfig<F> { |
|
55 fn default() -> Self { |
|
56 SlidingPDPSConfig { |
|
57 τ0: 0.99, |
|
58 σd0: 0.05, |
|
59 σp0: 0.99, |
|
60 transport: TransportConfig { |
|
61 θ0: 0.9, |
|
62 ..Default::default() |
|
63 }, |
|
64 insertion: Default::default(), |
|
65 } |
|
66 } |
|
67 } |
|
68 |
|
69 type MeasureZ<F, Z, const N: usize> = Pair<RNDM<F, N>, Z>; |
|
70 |
|
71 /// Iteratively solve the pointsource localisation with an additional variable |
|
72 /// using sliding primal-dual proximal splitting |
|
73 /// |
|
74 /// The parametrisation is as for [`crate::forward_pdps::pointsource_forward_pdps_pair`]. |
|
75 #[replace_float_literals(F::cast_from(literal))] |
|
76 pub fn pointsource_sliding_pdps_pair< |
|
77 F, |
|
78 I, |
|
79 A, |
|
80 S, |
|
81 Reg, |
|
82 P, |
|
83 Z, |
|
84 R, |
|
85 Y, |
|
86 /*KOpM, */ KOpZ, |
|
87 H, |
|
88 const N: usize, |
|
89 >( |
|
90 opA: &A, |
|
91 b: &A::Observable, |
|
92 reg: Reg, |
|
93 prox_penalty: &P, |
|
94 config: &SlidingPDPSConfig<F>, |
|
95 iterator: I, |
|
96 mut plotter: SeqPlotter<F, N>, |
|
97 //opKμ : KOpM, |
|
98 opKz: &KOpZ, |
|
99 fnR: &R, |
|
100 fnH: &H, |
|
101 mut z: Z, |
|
102 mut y: Y, |
|
103 ) -> MeasureZ<F, Z, N> |
|
104 where |
|
105 F: Float + ToNalgebraRealField, |
|
106 I: AlgIteratorFactory<IterInfo<F, N>>, |
|
107 A: ForwardModel<MeasureZ<F, Z, N>, F, PairNorm<Radon, L2, L2>, PreadjointCodomain = Pair<S, Z>> |
|
108 + AdjointProductPairBoundedBy<MeasureZ<F, Z, N>, P, IdOp<Z>, FloatType = F> |
|
109 + BoundedCurvature<FloatType = F>, |
|
110 S: DifferentiableRealMapping<F, N>, |
|
111 for<'b> &'b A::Observable: std::ops::Neg<Output = A::Observable> + Instance<A::Observable>, |
|
112 PlotLookup: Plotting<N>, |
|
113 RNDM<F, N>: SpikeMerging<F>, |
|
114 Reg: SlidingRegTerm<F, N>, |
|
115 P: ProxPenalty<F, S, Reg, N>, |
|
116 // KOpM : Linear<RNDM<F, N>, Codomain=Y> |
|
117 // + GEMV<F, RNDM<F, N>> |
|
118 // + Preadjointable< |
|
119 // RNDM<F, N>, Y, |
|
120 // PreadjointCodomain = S, |
|
121 // > |
|
122 // + TransportLipschitz<L2Squared, FloatType=F> |
|
123 // + AdjointProductBoundedBy<RNDM<F, N>, 𝒟, FloatType=F>, |
|
124 // for<'b> KOpM::Preadjoint<'b> : GEMV<F, Y>, |
|
125 // Since Z is Hilbert, we may just as well use adjoints for K_z. |
|
126 KOpZ: BoundedLinear<Z, L2, L2, F, Codomain = Y> |
|
127 + GEMV<F, Z> |
|
128 + Adjointable<Z, Y, AdjointCodomain = Z>, |
|
129 for<'b> KOpZ::Adjoint<'b>: GEMV<F, Y>, |
|
130 Y: AXPY<F> + Euclidean<F, Output = Y> + Clone + ClosedAdd, |
|
131 for<'b> &'b Y: Instance<Y>, |
|
132 Z: AXPY<F, Owned = Z> + Euclidean<F, Output = Z> + Clone + Norm<F, L2> + Dist<F, L2>, |
|
133 for<'b> &'b Z: Instance<Z>, |
|
134 R: Prox<Z, Codomain = F>, |
|
135 H: Conjugable<Y, F, Codomain = F>, |
|
136 for<'b> H::Conjugate<'b>: Prox<Y>, |
|
137 { |
|
138 // Check parameters |
|
139 assert!( |
|
140 config.τ0 > 0.0 |
|
141 && config.τ0 < 1.0 |
|
142 && config.σp0 > 0.0 |
|
143 && config.σp0 < 1.0 |
|
144 && config.σd0 > 0.0 |
|
145 && config.σp0 * config.σd0 <= 1.0, |
|
146 "Invalid step length parameters" |
|
147 ); |
|
148 config.transport.check(); |
|
149 |
|
150 // Initialise iterates |
|
151 let mut μ = DiscreteMeasure::new(); |
|
152 let mut γ1 = DiscreteMeasure::new(); |
|
153 let mut residual = calculate_residual(Pair(&μ, &z), opA, b); |
|
154 let zero_z = z.similar_origin(); |
|
155 |
|
156 // Set up parameters |
|
157 // TODO: maybe this PairNorm doesn't make sense here? |
|
158 // let opAnorm = opA.opnorm_bound(PairNorm(Radon, L2, L2), L2); |
|
159 let bigθ = 0.0; //opKμ.transport_lipschitz_factor(L2Squared); |
|
160 let bigM = 0.0; //opKμ.adjoint_product_bound(&op𝒟).unwrap().sqrt(); |
|
161 let nKz = opKz.opnorm_bound(L2, L2); |
|
162 let ℓ = 0.0; |
|
163 let opIdZ = IdOp::new(); |
|
164 let (l, l_z) = opA |
|
165 .adjoint_product_pair_bound(prox_penalty, &opIdZ) |
|
166 .unwrap(); |
|
167 // We need to satisfy |
|
168 // |
|
169 // τσ_dM(1-σ_p L_z)/(1 - τ L) + [σ_p L_z + σ_pσ_d‖K_z‖^2] < 1 |
|
170 // ^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
171 // with 1 > σ_p L_z and 1 > τ L. |
|
172 // |
|
173 // To do so, we first solve σ_p and σ_d from standard PDPS step length condition |
|
174 // ^^^^^ < 1. then we solve τ from the rest. |
|
175 let σ_d = config.σd0 / nKz; |
|
176 let σ_p = config.σp0 / (l_z + config.σd0 * nKz); |
|
177 // Observe that = 1 - ^^^^^^^^^^^^^^^^^^^^^ = 1 - σ_{p,0} |
|
178 // We get the condition τσ_d M (1-σ_p L_z) < (1-σ_{p,0})*(1-τ L) |
|
179 // ⟺ τ [ σ_d M (1-σ_p L_z) + (1-σ_{p,0}) L ] < (1-σ_{p,0}) |
|
180 let φ = 1.0 - config.σp0; |
|
181 let a = 1.0 - σ_p * l_z; |
|
182 let τ = config.τ0 * φ / (σ_d * bigM * a + φ * l); |
|
183 let ψ = 1.0 - τ * l; |
|
184 let β = σ_p * config.σd0 * nKz / a; // σ_p * σ_d * (nKz * nK_z) / a; |
|
185 assert!(β < 1.0); |
|
186 // Now we need κ‖K_μ(π_♯^1 - π_♯^0)γ‖^2 ≤ (1/θ - τ[ℓ_F + ℓ]) ∫ c_2 dγ for κ defined as: |
|
187 let κ = τ * σ_d * ψ / ((1.0 - β) * ψ - τ * σ_d * bigM); |
|
188 // The factor two in the manuscript disappears due to the definition of 𝚹 being |
|
189 // for ‖x-y‖₂² instead of c_2(x, y)=‖x-y‖₂²/2. |
|
190 let (maybe_ℓ_F0, maybe_transport_lip) = opA.curvature_bound_components(); |
|
191 let transport_lip = maybe_transport_lip.unwrap(); |
|
192 let calculate_θ = |ℓ_F, max_transport| { |
|
193 let ℓ_r = transport_lip * max_transport; |
|
194 config.transport.θ0 / (τ * (ℓ + ℓ_F + ℓ_r) + κ * bigθ * max_transport) |
|
195 }; |
|
196 let mut θ_or_adaptive = match maybe_ℓ_F0 { |
|
197 // We assume that the residual is decreasing. |
|
198 Some(ℓ_F0) => TransportStepLength::AdaptiveMax { |
|
199 l: ℓ_F0 * b.norm2(), // TODO: could estimate computing the real reesidual |
|
200 max_transport: 0.0, |
|
201 g: calculate_θ, |
|
202 }, |
|
203 None => TransportStepLength::FullyAdaptive { |
|
204 l: F::EPSILON, |
|
205 max_transport: 0.0, |
|
206 g: calculate_θ, |
|
207 }, |
|
208 }; |
|
209 // Acceleration is not currently supported |
|
210 // let γ = dataterm.factor_of_strong_convexity(); |
|
211 let ω = 1.0; |
|
212 |
|
213 // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled |
|
214 // by τ compared to the conditional gradient approach. |
|
215 let tolerance = config.insertion.tolerance * τ * reg.tolerance_scaling(); |
|
216 let mut ε = tolerance.initial(); |
|
217 |
|
218 let starH = fnH.conjugate(); |
|
219 |
|
220 // Statistics |
|
221 let full_stats = |residual: &A::Observable, μ: &RNDM<F, N>, z: &Z, ε, stats| IterInfo { |
|
222 value: residual.norm2_squared_div2() |
|
223 + fnR.apply(z) |
|
224 + reg.apply(μ) |
|
225 + fnH.apply(/* opKμ.apply(μ) + */ opKz.apply(z)), |
|
226 n_spikes: μ.len(), |
|
227 ε, |
|
228 // postprocessing: config.insertion.postprocessing.then(|| μ.clone()), |
|
229 ..stats |
|
230 }; |
|
231 let mut stats = IterInfo::new(); |
|
232 |
|
233 // Run the algorithm |
|
234 for state in iterator.iter_init(|| full_stats(&residual, &μ, &z, ε, stats.clone())) { |
|
235 // Calculate initial transport |
|
236 let Pair(v, _) = opA.preadjoint().apply(&residual); |
|
237 //opKμ.preadjoint().apply_add(&mut v, y); |
|
238 // We want to proceed as in Example 4.12 but with v and v̆ as in §5. |
|
239 // With A(ν, z) = A_μ ν + A_z z, following Example 5.1, we have |
|
240 // P_ℳ[F'(ν, z) + Ξ(ν, z, y)]= A_ν^*[A_ν ν + A_z z] + K_μ ν = A_ν^*A(ν, z) + K_μ ν, |
|
241 // where A_ν^* becomes a multiplier. |
|
242 // This is much easier with K_μ = 0, which is the only reason why are enforcing it. |
|
243 // TODO: Write a version of initial_transport that can deal with K_μ ≠ 0. |
|
244 |
|
245 let (μ_base_masses, mut μ_base_minus_γ0) = |
|
246 initial_transport(&mut γ1, &mut μ, τ, &mut θ_or_adaptive, v); |
|
247 |
|
248 // Solve finite-dimensional subproblem several times until the dual variable for the |
|
249 // regularisation term conforms to the assumptions made for the transport above. |
|
250 let (maybe_d, _within_tolerances, mut τv̆, z_new) = 'adapt_transport: loop { |
|
251 // Calculate τv̆ = τA_*(A[μ_transported + μ_transported_base]-b) |
|
252 let residual_μ̆ = |
|
253 calculate_residual2(Pair(&γ1, &z), Pair(&μ_base_minus_γ0, &zero_z), opA, b); |
|
254 let Pair(mut τv̆, τz̆) = opA.preadjoint().apply(residual_μ̆ * τ); |
|
255 // opKμ.preadjoint().gemv(&mut τv̆, τ, y, 1.0); |
|
256 |
|
257 // Construct μ^{k+1} by solving finite-dimensional subproblems and insert new spikes. |
|
258 let (maybe_d, within_tolerances) = prox_penalty.insert_and_reweigh( |
|
259 &mut μ, |
|
260 &mut τv̆, |
|
261 &γ1, |
|
262 Some(&μ_base_minus_γ0), |
|
263 τ, |
|
264 ε, |
|
265 &config.insertion, |
|
266 ®, |
|
267 &state, |
|
268 &mut stats, |
|
269 ); |
|
270 |
|
271 // Do z variable primal update here to able to estimate B_{v̆^k-v^{k+1}} |
|
272 let mut z_new = τz̆; |
|
273 opKz.adjoint().gemv(&mut z_new, -σ_p, &y, -σ_p / τ); |
|
274 z_new = fnR.prox(σ_p, z_new + &z); |
|
275 |
|
276 // A posteriori transport adaptation. |
|
277 if aposteriori_transport( |
|
278 &mut γ1, |
|
279 &mut μ, |
|
280 &mut μ_base_minus_γ0, |
|
281 &μ_base_masses, |
|
282 Some(z_new.dist(&z, L2)), |
|
283 ε, |
|
284 &config.transport, |
|
285 ) { |
|
286 break 'adapt_transport (maybe_d, within_tolerances, τv̆, z_new); |
|
287 } |
|
288 }; |
|
289 |
|
290 stats.untransported_fraction = Some({ |
|
291 assert_eq!(μ_base_masses.len(), γ1.len()); |
|
292 let (a, b) = stats.untransported_fraction.unwrap_or((0.0, 0.0)); |
|
293 let source = μ_base_masses.iter().map(|v| v.abs()).sum(); |
|
294 (a + μ_base_minus_γ0.norm(Radon), b + source) |
|
295 }); |
|
296 stats.transport_error = Some({ |
|
297 assert_eq!(μ_base_masses.len(), γ1.len()); |
|
298 let (a, b) = stats.transport_error.unwrap_or((0.0, 0.0)); |
|
299 (a + μ.dist_matching(&γ1), b + γ1.norm(Radon)) |
|
300 }); |
|
301 |
|
302 // Merge spikes. |
|
303 // This crucially expects the merge routine to be stable with respect to spike locations, |
|
304 // and not to performing any pruning. That is be to done below simultaneously for γ. |
|
305 let ins = &config.insertion; |
|
306 if ins.merge_now(&state) { |
|
307 stats.merged += prox_penalty.merge_spikes_no_fitness( |
|
308 &mut μ, |
|
309 &mut τv̆, |
|
310 &γ1, |
|
311 Some(&μ_base_minus_γ0), |
|
312 τ, |
|
313 ε, |
|
314 ins, |
|
315 ®, |
|
316 //Some(|μ̃ : &RNDM<F, N>| calculate_residual(Pair(μ̃, &z), opA, b).norm2_squared_div2()), |
|
317 ); |
|
318 } |
|
319 |
|
320 // Prune spikes with zero weight. To maintain correct ordering between μ and γ1, also the |
|
321 // latter needs to be pruned when μ is. |
|
322 // TODO: This could do with a two-vector Vec::retain to avoid copies. |
|
323 let μ_new = DiscreteMeasure::from_iter(μ.iter_spikes().filter(|δ| δ.α != F::ZERO).cloned()); |
|
324 if μ_new.len() != μ.len() { |
|
325 let mut μ_iter = μ.iter_spikes(); |
|
326 γ1.prune_by(|_| μ_iter.next().unwrap().α != F::ZERO); |
|
327 stats.pruned += μ.len() - μ_new.len(); |
|
328 μ = μ_new; |
|
329 } |
|
330 |
|
331 // Do dual update |
|
332 // opKμ.gemv(&mut y, σ_d*(1.0 + ω), &μ, 1.0); // y = y + σ_d K[(1+ω)(μ,z)^{k+1}] |
|
333 opKz.gemv(&mut y, σ_d * (1.0 + ω), &z_new, 1.0); |
|
334 // opKμ.gemv(&mut y, -σ_d*ω, μ_base, 1.0);// y = y + σ_d K[(1+ω)(μ,z)^{k+1} - ω (μ,z)^k]-b |
|
335 opKz.gemv(&mut y, -σ_d * ω, z, 1.0); // y = y + σ_d K[(1+ω)(μ,z)^{k+1} - ω (μ,z)^k]-b |
|
336 y = starH.prox(σ_d, y); |
|
337 z = z_new; |
|
338 |
|
339 // Update residual |
|
340 residual = calculate_residual(Pair(&μ, &z), opA, b); |
|
341 |
|
342 // Update step length parameters |
|
343 // let ω = pdpsconfig.acceleration.accelerate(&mut τ, &mut σ, γ); |
|
344 |
|
345 // Give statistics if requested |
|
346 let iter = state.iteration(); |
|
347 stats.this_iters += 1; |
|
348 |
|
349 state.if_verbose(|| { |
|
350 plotter.plot_spikes(iter, maybe_d.as_ref(), Some(&τv̆), &μ); |
|
351 full_stats( |
|
352 &residual, |
|
353 &μ, |
|
354 &z, |
|
355 ε, |
|
356 std::mem::replace(&mut stats, IterInfo::new()), |
|
357 ) |
|
358 }); |
|
359 |
|
360 // Update main tolerance for next iteration |
|
361 ε = tolerance.update(ε, iter); |
|
362 } |
|
363 |
|
364 let fit = |μ̃: &RNDM<F, N>| { |
|
365 (opA.apply(Pair(μ̃, &z))-b).norm2_squared_div2() |
|
366 //+ fnR.apply(z) + reg.apply(μ) |
|
367 + fnH.apply(/* opKμ.apply(&μ̃) + */ opKz.apply(&z)) |
|
368 }; |
|
369 |
|
370 μ.merge_spikes_fitness(config.insertion.final_merging_method(), fit, |&v| v); |
|
371 μ.prune(); |
|
372 Pair(μ, z) |
|
373 } |