|
1 /*! |
|
2 Solver for the point source localisation problem using a sliding |
|
3 forward-backward splitting method. |
|
4 */ |
|
5 |
|
6 use numeric_literals::replace_float_literals; |
|
7 use serde::{Deserialize, Serialize}; |
|
8 //use colored::Colorize; |
|
9 //use nalgebra::{DVector, DMatrix}; |
|
10 use itertools::izip; |
|
11 use std::iter::Iterator; |
|
12 |
|
13 use alg_tools::euclidean::Euclidean; |
|
14 use alg_tools::iterate::AlgIteratorFactory; |
|
15 use alg_tools::mapping::{DifferentiableRealMapping, Instance, Mapping}; |
|
16 use alg_tools::nalgebra_support::ToNalgebraRealField; |
|
17 use alg_tools::norms::Norm; |
|
18 |
|
19 use crate::forward_model::{AdjointProductBoundedBy, BoundedCurvature, ForwardModel}; |
|
20 use crate::measures::merging::SpikeMerging; |
|
21 use crate::measures::{DiscreteMeasure, Radon, RNDM}; |
|
22 use crate::types::*; |
|
23 //use crate::tolerance::Tolerance; |
|
24 use crate::dataterm::{calculate_residual, calculate_residual2, DataTerm, L2Squared}; |
|
25 use crate::fb::*; |
|
26 use crate::plot::{PlotLookup, Plotting, SeqPlotter}; |
|
27 use crate::regularisation::SlidingRegTerm; |
|
28 //use crate::transport::TransportLipschitz; |
|
29 |
|
30 /// Transport settings for [`pointsource_sliding_fb_reg`]. |
|
31 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] |
|
32 #[serde(default)] |
|
33 pub struct TransportConfig<F: Float> { |
|
34 /// Transport step length $θ$ normalised to $(0, 1)$. |
|
35 pub θ0: F, |
|
36 /// Factor in $(0, 1)$ for decreasing transport to adapt to tolerance. |
|
37 pub adaptation: F, |
|
38 /// A posteriori transport tolerance multiplier (C_pos) |
|
39 pub tolerance_mult_con: F, |
|
40 } |
|
41 |
|
42 #[replace_float_literals(F::cast_from(literal))] |
|
43 impl<F: Float> TransportConfig<F> { |
|
44 /// Check that the parameters are ok. Panics if not. |
|
45 pub fn check(&self) { |
|
46 assert!(self.θ0 > 0.0); |
|
47 assert!(0.0 < self.adaptation && self.adaptation < 1.0); |
|
48 assert!(self.tolerance_mult_con > 0.0); |
|
49 } |
|
50 } |
|
51 |
|
52 #[replace_float_literals(F::cast_from(literal))] |
|
53 impl<F: Float> Default for TransportConfig<F> { |
|
54 fn default() -> Self { |
|
55 TransportConfig { |
|
56 θ0: 0.9, |
|
57 adaptation: 0.9, |
|
58 tolerance_mult_con: 100.0, |
|
59 } |
|
60 } |
|
61 } |
|
62 |
|
63 /// Settings for [`pointsource_sliding_fb_reg`]. |
|
64 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] |
|
65 #[serde(default)] |
|
66 pub struct SlidingFBConfig<F: Float> { |
|
67 /// Step length scaling |
|
68 pub τ0: F, |
|
69 /// Transport parameters |
|
70 pub transport: TransportConfig<F>, |
|
71 /// Generic parameters |
|
72 pub insertion: FBGenericConfig<F>, |
|
73 } |
|
74 |
|
75 #[replace_float_literals(F::cast_from(literal))] |
|
76 impl<F: Float> Default for SlidingFBConfig<F> { |
|
77 fn default() -> Self { |
|
78 SlidingFBConfig { |
|
79 τ0: 0.99, |
|
80 transport: Default::default(), |
|
81 insertion: Default::default(), |
|
82 } |
|
83 } |
|
84 } |
|
85 |
|
86 /// Internal type of adaptive transport step length calculation |
|
87 pub(crate) enum TransportStepLength<F: Float, G: Fn(F, F) -> F> { |
|
88 /// Fixed, known step length |
|
89 #[allow(dead_code)] |
|
90 Fixed(F), |
|
91 /// Adaptive step length, only wrt. maximum transport. |
|
92 /// Content of `l` depends on use case, while `g` calculates the step length from `l`. |
|
93 AdaptiveMax { l: F, max_transport: F, g: G }, |
|
94 /// Adaptive step length. |
|
95 /// Content of `l` depends on use case, while `g` calculates the step length from `l`. |
|
96 FullyAdaptive { l: F, max_transport: F, g: G }, |
|
97 } |
|
98 |
|
99 /// Constrution of initial transport `γ1` from initial measure `μ` and `v=F'(μ)` |
|
100 /// with step lengh τ and transport step length `θ_or_adaptive`. |
|
101 #[replace_float_literals(F::cast_from(literal))] |
|
102 pub(crate) fn initial_transport<F, G, D, const N: usize>( |
|
103 γ1: &mut RNDM<F, N>, |
|
104 μ: &mut RNDM<F, N>, |
|
105 τ: F, |
|
106 θ_or_adaptive: &mut TransportStepLength<F, G>, |
|
107 v: D, |
|
108 ) -> (Vec<F>, RNDM<F, N>) |
|
109 where |
|
110 F: Float + ToNalgebraRealField, |
|
111 G: Fn(F, F) -> F, |
|
112 D: DifferentiableRealMapping<F, N>, |
|
113 { |
|
114 use TransportStepLength::*; |
|
115 |
|
116 // Save current base point and shift μ to new positions. Idea is that |
|
117 // μ_base(_masses) = μ^k (vector of masses) |
|
118 // μ_base_minus_γ0 = μ^k - π_♯^0γ^{k+1} |
|
119 // γ1 = π_♯^1γ^{k+1} |
|
120 // μ = μ^{k+1} |
|
121 let μ_base_masses: Vec<F> = μ.iter_masses().collect(); |
|
122 let mut μ_base_minus_γ0 = μ.clone(); // Weights will be set in the loop below. |
|
123 // Construct μ^{k+1} and π_♯^1γ^{k+1} initial candidates |
|
124 //let mut sum_norm_dv = 0.0; |
|
125 let γ_prev_len = γ1.len(); |
|
126 assert!(μ.len() >= γ_prev_len); |
|
127 γ1.extend(μ[γ_prev_len..].iter().cloned()); |
|
128 |
|
129 // Calculate initial transport and step length. |
|
130 // First calculate initial transported weights |
|
131 for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) { |
|
132 // If old transport has opposing sign, the new transport will be none. |
|
133 ρ.α = if (ρ.α > 0.0 && δ.α < 0.0) || (ρ.α < 0.0 && δ.α > 0.0) { |
|
134 0.0 |
|
135 } else { |
|
136 δ.α |
|
137 }; |
|
138 } |
|
139 |
|
140 // Calculate transport rays. |
|
141 match *θ_or_adaptive { |
|
142 Fixed(θ) => { |
|
143 let θτ = τ * θ; |
|
144 for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) { |
|
145 ρ.x = δ.x - v.differential(&δ.x) * (ρ.α.signum() * θτ); |
|
146 } |
|
147 } |
|
148 AdaptiveMax { |
|
149 l: ℓ_F, |
|
150 ref mut max_transport, |
|
151 g: ref calculate_θ, |
|
152 } => { |
|
153 *max_transport = max_transport.max(γ1.norm(Radon)); |
|
154 let θτ = τ * calculate_θ(ℓ_F, *max_transport); |
|
155 for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) { |
|
156 ρ.x = δ.x - v.differential(&δ.x) * (ρ.α.signum() * θτ); |
|
157 } |
|
158 } |
|
159 FullyAdaptive { |
|
160 l: ref mut adaptive_ℓ_F, |
|
161 ref mut max_transport, |
|
162 g: ref calculate_θ, |
|
163 } => { |
|
164 *max_transport = max_transport.max(γ1.norm(Radon)); |
|
165 let mut θ = calculate_θ(*adaptive_ℓ_F, *max_transport); |
|
166 // Do two runs through the spikes to update θ, breaking if first run did not cause |
|
167 // a change. |
|
168 for _i in 0..=1 { |
|
169 let mut changes = false; |
|
170 for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) { |
|
171 let dv_x = v.differential(&δ.x); |
|
172 let g = &dv_x * (ρ.α.signum() * θ * τ); |
|
173 ρ.x = δ.x - g; |
|
174 let n = g.norm2(); |
|
175 if n >= F::EPSILON { |
|
176 // Estimate Lipschitz factor of ∇v |
|
177 let this_ℓ_F = (dv_x - v.differential(&ρ.x)).norm2() / n; |
|
178 *adaptive_ℓ_F = adaptive_ℓ_F.max(this_ℓ_F); |
|
179 θ = calculate_θ(*adaptive_ℓ_F, *max_transport); |
|
180 changes = true |
|
181 } |
|
182 } |
|
183 if !changes { |
|
184 break; |
|
185 } |
|
186 } |
|
187 } |
|
188 } |
|
189 |
|
190 // Set initial guess for μ=μ^{k+1}. |
|
191 for (δ, ρ, &β) in izip!(μ.iter_spikes_mut(), γ1.iter_spikes(), μ_base_masses.iter()) { |
|
192 if ρ.α.abs() > F::EPSILON { |
|
193 δ.x = ρ.x; |
|
194 //δ.α = ρ.α; // already set above |
|
195 } else { |
|
196 δ.α = β; |
|
197 } |
|
198 } |
|
199 // Calculate μ^k-π_♯^0γ^{k+1} and v̆ = A_*(A[μ_transported + μ_transported_base]-b) |
|
200 μ_base_minus_γ0.set_masses( |
|
201 μ_base_masses |
|
202 .iter() |
|
203 .zip(γ1.iter_masses()) |
|
204 .map(|(&a, b)| a - b), |
|
205 ); |
|
206 (μ_base_masses, μ_base_minus_γ0) |
|
207 } |
|
208 |
|
209 /// A posteriori transport adaptation. |
|
210 #[replace_float_literals(F::cast_from(literal))] |
|
211 pub(crate) fn aposteriori_transport<F, const N: usize>( |
|
212 γ1: &mut RNDM<F, N>, |
|
213 μ: &mut RNDM<F, N>, |
|
214 μ_base_minus_γ0: &mut RNDM<F, N>, |
|
215 μ_base_masses: &Vec<F>, |
|
216 extra: Option<F>, |
|
217 ε: F, |
|
218 tconfig: &TransportConfig<F>, |
|
219 ) -> bool |
|
220 where |
|
221 F: Float + ToNalgebraRealField, |
|
222 { |
|
223 // 1. If π_♯^1γ^{k+1} = γ1 has non-zero mass at some point y, but μ = μ^{k+1} does not, |
|
224 // then the ansatz ∇w̃_x(y) = w^{k+1}(y) may not be satisfied. So set the mass of γ1 |
|
225 // at that point to zero, and retry. |
|
226 let mut all_ok = true; |
|
227 for (α_μ, α_γ1) in izip!(μ.iter_masses(), γ1.iter_masses_mut()) { |
|
228 if α_μ == 0.0 && *α_γ1 != 0.0 { |
|
229 all_ok = false; |
|
230 *α_γ1 = 0.0; |
|
231 } |
|
232 } |
|
233 |
|
234 // 2. Through bounding ∫ B_ω(y, z) dλ(x, y, z). |
|
235 // through the estimate ≤ C ‖Δ‖‖γ^{k+1}‖ for Δ := μ^{k+1}-μ^k-(π_♯^1-π_♯^0)γ^{k+1}, |
|
236 // which holds for some some C if the convolution kernel in 𝒟 has Lipschitz gradient. |
|
237 let nγ = γ1.norm(Radon); |
|
238 let nΔ = μ_base_minus_γ0.norm(Radon) + μ.dist_matching(&γ1) + extra.unwrap_or(0.0); |
|
239 let t = ε * tconfig.tolerance_mult_con; |
|
240 if nγ * nΔ > t { |
|
241 // Since t/(nγ*nΔ)<1, and the constant tconfig.adaptation < 1, |
|
242 // this will guarantee that eventually ‖γ‖ decreases sufficiently that we |
|
243 // will not enter here. |
|
244 *γ1 *= tconfig.adaptation * t / (nγ * nΔ); |
|
245 all_ok = false |
|
246 } |
|
247 |
|
248 if !all_ok { |
|
249 // Update weights for μ_base_minus_γ0 = μ^k - π_♯^0γ^{k+1} |
|
250 μ_base_minus_γ0.set_masses( |
|
251 μ_base_masses |
|
252 .iter() |
|
253 .zip(γ1.iter_masses()) |
|
254 .map(|(&a, b)| a - b), |
|
255 ); |
|
256 } |
|
257 |
|
258 all_ok |
|
259 } |
|
260 |
|
261 /// Iteratively solve the pointsource localisation problem using sliding forward-backward |
|
262 /// splitting |
|
263 /// |
|
264 /// The parametrisation is as for [`pointsource_fb_reg`]. |
|
265 /// Inertia is currently not supported. |
|
266 #[replace_float_literals(F::cast_from(literal))] |
|
267 pub fn pointsource_sliding_fb_reg<F, I, A, Reg, P, const N: usize>( |
|
268 opA: &A, |
|
269 b: &A::Observable, |
|
270 reg: Reg, |
|
271 prox_penalty: &P, |
|
272 config: &SlidingFBConfig<F>, |
|
273 iterator: I, |
|
274 mut plotter: SeqPlotter<F, N>, |
|
275 ) -> RNDM<F, N> |
|
276 where |
|
277 F: Float + ToNalgebraRealField, |
|
278 I: AlgIteratorFactory<IterInfo<F, N>>, |
|
279 A: ForwardModel<RNDM<F, N>, F> |
|
280 + AdjointProductBoundedBy<RNDM<F, N>, P, FloatType = F> |
|
281 + BoundedCurvature<FloatType = F>, |
|
282 for<'b> &'b A::Observable: std::ops::Neg<Output = A::Observable> + Instance<A::Observable>, |
|
283 A::PreadjointCodomain: DifferentiableRealMapping<F, N>, |
|
284 RNDM<F, N>: SpikeMerging<F>, |
|
285 Reg: SlidingRegTerm<F, N>, |
|
286 P: ProxPenalty<F, A::PreadjointCodomain, Reg, N>, |
|
287 PlotLookup: Plotting<N>, |
|
288 { |
|
289 // Check parameters |
|
290 assert!(config.τ0 > 0.0, "Invalid step length parameter"); |
|
291 config.transport.check(); |
|
292 |
|
293 // Initialise iterates |
|
294 let mut μ = DiscreteMeasure::new(); |
|
295 let mut γ1 = DiscreteMeasure::new(); |
|
296 let mut residual = -b; // Has to equal $Aμ-b$. |
|
297 |
|
298 // Set up parameters |
|
299 // let opAnorm = opA.opnorm_bound(Radon, L2); |
|
300 //let max_transport = config.max_transport.scale |
|
301 // * reg.radon_norm_bound(b.norm2_squared() / 2.0); |
|
302 //let ℓ = opA.transport.lipschitz_factor(L2Squared) * max_transport; |
|
303 let ℓ = 0.0; |
|
304 let τ = config.τ0 / opA.adjoint_product_bound(prox_penalty).unwrap(); |
|
305 let (maybe_ℓ_F0, maybe_transport_lip) = opA.curvature_bound_components(); |
|
306 let transport_lip = maybe_transport_lip.unwrap(); |
|
307 let calculate_θ = |ℓ_F, max_transport| { |
|
308 let ℓ_r = transport_lip * max_transport; |
|
309 config.transport.θ0 / (τ * (ℓ + ℓ_F + ℓ_r)) |
|
310 }; |
|
311 let mut θ_or_adaptive = match maybe_ℓ_F0 { |
|
312 //Some(ℓ_F0) => TransportStepLength::Fixed(calculate_θ(ℓ_F0 * b.norm2(), 0.0)), |
|
313 Some(ℓ_F0) => TransportStepLength::AdaptiveMax { |
|
314 l: ℓ_F0 * b.norm2(), // TODO: could estimate computing the real reesidual |
|
315 max_transport: 0.0, |
|
316 g: calculate_θ, |
|
317 }, |
|
318 None => TransportStepLength::FullyAdaptive { |
|
319 l: 10.0 * F::EPSILON, // Start with something very small to estimate differentials |
|
320 max_transport: 0.0, |
|
321 g: calculate_θ, |
|
322 }, |
|
323 }; |
|
324 // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled |
|
325 // by τ compared to the conditional gradient approach. |
|
326 let tolerance = config.insertion.tolerance * τ * reg.tolerance_scaling(); |
|
327 let mut ε = tolerance.initial(); |
|
328 |
|
329 // Statistics |
|
330 let full_stats = |residual: &A::Observable, μ: &RNDM<F, N>, ε, stats| IterInfo { |
|
331 value: residual.norm2_squared_div2() + reg.apply(μ), |
|
332 n_spikes: μ.len(), |
|
333 ε, |
|
334 // postprocessing: config.insertion.postprocessing.then(|| μ.clone()), |
|
335 ..stats |
|
336 }; |
|
337 let mut stats = IterInfo::new(); |
|
338 |
|
339 // Run the algorithm |
|
340 for state in iterator.iter_init(|| full_stats(&residual, &μ, ε, stats.clone())) { |
|
341 // Calculate initial transport |
|
342 let v = opA.preadjoint().apply(residual); |
|
343 let (μ_base_masses, mut μ_base_minus_γ0) = |
|
344 initial_transport(&mut γ1, &mut μ, τ, &mut θ_or_adaptive, v); |
|
345 |
|
346 // Solve finite-dimensional subproblem several times until the dual variable for the |
|
347 // regularisation term conforms to the assumptions made for the transport above. |
|
348 let (maybe_d, _within_tolerances, mut τv̆) = 'adapt_transport: loop { |
|
349 // Calculate τv̆ = τA_*(A[μ_transported + μ_transported_base]-b) |
|
350 let residual_μ̆ = calculate_residual2(&γ1, &μ_base_minus_γ0, opA, b); |
|
351 let mut τv̆ = opA.preadjoint().apply(residual_μ̆ * τ); |
|
352 |
|
353 // Construct μ^{k+1} by solving finite-dimensional subproblems and insert new spikes. |
|
354 let (maybe_d, within_tolerances) = prox_penalty.insert_and_reweigh( |
|
355 &mut μ, |
|
356 &mut τv̆, |
|
357 &γ1, |
|
358 Some(&μ_base_minus_γ0), |
|
359 τ, |
|
360 ε, |
|
361 &config.insertion, |
|
362 ®, |
|
363 &state, |
|
364 &mut stats, |
|
365 ); |
|
366 |
|
367 // A posteriori transport adaptation. |
|
368 if aposteriori_transport( |
|
369 &mut γ1, |
|
370 &mut μ, |
|
371 &mut μ_base_minus_γ0, |
|
372 &μ_base_masses, |
|
373 None, |
|
374 ε, |
|
375 &config.transport, |
|
376 ) { |
|
377 break 'adapt_transport (maybe_d, within_tolerances, τv̆); |
|
378 } |
|
379 }; |
|
380 |
|
381 stats.untransported_fraction = Some({ |
|
382 assert_eq!(μ_base_masses.len(), γ1.len()); |
|
383 let (a, b) = stats.untransported_fraction.unwrap_or((0.0, 0.0)); |
|
384 let source = μ_base_masses.iter().map(|v| v.abs()).sum(); |
|
385 (a + μ_base_minus_γ0.norm(Radon), b + source) |
|
386 }); |
|
387 stats.transport_error = Some({ |
|
388 assert_eq!(μ_base_masses.len(), γ1.len()); |
|
389 let (a, b) = stats.transport_error.unwrap_or((0.0, 0.0)); |
|
390 (a + μ.dist_matching(&γ1), b + γ1.norm(Radon)) |
|
391 }); |
|
392 |
|
393 // Merge spikes. |
|
394 // This crucially expects the merge routine to be stable with respect to spike locations, |
|
395 // and not to performing any pruning. That is be to done below simultaneously for γ. |
|
396 let ins = &config.insertion; |
|
397 if ins.merge_now(&state) { |
|
398 stats.merged += prox_penalty.merge_spikes( |
|
399 &mut μ, |
|
400 &mut τv̆, |
|
401 &γ1, |
|
402 Some(&μ_base_minus_γ0), |
|
403 τ, |
|
404 ε, |
|
405 ins, |
|
406 ®, |
|
407 Some(|μ̃: &RNDM<F, N>| L2Squared.calculate_fit_op(μ̃, opA, b)), |
|
408 ); |
|
409 } |
|
410 |
|
411 // Prune spikes with zero weight. To maintain correct ordering between μ and γ1, also the |
|
412 // latter needs to be pruned when μ is. |
|
413 // TODO: This could do with a two-vector Vec::retain to avoid copies. |
|
414 let μ_new = DiscreteMeasure::from_iter(μ.iter_spikes().filter(|δ| δ.α != F::ZERO).cloned()); |
|
415 if μ_new.len() != μ.len() { |
|
416 let mut μ_iter = μ.iter_spikes(); |
|
417 γ1.prune_by(|_| μ_iter.next().unwrap().α != F::ZERO); |
|
418 stats.pruned += μ.len() - μ_new.len(); |
|
419 μ = μ_new; |
|
420 } |
|
421 |
|
422 // Update residual |
|
423 residual = calculate_residual(&μ, opA, b); |
|
424 |
|
425 let iter = state.iteration(); |
|
426 stats.this_iters += 1; |
|
427 |
|
428 // Give statistics if requested |
|
429 state.if_verbose(|| { |
|
430 plotter.plot_spikes(iter, maybe_d.as_ref(), Some(&τv̆), &μ); |
|
431 full_stats( |
|
432 &residual, |
|
433 &μ, |
|
434 ε, |
|
435 std::mem::replace(&mut stats, IterInfo::new()), |
|
436 ) |
|
437 }); |
|
438 |
|
439 // Update main tolerance for next iteration |
|
440 ε = tolerance.update(ε, iter); |
|
441 } |
|
442 |
|
443 postprocess(μ, &config.insertion, L2Squared, opA, b) |
|
444 } |