src/sliding_fb.rs

changeset 52
f0e8704d3f0e
parent 49
6b0db7251ebe
equal deleted inserted replaced
31:6105b5cd8d89 52:f0e8704d3f0e
1 /*!
2 Solver for the point source localisation problem using a sliding
3 forward-backward splitting method.
4 */
5
6 use numeric_literals::replace_float_literals;
7 use serde::{Deserialize, Serialize};
8 //use colored::Colorize;
9 //use nalgebra::{DVector, DMatrix};
10 use itertools::izip;
11 use std::iter::Iterator;
12
13 use alg_tools::euclidean::Euclidean;
14 use alg_tools::iterate::AlgIteratorFactory;
15 use alg_tools::mapping::{DifferentiableRealMapping, Instance, Mapping};
16 use alg_tools::nalgebra_support::ToNalgebraRealField;
17 use alg_tools::norms::Norm;
18
19 use crate::forward_model::{AdjointProductBoundedBy, BoundedCurvature, ForwardModel};
20 use crate::measures::merging::SpikeMerging;
21 use crate::measures::{DiscreteMeasure, Radon, RNDM};
22 use crate::types::*;
23 //use crate::tolerance::Tolerance;
24 use crate::dataterm::{calculate_residual, calculate_residual2, DataTerm, L2Squared};
25 use crate::fb::*;
26 use crate::plot::{PlotLookup, Plotting, SeqPlotter};
27 use crate::regularisation::SlidingRegTerm;
28 //use crate::transport::TransportLipschitz;
29
30 /// Transport settings for [`pointsource_sliding_fb_reg`].
31 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
32 #[serde(default)]
33 pub struct TransportConfig<F: Float> {
34 /// Transport step length $θ$ normalised to $(0, 1)$.
35 pub θ0: F,
36 /// Factor in $(0, 1)$ for decreasing transport to adapt to tolerance.
37 pub adaptation: F,
38 /// A posteriori transport tolerance multiplier (C_pos)
39 pub tolerance_mult_con: F,
40 }
41
42 #[replace_float_literals(F::cast_from(literal))]
43 impl<F: Float> TransportConfig<F> {
44 /// Check that the parameters are ok. Panics if not.
45 pub fn check(&self) {
46 assert!(self.θ0 > 0.0);
47 assert!(0.0 < self.adaptation && self.adaptation < 1.0);
48 assert!(self.tolerance_mult_con > 0.0);
49 }
50 }
51
52 #[replace_float_literals(F::cast_from(literal))]
53 impl<F: Float> Default for TransportConfig<F> {
54 fn default() -> Self {
55 TransportConfig {
56 θ0: 0.9,
57 adaptation: 0.9,
58 tolerance_mult_con: 100.0,
59 }
60 }
61 }
62
63 /// Settings for [`pointsource_sliding_fb_reg`].
64 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
65 #[serde(default)]
66 pub struct SlidingFBConfig<F: Float> {
67 /// Step length scaling
68 pub τ0: F,
69 /// Transport parameters
70 pub transport: TransportConfig<F>,
71 /// Generic parameters
72 pub insertion: FBGenericConfig<F>,
73 }
74
75 #[replace_float_literals(F::cast_from(literal))]
76 impl<F: Float> Default for SlidingFBConfig<F> {
77 fn default() -> Self {
78 SlidingFBConfig {
79 τ0: 0.99,
80 transport: Default::default(),
81 insertion: Default::default(),
82 }
83 }
84 }
85
86 /// Internal type of adaptive transport step length calculation
87 pub(crate) enum TransportStepLength<F: Float, G: Fn(F, F) -> F> {
88 /// Fixed, known step length
89 #[allow(dead_code)]
90 Fixed(F),
91 /// Adaptive step length, only wrt. maximum transport.
92 /// Content of `l` depends on use case, while `g` calculates the step length from `l`.
93 AdaptiveMax { l: F, max_transport: F, g: G },
94 /// Adaptive step length.
95 /// Content of `l` depends on use case, while `g` calculates the step length from `l`.
96 FullyAdaptive { l: F, max_transport: F, g: G },
97 }
98
99 /// Constrution of initial transport `γ1` from initial measure `μ` and `v=F'(μ)`
100 /// with step lengh τ and transport step length `θ_or_adaptive`.
101 #[replace_float_literals(F::cast_from(literal))]
102 pub(crate) fn initial_transport<F, G, D, const N: usize>(
103 γ1: &mut RNDM<F, N>,
104 μ: &mut RNDM<F, N>,
105 τ: F,
106 θ_or_adaptive: &mut TransportStepLength<F, G>,
107 v: D,
108 ) -> (Vec<F>, RNDM<F, N>)
109 where
110 F: Float + ToNalgebraRealField,
111 G: Fn(F, F) -> F,
112 D: DifferentiableRealMapping<F, N>,
113 {
114 use TransportStepLength::*;
115
116 // Save current base point and shift μ to new positions. Idea is that
117 // μ_base(_masses) = μ^k (vector of masses)
118 // μ_base_minus_γ0 = μ^k - π_♯^0γ^{k+1}
119 // γ1 = π_♯^1γ^{k+1}
120 // μ = μ^{k+1}
121 let μ_base_masses: Vec<F> = μ.iter_masses().collect();
122 let mut μ_base_minus_γ0 = μ.clone(); // Weights will be set in the loop below.
123 // Construct μ^{k+1} and π_♯^1γ^{k+1} initial candidates
124 //let mut sum_norm_dv = 0.0;
125 let γ_prev_len = γ1.len();
126 assert!(μ.len() >= γ_prev_len);
127 γ1.extend(μ[γ_prev_len..].iter().cloned());
128
129 // Calculate initial transport and step length.
130 // First calculate initial transported weights
131 for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) {
132 // If old transport has opposing sign, the new transport will be none.
133 ρ.α = if (ρ.α > 0.0 && δ.α < 0.0) || (ρ.α < 0.0 && δ.α > 0.0) {
134 0.0
135 } else {
136 δ.α
137 };
138 }
139
140 // Calculate transport rays.
141 match *θ_or_adaptive {
142 Fixed(θ) => {
143 let θτ = τ * θ;
144 for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) {
145 ρ.x = δ.x - v.differential(&δ.x) * (ρ.α.signum() * θτ);
146 }
147 }
148 AdaptiveMax {
149 l: ℓ_F,
150 ref mut max_transport,
151 g: ref calculate_θ,
152 } => {
153 *max_transport = max_transport.max(γ1.norm(Radon));
154 let θτ = τ * calculate_θ(ℓ_F, *max_transport);
155 for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) {
156 ρ.x = δ.x - v.differential(&δ.x) * (ρ.α.signum() * θτ);
157 }
158 }
159 FullyAdaptive {
160 l: ref mut adaptive_ℓ_F,
161 ref mut max_transport,
162 g: ref calculate_θ,
163 } => {
164 *max_transport = max_transport.max(γ1.norm(Radon));
165 let mut θ = calculate_θ(*adaptive_ℓ_F, *max_transport);
166 // Do two runs through the spikes to update θ, breaking if first run did not cause
167 // a change.
168 for _i in 0..=1 {
169 let mut changes = false;
170 for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) {
171 let dv_x = v.differential(&δ.x);
172 let g = &dv_x * (ρ.α.signum() * θ * τ);
173 ρ.x = δ.x - g;
174 let n = g.norm2();
175 if n >= F::EPSILON {
176 // Estimate Lipschitz factor of ∇v
177 let this_ℓ_F = (dv_x - v.differential(&ρ.x)).norm2() / n;
178 *adaptive_ℓ_F = adaptive_ℓ_F.max(this_ℓ_F);
179 θ = calculate_θ(*adaptive_ℓ_F, *max_transport);
180 changes = true
181 }
182 }
183 if !changes {
184 break;
185 }
186 }
187 }
188 }
189
190 // Set initial guess for μ=μ^{k+1}.
191 for (δ, ρ, &β) in izip!(μ.iter_spikes_mut(), γ1.iter_spikes(), μ_base_masses.iter()) {
192 if ρ.α.abs() > F::EPSILON {
193 δ.x = ρ.x;
194 //δ.α = ρ.α; // already set above
195 } else {
196 δ.α = β;
197 }
198 }
199 // Calculate μ^k-π_♯^0γ^{k+1} and v̆ = A_*(A[μ_transported + μ_transported_base]-b)
200 μ_base_minus_γ0.set_masses(
201 μ_base_masses
202 .iter()
203 .zip(γ1.iter_masses())
204 .map(|(&a, b)| a - b),
205 );
206 (μ_base_masses, μ_base_minus_γ0)
207 }
208
209 /// A posteriori transport adaptation.
210 #[replace_float_literals(F::cast_from(literal))]
211 pub(crate) fn aposteriori_transport<F, const N: usize>(
212 γ1: &mut RNDM<F, N>,
213 μ: &mut RNDM<F, N>,
214 μ_base_minus_γ0: &mut RNDM<F, N>,
215 μ_base_masses: &Vec<F>,
216 extra: Option<F>,
217 ε: F,
218 tconfig: &TransportConfig<F>,
219 ) -> bool
220 where
221 F: Float + ToNalgebraRealField,
222 {
223 // 1. If π_♯^1γ^{k+1} = γ1 has non-zero mass at some point y, but μ = μ^{k+1} does not,
224 // then the ansatz ∇w̃_x(y) = w^{k+1}(y) may not be satisfied. So set the mass of γ1
225 // at that point to zero, and retry.
226 let mut all_ok = true;
227 for (α_μ, α_γ1) in izip!(μ.iter_masses(), γ1.iter_masses_mut()) {
228 if α_μ == 0.0 && *α_γ1 != 0.0 {
229 all_ok = false;
230 *α_γ1 = 0.0;
231 }
232 }
233
234 // 2. Through bounding ∫ B_ω(y, z) dλ(x, y, z).
235 // through the estimate ≤ C ‖Δ‖‖γ^{k+1}‖ for Δ := μ^{k+1}-μ^k-(π_♯^1-π_♯^0)γ^{k+1},
236 // which holds for some some C if the convolution kernel in 𝒟 has Lipschitz gradient.
237 let nγ = γ1.norm(Radon);
238 let nΔ = μ_base_minus_γ0.norm(Radon) + μ.dist_matching(&γ1) + extra.unwrap_or(0.0);
239 let t = ε * tconfig.tolerance_mult_con;
240 if nγ * nΔ > t {
241 // Since t/(nγ*nΔ)<1, and the constant tconfig.adaptation < 1,
242 // this will guarantee that eventually ‖γ‖ decreases sufficiently that we
243 // will not enter here.
244 *γ1 *= tconfig.adaptation * t / (nγ * nΔ);
245 all_ok = false
246 }
247
248 if !all_ok {
249 // Update weights for μ_base_minus_γ0 = μ^k - π_♯^0γ^{k+1}
250 μ_base_minus_γ0.set_masses(
251 μ_base_masses
252 .iter()
253 .zip(γ1.iter_masses())
254 .map(|(&a, b)| a - b),
255 );
256 }
257
258 all_ok
259 }
260
261 /// Iteratively solve the pointsource localisation problem using sliding forward-backward
262 /// splitting
263 ///
264 /// The parametrisation is as for [`pointsource_fb_reg`].
265 /// Inertia is currently not supported.
266 #[replace_float_literals(F::cast_from(literal))]
267 pub fn pointsource_sliding_fb_reg<F, I, A, Reg, P, const N: usize>(
268 opA: &A,
269 b: &A::Observable,
270 reg: Reg,
271 prox_penalty: &P,
272 config: &SlidingFBConfig<F>,
273 iterator: I,
274 mut plotter: SeqPlotter<F, N>,
275 ) -> RNDM<F, N>
276 where
277 F: Float + ToNalgebraRealField,
278 I: AlgIteratorFactory<IterInfo<F, N>>,
279 A: ForwardModel<RNDM<F, N>, F>
280 + AdjointProductBoundedBy<RNDM<F, N>, P, FloatType = F>
281 + BoundedCurvature<FloatType = F>,
282 for<'b> &'b A::Observable: std::ops::Neg<Output = A::Observable> + Instance<A::Observable>,
283 A::PreadjointCodomain: DifferentiableRealMapping<F, N>,
284 RNDM<F, N>: SpikeMerging<F>,
285 Reg: SlidingRegTerm<F, N>,
286 P: ProxPenalty<F, A::PreadjointCodomain, Reg, N>,
287 PlotLookup: Plotting<N>,
288 {
289 // Check parameters
290 assert!(config.τ0 > 0.0, "Invalid step length parameter");
291 config.transport.check();
292
293 // Initialise iterates
294 let mut μ = DiscreteMeasure::new();
295 let mut γ1 = DiscreteMeasure::new();
296 let mut residual = -b; // Has to equal $Aμ-b$.
297
298 // Set up parameters
299 // let opAnorm = opA.opnorm_bound(Radon, L2);
300 //let max_transport = config.max_transport.scale
301 // * reg.radon_norm_bound(b.norm2_squared() / 2.0);
302 //let ℓ = opA.transport.lipschitz_factor(L2Squared) * max_transport;
303 let ℓ = 0.0;
304 let τ = config.τ0 / opA.adjoint_product_bound(prox_penalty).unwrap();
305 let (maybe_ℓ_F0, maybe_transport_lip) = opA.curvature_bound_components();
306 let transport_lip = maybe_transport_lip.unwrap();
307 let calculate_θ = |ℓ_F, max_transport| {
308 let ℓ_r = transport_lip * max_transport;
309 config.transport.θ0 / (τ * (ℓ + ℓ_F + ℓ_r))
310 };
311 let mut θ_or_adaptive = match maybe_ℓ_F0 {
312 //Some(ℓ_F0) => TransportStepLength::Fixed(calculate_θ(ℓ_F0 * b.norm2(), 0.0)),
313 Some(ℓ_F0) => TransportStepLength::AdaptiveMax {
314 l: ℓ_F0 * b.norm2(), // TODO: could estimate computing the real reesidual
315 max_transport: 0.0,
316 g: calculate_θ,
317 },
318 None => TransportStepLength::FullyAdaptive {
319 l: 10.0 * F::EPSILON, // Start with something very small to estimate differentials
320 max_transport: 0.0,
321 g: calculate_θ,
322 },
323 };
324 // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled
325 // by τ compared to the conditional gradient approach.
326 let tolerance = config.insertion.tolerance * τ * reg.tolerance_scaling();
327 let mut ε = tolerance.initial();
328
329 // Statistics
330 let full_stats = |residual: &A::Observable, μ: &RNDM<F, N>, ε, stats| IterInfo {
331 value: residual.norm2_squared_div2() + reg.apply(μ),
332 n_spikes: μ.len(),
333 ε,
334 // postprocessing: config.insertion.postprocessing.then(|| μ.clone()),
335 ..stats
336 };
337 let mut stats = IterInfo::new();
338
339 // Run the algorithm
340 for state in iterator.iter_init(|| full_stats(&residual, &μ, ε, stats.clone())) {
341 // Calculate initial transport
342 let v = opA.preadjoint().apply(residual);
343 let (μ_base_masses, mut μ_base_minus_γ0) =
344 initial_transport(&mut γ1, &mut μ, τ, &mut θ_or_adaptive, v);
345
346 // Solve finite-dimensional subproblem several times until the dual variable for the
347 // regularisation term conforms to the assumptions made for the transport above.
348 let (maybe_d, _within_tolerances, mut τv̆) = 'adapt_transport: loop {
349 // Calculate τv̆ = τA_*(A[μ_transported + μ_transported_base]-b)
350 let residual_μ̆ = calculate_residual2(&γ1, &μ_base_minus_γ0, opA, b);
351 let mut τv̆ = opA.preadjoint().apply(residual_μ̆ * τ);
352
353 // Construct μ^{k+1} by solving finite-dimensional subproblems and insert new spikes.
354 let (maybe_d, within_tolerances) = prox_penalty.insert_and_reweigh(
355 &mut μ,
356 &mut τv̆,
357 &γ1,
358 Some(&μ_base_minus_γ0),
359 τ,
360 ε,
361 &config.insertion,
362 &reg,
363 &state,
364 &mut stats,
365 );
366
367 // A posteriori transport adaptation.
368 if aposteriori_transport(
369 &mut γ1,
370 &mut μ,
371 &mut μ_base_minus_γ0,
372 &μ_base_masses,
373 None,
374 ε,
375 &config.transport,
376 ) {
377 break 'adapt_transport (maybe_d, within_tolerances, τv̆);
378 }
379 };
380
381 stats.untransported_fraction = Some({
382 assert_eq!(μ_base_masses.len(), γ1.len());
383 let (a, b) = stats.untransported_fraction.unwrap_or((0.0, 0.0));
384 let source = μ_base_masses.iter().map(|v| v.abs()).sum();
385 (a + μ_base_minus_γ0.norm(Radon), b + source)
386 });
387 stats.transport_error = Some({
388 assert_eq!(μ_base_masses.len(), γ1.len());
389 let (a, b) = stats.transport_error.unwrap_or((0.0, 0.0));
390 (a + μ.dist_matching(&γ1), b + γ1.norm(Radon))
391 });
392
393 // Merge spikes.
394 // This crucially expects the merge routine to be stable with respect to spike locations,
395 // and not to performing any pruning. That is be to done below simultaneously for γ.
396 let ins = &config.insertion;
397 if ins.merge_now(&state) {
398 stats.merged += prox_penalty.merge_spikes(
399 &mut μ,
400 &mut τv̆,
401 &γ1,
402 Some(&μ_base_minus_γ0),
403 τ,
404 ε,
405 ins,
406 &reg,
407 Some(|μ̃: &RNDM<F, N>| L2Squared.calculate_fit_op(μ̃, opA, b)),
408 );
409 }
410
411 // Prune spikes with zero weight. To maintain correct ordering between μ and γ1, also the
412 // latter needs to be pruned when μ is.
413 // TODO: This could do with a two-vector Vec::retain to avoid copies.
414 let μ_new = DiscreteMeasure::from_iter(μ.iter_spikes().filter(|δ| δ.α != F::ZERO).cloned());
415 if μ_new.len() != μ.len() {
416 let mut μ_iter = μ.iter_spikes();
417 γ1.prune_by(|_| μ_iter.next().unwrap().α != F::ZERO);
418 stats.pruned += μ.len() - μ_new.len();
419 μ = μ_new;
420 }
421
422 // Update residual
423 residual = calculate_residual(&μ, opA, b);
424
425 let iter = state.iteration();
426 stats.this_iters += 1;
427
428 // Give statistics if requested
429 state.if_verbose(|| {
430 plotter.plot_spikes(iter, maybe_d.as_ref(), Some(&τv̆), &μ);
431 full_stats(
432 &residual,
433 &μ,
434 ε,
435 std::mem::replace(&mut stats, IterInfo::new()),
436 )
437 });
438
439 // Update main tolerance for next iteration
440 ε = tolerance.update(ε, iter);
441 }
442
443 postprocess(μ, &config.insertion, L2Squared, opA, b)
444 }

mercurial