Thu, 29 Aug 2024 00:00:00 -0500
Radon FB + sliding improvements
34 | 1 | /*! |
2 | Solver for the point source localisation problem using a simplified forward-backward splitting method. | |
3 | ||
4 | Instead of the $𝒟$-norm of `fb.rs`, this uses a standard Radon norm for the proximal map. | |
5 | */ | |
6 | ||
7 | use numeric_literals::replace_float_literals; | |
8 | use serde::{Serialize, Deserialize}; | |
9 | use colored::Colorize; | |
10 | use nalgebra::DVector; | |
11 | ||
12 | use alg_tools::iterate::{ | |
13 | AlgIteratorFactory, | |
14 | AlgIteratorState, | |
15 | }; | |
16 | use alg_tools::euclidean::Euclidean; | |
17 | use alg_tools::linops::Apply; | |
18 | use alg_tools::sets::Cube; | |
19 | use alg_tools::loc::Loc; | |
20 | use alg_tools::bisection_tree::{ | |
21 | BTFN, | |
22 | Bounds, | |
23 | BTNodeLookup, | |
24 | BTNode, | |
25 | BTSearch, | |
26 | P2Minimise, | |
27 | SupportGenerator, | |
28 | LocalAnalysis, | |
29 | }; | |
30 | use alg_tools::mapping::RealMapping; | |
31 | use alg_tools::nalgebra_support::ToNalgebraRealField; | |
32 | ||
33 | use crate::types::*; | |
34 | use crate::measures::{ | |
35 | DiscreteMeasure, | |
36 | DeltaMeasure, | |
37 | }; | |
38 | use crate::measures::merging::{ | |
39 | SpikeMergingMethod, | |
40 | SpikeMerging, | |
41 | }; | |
42 | use crate::forward_model::ForwardModel; | |
43 | use crate::plot::{ | |
44 | SeqPlotter, | |
45 | Plotting, | |
46 | PlotLookup | |
47 | }; | |
48 | use crate::regularisation::RegTerm; | |
49 | use crate::dataterm::{ | |
50 | calculate_residual, | |
51 | L2Squared, | |
52 | DataTerm, | |
53 | }; | |
54 | ||
55 | use crate::fb::{ | |
56 | FBGenericConfig, | |
57 | postprocess | |
58 | }; | |
59 | ||
60 | /// Settings for [`pointsource_fb_reg`]. | |
61 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] | |
62 | #[serde(default)] | |
63 | pub struct RadonFBConfig<F : Float> { | |
64 | /// Step length scaling | |
65 | pub τ0 : F, | |
66 | /// Generic parameters | |
67 | pub insertion : FBGenericConfig<F>, | |
68 | } | |
69 | ||
70 | #[replace_float_literals(F::cast_from(literal))] | |
71 | impl<F : Float> Default for RadonFBConfig<F> { | |
72 | fn default() -> Self { | |
73 | RadonFBConfig { | |
74 | τ0 : 0.99, | |
75 | insertion : Default::default() | |
76 | } | |
77 | } | |
78 | } | |
79 | ||
80 | #[replace_float_literals(F::cast_from(literal))] | |
81 | pub(crate) fn insert_and_reweigh< | |
82 | 'a, F, GA, BTA, S, Reg, State, const N : usize | |
83 | >( | |
84 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
85 | minus_τv : &mut BTFN<F, GA, BTA, N>, | |
86 | μ_base : &mut DiscreteMeasure<Loc<F, N>, F>, | |
87 | _ν_delta: Option<&DiscreteMeasure<Loc<F, N>, F>>, | |
88 | τ : F, | |
89 | ε : F, | |
90 | config : &FBGenericConfig<F>, | |
91 | reg : &Reg, | |
92 | _state : &State, | |
93 | stats : &mut IterInfo<F, N>, | |
94 | ) | |
95 | where F : Float + ToNalgebraRealField, | |
96 | GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, | |
97 | BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, | |
98 | S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
99 | BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, | |
100 | DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, | |
101 | Reg : RegTerm<F, N>, | |
102 | State : AlgIteratorState { | |
103 | ||
104 | 'i_and_w: for i in 0..=1 { | |
105 | // Optimise weights | |
106 | if μ.len() > 0 { | |
107 | // Form finite-dimensional subproblem. The subproblem references to the original μ^k | |
108 | // from the beginning of the iteration are all contained in the immutable c and g. | |
109 | let g̃ = DVector::from_iterator(μ.len(), | |
110 | μ.iter_locations() | |
111 | .map(|ζ| F::to_nalgebra_mixed(minus_τv.apply(ζ)))); | |
112 | let mut x = μ.masses_dvector(); | |
113 | let y = μ_base.masses_dvector(); | |
114 | ||
115 | // Solve finite-dimensional subproblem. | |
116 | stats.inner_iters += reg.solve_findim_l1squared(&y, &g̃, τ, &mut x, ε, config); | |
117 | ||
118 | // Update masses of μ based on solution of finite-dimensional subproblem. | |
119 | μ.set_masses_dvector(&x); | |
120 | } | |
121 | ||
122 | if i>0 { | |
123 | // Simple debugging test to see if more inserts would be needed. Doesn't seem so. | |
124 | //let n = μ.dist_matching(μ_base); | |
125 | //println!("{:?}", reg.find_tolerance_violation_slack(minus_τv, τ, ε, false, config, n)); | |
126 | break 'i_and_w | |
127 | } | |
128 | ||
129 | // Calculate ‖μ - μ_base‖_ℳ | |
130 | let n = μ.dist_matching(μ_base); | |
131 | ||
132 | // Find a spike to insert, if needed. | |
133 | // This only check the overall tolerances, not tolerances on support of μ-μ_base or μ, | |
134 | // which are supposed to have been guaranteed by the finite-dimensional weight optimisation. | |
135 | match reg.find_tolerance_violation_slack(minus_τv, τ, ε, false, config, n) { | |
136 | None => { break 'i_and_w }, | |
137 | Some((ξ, _v_ξ, _in_bounds)) => { | |
138 | // Weight is found out by running the finite-dimensional optimisation algorithm | |
139 | // above | |
140 | *μ += DeltaMeasure { x : ξ, α : 0.0 }; | |
141 | *μ_base += DeltaMeasure { x : ξ, α : 0.0 }; | |
142 | } | |
143 | }; | |
144 | } | |
145 | } | |
146 | ||
147 | #[replace_float_literals(F::cast_from(literal))] | |
148 | pub(crate) fn prune_and_maybe_simple_merge< | |
149 | 'a, F, GA, BTA, S, Reg, State, const N : usize | |
150 | >( | |
151 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
152 | minus_τv : &mut BTFN<F, GA, BTA, N>, | |
153 | μ_base : &DiscreteMeasure<Loc<F, N>, F>, | |
154 | τ : F, | |
155 | ε : F, | |
156 | config : &FBGenericConfig<F>, | |
157 | reg : &Reg, | |
158 | state : &State, | |
159 | stats : &mut IterInfo<F, N>, | |
160 | ) | |
161 | where F : Float + ToNalgebraRealField, | |
162 | GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, | |
163 | BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, | |
164 | S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
165 | BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, | |
166 | DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, | |
167 | Reg : RegTerm<F, N>, | |
168 | State : AlgIteratorState { | |
169 | ||
170 | assert!(μ_base.len() <= μ.len()); | |
171 | ||
172 | if state.iteration() % config.merge_every == 0 { | |
173 | stats.merged += μ.merge_spikes(config.merging, |μ_candidate| { | |
174 | // Important: μ_candidate's new points are afterwards, | |
175 | // and do not conflict with μ_base. | |
176 | // TODO: could simplify to requiring μ_base instead of μ_radon. | |
177 | // but may complicate with sliding base's exgtra points that need to be | |
178 | // after μ_candidate's extra points. | |
179 | // TODO: doesn't seem to work, maybe need to merge μ_base as well? | |
180 | // Although that doesn't seem to make sense. | |
181 | let μ_radon = μ_candidate.sub_matching(μ_base); | |
182 | reg.verify_merge_candidate_radonsq(minus_τv, μ_candidate, τ, ε, &config, &μ_radon) | |
183 | //let n = μ_candidate.dist_matching(μ_base); | |
184 | //reg.find_tolerance_violation_slack(minus_τv, τ, ε, false, config, n).is_none() | |
185 | }); | |
186 | } | |
187 | ||
188 | let n_before_prune = μ.len(); | |
189 | μ.prune(); | |
190 | debug_assert!(μ.len() <= n_before_prune); | |
191 | stats.pruned += n_before_prune - μ.len(); | |
192 | } | |
193 | ||
194 | ||
195 | /// Iteratively solve the pointsource localisation problem using simplified forward-backward splitting. | |
196 | /// | |
197 | /// The settings in `config` have their [respective documentation](FBConfig). `opA` is the | |
198 | /// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight. | |
199 | /// Finally, the `iterator` is an outer loop verbosity and iteration count control | |
200 | /// as documented in [`alg_tools::iterate`]. | |
201 | /// | |
202 | /// For details on the mathematical formulation, see the [module level](self) documentation. | |
203 | /// | |
204 | /// The implementation relies on [`alg_tools::bisection_tree::BTFN`] presentations of | |
205 | /// sums of simple functions usign bisection trees, and the related | |
206 | /// [`alg_tools::bisection_tree::Aggregator`]s, to efficiently search for component functions | |
207 | /// active at a specific points, and to maximise their sums. Through the implementation of the | |
208 | /// [`alg_tools::bisection_tree::BT`] bisection trees, it also relies on the copy-on-write features | |
209 | /// of [`std::sync::Arc`] to only update relevant parts of the bisection tree when adding functions. | |
210 | /// | |
211 | /// Returns the final iterate. | |
212 | #[replace_float_literals(F::cast_from(literal))] | |
213 | pub fn pointsource_radon_fb_reg< | |
214 | 'a, F, I, A, GA, BTA, S, Reg, const N : usize | |
215 | >( | |
216 | opA : &'a A, | |
217 | b : &A::Observable, | |
218 | reg : Reg, | |
219 | fbconfig : &RadonFBConfig<F>, | |
220 | iterator : I, | |
221 | mut _plotter : SeqPlotter<F, N>, | |
222 | ) -> DiscreteMeasure<Loc<F, N>, F> | |
223 | where F : Float + ToNalgebraRealField, | |
224 | I : AlgIteratorFactory<IterInfo<F, N>>, | |
225 | for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>, | |
226 | //+ std::ops::Mul<F, Output=A::Observable>, <-- FIXME: compiler overflow | |
227 | A::Observable : std::ops::MulAssign<F>, | |
228 | GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, | |
229 | A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>, | |
230 | BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, | |
231 | S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
232 | BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, | |
233 | Cube<F, N>: P2Minimise<Loc<F, N>, F>, | |
234 | PlotLookup : Plotting<N>, | |
235 | DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, | |
236 | Reg : RegTerm<F, N> { | |
237 | ||
238 | // Set up parameters | |
239 | let config = &fbconfig.insertion; | |
240 | // We need L such that the descent inequality F(ν) - F(μ) - ⟨F'(μ),ν-μ⟩ ≤ (L/2)‖ν-μ‖²_ℳ ∀ ν,μ | |
241 | // holds. Since the left hand side expands as (1/2)‖A(ν-μ)‖₂², this is to say, we need L such | |
242 | // that ‖Aμ‖₂² ≤ L ‖μ‖²_ℳ ∀ μ. Thus `opnorm_bound` gives the square root of L. | |
243 | let τ = fbconfig.τ0/opA.opnorm_bound().powi(2); | |
244 | // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled | |
245 | // by τ compared to the conditional gradient approach. | |
246 | let tolerance = config.tolerance * τ * reg.tolerance_scaling(); | |
247 | let mut ε = tolerance.initial(); | |
248 | ||
249 | // Initialise iterates | |
250 | let mut μ = DiscreteMeasure::new(); | |
251 | let mut residual = -b; | |
252 | let mut stats = IterInfo::new(); | |
253 | ||
254 | // Run the algorithm | |
255 | iterator.iterate(|state| { | |
256 | // Calculate smooth part of surrogate model. | |
257 | // Using `std::mem::replace` here is not ideal, and expects that `empty_observable` | |
258 | // has no significant overhead. For some reosn Rust doesn't allow us simply moving | |
259 | // the residual and replacing it below before the end of this closure. | |
260 | residual *= -τ; | |
261 | let r = std::mem::replace(&mut residual, opA.empty_observable()); | |
262 | let mut minus_τv = opA.preadjoint().apply(r); | |
263 | ||
264 | // Save current base point | |
265 | let mut μ_base = μ.clone(); | |
266 | ||
267 | // Insert and reweigh | |
268 | insert_and_reweigh( | |
269 | &mut μ, &mut minus_τv, &mut μ_base, None, | |
270 | τ, ε, | |
271 | config, ®, state, &mut stats | |
272 | ); | |
273 | ||
274 | // Prune and possibly merge spikes | |
275 | prune_and_maybe_simple_merge( | |
276 | &mut μ, &mut minus_τv, &μ_base, | |
277 | τ, ε, | |
278 | config, ®, state, &mut stats | |
279 | ); | |
280 | ||
281 | // Update residual | |
282 | residual = calculate_residual(&μ, opA, b); | |
283 | ||
284 | // Update main tolerance for next iteration | |
285 | let ε_prev = ε; | |
286 | ε = tolerance.update(ε, state.iteration()); | |
287 | stats.this_iters += 1; | |
288 | ||
289 | // Give function value if needed | |
290 | state.if_verbose(|| { | |
291 | // Plot if so requested | |
292 | // plotter.plot_spikes( | |
293 | // format!("iter {} end;", state.iteration()), &d, | |
294 | // "start".to_string(), Some(&minus_τv), | |
295 | // reg.target_bounds(τ, ε_prev), &μ, | |
296 | // ); | |
297 | // Calculate mean inner iterations and reset relevant counters. | |
298 | // Return the statistics | |
299 | let res = IterInfo { | |
300 | value : residual.norm2_squared_div2() + reg.apply(&μ), | |
301 | n_spikes : μ.len(), | |
302 | ε : ε_prev, | |
303 | postprocessing: config.postprocessing.then(|| μ.clone()), | |
304 | .. stats | |
305 | }; | |
306 | stats = IterInfo::new(); | |
307 | res | |
308 | }) | |
309 | }); | |
310 | ||
311 | postprocess(μ, config, L2Squared, opA, b) | |
312 | } | |
313 | ||
314 | /// Iteratively solve the pointsource localisation problem using simplified inertial forward-backward splitting. | |
315 | /// | |
316 | /// The settings in `config` have their [respective documentation](FBConfig). `opA` is the | |
317 | /// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight. | |
318 | /// Finally, the `iterator` is an outer loop verbosity and iteration count control | |
319 | /// as documented in [`alg_tools::iterate`]. | |
320 | /// | |
321 | /// For details on the mathematical formulation, see the [module level](self) documentation. | |
322 | /// | |
323 | /// The implementation relies on [`alg_tools::bisection_tree::BTFN`] presentations of | |
324 | /// sums of simple functions usign bisection trees, and the related | |
325 | /// [`alg_tools::bisection_tree::Aggregator`]s, to efficiently search for component functions | |
326 | /// active at a specific points, and to maximise their sums. Through the implementation of the | |
327 | /// [`alg_tools::bisection_tree::BT`] bisection trees, it also relies on the copy-on-write features | |
328 | /// of [`std::sync::Arc`] to only update relevant parts of the bisection tree when adding functions. | |
329 | /// | |
330 | /// Returns the final iterate. | |
331 | #[replace_float_literals(F::cast_from(literal))] | |
332 | pub fn pointsource_radon_fista_reg< | |
333 | 'a, F, I, A, GA, BTA, S, Reg, const N : usize | |
334 | >( | |
335 | opA : &'a A, | |
336 | b : &A::Observable, | |
337 | reg : Reg, | |
338 | fbconfig : &RadonFBConfig<F>, | |
339 | iterator : I, | |
340 | mut _plotter : SeqPlotter<F, N>, | |
341 | ) -> DiscreteMeasure<Loc<F, N>, F> | |
342 | where F : Float + ToNalgebraRealField, | |
343 | I : AlgIteratorFactory<IterInfo<F, N>>, | |
344 | for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>, | |
345 | //+ std::ops::Mul<F, Output=A::Observable>, <-- FIXME: compiler overflow | |
346 | A::Observable : std::ops::MulAssign<F>, | |
347 | GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, | |
348 | A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>, | |
349 | BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, | |
350 | S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
351 | BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, | |
352 | Cube<F, N>: P2Minimise<Loc<F, N>, F>, | |
353 | PlotLookup : Plotting<N>, | |
354 | DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, | |
355 | Reg : RegTerm<F, N> { | |
356 | ||
357 | // Set up parameters | |
358 | let config = &fbconfig.insertion; | |
359 | // We need L such that the descent inequality F(ν) - F(μ) - ⟨F'(μ),ν-μ⟩ ≤ (L/2)‖ν-μ‖²_ℳ ∀ ν,μ | |
360 | // holds. Since the left hand side expands as (1/2)‖A(ν-μ)‖₂², this is to say, we need L such | |
361 | // that ‖Aμ‖₂² ≤ L ‖μ‖²_ℳ ∀ μ. Thus `opnorm_bound` gives the square root of L. | |
362 | let τ = fbconfig.τ0/opA.opnorm_bound().powi(2); | |
363 | let mut λ = 1.0; | |
364 | // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled | |
365 | // by τ compared to the conditional gradient approach. | |
366 | let tolerance = config.tolerance * τ * reg.tolerance_scaling(); | |
367 | let mut ε = tolerance.initial(); | |
368 | ||
369 | // Initialise iterates | |
370 | let mut μ = DiscreteMeasure::new(); | |
371 | let mut μ_prev = DiscreteMeasure::new(); | |
372 | let mut residual = -b; | |
373 | let mut stats = IterInfo::new(); | |
374 | let mut warned_merging = false; | |
375 | ||
376 | // Run the algorithm | |
377 | iterator.iterate(|state| { | |
378 | // Calculate smooth part of surrogate model. | |
379 | // Using `std::mem::replace` here is not ideal, and expects that `empty_observable` | |
380 | // has no significant overhead. For some reosn Rust doesn't allow us simply moving | |
381 | // the residual and replacing it below before the end of this closure. | |
382 | residual *= -τ; | |
383 | let r = std::mem::replace(&mut residual, opA.empty_observable()); | |
384 | let mut minus_τv = opA.preadjoint().apply(r); | |
385 | ||
386 | // Save current base point | |
387 | let mut μ_base = μ.clone(); | |
388 | ||
389 | // Insert new spikes and reweigh | |
390 | insert_and_reweigh( | |
391 | &mut μ, &mut minus_τv, &mut μ_base, None, | |
392 | τ, ε, | |
393 | config, ®, state, &mut stats | |
394 | ); | |
395 | ||
396 | // (Do not) merge spikes. | |
397 | if state.iteration() % config.merge_every == 0 { | |
398 | match config.merging { | |
399 | SpikeMergingMethod::None => { }, | |
400 | _ => if !warned_merging { | |
401 | let err = format!("Merging not supported for μFISTA"); | |
402 | println!("{}", err.red()); | |
403 | warned_merging = true; | |
404 | } | |
405 | } | |
406 | } | |
407 | ||
408 | // Update inertial prameters | |
409 | let λ_prev = λ; | |
410 | λ = 2.0 * λ_prev / ( λ_prev + (4.0 + λ_prev * λ_prev).sqrt() ); | |
411 | let θ = λ / λ_prev - λ; | |
412 | ||
413 | // Perform inertial update on μ. | |
414 | // This computes μ ← (1 + θ) * μ - θ * μ_prev, pruning spikes where both μ | |
415 | // and μ_prev have zero weight. Since both have weights from the finite-dimensional | |
416 | // subproblem with a proximal projection step, this is likely to happen when the | |
417 | // spike is not needed. A copy of the pruned μ without artithmetic performed is | |
418 | // stored in μ_prev. | |
419 | let n_before_prune = μ.len(); | |
420 | μ.pruning_sub(1.0 + θ, θ, &mut μ_prev); | |
421 | debug_assert!(μ.len() <= n_before_prune); | |
422 | stats.pruned += n_before_prune - μ.len(); | |
423 | ||
424 | // Update residual | |
425 | residual = calculate_residual(&μ, opA, b); | |
426 | ||
427 | // Update main tolerance for next iteration | |
428 | let ε_prev = ε; | |
429 | ε = tolerance.update(ε, state.iteration()); | |
430 | stats.this_iters += 1; | |
431 | ||
432 | // Give function value if needed | |
433 | state.if_verbose(|| { | |
434 | // Plot if so requested | |
435 | // plotter.plot_spikes( | |
436 | // format!("iter {} end;", state.iteration()), &d, | |
437 | // "start".to_string(), Some(&minus_τv), | |
438 | // reg.target_bounds(τ, ε_prev), &μ_prev, | |
439 | // ); | |
440 | // Calculate mean inner iterations and reset relevant counters. | |
441 | // Return the statistics | |
442 | let res = IterInfo { | |
443 | value : L2Squared.calculate_fit_op(&μ_prev, opA, b) + reg.apply(&μ_prev), | |
444 | n_spikes : μ_prev.len(), | |
445 | ε : ε_prev, | |
446 | postprocessing: config.postprocessing.then(|| μ_prev.clone()), | |
447 | .. stats | |
448 | }; | |
449 | stats = IterInfo::new(); | |
450 | res | |
451 | }) | |
452 | }); | |
453 | ||
454 | postprocess(μ_prev, config, L2Squared, opA, b) | |
455 | } |