Thu, 23 Jan 2025 23:35:28 +0100
Generic proximal penalty support
35 | 1 | /*! |
2 | Solver for the point source localisation problem using a sliding | |
3 | primal-dual proximal splitting method. | |
4 | */ | |
5 | ||
6 | use numeric_literals::replace_float_literals; | |
7 | use serde::{Serialize, Deserialize}; | |
8 | //use colored::Colorize; | |
9 | //use nalgebra::{DVector, DMatrix}; | |
10 | use std::iter::Iterator; | |
11 | ||
12 | use alg_tools::iterate::AlgIteratorFactory; | |
13 | use alg_tools::euclidean::Euclidean; | |
14 | use alg_tools::mapping::{Mapping, DifferentiableRealMapping, Instance}; | |
15 | use alg_tools::norms::Norm; | |
16 | use alg_tools::direct_product::Pair; | |
17 | use alg_tools::nalgebra_support::ToNalgebraRealField; | |
18 | use alg_tools::linops::{ | |
19 | BoundedLinear, AXPY, GEMV, Adjointable, IdOp, | |
20 | }; | |
21 | use alg_tools::convex::{Conjugable, Prox}; | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
22 | use alg_tools::norms::{L2, PairNorm}; |
35 | 23 | |
24 | use crate::types::*; | |
25 | use crate::measures::{DiscreteMeasure, Radon, RNDM}; | |
26 | use crate::measures::merging::SpikeMerging; | |
27 | use crate::forward_model::{ | |
28 | ForwardModel, | |
29 | AdjointProductPairBoundedBy, | |
30 | LipschitzValues, | |
31 | }; | |
32 | // use crate::transport::TransportLipschitz; | |
33 | //use crate::tolerance::Tolerance; | |
34 | use crate::plot::{ | |
35 | SeqPlotter, | |
36 | Plotting, | |
37 | PlotLookup | |
38 | }; | |
39 | use crate::fb::*; | |
40 | use crate::regularisation::SlidingRegTerm; | |
41 | // use crate::dataterm::L2Squared; | |
42 | use crate::sliding_fb::{ | |
43 | TransportConfig, | |
44 | TransportStepLength, | |
45 | initial_transport, | |
46 | aposteriori_transport, | |
47 | }; | |
48 | use crate::dataterm::{calculate_residual, calculate_residual2}; | |
49 | ||
50 | /// Settings for [`pointsource_sliding_pdps_pair`]. | |
51 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] | |
52 | #[serde(default)] | |
53 | pub struct SlidingPDPSConfig<F : Float> { | |
54 | /// Primal step length scaling. | |
55 | pub τ0 : F, | |
56 | /// Primal step length scaling. | |
57 | pub σp0 : F, | |
58 | /// Dual step length scaling. | |
59 | pub σd0 : F, | |
60 | /// Transport parameters | |
61 | pub transport : TransportConfig<F>, | |
62 | /// Generic parameters | |
63 | pub insertion : FBGenericConfig<F>, | |
64 | } | |
65 | ||
66 | #[replace_float_literals(F::cast_from(literal))] | |
67 | impl<F : Float> Default for SlidingPDPSConfig<F> { | |
68 | fn default() -> Self { | |
69 | let τ0 = 0.99; | |
70 | SlidingPDPSConfig { | |
71 | τ0, | |
72 | σd0 : 0.1, | |
73 | σp0 : 0.99, | |
74 | transport : Default::default(), | |
75 | insertion : Default::default() | |
76 | } | |
77 | } | |
78 | } | |
79 | ||
80 | type MeasureZ<F, Z, const N : usize> = Pair<RNDM<F, N>, Z>; | |
81 | ||
82 | /// Iteratively solve the pointsource localisation with an additional variable | |
83 | /// using sliding primal-dual proximal splitting | |
84 | /// | |
85 | /// The parametrisation is as for [`crate::forward_pdps::pointsource_forward_pdps_pair`]. | |
86 | #[replace_float_literals(F::cast_from(literal))] | |
87 | pub fn pointsource_sliding_pdps_pair< | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
88 | F, I, A, S, Reg, P, Z, R, Y, /*KOpM, */ KOpZ, H, const N : usize |
35 | 89 | >( |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
90 | opA : &A, |
35 | 91 | b : &A::Observable, |
92 | reg : Reg, | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
93 | prox_penalty : &P, |
35 | 94 | config : &SlidingPDPSConfig<F>, |
95 | iterator : I, | |
96 | mut plotter : SeqPlotter<F, N>, | |
97 | //opKμ : KOpM, | |
98 | opKz : &KOpZ, | |
99 | fnR : &R, | |
100 | fnH : &H, | |
101 | mut z : Z, | |
102 | mut y : Y, | |
103 | ) -> MeasureZ<F, Z, N> | |
104 | where | |
105 | F : Float + ToNalgebraRealField, | |
106 | I : AlgIteratorFactory<IterInfo<F, N>>, | |
107 | A : ForwardModel< | |
108 | MeasureZ<F, Z, N>, | |
109 | F, | |
110 | PairNorm<Radon, L2, L2>, | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
111 | PreadjointCodomain = Pair<S, Z>, |
35 | 112 | > |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
113 | + AdjointProductPairBoundedBy<MeasureZ<F, Z, N>, P, IdOp<Z>, FloatType=F>, |
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
114 | S : DifferentiableRealMapping<F, N>, |
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
115 | for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + Instance<A::Observable>, |
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
116 | for<'b> A::Preadjoint<'b> : LipschitzValues<FloatType=F>, |
35 | 117 | PlotLookup : Plotting<N>, |
118 | RNDM<F, N> : SpikeMerging<F>, | |
119 | Reg : SlidingRegTerm<F, N>, | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
120 | P : ProxPenalty<F, S, Reg, N>, |
35 | 121 | // KOpM : Linear<RNDM<F, N>, Codomain=Y> |
122 | // + GEMV<F, RNDM<F, N>> | |
123 | // + Preadjointable< | |
124 | // RNDM<F, N>, Y, | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
125 | // PreadjointCodomain = S, |
35 | 126 | // > |
127 | // + TransportLipschitz<L2Squared, FloatType=F> | |
128 | // + AdjointProductBoundedBy<RNDM<F, N>, 𝒟, FloatType=F>, | |
129 | // for<'b> KOpM::Preadjoint<'b> : GEMV<F, Y>, | |
130 | // Since Z is Hilbert, we may just as well use adjoints for K_z. | |
131 | KOpZ : BoundedLinear<Z, L2, L2, F, Codomain=Y> | |
132 | + GEMV<F, Z> | |
133 | + Adjointable<Z, Y, AdjointCodomain = Z>, | |
134 | for<'b> KOpZ::Adjoint<'b> : GEMV<F, Y>, | |
135 | Y : AXPY<F> + Euclidean<F, Output=Y> + Clone + ClosedAdd, | |
136 | for<'b> &'b Y : Instance<Y>, | |
137 | Z : AXPY<F, Owned=Z> + Euclidean<F, Output=Z> + Clone + Norm<F, L2>, | |
138 | for<'b> &'b Z : Instance<Z>, | |
139 | R : Prox<Z, Codomain=F>, | |
140 | H : Conjugable<Y, F, Codomain=F>, | |
141 | for<'b> H::Conjugate<'b> : Prox<Y>, | |
142 | { | |
143 | ||
144 | // Check parameters | |
145 | assert!(config.τ0 > 0.0 && | |
146 | config.τ0 < 1.0 && | |
147 | config.σp0 > 0.0 && | |
148 | config.σp0 < 1.0 && | |
149 | config.σd0 > 0.0 && | |
150 | config.σp0 * config.σd0 <= 1.0, | |
151 | "Invalid step length parameters"); | |
152 | config.transport.check(); | |
153 | ||
154 | // Initialise iterates | |
155 | let mut μ = DiscreteMeasure::new(); | |
156 | let mut γ1 = DiscreteMeasure::new(); | |
157 | let mut residual = calculate_residual(Pair(&μ, &z), opA, b); | |
158 | let zero_z = z.similar_origin(); | |
159 | ||
160 | // Set up parameters | |
161 | // TODO: maybe this PairNorm doesn't make sense here? | |
162 | let opAnorm = opA.opnorm_bound(PairNorm(Radon, L2, L2), L2); | |
163 | let bigθ = 0.0; //opKμ.transport_lipschitz_factor(L2Squared); | |
164 | let bigM = 0.0; //opKμ.adjoint_product_bound(&op𝒟).unwrap().sqrt(); | |
165 | let nKz = opKz.opnorm_bound(L2, L2); | |
166 | let ℓ = 0.0; | |
167 | let opIdZ = IdOp::new(); | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
168 | let (l, l_z) = opA.adjoint_product_pair_bound(prox_penalty, &opIdZ).unwrap(); |
35 | 169 | // We need to satisfy |
170 | // | |
171 | // τσ_dM(1-σ_p L_z)/(1 - τ L) + [σ_p L_z + σ_pσ_d‖K_z‖^2] < 1 | |
172 | // ^^^^^^^^^^^^^^^^^^^^^^^^^ | |
173 | // with 1 > σ_p L_z and 1 > τ L. | |
174 | // | |
175 | // To do so, we first solve σ_p and σ_d from standard PDPS step length condition | |
176 | // ^^^^^ < 1. then we solve τ from the rest. | |
177 | let σ_d = config.σd0 / nKz; | |
178 | let σ_p = config.σp0 / (l_z + config.σd0 * nKz); | |
179 | // Observe that = 1 - ^^^^^^^^^^^^^^^^^^^^^ = 1 - σ_{p,0} | |
180 | // We get the condition τσ_d M (1-σ_p L_z) < (1-σ_{p,0})*(1-τ L) | |
181 | // ⟺ τ [ σ_d M (1-σ_p L_z) + (1-σ_{p,0}) L ] < (1-σ_{p,0}) | |
182 | let φ = 1.0 - config.σp0; | |
183 | let a = 1.0 - σ_p * l_z; | |
184 | let τ = config.τ0 * φ / ( σ_d * bigM * a + φ * l ); | |
185 | let ψ = 1.0 - τ * l; | |
186 | let β = σ_p * config.σd0 * nKz / a; // σ_p * σ_d * (nKz * nK_z) / a; | |
187 | assert!(β < 1.0); | |
188 | // Now we need κ‖K_μ(π_♯^1 - π_♯^0)γ‖^2 ≤ (1/θ - τ[ℓ_v + ℓ]) ∫ c_2 dγ for κ defined as: | |
36 | 189 | let κ = τ * σ_d * ψ / ((1.0 - β) * ψ - τ * σ_d * bigM); |
35 | 190 | // The factor two in the manuscript disappears due to the definition of 𝚹 being |
191 | // for ‖x-y‖₂² instead of c_2(x, y)=‖x-y‖₂²/2. | |
192 | let calculate_θ = |ℓ_v, max_transport| { | |
193 | config.transport.θ0 / (τ*(ℓ + ℓ_v) + κ * bigθ * max_transport) | |
194 | }; | |
195 | let mut θ_or_adaptive = match opA.preadjoint().value_diff_unit_lipschitz_factor() { | |
196 | // We only estimate w (the uniform Lipschitz for of v), if we also estimate ℓ_v | |
197 | // (the uniform Lipschitz factor of ∇v). | |
198 | // We assume that the residual is decreasing. | |
199 | Some(ℓ_v0) => TransportStepLength::AdaptiveMax{ | |
200 | l: ℓ_v0 * b.norm2(), | |
201 | max_transport : 0.0, | |
202 | g : calculate_θ | |
203 | }, | |
204 | None => TransportStepLength::FullyAdaptive{ | |
205 | l : 0.0, | |
206 | max_transport : 0.0, | |
207 | g : calculate_θ | |
208 | }, | |
209 | }; | |
210 | // Acceleration is not currently supported | |
211 | // let γ = dataterm.factor_of_strong_convexity(); | |
212 | let ω = 1.0; | |
213 | ||
214 | // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled | |
215 | // by τ compared to the conditional gradient approach. | |
216 | let tolerance = config.insertion.tolerance * τ * reg.tolerance_scaling(); | |
217 | let mut ε = tolerance.initial(); | |
218 | ||
219 | let starH = fnH.conjugate(); | |
220 | ||
221 | // Statistics | |
222 | let full_stats = |residual : &A::Observable, μ : &RNDM<F, N>, z : &Z, ε, stats| IterInfo { | |
223 | value : residual.norm2_squared_div2() + fnR.apply(z) | |
224 | + reg.apply(μ) + fnH.apply(/* opKμ.apply(μ) + */ opKz.apply(z)), | |
225 | n_spikes : μ.len(), | |
226 | ε, | |
227 | // postprocessing: config.insertion.postprocessing.then(|| μ.clone()), | |
228 | .. stats | |
229 | }; | |
230 | let mut stats = IterInfo::new(); | |
231 | ||
232 | // Run the algorithm | |
233 | for state in iterator.iter_init(|| full_stats(&residual, &μ, &z, ε, stats.clone())) { | |
234 | // Calculate initial transport | |
235 | let Pair(v, _) = opA.preadjoint().apply(&residual); | |
236 | //opKμ.preadjoint().apply_add(&mut v, y); | |
237 | let z_base = z.clone(); | |
238 | // We want to proceed as in Example 4.12 but with v and v̆ as in §5. | |
239 | // With A(ν, z) = A_μ ν + A_z z, following Example 5.1, we have | |
240 | // P_ℳ[F'(ν, z) + Ξ(ν, z, y)]= A_ν^*[A_ν ν + A_z z] + K_μ ν = A_ν^*A(ν, z) + K_μ ν, | |
241 | // where A_ν^* becomes a multiplier. | |
242 | // This is much easier with K_μ = 0, which is the only reason why are enforcing it. | |
243 | // TODO: Write a version of initial_transport that can deal with K_μ ≠ 0. | |
244 | ||
245 | let (μ_base_masses, mut μ_base_minus_γ0) = initial_transport( | |
246 | &mut γ1, &mut μ, |ν| opA.apply(Pair(ν, &z)), | |
247 | ε, τ, &mut θ_or_adaptive, opAnorm, | |
248 | v, &config.transport, | |
249 | ); | |
250 | ||
251 | // Solve finite-dimensional subproblem several times until the dual variable for the | |
252 | // regularisation term conforms to the assumptions made for the transport above. | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
253 | let (maybe_d, _within_tolerances, Pair(τv̆, τz̆)) = 'adapt_transport: loop { |
35 | 254 | // Calculate τv̆ = τA_*(A[μ_transported + μ_transported_base]-b) |
255 | let residual_μ̆ = calculate_residual2(Pair(&γ1, &z), | |
256 | Pair(&μ_base_minus_γ0, &zero_z), | |
257 | opA, b); | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
258 | let mut τv̆z = opA.preadjoint().apply(residual_μ̆ * τ); |
35 | 259 | // opKμ.preadjoint().gemv(&mut τv̆, τ, y, 1.0); |
260 | ||
261 | // Construct μ^{k+1} by solving finite-dimensional subproblems and insert new spikes. | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
262 | let (maybe_d, within_tolerances) = prox_penalty.insert_and_reweigh( |
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
263 | &mut μ, &mut τv̆z.0, &γ1, Some(&μ_base_minus_γ0), |
35 | 264 | τ, ε, &config.insertion, |
265 | ®, &state, &mut stats, | |
266 | ); | |
267 | ||
268 | // A posteriori transport adaptation. | |
269 | // TODO: this does not properly treat v^{k+1} - v̆^k that depends on z^{k+1}! | |
270 | if aposteriori_transport( | |
271 | &mut γ1, &mut μ, &mut μ_base_minus_γ0, &μ_base_masses, | |
272 | ε, &config.transport | |
273 | ) { | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
274 | break 'adapt_transport (maybe_d, within_tolerances, τv̆z) |
35 | 275 | } |
276 | }; | |
277 | ||
278 | stats.untransported_fraction = Some({ | |
279 | assert_eq!(μ_base_masses.len(), γ1.len()); | |
280 | let (a, b) = stats.untransported_fraction.unwrap_or((0.0, 0.0)); | |
281 | let source = μ_base_masses.iter().map(|v| v.abs()).sum(); | |
282 | (a + μ_base_minus_γ0.norm(Radon), b + source) | |
283 | }); | |
284 | stats.transport_error = Some({ | |
285 | assert_eq!(μ_base_masses.len(), γ1.len()); | |
286 | let (a, b) = stats.transport_error.unwrap_or((0.0, 0.0)); | |
287 | (a + μ.dist_matching(&γ1), b + γ1.norm(Radon)) | |
288 | }); | |
289 | ||
290 | // // Merge spikes. | |
291 | // // This expects the prune below to prune γ. | |
292 | // // TODO: This may not work correctly in all cases. | |
293 | // let ins = &config.insertion; | |
294 | // if ins.merge_now(&state) { | |
295 | // if let SpikeMergingMethod::None = ins.merging { | |
296 | // } else { | |
297 | // stats.merged += μ.merge_spikes(ins.merging, |μ_candidate| { | |
298 | // let ν = μ_candidate.sub_matching(&γ1)-&μ_base_minus_γ0; | |
299 | // let mut d = &τv̆ + op𝒟.preapply(ν); | |
300 | // reg.verify_merge_candidate(&mut d, μ_candidate, τ, ε, ins) | |
301 | // }); | |
302 | // } | |
303 | // } | |
304 | ||
305 | // Prune spikes with zero weight. To maintain correct ordering between μ and γ1, also the | |
306 | // latter needs to be pruned when μ is. | |
307 | // TODO: This could do with a two-vector Vec::retain to avoid copies. | |
308 | let μ_new = DiscreteMeasure::from_iter(μ.iter_spikes().filter(|δ| δ.α != F::ZERO).cloned()); | |
309 | if μ_new.len() != μ.len() { | |
310 | let mut μ_iter = μ.iter_spikes(); | |
311 | γ1.prune_by(|_| μ_iter.next().unwrap().α != F::ZERO); | |
312 | stats.pruned += μ.len() - μ_new.len(); | |
313 | μ = μ_new; | |
314 | } | |
315 | ||
316 | // Do z variable primal update | |
317 | z.axpy(-σ_p/τ, τz̆, 1.0); // TODO: simplify nasty factors | |
318 | opKz.adjoint().gemv(&mut z, -σ_p, &y, 1.0); | |
319 | z = fnR.prox(σ_p, z); | |
320 | // Do dual update | |
321 | // opKμ.gemv(&mut y, σ_d*(1.0 + ω), &μ, 1.0); // y = y + σ_d K[(1+ω)(μ,z)^{k+1}] | |
322 | opKz.gemv(&mut y, σ_d*(1.0 + ω), &z, 1.0); | |
323 | // opKμ.gemv(&mut y, -σ_d*ω, μ_base, 1.0);// y = y + σ_d K[(1+ω)(μ,z)^{k+1} - ω (μ,z)^k]-b | |
324 | opKz.gemv(&mut y, -σ_d*ω, z_base, 1.0);// y = y + σ_d K[(1+ω)(μ,z)^{k+1} - ω (μ,z)^k]-b | |
325 | y = starH.prox(σ_d, y); | |
326 | ||
327 | // Update residual | |
328 | residual = calculate_residual(Pair(&μ, &z), opA, b); | |
329 | ||
330 | // Update step length parameters | |
331 | // let ω = pdpsconfig.acceleration.accelerate(&mut τ, &mut σ, γ); | |
332 | ||
333 | // Give statistics if requested | |
334 | let iter = state.iteration(); | |
335 | stats.this_iters += 1; | |
336 | ||
337 | state.if_verbose(|| { | |
37
c5d8bd1a7728
Generic proximal penalty support
Tuomo Valkonen <tuomov@iki.fi>
parents:
36
diff
changeset
|
338 | plotter.plot_spikes(iter, maybe_d.as_ref(), Some(&τv̆), &μ); |
35 | 339 | full_stats(&residual, &μ, &z, ε, std::mem::replace(&mut stats, IterInfo::new())) |
340 | }); | |
341 | ||
342 | // Update main tolerance for next iteration | |
343 | ε = tolerance.update(ε, iter); | |
344 | } | |
345 | ||
346 | let fit = |μ̃ : &RNDM<F, N>| { | |
347 | (opA.apply(Pair(μ̃, &z))-b).norm2_squared_div2() | |
348 | //+ fnR.apply(z) + reg.apply(μ) | |
349 | + fnH.apply(/* opKμ.apply(&μ̃) + */ opKz.apply(&z)) | |
350 | }; | |
351 | ||
352 | μ.merge_spikes_fitness(config.insertion.merging, fit, |&v| v); | |
353 | μ.prune(); | |
354 | Pair(μ, z) | |
355 | } |