Wed, 07 Dec 2022 06:54:56 +0200
arXiv links, README beautification
0 | 1 | /*! |
2 | Solver for the point source localisation problem with primal-dual proximal splitting. | |
3 | ||
4 | This corresponds to the manuscript | |
5 | ||
13
bdc57366d4f5
arXiv links, README beautification
Tuomo Valkonen <tuomov@iki.fi>
parents:
0
diff
changeset
|
6 | * Valkonen T. - _Proximal methods for point source localisation_, |
bdc57366d4f5
arXiv links, README beautification
Tuomo Valkonen <tuomov@iki.fi>
parents:
0
diff
changeset
|
7 | [arXiv:2212.02991](https://arxiv.org/abs/2212.02991). |
0 | 8 | |
9 | The main routine is [`pointsource_pdps`]. It is based on specialisation of | |
10 | [`generic_pointsource_fb`] through relevant [`FBSpecialisation`] implementations. | |
11 | Both norm-2-squared and norm-1 data terms are supported. That is, implemented are solvers for | |
12 | <div> | |
13 | $$ | |
14 | \min_{μ ∈ ℳ(Ω)}~ F_0(Aμ - b) + α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ), | |
15 | $$ | |
16 | for both $F_0(y)=\frac{1}{2}\|y\|_2^2$ and $F_0(y)=\|y\|_1$ with the forward operator | |
17 | $A \in 𝕃(ℳ(Ω); ℝ^n)$. | |
18 | </div> | |
19 | ||
20 | ## Approach | |
21 | ||
22 | <p> | |
23 | The problem above can be written as | |
24 | $$ | |
25 | \min_μ \max_y G(μ) + ⟨y, Aμ-b⟩ - F_0^*(y), | |
26 | $$ | |
27 | where $G(μ) = α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ)$. | |
28 | The Fenchel–Rockafellar optimality conditions, employing the predual in $ℳ(Ω)$, are | |
29 | $$ | |
30 | 0 ∈ A_*y + ∂G(μ) | |
31 | \quad\text{and}\quad | |
32 | Aμ - b ∈ ∂ F_0^*(y). | |
33 | $$ | |
34 | The solution of the first part is as for forward-backward, treated in the manuscript. | |
35 | This is the task of <code>generic_pointsource_fb</code>, where we use <code>FBSpecialisation</code> | |
36 | to replace the specific residual $Aμ-b$ by $y$. | |
37 | For $F_0(y)=\frac{1}{2}\|y\|_2^2$ the second part reads $y = Aμ -b$. | |
38 | For $F_0(y)=\|y\|_1$ the second part reads $y ∈ ∂\|·\|_1(Aμ - b)$. | |
39 | </p> | |
40 | ||
41 | Based on zero initialisation for $μ$, we use the [`Subdifferentiable`] trait to make an | |
42 | initialisation corresponding to the second part of the optimality conditions. | |
43 | In the algorithm itself, standard proximal steps are taken with respect to $F\_0^* + ⟨b, ·⟩$. | |
44 | */ | |
45 | ||
46 | use numeric_literals::replace_float_literals; | |
47 | use serde::{Serialize, Deserialize}; | |
48 | use nalgebra::DVector; | |
49 | use clap::ValueEnum; | |
50 | ||
51 | use alg_tools::iterate:: AlgIteratorFactory; | |
52 | use alg_tools::sets::Cube; | |
53 | use alg_tools::loc::Loc; | |
54 | use alg_tools::euclidean::Euclidean; | |
55 | use alg_tools::norms::{ | |
56 | L1, Linfinity, | |
57 | Projection, Norm, | |
58 | }; | |
59 | use alg_tools::bisection_tree::{ | |
60 | BTFN, | |
61 | PreBTFN, | |
62 | Bounds, | |
63 | BTNodeLookup, | |
64 | BTNode, | |
65 | BTSearch, | |
66 | P2Minimise, | |
67 | SupportGenerator, | |
68 | LocalAnalysis, | |
69 | }; | |
70 | use alg_tools::mapping::RealMapping; | |
71 | use alg_tools::nalgebra_support::ToNalgebraRealField; | |
72 | use alg_tools::linops::AXPY; | |
73 | ||
74 | use crate::types::*; | |
75 | use crate::measures::DiscreteMeasure; | |
76 | use crate::measures::merging::{ | |
77 | SpikeMerging, | |
78 | }; | |
79 | use crate::forward_model::ForwardModel; | |
80 | use crate::seminorms::{ | |
81 | DiscreteMeasureOp, Lipschitz | |
82 | }; | |
83 | use crate::plot::{ | |
84 | SeqPlotter, | |
85 | Plotting, | |
86 | PlotLookup | |
87 | }; | |
88 | use crate::fb::{ | |
89 | FBGenericConfig, | |
90 | FBSpecialisation, | |
91 | generic_pointsource_fb | |
92 | }; | |
93 | ||
94 | /// Acceleration | |
95 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, ValueEnum, Debug)] | |
96 | pub enum Acceleration { | |
97 | /// No acceleration | |
98 | #[clap(name = "none")] | |
99 | None, | |
100 | /// Partial acceleration, $ω = 1/\sqrt{1+σ}$ | |
101 | #[clap(name = "partial", help = "Partial acceleration, ω = 1/√(1+σ)")] | |
102 | Partial, | |
103 | /// Full acceleration, $ω = 1/\sqrt{1+2σ}$; no gap convergence guaranteed | |
104 | #[clap(name = "full", help = "Full acceleration, ω = 1/√(1+2σ); no gap convergence guaranteed")] | |
105 | Full | |
106 | } | |
107 | ||
108 | /// Settings for [`pointsource_pdps`]. | |
109 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] | |
110 | #[serde(default)] | |
111 | pub struct PDPSConfig<F : Float> { | |
112 | /// Primal step length scaling. We must have `τ0 * σ0 < 1`. | |
113 | pub τ0 : F, | |
114 | /// Dual step length scaling. We must have `τ0 * σ0 < 1`. | |
115 | pub σ0 : F, | |
116 | /// Accelerate if available | |
117 | pub acceleration : Acceleration, | |
118 | /// Generic parameters | |
119 | pub insertion : FBGenericConfig<F>, | |
120 | } | |
121 | ||
122 | #[replace_float_literals(F::cast_from(literal))] | |
123 | impl<F : Float> Default for PDPSConfig<F> { | |
124 | fn default() -> Self { | |
125 | let τ0 = 0.5; | |
126 | PDPSConfig { | |
127 | τ0, | |
128 | σ0 : 0.99/τ0, | |
129 | acceleration : Acceleration::Partial, | |
130 | insertion : Default::default() | |
131 | } | |
132 | } | |
133 | } | |
134 | ||
135 | /// Trait for subdifferentiable objects | |
136 | pub trait Subdifferentiable<F : Float, V, U=V> { | |
137 | /// Calculate some subdifferential at `x` | |
138 | fn some_subdifferential(&self, x : V) -> U; | |
139 | } | |
140 | ||
/// Marker type indicating the norm-2-squared data fidelity $F_0(y)=\frac{1}{2}\|y\|_2^2$.
///
/// It carries no data; the usual marker-type traits are derived so that it can be copied
/// freely and printed in diagnostics.
#[derive(Clone, Copy, Debug)]
pub struct L2Squared;
143 | ||
144 | impl<F : Float, V : Euclidean<F>> Subdifferentiable<F, V> for L2Squared { | |
145 | fn some_subdifferential(&self, x : V) -> V { x } | |
146 | } | |
147 | ||
148 | impl<F : Float + nalgebra::RealField> Subdifferentiable<F, DVector<F>> for L1 { | |
149 | fn some_subdifferential(&self, mut x : DVector<F>) -> DVector<F> { | |
150 | // nalgebra sucks for providing second copies of the same stuff that's elsewhere as well. | |
151 | x.iter_mut() | |
152 | .for_each(|v| if *v != F::ZERO { *v = *v/<F as NumTraitsFloat>::abs(*v) }); | |
153 | x | |
154 | } | |
155 | } | |
156 | ||
157 | /// Specialisation of [`generic_pointsource_fb`] to PDPS. | |
158 | pub struct PDPS< | |
159 | 'a, | |
160 | F : Float + ToNalgebraRealField, | |
161 | A : ForwardModel<Loc<F, N>, F>, | |
162 | D, | |
163 | const N : usize | |
164 | > { | |
165 | /// The data | |
166 | b : &'a A::Observable, | |
167 | /// The forward operator | |
168 | opA : &'a A, | |
169 | /// Primal step length | |
170 | τ : F, | |
171 | // Dual step length | |
172 | σ : F, | |
173 | /// Whether acceleration should be applied (if data term supports) | |
174 | acceleration : Acceleration, | |
175 | /// The dataterm. Only used by the type system. | |
176 | _dataterm : D, | |
177 | /// Previous dual iterate. | |
178 | y_prev : A::Observable, | |
179 | } | |
180 | ||
181 | /// Implementation of [`FBSpecialisation`] for μPDPS with norm-2-squared data fidelity. | |
182 | #[replace_float_literals(F::cast_from(literal))] | |
183 | impl< | |
184 | 'a, | |
185 | F : Float + ToNalgebraRealField, | |
186 | A : ForwardModel<Loc<F, N>, F>, | |
187 | const N : usize | |
188 | > FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L2Squared, N> | |
189 | where for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { | |
190 | ||
191 | fn update( | |
192 | &mut self, | |
193 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
194 | μ_base : &DiscreteMeasure<Loc<F, N>, F> | |
195 | ) -> (A::Observable, Option<F>) { | |
196 | let σ = self.σ; | |
197 | let τ = self.τ; | |
198 | let ω = match self.acceleration { | |
199 | Acceleration::None => 1.0, | |
200 | Acceleration::Partial => { | |
201 | let ω = 1.0 / (1.0 + σ).sqrt(); | |
202 | self.σ = σ * ω; | |
203 | self.τ = τ / ω; | |
204 | ω | |
205 | }, | |
206 | Acceleration::Full => { | |
207 | let ω = 1.0 / (1.0 + 2.0 * σ).sqrt(); | |
208 | self.σ = σ * ω; | |
209 | self.τ = τ / ω; | |
210 | ω | |
211 | }, | |
212 | }; | |
213 | ||
214 | μ.prune(); | |
215 | ||
216 | let mut y = self.b.clone(); | |
217 | self.opA.gemv(&mut y, 1.0 + ω, μ, -1.0); | |
218 | self.opA.gemv(&mut y, -ω, μ_base, 1.0); | |
219 | y.axpy(1.0 / (1.0 + σ), &self.y_prev, σ / (1.0 + σ)); | |
220 | self.y_prev.copy_from(&y); | |
221 | ||
222 | (y, Some(self.τ)) | |
223 | } | |
224 | ||
225 | fn calculate_fit( | |
226 | &self, | |
227 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
228 | _y : &A::Observable | |
229 | ) -> F { | |
230 | self.calculate_fit_simple(μ) | |
231 | } | |
232 | ||
233 | fn calculate_fit_simple( | |
234 | &self, | |
235 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
236 | ) -> F { | |
237 | let mut residual = self.b.clone(); | |
238 | self.opA.gemv(&mut residual, 1.0, μ, -1.0); | |
239 | residual.norm2_squared_div2() | |
240 | } | |
241 | } | |
242 | ||
243 | /// Implementation of [`FBSpecialisation`] for μPDPS with norm-1 data fidelity. | |
244 | #[replace_float_literals(F::cast_from(literal))] | |
245 | impl< | |
246 | 'a, | |
247 | F : Float + ToNalgebraRealField, | |
248 | A : ForwardModel<Loc<F, N>, F>, | |
249 | const N : usize | |
250 | > FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L1, N> | |
251 | where A::Observable : Projection<F, Linfinity> + Norm<F, L1>, | |
252 | for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { | |
253 | fn update( | |
254 | &mut self, | |
255 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
256 | μ_base : &DiscreteMeasure<Loc<F, N>, F> | |
257 | ) -> (A::Observable, Option<F>) { | |
258 | let σ = self.σ; | |
259 | ||
260 | μ.prune(); | |
261 | ||
262 | //let ȳ = self.opA.apply(μ) * 2.0 - self.opA.apply(μ_base); | |
263 | //*y = proj_{[-1,1]}(&self.y_prev + (ȳ - self.b) * σ) | |
264 | let mut y = self.y_prev.clone(); | |
265 | self.opA.gemv(&mut y, 2.0 * σ, μ, 1.0); | |
266 | self.opA.gemv(&mut y, -σ, μ_base, 1.0); | |
267 | y.axpy(-σ, self.b, 1.0); | |
268 | y.proj_ball_mut(1.0, Linfinity); | |
269 | self.y_prev.copy_from(&y); | |
270 | ||
271 | (y, None) | |
272 | } | |
273 | ||
274 | fn calculate_fit( | |
275 | &self, | |
276 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
277 | _y : &A::Observable | |
278 | ) -> F { | |
279 | self.calculate_fit_simple(μ) | |
280 | } | |
281 | ||
282 | fn calculate_fit_simple( | |
283 | &self, | |
284 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
285 | ) -> F { | |
286 | let mut residual = self.b.clone(); | |
287 | self.opA.gemv(&mut residual, 1.0, μ, -1.0); | |
288 | residual.norm(L1) | |
289 | } | |
290 | } | |
291 | ||
292 | /// Iteratively solve the pointsource localisation problem using primal-dual proximal splitting. | |
293 | /// | |
294 | /// The `dataterm` should be either [`L1`] for norm-1 data term or [`L2Squared`] for norm-2-squared. | |
295 | /// The settings in `config` have their [respective documentation](PDPSConfig). `opA` is the | |
296 | /// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight. | |
297 | /// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution | |
298 | /// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control | |
299 | /// as documented in [`alg_tools::iterate`]. | |
300 | /// | |
301 | /// For the mathematical formulation, see the [module level](self) documentation and the manuscript. | |
302 | /// | |
303 | /// Returns the final iterate. | |
304 | #[replace_float_literals(F::cast_from(literal))] | |
305 | pub fn pointsource_pdps<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, const N : usize>( | |
306 | opA : &'a A, | |
307 | b : &'a A::Observable, | |
308 | α : F, | |
309 | op𝒟 : &'a 𝒟, | |
310 | config : &PDPSConfig<F>, | |
311 | iterator : I, | |
312 | plotter : SeqPlotter<F, N>, | |
313 | dataterm : D, | |
314 | ) -> DiscreteMeasure<Loc<F, N>, F> | |
315 | where F : Float + ToNalgebraRealField, | |
316 | I : AlgIteratorFactory<IterInfo<F, N>>, | |
317 | for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> | |
318 | + std::ops::Add<A::Observable, Output=A::Observable>, | |
319 | //+ std::ops::Mul<F, Output=A::Observable>, // <-- FIXME: compiler overflow | |
320 | A::Observable : std::ops::MulAssign<F>, | |
321 | GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, | |
322 | A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> | |
323 | + Lipschitz<𝒟, FloatType=F>, | |
324 | BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, | |
325 | G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, | |
326 | 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, | |
327 | 𝒟::Codomain : RealMapping<F, N>, | |
328 | S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
329 | K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
330 | BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, | |
331 | Cube<F, N>: P2Minimise<Loc<F, N>, F>, | |
332 | PlotLookup : Plotting<N>, | |
333 | DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, | |
334 | PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>, | |
335 | D : Subdifferentiable<F, A::Observable> { | |
336 | ||
337 | let y = dataterm.some_subdifferential(-b); | |
338 | let l = opA.lipschitz_factor(&op𝒟).unwrap().sqrt(); | |
339 | let τ = config.τ0 / l; | |
340 | let σ = config.σ0 / l; | |
341 | ||
342 | let pdps = PDPS { | |
343 | b, | |
344 | opA, | |
345 | τ, | |
346 | σ, | |
347 | acceleration : config.acceleration, | |
348 | _dataterm : dataterm, | |
349 | y_prev : y.clone(), | |
350 | }; | |
351 | ||
352 | generic_pointsource_fb( | |
353 | opA, α, op𝒟, τ, &config.insertion, iterator, plotter, y, | |
354 | pdps | |
355 | ) | |
356 | } |