Fri, 02 Dec 2022 18:08:40 +0200
Remove ergodic tolerance; it's not useful.
| 0 | 1 | /*! |
| 2 | Solver for the point source localisation problem with primal-dual proximal splitting. | |
| 3 | ||
| 4 | This corresponds to the manuscript | |
| 5 | ||
| 6 | * Valkonen T. - _Proximal methods for point source localisation_. ARXIV TO INSERT. | |
| 7 | ||
| 8 | The main routine is [`pointsource_pdps`]. It is based on specialisation of | |
| 9 | [`generic_pointsource_fb`] through relevant [`FBSpecialisation`] implementations. | |
| 10 | Both norm-2-squared and norm-1 data terms are supported. That is, implemented are solvers for | |
| 11 | <div> | |
| 12 | $$ | |
| 13 | \min_{μ ∈ ℳ(Ω)}~ F_0(Aμ - b) + α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ), | |
| 14 | $$ | |
| 15 | for both $F_0(y)=\frac{1}{2}\|y\|_2^2$ and $F_0(y)=\|y\|_1$ with the forward operator | |
| 16 | $A \in 𝕃(ℳ(Ω); ℝ^n)$. | |
| 17 | </div> | |
| 18 | ||
| 19 | ## Approach | |
| 20 | ||
| 21 | <p> | |
| 22 | The problem above can be written as | |
| 23 | $$ | |
| 24 | \min_μ \max_y G(μ) + ⟨y, Aμ-b⟩ - F_0^*(y), | |
| 25 | $$ | |
| 26 | where $G(μ) = α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ)$. | |
| 27 | The Fenchel–Rockafellar optimality conditions, employing the predual in $ℳ(Ω)$, are | |
| 28 | $$ | |
| 29 | 0 ∈ A_*y + ∂G(μ) | |
| 30 | \quad\text{and}\quad | |
| 31 | Aμ - b ∈ ∂ F_0^*(y). | |
| 32 | $$ | |
| 33 | The solution of the first part is as for forward-backward, treated in the manuscript. | |
| 34 | This is the task of <code>generic_pointsource_fb</code>, where we use <code>FBSpecialisation</code> | |
| 35 | to replace the specific residual $Aμ-b$ by $y$. | |
| 36 | For $F_0(y)=\frac{1}{2}\|y\|_2^2$ the second part reads $y = Aμ -b$. | |
| 37 | For $F_0(y)=\|y\|_1$ the second part reads $y ∈ ∂\|·\|_1(Aμ - b)$. | |
| 38 | </p> | |
| 39 | ||
| 40 | Based on zero initialisation for $μ$, we use the [`Subdifferentiable`] trait to make an | |
| 41 | initialisation corresponding to the second part of the optimality conditions. | |
| 42 | In the algorithm itself, standard proximal steps are taken with respect to $F\_0^* + ⟨b, ·⟩$. | |
| 43 | */ | |
| 44 | ||
| 45 | use numeric_literals::replace_float_literals; | |
| 46 | use serde::{Serialize, Deserialize}; | |
| 47 | use nalgebra::DVector; | |
| 48 | use clap::ValueEnum; | |
| 49 | ||
| 50 | use alg_tools::iterate:: AlgIteratorFactory; | |
| 51 | use alg_tools::sets::Cube; | |
| 52 | use alg_tools::loc::Loc; | |
| 53 | use alg_tools::euclidean::Euclidean; | |
| 54 | use alg_tools::norms::{ | |
| 55 | L1, Linfinity, | |
| 56 | Projection, Norm, | |
| 57 | }; | |
| 58 | use alg_tools::bisection_tree::{ | |
| 59 | BTFN, | |
| 60 | PreBTFN, | |
| 61 | Bounds, | |
| 62 | BTNodeLookup, | |
| 63 | BTNode, | |
| 64 | BTSearch, | |
| 65 | P2Minimise, | |
| 66 | SupportGenerator, | |
| 67 | LocalAnalysis, | |
| 68 | }; | |
| 69 | use alg_tools::mapping::RealMapping; | |
| 70 | use alg_tools::nalgebra_support::ToNalgebraRealField; | |
| 71 | use alg_tools::linops::AXPY; | |
| 72 | ||
| 73 | use crate::types::*; | |
| 74 | use crate::measures::DiscreteMeasure; | |
| 75 | use crate::measures::merging::{ | |
| 76 | SpikeMerging, | |
| 77 | }; | |
| 78 | use crate::forward_model::ForwardModel; | |
| 79 | use crate::seminorms::{ | |
| 80 | DiscreteMeasureOp, Lipschitz | |
| 81 | }; | |
| 82 | use crate::plot::{ | |
| 83 | SeqPlotter, | |
| 84 | Plotting, | |
| 85 | PlotLookup | |
| 86 | }; | |
| 87 | use crate::fb::{ | |
| 88 | FBGenericConfig, | |
| 89 | FBSpecialisation, | |
| 90 | generic_pointsource_fb | |
| 91 | }; | |
| 92 | ||
| 93 | /// Acceleration | |
| 94 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, ValueEnum, Debug)] | |
| 95 | pub enum Acceleration { | |
| 96 | /// No acceleration | |
| 97 | #[clap(name = "none")] | |
| 98 | None, | |
| 99 | /// Partial acceleration, $ω = 1/\sqrt{1+σ}$ | |
| 100 | #[clap(name = "partial", help = "Partial acceleration, ω = 1/√(1+σ)")] | |
| 101 | Partial, | |
| 102 | /// Full acceleration, $ω = 1/\sqrt{1+2σ}$; no gap convergence guaranteed | |
| 103 | #[clap(name = "full", help = "Full acceleration, ω = 1/√(1+2σ); no gap convergence guaranteed")] | |
| 104 | Full | |
| 105 | } | |
| 106 | ||
| 107 | /// Settings for [`pointsource_pdps`]. | |
| 108 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] | |
| 109 | #[serde(default)] | |
| 110 | pub struct PDPSConfig<F : Float> { | |
| 111 | /// Primal step length scaling. We must have `τ0 * σ0 < 1`. | |
| 112 | pub τ0 : F, | |
| 113 | /// Dual step length scaling. We must have `τ0 * σ0 < 1`. | |
| 114 | pub σ0 : F, | |
| 115 | /// Accelerate if available | |
| 116 | pub acceleration : Acceleration, | |
| 117 | /// Generic parameters | |
| 118 | pub insertion : FBGenericConfig<F>, | |
| 119 | } | |
| 120 | ||
| 121 | #[replace_float_literals(F::cast_from(literal))] | |
| 122 | impl<F : Float> Default for PDPSConfig<F> { | |
| 123 | fn default() -> Self { | |
| 124 | let τ0 = 0.5; | |
| 125 | PDPSConfig { | |
| 126 | τ0, | |
| 127 | σ0 : 0.99/τ0, | |
| 128 | acceleration : Acceleration::Partial, | |
| 129 | insertion : Default::default() | |
| 130 | } | |
| 131 | } | |
| 132 | } | |
| 133 | ||
| 134 | /// Trait for subdifferentiable objects | |
| 135 | pub trait Subdifferentiable<F : Float, V, U=V> { | |
| 136 | /// Calculate some subdifferential at `x` | |
| 137 | fn some_subdifferential(&self, x : V) -> U; | |
| 138 | } | |
| 139 | ||
| 140 | /// Type for indicating norm-2-squared data fidelity. | |
| 141 | pub struct L2Squared; | |
| 142 | ||
| 143 | impl<F : Float, V : Euclidean<F>> Subdifferentiable<F, V> for L2Squared { | |
| 144 | fn some_subdifferential(&self, x : V) -> V { x } | |
| 145 | } | |
| 146 | ||
| 147 | impl<F : Float + nalgebra::RealField> Subdifferentiable<F, DVector<F>> for L1 { | |
| 148 | fn some_subdifferential(&self, mut x : DVector<F>) -> DVector<F> { | |
| 149 | // nalgebra sucks for providing second copies of the same stuff that's elsewhere as well. | |
| 150 | x.iter_mut() | |
| 151 | .for_each(|v| if *v != F::ZERO { *v = *v/<F as NumTraitsFloat>::abs(*v) }); | |
| 152 | x | |
| 153 | } | |
| 154 | } | |
| 155 | ||
| 156 | /// Specialisation of [`generic_pointsource_fb`] to PDPS. | |
| 157 | pub struct PDPS< | |
| 158 | 'a, | |
| 159 | F : Float + ToNalgebraRealField, | |
| 160 | A : ForwardModel<Loc<F, N>, F>, | |
| 161 | D, | |
| 162 | const N : usize | |
| 163 | > { | |
| 164 | /// The data | |
| 165 | b : &'a A::Observable, | |
| 166 | /// The forward operator | |
| 167 | opA : &'a A, | |
| 168 | /// Primal step length | |
| 169 | τ : F, | |
| 170 | // Dual step length | |
| 171 | σ : F, | |
| 172 | /// Whether acceleration should be applied (if data term supports) | |
| 173 | acceleration : Acceleration, | |
| 174 | /// The dataterm. Only used by the type system. | |
| 175 | _dataterm : D, | |
| 176 | /// Previous dual iterate. | |
| 177 | y_prev : A::Observable, | |
| 178 | } | |
| 179 | ||
| 180 | /// Implementation of [`FBSpecialisation`] for μPDPS with norm-2-squared data fidelity. | |
| 181 | #[replace_float_literals(F::cast_from(literal))] | |
| 182 | impl< | |
| 183 | 'a, | |
| 184 | F : Float + ToNalgebraRealField, | |
| 185 | A : ForwardModel<Loc<F, N>, F>, | |
| 186 | const N : usize | |
| 187 | > FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L2Squared, N> | |
| 188 | where for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { | |
| 189 | ||
| 190 | fn update( | |
| 191 | &mut self, | |
| 192 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
| 193 | μ_base : &DiscreteMeasure<Loc<F, N>, F> | |
| 194 | ) -> (A::Observable, Option<F>) { | |
| 195 | let σ = self.σ; | |
| 196 | let τ = self.τ; | |
| 197 | let ω = match self.acceleration { | |
| 198 | Acceleration::None => 1.0, | |
| 199 | Acceleration::Partial => { | |
| 200 | let ω = 1.0 / (1.0 + σ).sqrt(); | |
| 201 | self.σ = σ * ω; | |
| 202 | self.τ = τ / ω; | |
| 203 | ω | |
| 204 | }, | |
| 205 | Acceleration::Full => { | |
| 206 | let ω = 1.0 / (1.0 + 2.0 * σ).sqrt(); | |
| 207 | self.σ = σ * ω; | |
| 208 | self.τ = τ / ω; | |
| 209 | ω | |
| 210 | }, | |
| 211 | }; | |
| 212 | ||
| 213 | μ.prune(); | |
| 214 | ||
| 215 | let mut y = self.b.clone(); | |
| 216 | self.opA.gemv(&mut y, 1.0 + ω, μ, -1.0); | |
| 217 | self.opA.gemv(&mut y, -ω, μ_base, 1.0); | |
| 218 | y.axpy(1.0 / (1.0 + σ), &self.y_prev, σ / (1.0 + σ)); | |
| 219 | self.y_prev.copy_from(&y); | |
| 220 | ||
| 221 | (y, Some(self.τ)) | |
| 222 | } | |
| 223 | ||
| 224 | fn calculate_fit( | |
| 225 | &self, | |
| 226 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 227 | _y : &A::Observable | |
| 228 | ) -> F { | |
| 229 | self.calculate_fit_simple(μ) | |
| 230 | } | |
| 231 | ||
| 232 | fn calculate_fit_simple( | |
| 233 | &self, | |
| 234 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 235 | ) -> F { | |
| 236 | let mut residual = self.b.clone(); | |
| 237 | self.opA.gemv(&mut residual, 1.0, μ, -1.0); | |
| 238 | residual.norm2_squared_div2() | |
| 239 | } | |
| 240 | } | |
| 241 | ||
| 242 | /// Implementation of [`FBSpecialisation`] for μPDPS with norm-1 data fidelity. | |
| 243 | #[replace_float_literals(F::cast_from(literal))] | |
| 244 | impl< | |
| 245 | 'a, | |
| 246 | F : Float + ToNalgebraRealField, | |
| 247 | A : ForwardModel<Loc<F, N>, F>, | |
| 248 | const N : usize | |
| 249 | > FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L1, N> | |
| 250 | where A::Observable : Projection<F, Linfinity> + Norm<F, L1>, | |
| 251 | for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { | |
| 252 | fn update( | |
| 253 | &mut self, | |
| 254 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
| 255 | μ_base : &DiscreteMeasure<Loc<F, N>, F> | |
| 256 | ) -> (A::Observable, Option<F>) { | |
| 257 | let σ = self.σ; | |
| 258 | ||
| 259 | μ.prune(); | |
| 260 | ||
| 261 | //let ȳ = self.opA.apply(μ) * 2.0 - self.opA.apply(μ_base); | |
| 262 | //*y = proj_{[-1,1]}(&self.y_prev + (ȳ - self.b) * σ) | |
| 263 | let mut y = self.y_prev.clone(); | |
| 264 | self.opA.gemv(&mut y, 2.0 * σ, μ, 1.0); | |
| 265 | self.opA.gemv(&mut y, -σ, μ_base, 1.0); | |
| 266 | y.axpy(-σ, self.b, 1.0); | |
| 267 | y.proj_ball_mut(1.0, Linfinity); | |
| 268 | self.y_prev.copy_from(&y); | |
| 269 | ||
| 270 | (y, None) | |
| 271 | } | |
| 272 | ||
| 273 | fn calculate_fit( | |
| 274 | &self, | |
| 275 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 276 | _y : &A::Observable | |
| 277 | ) -> F { | |
| 278 | self.calculate_fit_simple(μ) | |
| 279 | } | |
| 280 | ||
| 281 | fn calculate_fit_simple( | |
| 282 | &self, | |
| 283 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 284 | ) -> F { | |
| 285 | let mut residual = self.b.clone(); | |
| 286 | self.opA.gemv(&mut residual, 1.0, μ, -1.0); | |
| 287 | residual.norm(L1) | |
| 288 | } | |
| 289 | } | |
| 290 | ||
| 291 | /// Iteratively solve the pointsource localisation problem using primal-dual proximal splitting. | |
| 292 | /// | |
| 293 | /// The `dataterm` should be either [`L1`] for norm-1 data term or [`L2Squared`] for norm-2-squared. | |
| 294 | /// The settings in `config` have their [respective documentation](PDPSConfig). `opA` is the | |
| 295 | /// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight. | |
| 296 | /// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution | |
| 297 | /// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control | |
| 298 | /// as documented in [`alg_tools::iterate`]. | |
| 299 | /// | |
| 300 | /// For the mathematical formulation, see the [module level](self) documentation and the manuscript. | |
| 301 | /// | |
| 302 | /// Returns the final iterate. | |
| 303 | #[replace_float_literals(F::cast_from(literal))] | |
| 304 | pub fn pointsource_pdps<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, D, const N : usize>( | |
| 305 | opA : &'a A, | |
| 306 | b : &'a A::Observable, | |
| 307 | α : F, | |
| 308 | op𝒟 : &'a 𝒟, | |
| 309 | config : &PDPSConfig<F>, | |
| 310 | iterator : I, | |
| 311 | plotter : SeqPlotter<F, N>, | |
| 312 | dataterm : D, | |
| 313 | ) -> DiscreteMeasure<Loc<F, N>, F> | |
| 314 | where F : Float + ToNalgebraRealField, | |
| 315 | I : AlgIteratorFactory<IterInfo<F, N>>, | |
| 316 | for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> | |
| 317 | + std::ops::Add<A::Observable, Output=A::Observable>, | |
| 318 | //+ std::ops::Mul<F, Output=A::Observable>, // <-- FIXME: compiler overflow | |
| 319 | A::Observable : std::ops::MulAssign<F>, | |
| 320 | GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, | |
| 321 | A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> | |
| 322 | + Lipschitz<𝒟, FloatType=F>, | |
| 323 | BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, | |
| 324 | G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, | |
| 325 | 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, | |
| 326 | 𝒟::Codomain : RealMapping<F, N>, | |
| 327 | S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
| 328 | K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
| 329 | BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, | |
| 330 | Cube<F, N>: P2Minimise<Loc<F, N>, F>, | |
| 331 | PlotLookup : Plotting<N>, | |
| 332 | DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, | |
| 333 | PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>, | |
| 334 | D : Subdifferentiable<F, A::Observable> { | |
| 335 | ||
| 336 | let y = dataterm.some_subdifferential(-b); | |
| 337 | let l = opA.lipschitz_factor(&op𝒟).unwrap().sqrt(); | |
| 338 | let τ = config.τ0 / l; | |
| 339 | let σ = config.σ0 / l; | |
| 340 | ||
| 341 | let pdps = PDPS { | |
| 342 | b, | |
| 343 | opA, | |
| 344 | τ, | |
| 345 | σ, | |
| 346 | acceleration : config.acceleration, | |
| 347 | _dataterm : dataterm, | |
| 348 | y_prev : y.clone(), | |
| 349 | }; | |
| 350 | ||
| 351 | generic_pointsource_fb( | |
| 352 | opA, α, op𝒟, τ, &config.insertion, iterator, plotter, y, | |
| 353 | pdps | |
| 354 | ) | |
| 355 | } |