Sun, 11 Dec 2022 23:19:17 +0200
Print out experiment information when running it
| 0 | 1 | /*! |
| 2 | Solver for the point source localisation problem using a forward-backward splitting method. | |
| 3 | ||
| 4 | This corresponds to the manuscript | |
| 5 | ||
|
13
bdc57366d4f5
arXiv links, README beautification
Tuomo Valkonen <tuomov@iki.fi>
parents:
8
diff
changeset
|
6 | * Valkonen T. - _Proximal methods for point source localisation_, |
|
bdc57366d4f5
arXiv links, README beautification
Tuomo Valkonen <tuomov@iki.fi>
parents:
8
diff
changeset
|
7 | [arXiv:2212.02991](https://arxiv.org/abs/2212.02991). |
| 0 | 8 | |
| 9 | The main routine is [`pointsource_fb`]. It is based on [`generic_pointsource_fb`], which is also | |
| 10 | used by our [primal-dual proximal splitting][crate::pdps] implementation. | |
| 11 | ||
| 12 | FISTA-type inertia can also be enabled through [`FBConfig::meta`]. | |
| 13 | ||
| 14 | ## Problem | |
| 15 | ||
| 16 | <p> | |
| 17 | Our objective is to solve | |
| 18 | $$ | |
| 19 | \min_{μ ∈ ℳ(Ω)}~ F_0(Aμ-b) + α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ), | |
| 20 | $$ | |
| 21 | where $F_0(y)=\frac{1}{2}\|y\|_2^2$ and the forward operator $A \in 𝕃(ℳ(Ω); ℝ^n)$. | |
| 22 | </p> | |
| 23 | ||
| 24 | ## Approach | |
| 25 | ||
| 26 | <p> | |
| 27 | As documented in more detail in the paper, on each step we approximately solve | |
| 28 | $$ | |
| 29 | \min_{μ ∈ ℳ(Ω)}~ F(μ) + α \|μ\|_{ℳ(Ω)} + δ_{≥ 0}(μ) + \frac{1}{2}\|μ-μ^k\|_𝒟^2, | |
| 30 | $$ | |
| 31 | where $𝒟 ∈ 𝕃(ℳ(Ω); C_c(Ω))$ is typically a convolution operator. | |
| 32 | </p> | |
| 33 | ||
| 34 | ## Finite-dimensional subproblems. | |
| 35 | ||
| 36 | With $C$ a projection from [`DiscreteMeasure`] to the weights, and $x^k$ such that $x^k=Cμ^k$, we | |
| 37 | form the discretised linearised inner problem | |
| 38 | <p> | |
| 39 | $$ | |
| 40 | \min_{x ∈ ℝ^n}~ τ\bigl(F(Cx^k) + [C^*∇F(Cx^k)]^⊤(x-x^k) + α {\vec 1}^⊤ x\bigr) | |
| 41 | + δ_{≥ 0}(x) + \frac{1}{2}\|x-x^k\|_{C^*𝒟C}^2, | |
| 42 | $$ | |
| 43 | equivalently | |
| 44 | $$ | |
| 45 | \begin{aligned} | |
| 46 | \min_x~ & τF(Cx^k) - τ[C^*∇F(Cx^k)]^⊤x^k + \frac{1}{2} (x^k)^⊤ C^*𝒟C x^k | |
| 47 | \\ | |
| 48 | & | |
| 49 | - [C^*𝒟C x^k - τC^*∇F(Cx^k)]^⊤ x | |
| 50 | \\ | |
| 51 | & | |
| 52 | + \frac{1}{2} x^⊤ C^*𝒟C x | |
| 53 | + τα {\vec 1}^⊤ x + δ_{≥ 0}(x), | |
| 54 | \end{aligned} | |
| 55 | $$ | |
| 56 | In other words, we obtain the quadratic non-negativity constrained problem | |
| 57 | $$ | |
| 58 | \min_{x ∈ ℝ^n}~ \frac{1}{2} x^⊤ Ã x - g̃^⊤ x + c + τα {\vec 1}^⊤ x + δ_{≥ 0}(x), | |
| 59 | $$ | |
| 60 | where | |
| 61 | $$ | |
| 62 | \begin{aligned} | |
| 63 | Ã & = C^*𝒟C, | |
| 64 | \\ | |
| 65 | g̃ & = C^*𝒟C x^k - τ C^*∇F(Cx^k) | |
| 66 | = C^* 𝒟 μ^k - τ C^*A^*(Aμ^k - b) | |
| 67 | \\ | |
| 68 | c & = τ F(Cx^k) - τ[C^*∇F(Cx^k)]^⊤x^k + \frac{1}{2} (x^k)^⊤ C^*𝒟C x^k | |
| 69 | \\ | |
| 70 | & | |
| 71 | = \frac{τ}{2} \|Aμ^k-b\|^2 - τ[Aμ^k-b]^⊤Aμ^k + \frac{1}{2} \|μ^k\|_{𝒟}^2 | |
| 72 | \\ | |
| 73 | & | |
| 74 | = -\frac{τ}{2} \|Aμ^k-b\|^2 - τ[Aμ^k-b]^⊤ b + \frac{1}{2} \|μ^k\|_{𝒟}^2. | |
| 75 | \end{aligned} | |
| 76 | $$ | |
| 77 | </p> | |
| 78 | ||
| 79 | We solve this with either SSN or FB via [`quadratic_nonneg`] as determined by | |
| 80 | [`InnerSettings`] in [`FBGenericConfig::inner`]. | |
| 81 | */ | |
| 82 | ||
| 83 | use numeric_literals::replace_float_literals; | |
| 84 | use serde::{Serialize, Deserialize}; | |
| 85 | use colored::Colorize; | |
| 86 | use nalgebra::DVector; | |
| 87 | ||
| 88 | use alg_tools::iterate::{ | |
| 89 | AlgIteratorFactory, | |
| 90 | AlgIteratorState, | |
| 91 | }; | |
| 92 | use alg_tools::euclidean::Euclidean; | |
| 93 | use alg_tools::norms::Norm; | |
| 94 | use alg_tools::linops::Apply; | |
| 95 | use alg_tools::sets::Cube; | |
| 96 | use alg_tools::loc::Loc; | |
| 97 | use alg_tools::bisection_tree::{ | |
| 98 | BTFN, | |
| 99 | PreBTFN, | |
| 100 | Bounds, | |
| 101 | BTNodeLookup, | |
| 102 | BTNode, | |
| 103 | BTSearch, | |
| 104 | P2Minimise, | |
| 105 | SupportGenerator, | |
| 106 | LocalAnalysis, | |
| 107 | Bounded, | |
| 108 | }; | |
| 109 | use alg_tools::mapping::RealMapping; | |
| 110 | use alg_tools::nalgebra_support::ToNalgebraRealField; | |
| 111 | ||
| 112 | use crate::types::*; | |
| 113 | use crate::measures::{ | |
| 114 | DiscreteMeasure, | |
| 115 | DeltaMeasure, | |
| 116 | Radon | |
| 117 | }; | |
| 118 | use crate::measures::merging::{ | |
| 119 | SpikeMergingMethod, | |
| 120 | SpikeMerging, | |
| 121 | }; | |
| 122 | use crate::forward_model::ForwardModel; | |
| 123 | use crate::seminorms::{ | |
| 124 | DiscreteMeasureOp, Lipschitz | |
| 125 | }; | |
| 126 | use crate::subproblem::{ | |
| 127 | quadratic_nonneg, | |
| 128 | InnerSettings, | |
| 129 | InnerMethod, | |
| 130 | }; | |
| 131 | use crate::tolerance::Tolerance; | |
| 132 | use crate::plot::{ | |
| 133 | SeqPlotter, | |
| 134 | Plotting, | |
| 135 | PlotLookup | |
| 136 | }; | |
| 137 | ||
| 138 | /// Method for constructing $μ$ on each iteration | |
| 139 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] | |
| 140 | #[allow(dead_code)] | |
| 141 | pub enum InsertionStyle { | |
| 142 | /// Reuse $μ$ from the previous iteration, optimising weights | |
| 143 | /// before inserting new spikes. | |
| 144 | Reuse, | |
| 145 | /// Start each iteration with $μ=0$. | |
| 146 | Zero, | |
| 147 | } | |
| 148 | ||
| 149 | /// Meta-algorithm type | |
| 150 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] | |
| 151 | #[allow(dead_code)] | |
| 152 | pub enum FBMetaAlgorithm { | |
| 153 | /// No meta-algorithm | |
| 154 | None, | |
| 155 | /// FISTA-style inertia | |
| 156 | InertiaFISTA, | |
| 157 | } | |
| 158 | ||
| 159 | /// Settings for [`pointsource_fb`]. | |
| 160 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] | |
| 161 | #[serde(default)] | |
| 162 | pub struct FBConfig<F : Float> { | |
| 163 | /// Step length scaling | |
| 164 | pub τ0 : F, | |
| 165 | /// Meta-algorithm to apply | |
| 166 | pub meta : FBMetaAlgorithm, | |
| 167 | /// Generic parameters | |
| 168 | pub insertion : FBGenericConfig<F>, | |
| 169 | } | |
| 170 | ||
| 171 | /// Settings for the solution of the stepwise optimality condition in algorithms based on | |
| 172 | /// [`generic_pointsource_fb`]. | |
| 173 | #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] | |
| 174 | #[serde(default)] | |
| 175 | pub struct FBGenericConfig<F : Float> { | |
| 176 | /// Method for constructing $μ$ on each iteration; see [`InsertionStyle`]. | |
| 177 | pub insertion_style : InsertionStyle, | |
| 178 | /// Tolerance for point insertion. | |
| 179 | pub tolerance : Tolerance<F>, | |
| 180 | /// Stop looking for predual maximum (where to insert a new point) below | |
| 181 | /// `tolerance` multiplied by this factor. | |
| 182 | pub insertion_cutoff_factor : F, | |
| 183 | /// Settings for branch and bound refinement when looking for predual maxima | |
| 184 | pub refinement : RefinementSettings<F>, | |
| 185 | /// Maximum insertions within each outer iteration | |
| 186 | pub max_insertions : usize, | |
| 187 | /// Pair `(n, m)` for maximum insertions `m` on first `n` iterations. | |
| 188 | pub bootstrap_insertions : Option<(usize, usize)>, | |
| 189 | /// Inner method settings | |
| 190 | pub inner : InnerSettings<F>, | |
| 191 | /// Spike merging method | |
| 192 | pub merging : SpikeMergingMethod<F>, | |
| 193 | /// Tolerance multiplier for merges | |
| 194 | pub merge_tolerance_mult : F, | |
| 195 | /// Spike merging method after the last step | |
| 196 | pub final_merging : SpikeMergingMethod<F>, | |
| 197 | /// Iterations between merging heuristic tries | |
| 198 | pub merge_every : usize, | |
| 199 | /// Save $μ$ for postprocessing optimisation | |
| 200 | pub postprocessing : bool | |
| 201 | } | |
| 202 | ||
| 203 | #[replace_float_literals(F::cast_from(literal))] | |
| 204 | impl<F : Float> Default for FBConfig<F> { | |
| 205 | fn default() -> Self { | |
| 206 | FBConfig { | |
| 207 | τ0 : 0.99, | |
| 208 | meta : FBMetaAlgorithm::None, | |
| 209 | insertion : Default::default() | |
| 210 | } | |
| 211 | } | |
| 212 | } | |
| 213 | ||
| 214 | #[replace_float_literals(F::cast_from(literal))] | |
| 215 | impl<F : Float> Default for FBGenericConfig<F> { | |
| 216 | fn default() -> Self { | |
| 217 | FBGenericConfig { | |
| 218 | insertion_style : InsertionStyle::Reuse, | |
| 219 | tolerance : Default::default(), | |
| 220 | insertion_cutoff_factor : 1.0, | |
| 221 | refinement : Default::default(), | |
| 222 | max_insertions : 100, | |
| 223 | //bootstrap_insertions : None, | |
| 224 | bootstrap_insertions : Some((10, 1)), | |
| 225 | inner : InnerSettings { | |
| 226 | method : InnerMethod::SSN, | |
| 227 | .. Default::default() | |
| 228 | }, | |
| 229 | merging : SpikeMergingMethod::None, | |
| 230 | //merging : Default::default(), | |
| 231 | final_merging : Default::default(), | |
| 232 | merge_every : 10, | |
| 233 | merge_tolerance_mult : 2.0, | |
| 234 | postprocessing : false, | |
| 235 | } | |
| 236 | } | |
| 237 | } | |
| 238 | ||
| 239 | /// Trait for specialisation of [`generic_pointsource_fb`] to basic FB, FISTA. | |
| 240 | /// | |
| 241 | /// The idea is that the residual $Aμ - b$ in the forward step can be replaced by an arbitrary | |
| 242 | /// value. For example, to implement [primal-dual proximal splitting][crate::pdps] we replace it | |
| 243 | /// with the dual variable $y$. We can then also implement alternative data terms, as the | |
| 244 | /// (pre)differential of $F(μ)=F\_0(Aμ-b)$ is $F\'(μ) = A\_*F\_0\'(Aμ-b)$. In the case of the | |
| 245 | /// quadratic fidelity $F_0(y)=\frac{1}{2}\\|y\\|_2^2$ in a Hilbert space, of course, | |
| 246 | /// $F\_0\'(Aμ-b)=Aμ-b$ is the residual. | |
| 247 | pub trait FBSpecialisation<F : Float, Observable : Euclidean<F>, const N : usize> : Sized { | |
| 248 | /// Updates the residual and does any necessary pruning of `μ`. | |
| 249 | /// | |
| 250 | /// Returns the new residual and possibly a new step length. | |
| 251 | /// | |
| 252 | /// The measure `μ` may also be modified to apply, e.g., inertia to it. | |
| 253 | /// The updated residual should correspond to the residual at `μ`. | |
| 254 | /// See the [trait documentation][FBSpecialisation] for the use and meaning of the residual. | |
| 255 | /// | |
| 256 | /// The parameter `μ_base` is the base point of the iteration, typically the previous iterate, | |
| 257 | /// but for, e.g., FISTA has inertia applied to it. | |
| 258 | fn update( | |
| 259 | &mut self, | |
| 260 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
| 261 | μ_base : &DiscreteMeasure<Loc<F, N>, F>, | |
| 262 | ) -> (Observable, Option<F>); | |
| 263 | ||
| 264 | /// Calculates the data term value corresponding to iterate `μ` and available residual. | |
| 265 | /// | |
| 266 | /// Inertia and other modifications, as deemed necessary, should be applied to `μ`. | |
| 267 | /// | |
| 268 | /// The blanket implementation corresponds to the 2-norm-squared data fidelity | |
| 269 | /// $\\|\text{residual}\\|\_2^2/2$. | |
| 270 | fn calculate_fit( | |
| 271 | &self, | |
| 272 | _μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 273 | residual : &Observable | |
| 274 | ) -> F { | |
| 275 | residual.norm2_squared_div2() | |
| 276 | } | |
| 277 | ||
| 278 | /// Calculates the data term value at $μ$. | |
| 279 | /// | |
| 280 | /// Unlike [`Self::calculate_fit`], no inertia, etc., should be applied to `μ`. | |
| 281 | fn calculate_fit_simple( | |
| 282 | &self, | |
| 283 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 284 | ) -> F; | |
| 285 | ||
| 286 | /// Returns the final iterate after any necessary postprocess pruning, merging, etc. | |
| 287 | fn postprocess(self, mut μ : DiscreteMeasure<Loc<F, N>, F>, merging : SpikeMergingMethod<F>) | |
| 288 | -> DiscreteMeasure<Loc<F, N>, F> | |
| 289 | where DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F> { | |
| 290 | μ.merge_spikes_fitness(merging, | |
| 291 | |μ̃| self.calculate_fit_simple(μ̃), | |
| 292 | |&v| v); | |
| 293 | μ.prune(); | |
| 294 | μ | |
| 295 | } | |
| 296 | ||
| 297 | /// Returns measure to be used for value calculations, which may differ from μ. | |
| 298 | fn value_μ<'c, 'b : 'c>(&'b self, μ : &'c DiscreteMeasure<Loc<F, N>, F>) | |
| 299 | -> &'c DiscreteMeasure<Loc<F, N>, F> { | |
| 300 | μ | |
| 301 | } | |
| 302 | } | |
| 303 | ||
| 304 | /// Specialisation of [`generic_pointsource_fb`] to basic μFB. | |
| 305 | struct BasicFB< | |
| 306 | 'a, | |
| 307 | F : Float + ToNalgebraRealField, | |
| 308 | A : ForwardModel<Loc<F, N>, F>, | |
| 309 | const N : usize | |
| 310 | > { | |
| 311 | /// The data | |
| 312 | b : &'a A::Observable, | |
| 313 | /// The forward operator | |
| 314 | opA : &'a A, | |
| 315 | } | |
| 316 | ||
| 317 | /// Implementation of [`FBSpecialisation`] for basic μFB forward-backward splitting. | |
| 318 | #[replace_float_literals(F::cast_from(literal))] | |
| 319 | impl<'a, F : Float + ToNalgebraRealField , A : ForwardModel<Loc<F, N>, F>, const N : usize> | |
| 320 | FBSpecialisation<F, A::Observable, N> for BasicFB<'a, F, A, N> { | |
| 321 | fn update( | |
| 322 | &mut self, | |
| 323 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
| 324 | _μ_base : &DiscreteMeasure<Loc<F, N>, F> | |
| 325 | ) -> (A::Observable, Option<F>) { | |
| 326 | μ.prune(); | |
| 327 | //*residual = self.opA.apply(μ) - self.b; | |
| 328 | let mut residual = self.b.clone(); | |
| 329 | self.opA.gemv(&mut residual, 1.0, μ, -1.0); | |
| 330 | (residual, None) | |
| 331 | } | |
| 332 | ||
| 333 | fn calculate_fit_simple( | |
| 334 | &self, | |
| 335 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 336 | ) -> F { | |
| 337 | let mut residual = self.b.clone(); | |
| 338 | self.opA.gemv(&mut residual, 1.0, μ, -1.0); | |
| 339 | residual.norm2_squared_div2() | |
| 340 | } | |
| 341 | } | |
| 342 | ||
| 343 | /// Specialisation of [`generic_pointsource_fb`] to FISTA. | |
| 344 | struct FISTA< | |
| 345 | 'a, | |
| 346 | F : Float + ToNalgebraRealField, | |
| 347 | A : ForwardModel<Loc<F, N>, F>, | |
| 348 | const N : usize | |
| 349 | > { | |
| 350 | /// The data | |
| 351 | b : &'a A::Observable, | |
| 352 | /// The forward operator | |
| 353 | opA : &'a A, | |
| 354 | /// Current inertial parameter | |
| 355 | λ : F, | |
| 356 | /// Previous iterate without inertia applied. | |
| 357 | /// We need to store this here because `μ_base` passed to [`FBSpecialisation::update`] will | |
| 358 | /// have inertia applied to it, so is not useful to use. | |
| 359 | μ_prev : DiscreteMeasure<Loc<F, N>, F>, | |
| 360 | } | |
| 361 | ||
| 362 | /// Implementation of [`FBSpecialisation`] for μFISTA inertial forward-backward splitting. | |
| 363 | #[replace_float_literals(F::cast_from(literal))] | |
| 364 | impl<'a, F : Float + ToNalgebraRealField, A : ForwardModel<Loc<F, N>, F>, const N : usize> | |
| 365 | FBSpecialisation<F, A::Observable, N> for FISTA<'a, F, A, N> { | |
| 366 | fn update( | |
| 367 | &mut self, | |
| 368 | μ : &mut DiscreteMeasure<Loc<F, N>, F>, | |
| 369 | _μ_base : &DiscreteMeasure<Loc<F, N>, F> | |
| 370 | ) -> (A::Observable, Option<F>) { | |
| 371 | // Update inertial parameters | |
| 372 | let λ_prev = self.λ; | |
| 373 | self.λ = 2.0 * λ_prev / ( λ_prev + (4.0 + λ_prev * λ_prev).sqrt() ); | |
| 374 | let θ = self.λ / λ_prev - self.λ; | |
| 375 | // Perform inertial update on μ. | |
| 376 | // This computes μ ← (1 + θ) * μ - θ * μ_prev, pruning spikes where both μ | |
| 377 | // and μ_prev have zero weight. Since both have weights from the finite-dimensional | |
| 378 | // subproblem with a proximal projection step, this is likely to happen when the | |
| 379 | // spike is not needed. A copy of the pruned μ without arithmetic performed is | |
| 380 | // stored in μ_prev. | |
| 381 | μ.pruning_sub(1.0 + θ, θ, &mut self.μ_prev); | |
| 382 | ||
| 383 | //*residual = self.opA.apply(μ) - self.b; | |
| 384 | let mut residual = self.b.clone(); | |
| 385 | self.opA.gemv(&mut residual, 1.0, μ, -1.0); | |
| 386 | (residual, None) | |
| 387 | } | |
| 388 | ||
| 389 | fn calculate_fit_simple( | |
| 390 | &self, | |
| 391 | μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 392 | ) -> F { | |
| 393 | let mut residual = self.b.clone(); | |
| 394 | self.opA.gemv(&mut residual, 1.0, μ, -1.0); | |
| 395 | residual.norm2_squared_div2() | |
| 396 | } | |
| 397 | ||
| 398 | fn calculate_fit( | |
| 399 | &self, | |
| 400 | _μ : &DiscreteMeasure<Loc<F, N>, F>, | |
| 401 | _residual : &A::Observable | |
| 402 | ) -> F { | |
| 403 | self.calculate_fit_simple(&self.μ_prev) | |
| 404 | } | |
| 405 | ||
| 406 | // For FISTA we need to do a final pruning as well, due to the limited | |
| 407 | // pruning that can be done on each step. | |
| 408 | fn postprocess(mut self, μ_base : DiscreteMeasure<Loc<F, N>, F>, merging : SpikeMergingMethod<F>) | |
| 409 | -> DiscreteMeasure<Loc<F, N>, F> | |
| 410 | where DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F> { | |
| 411 | let mut μ = self.μ_prev; | |
| 412 | self.μ_prev = μ_base; | |
| 413 | μ.merge_spikes_fitness(merging, | |
| 414 | |μ̃| self.calculate_fit_simple(μ̃), | |
| 415 | |&v| v); | |
| 416 | μ.prune(); | |
| 417 | μ | |
| 418 | } | |
| 419 | ||
| 420 | fn value_μ<'c, 'b : 'c>(&'c self, _μ : &'c DiscreteMeasure<Loc<F, N>, F>) | |
| 421 | -> &'c DiscreteMeasure<Loc<F, N>, F> { | |
| 422 | &self.μ_prev | |
| 423 | } | |
| 424 | } | |
| 425 | ||
| 426 | /// Iteratively solve the pointsource localisation problem using forward-backward splitting | |
| 427 | /// | |
| 428 | /// The settings in `config` have their [respective documentation](FBConfig). `opA` is the | |
| 429 | /// forward operator $A$, $b$ the observable, and $α$ the regularisation weight. | |
| 430 | /// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution | |
| 431 | /// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control | |
| 432 | /// as documented in [`alg_tools::iterate`]. | |
| 433 | /// | |
| 434 | /// For details on the mathematical formulation, see the [module level](self) documentation. | |
| 435 | /// | |
| 436 | /// Returns the final iterate. | |
| 437 | #[replace_float_literals(F::cast_from(literal))] | |
| 438 | pub fn pointsource_fb<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, const N : usize>( | |
| 439 | opA : &'a A, | |
| 440 | b : &A::Observable, | |
| 441 | α : F, | |
| 442 | op𝒟 : &'a 𝒟, | |
| 443 | config : &FBConfig<F>, | |
| 444 | iterator : I, | |
| 445 | plotter : SeqPlotter<F, N> | |
| 446 | ) -> DiscreteMeasure<Loc<F, N>, F> | |
| 447 | where F : Float + ToNalgebraRealField, | |
| 448 | I : AlgIteratorFactory<IterInfo<F, N>>, | |
| 449 | for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>, | |
| 450 | //+ std::ops::Mul<F, Output=A::Observable>, <-- FIXME: compiler overflow | |
| 451 | A::Observable : std::ops::MulAssign<F>, | |
| 452 | GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, | |
| 453 | A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> | |
| 454 | + Lipschitz<𝒟, FloatType=F>, | |
| 455 | BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, | |
| 456 | G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, | |
| 457 | 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, | |
| 458 | 𝒟::Codomain : RealMapping<F, N>, | |
| 459 | S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
| 460 | K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
| 461 | BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, | |
| 462 | Cube<F, N>: P2Minimise<Loc<F, N>, F>, | |
| 463 | PlotLookup : Plotting<N>, | |
| 464 | DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F> { | |
| 465 | ||
| 466 | let initial_residual = -b; | |
| 467 | let τ = config.τ0/opA.lipschitz_factor(&op𝒟).unwrap(); | |
| 468 | ||
| 469 | match config.meta { | |
| 470 | FBMetaAlgorithm::None => generic_pointsource_fb( | |
| 471 | opA, α, op𝒟, τ, &config.insertion, iterator, plotter, initial_residual, | |
| 472 | BasicFB{ b, opA } | |
| 473 | ), | |
| 474 | FBMetaAlgorithm::InertiaFISTA => generic_pointsource_fb( | |
| 475 | opA, α, op𝒟, τ, &config.insertion, iterator, plotter, initial_residual, | |
| 476 | FISTA{ b, opA, λ : 1.0, μ_prev : DiscreteMeasure::new() } | |
| 477 | ), | |
| 478 | } | |
| 479 | } | |
| 480 | ||
| 481 | /// Generic implementation of [`pointsource_fb`]. | |
| 482 | /// | |
| 483 | /// The method can be specialised to even primal-dual proximal splitting through the | |
| 484 | /// [`FBSpecialisation`] parameter `specialisation`. | |
| 485 | /// The settings in `config` have their [respective documentation](FBGenericConfig). `opA` is the | |
| 486 | /// forward operator $A$, $b$ the observable, and $α$ the regularisation weight. | |
| 487 | /// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution | |
| 488 | /// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control | |
| 489 | /// as documented in [`alg_tools::iterate`]. | |
| 490 | /// | |
| 491 | /// The implementation relies on [`alg_tools::bisection_tree::BTFN`] presentations of | |
| 492 | /// sums of simple functions using bisection trees, and the related | |
| 493 | /// [`alg_tools::bisection_tree::Aggregator`]s, to efficiently search for component functions | |
| 494 | /// active at specific points, and to maximise their sums. Through the implementation of the | |
| 495 | /// [`alg_tools::bisection_tree::BT`] bisection trees, it also relies on the copy-on-write features | |
| 496 | /// of [`std::sync::Arc`] to only update relevant parts of the bisection tree when adding functions. | |
| 497 | /// | |
| 498 | /// Returns the final iterate. | |
| 499 | #[replace_float_literals(F::cast_from(literal))] | |
| 500 | pub fn generic_pointsource_fb<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, Spec, const N : usize>( | |
| 501 | opA : &'a A, | |
| 502 | α : F, | |
| 503 | op𝒟 : &'a 𝒟, | |
| 504 | mut τ : F, | |
| 505 | config : &FBGenericConfig<F>, | |
| 506 | iterator : I, | |
| 507 | mut plotter : SeqPlotter<F, N>, | |
| 508 | mut residual : A::Observable, | |
| 509 | mut specialisation : Spec, | |
| 510 | ) -> DiscreteMeasure<Loc<F, N>, F> | |
| 511 | where F : Float + ToNalgebraRealField, | |
| 512 | I : AlgIteratorFactory<IterInfo<F, N>>, | |
| 513 | Spec : FBSpecialisation<F, A::Observable, N>, | |
| 514 | A::Observable : std::ops::MulAssign<F>, | |
| 515 | GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, | |
| 516 | A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> | |
| 517 | + Lipschitz<𝒟, FloatType=F>, | |
| 518 | BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, | |
| 519 | G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, | |
| 520 | 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, | |
| 521 | 𝒟::Codomain : RealMapping<F, N>, | |
| 522 | S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
| 523 | K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, | |
| 524 | BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, | |
| 525 | Cube<F, N>: P2Minimise<Loc<F, N>, F>, | |
| 526 | PlotLookup : Plotting<N>, | |
| 527 | DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F> { | |
| 528 | ||
| 529 | // Set up parameters | |
| 530 | let quiet = iterator.is_quiet(); | |
| 531 | let op𝒟norm = op𝒟.opnorm_bound(); | |
| 532 | // We multiply tolerance by τ for FB since | |
| 533 | // our subproblems depending on tolerances are scaled by τ compared to the conditional | |
| 534 | // gradient approach. | |
|
8
ea3ca78873e8
Clean up / remove various unused FB algorithm family hacks.
Tuomo Valkonen <tuomov@iki.fi>
parents:
7
diff
changeset
|
535 | let tolerance = config.tolerance * τ * α; |
| 0 | 536 | let mut ε = tolerance.initial(); |
| 537 | ||
| 538 | // Initialise operators | |
| 539 | let preadjA = opA.preadjoint(); | |
| 540 | ||
| 541 | // Initialise iterates | |
| 542 | let mut μ = DiscreteMeasure::new(); | |
| 543 | ||
| 544 | let mut inner_iters = 0; | |
| 545 | let mut this_iters = 0; | |
| 546 | let mut pruned = 0; | |
| 547 | let mut merged = 0; | |
| 548 | ||
| 549 | let μ_diff = |μ_new : &DiscreteMeasure<Loc<F, N>, F>, | |
| 550 | μ_base : &DiscreteMeasure<Loc<F, N>, F>| { | |
| 551 | let mut ν : DiscreteMeasure<Loc<F, N>, F> = match config.insertion_style { | |
| 552 | InsertionStyle::Reuse => { | |
| 553 | μ_new.iter_spikes() | |
| 554 | .zip(μ_base.iter_masses().chain(std::iter::repeat(0.0))) | |
| 555 | .map(|(δ, α_base)| (δ.x, α_base - δ.α)) | |
| 556 | .collect() | |
| 557 | }, | |
| 558 | InsertionStyle::Zero => { | |
| 559 | μ_new.iter_spikes() | |
| 560 | .map(|δ| -δ) | |
| 561 | .chain(μ_base.iter_spikes().copied()) | |
| 562 | .collect() | |
| 563 | } | |
| 564 | }; | |
| 565 | ν.prune(); // Potential small performance improvement | |
| 566 | ν | |
| 567 | }; | |
| 568 | ||
| 569 | // Run the algorithm | |
| 570 | iterator.iterate(|state| { | |
| 571 | // Calculate subproblem tolerances, and update main tolerance for next iteration | |
| 572 | let τα = τ * α; | |
| 573 | let target_bounds = Bounds(τα - ε, τα + ε); | |
| 574 | let merge_tolerance = config.merge_tolerance_mult * ε; | |
| 575 | let merge_target_bounds = Bounds(τα - merge_tolerance, τα + merge_tolerance); | |
| 576 | let inner_tolerance = ε * config.inner.tolerance_mult; | |
| 577 | let refinement_tolerance = ε * config.refinement.tolerance_mult; | |
| 578 | let maximise_above = τα + ε * config.insertion_cutoff_factor; | |
| 579 | let ε_prev = ε; | |
| 580 | ε = tolerance.update(ε, state.iteration()); | |
| 581 | ||
| 582 | // Maximum insertion count and measure difference calculation depend on insertion style. | |
| 583 | let (m, warn_insertions) = match (state.iteration(), config.bootstrap_insertions) { | |
| 584 | (i, Some((l, k))) if i <= l => (k, false), | |
| 585 | _ => (config.max_insertions, !quiet), | |
| 586 | }; | |
| 587 | let max_insertions = match config.insertion_style { | |
| 588 | InsertionStyle::Zero => { | |
| 589 | todo!("InsertionStyle::Zero does not currently work with FISTA, so diabled."); | |
| 590 | // let n = μ.len(); | |
| 591 | // μ = DiscreteMeasure::new(); | |
| 592 | // n + m | |
| 593 | }, | |
| 594 | InsertionStyle::Reuse => m, | |
| 595 | }; | |
| 596 | ||
| 597 | // Calculate smooth part of surrogate model. | |
| 598 | // Using `std::mem::replace` here is not ideal, and expects that `empty_observable` | |
| 599 | // has no significant overhead. For some reason Rust doesn't allow us to simply move | |
| 600 | // the residual and replace it below before the end of this closure. | |
|
7
c32171f7cce5
Remove ergodic tolerance; it's not useful.
Tuomo Valkonen <tuomov@iki.fi>
parents:
0
diff
changeset
|
601 | residual *= -τ; |
| 0 | 602 | let r = std::mem::replace(&mut residual, opA.empty_observable()); |
| 603 | let minus_τv = preadjA.apply(r); // minus_τv = -τA^*(Aμ^k-b) | |
| 604 | // TODO: should avoid a second copy of μ here; μ_base already stores a copy. | |
| 605 | let ω0 = op𝒟.apply(μ.clone()); // 𝒟μ^k | |
| 606 | //let g = &minus_τv + ω0; // Linear term of surrogate model | |
| 607 | ||
| 608 | // Save current base point | |
| 609 | let μ_base = μ.clone(); | |
| 610 | ||
| 611 | // Add points to support until within error tolerance or maximum insertion count reached. | |
| 612 | let mut count = 0; | |
| 613 | let (within_tolerances, d) = 'insertion: loop { | |
| 614 | if μ.len() > 0 { | |
| 615 | // Form finite-dimensional subproblem. The subproblem references to the original μ^k | |
| 616 | // from the beginning of the iteration are all contained in the immutable c and g. | |
| 617 | let à = op𝒟.findim_matrix(μ.iter_locations()); | |
| 618 | let g̃ = DVector::from_iterator(μ.len(), | |
| 619 | μ.iter_locations() | |
| 620 | .map(|ζ| minus_τv.apply(ζ) + ω0.apply(ζ)) | |
| 621 | .map(F::to_nalgebra_mixed)); | |
| 622 | let mut x = μ.masses_dvector(); | |
| 623 | ||
| 624 | // The gradient of the forward component of the inner objective is C^*𝒟Cx - g̃. | |
| 625 | // We have |C^*𝒟Cx|_2 = sup_{|z|_2 ≤ 1} ⟨z, C^*𝒟Cx⟩ = sup_{|z|_2 ≤ 1} ⟨Cz|𝒟Cx⟩ | |
| 626 | // ≤ sup_{|z|_2 ≤ 1} |Cz|_ℳ |𝒟Cx|_∞ ≤ sup_{|z|_2 ≤ 1} |Cz|_ℳ |𝒟| |Cx|_ℳ | |
| 627 | // ≤ sup_{|z|_2 ≤ 1} |z|_1 |𝒟| |x|_1 ≤ sup_{|z|_2 ≤ 1} n |z|_2 |𝒟| |x|_2 | |
| 628 | // = n |𝒟| |x|_2, where n is the number of points. Therefore | |
| 629 | let inner_τ = config.inner.τ0 / (op𝒟norm * F::cast_from(μ.len())); | |
| 630 | ||
| 631 | // Solve finite-dimensional subproblem. | |
| 632 | let inner_it = config.inner.iterator_options.stop_target(inner_tolerance); | |
| 633 | inner_iters += quadratic_nonneg(config.inner.method, &Ã, &g̃, τ*α, &mut x, | |
| 634 | inner_τ, inner_it); | |
| 635 | ||
| 636 | // Update masses of μ based on solution of finite-dimensional subproblem. | |
| 637 | μ.set_masses_dvector(&x); | |
| 638 | } | |
| 639 | ||
| 640 | // Form d = ω0 - τv - 𝒟μ = -𝒟(μ - μ^k) - τv for checking the proximate optimality | |
| 641 | // conditions in the predual space, and finding new points for insertion, if necessary. | |
| 642 | let mut d = &minus_τv + op𝒟.preapply(μ_diff(&μ, &μ_base)); | |
| 643 | ||
| 644 | // If no merging heuristic is used, let's be more conservative about spike insertion, | |
| 645 | // and skip it after first round. If merging is done, being more greedy about spike | |
| 646 | // insertion also seems to improve performance. | |
| 647 | let may_break = if let SpikeMergingMethod::None = config.merging { | |
| 648 | false | |
| 649 | } else { | |
| 650 | count > 0 | |
| 651 | }; | |
| 652 | ||
| 653 | // If preliminary check indicates that we are in bounds, and if it otherwise matches | |
| 654 | // the insertion strategy, skip insertion. | |
|
7
c32171f7cce5
Remove ergodic tolerance; it's not useful.
Tuomo Valkonen <tuomov@iki.fi>
parents:
0
diff
changeset
|
655 | if may_break && target_bounds.superset(&d.bounds()) { |
| 0 | 656 | break 'insertion (true, d) |
| 657 | } | |
| 658 | ||
| 659 | // If the rough check didn't indicate stopping, find maximising point, maintaining for | |
| 660 | // the calculations in the beginning of the loop that v_ξ = (ω0-τv-𝒟μ)(ξ) = d(ξ), | |
| 661 | // where 𝒟μ is now distinct from μ0 after the insertions already performed. | |
| 662 | // We do not need to check lower bounds, as a solution of the finite-dimensional | |
| 663 | // subproblem should always satisfy them. | |
| 664 | ||
|
8
ea3ca78873e8
Clean up / remove various unused FB algorithm family hacks.
Tuomo Valkonen <tuomov@iki.fi>
parents:
7
diff
changeset
|
665 | // If μ has some spikes, only find a maximum of d if it is above a threshold |
|
ea3ca78873e8
Clean up / remove various unused FB algorithm family hacks.
Tuomo Valkonen <tuomov@iki.fi>
parents:
7
diff
changeset
|
666 | // defined by the refinement tolerance. |
|
ea3ca78873e8
Clean up / remove various unused FB algorithm family hacks.
Tuomo Valkonen <tuomov@iki.fi>
parents:
7
diff
changeset
|
667 | let (ξ, v_ξ) = match d.maximise_above(maximise_above, refinement_tolerance, |
|
ea3ca78873e8
Clean up / remove various unused FB algorithm family hacks.
Tuomo Valkonen <tuomov@iki.fi>
parents:
7
diff
changeset
|
668 | config.refinement.max_steps) { |
|
ea3ca78873e8
Clean up / remove various unused FB algorithm family hacks.
Tuomo Valkonen <tuomov@iki.fi>
parents:
7
diff
changeset
|
669 | None => break 'insertion (true, d), |
|
ea3ca78873e8
Clean up / remove various unused FB algorithm family hacks.
Tuomo Valkonen <tuomov@iki.fi>
parents:
7
diff
changeset
|
670 | Some(res) => res, |
| 0 | 671 | }; |
| 672 | ||
| 673 | // Break if maximum insertion count reached | |
| 674 | if count >= max_insertions { | |
| 675 | let in_bounds2 = target_bounds.upper() >= v_ξ; | |
| 676 | break 'insertion (in_bounds2, d) | |
| 677 | } | |
| 678 | ||
| 679 | // No point in optimising the weight here; the finite-dimensional algorithm is fast. | |
| 680 | μ += DeltaMeasure { x : ξ, α : 0.0 }; | |
| 681 | count += 1; | |
| 682 | }; | |
| 683 | ||
| 684 | if !within_tolerances && warn_insertions { | |
| 685 | // Complain (but continue) if we failed to get within tolerances | |
| 686 | // by inserting more points. | |
| 687 | let err = format!("Maximum insertions reached without achieving \ | |
| 688 | subproblem solution tolerance"); | |
| 689 | println!("{}", err.red()); | |
| 690 | } | |
| 691 | ||
| 692 | // Merge spikes | |
| 693 | if state.iteration() % config.merge_every == 0 { | |
| 694 | let n_before_merge = μ.len(); | |
| 695 | μ.merge_spikes(config.merging, |μ_candidate| { | |
| 696 | let mut d = &minus_τv + op𝒟.preapply(μ_diff(&μ_candidate, &μ_base)); | |
| 697 | ||
| 698 | if merge_target_bounds.superset(&d.bounds()) { | |
| 699 | return Some(()) | |
| 700 | } | |
| 701 | ||
| 702 | let d_min_supp = μ_candidate.iter_spikes().filter_map(|&DeltaMeasure{ α, ref x }| { | |
| 703 | (α != 0.0).then(|| d.apply(x)) | |
| 704 | }).reduce(F::min); | |
| 705 | ||
| 706 | if d_min_supp.map_or(true, |b| b >= merge_target_bounds.lower()) && | |
| 707 | d.has_upper_bound(merge_target_bounds.upper(), refinement_tolerance, | |
| 708 | config.refinement.max_steps) { | |
| 709 | Some(()) | |
| 710 | } else { | |
| 711 | None | |
| 712 | } | |
| 713 | }); | |
| 714 | debug_assert!(μ.len() >= n_before_merge); | |
| 715 | merged += μ.len() - n_before_merge; | |
| 716 | } | |
| 717 | ||
| 718 | let n_before_prune = μ.len(); | |
| 719 | (residual, τ) = match specialisation.update(&mut μ, &μ_base) { | |
| 720 | (r, None) => (r, τ), | |
| 721 | (r, Some(new_τ)) => (r, new_τ) | |
| 722 | }; | |
| 723 | debug_assert!(μ.len() <= n_before_prune); | |
| 724 | pruned += n_before_prune - μ.len(); | |
| 725 | ||
| 726 | this_iters += 1; | |
| 727 | ||
| 728 | // Give function value if needed | |
| 729 | state.if_verbose(|| { | |
| 730 | let value_μ = specialisation.value_μ(&μ); | |
| 731 | // Plot if so requested | |
| 732 | plotter.plot_spikes( | |
| 733 | format!("iter {} end; {}", state.iteration(), within_tolerances), &d, | |
| 734 | "start".to_string(), Some(&minus_τv), | |
| 735 | Some(target_bounds), value_μ, | |
| 736 | ); | |
|
8
ea3ca78873e8
Clean up / remove various unused FB algorithm family hacks.
Tuomo Valkonen <tuomov@iki.fi>
parents:
7
diff
changeset
|
737 | // Calculate mean inner iterations and reset relevant counters. |
| 0 | 738 | // Return the statistics |
| 739 | let res = IterInfo { | |
| 740 | value : specialisation.calculate_fit(&μ, &residual) + α * value_μ.norm(Radon), | |
| 741 | n_spikes : value_μ.len(), | |
| 742 | inner_iters, | |
| 743 | this_iters, | |
| 744 | merged, | |
| 745 | pruned, | |
| 746 | ε : ε_prev, | |
| 747 | postprocessing: config.postprocessing.then(|| value_μ.clone()), | |
| 748 | }; | |
| 749 | inner_iters = 0; | |
| 750 | this_iters = 0; | |
| 751 | merged = 0; | |
| 752 | pruned = 0; | |
| 753 | res | |
| 754 | }) | |
| 755 | }); | |
| 756 | ||
| 757 | specialisation.postprocess(μ, config.final_merging) | |
| 758 | } | |
| 759 | ||
| 760 | ||
| 761 | ||
| 762 |