Thu, 01 Dec 2022 23:07:35 +0200
Initial version
| 0 | 1 | //! Iterative algorithms for solving finite-dimensional subproblems. |
| 2 | ||
| 3 | use serde::{Serialize, Deserialize}; | |
| 4 | use nalgebra::{DVector, DMatrix}; | |
| 5 | use numeric_literals::replace_float_literals; | |
| 6 | use itertools::{izip, Itertools}; | |
| 7 | use colored::Colorize; | |
| 8 | ||
| 9 | use alg_tools::iter::Mappable; | |
| 10 | use alg_tools::error::NumericalError; | |
| 11 | use alg_tools::iterate::{ | |
| 12 | AlgIteratorFactory, | |
| 13 | AlgIteratorState, | |
| 14 | AlgIteratorOptions, | |
| 15 | Verbose, | |
| 16 | Step, | |
| 17 | }; | |
| 18 | use alg_tools::linops::GEMV; | |
| 19 | use alg_tools::nalgebra_support::ToNalgebraRealField; | |
| 20 | ||
| 21 | use crate::types::*; | |
| 22 | ||
/// Method for solving finite-dimensional subproblems.
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
#[allow(dead_code)]
pub enum InnerMethod {
    /// Forward-backward splitting ([`quadratic_nonneg_fb`]); only proximal/gradient
    /// steps, but may need many iterations on early invocations.
    FB,
    /// Semismooth Newton ([`quadratic_nonneg_ssn`]); solves a linear system on the
    /// inactive coordinates each step, falling back to FB if the solve fails.
    SSN,
}
| 32 | ||
/// Settings for the solution of finite-dimensional subproblems.
///
/// Consumed by [`quadratic_nonneg`] (via `method` and, on SSN fallback,
/// `iterator_options`); serialisable so runs can be logged/reproduced.
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
pub struct InnerSettings<F : Float> {
    /// Method for solving the subproblem
    pub method : InnerMethod,
    /// Proportional step length (∈ [0, 1) for `InnerMethod::FB`).
    pub τ0 : F,
    /// Fraction of the outer algorithm's `tolerance` given to the inner algorithm
    pub tolerance_mult : F,
    /// Iterator options controlling stopping and verbosity of the inner algorithm
    #[serde(flatten)]
    pub iterator_options : AlgIteratorOptions,
}
| 46 | ||
| 47 | #[replace_float_literals(F::cast_from(literal))] | |
| 48 | impl<F : Float> Default for InnerSettings<F> { | |
| 49 | fn default() -> Self { | |
| 50 | InnerSettings { | |
| 51 | τ0 : 0.99, | |
| 52 | iterator_options : AlgIteratorOptions { | |
| 53 | // max_iter cannot be very small, as initially FB needs many iterations, although | |
| 54 | // on later invocations even one or two tends to be enough | |
| 55 | max_iter : 2000, | |
| 56 | // verbose_iter affects testing of sufficient convergence, so we set it to | |
| 57 | // a small value… | |
| 58 | verbose_iter : Verbose::Every(1), | |
| 59 | // … but don't print out anything | |
| 60 | quiet : true, | |
| 61 | .. Default::default() | |
| 62 | }, | |
| 63 | method : InnerMethod::FB, | |
| 64 | tolerance_mult : 0.01, | |
| 65 | } | |
| 66 | } | |
| 67 | } | |
| 68 | ||
| 69 | /// Compute the proximal operator of $x \mapsto x + \delta\_{[0, \infty)}$, i.e., | |
| 70 | /// the non-negativity contrained soft-thresholding operator. | |
| 71 | #[inline] | |
| 72 | #[replace_float_literals(F::cast_from(literal))] | |
| 73 | fn nonneg_soft_thresholding<F : Float>(v : F, λ : F) -> F { | |
| 74 | (v - λ).max(0.0) | |
| 75 | } | |
| 76 | ||
/// Forward-backward splitting implementation of [`quadratic_nonneg`].
/// For detailed documentation of the inputs and outputs, refer to there.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
///
/// Returns the number of iterations taken.
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_nonneg_fb<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let mut xprev = x.clone();
    //let c = c_.to_nalgebra_mixed();
    // Convert the `F`-typed scalars (underscore-suffixed) into nalgebra's mixed
    // scalar type once, up front.
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    // Scratch vector: holds the gradient step within an iteration, and is reused
    // for the subdifferential computation on verbose iterations.
    let mut v = DVector::zeros(x.len());
    let mut iters = 0;

    iterator.iterate(|state| {
        // Forward (gradient) step into `v`:
        // replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$
        v.copy_from(g);             // v = g
        v.axpy(1.0, x, τ);          // v = x + τ*g
        v.sygemv(-τ, mA, x, 1.0);   // v = [x + τg]- τAx
        // NOTE(review): `xprev` is written here but never read afterwards — the
        // quality measure below is evaluated at the updated `x`; the backup only
        // serves as the verbosity token.
        let backup = state.if_verbose(|| {
            xprev.copy_from(x)
        });
        // Backward (proximal) step: componentwise non-negativity constrained
        // soft-thresholding by τλ, which also accounts for the λ term of the model.
        x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| {
            *x_i = nonneg_soft_thresholding(v_i, τλ);
        });

        iters +=1;

        // On verbose iterations, report solution quality.
        backup.map(|_| {
            // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$.
            // We return the minimal ∞-norm over all subderivatives.
            v.copy_from(g);                  // d = g
            mA.gemv(&mut v, 1.0, x, -1.0);   // d = Ax - g
            let mut val = 0.0;
            for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
                let d = v_i + λ;
                // For x_i = 0 with d ≥ 0 the subdifferential (-∞, d] contains 0,
                // so such coordinates contribute nothing to the norm.
                if x_i > 0.0 || d < 0.0 {
                    val = val.max(d.abs());
                }
            }
            F::from_nalgebra_mixed(val)
        })
    });

    iters
}
| 136 | ||
/// Semismooth Newton implementation of [`quadratic_nonneg`].
///
/// For detailed documentation of the inputs, refer to there.
/// This function returns the number of iterations taken if there was no inversion
/// failure; if the linear system solve fails, a [`NumericalError`] is returned instead.
///
/// ## Method derivation
///
/// **Note:** rustdoc has no native LaTeX math support, so the formulas below rely on
/// KaTeX autorender postprocessing of raw HTML blocks and escape sequences; they may
/// display imperfectly in places.
///
/// <p>
/// For the objective
/// $$
///     J(x) = \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x),
/// $$
/// we have the optimality condition
/// $$
///     x - \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x - τ[Ax-g^⊤]) = 0,
/// $$
/// which we write as
/// $$
///     x - [G ∘ F](x)=0
/// $$
/// for
/// $$
///     G(x) = \mathop{\mathrm{prox}}_{λ{\vec 1}^⊤ + δ_{≥ 0}}
///     \quad\text{and}\quad
///     F(x) = x - τ Ax + τ g^⊤
/// $$
/// We can use Newton derivative chain rule to compute
/// $D_N[G ∘ F](x) = D_N G(F(x)) D_N F(x)$, where
/// $D_N F(x) = \mathop{\mathrm{Id}} - τ A$,
/// and $[D_N G(F(x))]_i = 1$ for inactive coordinates and $=0$ for active coordinates.
/// </p>
///
/// <p>
/// The method itself involves solving $D_N[Id - G ∘ F](x^k) s^k = - [Id - G ∘ F](x^k)$ and
/// updating $x^{k+1} = x^k + s^k$. Consequently
/// $$
///     s^k - D_N G(F(x^k)) [s^k - τ As^k] = - x^k + [G ∘ F](x^k)
/// $$
/// For $𝒜$ the set of active coordinates and $ℐ$ the set of inactive coordinates, this
/// expands as
/// $$
///     [τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜
/// $$
/// and
/// $$
///     s^k_𝒜 = - x^k_𝒜 + [G ∘ F](x^k)_𝒜.
/// $$
/// Thus on $𝒜$ the update $[x^k + s^k]_𝒜 = [G ∘ F](x^k)_𝒜$ is just the forward-backward update.
/// </p>
///
/// <p>
/// We need to detect stopping by a subdifferential and return $x$ satisfying $x ≥ 0$,
/// which is in general not true for the SSN. We therefore use that $[G ∘ F](x^k)$ is a valid
/// forward-backward step.
/// </p>
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_nonneg_ssn<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> Result<usize, NumericalError>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let n = x.len();
    // `xprev` holds x⁻ = x^k while `x` is overwritten with the FB step.
    let mut xprev = x.clone();
    // Scratch vector for the subdifferential computation on verbose iterations.
    let mut v = DVector::zeros(n);
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    // inact[i] == true ⇔ coordinate i is inactive (x_i stays strictly positive
    // after the prox). Initially all coordinates are marked active.
    let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n));
    // `s` is the Newton step restricted to inactive coordinates; empty on the
    // first iteration so step 1 below is a no-op.
    let mut s = DVector::zeros(0);
    // LU factorisation of the inactive-coordinate system; recomputed only when
    // the active set changes.
    let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0));
    let mut iters = 0;

    let res = iterator.iterate_fallible(|state| {
        // 1. Perform delayed SSN-update based on previously computed step on active
        // coordinates. The step is delayed to the beginning of the loop because
        // the SSN step may violate constraints, so we arrange `x` to contain at the
        // end of the loop the valid FB step that forms part of the SSN step.
        // `s` only stores entries for inactive coordinates, hence the manual iterator.
        let mut si = s.iter();
        for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) {
            if ast {
                *x_i = *xprev_i + *si.next().unwrap()
            }
            *xprev_i = *x_i;
        }

        //xprev.copy_from(x);

        // 2. Calculate FB step.
        // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$
        x.axpy(τ, g, 1.0);              // x = x⁻ + τ*g
        x.sygemv(-τ, mA, &xprev, 1.0);  // x = [x⁻ + τg]- τAx⁻
        // 2.2. Calculate prox and set of active coordinates at the same time.
        // A coordinate is inactive iff the thresholded value stays positive.
        let mut act_changed = false;
        let mut n_inact = 0;
        for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) {
            if *x_i > τλ {
                *x_i -= τλ;
                if !*ast {
                    act_changed = true;
                    *ast = true;
                }
                n_inact += 1;
            } else {
                *x_i = 0.0;
                if *ast {
                    act_changed = true;
                    *ast = false;
                }
            }
        }

        // *** x now contains forward-backward step ***

        // 3. Solve SSN step `s`.
        // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed.
        // NOTE(review): `cartesian_product` pairs one index fastest while `mA.iter()`
        // traverses in storage order; this relies on `mA` being symmetric (consistent
        // with the `sygemv` calls) — confirm if A may ever be non-symmetric.
        if act_changed {
            let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter());
            let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| {
                //(i_inact && j_inact).then_some(mAij * τ)
                // 🔺 τ factor dropped from the matrix; compensated in the RHS below.
                (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ
            });
            let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr);
            decomp = nalgebra::linalg::LU::new(mat);
        }

        // 3.2 Solve `s` = $s_ℐ^k$ from
        // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$.
        // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`,
        // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
        // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`.
        s = if n_inact > 0 {
            // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
            let inactfilt = inact.iter().copied();
            let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt);
            let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| {
                // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜
                let actfilt = inact.iter().copied().map(std::ops::Not::not);
                let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt);
                let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| {
                    mAij * (x_j - xprev_j)
                }).sum();
                // Subtract it from [x-prev]_i
                //x_i - xprev_i - τ * actpart
                (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above
            });
            let mut rhs = DVector::from_iterator(n_inact, rhs_constr);
            assert_eq!(rhs.len(), n_inact);
            // Solve the system in place; report a numerical failure to the iterator
            // if the LU solve cannot proceed (e.g. singular inactive block).
            if !decomp.solve_mut(&mut rhs) {
                return Step::Failure(NumericalError(
                    "Failed to solve linear system for subproblem SSN."
                ))
            }
            rhs
        } else {
            // No inactive coordinates: nothing to solve, the pure FB step stands.
            DVector::zeros(0)
        };

        iters += 1;

        // 4. Report solution quality
        state.if_verbose(|| {
            // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added.
            // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$.
            // We return the minimal ∞-norm over all subderivatives.
            v.copy_from(g);                  // d = g
            mA.gemv(&mut v, 1.0, x, -1.0);   // d = Ax - g
            let mut val = 0.0;
            for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
                let d = v_i + λ;
                // For x_i = 0 with d ≥ 0 the subdifferential contains 0, so skip.
                if x_i > 0.0 || d < 0.0 {
                    val = val.max(d.abs());
                }
            }
            F::from_nalgebra_mixed(val)
        })
    });

    res.map(|_| iters)
}
| 330 | ||
| 331 | /// This function applies an iterative method for the solution of the quadratic non-negativity | |
| 332 | /// constrained problem | |
| 333 | /// <div>$$ | |
| 334 | /// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x). | |
| 335 | /// $$</div> | |
| 336 | /// Semismooth Newton or forward-backward are supported based on the setting in `method`. | |
| 337 | /// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation. | |
| 338 | /// The constant $c$ does not need to be provided. The step length parameter is `τ` while | |
| 339 | /// `x` contains the initial iterate and on return the final one. The `iterator` controls | |
| 340 | /// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some | |
| 341 | /// subdifferential of the objective to zero. | |
| 342 | /// | |
| 343 | /// Interior point methods could offer a further alternative, for example, the one in: | |
| 344 | /// | |
| 345 | /// * Valkonen T. - _A method for weighted projections to the positive definite | |
| 346 | /// cone_, <https://doi.org/10.1080/02331934.2014.929680>. | |
| 347 | /// | |
| 348 | /// This function returns the number of iterations taken. | |
| 349 | pub fn quadratic_nonneg<F, I>( | |
| 350 | method : InnerMethod, | |
| 351 | mA : &DMatrix<F::MixedType>, | |
| 352 | g : &DVector<F::MixedType>, | |
| 353 | //c_ : F, | |
| 354 | λ : F, | |
| 355 | x : &mut DVector<F::MixedType>, | |
| 356 | τ : F, | |
| 357 | iterator : I | |
| 358 | ) -> usize | |
| 359 | where F : Float + ToNalgebraRealField, | |
| 360 | I : AlgIteratorFactory<F> | |
| 361 | { | |
| 362 | ||
| 363 | match method { | |
| 364 | InnerMethod::FB => | |
| 365 | quadratic_nonneg_fb(mA, g, λ, x, τ, iterator), | |
| 366 | InnerMethod::SSN => | |
| 367 | quadratic_nonneg_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| { | |
| 368 | println!("{}", format!("{e}. Using FB fallback.").red()); | |
| 369 | let ins = InnerSettings::<F>::default(); | |
| 370 | quadratic_nonneg_fb(mA, g, λ, x, τ, ins.iterator_options) | |
| 371 | }) | |
| 372 | } | |
| 373 | } |