//! Iterative algorithms for solving finite-dimensional subproblems.

use serde::{Serialize, Deserialize};
use nalgebra::{DVector, DMatrix};
use numeric_literals::replace_float_literals;
use itertools::{izip, Itertools};
use colored::Colorize;

use alg_tools::iter::Mappable;
use alg_tools::error::NumericalError;
use alg_tools::iterate::{
    AlgIteratorFactory,
    AlgIteratorState,
    AlgIteratorOptions,
    Verbose,
    Step,
};
use alg_tools::linops::GEMV;
use alg_tools::nalgebra_support::ToNalgebraRealField;

use crate::types::*;

/// Method for solving finite-dimensional subproblems
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
#[allow(dead_code)]
pub enum InnerMethod {
    /// Forward-backward
    FB,
    /// Semismooth Newton
    SSN,
}

/// Settings for the solution of finite-dimensional subproblems
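///
/// A minimal construction sketch (illustrative only: it assumes `f64` implements [`Float`];
/// only the field names and [`InnerMethod`] variants come from this module):
/// ```ignore
/// let inner = InnerSettings::<f64> {
///     method : InnerMethod::SSN,   // switch the subproblem solver
///     tolerance_mult : 0.001,      // tighter fraction of the outer tolerance
///     .. Default::default()
/// };
/// ```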
#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
pub struct InnerSettings<F : Float> {
    /// Method
    pub method : InnerMethod,
    /// Proportional step length (∈ [0, 1) for `InnerMethod::FB`).
    pub τ0 : F,
    /// Fraction of `tolerance` given to inner algorithm
    pub tolerance_mult : F,
    /// Iterator options
    #[serde(flatten)]
    pub iterator_options : AlgIteratorOptions,
}

#[replace_float_literals(F::cast_from(literal))]
impl<F : Float> Default for InnerSettings<F> {
    fn default() -> Self {
        InnerSettings {
            τ0 : 0.99,
            iterator_options : AlgIteratorOptions {
                // max_iter cannot be very small, as initially FB needs many iterations, although
                // on later invocations even one or two tends to be enough
                max_iter : 2000,
                // verbose_iter affects testing of sufficient convergence, so we set it to
                // a small value…
                verbose_iter : Verbose::Every(1),
                // … but don't print out anything
                quiet : true,
                .. Default::default()
            },
            method : InnerMethod::FB,
            tolerance_mult : 0.01,
        }
    }
}
68 | ||
69 | /// Compute the proximal operator of $x \mapsto x + \delta\_{[0, \infty)}$, i.e., | |
70 | /// the non-negativity contrained soft-thresholding operator. | |
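///
/// A couple of illustrative values (a sketch only, with `F = f64`; the function itself is
/// private to this module):
/// ```ignore
/// assert_eq!(nonneg_soft_thresholding(1.5, 1.0), 0.5); // shifted down by λ
/// assert_eq!(nonneg_soft_thresholding(0.3, 1.0), 0.0); // clipped to the non-negative cone
/// ```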
#[inline]
#[replace_float_literals(F::cast_from(literal))]
fn nonneg_soft_thresholding<F : Float>(v : F, λ : F) -> F {
    (v - λ).max(0.0)
}

/// Forward-backward splitting implementation of [`quadratic_nonneg`].
/// For detailed documentation of the inputs and outputs, see that function.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
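///
/// Each iteration applies the standard forward-backward update, written here as it is
/// implemented below, with the $λ$ term absorbed into the proximal step:
/// <div>$$
///     x^{k+1} = \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x^k - τ[Ax^k - g]).
/// $$</div>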
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_nonneg_fb<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let mut xprev = x.clone();
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    let mut v = DVector::zeros(x.len());
    let mut iters = 0;

    iterator.iterate(|state| {
        // Replace `x` with $x - τ[Ax-g] = [x + τg] - τAx$
        v.copy_from(g);                  // v = g
        v.axpy(1.0, x, τ);               // v = x + τ*g
        v.sygemv(-τ, mA, x, 1.0);        // v = [x + τg] - τAx
        let backup = state.if_verbose(|| {
            xprev.copy_from(x)
        });
        // Calculate the proximal map
        x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| {
            *x_i = nonneg_soft_thresholding(v_i, τλ);
        });

        iters += 1;

        backup.map(|_| {
            // The subdifferential of the objective is $Ax - g + λ{\vec 1} + ∂ δ_{≥ 0}(x)$.
            // We return the minimal ∞-norm over all subderivatives.
            v.copy_from(g);                  // v = g
            mA.gemv(&mut v, 1.0, x, -1.0);   // v = Ax - g
            let mut val = 0.0;
            for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
                let d = v_i + λ;
                if x_i > 0.0 || d < 0.0 {
                    val = val.max(d.abs());
                }
            }
            F::from_nalgebra_mixed(val)
        })
    });

    iters
}
136 | ||
137 | /// Semismooth Newton implementation of [`quadratic_nonneg`]. | |
138 | /// | |
139 | /// For detailed documentation of the inputs, refer to there. | |
140 | /// This function returns the number of iterations taken if there was no inversion failure, | |
141 | /// | |
142 | /// ## Method derivation | |
143 | /// | |
144 | /// **The below may look like garbage. Sorry, but rustdoc is obsolete rubbish | |
145 | /// that doesn't directly support by-now standard-in-markdown LaTeX math. Instead it | |
146 | /// forces one into unreliable KaTeX autorender postprocessing andescape hell and that | |
147 | /// it doesn't even process correctly.** | |
148 | /// | |
149 | /// <p> | |
150 | /// For the objective | |
151 | /// $$ | |
152 | /// J(x) = \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x), | |
153 | /// $$ | |
154 | /// we have the optimality condition | |
155 | /// $$ | |
156 | /// x - \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x - τ[Ax-g^⊤]) = 0, | |
157 | /// $$ | |
158 | /// which we write as | |
159 | /// $$ | |
160 | /// x - [G ∘ F](x)=0 | |
161 | /// $$ | |
162 | /// for | |
163 | /// $$ | |
164 | /// G(x) = \mathop{\mathrm{prox}}_{λ{\vec 1}^⊤ + δ_{≥ 0}} | |
165 | /// \quad\text{and}\quad | |
166 | /// F(x) = x - τ Ax + τ g^⊤ | |
167 | /// $$ | |
168 | /// We can use Newton derivative chain rule to compute | |
169 | /// $D_N[G ∘ F](x) = D_N G(F(x)) D_N F(x)$, where | |
170 | /// $D_N F(x) = \mathop{\mathrm{Id}} - τ A$, | |
171 | /// and $[D_N G(F(x))]_i = 1$ for inactive coordinates and $=0$ for active coordinates. | |
172 | /// </p> | |
173 | /// | |
174 | /// <p> | |
175 | /// The method itself involves solving $D_N[Id - G ∘ F](x^k) s^k = - [Id - G ∘ F](x^k)$ and | |
176 | /// updating $x^{k+1} = x^k + s^k$. Consequently | |
177 | /// $$ | |
178 | /// s^k - D_N G(F(x^k)) [s^k - τ As^k] = - x^k + [G ∘ F](x^k) | |
179 | /// $$ | |
180 | /// For $𝒜$ the set of active coordinates and $ℐ$ the set of inactive coordinates, this | |
181 | /// expands as | |
182 | /// $$ | |
183 | /// [τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜 | |
184 | /// $$ | |
185 | /// and | |
186 | /// $$ | |
187 | /// s^k_𝒜 = - x^k_𝒜 + [G ∘ F](x^k)_𝒜. | |
188 | /// $$ | |
189 | /// Thus on $𝒜$ the update $[x^k + s^k]_𝒜 = [G ∘ F](x^k)_𝒜$ is just the forward-backward update. | |
190 | /// </p> | |
191 | /// | |
192 | /// <p> | |
193 | /// We need to detect stopping by a subdifferential and return $x$ satisfying $x ≥ 0$, | |
194 | /// which is in general not true for the SSN. We therefore use that $[G ∘ F](x^k)$ is a valid | |
195 | /// forward-backward step. | |
196 | /// </p> | |
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_nonneg_ssn<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> Result<usize, NumericalError>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let n = x.len();
    let mut xprev = x.clone();
    let mut v = DVector::zeros(n);
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n));
    let mut s = DVector::zeros(0);
    let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0));
    let mut iters = 0;

    let res = iterator.iterate_fallible(|state| {
        // 1. Perform delayed SSN-update based on previously computed step on active
        // coordinates. The step is delayed to the beginning of the loop because
        // the SSN step may violate constraints, so we arrange `x` to contain at the
        // end of the loop the valid FB step that forms part of the SSN step.
        let mut si = s.iter();
        for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) {
            if ast {
                *x_i = *xprev_i + *si.next().unwrap()
            }
            *xprev_i = *x_i;
        }

        //xprev.copy_from(x);

        // 2. Calculate FB step.
        // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g] = [x⁻ + τg] - τAx⁻$
        x.axpy(τ, g, 1.0);               // x = x⁻ + τ*g
        x.sygemv(-τ, mA, &xprev, 1.0);   // x = [x⁻ + τg] - τAx⁻
        // 2.2. Calculate prox and set of active coordinates at the same time
        let mut act_changed = false;
        let mut n_inact = 0;
        for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) {
            if *x_i > τλ {
                *x_i -= τλ;
                if !*ast {
                    act_changed = true;
                    *ast = true;
                }
                n_inact += 1;
            } else {
                *x_i = 0.0;
                if *ast {
                    act_changed = true;
                    *ast = false;
                }
            }
        }

        // *** x now contains forward-backward step ***

        // 3. Solve SSN step `s`.
        // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed.
        if act_changed {
            let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter());
            let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| {
                //(i_inact && j_inact).then_some(mAij * τ)
                (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ
            });
            let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr);
            decomp = nalgebra::linalg::LU::new(mat);
        }

        // 3.2 Solve `s` = $s_ℐ^k$ from
        // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$.
        // With current variable setup we have $[G ∘ F](x^k) = $ `x` and $x^k = x⁻$ = `xprev`,
        // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$.
        // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`.
        s = if n_inact > 0 {
            // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
            let inactfilt = inact.iter().copied();
            let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt);
            let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| {
                // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜
                let actfilt = inact.iter().copied().map(std::ops::Not::not);
                let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt);
                let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| {
                    mAij * (x_j - xprev_j)
                }).sum();
                // Subtract it from [x-xprev]_i
                //x_i - xprev_i - τ * actpart
                (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above
            });
            let mut rhs = DVector::from_iterator(n_inact, rhs_constr);
            assert_eq!(rhs.len(), n_inact);
            // Solve the system
            if !decomp.solve_mut(&mut rhs) {
                return Step::Failure(NumericalError(
                    "Failed to solve linear system for subproblem SSN."
                ))
            }
            rhs
        } else {
            DVector::zeros(0)
        };

        iters += 1;

        // 4. Report solution quality
        state.if_verbose(|| {
            // Calculate the subdifferential at the FB step `x` that has not yet had `s` added.
            // The subdifferential of the objective is $Ax - g + λ{\vec 1} + ∂ δ_{≥ 0}(x)$.
            // We return the minimal ∞-norm over all subderivatives.
            v.copy_from(g);                  // v = g
            mA.gemv(&mut v, 1.0, x, -1.0);   // v = Ax - g
            let mut val = 0.0;
            for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
                let d = v_i + λ;
                if x_i > 0.0 || d < 0.0 {
                    val = val.max(d.abs());
                }
            }
            F::from_nalgebra_mixed(val)
        })
    });

    res.map(|_| iters)
}
330 | ||
331 | /// This function applies an iterative method for the solution of the quadratic non-negativity | |
332 | /// constrained problem | |
333 | /// <div>$$ | |
334 | /// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x). | |
335 | /// $$</div> | |
336 | /// Semismooth Newton or forward-backward are supported based on the setting in `method`. | |
337 | /// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation. | |
338 | /// The constant $c$ does not need to be provided. The step length parameter is `τ` while | |
339 | /// `x` contains the initial iterate and on return the final one. The `iterator` controls | |
340 | /// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some | |
341 | /// subdifferential of the objective to zero. | |
342 | /// | |
343 | /// Interior point methods could offer a further alternative, for example, the one in: | |
344 | /// | |
345 | /// * Valkonen T. - _A method for weighted projections to the positive definite | |
346 | /// cone_, <https://doi.org/10.1080/02331934.2014.929680>. | |
347 | /// | |
348 | /// This function returns the number of iterations taken. | |
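///
/// A minimal calling sketch (illustrative only: it assumes `f64` implements [`Float`] with
/// `MixedType = f64`, and that [`AlgIteratorOptions`] can serve directly as the `iterator`
/// factory, as in the FB fallback below):
/// ```ignore
/// let mA = DMatrix::from_row_slice(2, 2, &[2.0, 0.0,
///                                          0.0, 2.0]);
/// let g = DVector::from_column_slice(&[1.0, 0.2]);
/// let mut x = DVector::from_column_slice(&[0.0, 0.0]);    // initial iterate
/// let iters = quadratic_nonneg::<f64, _>(
///     InnerMethod::FB, &mA, &g, 0.5, &mut x, 0.99 / 2.0,  // τ chosen relative to ‖A‖ = 2
///     AlgIteratorOptions::default()
/// );
/// // For A = 2I the minimiser is max(g - λ{\vec 1}, 0)/2 componentwise, here ≈ [0.25, 0.0].
/// ```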
pub fn quadratic_nonneg<F, I>(
    method : InnerMethod,
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ : F,
    x : &mut DVector<F::MixedType>,
    τ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    match method {
        InnerMethod::FB =>
            quadratic_nonneg_fb(mA, g, λ, x, τ, iterator),
        InnerMethod::SSN =>
            quadratic_nonneg_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| {
                println!("{}", format!("{e}. Using FB fallback.").red());
                let ins = InnerSettings::<F>::default();
                quadratic_nonneg_fb(mA, g, λ, x, τ, ins.iterator_options)
            })
    }
}