|
1 /*! |
|
2 Iterative algorithms for solving the finite-dimensional subproblem with non-negativity constraints. |
|
3 */ |
|
4 |
|
5 use nalgebra::{DVector, DMatrix}; |
|
6 use numeric_literals::replace_float_literals; |
|
7 use itertools::{izip, Itertools}; |
|
8 use colored::Colorize; |
|
9 |
|
10 use alg_tools::iter::Mappable; |
|
11 use alg_tools::error::NumericalError; |
|
12 use alg_tools::iterate::{ |
|
13 AlgIteratorFactory, |
|
14 AlgIteratorState, |
|
15 Step, |
|
16 }; |
|
17 use alg_tools::linops::GEMV; |
|
18 use alg_tools::nalgebra_support::ToNalgebraRealField; |
|
19 |
|
20 use crate::types::*; |
|
21 use super::{ |
|
22 InnerMethod, |
|
23 InnerSettings |
|
24 }; |
|
25 |
|
26 /// Compute the proximal operator of $x \mapsto x + \delta\_{[0, \infty)}$, i.e., |
|
27 /// the non-negativity contrained soft-thresholding operator. |
|
28 #[inline] |
|
29 #[replace_float_literals(F::cast_from(literal))] |
|
30 fn nonneg_soft_thresholding<F : Float>(v : F, λ : F) -> F { |
|
31 (v - λ).max(0.0) |
|
32 } |
|
33 |
|
34 /// Returns the ∞-norm minimal subdifferential of $x ↦ x^⊤Ax - g^⊤ x + λ\vec 1^⊤ x δ_{≥ 0}(x)$ |
|
35 /// at $x$. |
|
36 /// |
|
37 /// `v` will be modified and cannot be trusted to contain useful values afterwards. |
|
38 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
|
39 fn min_subdifferential<F : Float + ToNalgebraRealField>( |
|
40 v : &mut DVector<F::MixedType>, |
|
41 mA : &DMatrix<F::MixedType>, |
|
42 x : &DVector<F::MixedType>, |
|
43 g : &DVector<F::MixedType>, |
|
44 λ : F::MixedType |
|
45 ) -> F { |
|
46 v.copy_from(g); |
|
47 mA.gemv(v, 1.0, x, -1.0); // v = Ax - g |
|
48 let mut val = 0.0; |
|
49 for (&v_i, &x_i) in izip!(v.iter(), x.iter()) { |
|
50 // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$. |
|
51 let d = v_i + λ; |
|
52 if x_i > 0.0 || d < 0.0 { |
|
53 val = val.max(d.abs()); |
|
54 } |
|
55 } |
|
56 F::from_nalgebra_mixed(val) |
|
57 } |
|
58 |
|
59 /// Forward-backward splitting implementation of [`quadratic_nonneg`]. |
|
60 /// For detailed documentation of the inputs and outputs, refer to there. |
|
61 /// |
|
62 /// The `λ` component of the model is handled in the proximal step instead of the gradient step |
|
63 /// for potential performance improvements. |
|
64 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())] |
|
65 pub fn quadratic_nonneg_fb<F, I>( |
|
66 mA : &DMatrix<F::MixedType>, |
|
67 g : &DVector<F::MixedType>, |
|
68 //c_ : F, |
|
69 λ_ : F, |
|
70 x : &mut DVector<F::MixedType>, |
|
71 τ_ : F, |
|
72 iterator : I |
|
73 ) -> usize |
|
74 where F : Float + ToNalgebraRealField, |
|
75 I : AlgIteratorFactory<F> |
|
76 { |
|
77 let mut xprev = x.clone(); |
|
78 //let c = c_.to_nalgebra_mixed(); |
|
79 let λ = λ_.to_nalgebra_mixed(); |
|
80 let τ = τ_.to_nalgebra_mixed(); |
|
81 let τλ = τ * λ; |
|
82 let mut v = DVector::zeros(x.len()); |
|
83 let mut iters = 0; |
|
84 |
|
85 iterator.iterate(|state| { |
|
86 // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$ |
|
87 v.copy_from(g); // v = g |
|
88 v.axpy(1.0, x, τ); // v = x + τ*g |
|
89 v.sygemv(-τ, mA, x, 1.0); // v = [x + τg]- τAx |
|
90 let backup = state.if_verbose(|| { |
|
91 xprev.copy_from(x) |
|
92 }); |
|
93 // Calculate the proximal map |
|
94 x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| { |
|
95 *x_i = nonneg_soft_thresholding(v_i, τλ); |
|
96 }); |
|
97 |
|
98 iters +=1; |
|
99 |
|
100 backup.map(|_| { |
|
101 min_subdifferential(&mut v, mA, x, g, λ) |
|
102 }) |
|
103 }); |
|
104 |
|
105 iters |
|
106 } |
|
107 |
|
/// Semismooth Newton implementation of [`quadratic_nonneg`].
///
/// For detailed documentation of the inputs, refer to there.
/// This function returns the number of iterations taken if there was no inversion failure.
///
/// ## Method derivation
///
/// **The below may look like garbage. Sorry, but rustdoc is obsolete rubbish
/// that doesn't directly support by-now standard-in-markdown LaTeX math. Instead it
/// forces one into unreliable KaTeX autorender postprocessing and escape hell and that
/// it doesn't even process correctly.**
///
/// <p>
/// For the objective
/// $$
/// J(x) = \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x),
/// $$
/// we have the optimality condition
/// $$
/// x - \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x - τ[Ax-g^⊤]) = 0,
/// $$
/// which we write as
/// $$
/// x - [G ∘ F](x)=0
/// $$
/// for
/// $$
/// G(x) = \mathop{\mathrm{prox}}_{λ{\vec 1}^⊤ + δ_{≥ 0}}
/// \quad\text{and}\quad
/// F(x) = x - τ Ax + τ g^⊤
/// $$
/// We can use Newton derivative chain rule to compute
/// $D_N[G ∘ F](x) = D_N G(F(x)) D_N F(x)$, where
/// $D_N F(x) = \mathop{\mathrm{Id}} - τ A$,
/// and $[D_N G(F(x))]_i = 1$ for inactive coordinates and $=0$ for active coordinates.
/// </p>
///
/// <p>
/// The method itself involves solving $D_N[Id - G ∘ F](x^k) s^k = - [Id - G ∘ F](x^k)$ and
/// updating $x^{k+1} = x^k + s^k$. Consequently
/// $$
/// s^k - D_N G(F(x^k)) [s^k - τ As^k] = - x^k + [G ∘ F](x^k)
/// $$
/// For $𝒜$ the set of active coordinates and $ℐ$ the set of inactive coordinates, this
/// expands as
/// $$
/// [τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜
/// $$
/// and
/// $$
/// s^k_𝒜 = - x^k_𝒜 + [G ∘ F](x^k)_𝒜.
/// $$
/// Thus on $𝒜$ the update $[x^k + s^k]_𝒜 = [G ∘ F](x^k)_𝒜$ is just the forward-backward update.
/// </p>
///
/// <p>
/// We need to detect stopping by a subdifferential and return $x$ satisfying $x ≥ 0$,
/// which is in general not true for the SSN. We therefore use that $[G ∘ F](x^k)$ is a valid
/// forward-backward step.
/// </p>
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_nonneg_ssn<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> Result<usize, NumericalError>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let n = x.len();
    let mut xprev = x.clone();
    // Workspace for the subdifferential calculation on verbose iterations.
    let mut v = DVector::zeros(n);
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    // `inact[i]` is true when coordinate i is inactive (strictly positive after prox).
    let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n));
    // SSN step on the inactive coordinates of the previous iteration; initially
    // empty, so the first pass through the loop performs no delayed update.
    let mut s = DVector::zeros(0);
    // LU decomposition of the inactive submatrix; rebuilt when the active set changes.
    let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0));
    let mut iters = 0;

    let res = iterator.iterate_fallible(|state| {
        // 1. Perform delayed SSN-update based on previously computed step on inactive
        // coordinates. The step is delayed to the beginning of the loop because
        // the SSN step may violate constraints, so we arrange `x` to contain at the
        // end of the loop the valid FB step that forms part of the SSN step.
        // `s` has one entry per previously-inactive coordinate, consumed in order.
        let mut si = s.iter();
        for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) {
            if ast {
                *x_i = *xprev_i + *si.next().unwrap()
            }
            *xprev_i = *x_i;
        }

        //xprev.copy_from(x);

        // 2. Calculate FB step.
        // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$
        x.axpy(τ, g, 1.0);             // x = x⁻ + τ*g
        x.sygemv(-τ, mA, &xprev, 1.0); // x = [x⁻ + τg]- τAx⁻
        // 2.2. Calculate prox and set of active coordinates at the same time
        let mut act_changed = false;
        let mut n_inact = 0;
        for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) {
            if *x_i > τλ {
                // Inactive coordinate: soft-threshold, record status change.
                *x_i -= τλ;
                if !*ast {
                    act_changed = true;
                    *ast = true;
                }
                n_inact += 1;
            } else {
                // Active coordinate: clamp to the constraint.
                *x_i = 0.0;
                if *ast {
                    act_changed = true;
                    *ast = false;
                }
            }
        }

        // *** x now contains forward-backward step ***

        // 3. Solve SSN step `s`.
        // 3.1 Construct [A_{ℐ × ℐ}] if the set of inactive coordinates has changed.
        // NOTE(review): pairing the cartesian product of the inactivity flags
        // with `mA.iter()` assumes the element order matches; since `sygemv`
        // above already requires `mA` symmetric, the ℐ × ℐ submatrix is the
        // same either way — confirm symmetry is a documented precondition.
        if act_changed {
            let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter());
            let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| {
                //(i_inact && j_inact).then_some(mAij * τ)
                (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ
            });
            let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr);
            decomp = nalgebra::linalg::LU::new(mat);
        }

        // 3.2 Solve `s` = $s_ℐ^k$ from (the τ-normalized form of)
        // $[A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ/τ - [A_{ℐ × 𝒜}]s^k_𝒜$.
        // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`,
        // and on the active coordinates $s^k_𝒜 = (x-x⁻)_𝒜$.
        // The matrix $[A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`.
        s = if n_inact > 0 {
            // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ/τ - [A_{ℐ × 𝒜}](x-x⁻)_𝒜$
            let inactfilt = inact.iter().copied();
            let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt);
            let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| {
                // Calculate row i of [A_{ℐ × 𝒜}]s^k_𝒜 = [A_{ℐ × 𝒜}](x-xprev)_𝒜
                let actfilt = inact.iter().copied().map(std::ops::Not::not);
                let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt);
                let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| {
                    mAij * (x_j - xprev_j)
                }).sum();
                // Subtract it from [x-x⁻]_i/τ
                //x_i - xprev_i - τ * actpart
                (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above
            });
            let mut rhs = DVector::from_iterator(n_inact, rhs_constr);
            assert_eq!(rhs.len(), n_inact);
            // Solve the system in place; a singular submatrix aborts the whole solver.
            if !decomp.solve_mut(&mut rhs) {
                return Step::Failure(NumericalError(
                    "Failed to solve linear system for subproblem SSN."
                ))
            }
            rhs
        } else {
            // No inactive coordinates: the SSN step degenerates to pure FB.
            DVector::zeros(0)
        };

        iters += 1;

        // 4. Report solution quality
        state.if_verbose(|| {
            // Calculate subdifferential at the FB step `x` that has not yet had `s` added.
            min_subdifferential(&mut v, mA, x, g, λ)
        })
    });

    res.map(|_| iters)
}
|
290 |
|
291 /// This function applies an iterative method for the solution of the quadratic non-negativity |
|
292 /// constrained problem |
|
293 /// <div>$$ |
|
294 /// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x). |
|
295 /// $$</div> |
|
296 /// Semismooth Newton or forward-backward are supported based on the setting in `method`. |
|
297 /// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation. |
|
298 /// The constant $c$ does not need to be provided. The step length parameter is `τ` while |
|
299 /// `x` contains the initial iterate and on return the final one. The `iterator` controls |
|
300 /// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some |
|
301 /// subdifferential of the objective to zero. |
|
302 /// |
|
303 /// Interior point methods could offer a further alternative, for example, the one in: |
|
304 /// |
|
305 /// * Valkonen T. - _A method for weighted projections to the positive definite |
|
306 /// cone_, <https://doi.org/10.1080/02331934.2014.929680>. |
|
307 /// |
|
308 /// This function returns the number of iterations taken. |
|
309 pub fn quadratic_nonneg<F, I>( |
|
310 method : InnerMethod, |
|
311 mA : &DMatrix<F::MixedType>, |
|
312 g : &DVector<F::MixedType>, |
|
313 //c_ : F, |
|
314 λ : F, |
|
315 x : &mut DVector<F::MixedType>, |
|
316 τ : F, |
|
317 iterator : I |
|
318 ) -> usize |
|
319 where F : Float + ToNalgebraRealField, |
|
320 I : AlgIteratorFactory<F> |
|
321 { |
|
322 |
|
323 match method { |
|
324 InnerMethod::FB => |
|
325 quadratic_nonneg_fb(mA, g, λ, x, τ, iterator), |
|
326 InnerMethod::SSN => |
|
327 quadratic_nonneg_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| { |
|
328 println!("{}", format!("{e}. Using FB fallback.").red()); |
|
329 let ins = InnerSettings::<F>::default(); |
|
330 quadratic_nonneg_fb(mA, g, λ, x, τ, ins.iterator_options) |
|
331 }) |
|
332 } |
|
333 } |