src/subproblem.rs

changeset 24
d29d1fcf5423
parent 0
eb3c7813b67a
equal deleted inserted replaced
23:9869fa1e0ccd 24:d29d1fcf5423
1 //! Iterative algorithms for solving finite-dimensional subproblems. 1 /*!
2 Iterative algorithms for solving the finite-dimensional subproblem.
3 */
2 4
3 use serde::{Serialize, Deserialize}; 5 use serde::{Serialize, Deserialize};
4 use nalgebra::{DVector, DMatrix};
5 use numeric_literals::replace_float_literals; 6 use numeric_literals::replace_float_literals;
6 use itertools::{izip, Itertools};
7 use colored::Colorize;
8
9 use alg_tools::iter::Mappable;
10 use alg_tools::error::NumericalError;
11 use alg_tools::iterate::{ 7 use alg_tools::iterate::{
12 AlgIteratorFactory,
13 AlgIteratorState,
14 AlgIteratorOptions, 8 AlgIteratorOptions,
15 Verbose, 9 Verbose,
16 Step, 10
17 }; 11 };
18 use alg_tools::linops::GEMV;
19 use alg_tools::nalgebra_support::ToNalgebraRealField;
20 12
21 use crate::types::*; 13 use crate::types::*;
14
15 pub mod nonneg;
16 pub mod unconstrained;
17
18 #[deprecated(since = "1.0.1", note = "Moved to submodule nonneg")]
19 pub use nonneg::{
20 quadratic_nonneg,
21 quadratic_nonneg_ssn,
22 quadratic_nonneg_fb
23 };
22 24
23 /// Method for solving finite-dimensional subproblems 25 /// Method for solving finite-dimensional subproblems
24 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] 26 #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
25 #[allow(dead_code)] 27 #[allow(dead_code)]
26 pub enum InnerMethod { 28 pub enum InnerMethod {
63 method : InnerMethod::FB, 65 method : InnerMethod::FB,
64 tolerance_mult : 0.01, 66 tolerance_mult : 0.01,
65 } 67 }
66 } 68 }
67 } 69 }
68
69 /// Compute the proximal operator of $x \mapsto x + \delta\_{[0, \infty)}$, i.e.,
70 /// the non-negativity contrained soft-thresholding operator.
71 #[inline]
72 #[replace_float_literals(F::cast_from(literal))]
73 fn nonneg_soft_thresholding<F : Float>(v : F, λ : F) -> F {
74 (v - λ).max(0.0)
75 }
76
77 /// Forward-backward splitting implementation of [`quadratic_nonneg`].
78 /// For detailed documentation of the inputs and outputs, refer to there.
79 ///
80 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
81 /// for potential performance improvements.
82 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
83 pub fn quadratic_nonneg_fb<F, I>(
84 mA : &DMatrix<F::MixedType>,
85 g : &DVector<F::MixedType>,
86 //c_ : F,
87 λ_ : F,
88 x : &mut DVector<F::MixedType>,
89 τ_ : F,
90 iterator : I
91 ) -> usize
92 where F : Float + ToNalgebraRealField,
93 I : AlgIteratorFactory<F>
94 {
95 let mut xprev = x.clone();
96 //let c = c_.to_nalgebra_mixed();
97 let λ = λ_.to_nalgebra_mixed();
98 let τ = τ_.to_nalgebra_mixed();
99 let τλ = τ * λ;
100 let mut v = DVector::zeros(x.len());
101 let mut iters = 0;
102
103 iterator.iterate(|state| {
104 // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$
105 v.copy_from(g); // v = g
106 v.axpy(1.0, x, τ); // v = x + τ*g
107 v.sygemv(-τ, mA, x, 1.0); // v = [x + τg]- τAx
108 let backup = state.if_verbose(|| {
109 xprev.copy_from(x)
110 });
111 // Calculate the proximal map
112 x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| {
113 *x_i = nonneg_soft_thresholding(v_i, τλ);
114 });
115
116 iters +=1;
117
118 backup.map(|_| {
119 // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$.
120 // We return the minimal ∞-norm over all subderivatives.
121 v.copy_from(g); // d = g
122 mA.gemv(&mut v, 1.0, x, -1.0); // d = Ax - g
123 let mut val = 0.0;
124 for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
125 let d = v_i + λ;
126 if x_i > 0.0 || d < 0.0 {
127 val = val.max(d.abs());
128 }
129 }
130 F::from_nalgebra_mixed(val)
131 })
132 });
133
134 iters
135 }
136
137 /// Semismooth Newton implementation of [`quadratic_nonneg`].
138 ///
139 /// For detailed documentation of the inputs, refer to there.
140 /// This function returns the number of iterations taken if there was no inversion failure,
141 ///
142 /// ## Method derivation
143 ///
144 /// **The below may look like garbage. Sorry, but rustdoc is obsolete rubbish
145 /// that doesn't directly support by-now standard-in-markdown LaTeX math. Instead it
146 /// forces one into unreliable KaTeX autorender postprocessing andescape hell and that
147 /// it doesn't even process correctly.**
148 ///
149 /// <p>
150 /// For the objective
151 /// $$
152 /// J(x) = \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x),
153 /// $$
154 /// we have the optimality condition
155 /// $$
156 /// x - \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x - τ[Ax-g^⊤]) = 0,
157 /// $$
158 /// which we write as
159 /// $$
160 /// x - [G ∘ F](x)=0
161 /// $$
162 /// for
163 /// $$
164 /// G(x) = \mathop{\mathrm{prox}}_{λ{\vec 1}^⊤ + δ_{≥ 0}}
165 /// \quad\text{and}\quad
166 /// F(x) = x - τ Ax + τ g^⊤
167 /// $$
168 /// We can use Newton derivative chain rule to compute
169 /// $D_N[G ∘ F](x) = D_N G(F(x)) D_N F(x)$, where
170 /// $D_N F(x) = \mathop{\mathrm{Id}} - τ A$,
171 /// and $[D_N G(F(x))]_i = 1$ for inactive coordinates and $=0$ for active coordinates.
172 /// </p>
173 ///
174 /// <p>
175 /// The method itself involves solving $D_N[Id - G ∘ F](x^k) s^k = - [Id - G ∘ F](x^k)$ and
176 /// updating $x^{k+1} = x^k + s^k$. Consequently
177 /// $$
178 /// s^k - D_N G(F(x^k)) [s^k - τ As^k] = - x^k + [G ∘ F](x^k)
179 /// $$
180 /// For $𝒜$ the set of active coordinates and $ℐ$ the set of inactive coordinates, this
181 /// expands as
182 /// $$
183 /// [τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜
184 /// $$
185 /// and
186 /// $$
187 /// s^k_𝒜 = - x^k_𝒜 + [G ∘ F](x^k)_𝒜.
188 /// $$
189 /// Thus on $𝒜$ the update $[x^k + s^k]_𝒜 = [G ∘ F](x^k)_𝒜$ is just the forward-backward update.
190 /// </p>
191 ///
192 /// <p>
193 /// We need to detect stopping by a subdifferential and return $x$ satisfying $x ≥ 0$,
194 /// which is in general not true for the SSN. We therefore use that $[G ∘ F](x^k)$ is a valid
195 /// forward-backward step.
196 /// </p>
197 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
198 pub fn quadratic_nonneg_ssn<F, I>(
199 mA : &DMatrix<F::MixedType>,
200 g : &DVector<F::MixedType>,
201 //c_ : F,
202 λ_ : F,
203 x : &mut DVector<F::MixedType>,
204 τ_ : F,
205 iterator : I
206 ) -> Result<usize, NumericalError>
207 where F : Float + ToNalgebraRealField,
208 I : AlgIteratorFactory<F>
209 {
210 let n = x.len();
211 let mut xprev = x.clone();
212 let mut v = DVector::zeros(n);
213 //let c = c_.to_nalgebra_mixed();
214 let λ = λ_.to_nalgebra_mixed();
215 let τ = τ_.to_nalgebra_mixed();
216 let τλ = τ * λ;
217 let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n));
218 let mut s = DVector::zeros(0);
219 let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0));
220 let mut iters = 0;
221
222 let res = iterator.iterate_fallible(|state| {
223 // 1. Perform delayed SSN-update based on previously computed step on active
224 // coordinates. The step is delayed to the beginning of the loop because
225 // the SSN step may violate constraints, so we arrange `x` to contain at the
226 // end of the loop the valid FB step that forms part of the SSN step
227 let mut si = s.iter();
228 for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) {
229 if ast {
230 *x_i = *xprev_i + *si.next().unwrap()
231 }
232 *xprev_i = *x_i;
233 }
234
235 //xprev.copy_from(x);
236
237 // 2. Calculate FB step.
238 // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$
239 x.axpy(τ, g, 1.0); // x = x⁻ + τ*g
240 x.sygemv(-τ, mA, &xprev, 1.0); // x = [x⁻ + τg]- τAx⁻
241 // 2.2. Calculate prox and set of active coordinates at the same time
242 let mut act_changed = false;
243 let mut n_inact = 0;
244 for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) {
245 if *x_i > τλ {
246 *x_i -= τλ;
247 if !*ast {
248 act_changed = true;
249 *ast = true;
250 }
251 n_inact += 1;
252 } else {
253 *x_i = 0.0;
254 if *ast {
255 act_changed = true;
256 *ast = false;
257 }
258 }
259 }
260
261 // *** x now contains forward-backward step ***
262
263 // 3. Solve SSN step `s`.
264 // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed.
265 if act_changed {
266 let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter());
267 let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| {
268 //(i_inact && j_inact).then_some(mAij * τ)
269 (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ
270 });
271 let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr);
272 decomp = nalgebra::linalg::LU::new(mat);
273 }
274
275 // 3.2 Solve `s` = $s_ℐ^k$ from
276 // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$.
277 // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`,
278 // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
279 // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`.
280 s = if n_inact > 0 {
281 // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
282 let inactfilt = inact.iter().copied();
283 let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt);
284 let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| {
285 // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜
286 let actfilt = inact.iter().copied().map(std::ops::Not::not);
287 let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt);
288 let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| {
289 mAij * (x_j - xprev_j)
290 }).sum();
291 // Subtract it from [x-prev]_i
292 //x_i - xprev_i - τ * actpart
293 (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above
294 });
295 let mut rhs = DVector::from_iterator(n_inact, rhs_constr);
296 assert_eq!(rhs.len(), n_inact);
297 // Solve the system
298 if !decomp.solve_mut(&mut rhs) {
299 return Step::Failure(NumericalError(
300 "Failed to solve linear system for subproblem SSN."
301 ))
302 }
303 rhs
304 } else {
305 DVector::zeros(0)
306 };
307
308 iters += 1;
309
310 // 4. Report solution quality
311 state.if_verbose(|| {
312 // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added.
313 // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$.
314 // We return the minimal ∞-norm over all subderivatives.
315 v.copy_from(g); // d = g
316 mA.gemv(&mut v, 1.0, x, -1.0); // d = Ax - g
317 let mut val = 0.0;
318 for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
319 let d = v_i + λ;
320 if x_i > 0.0 || d < 0.0 {
321 val = val.max(d.abs());
322 }
323 }
324 F::from_nalgebra_mixed(val)
325 })
326 });
327
328 res.map(|_| iters)
329 }
330
331 /// This function applies an iterative method for the solution of the quadratic non-negativity
332 /// constrained problem
333 /// <div>$$
334 /// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x).
335 /// $$</div>
336 /// Semismooth Newton or forward-backward are supported based on the setting in `method`.
337 /// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation.
338 /// The constant $c$ does not need to be provided. The step length parameter is `τ` while
339 /// `x` contains the initial iterate and on return the final one. The `iterator` controls
340 /// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some
341 /// subdifferential of the objective to zero.
342 ///
343 /// Interior point methods could offer a further alternative, for example, the one in:
344 ///
345 /// * Valkonen T. - _A method for weighted projections to the positive definite
346 /// cone_, <https://doi.org/10.1080/02331934.2014.929680>.
347 ///
348 /// This function returns the number of iterations taken.
349 pub fn quadratic_nonneg<F, I>(
350 method : InnerMethod,
351 mA : &DMatrix<F::MixedType>,
352 g : &DVector<F::MixedType>,
353 //c_ : F,
354 λ : F,
355 x : &mut DVector<F::MixedType>,
356 τ : F,
357 iterator : I
358 ) -> usize
359 where F : Float + ToNalgebraRealField,
360 I : AlgIteratorFactory<F>
361 {
362
363 match method {
364 InnerMethod::FB =>
365 quadratic_nonneg_fb(mA, g, λ, x, τ, iterator),
366 InnerMethod::SSN =>
367 quadratic_nonneg_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| {
368 println!("{}", format!("{e}. Using FB fallback.").red());
369 let ins = InnerSettings::<F>::default();
370 quadratic_nonneg_fb(mA, g, λ, x, τ, ins.iterator_options)
371 })
372 }
373 }

mercurial