src/subproblem/nonneg.rs

changeset 24
d29d1fcf5423
parent 0
eb3c7813b67a
equal deleted inserted replaced
23:9869fa1e0ccd 24:d29d1fcf5423
1 /*!
2 Iterative algorithms for solving the finite-dimensional subproblem with non-negativity constraints.
3 */
4
5 use nalgebra::{DVector, DMatrix};
6 use numeric_literals::replace_float_literals;
7 use itertools::{izip, Itertools};
8 use colored::Colorize;
9
10 use alg_tools::iter::Mappable;
11 use alg_tools::error::NumericalError;
12 use alg_tools::iterate::{
13 AlgIteratorFactory,
14 AlgIteratorState,
15 Step,
16 };
17 use alg_tools::linops::GEMV;
18 use alg_tools::nalgebra_support::ToNalgebraRealField;
19
20 use crate::types::*;
21 use super::{
22 InnerMethod,
23 InnerSettings
24 };
25
26 /// Compute the proximal operator of $x \mapsto x + \delta\_{[0, \infty)}$, i.e.,
27 /// the non-negativity contrained soft-thresholding operator.
28 #[inline]
29 #[replace_float_literals(F::cast_from(literal))]
30 fn nonneg_soft_thresholding<F : Float>(v : F, λ : F) -> F {
31 (v - λ).max(0.0)
32 }
33
34 /// Returns the ∞-norm minimal subdifferential of $x ↦ x^⊤Ax - g^⊤ x + λ\vec 1^⊤ x δ_{≥ 0}(x)$
35 /// at $x$.
36 ///
37 /// `v` will be modified and cannot be trusted to contain useful values afterwards.
38 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
39 fn min_subdifferential<F : Float + ToNalgebraRealField>(
40 v : &mut DVector<F::MixedType>,
41 mA : &DMatrix<F::MixedType>,
42 x : &DVector<F::MixedType>,
43 g : &DVector<F::MixedType>,
44 λ : F::MixedType
45 ) -> F {
46 v.copy_from(g);
47 mA.gemv(v, 1.0, x, -1.0); // v = Ax - g
48 let mut val = 0.0;
49 for (&v_i, &x_i) in izip!(v.iter(), x.iter()) {
50 // The subdifferential of the objective is $Ax - g + λ + ∂ δ_{≥ 0}(x)$.
51 let d = v_i + λ;
52 if x_i > 0.0 || d < 0.0 {
53 val = val.max(d.abs());
54 }
55 }
56 F::from_nalgebra_mixed(val)
57 }
58
59 /// Forward-backward splitting implementation of [`quadratic_nonneg`].
60 /// For detailed documentation of the inputs and outputs, refer to there.
61 ///
62 /// The `λ` component of the model is handled in the proximal step instead of the gradient step
63 /// for potential performance improvements.
64 #[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
65 pub fn quadratic_nonneg_fb<F, I>(
66 mA : &DMatrix<F::MixedType>,
67 g : &DVector<F::MixedType>,
68 //c_ : F,
69 λ_ : F,
70 x : &mut DVector<F::MixedType>,
71 τ_ : F,
72 iterator : I
73 ) -> usize
74 where F : Float + ToNalgebraRealField,
75 I : AlgIteratorFactory<F>
76 {
77 let mut xprev = x.clone();
78 //let c = c_.to_nalgebra_mixed();
79 let λ = λ_.to_nalgebra_mixed();
80 let τ = τ_.to_nalgebra_mixed();
81 let τλ = τ * λ;
82 let mut v = DVector::zeros(x.len());
83 let mut iters = 0;
84
85 iterator.iterate(|state| {
86 // Replace `x` with $x - τ[Ax-g]= [x + τg]- τAx$
87 v.copy_from(g); // v = g
88 v.axpy(1.0, x, τ); // v = x + τ*g
89 v.sygemv(-τ, mA, x, 1.0); // v = [x + τg]- τAx
90 let backup = state.if_verbose(|| {
91 xprev.copy_from(x)
92 });
93 // Calculate the proximal map
94 x.iter_mut().zip(v.iter()).for_each(|(x_i, &v_i)| {
95 *x_i = nonneg_soft_thresholding(v_i, τλ);
96 });
97
98 iters +=1;
99
100 backup.map(|_| {
101 min_subdifferential(&mut v, mA, x, g, λ)
102 })
103 });
104
105 iters
106 }
107
108 /// Semismooth Newton implementation of [`quadratic_nonneg`].
109 ///
110 /// For detailed documentation of the inputs, refer to there.
/// This function returns the number of iterations taken if there was no inversion failure;
/// on failure to solve the Newton linear system it returns a [`NumericalError`].
112 ///
113 /// ## Method derivation
114 ///
115 /// **The below may look like garbage. Sorry, but rustdoc is obsolete rubbish
116 /// that doesn't directly support by-now standard-in-markdown LaTeX math. Instead it
/// forces one into unreliable KaTeX autorender postprocessing and escape hell and that
118 /// it doesn't even process correctly.**
119 ///
120 /// <p>
121 /// For the objective
122 /// $$
123 /// J(x) = \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x),
124 /// $$
125 /// we have the optimality condition
126 /// $$
127 /// x - \mathop{\mathrm{prox}}_{τλ{\vec 1}^⊤ + δ_{≥ 0}}(x - τ[Ax-g^⊤]) = 0,
128 /// $$
129 /// which we write as
130 /// $$
131 /// x - [G ∘ F](x)=0
132 /// $$
133 /// for
134 /// $$
135 /// G(x) = \mathop{\mathrm{prox}}_{λ{\vec 1}^⊤ + δ_{≥ 0}}
136 /// \quad\text{and}\quad
137 /// F(x) = x - τ Ax + τ g^⊤
138 /// $$
139 /// We can use Newton derivative chain rule to compute
140 /// $D_N[G ∘ F](x) = D_N G(F(x)) D_N F(x)$, where
141 /// $D_N F(x) = \mathop{\mathrm{Id}} - τ A$,
142 /// and $[D_N G(F(x))]_i = 1$ for inactive coordinates and $=0$ for active coordinates.
143 /// </p>
144 ///
145 /// <p>
146 /// The method itself involves solving $D_N[Id - G ∘ F](x^k) s^k = - [Id - G ∘ F](x^k)$ and
147 /// updating $x^{k+1} = x^k + s^k$. Consequently
148 /// $$
149 /// s^k - D_N G(F(x^k)) [s^k - τ As^k] = - x^k + [G ∘ F](x^k)
150 /// $$
151 /// For $𝒜$ the set of active coordinates and $ℐ$ the set of inactive coordinates, this
152 /// expands as
153 /// $$
154 /// [τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜
155 /// $$
156 /// and
157 /// $$
158 /// s^k_𝒜 = - x^k_𝒜 + [G ∘ F](x^k)_𝒜.
159 /// $$
160 /// Thus on $𝒜$ the update $[x^k + s^k]_𝒜 = [G ∘ F](x^k)_𝒜$ is just the forward-backward update.
161 /// </p>
162 ///
163 /// <p>
164 /// We need to detect stopping by a subdifferential and return $x$ satisfying $x ≥ 0$,
165 /// which is in general not true for the SSN. We therefore use that $[G ∘ F](x^k)$ is a valid
166 /// forward-backward step.
167 /// </p>
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn quadratic_nonneg_ssn<F, I>(
    mA : &DMatrix<F::MixedType>,
    g : &DVector<F::MixedType>,
    //c_ : F,
    λ_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    iterator : I
) -> Result<usize, NumericalError>
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let n = x.len();
    let mut xprev = x.clone();
    // Work vector for subdifferential estimation on reporting iterations.
    let mut v = DVector::zeros(n);
    //let c = c_.to_nalgebra_mixed();
    let λ = λ_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let τλ = τ * λ;
    // inact[i] == true ⟺ coordinate i is inactive (strictly positive after the prox).
    let mut inact : Vec<bool> = Vec::from_iter(std::iter::repeat(false).take(n));
    // SSN step on the inactive coordinates; empty until the first solve below.
    let mut s = DVector::zeros(0);
    // LU decomposition of A_{ℐ × ℐ}; rebuilt only when the active set changes.
    let mut decomp = nalgebra::linalg::LU::new(DMatrix::zeros(0, 0));
    let mut iters = 0;

    let res = iterator.iterate_fallible(|state| {
        // 1. Perform the delayed SSN-update, adding the previously computed step `s`
        // on the inactive coordinates (`ast == true`). The step is delayed to the
        // beginning of the loop because the SSN step may violate constraints, so we
        // arrange `x` to contain at the end of the loop the valid FB step that forms
        // part of the SSN step. `xprev` is synchronised to the updated iterate.
        let mut si = s.iter();
        for (&ast, x_i, xprev_i) in izip!(inact.iter(), x.iter_mut(), xprev.iter_mut()) {
            if ast {
                // `s` has exactly one entry per inactive coordinate, consumed in order.
                *x_i = *xprev_i + *si.next().unwrap()
            }
            *xprev_i = *x_i;
        }

        //xprev.copy_from(x);

        // 2. Calculate FB step.
        // 2.1. Replace `x` with $x⁻ - τ[Ax⁻-g]= [x⁻ + τg]- τAx⁻$
        x.axpy(τ, g, 1.0);                  // x = x⁻ + τ*g
        x.sygemv(-τ, mA, &xprev, 1.0);      // x = [x⁻ + τg]- τAx⁻
        // 2.2. Calculate prox and set of active coordinates at the same time
        let mut act_changed = false;
        let mut n_inact = 0;
        for (x_i, ast) in izip!(x.iter_mut(), inact.iter_mut()) {
            if *x_i > τλ {
                // Inactive coordinate: soft-threshold and flag any activity change.
                *x_i -= τλ;
                if !*ast {
                    act_changed = true;
                    *ast = true;
                }
                n_inact += 1;
            } else {
                // Active coordinate: project to the constraint boundary.
                *x_i = 0.0;
                if *ast {
                    act_changed = true;
                    *ast = false;
                }
            }
        }

        // *** x now contains forward-backward step ***

        // 3. Solve SSN step `s`.
        // 3.1 Construct [τ A_{ℐ × ℐ}] if the set of inactive coordinates has changed.
        // NOTE(review): pairing `cartesian_product` with `mA.iter()` assumes their
        // traversal orders agree; since A is symmetric in this model, row- and
        // column-major extraction give the same submatrix — confirm if A could
        // ever be non-symmetric.
        if act_changed {
            let decomp_iter = inact.iter().cartesian_product(inact.iter()).zip(mA.iter());
            let decomp_constr = decomp_iter.filter_map(|((&i_inact, &j_inact), &mAij)| {
                //(i_inact && j_inact).then_some(mAij * τ)
                (i_inact && j_inact).then_some(mAij) // 🔺 below matches removal of τ
            });
            let mat = DMatrix::from_iterator(n_inact, n_inact, decomp_constr);
            decomp = nalgebra::linalg::LU::new(mat);
        }

        // 3.2 Solve `s` = $s_ℐ^k$ from
        // $[τ A_{ℐ × ℐ}]s^k_ℐ = - x^k_ℐ + [G ∘ F](x^k)_ℐ - [τ A_{ℐ × 𝒜}]s^k_𝒜$.
        // With current variable setup we have $[G ∘ F](x^k) = $`x` and $x^k = x⁻$ = `xprev`,
        // so the system to solve is $[τ A_{ℐ × ℐ}]s^k_ℐ = (x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
        // The matrix $[τ A_{ℐ × ℐ}]$ we have already LU-decomposed above into `decomp`.
        s = if n_inact > 0 {
            // 3.2.1 Construct `rhs` = $(x-x⁻)_ℐ - [τ A_{ℐ × 𝒜}](x-x⁻)_𝒜$
            // `filter_zip` (presumably from alg_tools) keeps only the rows whose
            // inactivity flag is true — TODO confirm its exact semantics.
            let inactfilt = inact.iter().copied();
            let rhs_iter = izip!(x.iter(), xprev.iter(), mA.row_iter()).filter_zip(inactfilt);
            let rhs_constr = rhs_iter.map(|(&x_i, &xprev_i, mAi)| {
                // Calculate row i of [τ A_{ℐ × 𝒜}]s^k_𝒜 = [τ A_{ℐ × 𝒜}](x-xprev)_𝒜
                let actfilt = inact.iter().copied().map(std::ops::Not::not);
                let actit = izip!(x.iter(), xprev.iter(), mAi.iter()).filter_zip(actfilt);
                let actpart = actit.map(|(&x_j, &xprev_j, &mAij)| {
                    mAij * (x_j - xprev_j)
                }).sum();
                // Subtract it from [x-prev]_i
                //x_i - xprev_i - τ * actpart
                (x_i - xprev_i) / τ - actpart // 🔺 change matches removal of τ above
            });
            let mut rhs = DVector::from_iterator(n_inact, rhs_constr);
            assert_eq!(rhs.len(), n_inact);
            // Solve the system
            if !decomp.solve_mut(&mut rhs) {
                return Step::Failure(NumericalError(
                    "Failed to solve linear system for subproblem SSN."
                ))
            }
            rhs
        } else {
            // No inactive coordinates: nothing for the Newton system to solve.
            DVector::zeros(0)
        };

        iters += 1;

        // 4. Report solution quality
        state.if_verbose(|| {
            // Calculate subdifferential at the FB step `x` that hasn't yet had `s` yet added.
            min_subdifferential(&mut v, mA, x, g, λ)
        })
    });

    res.map(|_| iters)
}
290
291 /// This function applies an iterative method for the solution of the quadratic non-negativity
292 /// constrained problem
293 /// <div>$$
294 /// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + λ{\vec 1}^⊤ x + c + δ_{≥ 0}(x).
295 /// $$</div>
296 /// Semismooth Newton or forward-backward are supported based on the setting in `method`.
297 /// The parameter `mA` is matrix $A$, and `g` and `λ` are as in the mathematical formulation.
298 /// The constant $c$ does not need to be provided. The step length parameter is `τ` while
299 /// `x` contains the initial iterate and on return the final one. The `iterator` controls
300 /// stopping. The “verbose” value output by all methods is the $ℓ\_∞$ distance of some
301 /// subdifferential of the objective to zero.
302 ///
303 /// Interior point methods could offer a further alternative, for example, the one in:
304 ///
305 /// * Valkonen T. - _A method for weighted projections to the positive definite
306 /// cone_, <https://doi.org/10.1080/02331934.2014.929680>.
307 ///
308 /// This function returns the number of iterations taken.
309 pub fn quadratic_nonneg<F, I>(
310 method : InnerMethod,
311 mA : &DMatrix<F::MixedType>,
312 g : &DVector<F::MixedType>,
313 //c_ : F,
314 λ : F,
315 x : &mut DVector<F::MixedType>,
316 τ : F,
317 iterator : I
318 ) -> usize
319 where F : Float + ToNalgebraRealField,
320 I : AlgIteratorFactory<F>
321 {
322
323 match method {
324 InnerMethod::FB =>
325 quadratic_nonneg_fb(mA, g, λ, x, τ, iterator),
326 InnerMethod::SSN =>
327 quadratic_nonneg_ssn(mA, g, λ, x, τ, iterator).unwrap_or_else(|e| {
328 println!("{}", format!("{e}. Using FB fallback.").red());
329 let ins = InnerSettings::<F>::default();
330 quadratic_nonneg_fb(mA, g, λ, x, τ, ins.iterator_options)
331 })
332 }
333 }

mercurial