/*!
Iterative algorithms for solving the finite-dimensional subproblem without constraints.
*/

use nalgebra::DVector;
use numeric_literals::replace_float_literals;
use itertools::izip;
use std::cmp::Ordering::*;

use std::iter::zip;
use alg_tools::iterate::{
    AlgIteratorFactory,
    AlgIteratorState,
};
use alg_tools::nalgebra_support::ToNalgebraRealField;
use alg_tools::nanleast::NaNLeast;
use alg_tools::norms::{Dist, L1};

use crate::types::*;
use super::{
    InnerMethod,
    InnerSettings
};
use super::unconstrained::soft_thresholding;
use super::l1squared_nonneg::max_interval_dist_to_zero;

/// Calculate $prox_f(x)$ for $f(x)=\frac{β}{2}\norm{x-y}_1^2$.
///
/// To derive an algorithm for this, we can assume that $y=0$, as
/// $prox_f(x) = prox_{f_0}(x - y) + y$ for $f_0=\frac{β}{2}\norm{x}_1^2$.
/// Now, the optimality conditions for $w = prox_f(x)$ are
/// $$\tag{*}
/// 0 ∈ w-x + β\norm{w}_1\sign w.
/// $$
/// Clearly then $w = \soft_{β\norm{w}_1}(x)$.
/// Thus the components of $x$ with the smallest absolute values will be zeroed out.
/// Denoting by $w'$ the non-zero components, by $x'$ the corresponding components
/// of $x$, and by $m$ their count, multiplying the corresponding lines of (*) by $\sign x'$
/// and summing, we obtain
/// $$
/// \norm{x'}_1 = (1+βm)\norm{w'}_1.
/// $$
/// That is, $\norm{w}_1=\norm{w'}_1=\norm{x'}_1/(1+βm)$.
/// Thus, sorting $x$ by absolute value and sequentially eliminating the smallest
/// elements, we can easily calculate what $\norm{w}_1$ should be for each such choice, and
/// then easily calculate $w = \soft_{β\norm{w}_1}(x)$. We just have to verify that
/// the resulting $w$ has the same norm. There's a shortcut to this, as we work
/// sequentially: just check that the smallest assumed-nonzero component $i$ satisfies the
/// soft-thresholding condition to remain non-zero, $|x_i| > β\norm{x'}_1/(1+βm)$.
/// Clearly, if this condition fails for $x_i$, it will also fail for all the components
/// already excluded, whereas if it holds, it will hold for all components not excluded.
#[replace_float_literals(F::cast_from(literal))]
pub(super) fn l1squared_prox<F : Float + nalgebra::RealField>(
    sorted_abs : &mut DVector<F>,
    x : &mut DVector<F>,
    y : &DVector<F>,
    β : F
) {
    // Sort the absolute values of x-y to decide which components get zeroed out.
    sorted_abs.copy_from(x);
    sorted_abs.axpy(-1.0, y, 1.0);
    sorted_abs.apply(|z_i| *z_i = num_traits::abs(*z_i));
    sorted_abs.as_mut_slice().sort_unstable_by(|a, b| NaNLeast(*a).cmp(&NaNLeast(*b)));

    // n = ‖x'‖₁ for the currently assumed-nonzero components x'.
    let mut n = sorted_abs.sum();
    for (m, az_i) in zip((1..=x.len() as u32).rev(), sorted_abs) {
        // Test the smallest assumed-nonzero component against the
        // soft-thresholding threshold β‖x'‖₁/(1+βm).
        let tmp = β*n/(1.0 + β*F::cast_from(m));
        if *az_i <= tmp {
            // Fail: this component gets zeroed out; exclude it and try the next m.
            n -= *az_i;
        } else {
            // Success: apply soft-thresholding with the found threshold.
            x.zip_apply(y, |x_i, y_i| *x_i = y_i + soft_thresholding(*x_i-y_i, tmp));
            return
        }
    }
    // m = 0: every component was eliminated, which can only happen when x-y = 0.
    // The prox of f₀ at zero is zero, so the result is y itself.
    x.copy_from(y);
}
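
// A minimal test sketch (not from the original source) checking the characterisation
// derived above: the result w of `l1squared_prox` should satisfy
// w = y + soft_{β‖w-y‖₁}(x-y) componentwise. It assumes that `f64` implements this
// crate's `Float` trait and that `soft_thresholding(v, τ)` takes the value first and
// the threshold second; the test data are arbitrary.
#[cfg(test)]
mod l1squared_prox_test {
    use super::*;

    #[test]
    fn prox_satisfies_soft_thresholding_characterisation() {
        let β = 0.5;
        let y = DVector::from_vec(vec![0.0, 1.0, -1.0, 2.0]);
        let x0 = DVector::from_vec(vec![3.0, 1.5, -0.5, -1.0]);
        let mut x = x0.clone();
        let mut scratch = x0.clone();
        l1squared_prox(&mut scratch, &mut x, &y, β);
        // ‖w-y‖₁ for the computed prox w (stored in x).
        let wnorm : f64 = (&x - &y).iter().map(|v| v.abs()).sum();
        // Check w_i = y_i + soft_{β‖w-y‖₁}(x0_i - y_i) for every component.
        for ((&w_i, &x0_i), &y_i) in x.iter().zip(x0.iter()).zip(y.iter()) {
            let expected = y_i + soft_thresholding(x0_i - y_i, β * wnorm);
            assert!((w_i - expected).abs() < 1e-12);
        }
    }
}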

/// Returns the ∞-norm minimal subdifferential of $x ↦ (β/2)|x-y|_1^2 - g^⊤ x + λ\|x\|₁$ at $x$.
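///
/// As a worked scalar example (illustrative, not from the original source): with
/// $n=1$, $y=0$, $g=2$, $λ=1/2$, and $β=1$, the minimiser is $x=(g-λ)/β=3/2$, since
/// $-g + β|x-y|\sign(x-y) + λ\sign x = -2 + 3/2 + 1/2 = 0$ lies in the subdifferential
/// there; this function should hence return $0$ at that point.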
#[replace_float_literals(F::cast_from(literal))]
fn min_subdifferential<F : Float + nalgebra::RealField>(
    y : &DVector<F>,
    x : &DVector<F>,
    g : &DVector<F>,
    λ : F,
    β : F
) -> F {
    let mut val = 0.0;
    let tmp = β*y.dist(x, L1);
    for (&g_i, &x_i, y_i) in izip!(g.iter(), x.iter(), y.iter()) {
        // Componentwise subdifferential interval, starting from the linear term -g_i.
        let (mut lb, mut ub) = (-g_i, -g_i);
        // Contribution of the (β/2)|x-y|₁² term: β‖x-y‖₁ ∂|x_i-y_i|.
        match x_i.partial_cmp(y_i) {
            Some(Greater) => { lb += tmp; ub += tmp },
            Some(Less) => { lb -= tmp; ub -= tmp },
            Some(Equal) => { lb -= tmp; ub += tmp },
            None => {},
        }
        // Contribution of the λ‖x‖₁ term: λ ∂|x_i|.
        match x_i.partial_cmp(&0.0) {
            Some(Greater) => { lb += λ; ub += λ },
            Some(Less) => { lb -= λ; ub -= λ },
            Some(Equal) => { lb -= λ; ub += λ },
            None => {},
        };
        // Track the largest distance of any component's interval from zero.
        val = max_interval_dist_to_zero(val, lb, ub);
    }
    val
}


/// PDPS implementation of [`l1squared_unconstrained`].
/// For detailed documentation of the inputs and outputs, see that function.
///
/// The `λ` component of the model is handled in the proximal step instead of the gradient step
/// for potential performance improvements.
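///
/// As a reading of the updates below (not spelled out in the original documentation),
/// the iteration is PDPS applied to the saddle-point reformulation
/// <div>$$
///     \min_{x ∈ ℝ^n} \max_{\|w\|_∞ ≤ λ} \frac{β}{2} |x-y|_1^2 - g^⊤ x + w^⊤ x,
/// $$</div>
/// where the smooth part $-g^⊤ x$ is handled by an explicit gradient step, the
/// $\frac{β}{2}|x-y|_1^2$ part by [`l1squared_prox`], and the dual update is the
/// projection of $w$ onto $[-λ,λ]^n$.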
#[replace_float_literals(F::cast_from(literal).to_nalgebra_mixed())]
pub fn l1squared_unconstrained_pdps<F, I>(
    y : &DVector<F::MixedType>,
    g : &DVector<F::MixedType>,
    λ_ : F,
    β_ : F,
    x : &mut DVector<F::MixedType>,
    τ_ : F,
    σ_ : F,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    let λ = λ_.to_nalgebra_mixed();
    let β = β_.to_nalgebra_mixed();
    let τ = τ_.to_nalgebra_mixed();
    let σ = σ_.to_nalgebra_mixed();
    let mut w = DVector::zeros(x.len());
    let mut tmp = DVector::zeros(x.len());
    let mut xprev = x.clone();
    let mut iters = 0;

    iterator.iterate(|state| {
        // Primal step: x^{k+1} = prox_{(τβ/2)|.-y|_1^2}(x^k - τ (w^k - g))
        x.axpy(-τ, &w, 1.0);
        x.axpy(τ, g, 1.0);
        l1squared_prox(&mut tmp, x, y, τ*β);

        // Dual step: w^{k+1} = proj_{[-λ,λ]}(w^k + σ(2x^{k+1}-x^k))
        w.axpy(2.0*σ, x, 1.0);
        w.axpy(-σ, &xprev, 1.0);
        w.apply(|w_i| *w_i = num_traits::clamp(*w_i, -λ, λ));
        xprev.copy_from(x);

        iters += 1;

        state.if_verbose(|| {
            F::from_nalgebra_mixed(min_subdifferential(y, x, g, λ, β))
        })
    });

    iters
}


/// This function applies an iterative method for the solution of the problem
/// <div>$$
///     \min_{x ∈ ℝ^n} \frac{β}{2} |x-y|_1^2 - g^⊤ x + λ\|x\|₁.
/// $$</div>
/// Only PDPS is supported.
///
/// This function returns the number of iterations taken.
#[replace_float_literals(F::cast_from(literal))]
pub fn l1squared_unconstrained<F, I>(
    y : &DVector<F::MixedType>,
    g : &DVector<F::MixedType>,
    λ : F,
    β : F,
    x : &mut DVector<F::MixedType>,
    inner : &InnerSettings<F>,
    iterator : I
) -> usize
where F : Float + ToNalgebraRealField,
      I : AlgIteratorFactory<F>
{
    // Estimate of ‖K‖ for K=Id.
    let normest = 1.0;

    let (inner_τ, inner_σ) = (inner.pdps_τσ0.0 / normest, inner.pdps_τσ0.1 / normest);

    match inner.method {
        InnerMethod::PDPS =>
            l1squared_unconstrained_pdps(y, g, λ, β, x, inner_τ, inner_σ, iterator),
        other => unimplemented!("{other:?} is unimplemented"),
    }
}