src/kernels/gaussian.rs

changeset 52
f0e8704d3f0e
parent 35
b087e3eab191
child 38
0f59c0d02e13
equal deleted inserted replaced
31:6105b5cd8d89 52:f0e8704d3f0e
15 LocalAnalysis, 15 LocalAnalysis,
16 GlobalAnalysis, 16 GlobalAnalysis,
17 Weighted, 17 Weighted,
18 Bounded, 18 Bounded,
19 }; 19 };
20 use alg_tools::mapping::Apply; 20 use alg_tools::mapping::{
21 Mapping,
22 Instance,
23 Differential,
24 DifferentiableImpl,
25 };
21 use alg_tools::maputil::array_init; 26 use alg_tools::maputil::array_init;
22 27
28 use crate::types::*;
23 use crate::fourier::Fourier; 29 use crate::fourier::Fourier;
24 use super::base::*; 30 use super::base::*;
25 use super::ball_indicator::CubeIndicator; 31 use super::ball_indicator::CubeIndicator;
26 32
27 /// Storage presentation of the anisotropic gaussian kernel of `variance` $σ^2$. 33 /// Storage presentation of the anisotropic gaussian kernel of `variance` $σ^2$.
56 } 62 }
57 } 63 }
58 64
59 65
60 #[replace_float_literals(S::Type::cast_from(literal))] 66 #[replace_float_literals(S::Type::cast_from(literal))]
61 impl<'a, S, const N : usize> Apply<&'a Loc<S::Type, N>> for Gaussian<S, N> 67 impl<'a, S, const N : usize> Mapping<Loc<S::Type, N>> for Gaussian<S, N>
62 where S : Constant { 68 where
63 type Output = S::Type; 69 S : Constant
70 {
71 type Codomain = S::Type;
72
64 // This is normalised neither to have value 1 at zero nor to have integral 1 73 // This is normalised neither to have value 1 at zero nor to have integral 1
65 // (unless the cut-off ε=0). 74 // (unless the cut-off ε=0).
66 #[inline] 75 #[inline]
67 fn apply(&self, x : &'a Loc<S::Type, N>) -> Self::Output { 76 fn apply<I : Instance<Loc<S::Type, N>>>(&self, x : I) -> Self::Codomain {
68 let d_squared = x.norm2_squared(); 77 let d_squared = x.eval(|x| x.norm2_squared());
69 let σ2 = self.variance.value(); 78 let σ2 = self.variance.value();
70 let scale = self.scale(); 79 let scale = self.scale();
71 (-d_squared / (2.0 * σ2)).exp() / scale 80 (-d_squared / (2.0 * σ2)).exp() / scale
72 } 81 }
73 } 82 }
74 83
75 impl<S, const N : usize> Apply<Loc<S::Type, N>> for Gaussian<S, N> 84 #[replace_float_literals(S::Type::cast_from(literal))]
76 where S : Constant { 85 impl<'a, S, const N : usize> DifferentiableImpl<Loc<S::Type, N>> for Gaussian<S, N>
77 type Output = S::Type; 86 where S : Constant {
78 // This is not normalised to neither to have value 1 at zero or integral 1 87 type Derivative = Loc<S::Type, N>;
79 // (unless the cut-off ε=0). 88
80 #[inline] 89 #[inline]
81 fn apply(&self, x : Loc<S::Type, N>) -> Self::Output { 90 fn differential_impl<I : Instance<Loc<S::Type, N>>>(&self, x0 : I) -> Self::Derivative {
82 self.apply(&x) 91 let x = x0.cow();
92 let f = -self.apply(&*x) / self.variance.value();
93 *x * f
94 }
95 }
96
97
98 // To calculate the Lipschitz factors, we consider
99 // f(t) = e^{-t²/2}
100 // f'(t) = -t f(t) with |f'| maximised at t=1 by f''(t)=0
101 // f''(t) = (t²-1)f(t) with a local max at t=√3 by f'''(t)=0; but |f''(0)|=1 > f''(√3)
102 // f'''(t) = -(t³-3t)f(t)
103 // So f has the Lipschitz factor L=|f'(1)|, and f' has the Lipschitz factor L'=|f''(0)|=1.
104 //
105 // Now g(x) = Cf(‖x‖/σ) for a scaling factor C is the Gaussian.
106 // Thus |g(x)-g(y)| = C|f(‖x‖/σ)-f(‖y‖/σ)| ≤ (C/σ)L‖x-y‖,
107 // so g has the Lipschitz factor (C/σ)|f'(1)| = (C/σ)exp(-0.5).
108 //
109 // Also ∇g(x)= Cx/(σ‖x‖)f'(‖x‖/σ) (*)
110 // = -(C/σ²)xf(‖x‖/σ)
111 // = -C/σ (x/σ) f(‖x/σ‖)
112 // ∇²g(x) = -(C/σ)[Id/σ f(‖x‖/σ) + x ⊗ x/(σ²‖x‖) f'(‖x‖/σ)]
113 // = (C/σ²)[-Id + x ⊗ x/σ²]f(‖x‖/σ).
114 // Thus ‖∇²g(x)‖ = (C/σ²)‖-Id + x ⊗ x/σ²‖f(‖x‖/σ), where -Id + x ⊗ x/σ² has the eigenvalue
115 // -1 + ‖x‖²/σ² in the direction x/‖x‖, and -1 on the orthogonal complement of x.
116 // NOTE(review): hence ‖∇²g(0)‖ = C/σ², which exceeds the value (C/σ²)2e^{-3/2} at ‖x‖/σ=√3.
117 // Hence the Lipschitz factor of ∇g is C/σ², not (C/σ²)f''(√3) = (C/σ²)2e^{-3/2}.
118
119 #[replace_float_literals(S::Type::cast_from(literal))]
120 impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N>
121 where S : Constant {
122 type FloatType = S::Type;
123 fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
124 Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt()))
125 }
126 }
127
128
129 #[replace_float_literals(S::Type::cast_from(literal))]
130 impl<'a, S : Constant, const N : usize> Lipschitz<L2>
131 for Differential<'a, Loc<S::Type, N>, Gaussian<S, N>> {
132 type FloatType = S::Type;
133
134 fn lipschitz_factor(&self, _l2 : L2) -> Option<S::Type> {
135 let g = self.base_fn();
136 let σ2 = g.variance.value();
137 let scale = g.scale();
138 Some(2.0*(-3.0/2.0).exp()/(σ2*scale))
139 }
140 }
141
142 // From above, norm bounds on the differential can be calculated as achieved
143 // for f' at t=1, i.e., the bound is |f'(1)|.
144 // For g then |C/σ f'(1)|.
145 // It follows that the norm bounds on the differential are just the Lipschitz
146 // factors of the undifferentiated function, given how the latter is calculated above.
147
148 #[replace_float_literals(S::Type::cast_from(literal))]
149 impl<'b, S : Constant, const N : usize> NormBounded<L2>
150 for Differential<'b, Loc<S::Type, N>, Gaussian<S, N>> {
151 type FloatType = S::Type;
152
153 fn norm_bound(&self, _l2 : L2) -> S::Type {
154 self.base_fn().lipschitz_factor(L2).unwrap()
155 }
156 }
157
158 #[replace_float_literals(S::Type::cast_from(literal))]
159 impl<'b, 'a, S : Constant, const N : usize> NormBounded<L2>
160 for Differential<'b, Loc<S::Type, N>, &'a Gaussian<S, N>> {
161 type FloatType = S::Type;
162
163 fn norm_bound(&self, _l2 : L2) -> S::Type {
164 self.base_fn().lipschitz_factor(L2).unwrap()
83 } 165 }
84 } 166 }
85 167
86 168
87 #[replace_float_literals(S::Type::cast_from(literal))] 169 #[replace_float_literals(S::Type::cast_from(literal))]
167 /// where $a>0$ and $f$ is a gaussian kernel on $ℝ^n$. 249 /// where $a>0$ and $f$ is a gaussian kernel on $ℝ^n$.
168 pub type BasicCutGaussian<C, S, const N : usize> = SupportProductFirst<CubeIndicator<C, N>, 250 pub type BasicCutGaussian<C, S, const N : usize> = SupportProductFirst<CubeIndicator<C, N>,
169 Gaussian<S, N>>; 251 Gaussian<S, N>>;
170 252
171 253
172 /// This implements $χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ 254 /// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is
173 /// where $a,b>0$ and $f$ is a gaussian kernel on $ℝ^n$. 255 /// a gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript.
174 #[replace_float_literals(F::cast_from(literal))] 256 #[replace_float_literals(F::cast_from(literal))]
175 impl<'a, F : Float, R, C, S, const N : usize> Apply<&'a Loc<F, N>> 257 impl<'a, F : Float, R, C, S, const N : usize> Mapping<Loc<F, N>>
176 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> 258 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
177 where R : Constant<Type=F>, 259 where R : Constant<Type=F>,
178 C : Constant<Type=F>, 260 C : Constant<Type=F>,
179 S : Constant<Type=F> { 261 S : Constant<Type=F> {
180 262
181 type Output = F; 263 type Codomain = F;
182 264
183 #[inline] 265 #[inline]
184 fn apply(&self, y : &'a Loc<F, N>) -> F { 266 fn apply<I : Instance<Loc<F, N>>>(&self, y : I) -> F {
185 let Convolution(ref ind, 267 let Convolution(ref ind,
186 SupportProductFirst(ref cut, 268 SupportProductFirst(ref cut,
187 ref gaussian)) = self; 269 ref gaussian)) = self;
188 let a = cut.r.value(); 270 let a = cut.r.value();
189 let b = ind.r.value(); 271 let b = ind.r.value();
190 let σ = gaussian.variance.value().sqrt(); 272 let σ = gaussian.variance.value().sqrt();
191 let t = F::SQRT_2 * σ; 273 let t = F::SQRT_2 * σ;
192 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2 274 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2
193 275
194 // This is just a product of one-dimensional versions 276 // This is just a product of one-dimensional versions
195 y.product_map(|x| { 277 y.cow().product_map(|x| {
196 let c1 = -(a.min(b + x)); //(-a).max(-x-b); 278 let c1 = -(a.min(b + x)); //(-a).max(-x-b);
197 let c2 = a.min(b - x); 279 let c2 = a.min(b - x);
198 if c1 >= c2 { 280 if c1 >= c2 {
199 0.0 281 0.0
200 } else { 282 } else {
205 } 287 }
206 }) 288 })
207 } 289 }
208 } 290 }
209 291
210 impl<F : Float, R, C, S, const N : usize> Apply<Loc<F, N>> 292 /// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$
211 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> 293 /// and $f$ is a gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the
212 where R : Constant<Type=F>, 294 /// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript.
213 C : Constant<Type=F>, 295 #[replace_float_literals(F::cast_from(literal))]
214 S : Constant<Type=F> { 296 impl<'a, F : Float, R, C, S, const N : usize> DifferentiableImpl<Loc<F, N>>
215 297 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
216 type Output = F; 298 where R : Constant<Type=F>,
217 299 C : Constant<Type=F>,
218 #[inline] 300 S : Constant<Type=F> {
219 fn apply(&self, y : Loc<F, N>) -> F { 301
220 self.apply(&y) 302 type Derivative = Loc<F, N>;
221 } 303
222 } 304 /// Although implemented, this function is not differentiable.
305 #[inline]
306 fn differential_impl<I : Instance<Loc<F, N>>>(&self, y0 : I) -> Loc<F, N> {
307 let Convolution(ref ind,
308 SupportProductFirst(ref cut,
309 ref gaussian)) = self;
310 let y = y0.cow();
311 let a = cut.r.value();
312 let b = ind.r.value();
313 let σ = gaussian.variance.value().sqrt();
314 let t = F::SQRT_2 * σ;
315 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2
316 let c_mul_erf_scale_div_t = c * F::FRAC_2_SQRT_PI / t;
317
318 // Calculate the values for all component functions of the
319 // product. This is just the loop from apply above.
320 let unscaled_vs = y.map(|x| {
321 let c1 = -(a.min(b + x)); //(-a).max(-x-b);
322 let c2 = a.min(b - x);
323 if c1 >= c2 {
324 0.0
325 } else {
326 let e1 = F::cast_from(erf((c1 / t).as_()));
327 let e2 = F::cast_from(erf((c2 / t).as_()));
328 debug_assert!(e2 >= e1);
329 c * (e2 - e1)
330 }
331 });
332 // This computes the gradient for each coordinate
333 product_differential(&*y, &unscaled_vs, |x| {
334 let c1 = -(a.min(b + x)); //(-a).max(-x-b);
335 let c2 = a.min(b - x);
336 if c1 >= c2 {
337 0.0
338 } else {
339 // erf'(z) = (2/√π)*exp(-z^2), and we get an extra factor -1/(√2*σ) = -1/t
340 // from the chain rule (the minus comes from inside c_1 or c_2, and changes the
341 // order of de2 and de1 in the final calculation).
342 let de1 = if b + x < a {
343 (-((b+x)/t).powi(2)).exp()
344 } else {
345 0.0
346 };
347 let de2 = if b - x < a {
348 (-((b-x)/t).powi(2)).exp()
349 } else {
350 0.0
351 };
352 c_mul_erf_scale_div_t * (de1 - de2)
353 }
354 })
355 }
356 }
357
358
359 #[replace_float_literals(F::cast_from(literal))]
360 impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L1>
361 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
362 where R : Constant<Type=F>,
363 C : Constant<Type=F>,
364 S : Constant<Type=F> {
365 type FloatType = F;
366
367 fn lipschitz_factor(&self, L1 : L1) -> Option<F> {
368 // To get the product Lipschitz factor, we note that for any ψ_i, we have
369 // ∏_{i=1}^N φ_i(x_i) - ∏_{i=1}^N φ_i(y_i)
370 // = [φ_1(x_1)-φ_1(y_1)] ∏_{i=2}^N φ_i(x_i)
371 // + φ_1(y_1)[ ∏_{i=2}^N φ_i(x_i) - ∏_{i=2}^N φ_i(y_i)]
372 // = ∑_{j=1}^N [φ_j(x_j)-φ_j(y_j)]∏_{i > j} φ_i(x_i) ∏_{i < j} φ_i(y_i)
373 // Thus
374 // |∏_{i=1}^N φ_i(x_i) - ∏_{i=1}^N φ_i(y_i)|
375 // ≤ ∑_{j=1}^N |φ_j(x_j)-φ_j(y_j)| ∏_{j ≠ i} \max_i |φ_i|
376 //
377 // Thus we need 1D Lipschitz factors, and the maximum for φ = θ * ψ.
378 //
379 // We have
380 // θ * ψ(x) = 0 if c_1(x) ≥ c_2(x)
381 // = (1/2)[erf(c_2(x)/(√2σ)) - erf(c_1(x)/(√2σ))] if c_1(x) < c_2(x),
382 // where c_1(x) = max{-x-b,-a} = -min{b+x,a} and c_2(x)=min{b-x,a}, C is the Gaussian
383 // normalisation factor, and erf(s) = (2/√π) ∫_0^s e^{-t^2} dt.
384 // Thus, if c_1(x) < c_2(x) and c_1(y) < c_2(y), we have
385 // θ * ψ(x) - θ * ψ(y) = (1/√π)[∫_{c_1(x)/(√2σ)}^{c_1(y)/(√2σ) e^{-t^2} dt
386 // - ∫_{c_2(x)/(√2σ)}^{c_2(y)/(√2σ)] e^{-t^2} dt]
387 // Thus
388 // |θ * ψ(x) - θ * ψ(y)| ≤ (1/√π)/(√2σ)(|c_1(x)-c_1(y)|+|c_2(x)-c_2(y)|)
389 // ≤ 2(1/√π)/(√2σ)|x-y|
390 // ≤ √2/(√πσ)|x-y|.
391 //
392 // For the product we also need the value θ * ψ(0), which is
393 // (1/2)[erf(min{a,b}/(√2σ))-erf(max{-b,-a}/(√2σ)]
394 // = (1/2)[erf(min{a,b}/(√2σ))-erf(-min{a,b}/(√2σ))]
395 // = erf(min{a,b}/(√2σ))
396 //
397 // If c_1(x) ≥ c_2(x), then x ∉ [-(a+b), a+b]. If also y is outside that range,
398 // θ * ψ(x) = θ * ψ(y). If only y is in the range [-(a+b), a+b], we can replace
399 // x by -(a+b) or (a+b), either of which is closer to y and still θ * ψ(x)=0.
400 // Thus same calculations as above work for the Lipschitz factor.
401 let Convolution(ref ind,
402 SupportProductFirst(ref cut,
403 ref gaussian)) = self;
404 let a = cut.r.value();
405 let b = ind.r.value();
406 let σ = gaussian.variance.value().sqrt();
407 let π = F::PI;
408 let t = F::SQRT_2 * σ;
409 let l1d = F::SQRT_2 / (π.sqrt() * σ);
410 let e0 = F::cast_from(erf((a.min(b) / t).as_()));
411 Some(l1d * e0.powi(N as i32-1))
412 }
413 }
414
415 /*
416 impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L2>
417 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
418 where R : Constant<Type=F>,
419 C : Constant<Type=F>,
420 S : Constant<Type=F> {
421 type FloatType = F;
422 #[inline]
423 fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
424 self.lipschitz_factor(L1).map(|l1| l1 * <S::Type>::cast_from(N).sqrt())
425 }
426 }
427 */
223 428
224 impl<F : Float, R, C, S, const N : usize> 429 impl<F : Float, R, C, S, const N : usize>
225 Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> 430 Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
226 where R : Constant<Type=F>, 431 where R : Constant<Type=F>,
227 C : Constant<Type=F>, 432 C : Constant<Type=F>,

mercurial