56 } |
62 } |
57 } |
63 } |
58 |
64 |
59 |
65 |
60 #[replace_float_literals(S::Type::cast_from(literal))] |
66 #[replace_float_literals(S::Type::cast_from(literal))] |
61 impl<'a, S, const N : usize> Apply<&'a Loc<S::Type, N>> for Gaussian<S, N> |
67 impl<'a, S, const N : usize> Mapping<Loc<S::Type, N>> for Gaussian<S, N> |
62 where S : Constant { |
68 where |
63 type Output = S::Type; |
69 S : Constant |
|
70 { |
|
71 type Codomain = S::Type; |
|
72 |
64 // This is normalised neither to have value 1 at zero nor to have integral 1 |
73 // This is normalised neither to have value 1 at zero nor to have integral 1 |
65 // (unless the cut-off ε=0). |
74 // (unless the cut-off ε=0). |
66 #[inline] |
75 #[inline] |
67 fn apply(&self, x : &'a Loc<S::Type, N>) -> Self::Output { |
76 fn apply<I : Instance<Loc<S::Type, N>>>(&self, x : I) -> Self::Codomain { |
68 let d_squared = x.norm2_squared(); |
77 let d_squared = x.eval(|x| x.norm2_squared()); |
69 let σ2 = self.variance.value(); |
78 let σ2 = self.variance.value(); |
70 let scale = self.scale(); |
79 let scale = self.scale(); |
71 (-d_squared / (2.0 * σ2)).exp() / scale |
80 (-d_squared / (2.0 * σ2)).exp() / scale |
72 } |
81 } |
73 } |
82 } |
74 |
83 |
75 impl<S, const N : usize> Apply<Loc<S::Type, N>> for Gaussian<S, N> |
84 #[replace_float_literals(S::Type::cast_from(literal))] |
76 where S : Constant { |
85 impl<'a, S, const N : usize> DifferentiableImpl<Loc<S::Type, N>> for Gaussian<S, N> |
77 type Output = S::Type; |
86 where S : Constant { |
78 // This is normalised neither to have value 1 at zero nor to have integral 1 |
87 type Derivative = Loc<S::Type, N>; |
79 // (unless the cut-off ε=0). |
88 |
80 #[inline] |
89 #[inline] |
81 fn apply(&self, x : Loc<S::Type, N>) -> Self::Output { |
90 fn differential_impl<I : Instance<Loc<S::Type, N>>>(&self, x0 : I) -> Self::Derivative { |
82 self.apply(&x) |
91 let x = x0.cow(); |
|
92 let f = -self.apply(&*x) / self.variance.value(); |
|
93 *x * f |
|
94 } |
|
95 } |
|
96 |
|
97 |
|
98 // To calculate the Lipschitz factors, we consider |
|
99 // f(t) = e^{-t²/2} |
|
100 // f'(t) = -t f(t) which has max at t=1 by f''(t)=0 |
|
101 // f''(t) = (t²-1)f(t) which has max at t=√3 by f'''(t)=0 |
|
102 // f'''(t) = -(t³-3t)f(t) |
|
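103 // So f has the Lipschitz factor L=f'(1), and f' has the Lipschitz factor L'=f''(√3). NOTE(review): |f''(0)| = 1 exceeds f''(√3) = 2e^{-3/2}, so sup|f''| appears to be attained at t=0; confirm L'. |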
|
104 // |
|
105 // Now g(x) = Cf(‖x‖/σ) for a scaling factor C is the Gaussian. |
|
106 // Thus ‖g(x)-g(y)‖ = C‖f(‖x‖/σ)-f(‖y‖/σ)‖ ≤ (C/σ)L‖x-y‖, |
|
107 // so g has the Lipschitz factor (C/σ)f'(1) = (C/σ)exp(-0.5). |
|
108 // |
|
109 // Also ∇g(x)= Cx/(σ‖x‖)f'(‖x‖/σ) (*) |
|
110 // = -(C/σ²)xf(‖x‖/σ) |
|
111 // = -C/σ (x/σ) f(‖x/σ‖) |
|
112 // ∇²g(x) = -(C/σ)[Id/σ f(‖x‖/σ) + x ⊗ x/(σ²‖x‖) f'(‖x‖/σ)] |
|
113 // = (C/σ²)[-Id + x ⊗ x/σ²]f(‖x‖/σ). |
|
114 // Thus ‖∇²g(x)‖ = (C/σ²)‖-Id + x ⊗ x/σ²‖f(‖x‖/σ), where |
|
115 // ‖-Id + x ⊗ x/σ²‖ = ‖[-Id + x ⊗ x/σ²](x/‖x‖)‖ = |-1 + ‖x²/σ^2‖|. |
|
116 // This means that ‖∇²g(x)‖ = (C/σ²)|f''(‖x‖/σ)|, which is maximised with ‖x‖/σ=√3. |
|
117 // Hence the Lipschitz factor of ∇g is (C/σ²)f''(√3) = (C/σ²)2e^{-3/2}. |
|
118 |
|
119 #[replace_float_literals(S::Type::cast_from(literal))] |
|
120 impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N> |
|
121 where S : Constant { |
|
122 type FloatType = S::Type; |
|
123 fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> { |
|
124 Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt())) |
|
125 } |
|
126 } |
|
127 |
|
128 |
|
129 #[replace_float_literals(S::Type::cast_from(literal))] |
|
130 impl<'a, S : Constant, const N : usize> Lipschitz<L2> |
|
131 for Differential<'a, Loc<S::Type, N>, Gaussian<S, N>> { |
|
132 type FloatType = S::Type; |
|
133 |
|
134 fn lipschitz_factor(&self, _l2 : L2) -> Option<S::Type> { |
|
135 let g = self.base_fn(); |
|
136 let σ2 = g.variance.value(); |
|
137 let scale = g.scale(); |
|
138 Some(2.0*(-3.0/2.0).exp()/(σ2*scale)) |
|
139 } |
|
140 } |
|
141 |
|
142 // From above, norm bounds on the differential can be calculated as achieved |

143 // for f' at t=1, i.e., the bound is |f'(1)|. |

144 // For g then |C/σ f'(1)|. |

145 // It follows that the norm bounds on the differential are just the Lipschitz |

146 // factors of the undifferentiated function, given how the latter is calculated above. |
|
147 |
|
148 #[replace_float_literals(S::Type::cast_from(literal))] |
|
149 impl<'b, S : Constant, const N : usize> NormBounded<L2> |
|
150 for Differential<'b, Loc<S::Type, N>, Gaussian<S, N>> { |
|
151 type FloatType = S::Type; |
|
152 |
|
153 fn norm_bound(&self, _l2 : L2) -> S::Type { |
|
154 self.base_fn().lipschitz_factor(L2).unwrap() |
|
155 } |
|
156 } |
|
157 |
|
158 #[replace_float_literals(S::Type::cast_from(literal))] |
|
159 impl<'b, 'a, S : Constant, const N : usize> NormBounded<L2> |
|
160 for Differential<'b, Loc<S::Type, N>, &'a Gaussian<S, N>> { |
|
161 type FloatType = S::Type; |
|
162 |
|
163 fn norm_bound(&self, _l2 : L2) -> S::Type { |
|
164 self.base_fn().lipschitz_factor(L2).unwrap() |
83 } |
165 } |
84 } |
166 } |
85 |
167 |
86 |
168 |
87 #[replace_float_literals(S::Type::cast_from(literal))] |
169 #[replace_float_literals(S::Type::cast_from(literal))] |
167 /// where $a>0$ and $f$ is a gaussian kernel on $ℝ^n$. |
249 /// where $a>0$ and $f$ is a gaussian kernel on $ℝ^n$. |
168 pub type BasicCutGaussian<C, S, const N : usize> = SupportProductFirst<CubeIndicator<C, N>, |
250 pub type BasicCutGaussian<C, S, const N : usize> = SupportProductFirst<CubeIndicator<C, N>, |
169 Gaussian<S, N>>; |
251 Gaussian<S, N>>; |
170 |
252 |
171 |
253 |
172 /// This implements $χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ |
254 /// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is |
173 /// where $a,b>0$ and $f$ is a gaussian kernel on $ℝ^n$. |
255 /// a gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript. |
174 #[replace_float_literals(F::cast_from(literal))] |
256 #[replace_float_literals(F::cast_from(literal))] |
175 impl<'a, F : Float, R, C, S, const N : usize> Apply<&'a Loc<F, N>> |
257 impl<'a, F : Float, R, C, S, const N : usize> Mapping<Loc<F, N>> |
176 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
258 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
177 where R : Constant<Type=F>, |
259 where R : Constant<Type=F>, |
178 C : Constant<Type=F>, |
260 C : Constant<Type=F>, |
179 S : Constant<Type=F> { |
261 S : Constant<Type=F> { |
180 |
262 |
181 type Output = F; |
263 type Codomain = F; |
182 |
264 |
183 #[inline] |
265 #[inline] |
184 fn apply(&self, y : &'a Loc<F, N>) -> F { |
266 fn apply<I : Instance<Loc<F, N>>>(&self, y : I) -> F { |
185 let Convolution(ref ind, |
267 let Convolution(ref ind, |
186 SupportProductFirst(ref cut, |
268 SupportProductFirst(ref cut, |
187 ref gaussian)) = self; |
269 ref gaussian)) = self; |
188 let a = cut.r.value(); |
270 let a = cut.r.value(); |
189 let b = ind.r.value(); |
271 let b = ind.r.value(); |
190 let σ = gaussian.variance.value().sqrt(); |
272 let σ = gaussian.variance.value().sqrt(); |
191 let t = F::SQRT_2 * σ; |
273 let t = F::SQRT_2 * σ; |
192 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2 |
274 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2 |
193 |
275 |
194 // This is just a product of one-dimensional versions |
276 // This is just a product of one-dimensional versions |
195 y.product_map(|x| { |
277 y.cow().product_map(|x| { |
196 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
278 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
197 let c2 = a.min(b - x); |
279 let c2 = a.min(b - x); |
198 if c1 >= c2 { |
280 if c1 >= c2 { |
199 0.0 |
281 0.0 |
200 } else { |
282 } else { |
205 } |
287 } |
206 }) |
288 }) |
207 } |
289 } |
208 } |
290 } |
209 |
291 |
210 impl<F : Float, R, C, S, const N : usize> Apply<Loc<F, N>> |
292 /// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ |
211 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
293 /// and $f$ is a gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the |
212 where R : Constant<Type=F>, |
294 /// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript. |
213 C : Constant<Type=F>, |
295 #[replace_float_literals(F::cast_from(literal))] |
214 S : Constant<Type=F> { |
296 impl<'a, F : Float, R, C, S, const N : usize> DifferentiableImpl<Loc<F, N>> |
215 |
297 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
216 type Output = F; |
298 where R : Constant<Type=F>, |
217 |
299 C : Constant<Type=F>, |
218 #[inline] |
300 S : Constant<Type=F> { |
219 fn apply(&self, y : Loc<F, N>) -> F { |
301 |
220 self.apply(&y) |
302 type Derivative = Loc<F, N>; |
221 } |
303 |
222 } |
304 /// Although implemented, this function is not differentiable. |
|
305 #[inline] |
|
306 fn differential_impl<I : Instance<Loc<F, N>>>(&self, y0 : I) -> Loc<F, N> { |
|
307 let Convolution(ref ind, |
|
308 SupportProductFirst(ref cut, |
|
309 ref gaussian)) = self; |
|
310 let y = y0.cow(); |
|
311 let a = cut.r.value(); |
|
312 let b = ind.r.value(); |
|
313 let σ = gaussian.variance.value().sqrt(); |
|
314 let t = F::SQRT_2 * σ; |
|
315 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2 |
|
316 let c_mul_erf_scale_div_t = c * F::FRAC_2_SQRT_PI / t; |
|
317 |
|
318 // Calculate the values for all component functions of the |
|
319 // product. This is just the loop from apply above. |
|
320 let unscaled_vs = y.map(|x| { |
|
321 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
|
322 let c2 = a.min(b - x); |
|
323 if c1 >= c2 { |
|
324 0.0 |
|
325 } else { |
|
326 let e1 = F::cast_from(erf((c1 / t).as_())); |
|
327 let e2 = F::cast_from(erf((c2 / t).as_())); |
|
328 debug_assert!(e2 >= e1); |
|
329 c * (e2 - e1) |
|
330 } |
|
331 }); |
|
332 // This computes the gradient for each coordinate |
|
333 product_differential(&*y, &unscaled_vs, |x| { |
|
334 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
|
335 let c2 = a.min(b - x); |
|
336 if c1 >= c2 { |
|
337 0.0 |
|
338 } else { |
|
339 // erf'(z) = (2/√π)*exp(-z^2), and we get extra factor -1/(√2*σ) = -1/t |
|
340 // from the chain rule (the minus comes from inside c_1 or c_2, and changes the |
|
341 // order of de2 and de1 in the final calculation). |
|
342 let de1 = if b + x < a { |
|
343 (-((b+x)/t).powi(2)).exp() |
|
344 } else { |
|
345 0.0 |
|
346 }; |
|
347 let de2 = if b - x < a { |
|
348 (-((b-x)/t).powi(2)).exp() |
|
349 } else { |
|
350 0.0 |
|
351 }; |
|
352 c_mul_erf_scale_div_t * (de1 - de2) |
|
353 } |
|
354 }) |
|
355 } |
|
356 } |
|
357 |
|
358 |
|
359 #[replace_float_literals(F::cast_from(literal))] |
|
360 impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L1> |
|
361 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
|
362 where R : Constant<Type=F>, |
|
363 C : Constant<Type=F>, |
|
364 S : Constant<Type=F> { |
|
365 type FloatType = F; |
|
366 |
|
367 fn lipschitz_factor(&self, L1 : L1) -> Option<F> { |
|
368 // To get the product Lipschitz factor, we note that for any φ_i, we have |
|
369 // ∏_{i=1}^N φ_i(x_i) - ∏_{i=1}^N φ_i(y_i) |
|
370 // = [φ_1(x_1)-φ_1(y_1)] ∏_{i=2}^N φ_i(x_i) |
|
371 // + φ_1(y_1)[ ∏_{i=2}^N φ_i(x_i) - ∏_{i=2}^N φ_i(y_i)] |
|
372 // = ∑_{j=1}^N [φ_j(x_j)-φ_j(y_j)]∏_{i > j} φ_i(x_i) ∏_{i < j} φ_i(y_i) |
|
373 // Thus |
|
374 // |∏_{i=1}^N φ_i(x_i) - ∏_{i=1}^N φ_i(y_i)| |
|
375 // ≤ ∑_{j=1}^N |φ_j(x_j)-φ_j(y_j)| ∏_{i ≠ j} \max |φ_i| |
|
376 // |
|
377 // Thus we need 1D Lipschitz factors, and the maximum for φ = θ * ψ. |
|
378 // |
|
379 // We have |
|
380 // θ * ψ(x) = 0 if c_1(x) ≥ c_2(x) |
|
381 // = (1/2)[erf(c_2(x)/(√2σ)) - erf(c_1(x)/(√2σ))] if c_1(x) < c_2(x), |
|
382 // where c_1(x) = max{-x-b,-a} = -min{b+x,a} and c_2(x)=min{b-x,a}, C is the Gaussian |
|
383 // normalisation factor, and erf(s) = (2/√π) ∫_0^s e^{-t^2} dt. |
|
384 // Thus, if c_1(x) < c_2(x) and c_1(y) < c_2(y), we have |
|
385 // θ * ψ(x) - θ * ψ(y) = (1/√π)[∫_{c_1(x)/(√2σ)}^{c_1(y)/(√2σ)} e^{-t^2} dt |

386 // - ∫_{c_2(x)/(√2σ)}^{c_2(y)/(√2σ)} e^{-t^2} dt] |
|
387 // Thus |
|
388 // |θ * ψ(x) - θ * ψ(y)| ≤ (1/√π)/(√2σ)(|c_1(x)-c_1(y)|+|c_2(x)-c_2(y)|) |
|
389 // ≤ 2(1/√π)/(√2σ)|x-y| |
|
390 // ≤ √2/(√πσ)|x-y|. |
|
391 // |
|
392 // For the product we also need the value θ * ψ(0), which is |
|
393 // (1/2)[erf(min{a,b}/(√2σ))-erf(max{-b,-a}/(√2σ))] |
|
394 // = (1/2)[erf(min{a,b}/(√2σ))-erf(-min{a,b}/(√2σ))] |
|
395 // = erf(min{a,b}/(√2σ)) |
|
396 // |
|
397 // If c_1(x) ≥ c_2(x), then x ∉ [-(a+b), a+b]. If also y is outside that range, |
|
398 // θ * ψ(x) = θ * ψ(y). If only y is in the range [-(a+b), a+b], we can replace |
|
399 // x by -(a+b) or (a+b), either of which is closer to y and still θ * ψ(x)=0. |
|
400 // Thus same calculations as above work for the Lipschitz factor. |
|
401 let Convolution(ref ind, |
|
402 SupportProductFirst(ref cut, |
|
403 ref gaussian)) = self; |
|
404 let a = cut.r.value(); |
|
405 let b = ind.r.value(); |
|
406 let σ = gaussian.variance.value().sqrt(); |
|
407 let π = F::PI; |
|
408 let t = F::SQRT_2 * σ; |
|
409 let l1d = F::SQRT_2 / (π.sqrt() * σ); |
|
410 let e0 = F::cast_from(erf((a.min(b) / t).as_())); |
|
411 Some(l1d * e0.powi(N as i32-1)) |
|
412 } |
|
413 } |
|
414 |
|
415 /* |
|
416 impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L2> |
|
417 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
|
418 where R : Constant<Type=F>, |
|
419 C : Constant<Type=F>, |
|
420 S : Constant<Type=F> { |
|
421 type FloatType = F; |
|
422 #[inline] |
|
423 fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> { |
|
424 self.lipschitz_factor(L1).map(|l1| l1 * <S::Type>::cast_from(N).sqrt()) |
|
425 } |
|
426 } |
|
427 */ |
223 |
428 |
224 impl<F : Float, R, C, S, const N : usize> |
429 impl<F : Float, R, C, S, const N : usize> |
225 Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
430 Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
226 where R : Constant<Type=F>, |
431 where R : Constant<Type=F>, |
227 C : Constant<Type=F>, |
432 C : Constant<Type=F>, |