57 } |
62 } |
58 } |
63 } |
59 |
64 |
60 |
65 |
61 #[replace_float_literals(S::Type::cast_from(literal))] |
66 #[replace_float_literals(S::Type::cast_from(literal))] |
62 impl<'a, S, const N : usize> Apply<&'a Loc<S::Type, N>> for Gaussian<S, N> |
67 impl<'a, S, const N : usize> Mapping<Loc<S::Type, N>> for Gaussian<S, N> |
63 where S : Constant { |
68 where |
64 type Output = S::Type; |
69 S : Constant |
|
70 { |
|
71 type Codomain = S::Type; |
|
72 |
65 // This is not normalised to neither to have value 1 at zero or integral 1 |
73 // This is not normalised to neither to have value 1 at zero or integral 1 |
66 // (unless the cut-off ε=0). |
74 // (unless the cut-off ε=0). |
67 #[inline] |
75 #[inline] |
68 fn apply(&self, x : &'a Loc<S::Type, N>) -> Self::Output { |
76 fn apply<I : Instance<Loc<S::Type, N>>>(&self, x : I) -> Self::Codomain { |
69 let d_squared = x.norm2_squared(); |
77 let d_squared = x.eval(|x| x.norm2_squared()); |
70 let σ2 = self.variance.value(); |
78 let σ2 = self.variance.value(); |
71 let scale = self.scale(); |
79 let scale = self.scale(); |
72 (-d_squared / (2.0 * σ2)).exp() / scale |
80 (-d_squared / (2.0 * σ2)).exp() / scale |
73 } |
81 } |
74 } |
82 } |
75 |
83 |
76 impl<S, const N : usize> Apply<Loc<S::Type, N>> for Gaussian<S, N> |
84 #[replace_float_literals(S::Type::cast_from(literal))] |
77 where S : Constant { |
85 impl<'a, S, const N : usize> DifferentiableImpl<Loc<S::Type, N>> for Gaussian<S, N> |
78 type Output = S::Type; |
86 where S : Constant { |
79 #[inline] |
87 type Derivative = Loc<S::Type, N>; |
80 fn apply(&self, x : Loc<S::Type, N>) -> Self::Output { |
88 |
81 self.apply(&x) |
89 #[inline] |
82 } |
90 fn differential_impl<I : Instance<Loc<S::Type, N>>>(&self, x0 : I) -> Self::Derivative { |
83 } |
91 let x = x0.cow(); |
84 |
92 let f = -self.apply(&*x) / self.variance.value(); |
85 #[replace_float_literals(S::Type::cast_from(literal))] |
93 *x * f |
86 impl<'a, S, const N : usize> Differentiable<&'a Loc<S::Type, N>> for Gaussian<S, N> |
94 } |
87 where S : Constant { |
95 } |
88 type Output = Loc<S::Type, N>; |
96 |
89 #[inline] |
97 |
90 fn differential(&self, x : &'a Loc<S::Type, N>) -> Self::Output { |
98 // To calculate the the Lipschitz factors, we consider |
91 x * (self.apply(x) / self.variance.value()) |
99 // f(t) = e^{-t²/2} |
92 } |
100 // f'(t) = -t f(t) which has max at t=1 by f''(t)=0 |
93 } |
101 // f''(t) = (t²-1)f(t) which has max at t=√3 by f'''(t)=0 |
94 |
102 // f'''(t) = -(t³-3t) |
95 impl<S, const N : usize> Differentiable<Loc<S::Type, N>> for Gaussian<S, N> |
103 // So f has the Lipschitz factor L=f'(1), and f' has the Lipschitz factor L'=f''(√3). |
96 where S : Constant { |
104 // |
97 type Output = Loc<S::Type, N>; |
105 // Now g(x) = Cf(‖x‖/σ) for a scaling factor C is the Gaussian. |
98 // This is not normalised to neither to have value 1 at zero or integral 1 |
106 // Thus ‖g(x)-g(y)‖ = C‖f(‖x‖/σ)-f(‖y‖/σ)‖ ≤ (C/σ)L‖x-y‖, |
99 // (unless the cut-off ε=0). |
107 // so g has the Lipschitz factor (C/σ)f'(1) = (C/σ)exp(-0.5). |
100 #[inline] |
108 // |
101 fn differential(&self, x : Loc<S::Type, N>) -> Self::Output { |
109 // Also ∇g(x)= Cx/(σ‖x‖)f'(‖x‖/σ) (*) |
102 x * (self.apply(&x) / self.variance.value()) |
110 // = -(C/σ²)xf(‖x‖/σ) |
103 } |
111 // = -C/σ (x/σ) f(‖x/σ‖) |
104 } |
112 // ∇²g(x) = -(C/σ)[Id/σ f(‖x‖/σ) + x ⊗ x/(σ²‖x‖) f'(‖x‖/σ)] |
|
113 // = (C/σ²)[-Id + x ⊗ x/σ²]f(‖x‖/σ). |
|
114 // Thus ‖∇²g(x)‖ = (C/σ²)‖-Id + x ⊗ x/σ²‖f(‖x‖/σ), where |
|
115 // ‖-Id + x ⊗ x/σ²‖ = ‖[-Id + x ⊗ x/σ²](x/‖x‖)‖ = |-1 + ‖x²/σ^2‖|. |
|
116 // This means that ‖∇²g(x)‖ = (C/σ²)|f''(‖x‖/σ)|, which is maximised with ‖x‖/σ=√3. |
|
117 // Hence the Lipschitz factor of ∇g is (C/σ²)f''(√3) = (C/σ²)2e^{-3/2}. |
105 |
118 |
106 #[replace_float_literals(S::Type::cast_from(literal))] |
119 #[replace_float_literals(S::Type::cast_from(literal))] |
107 impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N> |
120 impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N> |
108 where S : Constant { |
121 where S : Constant { |
109 type FloatType = S::Type; |
122 type FloatType = S::Type; |
110 fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> { |
123 fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> { |
111 // f(x)=f_1(‖x‖_2/σ) * √(2π) / √(2πσ)^N, where f_1 is one-dimensional Gaussian with |
|
112 // variance 1. The Lipschitz factor of f_1 is e^{-1/2}/√(2π), see, e.g., |
|
113 // https://math.stackexchange.com/questions/3630967/is-the-gaussian-density-lipschitz-continuous |
|
114 // Thus the Lipschitz factor we want is e^{-1/2} / (√(2πσ)^N * σ). |
|
115 Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt())) |
124 Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt())) |
116 } |
125 } |
117 } |
126 } |
|
127 |
|
128 |
|
129 #[replace_float_literals(S::Type::cast_from(literal))] |
|
130 impl<'a, S : Constant, const N : usize> Lipschitz<L2> |
|
131 for Differential<'a, Loc<S::Type, N>, Gaussian<S, N>> { |
|
132 type FloatType = S::Type; |
|
133 |
|
134 fn lipschitz_factor(&self, _l2 : L2) -> Option<S::Type> { |
|
135 let g = self.base_fn(); |
|
136 let σ2 = g.variance.value(); |
|
137 let scale = g.scale(); |
|
138 Some(2.0*(-3.0/2.0).exp()/(σ2*scale)) |
|
139 } |
|
140 } |
|
141 |
|
142 // From above, norm bounds on the differnential can be calculated as achieved |
|
143 // for f' at t=1, i.e., the bound is |f'(1)|. |
|
144 // For g then |C/σ f'(1)|. |
|
145 // It follows that the norm bounds on the differential are just the Lipschitz |
|
146 // factors of the undifferentiated function, given how the latter is calculed above. |
|
147 |
|
148 #[replace_float_literals(S::Type::cast_from(literal))] |
|
149 impl<'b, S : Constant, const N : usize> NormBounded<L2> |
|
150 for Differential<'b, Loc<S::Type, N>, Gaussian<S, N>> { |
|
151 type FloatType = S::Type; |
|
152 |
|
153 fn norm_bound(&self, _l2 : L2) -> S::Type { |
|
154 self.base_fn().lipschitz_factor(L2).unwrap() |
|
155 } |
|
156 } |
|
157 |
|
158 #[replace_float_literals(S::Type::cast_from(literal))] |
|
159 impl<'b, 'a, S : Constant, const N : usize> NormBounded<L2> |
|
160 for Differential<'b, Loc<S::Type, N>, &'a Gaussian<S, N>> { |
|
161 type FloatType = S::Type; |
|
162 |
|
163 fn norm_bound(&self, _l2 : L2) -> S::Type { |
|
164 self.base_fn().lipschitz_factor(L2).unwrap() |
|
165 } |
|
166 } |
|
167 |
118 |
168 |
119 #[replace_float_literals(S::Type::cast_from(literal))] |
169 #[replace_float_literals(S::Type::cast_from(literal))] |
120 impl<'a, S, const N : usize> Gaussian<S, N> |
170 impl<'a, S, const N : usize> Gaussian<S, N> |
121 where S : Constant { |
171 where S : Constant { |
122 |
172 |
202 |
252 |
203 |
253 |
204 /// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is |
254 /// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is |
205 /// a gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript. |
255 /// a gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript. |
206 #[replace_float_literals(F::cast_from(literal))] |
256 #[replace_float_literals(F::cast_from(literal))] |
207 impl<'a, F : Float, R, C, S, const N : usize> Apply<&'a Loc<F, N>> |
257 impl<'a, F : Float, R, C, S, const N : usize> Mapping<Loc<F, N>> |
208 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
258 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
209 where R : Constant<Type=F>, |
259 where R : Constant<Type=F>, |
210 C : Constant<Type=F>, |
260 C : Constant<Type=F>, |
211 S : Constant<Type=F> { |
261 S : Constant<Type=F> { |
212 |
262 |
213 type Output = F; |
263 type Codomain = F; |
214 |
264 |
215 #[inline] |
265 #[inline] |
216 fn apply(&self, y : &'a Loc<F, N>) -> F { |
266 fn apply<I : Instance<Loc<F, N>>>(&self, y : I) -> F { |
217 let Convolution(ref ind, |
267 let Convolution(ref ind, |
218 SupportProductFirst(ref cut, |
268 SupportProductFirst(ref cut, |
219 ref gaussian)) = self; |
269 ref gaussian)) = self; |
220 let a = cut.r.value(); |
270 let a = cut.r.value(); |
221 let b = ind.r.value(); |
271 let b = ind.r.value(); |
222 let σ = gaussian.variance.value().sqrt(); |
272 let σ = gaussian.variance.value().sqrt(); |
223 let t = F::SQRT_2 * σ; |
273 let t = F::SQRT_2 * σ; |
224 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2 |
274 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2 |
225 |
275 |
226 // This is just a product of one-dimensional versions |
276 // This is just a product of one-dimensional versions |
227 y.product_map(|x| { |
277 y.cow().product_map(|x| { |
228 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
278 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
229 let c2 = a.min(b - x); |
279 let c2 = a.min(b - x); |
230 if c1 >= c2 { |
280 if c1 >= c2 { |
231 0.0 |
281 0.0 |
232 } else { |
282 } else { |
237 } |
287 } |
238 }) |
288 }) |
239 } |
289 } |
240 } |
290 } |
241 |
291 |
242 impl<F : Float, R, C, S, const N : usize> Apply<Loc<F, N>> |
|
243 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
|
244 where R : Constant<Type=F>, |
|
245 C : Constant<Type=F>, |
|
246 S : Constant<Type=F> { |
|
247 |
|
248 type Output = F; |
|
249 |
|
250 #[inline] |
|
251 fn apply(&self, y : Loc<F, N>) -> F { |
|
252 self.apply(&y) |
|
253 } |
|
254 } |
|
255 |
|
256 /// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ |
292 /// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ |
257 /// and $f$ is a gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the |
293 /// and $f$ is a gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the |
258 /// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript. |
294 /// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript. |
259 #[replace_float_literals(F::cast_from(literal))] |
295 #[replace_float_literals(F::cast_from(literal))] |
260 impl<'a, F : Float, R, C, S, const N : usize> Differentiable<&'a Loc<F, N>> |
296 impl<'a, F : Float, R, C, S, const N : usize> DifferentiableImpl<Loc<F, N>> |
261 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
297 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
262 where R : Constant<Type=F>, |
298 where R : Constant<Type=F>, |
263 C : Constant<Type=F>, |
299 C : Constant<Type=F>, |
264 S : Constant<Type=F> { |
300 S : Constant<Type=F> { |
265 |
301 |
266 type Output = Loc<F, N>; |
302 type Derivative = Loc<F, N>; |
267 |
303 |
268 #[inline] |
304 /// Although implemented, this function is not differentiable. |
269 fn differential(&self, y : &'a Loc<F, N>) -> Loc<F, N> { |
305 #[inline] |
|
306 fn differential_impl<I : Instance<Loc<F, N>>>(&self, y0 : I) -> Loc<F, N> { |
270 let Convolution(ref ind, |
307 let Convolution(ref ind, |
271 SupportProductFirst(ref cut, |
308 SupportProductFirst(ref cut, |
272 ref gaussian)) = self; |
309 ref gaussian)) = self; |
|
310 let y = y0.cow(); |
273 let a = cut.r.value(); |
311 let a = cut.r.value(); |
274 let b = ind.r.value(); |
312 let b = ind.r.value(); |
275 let σ = gaussian.variance.value().sqrt(); |
313 let σ = gaussian.variance.value().sqrt(); |
276 let t = F::SQRT_2 * σ; |
314 let t = F::SQRT_2 * σ; |
277 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2 |
315 let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2 |
278 let c_div_t = c / t; |
316 let c_mul_erf_scale_div_t = c * F::FRAC_2_SQRT_PI / t; |
279 |
317 |
280 // Calculate the values for all component functions of the |
318 // Calculate the values for all component functions of the |
281 // product. This is just the loop from apply above. |
319 // product. This is just the loop from apply above. |
282 let unscaled_vs = y.map(|x| { |
320 let unscaled_vs = y.map(|x| { |
283 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
321 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
290 debug_assert!(e2 >= e1); |
328 debug_assert!(e2 >= e1); |
291 c * (e2 - e1) |
329 c * (e2 - e1) |
292 } |
330 } |
293 }); |
331 }); |
294 // This computes the gradient for each coordinate |
332 // This computes the gradient for each coordinate |
295 product_differential(y, &unscaled_vs, |x| { |
333 product_differential(&*y, &unscaled_vs, |x| { |
296 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
334 let c1 = -(a.min(b + x)); //(-a).max(-x-b); |
297 let c2 = a.min(b - x); |
335 let c2 = a.min(b - x); |
298 if c1 >= c2 { |
336 if c1 >= c2 { |
299 0.0 |
337 0.0 |
300 } else { |
338 } else { |
301 // erf'(z) = (2/√π)*exp(-z^2), and we get extra factor -1/(√2*σ) = -1/t |
339 // erf'(z) = (2/√π)*exp(-z^2), and we get extra factor 1/(√2*σ) = -1/t |
302 // from the chain rule (the minus comes from inside c_1 or c_2). |
340 // from the chain rule (the minus comes from inside c_1 or c_2, and changes the |
303 let de1 = (-(c1/t).powi(2)).exp(); |
341 // order of de2 and de1 in the final calculation). |
304 let de2 = (-(c2/t).powi(2)).exp(); |
342 let de1 = if b + x < a { |
305 c_div_t * (de1 - de2) |
343 (-((b+x)/t).powi(2)).exp() |
|
344 } else { |
|
345 0.0 |
|
346 }; |
|
347 let de2 = if b - x < a { |
|
348 (-((b-x)/t).powi(2)).exp() |
|
349 } else { |
|
350 0.0 |
|
351 }; |
|
352 c_mul_erf_scale_div_t * (de1 - de2) |
306 } |
353 } |
307 }) |
354 }) |
308 } |
355 } |
309 } |
356 } |
310 |
357 |
311 impl<F : Float, R, C, S, const N : usize> Differentiable<Loc<F, N>> |
|
312 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
|
313 where R : Constant<Type=F>, |
|
314 C : Constant<Type=F>, |
|
315 S : Constant<Type=F> { |
|
316 |
|
317 type Output = Loc<F, N>; |
|
318 |
|
319 #[inline] |
|
320 fn differential(&self, y : Loc<F, N>) -> Loc<F, N> { |
|
321 self.differential(&y) |
|
322 } |
|
323 } |
|
324 |
358 |
325 #[replace_float_literals(F::cast_from(literal))] |
359 #[replace_float_literals(F::cast_from(literal))] |
326 impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L1> |
360 impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L1> |
327 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
361 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> |
328 where R : Constant<Type=F>, |
362 where R : Constant<Type=F>, |