//! Implementation of the gaussian kernel.

use float_extras::f64::erf;
use numeric_literals::replace_float_literals;
use serde::Serialize;
use alg_tools::types::*;
use alg_tools::euclidean::Euclidean;
use alg_tools::norms::*;
use alg_tools::loc::Loc;
use alg_tools::sets::Cube;
use alg_tools::bisection_tree::{
    Support,
    Constant,
    Bounds,
    LocalAnalysis,
    GlobalAnalysis,
    Weighted,
    Bounded,
};
use alg_tools::mapping::{
    Mapping,
    Instance,
    Differential,
    DifferentiableImpl,
};
use alg_tools::maputil::array_init;

use crate::types::*;
use crate::fourier::Fourier;
use super::base::*;
use super::ball_indicator::CubeIndicator;

/// Storage representation of the anisotropic gaussian kernel of `variance` $σ^2$.
///
/// This is the function $f(x) = C e^{-\\|x\\|\_2^2/(2σ^2)}$ for $x ∈ ℝ^N$
/// with $C=1/(2πσ^2)^{N/2}$.
#[derive(Copy,Clone,Debug,Serialize,Eq)]
pub struct Gaussian<S : Constant, const N : usize> {
    /// The variance $σ^2$.
    pub variance : S,
}

impl<S1, S2, const N : usize> PartialEq<Gaussian<S2, N>> for Gaussian<S1, N>
where S1 : Constant,
      S2 : Constant<Type=S1::Type> {
    fn eq(&self, other : &Gaussian<S2, N>) -> bool {
        self.variance.value() == other.variance.value()
    }
}

impl<S1, S2, const N : usize> PartialOrd<Gaussian<S2, N>> for Gaussian<S1, N>
where S1 : Constant,
      S2 : Constant<Type=S1::Type> {
    fn partial_cmp(&self, other : &Gaussian<S2, N>) -> Option<std::cmp::Ordering> {
        // A gaussian is ≤ another gaussian if the Fourier transforms satisfy the
        // corresponding inequality. That in turn holds if and only if the variances
        // satisfy the opposite inequality.
        let σ1sq = self.variance.value();
        let σ2sq = other.variance.value();
        σ2sq.partial_cmp(&σ1sq)
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<'a, S, const N : usize> Mapping<Loc<S::Type, N>> for Gaussian<S, N>
where S : Constant {
    type Codomain = S::Type;

    // Note: with the scaling factor C this is normalised to have unit integral
    // (L¹ norm 1), not to have value 1 at zero.
    #[inline]
    fn apply<I : Instance<Loc<S::Type, N>>>(&self, x : I) -> Self::Codomain {
        let d_squared = x.eval(|x| x.norm2_squared());
        let σ2 = self.variance.value();
        let scale = self.scale();
        (-d_squared / (2.0 * σ2)).exp() / scale
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<'a, S, const N : usize> DifferentiableImpl<Loc<S::Type, N>> for Gaussian<S, N>
where S : Constant {
    type Derivative = Loc<S::Type, N>;

    #[inline]
    fn differential_impl<I : Instance<Loc<S::Type, N>>>(&self, x0 : I) -> Self::Derivative {
        let x = x0.cow();
        let f = -self.apply(&*x) / self.variance.value();
        *x * f
    }
}

// To calculate the Lipschitz factors, we consider the one-dimensional profile
//   f(t)    = e^{-t²/2},
//   f'(t)   = -t f(t),        with |f'| maximised at t=1 (where f''(t)=0),
//   f''(t)  = (t²-1) f(t),    with |f''| maximised at t=0 (value 1), since
//   f'''(t) = -(t³-3t) f(t)   vanishes at t=0 and t=√3, and |f''(√3)| = 2e^{-3/2} < 1.
// So f has the Lipschitz factor L = |f'(1)| = e^{-1/2}, and f' has the Lipschitz
// factor L' = |f''(0)| = 1.
//
// Now g(x) = C f(‖x‖/σ) for a scaling factor C is the Gaussian.
// Thus |g(x)-g(y)| = C|f(‖x‖/σ)-f(‖y‖/σ)| ≤ (C/σ)L‖x-y‖,
// so g has the Lipschitz factor (C/σ)|f'(1)| = (C/σ)e^{-1/2}.
//
// Also ∇g(x)  = (C/σ) (x/‖x‖) f'(‖x‖/σ)
//             = -(C/σ²) x f(‖x‖/σ), and
//      ∇²g(x) = (C/σ²) [x ⊗ x/σ² - Id] f(‖x‖/σ).
// The eigenvalues of [x ⊗ x/σ² - Id] are ‖x‖²/σ²-1 (eigenvector x) and -1
// (multiplicity N-1), so
//      ‖∇²g(x)‖ = (C/σ²) max{1, ‖x‖²/σ²-1} f(‖x‖/σ).
// Since (t²-1) f(t) ≤ |f''(√3)| = 2e^{-3/2} < 1 = f(0), this is maximised at x=0.
// Hence the Lipschitz factor of ∇g is C/σ².
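
// A minimal numerical sanity check of the profile bounds used in the Lipschitz
// factors below, in plain `f64` arithmetic. The test-module name, grid, and
// tolerances are illustrative only.
#[cfg(test)]
mod gaussian_profile_tests {
    #[test]
    fn profile_derivative_bounds() {
        let f   = |t : f64| (-t * t / 2.0).exp();
        let df  = |t : f64| -t * f(t);              // f'(t)  = -t e^{-t²/2}
        let ddf = |t : f64| (t * t - 1.0) * f(t);   // f''(t) = (t²-1) e^{-t²/2}

        // Sample |f'| and |f''| on a grid and compare to the claimed suprema.
        let sup_df = (0..10_000)
            .map(|i| df(i as f64 * 1e-3).abs())
            .fold(0.0_f64, f64::max);
        let sup_ddf = (0..10_000)
            .map(|i| ddf(i as f64 * 1e-3).abs())
            .fold(0.0_f64, f64::max);

        assert!((sup_df - (-0.5_f64).exp()).abs() < 1e-3); // attained at t = 1
        assert!((sup_ddf - 1.0).abs() < 1e-3);             // attained at t = 0
    }
}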
#[replace_float_literals(S::Type::cast_from(literal))]
impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N>
where S : Constant {
    type FloatType = S::Type;

    fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
        Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt()))
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<'a, S : Constant, const N : usize> Lipschitz<L2>
for Differential<'a, Loc<S::Type, N>, Gaussian<S, N>> {
    type FloatType = S::Type;

    fn lipschitz_factor(&self, _l2 : L2) -> Option<S::Type> {
        // By the analysis above, ‖∇²g(x)‖ ≤ C/σ² with C = 1/scale.
        let g = self.base_fn();
        let σ2 = g.variance.value();
        let scale = g.scale();
        Some(1.0/(σ2*scale))
    }
}

// From the analysis above, the norm of the differential attains its maximum where
// |f'| does, i.e., at t=1, so the bound is |f'(1)|; for g it is (C/σ)|f'(1)|.
// It follows that the norm bounds on the differential are just the Lipschitz
// factors of the undifferentiated function, given how the latter are calculated above.

#[replace_float_literals(S::Type::cast_from(literal))]
impl<'b, S : Constant, const N : usize> NormBounded<L2>
for Differential<'b, Loc<S::Type, N>, Gaussian<S, N>> {
    type FloatType = S::Type;

    fn norm_bound(&self, _l2 : L2) -> S::Type {
        self.base_fn().lipschitz_factor(L2).unwrap()
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<'b, 'a, S : Constant, const N : usize> NormBounded<L2>
for Differential<'b, Loc<S::Type, N>, &'a Gaussian<S, N>> {
    type FloatType = S::Type;

    fn norm_bound(&self, _l2 : L2) -> S::Type {
        self.base_fn().lipschitz_factor(L2).unwrap()
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<'a, S, const N : usize> Gaussian<S, N>
where S : Constant {
    /// Returns the (reciprocal) scaling constant $1/C=(2πσ^2)^{N/2}$.
    #[inline]
    pub fn scale(&self) -> S::Type {
        let π = S::Type::PI;
        let σ2 = self.variance.value();
        (2.0*π*σ2).powi(N as i32).sqrt()
    }
}

impl<'a, S, const N : usize> Support<S::Type, N> for Gaussian<S, N>
where S : Constant {
    #[inline]
    fn support_hint(&self) -> Cube<S::Type,N> {
        array_init(|| [S::Type::NEG_INFINITY, S::Type::INFINITY]).into()
    }

    #[inline]
    fn in_support(&self, _x : &Loc<S::Type,N>) -> bool {
        true
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<S, const N : usize> GlobalAnalysis<S::Type, Bounds<S::Type>> for Gaussian<S, N>
where S : Constant {
    #[inline]
    fn global_analysis(&self) -> Bounds<S::Type> {
        Bounds(0.0, 1.0/self.scale())
    }
}

impl<S, const N : usize> LocalAnalysis<S::Type, Bounds<S::Type>, N> for Gaussian<S, N>
where S : Constant {
    #[inline]
    fn local_analysis(&self, cube : &Cube<S::Type, N>) -> Bounds<S::Type> {
        // The function is maximised/minimised where the 2-norm is minimised/maximised.
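        // Since the kernel is a decreasing function of ‖x‖₂, evaluating it at a point
        // of maximal norm within the cube yields a lower bound, and at a point of
        // minimal norm an upper bound.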
        let lower = self.apply(cube.maxnorm_point());
        let upper = self.apply(cube.minnorm_point());
        Bounds(lower, upper)
    }
}

#[replace_float_literals(C::Type::cast_from(literal))]
impl<'a, C : Constant, const N : usize> Norm<C::Type, L1> for Gaussian<C, N> {
    #[inline]
    fn norm(&self, _ : L1) -> C::Type {
        1.0
    }
}

#[replace_float_literals(C::Type::cast_from(literal))]
impl<'a, C : Constant, const N : usize> Norm<C::Type, Linfinity> for Gaussian<C, N> {
    #[inline]
    fn norm(&self, _ : Linfinity) -> C::Type {
        self.bounds().upper()
    }
}

#[replace_float_literals(C::Type::cast_from(literal))]
impl<'a, C : Constant, const N : usize> Fourier<C::Type> for Gaussian<C, N> {
    type Domain = Loc<C::Type, N>;
    type Transformed = Weighted<Gaussian<C::Type, N>, C::Type>;

    #[inline]
    fn fourier(&self) -> Self::Transformed {
        let π = C::Type::PI;
        let σ2 = self.variance.value();
        let g = Gaussian { variance : 1.0 / (4.0*π*π*σ2) };
        g.weigh(g.scale())
    }
}

/// Representation of the “cut” gaussian $f χ\_{[-a, a]^n}$
/// where $a>0$ and $f$ is a gaussian kernel on $ℝ^n$.
pub type BasicCutGaussian<C, S, const N : usize> = SupportProductFirst<CubeIndicator<C, N>,
                                                                       Gaussian<S, N>>;

/// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is
/// a gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript.
#[replace_float_literals(F::cast_from(literal))]
impl<'a, F : Float, R, C, S, const N : usize> Mapping<Loc<F, N>>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    type Codomain = F;

    #[inline]
    fn apply<I : Instance<Loc<F, N>>>(&self, y : I) -> F {
        let Convolution(ref ind, SupportProductFirst(ref cut, ref gaussian)) = self;
        let a = cut.r.value();
        let b = ind.r.value();
        let σ = gaussian.variance.value().sqrt();
        let t = F::SQRT_2 * σ;
        let c = 0.5; // 1/(σ√(2π)) · σ√(π/2) = 1/2

        // This is just a product of one-dimensional versions.
        y.cow().product_map(|x| {
            let c1 = -(a.min(b + x)); // = (-a).max(-x-b)
            let c2 = a.min(b - x);
            if c1 >= c2 {
                0.0
            } else {
                let e1 = F::cast_from(erf((c1 / t).as_()));
                let e2 = F::cast_from(erf((c2 / t).as_()));
                debug_assert!(e2 >= e1);
                c * (e2 - e1)
            }
        })
    }
}

/// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$
/// and $f$ is a gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the
/// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript.
#[replace_float_literals(F::cast_from(literal))]
impl<'a, F : Float, R, C, S, const N : usize> DifferentiableImpl<Loc<F, N>>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    type Derivative = Loc<F, N>;

    /// Although a differential is implemented, the function is not everywhere differentiable.
    #[inline]
    fn differential_impl<I : Instance<Loc<F, N>>>(&self, y0 : I) -> Loc<F, N> {
        let Convolution(ref ind, SupportProductFirst(ref cut, ref gaussian)) = self;
        let y = y0.cow();
        let a = cut.r.value();
        let b = ind.r.value();
        let σ = gaussian.variance.value().sqrt();
        let t = F::SQRT_2 * σ;
        let c = 0.5; // 1/(σ√(2π)) · σ√(π/2) = 1/2
        let c_mul_erf_scale_div_t = c * F::FRAC_2_SQRT_PI / t;

        // Calculate the values for all component functions of the
        // product. This is just the loop from `apply` above.
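        // For a product h(y) = ∏_i φ_i(y_i), the derivative components are
        //   ∂h/∂y_j (y) = φ_j'(y_j) ∏_{i ≠ j} φ_i(y_i),
        // so the component values are needed alongside the component derivatives.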
        let unscaled_vs = y.map(|x| {
            let c1 = -(a.min(b + x)); // = (-a).max(-x-b)
            let c2 = a.min(b - x);
            if c1 >= c2 {
                0.0
            } else {
                let e1 = F::cast_from(erf((c1 / t).as_()));
                let e2 = F::cast_from(erf((c2 / t).as_()));
                debug_assert!(e2 >= e1);
                c * (e2 - e1)
            }
        });

        // This computes the gradient for each coordinate.
        product_differential(&*y, &unscaled_vs, |x| {
            let c1 = -(a.min(b + x)); // = (-a).max(-x-b)
            let c2 = a.min(b - x);
            if c1 >= c2 {
                0.0
            } else {
                // erf'(z) = (2/√π) exp(-z²), and the chain rule contributes an extra
                // factor ∓1/(√2σ) = ∓1/t from differentiating b±x inside c_2 and c_1
                // (the minus sign swaps the order of de2 and de1 in the final calculation).
                let de1 = if b + x < a {
                    (-((b+x)/t).powi(2)).exp()
                } else {
                    0.0
                };
                let de2 = if b - x < a {
                    (-((b-x)/t).powi(2)).exp()
                } else {
                    0.0
                };
                c_mul_erf_scale_div_t * (de1 - de2)
            }
        })
    }
}

#[replace_float_literals(F::cast_from(literal))]
impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L1>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {
    type FloatType = F;

    fn lipschitz_factor(&self, L1 : L1) -> Option<F> {
        // To get the product Lipschitz factor, we note that for any φ_i, we have
        //   ∏_{i=1}^N φ_i(x_i) - ∏_{i=1}^N φ_i(y_i)
        //     = [φ_1(x_1)-φ_1(y_1)] ∏_{i=2}^N φ_i(x_i)
        //       + φ_1(y_1)[∏_{i=2}^N φ_i(x_i) - ∏_{i=2}^N φ_i(y_i)]
        //     = ∑_{j=1}^N [φ_j(x_j)-φ_j(y_j)] ∏_{i > j} φ_i(x_i) ∏_{i < j} φ_i(y_i).
        // Thus
        //   |∏_{i=1}^N φ_i(x_i) - ∏_{i=1}^N φ_i(y_i)|
        //     ≤ ∑_{j=1}^N |φ_j(x_j)-φ_j(y_j)| ∏_{i ≠ j} max |φ_i|.
        //
        // Thus we need 1D Lipschitz factors, and the maximum, for φ = θ * ψ.
        //
        // We have
        //   θ * ψ(x) = 0                                             if c_1(x) ≥ c_2(x),
        //            = (1/2)[erf(c_2(x)/(√2σ)) - erf(c_1(x)/(√2σ))]  if c_1(x) < c_2(x),
        // where c_1(x) = max{-x-b, -a} = -min{b+x, a} and c_2(x) = min{b-x, a}, the
        // gaussian normalisation factor is already absorbed into the 1/2, and
        // erf(s) = (2/√π) ∫_0^s e^{-t²} dt.
        // Thus, if c_1(x) < c_2(x) and c_1(y) < c_2(y), we have
        //   θ * ψ(x) - θ * ψ(y) = (1/√π)[ ∫_{c_2(y)/(√2σ)}^{c_2(x)/(√2σ)} e^{-t²} dt
        //                               - ∫_{c_1(y)/(√2σ)}^{c_1(x)/(√2σ)} e^{-t²} dt ].
        // Since the c_i are 1-Lipschitz,
        //   |θ * ψ(x) - θ * ψ(y)| ≤ (1/√π) (1/(√2σ)) (|c_1(x)-c_1(y)| + |c_2(x)-c_2(y)|)
        //                         ≤ 2 (1/√π) (1/(√2σ)) |x-y|
        //                         = √2/(√πσ) |x-y|.
        //
        // For the product we also need the value θ * ψ(0), which is
        //   (1/2)[erf(min{a,b}/(√2σ)) - erf(max{-b,-a}/(√2σ))]
        //     = (1/2)[erf(min{a,b}/(√2σ)) - erf(-min{a,b}/(√2σ))]
        //     = erf(min{a,b}/(√2σ)).
        //
        // If c_1(x) ≥ c_2(x), then x ∉ [-(a+b), a+b]. If also y is outside that range,
        // then θ * ψ(x) = θ * ψ(y). If only y is in the range [-(a+b), a+b], we can
        // replace x by -(a+b) or a+b, whichever is closer to y, and still have
        // θ * ψ(x) = 0. Thus the same calculations as above work for the Lipschitz factor.
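        // Putting these together, a Lipschitz factor with respect to the ℓ¹ norm is the
        // one-dimensional factor √2/(√πσ) times the maximum value raised to the power N-1,
        // i.e., √2/(√πσ) · erf(min{a,b}/(√2σ))^{N-1}, which is what is computed below.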
        let Convolution(ref ind, SupportProductFirst(ref cut, ref gaussian)) = self;
        let a = cut.r.value();
        let b = ind.r.value();
        let σ = gaussian.variance.value().sqrt();
        let π = F::PI;
        let t = F::SQRT_2 * σ;
        let l1d = F::SQRT_2 / (π.sqrt() * σ);
        let e0 = F::cast_from(erf((a.min(b) / t).as_()));
        Some(l1d * e0.powi(N as i32-1))
    }
}

/*
impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L2>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {
    type FloatType = F;

    #[inline]
    fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
        self.lipschitz_factor(L1).map(|l1| l1 * <S::Type>::cast_from(N).sqrt())
    }
}
*/

impl<F : Float, R, C, S, const N : usize>
Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    #[inline]
    fn get_r(&self) -> F {
        let Convolution(ref ind, SupportProductFirst(ref cut, ..)) = self;
        ind.r.value() + cut.r.value()
    }
}

impl<F : Float, R, C, S, const N : usize> Support<F, N>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    #[inline]
    fn support_hint(&self) -> Cube<F, N> {
        let r = self.get_r();
        array_init(|| [-r, r]).into()
    }

    #[inline]
    fn in_support(&self, y : &Loc<F, N>) -> bool {
        let r = self.get_r();
        y.iter().all(|x| x.abs() <= r)
    }

    #[inline]
    fn bisection_hint(&self, cube : &Cube<F, N>) -> [Option<F>; N] {
        let r = self.get_r();
        // From c1 = -a.min(b + x) and c2 = a.min(b - x) with c_1 < c_2,
        // solve bounds for x. That is, 0 ≤ a.min(b + x) + a.min(b - x).
        // If b + x ≤ a and b - x ≤ a, the sum is 2b ≥ 0.
        // If b + x ≥ a and b - x ≥ a, the sum is 2a ≥ 0.
        // If b + x ≤ a and b - x ≥ a, the sum is b + x + a ⟹ need x ≥ -a - b = -r.
        // If b + x ≥ a and b - x ≤ a, the sum is a + b - x ⟹ need x ≤ a + b = r.
        cube.map(|c, d| symmetric_peak_hint(r, c, d))
    }
}

impl<F : Float, R, C, S, const N : usize> GlobalAnalysis<F, Bounds<F>>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    #[inline]
    fn global_analysis(&self) -> Bounds<F> {
        Bounds(F::ZERO, self.apply(Loc::ORIGIN))
    }
}

impl<F : Float, R, C, S, const N : usize> LocalAnalysis<F, Bounds<F>, N>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    #[inline]
    fn local_analysis(&self, cube : &Cube<F, N>) -> Bounds<F> {
        // The function is maximised/minimised where the componentwise absolute values
        // are minimised/maximised.
        let lower = self.apply(cube.maxnorm_point());
        let upper = self.apply(cube.minnorm_point());
        Bounds(lower, upper)
    }
}
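
// A numerical sanity-check sketch of the one-dimensional closed form used in `apply`
// above: the expression (1/2)[erf(c₂/t) - erf(c₁/t)] should agree with direct quadrature
// of the normalised gaussian, cut to [-a, a] and convolved with χ_{[-b, b]}. The
// test-module name, parameter values, and tolerance are illustrative only.
#[cfg(test)]
mod cut_gaussian_convolution_tests {
    use float_extras::f64::erf;

    /// Midpoint-rule quadrature of `f` over `[lo, hi]` with `n` subintervals.
    fn integrate(lo : f64, hi : f64, n : usize, f : impl Fn(f64) -> f64) -> f64 {
        let h = (hi - lo) / n as f64;
        (0..n).map(|i| f(lo + (i as f64 + 0.5) * h)).sum::<f64>() * h
    }

    #[test]
    fn one_dimensional_closed_form() {
        let (a, b, σ) = (0.4, 0.7, 0.3);
        let t = 2.0_f64.sqrt() * σ;
        // Normalised one-dimensional gaussian density of variance σ².
        let normal = |u : f64| {
            (-u * u / (2.0 * σ * σ)).exp() / (σ * (2.0 * std::f64::consts::PI).sqrt())
        };

        for &x in &[-1.2, -0.5, 0.0, 0.3, 0.9_f64] {
            // Closed form, as in `apply` above.
            let c1 = -(a.min(b + x));
            let c2 = a.min(b - x);
            let closed = if c1 >= c2 { 0.0 } else { 0.5 * (erf(c2 / t) - erf(c1 / t)) };

            // Direct integration of ∫ χ_{[-b,b]}(x-u) χ_{[-a,a]}(u) normal(u) du.
            let (lo, hi) = ((-a).max(x - b), a.min(x + b));
            let direct = if lo >= hi { 0.0 } else { integrate(lo, hi, 100_000, &normal) };

            assert!((closed - direct).abs() < 1e-6);
        }
    }
}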