diff -r 6105b5cd8d89 -r f0e8704d3f0e src/kernels/gaussian.rs
--- a/src/kernels/gaussian.rs	Tue Aug 01 10:25:09 2023 +0300
+++ b/src/kernels/gaussian.rs	Mon Feb 17 13:54:53 2025 -0500
@@ -17,9 +17,15 @@
     Weighted,
     Bounded,
 };
-use alg_tools::mapping::Apply;
+use alg_tools::mapping::{
+    Mapping,
+    Instance,
+    Differential,
+    DifferentiableImpl,
+};
 use alg_tools::maputil::array_init;
+use crate::types::*;
 use crate::fourier::Fourier;
 use super::base::*;
 use super::ball_indicator::CubeIndicator;

@@ -58,28 +64,104 @@

 #[replace_float_literals(S::Type::cast_from(literal))]
-impl<'a, S, const N : usize> Apply<&'a Loc<S::Type, N>> for Gaussian<S, N>
-where S : Constant {
-    type Output = S::Type;
+impl<'a, S, const N : usize> Mapping<Loc<S::Type, N>> for Gaussian<S, N>
+where
+    S : Constant
+{
+    type Codomain = S::Type;
+
     // This is normalised neither to have value 1 at zero nor to have integral 1
     // (unless the cut-off ε=0).
     #[inline]
-    fn apply(&self, x : &'a Loc<S::Type, N>) -> Self::Output {
-        let d_squared = x.norm2_squared();
+    fn apply<I : Instance<Loc<S::Type, N>>>(&self, x : I) -> Self::Codomain {
+        let d_squared = x.eval(|x| x.norm2_squared());
         let σ2 = self.variance.value();
         let scale = self.scale();
         (-d_squared / (2.0 * σ2)).exp() / scale
     }
 }

-impl<S, const N : usize> Apply<Loc<S::Type, N>> for Gaussian<S, N>
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'a, S, const N : usize> DifferentiableImpl<Loc<S::Type, N>> for Gaussian<S, N>
+where S : Constant {
+    type Derivative = Loc<S::Type, N>;
+
+    #[inline]
+    fn differential_impl<I : Instance<Loc<S::Type, N>>>(&self, x0 : I) -> Self::Derivative {
+        let x = x0.cow();
+        let f = -self.apply(&*x) / self.variance.value();
+        *x * f
+    }
+}
+
+
+// To calculate the Lipschitz factors, we consider
+//    f(t)    = e^{-t²/2},
+//    f'(t)   = -t f(t),     whose maximal absolute value is at t=1 by f''(t)=0,
+//    f''(t)  = (t²-1)f(t),  whose critical points by f'''(t)=0 are t=0 and t=√3,
+//                           with the maximal absolute value |f''(0)|=1 at t=0,
+//    f'''(t) = (3t-t³)f(t).
+// So f has the Lipschitz factor L=|f'(1)|=e^{-1/2}, and f' the Lipschitz factor L'=|f''(0)|=1.
+//
+// Now g(x) = Cf(‖x‖/σ) for a scaling factor C is the Gaussian.
+// Thus ‖g(x)-g(y)‖ = C‖f(‖x‖/σ)-f(‖y‖/σ)‖ ≤ (C/σ)L‖x-y‖,
+// so g has the Lipschitz factor (C/σ)|f'(1)| = (C/σ)exp(-0.5).
+//
+// Also ∇g(x)  = Cx/(σ‖x‖)f'(‖x‖/σ)
+//             = -(C/σ²)xf(‖x‖/σ)
+//             = -(C/σ)(x/σ)f(‖x‖/σ),
+//      ∇²g(x) = -(C/σ)[Id/σ f(‖x‖/σ) + x ⊗ x/(σ²‖x‖) f'(‖x‖/σ)]
+//             = (C/σ²)[-Id + x ⊗ x/σ²]f(‖x‖/σ).
+// Thus ‖∇²g(x)‖ = (C/σ²)‖-Id + x ⊗ x/σ²‖f(‖x‖/σ), where the symmetric matrix
+// -Id + x ⊗ x/σ² has the eigenvalues -1 (orthogonally to x) and ‖x‖²/σ²-1 (along x), so
+//      ‖-Id + x ⊗ x/σ²‖ = max{1, |‖x‖²/σ²-1|}.
+// This means that ‖∇²g(x)‖ = (C/σ²)max{f(‖x‖/σ), |f''(‖x‖/σ)|}, which is maximised at x=0
+// with the value C/σ²; the other critical point ‖x‖/σ=√3 only gives
+// (C/σ²)|f''(√3)| = (C/σ²)2e^{-3/2} < C/σ².
+// Hence the Lipschitz factor of ∇g is C/σ².

+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N>
 where S : Constant {
-    type Output = S::Type;
-    // This is normalised neither to have value 1 at zero nor to have integral 1
-    // (unless the cut-off ε=0).
-    #[inline]
-    fn apply(&self, x : Loc<S::Type, N>) -> Self::Output {
-        self.apply(&x)
+    type FloatType = S::Type;
+    fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
+        Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt()))
+    }
+}
+
+
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'a, S : Constant, const N : usize> Lipschitz<L2>
+for Differential<'a, Loc<S::Type, N>, Gaussian<S, N>> {
+    type FloatType = S::Type;
+
+    fn lipschitz_factor(&self, _l2 : L2) -> Option<Self::FloatType> {
+        let g = self.base_fn();
+        let σ2 = g.variance.value();
+        let scale = g.scale();
+        Some(1.0 / (σ2 * scale))
+    }
+}
+
+// From the above, norm bounds on the differential can also be calculated: the maximum is
+// achieved for f' at t=1, i.e., the bound for f is |f'(1)|.
+// For g the bound is then |C/σ f'(1)|.
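+// Since C = 1/scale and |f'(1)| = e^{-1/2} here, this works out to e^{-1/2}/(σ scale).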
+// It follows that the norm bounds on the differential are just the Lipschitz
+// factors of the undifferentiated function, given how the latter are calculated above.
+
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'b, S : Constant, const N : usize> NormBounded<L2>
+for Differential<'b, Loc<S::Type, N>, Gaussian<S, N>> {
+    type FloatType = S::Type;
+
+    fn norm_bound(&self, _l2 : L2) -> S::Type {
+        self.base_fn().lipschitz_factor(L2).unwrap()
+    }
+}
+
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'b, 'a, S : Constant, const N : usize> NormBounded<L2>
+for Differential<'b, Loc<S::Type, N>, &'a Gaussian<S, N>> {
+    type FloatType = S::Type;
+
+    fn norm_bound(&self, _l2 : L2) -> S::Type {
+        self.base_fn().lipschitz_factor(L2).unwrap()
     }
 }

@@ -169,19 +251,19 @@
                                                                        Gaussian<S, N>>;

-/// This implements $χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$
-/// where $a,b>0$ and $f$ is a gaussian kernel on $ℝ^n$.
+/// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is
+/// a Gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript.
 #[replace_float_literals(F::cast_from(literal))]
-impl<'a, F : Float, R, C, S, const N : usize> Apply<&'a Loc<F, N>>
+impl<'a, F : Float, R, C, S, const N : usize> Mapping<Loc<F, N>>
 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
 where R : Constant<Type=F>,
       C : Constant<Type=F>,
       S : Constant<Type=F> {

-    type Output = F;
+    type Codomain = F;

     #[inline]
-    fn apply(&self, y : &'a Loc<F, N>) -> F {
+    fn apply<I : Instance<Loc<F, N>>>(&self, y : I) -> F {
         let Convolution(ref ind,
                         SupportProductFirst(ref cut,
                                             ref gaussian)) = self;
         let a = cut.r.value();
         let b = ind.r.value();
         let σ = gaussian.variance.value().sqrt();
         let t = F::SQRT_2 * σ;
         let c = 0.5; // 1/(σ√(2π)) * σ√(π/2) = 1/2
@@ -192,7 +274,7 @@

         // This is just a product of one-dimensional versions
-        y.product_map(|x| {
+        y.cow().product_map(|x| {
             let c1 = -(a.min(b + x)); //(-a).max(-x-b);
             let c2 = a.min(b - x);
             if c1 >= c2 {
@@ -207,20 +289,143 @@
     }
 }

-impl<F : Float, R, C, S, const N : usize> Apply<Loc<F, N>>
+/// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$
+/// and $f$ is a Gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the
+/// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript.
+#[replace_float_literals(F::cast_from(literal))]
+impl<'a, F : Float, R, C, S, const N : usize> DifferentiableImpl<Loc<F, N>>
 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
 where R : Constant<Type=F>,
       C : Constant<Type=F>,
       S : Constant<Type=F> {

-    type Output = F;
+    type Derivative = Loc<F, N>;

+    /// Although implemented, this function is not differentiable everywhere.
     #[inline]
-    fn apply(&self, y : Loc<F, N>) -> F {
-        self.apply(&y)
+    fn differential_impl<I : Instance<Loc<F, N>>>(&self, y0 : I) -> Loc<F, N> {
+        let Convolution(ref ind,
+                        SupportProductFirst(ref cut,
+                                            ref gaussian)) = self;
+        let y = y0.cow();
+        let a = cut.r.value();
+        let b = ind.r.value();
+        let σ = gaussian.variance.value().sqrt();
+        let t = F::SQRT_2 * σ;
+        let c = 0.5; // 1/(σ√(2π)) * σ√(π/2) = 1/2
+        let c_mul_erf_scale_div_t = c * F::FRAC_2_SQRT_PI / t;
+
+        // Calculate the values for all component functions of the
+        // product. This is just the loop from apply above.
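+        // The gradient is then assembled coordinatewise by the product rule for the
+        // component functions φ_i: ∂_j ∏_i φ_i(y_i) = φ_j'(y_j) ∏_{i≠j} φ_i(y_i),
+        // combining these values with the one-dimensional derivatives below.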
+        let unscaled_vs = y.map(|x| {
+            let c1 = -(a.min(b + x)); //(-a).max(-x-b);
+            let c2 = a.min(b - x);
+            if c1 >= c2 {
+                0.0
+            } else {
+                let e1 = F::cast_from(erf((c1 / t).as_()));
+                let e2 = F::cast_from(erf((c2 / t).as_()));
+                debug_assert!(e2 >= e1);
+                c * (e2 - e1)
+            }
+        });
+        // This computes the gradient for each coordinate.
+        product_differential(&*y, &unscaled_vs, |x| {
+            let c1 = -(a.min(b + x)); //(-a).max(-x-b);
+            let c2 = a.min(b - x);
+            if c1 >= c2 {
+                0.0
+            } else {
+                // erf'(z) = (2/√π)*exp(-z²), and we get an extra factor ±1/(√2σ) = ±1/t
+                // from the chain rule (the minus sign comes from inside c_1 or c_2, and
+                // swaps the order of de1 and de2 in the final calculation).
+                let de1 = if b + x < a {
+                    (-((b+x)/t).powi(2)).exp()
+                } else {
+                    0.0
+                };
+                let de2 = if b - x < a {
+                    (-((b-x)/t).powi(2)).exp()
+                } else {
+                    0.0
+                };
+                c_mul_erf_scale_div_t * (de1 - de2)
+            }
+        })
+    }
+}
+
+#[replace_float_literals(F::cast_from(literal))]
+impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L1>
+for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
+where R : Constant<Type=F>,
+      C : Constant<Type=F>,
+      S : Constant<Type=F> {
+    type FloatType = F;
+
+    fn lipschitz_factor(&self, L1 : L1) -> Option<Self::FloatType> {
+        // To get the product Lipschitz factor, we note that for any φ_i, we have
+        //   ∏_{i=1}^N φ_i(x_i) - ∏_{i=1}^N φ_i(y_i)
+        //     = [φ_1(x_1)-φ_1(y_1)] ∏_{i=2}^N φ_i(x_i)
+        //       + φ_1(y_1)[∏_{i=2}^N φ_i(x_i) - ∏_{i=2}^N φ_i(y_i)]
+        //     = ∑_{j=1}^N [φ_j(x_j)-φ_j(y_j)] ∏_{i > j} φ_i(x_i) ∏_{i < j} φ_i(y_i).
+        // Thus
+        //   |∏_{i=1}^N φ_i(x_i) - ∏_{i=1}^N φ_i(y_i)|
+        //     ≤ ∑_{j=1}^N |φ_j(x_j)-φ_j(y_j)| ∏_{i ≠ j} max |φ_i|.
+        //
+        // Therefore we need the 1D Lipschitz factor, and the maximum, of φ = θ * ψ.
+        //
+        // We have
+        //   θ * ψ(x) = 0                                              if c_1(x) ≥ c_2(x),
+        //   θ * ψ(x) = (1/2)[erf(c_2(x)/(√2σ)) - erf(c_1(x)/(√2σ))]   if c_1(x) < c_2(x),
+        // where c_1(x) = max{-x-b,-a} = -min{b+x,a} and c_2(x) = min{b-x,a}, the factor 1/2
+        // absorbs the Gaussian normalisation, and erf(s) = (2/√π) ∫_0^s e^{-t²} dt.
+        // Thus, if c_1(x) < c_2(x) and c_1(y) < c_2(y), we have
+        //   θ * ψ(x) - θ * ψ(y) = (1/√π)[∫_{c_1(x)/(√2σ)}^{c_1(y)/(√2σ)} e^{-t²} dt
+        //                                - ∫_{c_2(x)/(√2σ)}^{c_2(y)/(√2σ)} e^{-t²} dt].
+        // Thus
+        //   |θ * ψ(x) - θ * ψ(y)| ≤ (1/√π)/(√2σ)(|c_1(x)-c_1(y)| + |c_2(x)-c_2(y)|)
+        //                         ≤ 2(1/√π)/(√2σ)|x-y|
+        //                         = √2/(√πσ)|x-y|.
+        //
+        // For the product we also need the maximum value of θ * ψ, attained at zero:
+        //   θ * ψ(0) = (1/2)[erf(min{a,b}/(√2σ)) - erf(max{-b,-a}/(√2σ))]
+        //            = (1/2)[erf(min{a,b}/(√2σ)) - erf(-min{a,b}/(√2σ))]
+        //            = erf(min{a,b}/(√2σ)).
+        //
+        // If c_1(x) ≥ c_2(x), then x ∉ [-(a+b), a+b]. If also y is outside that range,
+        // θ * ψ(x) = θ * ψ(y). If only y is in the range [-(a+b), a+b], we can replace
+        // x by -(a+b) or a+b, whichever is closer to y, and still have θ * ψ(x) = 0.
+        // Thus the same calculations as above give the Lipschitz factor.
+        let Convolution(ref ind,
+                        SupportProductFirst(ref cut,
+                                            ref gaussian)) = self;
+        let a = cut.r.value();
+        let b = ind.r.value();
+        let σ = gaussian.variance.value().sqrt();
+        let π = F::PI;
+        let t = F::SQRT_2 * σ;
+        let l1d = F::SQRT_2 / (π.sqrt() * σ);
+        let e0 = F::cast_from(erf((a.min(b) / t).as_()));
+        Some(l1d * e0.powi(N as i32 - 1))
+    }
+}
+
+/*
+impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L2>
+for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
+where R : Constant<Type=F>,
+      C : Constant<Type=F>,
+      S : Constant<Type=F> {
+    type FloatType = F;
+    #[inline]
+    fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
+        self.lipschitz_factor(L1).map(|l1| l1 * <F as CastFrom<usize>>::cast_from(N).sqrt())
+    }
+}
+*/
+

 impl<F : Float, R, C, S, const N : usize>
 Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
 where R : Constant<Type=F>,