src/kernels/gaussian.rs

branch
dev
changeset 35
b087e3eab191
parent 34
efa60bc4f743
child 38
0f59c0d02e13
--- a/src/kernels/gaussian.rs	Thu Aug 29 00:00:00 2024 -0500
+++ b/src/kernels/gaussian.rs	Tue Dec 31 09:25:45 2024 -0500
@@ -17,10 +17,15 @@
     Weighted,
     Bounded,
 };
-use alg_tools::mapping::{Apply, Differentiable};
+use alg_tools::mapping::{
+    Mapping,
+    Instance,
+    Differential,
+    DifferentiableImpl,
+};
 use alg_tools::maputil::array_init;
 
-use crate::types::Lipschitz;
+use crate::types::*;
 use crate::fourier::Fourier;
 use super::base::*;
 use super::ball_indicator::CubeIndicator;
@@ -59,63 +64,108 @@
 
 
 #[replace_float_literals(S::Type::cast_from(literal))]
-impl<'a, S, const N : usize> Apply<&'a Loc<S::Type, N>> for Gaussian<S, N>
-where S : Constant {
-    type Output = S::Type;
+impl<'a, S, const N : usize> Mapping<Loc<S::Type, N>> for Gaussian<S, N>
+where
+    S : Constant
+{
+    type Codomain = S::Type;
+
     // This is not normalised to neither to have value 1 at zero or integral 1
     // (unless the cut-off ε=0).
     #[inline]
-    fn apply(&self, x : &'a Loc<S::Type, N>) -> Self::Output {
-        let d_squared = x.norm2_squared();
+    fn apply<I : Instance<Loc<S::Type, N>>>(&self, x : I) -> Self::Codomain {
+        let d_squared = x.eval(|x| x.norm2_squared());
         let σ2 = self.variance.value();
         let scale = self.scale();
         (-d_squared / (2.0 * σ2)).exp() / scale
     }
 }
 
-impl<S, const N : usize> Apply<Loc<S::Type, N>> for Gaussian<S, N>
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'a, S, const N : usize> DifferentiableImpl<Loc<S::Type, N>> for Gaussian<S, N>
 where S : Constant {
-    type Output = S::Type;
+    type Derivative = Loc<S::Type, N>;
+
     #[inline]
-    fn apply(&self, x : Loc<S::Type, N>) -> Self::Output {
-        self.apply(&x)
+    fn differential_impl<I : Instance<Loc<S::Type, N>>>(&self, x0 : I) -> Self::Derivative {
+        let x = x0.cow();
+        let f = -self.apply(&*x) / self.variance.value();
+        *x * f
     }
 }
 
-#[replace_float_literals(S::Type::cast_from(literal))]
-impl<'a, S, const N : usize> Differentiable<&'a Loc<S::Type, N>> for Gaussian<S, N>
-where S : Constant {
-    type Output = Loc<S::Type, N>;
-    #[inline]
-    fn differential(&self, x : &'a Loc<S::Type, N>) -> Self::Output {
-        x * (self.apply(x) / self.variance.value())
-    }
-}
 
-impl<S, const N : usize> Differentiable<Loc<S::Type, N>> for Gaussian<S, N>
-where S : Constant {
-    type Output = Loc<S::Type, N>;
-    // This is not normalised to neither to have value 1 at zero or integral 1
-    // (unless the cut-off ε=0).
-    #[inline]
-    fn differential(&self, x : Loc<S::Type, N>) -> Self::Output {
-        x * (self.apply(&x) / self.variance.value())
-    }
-}
+// To calculate the Lipschitz factors, we consider
+// f(t)    = e^{-t²/2}
+// f'(t)   = -t f(t)       where |f'| has max at t=1 by f''(t)=0
+// f''(t)  = (t²-1)f(t)    which has max at t=√3 by f'''(t)=0
+// f'''(t) = -(t³-3t)f(t)
+// So f has the Lipschitz factor L=|f'(1)|, and f' has the Lipschitz factor L'=f''(√3) (NOTE(review): |f''(0)|=1 is larger; verify).
+//
+// Now g(x) = Cf(‖x‖/σ) for a scaling factor C is the Gaussian.
+// Thus ‖g(x)-g(y)‖ = C‖f(‖x‖/σ)-f(‖y‖/σ)‖ ≤ (C/σ)L‖x-y‖,
+// so g has the Lipschitz factor (C/σ)|f'(1)| = (C/σ)exp(-0.5).
+//
+// Also ∇g(x)= Cx/(σ‖x‖)f'(‖x‖/σ)       (*)
+//            = -(C/σ²)xf(‖x‖/σ)
+//            = -C/σ (x/σ) f(‖x/σ‖)
+// ∇²g(x) = -(C/σ)[Id/σ f(‖x‖/σ) + x ⊗ x/(σ²‖x‖) f'(‖x‖/σ)]
+//        = (C/σ²)[-Id + x ⊗ x/σ²]f(‖x‖/σ).
+// Thus ‖∇²g(x)‖ = (C/σ²)‖-Id + x ⊗ x/σ²‖f(‖x‖/σ), where
+// ‖-Id + x ⊗ x/σ²‖ = ‖[-Id + x ⊗ x/σ²](x/‖x‖)‖ = |-1 + ‖x‖²/σ²|.
+// This means that ‖∇²g(x)‖ = (C/σ²)|f''(‖x‖/σ)|, which is maximised with ‖x‖/σ=√3.
+// Hence the Lipschitz factor of ∇g is (C/σ²)f''(√3) = (C/σ²)2e^{-3/2} (NOTE(review): ‖∇²g(0)‖ = C/σ² is larger; verify).
 
 #[replace_float_literals(S::Type::cast_from(literal))]
 impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N>
 where S : Constant {
     type FloatType = S::Type;
     fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
-        // f(x)=f_1(‖x‖_2/σ) * √(2π) / √(2πσ)^N, where f_1 is one-dimensional Gaussian with
-        // variance 1. The Lipschitz factor of f_1 is e^{-1/2}/√(2π), see, e.g.,
-        // https://math.stackexchange.com/questions/3630967/is-the-gaussian-density-lipschitz-continuous
-        // Thus the Lipschitz factor we want is e^{-1/2} / (√(2πσ)^N * σ).
         Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt()))
     }
 }
 
+
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'a, S : Constant, const N : usize> Lipschitz<L2>
+for Differential<'a, Loc<S::Type, N>, Gaussian<S, N>> {
+    type FloatType = S::Type;
+    
+    fn lipschitz_factor(&self, _l2 : L2) -> Option<S::Type> {
+        let g = self.base_fn();
+        let σ2 = g.variance.value();
+        let scale = g.scale();
+        Some(2.0*(-3.0/2.0).exp()/(σ2*scale))
+    }
+}
+
+// From above, norm bounds on the differential can be calculated as achieved
+// for f' at t=1, i.e., the bound is |f'(1)|.
+// For g then |C/σ f'(1)|.
+// It follows that the norm bounds on the differential are just the Lipschitz
+// factors of the undifferentiated function, given how the latter is calculated above.
+
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'b, S : Constant, const N : usize> NormBounded<L2>
+for Differential<'b, Loc<S::Type, N>, Gaussian<S, N>> {
+    type FloatType = S::Type;
+    
+    fn norm_bound(&self, _l2 : L2) -> S::Type {
+        self.base_fn().lipschitz_factor(L2).unwrap()
+    }
+}
+
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'b, 'a, S : Constant, const N : usize> NormBounded<L2>
+for Differential<'b, Loc<S::Type, N>, &'a Gaussian<S, N>> {
+    type FloatType = S::Type;
+    
+    fn norm_bound(&self, _l2 : L2) -> S::Type {
+        self.base_fn().lipschitz_factor(L2).unwrap()
+    }
+}
+
+
 #[replace_float_literals(S::Type::cast_from(literal))]
 impl<'a, S, const N : usize> Gaussian<S, N>
 where S : Constant {
@@ -204,16 +254,16 @@
 /// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is
 /// a gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript.
 #[replace_float_literals(F::cast_from(literal))]
-impl<'a, F : Float, R, C, S, const N : usize> Apply<&'a Loc<F, N>>
+impl<'a, F : Float, R, C, S, const N : usize> Mapping<Loc<F, N>>
 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
 where R : Constant<Type=F>,
       C : Constant<Type=F>,
       S : Constant<Type=F> {
 
-    type Output = F;
+    type Codomain = F;
 
     #[inline]
-    fn apply(&self, y : &'a Loc<F, N>) -> F {
+    fn apply<I : Instance<Loc<F, N>>>(&self, y : I) -> F {
         let Convolution(ref ind,
                         SupportProductFirst(ref cut,
                                             ref gaussian)) = self;
@@ -224,7 +274,7 @@
         let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2
         
         // This is just a product of one-dimensional versions
-        y.product_map(|x| {
+        y.cow().product_map(|x| {
             let c1 = -(a.min(b + x)); //(-a).max(-x-b);
             let c2 = a.min(b - x);
             if c1 >= c2 {
@@ -239,43 +289,31 @@
     }
 }
 
-impl<F : Float, R, C, S, const N : usize> Apply<Loc<F, N>>
+/// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$
+/// and $f$ is a gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the
+/// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript.
+#[replace_float_literals(F::cast_from(literal))]
+impl<'a, F : Float, R, C, S, const N : usize> DifferentiableImpl<Loc<F, N>>
 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
 where R : Constant<Type=F>,
       C : Constant<Type=F>,
       S : Constant<Type=F> {
 
-    type Output = F;
-
-    #[inline]
-    fn apply(&self, y : Loc<F, N>) -> F {
-        self.apply(&y)
-    }
-}
+    type Derivative = Loc<F, N>;
 
-/// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$
-/// and $f$ is a gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the
-/// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript.
-#[replace_float_literals(F::cast_from(literal))]
-impl<'a, F : Float, R, C, S, const N : usize> Differentiable<&'a Loc<F, N>>
-for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
-where R : Constant<Type=F>,
-      C : Constant<Type=F>,
-      S : Constant<Type=F> {
-
-    type Output = Loc<F, N>;
-
+    /// Although implemented, this function is not differentiable.
     #[inline]
-    fn differential(&self, y : &'a Loc<F, N>) -> Loc<F, N> {
+    fn differential_impl<I : Instance<Loc<F, N>>>(&self, y0 : I) -> Loc<F, N> {
         let Convolution(ref ind,
                         SupportProductFirst(ref cut,
                                             ref gaussian)) = self;
+        let y = y0.cow();
         let a = cut.r.value();
         let b = ind.r.value();
         let σ = gaussian.variance.value().sqrt();
         let t = F::SQRT_2 * σ;
         let c = 0.5; // 1/(σ√(2π) * σ√(π/2) = 1/2
-        let c_div_t = c / t;
+        let c_mul_erf_scale_div_t = c * F::FRAC_2_SQRT_PI / t;
         
         // Calculate the values for all component functions of the
         // product. This is just the loop from apply above.
@@ -292,35 +330,31 @@
             }
         });
         // This computes the gradient for each coordinate
-        product_differential(y, &unscaled_vs, |x| {
+        product_differential(&*y, &unscaled_vs, |x| {
             let c1 = -(a.min(b + x)); //(-a).max(-x-b);
             let c2 = a.min(b - x);
             if c1 >= c2 {
                 0.0
             } else {
-                // erf'(z) = (2/√π)*exp(-z^2), and we get extra factor -1/(√2*σ) = -1/t
-                // from the chain rule (the minus comes from inside c_1 or c_2).
-                let de1 = (-(c1/t).powi(2)).exp();
-                let de2 = (-(c2/t).powi(2)).exp();
-                c_div_t * (de1 - de2)
+                // erf'(z) = (2/√π)*exp(-z^2), and we get extra factor -1/(√2*σ) = -1/t
+                // from the chain rule (the minus comes from inside c_1 or c_2, and changes the
+                // order of de2 and de1 in the final calculation).
+                let de1 = if b + x < a {
+                    (-((b+x)/t).powi(2)).exp()
+                } else {
+                    0.0
+                };
+                let de2 = if b - x < a {
+                    (-((b-x)/t).powi(2)).exp()
+                } else {
+                    0.0
+                };
+                c_mul_erf_scale_div_t * (de1 - de2)
             }
         })
     }
 }
 
-impl<F : Float, R, C, S, const N : usize> Differentiable<Loc<F, N>>
-for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
-where R : Constant<Type=F>,
-      C : Constant<Type=F>,
-      S : Constant<Type=F> {
-
-    type Output = Loc<F, N>;
-
-    #[inline]
-    fn differential(&self, y : Loc<F, N>) -> Loc<F, N> {
-        self.differential(&y)
-    }
-}
 
 #[replace_float_literals(F::cast_from(literal))]
 impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L1>
@@ -378,6 +412,7 @@
     }
 }
 
+/*
 impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L2>
 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
 where R : Constant<Type=F>,
@@ -389,6 +424,7 @@
         self.lipschitz_factor(L1).map(|l1| l1 * <S::Type>::cast_from(N).sqrt())
     }
 }
+*/
 
 impl<F : Float, R, C, S, const N : usize>
 Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>

mercurial