src/kernels/gaussian.rs

//! Implementation of the gaussian kernel.

use float_extras::f64::erf;
use numeric_literals::replace_float_literals;
use serde::Serialize;
use alg_tools::types::*;
use alg_tools::euclidean::Euclidean;
use alg_tools::norms::*;
use alg_tools::loc::Loc;
use alg_tools::sets::Cube;
use alg_tools::bisection_tree::{
    Support,
    Constant,
    Bounds,
    LocalAnalysis,
    GlobalAnalysis,
    Weighted,
    Bounded,
};
use alg_tools::mapping::{Apply, Differentiable};
use alg_tools::maputil::array_init;

use crate::types::Lipschitz;
use crate::fourier::Fourier;
use super::base::*;
use super::ball_indicator::CubeIndicator;

/// Storage presentation of the the anisotropic gaussian kernel of `variance` $σ^2$.
///
/// This is the function $f(x) = C e^{-\\|x\\|\_2^2/(2σ^2)}$ for $x ∈ ℝ^N$
/// with $C=1/(2πσ^2)^{N/2}$.
#[derive(Copy,Clone,Debug,Serialize,Eq)]
pub struct Gaussian<S : Constant, const N : usize> {
    /// The variance $σ^2$.
    pub variance : S,
}

impl<S1, S2, const N : usize> PartialEq<Gaussian<S2, N>> for Gaussian<S1, N>
where S1 : Constant,
      S2 : Constant<Type=S1::Type> {
    fn eq(&self, other : &Gaussian<S2, N>) -> bool {
        self.variance.value() == other.variance.value()
    }
}

impl<S1, S2, const N : usize> PartialOrd<Gaussian<S2, N>> for Gaussian<S1, N>
where S1 : Constant,
      S2 : Constant<Type=S1::Type> {

    fn partial_cmp(&self, other : &Gaussian<S2, N>) -> Option<std::cmp::Ordering> {
        // A gaussian is ≤ another gaussian if the Fourier transforms satisfy the
        // corresponding inequality. That in turns holds if and only if the variances
        // satisfy the opposite inequality.
        let σ1sq = self.variance.value();
        let σ2sq = other.variance.value();
        σ2sq.partial_cmp(&σ1sq)
    }
}


#[replace_float_literals(S::Type::cast_from(literal))]
impl<'a, S, const N : usize> Apply<&'a Loc<S::Type, N>> for Gaussian<S, N>
where S : Constant {
    type Output = S::Type;
    // This is not normalised to neither to have value 1 at zero or integral 1
    // (unless the cut-off ε=0).
    #[inline]
    fn apply(&self, x : &'a Loc<S::Type, N>) -> Self::Output {
        let d_squared = x.norm2_squared();
        let σ2 = self.variance.value();
        let scale = self.scale();
        (-d_squared / (2.0 * σ2)).exp() / scale
    }
}

impl<S, const N : usize> Apply<Loc<S::Type, N>> for Gaussian<S, N>
where S : Constant {
    type Output = S::Type;
    #[inline]
    fn apply(&self, x : Loc<S::Type, N>) -> Self::Output {
        self.apply(&x)
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<'a, S, const N : usize> Differentiable<&'a Loc<S::Type, N>> for Gaussian<S, N>
where S : Constant {
    type Output = Loc<S::Type, N>;
    #[inline]
    fn differential(&self, x : &'a Loc<S::Type, N>) -> Self::Output {
        x * (self.apply(x) / self.variance.value())
    }
}

impl<S, const N : usize> Differentiable<Loc<S::Type, N>> for Gaussian<S, N>
where S : Constant {
    type Output = Loc<S::Type, N>;
    // This is not normalised to neither to have value 1 at zero or integral 1
    // (unless the cut-off ε=0).
    #[inline]
    fn differential(&self, x : Loc<S::Type, N>) -> Self::Output {
        x * (self.apply(&x) / self.variance.value())
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N>
where S : Constant {
    type FloatType = S::Type;
    fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
        // f(x)=f_1(‖x‖_2/σ) * √(2π) / √(2πσ)^N, where f_1 is one-dimensional Gaussian with
        // variance 1. The Lipschitz factor of f_1 is e^{-1/2}/√(2π), see, e.g.,
        // https://math.stackexchange.com/questions/3630967/is-the-gaussian-density-lipschitz-continuous
        // Thus the Lipschitz factor we want is e^{-1/2} / (√(2πσ)^N * σ).
        Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt()))
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<'a, S, const N : usize> Gaussian<S, N>
where S : Constant {

    /// Returns the (reciprocal) scaling constant $1/C=(2πσ^2)^{N/2}$.
    #[inline]
    pub fn scale(&self) -> S::Type {
        let π = S::Type::PI;
        let σ2 = self.variance.value();
        (2.0*π*σ2).powi(N as i32).sqrt()
    }
}

impl<'a, S, const N : usize> Support<S::Type, N> for Gaussian<S, N>
where S : Constant {
    #[inline]
    fn support_hint(&self) -> Cube<S::Type,N> {
        array_init(|| [S::Type::NEG_INFINITY, S::Type::INFINITY]).into()
    }

    #[inline]
    fn in_support(&self, _x : &Loc<S::Type,N>) -> bool {
        true
    }
}

#[replace_float_literals(S::Type::cast_from(literal))]
impl<S, const N : usize> GlobalAnalysis<S::Type, Bounds<S::Type>>  for Gaussian<S, N>
where S : Constant {
    #[inline]
    fn global_analysis(&self) -> Bounds<S::Type> {
        Bounds(0.0, 1.0/self.scale())
    }
}

impl<S, const N : usize> LocalAnalysis<S::Type, Bounds<S::Type>, N>  for Gaussian<S, N>
where S : Constant {
    #[inline]
    fn local_analysis(&self, cube : &Cube<S::Type, N>) -> Bounds<S::Type> {
        // The function is maximised/minimised where the 2-norm is minimised/maximised.
        let lower = self.apply(cube.maxnorm_point());
        let upper = self.apply(cube.minnorm_point());
        Bounds(lower, upper)
    }
}

#[replace_float_literals(C::Type::cast_from(literal))]
impl<'a, C : Constant, const N : usize> Norm<C::Type, L1>
for Gaussian<C, N> {
    #[inline]
    fn norm(&self, _ : L1) -> C::Type {
        1.0
    }
}

#[replace_float_literals(C::Type::cast_from(literal))]
impl<'a, C : Constant, const N : usize> Norm<C::Type, Linfinity>
for Gaussian<C, N> {
    #[inline]
    fn norm(&self, _ : Linfinity) -> C::Type {
        self.bounds().upper()
    }
}

#[replace_float_literals(C::Type::cast_from(literal))]
impl<'a, C : Constant, const N : usize> Fourier<C::Type>
for Gaussian<C, N> {
    type Domain = Loc<C::Type, N>;
    type Transformed = Weighted<Gaussian<C::Type, N>, C::Type>;

    #[inline]
    fn fourier(&self) -> Self::Transformed {
        let π = C::Type::PI;
        let σ2 = self.variance.value();
        let g = Gaussian { variance : 1.0 / (4.0*π*π*σ2) };
        g.weigh(g.scale())
    }
}

/// Representation of the “cut” gaussian $f χ\_{[-a, a]^n}$
/// where $a>0$ and $f$ is a gaussian kernel on $ℝ^n$.
pub type BasicCutGaussian<C, S, const N : usize> = SupportProductFirst<CubeIndicator<C, N>,
                                                                       Gaussian<S, N>>;


/// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is
/// a gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript.
#[replace_float_literals(F::cast_from(literal))]
impl<'a, F : Float, R, C, S, const N : usize> Apply<&'a Loc<F, N>>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    type Output = F;

    #[inline]
    fn apply(&self, y : &'a Loc<F, N>) -> F {
        let Convolution(ref ind,
                        SupportProductFirst(ref cut,
                                            ref gaussian)) = self;
        let a = cut.r.value();
        let b = ind.r.value();
        let σ = gaussian.variance.value().sqrt();
        let π = F::PI;
        let t = F::SQRT_2 * σ;
        let c = σ * (8.0/π).sqrt();
        
        // This is just a product of one-dimensional versions
        let unscaled = y.product_map(|x| {
            let c1 = -(a.min(b + x)); //(-a).max(-x-b);
            let c2 = a.min(b - x);
            if c1 >= c2 {
                0.0
            } else {
                let e1 = F::cast_from(erf((c1 / t).as_()));
                let e2 = F::cast_from(erf((c2 / t).as_()));
                debug_assert!(e2 >= e1);
                c * (e2 - e1)
            }
        });
        
        unscaled / gaussian.scale()
    }
}

impl<F : Float, R, C, S, const N : usize> Apply<Loc<F, N>>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    type Output = F;

    #[inline]
    fn apply(&self, y : Loc<F, N>) -> F {
        self.apply(&y)
    }
}

/// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$
/// and $f$ is a gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the
/// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript.
#[replace_float_literals(F::cast_from(literal))]
impl<'a, F : Float, R, C, S, const N : usize> Differentiable<&'a Loc<F, N>>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    type Output = Loc<F, N>;

    #[inline]
    fn differential(&self, y : &'a Loc<F, N>) -> Loc<F, N> {
        let Convolution(ref ind,
                        SupportProductFirst(ref cut,
                                            ref gaussian)) = self;
        let a = cut.r.value();
        let b = ind.r.value();
        let σ = gaussian.variance.value().sqrt();
        let π = F::PI;
        let t = F::SQRT_2 * σ;
        let c = σ * (8.0/π).sqrt();
        let cd = (8.0).sqrt(); // σ * (8.0/π).sqrt() / t * (√2/π)
        
        // Calculate the values for all component functions of the
        // product. This is just the loop from apply above.
        let unscaled_vs = y.map(|x| {
            let c1 = -(a.min(b + x)); //(-a).max(-x-b);
            let c2 = a.min(b - x);
            if c1 >= c2 {
                0.0
            } else {
                let e1 = F::cast_from(erf((c1 / t).as_()));
                let e2 = F::cast_from(erf((c2 / t).as_()));
                debug_assert!(e2 >= e1);
                c * (e2 - e1)
            }
        });
        // This computes the gradient for each coordinate
        product_differential(y, &unscaled_vs, |x| {
            let c1 = -(a.min(b + x)); //(-a).max(-x-b);
            let c2 = a.min(b - x);
            if c1 >= c2 {
                0.0
            } else {
                // erf'(z) = (2/√π)*exp(-z^2), and we get extra factor -1/√(2*σ) = -1/t
                // from the chain rule
                let de1 = (-(c1/t).powi(2)).exp();
                let de2 = (-(c2/t).powi(2)).exp();
                cd * (de1 - de2)
            }
        }) / gaussian.scale()
    }
}

impl<F : Float, R, C, S, const N : usize> Differentiable<Loc<F, N>>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    type Output = Loc<F, N>;

    #[inline]
    fn differential(&self, y : Loc<F, N>) -> Loc<F, N> {
        self.differential(&y)
    }
}

#[replace_float_literals(F::cast_from(literal))]
impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L2>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {
    type FloatType = F;

    fn lipschitz_factor(&self, L2 : L2) -> Option<F> {
        todo!("This requirement some error function work.")
    }
}

impl<F : Float, R, C, S, const N : usize>
Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {

    #[inline]
    fn get_r(&self) -> F {
        let Convolution(ref ind,
                        SupportProductFirst(ref cut, ..)) = self;
        ind.r.value() + cut.r.value()
    }
}

impl<F : Float, R, C, S, const N : usize> Support<F, N>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {
    #[inline]
    fn support_hint(&self) -> Cube<F, N> {
        let r = self.get_r();
        array_init(|| [-r, r]).into()
    }

    #[inline]
    fn in_support(&self, y : &Loc<F, N>) -> bool {
        let r = self.get_r();
        y.iter().all(|x| x.abs() <= r)
    }

    #[inline]
    fn bisection_hint(&self, cube : &Cube<F, N>) -> [Option<F>; N] {
        let r = self.get_r();
        // From c1 = -a.min(b + x) and c2 = a.min(b - x) with c_1 < c_2,
        // solve bounds for x. that is 0 ≤ a.min(b + x) + a.min(b - x).
        // If b + x ≤ a and b - x ≤ a, the sum is 2b ≥ 0.
        // If b + x ≥ a and b - x ≥ a, the sum is 2a ≥ 0.
        // If b + x ≤ a and b - x ≥ a, the sum is b + x + a ⟹ need x ≥ -a - b = -r.
        // If b + x ≥ a and b - x ≤ a, the sum is a + b - x ⟹ need x ≤ a + b = r.
        cube.map(|c, d| symmetric_peak_hint(r, c, d))
    }
}

impl<F : Float, R, C, S, const N : usize> GlobalAnalysis<F, Bounds<F>>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {
    #[inline]
    fn global_analysis(&self) -> Bounds<F> {
        Bounds(F::ZERO, self.apply(Loc::ORIGIN))
    }
}

impl<F : Float, R, C, S, const N : usize> LocalAnalysis<F, Bounds<F>, N>
for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
where R : Constant<Type=F>,
      C : Constant<Type=F>,
      S : Constant<Type=F> {
    #[inline]
    fn local_analysis(&self, cube : &Cube<F, N>) -> Bounds<F> {
        // The function is maximised/minimised where the absolute value is minimised/maximised.
        let lower = self.apply(cube.maxnorm_point());
        let upper = self.apply(cube.minnorm_point());
        Bounds(lower, upper)
    }
}