curvature controls dev

Fri, 31 Jan 2025 18:00:17 -0500

author
Tuomo Valkonen <tuomov@iki.fi>
date
Fri, 31 Jan 2025 18:00:17 -0500
branch
dev
changeset 44
03251c546744
parent 43
aacd9af21b3a
child 45
5200e7090e06

curvature controls

src/forward_model.rs file | annotate | diff | comparison | revisions
src/forward_model/bias.rs file | annotate | diff | comparison | revisions
src/forward_model/sensor_grid.rs file | annotate | diff | comparison | revisions
src/main.rs file | annotate | diff | comparison | revisions
src/sliding_fb.rs file | annotate | diff | comparison | revisions
src/sliding_pdps.rs file | annotate | diff | comparison | revisions
src/transport.rs file | annotate | diff | comparison | revisions
--- a/src/forward_model.rs	Thu Jan 23 23:39:40 2025 +0100
+++ b/src/forward_model.rs	Fri Jan 31 18:00:17 2025 -0500
@@ -54,6 +54,7 @@
         -> Option<(Self::FloatType, Self::FloatType)>;
 }
 
+/*
 /// Trait for [`ForwardModel`]s whose preadjoint has Lipschitz values.
 pub trait LipschitzValues {
     type FloatType : Float;
@@ -69,4 +70,16 @@
         None
     }
 }
+*/
 
+/// Trait for [`ForwardModel`]s that satisfy bounds on the curvature $𝒦_F$.
+pub trait BoundedCurvature {
+    type FloatType : Float;
+
+    /// Returns components for a bound $ℓ_F$ on the curvature
+    /// $$
+    ///     𝒦_F(μ, γ) = ∫ B_{F'(μ)} dγ +  B_F(μ, μ+Δ).
+    /// $$
+    /// such that $𝒦_F(μ, γ)  ≤ ℓ_F ∫  c_2 d|γ|$.
+    fn curvature_bound_components(&self) -> (Option<Self::FloatType>, Option<Self::FloatType>);
+}
--- a/src/forward_model/bias.rs	Thu Jan 23 23:39:40 2025 +0100
+++ b/src/forward_model/bias.rs	Fri Jan 31 18:00:17 2025 -0500
@@ -6,13 +6,11 @@
 use alg_tools::types::{Float, ClosedAdd};
 use alg_tools::mapping::Space;
 use alg_tools::direct_product::Pair;
-use alg_tools::linops::{Linear, RowOp, ColOp, IdOp, ZeroOp, AXPY};
+use alg_tools::linops::{Linear, RowOp,  IdOp, ZeroOp, AXPY};
 use alg_tools::error::DynError;
 use alg_tools::norms::{L2, Norm, PairNorm, NormExponent};
-use crate::types::L2Squared;
 use crate::measures::RNDM;
-use super::{ForwardModel, AdjointProductBoundedBy, AdjointProductPairBoundedBy, LipschitzValues};
-use crate::transport::TransportLipschitz;
+use super::{ForwardModel, AdjointProductBoundedBy, AdjointProductPairBoundedBy, BoundedCurvature};
 
 impl<Domain, F, A, E> ForwardModel<Pair<Domain, A::Observable>, F, PairNorm<E, L2, L2>>
 for RowOp<A, IdOp<A::Observable>>
@@ -56,6 +54,7 @@
     }
 }
 
+/*
 /// This `impl` is bit of an abuse as the codomain of `Apre` is a [`Pair`] of a measure predual,
 /// to which this `impl` applies, and another space.
 impl<F, Apre, Z> LipschitzValues
@@ -78,19 +77,22 @@
         self.0.value_diff_unit_lipschitz_factor()
     }
 }
-
-
+*/
 
-impl<'a, F : Float, Y : Space, XD, const N : usize> TransportLipschitz<L2Squared> for
-ZeroOp<'a, RNDM<F, N>, XD, Y, F> {
+impl<F, A, Z> BoundedCurvature
+for RowOp<A, IdOp<Z>>
+where
+    F : Float,
+    Z : Clone + Space + ClosedAdd,
+    A : BoundedCurvature<FloatType = F>,
+{
     type FloatType = F;
 
-    fn transport_lipschitz_factor(&self, _ : L2Squared) -> Self::FloatType {
-        F::ZERO
+    fn curvature_bound_components(&self) -> (Option<Self::FloatType>, Option<Self::FloatType>) {
+        self.0.curvature_bound_components()
     }
 }
 
-
 #[replace_float_literals(F::cast_from(literal))]
 impl<'a, F, D, XD, Y, const N : usize> AdjointProductBoundedBy<RNDM<F, N>, D>
 for ZeroOp<'a, RNDM<F, N>, XD, Y, F>
--- a/src/forward_model/sensor_grid.rs	Thu Jan 23 23:39:40 2025 +0100
+++ b/src/forward_model/sensor_grid.rs	Fri Jan 31 18:00:17 2025 -0500
@@ -38,12 +38,10 @@
     AutoConvolution,
     BoundedBy,
 };
-use crate::types::L2Squared;
-use crate::transport::TransportLipschitz;
 use crate::preadjoint_helper::PreadjointHelper;
 use super::{
     ForwardModel,
-    LipschitzValues,
+    BoundedCurvature,
     AdjointProductBoundedBy
 };
 use crate::frank_wolfe::FindimQuadraticModel;
@@ -309,6 +307,7 @@
     }
 }
 
+/*
 #[replace_float_literals(F::cast_from(literal))]
 impl<'a, F, S, P, BT, const N : usize> LipschitzValues
 for SensorGridPreadjoint<'a, SensorGrid<F, S, P, BT, N>, F, N>
@@ -340,6 +339,50 @@
         fw.base_sensor.diff_ref().lipschitz_factor(L2).map(|l| (2.0 * n).sqrt() * l)
     }
 }
+*/
+
+#[replace_float_literals(F::cast_from(literal))]
+impl<'a, F, S, P, BT, const N : usize> BoundedCurvature
+for SensorGrid<F, S, P, BT, N>
+where F : Float,
+      BT : SensorGridBT<F, S, P, N>,
+      S : Sensor<F, N>,
+      P : Spread<F, N>,
+      Convolution<S, P> : Spread<F, N> + Lipschitz<L2, FloatType=F> + DifferentiableMapping<Loc<F,N>> + LocalAnalysis<F, BT::Agg, N>,
+      for<'b> <Convolution<S, P> as DifferentiableMapping<Loc<F,N>>>::Differential<'b> : Lipschitz<L2, FloatType=F>,
+{
+
+    type FloatType = F;
+
+    /// Returns a bound $ℓ_F$ on the curvature
+    /// $$
+    ///     𝒦_F(μ, γ) = ∫ B_{F'(μ)} dγ +  B_F(μ, μ+Δ).
+    /// $$
+    /// such that $𝒦_F(μ, γ)  ≤ ℓ_F ∫  c_2 d|γ|$.
+    ///
+    /// For $F(μ)=(1/2)‖Aμ-b‖^2$, we have $B_F(μ, μ+Δ)=(1/2)‖AΔ‖^2$, where $Δ = (π_♯^1-π_♯^0)γ$.
+    /// So we use Lemma 3.8 for that, bounding
+    /// $(1/2)‖AΔ‖^2 ≤ Θ ∫ c_2 dγ$ for $Θ=2N_ψML_ψ^2$, where
+    ///   * $L_ψ$ is the 2-norm Lipschitz factor of $ψ$ (sensor * base_spread), and
+    ///   * $N_ψ$ the maximum overlap,
+    ///   * M is a bound on $|γ|(Ω^2)$.
+    ///
+    /// We also have $B_{F'(μ)}(x, y) = v(y) - v(x) ⟨∇v(x), x-y⟩$ for $v(x)=A^*(Aμ-b)$.
+    /// This we want the Lipschitz factor of $∇v$.
+    /// By Example 4.15, it makes sense to estimate this by $√(2N_ψ)L_{∇ψ}‖b‖$, where
+    /// $L_{∇ψ}$ is the Lipshitz factor of $∇ψ$.
+    fn curvature_bound_components(&self) -> (Option<Self::FloatType>, Option<Self::FloatType>) {
+        let n_ψ = self.max_overlapping();
+        let ψ_diff_lip = self.base_sensor.diff_ref().lipschitz_factor(L2);
+        let ψ_lip = self.base_sensor.lipschitz_factor(L2);
+        let a = ψ_diff_lip.map(|l| (2.0 * n_ψ).sqrt() * l);
+        let b = ψ_lip.map(|l| 2.0 * n_ψ * l.powi(2));
+
+        (a, b)
+    }
+}
+
+
 
 #[derive(Clone,Debug)]
 pub struct SensorGridSupportGenerator<F, S, P, const N : usize>
@@ -491,28 +534,6 @@
     }
 }
 
-#[replace_float_literals(F::cast_from(literal))]
-impl<F, BT, S, P, const N : usize> TransportLipschitz<L2Squared>
-for SensorGrid<F, S, P, BT, N>
-where F : Float + ToNalgebraRealField,
-      BT : SensorGridBT<F, S, P, N>,
-      S : Sensor<F, N>,
-      P : Spread<F, N>,
-      Convolution<S, P> : Spread<F, N> + Lipschitz<L2, FloatType = F>
-{
-    type FloatType = F;
-
-    fn transport_lipschitz_factor(&self, L2Squared : L2Squared) -> Self::FloatType {
-        // We estimate the factor by N_ψL^2, where L is the 2-norm Lipschitz factor of
-        // the base sensor (sensor * base_spread), and N_ψ the maximum overlap.
-        // The factors two comes from Lipschitz estimates having two possible
-        // points of overlap.
-        let l = self.base_sensor.lipschitz_factor(L2).unwrap();
-        2.0 * self.max_overlapping() * l.powi(2)
-    }
-}
-
-
 macro_rules! make_sensorgridsupportgenerator_scalarop_rhs {
     ($trait:ident, $fn:ident, $trait_assign:ident, $fn_assign:ident) => {
         impl<F, S, P, const N : usize>
@@ -630,3 +651,4 @@
       P : Spread<F, N>,
       Convolution<S, P> : Spread<F, N> + LocalAnalysis<F, Bounds<F>, N>,
 { }
+
--- a/src/main.rs	Thu Jan 23 23:39:40 2025 +0100
+++ b/src/main.rs	Fri Jan 31 18:00:17 2025 -0500
@@ -30,7 +30,6 @@
 pub mod fourier;
 pub mod kernels;
 pub mod seminorms;
-pub mod transport;
 pub mod forward_model;
 pub mod preadjoint_helper;
 pub mod plot;
--- a/src/sliding_fb.rs	Thu Jan 23 23:39:40 2025 +0100
+++ b/src/sliding_fb.rs	Fri Jan 31 18:00:17 2025 -0500
@@ -22,7 +22,7 @@
 use crate::forward_model::{
     ForwardModel,
     AdjointProductBoundedBy,
-    LipschitzValues,
+    BoundedCurvature,
 };
 //use crate::tolerance::Tolerance;
 use crate::plot::{
@@ -99,6 +99,7 @@
 /// Internal type of adaptive transport step length calculation
 pub(crate) enum TransportStepLength<F : Float, G : Fn(F, F) -> F> {
     /// Fixed, known step length
+    #[allow(dead_code)]
     Fixed(F),
     /// Adaptive step length, only wrt. maximum transport.
     /// Content of `l` depends on use case, while `g` calculates the step length from `l`.
@@ -158,16 +159,16 @@
                 ρ.x = δ.x - v.differential(&δ.x) * (ρ.α.signum() * θτ);
             }
         },
-        AdaptiveMax{ l : ℓ_v, ref mut max_transport, g : ref calculate_θ } => {
+        AdaptiveMax{ l : ℓ_F, ref mut max_transport, g : ref calculate_θ } => {
             *max_transport = max_transport.max(γ1.norm(Radon));
-            let θτ = τ * calculate_θ(ℓ_v, *max_transport);
+            let θτ = τ * calculate_θ(ℓ_F, *max_transport);
             for (δ, ρ) in izip!(μ.iter_spikes(), γ1.iter_spikes_mut()) {
                 ρ.x = δ.x - v.differential(&δ.x) * (ρ.α.signum() * θτ);
             }
         },
-        FullyAdaptive{ l : ref mut adaptive_ℓ_v, ref mut max_transport, g : ref calculate_θ } => {
+        FullyAdaptive{ l : ref mut adaptive_ℓ_F, ref mut max_transport, g : ref calculate_θ } => {
             *max_transport = max_transport.max(γ1.norm(Radon));
-            let mut θ = calculate_θ(*adaptive_ℓ_v, *max_transport);
+            let mut θ = calculate_θ(*adaptive_ℓ_F, *max_transport);
             // Do two runs through the spikes to update θ, breaking if first run did not cause
             // a change.
             for _i in 0..=1 {
@@ -179,9 +180,9 @@
                     let n = g.norm2();
                     if n >= F::EPSILON {
                         // Estimate Lipschitz factor of ∇v
-                        let this_ℓ_v = (dv_x - v.differential(&ρ.x)).norm2() / n;
-                        *adaptive_ℓ_v = adaptive_ℓ_v.max(this_ℓ_v);
-                        θ = calculate_θ(*adaptive_ℓ_v, *max_transport);
+                        let this_ℓ_F = (dv_x - v.differential(&ρ.x)).norm2() / n;
+                        *adaptive_ℓ_F = adaptive_ℓ_F.max(this_ℓ_F);
+                        θ = calculate_θ(*adaptive_ℓ_F, *max_transport);
                         changes = true
                     }
                 }
@@ -274,10 +275,9 @@
     F : Float + ToNalgebraRealField,
     I : AlgIteratorFactory<IterInfo<F, N>>,
     A : ForwardModel<RNDM<F, N>, F>
-        + AdjointProductBoundedBy<RNDM<F, N>, P, FloatType=F>,
-        //+ TransportLipschitz<L2Squared, FloatType=F>,
+        + AdjointProductBoundedBy<RNDM<F, N>, P, FloatType=F>
+        + BoundedCurvature<FloatType=F>,
     for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + Instance<A::Observable>,
-    for<'b> A::Preadjoint<'b> : LipschitzValues<FloatType=F>,
     A::PreadjointCodomain : DifferentiableRealMapping<F, N>,
     RNDM<F, N> : SpikeMerging<F>,
     Reg : SlidingRegTerm<F, N>,
@@ -301,12 +301,19 @@
     //let ℓ = opA.transport.lipschitz_factor(L2Squared) * max_transport;
     let ℓ = 0.0;
     let τ = config.τ0 / opA.adjoint_product_bound(prox_penalty).unwrap();
-    let calculate_θ = |ℓ_v, _| config.transport.θ0 / (τ*(ℓ + ℓ_v));
-    let mut θ_or_adaptive = match opA.preadjoint().value_diff_unit_lipschitz_factor() {
-        // We only estimate w (the uniform Lipschitz for of v), if we also estimate ℓ_v
-        // (the uniform Lipschitz factor of ∇v).
-        // We assume that the residual is decreasing.
-        Some(ℓ_v0) => TransportStepLength::Fixed(calculate_θ(ℓ_v0 * b.norm2(), 0.0)),
+    let (maybe_ℓ_v0, maybe_transport_lip) = opA.curvature_bound_components();
+    let transport_lip = maybe_transport_lip.unwrap();
+    let calculate_θ = |ℓ_v, max_transport| {
+        let ℓ_F = ℓ_v + transport_lip * max_transport;
+        config.transport.θ0 / (τ*(ℓ + ℓ_F))
+    };
+    let mut θ_or_adaptive = match maybe_ℓ_v0 {
+        //Some(ℓ_v0) => TransportStepLength::Fixed(calculate_θ(ℓ_v0 * b.norm2(), 0.0)),
+        Some(ℓ_v0) => TransportStepLength::AdaptiveMax {
+            l: ℓ_v0 * b.norm2(), // TODO: could estimate computing the real reesidual
+            max_transport : 0.0,
+            g : calculate_θ
+        },
         None => TransportStepLength::FullyAdaptive {
             l : 10.0 * F::EPSILON, // Start with something very small to estimate differentials
             max_transport : 0.0,
--- a/src/sliding_pdps.rs	Thu Jan 23 23:39:40 2025 +0100
+++ b/src/sliding_pdps.rs	Fri Jan 31 18:00:17 2025 -0500
@@ -27,7 +27,7 @@
 use crate::forward_model::{
     ForwardModel,
     AdjointProductPairBoundedBy,
-    LipschitzValues,
+    BoundedCurvature,
 };
 // use crate::transport::TransportLipschitz;
 //use crate::tolerance::Tolerance;
@@ -113,10 +113,10 @@
             PairNorm<Radon, L2, L2>,
             PreadjointCodomain = Pair<S, Z>,
         >
-        + AdjointProductPairBoundedBy<MeasureZ<F, Z, N>, P, IdOp<Z>, FloatType=F>,
+        + AdjointProductPairBoundedBy<MeasureZ<F, Z, N>, P, IdOp<Z>, FloatType=F>
+        + BoundedCurvature<FloatType=F>,
     S : DifferentiableRealMapping<F, N>,
     for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable> + Instance<A::Observable>,
-    for<'b> A::Preadjoint<'b> : LipschitzValues<FloatType=F>,
     PlotLookup : Plotting<N>,
     RNDM<F, N> : SpikeMerging<F>,
     Reg : SlidingRegTerm<F, N>,
@@ -188,23 +188,24 @@
     let ψ = 1.0 - τ * l;
     let β = σ_p * config.σd0 * nKz / a; // σ_p * σ_d * (nKz * nK_z) / a;
     assert!(β < 1.0);
-    // Now we need κ‖K_μ(π_♯^1 - π_♯^0)γ‖^2 ≤ (1/θ - τ[ℓ_v + ℓ]) ∫ c_2 dγ for κ defined as:
+    // Now we need κ‖K_μ(π_♯^1 - π_♯^0)γ‖^2 ≤ (1/θ - τ[ℓ_F + ℓ]) ∫ c_2 dγ for κ defined as:
     let κ = τ * σ_d * ψ / ((1.0 - β) * ψ - τ * σ_d * bigM);
     //  The factor two in the manuscript disappears due to the definition of 𝚹 being
     // for ‖x-y‖₂² instead of c_2(x, y)=‖x-y‖₂²/2.
+    let (maybe_ℓ_v0, maybe_transport_lip) = opA.curvature_bound_components();
+    let transport_lip = maybe_transport_lip.unwrap();
     let calculate_θ = |ℓ_v, max_transport| {
-        config.transport.θ0 / (τ*(ℓ + ℓ_v) + κ * bigθ * max_transport)
+        let ℓ_F = ℓ_v + transport_lip * max_transport;
+        config.transport.θ0 / (τ*(ℓ + ℓ_F) + κ * bigθ * max_transport)
     };
-    let mut θ_or_adaptive = match opA.preadjoint().value_diff_unit_lipschitz_factor() {
-        // We only estimate w (the uniform Lipschitz for of v), if we also estimate ℓ_v
-        // (the uniform Lipschitz factor of ∇v).
+    let mut θ_or_adaptive = match maybe_ℓ_v0 {
         // We assume that the residual is decreasing.
-        Some(ℓ_v0) => TransportStepLength::AdaptiveMax{
-            l: ℓ_v0 * b.norm2(),
+        Some(ℓ_v0) => TransportStepLength::AdaptiveMax {
+            l: ℓ_v0 * b.norm2(), // TODO: could estimate computing the real reesidual
             max_transport : 0.0,
             g : calculate_θ
         },
-        None => TransportStepLength::FullyAdaptive{
+        None => TransportStepLength::FullyAdaptive {
             l : F::EPSILON,
             max_transport : 0.0,
             g : calculate_θ
--- a/src/transport.rs	Thu Jan 23 23:39:40 2025 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-/// Definitions related to optimal transport
-
-use crate::types::*;
-
-pub trait TransportLipschitz<Cost> {
-    /// Type of floats
-    type FloatType : Float;
-
-    /// Returns the transport Lipschitz factor of Self.
-    ///
-    /// If `Self` is a linear operator $A$ on $ℳ(Ω)$, and `Cost` represents the spatial
-    /// cost function $c$, this factor $L$ is such that, for all $0 ≤ λ ∈ ℳ(Ω^2)$,
-    /// $$
-    ///     \norm{A(π_\#^1-π_\#^0)λ}^2 ≤ L \norm{λ}_{ℳ(Ω^2)} ∫ c(x, y) dλ(x, y).
-    /// $$
-    fn transport_lipschitz_factor(&self, cost : Cost) -> Self::FloatType;
-}

mercurial