Tue, 31 Dec 2024 09:34:24 -0500
Early transport sketches
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/dataterm.rs	Tue Dec 31 09:34:24 2024 -0500
@@ -0,0 +1,88 @@
+/*!
+Basic definitions for data terms
+*/
+
+use numeric_literals::replace_float_literals;
+
+use alg_tools::loc::Loc;
+use alg_tools::euclidean::Euclidean;
+use alg_tools::linops::GEMV;
+pub use alg_tools::norms::L1;
+use alg_tools::norms::Norm;
+
+use crate::types::*;
+pub use crate::types::L2Squared;
+use crate::measures::DiscreteMeasure;
+
+/// Calculates the residual $Aμ-b$.
+#[replace_float_literals(F::cast_from(literal))]
+pub(crate) fn calculate_residual<
+    F : Float,
+    V : Euclidean<F> + Clone,
+    A : GEMV<F, DiscreteMeasure<Loc<F, N>, F>, Codomain = V>,
+    const N : usize
+>(
+    μ : &DiscreteMeasure<Loc<F, N>, F>,
+    opA : &A,
+    b : &V
+) -> V {
+    let mut r = b.clone();
+    opA.gemv(&mut r, 1.0, μ, -1.0); // r ← Aμ + (-1)·b
+    r
+}
+
+/// Calculates the residual $A(μ+μ_δ)-b$.
+#[replace_float_literals(F::cast_from(literal))]
+pub(crate) fn calculate_residual2<
+    F : Float,
+    V : Euclidean<F> + Clone,
+    A : GEMV<F, DiscreteMeasure<Loc<F, N>, F>, Codomain = V>,
+    const N : usize
+>(
+    μ : &DiscreteMeasure<Loc<F, N>, F>,
+    μ_delta : &DiscreteMeasure<Loc<F, N>, F>,
+    opA : &A,
+    b : &V
+) -> V {
+    let mut r = b.clone();
+    opA.gemv(&mut r, 1.0, μ, -1.0);
+    opA.gemv(&mut r, 1.0, μ_delta, 1.0); // β=1 accumulates, giving A(μ+μ_δ)-b
+    r
+}
+
+
+/// Trait for data terms
+#[replace_float_literals(F::cast_from(literal))]
+pub trait DataTerm<F : Float, V, const N : usize> {
+    /// Calculates $F(y)$, where $F$ is the data fidelity.
+    fn calculate_fit(&self, _residual : &V) -> F;
+
+    /// Calculates $F(Aμ-b)$, where $F$ is the data fidelity.
+    fn calculate_fit_op<A : GEMV<F, DiscreteMeasure<Loc<F, N>, F>, Codomain = V>>(
+        &self,
+        μ : &DiscreteMeasure<Loc<F, N>, F>,
+        opA : &A,
+        b : &V
+    ) -> F
+    where V : Euclidean<F> + Clone {
+        let r = calculate_residual(&μ, opA, b);
+        self.calculate_fit(&r)
+    }
+}
+
+impl<F : Float, V : Euclidean<F>, const N : usize>
+DataTerm<F, V, N>
+for L2Squared {
+    fn calculate_fit(&self, residual : &V) -> F {
+        residual.norm2_squared_div2()
+    }
+}
+
+
+impl<F : Float, V : Euclidean<F> + Norm<F, L1>, const N : usize>
+DataTerm<F, V, N>
+for L1 {
+    fn calculate_fit(&self, residual : &V) -> F {
+        residual.norm(L1)
+    }
+}
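Aside (not part of the patch): the `gemv` calls above follow the BLAS-style convention r ← α·Aμ + β·r, so seeding r with b and taking β = -1 yields Aμ - b. A self-contained toy sketch of the same computation and the L2Squared fit, with an invented kernel and sensor layout:

    // Invented toy setup: spikes (position, mass), sensors at fixed points,
    // Gaussian-ish sensor kernel k. Mirrors r = Aμ - b and F(r) = ‖r‖²/2.
    fn residual(spikes: &[(f64, f64)], b: &[f64], sensors: &[f64]) -> Vec<f64> {
        let k = |d: f64| (-d * d).exp();
        sensors.iter().zip(b).map(|(&s, &bi)| {
            let a_mu: f64 = spikes.iter().map(|&(x, m)| m * k(s - x)).sum();
            a_mu - bi // same sign as gemv(&mut r, 1.0, μ, -1.0) applied to r = b
        }).collect()
    }

    fn main() {
        let spikes  = [(0.3, 1.0), (0.7, 0.5)];
        let sensors = [0.0, 0.25, 0.5, 0.75, 1.0];
        let b       = [0.9, 1.0, 0.9, 0.6, 0.4];
        let r = residual(&spikes, &b, &sensors);
        let fit: f64 = r.iter().map(|v| v * v).sum::<f64>() / 2.0; // L2Squared fit
        println!("residual = {:?}, fit = {:.4}", r, fit);
    }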
--- a/src/fb.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/fb.rs Tue Dec 31 09:34:24 2024 -0500 @@ -83,17 +83,16 @@ use numeric_literals::replace_float_literals; use serde::{Serialize, Deserialize}; use colored::Colorize; -use nalgebra::{DVector, DMatrix}; +use nalgebra::DVector; use alg_tools::iterate::{ AlgIteratorFactory, AlgIteratorState, }; use alg_tools::euclidean::Euclidean; -use alg_tools::linops::Apply; +use alg_tools::linops::{Apply, GEMV}; use alg_tools::sets::Cube; use alg_tools::loc::Loc; -use alg_tools::mapping::Mapping; use alg_tools::bisection_tree::{ BTFN, PreBTFN, @@ -104,7 +103,7 @@ P2Minimise, SupportGenerator, LocalAnalysis, - Bounded, + BothGenerators, }; use alg_tools::mapping::RealMapping; use alg_tools::nalgebra_support::ToNalgebraRealField; @@ -119,12 +118,8 @@ SpikeMerging, }; use crate::forward_model::ForwardModel; -use crate::seminorms::{ - DiscreteMeasureOp, Lipschitz -}; +use crate::seminorms::DiscreteMeasureOp; use crate::subproblem::{ - nonneg::quadratic_nonneg, - unconstrained::quadratic_unconstrained, InnerSettings, InnerMethod, }; @@ -134,9 +129,11 @@ Plotting, PlotLookup }; -use crate::regularisation::{ - NonnegRadonRegTerm, - RadonRegTerm, +use crate::regularisation::RegTerm; +use crate::dataterm::{ + calculate_residual, + L2Squared, + DataTerm, }; /// Method for constructing $μ$ on each iteration @@ -150,24 +147,12 @@ Zero, } -/// Meta-algorithm type -#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] -#[allow(dead_code)] -pub enum FBMetaAlgorithm { - /// No meta-algorithm - None, - /// FISTA-style inertia - InertiaFISTA, -} - /// Settings for [`pointsource_fb_reg`]. #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] #[serde(default)] pub struct FBConfig<F : Float> { /// Step length scaling pub τ0 : F, - /// Meta-algorithm to apply - pub meta : FBMetaAlgorithm, /// Generic parameters pub insertion : FBGenericConfig<F>, } @@ -209,7 +194,6 @@ fn default() -> Self { FBConfig { τ0 : 0.99, - meta : FBMetaAlgorithm::None, insertion : Default::default() } } @@ -240,486 +224,236 @@ } } -/// Trait for specialisation of [`generic_pointsource_fb_reg`] to basic FB, FISTA. -/// -/// The idea is that the residual $Aμ - b$ in the forward step can be replaced by an arbitrary -/// value. For example, to implement [primal-dual proximal splitting][crate::pdps] we replace it -/// with the dual variable $y$. We can then also implement alternative data terms, as the -/// (pre)differential of $F(μ)=F\_0(Aμ-b)$ is $F\'(μ) = A\_*F\_0\'(Aμ-b)$. In the case of the -/// quadratic fidelity $F_0(y)=\frac{1}{2}\\|y\\|_2^2$ in a Hilbert space, of course, -/// $F\_0\'(Aμ-b)=Aμ-b$ is the residual. -pub trait FBSpecialisation<F : Float, Observable : Euclidean<F>, const N : usize> : Sized { - /// Updates the residual and does any necessary pruning of `μ`. - /// - /// Returns the new residual and possibly a new step length. - /// - /// The measure `μ` may also be modified to apply, e.g., inertia to it. - /// The updated residual should correspond to the residual at `μ`. - /// See the [trait documentation][FBSpecialisation] for the use and meaning of the residual. - /// - /// The parameter `μ_base` is the base point of the iteration, typically the previous iterate, - /// but for, e.g., FISTA has inertia applied to it. - fn update( - &mut self, - μ : &mut DiscreteMeasure<Loc<F, N>, F>, - μ_base : &DiscreteMeasure<Loc<F, N>, F>, - ) -> (Observable, Option<F>); - - /// Calculates the data term value corresponding to iterate `μ` and available residual. 
- /// - /// Inertia and other modifications, as deemed, necessary, should be applied to `μ`. - /// - /// The blanket implementation correspondsn to the 2-norm-squared data fidelity - /// $\\|\text{residual}\\|\_2^2/2$. - fn calculate_fit( - &self, - _μ : &DiscreteMeasure<Loc<F, N>, F>, - residual : &Observable - ) -> F { - residual.norm2_squared_div2() - } - - /// Calculates the data term value at $μ$. - /// - /// Unlike [`Self::calculate_fit`], no inertia, etc., should be applied to `μ`. - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - ) -> F; - - /// Returns the final iterate after any necessary postprocess pruning, merging, etc. - fn postprocess(self, mut μ : DiscreteMeasure<Loc<F, N>, F>, merging : SpikeMergingMethod<F>) - -> DiscreteMeasure<Loc<F, N>, F> - where DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F> { - μ.merge_spikes_fitness(merging, - |μ̃| self.calculate_fit_simple(μ̃), - |&v| v); - μ.prune(); - μ - } - - /// Returns measure to be used for value calculations, which may differ from μ. - fn value_μ<'c, 'b : 'c>(&'b self, μ : &'c DiscreteMeasure<Loc<F, N>, F>) - -> &'c DiscreteMeasure<Loc<F, N>, F> { - μ - } -} - -/// Specialisation of [`generic_pointsource_fb_reg`] to basic μFB. -struct BasicFB< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel<Loc<F, N>, F>, - const N : usize -> { - /// The data - b : &'a A::Observable, - /// The forward operator - opA : &'a A, -} - -/// Implementation of [`FBSpecialisation`] for basic μFB forward-backward splitting. #[replace_float_literals(F::cast_from(literal))] -impl<'a, F : Float + ToNalgebraRealField , A : ForwardModel<Loc<F, N>, F>, const N : usize> -FBSpecialisation<F, A::Observable, N> for BasicFB<'a, F, A, N> { - fn update( - &mut self, - μ : &mut DiscreteMeasure<Loc<F, N>, F>, - _μ_base : &DiscreteMeasure<Loc<F, N>, F> - ) -> (A::Observable, Option<F>) { - μ.prune(); - //*residual = self.opA.apply(μ) - self.b; - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - (residual, None) - } - - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - ) -> F { - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - residual.norm2_squared_div2() - } -} - -/// Specialisation of [`generic_pointsource_fb_reg`] to FISTA. -struct FISTA< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel<Loc<F, N>, F>, - const N : usize -> { - /// The data - b : &'a A::Observable, - /// The forward operator - opA : &'a A, - /// Current inertial parameter - λ : F, - /// Previous iterate without inertia applied. - /// We need to store this here because `μ_base` passed to [`FBSpecialisation::update`] will - /// have inertia applied to it, so is not useful to use. - μ_prev : DiscreteMeasure<Loc<F, N>, F>, -} - -/// Implementation of [`FBSpecialisation`] for μFISTA inertial forward-backward splitting. -#[replace_float_literals(F::cast_from(literal))] -impl<'a, F : Float + ToNalgebraRealField, A : ForwardModel<Loc<F, N>, F>, const N : usize> -FBSpecialisation<F, A::Observable, N> for FISTA<'a, F, A, N> { - fn update( - &mut self, - μ : &mut DiscreteMeasure<Loc<F, N>, F>, - _μ_base : &DiscreteMeasure<Loc<F, N>, F> - ) -> (A::Observable, Option<F>) { - // Update inertial parameters - let λ_prev = self.λ; - self.λ = 2.0 * λ_prev / ( λ_prev + (4.0 + λ_prev * λ_prev).sqrt() ); - let θ = self.λ / λ_prev - self.λ; - // Perform inertial update on μ. 
- // This computes μ ← (1 + θ) * μ - θ * μ_prev, pruning spikes where both μ - // and μ_prev have zero weight. Since both have weights from the finite-dimensional - // subproblem with a proximal projection step, this is likely to happen when the - // spike is not needed. A copy of the pruned μ without artithmetic performed is - // stored in μ_prev. - μ.pruning_sub(1.0 + θ, θ, &mut self.μ_prev); - - //*residual = self.opA.apply(μ) - self.b; - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - (residual, None) - } - - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - ) -> F { - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - residual.norm2_squared_div2() - } - - fn calculate_fit( - &self, - _μ : &DiscreteMeasure<Loc<F, N>, F>, - _residual : &A::Observable - ) -> F { - self.calculate_fit_simple(&self.μ_prev) - } - - // For FISTA we need to do a final pruning as well, due to the limited - // pruning that can be done on each step. - fn postprocess(mut self, μ_base : DiscreteMeasure<Loc<F, N>, F>, merging : SpikeMergingMethod<F>) - -> DiscreteMeasure<Loc<F, N>, F> - where DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F> { - let mut μ = self.μ_prev; - self.μ_prev = μ_base; - μ.merge_spikes_fitness(merging, - |μ̃| self.calculate_fit_simple(μ̃), - |&v| v); - μ.prune(); - μ - } - - fn value_μ<'c, 'b : 'c>(&'c self, _μ : &'c DiscreteMeasure<Loc<F, N>, F>) - -> &'c DiscreteMeasure<Loc<F, N>, F> { - &self.μ_prev - } -} - - -/// Abstraction of regularisation terms for [`generic_pointsource_fb_reg`]. -pub trait RegTerm<F : Float + ToNalgebraRealField, const N : usize> -: for<'a> Apply<&'a DiscreteMeasure<Loc<F, N>, F>, Output = F> { - /// Approximately solve the problem - /// <div>$$ - /// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + τ G(x) - /// $$</div> - /// for $G$ depending on the trait implementation. - /// - /// The parameter `mA` is $A$. An estimate for its opeator norm should be provided in - /// `mA_normest`. The initial iterate and output is `x`. The current main tolerance is `ε`. - /// - /// Returns the number of iterations taken. - fn solve_findim( - &self, - mA : &DMatrix<F::MixedType>, - g : &DVector<F::MixedType>, - τ : F, - x : &mut DVector<F::MixedType>, - mA_normest : F, - ε : F, - config : &FBGenericConfig<F> - ) -> usize; - - /// Find a point where `d` may violate the tolerance `ε`. - /// - /// If `skip_by_rough_check` is set, do not find the point if a rough check indicates that we - /// are in bounds. `ε` is the current main tolerance and `τ` a scaling factor for the - /// regulariser. - /// - /// Returns `None` if `d` is in bounds either based on the rough check, or a more precise check - /// terminating early. Otherwise returns a possibly violating point, the value of `d` there, - /// and a boolean indicating whether the found point is in bounds. - fn find_tolerance_violation<G, BT>( - &self, - d : &mut BTFN<F, G, BT, N>, - τ : F, - ε : F, - skip_by_rough_check : bool, - config : &FBGenericConfig<F>, - ) -> Option<(Loc<F, N>, F, bool)> - where BT : BTSearch<F, N, Agg=Bounds<F>>, - G : SupportGenerator<F, N, Id=BT::Data>, - G::SupportType : Mapping<Loc<F, N>,Codomain=F> - + LocalAnalysis<F, Bounds<F>, N>; - - /// Verify that `d` is in bounds `ε` for a merge candidate `μ` - /// - /// `ε` is the current main tolerance and `τ` a scaling factor for the regulariser. 
- fn verify_merge_candidate<G, BT>( - &self, - d : &mut BTFN<F, G, BT, N>, - μ : &DiscreteMeasure<Loc<F, N>, F>, - τ : F, - ε : F, - config : &FBGenericConfig<F>, - ) -> bool - where BT : BTSearch<F, N, Agg=Bounds<F>>, - G : SupportGenerator<F, N, Id=BT::Data>, - G::SupportType : Mapping<Loc<F, N>,Codomain=F> - + LocalAnalysis<F, Bounds<F>, N>; - - fn target_bounds(&self, τ : F, ε : F) -> Option<Bounds<F>>; - - /// Returns a scaling factor for the tolerance sequence. - /// - /// Typically this is the regularisation parameter. - fn tolerance_scaling(&self) -> F; -} - -#[replace_float_literals(F::cast_from(literal))] -impl<F : Float + ToNalgebraRealField, const N : usize> RegTerm<F, N> for NonnegRadonRegTerm<F> -where Cube<F, N> : P2Minimise<Loc<F, N>, F> { - fn solve_findim( - &self, - mA : &DMatrix<F::MixedType>, - g : &DVector<F::MixedType>, - τ : F, - x : &mut DVector<F::MixedType>, - mA_normest : F, - ε : F, - config : &FBGenericConfig<F> - ) -> usize { - let inner_tolerance = ε * config.inner.tolerance_mult; - let inner_it = config.inner.iterator_options.stop_target(inner_tolerance); - let inner_τ = config.inner.τ0 / mA_normest; - quadratic_nonneg(config.inner.method, mA, g, τ * self.α(), x, - inner_τ, inner_it) - } - - #[inline] - fn find_tolerance_violation<G, BT>( - &self, - d : &mut BTFN<F, G, BT, N>, - τ : F, - ε : F, - skip_by_rough_check : bool, - config : &FBGenericConfig<F>, - ) -> Option<(Loc<F, N>, F, bool)> - where BT : BTSearch<F, N, Agg=Bounds<F>>, - G : SupportGenerator<F, N, Id=BT::Data>, - G::SupportType : Mapping<Loc<F, N>,Codomain=F> - + LocalAnalysis<F, Bounds<F>, N> { - let τα = τ * self.α(); - let keep_below = τα + ε; - let maximise_above = τα + ε * config.insertion_cutoff_factor; - let refinement_tolerance = ε * config.refinement.tolerance_mult; - - // If preliminary check indicates that we are in bonds, and if it otherwise matches - // the insertion strategy, skip insertion. - if skip_by_rough_check && d.bounds().upper() <= keep_below { - None - } else { - // If the rough check didn't indicate no insertion needed, find maximising point. 
- d.maximise_above(maximise_above, refinement_tolerance, config.refinement.max_steps) - .map(|(ξ, v_ξ)| (ξ, v_ξ, v_ξ <= keep_below)) +pub(crate) fn μ_diff<F : Float, const N : usize>( + μ_new : &DiscreteMeasure<Loc<F, N>, F>, + μ_base : &DiscreteMeasure<Loc<F, N>, F>, + ν_delta : Option<&DiscreteMeasure<Loc<F, N>, F>>, + config : &FBGenericConfig<F> +) -> DiscreteMeasure<Loc<F, N>, F> { + let mut ν : DiscreteMeasure<Loc<F, N>, F> = match config.insertion_style { + InsertionStyle::Reuse => { + μ_new.iter_spikes() + .zip(μ_base.iter_masses().chain(std::iter::repeat(0.0))) + .map(|(δ, α_base)| (δ.x, α_base - δ.α)) + .collect() + }, + InsertionStyle::Zero => { + μ_new.iter_spikes() + .map(|δ| -δ) + .chain(μ_base.iter_spikes().copied()) + .collect() } - } - - fn verify_merge_candidate<G, BT>( - &self, - d : &mut BTFN<F, G, BT, N>, - μ : &DiscreteMeasure<Loc<F, N>, F>, - τ : F, - ε : F, - config : &FBGenericConfig<F>, - ) -> bool - where BT : BTSearch<F, N, Agg=Bounds<F>>, - G : SupportGenerator<F, N, Id=BT::Data>, - G::SupportType : Mapping<Loc<F, N>,Codomain=F> - + LocalAnalysis<F, Bounds<F>, N> { - let τα = τ * self.α(); - let refinement_tolerance = ε * config.refinement.tolerance_mult; - let merge_tolerance = config.merge_tolerance_mult * ε; - let keep_below = τα + merge_tolerance; - let keep_supp_above = τα - merge_tolerance; - let bnd = d.bounds(); - - return ( - bnd.lower() >= keep_supp_above - || - μ.iter_spikes().map(|&DeltaMeasure{ α : β, ref x }| { - (β == 0.0) || d.apply(x) >= keep_supp_above - }).all(std::convert::identity) - ) && ( - bnd.upper() <= keep_below - || - d.has_upper_bound(keep_below, refinement_tolerance, config.refinement.max_steps) - ) - } - - fn target_bounds(&self, τ : F, ε : F) -> Option<Bounds<F>> { - let τα = τ * self.α(); - Some(Bounds(τα - ε, τα + ε)) - } - - fn tolerance_scaling(&self) -> F { - self.α() + }; + ν.prune(); // Potential small performance improvement + // Add ν_delta if given + match ν_delta { + None => ν, + Some(ν_d) => ν + ν_d, } } #[replace_float_literals(F::cast_from(literal))] -impl<F : Float + ToNalgebraRealField, const N : usize> RegTerm<F, N> for RadonRegTerm<F> -where Cube<F, N> : P2Minimise<Loc<F, N>, F> { - fn solve_findim( - &self, - mA : &DMatrix<F::MixedType>, - g : &DVector<F::MixedType>, - τ : F, - x : &mut DVector<F::MixedType>, - mA_normest: F, - ε : F, - config : &FBGenericConfig<F> - ) -> usize { - let inner_tolerance = ε * config.inner.tolerance_mult; - let inner_it = config.inner.iterator_options.stop_target(inner_tolerance); - let inner_τ = config.inner.τ0 / mA_normest; - quadratic_unconstrained(config.inner.method, mA, g, τ * self.α(), x, - inner_τ, inner_it) +pub(crate) fn insert_and_reweigh< + 'a, F, GA, 𝒟, BTA, G𝒟, S, K, Reg, State, const N : usize +>( + μ : &mut DiscreteMeasure<Loc<F, N>, F>, + minus_τv : &BTFN<F, GA, BTA, N>, + μ_base : &DiscreteMeasure<Loc<F, N>, F>, + ν_delta: Option<&DiscreteMeasure<Loc<F, N>, F>>, + op𝒟 : &'a 𝒟, + op𝒟norm : F, + τ : F, + ε : F, + config : &FBGenericConfig<F>, + reg : &Reg, + state : &State, + stats : &mut IterInfo<F, N>, +) -> (BTFN<F, BothGenerators<GA, G𝒟>, BTA, N>, bool) +where F : Float + ToNalgebraRealField, + GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, + BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, + G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, + 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, + 𝒟::Codomain : RealMapping<F, N>, + S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, + K: RealMapping<F, N> + 
         LocalAnalysis<F, Bounds<F>, N>,
+      BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
+      DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>,
+      Reg : RegTerm<F, N>,
+      State : AlgIteratorState {
+
+    // Maximum insertion count and measure difference calculation depend on insertion style.
+    let (m, warn_insertions) = match (state.iteration(), config.bootstrap_insertions) {
+        (i, Some((l, k))) if i <= l => (k, false),
+        _ => (config.max_insertions, !state.is_quiet()),
+    };
+    let max_insertions = match config.insertion_style {
+        InsertionStyle::Zero => {
+            todo!("InsertionStyle::Zero does not currently work with FISTA, so disabled.");
+            // let n = μ.len();
+            // μ = DiscreteMeasure::new();
+            // n + m
+        },
+        InsertionStyle::Reuse => m,
+    };
+
+    // TODO: should avoid a second copy of μ here; μ_base already stores a copy.
+    let ω0 = op𝒟.apply(match ν_delta {
+        None => μ.clone(),
+        Some(ν_d) => &*μ + ν_d,
+    });
+
+    // Add points to support until within error tolerance or maximum insertion count reached.
+    let mut count = 0;
+    let (within_tolerances, d) = 'insertion: loop {
+        if μ.len() > 0 {
+            // Form finite-dimensional subproblem. The subproblem references to the original μ^k
+            // from the beginning of the iteration are all contained in the immutable c and g.
+            let Ã = op𝒟.findim_matrix(μ.iter_locations());
+            let g̃ = DVector::from_iterator(μ.len(),
+                                           μ.iter_locations()
+                                            .map(|ζ| minus_τv.apply(ζ) + ω0.apply(ζ))
+                                            .map(F::to_nalgebra_mixed));
+            let mut x = μ.masses_dvector();
+
+            // The gradient of the forward component of the inner objective is C^*𝒟Cx - g̃.
+            // We have |C^*𝒟Cx|_2 = sup_{|z|_2 ≤ 1} ⟨z, C^*𝒟Cx⟩ = sup_{|z|_2 ≤ 1} ⟨Cz|𝒟Cx⟩
+            // ≤ sup_{|z|_2 ≤ 1} |Cz|_ℳ |𝒟Cx|_∞ ≤ sup_{|z|_2 ≤ 1} |Cz|_ℳ |𝒟| |Cx|_ℳ
+            // ≤ sup_{|z|_2 ≤ 1} |z|_1 |𝒟| |x|_1 ≤ sup_{|z|_2 ≤ 1} n |z|_2 |𝒟| |x|_2
+            // = n |𝒟| |x|_2, where n is the number of points. Therefore
+            let Ã_normest = op𝒟norm * F::cast_from(μ.len());
+
+            // Solve finite-dimensional subproblem.
+            stats.inner_iters += reg.solve_findim(&Ã, &g̃, τ, &mut x, Ã_normest, ε, config);
+
+            // Update masses of μ based on solution of finite-dimensional subproblem.
+            μ.set_masses_dvector(&x);
+        }
+
+        // Form d = ω0 - τv - 𝒟μ = -𝒟(μ - μ^k) - τv for checking the proximate optimality
+        // conditions in the predual space, and finding new points for insertion, if necessary.
+        let mut d = minus_τv + op𝒟.preapply(μ_diff(μ, μ_base, ν_delta, config));
+
+        // If no merging heuristic is used, let's be more conservative about spike insertion,
+        // and skip it after first round. If merging is done, being more greedy about spike
+        // insertion also seems to improve performance.
+        let skip_by_rough_check = if let SpikeMergingMethod::None = config.merging {
+            false
+        } else {
+            count > 0
+        };
+
+        // Find a spike to insert, if needed
+        let (ξ, _v_ξ, in_bounds) = match reg.find_tolerance_violation(
+            &mut d, τ, ε, skip_by_rough_check, config
+        ) {
+            None => break 'insertion (true, d),
+            Some(res) => res,
+        };
+
+        // Break if maximum insertion count reached
+        if count >= max_insertions {
+            break 'insertion (in_bounds, d)
+        }
+
+        // No point in optimising the weight here; the finite-dimensional algorithm is fast.
+        *μ += DeltaMeasure { x : ξ, α : 0.0 };
+        count += 1;
+    };
+
+    // TODO: should redo everything if some transports cause a problem.
+    // Maybe implementation should call above loop as a closure.
+
+    if !within_tolerances && warn_insertions {
+        // Complain (but continue) if we failed to get within tolerances
+        // by inserting more points.
+ let err = format!("Maximum insertions reached without achieving \ + subproblem solution tolerance"); + println!("{}", err.red()); } - fn find_tolerance_violation<G, BT>( - &self, - d : &mut BTFN<F, G, BT, N>, - τ : F, - ε : F, - skip_by_rough_check : bool, - config : &FBGenericConfig<F>, - ) -> Option<(Loc<F, N>, F, bool)> - where BT : BTSearch<F, N, Agg=Bounds<F>>, - G : SupportGenerator<F, N, Id=BT::Data>, - G::SupportType : Mapping<Loc<F, N>,Codomain=F> - + LocalAnalysis<F, Bounds<F>, N> { - let τα = τ * self.α(); - let keep_below = τα + ε; - let keep_above = -τα - ε; - let maximise_above = τα + ε * config.insertion_cutoff_factor; - let minimise_below = -τα - ε * config.insertion_cutoff_factor; - let refinement_tolerance = ε * config.refinement.tolerance_mult; + (d, within_tolerances) +} - // If preliminary check indicates that we are in bonds, and if it otherwise matches - // the insertion strategy, skip insertion. - if skip_by_rough_check && Bounds(keep_above, keep_below).superset(&d.bounds()) { - None - } else { - // If the rough check didn't indicate no insertion needed, find maximising point. - let mx = d.maximise_above(maximise_above, refinement_tolerance, - config.refinement.max_steps); - let mi = d.minimise_below(minimise_below, refinement_tolerance, - config.refinement.max_steps); +#[replace_float_literals(F::cast_from(literal))] +pub(crate) fn prune_and_maybe_simple_merge< + 'a, F, GA, 𝒟, BTA, G𝒟, S, K, Reg, State, const N : usize +>( + μ : &mut DiscreteMeasure<Loc<F, N>, F>, + minus_τv : &BTFN<F, GA, BTA, N>, + μ_base : &DiscreteMeasure<Loc<F, N>, F>, + op𝒟 : &'a 𝒟, + τ : F, + ε : F, + config : &FBGenericConfig<F>, + reg : &Reg, + state : &State, + stats : &mut IterInfo<F, N>, +) +where F : Float + ToNalgebraRealField, + GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, + BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, + G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, + 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, + 𝒟::Codomain : RealMapping<F, N>, + S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, + K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, + BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, + DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, + Reg : RegTerm<F, N>, + State : AlgIteratorState { + if state.iteration() % config.merge_every == 0 { + let n_before_merge = μ.len(); + μ.merge_spikes(config.merging, |μ_candidate| { + let μd = μ_diff(&μ_candidate, &μ_base, None, config); + let mut d = minus_τv + op𝒟.preapply(μd); - match (mx, mi) { - (None, None) => None, - (Some((ξ, v_ξ)), None) => Some((ξ, v_ξ, keep_below >= v_ξ)), - (None, Some((ζ, v_ζ))) => Some((ζ, v_ζ, keep_above <= v_ζ)), - (Some((ξ, v_ξ)), Some((ζ, v_ζ))) => { - if v_ξ - τα > τα - v_ζ { - Some((ξ, v_ξ, keep_below >= v_ξ)) - } else { - Some((ζ, v_ζ, keep_above <= v_ζ)) - } - } - } - } + reg.verify_merge_candidate(&mut d, μ_candidate, τ, ε, &config) + .then_some(()) + }); + debug_assert!(μ.len() >= n_before_merge); + stats.merged += μ.len() - n_before_merge; } - fn verify_merge_candidate<G, BT>( - &self, - d : &mut BTFN<F, G, BT, N>, - μ : &DiscreteMeasure<Loc<F, N>, F>, - τ : F, - ε : F, - config : &FBGenericConfig<F>, - ) -> bool - where BT : BTSearch<F, N, Agg=Bounds<F>>, - G : SupportGenerator<F, N, Id=BT::Data>, - G::SupportType : Mapping<Loc<F, N>,Codomain=F> - + LocalAnalysis<F, Bounds<F>, N> { - let τα = τ * self.α(); - let refinement_tolerance = ε * config.refinement.tolerance_mult; - let merge_tolerance = 
config.merge_tolerance_mult * ε; - let keep_below = τα + merge_tolerance; - let keep_above = -τα - merge_tolerance; - let keep_supp_pos_above = τα - merge_tolerance; - let keep_supp_neg_below = -τα + merge_tolerance; - let bnd = d.bounds(); - - return ( - (bnd.lower() >= keep_supp_pos_above && bnd.upper() <= keep_supp_neg_below) - || - μ.iter_spikes().map(|&DeltaMeasure{ α : β, ref x }| { - use std::cmp::Ordering::*; - match β.partial_cmp(&0.0) { - Some(Greater) => d.apply(x) >= keep_supp_pos_above, - Some(Less) => d.apply(x) <= keep_supp_neg_below, - _ => true, - } - }).all(std::convert::identity) - ) && ( - bnd.upper() <= keep_below - || - d.has_upper_bound(keep_below, refinement_tolerance, - config.refinement.max_steps) - ) && ( - bnd.lower() >= keep_above - || - d.has_lower_bound(keep_above, refinement_tolerance, - config.refinement.max_steps) - ) - } - - fn target_bounds(&self, τ : F, ε : F) -> Option<Bounds<F>> { - let τα = τ * self.α(); - Some(Bounds(-τα - ε, τα + ε)) - } - - fn tolerance_scaling(&self) -> F { - self.α() - } + let n_before_prune = μ.len(); + μ.prune(); + debug_assert!(μ.len() <= n_before_prune); + stats.pruned += n_before_prune - μ.len(); } +#[replace_float_literals(F::cast_from(literal))] +pub(crate) fn postprocess< + F : Float, + V : Euclidean<F> + Clone, + A : GEMV<F, DiscreteMeasure<Loc<F, N>, F>, Codomain = V>, + D : DataTerm<F, V, N>, + const N : usize +> ( + mut μ : DiscreteMeasure<Loc<F, N>, F>, + config : &FBGenericConfig<F>, + dataterm : D, + opA : &A, + b : &V, +) -> DiscreteMeasure<Loc<F, N>, F> +where DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F> { + μ.merge_spikes_fitness(config.merging, + |μ̃| dataterm.calculate_fit_op(μ̃, opA, b), + |&v| v); + μ.prune(); + μ +} -/// Generic implementation of [`pointsource_fb_reg`]. +/// Iteratively solve the pointsource localisation problem using forward-backward splitting. /// -/// The method can be specialised to even primal-dual proximal splitting through the -/// [`FBSpecialisation`] parameter `specialisation`. -/// The settings in `config` have their [respective documentation](FBGenericConfig). `opA` is the +/// The settings in `config` have their [respective documentation](FBConfig). `opA` is the /// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight. /// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution /// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control /// as documented in [`alg_tools::iterate`]. /// +/// For details on the mathematical formulation, see the [module level](self) documentation. +/// /// The implementation relies on [`alg_tools::bisection_tree::BTFN`] presentations of /// sums of simple functions usign bisection trees, and the related /// [`alg_tools::bisection_tree::Aggregator`]s, to efficiently search for component functions @@ -729,252 +463,16 @@ /// /// Returns the final iterate. 
#[replace_float_literals(F::cast_from(literal))] -pub fn generic_pointsource_fb_reg< - 'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, Spec, Reg, const N : usize +pub fn pointsource_fb_reg< + 'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, Reg, const N : usize >( opA : &'a A, - reg : Reg, - op𝒟 : &'a 𝒟, - mut τ : F, - config : &FBGenericConfig<F>, - iterator : I, - mut plotter : SeqPlotter<F, N>, - mut residual : A::Observable, - mut specialisation : Spec -) -> DiscreteMeasure<Loc<F, N>, F> -where F : Float + ToNalgebraRealField, - I : AlgIteratorFactory<IterInfo<F, N>>, - Spec : FBSpecialisation<F, A::Observable, N>, - A::Observable : std::ops::MulAssign<F>, - GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, - A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> - + Lipschitz<𝒟, FloatType=F>, - BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, - G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, - 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, - 𝒟::Codomain : RealMapping<F, N>, - S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, - K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, - BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, - PlotLookup : Plotting<N>, - DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, - Reg : RegTerm<F, N> { - - // Set up parameters - let quiet = iterator.is_quiet(); - let op𝒟norm = op𝒟.opnorm_bound(); - // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled - // by τ compared to the conditional gradient approach. - let tolerance = config.tolerance * τ * reg.tolerance_scaling(); - let mut ε = tolerance.initial(); - - // Initialise operators - let preadjA = opA.preadjoint(); - - // Initialise iterates - let mut μ = DiscreteMeasure::new(); - - let mut inner_iters = 0; - let mut this_iters = 0; - let mut pruned = 0; - let mut merged = 0; - - let μ_diff = |μ_new : &DiscreteMeasure<Loc<F, N>, F>, - μ_base : &DiscreteMeasure<Loc<F, N>, F>| { - let mut ν : DiscreteMeasure<Loc<F, N>, F> = match config.insertion_style { - InsertionStyle::Reuse => { - μ_new.iter_spikes() - .zip(μ_base.iter_masses().chain(std::iter::repeat(0.0))) - .map(|(δ, α_base)| (δ.x, α_base - δ.α)) - .collect() - }, - InsertionStyle::Zero => { - μ_new.iter_spikes() - .map(|δ| -δ) - .chain(μ_base.iter_spikes().copied()) - .collect() - } - }; - ν.prune(); // Potential small performance improvement - ν - }; - - // Run the algorithm - iterator.iterate(|state| { - // Maximum insertion count and measure difference calculation depend on insertion style. - let (m, warn_insertions) = match (state.iteration(), config.bootstrap_insertions) { - (i, Some((l, k))) if i <= l => (k, false), - _ => (config.max_insertions, !quiet), - }; - let max_insertions = match config.insertion_style { - InsertionStyle::Zero => { - todo!("InsertionStyle::Zero does not currently work with FISTA, so diabled."); - // let n = μ.len(); - // μ = DiscreteMeasure::new(); - // n + m - }, - InsertionStyle::Reuse => m, - }; - - // Calculate smooth part of surrogate model. - // Using `std::mem::replace` here is not ideal, and expects that `empty_observable` - // has no significant overhead. For some reosn Rust doesn't allow us simply moving - // the residual and replacing it below before the end of this closure. - residual *= -τ; - let r = std::mem::replace(&mut residual, opA.empty_observable()); - let minus_τv = preadjA.apply(r); // minus_τv = -τA^*(Aμ^k-b) - // TODO: should avoid a second copy of μ here; μ_base already stores a copy. 
- let ω0 = op𝒟.apply(μ.clone()); // 𝒟μ^k - //let g = &minus_τv + ω0; // Linear term of surrogate model - - // Save current base point - let μ_base = μ.clone(); - - // Add points to support until within error tolerance or maximum insertion count reached. - let mut count = 0; - let (within_tolerances, d) = 'insertion: loop { - if μ.len() > 0 { - // Form finite-dimensional subproblem. The subproblem references to the original μ^k - // from the beginning of the iteration are all contained in the immutable c and g. - let à = op𝒟.findim_matrix(μ.iter_locations()); - let g̃ = DVector::from_iterator(μ.len(), - μ.iter_locations() - .map(|ζ| minus_τv.apply(ζ) + ω0.apply(ζ)) - .map(F::to_nalgebra_mixed)); - let mut x = μ.masses_dvector(); - - // The gradient of the forward component of the inner objective is C^*𝒟Cx - g̃. - // We have |C^*𝒟Cx|_2 = sup_{|z|_2 ≤ 1} ⟨z, C^*𝒟Cx⟩ = sup_{|z|_2 ≤ 1} ⟨Cz|𝒟Cx⟩ - // ≤ sup_{|z|_2 ≤ 1} |Cz|_ℳ |𝒟Cx|_∞ ≤ sup_{|z|_2 ≤ 1} |Cz|_ℳ |𝒟| |Cx|_ℳ - // ≤ sup_{|z|_2 ≤ 1} |z|_1 |𝒟| |x|_1 ≤ sup_{|z|_2 ≤ 1} n |z|_2 |𝒟| |x|_2 - // = n |𝒟| |x|_2, where n is the number of points. Therefore - let Ã_normest = op𝒟norm * F::cast_from(μ.len()); - - // Solve finite-dimensional subproblem. - inner_iters += reg.solve_findim(&Ã, &g̃, τ, &mut x, Ã_normest, ε, config); - - // Update masses of μ based on solution of finite-dimensional subproblem. - μ.set_masses_dvector(&x); - } - - // Form d = ω0 - τv - 𝒟μ = -𝒟(μ - μ^k) - τv for checking the proximate optimality - // conditions in the predual space, and finding new points for insertion, if necessary. - let mut d = &minus_τv + op𝒟.preapply(μ_diff(&μ, &μ_base)); - - // If no merging heuristic is used, let's be more conservative about spike insertion, - // and skip it after first round. If merging is done, being more greedy about spike - // insertion also seems to improve performance. - let skip_by_rough_check = if let SpikeMergingMethod::None = config.merging { - false - } else { - count > 0 - }; - - // Find a spike to insert, if needed - let (ξ, _v_ξ, in_bounds) = match reg.find_tolerance_violation( - &mut d, τ, ε, skip_by_rough_check, config - ) { - None => break 'insertion (true, d), - Some(res) => res, - }; - - // Break if maximum insertion count reached - if count >= max_insertions { - break 'insertion (in_bounds, d) - } - - // No point in optimising the weight here; the finite-dimensional algorithm is fast. - μ += DeltaMeasure { x : ξ, α : 0.0 }; - count += 1; - }; - - if !within_tolerances && warn_insertions { - // Complain (but continue) if we failed to get within tolerances - // by inserting more points. 
- let err = format!("Maximum insertions reached without achieving \ - subproblem solution tolerance"); - println!("{}", err.red()); - } - - // Merge spikes - if state.iteration() % config.merge_every == 0 { - let n_before_merge = μ.len(); - μ.merge_spikes(config.merging, |μ_candidate| { - let mut d = &minus_τv + op𝒟.preapply(μ_diff(&μ_candidate, &μ_base)); - - reg.verify_merge_candidate(&mut d, μ_candidate, τ, ε, &config) - .then_some(()) - }); - debug_assert!(μ.len() >= n_before_merge); - merged += μ.len() - n_before_merge; - } - - let n_before_prune = μ.len(); - (residual, τ) = match specialisation.update(&mut μ, &μ_base) { - (r, None) => (r, τ), - (r, Some(new_τ)) => (r, new_τ) - }; - debug_assert!(μ.len() <= n_before_prune); - pruned += n_before_prune - μ.len(); - - this_iters += 1; - - // Update main tolerance for next iteration - let ε_prev = ε; - ε = tolerance.update(ε, state.iteration()); - - // Give function value if needed - state.if_verbose(|| { - let value_μ = specialisation.value_μ(&μ); - // Plot if so requested - plotter.plot_spikes( - format!("iter {} end; {}", state.iteration(), within_tolerances), &d, - "start".to_string(), Some(&minus_τv), - reg.target_bounds(τ, ε_prev), value_μ, - ); - // Calculate mean inner iterations and reset relevant counters. - // Return the statistics - let res = IterInfo { - value : specialisation.calculate_fit(&μ, &residual) + reg.apply(value_μ), - n_spikes : value_μ.len(), - inner_iters, - this_iters, - merged, - pruned, - ε : ε_prev, - postprocessing: config.postprocessing.then(|| value_μ.clone()), - }; - inner_iters = 0; - this_iters = 0; - merged = 0; - pruned = 0; - res - }) - }); - - specialisation.postprocess(μ, config.final_merging) -} - -/// Iteratively solve the pointsource localisation problem using forward-backward splitting -/// -/// The settings in `config` have their [respective documentation](FBConfig). `opA` is the -/// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight. -/// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution -/// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control -/// as documented in [`alg_tools::iterate`]. -/// -/// For details on the mathematical formulation, see the [module level](self) documentation. -/// -/// Returns the final iterate. 
-#[replace_float_literals(F::cast_from(literal))]
-pub fn pointsource_fb_reg<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, Reg, const N : usize>(
     opA : &'a A,
     b : &A::Observable,
     reg : Reg,
     op𝒟 : &'a 𝒟,
-    config : &FBConfig<F>,
+    fbconfig : &FBConfig<F>,
     iterator : I,
-    plotter : SeqPlotter<F, N>,
+    mut plotter : SeqPlotter<F, N>,
 ) -> DiscreteMeasure<Loc<F, N>, F>
 where F : Float + ToNalgebraRealField,
       I : AlgIteratorFactory<IterInfo<F, N>>,
@@ -983,7 +481,7 @@
       A::Observable : std::ops::MulAssign<F>,
       GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
       A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>
-          + Lipschitz<𝒟, FloatType=F>,
+          + Lipschitz<&'a 𝒟, FloatType=F>,
       BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
       G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone,
       𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>,
@@ -996,17 +494,227 @@
       DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>,
       Reg : RegTerm<F, N> {
-    let initial_residual = -b;
-    let τ = config.τ0/opA.lipschitz_factor(&op𝒟).unwrap();
+    // Set up parameters
+    let config = &fbconfig.insertion;
+    let op𝒟norm = op𝒟.opnorm_bound();
+    let τ = fbconfig.τ0/opA.lipschitz_factor(&op𝒟).unwrap();
+    // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled
+    // by τ compared to the conditional gradient approach.
+    let tolerance = config.tolerance * τ * reg.tolerance_scaling();
+    let mut ε = tolerance.initial();
+
+    // Initialise iterates
+    let mut μ = DiscreteMeasure::new();
+    let mut residual = -b;
+    let mut stats = IterInfo::new();
+
+    // Run the algorithm
+    iterator.iterate(|state| {
+        // Calculate smooth part of surrogate model.
+        // Using `std::mem::replace` here is not ideal, and expects that `empty_observable`
+        // has no significant overhead. For some reason Rust doesn't allow us simply moving
+        // the residual and replacing it below before the end of this closure.
+        residual *= -τ;
+        let r = std::mem::replace(&mut residual, opA.empty_observable());
+        let minus_τv = opA.preadjoint().apply(r);
+
+        // Save current base point
+        let μ_base = μ.clone();
+
+        // Insert and reweigh
+        let (d, within_tolerances) = insert_and_reweigh(
+            &mut μ, &minus_τv, &μ_base, None,
+            op𝒟, op𝒟norm,
+            τ, ε,
+            config, &reg, state, &mut stats
+        );
+
+        // Prune and possibly merge spikes
+        prune_and_maybe_simple_merge(
+            &mut μ, &minus_τv, &μ_base,
+            op𝒟,
+            τ, ε,
+            config, &reg, state, &mut stats
+        );
+
+        // Update residual
+        residual = calculate_residual(&μ, opA, b);
+
+        // Update main tolerance for next iteration
+        let ε_prev = ε;
+        ε = tolerance.update(ε, state.iteration());
+        stats.this_iters += 1;
+
+        // Give function value if needed
+        state.if_verbose(|| {
+            // Plot if so requested
+            plotter.plot_spikes(
+                format!("iter {} end; {}", state.iteration(), within_tolerances), &d,
+                "start".to_string(), Some(&minus_τv),
+                reg.target_bounds(τ, ε_prev), &μ,
+            );
+            // Calculate mean inner iterations and reset relevant counters.
+            // Return the statistics
+            let res = IterInfo {
+                value : residual.norm2_squared_div2() + reg.apply(&μ),
+                n_spikes : μ.len(),
+                ε : ε_prev,
+                postprocessing: config.postprocessing.then(|| μ.clone()),
+                .. stats
+            };
+            stats = IterInfo::new();
+            res
+        })
+    });
+
+    postprocess(μ, config, L2Squared, opA, b)
+}
-    match config.meta {
-        FBMetaAlgorithm::None => generic_pointsource_fb_reg(
-            opA, reg, op𝒟, τ, &config.insertion, iterator, plotter, initial_residual,
-            BasicFB{ b, opA },
-        ),
-        FBMetaAlgorithm::InertiaFISTA => generic_pointsource_fb_reg(
-            opA, reg, op𝒟, τ, &config.insertion, iterator, plotter, initial_residual,
-            FISTA{ b, opA, λ : 1.0, μ_prev : DiscreteMeasure::new() },
-        ),
-    }
-}
+
+/// Iteratively solve the pointsource localisation problem using inertial forward-backward splitting.
+///
+/// The settings in `config` have their [respective documentation](FBConfig). `opA` is the
+/// forward operator $A$, $b$ the observable, and $\lambda$ the regularisation weight.
+/// The operator `op𝒟` is used for forming the proximal term. Typically it is a convolution
+/// operator. Finally, the `iterator` is an outer loop verbosity and iteration count control
+/// as documented in [`alg_tools::iterate`].
+///
+/// For details on the mathematical formulation, see the [module level](self) documentation.
+///
+/// The implementation relies on [`alg_tools::bisection_tree::BTFN`] presentations of
+/// sums of simple functions using bisection trees, and the related
+/// [`alg_tools::bisection_tree::Aggregator`]s, to efficiently search for component functions
+/// active at specific points, and to maximise their sums. Through the implementation of the
+/// [`alg_tools::bisection_tree::BT`] bisection trees, it also relies on the copy-on-write features
+/// of [`std::sync::Arc`] to only update relevant parts of the bisection tree when adding functions.
+///
+/// Returns the final iterate.
+#[replace_float_literals(F::cast_from(literal))]
+pub fn pointsource_fista_reg<
+    'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, Reg, const N : usize
+>(
+    opA : &'a A,
+    b : &A::Observable,
+    reg : Reg,
+    op𝒟 : &'a 𝒟,
+    fbconfig : &FBConfig<F>,
+    iterator : I,
+    mut plotter : SeqPlotter<F, N>,
+) -> DiscreteMeasure<Loc<F, N>, F>
+where F : Float + ToNalgebraRealField,
+      I : AlgIteratorFactory<IterInfo<F, N>>,
+      for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>,
+      //+ std::ops::Mul<F, Output=A::Observable>, <-- FIXME: compiler overflow
+      A::Observable : std::ops::MulAssign<F>,
+      GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone,
+      A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>>
+          + Lipschitz<&'a 𝒟, FloatType=F>,
+      BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>,
+      G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone,
+      𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>,
+      𝒟::Codomain : RealMapping<F, N>,
+      S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
+      K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>,
+      BTNodeLookup: BTNode<F, usize, Bounds<F>, N>,
+      Cube<F, N>: P2Minimise<Loc<F, N>, F>,
+      PlotLookup : Plotting<N>,
+      DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>,
+      Reg : RegTerm<F, N> {
+
+    // Set up parameters
+    let config = &fbconfig.insertion;
+    let op𝒟norm = op𝒟.opnorm_bound();
+    let τ = fbconfig.τ0/opA.lipschitz_factor(&op𝒟).unwrap();
+    let mut λ = 1.0;
+    // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled
+    // by τ compared to the conditional gradient approach.
+    let tolerance = config.tolerance * τ * reg.tolerance_scaling();
+    let mut ε = tolerance.initial();
+
+    // Initialise iterates
+    let mut μ = DiscreteMeasure::new();
+    let mut μ_prev = DiscreteMeasure::new();
+    let mut residual = -b;
+    let mut stats = IterInfo::new();
+    let mut warned_merging = false;
+
+    // Run the algorithm
+    iterator.iterate(|state| {
+        // Calculate smooth part of surrogate model.
+        // Using `std::mem::replace` here is not ideal, and expects that `empty_observable`
+        // has no significant overhead. For some reason Rust doesn't allow us simply moving
+        // the residual and replacing it below before the end of this closure.
+        residual *= -τ;
+        let r = std::mem::replace(&mut residual, opA.empty_observable());
+        let minus_τv = opA.preadjoint().apply(r);
+
+        // Save current base point
+        let μ_base = μ.clone();
+
+        // Insert new spikes and reweigh
+        let (d, within_tolerances) = insert_and_reweigh(
+            &mut μ, &minus_τv, &μ_base, None,
+            op𝒟, op𝒟norm,
+            τ, ε,
+            config, &reg, state, &mut stats
+        );
+
+        // (Do not) merge spikes.
+        if state.iteration() % config.merge_every == 0 {
+            match config.merging {
+                SpikeMergingMethod::None => { },
+                _ => if !warned_merging {
+                    let err = format!("Merging not supported for μFISTA");
+                    println!("{}", err.red());
+                    warned_merging = true;
+                }
+            }
+        }
+
+        // Update inertial parameters
+        let λ_prev = λ;
+        λ = 2.0 * λ_prev / ( λ_prev + (4.0 + λ_prev * λ_prev).sqrt() );
+        let θ = λ / λ_prev - λ;
+
+        // Perform inertial update on μ.
+        // This computes μ ← (1 + θ) * μ - θ * μ_prev, pruning spikes where both μ
+        // and μ_prev have zero weight. Since both have weights from the finite-dimensional
+        // subproblem with a proximal projection step, this is likely to happen when the
+        // spike is not needed. A copy of the pruned μ without arithmetic performed is
+        // stored in μ_prev.
+        let n_before_prune = μ.len();
+        μ.pruning_sub(1.0 + θ, θ, &mut μ_prev);
+        debug_assert!(μ.len() <= n_before_prune);
+        stats.pruned += n_before_prune - μ.len();
+
+        // Update residual
+        residual = calculate_residual(&μ, opA, b);
+
+        // Update main tolerance for next iteration
+        let ε_prev = ε;
+        ε = tolerance.update(ε, state.iteration());
+        stats.this_iters += 1;
+
+        // Give function value if needed
+        state.if_verbose(|| {
+            // Plot if so requested
+            plotter.plot_spikes(
+                format!("iter {} end; {}", state.iteration(), within_tolerances), &d,
+                "start".to_string(), Some(&minus_τv),
+                reg.target_bounds(τ, ε_prev), &μ_prev,
+            );
+            // Calculate mean inner iterations and reset relevant counters.
+            // Return the statistics
+            let res = IterInfo {
+                value : L2Squared.calculate_fit_op(&μ_prev, opA, b) + reg.apply(&μ_prev),
+                n_spikes : μ_prev.len(),
+                ε : ε_prev,
+                postprocessing: config.postprocessing.then(|| μ_prev.clone()),
+                .. stats
+            };
+            stats = IterInfo::new();
+            res
+        })
+    });
+
+    postprocess(μ_prev, config, L2Squared, opA, b)
+}
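Aside (not part of the patch): the inertial update in `pointsource_fista_reg` is the classical FISTA recursion expressed through λ_k = 1/t_k, with t_{k+1} = (1 + √(1 + 4t_k²))/2, so that θ = λ/λ_prev - λ equals the usual over-relaxation weight (t_k - 1)/t_{k+1}. A standalone sketch of the parameter sequence:

    // FISTA sequence via λ_k = 1/t_k; θ matches (t_k - 1)/t_{k+1}.
    fn main() {
        let mut λ: f64 = 1.0;
        for k in 1..=5 {
            let λ_prev = λ;
            λ = 2.0 * λ_prev / (λ_prev + (4.0 + λ_prev * λ_prev).sqrt());
            let θ = λ / λ_prev - λ;
            println!("k = {}: λ = {:.4}, θ = {:.4}", k, λ, θ);
            // Here the solver would form μ ← (1 + θ)·μ - θ·μ_prev via pruning_sub.
        }
    }

The first iteration gives θ = 0, so the inertial method starts from a plain forward-backward step.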
--- a/src/forward_model.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/forward_model.rs Tue Dec 31 09:34:24 2024 -0500 @@ -14,7 +14,7 @@ pub use alg_tools::linops::*; use alg_tools::euclidean::Euclidean; use alg_tools::norms::{ - L1, Linfinity, Norm + L1, Linfinity, L2, Norm }; use alg_tools::bisection_tree::*; use alg_tools::mapping::RealMapping; @@ -27,7 +27,6 @@ use crate::types::*; use crate::measures::*; use crate::seminorms::{ - Lipschitz, ConvolutionOp, SimpleConvolutionKernel, }; @@ -36,6 +35,8 @@ AutoConvolution, BoundedBy, }; +use crate::types::L2Squared; +use crate::transport::TransportLipschitz; pub type RNDM<F, const N : usize> = DiscreteMeasure<Loc<F,N>, F>; @@ -558,7 +559,7 @@ /// /// **This assumes (but does not check) that the sensors are not overlapping.** #[replace_float_literals(F::cast_from(literal))] -impl<F, BT, S, P, K, const N : usize> Lipschitz<ConvolutionOp<F, K, BT, N>> +impl<'a, F, BT, S, P, K, const N : usize> Lipschitz<&'a ConvolutionOp<F, K, BT, N>> for SensorGrid<F, S, P, BT, N> where F : Float + nalgebra::RealField + ToNalgebraRealField, BT : SensorGridBT<F, S, P, N>, @@ -570,7 +571,7 @@ type FloatType = F; - fn lipschitz_factor(&self, seminorm : &ConvolutionOp<F, K, BT, N>) -> Option<F> { + fn lipschitz_factor(&self, seminorm : &'a ConvolutionOp<F, K, BT, N>) -> Option<F> { // Sensors should not take on negative values to allow // A_*A to be upper bounded by a simple convolution of `spread`. if self.sensor.bounds().lower() < 0.0 { @@ -590,6 +591,22 @@ } } +#[replace_float_literals(F::cast_from(literal))] +impl<F, BT, S, P, const N : usize> TransportLipschitz<L2Squared> +for SensorGrid<F, S, P, BT, N> +where F : Float + nalgebra::RealField + ToNalgebraRealField, + BT : SensorGridBT<F, S, P, N>, + S : Sensor<F, N>, + P : Spread<F, N>, + Convolution<S, P> : Spread<F, N> + Lipschitz<L2> { + type FloatType = F; + + fn transport_lipschitz_factor(&self, L2Squared : L2Squared) -> Self::FloatType { + todo!("Unimplemented") + } +} + + macro_rules! make_sensorgridsupportgenerator_scalarop_rhs { ($trait:ident, $fn:ident, $trait_assign:ident, $fn_assign:ident) => { impl<F, S, P, const N : usize>
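Aside (not part of the patch): `transport_lipschitz_factor` above is still `todo!`, and the contract of `TransportLipschitz<L2Squared>` is not spelled out in this sketch. The following toy mirror only records one plausible reading of it; the trait, type, and bound are all invented for illustration:

    /// Illustration-only mirror of the TransportLipschitz idea. The bound in
    /// the comment is a guessed reading, not fixed by this patch: a factor L
    /// such that ‖Aν - Aμ‖ ≤ L · Σᵢ |αᵢ|·‖yᵢ - xᵢ‖₂² when ν moves each spike
    /// (xᵢ, αᵢ) of μ to yᵢ.
    trait ToyTransportLipschitz {
        fn transport_lipschitz_factor(&self) -> f64;
    }

    /// Hypothetical stand-in for SensorGrid.
    struct ToyOperator { hessian_bound : f64 }

    impl ToyTransportLipschitz for ToyOperator {
        fn transport_lipschitz_factor(&self) -> f64 {
            // For a C² sensor×spread kernel, a factor of this kind would follow
            // from a second-derivative bound; here it is simply stored.
            self.hessian_bound
        }
    }

    fn main() {
        let op = ToyOperator { hessian_bound : 2.5 };
        println!("L = {}", op.transport_lipschitz_factor());
    }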
--- a/src/frank_wolfe.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/frank_wolfe.rs Tue Dec 31 09:34:24 2024 -0500 @@ -68,8 +68,8 @@ use crate::regularisation::{ NonnegRadonRegTerm, RadonRegTerm, + RegTerm }; -use crate::fb::RegTerm; /// Settings for [`pointsource_fw`]. #[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)]
--- a/src/kernels/base.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/kernels/base.rs Tue Dec 31 09:34:24 2024 -0500 @@ -14,11 +14,12 @@ GlobalAnalysis, Bounded, }; -use alg_tools::mapping::Apply; -use alg_tools::maputil::{array_init, map2}; +use alg_tools::mapping::{Apply, Differentiable}; +use alg_tools::maputil::{array_init, map2, map1_indexed}; use alg_tools::sets::SetOrd; use crate::fourier::Fourier; +use crate::types::Lipschitz; /// Representation of the product of two kernels. /// @@ -55,6 +56,33 @@ } } +impl<A, B, F : Float, const N : usize> Differentiable<Loc<F, N>> +for SupportProductFirst<A, B> +where A : for<'a> Apply<&'a Loc<F, N>, Output=F> + + for<'a> Differentiable<&'a Loc<F, N>, Output=Loc<F, N>>, + B : for<'a> Apply<&'a Loc<F, N>, Output=F> + + for<'a> Differentiable<&'a Loc<F, N>, Output=Loc<F, N>> { + type Output = Loc<F, N>; + #[inline] + fn differential(&self, x : Loc<F, N>) -> Self::Output { + self.0.differential(&x) * self.1.apply(&x) + self.1.differential(&x) * self.0.apply(&x) + } +} + +impl<'a, A, B, F : Float, const N : usize> Differentiable<&'a Loc<F, N>> +for SupportProductFirst<A, B> +where A : Apply<&'a Loc<F, N>, Output=F> + + Differentiable<&'a Loc<F, N>, Output=Loc<F, N>>, + B : Apply<&'a Loc<F, N>, Output=F> + + Differentiable<&'a Loc<F, N>, Output=Loc<F, N>> { + type Output = Loc<F, N>; + #[inline] + fn differential(&self, x : &'a Loc<F, N>) -> Self::Output { + self.0.differential(&x) * self.1.apply(&x) + self.1.differential(&x) * self.0.apply(&x) + } +} + + impl<'a, A, B, F : Float, const N : usize> Support<F, N> for SupportProductFirst<A, B> where A : Support<F, N>, @@ -130,6 +158,28 @@ } } +impl<'a, A, B, F : Float, const N : usize> Differentiable<&'a Loc<F, N>> +for SupportSum<A, B> +where A : Differentiable<&'a Loc<F, N>, Output=Loc<F, N>>, + B : Differentiable<&'a Loc<F, N>, Output=Loc<F, N>> { + type Output = Loc<F, N>; + #[inline] + fn differential(&self, x : &'a Loc<F, N>) -> Self::Output { + self.0.differential(x) + self.1.differential(x) + } +} + +impl<A, B, F : Float, const N : usize> Differentiable<Loc<F, N>> +for SupportSum<A, B> +where A : for<'a> Differentiable<&'a Loc<F, N>, Output=Loc<F, N>>, + B : for<'a> Differentiable<&'a Loc<F, N>, Output=Loc<F, N>> { + type Output = Loc<F, N>; + #[inline] + fn differential(&self, x : Loc<F, N>) -> Self::Output { + self.0.differential(&x) + self.1.differential(&x) + } +} + impl<'a, A, B, F : Float, const N : usize> Support<F, N> for SupportSum<A, B> where A : Support<F, N>, @@ -174,6 +224,20 @@ } } +impl<F : Float, M : Copy, A, B> Lipschitz<M> for SupportSum<A, B> +where A : Lipschitz<M, FloatType = F>, + B : Lipschitz<M, FloatType = F> { + type FloatType = F; + + fn lipschitz_factor(&self, m : M) -> Option<F> { + match (self.0.lipschitz_factor(m), self.1.lipschitz_factor(m)) { + (Some(l0), Some(l1)) => Some(l0 + l1), + _ => None + } + } +} + + /// Representation of the convolution of two kernels. /// /// The kernels typically implement [`Support`]s and [`Mapping`][alg_tools::mapping::Mapping]. @@ -187,6 +251,16 @@ pub B ); +impl<F : Float, M, A, B> Lipschitz<M> for Convolution<A, B> +where A : Bounded<F> , + B : Lipschitz<M, FloatType = F> { + type FloatType = F; + + fn lipschitz_factor(&self, m : M) -> Option<F> { + self.1.lipschitz_factor(m).map(|l| l * self.0.bounds().uniform()) + } +} + /// Representation of the autoconvolution of a kernel. /// /// The kernel typically implements [`Support`] and [`Mapping`][alg_tools::mapping::Mapping]. 
@@ -198,6 +272,16 @@
     pub A
 );
 
+impl<F : Float, M, C> Lipschitz<M> for AutoConvolution<C>
+where C : Lipschitz<M, FloatType = F> + Bounded<F> {
+    type FloatType = F;
+
+    fn lipschitz_factor(&self, m : M) -> Option<F> {
+        self.0.lipschitz_factor(m).map(|l| l * self.0.bounds().uniform())
+    }
+}
+
+
 /// Representation a multi-dimensional product of a one-dimensional kernel.
 ///
 /// For $G: ℝ → ℝ$, this is the function $F(x\_1, …, x\_n) := \prod_{i=1}^n G(x\_i)$.
@@ -229,6 +313,45 @@
     }
 }
 
+impl<'a, G, F : Float, const N : usize> Differentiable<&'a Loc<F, N>>
+for UniformProduct<G, N>
+where G : Apply<Loc<F, 1>, Output=F> + Differentiable<Loc<F, 1>, Output=F> {
+    type Output = Loc<F, N>;
+    #[inline]
+    fn differential(&self, x : &'a Loc<F, N>) -> Loc<F, N> {
+        let vs = x.map(|y| self.0.apply(Loc([y])));
+        product_differential(x, &vs, |y| self.0.differential(Loc([y])))
+    }
+}
+
+/// Helper function to calculate the differential of $f(x)=∏_{i=1}^N g(x_i)$.
+///
+/// The vector `x` is the location, `vs` consists of the values `g(x_i)`, and
+/// `gd` calculates the derivative `g'`.
+#[inline]
+pub(crate) fn product_differential<F : Float, G : Fn(F) -> F, const N : usize>(
+    x : &Loc<F, N>,
+    vs : &Loc<F, N>,
+    gd : G
+) -> Loc<F, N> {
+    map1_indexed(x, |i, &y| {
+        gd(y) * vs.iter()
+                  .zip(0..)
+                  .filter_map(|(v, j)| (j != i).then_some(*v))
+                  .product()
+    }).into()
+}
+
+impl<G, F : Float, const N : usize> Differentiable<Loc<F, N>>
+for UniformProduct<G, N>
+where G : Apply<Loc<F, 1>, Output=F> + Differentiable<Loc<F, 1>, Output=F> {
+    type Output = Loc<F, N>;
+    #[inline]
+    fn differential(&self, x : Loc<F, N>) -> Loc<F, N> {
+        self.differential(&x)
+    }
+}
+
 impl<G, F : Float, const N : usize>
 Support<F, N>
 for UniformProduct<G, N>
 where G : Support<F, 1> {
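Aside (not part of the patch): `product_differential` implements the product rule ∂f/∂xᵢ = g'(xᵢ)·∏_{j≠i} g(xⱼ) for f(x) = ∏ⱼ g(xⱼ). A slice-based restatement with a finite-difference cross-check, using a toy g:

    // Toy slice-based restatement of product_differential; g is invented.
    fn product_differential(x: &[f64], vs: &[f64], gd: impl Fn(f64) -> f64) -> Vec<f64> {
        (0..x.len()).map(|i| {
            gd(x[i]) * vs.iter().enumerate()
                         .filter_map(|(j, &v)| (j != i).then_some(v))
                         .product::<f64>()
        }).collect()
    }

    fn main() {
        let g  = |y: f64| y * y;   // toy factor g(y) = y²
        let gd = |y: f64| 2.0 * y; // g'(y) = 2y
        let x = [1.0, 2.0, 3.0];
        let vs: Vec<f64> = x.iter().map(|&y| g(y)).collect();
        // f(x) = (x₁x₂x₃)² = 36, so the gradient is (72, 36, 24).
        println!("{:?}", product_differential(&x, &vs, gd));
        // Finite-difference check of the first component:
        let f = |x: &[f64]| x.iter().map(|&y| g(y)).product::<f64>();
        let h = 1e-6;
        println!("{}", (f(&[1.0 + h, 2.0, 3.0]) - f(&[1.0 - h, 2.0, 3.0])) / (2.0 * h));
    }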
--- a/src/kernels/gaussian.rs	Fri Apr 28 13:15:19 2023 +0300
+++ b/src/kernels/gaussian.rs	Tue Dec 31 09:34:24 2024 -0500
@@ -17,9 +17,10 @@
     Weighted,
     Bounded,
 };
-use alg_tools::mapping::Apply;
+use alg_tools::mapping::{Apply, Differentiable};
 use alg_tools::maputil::array_init;
 
+use crate::types::Lipschitz;
 use crate::fourier::Fourier;
 use super::base::*;
 use super::ball_indicator::CubeIndicator;
@@ -75,14 +76,45 @@
 impl<S, const N : usize> Apply<Loc<S::Type, N>> for Gaussian<S, N>
 where S : Constant {
     type Output = S::Type;
-    // This is not normalised to neither to have value 1 at zero or integral 1
-    // (unless the cut-off ε=0).
     #[inline]
     fn apply(&self, x : Loc<S::Type, N>) -> Self::Output {
         self.apply(&x)
     }
 }
 
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<'a, S, const N : usize> Differentiable<&'a Loc<S::Type, N>> for Gaussian<S, N>
+where S : Constant {
+    type Output = Loc<S::Type, N>;
+    #[inline]
+    fn differential(&self, x : &'a Loc<S::Type, N>) -> Self::Output {
+        x * (-self.apply(x) / self.variance.value()) // ∇f(x) = -x f(x)/σ²
+    }
+}
+
+impl<S, const N : usize> Differentiable<Loc<S::Type, N>> for Gaussian<S, N>
+where S : Constant {
+    type Output = Loc<S::Type, N>;
+    // This is not normalised either to have value 1 at zero or to have integral 1
+    // (unless the cut-off ε=0).
+    #[inline]
+    fn differential(&self, x : Loc<S::Type, N>) -> Self::Output {
+        x * (-self.apply(&x) / self.variance.value())
+    }
+}
+
+#[replace_float_literals(S::Type::cast_from(literal))]
+impl<S, const N : usize> Lipschitz<L2> for Gaussian<S, N>
+where S : Constant {
+    type FloatType = S::Type;
+    fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> {
+        // f(x)=f_1(‖x‖_2/σ) * √(2π) / √(2πσ)^N, where f_1 is one-dimensional Gaussian with
+        // variance 1. The Lipschitz factor of f_1 is e^{-1/2}/√(2π), see, e.g.,
+        // https://math.stackexchange.com/questions/3630967/is-the-gaussian-density-lipschitz-continuous
+        // Thus the Lipschitz factor we want is e^{-1/2} / (√(2πσ)^N * σ).
+        Some((-0.5).exp() / (self.scale() * self.variance.value().sqrt()))
+    }
+}
 
 #[replace_float_literals(S::Type::cast_from(literal))]
 impl<'a, S, const N : usize> Gaussian<S, N>
@@ -169,8 +201,8 @@
                       Gaussian<S, N>>;
 
 
-/// This implements $χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$
-/// where $a,b>0$ and $f$ is a gaussian kernel on $ℝ^n$.
+/// This implements $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$ and $f$ is
+/// a Gaussian kernel on $ℝ^n$. For an expression for $g$, see Lemma 3.9 in the manuscript.
 #[replace_float_literals(F::cast_from(literal))]
 impl<'a, F : Float, R, C, S, const N : usize> Apply<&'a Loc<F, N>>
 for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>>
 where R : Constant<Type=F>,
       C : Constant<Type=F>,
       S : Constant<Type=F> {
@@ -224,6 +256,89 @@
     }
 }
 
+/// This implements the differential of $g := χ\_{[-b, b]^n} \* (f χ\_{[-a, a]^n})$ where $a,b>0$
+/// and $f$ is a Gaussian kernel on $ℝ^n$. For an expression for the value of $g$, from which the
+/// derivative readily arises (at points of differentiability), see Lemma 3.9 in the manuscript.
+#[replace_float_literals(F::cast_from(literal))] +impl<'a, F : Float, R, C, S, const N : usize> Differentiable<&'a Loc<F, N>> +for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> +where R : Constant<Type=F>, + C : Constant<Type=F>, + S : Constant<Type=F> { + + type Output = Loc<F, N>; + + #[inline] + fn differential(&self, y : &'a Loc<F, N>) -> Loc<F, N> { + let Convolution(ref ind, + SupportProductFirst(ref cut, + ref gaussian)) = self; + let a = cut.r.value(); + let b = ind.r.value(); + let σ = gaussian.variance.value().sqrt(); + let π = F::PI; + let t = F::SQRT_2 * σ; + let c = σ * (8.0/π).sqrt(); + let cd = (8.0).sqrt(); // σ * (8.0/π).sqrt() / t * (√2/π) + + // Calculate the values for all component functions of the + // product. This is just the loop from apply above. + let unscaled_vs = y.map(|x| { + let c1 = -(a.min(b + x)); //(-a).max(-x-b); + let c2 = a.min(b - x); + if c1 >= c2 { + 0.0 + } else { + let e1 = F::cast_from(erf((c1 / t).as_())); + let e2 = F::cast_from(erf((c2 / t).as_())); + debug_assert!(e2 >= e1); + c * (e2 - e1) + } + }); + // This computes the gradient for each coordinate + product_differential(y, &unscaled_vs, |x| { + let c1 = -(a.min(b + x)); //(-a).max(-x-b); + let c2 = a.min(b - x); + if c1 >= c2 { + 0.0 + } else { + // erf'(z) = (2/√π)*exp(-z^2), and we get extra factor -1/√(2*σ) = -1/t + // from the chain rule + let de1 = (-(c1/t).powi(2)).exp(); + let de2 = (-(c2/t).powi(2)).exp(); + cd * (de1 - de2) + } + }) / gaussian.scale() + } +} + +impl<F : Float, R, C, S, const N : usize> Differentiable<Loc<F, N>> +for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> +where R : Constant<Type=F>, + C : Constant<Type=F>, + S : Constant<Type=F> { + + type Output = Loc<F, N>; + + #[inline] + fn differential(&self, y : Loc<F, N>) -> Loc<F, N> { + self.differential(&y) + } +} + +#[replace_float_literals(F::cast_from(literal))] +impl<'a, F : Float, R, C, S, const N : usize> Lipschitz<L2> +for Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> +where R : Constant<Type=F>, + C : Constant<Type=F>, + S : Constant<Type=F> { + type FloatType = F; + + fn lipschitz_factor(&self, L2 : L2) -> Option<F> { + todo!("This requirement some error function work.") + } +} + impl<F : Float, R, C, S, const N : usize> Convolution<CubeIndicator<R, N>, BasicCutGaussian<C, S, N>> where R : Constant<Type=F>,
--- a/src/kernels/hat_convolution.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/kernels/hat_convolution.rs Tue Dec 31 09:34:24 2024 -0500 @@ -14,9 +14,10 @@ GlobalAnalysis, Bounded, }; -use alg_tools::mapping::Apply; +use alg_tools::mapping::{Apply, Differentiable}; use alg_tools::maputil::array_init; +use crate::types::Lipschitz; use super::base::*; use super::ball_indicator::CubeIndicator; @@ -81,6 +82,59 @@ } } +#[replace_float_literals(S::Type::cast_from(literal))] +impl<S, const N : usize> Lipschitz<L1> for HatConv<S, N> +where S : Constant { + type FloatType = S::Type; + #[inline] + fn lipschitz_factor(&self, L1 : L1) -> Option<Self::FloatType> { + // For any ψ_i, we have + // ∏_{i=1}^N ψ_i(x_i) - ∏_{i=1}^N ψ_i(y_i) + // = [ψ_1(x_1)-ψ_1(y_1)] ∏_{i=2}^N ψ_i(x_i) + // + ψ_1(y_1)[ ∏_{i=2}^N ψ_i(x_i) - ∏_{i=2}^N ψ_i(y_i)] + // = ∑_{j=1}^N [ψ_j(x_j)-ψ_j(y_j)]∏_{i > j} ψ_i(x_i) ∏_{i < j} ψ_i(y_i) + // Thus + // |∏_{i=1}^N ψ_i(x_i) - ∏_{i=1}^N ψ_i(y_i)| + // ≤ ∑_{j=1}^N |ψ_j(x_j)-ψ_j(y_j)| ∏_{j ≠ i} \max_i |ψ_i| + let σ = self.radius(); + Some((self.lipschitz_1d_σ1() / (σ*σ)) * (self.value_1d_σ1(0.0) / σ)) + } +} + +impl<S, const N : usize> Lipschitz<L2> for HatConv<S, N> +where S : Constant { + type FloatType = S::Type; + #[inline] + fn lipschitz_factor(&self, L2 : L2) -> Option<Self::FloatType> { + self.lipschitz_factor(L1).map(|l1| l1 * <S::Type>::cast_from(N).sqrt()) + } +} + + +impl<'a, S, const N : usize> Differentiable<&'a Loc<S::Type, N>> for HatConv<S, N> +where S : Constant { + type Output = Loc<S::Type, N>; + #[inline] + fn differential(&self, y : &'a Loc<S::Type, N>) -> Self::Output { + let σ = self.radius(); + let σ2 = σ * σ; + let vs = y.map(|x| { + self.value_1d_σ1(x / σ) / σ + }); + product_differential(y, &vs, |x| { + self.diff_1d_σ1(x / σ) / σ2 + }) + } +} + +impl<'a, S, const N : usize> Differentiable<Loc<S::Type, N>> for HatConv<S, N> +where S : Constant { + type Output = Loc<S::Type, N>; + #[inline] + fn differential(&self, y : Loc<S::Type, N>) -> Self::Output { + self.differential(&y) + } +} #[replace_float_literals(S::Type::cast_from(literal))] impl<'a, F : Float, S, const N : usize> HatConv<S, N> @@ -97,6 +151,26 @@ (4.0/3.0) + 8.0 * y * y * (y - 1.0) } } + + /// Computes the differential of the kernel for $n=1$ with $σ=1$. + #[inline] + fn diff_1d_σ1(&self, x : F) -> F { + let y = x.abs(); + if y >= 1.0 { + 0.0 + } else if y > 0.5 { + - 8.0 * (y - 1.0).powi(2) + } else /* 0 ≤ y ≤ 0.5 */ { + (24.0 * y - 16.0) * y + } + } + + /// Computes the Lipschitz factor of the kernel for $n=1$ with $σ=1$. 
+ #[inline] + fn lipschitz_1d_σ1(&self) -> F { + // Maximal absolute differential achieved at ±0.5 by diff_1d_σ1 analysis + 2.0 + } } impl<'a, S, const N : usize> Support<S::Type, N> for HatConv<S, N> @@ -201,11 +275,77 @@ } } +#[replace_float_literals(F::cast_from(literal))] +impl<'a, F : Float, R, C, const N : usize> Differentiable<&'a Loc<F, N>> +for Convolution<CubeIndicator<R, N>, HatConv<C, N>> +where R : Constant<Type=F>, + C : Constant<Type=F> { + + type Output = Loc<F, N>; + + #[inline] + fn differential(&self, y : &'a Loc<F, N>) -> Loc<F, N> { + let Convolution(ref ind, ref hatconv) = self; + let β = ind.r.value(); + let σ = hatconv.radius(); + let σ2 = σ * σ; + + let vs = y.map(|x| { + self.value_1d_σ1(x / σ, β / σ) + }); + product_differential(y, &vs, |x| { + self.diff_1d_σ1(x / σ, β / σ) / σ2 + }) + } +} + +impl<'a, F : Float, R, C, const N : usize> Differentiable<Loc<F, N>> +for Convolution<CubeIndicator<R, N>, HatConv<C, N>> +where R : Constant<Type=F>, + C : Constant<Type=F> { + + type Output = Loc<F, N>; + + #[inline] + fn differential(&self, y : Loc<F, N>) -> Loc<F, N> { + self.differential(&y) + } +} + +/// Integrate $f$, whose support is $[c, d]$, on $[a, b]$. +/// If $b > d$, add $g()$ to the result. +#[inline] +#[replace_float_literals(F::cast_from(literal))] +fn i<F: Float>(a : F, b : F, c : F, d : F, f : impl Fn(F) -> F, + g : impl Fn() -> F) -> F { + if b < c { + 0.0 + } else if b <= d { + if a <= c { + f(b) - f(c) + } else { + f(b) - f(a) + } + } else /* b > d */ { + g() + if a <= c { + f(d) - f(c) + } else if a < d { + f(d) - f(a) + } else { + 0.0 + } + } +} #[replace_float_literals(F::cast_from(literal))] impl<F : Float, C, R, const N : usize> Convolution<CubeIndicator<R, N>, HatConv<C, N>> where R : Constant<Type=F>, C : Constant<Type=F> { + + /// Calculates the value of the 1D hat convolution further convolved by a interval indicator. + /// As both functions are piecewise polynomials, this is implemented by explicit integral over + /// all subintervals of polynomiality of the cube indicator, using easily formed + /// antiderivatives. #[inline] pub fn value_1d_σ1(&self, x : F, β : F) -> F { // The integration interval @@ -218,34 +358,10 @@ y * y } - /// Integrate $f$, whose support is $[c, d]$, on $[a, b]$. - /// If $b > d$, add $g()$ to the result. - #[inline] - fn i<F: Float>(a : F, b : F, c : F, d : F, f : impl Fn(F) -> F, - g : impl Fn() -> F) -> F { - if b < c { - 0.0 - } else if b <= d { - if a <= c { - f(b) - f(c) - } else { - f(b) - f(a) - } - } else /* b > d */ { - g() + if a <= c { - f(d) - f(c) - } else if a < d { - f(d) - f(a) - } else { - 0.0 - } - } - } - // Observe the factor 1/6 at the front from the antiderivatives below. // The factor 4 is from normalisation of the original function. (4.0/6.0) * i(a, b, -1.0, -0.5, - // (2/3) (y+1)^3 on -1 < y ≤ - 1/2 + // (2/3) (y+1)^3 on -1 < y ≤ -1/2 // The antiderivative is (2/12)(y+1)^4 = (1/6)(y+1)^4 |y| pow4(y+1.0), || i(a, b, -0.5, 0.0, @@ -266,6 +382,35 @@ ) ) } + + /// Calculates the derivative of the 1D hat convolution further convolved by a interval + /// indicator. The implementation is similar to [`Self::value_1d_σ1`], using the fact that + /// $(θ * ψ)' = θ * ψ'$. + #[inline] + pub fn diff_1d_σ1(&self, x : F, β : F) -> F { + // The integration interval + let a = x - β; + let b = x + β; + + // The factor 4 is from normalisation of the original function. 
+ 4.0 * i(a, b, -1.0, -0.5, + // (2/3) (y+1)^3 on -1 < y ≤ -1/2 + |y| (2.0/3.0) * (y + 1.0).powi(3), + || i(a, b, -0.5, 0.0, + // -2 y^3 - 2 y^2 + 1/3 on -1/2 < y ≤ 0 + |y| -2.0*(y - 1.0) * y * y + (1.0/3.0), + || i(a, b, 0.0, 0.5, + // 2 y^3 - 2 y^2 + 1/3 on 0 < y < 1/2 + |y| 2.0*(y - 1.0) * y * y + (1.0/3.0), + || i(a, b, 0.5, 1.0, + // -(2/3) (y-1)^3 on 1/2 < y ≤ 1 + |y| -(2.0/3.0) * (y - 1.0).powi(3), + || 0.0 + ) + ) + ) + ) + } } impl<F : Float, R, C, const N : usize>
--- a/src/main.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/main.rs Tue Dec 31 09:34:24 2024 -0500 @@ -10,7 +10,6 @@ // Linear operators may be written e.g. as `opA`, to keep the capital letters of mathematical // convention while referring to the type (trait) of the operator as `A`. #![allow(non_snake_case)] -// We need the drain filter for inertial prune. #![feature(drain_filter)] use clap::Parser; @@ -29,12 +28,15 @@ pub mod fourier; pub mod kernels; pub mod seminorms; +pub mod transport; pub mod forward_model; pub mod plot; pub mod subproblem; pub mod tolerance; pub mod regularisation; +pub mod dataterm; pub mod fb; +pub mod sliding_fb; pub mod frank_wolfe; pub mod pdps; pub mod run; @@ -89,7 +91,7 @@ /// Not all algorithms are available for all the experiments. /// In particular, only PDPS is available for the experiments with L¹ data term. #[arg(value_enum, value_name = "ALGORITHM", long, short = 'a', - default_values_t = [FB, FISTA, PDPS, FW, FWRelax])] + default_values_t = [FB, FISTA, PDPS, SlidingFB, FW, FWRelax])] algorithm : Vec<DefaultAlgorithm>, /// Saved algorithm configration(s) to use on the experiments
--- a/src/measures/discrete.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/measures/discrete.rs Tue Dec 31 09:34:24 2024 -0500 @@ -59,6 +59,20 @@ self.spikes.len() } + /// Replace with the zero measure. + #[inline] + pub fn clear(&mut self) { + self.spikes.clear() + } + + /// Remove `i`:th spike, not maintaining order. + /// + /// Panics if indiex is out of bounds. + #[inline] + pub fn swap_remove(&mut self, i : usize) -> DeltaMeasure<Domain, F>{ + self.spikes.swap_remove(i) + } + /// Iterate over (references to) the [`DeltaMeasure`] spikes in this measure #[inline] pub fn iter_spikes(&self) -> SpikeIter<'_, Domain, F> { @@ -109,6 +123,31 @@ pub fn prune(&mut self) { self.spikes.retain(|δ| δ.α != F::ZERO); } + + /// Add the spikes produced by `iter` to this measure. + #[inline] + pub fn extend<I : Iterator<Item=DeltaMeasure<Domain, F>>>( + &mut self, + iter : I + ) { + self.spikes.extend(iter); + } + + /// Add a spike to the measure + #[inline] + pub fn push(&mut self, δ : DeltaMeasure<Domain, F>) { + self.spikes.push(δ); + } +} + +impl<Domain, F : Num> IntoIterator for DiscreteMeasure<Domain, F> { + type Item = DeltaMeasure<Domain, F>; + type IntoIter = <Vec<DeltaMeasure<Domain, F>> as IntoIterator>::IntoIter; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.spikes.into_iter() + } } impl<Domain : Clone, F : Float> DiscreteMeasure<Domain, F> { @@ -119,7 +158,7 @@ pub fn pruning_sub(&mut self, θ : F, ζ : F, μ2 : &mut Self) { let mut μ2_get = 0; let mut μ2_insert = 0; - self.spikes.drain_filter(|&mut DeltaMeasure{ α : ref mut α_ref, ref x }| { + self.spikes.retain_mut(|&mut DeltaMeasure{ α : ref mut α_ref, ref x }| { // Get weight of spike in μ2, zero if out of bounds. let β = μ2.spikes.get(μ2_get).map_or(F::ZERO, DeltaMeasure::get_mass); μ2_get += 1; @@ -176,21 +215,47 @@ } } -impl<Domain, F :Num> Index<usize> for DiscreteMeasure<Domain, F> { - type Output = DeltaMeasure<Domain, F>; +// impl<Domain, F :Num> Index<usize> for DiscreteMeasure<Domain, F> { +// type Output = DeltaMeasure<Domain, F>; +// #[inline] +// fn index(&self, i : usize) -> &Self::Output { +// self.spikes.index(i) +// } +// } + +// impl<Domain, F :Num> IndexMut<usize> for DiscreteMeasure<Domain, F> { +// #[inline] +// fn index_mut(&mut self, i : usize) -> &mut Self::Output { +// self.spikes.index_mut(i) +// } +// } + +impl< + Domain, + F : Num, + I : std::slice::SliceIndex<[DeltaMeasure<Domain, F>]> +> Index<I> +for DiscreteMeasure<Domain, F> { + type Output = <I as std::slice::SliceIndex<[DeltaMeasure<Domain, F>]>>::Output; #[inline] - fn index(&self, i : usize) -> &Self::Output { + fn index(&self, i : I) -> &Self::Output { self.spikes.index(i) } } -impl<Domain, F :Num> IndexMut<usize> for DiscreteMeasure<Domain, F> { +impl< + Domain, + F : Num, + I : std::slice::SliceIndex<[DeltaMeasure<Domain, F>]> +> IndexMut<I> +for DiscreteMeasure<Domain, F> { #[inline] - fn index_mut(&mut self, i : usize) -> &mut Self::Output { + fn index_mut(&mut self, i : I) -> &mut Self::Output { self.spikes.index_mut(i) } } + impl<Domain, F : Num, D : Into<DeltaMeasure<Domain, F>>, const K : usize> From<[D; K]> for DiscreteMeasure<Domain, F> { #[inline]
--- a/src/pdps.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/pdps.rs Tue Dec 31 09:34:24 2024 -0500 @@ -48,12 +48,16 @@ use nalgebra::DVector; use clap::ValueEnum; -use alg_tools::iterate:: AlgIteratorFactory; +use alg_tools::iterate::{ + AlgIteratorFactory, + AlgIteratorState, +}; use alg_tools::loc::Loc; use alg_tools::euclidean::Euclidean; +use alg_tools::linops::Apply; use alg_tools::norms::{ - L1, Linfinity, - Projection, Norm, + Linfinity, + Projection, }; use alg_tools::bisection_tree::{ BTFN, @@ -71,13 +75,9 @@ use crate::types::*; use crate::measures::DiscreteMeasure; -use crate::measures::merging::{ - SpikeMerging, -}; +use crate::measures::merging::SpikeMerging; use crate::forward_model::ForwardModel; -use crate::seminorms::{ - DiscreteMeasureOp, Lipschitz -}; +use crate::seminorms::DiscreteMeasureOp; use crate::plot::{ SeqPlotter, Plotting, @@ -85,9 +85,15 @@ }; use crate::fb::{ FBGenericConfig, - FBSpecialisation, - generic_pointsource_fb_reg, - RegTerm, + insert_and_reweigh, + postprocess, + prune_and_maybe_simple_merge +}; +use crate::regularisation::RegTerm; +use crate::dataterm::{ + DataTerm, + L2Squared, + L1 }; /// Acceleration @@ -131,160 +137,54 @@ } } -/// Trait for subdifferentiable objects -pub trait Subdifferentiable<F : Float, V, U=V> { - /// Calculate some subdifferential at `x` - fn some_subdifferential(&self, x : V) -> U; +/// Trait for data terms for the PDPS +#[replace_float_literals(F::cast_from(literal))] +pub trait PDPSDataTerm<F : Float, V, const N : usize> : DataTerm<F, V, N> { + /// Calculate some subdifferential at `x` for the conjugate + fn some_subdifferential(&self, x : V) -> V; + + /// Factor of strong convexity of the conjugate + #[inline] + fn factor_of_strong_convexity(&self) -> F { + 0.0 + } + + /// Perform dual update + fn dual_update(&self, _y : &mut V, _y_prev : &V, _σ : F); } -/// Type for indicating norm-2-squared data fidelity. -pub struct L2Squared; + +#[replace_float_literals(F::cast_from(literal))] +impl<F : Float, V : Euclidean<F> + AXPY<F>, const N : usize> +PDPSDataTerm<F, V, N> +for L2Squared { + fn some_subdifferential(&self, x : V) -> V { x } -impl<F : Float, V : Euclidean<F>> Subdifferentiable<F, V> for L2Squared { - fn some_subdifferential(&self, x : V) -> V { x } + fn factor_of_strong_convexity(&self) -> F { + 1.0 + } + + #[inline] + fn dual_update(&self, y : &mut V, y_prev : &V, σ : F) { + y.axpy(1.0 / (1.0 + σ), &y_prev, σ / (1.0 + σ)); + } } -impl<F : Float + nalgebra::RealField> Subdifferentiable<F, DVector<F>> for L1 { +#[replace_float_literals(F::cast_from(literal))] +impl<F : Float + nalgebra::RealField, const N : usize> +PDPSDataTerm<F, DVector<F>, N> +for L1 { fn some_subdifferential(&self, mut x : DVector<F>) -> DVector<F> { // nalgebra sucks for providing second copies of the same stuff that's elsewhere as well. x.iter_mut() .for_each(|v| if *v != F::ZERO { *v = *v/<F as NumTraitsFloat>::abs(*v) }); x } -} -/// Specialisation of [`generic_pointsource_fb_reg`] to PDPS. -pub struct PDPS< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel<Loc<F, N>, F>, - D, - const N : usize -> { - /// The data - b : &'a A::Observable, - /// The forward operator - opA : &'a A, - /// Primal step length - τ : F, - // Dual step length - σ : F, - /// Whether acceleration should be applied (if data term supports) - acceleration : Acceleration, - /// The dataterm. Only used by the type system. - _dataterm : D, - /// Previous dual iterate. 
- y_prev : A::Observable, -} - -/// Implementation of [`FBSpecialisation`] for μPDPS with norm-2-squared data fidelity. -#[replace_float_literals(F::cast_from(literal))] -impl< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel<Loc<F, N>, F>, - const N : usize -> FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L2Squared, N> -where for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { - - fn update( - &mut self, - μ : &mut DiscreteMeasure<Loc<F, N>, F>, - μ_base : &DiscreteMeasure<Loc<F, N>, F> - ) -> (A::Observable, Option<F>) { - let σ = self.σ; - let τ = self.τ; - let ω = match self.acceleration { - Acceleration::None => 1.0, - Acceleration::Partial => { - let ω = 1.0 / (1.0 + σ).sqrt(); - self.σ = σ * ω; - self.τ = τ / ω; - ω - }, - Acceleration::Full => { - let ω = 1.0 / (1.0 + 2.0 * σ).sqrt(); - self.σ = σ * ω; - self.τ = τ / ω; - ω - }, - }; - - μ.prune(); - - let mut y = self.b.clone(); - self.opA.gemv(&mut y, 1.0 + ω, μ, -1.0); - self.opA.gemv(&mut y, -ω, μ_base, 1.0); - y.axpy(1.0 / (1.0 + σ), &self.y_prev, σ / (1.0 + σ)); - self.y_prev.copy_from(&y); - - (y, Some(self.τ)) - } - - fn calculate_fit( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - _y : &A::Observable - ) -> F { - self.calculate_fit_simple(μ) - } - - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - ) -> F { - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - residual.norm2_squared_div2() - } -} - -/// Implementation of [`FBSpecialisation`] for μPDPS with norm-1 data fidelity. -#[replace_float_literals(F::cast_from(literal))] -impl< - 'a, - F : Float + ToNalgebraRealField, - A : ForwardModel<Loc<F, N>, F>, - const N : usize -> FBSpecialisation<F, A::Observable, N> for PDPS<'a, F, A, L1, N> -where A::Observable : Projection<F, Linfinity> + Norm<F, L1>, - for<'b> &'b A::Observable : std::ops::Add<A::Observable, Output=A::Observable> { - fn update( - &mut self, - μ : &mut DiscreteMeasure<Loc<F, N>, F>, - μ_base : &DiscreteMeasure<Loc<F, N>, F> - ) -> (A::Observable, Option<F>) { - let σ = self.σ; - - μ.prune(); - - //let ȳ = self.opA.apply(μ) * 2.0 - self.opA.apply(μ_base); - //*y = proj_{[-1,1]}(&self.y_prev + (ȳ - self.b) * σ) - let mut y = self.y_prev.clone(); - self.opA.gemv(&mut y, 2.0 * σ, μ, 1.0); - self.opA.gemv(&mut y, -σ, μ_base, 1.0); - y.axpy(-σ, self.b, 1.0); + #[inline] + fn dual_update(&self, y : &mut DVector<F>, y_prev : &DVector<F>, σ : F) { + y.axpy(1.0, y_prev, σ); y.proj_ball_mut(1.0, Linfinity); - self.y_prev.copy_from(&y); - - (y, None) - } - - fn calculate_fit( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - _y : &A::Observable - ) -> F { - self.calculate_fit_simple(μ) - } - - fn calculate_fit_simple( - &self, - μ : &DiscreteMeasure<Loc<F, N>, F>, - ) -> F { - let mut residual = self.b.clone(); - self.opA.gemv(&mut residual, 1.0, μ, -1.0); - residual.norm(L1) } } @@ -306,9 +206,9 @@ b : &'a A::Observable, reg : Reg, op𝒟 : &'a 𝒟, - config : &PDPSConfig<F>, + pdpsconfig : &PDPSConfig<F>, iterator : I, - plotter : SeqPlotter<F, N>, + mut plotter : SeqPlotter<F, N>, dataterm : D, ) -> DiscreteMeasure<Loc<F, N>, F> where F : Float + ToNalgebraRealField, @@ -319,7 +219,7 @@ A::Observable : std::ops::MulAssign<F>, GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> - + Lipschitz<𝒟, FloatType=F>, + + Lipschitz<&'a 𝒟, FloatType=F>, BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, G𝒟 : SupportGenerator<F, N, SupportType 
= K, Id = usize> + Clone, 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, @@ -329,27 +229,108 @@ BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, PlotLookup : Plotting<N>, DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, - PDPS<'a, F, A, D, N> : FBSpecialisation<F, A::Observable, N>, - D : Subdifferentiable<F, A::Observable>, + D : PDPSDataTerm<F, A::Observable, N>, Reg : RegTerm<F, N> { - let y = dataterm.some_subdifferential(-b); + // Set up parameters + let config = &pdpsconfig.insertion; + let op𝒟norm = op𝒟.opnorm_bound(); let l = opA.lipschitz_factor(&op𝒟).unwrap().sqrt(); - let τ = config.τ0 / l; - let σ = config.σ0 / l; + let mut τ = pdpsconfig.τ0 / l; + let mut σ = pdpsconfig.σ0 / l; + let γ = dataterm.factor_of_strong_convexity(); + + // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled + // by τ compared to the conditional gradient approach. + let tolerance = config.tolerance * τ * reg.tolerance_scaling(); + let mut ε = tolerance.initial(); + + // Initialise iterates + let mut μ = DiscreteMeasure::new(); + let mut y = dataterm.some_subdifferential(-b); + let mut y_prev = y.clone(); + let mut stats = IterInfo::new(); + + // Run the algorithm + iterator.iterate(|state| { + // Calculate smooth part of surrogate model. + // Using `std::mem::replace` here is not ideal, and expects that `empty_observable` + // has no significant overhead. For some reosn Rust doesn't allow us simply moving + // the residual and replacing it below before the end of this closure. + y *= -τ; + let r = std::mem::replace(&mut y, opA.empty_observable()); + let minus_τv = opA.preadjoint().apply(r); + + // Save current base point + let μ_base = μ.clone(); + + // Insert and reweigh + let (d, within_tolerances) = insert_and_reweigh( + &mut μ, &minus_τv, &μ_base, None, + op𝒟, op𝒟norm, + τ, ε, + config, ®, state, &mut stats + ); + + // Prune and possibly merge spikes + prune_and_maybe_simple_merge( + &mut μ, &minus_τv, &μ_base, + op𝒟, + τ, ε, + config, ®, state, &mut stats + ); - let pdps = PDPS { - b, - opA, - τ, - σ, - acceleration : config.acceleration, - _dataterm : dataterm, - y_prev : y.clone(), - }; + // Update step length parameters + let ω = match pdpsconfig.acceleration { + Acceleration::None => 1.0, + Acceleration::Partial => { + let ω = 1.0 / (1.0 + γ * σ).sqrt(); + σ = σ * ω; + τ = τ / ω; + ω + }, + Acceleration::Full => { + let ω = 1.0 / (1.0 + 2.0 * γ * σ).sqrt(); + σ = σ * ω; + τ = τ / ω; + ω + }, + }; + + // Do dual update + y = b.clone(); // y = b + opA.gemv(&mut y, 1.0 + ω, &μ, -1.0); // y = A[(1+ω)μ^{k+1}]-b + opA.gemv(&mut y, -ω, &μ_base, 1.0); // y = A[(1+ω)μ^{k+1} - ω μ^k]-b + dataterm.dual_update(&mut y, &y_prev, σ); + y_prev.copy_from(&y); - generic_pointsource_fb_reg( - opA, reg, op𝒟, τ, &config.insertion, iterator, plotter, y, pdps - ) + // Update main tolerance for next iteration + let ε_prev = ε; + ε = tolerance.update(ε, state.iteration()); + stats.this_iters += 1; + + // Give function value if needed + state.if_verbose(|| { + // Plot if so requested + plotter.plot_spikes( + format!("iter {} end; {}", state.iteration(), within_tolerances), &d, + "start".to_string(), Some(&minus_τv), + reg.target_bounds(τ, ε_prev), &μ, + ); + // Calculate mean inner iterations and reset relevant counters. + // Return the statistics + let res = IterInfo { + value : dataterm.calculate_fit_op(&μ, opA, b) + reg.apply(&μ), + n_spikes : μ.len(), + ε : ε_prev, + postprocessing: config.postprocessing.then(|| μ.clone()), + .. 
stats + }; + stats = IterInfo::new(); + res + }) + }); + + postprocess(μ, config, dataterm, opA, b) }
--- a/src/regularisation.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/regularisation.rs Tue Dec 31 09:34:24 2024 -0500 @@ -2,6 +2,7 @@ Regularisation terms */ +use numeric_literals::replace_float_literals; use serde::{Serialize, Deserialize}; use alg_tools::norms::Norm; use alg_tools::linops::Apply; @@ -9,12 +10,35 @@ use crate::types::*; use crate::measures::{ DiscreteMeasure, + DeltaMeasure, Radon }; +use crate::fb::FBGenericConfig; #[allow(unused_imports)] // Used by documentation. -use crate::fb::generic_pointsource_fb_reg; +use crate::fb::pointsource_fb_reg; +#[allow(unused_imports)] // Used by documentation. +use crate::sliding_fb::pointsource_sliding_fb_reg; -/// The regularisation term $α\\|μ\\|\_{ℳ(Ω)} + δ_{≥ 0}(μ)$ for [`generic_pointsource_fb_reg`]. +use nalgebra::{DVector, DMatrix}; +use alg_tools::nalgebra_support::ToNalgebraRealField; +use alg_tools::mapping::Mapping; +use alg_tools::bisection_tree::{ + BTFN, + Bounds, + BTSearch, + P2Minimise, + SupportGenerator, + LocalAnalysis, + Bounded, +}; +use crate::subproblem::{ + nonneg::quadratic_nonneg, + unconstrained::quadratic_unconstrained, +}; +use alg_tools::iterate::AlgIteratorFactory; + +/// The regularisation term $α\\|μ\\|\_{ℳ(Ω)} + δ_{≥ 0}(μ)$ for [`pointsource_fb_reg`] and other +/// algorithms. /// /// The only member of the struct is the regularisation parameter α. #[derive(Copy, Clone, Debug, Serialize, Deserialize)] @@ -38,7 +62,7 @@ } -/// The regularisation term $α\|μ\|_{ℳ(Ω)}$ for [`generic_pointsource_fb_reg`]. +/// The regularisation term $α\|μ\|_{ℳ(Ω)}$ for [`pointsource_fb_reg`]. /// /// The only member of the struct is the regularisation parameter α. #[derive(Copy, Clone, Debug, Serialize, Deserialize)] @@ -82,3 +106,365 @@ } } } + +/// Abstraction of regularisation terms for [`generic_pointsource_fb_reg`]. +pub trait RegTerm<F : Float + ToNalgebraRealField, const N : usize> +: for<'a> Apply<&'a DiscreteMeasure<Loc<F, N>, F>, Output = F> { + /// Approximately solve the problem + /// <div>$$ + /// \min_{x ∈ ℝ^n} \frac{1}{2} x^⊤Ax - g^⊤ x + τ G(x) + /// $$</div> + /// for $G$ depending on the trait implementation. + /// + /// The parameter `mA` is $A$. An estimate for its opeator norm should be provided in + /// `mA_normest`. The initial iterate and output is `x`. The current main tolerance is `ε`. + /// + /// Returns the number of iterations taken. + fn solve_findim( + &self, + mA : &DMatrix<F::MixedType>, + g : &DVector<F::MixedType>, + τ : F, + x : &mut DVector<F::MixedType>, + mA_normest : F, + ε : F, + config : &FBGenericConfig<F> + ) -> usize; + + /// Find a point where `d` may violate the tolerance `ε`. + /// + /// If `skip_by_rough_check` is set, do not find the point if a rough check indicates that we + /// are in bounds. `ε` is the current main tolerance and `τ` a scaling factor for the + /// regulariser. + /// + /// Returns `None` if `d` is in bounds either based on the rough check, or a more precise check + /// terminating early. Otherwise returns a possibly violating point, the value of `d` there, + /// and a boolean indicating whether the found point is in bounds. 
+ fn find_tolerance_violation<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + τ : F, + ε : F, + skip_by_rough_check : bool, + config : &FBGenericConfig<F>, + ) -> Option<(Loc<F, N>, F, bool)> + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N>; + + /// Verify that `d` is in bounds `ε` for a merge candidate `μ` + /// + /// `ε` is the current main tolerance and `τ` a scaling factor for the regulariser. + fn verify_merge_candidate<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + μ : &DiscreteMeasure<Loc<F, N>, F>, + τ : F, + ε : F, + config : &FBGenericConfig<F>, + ) -> bool + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N>; + + /// TODO: document this + fn target_bounds(&self, τ : F, ε : F) -> Option<Bounds<F>>; + + /// Returns a scaling factor for the tolerance sequence. + /// + /// Typically this is the regularisation parameter. + fn tolerance_scaling(&self) -> F; +} + +/// Abstraction of regularisation terms for [`pointsource_sliding_fb_reg`]. +pub trait SlidingRegTerm<F : Float + ToNalgebraRealField, const N : usize> +: RegTerm<F, N> { + /// Calculate $τ[w(z) - w(y)]$ for some w in the subdifferential of the regularisation + /// term, such that $-ε ≤ τw - d ≤ ε$. + fn goodness<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + μ : &DiscreteMeasure<Loc<F, N>, F>, + y : &Loc<F, N>, + z : &Loc<F, N>, + τ : F, + ε : F, + config : &FBGenericConfig<F>, + ) -> F + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N>; +} + +#[replace_float_literals(F::cast_from(literal))] +impl<F : Float + ToNalgebraRealField, const N : usize> RegTerm<F, N> +for NonnegRadonRegTerm<F> +where Cube<F, N> : P2Minimise<Loc<F, N>, F> { + fn solve_findim( + &self, + mA : &DMatrix<F::MixedType>, + g : &DVector<F::MixedType>, + τ : F, + x : &mut DVector<F::MixedType>, + mA_normest : F, + ε : F, + config : &FBGenericConfig<F> + ) -> usize { + let inner_tolerance = ε * config.inner.tolerance_mult; + let inner_it = config.inner.iterator_options.stop_target(inner_tolerance); + let inner_τ = config.inner.τ0 / mA_normest; + quadratic_nonneg(config.inner.method, mA, g, τ * self.α(), x, + inner_τ, inner_it) + } + + #[inline] + fn find_tolerance_violation<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + τ : F, + ε : F, + skip_by_rough_check : bool, + config : &FBGenericConfig<F>, + ) -> Option<(Loc<F, N>, F, bool)> + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N> { + let τα = τ * self.α(); + let keep_below = τα + ε; + let maximise_above = τα + ε * config.insertion_cutoff_factor; + let refinement_tolerance = ε * config.refinement.tolerance_mult; + + // If preliminary check indicates that we are in bonds, and if it otherwise matches + // the insertion strategy, skip insertion. + if skip_by_rough_check && d.bounds().upper() <= keep_below { + None + } else { + // If the rough check didn't indicate no insertion needed, find maximising point. 
+ d.maximise_above(maximise_above, refinement_tolerance, config.refinement.max_steps) + .map(|(ξ, v_ξ)| (ξ, v_ξ, v_ξ <= keep_below)) + } + } + + fn verify_merge_candidate<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + μ : &DiscreteMeasure<Loc<F, N>, F>, + τ : F, + ε : F, + config : &FBGenericConfig<F>, + ) -> bool + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N> { + let τα = τ * self.α(); + let refinement_tolerance = ε * config.refinement.tolerance_mult; + let merge_tolerance = config.merge_tolerance_mult * ε; + let keep_below = τα + merge_tolerance; + let keep_supp_above = τα - merge_tolerance; + let bnd = d.bounds(); + + return ( + bnd.lower() >= keep_supp_above + || + μ.iter_spikes().map(|&DeltaMeasure{ α : β, ref x }| { + (β == 0.0) || d.apply(x) >= keep_supp_above + }).all(std::convert::identity) + ) && ( + bnd.upper() <= keep_below + || + d.has_upper_bound(keep_below, refinement_tolerance, config.refinement.max_steps) + ) + } + + fn target_bounds(&self, τ : F, ε : F) -> Option<Bounds<F>> { + let τα = τ * self.α(); + Some(Bounds(τα - ε, τα + ε)) + } + + fn tolerance_scaling(&self) -> F { + self.α() + } +} + +#[replace_float_literals(F::cast_from(literal))] +impl<F : Float + ToNalgebraRealField, const N : usize> SlidingRegTerm<F, N> +for NonnegRadonRegTerm<F> +where Cube<F, N> : P2Minimise<Loc<F, N>, F> { + + fn goodness<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + _μ : &DiscreteMeasure<Loc<F, N>, F>, + y : &Loc<F, N>, + z : &Loc<F, N>, + τ : F, + ε : F, + _config : &FBGenericConfig<F>, + ) -> F + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N> { + //let w = |x| 1.0.min((ε + d.apply(x))/(τ * self.α())); + let τw = |x| τ.min((ε + d.apply(x))/self.α()); + τw(z) - τw(y) + } +} + +#[replace_float_literals(F::cast_from(literal))] +impl<F : Float + ToNalgebraRealField, const N : usize> RegTerm<F, N> for RadonRegTerm<F> +where Cube<F, N> : P2Minimise<Loc<F, N>, F> { + fn solve_findim( + &self, + mA : &DMatrix<F::MixedType>, + g : &DVector<F::MixedType>, + τ : F, + x : &mut DVector<F::MixedType>, + mA_normest: F, + ε : F, + config : &FBGenericConfig<F> + ) -> usize { + let inner_tolerance = ε * config.inner.tolerance_mult; + let inner_it = config.inner.iterator_options.stop_target(inner_tolerance); + let inner_τ = config.inner.τ0 / mA_normest; + quadratic_unconstrained(config.inner.method, mA, g, τ * self.α(), x, + inner_τ, inner_it) + } + + fn find_tolerance_violation<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + τ : F, + ε : F, + skip_by_rough_check : bool, + config : &FBGenericConfig<F>, + ) -> Option<(Loc<F, N>, F, bool)> + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N> { + let τα = τ * self.α(); + let keep_below = τα + ε; + let keep_above = -τα - ε; + let maximise_above = τα + ε * config.insertion_cutoff_factor; + let minimise_below = -τα - ε * config.insertion_cutoff_factor; + let refinement_tolerance = ε * config.refinement.tolerance_mult; + + // If preliminary check indicates that we are in bonds, and if it otherwise matches + // the insertion strategy, skip insertion. 
+ if skip_by_rough_check && Bounds(keep_above, keep_below).superset(&d.bounds()) { + None + } else { + // If the rough check didn't indicate no insertion needed, find maximising point. + let mx = d.maximise_above(maximise_above, refinement_tolerance, + config.refinement.max_steps); + let mi = d.minimise_below(minimise_below, refinement_tolerance, + config.refinement.max_steps); + + match (mx, mi) { + (None, None) => None, + (Some((ξ, v_ξ)), None) => Some((ξ, v_ξ, keep_below >= v_ξ)), + (None, Some((ζ, v_ζ))) => Some((ζ, v_ζ, keep_above <= v_ζ)), + (Some((ξ, v_ξ)), Some((ζ, v_ζ))) => { + if v_ξ - τα > τα - v_ζ { + Some((ξ, v_ξ, keep_below >= v_ξ)) + } else { + Some((ζ, v_ζ, keep_above <= v_ζ)) + } + } + } + } + } + + fn verify_merge_candidate<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + μ : &DiscreteMeasure<Loc<F, N>, F>, + τ : F, + ε : F, + config : &FBGenericConfig<F>, + ) -> bool + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N> { + let τα = τ * self.α(); + let refinement_tolerance = ε * config.refinement.tolerance_mult; + let merge_tolerance = config.merge_tolerance_mult * ε; + let keep_below = τα + merge_tolerance; + let keep_above = -τα - merge_tolerance; + let keep_supp_pos_above = τα - merge_tolerance; + let keep_supp_neg_below = -τα + merge_tolerance; + let bnd = d.bounds(); + + return ( + (bnd.lower() >= keep_supp_pos_above && bnd.upper() <= keep_supp_neg_below) + || + μ.iter_spikes().map(|&DeltaMeasure{ α : β, ref x }| { + use std::cmp::Ordering::*; + match β.partial_cmp(&0.0) { + Some(Greater) => d.apply(x) >= keep_supp_pos_above, + Some(Less) => d.apply(x) <= keep_supp_neg_below, + _ => true, + } + }).all(std::convert::identity) + ) && ( + bnd.upper() <= keep_below + || + d.has_upper_bound(keep_below, refinement_tolerance, + config.refinement.max_steps) + ) && ( + bnd.lower() >= keep_above + || + d.has_lower_bound(keep_above, refinement_tolerance, + config.refinement.max_steps) + ) + } + + fn target_bounds(&self, τ : F, ε : F) -> Option<Bounds<F>> { + let τα = τ * self.α(); + Some(Bounds(-τα - ε, τα + ε)) + } + + fn tolerance_scaling(&self) -> F { + self.α() + } +} + +#[replace_float_literals(F::cast_from(literal))] +impl<F : Float + ToNalgebraRealField, const N : usize> SlidingRegTerm<F, N> +for RadonRegTerm<F> +where Cube<F, N> : P2Minimise<Loc<F, N>, F> { + + fn goodness<G, BT>( + &self, + d : &mut BTFN<F, G, BT, N>, + _μ : &DiscreteMeasure<Loc<F, N>, F>, + y : &Loc<F, N>, + z : &Loc<F, N>, + τ : F, + ε : F, + _config : &FBGenericConfig<F>, + ) -> F + where BT : BTSearch<F, N, Agg=Bounds<F>>, + G : SupportGenerator<F, N, Id=BT::Data>, + G::SupportType : Mapping<Loc<F, N>,Codomain=F> + + LocalAnalysis<F, Bounds<F>, N> { + + let α = self.α(); + // let w = |x| { + // let dx = d.apply(x); + // ((-ε + dx)/(τ * α)).max(1.0.min(ε + dx)/(τ * α)) + // }; + let τw = |x| { + let dx = d.apply(x); + ((-ε + dx)/α).max(τ.min(ε + dx)/α) + }; + τw(z) - τw(y) + } +} \ No newline at end of file
--- a/src/run.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/run.rs Tue Dec 31 09:34:24 2024 -0500 @@ -31,10 +31,9 @@ use alg_tools::error::DynError; use alg_tools::tabledump::TableDump; use alg_tools::sets::Cube; -use alg_tools::mapping::RealMapping; +use alg_tools::mapping::{RealMapping, Differentiable}; use alg_tools::nalgebra_support::ToNalgebraRealField; use alg_tools::euclidean::Euclidean; -use alg_tools::norms::L1; use alg_tools::lingrid::lingrid; use alg_tools::sets::SetOrd; @@ -45,13 +44,16 @@ use crate::forward_model::*; use crate::fb::{ FBConfig, + FBGenericConfig, pointsource_fb_reg, - FBMetaAlgorithm, - FBGenericConfig, + pointsource_fista_reg, +}; +use crate::sliding_fb::{ + SlidingFBConfig, + pointsource_sliding_fb_reg }; use crate::pdps::{ PDPSConfig, - L2Squared, pointsource_pdps_reg, }; use crate::frank_wolfe::{ @@ -65,14 +67,25 @@ use crate::plot::*; use crate::{AlgorithmOverrides, CommandLineArgs}; use crate::tolerance::Tolerance; -use crate::regularisation::{Regularisation, RadonRegTerm, NonnegRadonRegTerm}; +use crate::regularisation::{ + Regularisation, + RadonRegTerm, + NonnegRadonRegTerm +}; +use crate::dataterm::{ + L1, + L2Squared +}; +use alg_tools::norms::L2; /// Available algorithms and their configurations #[derive(Copy, Clone, Debug, Serialize, Deserialize)] pub enum AlgorithmConfig<F : Float> { FB(FBConfig<F>), + FISTA(FBConfig<F>), FW(FWConfig<F>), PDPS(PDPSConfig<F>), + SlidingFB(SlidingFBConfig<F>), } fn unpack_tolerance<F : Float>(v : &Vec<F>) -> Tolerance<F> { @@ -104,6 +117,11 @@ insertion : override_fb_generic(fb.insertion), .. fb }), + FISTA(fb) => FISTA(FBConfig { + τ0 : cli.tau0.unwrap_or(fb.τ0), + insertion : override_fb_generic(fb.insertion), + .. fb + }), PDPS(pdps) => PDPS(PDPSConfig { τ0 : cli.tau0.unwrap_or(pdps.τ0), σ0 : cli.sigma0.unwrap_or(pdps.σ0), @@ -115,7 +133,12 @@ merging : cli.merging.clone().unwrap_or(fw.merging), tolerance : cli.tolerance.as_ref().map(unpack_tolerance).unwrap_or(fw.tolerance), .. fw - }) + }), + SlidingFB(sfb) => SlidingFB(SlidingFBConfig { + τ0 : cli.tau0.unwrap_or(sfb.τ0), + insertion : override_fb_generic(sfb.insertion), + .. sfb + }), } } } @@ -146,6 +169,9 @@ /// The μPDPS primal-dual proximal splitting method #[clap(name = "pdps")] PDPS, + /// The Sliding FB method + #[clap(name = "sliding_fb", alias = "sfb")] + SlidingFB, } impl DefaultAlgorithm { @@ -154,16 +180,14 @@ use DefaultAlgorithm::*; match *self { FB => AlgorithmConfig::FB(Default::default()), - FISTA => AlgorithmConfig::FB(FBConfig{ - meta : FBMetaAlgorithm::InertiaFISTA, - .. Default::default() - }), + FISTA => AlgorithmConfig::FISTA(Default::default()), FW => AlgorithmConfig::FW(Default::default()), FWRelax => AlgorithmConfig::FW(FWConfig{ variant : FWVariant::Relaxed, .. Default::default() }), PDPS => AlgorithmConfig::PDPS(Default::default()), + SlidingFB => AlgorithmConfig::SlidingFB(Default::default()), } } @@ -333,10 +357,20 @@ [usize; N] : Serialize, S : Sensor<F, N> + Copy + Serialize + std::fmt::Debug, P : Spread<F, N> + Copy + Serialize + std::fmt::Debug, - Convolution<S, P>: Spread<F, N> + Bounded<F> + LocalAnalysis<F, Bounds<F>, N> + Copy, + Convolution<S, P>: Spread<F, N> + Bounded<F> + LocalAnalysis<F, Bounds<F>, N> + Copy + // TODO: shold not have differentiability as a requirement, but + // decide availability of sliding based on it. 
+ //+ for<'b> Differentiable<&'b Loc<F, N>, Output = Loc<F, N>>, + // TODO: very weird that rust only compiles with Differentiable + // instead of the above one on references, which is required by + // poitsource_sliding_fb_reg. + + Differentiable<Loc<F, N>, Output = Loc<F, N>> + + Lipschitz<L2>, + // <DefaultSG<F, S, P, N> as ForwardModel<Loc<F, N>, F>::PreadjointCodomain : for<'b> Differentiable<&'b Loc<F, N>, Output = Loc<F, N>>, AutoConvolution<P> : BoundedBy<F, K>, - K : SimpleConvolutionKernel<F, N> + LocalAnalysis<F, Bounds<F>, N> + K : SimpleConvolutionKernel<F, N> + LocalAnalysis<F, Bounds<F>, N> + Copy + Serialize + std::fmt::Debug, + //+ Differentiable<Loc<F, N>, Output = Loc<F, N>>, // TODO: shouldn't need to assume differentiability Cube<F, N>: P2Minimise<Loc<F, N>, F> + SetOrd, PlotLookup : Plotting<N>, DefaultBT<F, N> : SensorGridBT<F, S, P, N, Depth=DynamicDepth> + BTSearch<F, N>, @@ -513,6 +547,50 @@ } } }, + AlgorithmConfig::FISTA(ref algconfig) => { + match (regularisation, dataterm) { + (Regularisation::NonnegRadon(α), DataTerm::L2Squared) => { + running(); + pointsource_fista_reg( + &opA, &b, NonnegRadonRegTerm(α), &op𝒟, algconfig, + iterator, plotter + ) + }, + (Regularisation::Radon(α), DataTerm::L2Squared) => { + running(); + pointsource_fista_reg( + &opA, &b, RadonRegTerm(α), &op𝒟, algconfig, + iterator, plotter + ) + }, + _ => { + not_implemented(); + continue + } + } + }, + AlgorithmConfig::SlidingFB(ref algconfig) => { + match (regularisation, dataterm) { + (Regularisation::NonnegRadon(α), DataTerm::L2Squared) => { + running(); + pointsource_sliding_fb_reg( + &opA, &b, NonnegRadonRegTerm(α), &op𝒟, algconfig, + iterator, plotter + ) + }, + (Regularisation::Radon(α), DataTerm::L2Squared) => { + running(); + pointsource_sliding_fb_reg( + &opA, &b, RadonRegTerm(α), &op𝒟, algconfig, + iterator, plotter + ) + }, + _ => { + not_implemented(); + continue + } + } + }, AlgorithmConfig::PDPS(ref algconfig) => { running(); match (regularisation, dataterm) {
--- a/src/seminorms.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/seminorms.rs Tue Dec 31 09:34:24 2024 -0500 @@ -368,11 +368,3 @@ make_convolutionsupportgenerator_unaryop!(Neg, neg); -/// Trait for indicating that `Self` is Lipschitz with respect to the seminorm `D`. -pub trait Lipschitz<D> { - /// The type of floats - type FloatType : Float; - - /// Returns the Lipschitz factor of `self` with respect to the seminorm `D`. - fn lipschitz_factor(&self, seminorm : &D) -> Option<Self::FloatType>; -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/sliding_fb.rs Tue Dec 31 09:34:24 2024 -0500 @@ -0,0 +1,583 @@ +/*! +Solver for the point source localisation problem using a sliding +forward-backward splitting method. +*/ + +use numeric_literals::replace_float_literals; +use serde::{Serialize, Deserialize}; +//use colored::Colorize; +//use nalgebra::{DVector, DMatrix}; +use itertools::izip; +use std::iter::{Map, Flatten}; + +use alg_tools::iterate::{ + AlgIteratorFactory, + AlgIteratorState +}; +use alg_tools::euclidean::{ + Euclidean, + Dot +}; +use alg_tools::sets::Cube; +use alg_tools::loc::Loc; +use alg_tools::mapping::{Apply, Differentiable}; +use alg_tools::bisection_tree::{ + BTFN, + PreBTFN, + Bounds, + BTNodeLookup, + BTNode, + BTSearch, + P2Minimise, + SupportGenerator, + LocalAnalysis, + //Bounded, +}; +use alg_tools::mapping::RealMapping; +use alg_tools::nalgebra_support::ToNalgebraRealField; + +use crate::types::*; +use crate::measures::{ + DiscreteMeasure, + DeltaMeasure, +}; +use crate::measures::merging::{ + //SpikeMergingMethod, + SpikeMerging, +}; +use crate::forward_model::ForwardModel; +use crate::seminorms::DiscreteMeasureOp; +//use crate::tolerance::Tolerance; +use crate::plot::{ + SeqPlotter, + Plotting, + PlotLookup +}; +use crate::fb::*; +use crate::regularisation::SlidingRegTerm; +use crate::dataterm::{ + L2Squared, + //DataTerm, + calculate_residual, + calculate_residual2, +}; +use crate::transport::TransportLipschitz; + +/// Settings for [`pointsource_sliding_fb_reg`]. +#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Debug)] +#[serde(default)] +pub struct SlidingFBConfig<F : Float> { + /// Step length scaling + pub τ0 : F, + /// Transport smoothness assumption + pub ℓ0 : F, + /// Inverse of the scaling factor $θ$ of the 2-norm-squared transport cost. + /// This means that $τθ$ is the step length for the transport step. + pub inverse_transport_scaling : F, + /// Factor for deciding transport reduction based on smoothness assumption violation + pub minimum_goodness_factor : F, + /// Maximum rays to retain in transports from each source. + pub maximum_rays : usize, + /// Generic parameters + pub insertion : FBGenericConfig<F>, +} + +#[replace_float_literals(F::cast_from(literal))] +impl<F : Float> Default for SlidingFBConfig<F> { + fn default() -> Self { + SlidingFBConfig { + τ0 : 0.99, + ℓ0 : 1.5, + inverse_transport_scaling : 1.0, + minimum_goodness_factor : 1.0, // TODO: totally arbitrary choice, + // should be scaled by problem data? + maximum_rays : 10, + insertion : Default::default() + } + } +} + +/// A transport ray (including various additional computational information). +#[derive(Clone, Debug)] +pub struct Ray<Domain, F : Num> { + /// The destination of the ray, and the mass. The source is indicated in a [`RaySet`]. + δ : DeltaMeasure<Domain, F>, + /// Goodness of the data term for the aray: $v(z)-v(y)-⟨∇v(x), z-y⟩ + ℓ‖z-y‖^2$. + goodness : F, + /// Goodness of the regularisation term for the ray: $w(z)-w(y)$. + /// Initially zero until $w$ can be constructed. + reg_goodness : F, + /// Indicates that this ray also forms a component in γ^{k+1} with the mass `to_return`. + to_return : F, +} + +/// A set of transport rays with the same source point. +#[derive(Clone, Debug)] +pub struct RaySet<Domain, F : Num> { + /// Source of every ray in thset + source : Domain, + /// Mass of the diagonal ray, with destination the same as the source. 
+ diagonal: F, + /// Goodness of the data term for the diagonal ray with $z=x$: + /// $v(x)-v(y)-⟨∇v(x), x-y⟩ + ℓ‖x-y‖^2$. + diagonal_goodness : F, + /// Goodness of the data term for the diagonal ray with $z=x$: $w(x)-w(y)$. + diagonal_reg_goodness : F, + /// The non-diagonal rays. + rays : Vec<Ray<Domain, F>>, +} + +#[replace_float_literals(F::cast_from(literal))] +impl<Domain, F : Float> RaySet<Domain, F> { + fn non_diagonal_mass(&self) -> F { + self.rays + .iter() + .map(|Ray{ δ : DeltaMeasure{ α, .. }, .. }| *α) + .sum() + } + + fn total_mass(&self) -> F { + self.non_diagonal_mass() + self.diagonal + } + + fn targets<'a>(&'a self) + -> Map< + std::slice::Iter<'a, Ray<Domain, F>>, + fn(&'a Ray<Domain, F>) -> &'a DeltaMeasure<Domain, F> + > { + fn get_δ<'b, Domain, F : Float>(Ray{ δ, .. }: &'b Ray<Domain, F>) + -> &'b DeltaMeasure<Domain, F> { + δ + } + self.rays + .iter() + .map(get_δ) + } + + // fn non_diagonal_goodness(&self) -> F { + // self.rays + // .iter() + // .map(|&Ray{ δ : DeltaMeasure{ α, .. }, goodness, reg_goodness, .. }| { + // α * (goodness + reg_goodness) + // }) + // .sum() + // } + + // fn total_goodness(&self) -> F { + // self.non_diagonal_goodness() + (self.diagonal_goodness + self.diagonal_reg_goodness) + // } + + fn non_diagonal_badness(&self) -> F { + self.rays + .iter() + .map(|&Ray{ δ : DeltaMeasure{ α, .. }, goodness, reg_goodness, .. }| { + 0.0.max(- α * (goodness + reg_goodness)) + }) + .sum() + } + + fn total_badness(&self) -> F { + self.non_diagonal_badness() + + 0.0.max(- self.diagonal * (self.diagonal_goodness + self.diagonal_reg_goodness)) + } + + fn total_return(&self) -> F { + self.rays + .iter() + .map(|&Ray{ to_return, .. }| to_return) + .sum() + } +} + +#[replace_float_literals(F::cast_from(literal))] +impl<Domain : Clone, F : Num> RaySet<Domain, F> { + fn return_targets<'a>(&'a self) + -> Flatten<Map< + std::slice::Iter<'a, Ray<Domain, F>>, + fn(&'a Ray<Domain, F>) -> Option<DeltaMeasure<Domain, F>> + >> { + fn get_return<'b, Domain : Clone, F : Num>(ray: &'b Ray<Domain, F>) + -> Option<DeltaMeasure<Domain, F>> { + (ray.to_return != 0.0).then_some( + DeltaMeasure{x : ray.δ.x.clone(), α : ray.to_return} + ) + } + let tmp : Map< + std::slice::Iter<'a, Ray<Domain, F>>, + fn(&'a Ray<Domain, F>) -> Option<DeltaMeasure<Domain, F>> + > = self.rays + .iter() + .map(get_return); + tmp.flatten() + } +} + +/// Iteratively solve the pointsource localisation problem using sliding forward-backward +/// splitting +/// +/// The parametrisatio is as for [`pointsource_fb_reg`]. +/// Inertia is currently not supported. 
+#[replace_float_literals(F::cast_from(literal))] +pub fn pointsource_sliding_fb_reg<'a, F, I, A, GA, 𝒟, BTA, G𝒟, S, K, Reg, const N : usize>( + opA : &'a A, + b : &A::Observable, + reg : Reg, + op𝒟 : &'a 𝒟, + sfbconfig : &SlidingFBConfig<F>, + iterator : I, + mut plotter : SeqPlotter<F, N>, +) -> DiscreteMeasure<Loc<F, N>, F> +where F : Float + ToNalgebraRealField, + I : AlgIteratorFactory<IterInfo<F, N>>, + for<'b> &'b A::Observable : std::ops::Neg<Output=A::Observable>, + //+ std::ops::Mul<F, Output=A::Observable>, <-- FIXME: compiler overflow + A::Observable : std::ops::MulAssign<F>, + A::PreadjointCodomain : for<'b> Differentiable<&'b Loc<F, N>, Output=Loc<F, N>>, + GA : SupportGenerator<F, N, SupportType = S, Id = usize> + Clone, + A : ForwardModel<Loc<F, N>, F, PreadjointCodomain = BTFN<F, GA, BTA, N>> + + Lipschitz<&'a 𝒟, FloatType=F> + TransportLipschitz<L2Squared, FloatType=F>, + BTA : BTSearch<F, N, Data=usize, Agg=Bounds<F>>, + G𝒟 : SupportGenerator<F, N, SupportType = K, Id = usize> + Clone, + 𝒟 : DiscreteMeasureOp<Loc<F, N>, F, PreCodomain = PreBTFN<F, G𝒟, N>>, + 𝒟::Codomain : RealMapping<F, N>, + S: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N> + + Differentiable<Loc<F, N>, Output=Loc<F,N>>, + K: RealMapping<F, N> + LocalAnalysis<F, Bounds<F>, N>, + //+ Differentiable<Loc<F, N>, Output=Loc<F,N>>, + BTNodeLookup: BTNode<F, usize, Bounds<F>, N>, + Cube<F, N>: P2Minimise<Loc<F, N>, F>, + PlotLookup : Plotting<N>, + DiscreteMeasure<Loc<F, N>, F> : SpikeMerging<F>, + Reg : SlidingRegTerm<F, N> { + + assert!(sfbconfig.τ0 > 0.0 && + sfbconfig.inverse_transport_scaling > 0.0 && + sfbconfig.ℓ0 > 0.0); + + // Set up parameters + let config = &sfbconfig.insertion; + let op𝒟norm = op𝒟.opnorm_bound(); + let θ = sfbconfig.inverse_transport_scaling; + let τ = sfbconfig.τ0/opA.lipschitz_factor(&op𝒟).unwrap() + .max(opA.transport_lipschitz_factor(L2Squared) * θ); + let ℓ = sfbconfig.ℓ0; // TODO: v scaling? + // We multiply tolerance by τ for FB since our subproblems depending on tolerances are scaled + // by τ compared to the conditional gradient approach. + let tolerance = config.tolerance * τ * reg.tolerance_scaling(); + let mut ε = tolerance.initial(); + + // Initialise iterates + let mut μ : DiscreteMeasure<Loc<F, N>, F> = DiscreteMeasure::new(); + let mut μ_transported_base = DiscreteMeasure::new(); + let mut γ_hat : Vec<RaySet<Loc<F, N>, F>> = Vec::new(); // γ̂_k and extra info + let mut residual = -b; + let mut stats = IterInfo::new(); + + // Run the algorithm + iterator.iterate(|state| { + // Calculate smooth part of surrogate model. + // Using `std::mem::replace` here is not ideal, and expects that `empty_observable` + // has no significant overhead. For some reosn Rust doesn't allow us simply moving + // the residual and replacing it below before the end of this closure. + residual *= -τ; + let r = std::mem::replace(&mut residual, opA.empty_observable()); + let minus_τv = opA.preadjoint().apply(r); + + // Save current base point and shift μ to new positions. + let μ_base = μ.clone(); + for δ in μ.iter_spikes_mut() { + δ.x += minus_τv.differential(&δ.x) * θ; + } + let mut μ_transported = μ.clone(); + + assert_eq!(μ.len(), γ_hat.len()); + + // Calculate the goodness λ formed from γ_hat (≈ γ̂_k) and γ^{k+1}, where the latter + // transports points x from μ_base to points y in μ as shifted above, or “returns” + // them “home” to z given by the rays in γ_hat. 
Returning is necessary if the rays + // are not “good” for the smoothness assumptions, or if γ_hat has more mass than + // μ_base. + let mut total_goodness = 0.0; // data term goodness + let mut total_reg_goodness = 0.0; // regulariser goodness + let minimum_goodness = - ε * sfbconfig.minimum_goodness_factor; + + for (δ, r) in izip!(μ.iter_spikes(), γ_hat.iter_mut()) { + // Calculate data term goodness for all rays. + let &DeltaMeasure{ x : ref y, α : δ_mass } = δ; + let x = &r.source; + let mvy = minus_τv.apply(y); + let mdvx = minus_τv.differential(x); + let mut r_total_mass = 0.0; // Total mass of all rays with source r.source. + let mut bad_mass = 0.0; + let mut calc_goodness = |goodness : &mut F, reg_goodness : &mut F, α, z : &Loc<F, N>| { + *reg_goodness = 0.0; // Initial guess + *goodness = mvy - minus_τv.apply(z) + mdvx.dot(&(z-y)) + + ℓ * z.dist2_squared(&y); + total_goodness += *goodness * α; + r_total_mass += α; // TODO: should this include to_return from staging? (Probably not) + if *goodness < 0.0 { + bad_mass += α; + } + }; + for ray in r.rays.iter_mut() { + calc_goodness(&mut ray.goodness, &mut ray.reg_goodness, ray.δ.α, &ray.δ.x); + } + calc_goodness(&mut r.diagonal_goodness, &mut r.diagonal_reg_goodness, r.diagonal, x); + + // If the total mass of the ray set is less than that of μ at the same source, + // a diagonal component needs to be added to be able to (attempt to) transport + // all mass of μ. In the opposite case, we need to construct γ_{k+1} to ‘return’ + // the the extra mass of γ̂_k to the target z. We return mass from the oldest “bad” + // rays in the set. + if δ_mass >= r_total_mass { + r.diagonal += δ_mass - r_total_mass; + } else { + let mut reduce_transport = r_total_mass - δ_mass; + let mut good_needed = (bad_mass - reduce_transport).max(0.0); + // NOTE: reg_goodness is zero at this point, so it is not used in this code. + let mut reduce_ray = |goodness, to_return : Option<&mut F>, α : &mut F| { + if reduce_transport > 0.0 { + let return_amount = if goodness < 0.0 { + α.min(reduce_transport) + } else { + let amount = α.min(good_needed); + good_needed -= amount; + amount + }; + + if return_amount > 0.0 { + reduce_transport -= return_amount; + // Adjust total goodness by returned amount + total_goodness -= goodness * return_amount; + to_return.map(|tr| *tr += return_amount); + *α -= return_amount; + *α > 0.0 + } else { + true + } + } else { + true + } + }; + r.rays.retain_mut(|ray| { + reduce_ray(ray.goodness, Some(&mut ray.to_return), &mut ray.δ.α) + }); + // A bad diagonal is simply reduced without any 'return'. + // It was, after all, just added to match μ, but there is no need to match it. + // It's just a heuristic. + // TODO: Maybe a bad diagonal should be the first to go. + reduce_ray(r.diagonal_goodness, None, &mut r.diagonal); + } + } + + // Solve finite-dimensional subproblem several times until the dual variable for the + // regularisation term conforms to the assumptions made for the transport above. + let (d, within_tolerances) = 'adapt_transport: loop { + // If transport violates goodness requirements, shift it to ‘return’ mass to z, + // forcing y = z. Based on the badness of each ray set (sum of bad rays' goodness), + // we proportionally distribute the reductions to each ray set, and within each ray + // set, prioritise reducing the oldest bad rays' weight. 
+ let tg = total_goodness + total_reg_goodness; + let adaptation_needed = minimum_goodness - tg; + if adaptation_needed > 0.0 { + let total_badness = γ_hat.iter().map(|r| r.total_badness()).sum(); + + let mut return_ray = |goodness : F, + reg_goodness : F, + to_return : Option<&mut F>, + α : &mut F, + left_to_return : &mut F| { + let g = goodness + reg_goodness; + assert!(*α >= 0.0 && *left_to_return >= 0.0); + if *left_to_return > 0.0 && g < 0.0 { + let return_amount = (*left_to_return / (-g)).min(*α); + *left_to_return -= (-g) * return_amount; + total_goodness -= goodness * return_amount; + total_reg_goodness -= reg_goodness * return_amount; + to_return.map(|tr| *tr += return_amount); + *α -= return_amount; + *α > 0.0 + } else { + true + } + }; + + for r in γ_hat.iter_mut() { + let mut left_to_return = adaptation_needed * r.total_badness() / total_badness; + if left_to_return > 0.0 { + for ray in r.rays.iter_mut() { + return_ray(ray.goodness, ray.reg_goodness, + Some(&mut ray.to_return), &mut ray.δ.α, &mut left_to_return); + } + return_ray(r.diagonal_goodness, r.diagonal_reg_goodness, + None, &mut r.diagonal, &mut left_to_return); + } + } + } + + // Construct μ_k + (π_#^1-π_#^0)γ_{k+1}. + // This can be broken down into + // + // μ_transported_base = [μ - π_#^0 (γ_shift + γ_return)] + π_#^1 γ_return, and + // μ_transported = π_#^1 γ_shift + // + // where γ_shift is our “true” γ_{k+1}, and γ_return is the return compoennt. + // The former can be constructed from δ.x and δ_new.x for δ in μ_base and δ_new in μ + // (which has already been shifted), and the mass stored in a γ_hat ray's δ measure + // The latter can be constructed from γ_hat rays' source and destination with the + // to_return mass. + // + // Note that μ_transported is constructed to have the same spike locations as μ, but + // to have same length as μ_base. This loop does not iterate over the spikes of μ + // (and corresponding transports of γ_hat) that have been newly added in the current + // 'adapt_transport loop. + for (δ, δ_transported, r) in izip!(μ_base.iter_spikes(), + μ_transported.iter_spikes_mut(), + γ_hat.iter()) { + let &DeltaMeasure{ref x, α} = δ; + debug_assert_eq!(*x, r.source); + let shifted_mass = r.total_mass(); + let ret_mass = r.total_return(); + // μ - π_#^0 (γ_shift + γ_return) + μ_transported_base += DeltaMeasure { x : *x, α : α - shifted_mass - ret_mass }; + // π_#^1 γ_return + μ_transported_base.extend(r.return_targets()); + // π_#^1 γ_shift + δ_transported.set_mass(shifted_mass); + } + // Calculate transported_minus_τv = -τA_*(A[μ_transported + μ_transported_base]-b) + let transported_residual = calculate_residual2(&μ_transported, + &μ_transported_base, + opA, b); + let transported_minus_τv = opA.preadjoint() + .apply(transported_residual); + + // Construct μ^{k+1} by solving finite-dimensional subproblems and insert new spikes. + let (mut d, within_tolerances) = insert_and_reweigh( + &mut μ, &transported_minus_τv, &μ_transported, Some(&μ_transported_base), + op𝒟, op𝒟norm, + τ, ε, + config, ®, state, &mut stats + ); + + // We have d = ω0 - τv - 𝒟μ = -𝒟(μ - μ^k) - τv; more precisely + // d = minus_τv + op𝒟.preapply(μ_diff(μ, μ_transported, config)); + // We “essentially” assume that the subdifferential w of the regularisation term + // satisfies w'(y)=0, so for a “goodness” estimate τ[w(y)-w(z)-w'(y)(z-y)] + // that incorporates the assumption, we need to calculate τ[w(z) - w(y)] for + // some w in the subdifferential of the regularisation term, such that + // -ε ≤ τw - d ≤ ε. 
+            for r in γ_hat.iter_mut() {
+                for ray in r.rays.iter_mut() {
+                    ray.reg_goodness = reg.goodness(&mut d, &μ, &r.source, &ray.δ.x, τ, ε, config);
+                    total_reg_goodness += ray.reg_goodness * ray.δ.α;
+                }
+            }
+
+            // If the update of the regularisation term goodness didn't invalidate the
+            // minimum goodness requirements, we have found our step. Otherwise we need
+            // to keep reducing transport by repeating the loop.
+            if total_goodness + total_reg_goodness >= minimum_goodness {
+                break 'adapt_transport (d, within_tolerances)
+            }
+        };
+
+        // Update γ_hat to the new locations
+        for (δ_new, r) in izip!(μ.iter_spikes(), γ_hat.iter_mut()) {
+            // Prune rays that only had a return component, as the return component
+            // becomes a diagonal in γ̂^{k+1}.
+            r.rays.retain(|ray| ray.δ.α != 0.0);
+            // Otherwise zero out the return component, or stage rays for pruning
+            // to keep memory and computational demands reasonable.
+            let n_rays = r.rays.len();
+            for (ray, ir) in izip!(r.rays.iter_mut(), (0..n_rays).rev()) {
+                if ir >= sfbconfig.maximum_rays {
+                    // Only keep sfbconfig.maximum_rays - 1 previous rays, staging others
+                    // for pruning in the next step.
+                    ray.to_return = ray.δ.α;
+                    ray.δ.α = 0.0;
+                } else {
+                    ray.to_return = 0.0;
+                }
+                ray.goodness = 0.0; // TODO: probably not needed
+                ray.reg_goodness = 0.0;
+            }
+            // Add a new ray for the current diagonal component
+            if r.diagonal > 0.0 {
+                r.rays.push(Ray{
+                    δ : DeltaMeasure{x : r.source, α : r.diagonal},
+                    goodness : 0.0,
+                    reg_goodness : 0.0,
+                    to_return : 0.0,
+                });
+                // TODO: Maybe this does not need to be done here; it may be sufficient
+                // to do it where the goodness is calculated.
+                r.diagonal = 0.0;
+            }
+            r.diagonal_goodness = 0.0;
+
+            // Shift the source
+            r.source = δ_new.x;
+        }
+        // Extend to new spikes
+        γ_hat.extend(μ[γ_hat.len()..].iter().map(|δ_new| {
+            RaySet{
+                source : δ_new.x,
+                rays : [].into(),
+                diagonal : 0.0,
+                diagonal_goodness : 0.0,
+                diagonal_reg_goodness : 0.0
+            }
+        }));
+
+        // Prune spikes with zero weight. This also moves the marginal differences of
+        // the corresponding transports from γ_hat to μ_transported_base.
+        // TODO: optimise the standard prune with swap_remove.
+        μ_transported_base.clear();
+        let mut i = 0;
+        assert_eq!(μ.len(), γ_hat.len());
+        while i < μ.len() {
+            if μ[i].α == F::ZERO {
+                μ.swap_remove(i);
+                let r = γ_hat.swap_remove(i);
+                μ_transported_base.extend(r.targets().cloned());
+                μ_transported_base -= DeltaMeasure{ α : r.non_diagonal_mass(), x : r.source };
+            } else {
+                i += 1;
+            }
+        }
+
+        // TODO: how to merge?
+
+        // Update the residual
+        residual = calculate_residual(&μ, opA, b);
+
+        // Update the main tolerance for the next iteration
+        let ε_prev = ε;
+        ε = tolerance.update(ε, state.iteration());
+        stats.this_iters += 1;
+
+        // Give the function value if needed
+        state.if_verbose(|| {
+            // Plot if so requested
+            plotter.plot_spikes(
+                format!("iter {} end; {}", state.iteration(), within_tolerances), &d,
+                "start".to_string(), Some(&minus_τv),
+                reg.target_bounds(τ, ε_prev), &μ,
+            );
+            // Return the statistics and reset the relevant counters.
+            let res = IterInfo {
+                value : residual.norm2_squared_div2() + reg.apply(&μ),
+                n_spikes : μ.len(),
+                ε : ε_prev,
+                postprocessing: config.postprocessing.then(|| μ.clone()),
+                .. stats
+            };
+            stats = IterInfo::new();
+            res
+        })
+    });
+
+    postprocess(μ, config, L2Squared, opA, b)
+}
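The loop above relies on several `RaySet` bookkeeping helpers (`total_mass`, `total_return`, `total_badness`, `targets`, `return_targets`, `non_diagonal_mass`) that are not included in this patch. A minimal sketch of what they are assumed to compute, with the struct layout inferred from the usage above; the signatures and bodies are illustrative guesses, not the actual definitions:

impl<F : Float, const N : usize> RaySet<F, N> {
    /// Total mass of γ_shift at this source: ray masses plus the diagonal component.
    fn total_mass(&self) -> F {
        self.rays.iter().fold(self.diagonal, |a, ray| a + ray.δ.α)
    }

    /// Mass staged to be ‘returned’ to the ray targets.
    fn total_return(&self) -> F {
        self.rays.iter().fold(F::ZERO, |a, ray| a + ray.to_return)
    }

    /// Transported mass excluding the diagonal component.
    fn non_diagonal_mass(&self) -> F {
        self.rays.iter().fold(F::ZERO, |a, ray| a + ray.δ.α)
    }

    /// Mass-weighted total of the negative parts of the combined goodness.
    fn total_badness(&self) -> F {
        let bad = |g : F, α : F| (-g).max(F::ZERO) * α;
        self.rays.iter().fold(
            bad(self.diagonal_goodness + self.diagonal_reg_goodness, self.diagonal),
            |a, ray| a + bad(ray.goodness + ray.reg_goodness, ray.δ.α)
        )
    }

    /// The rays' targets as δ-measures (π_#^1 γ_shift from this source).
    fn targets(&self) -> impl Iterator<Item = &DeltaMeasure<Loc<F, N>, F>> + '_ {
        self.rays.iter().map(|ray| &ray.δ)
    }

    /// Return mass as δ-measures at the ray targets (π_#^1 γ_return).
    fn return_targets(&self) -> impl Iterator<Item = DeltaMeasure<Loc<F, N>, F>> + '_ {
        self.rays.iter()
                 .filter(|ray| ray.to_return != F::ZERO)
                 .map(|ray| DeltaMeasure{ x : ray.δ.x, α : ray.to_return })
    }
}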
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/transport.rs Tue Dec 31 09:34:24 2024 -0500 @@ -0,0 +1,17 @@
+/*! Definitions related to optimal transport */
+
+use crate::types::*;
+
+pub trait TransportLipschitz<Cost> {
+    /// Type of floats
+    type FloatType : Float;
+
+    /// Returns the transport Lipschitz factor of `Self`.
+    ///
+    /// If `Self` is a linear operator $A$ on $ℳ(Ω)$, and `Cost` represents the spatial
+    /// cost function $c$, this factor $L$ is such that, for all $0 ≤ λ ∈ ℳ(Ω^2)$,
+    /// $$
+    /// \norm{A(π_\#^1-π_\#^0)λ}^2 ≤ L^2 \norm{λ}_{ℳ(Ω^2)} ∫ c(x, y) dλ(x, y).
+    /// $$
+    fn transport_lipschitz_factor(&self, cost : Cost) -> Self::FloatType;
+}
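To see how such a factor might be supplied: for an operator whose preadjoint has a spatially Lipschitz kernel φ, Cauchy–Schwarz gives ‖A(π_#^1-π_#^0)λ‖ ≤ Lip(φ) ∫ ‖x-y‖ dλ ≤ Lip(φ) ‖λ‖_{ℳ(Ω²)}^{1/2} (∫ ‖x-y‖² dλ)^{1/2}, so L = Lip(φ) works for the cost c(x, y) = ‖x-y‖². A hypothetical blanket implementation in terms of the `Lipschitz` trait from src/types.rs; both the `L2` marker from alg_tools and the reading of `Lipschitz<L2>` as a bound on this kernel factor are assumptions of this sketch:

use alg_tools::norms::L2;

impl<A : Lipschitz<L2>> TransportLipschitz<L2Squared> for A {
    type FloatType = A::FloatType;

    fn transport_lipschitz_factor(&self, _cost : L2Squared) -> Self::FloatType {
        // By the Cauchy–Schwarz estimate above, the spatial Lipschitz factor of
        // the kernel also serves as the transport Lipschitz factor for c = ‖x-y‖².
        self.lipschitz_factor(L2)
            .expect("operator does not provide an L² Lipschitz factor")
    }
}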
--- a/src/types.rs Fri Apr 28 13:15:19 2023 +0300 +++ b/src/types.rs Tue Dec 31 09:34:24 2024 -0500 @@ -43,6 +43,22 @@ pub postprocessing : Option<DiscreteMeasure<Loc<F, N>, F>>, } +impl<F : Float, const N : usize> IterInfo<F, N> { + /// Initialise statistics with zeros. `ε` and `value` are unspecified. + pub fn new() -> Self { + IterInfo { + value : F::NAN, + n_spikes : 0, + this_iters : 0, + merged : 0, + pruned : 0, + inner_iters : 0, + ε : F::NAN, + postprocessing : None, + } + } +} + impl<F, const N : usize> LogRepr for IterInfo<F, N> where F : LogRepr + Float { fn logrepr(&self) -> ColoredString { format!("{}\t| N = {}, ε = {:.8}, inner_iters_mean = {}, merged+pruned_mean = {}+{}", @@ -95,3 +111,16 @@ } } } + +/// Type for indicating norm-2-squared data fidelity or transport cost. +#[derive(Clone, Copy, Serialize, Deserialize)] +pub struct L2Squared; + +/// Trait for indicating that `Self` is Lipschitz with respect to the (semi)norm `D`. +pub trait Lipschitz<D> { + /// The type of floats + type FloatType : Float; + + /// Returns the Lipschitz factor of `self` with respect to the (semi)norm `D`. + fn lipschitz_factor(&self, seminorm : D) -> Option<Self::FloatType>; +}
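As a minimal illustration of the new `Lipschitz` trait, a scaling operator v ↦ a·v is Lipschitz with respect to any (semi)norm, with factor |a|; the `Scale` type here is purely hypothetical, not part of the crate:

/// Hypothetical operator mapping v ↦ a·v, for illustration only.
pub struct Scale<F : Float>(pub F);

impl<F : Float, D> Lipschitz<D> for Scale<F> {
    type FloatType = F;

    fn lipschitz_factor(&self, _seminorm : D) -> Option<F> {
        // ‖a·v - a·w‖ = |a|·‖v - w‖ for any norm, so the factor is |a|.
        Some(self.0.abs())
    }
}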