Fri, 18 Nov 2022 10:29:50 +0200
Improvements and minor fixes to bisection tree refinement.
--- a/src/bisection_tree/bt.rs	Tue Nov 01 09:24:45 2022 +0200
+++ b/src/bisection_tree/bt.rs	Fri Nov 18 10:29:50 2022 +0200
@@ -109,7 +109,7 @@
     fn lower_or(&self) -> Self::Lower;
 
     /// Returns the numeric value of the depth
-    fn value(&self) -> usize;
+    fn value(&self) -> u32;
 }
 
 /// Dynamic (runtime) [`Depth`] for a [`BT`].
@@ -136,8 +136,8 @@
     }
 
     #[inline]
-    fn value(&self) -> usize {
-        self.0 as usize
+    fn value(&self) -> u32 {
+        self.0 as u32
     }
 }
 
@@ -145,7 +145,7 @@
     type Lower = Self;
     fn lower(&self) -> Option<Self::Lower> { None }
     fn lower_or(&self) -> Self::Lower { Const }
-    fn value(&self) -> usize { 0 }
+    fn value(&self) -> u32 { 0 }
 }
 
 macro_rules! impl_constdepth {
@@ -154,7 +154,7 @@
         type Lower = Const<{$n-1}>;
         fn lower(&self) -> Option<Self::Lower> { Some(Const) }
         fn lower_or(&self) -> Self::Lower { Const }
-        fn value(&self) -> usize { $n }
+        fn value(&self) -> u32 { $n }
     }
 )* };
 }
@@ -474,9 +474,9 @@
             NodeOption::Branches(b) => {
                 // FIXME: recursion that may cause stack overflow if the tree becomes
                 // very deep, e.g. due to [`BTSearch::search_and_refine`].
-                Arc::make_mut(b).insert(domain, d, new_leaf_depth.lower_or(), support,
-                                        task_budget);
-                b.summarise_into(&mut self.aggregator)
+                let bm = Arc::make_mut(b);
+                bm.insert(domain, d, new_leaf_depth.lower_or(), support, task_budget);
+                bm.summarise_into(&mut self.aggregator);
             },
         }
     }
@@ -517,11 +517,11 @@
             NodeOption::Branches(b) => {
                 // FIXME: recursion that may cause stack overflow if the tree becomes
                 // very deep, e.g. due to [`BTSearch::search_and_refine`].
-                let bnew = Arc::new(Arc::unwrap_or_clone(b).convert_aggregator(generator, domain));
+                let bnew = Arc::unwrap_or_clone(b).convert_aggregator(generator, domain);
                 let mut anew = ANew::new();
                 bnew.summarise_into(&mut anew);
                 Node {
-                    data : NodeOption::Branches(bnew),
+                    data : NodeOption::Branches(Arc::new(bnew)),
                     aggregator : anew,
                 }
             }
@@ -551,8 +551,9 @@
             NodeOption::Branches(ref mut b) => {
                 // FIXME: recursion that may cause stack overflow if the tree becomes
                 // very deep, e.g. due to [`BTSearch::search_and_refine`].
-                Arc::make_mut(b).refresh_aggregator(generator, domain, task_budget);
-                b.summarise_into(&mut self.aggregator);
+                let bm = Arc::make_mut(b);
+                bm.refresh_aggregator(generator, domain, task_budget);
+                bm.summarise_into(&mut self.aggregator);
             }
         }
     }
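
The change from `Arc::make_mut(b).insert(…); b.summarise_into(…)` to a single bound `bm` makes both calls go through one copy-on-write reference. A minimal standalone sketch of the same pattern, using plain `std` types rather than this crate's `Branches`:

    use std::sync::Arc;

    // `Arc::make_mut` clones the pointee only if the Arc is shared, then
    // yields a unique `&mut`. Binding it once lets several mutating calls
    // reuse the same borrow instead of repeating the copy-on-write check.
    fn update(v : &mut Arc<Vec<i32>>) {
        let inner = Arc::make_mut(v); // clone-on-write happens at most here
        inner.push(1);
        inner.sort_unstable();        // same unique borrow reused
    }

    fn main() {
        let mut a = Arc::new(vec![3, 2]);
        let b = Arc::clone(&a);       // `a` is shared, so `update` clones
        update(&mut a);
        assert_eq!(*a, [1, 2, 3]);
        assert_eq!(*b, [3, 2]);       // the old shared copy is untouched
    }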
--- a/src/bisection_tree/btfn.rs	Tue Nov 01 09:24:45 2022 +0200
+++ b/src/bisection_tree/btfn.rs	Fri Nov 18 10:29:50 2022 +0200
@@ -573,7 +573,7 @@
         &self,
         aggregator : &Bounds<F>,
         cube : &Cube<F, N>,
-        data : &Vec<G::Id>,
+        data : &[G::Id],
         generator : &G,
         step : usize
     ) -> RefinerResult<Bounds<F>, Self::Result> {
@@ -630,7 +630,7 @@
         &self,
         aggregator : &Bounds<F>,
         cube : &Cube<F, N>,
-        data : &Vec<G::Id>,
+        data : &[G::Id],
         generator : &G,
         step : usize
     ) -> RefinerResult<Bounds<F>, Self::Result> {
@@ -658,7 +658,13 @@
             // The data is refined enough, so return new hopefully better bounds
             // and the minimiser.
             let res = (x, v);
-            let bounds = Bounds(v, v);
+            let l = aggregator.lower();
+            let bounds = if l > v {
+                eprintln!("imprecision!");
+                Bounds(l, l)
+            } else {
+                Bounds(v, v)
+            };
             RefinerResult::Uncertain(bounds, Some(res))
         }
     }
@@ -701,7 +707,7 @@
         &self,
         aggregator : &Bounds<F>,
         _cube : &Cube<F, N>,
-        _data : &Vec<G::Id>,
+        _data : &[G::Id],
         _generator : &G,
         step : usize
     ) -> RefinerResult<Bounds<F>, Self::Result> {
@@ -735,7 +741,7 @@
         &self,
         aggregator : &Bounds<F>,
         _cube : &Cube<F, N>,
-        _data : &Vec<G::Id>,
+        _data : &[G::Id],
         _generator : &G,
         step : usize
     ) -> RefinerResult<Bounds<F>, Self::Result> {
@@ -759,6 +765,21 @@
     }
 }
 
+// FIXME: The most likely reason for the “Refiner failure” expectation in the methods below
+// is numerical inaccuracy: the `glb` maintained in `HeapContainer` (`refine.rs`) becomes bigger
+// than the *upper bound* of nodes attempted to be inserted into the `heap` in the container.
+// But the `glb` is there exactly to prevent that. Due to numerical inaccuracy, however, a
+// newly subdivided node may have a lower upper bound than the original lower bound, which should
+// have been above the `glb` since the node was picked from the queue. Due to the subdivision
+// process, if a node whose lower bound is at the `glb` is picked, all of its refined subnodes
+// should have lower bound at least the old `glb`, so in a single-threaded situation there should
+// always be nodes above the `glb` in the queue. In a multi-threaded situation a node below the
+// `glb` may be picked by some thread. When that happens, that thread inserts no new nodes into
+// the queue. If the queue empties as a result of that, the thread goes to wait for other threads
+// to produce results. Since some thread had a node whose lower bound was above the `glb`,
+// eventually there should be a result, or new nodes above the `glb` inserted into the queue.
+// Then the waiting threads can also continue processing. If, however, numerical inaccuracy
+// destroys the `glb`, the queue may run out, and we get “Refiner failure”.
 impl<F : Float, G, BT, const N : usize> BTFN<F, G, BT, N>
 where BT : BTSearch<F, N, Agg=Bounds<F>>,
       G : SupportGenerator<F, N, Id=BT::Data>,
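
Two of the changes above in isolation, as a self-contained sketch with invented function names: a `&[T]` parameter accepts a borrowed `Vec<T>`, an array, or any slice without the double indirection of `&Vec<T>`, and the bound clamp keeps a computed value `v` from dropping below a lower bound `l` that the aggregator already certifies, which floating-point roundoff can otherwise cause:

    // Clamp sketch: mirrors the `if l > v` logic introduced above.
    fn clamped_bounds(l : f64, v : f64) -> (f64, f64) {
        if l > v {
            eprintln!("imprecision!"); // roundoff produced v < l; trust l
            (l, l)
        } else {
            (v, v)
        }
    }

    // Slice-parameter sketch: callers need no specific container type.
    fn count_ids(data : &[u32]) -> usize {
        data.len()
    }

    fn main() {
        let v = vec![1, 2, 3];
        assert_eq!(count_ids(&v), 3);      // &Vec<u32> coerces to &[u32]
        assert_eq!(count_ids(&[4, 5]), 2); // arrays work directly
        assert_eq!(clamped_bounds(1.0, 0.999999), (1.0, 1.0));
        assert_eq!(clamped_bounds(1.0, 2.0), (2.0, 2.0));
    }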
--- a/src/bisection_tree/refine.rs	Tue Nov 01 09:24:45 2022 +0200
+++ b/src/bisection_tree/refine.rs	Fri Nov 18 10:29:50 2022 +0200
@@ -2,7 +2,7 @@
 use std::collections::BinaryHeap;
 use std::cmp::{PartialOrd, Ord, Ordering, Ordering::*, max};
 use std::marker::PhantomData;
-use std::sync::{Arc, Mutex, Condvar};
+use std::sync::{Arc, Mutex, MutexGuard, Condvar};
 use crate::types::*;
 use crate::nanleast::NaNLeast;
 use crate::sets::Cube;
@@ -126,7 +126,7 @@
         &self,
         aggregator : &A,
         domain : &Cube<F, N>,
-        data : &Vec<G::Id>,
+        data : &[G::Id],
         generator : &G,
         step : usize,
     ) -> RefinerResult<A, Self::Result>;
@@ -141,9 +141,13 @@
       D : 'static,
       A : Aggregator,
       S : AggregatorSorting<Agg = A> {
+    /// Domain of `node`
     cube : Cube<F, N>,
+    /// Node to be refined
     node : &'a mut Node<F, D, A, N, P>,
+    /// Result and improved aggregator from the [`Refiner`]
     refiner_info : Option<(A, RResult)>,
+    /// The [`AggregatorSorting`] in use; for the type system
     sorting : PhantomData<S>,
 }
 
@@ -214,8 +218,8 @@
     fn cmp(&self, other : &Self) -> Ordering {
         self.with_aggregator(|agg1| other.with_aggregator(|agg2| {
             match S::sort_upper(agg1).cmp(&S::sort_upper(agg2)) {
-                 Equal => S::sort_lower(agg1).cmp(&S::sort_lower(agg2)),
-                 order => order,
+                Equal => S::sort_lower(agg1).cmp(&S::sort_lower(agg2)),
+                order => order,
             }
         }))
     }
@@ -230,13 +234,20 @@
       Const<P> : BranchCount<N>,
       A : Aggregator,
       S : AggregatorSorting<Agg = A> {
+    /// Priority queue of nodes to be refined
     heap : BinaryHeap<RefinementInfo<'a, F, D, A, S, RResult, N, P>>,
+    /// Maximum of node sorting lower bounds seen in the heap
     glb : S::Sort,
-    glb_stale_counter : usize,
-    stale_insert_counter : usize,
+    /// Number of insertions in the heap since the previous prune
+    insert_counter : usize,
+    /// If a result has been found by some refinement thread, it is stored here
     result : Option<RResult>,
+    /// Refinement step counter
     step : usize,
-    n_processing : usize
+    /// Number of threads currently processing (not sleeping)
+    n_processing : usize,
+    /// Threshold for heap pruning
+    heap_prune_threshold : usize,
 }
 
@@ -248,14 +259,19 @@
       S : AggregatorSorting<Agg = A> {
 
     /// Push `ri` into the [`BinaryHeap`]. Do greatest lower bound maintenance.
-    fn push(&mut self, ri : RefinementInfo<'a, F, D, A, S, RResult, N, P>) {
+    ///
+    /// Returns a boolean indicating whether the push was actually performed or was
+    /// skipped due to glb filtering.
+    #[inline]
+    fn push(&mut self, ri : RefinementInfo<'a, F, D, A, S, RResult, N, P>) -> bool {
         if ri.sort_upper() >= self.glb {
             let l = ri.sort_lower();
             self.heap.push(ri);
             self.glb = self.glb.max(l);
-            if self.glb_stale_counter > 0 {
-                self.stale_insert_counter += 1;
-            }
+            self.insert_counter += 1;
+            true
+        } else {
+            false
         }
     }
 }
@@ -266,18 +282,17 @@
       A : Aggregator,
       D : 'static + Copy + Send + Sync {
 
-    /// Stage all subnodes of `self` into the refinement queue [`container`].
+    /// Stage all subnodes of `self` into the refinement queue `container`.
     fn stage_refine<'a, S, RResult>(
         &'a mut self,
         domain : Cube<F,N>,
-        container_arc : &Arc<Mutex<HeapContainer<'a, F, D, A, S, RResult, N, P>>>,
+        container : &mut HeapContainer<'a, F, D, A, S, RResult, N, P>,
     ) where S : AggregatorSorting<Agg = A> {
-        let mut container = container_arc.lock().unwrap();
         // Insert all subnodes into the refinement heap.
-        for (node, subcube) in self.nodes_and_cubes_mut(&domain) {
+        for (node, cube) in self.nodes_and_cubes_mut(&domain) {
             container.push(RefinementInfo {
-                cube : subcube,
-                node : node,
+                cube,
+                node,
                 refiner_info : None,
                 sorting : PhantomData,
             });
         }
     }
@@ -300,74 +315,102 @@
     /// [`Branches::stage_refine`].
     ///
    /// `domain`, as usual, indicates the spatial area corresponding to `self`.
-    fn search_and_refine<'a, 'b, R, G>(
+    fn search_and_refine<'a, 'b, 'c, R, G>(
         self : &'a mut Self,
         domain : Cube<F,N>,
         refiner : &R,
         generator : &G,
-        container_arc : &Arc<Mutex<HeapContainer<'a, F, D, A, R::Sorting, R::Result, N, P>>>,
+        container_arc : &'c Arc<Mutex<HeapContainer<'a, F, D, A, R::Sorting, R::Result, N, P>>>,
         step : usize
-    ) -> Option<R::Result>
+    ) -> Result<R::Result, MutexGuard<'c, HeapContainer<'a, F, D, A, R::Sorting, R::Result, N, P>>>
     where R : Refiner<F, A, G, N>,
           G : SupportGenerator<F, N, Id=D>,
           G::SupportType : LocalAnalysis<F, A, N> {
 
         // Refine a leaf.
-        let res = if let NodeOption::Leaf(ref v) = &mut self.data {
-            let res = refiner.refine(&self.aggregator, &domain, v, generator, step);
-            if let NeedRefinement = res {
-                // The refiner has deemed the leaf unsufficiently refined, so subdivide
-                // it and add the new nodes into the refinement priority heap.
-                // We start iterating from the end to mix support_hint a bit.
-                let mut it = v.iter().rev();
-                if let Some(&d) = it.next() {
-                    // Construct new Branches
-                    let support = generator.support_for(d);
-                    let b = Arc::new({
-                        let mut b0 = Branches::new_with(&domain, &support);
-                        b0.insert(&domain, d, Const::<1>, &support, TaskBudget::none());
+        let res = match self.data {
+            NodeOption::Leaf(ref mut v) => {
+                let res = refiner.refine(&self.aggregator, &domain, v, generator, step);
+                if let NeedRefinement = res {
+                    // The refiner has deemed the leaf insufficiently refined, so subdivide
+                    // it and add the new nodes into the refinement priority heap.
+                    let mut it = v.iter();
+                    // Only create new branches if there's anything to add.
+                    // We insert the last item first to mix the support_hint a bit.
+                    if let Some(&d0) = it.next_back() {
+                        // Construct new Branches
+                        let support = generator.support_for(d0);
+                        let mut b = Branches::new_with(&domain, &support);
+                        b.insert(&domain, d0, Const::<1>, &support, TaskBudget::none());
                         for &d in it {
                             let support = generator.support_for(d);
                             // TODO: can we be smarter than just refining one level?
-                            b0.insert(&domain, d, Const::<1>, &support, TaskBudget::none());
+                            b.insert(&domain, d, Const::<1>, &support, TaskBudget::none());
                         }
-                        b0
-                    });
-                    // Update current node
-                    b.summarise_into(&mut self.aggregator);
-                    self.data = NodeOption::Branches(b);
-                    // The branches will be inserted into the refinement priority queue below.
+                        // Update current node and stage refinement of new branches.
+                        b.summarise_into(&mut self.aggregator);
+                        // FIXME: parent aggregators are not updated and will be out-of-date, but
+                        // right now pointsource_algs is not really taking advantage of the
+                        // aggregators being updated. Moreover, insertion and aggregator refinement
+                        // code in `bt.rs` will overflow the stack in deeply nested trees.
+                        // We nevertheless need to store `b` into `self` to be able to queue
+                        // the branches.
+                        self.data = NodeOption::Branches(Arc::new(b));
+                        // This ugly match is needed to keep the compiler happy about lifetimes.
+                        match self.data {
+                            NodeOption::Branches(ref mut arc_b) => {
+                                let mut container = container_arc.lock().unwrap();
+                                // Safe: we just created arc_b and have a mutable exclusive
+                                // reference to self containing it.
+                                unsafe { Arc::get_mut_unchecked(arc_b) }
+                                    .stage_refine(domain, &mut *container);
+                                return Err(container)
+                            },
+                            _ => unreachable!("This cannot happen"),
+                        }
+                    }
                 }
-            }
-            res
-        } else {
-            NeedRefinement
+                res
+            },
+            NodeOption::Branches(ref mut b) => {
+                // Insert branches into refinement priority queue.
+                let mut container = container_arc.lock().unwrap();
+                Arc::make_mut(b).stage_refine(domain, &mut *container);
+                return Err(container)
+            },
+            NodeOption::Uninitialised => {
+                refiner.refine(&self.aggregator, &domain, &[], generator, step)
+            },
         };
 
-        if let Uncertain(agg, val) = res {
-            // The refiner gave an undertain result. Push a leaf back into the refinement queue
-            // with the new refined aggregator and custom return value. It will be popped and
-            // returned in the loop of [`BT::search_and_refine`] when there are no unrefined
-            // candidates that could potentially be better according to their basic aggregator.
-            let mut container = container_arc.lock().unwrap();
-            container.push(RefinementInfo {
-                cube : domain,
-                node : self,
-                refiner_info : Some((agg, val)),
-                sorting : PhantomData,
-            });
-            None
-        } else if let Certain(val) = res {
-            // The refiner gave a certain result so return it to allow early termination
-            Some(val)
-        } else if let NodeOption::Branches(ref mut b) = &mut self.data {
-            // Insert branches into refinement priority queue.
-            Arc::make_mut(b).stage_refine(domain, container_arc);
-            None
-        } else {
-            None
+        match res {
+            Uncertain(agg, val) => {
+                // The refiner gave an uncertain result. Push a leaf back into the refinement
+                // queue with the new refined aggregator and custom return value. It will be
+                // popped and returned in the loop of [`BTSearch::search_and_refine`] when there
+                // are no unrefined candidates that could potentially be better according to
+                // their basic aggregator.
+                let mut container = container_arc.lock().unwrap();
+                container.push(RefinementInfo {
+                    cube : domain,
+                    node : self,
+                    refiner_info : Some((agg, val)),
+                    sorting : PhantomData,
+                });
+                Err(container)
+            },
+            Certain(val) => {
+                // The refiner gave a certain result, so return it to allow early termination.
+                Ok(val)
+            },
+            NeedRefinement => {
+                // This should only happen when we run into NodeOption::Uninitialised above.
+                // There's really nothing to do.
+                panic!("Do not know how to refine uninitialised nodes");
+            }
         }
-    }
     }
 }
@@ -378,7 +421,7 @@
 
 pub trait BTSearch<F, const N : usize> : BTImpl<F, N>
 where F : Float {
-    /// Perform a search on [`Self`], as determined by `refiner`.
+    /// Perform a search on `Self`, as determined by `refiner`.
     ///
     /// Nodes are inserted in a priority queue and processed in the order determined by the
     /// [`AggregatorSorting`] [`Refiner::Sorting`]. Leaf nodes are subdivided until the refiner
@@ -398,7 +441,7 @@
 }
 
 fn refinement_loop<F : Float, D, A, R, G, const N : usize, const P : usize> (
-    condvar : Option<Arc<Condvar>>,
+    wakeup : Option<Arc<Condvar>>,
     refiner : &R,
     generator_arc : &Arc<G>,
     container_arc : &Arc<Mutex<HeapContainer<F, D, A, R::Sorting, R::Result, N, P>>>,
@@ -410,93 +453,98 @@
       D : 'static + Copy + Sync + Send + std::fmt::Debug {
 
     let mut did_park = true;
+    let mut container = container_arc.lock().unwrap();
 
     'main: loop {
-        let (ri, step) = {
-            let mut container = container_arc.lock().unwrap();
-            let ri = 'get_next: loop {
-                if did_park {
-                    container.n_processing += 1;
-                    did_park = false;
-                }
-
-                // Some thread has found a result, return
-                if container.result.is_some() {
-                    container.n_processing -= 1;
-                    break 'main
-                }
+        // Find a node to process
+        let ri = 'get_next: loop {
+            if did_park {
+                container.n_processing += 1;
+                did_park = false;
+            }
 
-                let ri = match container.heap.pop() {
-                    Some(ri) => ri,
-                    None => {
-                        debug_assert!(container.n_processing > 0);
-                        container.n_processing -= 1;
-                        if container.n_processing == 0 {
-                            break 'main;
-                        } else if let Some(ref c) = condvar {
-                            //eprintln!("Sleeping {t:?} {n} {worker_counter}\n", t=thread::current(), n=container.n_processing);
-                            did_park = true;
-                            container = c.wait(container).unwrap();
-                            continue 'get_next;
-                        } else {
-                            break 'main
-                        }
-                    }
-                };
-                break ri;
-            };
-
-            let step = container.step;
-            container.step += 1;
-
-            if let Some((_, result)) = ri.refiner_info {
-                // Terminate based on a “best possible” result.
-                container.result = Some(result);
+            // Some refinement task/thread has found a result, return
+            if container.result.is_some() {
                 container.n_processing -= 1;
                 break 'main
             }
 
-            if ri.sort_lower() >= container.glb {
-                container.glb_stale_counter += 1;
-                if container.stale_insert_counter + container.glb_stale_counter
-                       > container.heap.len()/2 {
-                    // GLB propery no longer correct.
-                    match container.heap.iter().map(|ri| ri.sort_lower()).reduce(max) {
-                        Some(glb) => {
-                            container.glb = glb;
-                            container.heap.retain(|ri| ri.sort_upper() >= glb);
-                        },
-                        None => {
-                            container.glb = R::Sorting::bottom()
-                        }
-                    }
-                    container.glb_stale_counter = 0;
-                    container.stale_insert_counter = 0;
+            match container.heap.pop() {
+                // There's work to be done.
+                Some(ri) => break 'get_next ri,
+                // No work to be done; park if some task/thread is still processing nodes,
+                // fail if not.
+                None => {
+                    debug_assert!(container.n_processing > 0);
+                    container.n_processing -= 1;
+                    if container.n_processing == 0 {
+                        break 'main;
+                    } else if let Some(ref c) = wakeup {
+                        did_park = true;
+                        container = c.wait(container).unwrap();
+                        continue 'get_next;
+                    } else {
+                        break 'main
+                    }
                 }
-            }
-
-            (ri, step)
+            };
         };
 
-        let res = Node::search_and_refine(ri.node, ri.cube, refiner, &**generator_arc,
-                                          &container_arc, step);
-        if let Some(r) = res {
-            // Terminate based on a certain result from the refiner
-            let mut container = container_arc.lock().unwrap();
-            if let &mut Some(ref mut r_prev) = &mut container.result {
-                R::fuse_results(r_prev, r);
-            } else {
-                container.result = Some(r);
-            }
+        let step = container.step;
+        container.step += 1;
+
+        if let Some((_, result)) = ri.refiner_info {
+            // Terminate based on a “best possible” result.
+            container.result = Some(result);
+            container.n_processing -= 1;
             break 'main
         }
 
-        if let Some(ref c) = condvar {
-            c.notify_one();
+        // Do priority queue maintenance
+        if container.insert_counter > container.heap_prune_threshold {
+            // Make sure glb is good.
+            match container.heap.iter().map(|ri| ri.sort_lower()).reduce(max) {
+                Some(glb) => {
+                    container.glb = glb;
+                    // Prune
+                    container.heap.retain(|ri| ri.sort_upper() >= glb);
                },
+                None => {
+                    container.glb = R::Sorting::bottom()
+                }
+            }
+            container.insert_counter = 0;
         }
+
+        // Unlock the mutex…
+        drop(container);
+
+        // … and process the node. On `Err` we get back a still-locked mutex guard.
+        match Node::search_and_refine(ri.node, ri.cube, refiner, &**generator_arc,
+                                      &container_arc, step) {
+            Ok(r) => {
+                let mut container = container_arc.lock().unwrap();
+                // Terminate based on a certain result from the refiner
+                match container.result {
+                    Some(ref mut r_prev) => R::fuse_results(r_prev, r),
+                    None => container.result = Some(r),
+                }
+                break 'main
+            },
+            Err(cnt) => {
+                container = cnt;
+                // Wake up another thread if one is sleeping; there should now be work
+                // in the queue.
+                if let Some(ref c) = wakeup {
+                    c.notify_one();
+                }
+            }
+        }
     }
 
-    if let Some(ref c) = condvar {
+    // Make sure no task is sleeping
+    if let Some(ref c) = wakeup {
         c.notify_all();
     }
 }
@@ -507,7 +555,7 @@
 
         impl<'a, M, F, D, A> BTSearch<F, $n>
         for BT<M,F,D,A,$n>
-        where //Self : BTImpl<F,$n,Data=D,Agg=A, Depth=M>, // <== automatically deduce to be implemented
+        where //Self : BTImpl<F,$n,Data=D,Agg=A, Depth=M>, // <== automatically deduced
               M : Depth,
              F : Float + Send,
              A : Aggregator,
@@ -520,15 +568,15 @@
        where R : Refiner<F, A, G, $n>,
              G : SupportGenerator<F, $n, Id=D>,
              G::SupportType : LocalAnalysis<F, A, $n> {
-            let mut init_container /*: HeapContainer<F, D, A, R::Sorting, R::Result, $n, {pow(2, $n)}>*/
-                = HeapContainer {
+            let mut init_container = HeapContainer {
                 heap : BinaryHeap::new(),
                 glb : R::Sorting::bottom(),
-                glb_stale_counter : 0,
-                stale_insert_counter : 0,
+                insert_counter : 0,
                 result : None,
                 step : 0,
                 n_processing : 0,
+                // An arbitrary threshold for starting pruning of the heap
+                heap_prune_threshold : 2u32.pow(16.max($n * self.depth.value())) as usize
             };
             init_container.push(RefinementInfo {
                 cube : self.domain,
@@ -536,33 +584,31 @@
                 refiner_info : None,
                 sorting : PhantomData,
             });
-            // let n_workers = thread::available_parallelism()
-            //                 .map_or(1, core::num::NonZeroUsize::get);
-            let maybe_pool = thread_pool();
+
             let container_arc = Arc::new(Mutex::new(init_container));
-            if let Some(pool) = maybe_pool {
+            if let Some(pool) = thread_pool() {
                 let n = thread_pool_size();
                 pool.scope(|s| {
-                    let condvar = Arc::new(Condvar::new());
-                    for _ in 0..n{
+                    let wakeup = Arc::new(Condvar::new());
+                    for _ in 0..n {
                         let refiner_ref = &refiner;
                         let container_t = Arc::clone(&container_arc);
-                        let condvar_t = Arc::clone(&condvar);
+                        let wakeup_t = Arc::clone(&wakeup);
                         s.spawn(move |_| {
-                            refinement_loop(Some(condvar_t), refiner_ref, generator,
+                            refinement_loop(Some(wakeup_t), refiner_ref, generator,
                                             &container_t);
                         });
                     }
-                    refinement_loop(Some(condvar), &refiner, generator, &container_arc);
+                    refinement_loop(Some(wakeup), &refiner, generator, &container_arc);
                 });
             } else {
                 refinement_loop(None, &refiner, generator, &container_arc);
            }
-            Arc::try_unwrap(container_arc)
-                .map(|mtx| mtx.into_inner().unwrap().result)
-                .ok()
-                .flatten()
+            match Arc::try_unwrap(container_arc) {
+                Ok(mtx) => mtx.into_inner().unwrap().result,
+                Err(_) => panic!("Refinement threads not finished properly."),
+            }
        }
    }
 )* }
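
The locking discipline of the new `refinement_loop` in miniature, as an illustrative model with invented types rather than the crate's own: the guard is held across all queue operations and released only while a node is processed, and `n_processing` distinguishes a temporarily empty queue from global termination:

    use std::collections::VecDeque;
    use std::sync::{Arc, Condvar, Mutex};
    use std::thread;

    struct Shared {
        queue : VecDeque<u32>,
        n_processing : usize,
    }

    fn worker(state : &Arc<(Mutex<Shared>, Condvar)>) {
        let (lock, wakeup) = &**state;
        let mut s = lock.lock().unwrap();
        s.n_processing += 1;
        loop {
            match s.queue.pop_front() {
                Some(item) => {
                    drop(s); // unlock while “refining” the node
                    let next = item.checked_sub(1).filter(|&n| n > 0);
                    s = lock.lock().unwrap();
                    if let Some(n) = next {
                        s.queue.push_back(n);
                        wakeup.notify_one(); // there is now work in the queue
                    }
                }
                None => {
                    s.n_processing -= 1;
                    if s.n_processing == 0 {
                        wakeup.notify_all(); // nobody is working: all done
                        return;
                    }
                    s = wakeup.wait(s).unwrap(); // park until work appears
                    s.n_processing += 1;
                }
            }
        }
    }

    fn main() {
        let state = Arc::new((
            Mutex::new(Shared { queue : VecDeque::from([5, 3]), n_processing : 0 }),
            Condvar::new(),
        ));
        let workers : Vec<_> = (0..3).map(|_| {
            let st = Arc::clone(&state);
            thread::spawn(move || worker(&st))
        }).collect();
        for w in workers {
            w.join().unwrap();
        }
        assert!(state.0.lock().unwrap().queue.is_empty());
    }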