@@ -194,12 +194,14 @@ end
194194
195195function initialize! (neighborhood_search:: GridNeighborhoodSearch ,
196196 x:: AbstractMatrix , y:: AbstractMatrix ;
197- parallelization_backend = default_backend (x))
198- initialize_grid! (neighborhood_search, y; parallelization_backend)
197+ parallelization_backend = default_backend (x),
198+ eachindex_y = axes (y, 2 ))
199+ initialize_grid! (neighborhood_search, y; parallelization_backend, eachindex_y)
199200end
200201
201202function initialize_grid! (neighborhood_search:: GridNeighborhoodSearch , y:: AbstractMatrix ;
202- parallelization_backend = default_backend (y))
203+ parallelization_backend = default_backend (y),
204+ eachindex_y = axes (y, 2 ))
203205 (; cell_list) = neighborhood_search
204206
205207 empty! (cell_list)
@@ -210,8 +212,10 @@ function initialize_grid!(neighborhood_search::GridNeighborhoodSearch, y::Abstra
210212 return neighborhood_search
211213 end
212214
215+ @boundscheck checkbounds (y, eachindex_y)
216+
213217 # Ignore the parallelization backend here. This cannot be parallelized.
214- for point in axes (y, 2 )
218+ for point in eachindex_y
215219 # Get cell index of the point's cell
216220 point_coords = @inbounds extract_svector (y, Val (ndims (neighborhood_search)), point)
217221 cell = cell_coords (point_coords, neighborhood_search)
225229
226230function initialize_grid! (neighborhood_search:: GridNeighborhoodSearch {<: Any ,
227231 ParallelUpdate},
228- y:: AbstractMatrix ; parallelization_backend = default_backend (y))
232+ y:: AbstractMatrix ; parallelization_backend = default_backend (y),
233+ eachindex_y = axes (y, 2 ))
229234 (; cell_list) = neighborhood_search
230235
231236 empty! (cell_list)
@@ -236,7 +241,9 @@ function initialize_grid!(neighborhood_search::GridNeighborhoodSearch{<:Any,
236241 return neighborhood_search
237242 end
238243
239- @threaded parallelization_backend for point in axes (y, 2 )
244+ @boundscheck checkbounds (y, eachindex_y)
245+
246+ @threaded parallelization_backend for point in eachindex_y
240247 # Get cell index of the point's cell
241248 point_coords = @inbounds extract_svector (y, Val (ndims (neighborhood_search)), point)
242249 cell = cell_coords (point_coords, neighborhood_search)
@@ -250,19 +257,30 @@ end
250257
251258function update! (neighborhood_search:: GridNeighborhoodSearch ,
252259 x:: AbstractMatrix , y:: AbstractMatrix ;
253- points_moving = (true , true ), parallelization_backend = default_backend (x))
260+ points_moving = (true , true ), parallelization_backend = default_backend (x),
261+ eachindex_y = axes (y, 2 ))
254262 # The coordinates of the first set of points are irrelevant for this NHS.
255263 # Only update when the second set is moving.
256264 points_moving[2 ] || return neighborhood_search
257265
258- update_grid! (neighborhood_search, y; parallelization_backend)
266+ update_grid! (neighborhood_search, y; eachindex_y, parallelization_backend)
259267end
260268
261269# Update only with neighbor coordinates
262- function update_grid! (neighborhood_search:: GridNeighborhoodSearch{NDIMS} , y:: AbstractMatrix ;
263- parallelization_backend = default_backend (y)) where {NDIMS}
270+ function update_grid! (neighborhood_search:: Union {GridNeighborhoodSearch{<: Any ,
271+ SerialIncrementalUpdate},
272+ GridNeighborhoodSearch{<: Any ,
273+ SemiParallelUpdate}},
274+ y:: AbstractMatrix ;
275+ parallelization_backend = default_backend (y),
276+ eachindex_y = axes (y, 2 ))
264277 (; cell_list, update_buffer) = neighborhood_search
265278
279+ if eachindex_y != axes (y, 2 )
280+ # Incremental update doesn't support inactive points
281+ error (" this neighborhood search/update strategy does not support inactive points" )
282+ end
283+
266284 # Empty each thread's list
267285 @threaded parallelization_backend for i in eachindex (update_buffer)
268286 emptyat! (update_buffer, i)
@@ -302,33 +320,46 @@ function update_grid!(neighborhood_search::GridNeighborhoodSearch{NDIMS}, y::Abs
302320 return neighborhood_search
303321end
304322
305- # The type annotation is to make Julia specialize on the type of the function.
306- # Otherwise, unspecialized code will cause a lot of allocations and heavily impact performance.
307- # See https://docs.julialang.org/en/v1/manual/performance-tips/#Be-aware-of-when-Julia-avoids-specializing
308323@inline function mark_changed_cells! (neighborhood_search:: GridNeighborhoodSearch {<: Any ,
309324 SemiParallelUpdate},
310- y, parallelization_backend) where {T}
311- (; cell_list) = neighborhood_search
325+ y, parallelization_backend)
326+ (; cell_list, update_buffer ) = neighborhood_search
312327
313328 # `each_cell_index(cell_list)` might return a `KeySet`, which has to be `collect`ed
314- # first to be able to be used in a threaded loop. This function takes care of that.
315- @threaded parallelization_backend for cell_index in each_cell_index_threadable (cell_list)
316- mark_changed_cell! (neighborhood_search, cell_index, y)
329+ # first to support indexing.
330+ eachcell = each_cell_index_threadable (cell_list)
331+
332+ # Use chunks (usually one per thread) to index into the update buffer.
333+ # We cannot use `Iterators.partition` here, since the resulting iterator does not
334+ # support indexing and therefore cannot be used in a threaded loop.
335+ chunk_length = div (length (eachcell), length (update_buffer), RoundUp)
336+
337+ @threaded parallelization_backend for chunk_id in 1 : length (update_buffer)
338+ # Manual partitioning of `eachcell`
339+ start = (chunk_length * (chunk_id - 1 )) + 1
340+ end_ = min (chunk_length * chunk_id, length (eachcell))
341+
342+ for i in start: end_
343+ cell_index = eachcell[i]
344+
345+ mark_changed_cell! (neighborhood_search, cell_index, y, chunk_id)
346+ end
317347 end
318348end
319349
320350@inline function mark_changed_cells! (neighborhood_search:: GridNeighborhoodSearch {<: Any ,
321351 SerialIncrementalUpdate},
322- y, _) where {T}
352+ y, _)
323353 (; cell_list) = neighborhood_search
324354
325355 # Ignore the parallelization backend here for `SerialIncrementalUpdate`.
326356 for cell_index in each_cell_index (cell_list)
327- mark_changed_cell! (neighborhood_search, cell_index, y)
357+ # `chunk_id` is always `1` for `SerialIncrementalUpdate`
358+ mark_changed_cell! (neighborhood_search, cell_index, y, 1 )
328359 end
329360end
330361
331- @inline function mark_changed_cell! (neighborhood_search, cell_index, y)
362+ @inline function mark_changed_cell! (neighborhood_search, cell_index, y, chunk_id )
332363 (; cell_list, update_buffer) = neighborhood_search
333364
334365 for point in cell_list[cell_index]
340371 # These can be identical (see `DictionaryCellList`).
341372 if ! is_correct_cell (cell_list, cell, cell_index)
342373 # Mark this cell and continue with the next one
343- pushat! (update_buffer, Threads . threadid () , cell_index)
374+ pushat! (update_buffer, chunk_id , cell_index)
344375 break
345376 end
346377 end
349380# Fully parallel incremental update with atomic push.
350381function update_grid! (neighborhood_search:: GridNeighborhoodSearch {<: Any ,
351382 ParallelIncrementalUpdate},
352- y:: AbstractMatrix ; parallelization_backend = default_backend (y))
383+ y:: AbstractMatrix ; parallelization_backend = default_backend (y),
384+ eachindex_y = axes (y, 2 ))
353385 (; cell_list, update_buffer) = neighborhood_search
354386
387+ if eachindex_y != axes (y, 2 )
388+ # Incremental update doesn't support inactive points
389+ error (" this neighborhood search/update strategy does not support inactive points" )
390+ end
391+
355392 # Note that we need two separate loops for adding and removing points.
356393 # `push_cell_atomic!` only guarantees thread-safety when different threads push
357394 # simultaneously, but it does not work when `deleteat_cell!` is called at the same time.
@@ -405,8 +442,9 @@ function update_grid!(neighborhood_search::Union{GridNeighborhoodSearch{<:Any,
405442 ParallelUpdate},
406443 GridNeighborhoodSearch{<: Any ,
407444 SerialUpdate}},
408- y:: AbstractMatrix ; parallelization_backend = default_backend (y))
409- initialize_grid! (neighborhood_search, y; parallelization_backend)
445+ y:: AbstractMatrix ; parallelization_backend = default_backend (y),
446+ eachindex_y = axes (y, 2 ))
447+ initialize_grid! (neighborhood_search, y; parallelization_backend, eachindex_y)
410448end
411449
412450function check_collision (neighbor_cell_, neighbor_coords, cell_list, nhs)
0 commit comments