Skip to content

Commit 1965160

Browse files
committed
DArray/stencil: Automatically free HaloArray cache
1 parent 1682f3f commit 1965160

1 file changed

Lines changed: 24 additions & 8 deletions

File tree

src/array/stencil.jl

Lines changed: 24 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -729,15 +729,16 @@ function select_neighborhood_info(chunks, idx, neigh_dist, boundary)
729729
return region_metadata, neighbor_chunks
730730
end
731731

732-
# Per-thread cache: IdDict{DArray, Dict{(chunk_idx, halo_width), HaloArray}}.
733-
# Using IdDict for the outer level ensures two DArrays with identical element types and
734-
# chunk shapes never share a buffer. Using chunk_idx as part of the inner key ensures that
735-
# within one DArray, every chunk has its own dedicated buffer — so if a single worker thread
736-
# processes multiple same-shaped chunks in the same iteration (sequentially), each gets a
737-
# distinct HaloArray and there is no aliasing with a concurrently running inner-stencil task.
732+
# Per-thread cache: WeakKeyDict{DArray, Dict{(chunk_idx, halo_width), HaloArray}}.
733+
# WeakKeyDict is used for the outer level so that the cache does not hold a strong reference
734+
# to the source DArray — allowing its GC finalizer to fire when user code drops its last
735+
# reference (see below). Using chunk_idx as part of the inner key ensures that within one
736+
# DArray, every chunk has its own dedicated buffer — so if a single worker thread processes
737+
# multiple same-shaped chunks in the same iteration sequentially, each gets a distinct
738+
# HaloArray and there is no aliasing with a concurrently running inner-stencil task.
738739
# Filling a cached buffer in-place is safe because spawn_datadeps blocks until all inner
739740
# tasks complete before the next iteration's build_halo_consolidated calls run.
740-
const HALO_ARRAY_CACHE = TaskLocalValue{IdDict{Any,Dict{Any,Any}}}(()->IdDict{Any,Dict{Any,Any}}())
741+
const HALO_ARRAY_CACHE = TaskLocalValue{WeakKeyDict{Any,Dict{Any,Any}}}(()->WeakKeyDict{Any,Dict{Any,Any}}())
741742

742743
# Consolidated halo builder: loads all neighbor regions directly into a HaloArray.
743744
# `read_darray` and `chunk_idx` are used solely for cache lookup — they are not DTask
@@ -752,7 +753,22 @@ function build_halo_consolidated(read_darray, chunk_idx, neigh_dist, boundary, c
752753
halo_width = ntuple(i -> get_neigh_dist(neigh_dist, i), N)
753754

754755
outer_cache = HALO_ARRAY_CACHE[]
755-
inner_cache = get!(outer_cache, read_darray) do; Dict{Any,Any}(); end
756+
757+
# Create the inner cache on first encounter of this DArray on this thread, and register
758+
# a finalizer that captures it. When the DArray becomes unreachable and is collected,
759+
# the finalizer fires and unsafe_free!s every cached HaloArray for this (DArray, thread)
760+
# pair. Because WeakKeyDict holds only a weak reference to read_darray, the DArray can
761+
# actually be collected (a plain IdDict would keep it alive forever).
762+
if !haskey(outer_cache, read_darray)
763+
inner_cache = Dict{Any,Any}()
764+
outer_cache[read_darray] = inner_cache
765+
finalizer(read_darray) do _
766+
for halo in values(inner_cache)
767+
unsafe_free!(halo)
768+
end
769+
end
770+
end
771+
inner_cache = outer_cache[read_darray]
756772
cache_key = (chunk_idx, halo_width)
757773

758774
if haskey(inner_cache, cache_key)

0 commit comments

Comments
 (0)