@@ -729,15 +729,16 @@ function select_neighborhood_info(chunks, idx, neigh_dist, boundary)
729729 return region_metadata, neighbor_chunks
730730end
731731
732- # Per-thread cache: IdDict{DArray, Dict{(chunk_idx, halo_width), HaloArray}}.
733- # Using IdDict for the outer level ensures two DArrays with identical element types and
734- # chunk shapes never share a buffer. Using chunk_idx as part of the inner key ensures that
735- # within one DArray, every chunk has its own dedicated buffer — so if a single worker thread
736- # processes multiple same-shaped chunks in the same iteration (sequentially), each gets a
737- # distinct HaloArray and there is no aliasing with a concurrently running inner-stencil task.
732+ # Per-thread cache: WeakKeyDict{DArray, Dict{(chunk_idx, halo_width), HaloArray}}.
733+ # The outer level is a WeakKeyDict so that the cache holds no strong reference to the source
734+ # DArray, letting its GC finalizer fire once user code drops its last reference (see below).
735+ # NOTE(review): unlike IdDict, WeakKeyDict matches keys by hash/isequal rather than identity;
736+ # confirm two distinct DArrays can never compare equal here. Within one DArray, chunk_idx in
737+ # the inner key gives every chunk its own dedicated buffer: a single worker thread processing
738+ # several same-shaped chunks sequentially in one iteration gets a distinct HaloArray for each, so there is no aliasing with a concurrently running inner-stencil task.
738739# Filling a cached buffer in-place is safe because spawn_datadeps blocks until all inner
739740# tasks complete before the next iteration's build_halo_consolidated calls run.
740- const HALO_ARRAY_CACHE = TaskLocalValue {IdDict {Any,Dict{Any,Any}}} (()-> IdDict {Any,Dict{Any,Any}} ())
741+ const HALO_ARRAY_CACHE = TaskLocalValue {WeakKeyDict {Any,Dict{Any,Any}}} (()-> WeakKeyDict {Any,Dict{Any,Any}} ())
741742
742743# Consolidated halo builder: loads all neighbor regions directly into a HaloArray.
743744# `read_darray` and `chunk_idx` are used solely for cache lookup — they are not DTask
@@ -752,7 +753,22 @@ function build_halo_consolidated(read_darray, chunk_idx, neigh_dist, boundary, c
752753 halo_width = ntuple (i -> get_neigh_dist (neigh_dist, i), N)
753754
754755 outer_cache = HALO_ARRAY_CACHE[]
755- inner_cache = get! (outer_cache, read_darray) do ; Dict {Any,Any} (); end
756+
757+ # Create the inner cache on first encounter of this DArray on this thread, and register
758+ # a finalizer that captures it. When the DArray becomes unreachable and is collected,
759+ # the finalizer fires and unsafe_free!s every cached HaloArray for this (DArray, thread)
760+ # pair. Because WeakKeyDict holds only a weak reference to read_darray, the DArray can
761+ # actually be collected (a plain IdDict would keep it alive forever).
762+ if ! haskey (outer_cache, read_darray)
763+ inner_cache = Dict {Any,Any} ()
764+ outer_cache[read_darray] = inner_cache
765+ finalizer (read_darray) do _
766+ for halo in values (inner_cache)
767+ unsafe_free! (halo)
768+ end
769+ end
770+ end
771+ inner_cache = outer_cache[read_darray]
756772 cache_key = (chunk_idx, halo_width)
757773
758774 if haskey (inner_cache, cache_key)
0 commit comments