@@ -54,8 +54,7 @@ function load_neighbor_region(arr, region_code::NTuple{N,Int}, neigh_dist) where
5454 lastindex (arr, i)
5555 end
5656 end )
57- # FIXME : Don't collect
58- return move (task_processor (), collect (@view arr[start_idx: stop_idx]))
57+ return move (task_processor (), copy (@view arr[start_idx: stop_idx]))
5958end
6059
6160# In-place variant: load region directly into a pre-allocated destination buffer.
@@ -176,8 +175,9 @@ function load_boundary_region(pad::Pad, arr, region_code::NTuple{N,Int}, neigh_d
176175 region_size = ntuple (N) do i
177176 region_code[i] == 0 ? size (arr, i) : get_neigh_dist (neigh_dist, i)
178177 end
179- # FIXME : return Fill(pad.padval, region_size)
180- return move (task_processor (), fill (pad. padval, region_size))
178+ result = similar (arr, region_size... )
179+ fill! (result, pad. padval)
180+ return move (task_processor (), result)
181181end
182182
183183load_boundary_region_into! (dest, pad:: Pad , arr, region_code, neigh_dist, boundary_dims) =
@@ -462,7 +462,7 @@ function load_boundary_region(::Reflect{Symm}, arr, region_code::NTuple{N,Int},
462462 end
463463 end )
464464
465- region = move (task_processor (), collect (@view arr[start_idx: stop_idx]))
465+ region = move (task_processor (), copy (@view arr[start_idx: stop_idx]))
466466
467467 # Reverse only along dimensions that are actually being reflected
468468 # (both non-zero in region_code AND past boundary)
@@ -649,6 +649,123 @@ end
649649# Chunk Selection and Halo Building
650650# ############################################################################
651651
"""
    load_neighborhood_halos(chunks, idx, neigh_dist, boundary)

Load every halo region surrounding the chunk at `idx` and return them as a `Tuple`.

With `N = ndims(chunks)`, the `3^N` region codes are enumerated as base-3 digits
mapped to `-1`, `0`, `1` per dimension (first dimension varying fastest). The
all-zero code, which denotes the center itself, is skipped, so the result holds
`3^N - 1` entries in enumeration order.

For each region code the adjacent chunk index is `idx` plus the code. When
`is_past_boundary` reports that this index lies outside `size(chunks)`, the halo
is produced by `load_boundary_region`, reading from the chunk selected by
`boundary_transition` if `boundary_has_transition(boundary)` holds and from the
center chunk otherwise. In all other cases the halo comes from
`load_neighbor_region` applied to the adjacent chunk.
"""
function load_neighborhood_halos(chunks, idx, neigh_dist, boundary)
    validate_neigh_dist(neigh_dist)

    rank = ndims(chunks)
    grid_size = size(chunks)
    stride = 1  # offset, in chunks, between the center and each neighbor
    total = 3^rank - 1
    halos = Vector{Any}(undef, total)
    filled = 0

    for code_id in 0:total
        # Decode code_id into one digit in {-1, 0, 1} per dimension.
        region_code = ntuple(d -> (code_id ÷ 3^(d - 1)) % 3 - 1, rank)
        if all(iszero, region_code)
            continue  # the center is not a halo
        end
        filled += 1

        neighbor_idx = idx + CartesianIndex(map(c -> c * stride, region_code))

        if !is_past_boundary(grid_size, neighbor_idx)
            halos[filled] = load_neighbor_region(chunks[neighbor_idx], region_code, neigh_dist)
            continue
        end

        # Flag the dimensions along which the neighbor index leaves the grid.
        boundary_dims = ntuple(d -> !(1 <= neighbor_idx[d] <= grid_size[d]), rank)
        source_idx = if boundary_has_transition(boundary)
            boundary_transition(boundary, neighbor_idx, grid_size)
        else
            idx
        end
        halos[filled] = load_boundary_region(
            boundary, chunks[source_idx], region_code, neigh_dist, boundary_dims
        )
    end

    @assert filled == total
    return Tuple(halos)
end
693+
"""
    load_neighborhood_halos_from_deps(deps, idx, chunk_size, neigh_dist, boundary)

Load every halo region around the chunk at `idx` from the already-selected
`deps`, returning a `Tuple` of `3^N - 1` halos where `N = length(chunk_size)`.

Region codes are enumerated exactly as base-3 digits mapped to `-1`, `0`, `1`
per dimension (first dimension varying fastest), skipping the all-zero code.
The `h`-th non-center region reads its source from `deps[h + 1]`; `deps[1]` is
not read here.

When `is_past_boundary(chunk_size, idx + code)` holds, the halo is produced by
`load_boundary_region`; otherwise by `load_neighbor_region`.

NOTE(review): `chunk_size` is compared against chunk indices, so it looks like
the extents of the chunk grid rather than the size of one chunk — confirm
against the caller.
"""
function load_neighborhood_halos_from_deps(deps, idx, chunk_size, neigh_dist, boundary)
    validate_neigh_dist(neigh_dist)

    rank = length(chunk_size)
    stride = 1  # offset, in chunks, between the center and each neighbor
    total = 3^rank - 1
    halos = Vector{Any}(undef, total)
    filled = 0

    for code_id in 0:total
        # Decode code_id into one digit in {-1, 0, 1} per dimension.
        region_code = ntuple(d -> (code_id ÷ 3^(d - 1)) % 3 - 1, rank)
        if all(iszero, region_code)
            continue  # the center is not a halo
        end
        filled += 1

        neighbor_idx = idx + CartesianIndex(map(c -> c * stride, region_code))

        # Slot 1 of deps is skipped; halo number `filled` uses slot `filled + 1`.
        source = deps[filled + 1]
        halos[filled] = if is_past_boundary(chunk_size, neighbor_idx)
            # Flag the dimensions along which the neighbor index leaves the grid.
            boundary_dims = ntuple(d -> !(1 <= neighbor_idx[d] <= chunk_size[d]), rank)
            load_boundary_region(boundary, source, region_code, neigh_dist, boundary_dims)
        else
            load_neighbor_region(source, region_code, neigh_dist)
        end
    end

    @assert filled == total
    return Tuple(halos)
end
729+
"""
    select_neighborhood_chunk_deps(chunks, idx, neigh_dist, boundary)

Collect the chunks needed to build the halo of `chunks[idx]`.

Returns a `Vector{Any}` of length `3^N` (`N = ndims(chunks)`): the center chunk
first, followed by one source chunk per non-center region code. Region codes are
enumerated as base-3 digits mapped to `-1`, `0`, `1` per dimension, first
dimension varying fastest.

For a region whose adjacent index is reported by `is_past_boundary` as outside
`size(chunks)`, the source is the chunk at `boundary_transition(...)` when
`boundary_has_transition(boundary)` holds, and the center chunk otherwise.
"""
function select_neighborhood_chunk_deps(chunks, idx, neigh_dist, boundary)
    validate_neigh_dist(neigh_dist)

    rank = ndims(chunks)
    grid_size = size(chunks)
    stride = 1  # offset, in chunks, between the center and each neighbor

    # The center chunk always occupies the first slot.
    accesses = Any[chunks[idx]]

    for code_id in 0:(3^rank - 1)
        # Decode code_id into one digit in {-1, 0, 1} per dimension.
        region_code = ntuple(d -> (code_id ÷ 3^(d - 1)) % 3 - 1, rank)
        if all(iszero, region_code)
            continue  # center already recorded
        end

        target_idx = idx + CartesianIndex(map(c -> c * stride, region_code))

        if is_past_boundary(grid_size, target_idx)
            target_idx = if boundary_has_transition(boundary)
                boundary_transition(boundary, target_idx, grid_size)
            else
                idx
            end
        end
        push!(accesses, chunks[target_idx])
    end

    @assert length(accesses) == 3^rank
    return accesses
end
762+
"""
    build_chunk_halo(neigh_dist, boundary, idx, chunk_size, own_center::Bool, read_deps...)

Assemble the halo for one chunk from its dependencies.

`read_deps[1]` is taken as the center; the full `read_deps` tuple is handed to
`load_neighborhood_halos_from_deps` to produce the surrounding halos, and the
result of `build_halo` (called with the given `own_center` flag) is returned.
"""
function build_chunk_halo(neigh_dist, boundary, idx, chunk_size, own_center::Bool, read_deps...)
    center_chunk = read_deps[1]
    surrounding = load_neighborhood_halos_from_deps(
        read_deps, idx, chunk_size, neigh_dist, boundary
    )
    return build_halo(neigh_dist, boundary, center_chunk, surrounding...; own_center)
end
768+
652769function select_neighborhood_chunks (chunks, idx, neigh_dist, boundary)
653770 validate_neigh_dist (neigh_dist)
654771
698815
699816# Returns (region_metadata, neighbor_chunk_dtasks) without spawning intermediate load tasks.
700817# region_metadata: Vector of (region_code, is_boundary, boundary_dims).
701- # neighbor_chunk_dtasks: Vector of raw chunk DTasks (resolved to arrays when build_halo_consolidated runs).
818+ # neighbor_chunk_dtasks: Vector of raw chunk DTasks (resolved to arrays when build_halo_new runs).
702819function select_neighborhood_info (chunks, idx, neigh_dist, boundary)
703820 validate_neigh_dist (neigh_dist)
704821 N = ndims (chunks)
@@ -782,7 +899,7 @@ function build_halo_new(neigh_dist, boundary, center, region_metadata, neighbor_
782899 is_boundary ? load_boundary_region (boundary, chunk, region_code, neigh_dist, boundary_dims) :
783900 load_neighbor_region (chunk, region_code, neigh_dist)
784901 end
785- return HaloArray (copy (center), halos, halo_width)
902+ return HaloArray (copy (center), halos, halo_width; own_center = true )
786903end
787904
788905# Cache-hit path: fill an existing HaloArray in-place and return it. No cache operations —
@@ -803,11 +920,12 @@ function fill_halo_inplace!(halo::HaloArray, neigh_dist, boundary, center, regio
803920 return halo
804921end
805922
806- function build_halo (neigh_dist, boundary, center, all_halos... )
923+ function build_halo (neigh_dist, boundary, center, all_halos... ; own_center :: Bool = false )
807924 N = ndims (center)
808925 expected_halos = 3 ^ N - 1
809926 @assert length (all_halos) == expected_halos " Halo mismatch: N=$N expected $expected_halos halos, got $(length (all_halos)) "
810- return HaloArray (copy (center), (all_halos... ,), ntuple (i-> get_neigh_dist (neigh_dist, i), N))
927+ center_data = own_center ? copy (center) : center
928+ return HaloArray (center_data, (all_halos... ,), ntuple (i-> get_neigh_dist (neigh_dist, i), N); own_center)
811929end
812930
813931function load_neighborhood (arr:: HaloArray{T,N} , idx) where {T,N}
0 commit comments