Skip to content

Commit c47be2d

Browse files
committed
Order multi-GPU test streams
* Order multi-GPU test streams

The unified-memory verification tests exercise real cross-GPU launches after preparing source arrays on another CUDA device stream. The source allocation and copy are stream-ordered work, but the peer launch stream had no explicit dependency on that source stream. Order the peer stream after the source stream before the checked launch.

For mempool-backed source arrays, keep normal peer access enabled in the positive execution cases while the companion rejection test disables only mempool access. That keeps the real launch path covered and makes mempool access the tested verifier difference without running the peer kernel in a brittle peer-disabled pool-access state.

Signed-off-by: Eric Shi <ershi@nvidia.com>
Approved-by: Eric Shi <ershi@nvidia.com>

See merge request omniverse/warp!2399
1 parent 6a99134 commit c47be2d

1 file changed

Lines changed: 11 additions & 8 deletions

File tree

warp/tests/cuda/test_unified_memory.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def test_unified_memory_verify_uses_peer_access_for_default_cuda_allocations(sel
482482
self.assertTrue(wp.can_access(peer_device, src))
483483

484484
wp.load_module(device=peer_device)
485-
wp.synchronize_device(target_device)
485+
peer_device.stream.wait_stream(target_device.stream)
486486
with launch_verification_mode(wp.LaunchVerificationMode.CHECKED):
487487
wp.launch(read_cpu_write_gpu, dim=n, inputs=[src], outputs=[dst], device=peer_device)
488488

@@ -518,7 +518,7 @@ def test_unified_memory_verify_uses_parent_allocator_for_default_cuda_slices(sel
518518
self.assertIs(src._ref, src_base)
519519

520520
wp.load_module(device=peer_device)
521-
wp.synchronize_device(target_device)
521+
peer_device.stream.wait_stream(target_device.stream)
522522
with launch_verification_mode(wp.LaunchVerificationMode.CHECKED):
523523
wp.launch(read_cpu_write_gpu, dim=n, inputs=[src], outputs=[dst], device=peer_device)
524524

@@ -565,8 +565,11 @@ def test_unified_memory_verify_uses_mempool_access_for_cuda_mempool_allocations(
565565
"""CUDA mempool allocations use mempool-access state for cross-GPU verification.
566566
567567
An array allocated while the source device's mempool is enabled needs
568-
the CUDA mempool access predicate. This test keeps peer access disabled
569-
so acceptance can only come from the allocation-specific mempool rule.
568+
the CUDA mempool access predicate. The companion rejection test keeps
569+
peer access enabled while mempool access is disabled, so the pair
570+
isolates the allocation-specific mempool rule without executing this
571+
peer kernel in a recently changed pool-access state with peer access
572+
disabled.
570573
"""
571574

572575
target_device, peer_device = get_cuda_device_pair_with_mempool_access_support()
@@ -575,7 +578,7 @@ def test_unified_memory_verify_uses_mempool_access_for_cuda_mempool_allocations(
575578
peer_access_saved = wp.is_peer_access_enabled(target_device, peer_device)
576579
mempool_access_saved = wp.is_mempool_access_enabled(target_device, peer_device)
577580
try:
578-
wp.set_peer_access_enabled(target_device, peer_device, False)
581+
wp.set_peer_access_enabled(target_device, peer_device, True)
579582
wp.set_mempool_access_enabled(target_device, peer_device, True)
580583

581584
with wp.ScopedMempool(target_device, True):
@@ -586,7 +589,7 @@ def test_unified_memory_verify_uses_mempool_access_for_cuda_mempool_allocations(
586589
self.assertTrue(wp.can_access(peer_device, src))
587590

588591
wp.load_module(device=peer_device)
589-
wp.synchronize_device(target_device)
592+
peer_device.stream.wait_stream(target_device.stream)
590593
with launch_verification_mode(wp.LaunchVerificationMode.CHECKED):
591594
wp.launch(read_cpu_write_gpu, dim=n, inputs=[src], outputs=[dst], device=peer_device)
592595

@@ -612,7 +615,7 @@ def test_unified_memory_verify_uses_parent_allocator_for_cuda_mempool_slices(sel
612615
peer_access_saved = wp.is_peer_access_enabled(target_device, peer_device)
613616
mempool_access_saved = wp.is_mempool_access_enabled(target_device, peer_device)
614617
try:
615-
wp.set_peer_access_enabled(target_device, peer_device, False)
618+
wp.set_peer_access_enabled(target_device, peer_device, True)
616619
wp.set_mempool_access_enabled(target_device, peer_device, True)
617620

618621
with wp.ScopedMempool(target_device, True):
@@ -624,7 +627,7 @@ def test_unified_memory_verify_uses_parent_allocator_for_cuda_mempool_slices(sel
624627
self.assertIs(src._ref, src_base)
625628

626629
wp.load_module(device=peer_device)
627-
wp.synchronize_device(target_device)
630+
peer_device.stream.wait_stream(target_device.stream)
628631
with launch_verification_mode(wp.LaunchVerificationMode.CHECKED):
629632
wp.launch(read_cpu_write_gpu, dim=n, inputs=[src], outputs=[dst], device=peer_device)
630633

0 commit comments

Comments
 (0)