@@ -48,6 +48,11 @@ def square_kernel(input: wp.array(dtype=float), output: wp.array(dtype=float)):
4848 output [i ] = input [i ] * input [i ]
4949
5050
# Minimal kernel with no arguments: it only queries its thread index and
# writes nothing. The launch-failure tests in this file use it as a launch
# target that needs no input or output arrays.
@wp.kernel
def noop_kernel():
    thread_index = wp.tid()
54+
55+
5156def test1d (test , device ):
5257 a = np .arange (0 , dim_x ).reshape (dim_x )
5358
@@ -400,6 +405,12 @@ def kernel_single_tuple_bound(x: wp.array(dtype=float)):
400405 x [tid ] = x [tid ] * 2.0
401406
402407
# Element-wise square, output[i] = input[i] * input[i], declared with
# launch_bounds=256 on the kernel decorator.
@wp.kernel(launch_bounds=256)
def bounded_square_kernel(input: wp.array(dtype=float), output: wp.array(dtype=float)):
    idx = wp.tid()
    value = input[idx]
    output[idx] = value * value
412+
413+
403414def test_launch_bounds_none (test , device ):
404415 """Test kernel without launch_bounds"""
405416 n = 1024
@@ -436,7 +447,61 @@ def test_launch_bounds_single_tuple(test, device):
436447 assert_np_equal (x .numpy (), np .full (n , 2.0 , dtype = np .float32 ))
437448
438449
def test_launch_device_block_dim_failure(test, device):
    """Expect a launch error when the requested block size is oversized.

    Launches ``noop_kernel`` with ``block_dim=2048`` inside
    ``assert_cuda_launch_error`` so that the failure has to surface as a
    Python-level error whose message names the kernel, rather than the
    launch returning normally.

    Args:
        test: The running test case, forwarded to ``assert_cuda_launch_error``.
        device: Device on which the launch is attempted.
    """
    expected_message = r"Error launching kernel: .*noop_kernel.*Warp CUDA error"

    with assert_cuda_launch_error(test, expected_message):
        wp.launch(noop_kernel, dim=1, block_dim=2048, device=device)
457+
458+
def test_launch_bounds_block_dim_failure(test, device):
    """Expect a launch error when ``block_dim`` violates a kernel's launch bounds.

    Launches ``kernel_single_bound`` (defined elsewhere in this file) with
    ``block_dim=512`` over a one-element array and requires the failure to be
    raised instead of the kernel being skipped silently.
    NOTE(review): this presumes 512 exceeds the bound declared on
    ``kernel_single_bound`` -- confirm against its decorator.

    Args:
        test: The running test case, forwarded to ``assert_cuda_launch_error``.
        device: Device on which the array is allocated and the launch attempted.
    """
    expected_message = r"Error launching kernel: .*kernel_single_bound.*Warp CUDA error"
    values = wp.ones(1, dtype=float, device=device)

    with assert_cuda_launch_error(test, expected_message):
        wp.launch(kernel_single_bound, dim=1, inputs=[values], block_dim=512, device=device)
468+
469+
def test_launch_cmd_block_dim_failure(test, device):
    """Expect a launch error when a recorded launch command is replayed.

    The launch is first captured with ``record_cmd=True``; only the later
    ``launch()`` call on the returned command object is placed inside the
    error-asserting context, so the error must come from the replay itself.

    Args:
        test: The running test case, forwarded to ``assert_cuda_launch_error``.
        device: Device on which the array is allocated and the launch attempted.
    """
    expected_message = r"Error launching kernel: .*kernel_single_bound.*Warp CUDA error"
    values = wp.ones(1, dtype=float, device=device)

    # Recording happens outside the context manager on purpose: only the
    # replay below is expected to raise.
    recorded_launch = wp.launch(
        kernel_single_bound,
        dim=1,
        inputs=[values],
        block_dim=512,
        device=device,
        record_cmd=True,
    )

    with assert_cuda_launch_error(test, expected_message):
        recorded_launch.launch()
480+
481+
def test_launch_adjoint_block_dim_failure(test, device):
    """Expect a launch error when an adjoint (backward) launch fails.

    Runs ``bounded_square_kernel`` (declared with ``launch_bounds=256``) in
    adjoint mode with ``block_dim=512`` and requires the failure to be raised,
    so callers cannot carry on with missing or partial gradients.

    Args:
        test: The running test case, forwarded to ``assert_cuda_launch_error``.
        device: Device on which the arrays are allocated and the launch attempted.
    """
    expected_message = r"Error launching kernel: .*bounded_square_kernel.*Warp CUDA error"

    source = wp.array([1.0], dtype=float, requires_grad=True, device=device)
    result = wp.empty_like(source)
    # Seed the output gradient that the backward pass would read.
    result.grad.fill_(1.0)

    with assert_cuda_launch_error(test, expected_message):
        wp.launch(
            bounded_square_kernel,
            dim=source.size,
            inputs=[source, result],
            adj_inputs=[None, None],
            adjoint=True,
            block_dim=512,
            device=device,
        )
501+
502+
# Device lists passed as ``devices=`` when the test functions are registered
# on TestLaunch further down in this file.
devices = get_test_devices()
# Used for the launch-failure tests, which are registered with this list only.
cuda_devices = get_cuda_test_devices()
440505
441506
442507class TestLaunch (unittest .TestCase ):
@@ -462,6 +527,18 @@ class TestLaunch(unittest.TestCase):
add_function_test(TestLaunch, "test_launch_bounds_single", test_launch_bounds_single, devices=devices)
add_function_test(TestLaunch, "test_launch_bounds_tuple", test_launch_bounds_tuple, devices=devices)
add_function_test(TestLaunch, "test_launch_bounds_single_tuple", test_launch_bounds_single_tuple, devices=devices)

# The launch-failure tests are registered against ``cuda_devices`` only.
# Each one is registered under its own function name, in the order listed.
for _failure_test in (
    test_launch_device_block_dim_failure,
    test_launch_bounds_block_dim_failure,
    test_launch_cmd_block_dim_failure,
    test_launch_adjoint_block_dim_failure,
):
    add_function_test(TestLaunch, _failure_test.__name__, _failure_test, devices=cuda_devices)
del _failure_test  # keep the loop variable out of the module namespace
465542
466543
467544if __name__ == "__main__" :
0 commit comments