From 6a359cc4a9c78d2e13fac5367e8924622c97d90c Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 25 May 2026 18:59:26 -0700 Subject: [PATCH 01/80] Add fp16 vector atomic capability --- .../a3-02-reference-capability-atoms.md | 12 +++++++ source/slang/hlsl.meta.slang | 33 +++++++++++++++---- source/slang/slang-capabilities.capdef | 16 ++++++++- ...byte-address-half-atomics-capability.slang | 19 +++++++++++ 4 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index c2262fc581e..33d0a922e06 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -701,6 +701,9 @@ Extensions `SPV_NV_ray_tracing_motion_blur` > Represents the SPIR-V extension for ray tracing motion blur. +`SPV_NV_shader_atomic_fp16_vector` +> Represents the SPIR-V extension for vector atomic float16 operations. + `SPV_NV_shader_image_footprint` > Represents the SPIR-V extension for shader image footprint. @@ -723,6 +726,9 @@ Extensions `spvAtomicFloat16MinMaxEXT` > Represents the SPIR-V capability for atomic float 16 min/max operations. +`spvAtomicFloat16VectorNV` +> Represents the SPIR-V capability for vector atomic float16 operations. + `spvAtomicFloat32AddEXT` > Represents the SPIR-V capability for atomic float 32 add operations. 
@@ -958,6 +964,12 @@ Compound Capabilities `atomic_glsl_float2` > (GLSL/SPIRV) Capabilities required to use GLSL-tier-2 float-atomic operations +`atomic_glsl_halfadd` +> (SPIRV) Capabilities required to use either native or halfvec-emulated fp16 atomic add + +`atomic_glsl_halfscalar` +> (SPIRV) Capabilities required to use single-lane fp16 float-atomic operations + `atomic_glsl_halfvec` > (GLSL/SPIRV) Capabilities required to use NVAPI GLSL-fp16 float-atomic operations diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index bf35b722294..55900c72c23 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6493,7 +6493,8 @@ $} /// @return The 2 16-bit floating point values packed into a 32-bit unsigned integer. [__requiresNVAPI] [ForceInline] - [require(cuda_hlsl_spirv)] + [require(cuda_hlsl, sm_5_0)] + [require(spirv, spvAtomicFloat16VectorNV)] uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value) { __target_switch @@ -6511,14 +6512,17 @@ $} /// @param byteAddress The address at which to perform the atomic add operation. /// @param value The value to add to the value at `byteAddress`. /// @param originalValue The original value at `byteAddress` before the add operation. - /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` and requires `SPV_EXT_shader_atomic_float16_add` extension. + /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` on a `half` when + /// `SPV_EXT_shader_atomic_float16_add` is available, and falls back to a `half2` + /// atomic when `SPV_NV_shader_atomic_fp16_vector` is available. /// /// For HLSL, this function translates to an NVAPI call /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function /// maps to `atomicAdd`. 
[__requiresNVAPI] [ForceInline] - [require(cuda_hlsl_spirv, sm_5_0)] + [require(cuda_hlsl, sm_5_0)] + [require(spirv, atomic_glsl_halfadd)] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) { __target_switch @@ -6536,12 +6540,28 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; + case cuda: + __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt($1), $2))"; + case spvAtomicFloat16AddEXT: default: { let buf = __getEquivalentStructuredBuffer(this); originalValue = __atomic_add(buf[byteAddress/2], value); return; } + case spvAtomicFloat16VectorNV: + { + let buf = __getEquivalentStructuredBuffer(this); + if ((byteAddress & 2) == 0) + { + originalValue = __atomic_add(buf[byteAddress/4], half2(value, half(0.0))).x; + } + else + { + originalValue = __atomic_add(buf[byteAddress/4], half2(half(0.0), value)).y; + } + return; + } } } @@ -6558,7 +6578,8 @@ $} /// maps to `atomicAdd`. [__requiresNVAPI] [ForceInline] - [require(cuda_hlsl_spirv, sm_5_0)] + [require(cuda_hlsl, sm_5_0)] + [require(spirv, spvAtomicFloat16VectorNV)] void InterlockedAddF16Emulated(uint byteAddress, half value, out half originalValue) { __target_switch @@ -33741,7 +33762,7 @@ uint packHalf2x16(half2 unpackedValue) return packHalf2x16(float2(unpackedValue)); } -[require(spirv)] +[require(spirv, spvAtomicFloat16VectorNV)] void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) { let buf = (half2*)(dest); @@ -33756,7 +33777,7 @@ void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) } } -[require(spirv)] +[require(spirv, spvAtomicFloat16VectorNV)] void InterlockedAddF16x2(half2* dest, half2 value, out half2 originalValue) { originalValue = __atomic_add(*dest, value); diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 3af42a7dc2a..ab32682020d 100644 --- a/source/slang/slang-capabilities.capdef +++ 
b/source/slang/slang-capabilities.capdef @@ -543,6 +543,10 @@ def SPV_EXT_shader_atomic_float_add : _spirv_1_0; /// [EXT] def SPV_EXT_shader_atomic_float16_add : SPV_EXT_shader_atomic_float_add; +/// Represents the SPIR-V extension for vector atomic float16 operations. +/// [EXT] +def SPV_NV_shader_atomic_fp16_vector : _spirv_1_0; + /// Represents the SPIR-V extension for atomic float min/max operations. /// [EXT] def SPV_EXT_shader_atomic_float_min_max : _spirv_1_0; @@ -700,6 +704,10 @@ def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; /// [EXT] def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; +/// Represents the SPIR-V capability for vector atomic float16 operations. +/// [EXT] +def spvAtomicFloat16VectorNV : SPV_NV_shader_atomic_fp16_vector; + /// Represents the SPIR-V capability for atomic float 64 add operations. /// [EXT] def spvAtomicFloat64AddEXT : SPV_EXT_shader_atomic_float_add; @@ -1261,7 +1269,7 @@ alias GL_NV_ray_tracing_motion_blur = _GL_NV_ray_tracing_motion_blur | spvRayTra /// Represents the GL_NV_shader_atomic_fp16_vector extension. /// [EXT] -alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | _spirv_1_0; +alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spvAtomicFloat16VectorNV; /// Represents the GL_NV_shader_invocation_reorder extension (NVIDIA-specific). 
/// [EXT] @@ -2376,9 +2384,15 @@ alias atomic_glsl_float1 = GL_EXT_shader_atomic_float; /// (GLSL/SPIRV) Capabilities required to use GLSL-tier-2 float-atomic operations /// [Compound] alias atomic_glsl_float2 = GL_EXT_shader_atomic_float2; +/// (SPIRV) Capabilities required to use single-lane fp16 float-atomic operations +/// [Compound] +alias atomic_glsl_halfscalar = spvAtomicFloat16AddEXT; /// (GLSL/SPIRV) Capabilities required to use NVAPI GLSL-fp16 float-atomic operations /// [Compound] alias atomic_glsl_halfvec = GL_NV_shader_atomic_fp16_vector; +/// (SPIRV) Capabilities required to use either native or halfvec-emulated fp16 atomic add +/// [Compound] +alias atomic_glsl_halfadd = atomic_glsl_halfscalar | atomic_glsl_halfvec; /// (GLSL/SPIRV) Capabilities required to use GLSL-400 atomic operations /// [Compound] alias atomic_glsl = spirv_1_0 | _GLSL_400; diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang new file mode 100644 index 00000000000..139e0254511 --- /dev/null +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -0,0 +1,19 @@ +//TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -ignore-capabilities -capability spvAtomicFloat16AddEXT +//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -ignore-capabilities -capability GL_NV_shader_atomic_fp16_vector + +RWByteAddressBuffer tmpBuffer; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + half originalValue; + tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); +} + +// SCALAR: OpCapability AtomicFloat16AddEXT +// SCALAR: OpExtension "SPV_EXT_shader_atomic_float16_add" +// SCALAR-NOT: OpCapability AtomicFloat16VectorNV + +// VECTOR: OpCapability 
AtomicFloat16VectorNV +// VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR-NOT: OpCapability AtomicFloat16AddEXT From 5588f5a24a5ab606f3973a98eb3b74cc499094e4 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 25 May 2026 19:43:58 -0700 Subject: [PATCH 02/80] Address fp16 atomic capability wording --- docs/user-guide/a3-02-reference-capability-atoms.md | 2 +- source/slang/slang-capabilities.capdef | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 33d0a922e06..3bd3ba0c116 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -968,7 +968,7 @@ Compound Capabilities > (SPIRV) Capabilities required to use either native or halfvec-emulated fp16 atomic add `atomic_glsl_halfscalar` -> (SPIRV) Capabilities required to use single-lane fp16 float-atomic operations +> (SPIRV) Capabilities required to use scalar fp16 float-atomic operations `atomic_glsl_halfvec` > (GLSL/SPIRV) Capabilities required to use NVAPI GLSL-fp16 float-atomic operations diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index ab32682020d..93918ebb75a 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -2384,7 +2384,7 @@ alias atomic_glsl_float1 = GL_EXT_shader_atomic_float; /// (GLSL/SPIRV) Capabilities required to use GLSL-tier-2 float-atomic operations /// [Compound] alias atomic_glsl_float2 = GL_EXT_shader_atomic_float2; -/// (SPIRV) Capabilities required to use single-lane fp16 float-atomic operations +/// (SPIRV) Capabilities required to use scalar fp16 float-atomic operations /// [Compound] alias atomic_glsl_halfscalar = spvAtomicFloat16AddEXT; /// (GLSL/SPIRV) Capabilities required to use NVAPI GLSL-fp16 float-atomic operations From 
c84eb0e6a4d5bcd27aa4be50b38732b1928dd09d Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Wed, 27 May 2026 16:55:04 -0700 Subject: [PATCH 03/80] Update command line capability reference --- docs/command-line-slangc-reference.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/command-line-slangc-reference.md b/docs/command-line-slangc-reference.md index 6508a617860..553264dc8ad 100644 --- a/docs/command-line-slangc-reference.md +++ b/docs/command-line-slangc-reference.md @@ -1314,6 +1314,7 @@ A capability describes an optional feature that a target may or may not support. * `SPV_EXT_descriptor_indexing` : enables the SPV_EXT_descriptor_indexing extension * `SPV_EXT_shader_atomic_float_add` : enables the SPV_EXT_shader_atomic_float_add extension * `SPV_EXT_shader_atomic_float16_add` : enables the SPV_EXT_shader_atomic_float16_add extension +* `SPV_NV_shader_atomic_fp16_vector` : enables the SPV_NV_shader_atomic_fp16_vector extension * `SPV_EXT_shader_atomic_float_min_max` : enables the SPV_EXT_shader_atomic_float_min_max extension * `SPV_EXT_mesh_shader` : enables the SPV_EXT_mesh_shader extension * `SPV_EXT_demote_to_helper_invocation` : enables the SPV_EXT_demote_to_helper_invocation extension @@ -1351,6 +1352,7 @@ A capability describes an optional feature that a target may or may not support. * `spvDeviceGroup` * `spvAtomicFloat32AddEXT` * `spvAtomicFloat16AddEXT` +* `spvAtomicFloat16VectorNV` * `spvAtomicFloat64AddEXT` * `spvInt64Atomics` * `spvAtomicFloat32MinMaxEXT` @@ -1719,7 +1721,9 @@ A capability describes an optional feature that a target may or may not support. 
* `texture_shadowgrad` * `atomic_glsl_float1` * `atomic_glsl_float2` +* `atomic_glsl_halfscalar` * `atomic_glsl_halfvec` +* `atomic_glsl_halfadd` * `atomic_glsl` * `atomic_glsl_int64` * `GLSL_430_SPIRV_1_0_compute` : enables the GLSL_430_SPIRV_1_0_compute extension From 474f9ca202a413f73d09c65e297068664bec5715 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Wed, 27 May 2026 18:10:54 -0700 Subject: [PATCH 04/80] Address fp16 atomic review feedback --- docs/command-line-slangc-reference.md | 2 -- docs/user-guide/a3-02-reference-capability-atoms.md | 6 ------ source/slang/hlsl.meta.slang | 10 +++++----- source/slang/slang-capabilities.capdef | 6 ------ .../byte-address-half-atomics-capability.slang | 2 +- 5 files changed, 6 insertions(+), 20 deletions(-) diff --git a/docs/command-line-slangc-reference.md b/docs/command-line-slangc-reference.md index 553264dc8ad..a34d8c1ddf9 100644 --- a/docs/command-line-slangc-reference.md +++ b/docs/command-line-slangc-reference.md @@ -1721,9 +1721,7 @@ A capability describes an optional feature that a target may or may not support. 
* `texture_shadowgrad` * `atomic_glsl_float1` * `atomic_glsl_float2` -* `atomic_glsl_halfscalar` * `atomic_glsl_halfvec` -* `atomic_glsl_halfadd` * `atomic_glsl` * `atomic_glsl_int64` * `GLSL_430_SPIRV_1_0_compute` : enables the GLSL_430_SPIRV_1_0_compute extension diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 3bd3ba0c116..eb539231b64 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -964,12 +964,6 @@ Compound Capabilities `atomic_glsl_float2` > (GLSL/SPIRV) Capabilities required to use GLSL-tier-2 float-atomic operations -`atomic_glsl_halfadd` -> (SPIRV) Capabilities required to use either native or halfvec-emulated fp16 atomic add - -`atomic_glsl_halfscalar` -> (SPIRV) Capabilities required to use scalar fp16 float-atomic operations - `atomic_glsl_halfvec` > (GLSL/SPIRV) Capabilities required to use NVAPI GLSL-fp16 float-atomic operations diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 55900c72c23..18449f11705 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6522,7 +6522,8 @@ $} [__requiresNVAPI] [ForceInline] [require(cuda_hlsl, sm_5_0)] - [require(spirv, atomic_glsl_halfadd)] + [require(spirv, spvAtomicFloat16VectorNV)] + [require(spirv, spvAtomicFloat16AddEXT)] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) { __target_switch @@ -6578,8 +6579,7 @@ $} /// maps to `atomicAdd`. 
[__requiresNVAPI] [ForceInline] - [require(cuda_hlsl, sm_5_0)] - [require(spirv, spvAtomicFloat16VectorNV)] + [require(cuda_hlsl_spirv, sm_5_0)] void InterlockedAddF16Emulated(uint byteAddress, half value, out half originalValue) { __target_switch @@ -33762,7 +33762,7 @@ uint packHalf2x16(half2 unpackedValue) return packHalf2x16(float2(unpackedValue)); } -[require(spirv, spvAtomicFloat16VectorNV)] +[require(spirv)] void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) { let buf = (half2*)(dest); @@ -33777,7 +33777,7 @@ void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) } } -[require(spirv, spvAtomicFloat16VectorNV)] +[require(spirv)] void InterlockedAddF16x2(half2* dest, half2 value, out half2 originalValue) { originalValue = __atomic_add(*dest, value); diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 93918ebb75a..745b50e4a3b 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -2384,15 +2384,9 @@ alias atomic_glsl_float1 = GL_EXT_shader_atomic_float; /// (GLSL/SPIRV) Capabilities required to use GLSL-tier-2 float-atomic operations /// [Compound] alias atomic_glsl_float2 = GL_EXT_shader_atomic_float2; -/// (SPIRV) Capabilities required to use scalar fp16 float-atomic operations -/// [Compound] -alias atomic_glsl_halfscalar = spvAtomicFloat16AddEXT; /// (GLSL/SPIRV) Capabilities required to use NVAPI GLSL-fp16 float-atomic operations /// [Compound] alias atomic_glsl_halfvec = GL_NV_shader_atomic_fp16_vector; -/// (SPIRV) Capabilities required to use either native or halfvec-emulated fp16 atomic add -/// [Compound] -alias atomic_glsl_halfadd = atomic_glsl_halfscalar | atomic_glsl_halfvec; /// (GLSL/SPIRV) Capabilities required to use GLSL-400 atomic operations /// [Compound] alias atomic_glsl = spirv_1_0 | _GLSL_400; diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang 
b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 139e0254511..4cf5768efc5 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,5 +1,5 @@ //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -ignore-capabilities -capability spvAtomicFloat16AddEXT -//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -ignore-capabilities -capability GL_NV_shader_atomic_fp16_vector +//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -ignore-capabilities -capability spvAtomicFloat16VectorNV RWByteAddressBuffer tmpBuffer; From 272c12b45beb6d580c8758ff4d1f1315f48b008c Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Wed, 27 May 2026 20:02:54 -0700 Subject: [PATCH 05/80] Remove ignored capability from fp16 atomic test --- .../byte-address-half-atomics-capability.slang | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 4cf5768efc5..cc8a5e40e48 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,5 +1,5 @@ -//TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -ignore-capabilities -capability spvAtomicFloat16AddEXT -//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation 
-ignore-capabilities -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -capability spvAtomicFloat16AddEXT +//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -capability spvAtomicFloat16VectorNV RWByteAddressBuffer tmpBuffer; From 7c549da51fa084811bf6bade7ffb2e4fa80f5b98 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Wed, 27 May 2026 20:32:17 -0700 Subject: [PATCH 06/80] Add negative fp16 atomic extension checks --- .../byte-address-half-atomics-capability.slang | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index cc8a5e40e48..c020044dc75 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -13,7 +13,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // SCALAR: OpCapability AtomicFloat16AddEXT // SCALAR: OpExtension "SPV_EXT_shader_atomic_float16_add" // SCALAR-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // VECTOR: OpCapability AtomicFloat16VectorNV // VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" // VECTOR-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" From d272020c2b2e79941f3446a52f0e1f76750088da Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 01:45:34 -0700 Subject: [PATCH 07/80] Enable SPIR-V validation for fp16 atomic test --- .../byte-address-half-atomics-capability.slang | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index c020044dc75..d43cd5cf52f 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,5 +1,5 @@ -//TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -capability spvAtomicFloat16AddEXT -//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT +//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV RWByteAddressBuffer tmpBuffer; From ba59dda6a5e67bbd3d1eeb1090cba1060fa7440b Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 03:33:48 -0700 Subject: [PATCH 08/80] Keep fp16 vector GLSL capability compatible --- source/slang/slang-capabilities.capdef | 2 +- .../gl-nv-shader-atomic-fp16-vector-capability.slang | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 745b50e4a3b..230c7aa77ad 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -1269,7 +1269,7 @@ alias GL_NV_ray_tracing_motion_blur = _GL_NV_ray_tracing_motion_blur | spvRayTra /// Represents the GL_NV_shader_atomic_fp16_vector extension. 
/// [EXT] -alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spvAtomicFloat16VectorNV; +alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spvAtomicFloat16VectorNV | _spirv_1_0; /// Represents the GL_NV_shader_invocation_reorder extension (NVIDIA-specific). /// [EXT] diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang new file mode 100644 index 00000000000..c14557fd581 --- /dev/null +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang @@ -0,0 +1,9 @@ +//TEST:SIMPLE(filecheck=CHECK): -target spirv -entry computeMain -stage compute -emit-spirv-directly + +[require(GL_NV_shader_atomic_fp16_vector)] +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ +} + +// CHECK: OpMemoryModel From 198733b86e02b387b98ca086251315b1c0108b29 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 03:37:59 -0700 Subject: [PATCH 09/80] Strengthen fp16 vector capability fallback test --- tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang index c14557fd581..dc3c37f8b3f 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang @@ -6,4 +6,6 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { } +// CHECK-NOT: OpCapability AtomicFloat16VectorNV +// CHECK-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // CHECK: OpMemoryModel From 8a800c4c94ee3612e5e4eb5171597d03ef8a89ee Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 04:16:06 -0700 Subject: [PATCH 10/80] Preserve half lane 
in fp16 atomic fallback --- source/slang/hlsl.meta.slang | 37 +++++++++++++------ ...byte-address-half-atomics-capability.slang | 21 ++++++++--- ...er-atomic-fp16-vector-compatibility.slang} | 2 + 3 files changed, 44 insertions(+), 16 deletions(-) rename tests/spirv/{gl-nv-shader-atomic-fp16-vector-capability.slang => gl-nv-shader-atomic-fp16-vector-compatibility.slang} (67%) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 18449f11705..68f52738862 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6513,15 +6513,17 @@ $} /// @param value The value to add to the value at `byteAddress`. /// @param originalValue The original value at `byteAddress` before the add operation. /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` on a `half` when - /// `SPV_EXT_shader_atomic_float16_add` is available, and falls back to a `half2` - /// atomic when `SPV_NV_shader_atomic_fp16_vector` is available. + /// `SPV_EXT_shader_atomic_float16_add` is available. When only + /// `SPV_NV_shader_atomic_fp16_vector` is selected, it uses a 32-bit compare-exchange + /// loop so the neighboring half lane is preserved. /// /// For HLSL, this function translates to an NVAPI call /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function /// maps to `atomicAdd`. 
[__requiresNVAPI] [ForceInline] - [require(cuda_hlsl, sm_5_0)] + [require(hlsl, sm_5_0)] + [require(cuda, cuda_sm_7_0)] [require(spirv, spvAtomicFloat16VectorNV)] [require(spirv, spvAtomicFloat16AddEXT)] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) @@ -6552,16 +6554,29 @@ $} } case spvAtomicFloat16VectorNV: { - let buf = __getEquivalentStructuredBuffer(this); - if ((byteAddress & 2) == 0) - { - originalValue = __atomic_add(buf[byteAddress/4], half2(value, half(0.0))).x; - } - else + // A half2 atomic add with zero in the other lane is not bit-preserving + // for all IEEE half values, so use CAS on the containing 32-bit word. + let buf = __getEquivalentStructuredBuffer(this); + uint wordIndex = byteAddress / 4; + uint laneShift = (byteAddress & 2) == 0 ? 0U : 16U; + uint laneMask = 0xFFFFU << laneShift; + uint oldPacked = __atomic_add(buf[wordIndex], 0U); + + for (;;) { - originalValue = __atomic_add(buf[byteAddress/4], half2(half(0.0), value)).y; + uint oldHalfBits = (oldPacked >> laneShift) & 0xFFFFU; + half oldHalf = asfloat16(uint16_t(oldHalfBits)); + uint newHalfBits = uint(asuint16(oldHalf + value)); + uint newPacked = (oldPacked & ~laneMask) | (newHalfBits << laneShift); + uint actualPacked = + __atomic_compare_exchange(buf[wordIndex], oldPacked, newPacked); + if (actualPacked == oldPacked) + { + originalValue = oldHalf; + return; + } + oldPacked = actualPacked; } - return; } } } diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index d43cd5cf52f..df12e0544f0 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,5 +1,6 @@ //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT 
-//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=FALLBACK): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 RWByteAddressBuffer tmpBuffer; @@ -7,15 +8,25 @@ RWByteAddressBuffer tmpBuffer; void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { half originalValue; + tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); } +// SCALAR-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // SCALAR: OpCapability AtomicFloat16AddEXT // SCALAR: OpExtension "SPV_EXT_shader_atomic_float16_add" // SCALAR-NOT: OpCapability AtomicFloat16VectorNV // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// VECTOR: OpCapability AtomicFloat16VectorNV -// VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// VECTOR-NOT: OpCapability AtomicFloat16AddEXT -// VECTOR-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// FALLBACK-NOT: OpCapability AtomicFloat16AddEXT +// FALLBACK-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// FALLBACK-NOT: OpCapability AtomicFloat16VectorNV +// FALLBACK-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// FALLBACK-NOT: OpAtomicFAddEXT +// FALLBACK: OpAtomicIAdd +// FALLBACK: OpAtomicCompareExchange +// FALLBACK-NOT: OpAtomicFAddEXT + +// CUDA: atomicAdd( +// CUDA: _getPtrAt( diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang similarity index 67% rename from tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang rename to tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index dc3c37f8b3f..6c864053144 100644 --- 
a/tests/spirv/gl-nv-shader-atomic-fp16-vector-capability.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -1,5 +1,7 @@ //TEST:SIMPLE(filecheck=CHECK): -target spirv -entry computeMain -stage compute -emit-spirv-directly +// Requiring the GLSL extension should preserve the old SPIR-V 1.0 compatibility path and +// should not declare the NV vector capability until a vector atomic operation needs it. [require(GL_NV_shader_atomic_fp16_vector)] [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) From 91fcc4ce4811522503af1472a0f1cdad2496352b Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 04:25:10 -0700 Subject: [PATCH 11/80] Cover fp16 vector atomic capability emission --- .../byte-address-half-atomics-capability.slang | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index df12e0544f0..d0bbd135d6a 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,15 +1,21 @@ //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //TEST:SIMPLE(filecheck=FALLBACK): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 RWByteAddressBuffer tmpBuffer; +RWStructuredBuffer vectorBuffer; [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : 
SV_DispatchThreadID) { +#ifdef TEST_DIRECT_VECTOR_ATOMIC + __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +#else half originalValue; tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); +#endif } // SCALAR-NOT: OpCapability AtomicFloat16VectorNV @@ -28,5 +34,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // FALLBACK: OpAtomicCompareExchange // FALLBACK-NOT: OpAtomicFAddEXT +// VECTOR-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR: OpCapability AtomicFloat16VectorNV +// VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" + // CUDA: atomicAdd( // CUDA: _getPtrAt( From 9abc8ec27ce167bd45df1631e7ef169843794810 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 04:37:44 -0700 Subject: [PATCH 12/80] Use default CUDA fp16 atomic lowering --- source/slang/hlsl.meta.slang | 2 -- .../byte-address-half-atomics-capability.slang | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 68f52738862..bf9a56ce26c 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6543,8 +6543,6 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; - case cuda: - __intrinsic_asm "(*$3 = atomicAdd($0._getPtrAt($1), $2))"; case spvAtomicFloat16AddEXT: default: { diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index d0bbd135d6a..7854e034780 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -39,5 +39,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) 
// VECTOR: OpCapability AtomicFloat16VectorNV // VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// CUDA: atomicAdd( -// CUDA: _getPtrAt( +// CUDA-COUNT-2: atomicAdd( From f273945097d40b54bde9b0cbc6958fdd6ed23617 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 05:14:29 -0700 Subject: [PATCH 13/80] Cover fp16 vector atomic review gaps --- .../a3-02-reference-capability-atoms.md | 7 ++++-- source/slang/hlsl.meta.slang | 22 ++++++++++++++----- source/slang/slang-capabilities.capdef | 7 ++++-- ...byte-address-half-atomics-capability.slang | 22 ++++++++++++++++--- .../byte-address-half-atomics-cas-pack.slang | 14 ++++++++++++ ...der-atomic-fp16-vector-compatibility.slang | 17 ++++++++++---- 6 files changed, 72 insertions(+), 17 deletions(-) create mode 100644 tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index eb539231b64..784c90c96b7 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -576,6 +576,9 @@ Extensions `GL_NV_shader_atomic_fp16_vector` > Represents the GL_NV_shader_atomic_fp16_vector extension. +> The SPIR-V 1.0 fallback preserves compatibility for shaders that only require the +> GLSL extension; emitted fp16 vector atomic operations still require +> spvAtomicFloat16VectorNV. `GL_NV_shader_invocation_reorder` > Represents the GL_NV_shader_invocation_reorder extension (NVIDIA-specific). @@ -702,7 +705,7 @@ Extensions > Represents the SPIR-V extension for ray tracing motion blur. `SPV_NV_shader_atomic_fp16_vector` -> Represents the SPIR-V extension for vector atomic float16 operations. +> Represents the SPIR-V extension for vector atomic float 16 add/min/max operations. `SPV_NV_shader_image_footprint` > Represents the SPIR-V extension for shader image footprint. 
@@ -727,7 +730,7 @@ Extensions > Represents the SPIR-V capability for atomic float 16 min/max operations. `spvAtomicFloat16VectorNV` -> Represents the SPIR-V capability for vector atomic float16 operations. +> Represents the SPIR-V capability for vector atomic float 16 add/min/max operations. `spvAtomicFloat32AddEXT` > Represents the SPIR-V capability for atomic float 32 add operations. diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index bf9a56ce26c..7bb5c43744e 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6493,7 +6493,8 @@ $} /// @return The 2 16-bit floating point values packed into a 32-bit unsigned integer. [__requiresNVAPI] [ForceInline] - [require(cuda_hlsl, sm_5_0)] + [require(hlsl, sm_5_0)] + [require(cuda, cuda_sm_7_0)] [require(spirv, spvAtomicFloat16VectorNV)] uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value) { @@ -6557,15 +6558,13 @@ $} let buf = __getEquivalentStructuredBuffer(this); uint wordIndex = byteAddress / 4; uint laneShift = (byteAddress & 2) == 0 ? 
0U : 16U; - uint laneMask = 0xFFFFU << laneShift; uint oldPacked = __atomic_add(buf[wordIndex], 0U); for (;;) { - uint oldHalfBits = (oldPacked >> laneShift) & 0xFFFFU; - half oldHalf = asfloat16(uint16_t(oldHalfBits)); - uint newHalfBits = uint(asuint16(oldHalf + value)); - uint newPacked = (oldPacked & ~laneMask) | (newHalfBits << laneShift); + half oldHalf; + uint newPacked = + __packInterlockedAddF16Result(oldPacked, laneShift, value, oldHalf); uint actualPacked = __atomic_compare_exchange(buf[wordIndex], oldPacked, newPacked); if (actualPacked == oldPacked) @@ -10166,6 +10165,17 @@ __generic vector dst(vector x, vector< __intrinsic_op($(kIROp_GetEquivalentStructuredBuffer)) RWStructuredBuffer __getEquivalentStructuredBuffer(RWByteAddressBuffer b); +[__NoSideEffect] +[ForceInline] +uint __packInterlockedAddF16Result(uint oldPacked, uint laneShift, half value, out half oldHalf) +{ + uint laneMask = 0xFFFFU << laneShift; + uint oldHalfBits = (oldPacked >> laneShift) & 0xFFFFU; + oldHalf = asfloat16(uint16_t(oldHalfBits)); + uint newHalfBits = uint(asuint16(oldHalf + value)); + return (oldPacked & ~laneMask) | (newHalfBits << laneShift); +} + __intrinsic_op($(kIROp_GetEquivalentStructuredBuffer)) StructuredBuffer __getEquivalentStructuredBuffer(ByteAddressBuffer b); diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 230c7aa77ad..845ce567e1f 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -543,7 +543,7 @@ def SPV_EXT_shader_atomic_float_add : _spirv_1_0; /// [EXT] def SPV_EXT_shader_atomic_float16_add : SPV_EXT_shader_atomic_float_add; -/// Represents the SPIR-V extension for vector atomic float16 operations. +/// Represents the SPIR-V extension for vector atomic float 16 add/min/max operations. 
/// [EXT] def SPV_NV_shader_atomic_fp16_vector : _spirv_1_0; @@ -704,7 +704,7 @@ def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; /// [EXT] def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; -/// Represents the SPIR-V capability for vector atomic float16 operations. +/// Represents the SPIR-V capability for vector atomic float 16 add/min/max operations. /// [EXT] def spvAtomicFloat16VectorNV : SPV_NV_shader_atomic_fp16_vector; @@ -1268,6 +1268,9 @@ alias GL_NV_ray_tracing = GL_EXT_ray_tracing; alias GL_NV_ray_tracing_motion_blur = _GL_NV_ray_tracing_motion_blur | spvRayTracingMotionBlurNV; /// Represents the GL_NV_shader_atomic_fp16_vector extension. +/// The SPIR-V 1.0 fallback preserves compatibility for shaders that only require the +/// GLSL extension; emitted fp16 vector atomic operations still require +/// spvAtomicFloat16VectorNV. /// [EXT] alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spvAtomicFloat16VectorNV | _spirv_1_0; diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 7854e034780..cd0e73054bb 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,5 +1,6 @@ //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //TEST:SIMPLE(filecheck=FALLBACK): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly 
-capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 @@ -15,6 +16,11 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) half originalValue; tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); + tmpBuffer.InterlockedAddF16(4, 1.0h, originalValue); + tmpBuffer.InterlockedAddF16(6, 1.0h, originalValue); + + uint dynamicByteAddress = (dispatchThreadID.x & 1) * 2; + tmpBuffer.InterlockedAddF16(dynamicByteAddress, 1.0h, originalValue); #endif } @@ -22,6 +28,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // SCALAR: OpCapability AtomicFloat16AddEXT // SCALAR: OpExtension "SPV_EXT_shader_atomic_float16_add" +// SCALAR-COUNT-5: OpAtomicFAddEXT // SCALAR-NOT: OpCapability AtomicFloat16VectorNV // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" @@ -30,13 +37,22 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // FALLBACK-NOT: OpCapability AtomicFloat16VectorNV // FALLBACK-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // FALLBACK-NOT: OpAtomicFAddEXT -// FALLBACK: OpAtomicIAdd -// FALLBACK: OpAtomicCompareExchange +// FALLBACK-COUNT-5: OpAtomicIAdd +// FALLBACK-COUNT-5: OpAtomicCompareExchange // FALLBACK-NOT: OpAtomicFAddEXT +// BOTH-NOT: OpCapability AtomicFloat16VectorNV +// BOTH-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// BOTH: OpCapability AtomicFloat16AddEXT +// BOTH: OpExtension "SPV_EXT_shader_atomic_float16_add" +// BOTH-COUNT-5: OpAtomicFAddEXT +// BOTH-NOT: OpAtomicCompareExchange +// BOTH-NOT: OpCapability AtomicFloat16VectorNV +// BOTH-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" + // VECTOR-NOT: OpCapability AtomicFloat16AddEXT // VECTOR-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // VECTOR: OpCapability AtomicFloat16VectorNV // VECTOR: 
OpExtension "SPV_NV_shader_atomic_fp16_vector" -// CUDA-COUNT-2: atomicAdd( +// CUDA-COUNT-5: atomicAdd( diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang new file mode 100644 index 00000000000..7382bcead57 --- /dev/null +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang @@ -0,0 +1,14 @@ +//TEST:INTERPRET(filecheck=CHECK): + +void main() +{ + half oldHalf; + uint packed0 = __packInterlockedAddF16Result(0x40003C00U, 0U, 2.0h, oldHalf); + printf("lane0 %.1f %u\n", float(oldHalf), packed0); + + uint packed1 = __packInterlockedAddF16Result(0x40003C00U, 16U, 1.0h, oldHalf); + printf("lane1 %.1f %u\n", float(oldHalf), packed1); +} + +// CHECK: lane0 1.0 1073758720 +// CHECK: lane1 2.0 1107311616 diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index 6c864053144..145b1586af5 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -1,13 +1,22 @@ -//TEST:SIMPLE(filecheck=CHECK): -target spirv -entry computeMain -stage compute -emit-spirv-directly +//TEST:SIMPLE(filecheck=NEGATIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly +//TEST:SIMPLE(filecheck=POSITIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC // Requiring the GLSL extension should preserve the old SPIR-V 1.0 compatibility path and // should not declare the NV vector capability until a vector atomic operation needs it. 
+RWStructuredBuffer vectorBuffer; + [require(GL_NV_shader_atomic_fp16_vector)] [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { +#ifdef POSITIVE_VECTOR_ATOMIC + __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +#endif } -// CHECK-NOT: OpCapability AtomicFloat16VectorNV -// CHECK-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// CHECK: OpMemoryModel +// NEGATIVE-NOT: OpCapability AtomicFloat16VectorNV +// NEGATIVE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// NEGATIVE: OpMemoryModel + +// POSITIVE: OpCapability AtomicFloat16VectorNV +// POSITIVE: OpExtension "SPV_NV_shader_atomic_fp16_vector" From 867e3f3d7dab5269b4de0d2da88ff292e2435155 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 05:43:15 -0700 Subject: [PATCH 14/80] Cover fp16 CAS retry behavior --- source/slang/hlsl.meta.slang | 27 +++++++++++-- .../byte-address-half-atomics-cas-pack.slang | 40 +++++++++++++++---- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 7bb5c43744e..e97fd358293 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6544,6 +6544,8 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; + // Order matters for SPIR-V: when both fp16 atomic capabilities are enabled, + // keep the scalar OpAtomicFAdd path before the vector-capability CAS fallback. 
case spvAtomicFloat16AddEXT: default: { @@ -6567,12 +6569,14 @@ $} __packInterlockedAddF16Result(oldPacked, laneShift, value, oldHalf); uint actualPacked = __atomic_compare_exchange(buf[wordIndex], oldPacked, newPacked); - if (actualPacked == oldPacked) + if (__commitInterlockedAddF16Result( + actualPacked, + oldPacked, + oldHalf, + originalValue)) { - originalValue = oldHalf; return; } - oldPacked = actualPacked; } } } @@ -10176,6 +10180,23 @@ uint __packInterlockedAddF16Result(uint oldPacked, uint laneShift, half value, o return (oldPacked & ~laneMask) | (newHalfBits << laneShift); } +[ForceInline] +bool __commitInterlockedAddF16Result( + uint actualPacked, + inout uint oldPacked, + half oldHalf, + out half originalValue) +{ + originalValue = oldHalf; + if (actualPacked == oldPacked) + { + return true; + } + + oldPacked = actualPacked; + return false; +} + __intrinsic_op($(kIROp_GetEquivalentStructuredBuffer)) StructuredBuffer __getEquivalentStructuredBuffer(ByteAddressBuffer b); diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang index 7382bcead57..772375952d5 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang @@ -1,14 +1,40 @@ //TEST:INTERPRET(filecheck=CHECK): -void main() +uint simulateRetry( + uint initialPacked, + uint failedActualPacked, + uint laneShift, + half value, + out half originalValue) { + uint oldPacked = initialPacked; + half oldHalf; - uint packed0 = __packInterlockedAddF16Result(0x40003C00U, 0U, 2.0h, oldHalf); - printf("lane0 %.1f %u\n", float(oldHalf), packed0); + uint newPacked = __packInterlockedAddF16Result(oldPacked, laneShift, value, oldHalf); + if (__commitInterlockedAddF16Result(failedActualPacked, oldPacked, oldHalf, originalValue)) + { + return newPacked; + } + + newPacked = 
__packInterlockedAddF16Result(oldPacked, laneShift, value, oldHalf); + uint successActualPacked = oldPacked; + if (__commitInterlockedAddF16Result(successActualPacked, oldPacked, oldHalf, originalValue)) + { + return newPacked; + } + + return 0U; +} + +void main() +{ + half originalValue; + uint packed0 = simulateRetry(0x40003C00U, 0x42003C00U, 0U, 2.0h, originalValue); + printf("lane0 %.1f %u\n", float(originalValue), packed0); - uint packed1 = __packInterlockedAddF16Result(0x40003C00U, 16U, 1.0h, oldHalf); - printf("lane1 %.1f %u\n", float(oldHalf), packed1); + uint packed1 = simulateRetry(0x40003C00U, 0x40004200U, 16U, 1.0h, originalValue); + printf("lane1 %.1f %u\n", float(originalValue), packed1); } -// CHECK: lane0 1.0 1073758720 -// CHECK: lane1 2.0 1107311616 +// CHECK: lane0 1.0 1107313152 +// CHECK: lane1 2.0 1107313152 From 4fdbdf89ff63be7537875547d0ce5c199c75fb75 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 06:06:22 -0700 Subject: [PATCH 15/80] Fix fp16 CAS fallback tests --- ...byte-address-half-atomics-capability.slang | 12 +++++++++-- .../byte-address-half-atomics-cas-pack.slang | 20 +++++++++++++------ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index cd0e73054bb..4f83c1e4ddf 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -37,8 +37,16 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // FALLBACK-NOT: OpCapability AtomicFloat16VectorNV // FALLBACK-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // FALLBACK-NOT: OpAtomicFAddEXT -// FALLBACK-COUNT-5: OpAtomicIAdd -// FALLBACK-COUNT-5: OpAtomicCompareExchange +// FALLBACK: OpAtomicIAdd +// 
FALLBACK: OpAtomicCompareExchange +// FALLBACK: OpAtomicIAdd +// FALLBACK: OpAtomicCompareExchange +// FALLBACK: OpAtomicIAdd +// FALLBACK: OpAtomicCompareExchange +// FALLBACK: OpAtomicIAdd +// FALLBACK: OpAtomicCompareExchange +// FALLBACK: OpAtomicIAdd +// FALLBACK: OpAtomicCompareExchange // FALLBACK-NOT: OpAtomicFAddEXT // BOTH-NOT: OpCapability AtomicFloat16VectorNV diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang index 772375952d5..cdcf53aabda 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang @@ -1,4 +1,7 @@ -//TEST:INTERPRET(filecheck=CHECK): +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -cpu -shaderobj -output-using-type + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer outputBuffer; uint simulateRetry( uint initialPacked, @@ -26,15 +29,20 @@ uint simulateRetry( return 0U; } -void main() +[numthreads(1, 1, 1)] +void computeMain() { half originalValue; uint packed0 = simulateRetry(0x40003C00U, 0x42003C00U, 0U, 2.0h, originalValue); - printf("lane0 %.1f %u\n", float(originalValue), packed0); + outputBuffer[0] = uint(asuint16(originalValue)); + outputBuffer[1] = packed0; uint packed1 = simulateRetry(0x40003C00U, 0x40004200U, 16U, 1.0h, originalValue); - printf("lane1 %.1f %u\n", float(originalValue), packed1); + outputBuffer[2] = uint(asuint16(originalValue)); + outputBuffer[3] = packed1; } -// CHECK: lane0 1.0 1107313152 -// CHECK: lane1 2.0 1107313152 +// CHECK: 15360 +// CHECK: 1107313152 +// CHECK: 16384 +// CHECK: 1107313152 From 84558ccfe4416caa4d43b68dc3128a72fa3e308d Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 12:05:04 -0700 Subject: [PATCH 16/80] Restore fp16 vector fallback 
semantics --- source/slang/hlsl.meta.slang | 70 ++++--------------- ...byte-address-half-atomics-capability.slang | 18 ++--- .../byte-address-half-atomics-cas-pack.slang | 48 ------------- 3 files changed, 17 insertions(+), 119 deletions(-) delete mode 100644 tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index e97fd358293..c4872c324c6 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6493,8 +6493,7 @@ $} /// @return The 2 16-bit floating point values packed into a 32-bit unsigned integer. [__requiresNVAPI] [ForceInline] - [require(hlsl, sm_5_0)] - [require(cuda, cuda_sm_7_0)] + [require(cuda_hlsl, sm_5_0)] [require(spirv, spvAtomicFloat16VectorNV)] uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value) { @@ -6515,16 +6514,16 @@ $} /// @param originalValue The original value at `byteAddress` before the add operation. /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` on a `half` when /// `SPV_EXT_shader_atomic_float16_add` is available. When only - /// `SPV_NV_shader_atomic_fp16_vector` is selected, it uses a 32-bit compare-exchange - /// loop so the neighboring half lane is preserved. + /// `SPV_NV_shader_atomic_fp16_vector` is selected, it maps to `OpAtomicFAdd` on a + /// `half2` vector with the correct part set to `value` and the remaining part set + /// to 0. /// /// For HLSL, this function translates to an NVAPI call /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function /// maps to `atomicAdd`. 
[__requiresNVAPI] [ForceInline] - [require(hlsl, sm_5_0)] - [require(cuda, cuda_sm_7_0)] + [require(cuda_hlsl, sm_5_0)] [require(spirv, spvAtomicFloat16VectorNV)] [require(spirv, spvAtomicFloat16AddEXT)] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) @@ -6544,8 +6543,6 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; - // Order matters for SPIR-V: when both fp16 atomic capabilities are enabled, - // keep the scalar OpAtomicFAdd path before the vector-capability CAS fallback. case spvAtomicFloat16AddEXT: default: { @@ -6555,29 +6552,16 @@ $} } case spvAtomicFloat16VectorNV: { - // A half2 atomic add with zero in the other lane is not bit-preserving - // for all IEEE half values, so use CAS on the containing 32-bit word. - let buf = __getEquivalentStructuredBuffer(this); - uint wordIndex = byteAddress / 4; - uint laneShift = (byteAddress & 2) == 0 ? 0U : 16U; - uint oldPacked = __atomic_add(buf[wordIndex], 0U); - - for (;;) + let buf = __getEquivalentStructuredBuffer(this); + if ((byteAddress & 2) == 0) { - half oldHalf; - uint newPacked = - __packInterlockedAddF16Result(oldPacked, laneShift, value, oldHalf); - uint actualPacked = - __atomic_compare_exchange(buf[wordIndex], oldPacked, newPacked); - if (__commitInterlockedAddF16Result( - actualPacked, - oldPacked, - oldHalf, - originalValue)) - { - return; - } + originalValue = __atomic_add(buf[byteAddress/4], half2(value, half(0.0))).x; } + else + { + originalValue = __atomic_add(buf[byteAddress/4], half2(half(0.0), value)).y; + } + return; } } } @@ -10169,34 +10153,6 @@ __generic vector dst(vector x, vector< __intrinsic_op($(kIROp_GetEquivalentStructuredBuffer)) RWStructuredBuffer __getEquivalentStructuredBuffer(RWByteAddressBuffer b); -[__NoSideEffect] -[ForceInline] -uint __packInterlockedAddF16Result(uint oldPacked, uint laneShift, half value, out half oldHalf) -{ - uint laneMask = 0xFFFFU << laneShift; - uint oldHalfBits 
= (oldPacked >> laneShift) & 0xFFFFU; - oldHalf = asfloat16(uint16_t(oldHalfBits)); - uint newHalfBits = uint(asuint16(oldHalf + value)); - return (oldPacked & ~laneMask) | (newHalfBits << laneShift); -} - -[ForceInline] -bool __commitInterlockedAddF16Result( - uint actualPacked, - inout uint oldPacked, - half oldHalf, - out half originalValue) -{ - originalValue = oldHalf; - if (actualPacked == oldPacked) - { - return true; - } - - oldPacked = actualPacked; - return false; -} - __intrinsic_op($(kIROp_GetEquivalentStructuredBuffer)) StructuredBuffer __getEquivalentStructuredBuffer(ByteAddressBuffer b); diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 4f83c1e4ddf..c7f21790c20 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -34,20 +34,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // FALLBACK-NOT: OpCapability AtomicFloat16AddEXT // FALLBACK-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" -// FALLBACK-NOT: OpCapability AtomicFloat16VectorNV -// FALLBACK-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// FALLBACK-NOT: OpAtomicFAddEXT -// FALLBACK: OpAtomicIAdd -// FALLBACK: OpAtomicCompareExchange -// FALLBACK: OpAtomicIAdd -// FALLBACK: OpAtomicCompareExchange -// FALLBACK: OpAtomicIAdd -// FALLBACK: OpAtomicCompareExchange -// FALLBACK: OpAtomicIAdd -// FALLBACK: OpAtomicCompareExchange -// FALLBACK: OpAtomicIAdd -// FALLBACK: OpAtomicCompareExchange -// FALLBACK-NOT: OpAtomicFAddEXT +// FALLBACK: OpCapability AtomicFloat16VectorNV +// FALLBACK: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// FALLBACK-COUNT-6: OpAtomicFAddEXT +// FALLBACK-NOT: OpAtomicCompareExchange // BOTH-NOT: OpCapability AtomicFloat16VectorNV // BOTH-NOT: OpExtension 
"SPV_NV_shader_atomic_fp16_vector" diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang deleted file mode 100644 index cdcf53aabda..00000000000 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-cas-pack.slang +++ /dev/null @@ -1,48 +0,0 @@ -//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK): -cpu -shaderobj -output-using-type - -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer -RWStructuredBuffer outputBuffer; - -uint simulateRetry( - uint initialPacked, - uint failedActualPacked, - uint laneShift, - half value, - out half originalValue) -{ - uint oldPacked = initialPacked; - - half oldHalf; - uint newPacked = __packInterlockedAddF16Result(oldPacked, laneShift, value, oldHalf); - if (__commitInterlockedAddF16Result(failedActualPacked, oldPacked, oldHalf, originalValue)) - { - return newPacked; - } - - newPacked = __packInterlockedAddF16Result(oldPacked, laneShift, value, oldHalf); - uint successActualPacked = oldPacked; - if (__commitInterlockedAddF16Result(successActualPacked, oldPacked, oldHalf, originalValue)) - { - return newPacked; - } - - return 0U; -} - -[numthreads(1, 1, 1)] -void computeMain() -{ - half originalValue; - uint packed0 = simulateRetry(0x40003C00U, 0x42003C00U, 0U, 2.0h, originalValue); - outputBuffer[0] = uint(asuint16(originalValue)); - outputBuffer[1] = packed0; - - uint packed1 = simulateRetry(0x40003C00U, 0x40004200U, 16U, 1.0h, originalValue); - outputBuffer[2] = uint(asuint16(originalValue)); - outputBuffer[3] = packed1; -} - -// CHECK: 15360 -// CHECK: 1107313152 -// CHECK: 16384 -// CHECK: 1107313152 From 1db3352017b72d0be8018779c3d1a970d0eb7dff Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 12:29:57 -0700 Subject: [PATCH 17/80] Address fp16 vector review feedback --- source/slang/hlsl.meta.slang | 5 
+- ...byte-address-half-atomics-capability.slang | 27 ++++++++++ tests/spirv/atomic-float16-vector.slang | 51 +++++++++++-------- 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index c4872c324c6..2ddd423a3fc 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6543,6 +6543,8 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; + // With both SPIR-V fp16 atomic capabilities enabled, capability ranking selects + // this scalar add case over the vector fallback below. case spvAtomicFloat16AddEXT: default: { @@ -6579,7 +6581,8 @@ $} /// maps to `atomicAdd`. [__requiresNVAPI] [ForceInline] - [require(cuda_hlsl_spirv, sm_5_0)] + [require(cuda_hlsl, sm_5_0)] + [require(spirv, spvAtomicFloat16VectorNV)] void InterlockedAddF16Emulated(uint byteAddress, half value, out half originalValue) { __target_switch diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index c7f21790c20..20439820bce 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,10 +1,13 @@ //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //TEST:SIMPLE(filecheck=FALLBACK): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=FALLBACK_LANES): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV 
//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC +//TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_EMULATED //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 RWByteAddressBuffer tmpBuffer; +RWStructuredBuffer outputBuffer; RWStructuredBuffer vectorBuffer; [numthreads(1, 1, 1)] @@ -12,15 +15,26 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { #ifdef TEST_DIRECT_VECTOR_ATOMIC __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +#elif defined(TEST_EMULATED) + half originalValue; + tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); + outputBuffer[0] = float(originalValue); + tmpBuffer.InterlockedAddF16Emulated(2, 1.0h, originalValue); + outputBuffer[1] = float(originalValue); #else half originalValue; tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); + outputBuffer[0] = float(originalValue); tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); + outputBuffer[1] = float(originalValue); tmpBuffer.InterlockedAddF16(4, 1.0h, originalValue); + outputBuffer[2] = float(originalValue); tmpBuffer.InterlockedAddF16(6, 1.0h, originalValue); + outputBuffer[3] = float(originalValue); uint dynamicByteAddress = (dispatchThreadID.x & 1) * 2; tmpBuffer.InterlockedAddF16(dynamicByteAddress, 1.0h, originalValue); + outputBuffer[4] = float(originalValue); #endif } @@ -39,6 +53,13 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // FALLBACK-COUNT-6: OpAtomicFAddEXT // FALLBACK-NOT: OpAtomicCompareExchange +// FALLBACK_LANES-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 +// FALLBACK_LANES-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 +// FALLBACK_LANES: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} 
%uint_1 %uint_0 [[LOW]] +// FALLBACK_LANES: OpCompositeExtract %half [[LOW_ATOMIC]] 0 +// FALLBACK_LANES: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] +// FALLBACK_LANES: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 + // BOTH-NOT: OpCapability AtomicFloat16VectorNV // BOTH-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // BOTH: OpCapability AtomicFloat16AddEXT @@ -53,4 +74,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR: OpCapability AtomicFloat16VectorNV // VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// EMULATED-NOT: OpCapability AtomicFloat16AddEXT +// EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// EMULATED: OpCapability AtomicFloat16VectorNV +// EMULATED: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// EMULATED-COUNT-2: OpAtomicFAddEXT + // CUDA-COUNT-5: atomicAdd( diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index f9602a948a7..d70d0633c17 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -1,22 +1,29 @@ -//TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute -emit-spirv-directly - -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer -RWStructuredBuffer outputBuffer; - -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=workBuffer -RWStructuredBuffer workBuffer; - -[numthreads(1, 1, 1)] -void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) -{ - half2 originalValue; - - // Direct atomic operation on half2 should trigger the SPV_NV_shader_atomic_fp16_vector extension - originalValue = __atomic_add(workBuffer[0], half2(1.0h, 2.0h)); - - outputBuffer[0] = float(originalValue.x); - outputBuffer[1] = float(originalValue.y); -} - -// CHECK: OpCapability AtomicFloat16VectorNV -// CHECK: OpExtension "SPV_NV_shader_atomic_fp16_vector" +//TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute 
-emit-spirv-directly + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +RWStructuredBuffer outputBuffer; + +//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=workBuffer +RWStructuredBuffer workBuffer; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + half2 originalAdd; + half2 originalMin; + half2 originalMax; + + // Direct atomic operation on half2 should trigger the SPV_NV_shader_atomic_fp16_vector extension + originalAdd = __atomic_add(workBuffer[0], half2(1.0h, 2.0h)); + originalMin = __atomic_min(workBuffer[1], half2(1.0h, 2.0h)); + originalMax = __atomic_max(workBuffer[2], half2(1.0h, 2.0h)); + + outputBuffer[0] = float(originalAdd.x + originalMin.x + originalMax.x); + outputBuffer[1] = float(originalAdd.y + originalMin.y + originalMax.y); +} + +// CHECK: OpCapability AtomicFloat16VectorNV +// CHECK: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// CHECK: OpAtomicFAddEXT +// CHECK: OpAtomicFMinEXT +// CHECK: OpAtomicFMaxEXT From 72ffecbee7ddd68b9c79d1e75856b7ada49eb0a8 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 12:36:20 -0700 Subject: [PATCH 18/80] Tighten fp16 vector atomic checks --- tests/spirv/atomic-float16-vector.slang | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index d70d0633c17..a1585218e7d 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -22,8 +22,11 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) outputBuffer[1] = float(originalAdd.y + originalMin.y + originalMax.y); } +// CHECK-NOT: OpCapability AtomicFloat16AddEXT // CHECK: OpCapability AtomicFloat16VectorNV +// CHECK-NOT: OpCapability AtomicFloat16AddEXT // CHECK: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// CHECK: OpAtomicFAddEXT -// CHECK: OpAtomicFMinEXT -// CHECK: 
OpAtomicFMaxEXT +// CHECK-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// CHECK-COUNT-1: OpAtomicFAddEXT +// CHECK-COUNT-1: OpAtomicFMinEXT +// CHECK-COUNT-1: OpAtomicFMaxEXT From 77f369600d9c1fc021911c63949072417697ab68 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 13:27:05 -0700 Subject: [PATCH 19/80] Avoid fp16 capability disjunction --- source/slang/hlsl.meta.slang | 22 ++---------- ...byte-address-half-atomics-capability.slang | 36 +++++++++---------- 2 files changed, 20 insertions(+), 38 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 2ddd423a3fc..e4752216368 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6513,10 +6513,8 @@ $} /// @param value The value to add to the value at `byteAddress`. /// @param originalValue The original value at `byteAddress` before the add operation. /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` on a `half` when - /// `SPV_EXT_shader_atomic_float16_add` is available. When only - /// `SPV_NV_shader_atomic_fp16_vector` is selected, it maps to `OpAtomicFAdd` on a - /// `half2` vector with the correct part set to `value` and the remaining part set - /// to 0. + /// `SPV_EXT_shader_atomic_float16_add` is available. Use `InterlockedAddF16Emulated` + /// for targets that only support `SPV_NV_shader_atomic_fp16_vector`. /// /// For HLSL, this function translates to an NVAPI call /// due to lack of native HLSL intrinsic for floating point atomic add. 
For CUDA, this function @@ -6524,7 +6522,6 @@ $} [__requiresNVAPI] [ForceInline] [require(cuda_hlsl, sm_5_0)] - [require(spirv, spvAtomicFloat16VectorNV)] [require(spirv, spvAtomicFloat16AddEXT)] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) { @@ -6543,8 +6540,6 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; - // With both SPIR-V fp16 atomic capabilities enabled, capability ranking selects - // this scalar add case over the vector fallback below. case spvAtomicFloat16AddEXT: default: { @@ -6552,19 +6547,6 @@ $} originalValue = __atomic_add(buf[byteAddress/2], value); return; } - case spvAtomicFloat16VectorNV: - { - let buf = __getEquivalentStructuredBuffer(this); - if ((byteAddress & 2) == 0) - { - originalValue = __atomic_add(buf[byteAddress/4], half2(value, half(0.0))).x; - } - else - { - originalValue = __atomic_add(buf[byteAddress/4], half2(half(0.0), value)).y; - } - return; - } } } diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 20439820bce..6733eaf0f95 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,6 +1,6 @@ //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -//TEST:SIMPLE(filecheck=FALLBACK): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -//TEST:SIMPLE(filecheck=FALLBACK_LANES): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV +//DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly 
-restrictive-capability-check -capability spirv_1_5 +//DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_EMULATED @@ -24,6 +24,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) #else half originalValue; tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); +// NO_FP16_ATOMIC: entry point uses capabilities not in specified profile +// NO_FP16_ATOMIC: Missing capabilities are: 'spvAtomicFloat16AddEXT' +// VECTOR_ONLY: entry point uses capabilities not in specified profile +// VECTOR_ONLY: Missing capabilities are: 'spvAtomicFloat16AddEXT' outputBuffer[0] = float(originalValue); tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); outputBuffer[1] = float(originalValue); @@ -42,29 +46,19 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // SCALAR: OpCapability AtomicFloat16AddEXT // SCALAR: OpExtension "SPV_EXT_shader_atomic_float16_add" -// SCALAR-COUNT-5: OpAtomicFAddEXT +// SCALAR-NOT: OpAtomicFAddEXT %v2half +// SCALAR-COUNT-5: OpAtomicFAddEXT %half +// SCALAR-NOT: OpAtomicFAddEXT %v2half // SCALAR-NOT: OpCapability AtomicFloat16VectorNV // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// FALLBACK-NOT: OpCapability AtomicFloat16AddEXT -// FALLBACK-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" -// FALLBACK: OpCapability 
AtomicFloat16VectorNV -// FALLBACK: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// FALLBACK-COUNT-6: OpAtomicFAddEXT -// FALLBACK-NOT: OpAtomicCompareExchange - -// FALLBACK_LANES-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 -// FALLBACK_LANES-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 -// FALLBACK_LANES: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] -// FALLBACK_LANES: OpCompositeExtract %half [[LOW_ATOMIC]] 0 -// FALLBACK_LANES: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] -// FALLBACK_LANES: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 - // BOTH-NOT: OpCapability AtomicFloat16VectorNV // BOTH-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // BOTH: OpCapability AtomicFloat16AddEXT // BOTH: OpExtension "SPV_EXT_shader_atomic_float16_add" -// BOTH-COUNT-5: OpAtomicFAddEXT +// BOTH-NOT: OpAtomicFAddEXT %v2half +// BOTH-COUNT-5: OpAtomicFAddEXT %half +// BOTH-NOT: OpAtomicFAddEXT %v2half // BOTH-NOT: OpAtomicCompareExchange // BOTH-NOT: OpCapability AtomicFloat16VectorNV // BOTH-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" @@ -79,5 +73,11 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // EMULATED: OpCapability AtomicFloat16VectorNV // EMULATED: OpExtension "SPV_NV_shader_atomic_fp16_vector" // EMULATED-COUNT-2: OpAtomicFAddEXT +// EMULATED-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 +// EMULATED-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 +// EMULATED: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] +// EMULATED: OpCompositeExtract %half [[LOW_ATOMIC]] 0 +// EMULATED: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] +// EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 // CUDA-COUNT-5: atomicAdd( From d9e5cd42a29cdb7c7d0002dcd68ad596a629c42f Mon 
Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 13:51:12 -0700 Subject: [PATCH 20/80] Address fp16 capability review gaps --- source/slang/hlsl.meta.slang | 9 ++++++--- .../byte-address-half-atomics-capability.slang | 3 +++ .../gl-nv-shader-atomic-fp16-vector-compatibility.slang | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index e4752216368..5d1a0198202 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6491,6 +6491,9 @@ $} /// @param byteAddress The address at which to perform the atomic add operation. /// @param fp16x2Value Two 16-bit floating point values are packed into a 32-bit unsigned integer. /// @return The 2 16-bit floating point values packed into a 32-bit unsigned integer. + /// @remarks For SPIR-V, this helper requires `SPV_NV_shader_atomic_fp16_vector` + /// and emits a `half2` `OpAtomicFAdd`; the packed fp16x2 representation matches + /// the NVAPI HLSL ABI, but the underlying operation is a vector atomic. [__requiresNVAPI] [ForceInline] [require(cuda_hlsl, sm_5_0)] @@ -6512,9 +6515,9 @@ $} /// @param byteAddress The address at which to perform the atomic add operation. /// @param value The value to add to the value at `byteAddress`. /// @param originalValue The original value at `byteAddress` before the add operation. - /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` on a `half` when - /// `SPV_EXT_shader_atomic_float16_add` is available. Use `InterlockedAddF16Emulated` - /// for targets that only support `SPV_NV_shader_atomic_fp16_vector`. + /// @remarks For SPIR-V, this function requires `SPV_EXT_shader_atomic_float16_add` + /// and maps to `OpAtomicFAdd` on a `half`. For targets that only support + /// `SPV_NV_shader_atomic_fp16_vector`, use `InterlockedAddF16Emulated` instead. 
/// /// For HLSL, this function translates to an NVAPI call /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 6733eaf0f95..8a07089662b 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,6 +1,7 @@ //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 //DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV +//DIAGNOSTIC_TEST:SIMPLE(diag=EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_EMULATED //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_EMULATED @@ -18,6 +19,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) #elif defined(TEST_EMULATED) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); +// 
EMULATED_NO_VECTOR: entry point uses capabilities not in specified profile +// EMULATED_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' outputBuffer[0] = float(originalValue); tmpBuffer.InterlockedAddF16Emulated(2, 1.0h, originalValue); outputBuffer[1] = float(originalValue); diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index 145b1586af5..4b1b9dafa00 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -11,6 +11,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { #ifdef POSITIVE_VECTOR_ATOMIC __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +#else + vectorBuffer[0] = half2(1.0h, 2.0h); #endif } From 754e79a2952db6d9f8ac1bf2c0473cbad9af0a80 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 14:02:02 -0700 Subject: [PATCH 21/80] Diagnose half2 atomic vector capability --- source/slang/slang-emit-spirv.cpp | 67 +++++++++++++++++++ ...byte-address-half-atomics-capability.slang | 3 + 2 files changed, 70 insertions(+) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 50e26c501ff..80b807a664f 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4412,6 +4412,67 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } + Dictionary> m_diagnosedRestrictiveCaps; + void maybeDiagnoseRestrictiveCapabilityUse(IRInst* inst, CapabilityName capabilityName) + { + auto& optionSet = m_targetProgram->getOptionSet(); + if (!optionSet.getBoolOption(CompilerOptionName::RestrictiveCapabilityCheck) || + optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) + return; + + auto parentFunc = getParentFunc(inst); + if (!parentFunc) + return; + + HashSet* entryPoints = + 
getReferencingEntryPoints(m_referencingEntryPoints, parentFunc); + if (!entryPoints) + return; + + for (auto entryPoint : *entryPoints) + { + IREntryPointDecoration* entryPointDecor = + entryPoint->findDecoration(); + if (!entryPointDecor) + continue; + + CapabilitySet stageTargetCaps = m_targetProgram->getTargetReq()->getTargetCaps(); + CapabilitySet stageCapabilitySet = entryPointDecor->getProfile().getCapabilityName(); + CapabilitySet required(capabilityName); + stageTargetCaps.join(stageCapabilitySet); + required.join(stageCapabilitySet); + + if (stageTargetCaps.atLeastOneSetImpliedInOther(required) == + CapabilitySet::ImpliesReturnFlags::Implied) + continue; + + CapabilityAtomSet addedAtoms{}; + if (auto stageCapSet = stageTargetCaps.getAtomSets()) + { + if (auto requiredSet = required.getAtomSets()) + { + CapabilityAtomSet::calcSubtract(addedAtoms, (*requiredSet), (*stageCapSet)); + } + } + + StringBuilder capsSb; + printDiagnosticArg(capsSb, addedAtoms); + String missingCapsStr = capsSb.toString(); + if (!m_diagnosedRestrictiveCaps[entryPoint].add(missingCapsStr)) + continue; + + StringBuilder entryPointSb; + printDiagnosticArg(entryPointSb, entryPoint); + + m_sink->diagnose(Diagnostics::ProfileImplicitlyUpgradedRestrictive{ + .entryPoint = entryPointSb.toString(), + .profile = optionSet.getProfile().getName(), + .capabilities = missingCapsStr, + .location = entryPoint->sourceLoc, + }); + } + } + void ensureAtomicCapability(IRInst* atomicInst, SpvOp op) { auto typeOp = atomicInst->getDataType()->getOp(); @@ -4448,6 +4509,9 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex if (as(atomicInst->getDataType())->getElementType()->getOp() == kIROp_HalfType) { + maybeDiagnoseRestrictiveCapabilityUse( + atomicInst, + CapabilityName::spvAtomicFloat16VectorNV); ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); } @@ -4476,6 +4540,9 @@ struct 
SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex if (as(atomicInst->getDataType())->getElementType()->getOp() == kIROp_HalfType) { + maybeDiagnoseRestrictiveCapabilityUse( + atomicInst, + CapabilityName::spvAtomicFloat16VectorNV); ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); } diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 8a07089662b..1d8cf236688 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,6 +1,7 @@ //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 //DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV +//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_EMULATED //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry 
computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC @@ -16,6 +17,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { #ifdef TEST_DIRECT_VECTOR_ATOMIC __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +// DIRECT_NO_VECTOR: entry point uses capabilities not in specified profile +// DIRECT_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' #elif defined(TEST_EMULATED) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); From b42c27dec1be95bfadfe0075c2f78f314d496abc Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 14:20:20 -0700 Subject: [PATCH 22/80] Tighten fp16 vector atomic checks --- source/slang/slang-emit-spirv.cpp | 36 +++++++++++-------- ...byte-address-half-atomics-capability.slang | 3 +- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 80b807a664f..ef20bcdf8d6 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4475,7 +4475,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex void ensureAtomicCapability(IRInst* atomicInst, SpvOp op) { - auto typeOp = atomicInst->getDataType()->getOp(); + IRType* atomicValueType = atomicInst->getDataType(); + auto typeOp = atomicValueType->getOp(); if (typeOp == kIROp_VoidType) { auto ptrType = atomicInst->getOperand(0)->getDataType(); @@ -4484,6 +4485,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex { if (auto atomicType = as(valType)) valType = atomicType->getElementType(); + atomicValueType = valType; typeOp = valType->getOp(); } } @@ -4506,14 +4508,16 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT); break; case kIROp_VectorType: - if 
(as(atomicInst->getDataType())->getElementType()->getOp() == - kIROp_HalfType) + if (auto vectorType = as(atomicValueType)) { - maybeDiagnoseRestrictiveCapabilityUse( - atomicInst, - CapabilityName::spvAtomicFloat16VectorNV); - ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); - requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + if (vectorType->getElementType()->getOp() == kIROp_HalfType) + { + maybeDiagnoseRestrictiveCapabilityUse( + atomicInst, + CapabilityName::spvAtomicFloat16VectorNV); + ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); + requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + } } break; } @@ -4537,14 +4541,16 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT); break; case kIROp_VectorType: - if (as(atomicInst->getDataType())->getElementType()->getOp() == - kIROp_HalfType) + if (auto vectorType = as(atomicValueType)) { - maybeDiagnoseRestrictiveCapabilityUse( - atomicInst, - CapabilityName::spvAtomicFloat16VectorNV); - ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); - requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + if (vectorType->getElementType()->getOp() == kIROp_HalfType) + { + maybeDiagnoseRestrictiveCapabilityUse( + atomicInst, + CapabilityName::spvAtomicFloat16VectorNV); + ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); + requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + } } break; } diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 1d8cf236688..71d2ed0cc4d 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -78,12 +78,13 @@ void 
computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // EMULATED: OpCapability AtomicFloat16VectorNV // EMULATED: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// EMULATED-COUNT-2: OpAtomicFAddEXT // EMULATED-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 // EMULATED-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 +// EMULATED-NOT: OpAtomicFAddEXT // EMULATED: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] // EMULATED: OpCompositeExtract %half [[LOW_ATOMIC]] 0 // EMULATED: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] // EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 +// EMULATED-NOT: OpAtomicFAddEXT // CUDA-COUNT-5: atomicAdd( From bd156fa6d6fde9c7bd6d3cb9940ccc971871ab60 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 15:58:31 -0700 Subject: [PATCH 23/80] Cover fp16 vector min max review gaps --- .../a3-02-reference-capability-atoms.md | 4 ++-- source/slang/slang-capabilities.capdef | 4 ++-- .../byte-address-half-atomics-capability.slang | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 784c90c96b7..8b08b38cb01 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -705,7 +705,7 @@ Extensions > Represents the SPIR-V extension for ray tracing motion blur. `SPV_NV_shader_atomic_fp16_vector` -> Represents the SPIR-V extension for vector atomic float 16 add/min/max operations. +> Represents the SPIR-V extension for vector atomic float 16 add/min/max/exchange operations. `SPV_NV_shader_image_footprint` > Represents the SPIR-V extension for shader image footprint. 
@@ -730,7 +730,7 @@ Extensions > Represents the SPIR-V capability for atomic float 16 min/max operations. `spvAtomicFloat16VectorNV` -> Represents the SPIR-V capability for vector atomic float 16 add/min/max operations. +> Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. `spvAtomicFloat32AddEXT` > Represents the SPIR-V capability for atomic float 32 add operations. diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 845ce567e1f..31bac24974d 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -543,7 +543,7 @@ def SPV_EXT_shader_atomic_float_add : _spirv_1_0; /// [EXT] def SPV_EXT_shader_atomic_float16_add : SPV_EXT_shader_atomic_float_add; -/// Represents the SPIR-V extension for vector atomic float 16 add/min/max operations. +/// Represents the SPIR-V extension for vector atomic float 16 add/min/max/exchange operations. /// [EXT] def SPV_NV_shader_atomic_fp16_vector : _spirv_1_0; @@ -704,7 +704,7 @@ def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; /// [EXT] def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; -/// Represents the SPIR-V capability for vector atomic float 16 add/min/max operations. +/// Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. 
/// [EXT] def spvAtomicFloat16VectorNV : SPV_NV_shader_atomic_fp16_vector; diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 71d2ed0cc4d..992b83770e5 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -2,9 +2,11 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 //DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC +//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_EMULATED //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC +//TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly 
-capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX //TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_EMULATED //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 @@ -19,6 +21,11 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); // DIRECT_NO_VECTOR: entry point uses capabilities not in specified profile // DIRECT_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' +#elif defined(TEST_DIRECT_VECTOR_MIN_MAX) + __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); +// DIRECT_MIN_MAX_NO_VECTOR: entry point uses capabilities not in specified profile +// DIRECT_MIN_MAX_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' + __atomic_max(vectorBuffer[1], half2(3.0h, 4.0h)); #elif defined(TEST_EMULATED) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); @@ -74,6 +81,15 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR: OpCapability AtomicFloat16VectorNV // VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_MIN_MAX: OpCapability AtomicFloat16VectorNV +// VECTOR_MIN_MAX: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_MIN_MAX: OpAtomicFMinEXT %v2half +// VECTOR_MIN_MAX: OpAtomicFMaxEXT %v2half +// VECTOR_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" + // EMULATED-NOT: OpCapability AtomicFloat16AddEXT // EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // EMULATED: OpCapability AtomicFloat16VectorNV From f9b9469f8ee428dac80136d93d221cd138f4d336 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 
May 2026 16:18:20 -0700 Subject: [PATCH 24/80] Wire fp16 vector atomic exchange capability --- source/slang/slang-emit-spirv.cpp | 48 ++++++++++--------- ...byte-address-half-atomics-capability.slang | 15 ++++++ tests/spirv/atomic-float16-vector.slang | 7 ++- 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index ef20bcdf8d6..2a54d59bb33 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4489,8 +4489,32 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex typeOp = valType->getOp(); } } + + auto maybeRequireFp16VectorAtomicCapability = [&](IRType* valueType) + { + auto vectorType = as(valueType); + if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) + return false; + + auto elementCountInst = as(vectorType->getElementCount()); + if (!elementCountInst) + return false; + + auto elementCount = elementCountInst->getValue(); + if (elementCount != 2 && elementCount != 4) + return false; + + maybeDiagnoseRestrictiveCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); + ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); + requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + return true; + }; + switch (op) { + case SpvOpAtomicExchange: + maybeRequireFp16VectorAtomicCapability(atomicValueType); + break; case SpvOpAtomicFAddEXT: { switch (typeOp) @@ -4508,17 +4532,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT); break; case kIROp_VectorType: - if (auto vectorType = as(atomicValueType)) - { - if (vectorType->getElementType()->getOp() == kIROp_HalfType) - { - maybeDiagnoseRestrictiveCapabilityUse( - atomicInst, - CapabilityName::spvAtomicFloat16VectorNV); - ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); - 
requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); - } - } + maybeRequireFp16VectorAtomicCapability(atomicValueType); break; } } @@ -4541,17 +4555,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT); break; case kIROp_VectorType: - if (auto vectorType = as(atomicValueType)) - { - if (vectorType->getElementType()->getOp() == kIROp_HalfType) - { - maybeDiagnoseRestrictiveCapabilityUse( - atomicInst, - CapabilityName::spvAtomicFloat16VectorNV); - ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); - requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); - } - } + maybeRequireFp16VectorAtomicCapability(atomicValueType); break; } } diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 992b83770e5..5b404ec8174 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -3,10 +3,12 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX +//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly 
-restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_EMULATED //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX +//TEST:SIMPLE(filecheck=VECTOR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_EXCHANGE //TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_EMULATED //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 @@ -26,6 +28,11 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // DIRECT_MIN_MAX_NO_VECTOR: entry point uses capabilities not in specified profile // DIRECT_MIN_MAX_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' __atomic_max(vectorBuffer[1], half2(3.0h, 4.0h)); +#elif defined(TEST_DIRECT_VECTOR_EXCHANGE) + half2 originalValue = __atomic_exchange(vectorBuffer[0], half2(1.0h, 2.0h)); +// DIRECT_EXCHANGE_NO_VECTOR: entry point uses capabilities not in specified profile +// DIRECT_EXCHANGE_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' + outputBuffer[0] = float(originalValue.x + originalValue.y); #elif defined(TEST_EMULATED) half originalValue; 
tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); @@ -90,6 +97,14 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT // VECTOR_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_EXCHANGE-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_EXCHANGE-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_EXCHANGE: OpCapability AtomicFloat16VectorNV +// VECTOR_EXCHANGE: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_EXCHANGE: OpAtomicExchange %v2half +// VECTOR_EXCHANGE-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_EXCHANGE-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" + // EMULATED-NOT: OpCapability AtomicFloat16AddEXT // EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // EMULATED: OpCapability AtomicFloat16VectorNV diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index a1585218e7d..91cfa8d159d 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -12,14 +12,16 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) half2 originalAdd; half2 originalMin; half2 originalMax; + half2 originalExchange; // Direct atomic operation on half2 should trigger the SPV_NV_shader_atomic_fp16_vector extension originalAdd = __atomic_add(workBuffer[0], half2(1.0h, 2.0h)); originalMin = __atomic_min(workBuffer[1], half2(1.0h, 2.0h)); originalMax = __atomic_max(workBuffer[2], half2(1.0h, 2.0h)); + originalExchange = __atomic_exchange(workBuffer[3], half2(3.0h, 4.0h)); - outputBuffer[0] = float(originalAdd.x + originalMin.x + originalMax.x); - outputBuffer[1] = float(originalAdd.y + originalMin.y + originalMax.y); + outputBuffer[0] = float(originalAdd.x + originalMin.x + originalMax.x + originalExchange.x); + outputBuffer[1] = float(originalAdd.y + originalMin.y + originalMax.y + originalExchange.y); } // CHECK-NOT: OpCapability 
AtomicFloat16AddEXT @@ -30,3 +32,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // CHECK-COUNT-1: OpAtomicFAddEXT // CHECK-COUNT-1: OpAtomicFMinEXT // CHECK-COUNT-1: OpAtomicFMaxEXT +// CHECK-COUNT-1: OpAtomicExchange %v2half From 0b27dc395aa5cccd5b64fa62d70fcc0250727759 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 16:19:58 -0700 Subject: [PATCH 25/80] Cover fp16 vector alias min max atomics --- tests/spirv/atomic-float16-vector.slang | 6 +++--- .../gl-nv-shader-atomic-fp16-vector-compatibility.slang | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index 91cfa8d159d..3cdb080dc5b 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -29,7 +29,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // CHECK-NOT: OpCapability AtomicFloat16AddEXT // CHECK: OpExtension "SPV_NV_shader_atomic_fp16_vector" // CHECK-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" -// CHECK-COUNT-1: OpAtomicFAddEXT -// CHECK-COUNT-1: OpAtomicFMinEXT -// CHECK-COUNT-1: OpAtomicFMaxEXT +// CHECK-COUNT-1: OpAtomicFAddEXT %v2half +// CHECK-COUNT-1: OpAtomicFMinEXT %v2half +// CHECK-COUNT-1: OpAtomicFMaxEXT %v2half // CHECK-COUNT-1: OpAtomicExchange %v2half diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index 4b1b9dafa00..706674aadf8 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -1,5 +1,6 @@ //TEST:SIMPLE(filecheck=NEGATIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly //TEST:SIMPLE(filecheck=POSITIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC 
+//TEST:SIMPLE(filecheck=POSITIVE_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_MIN_MAX // Requiring the GLSL extension should preserve the old SPIR-V 1.0 compatibility path and // should not declare the NV vector capability until a vector atomic operation needs it. @@ -11,6 +12,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { #ifdef POSITIVE_VECTOR_ATOMIC __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +#elif defined(POSITIVE_VECTOR_MIN_MAX) + __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); + __atomic_max(vectorBuffer[1], half2(3.0h, 4.0h)); #else vectorBuffer[0] = half2(1.0h, 2.0h); #endif @@ -22,3 +26,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // POSITIVE: OpCapability AtomicFloat16VectorNV // POSITIVE: OpExtension "SPV_NV_shader_atomic_fp16_vector" + +// POSITIVE_MIN_MAX: OpCapability AtomicFloat16VectorNV +// POSITIVE_MIN_MAX: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POSITIVE_MIN_MAX: OpAtomicFMinEXT %v2half +// POSITIVE_MIN_MAX: OpAtomicFMaxEXT %v2half From 762b16abedac6e9a1b2c1f966f31ae8769455e04 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 16:37:22 -0700 Subject: [PATCH 26/80] Diagnose unsupported fp16 vector atomic width --- source/slang/slang-emit-spirv.cpp | 16 ++++++++++++++-- .../byte-address-half-atomics-capability.slang | 5 +++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 2a54d59bb33..2c54e09557d 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4498,11 +4498,23 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex auto elementCountInst = as(vectorType->getElementCount()); if (!elementCountInst) - return false; + { + SLANG_DIAGNOSE_UNEXPECTED( + m_sink, + atomicInst, + "SPIR-V fp16 vector atomics require a 
constant half2 or half4 type."); + return true; + } auto elementCount = elementCountInst->getValue(); if (elementCount != 2 && elementCount != 4) - return false; + { + SLANG_DIAGNOSE_UNEXPECTED( + m_sink, + atomicInst, + "SPIR-V fp16 vector atomics only support half2 and half4."); + return true; + } maybeDiagnoseRestrictiveCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 5b404ec8174..b0cf57b1227 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -4,6 +4,7 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE +//DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH //DIAGNOSTIC_TEST:SIMPLE(diag=EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT 
-DTEST_EMULATED //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC @@ -15,6 +16,7 @@ RWByteAddressBuffer tmpBuffer; RWStructuredBuffer outputBuffer; RWStructuredBuffer vectorBuffer; +RWStructuredBuffer unsupportedVectorBuffer; [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) @@ -33,6 +35,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // DIRECT_EXCHANGE_NO_VECTOR: entry point uses capabilities not in specified profile // DIRECT_EXCHANGE_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' outputBuffer[0] = float(originalValue.x + originalValue.y); +#elif defined(TEST_UNSUPPORTED_VECTOR_WIDTH) + __atomic_add(unsupportedVectorBuffer[0], half3(1.0h, 2.0h, 3.0h)); +// UNSUPPORTED_VECTOR_WIDTH: SPIR-V fp16 vector atomics only support half2 and half4. 
#elif defined(TEST_EMULATED) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); From 52ea0bc1041ecd8648e5a4304f22073b89af3009 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 16:40:59 -0700 Subject: [PATCH 27/80] Cover fp16 vector atomic review gaps --- .../byte-address-half-atomics-capability.slang | 2 ++ tests/spirv/atomic-float16-vector.slang | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index b0cf57b1227..1f9fb9002fd 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -6,6 +6,7 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH //DIAGNOSTIC_TEST:SIMPLE(diag=EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_EMULATED +//TEST:SIMPLE(filecheck=IGNORE_CAPS): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -ignore-capabilities -capability spirv_1_5 //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage 
compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX @@ -53,6 +54,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // NO_FP16_ATOMIC: Missing capabilities are: 'spvAtomicFloat16AddEXT' // VECTOR_ONLY: entry point uses capabilities not in specified profile // VECTOR_ONLY: Missing capabilities are: 'spvAtomicFloat16AddEXT' +// IGNORE_CAPS-NOT: entry point uses capabilities not in specified profile outputBuffer[0] = float(originalValue); tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); outputBuffer[1] = float(originalValue); diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index 3cdb080dc5b..e305549b1f1 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -5,6 +5,7 @@ RWStructuredBuffer outputBuffer; //TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=workBuffer RWStructuredBuffer workBuffer; +RWStructuredBuffer workBuffer4; [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) @@ -13,15 +14,19 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) half2 originalMin; half2 originalMax; half2 originalExchange; + half4 originalAdd4; // Direct atomic operation on half2 should trigger the SPV_NV_shader_atomic_fp16_vector extension originalAdd = __atomic_add(workBuffer[0], half2(1.0h, 2.0h)); originalMin = __atomic_min(workBuffer[1], half2(1.0h, 2.0h)); originalMax = __atomic_max(workBuffer[2], half2(1.0h, 2.0h)); originalExchange = __atomic_exchange(workBuffer[3], half2(3.0h, 4.0h)); + originalAdd4 = __atomic_add(workBuffer4[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); outputBuffer[0] = float(originalAdd.x + originalMin.x + originalMax.x + originalExchange.x); outputBuffer[1] = float(originalAdd.y + 
originalMin.y + originalMax.y + originalExchange.y); + outputBuffer[2] = float(originalAdd4.x + originalAdd4.y); + outputBuffer[3] = float(originalAdd4.z + originalAdd4.w); } // CHECK-NOT: OpCapability AtomicFloat16AddEXT @@ -33,3 +38,4 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // CHECK-COUNT-1: OpAtomicFMinEXT %v2half // CHECK-COUNT-1: OpAtomicFMaxEXT %v2half // CHECK-COUNT-1: OpAtomicExchange %v2half +// CHECK-COUNT-1: OpAtomicFAddEXT %v4half From f01ac0f4f223ee79367ebace1b79e2a88270b3be Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 16:50:04 -0700 Subject: [PATCH 28/80] Assert fp16 vector add opcode --- .../byte-address-half-atomics-capability.slang | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 1f9fb9002fd..f1e7f43fdb5 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -94,6 +94,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // VECTOR: OpCapability AtomicFloat16VectorNV // VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR: OpAtomicFAddEXT %v2half // VECTOR_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT // VECTOR_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" From dba2059ccf80db3ce96d7a50e2011aae9c947037 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 16:57:44 -0700 Subject: [PATCH 29/80] Format fp16 vector atomic capability call --- source/slang/slang-emit-spirv.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/slang/slang-emit-spirv.cpp 
b/source/slang/slang-emit-spirv.cpp index 2c54e09557d..b51ec34069b 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4516,7 +4516,9 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex return true; } - maybeDiagnoseRestrictiveCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); + maybeDiagnoseRestrictiveCapabilityUse( + atomicInst, + CapabilityName::spvAtomicFloat16VectorNV); ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); return true; From bbbe5ca7e29938b8220d537c9882490e86fffd24 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 17:29:44 -0700 Subject: [PATCH 30/80] Tighten pointer fp16 vector atomic capability --- source/slang/hlsl.meta.slang | 6 ++++-- .../byte-address-half-atomics-capability.slang | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 5d1a0198202..511cf6275ed 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6544,6 +6544,8 @@ $} } return; case spvAtomicFloat16AddEXT: + // Keep the explicit scalar capability case so this arm wins over the + // SPIR-V default when scalar fp16 atomic add is available. 
default: { let buf = __getEquivalentStructuredBuffer(this); @@ -33750,7 +33752,7 @@ uint packHalf2x16(half2 unpackedValue) return packHalf2x16(float2(unpackedValue)); } -[require(spirv)] +[require(spirv, spvAtomicFloat16VectorNV)] void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) { let buf = (half2*)(dest); @@ -33765,7 +33767,7 @@ void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) } } -[require(spirv)] +[require(spirv, spvAtomicFloat16VectorNV)] void InterlockedAddF16x2(half2* dest, half2 value, out half2 originalValue) { originalValue = __atomic_add(*dest, value); diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index f1e7f43fdb5..902b34504b6 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -6,6 +6,8 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH //DIAGNOSTIC_TEST:SIMPLE(diag=EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_EMULATED +//DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_EMULATED 
+//DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_F16X2_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_F16X2 //TEST:SIMPLE(filecheck=IGNORE_CAPS): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -ignore-capabilities -capability spirv_1_5 //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC @@ -16,6 +18,7 @@ RWByteAddressBuffer tmpBuffer; RWStructuredBuffer outputBuffer; +RWStructuredBuffer halfBuffer; RWStructuredBuffer vectorBuffer; RWStructuredBuffer unsupportedVectorBuffer; @@ -47,6 +50,18 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) outputBuffer[0] = float(originalValue); tmpBuffer.InterlockedAddF16Emulated(2, 1.0h, originalValue); outputBuffer[1] = float(originalValue); +#elif defined(TEST_POINTER_EMULATED) + half originalValue; + InterlockedAddF16Emulated(&halfBuffer[0], 1.0h, originalValue); +// POINTER_EMULATED_NO_VECTOR: entry point uses capabilities not in specified profile +// POINTER_EMULATED_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' + outputBuffer[0] = float(originalValue); +#elif defined(TEST_POINTER_F16X2) + half2 originalValue; + InterlockedAddF16x2(&vectorBuffer[0], half2(1.0h, 2.0h), originalValue); +// POINTER_F16X2_NO_VECTOR: entry point uses capabilities not in specified profile +// POINTER_F16X2_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' + outputBuffer[0] = float(originalValue.x + originalValue.y); #else half originalValue; tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); From 72c9e55614adfba7019f40f27e143333463dd8b9 Mon Sep 
17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 17:43:04 -0700 Subject: [PATCH 31/80] Clarify fp16 scalar atomic target switch comment --- source/slang/hlsl.meta.slang | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 511cf6275ed..1ef18c2b12a 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6544,8 +6544,8 @@ $} } return; case spvAtomicFloat16AddEXT: - // Keep the explicit scalar capability case so this arm wins over the - // SPIR-V default when scalar fp16 atomic add is available. + // This shared body is the scalar fp16 atomic-add path; the explicit + // label records the SPIR-V capability that enables it. default: { let buf = __getEquivalentStructuredBuffer(this); From 902c91093bd5f1ca713c9a7d461a2ccc64c23ab8 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 18:12:01 -0700 Subject: [PATCH 32/80] Warn on late fp16 vector atomic capability --- source/slang/hlsl.meta.slang | 7 ++- source/slang/slang-emit-spirv.cpp | 53 ++++++++++++++----- ...der-atomic-fp16-vector-compatibility.slang | 8 +++ 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 1ef18c2b12a..2a6ce3bfcea 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6544,8 +6544,11 @@ $} } return; case spvAtomicFloat16AddEXT: - // This shared body is the scalar fp16 atomic-add path; the explicit - // label records the SPIR-V capability that enables it. 
+ { + let buf = __getEquivalentStructuredBuffer(this); + originalValue = __atomic_add(buf[byteAddress/2], value); + return; + } default: { let buf = __getEquivalentStructuredBuffer(this); diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index b51ec34069b..e70e688a08c 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4412,12 +4412,29 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } - Dictionary> m_diagnosedRestrictiveCaps; - void maybeDiagnoseRestrictiveCapabilityUse(IRInst* inst, CapabilityName capabilityName) + Dictionary> m_diagnosedCapabilityUses; + void maybeDiagnoseCapabilityUse(IRInst* inst, CapabilityName capabilityName) { auto& optionSet = m_targetProgram->getOptionSet(); - if (!optionSet.getBoolOption(CompilerOptionName::RestrictiveCapabilityCheck) || - optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) + if (optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) + return; + + bool specificProfileRequested = optionSet.hasOption(CompilerOptionName::Profile) && + (optionSet.getIntOption(CompilerOptionName::Profile) != + SLANG_PROFILE_UNKNOWN); + bool specificCapabilityRequested = false; + for (auto atomVal : optionSet.getArray(CompilerOptionName::Capability)) + { + if ((atomVal.kind == CompilerOptionValueKind::Int && + atomVal.intValue != SLANG_CAPABILITY_UNKNOWN) || + atomVal.kind == CompilerOptionValueKind::String) + { + specificCapabilityRequested = true; + break; + } + } + if (!optionSet.getBoolOption(CompilerOptionName::RestrictiveCapabilityCheck) && + !specificProfileRequested && !specificCapabilityRequested) return; auto parentFunc = getParentFunc(inst); @@ -4458,18 +4475,28 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex StringBuilder capsSb; printDiagnosticArg(capsSb, addedAtoms); String missingCapsStr = capsSb.toString(); - if 
(!m_diagnosedRestrictiveCaps[entryPoint].add(missingCapsStr)) + if (!m_diagnosedCapabilityUses[entryPoint].add(missingCapsStr)) continue; StringBuilder entryPointSb; printDiagnosticArg(entryPointSb, entryPoint); - m_sink->diagnose(Diagnostics::ProfileImplicitlyUpgradedRestrictive{ - .entryPoint = entryPointSb.toString(), - .profile = optionSet.getProfile().getName(), - .capabilities = missingCapsStr, - .location = entryPoint->sourceLoc, - }); + maybeDiagnoseWarningOrError( + m_sink, + optionSet, + DiagnosticCategory::Capability, + Diagnostics::ProfileImplicitlyUpgraded{ + .entryPoint = entryPointSb.toString(), + .profile = optionSet.getProfile().getName(), + .capabilities = missingCapsStr, + .location = entryPoint->sourceLoc, + }, + Diagnostics::ProfileImplicitlyUpgradedRestrictive{ + .entryPoint = entryPointSb.toString(), + .profile = optionSet.getProfile().getName(), + .capabilities = missingCapsStr, + .location = entryPoint->sourceLoc, + }); } } @@ -4516,9 +4543,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex return true; } - maybeDiagnoseRestrictiveCapabilityUse( - atomicInst, - CapabilityName::spvAtomicFloat16VectorNV); + maybeDiagnoseCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); return true; diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index 706674aadf8..f8c4f45302a 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -1,6 +1,8 @@ //TEST:SIMPLE(filecheck=NEGATIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly //TEST:SIMPLE(filecheck=POSITIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC 
//TEST:SIMPLE(filecheck=POSITIVE_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_MIN_MAX +//DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_WARN,non-exhaustive): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC +//TEST:SIMPLE(filecheck=POSITIVE_IGNORE_CAPS): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -ignore-capabilities -DPOSITIVE_VECTOR_ATOMIC // Requiring the GLSL extension should preserve the old SPIR-V 1.0 compatibility path and // should not declare the NV vector capability until a vector atomic operation needs it. @@ -12,6 +14,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { #ifdef POSITIVE_VECTOR_ATOMIC __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +// POSITIVE_WARN: profile implicitly upgraded +// POSITIVE_WARN: spvAtomicFloat16VectorNV #elif defined(POSITIVE_VECTOR_MIN_MAX) __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); __atomic_max(vectorBuffer[1], half2(3.0h, 4.0h)); @@ -27,6 +31,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // POSITIVE: OpCapability AtomicFloat16VectorNV // POSITIVE: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POSITIVE_IGNORE_CAPS-NOT: warning[E41012] +// POSITIVE_IGNORE_CAPS: OpCapability AtomicFloat16VectorNV +// POSITIVE_IGNORE_CAPS: OpExtension "SPV_NV_shader_atomic_fp16_vector" + // POSITIVE_MIN_MAX: OpCapability AtomicFloat16VectorNV // POSITIVE_MIN_MAX: OpExtension "SPV_NV_shader_atomic_fp16_vector" // POSITIVE_MIN_MAX: OpAtomicFMinEXT %v2half From 2f4ad196ddf5b918a526b7f024780cb0858dacfa Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 18:14:37 -0700 Subject: [PATCH 33/80] Format fp16 vector capability diagnostic guard --- source/slang/slang-emit-spirv.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp 
b/source/slang/slang-emit-spirv.cpp index e70e688a08c..b0ee9e0e18c 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4419,9 +4419,9 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex if (optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) return; - bool specificProfileRequested = optionSet.hasOption(CompilerOptionName::Profile) && - (optionSet.getIntOption(CompilerOptionName::Profile) != - SLANG_PROFILE_UNKNOWN); + bool specificProfileRequested = + optionSet.hasOption(CompilerOptionName::Profile) && + (optionSet.getIntOption(CompilerOptionName::Profile) != SLANG_PROFILE_UNKNOWN); bool specificCapabilityRequested = false; for (auto atomVal : optionSet.getArray(CompilerOptionName::Capability)) { From cc0abda659b83f20d662e641801b780123dd1c41 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 18:21:22 -0700 Subject: [PATCH 34/80] Assert fp16 vector compatibility add opcode --- tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index f8c4f45302a..837678c069a 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -30,10 +30,12 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // POSITIVE: OpCapability AtomicFloat16VectorNV // POSITIVE: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POSITIVE: OpAtomicFAddEXT %v2half // POSITIVE_IGNORE_CAPS-NOT: warning[E41012] // POSITIVE_IGNORE_CAPS: OpCapability AtomicFloat16VectorNV // POSITIVE_IGNORE_CAPS: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POSITIVE_IGNORE_CAPS: OpAtomicFAddEXT %v2half // POSITIVE_MIN_MAX: OpCapability AtomicFloat16VectorNV // POSITIVE_MIN_MAX: 
OpExtension "SPV_NV_shader_atomic_fp16_vector" From 956848e214640e477cc3f13832bb38a9efae4fe5 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Thu, 28 May 2026 18:46:15 -0700 Subject: [PATCH 35/80] Cover half4 fp16 vector atomics --- tests/spirv/atomic-float16-vector.slang | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index e305549b1f1..748009ef78c 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -15,6 +15,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) half2 originalMax; half2 originalExchange; half4 originalAdd4; + half4 originalMin4; + half4 originalMax4; + half4 originalExchange4; // Direct atomic operation on half2 should trigger the SPV_NV_shader_atomic_fp16_vector extension originalAdd = __atomic_add(workBuffer[0], half2(1.0h, 2.0h)); @@ -22,11 +25,14 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) originalMax = __atomic_max(workBuffer[2], half2(1.0h, 2.0h)); originalExchange = __atomic_exchange(workBuffer[3], half2(3.0h, 4.0h)); originalAdd4 = __atomic_add(workBuffer4[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); + originalMin4 = __atomic_min(workBuffer4[1], half4(1.0h, 2.0h, 3.0h, 4.0h)); + originalMax4 = __atomic_max(workBuffer4[2], half4(1.0h, 2.0h, 3.0h, 4.0h)); + originalExchange4 = __atomic_exchange(workBuffer4[3], half4(5.0h, 6.0h, 7.0h, 8.0h)); outputBuffer[0] = float(originalAdd.x + originalMin.x + originalMax.x + originalExchange.x); outputBuffer[1] = float(originalAdd.y + originalMin.y + originalMax.y + originalExchange.y); - outputBuffer[2] = float(originalAdd4.x + originalAdd4.y); - outputBuffer[3] = float(originalAdd4.z + originalAdd4.w); + outputBuffer[2] = float(originalAdd4.x + originalMin4.x + originalMax4.x + originalExchange4.x); + outputBuffer[3] = float(originalAdd4.w + originalMin4.w + 
originalMax4.w + originalExchange4.w); } // CHECK-NOT: OpCapability AtomicFloat16AddEXT @@ -39,3 +45,6 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // CHECK-COUNT-1: OpAtomicFMaxEXT %v2half // CHECK-COUNT-1: OpAtomicExchange %v2half // CHECK-COUNT-1: OpAtomicFAddEXT %v4half +// CHECK-COUNT-1: OpAtomicFMinEXT %v4half +// CHECK-COUNT-1: OpAtomicFMaxEXT %v4half +// CHECK-COUNT-1: OpAtomicExchange %v4half From 0032f5abc387708cecffa64ae4b1019ce6aba6ae Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Fri, 29 May 2026 09:33:01 -0700 Subject: [PATCH 36/80] Address fp16 atomic intrinsic review --- source/slang/hlsl.meta.slang | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 2a6ce3bfcea..0b3e362ac20 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6543,12 +6543,6 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; - case spvAtomicFloat16AddEXT: - { - let buf = __getEquivalentStructuredBuffer(this); - originalValue = __atomic_add(buf[byteAddress/2], value); - return; - } default: { let buf = __getEquivalentStructuredBuffer(this); @@ -33755,7 +33749,7 @@ uint packHalf2x16(half2 unpackedValue) return packHalf2x16(float2(unpackedValue)); } -[require(spirv, spvAtomicFloat16VectorNV)] +[require(spirv)] void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) { let buf = (half2*)(dest); From 4ded6d0ff7cb38be8b81a92b6ca562e8c66324ee Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Fri, 29 May 2026 18:42:30 -0700 Subject: [PATCH 37/80] Keep pointer fp16x2 helper broadly available --- source/slang/hlsl.meta.slang | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 0b3e362ac20..acd2a77528b 100644 --- 
a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -33764,7 +33764,7 @@ void InterlockedAddF16Emulated(half* dest, half value, out half originalValue) } } -[require(spirv, spvAtomicFloat16VectorNV)] +[require(spirv)] void InterlockedAddF16x2(half2* dest, half2 value, out half2 originalValue) { originalValue = __atomic_add(*dest, value); From 522e860895ec4327f8eb3038e3db80c75db38900 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Fri, 29 May 2026 19:04:51 -0700 Subject: [PATCH 38/80] Add scalar exchange capability regression --- .../byte-address-half-atomics-capability.slang | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 902b34504b6..f45342dd586 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -13,6 +13,7 @@ //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX //TEST:SIMPLE(filecheck=VECTOR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_EXCHANGE +//TEST:SIMPLE(filecheck=SCALAR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_DIRECT_SCALAR_EXCHANGE //TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_EMULATED //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain 
-stage compute -capability cuda_sm_7_0 @@ -21,6 +22,7 @@ RWStructuredBuffer outputBuffer; RWStructuredBuffer halfBuffer; RWStructuredBuffer vectorBuffer; RWStructuredBuffer unsupportedVectorBuffer; +RWStructuredBuffer uintBuffer; [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) @@ -39,6 +41,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // DIRECT_EXCHANGE_NO_VECTOR: entry point uses capabilities not in specified profile // DIRECT_EXCHANGE_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' outputBuffer[0] = float(originalValue.x + originalValue.y); +#elif defined(TEST_DIRECT_SCALAR_EXCHANGE) + uint originalValue = __atomic_exchange(uintBuffer[0], 1u); + outputBuffer[0] = float(originalValue); #elif defined(TEST_UNSUPPORTED_VECTOR_WIDTH) __atomic_add(unsupportedVectorBuffer[0], half3(1.0h, 2.0h, 3.0h)); // UNSUPPORTED_VECTOR_WIDTH: SPIR-V fp16 vector atomics only support half2 and half4. @@ -128,6 +133,12 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR_EXCHANGE-NOT: OpCapability AtomicFloat16AddEXT // VECTOR_EXCHANGE-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// SCALAR_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// SCALAR_EXCHANGE: OpAtomicExchange %uint +// SCALAR_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" + // EMULATED-NOT: OpCapability AtomicFloat16AddEXT // EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // EMULATED: OpCapability AtomicFloat16VectorNV From fca599e0b938098f3b4e4f480e1ceb712dc29ea2 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 1 Jun 2026 18:27:34 -0700 Subject: [PATCH 39/80] Address fp16 vector capability review --- docs/user-guide/a3-02-reference-capability-atoms.md | 3 --- source/slang/hlsl.meta.slang | 3 +-- 
source/slang/slang-capabilities.capdef | 5 +---- .../byte-address-half-atomics-capability.slang | 5 +---- .../gl-nv-shader-atomic-fp16-vector-compatibility.slang | 4 ++-- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 8b08b38cb01..8cb7d0bd8d9 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -576,9 +576,6 @@ Extensions `GL_NV_shader_atomic_fp16_vector` > Represents the GL_NV_shader_atomic_fp16_vector extension. -> The SPIR-V 1.0 fallback preserves compatibility for shaders that only require the -> GLSL extension; emitted fp16 vector atomic operations still require -> spvAtomicFloat16VectorNV. `GL_NV_shader_invocation_reorder` > Represents the GL_NV_shader_invocation_reorder extension (NVIDIA-specific). diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index acd2a77528b..045ce719b20 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6565,8 +6565,7 @@ $} /// maps to `atomicAdd`. [__requiresNVAPI] [ForceInline] - [require(cuda_hlsl, sm_5_0)] - [require(spirv, spvAtomicFloat16VectorNV)] + [require(cuda_hlsl_spirv, sm_5_0)] void InterlockedAddF16Emulated(uint byteAddress, half value, out half originalValue) { __target_switch diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 31bac24974d..8a85010171e 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -1268,11 +1268,8 @@ alias GL_NV_ray_tracing = GL_EXT_ray_tracing; alias GL_NV_ray_tracing_motion_blur = _GL_NV_ray_tracing_motion_blur | spvRayTracingMotionBlurNV; /// Represents the GL_NV_shader_atomic_fp16_vector extension. 
-/// The SPIR-V 1.0 fallback preserves compatibility for shaders that only require the -/// GLSL extension; emitted fp16 vector atomic operations still require -/// spvAtomicFloat16VectorNV. /// [EXT] -alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spvAtomicFloat16VectorNV | _spirv_1_0; +alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spvAtomicFloat16VectorNV; /// Represents the GL_NV_shader_invocation_reorder extension (NVIDIA-specific). /// [EXT] diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index f45342dd586..da35b2951e9 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -5,7 +5,6 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH -//DIAGNOSTIC_TEST:SIMPLE(diag=EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_EMULATED //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute 
-emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_EMULATED //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_F16X2_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_F16X2 //TEST:SIMPLE(filecheck=IGNORE_CAPS): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -ignore-capabilities -capability spirv_1_5 @@ -14,7 +13,7 @@ //TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX //TEST:SIMPLE(filecheck=VECTOR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_EXCHANGE //TEST:SIMPLE(filecheck=SCALAR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_DIRECT_SCALAR_EXCHANGE -//TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_EMULATED +//TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_EMULATED //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 RWByteAddressBuffer tmpBuffer; @@ -50,8 +49,6 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) #elif defined(TEST_EMULATED) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); -// EMULATED_NO_VECTOR: entry point uses capabilities not in specified profile -// EMULATED_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' outputBuffer[0] = float(originalValue); tmpBuffer.InterlockedAddF16Emulated(2, 1.0h, originalValue); outputBuffer[1] = float(originalValue); diff --git 
a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index 837678c069a..77d3f7b442b 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -4,8 +4,8 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_WARN,non-exhaustive): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=POSITIVE_IGNORE_CAPS): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -ignore-capabilities -DPOSITIVE_VECTOR_ATOMIC -// Requiring the GLSL extension should preserve the old SPIR-V 1.0 compatibility path and -// should not declare the NV vector capability until a vector atomic operation needs it. +// Requiring the GLSL extension should not declare the NV vector capability until a vector +// atomic operation needs it. RWStructuredBuffer vectorBuffer; [require(GL_NV_shader_atomic_fp16_vector)] From 9ec066fd28c3c35bcd3c4a4a932dbb8e51952672 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 1 Jun 2026 19:42:12 -0700 Subject: [PATCH 40/80] Address fp16 vector review gaps --- docs/user-guide/a2-01-spirv-target-specific.md | 11 ++++++----- source/slang/slang-diagnostics.lua | 14 ++++++++++++++ source/slang/slang-emit-spirv.cpp | 12 ++++-------- .../byte-address-half-atomics-capability.slang | 1 + 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/docs/user-guide/a2-01-spirv-target-specific.md b/docs/user-guide/a2-01-spirv-target-specific.md index 9193e9fc6ee..dcb1413d55d 100644 --- a/docs/user-guide/a2-01-spirv-target-specific.md +++ b/docs/user-guide/a2-01-spirv-target-specific.md @@ -171,12 +171,13 @@ GLSL 4.6 with [GLSL_EXT_shader_atomic_float2](https://github.com/KhronosGroup/GL SPIR-V 1.5 with 
[SPV_EXT_shader_atomic_float_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_add.asciidoc) and [SPV_EXT_shader_atomic_float_min_max](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_min_max.asciidoc) can use atomic operations for 32-bit float type and 64-bit float type. SPIR-V 1.5 with [SPV_EXT_shader_atomic_float16_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float16_add.asciidoc) can use atomic operations for 16-bit float type +[SPV_NV_shader_atomic_fp16_vector](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_shader_atomic_fp16_vector.asciidoc) can use vector atomic add/min/max/exchange operations for 16-bit float vector types with 2 or 4 components. -| | 32-bit integer | 64-bit integer | 32-bit float | 64-bit float | 16-bit float | -| ------ | -------------- | --------------- | --------------------- | ---------------- | ---------------- | -| HLSL | Yes (SM5.0) | Yes (SM6.6) | Only bit-wise (SM6.6) | No | No | -| GLSL | Yes (GL4.3) | Yes (GL4.4+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | -| SPIR-V | Yes | Yes | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | +| | 32-bit integer | 64-bit integer | 32-bit float | 64-bit float | 16-bit float | 16-bit float vector | +| ------ | -------------- | --------------- | --------------------- | ---------------- | ---------------- | ----------------------- | +| HLSL | Yes (SM5.0) | Yes (SM6.6) | Only bit-wise (SM6.6) | No | No | No | +| GLSL | Yes (GL4.3) | Yes (GL4.4+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (NV ext) | +| SPIR-V | Yes | Yes | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV_NV fp16 vector) | ## ConstantBuffer, StructuredBuffer and ByteAddressBuffer diff --git a/source/slang/slang-diagnostics.lua b/source/slang/slang-diagnostics.lua index 
9e9e7a34dfd..d134ff2571f 100644 --- a/source/slang/slang-diagnostics.lua +++ b/source/slang/slang-diagnostics.lua @@ -4808,6 +4808,20 @@ warning( span { loc = "location", message = "Slang's SPIR-V backend only supports SPIR-V version 1.3 and later. Use `-emit-spirv-via-glsl` option to produce SPIR-V 1.0 through 1.2." } ) +err( + "spirv-fp16-vector-atomic-non-constant-size", + 50012, + "invalid SPIR-V fp16 vector atomic type", + span { loc = "location", message = "SPIR-V fp16 vector atomics require a constant half2 or half4 type." } +) + +err( + "spirv-fp16-vector-atomic-unsupported-width", + 50013, + "invalid SPIR-V fp16 vector atomic width", + span { loc = "location", message = "SPIR-V fp16 vector atomics only support half2 and half4." } +) + err( "invalid-mesh-stage-output-topology", 50060, diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index b0ee9e0e18c..54ac7722c85 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4526,20 +4526,16 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex auto elementCountInst = as(vectorType->getElementCount()); if (!elementCountInst) { - SLANG_DIAGNOSE_UNEXPECTED( - m_sink, - atomicInst, - "SPIR-V fp16 vector atomics require a constant half2 or half4 type."); + m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicNonConstantSize{ + .location = atomicInst->sourceLoc}); return true; } auto elementCount = elementCountInst->getValue(); if (elementCount != 2 && elementCount != 4) { - SLANG_DIAGNOSE_UNEXPECTED( - m_sink, - atomicInst, - "SPIR-V fp16 vector atomics only support half2 and half4."); + m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = atomicInst->sourceLoc}); return true; } diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index da35b2951e9..df0e8b8b907 
100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -45,6 +45,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) outputBuffer[0] = float(originalValue); #elif defined(TEST_UNSUPPORTED_VECTOR_WIDTH) __atomic_add(unsupportedVectorBuffer[0], half3(1.0h, 2.0h, 3.0h)); +// UNSUPPORTED_VECTOR_WIDTH: error[E50013]: invalid SPIR-V fp16 vector atomic width // UNSUPPORTED_VECTOR_WIDTH: SPIR-V fp16 vector atomics only support half2 and half4. #elif defined(TEST_EMULATED) half originalValue; From 56d8ab6c87ca6d5d96029166050a57c6e99db8a0 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 1 Jun 2026 19:51:23 -0700 Subject: [PATCH 41/80] Polish fp16 vector atomic docs --- docs/user-guide/a2-01-spirv-target-specific.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/user-guide/a2-01-spirv-target-specific.md b/docs/user-guide/a2-01-spirv-target-specific.md index dcb1413d55d..d92a881ffad 100644 --- a/docs/user-guide/a2-01-spirv-target-specific.md +++ b/docs/user-guide/a2-01-spirv-target-specific.md @@ -170,14 +170,14 @@ GLSL 4.6 with [GLSL_EXT_shader_atomic_float](https://github.com/KhronosGroup/GLS GLSL 4.6 with [GLSL_EXT_shader_atomic_float2](https://github.com/KhronosGroup/GLSL/blob/main/extensions/ext/GLSL_EXT_shader_atomic_float2.txt) can use atomic operations for 16-bit float type. SPIR-V 1.5 with [SPV_EXT_shader_atomic_float_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_add.asciidoc) and [SPV_EXT_shader_atomic_float_min_max](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_min_max.asciidoc) can use atomic operations for 32-bit float type and 64-bit float type. 
-SPIR-V 1.5 with [SPV_EXT_shader_atomic_float16_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float16_add.asciidoc) can use atomic operations for 16-bit float type -[SPV_NV_shader_atomic_fp16_vector](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_shader_atomic_fp16_vector.asciidoc) can use vector atomic add/min/max/exchange operations for 16-bit float vector types with 2 or 4 components. +SPIR-V 1.5 with [SPV_EXT_shader_atomic_float16_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float16_add.asciidoc) can use atomic operations for 16-bit float type. +SPIR-V 1.5 with [SPV_NV_shader_atomic_fp16_vector](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_shader_atomic_fp16_vector.asciidoc) can use vector atomic add/min/max/exchange operations for 16-bit float vector types with 2 or 4 components. | | 32-bit integer | 64-bit integer | 32-bit float | 64-bit float | 16-bit float | 16-bit float vector | | ------ | -------------- | --------------- | --------------------- | ---------------- | ---------------- | ----------------------- | | HLSL | Yes (SM5.0) | Yes (SM6.6) | Only bit-wise (SM6.6) | No | No | No | -| GLSL | Yes (GL4.3) | Yes (GL4.4+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (NV ext) | -| SPIR-V | Yes | Yes | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV_NV fp16 vector) | +| GLSL | Yes (GL4.3) | Yes (GL4.4+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (GL_NV ext) | +| SPIR-V | Yes | Yes | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV_NV ext) | ## ConstantBuffer, StructuredBuffer and ByteAddressBuffer From d5675e4771b472e7fbbc51dbf3d4aa989bcfc267 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 1 Jun 2026 20:26:11 -0700 Subject: [PATCH 42/80] Clarify fp16 vector atomic 
invariant --- source/slang/slang-diagnostics.lua | 7 ------- source/slang/slang-emit-spirv.cpp | 5 ++--- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/source/slang/slang-diagnostics.lua b/source/slang/slang-diagnostics.lua index d134ff2571f..2ea268f0b2b 100644 --- a/source/slang/slang-diagnostics.lua +++ b/source/slang/slang-diagnostics.lua @@ -4808,13 +4808,6 @@ warning( span { loc = "location", message = "Slang's SPIR-V backend only supports SPIR-V version 1.3 and later. Use `-emit-spirv-via-glsl` option to produce SPIR-V 1.0 through 1.2." } ) -err( - "spirv-fp16-vector-atomic-non-constant-size", - 50012, - "invalid SPIR-V fp16 vector atomic type", - span { loc = "location", message = "SPIR-V fp16 vector atomics require a constant half2 or half4 type." } -) - err( "spirv-fp16-vector-atomic-unsupported-width", 50013, diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 54ac7722c85..54fdb2aa286 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4526,9 +4526,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex auto elementCountInst = as(vectorType->getElementCount()); if (!elementCountInst) { - m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicNonConstantSize{ - .location = atomicInst->sourceLoc}); - return true; + SLANG_UNEXPECTED( + "non-IntLit vector element count reached SPIR-V fp16 atomic emit"); } auto elementCount = elementCountInst->getValue(); From 6e80d7317a50c0676a83928791e2ed27415797bf Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 1 Jun 2026 20:28:16 -0700 Subject: [PATCH 43/80] Fix fp16 vector atomic formatting --- source/slang/slang-emit-spirv.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 54fdb2aa286..25eee51c4d2 100644 --- a/source/slang/slang-emit-spirv.cpp +++ 
b/source/slang/slang-emit-spirv.cpp @@ -4526,8 +4526,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex auto elementCountInst = as(vectorType->getElementCount()); if (!elementCountInst) { - SLANG_UNEXPECTED( - "non-IntLit vector element count reached SPIR-V fp16 atomic emit"); + SLANG_UNEXPECTED("non-IntLit vector element count reached SPIR-V fp16 atomic emit"); } auto elementCount = elementCountInst->getValue(); From 6589bb40fd10ca710702a3a134afd83d6d2ea85a Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Mon, 1 Jun 2026 20:53:18 -0700 Subject: [PATCH 44/80] Cover fp16 vector atomic diagnostics --- .../byte-address-half-atomics-capability.slang | 10 ++++++++++ ...gl-nv-shader-atomic-fp16-vector-compatibility.slang | 3 +++ 2 files changed, 13 insertions(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index df0e8b8b907..1d4dd6da40a 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -13,6 +13,7 @@ //TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX //TEST:SIMPLE(filecheck=VECTOR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_EXCHANGE //TEST:SIMPLE(filecheck=SCALAR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_DIRECT_SCALAR_EXCHANGE +//TEST:SIMPLE(filecheck=SCALAR_HALF_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_DIRECT_SCALAR_HALF_EXCHANGE //TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute 
-emit-spirv-directly -DTEST_EMULATED //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 @@ -43,6 +44,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) #elif defined(TEST_DIRECT_SCALAR_EXCHANGE) uint originalValue = __atomic_exchange(uintBuffer[0], 1u); outputBuffer[0] = float(originalValue); +#elif defined(TEST_DIRECT_SCALAR_HALF_EXCHANGE) + half originalValue = __atomic_exchange(halfBuffer[0], 1.0h); + outputBuffer[0] = float(originalValue); #elif defined(TEST_UNSUPPORTED_VECTOR_WIDTH) __atomic_add(unsupportedVectorBuffer[0], half3(1.0h, 2.0h, 3.0h)); // UNSUPPORTED_VECTOR_WIDTH: error[E50013]: invalid SPIR-V fp16 vector atomic width @@ -137,6 +141,12 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // SCALAR_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV // SCALAR_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// SCALAR_HALF_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR_HALF_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// SCALAR_HALF_EXCHANGE: OpAtomicExchange %half +// SCALAR_HALF_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR_HALF_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" + // EMULATED-NOT: OpCapability AtomicFloat16AddEXT // EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // EMULATED: OpCapability AtomicFloat16VectorNV diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index 77d3f7b442b..84caaf7b208 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -2,6 +2,7 @@ //TEST:SIMPLE(filecheck=POSITIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=POSITIVE_MIN_MAX): -target spirv -entry computeMain -stage compute 
-emit-spirv-directly -DPOSITIVE_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_WARN,non-exhaustive): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC +//DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_ERR,non-exhaustive): -target spirv -profile spirv_1_5 -restrictive-capability-check -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=POSITIVE_IGNORE_CAPS): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -ignore-capabilities -DPOSITIVE_VECTOR_ATOMIC // Requiring the GLSL extension should not declare the NV vector capability until a vector @@ -16,6 +17,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); // POSITIVE_WARN: profile implicitly upgraded // POSITIVE_WARN: spvAtomicFloat16VectorNV +// POSITIVE_ERR: error[E41013]: entry point uses capabilities not in specified profile +// POSITIVE_ERR: spvAtomicFloat16VectorNV #elif defined(POSITIVE_VECTOR_MIN_MAX) __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); __atomic_max(vectorBuffer[1], half2(3.0h, 4.0h)); From 19f9be231766bd6927ab4b5bb2004f5aea9e22e8 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 06:23:54 -0700 Subject: [PATCH 45/80] Clarify fp16 vector atomic diagnostic test --- .../gl-nv-shader-atomic-fp16-vector-compatibility.slang | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index 84caaf7b208..df9a076197a 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -1,12 +1,15 @@ //TEST:SIMPLE(filecheck=NEGATIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly 
//TEST:SIMPLE(filecheck=POSITIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=POSITIVE_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_MIN_MAX +// DIAGNOSTIC_TEST is used here to check capability diagnostics, not just failures. +// The vector atomic is compiled under spirv_1_5, which lacks spvAtomicFloat16VectorNV. +// Without restrictive checking this produces a profile-upgrade warning; with +// -restrictive-capability-check, the same missing capability is an error. +// `non-exhaustive` keeps each check focused on that late capability diagnostic. //DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_WARN,non-exhaustive): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_ERR,non-exhaustive): -target spirv -profile spirv_1_5 -restrictive-capability-check -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=POSITIVE_IGNORE_CAPS): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -ignore-capabilities -DPOSITIVE_VECTOR_ATOMIC -// Requiring the GLSL extension should not declare the NV vector capability until a vector -// atomic operation needs it. 
RWStructuredBuffer vectorBuffer; [require(GL_NV_shader_atomic_fp16_vector)] From 7934fa6d578be092ea1441c46d5d3a9b2f93c96a Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 06:36:41 -0700 Subject: [PATCH 46/80] Clarify fp16 atomic test directives --- .../byte-address-half-atomics-capability.slang | 10 ++++++++++ ...gl-nv-shader-atomic-fp16-vector-compatibility.slang | 5 ----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 1d4dd6da40a..97fdd89710c 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,12 +1,22 @@ +// Scalar fp16 add checks. `NO_FP16_ATOMIC` has no fp16 atomic capability; `VECTOR_ONLY` +// has the vector capability but intentionally lacks the scalar spvAtomicFloat16AddEXT. //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 //DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV + +// Direct half-vector `__atomic_*` checks. `DIRECT` selects direct structured-buffer +// atomics; `NO_VECTOR` means only the scalar fp16 atomic capability is enabled. 
//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH + +// Pointer-form fp16 vector checks. These paths are emitted as vector atomics, so +// they must diagnose missing spvAtomicFloat16VectorNV when only scalar fp16 add is enabled. //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_EMULATED //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_F16X2_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_F16X2 + +// Positive codegen checks for ignored capabilities, scalar/vector SPIR-V, and CUDA. 
//TEST:SIMPLE(filecheck=IGNORE_CAPS): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -ignore-capabilities -capability spirv_1_5 //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang index df9a076197a..487ed0d2dd8 100644 --- a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -1,11 +1,6 @@ //TEST:SIMPLE(filecheck=NEGATIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly //TEST:SIMPLE(filecheck=POSITIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=POSITIVE_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_MIN_MAX -// DIAGNOSTIC_TEST is used here to check capability diagnostics, not just failures. -// The vector atomic is compiled under spirv_1_5, which lacks spvAtomicFloat16VectorNV. -// Without restrictive checking this produces a profile-upgrade warning; with -// -restrictive-capability-check, the same missing capability is an error. -// `non-exhaustive` keeps each check focused on that late capability diagnostic. 
//DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_WARN,non-exhaustive): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_ERR,non-exhaustive): -target spirv -profile spirv_1_5 -restrictive-capability-check -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=POSITIVE_IGNORE_CAPS): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -ignore-capabilities -DPOSITIVE_VECTOR_ATOMIC From aa1a94885dd97332cf9eb152fc0f42313a5f3d04 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 09:43:46 -0700 Subject: [PATCH 47/80] Add fp16 atomic runtime coverage --- ...byte-address-half-atomics-capability.slang | 74 ++++++++++++++++++- tests/spirv/atomic-float16-vector.slang | 30 +++++++- 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 97fdd89710c..91ba580cac1 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,11 +1,13 @@ // Scalar fp16 add checks. `NO_FP16_ATOMIC` has no fp16 atomic capability; `VECTOR_ONLY` // has the vector capability but intentionally lacks the scalar spvAtomicFloat16AddEXT. 
+//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_SCALAR):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16AddEXT -output-using-type -xslang -DTEST_RUNTIME_SCALAR //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 //DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV // Direct half-vector `__atomic_*` checks. `DIRECT` selects direct structured-buffer // atomics; `NO_VECTOR` means only the scalar fp16 atomic capability is enabled. +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE @@ -13,6 +15,8 @@ // Pointer-form fp16 vector checks. These paths are emitted as vector atomics, so // they must diagnose missing spvAtomicFloat16VectorNV when only scalar fp16 add is enabled. 
+// Runtime coverage for these pointer-form helpers would require SPIR-V variable pointers, +// which existing tests keep disabled on current GCP runners. //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_EMULATED //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_F16X2_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_F16X2 @@ -27,17 +31,85 @@ //TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_EMULATED //TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 +//TEST_INPUT:ubuffer(stride=4, count=8):name=tmpBuffer RWByteAddressBuffer tmpBuffer; +//TEST_INPUT:ubuffer(stride=4, count=32):out,name=outputBuffer RWStructuredBuffer outputBuffer; +//TEST_INPUT:ubuffer(stride=2, count=4):name=halfBuffer RWStructuredBuffer halfBuffer; +//TEST_INPUT:ubuffer(stride=4, count=4):name=vectorBuffer RWStructuredBuffer vectorBuffer; +#ifdef TEST_UNSUPPORTED_VECTOR_WIDTH RWStructuredBuffer unsupportedVectorBuffer; +#endif +//TEST_INPUT:ubuffer(stride=4, count=1):name=uintBuffer RWStructuredBuffer uintBuffer; [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { -#ifdef TEST_DIRECT_VECTOR_ATOMIC +#ifdef TEST_RUNTIME_SCALAR + half originalLow; + half originalHigh; + tmpBuffer.Store(0, 2.0h); + tmpBuffer.Store(2, 4.0h); + tmpBuffer.InterlockedAddF16(0, 1.0h, originalLow); + tmpBuffer.InterlockedAddF16(2, 3.0h, originalHigh); + + outputBuffer[0] = float(originalLow); + outputBuffer[1] = float(originalHigh); + outputBuffer[2] = float(tmpBuffer.Load(0)); + outputBuffer[3] = float(tmpBuffer.Load(2)); + // RUNTIME_SCALAR: type: float + // RUNTIME_SCALAR-NEXT: 2.000000 + // 
RUNTIME_SCALAR-NEXT: 4.000000 + // RUNTIME_SCALAR-NEXT: 3.000000 + // RUNTIME_SCALAR-NEXT: 7.000000 +#elif defined(TEST_RUNTIME_DIRECT) + vectorBuffer[0] = half2(2.0h, 4.0h); + vectorBuffer[1] = half2(5.0h, 1.0h); + vectorBuffer[2] = half2(5.0h, 1.0h); + vectorBuffer[3] = half2(8.0h, 9.0h); + + half2 originalAdd = __atomic_add(vectorBuffer[0], half2(1.0h, 3.0h)); + half2 originalMin = __atomic_min(vectorBuffer[1], half2(3.0h, 2.0h)); + half2 originalMax = __atomic_max(vectorBuffer[2], half2(3.0h, 2.0h)); + half2 originalExchange = __atomic_exchange(vectorBuffer[3], half2(6.0h, 7.0h)); + + outputBuffer[0] = float(originalAdd.x); + outputBuffer[1] = float(originalAdd.y); + outputBuffer[2] = float(vectorBuffer[0].x); + outputBuffer[3] = float(vectorBuffer[0].y); + outputBuffer[4] = float(originalMin.x); + outputBuffer[5] = float(originalMin.y); + outputBuffer[6] = float(vectorBuffer[1].x); + outputBuffer[7] = float(vectorBuffer[1].y); + outputBuffer[8] = float(originalMax.x); + outputBuffer[9] = float(originalMax.y); + outputBuffer[10] = float(vectorBuffer[2].x); + outputBuffer[11] = float(vectorBuffer[2].y); + outputBuffer[12] = float(originalExchange.x); + outputBuffer[13] = float(originalExchange.y); + outputBuffer[14] = float(vectorBuffer[3].x); + outputBuffer[15] = float(vectorBuffer[3].y); + // RUNTIME_DIRECT: type: float + // RUNTIME_DIRECT-NEXT: 2.000000 + // RUNTIME_DIRECT-NEXT: 4.000000 + // RUNTIME_DIRECT-NEXT: 3.000000 + // RUNTIME_DIRECT-NEXT: 7.000000 + // RUNTIME_DIRECT-NEXT: 5.000000 + // RUNTIME_DIRECT-NEXT: 1.000000 + // RUNTIME_DIRECT-NEXT: 3.000000 + // RUNTIME_DIRECT-NEXT: 1.000000 + // RUNTIME_DIRECT-NEXT: 5.000000 + // RUNTIME_DIRECT-NEXT: 1.000000 + // RUNTIME_DIRECT-NEXT: 5.000000 + // RUNTIME_DIRECT-NEXT: 2.000000 + // RUNTIME_DIRECT-NEXT: 8.000000 + // RUNTIME_DIRECT-NEXT: 9.000000 + // RUNTIME_DIRECT-NEXT: 6.000000 + // RUNTIME_DIRECT-NEXT: 7.000000 +#elif defined(TEST_DIRECT_VECTOR_ATOMIC) __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); 
// DIRECT_NO_VECTOR: entry point uses capabilities not in specified profile // DIRECT_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index 748009ef78c..a80198c3f0a 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -1,15 +1,27 @@ //TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute -emit-spirv-directly +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer +//TEST_INPUT:ubuffer(stride=4, count=8):out,name=outputBuffer RWStructuredBuffer outputBuffer; -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=workBuffer +//TEST_INPUT:ubuffer(stride=4, count=4):name=workBuffer RWStructuredBuffer workBuffer; +//TEST_INPUT:ubuffer(stride=8, count=4):name=workBuffer4 RWStructuredBuffer workBuffer4; [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { + workBuffer[0] = half2(2.0h, 4.0h); + workBuffer[1] = half2(5.0h, 1.0h); + workBuffer[2] = half2(5.0h, 1.0h); + workBuffer[3] = half2(8.0h, 9.0h); + + workBuffer4[0] = half4(1.0h, 2.0h, 3.0h, 4.0h); + workBuffer4[1] = half4(5.0h, 1.0h, 7.0h, 2.0h); + workBuffer4[2] = half4(5.0h, 1.0h, 7.0h, 2.0h); + workBuffer4[3] = half4(8.0h, 9.0h, 10.0h, 11.0h); + half2 originalAdd; half2 originalMin; half2 originalMax; @@ -33,6 +45,20 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) outputBuffer[1] = float(originalAdd.y + originalMin.y + originalMax.y + originalExchange.y); outputBuffer[2] = float(originalAdd4.x + originalMin4.x + originalMax4.x + originalExchange4.x); outputBuffer[3] = float(originalAdd4.w + originalMin4.w + originalMax4.w + originalExchange4.w); + outputBuffer[4] = float(workBuffer[0].x + 
workBuffer[1].x + workBuffer[2].x + workBuffer[3].x); + outputBuffer[5] = float(workBuffer[0].y + workBuffer[1].y + workBuffer[2].y + workBuffer[3].y); + outputBuffer[6] = float(workBuffer4[0].x + workBuffer4[1].x + workBuffer4[2].x + workBuffer4[3].x); + outputBuffer[7] = float(workBuffer4[0].w + workBuffer4[1].w + workBuffer4[2].w + workBuffer4[3].w); + + // RUNTIME: type: float + // RUNTIME-NEXT: 20.000000 + // RUNTIME-NEXT: 15.000000 + // RUNTIME-NEXT: 19.000000 + // RUNTIME-NEXT: 19.000000 + // RUNTIME-NEXT: 12.000000 + // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 22.000000 } // CHECK-NOT: OpCapability AtomicFloat16AddEXT From 9d4be3fcd8a35e255ec9571655f611984b09f609 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 09:53:21 -0700 Subject: [PATCH 48/80] Cover all fp16 vector runtime lanes --- tests/spirv/atomic-float16-vector.slang | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index a80198c3f0a..7840a34f99d 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -1,7 +1,7 @@ //TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute -emit-spirv-directly //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -//TEST_INPUT:ubuffer(stride=4, count=8):out,name=outputBuffer +//TEST_INPUT:ubuffer(stride=4, count=12):out,name=outputBuffer RWStructuredBuffer outputBuffer; //TEST_INPUT:ubuffer(stride=4, count=4):name=workBuffer @@ -41,23 +41,34 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) originalMax4 = __atomic_max(workBuffer4[2], half4(1.0h, 2.0h, 3.0h, 4.0h)); originalExchange4 = __atomic_exchange(workBuffer4[3], half4(5.0h, 6.0h, 7.0h, 8.0h)); + 
half4 original4Sum = originalAdd4 + originalMin4 + originalMax4 + originalExchange4; + half4 final4Sum = workBuffer4[0] + workBuffer4[1] + workBuffer4[2] + workBuffer4[3]; + outputBuffer[0] = float(originalAdd.x + originalMin.x + originalMax.x + originalExchange.x); outputBuffer[1] = float(originalAdd.y + originalMin.y + originalMax.y + originalExchange.y); - outputBuffer[2] = float(originalAdd4.x + originalMin4.x + originalMax4.x + originalExchange4.x); - outputBuffer[3] = float(originalAdd4.w + originalMin4.w + originalMax4.w + originalExchange4.w); - outputBuffer[4] = float(workBuffer[0].x + workBuffer[1].x + workBuffer[2].x + workBuffer[3].x); - outputBuffer[5] = float(workBuffer[0].y + workBuffer[1].y + workBuffer[2].y + workBuffer[3].y); - outputBuffer[6] = float(workBuffer4[0].x + workBuffer4[1].x + workBuffer4[2].x + workBuffer4[3].x); - outputBuffer[7] = float(workBuffer4[0].w + workBuffer4[1].w + workBuffer4[2].w + workBuffer4[3].w); + outputBuffer[2] = float(original4Sum.x); + outputBuffer[3] = float(original4Sum.y); + outputBuffer[4] = float(original4Sum.z); + outputBuffer[5] = float(original4Sum.w); + outputBuffer[6] = float(workBuffer[0].x + workBuffer[1].x + workBuffer[2].x + workBuffer[3].x); + outputBuffer[7] = float(workBuffer[0].y + workBuffer[1].y + workBuffer[2].y + workBuffer[3].y); + outputBuffer[8] = float(final4Sum.x); + outputBuffer[9] = float(final4Sum.y); + outputBuffer[10] = float(final4Sum.z); + outputBuffer[11] = float(final4Sum.w); // RUNTIME: type: float // RUNTIME-NEXT: 20.000000 // RUNTIME-NEXT: 15.000000 // RUNTIME-NEXT: 19.000000 + // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 27.000000 // RUNTIME-NEXT: 19.000000 // RUNTIME-NEXT: 12.000000 // RUNTIME-NEXT: 13.000000 // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 23.000000 // RUNTIME-NEXT: 22.000000 } From a16695a54b161c50911b7e54b5632341bf02e4b4 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 
2026 10:24:41 -0700 Subject: [PATCH 49/80] Cover half4 vector atomic diagnostics --- .../byte-address-half-atomics-capability.slang | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 91ba580cac1..b81a1bf338f 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -9,6 +9,7 @@ // atomics; `NO_VECTOR` means only the scalar fp16 atomic capability is enabled. //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC +//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability 
spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH @@ -24,6 +25,7 @@ //TEST:SIMPLE(filecheck=IGNORE_CAPS): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -ignore-capabilities -capability spirv_1_5 //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC +//TEST:SIMPLE(filecheck=VECTOR4): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_ATOMIC //TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX //TEST:SIMPLE(filecheck=VECTOR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_EXCHANGE //TEST:SIMPLE(filecheck=SCALAR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_DIRECT_SCALAR_EXCHANGE @@ -39,6 +41,8 @@ RWStructuredBuffer outputBuffer; RWStructuredBuffer halfBuffer; //TEST_INPUT:ubuffer(stride=4, count=4):name=vectorBuffer RWStructuredBuffer vectorBuffer; +//TEST_INPUT:ubuffer(stride=8, count=4):name=vector4Buffer +RWStructuredBuffer vector4Buffer; #ifdef TEST_UNSUPPORTED_VECTOR_WIDTH RWStructuredBuffer unsupportedVectorBuffer; #endif @@ -113,6 +117,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); // DIRECT_NO_VECTOR: entry point uses capabilities not in specified profile // DIRECT_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' +#elif defined(TEST_DIRECT_VECTOR4_ATOMIC) + __atomic_add(vector4Buffer[0], half4(1.0h, 2.0h, 
3.0h, 4.0h)); +// DIRECT_VECTOR4_NO_VECTOR: entry point uses capabilities not in specified profile +// DIRECT_VECTOR4_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' #elif defined(TEST_DIRECT_VECTOR_MIN_MAX) __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); // DIRECT_MIN_MAX_NO_VECTOR: entry point uses capabilities not in specified profile @@ -200,6 +208,14 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" // VECTOR: OpAtomicFAddEXT %v2half +// VECTOR4-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4: OpCapability AtomicFloat16VectorNV +// VECTOR4: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR4: OpAtomicFAddEXT %v4half +// VECTOR4-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" + // VECTOR_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT // VECTOR_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // VECTOR_MIN_MAX: OpCapability AtomicFloat16VectorNV From 5ac48a086357492c6c5b630ea46d17ef292765b7 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 10:33:51 -0700 Subject: [PATCH 50/80] Add half4 vector atomic runtime coverage --- ...byte-address-half-atomics-capability.slang | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index b81a1bf338f..74d7b387a01 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -8,6 +8,7 @@ // Direct half-vector `__atomic_*` checks. 
`DIRECT` selects direct structured-buffer // atomics; `NO_VECTOR` means only the scalar fp16 atomic capability is enabled. //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT4):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT_VECTOR4 //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX @@ -113,6 +114,28 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // RUNTIME_DIRECT-NEXT: 9.000000 // RUNTIME_DIRECT-NEXT: 6.000000 // RUNTIME_DIRECT-NEXT: 7.000000 +#elif defined(TEST_RUNTIME_DIRECT_VECTOR4) + vector4Buffer[0] = half4(2.0h, 4.0h, 6.0h, 8.0h); + + half4 originalAdd = __atomic_add(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); + + outputBuffer[0] = float(originalAdd.x); + outputBuffer[1] = float(originalAdd.y); + outputBuffer[2] = float(originalAdd.z); + outputBuffer[3] = float(originalAdd.w); + outputBuffer[4] = float(vector4Buffer[0].x); + outputBuffer[5] = float(vector4Buffer[0].y); + outputBuffer[6] = float(vector4Buffer[0].z); + outputBuffer[7] = float(vector4Buffer[0].w); + // RUNTIME_DIRECT4: 
type: float + // RUNTIME_DIRECT4-NEXT: 2.000000 + // RUNTIME_DIRECT4-NEXT: 4.000000 + // RUNTIME_DIRECT4-NEXT: 6.000000 + // RUNTIME_DIRECT4-NEXT: 8.000000 + // RUNTIME_DIRECT4-NEXT: 3.000000 + // RUNTIME_DIRECT4-NEXT: 6.000000 + // RUNTIME_DIRECT4-NEXT: 9.000000 + // RUNTIME_DIRECT4-NEXT: 12.000000 #elif defined(TEST_DIRECT_VECTOR_ATOMIC) __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); // DIRECT_NO_VECTOR: entry point uses capabilities not in specified profile From bb824e1006bacfa17d29f4519e2b2f2a0520b704 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 11:46:53 -0700 Subject: [PATCH 51/80] Gate fp16 atomic runtime tests --- .../byte-address-half-atomics-capability.slang | 8 +++++--- tests/spirv/atomic-float16-vector.slang | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 74d7b387a01..5a7de86b567 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,14 +1,16 @@ // Scalar fp16 add checks. `NO_FP16_ATOMIC` has no fp16 atomic capability; `VECTOR_ONLY` // has the vector capability but intentionally lacks the scalar spvAtomicFloat16AddEXT. 
-//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_SCALAR):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16AddEXT -output-using-type -xslang -DTEST_RUNTIME_SCALAR +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_SCALAR):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16AddEXT -render-feature atomic-half -output-using-type -xslang -DTEST_RUNTIME_SCALAR //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 //DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV // Direct half-vector `__atomic_*` checks. `DIRECT` selects direct structured-buffer // atomics; `NO_VECTOR` means only the scalar fp16 atomic capability is enabled. -//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT -//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT4):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT_VECTOR4 +// RHI does not yet expose a feature gate for VK_NV_shader_atomic_float16_vector, so these +// runtime checks stay disabled while the active tests below cover SPIR-V codegen/diagnostics. 
+//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT4):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT_VECTOR4 //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index 7840a34f99d..f73610f0234 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -1,5 +1,7 @@ //TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute -emit-spirv-directly -//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type +// RHI does not yet expose a feature gate for VK_NV_shader_atomic_float16_vector, so this runtime +// check stays disabled while the active SIMPLE test verifies SPIR-V capability and instruction use. 
+//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type //TEST_INPUT:ubuffer(stride=4, count=12):out,name=outputBuffer RWStructuredBuffer outputBuffer; From b5345fcd12b04a51b271db6b5c64c31b63d13cc5 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 12:25:52 -0700 Subject: [PATCH 52/80] Diagnose unsupported fp16 vector atomics --- source/slang/slang-diagnostics.lua | 7 +++++ source/slang/slang-emit-spirv.cpp | 31 +++++++++++++++++-- ...byte-address-half-atomics-capability.slang | 9 ++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/source/slang/slang-diagnostics.lua b/source/slang/slang-diagnostics.lua index 2ea268f0b2b..09f26a75eaf 100644 --- a/source/slang/slang-diagnostics.lua +++ b/source/slang/slang-diagnostics.lua @@ -4815,6 +4815,13 @@ err( span { loc = "location", message = "SPIR-V fp16 vector atomics only support half2 and half4." } ) +err( + "spirv-fp16-vector-atomic-unsupported-operation", + 50014, + "invalid SPIR-V fp16 vector atomic operation", + span { loc = "location", message = "SPIR-V fp16 vector atomics only support add, min, max, and exchange operations." 
} +) + err( "invalid-mesh-stage-output-topology", 50060, diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 25eee51c4d2..8860a511802 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4517,16 +4517,37 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } - auto maybeRequireFp16VectorAtomicCapability = [&](IRType* valueType) + auto isFp16VectorAtomicType = [](IRType* valueType) { auto vectorType = as(valueType); if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) return false; + return true; + }; + + auto maybeDiagnoseUnsupportedFp16VectorAtomicOperation = [&](IRType* valueType) + { + if (!isFp16VectorAtomicType(valueType)) + return false; + + m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{ + .location = atomicInst->sourceLoc}); + return true; + }; + + auto maybeRequireFp16VectorAtomicCapability = [&](IRType* valueType) + { + if (!isFp16VectorAtomicType(valueType)) + return false; + + auto vectorType = as(valueType); auto elementCountInst = as(vectorType->getElementCount()); if (!elementCountInst) { - SLANG_UNEXPECTED("non-IntLit vector element count reached SPIR-V fp16 atomic emit"); + m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = atomicInst->sourceLoc}); + return true; } auto elementCount = elementCountInst->getValue(); @@ -4545,6 +4566,12 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex switch (op) { + case SpvOpAtomicLoad: + case SpvOpAtomicStore: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + maybeDiagnoseUnsupportedFp16VectorAtomicOperation(atomicValueType); + break; case SpvOpAtomicExchange: maybeRequireFp16VectorAtomicCapability(atomicValueType); break; diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang 
b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 5a7de86b567..4f257abddf0 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -16,6 +16,7 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH +//DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_COMPARE_EXCHANGE,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_COMPARE_EXCHANGE // Pointer-form fp16 vector checks. These paths are emitted as vector atomics, so // they must diagnose missing spvAtomicFloat16VectorNV when only scalar fp16 add is enabled. @@ -166,6 +167,14 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) __atomic_add(unsupportedVectorBuffer[0], half3(1.0h, 2.0h, 3.0h)); // UNSUPPORTED_VECTOR_WIDTH: error[E50013]: invalid SPIR-V fp16 vector atomic width // UNSUPPORTED_VECTOR_WIDTH: SPIR-V fp16 vector atomics only support half2 and half4. 
+#elif defined(TEST_UNSUPPORTED_VECTOR_COMPARE_EXCHANGE) + half2 originalValue = __atomic_compare_exchange( + vectorBuffer[0], + half2(1.0h, 2.0h), + half2(3.0h, 4.0h)); +// UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: error[E50014]: invalid SPIR-V fp16 vector atomic operation +// UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: SPIR-V fp16 vector atomics only support add, min, max, and exchange operations. + outputBuffer[0] = float(originalValue.x + originalValue.y); #elif defined(TEST_EMULATED) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); From 4287d883d7559037e78d9c7dfc9a5fc169b856e8 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 12:47:07 -0700 Subject: [PATCH 53/80] Avoid invalid fp16 atomic emission --- source/slang/slang-emit-spirv.cpp | 64 +++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 8860a511802..e547bee3ecb 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4500,7 +4500,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } - void ensureAtomicCapability(IRInst* atomicInst, SpvOp op) + bool ensureAtomicCapability(IRInst* atomicInst, SpvOp op) { IRType* atomicValueType = atomicInst->getDataType(); auto typeOp = atomicValueType->getOp(); @@ -4526,20 +4526,20 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex return true; }; - auto maybeDiagnoseUnsupportedFp16VectorAtomicOperation = [&](IRType* valueType) + auto checkSupportedFp16VectorAtomicOperation = [&](IRType* valueType) { if (!isFp16VectorAtomicType(valueType)) - return false; + return true; m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{ .location = atomicInst->sourceLoc}); - return true; + return false; }; auto maybeRequireFp16VectorAtomicCapability = [&](IRType* valueType) 
{ if (!isFp16VectorAtomicType(valueType)) - return false; + return true; auto vectorType = as(valueType); auto elementCountInst = as(vectorType->getElementCount()); @@ -4547,7 +4547,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex { m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ .location = atomicInst->sourceLoc}); - return true; + return false; } auto elementCount = elementCountInst->getValue(); @@ -4555,7 +4555,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex { m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ .location = atomicInst->sourceLoc}); - return true; + return false; } maybeDiagnoseCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); @@ -4570,10 +4570,12 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex case SpvOpAtomicStore: case SpvOpAtomicCompareExchange: case SpvOpAtomicCompareExchangeWeak: - maybeDiagnoseUnsupportedFp16VectorAtomicOperation(atomicValueType); + if (!checkSupportedFp16VectorAtomicOperation(atomicValueType)) + return false; break; case SpvOpAtomicExchange: - maybeRequireFp16VectorAtomicCapability(atomicValueType); + if (!maybeRequireFp16VectorAtomicCapability(atomicValueType)) + return false; break; case SpvOpAtomicFAddEXT: { @@ -4592,7 +4594,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT); break; case kIROp_VectorType: - maybeRequireFp16VectorAtomicCapability(atomicValueType); + if (!maybeRequireFp16VectorAtomicCapability(atomicValueType)) + return false; break; } } @@ -4615,7 +4618,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT); break; case kIROp_VectorType: - maybeRequireFp16VectorAtomicCapability(atomicValueType); + if (!maybeRequireFp16VectorAtomicCapability(atomicValueType)) + 
return false; break; } } @@ -4628,6 +4632,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityInt64Atomics); break; } + return true; } SpvInst* emitDebugVarDeclaration(SpvInstParent* parent, IRDebugVar* debugVar) @@ -4917,6 +4922,13 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitLocalInst(SpvInstParent* parent, IRInst* inst) { SpvInst* result = nullptr; + auto emitUndefResultForDiagnosedInst = [&]() -> SpvInst* + { + auto dataType = inst->getDataType(); + if (!dataType || dataType->getOp() == kIROp_VoidType) + return nullptr; + return emitOpUndef(parent, inst, dataType); + }; // First, try to handle debug instructions with centralized debug level checking if (processDebugLocalInst(parent, inst, &result)) @@ -5491,6 +5503,11 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1), ptr); + if (!ensureAtomicCapability(inst, SpvOpAtomicLoad)) + { + result = emitUndefResultForDiagnosedInst(); + break; + } result = emitOpAtomicLoad( parent, inst, @@ -5498,7 +5515,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex ptr, memoryScope, memorySemantics); - ensureAtomicCapability(inst, SpvOpAtomicLoad); } else { @@ -5520,9 +5536,13 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); + if (!ensureAtomicCapability(inst, SpvOpAtomicStore)) + { + result = emitUndefResultForDiagnosedInst(); + break; + } result = emitOpAtomicStore(parent, inst, ptr, memoryScope, memorySemantics, val); - ensureAtomicCapability(inst, SpvOpAtomicStore); 
} else { @@ -5544,6 +5564,11 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); + if (!ensureAtomicCapability(inst, SpvOpAtomicExchange)) + { + result = emitUndefResultForDiagnosedInst(); + break; + } result = emitOpAtomicExchange( parent, inst, @@ -5552,7 +5577,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex memoryScope, memorySemantics, val); - ensureAtomicCapability(inst, SpvOpAtomicExchange); } else { @@ -5572,6 +5596,11 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex emitMemorySemanticMask(inst->getOperand(3), inst->getOperand(0)); const auto memorySemanticsUnequal = emitMemorySemanticMask(inst->getOperand(4), inst->getOperand(0)); + if (!ensureAtomicCapability(inst, SpvOpAtomicCompareExchange)) + { + result = emitUndefResultForDiagnosedInst(); + break; + } result = emitOpAtomicCompareExchange( parent, inst, @@ -5582,7 +5611,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex memorySemanticsUnequal, inst->getOperand(2), inst->getOperand(1)); - ensureAtomicCapability(inst, SpvOpAtomicCompareExchange); } break; case kIROp_AtomicAdd: @@ -5603,6 +5631,11 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0)); bool negateOperand = false; auto spvOp = getSpvAtomicOp(inst, negateOperand); + if (!ensureAtomicCapability(inst, spvOp)) + { + result = emitUndefResultForDiagnosedInst(); + break; + } auto operand = inst->getOperand(1); if (negateOperand) { @@ -5619,7 +5652,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex memoryScope, memorySemantics, operand); - ensureAtomicCapability(inst, spvOp); } break; case kIROp_ControlBarrier: 
From 214d377c9308038a0cde308b213dfcea666504ca Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Tue, 2 Jun 2026 13:44:01 -0700 Subject: [PATCH 54/80] Tighten fp16 atomic coverage --- .../byte-address-half-atomics-capability.slang | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 4f257abddf0..45fb5742c91 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -18,10 +18,11 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_COMPARE_EXCHANGE,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_COMPARE_EXCHANGE -// Pointer-form fp16 vector checks. These paths are emitted as vector atomics, so +// Emulated fp16 vector checks. These paths are emitted as vector atomics, so // they must diagnose missing spvAtomicFloat16VectorNV when only scalar fp16 add is enabled. -// Runtime coverage for these pointer-form helpers would require SPIR-V variable pointers, +// Runtime coverage for the pointer-form helpers would require SPIR-V variable pointers, // which existing tests keep disabled on current GCP runners. 
+//DIAGNOSTIC_TEST:SIMPLE(diag=BUFFER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability cuda_hlsl_spirv+sm_5_0+spvAtomicFloat16AddEXT -DTEST_BUFFER_EMULATED_NO_VECTOR //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_EMULATED //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_F16X2_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_F16X2 @@ -175,6 +176,12 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: error[E50014]: invalid SPIR-V fp16 vector atomic operation // UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: SPIR-V fp16 vector atomics only support add, min, max, and exchange operations. 
outputBuffer[0] = float(originalValue.x + originalValue.y); +#elif defined(TEST_BUFFER_EMULATED_NO_VECTOR) + half originalValue; + tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); +// BUFFER_EMULATED_NO_VECTOR: entry point uses capabilities not in specified profile +// BUFFER_EMULATED_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' + outputBuffer[0] = float(originalValue); #elif defined(TEST_EMULATED) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); @@ -292,4 +299,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 // EMULATED-NOT: OpAtomicFAddEXT -// CUDA-COUNT-5: atomicAdd( +// CUDA-NOT: atomicAdd((half2 * +// CUDA-NOT: atomicAdd((__half2 * +// CUDA-COUNT-5: atomicAdd((&{{.*}}), __half(1.0)) +// CUDA-NOT: atomicAdd((half2 * +// CUDA-NOT: atomicAdd((__half2 * From 65d91a702858563444c3a7571bcd1c5ead072e86 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Fri, 5 Jun 2026 19:54:15 -0700 Subject: [PATCH 55/80] Prefer fp16 vector atomic fallback --- .../a3-02-reference-capability-atoms.md | 1 + source/slang/hlsl.meta.slang | 18 +++++++++-- source/slang/slang-capabilities.capdef | 3 +- ...byte-address-half-atomics-capability.slang | 32 +++++++++++-------- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 8cb7d0bd8d9..81e1e7c5df2 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -728,6 +728,7 @@ Extensions `spvAtomicFloat16VectorNV` > Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. +> Implies scalar atomic float 16 add support. `spvAtomicFloat32AddEXT` > Represents the SPIR-V capability for atomic float 32 add operations. 
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 045ce719b20..dbfb5ebe934 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6516,8 +6516,8 @@ $} /// @param value The value to add to the value at `byteAddress`. /// @param originalValue The original value at `byteAddress` before the add operation. /// @remarks For SPIR-V, this function requires `SPV_EXT_shader_atomic_float16_add` - /// and maps to `OpAtomicFAdd` on a `half`. For targets that only support - /// `SPV_NV_shader_atomic_fp16_vector`, use `InterlockedAddF16Emulated` instead. + /// and maps to `OpAtomicFAdd` on a `half`. When `SPV_NV_shader_atomic_fp16_vector` + /// is available, it uses the half-vector atomic path instead. /// /// For HLSL, this function translates to an NVAPI call /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function @@ -6543,6 +6543,20 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; + case spvAtomicFloat16VectorNV: + { + let buf = __getEquivalentStructuredBuffer(this); + if ((byteAddress & 2) == 0) + { + originalValue = __atomic_add(buf[byteAddress/4], half2(value, half(0.0))).x; + } + else + { + originalValue = __atomic_add(buf[byteAddress/4], half2(half(0.0), value)).y; + } + return; + } + case spvAtomicFloat16AddEXT: default: { let buf = __getEquivalentStructuredBuffer(this); diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 8a85010171e..12e4d8f0208 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -705,8 +705,9 @@ def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; /// Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. +/// Implies scalar atomic float 16 add support. 
/// [EXT] -def spvAtomicFloat16VectorNV : SPV_NV_shader_atomic_fp16_vector; +def spvAtomicFloat16VectorNV : SPV_NV_shader_atomic_fp16_vector + spvAtomicFloat16AddEXT; /// Represents the SPIR-V capability for atomic float 64 add operations. /// [EXT] diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 45fb5742c91..4f00637cb50 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -1,9 +1,9 @@ -// Scalar fp16 add checks. `NO_FP16_ATOMIC` has no fp16 atomic capability; `VECTOR_ONLY` -// has the vector capability but intentionally lacks the scalar spvAtomicFloat16AddEXT. +// Scalar/vector fp16 add checks. `NO_FP16_ATOMIC` has no fp16 atomic capability; +// `VECTOR_F16` has the vector capability, which implies scalar spvAtomicFloat16AddEXT. //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_SCALAR):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16AddEXT -render-feature atomic-half -output-using-type -xslang -DTEST_RUNTIME_SCALAR //TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT //DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 -//DIAGNOSTIC_TEST:SIMPLE(diag=VECTOR_ONLY,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=VECTOR_F16): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV // Direct half-vector `__atomic_*` checks. 
`DIRECT` selects direct structured-buffer // atomics; `NO_VECTOR` means only the scalar fp16 atomic capability is enabled. @@ -205,8 +205,6 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); // NO_FP16_ATOMIC: entry point uses capabilities not in specified profile // NO_FP16_ATOMIC: Missing capabilities are: 'spvAtomicFloat16AddEXT' -// VECTOR_ONLY: entry point uses capabilities not in specified profile -// VECTOR_ONLY: Missing capabilities are: 'spvAtomicFloat16AddEXT' // IGNORE_CAPS-NOT: entry point uses capabilities not in specified profile outputBuffer[0] = float(originalValue); tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); @@ -232,16 +230,22 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // SCALAR-NOT: OpCapability AtomicFloat16VectorNV // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// BOTH-NOT: OpCapability AtomicFloat16VectorNV -// BOTH-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// BOTH: OpCapability AtomicFloat16AddEXT -// BOTH: OpExtension "SPV_EXT_shader_atomic_float16_add" -// BOTH-NOT: OpAtomicFAddEXT %v2half -// BOTH-COUNT-5: OpAtomicFAddEXT %half -// BOTH-NOT: OpAtomicFAddEXT %v2half +// VECTOR_F16-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_F16-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_F16: OpCapability AtomicFloat16VectorNV +// VECTOR_F16: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_F16-NOT: OpAtomicFAddEXT %half +// VECTOR_F16-COUNT-6: OpAtomicFAddEXT %v2half +// VECTOR_F16-NOT: OpAtomicFAddEXT %half + +// BOTH-NOT: OpCapability AtomicFloat16AddEXT +// BOTH-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// BOTH: OpCapability AtomicFloat16VectorNV +// BOTH: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// BOTH-NOT: OpAtomicFAddEXT %half +// BOTH-COUNT-6: OpAtomicFAddEXT %v2half +// BOTH-NOT: OpAtomicFAddEXT %half // BOTH-NOT: OpAtomicCompareExchange -// BOTH-NOT: OpCapability 
AtomicFloat16VectorNV -// BOTH-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" // VECTOR-NOT: OpCapability AtomicFloat16AddEXT // VECTOR-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" From e70efa8ec04647074c02fb3e7af97200f8556ba3 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 01:41:26 -0700 Subject: [PATCH 56/80] Extract fp16 atomic helpers --- source/slang/slang-emit-spirv.cpp | 132 ++++++++++++++++-------------- 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index e547bee3ecb..a55af7eade4 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4500,6 +4500,55 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } + static bool isFp16VectorAtomicType(IRType* valueType) + { + auto vectorType = as(valueType); + if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) + return false; + + return true; + } + + bool checkSupportedFp16VectorAtomicOperation(IRInst* atomicInst, IRType* valueType) + { + if (!isFp16VectorAtomicType(valueType)) + return true; + + m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{ + .location = atomicInst->sourceLoc}); + return false; + } + + bool maybeRequireFp16VectorAtomicCapability(IRInst* atomicInst, IRType* valueType) + { + if (!isFp16VectorAtomicType(valueType)) + return true; + + auto vectorType = as(valueType); + auto elementCountInst = as(vectorType->getElementCount()); + if (!elementCountInst) + { + m_sink->diagnose( + Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = atomicInst->sourceLoc}); + return false; + } + + auto elementCount = elementCountInst->getValue(); + if (elementCount != 2 && elementCount != 4) + { + m_sink->diagnose( + Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = atomicInst->sourceLoc}); + return false; + } + + 
maybeDiagnoseCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); + ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); + requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + return true; + } + bool ensureAtomicCapability(IRInst* atomicInst, SpvOp op) { IRType* atomicValueType = atomicInst->getDataType(); @@ -4517,64 +4566,17 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } - auto isFp16VectorAtomicType = [](IRType* valueType) - { - auto vectorType = as(valueType); - if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) - return false; - - return true; - }; - - auto checkSupportedFp16VectorAtomicOperation = [&](IRType* valueType) - { - if (!isFp16VectorAtomicType(valueType)) - return true; - - m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{ - .location = atomicInst->sourceLoc}); - return false; - }; - - auto maybeRequireFp16VectorAtomicCapability = [&](IRType* valueType) - { - if (!isFp16VectorAtomicType(valueType)) - return true; - - auto vectorType = as(valueType); - auto elementCountInst = as(vectorType->getElementCount()); - if (!elementCountInst) - { - m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ - .location = atomicInst->sourceLoc}); - return false; - } - - auto elementCount = elementCountInst->getValue(); - if (elementCount != 2 && elementCount != 4) - { - m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ - .location = atomicInst->sourceLoc}); - return false; - } - - maybeDiagnoseCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); - ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); - requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); - return true; - }; - switch (op) { case SpvOpAtomicLoad: case SpvOpAtomicStore: case SpvOpAtomicCompareExchange: case SpvOpAtomicCompareExchangeWeak: - if 
(!checkSupportedFp16VectorAtomicOperation(atomicValueType)) + if (!checkSupportedFp16VectorAtomicOperation(atomicInst, atomicValueType)) return false; break; case SpvOpAtomicExchange: - if (!maybeRequireFp16VectorAtomicCapability(atomicValueType)) + if (!maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType)) return false; break; case SpvOpAtomicFAddEXT: @@ -4594,7 +4596,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT); break; case kIROp_VectorType: - if (!maybeRequireFp16VectorAtomicCapability(atomicValueType)) + if (!maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType)) return false; break; } @@ -4618,7 +4620,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT); break; case kIROp_VectorType: - if (!maybeRequireFp16VectorAtomicCapability(atomicValueType)) + if (!maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType)) return false; break; } @@ -4635,6 +4637,17 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex return true; } + static SpvInst* emitUndefResultForDiagnosedInst( + SPIRVEmitContext* context, + SpvInstParent* parent, + IRInst* inst) + { + auto dataType = inst->getDataType(); + if (!dataType || dataType->getOp() == kIROp_VoidType) + return nullptr; + return context->emitOpUndef(parent, inst, dataType); + } + SpvInst* emitDebugVarDeclaration(SpvInstParent* parent, IRDebugVar* debugVar) { // For every DebugVar, we will declare: @@ -4922,13 +4935,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitLocalInst(SpvInstParent* parent, IRInst* inst) { SpvInst* result = nullptr; - auto emitUndefResultForDiagnosedInst = [&]() -> SpvInst* - { - auto dataType = inst->getDataType(); - if (!dataType || dataType->getOp() == kIROp_VoidType) - return nullptr; - return 
emitOpUndef(parent, inst, dataType); - }; // First, try to handle debug instructions with centralized debug level checking if (processDebugLocalInst(parent, inst, &result)) @@ -5505,7 +5511,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1), ptr); if (!ensureAtomicCapability(inst, SpvOpAtomicLoad)) { - result = emitUndefResultForDiagnosedInst(); + result = emitUndefResultForDiagnosedInst(this, parent, inst); break; } result = emitOpAtomicLoad( @@ -5538,7 +5544,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); if (!ensureAtomicCapability(inst, SpvOpAtomicStore)) { - result = emitUndefResultForDiagnosedInst(); + result = emitUndefResultForDiagnosedInst(this, parent, inst); break; } result = @@ -5566,7 +5572,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); if (!ensureAtomicCapability(inst, SpvOpAtomicExchange)) { - result = emitUndefResultForDiagnosedInst(); + result = emitUndefResultForDiagnosedInst(this, parent, inst); break; } result = emitOpAtomicExchange( @@ -5598,7 +5604,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex emitMemorySemanticMask(inst->getOperand(4), inst->getOperand(0)); if (!ensureAtomicCapability(inst, SpvOpAtomicCompareExchange)) { - result = emitUndefResultForDiagnosedInst(); + result = emitUndefResultForDiagnosedInst(this, parent, inst); break; } result = emitOpAtomicCompareExchange( @@ -5633,7 +5639,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex auto spvOp = getSpvAtomicOp(inst, negateOperand); if (!ensureAtomicCapability(inst, spvOp)) { - result = emitUndefResultForDiagnosedInst(); + result = emitUndefResultForDiagnosedInst(this, 
parent, inst); break; } auto operand = inst->getOperand(1); From 6bdba7abdd3f2358b591d983fdeada2a03277654 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 01:44:23 -0700 Subject: [PATCH 57/80] Fix fp16 atomic helper formatting --- source/slang/slang-emit-spirv.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index a55af7eade4..0a03f5fee2a 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4528,18 +4528,16 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex auto elementCountInst = as(vectorType->getElementCount()); if (!elementCountInst) { - m_sink->diagnose( - Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ - .location = atomicInst->sourceLoc}); + m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = atomicInst->sourceLoc}); return false; } auto elementCount = elementCountInst->getValue(); if (elementCount != 2 && elementCount != 4) { - m_sink->diagnose( - Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ - .location = atomicInst->sourceLoc}); + m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = atomicInst->sourceLoc}); return false; } From 65dcf54f037db3eac6720b49255faf7cdc954a30 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:27:16 -0700 Subject: [PATCH 58/80] Validate fp16 vector atomics before SPIR-V emit --- source/slang/slang-emit-spirv.cpp | 87 +++++------------------- source/slang/slang-ir-spirv-legalize.cpp | 2 +- source/slang/slang-ir-validate.cpp | 86 ++++++++++++++++++++++- source/slang/slang-ir-validate.h | 5 ++ 4 files changed, 108 insertions(+), 72 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 0a03f5fee2a..74bc7ffcee7 100644 --- 
a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4509,45 +4509,28 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex return true; } - bool checkSupportedFp16VectorAtomicOperation(IRInst* atomicInst, IRType* valueType) + void maybeRequireFp16VectorAtomicCapability(IRInst* atomicInst, IRType* valueType) { if (!isFp16VectorAtomicType(valueType)) - return true; - - m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{ - .location = atomicInst->sourceLoc}); - return false; - } - - bool maybeRequireFp16VectorAtomicCapability(IRInst* atomicInst, IRType* valueType) - { - if (!isFp16VectorAtomicType(valueType)) - return true; + return; auto vectorType = as(valueType); auto elementCountInst = as(vectorType->getElementCount()); + SLANG_ASSERT(elementCountInst); if (!elementCountInst) - { - m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ - .location = atomicInst->sourceLoc}); - return false; - } + return; auto elementCount = elementCountInst->getValue(); + SLANG_ASSERT(elementCount == 2 || elementCount == 4); if (elementCount != 2 && elementCount != 4) - { - m_sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ - .location = atomicInst->sourceLoc}); - return false; - } + return; maybeDiagnoseCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); - return true; } - bool ensureAtomicCapability(IRInst* atomicInst, SpvOp op) + void ensureAtomicCapability(IRInst* atomicInst, SpvOp op) { IRType* atomicValueType = atomicInst->getDataType(); auto typeOp = atomicValueType->getOp(); @@ -4570,12 +4553,10 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex case SpvOpAtomicStore: case SpvOpAtomicCompareExchange: case SpvOpAtomicCompareExchangeWeak: - if 
(!checkSupportedFp16VectorAtomicOperation(atomicInst, atomicValueType)) - return false; + SLANG_ASSERT(!isFp16VectorAtomicType(atomicValueType)); break; case SpvOpAtomicExchange: - if (!maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType)) - return false; + maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType); break; case SpvOpAtomicFAddEXT: { @@ -4594,8 +4575,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT); break; case kIROp_VectorType: - if (!maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType)) - return false; + maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType); break; } } @@ -4618,8 +4598,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT); break; case kIROp_VectorType: - if (!maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType)) - return false; + maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType); break; } } @@ -4632,18 +4611,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityInt64Atomics); break; } - return true; - } - - static SpvInst* emitUndefResultForDiagnosedInst( - SPIRVEmitContext* context, - SpvInstParent* parent, - IRInst* inst) - { - auto dataType = inst->getDataType(); - if (!dataType || dataType->getOp() == kIROp_VoidType) - return nullptr; - return context->emitOpUndef(parent, inst, dataType); } SpvInst* emitDebugVarDeclaration(SpvInstParent* parent, IRDebugVar* debugVar) @@ -5507,11 +5474,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1), ptr); - if (!ensureAtomicCapability(inst, 
SpvOpAtomicLoad)) - { - result = emitUndefResultForDiagnosedInst(this, parent, inst); - break; - } + ensureAtomicCapability(inst, SpvOpAtomicLoad); result = emitOpAtomicLoad( parent, inst, @@ -5540,11 +5503,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); - if (!ensureAtomicCapability(inst, SpvOpAtomicStore)) - { - result = emitUndefResultForDiagnosedInst(this, parent, inst); - break; - } + ensureAtomicCapability(inst, SpvOpAtomicStore); result = emitOpAtomicStore(parent, inst, ptr, memoryScope, memorySemantics, val); } @@ -5568,11 +5527,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); - if (!ensureAtomicCapability(inst, SpvOpAtomicExchange)) - { - result = emitUndefResultForDiagnosedInst(this, parent, inst); - break; - } + ensureAtomicCapability(inst, SpvOpAtomicExchange); result = emitOpAtomicExchange( parent, inst, @@ -5600,11 +5555,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex emitMemorySemanticMask(inst->getOperand(3), inst->getOperand(0)); const auto memorySemanticsUnequal = emitMemorySemanticMask(inst->getOperand(4), inst->getOperand(0)); - if (!ensureAtomicCapability(inst, SpvOpAtomicCompareExchange)) - { - result = emitUndefResultForDiagnosedInst(this, parent, inst); - break; - } + ensureAtomicCapability(inst, SpvOpAtomicCompareExchange); result = emitOpAtomicCompareExchange( parent, inst, @@ -5635,11 +5586,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0)); bool negateOperand = false; auto spvOp = 
getSpvAtomicOp(inst, negateOperand); - if (!ensureAtomicCapability(inst, spvOp)) - { - result = emitUndefResultForDiagnosedInst(this, parent, inst); - break; - } + ensureAtomicCapability(inst, spvOp); auto operand = inst->getOperand(1); if (negateOperand) { @@ -11518,6 +11465,8 @@ SlangResult emitSPIRVFromIR( SPIRVEmitContext context(irModule, codeGenContext->getTargetProgram(), sink); legalizeIRForSPIRV(&context, irModule, irEntryPoints, codeGenContext); + if (sink->getErrorCount() != 0) + return SLANG_FAIL; #if 0 { diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index d4c54dd7c75..1bff3c21c03 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -2659,7 +2659,7 @@ struct SPIRVLegalizationContext : public SourceEmitterBase // For SPIR-V, we don't skip this validation, because we might then be generating // invalid SPIR-V. bool skipFuncParamValidation = false; - validateAtomicOperations(skipFuncParamValidation, m_sink, m_module->getModuleInst()); + validateSPIRVAtomicOperations(skipFuncParamValidation, m_sink, m_module->getModuleInst()); } void updateFunctionTypes() diff --git a/source/slang/slang-ir-validate.cpp b/source/slang/slang-ir-validate.cpp index fe68fc58717..15a47e80162 100644 --- a/source/slang/slang-ir-validate.cpp +++ b/source/slang/slang-ir-validate.cpp @@ -526,7 +526,77 @@ static bool isValidAtomicDest(bool skipFuncParamValidation, IRInst* dst) return false; } -void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst) +static IRType* getAtomicOperationValueType(IRInst* inst) +{ + auto valueType = inst->getDataType(); + if (valueType && valueType->getOp() != kIROp_VoidType) + return valueType; + + IRBuilder builder(inst); + auto ptrValueType = tryGetPointedToType(&builder, inst->getOperand(0)->getDataType()); + if (auto atomicType = as(ptrValueType)) + return atomicType->getElementType(); + return ptrValueType; +} + 
+static IRVectorType* getFp16VectorAtomicType(IRInst* inst) +{ + auto valueType = getAtomicOperationValueType(inst); + auto vectorType = as(valueType); + if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) + return nullptr; + return vectorType; +} + +static void validateSPIRVFp16VectorAtomicOperation(DiagnosticSink* sink, IRInst* inst) +{ + auto vectorType = getFp16VectorAtomicType(inst); + if (!vectorType) + return; + + switch (inst->getOp()) + { + case kIROp_AtomicLoad: + case kIROp_AtomicStore: + case kIROp_AtomicCompareExchange: + sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{ + .location = inst->sourceLoc}); + return; + + case kIROp_AtomicExchange: + case kIROp_AtomicAdd: + case kIROp_AtomicSub: + case kIROp_AtomicMin: + case kIROp_AtomicMax: + { + auto elementCountInst = as(vectorType->getElementCount()); + if (!elementCountInst) + { + sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = inst->sourceLoc}); + return; + } + + auto elementCount = elementCountInst->getValue(); + if (elementCount != 2 && elementCount != 4) + { + sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = inst->sourceLoc}); + return; + } + } + return; + + default: + return; + } +} + +static void validateAtomicOperationsImpl( + bool skipFuncParamValidation, + DiagnosticSink* sink, + IRInst* inst, + bool validateSPIRVAtomics) { switch (inst->getOp()) { @@ -549,6 +619,8 @@ void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink sink->diagnose(Diagnostics::InvalidAtomicDestinationPointer{ .location = inst->sourceLoc, }); + if (validateSPIRVAtomics) + validateSPIRVFp16VectorAtomicOperation(sink, inst); } break; @@ -558,10 +630,20 @@ void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink for (auto child : inst->getModifiableChildren()) { - validateAtomicOperations(skipFuncParamValidation, sink, child); + 
validateAtomicOperationsImpl(skipFuncParamValidation, sink, child, validateSPIRVAtomics); } } +void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst) +{ + validateAtomicOperationsImpl(skipFuncParamValidation, sink, inst, false); +} + +void validateSPIRVAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst) +{ + validateAtomicOperationsImpl(skipFuncParamValidation, sink, inst, true); +} + static void validateVectorOrMatrixElementType( DiagnosticSink* sink, SourceLoc sourceLoc, diff --git a/source/slang/slang-ir-validate.h b/source/slang/slang-ir-validate.h index f7ce0b05d8c..85843aeaa69 100644 --- a/source/slang/slang-ir-validate.h +++ b/source/slang/slang-ir-validate.h @@ -80,6 +80,11 @@ class [[nodiscard]] IRValidationScope // lead back to in/inout parameters that we can't validate. void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst); +void validateSPIRVAtomicOperations( + bool skipFuncParamValidation, + DiagnosticSink* sink, + IRInst* inst); + // Overload that takes IRModule* first for use with SLANG_PASS macro void validateAtomicOperations(IRModule* module, bool skipFuncParamValidation, DiagnosticSink* sink); From 054d6a7a8b3afb1c05e8926543a41319713b09b7 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:30:38 -0700 Subject: [PATCH 59/80] Fix fp16 vector atomic validation formatting --- source/slang/slang-ir-validate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/slang/slang-ir-validate.cpp b/source/slang/slang-ir-validate.cpp index 15a47e80162..a216ab37119 100644 --- a/source/slang/slang-ir-validate.cpp +++ b/source/slang/slang-ir-validate.cpp @@ -559,8 +559,8 @@ static void validateSPIRVFp16VectorAtomicOperation(DiagnosticSink* sink, IRInst* case kIROp_AtomicLoad: case kIROp_AtomicStore: case kIROp_AtomicCompareExchange: - 
sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{ - .location = inst->sourceLoc}); + sink->diagnose( + Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{.location = inst->sourceLoc}); return; case kIROp_AtomicExchange: From 7e19877db37bb36b8beca634fcfe2f71f0d2697b Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:51:05 -0700 Subject: [PATCH 60/80] Clarify fp16 vector atomic diagnostic --- source/slang/slang-diagnostics.lua | 2 +- .../byte-address-half-atomics-capability.slang | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/slang/slang-diagnostics.lua b/source/slang/slang-diagnostics.lua index 09f26a75eaf..d56e11a042a 100644 --- a/source/slang/slang-diagnostics.lua +++ b/source/slang/slang-diagnostics.lua @@ -4819,7 +4819,7 @@ err( "spirv-fp16-vector-atomic-unsupported-operation", 50014, "invalid SPIR-V fp16 vector atomic operation", - span { loc = "location", message = "SPIR-V fp16 vector atomics only support add, min, max, and exchange operations." } + span { loc = "location", message = "SPIR-V fp16 vector atomics only support add, sub, min, max, and exchange operations." } ) err( diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 4f00637cb50..c74d383119c 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -174,7 +174,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) half2(1.0h, 2.0h), half2(3.0h, 4.0h)); // UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: error[E50014]: invalid SPIR-V fp16 vector atomic operation -// UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: SPIR-V fp16 vector atomics only support add, min, max, and exchange operations. 
+// UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: SPIR-V fp16 vector atomics only support add, sub, min, max, and exchange operations. outputBuffer[0] = float(originalValue.x + originalValue.y); #elif defined(TEST_BUFFER_EMULATED_NO_VECTOR) half originalValue; From c65af3a0696742a67b1fdb236d48f2228c5ef2b9 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:39:26 -0700 Subject: [PATCH 61/80] Move fp16 atomic capability diagnostics before emit --- source/slang/slang-check-shader.cpp | 24 +--- source/slang/slang-compiler.h | 26 +++++ source/slang/slang-emit-spirv.cpp | 97 +--------------- source/slang/slang-ir-spirv-legalize.cpp | 142 +++++++++++++++++++++++ source/slang/slang-type-layout.cpp | 16 +-- 5 files changed, 174 insertions(+), 131 deletions(-) diff --git a/source/slang/slang-check-shader.cpp b/source/slang/slang-check-shader.cpp index 7091312e409..4dbc6ce79c0 100644 --- a/source/slang/slang-check-shader.cpp +++ b/source/slang/slang-check-shader.cpp @@ -1956,28 +1956,6 @@ void validateEntryPoint(EntryPoint* entryPoint, DiagnosticSink* sink) else { auto& targetOptionSet = target->getOptionSet(); - bool specificProfileRequested = - targetOptionSet.hasOption(CompilerOptionName::Profile) && - (targetOptionSet.getIntOption(CompilerOptionName::Profile) != - SLANG_PROFILE_UNKNOWN); - bool specificCapabilityRequested = false; - for (auto atomVal : targetOptionSet.getArray(CompilerOptionName::Capability)) - { - switch (atomVal.kind) - { - case CompilerOptionValueKind::Int: - if (atomVal.intValue != SLANG_CAPABILITY_UNKNOWN) - specificCapabilityRequested = true; - break; - case CompilerOptionValueKind::String: - // User made a specific capability request - specificCapabilityRequested = true; - break; - } - if (specificCapabilityRequested) - break; - } - if (auto declaredCapsMod = entryPointFuncDecl->findModifier()) { @@ -1988,7 +1966,7 @@ void validateEntryPoint(EntryPoint* entryPoint, DiagnosticSink* sink) } // Only 
attempt to error if a specific profile or capability is requested - if ((specificCapabilityRequested || specificProfileRequested) && + if (isSpecificProfileOrCapabilityRequested(targetOptionSet) && targetCaps.atLeastOneSetImpliedInOther( CapabilitySet{entryPointFuncDecl->inferredCapabilityRequirements}) == CapabilitySet::ImpliesReturnFlags::NotImplied) diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 4b3d67b223b..af45a64af85 100644 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -213,6 +213,32 @@ enum class DiagnosticCategory None = 0, Capability = 1 << 0, }; + +inline bool isSpecificProfileRequested(CompilerOptionSet& optionSet) +{ + return optionSet.hasOption(CompilerOptionName::Profile) && + (optionSet.getIntOption(CompilerOptionName::Profile) != SLANG_PROFILE_UNKNOWN); +} + +inline bool isSpecificCapabilityRequested(CompilerOptionSet& optionSet) +{ + for (auto atomVal : optionSet.getArray(CompilerOptionName::Capability)) + { + if ((atomVal.kind == CompilerOptionValueKind::Int && + atomVal.intValue != SLANG_CAPABILITY_UNKNOWN) || + atomVal.kind == CompilerOptionValueKind::String) + { + return true; + } + } + return false; +} + +inline bool isSpecificProfileOrCapabilityRequested(CompilerOptionSet& optionSet) +{ + return isSpecificProfileRequested(optionSet) || isSpecificCapabilityRequested(optionSet); +} + template bool maybeDiagnose( DiagnosticSink* sink, diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 74bc7ffcee7..87db026c409 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4412,94 +4412,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } - Dictionary> m_diagnosedCapabilityUses; - void maybeDiagnoseCapabilityUse(IRInst* inst, CapabilityName capabilityName) - { - auto& optionSet = m_targetProgram->getOptionSet(); - if (optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) - 
return; - - bool specificProfileRequested = - optionSet.hasOption(CompilerOptionName::Profile) && - (optionSet.getIntOption(CompilerOptionName::Profile) != SLANG_PROFILE_UNKNOWN); - bool specificCapabilityRequested = false; - for (auto atomVal : optionSet.getArray(CompilerOptionName::Capability)) - { - if ((atomVal.kind == CompilerOptionValueKind::Int && - atomVal.intValue != SLANG_CAPABILITY_UNKNOWN) || - atomVal.kind == CompilerOptionValueKind::String) - { - specificCapabilityRequested = true; - break; - } - } - if (!optionSet.getBoolOption(CompilerOptionName::RestrictiveCapabilityCheck) && - !specificProfileRequested && !specificCapabilityRequested) - return; - - auto parentFunc = getParentFunc(inst); - if (!parentFunc) - return; - - HashSet* entryPoints = - getReferencingEntryPoints(m_referencingEntryPoints, parentFunc); - if (!entryPoints) - return; - - for (auto entryPoint : *entryPoints) - { - IREntryPointDecoration* entryPointDecor = - entryPoint->findDecoration(); - if (!entryPointDecor) - continue; - - CapabilitySet stageTargetCaps = m_targetProgram->getTargetReq()->getTargetCaps(); - CapabilitySet stageCapabilitySet = entryPointDecor->getProfile().getCapabilityName(); - CapabilitySet required(capabilityName); - stageTargetCaps.join(stageCapabilitySet); - required.join(stageCapabilitySet); - - if (stageTargetCaps.atLeastOneSetImpliedInOther(required) == - CapabilitySet::ImpliesReturnFlags::Implied) - continue; - - CapabilityAtomSet addedAtoms{}; - if (auto stageCapSet = stageTargetCaps.getAtomSets()) - { - if (auto requiredSet = required.getAtomSets()) - { - CapabilityAtomSet::calcSubtract(addedAtoms, (*requiredSet), (*stageCapSet)); - } - } - - StringBuilder capsSb; - printDiagnosticArg(capsSb, addedAtoms); - String missingCapsStr = capsSb.toString(); - if (!m_diagnosedCapabilityUses[entryPoint].add(missingCapsStr)) - continue; - - StringBuilder entryPointSb; - printDiagnosticArg(entryPointSb, entryPoint); - - maybeDiagnoseWarningOrError( - m_sink, - 
optionSet, - DiagnosticCategory::Capability, - Diagnostics::ProfileImplicitlyUpgraded{ - .entryPoint = entryPointSb.toString(), - .profile = optionSet.getProfile().getName(), - .capabilities = missingCapsStr, - .location = entryPoint->sourceLoc, - }, - Diagnostics::ProfileImplicitlyUpgradedRestrictive{ - .entryPoint = entryPointSb.toString(), - .profile = optionSet.getProfile().getName(), - .capabilities = missingCapsStr, - .location = entryPoint->sourceLoc, - }); - } - } - static bool isFp16VectorAtomicType(IRType* valueType) { auto vectorType = as(valueType); @@ -4509,7 +4421,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex return true; } - void maybeRequireFp16VectorAtomicCapability(IRInst* atomicInst, IRType* valueType) + void maybeRequireFp16VectorAtomicCapability(IRType* valueType) { if (!isFp16VectorAtomicType(valueType)) return; @@ -4525,7 +4437,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex if (elementCount != 2 && elementCount != 4) return; - maybeDiagnoseCapabilityUse(atomicInst, CapabilityName::spvAtomicFloat16VectorNV); ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); } @@ -4556,7 +4467,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SLANG_ASSERT(!isFp16VectorAtomicType(atomicValueType)); break; case SpvOpAtomicExchange: - maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType); + maybeRequireFp16VectorAtomicCapability(atomicValueType); break; case SpvOpAtomicFAddEXT: { @@ -4575,7 +4486,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT); break; case kIROp_VectorType: - maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType); + maybeRequireFp16VectorAtomicCapability(atomicValueType); break; } } @@ -4598,7 +4509,7 @@ struct SPIRVEmitContext : 
public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT); break; case kIROp_VectorType: - maybeRequireFp16VectorAtomicCapability(atomicInst, atomicValueType); + maybeRequireFp16VectorAtomicCapability(atomicValueType); break; } } diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index 1bff3c21c03..7d76a7918d2 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -27,6 +27,7 @@ #include "slang-ir.h" #include "slang-legalize-types.h" #include "slang-rich-diagnostics.h" +#include "slang-compiler.h" namespace Slang { @@ -2832,6 +2833,146 @@ static bool hasExplicitInterlockInst(IRFunc* func) return false; } +static IRType* getAtomicOperationValueType(IRInst* inst) +{ + if (inst->getDataType()->getOp() != kIROp_VoidType) + return inst->getDataType(); + + IRBuilder builder(inst); + auto ptrValueType = tryGetPointedToType(&builder, inst->getOperand(0)->getDataType()); + if (auto atomicType = as(ptrValueType)) + return atomicType->getElementType(); + return ptrValueType; +} + +static bool isSupportedSPIRVFp16VectorAtomic(IRInst* inst) +{ + switch (inst->getOp()) + { + case kIROp_AtomicExchange: + case kIROp_AtomicAdd: + case kIROp_AtomicSub: + case kIROp_AtomicMin: + case kIROp_AtomicMax: + break; + default: + return false; + } + + auto valueType = getAtomicOperationValueType(inst); + auto vectorType = as(valueType); + if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) + return false; + + auto elementCountInst = as(vectorType->getElementCount()); + if (!elementCountInst) + return false; + + auto elementCount = elementCountInst->getValue(); + return elementCount == 2 || elementCount == 4; +} + +static bool shouldDiagnoseImplicitCapabilityUse(CompilerOptionSet& optionSet) +{ + if (optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) + return false; + + return 
optionSet.getBoolOption(CompilerOptionName::RestrictiveCapabilityCheck) || + isSpecificProfileOrCapabilityRequested(optionSet); +} + +static void diagnoseCapabilityUseForEntryPoint( + SPIRVEmitSharedContext* context, + IRFunc* entryPoint, + CapabilityName capabilityName, + Dictionary>& diagnosedCapabilityUses) +{ + auto entryPointDecor = entryPoint->findDecoration(); + if (!entryPointDecor) + return; + + CapabilitySet stageTargetCaps = context->m_targetProgram->getTargetReq()->getTargetCaps(); + CapabilitySet stageCapabilitySet = entryPointDecor->getProfile().getCapabilityName(); + CapabilitySet required(capabilityName); + stageTargetCaps.join(stageCapabilitySet); + required.join(stageCapabilitySet); + + if (stageTargetCaps.atLeastOneSetImpliedInOther(required) == + CapabilitySet::ImpliesReturnFlags::Implied) + return; + + CapabilityAtomSet addedAtoms{}; + if (auto stageCapSet = stageTargetCaps.getAtomSets()) + { + if (auto requiredSet = required.getAtomSets()) + CapabilityAtomSet::calcSubtract(addedAtoms, (*requiredSet), (*stageCapSet)); + } + + StringBuilder capsSb; + printDiagnosticArg(capsSb, addedAtoms); + String missingCapsStr = capsSb.toString(); + if (!diagnosedCapabilityUses[entryPoint].add(missingCapsStr)) + return; + + StringBuilder entryPointSb; + printDiagnosticArg(entryPointSb, entryPoint); + + auto& optionSet = context->m_targetProgram->getOptionSet(); + maybeDiagnoseWarningOrError( + context->m_sink, + optionSet, + DiagnosticCategory::Capability, + Diagnostics::ProfileImplicitlyUpgraded{ + .entryPoint = entryPointSb.toString(), + .profile = optionSet.getProfile().getName(), + .capabilities = missingCapsStr, + .location = entryPoint->sourceLoc, + }, + Diagnostics::ProfileImplicitlyUpgradedRestrictive{ + .entryPoint = entryPointSb.toString(), + .profile = optionSet.getProfile().getName(), + .capabilities = missingCapsStr, + .location = entryPoint->sourceLoc, + }); +} + +static void diagnoseSPIRVAtomicCapabilityUses(SPIRVEmitSharedContext* context, 
IRModule* module) +{ + auto& optionSet = context->m_targetProgram->getOptionSet(); + if (!shouldDiagnoseImplicitCapabilityUse(optionSet)) + return; + + Dictionary> diagnosedCapabilityUses; + for (auto globalInst : module->getGlobalInsts()) + { + auto func = as(globalInst); + if (!func) + continue; + + auto entryPoints = context->m_referencingEntryPoints.tryGetValue(func); + if (!entryPoints) + continue; + + for (auto block : func->getBlocks()) + { + for (auto childInst : block->getChildren()) + { + if (!isSupportedSPIRVFp16VectorAtomic(childInst)) + continue; + + for (auto entryPoint : *entryPoints) + { + diagnoseCapabilityUseForEntryPoint( + context, + entryPoint, + CapabilityName::spvAtomicFloat16VectorNV, + diagnosedCapabilityUses); + } + } + } + } +} + void insertFragmentShaderInterlock(SPIRVEmitSharedContext* context, IRModule* module) { HashSet fragmentShaders; @@ -2994,6 +3135,7 @@ void legalizeIRForSPIRV( eliminateDeadCode(module); buildEntryPointReferenceGraph(context->m_referencingEntryPoints, module); + diagnoseSPIRVAtomicCapabilityUses(context, module); insertFragmentShaderInterlock(context, module); } diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp index 7bebfbbf3cf..34eb2c9b9bd 100644 --- a/source/slang/slang-type-layout.cpp +++ b/source/slang/slang-type-layout.cpp @@ -3499,21 +3499,7 @@ static void maybePromoteDescriptorHandleCapability(TargetRequest* targetReq) return; auto& targetOptionSet = targetReq->getOptionSet(); - bool specificProfileRequested = - targetOptionSet.hasOption(CompilerOptionName::Profile) && - (targetOptionSet.getIntOption(CompilerOptionName::Profile) != SLANG_PROFILE_UNKNOWN); - bool specificCapabilityRequested = false; - for (auto atomVal : targetOptionSet.getArray(CompilerOptionName::Capability)) - { - if ((atomVal.kind == CompilerOptionValueKind::Int && - atomVal.intValue != SLANG_CAPABILITY_UNKNOWN) || - atomVal.kind == CompilerOptionValueKind::String) - { - specificCapabilityRequested = 
true; - break; - } - } - if (!specificProfileRequested && !specificCapabilityRequested) + if (!isSpecificProfileOrCapabilityRequested(targetOptionSet)) { auto targetCaps = targetReq->getTargetCaps(); targetCaps.addUnexpandedCapabilites(CapabilityName::descriptor_handle); From a46301a8b5a8541986bd9b5bd0a3a3a1de855ec9 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:44:07 -0700 Subject: [PATCH 62/80] Fix SPIR-V legalize include order --- source/slang/slang-ir-spirv-legalize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index 7d76a7918d2..5d20c21e351 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -1,6 +1,7 @@ // slang-ir-spirv-legalize.cpp #include "slang-ir-spirv-legalize.h" +#include "slang-compiler.h" #include "slang-emit-base.h" #include "slang-ir-call-graph.h" #include "slang-ir-clone.h" @@ -27,7 +28,6 @@ #include "slang-ir.h" #include "slang-legalize-types.h" #include "slang-rich-diagnostics.h" -#include "slang-compiler.h" namespace Slang { From 0dcb1ca8c3b0fcc567435c856f07c46b7104081c Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 12:03:09 -0700 Subject: [PATCH 63/80] Address fp16 atomic review nits --- source/slang/slang-ir-spirv-legalize.cpp | 5 +++-- source/slang/slang-ir-validate.cpp | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index 5d20c21e351..7688379109d 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -2918,19 +2918,20 @@ static void diagnoseCapabilityUseForEntryPoint( printDiagnosticArg(entryPointSb, entryPoint); auto& optionSet = context->m_targetProgram->getOptionSet(); + auto entryPointProfileName = 
entryPointDecor->getProfile().getName(); maybeDiagnoseWarningOrError( context->m_sink, optionSet, DiagnosticCategory::Capability, Diagnostics::ProfileImplicitlyUpgraded{ .entryPoint = entryPointSb.toString(), - .profile = optionSet.getProfile().getName(), + .profile = entryPointProfileName, .capabilities = missingCapsStr, .location = entryPoint->sourceLoc, }, Diagnostics::ProfileImplicitlyUpgradedRestrictive{ .entryPoint = entryPointSb.toString(), - .profile = optionSet.getProfile().getName(), + .profile = entryPointProfileName, .capabilities = missingCapsStr, .location = entryPoint->sourceLoc, }); diff --git a/source/slang/slang-ir-validate.cpp b/source/slang/slang-ir-validate.cpp index a216ab37119..aacf0d4f498 100644 --- a/source/slang/slang-ir-validate.cpp +++ b/source/slang/slang-ir-validate.cpp @@ -588,6 +588,8 @@ static void validateSPIRVFp16VectorAtomicOperation(DiagnosticSink* sink, IRInst* return; default: + sink->diagnose( + Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{.location = inst->sourceLoc}); return; } } From 3ecda6d70e6638be3f0c5e11b6dac6c820066d4f Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 12:40:01 -0700 Subject: [PATCH 64/80] Document fp16 vector atomic sub support --- docs/user-guide/a2-01-spirv-target-specific.md | 2 +- docs/user-guide/a3-02-reference-capability-atoms.md | 4 ++-- source/slang/slang-capabilities.capdef | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/user-guide/a2-01-spirv-target-specific.md b/docs/user-guide/a2-01-spirv-target-specific.md index d92a881ffad..a3d1b7f604e 100644 --- a/docs/user-guide/a2-01-spirv-target-specific.md +++ b/docs/user-guide/a2-01-spirv-target-specific.md @@ -171,7 +171,7 @@ GLSL 4.6 with [GLSL_EXT_shader_atomic_float2](https://github.com/KhronosGroup/GL SPIR-V 1.5 with 
[SPV_EXT_shader_atomic_float_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_add.asciidoc) and [SPV_EXT_shader_atomic_float_min_max](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_min_max.asciidoc) can use atomic operations for 32-bit float type and 64-bit float type. SPIR-V 1.5 with [SPV_EXT_shader_atomic_float16_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float16_add.asciidoc) can use atomic operations for 16-bit float type. -SPIR-V 1.5 with [SPV_NV_shader_atomic_fp16_vector](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_shader_atomic_fp16_vector.asciidoc) can use vector atomic add/min/max/exchange operations for 16-bit float vector types with 2 or 4 components. +SPIR-V 1.5 with [SPV_NV_shader_atomic_fp16_vector](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_shader_atomic_fp16_vector.asciidoc) can use vector atomic add/sub/min/max/exchange operations for 16-bit float vector types with 2 or 4 components. | | 32-bit integer | 64-bit integer | 32-bit float | 64-bit float | 16-bit float | 16-bit float vector | | ------ | -------------- | --------------- | --------------------- | ---------------- | ---------------- | ----------------------- | diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 81e1e7c5df2..ea1f56c50e6 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -702,7 +702,7 @@ Extensions > Represents the SPIR-V extension for ray tracing motion blur. `SPV_NV_shader_atomic_fp16_vector` -> Represents the SPIR-V extension for vector atomic float 16 add/min/max/exchange operations. +> Represents the SPIR-V extension for vector atomic float 16 add/sub/min/max/exchange operations. 
`SPV_NV_shader_image_footprint` > Represents the SPIR-V extension for shader image footprint. @@ -727,7 +727,7 @@ Extensions > Represents the SPIR-V capability for atomic float 16 min/max operations. `spvAtomicFloat16VectorNV` -> Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. +> Represents the SPIR-V capability for vector atomic float 16 add/sub/min/max/exchange operations. > Implies scalar atomic float 16 add support. `spvAtomicFloat32AddEXT` diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 12e4d8f0208..1ca4414890c 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -543,7 +543,7 @@ def SPV_EXT_shader_atomic_float_add : _spirv_1_0; /// [EXT] def SPV_EXT_shader_atomic_float16_add : SPV_EXT_shader_atomic_float_add; -/// Represents the SPIR-V extension for vector atomic float 16 add/min/max/exchange operations. +/// Represents the SPIR-V extension for vector atomic float 16 add/sub/min/max/exchange operations. /// [EXT] def SPV_NV_shader_atomic_fp16_vector : _spirv_1_0; @@ -704,7 +704,7 @@ def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; /// [EXT] def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; -/// Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. +/// Represents the SPIR-V capability for vector atomic float 16 add/sub/min/max/exchange operations. /// Implies scalar atomic float 16 add support. 
/// [EXT] def spvAtomicFloat16VectorNV : SPV_NV_shader_atomic_fp16_vector + spvAtomicFloat16AddEXT; From 8fda506562bda4c9a51a53221b50ed07c6a60763 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 12:50:12 -0700 Subject: [PATCH 65/80] Cover pointer fp16 vector atomics --- source/slang/slang-emit-spirv.cpp | 1 + ...byte-address-half-atomics-capability.slang | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 87db026c409..6870a53180a 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -7608,6 +7608,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitParam(SpvInstParent* parent, IRInst* inst) { + requireVariableBufferCapabilityIfNeeded(inst->getDataType()); auto paramSpvInst = emitOpFunctionParameter(parent, inst, inst->getFullType()); maybeEmitName(paramSpvInst, inst); maybeEmitPointerDecoration(paramSpvInst, inst); diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index c74d383119c..d5272cb2ee7 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -25,6 +25,8 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=BUFFER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability cuda_hlsl_spirv+sm_5_0+spvAtomicFloat16AddEXT -DTEST_BUFFER_EMULATED_NO_VECTOR //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_EMULATED 
//DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_F16X2_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_F16X2 +//TEST:SIMPLE(filecheck=POINTER_EMULATED): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -capability spvAtomicFloat16VectorNV -DTEST_POINTER_EMULATED +//TEST:SIMPLE(filecheck=POINTER_F16X2): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_POINTER_F16X2 // Positive codegen checks for ignored capabilities, scalar/vector SPIR-V, and CUDA. //TEST:SIMPLE(filecheck=IGNORE_CAPS): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -ignore-capabilities -capability spirv_1_5 @@ -303,6 +305,26 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 // EMULATED-NOT: OpAtomicFAddEXT +// POINTER_EMULATED: OpCapability VariablePointersStorageBuffer +// POINTER_EMULATED: OpExtension "SPV_KHR_variable_pointers" +// POINTER_EMULATED: OpCapability AtomicFloat16VectorNV +// POINTER_EMULATED: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POINTER_EMULATED-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 +// POINTER_EMULATED-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 +// POINTER_EMULATED: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] +// POINTER_EMULATED: OpCompositeExtract %half [[LOW_ATOMIC]] 0 +// POINTER_EMULATED: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] +// POINTER_EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 +// POINTER_EMULATED-NOT: OpAtomicFAddEXT + +// POINTER_F16X2: OpCapability VariablePointersStorageBuffer +// POINTER_F16X2: OpExtension 
"SPV_KHR_variable_pointers" +// POINTER_F16X2: OpCapability AtomicFloat16VectorNV +// POINTER_F16X2: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POINTER_F16X2-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x1p_1 +// POINTER_F16X2: OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[VALUE]] +// POINTER_F16X2-NOT: OpAtomicFAddEXT + // CUDA-NOT: atomicAdd((half2 * // CUDA-NOT: atomicAdd((__half2 * // CUDA-COUNT-5: atomicAdd((&{{.*}}), __half(1.0)) From 471c576ba34279ddc96490025e2a57ea34761cdb Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 13:30:52 -0700 Subject: [PATCH 66/80] Fix fp16 pointer atomic filecheck order --- .../byte-address-half-atomics-capability.slang | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index d5272cb2ee7..66a418062ae 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -305,10 +305,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 // EMULATED-NOT: OpAtomicFAddEXT -// POINTER_EMULATED: OpCapability VariablePointersStorageBuffer -// POINTER_EMULATED: OpExtension "SPV_KHR_variable_pointers" -// POINTER_EMULATED: OpCapability AtomicFloat16VectorNV -// POINTER_EMULATED: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POINTER_EMULATED-DAG: OpCapability VariablePointersStorageBuffer +// POINTER_EMULATED-DAG: OpCapability AtomicFloat16VectorNV +// POINTER_EMULATED-DAG: OpExtension "SPV_KHR_variable_pointers" +// POINTER_EMULATED-DAG: OpExtension "SPV_NV_shader_atomic_fp16_vector" // POINTER_EMULATED-DAG: [[LOW:%[0-9]+]] = OpConstantComposite 
%v2half %half_0x1p_0 %half_0x0p_0 // POINTER_EMULATED-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 // POINTER_EMULATED: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] @@ -317,10 +317,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // POINTER_EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 // POINTER_EMULATED-NOT: OpAtomicFAddEXT -// POINTER_F16X2: OpCapability VariablePointersStorageBuffer -// POINTER_F16X2: OpExtension "SPV_KHR_variable_pointers" -// POINTER_F16X2: OpCapability AtomicFloat16VectorNV -// POINTER_F16X2: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POINTER_F16X2-DAG: OpCapability VariablePointersStorageBuffer +// POINTER_F16X2-DAG: OpCapability AtomicFloat16VectorNV +// POINTER_F16X2-DAG: OpExtension "SPV_KHR_variable_pointers" +// POINTER_F16X2-DAG: OpExtension "SPV_NV_shader_atomic_fp16_vector" // POINTER_F16X2-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x1p_1 // POINTER_F16X2: OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[VALUE]] // POINTER_F16X2-NOT: OpAtomicFAddEXT From efdd34a327e4ec3deb9d18000bd1fba6d356c6ba Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 14:03:28 -0700 Subject: [PATCH 67/80] Cover fp16 vector atomic sub --- source/slang/slang-emit-spirv.cpp | 1 + .../byte-address-half-atomics-capability.slang | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 6870a53180a..b4fe63c872b 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -5503,6 +5503,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex { builder.setInsertBefore(inst); auto negatedOperand = builder.emitNeg(inst->getDataType(), operand); + emitLocalInst(parent, negatedOperand); operand = negatedOperand; } result = 
emitOpAtomicOp( diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 66a418062ae..1bf2183769e 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -12,6 +12,7 @@ //DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT //DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT4):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT_VECTOR4 //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC +//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_SUB_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_SUB //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability 
spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE @@ -32,6 +33,7 @@ //TEST:SIMPLE(filecheck=IGNORE_CAPS): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -ignore-capabilities -capability spirv_1_5 //TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC +//TEST:SIMPLE(filecheck=VECTOR_SUB): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_SUB //TEST:SIMPLE(filecheck=VECTOR4): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_ATOMIC //TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX //TEST:SIMPLE(filecheck=VECTOR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_EXCHANGE @@ -146,6 +148,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); // DIRECT_NO_VECTOR: entry point uses capabilities not in specified profile // DIRECT_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' +#elif defined(TEST_DIRECT_VECTOR_SUB) + __atomic_sub(vectorBuffer[0], half2(1.0h, 2.0h)); +// DIRECT_SUB_NO_VECTOR: entry point uses capabilities not in specified profile +// DIRECT_SUB_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' #elif defined(TEST_DIRECT_VECTOR4_ATOMIC) __atomic_add(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); // DIRECT_VECTOR4_NO_VECTOR: entry point uses capabilities not in 
specified profile @@ -178,6 +184,9 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: error[E50014]: invalid SPIR-V fp16 vector atomic operation // UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: SPIR-V fp16 vector atomics only support add, sub, min, max, and exchange operations. outputBuffer[0] = float(originalValue.x + originalValue.y); +// Half-vector atomic load/store are not source-expressible today: Atomic requires +// scalar IAtomicable types, while half2/half4 do not conform to that interface. The +// validator rejects those IR ops defensively if a later lowering path creates them. #elif defined(TEST_BUFFER_EMULATED_NO_VECTOR) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); @@ -255,6 +264,14 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" // VECTOR: OpAtomicFAddEXT %v2half +// VECTOR_SUB-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_SUB-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_SUB: OpCapability AtomicFloat16VectorNV +// VECTOR_SUB: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_SUB-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v2half {{%[0-9]+}} {{%[0-9]+}} +// VECTOR_SUB: [[NEG_VALUE:%[0-9]+]] = OpFNegate %v2half [[VALUE]] +// VECTOR_SUB: OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[NEG_VALUE]] + // VECTOR4-NOT: OpCapability AtomicFloat16AddEXT // VECTOR4-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // VECTOR4: OpCapability AtomicFloat16VectorNV From 1867c385c0c4a71eb0fe0d170ef387e6ecb242b4 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 14:18:11 -0700 Subject: [PATCH 68/80] Tighten pointer atomic checks --- .../byte-address-half-atomics-capability.slang | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang 
b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 1bf2183769e..98455a160e5 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -326,6 +326,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // POINTER_EMULATED-DAG: OpCapability AtomicFloat16VectorNV // POINTER_EMULATED-DAG: OpExtension "SPV_KHR_variable_pointers" // POINTER_EMULATED-DAG: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POINTER_EMULATED-NOT: OpCapability AtomicFloat16AddEXT +// POINTER_EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // POINTER_EMULATED-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 // POINTER_EMULATED-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 // POINTER_EMULATED: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] @@ -338,6 +340,8 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // POINTER_F16X2-DAG: OpCapability AtomicFloat16VectorNV // POINTER_F16X2-DAG: OpExtension "SPV_KHR_variable_pointers" // POINTER_F16X2-DAG: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POINTER_F16X2-NOT: OpCapability AtomicFloat16AddEXT +// POINTER_F16X2-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // POINTER_F16X2-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x1p_1 // POINTER_F16X2: OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[VALUE]] // POINTER_F16X2-NOT: OpAtomicFAddEXT From 0475083d139727a22c9262f351ec0a176a08cf33 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 14:42:19 -0700 Subject: [PATCH 69/80] Address fp16 atomic review gaps --- source/slang/slang-ir-spirv-legalize.cpp | 5 ++- ...byte-address-half-atomics-capability.slang | 43 ++++++++++++++++++- 2 files changed, 44 
insertions(+), 4 deletions(-) diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index 7688379109d..c85a1b6c539 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -2835,8 +2835,9 @@ static bool hasExplicitInterlockInst(IRFunc* func) static IRType* getAtomicOperationValueType(IRInst* inst) { - if (inst->getDataType()->getOp() != kIROp_VoidType) - return inst->getDataType(); + auto valueType = inst->getDataType(); + if (valueType && valueType->getOp() != kIROp_VoidType) + return valueType; IRBuilder builder(inst); auto ptrValueType = tryGetPointedToType(&builder, inst->getOperand(0)->getDataType()); diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 98455a160e5..74f2bfd18a1 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -26,6 +26,7 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=BUFFER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability cuda_hlsl_spirv+sm_5_0+spvAtomicFloat16AddEXT -DTEST_BUFFER_EMULATED_NO_VECTOR //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_EMULATED //DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_F16X2_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_F16X2 +//DIAGNOSTIC_TEST:SIMPLE(diag=MULTI_ENTRY_NO_VECTOR,non-exhaustive): -target spirv -entry computeMainA -entry computeMainB -emit-spirv-directly 
-restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_MULTI_ENTRY_HELPER_NO_VECTOR //TEST:SIMPLE(filecheck=POINTER_EMULATED): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -capability spvAtomicFloat16VectorNV -DTEST_POINTER_EMULATED //TEST:SIMPLE(filecheck=POINTER_F16X2): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_POINTER_F16X2 @@ -58,6 +59,37 @@ RWStructuredBuffer unsupportedVectorBuffer; //TEST_INPUT:ubuffer(stride=4, count=1):name=uintBuffer RWStructuredBuffer uintBuffer; +#ifdef TEST_MULTI_ENTRY_HELPER_NO_VECTOR +void sharedVectorAtomic() +{ + __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +} + +void callSharedVectorAtomic() +{ + sharedVectorAtomic(); +} + +[shader("compute")] +[numthreads(1, 1, 1)] +void computeMainA(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + sharedVectorAtomic(); + callSharedVectorAtomic(); +// MULTI_ENTRY_NO_VECTOR: entry point 'computeMainA' uses capabilities +// MULTI_ENTRY_NO_VECTOR-SAME: Missing capabilities are: 'spvAtomicFloat16VectorNV' +// MULTI_ENTRY_NO_VECTOR: entry point 'computeMainB' uses capabilities +// MULTI_ENTRY_NO_VECTOR-SAME: Missing capabilities are: 'spvAtomicFloat16VectorNV' +// MULTI_ENTRY_NO_VECTOR-NOT: entry point ' +} + +[shader("compute")] +[numthreads(1, 1, 1)] +void computeMainB(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + callSharedVectorAtomic(); +} +#else [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { @@ -230,6 +262,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) outputBuffer[4] = float(originalValue); #endif } +#endif // SCALAR-NOT: OpCapability AtomicFloat16VectorNV // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" @@ -246,7 +279,13 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR_F16: OpCapability AtomicFloat16VectorNV // VECTOR_F16: OpExtension 
"SPV_NV_shader_atomic_fp16_vector" // VECTOR_F16-NOT: OpAtomicFAddEXT %half -// VECTOR_F16-COUNT-6: OpAtomicFAddEXT %v2half +// VECTOR_F16-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 +// VECTOR_F16-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 +// VECTOR_F16: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] +// VECTOR_F16: OpCompositeExtract %half [[LOW_ATOMIC]] 0 +// VECTOR_F16: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] +// VECTOR_F16: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 +// VECTOR_F16-COUNT-4: OpAtomicFAddEXT %v2half // VECTOR_F16-NOT: OpAtomicFAddEXT %half // BOTH-NOT: OpCapability AtomicFloat16AddEXT @@ -268,7 +307,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR_SUB-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // VECTOR_SUB: OpCapability AtomicFloat16VectorNV // VECTOR_SUB: OpExtension "SPV_NV_shader_atomic_fp16_vector" -// VECTOR_SUB-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v2half {{%[0-9]+}} {{%[0-9]+}} +// VECTOR_SUB-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v2half {{%[A-Za-z0-9_]+}} {{%[A-Za-z0-9_]+}} // VECTOR_SUB: [[NEG_VALUE:%[0-9]+]] = OpFNegate %v2half [[VALUE]] // VECTOR_SUB: OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[NEG_VALUE]] From e48354859a714af32b4dd7de81583b49ced80a9f Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 15:26:26 -0700 Subject: [PATCH 70/80] Avoid unnecessary groupshared variable pointers --- source/slang/slang-emit-spirv.cpp | 8 ++++++-- tests/language-feature/pointer/ptr-to-groupshared.slang | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index b4fe63c872b..9cb43f576d5 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -11287,8 
+11287,12 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityVariablePointersStorageBuffer); break; case AddressSpace::GroupShared: - ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_variable_pointers")); - requireSPIRVCapability(SpvCapabilityVariablePointers); + if (m_targetRequest->getTargetCaps().implies( + CapabilityAtom::SPV_KHR_variable_pointers)) + { + ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_variable_pointers")); + requireSPIRVCapability(SpvCapabilityVariablePointers); + } break; } } diff --git a/tests/language-feature/pointer/ptr-to-groupshared.slang b/tests/language-feature/pointer/ptr-to-groupshared.slang index c4b6ae4ac8b..c18551c4df7 100644 --- a/tests/language-feature/pointer/ptr-to-groupshared.slang +++ b/tests/language-feature/pointer/ptr-to-groupshared.slang @@ -1,4 +1,5 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -Xslang -g0 +//TEST:SIMPLE(filecheck=SPIRV): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 // By default slang-test uses `-g` and it requires `VariablePointers`, which // doesn't produce the correct result due to the bug on the graphics driver. 
@@ -10,6 +11,8 @@ // CHECK: 1 // CHECK-NEXT: 2 // CHECK-NEXT: 0 +// SPIRV-NOT: OpCapability VariablePointers +// SPIRV: OpMemoryModel struct Data { From b31e1b893ec700052fa779fdb54b03b53f8886f1 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 15:53:01 -0700 Subject: [PATCH 71/80] Share fp16 atomic value type helper --- source/slang/slang-ir-spirv-legalize.cpp | 13 ------------- source/slang/slang-ir-util.cpp | 13 +++++++++++++ source/slang/slang-ir-util.h | 3 +++ source/slang/slang-ir-validate.cpp | 13 ------------- .../pointer/ptr-to-groupshared.slang | 5 +++++ 5 files changed, 21 insertions(+), 26 deletions(-) diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index c85a1b6c539..c76ee6ff965 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -2833,19 +2833,6 @@ static bool hasExplicitInterlockInst(IRFunc* func) return false; } -static IRType* getAtomicOperationValueType(IRInst* inst) -{ - auto valueType = inst->getDataType(); - if (valueType && valueType->getOp() != kIROp_VoidType) - return valueType; - - IRBuilder builder(inst); - auto ptrValueType = tryGetPointedToType(&builder, inst->getOperand(0)->getDataType()); - if (auto atomicType = as<IRAtomicType>(ptrValueType)) - return atomicType->getElementType(); - return ptrValueType; -} - static bool isSupportedSPIRVFp16VectorAtomic(IRInst* inst) { switch (inst->getOp()) diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp index 0a9514b9ab7..aa6882d464d 100644 --- a/source/slang/slang-ir-util.cpp +++ b/source/slang/slang-ir-util.cpp @@ -68,6 +68,19 @@ IRType* getMatrixElementType(IRType* type) return type; } +IRType* getAtomicOperationValueType(IRInst* inst) +{ + auto valueType = inst->getDataType(); + if (valueType && valueType->getOp() != kIROp_VoidType) + return valueType; + + IRBuilder builder(inst); + auto ptrValueType =
tryGetPointedToType(&builder, inst->getOperand(0)->getDataType()); + if (auto atomicType = as<IRAtomicType>(ptrValueType)) + return atomicType->getElementType(); + return ptrValueType; +} + Dictionary<IRInst*, IRInst*> buildInterfaceRequirementDict(IRInterfaceType* interfaceType) { Dictionary<IRInst*, IRInst*> result; diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h index 5ddc8eb19dc..a3e9a349852 100644 --- a/source/slang/slang-ir-util.h +++ b/source/slang/slang-ir-util.h @@ -137,6 +137,9 @@ IRType* getVectorOrCoopMatrixElementType(IRType* type); // If `type` is a matrix, returns its element type. Otherwise, return `type`. IRType* getMatrixElementType(IRType* type); +// Returns the value type operated on by an atomic instruction. +IRType* getAtomicOperationValueType(IRInst* inst); + // True if type is a resource backing memory bool isResourceType(IRType* type); bool isOpaqueType(IRType* type, IRType** outLeafOpaqueHandleType); diff --git a/source/slang/slang-ir-validate.cpp b/source/slang/slang-ir-validate.cpp index aacf0d4f498..d9139900161 100644 --- a/source/slang/slang-ir-validate.cpp +++ b/source/slang/slang-ir-validate.cpp @@ -526,19 +526,6 @@ static bool isValidAtomicDest(bool skipFuncParamValidation, IRInst* dst) return false; } -static IRType* getAtomicOperationValueType(IRInst* inst) -{ - auto valueType = inst->getDataType(); - if (valueType && valueType->getOp() != kIROp_VoidType) - return valueType; - - IRBuilder builder(inst); - auto ptrValueType = tryGetPointedToType(&builder, inst->getOperand(0)->getDataType()); - if (auto atomicType = as<IRAtomicType>(ptrValueType)) - return atomicType->getElementType(); - return ptrValueType; -} - static IRVectorType* getFp16VectorAtomicType(IRInst* inst) { auto valueType = getAtomicOperationValueType(inst); diff --git a/tests/language-feature/pointer/ptr-to-groupshared.slang b/tests/language-feature/pointer/ptr-to-groupshared.slang index c18551c4df7..ec94a37d10f 100644 --- a/tests/language-feature/pointer/ptr-to-groupshared.slang +++
b/tests/language-feature/pointer/ptr-to-groupshared.slang @@ -1,5 +1,6 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -Xslang -g0 //TEST:SIMPLE(filecheck=SPIRV): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 +//TEST:SIMPLE(filecheck=SPIRV_VARPTR): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 -capability SPV_KHR_variable_pointers // By default slang-test uses `-g` and it requires `VariablePointers`, which // doesn't produce the correct result due to the bug on the graphics driver. @@ -13,6 +14,10 @@ // CHECK-NEXT: 0 // SPIRV-NOT: OpCapability VariablePointers // SPIRV: OpMemoryModel +// SPIRV-NOT: OpTypeFunction %void %_ptr_Workgroup +// SPIRV_VARPTR: OpCapability VariablePointers +// SPIRV_VARPTR: OpExtension "SPV_KHR_variable_pointers" +// SPIRV_VARPTR: OpMemoryModel struct Data { From bbbf8f7ce7b2c8c3e991d281ccac6e343eaa44dc Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:02:41 -0700 Subject: [PATCH 72/80] Assert groupshared variable pointer signature --- tests/language-feature/pointer/ptr-to-groupshared.slang | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/language-feature/pointer/ptr-to-groupshared.slang b/tests/language-feature/pointer/ptr-to-groupshared.slang index ec94a37d10f..fab0afecabc 100644 --- a/tests/language-feature/pointer/ptr-to-groupshared.slang +++ b/tests/language-feature/pointer/ptr-to-groupshared.slang @@ -18,6 +18,7 @@ // SPIRV_VARPTR: OpCapability VariablePointers // SPIRV_VARPTR: OpExtension "SPV_KHR_variable_pointers" // SPIRV_VARPTR: OpMemoryModel +// SPIRV_VARPTR: OpTypeFunction %void %_ptr_Workgroup struct Data { From ce702570dc1f201c04b5a44afdd7298dce234b30 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 18:21:28 -0700 Subject: [PATCH 73/80] Emit variable pointer capability for workgroup 
signatures --- source/slang/slang-emit-spirv.cpp | 48 ++++++++++++++++--- .../pointer/ptr-to-groupshared.slang | 27 +++++++++-- 2 files changed, 66 insertions(+), 9 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 9cb43f576d5..166694eae3d 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -2768,10 +2768,14 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex // with the result-type operand coming first, // followed by operand sfor all the parameter types. // - return emitOpTypeFunction( - inst, - static_cast(inst)->getResultType(), - static_cast(inst)->getParamTypes()); + { + auto funcType = static_cast(inst); + requireFunctionTypeCapabilitiesIfNeeded(funcType); + return emitOpTypeFunction( + inst, + funcType->getResultType(), + funcType->getParamTypes()); + } case kIROp_RateQualifiedType: { @@ -11290,14 +11294,46 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex if (m_targetRequest->getTargetCaps().implies( CapabilityAtom::SPV_KHR_variable_pointers)) { - ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_variable_pointers")); - requireSPIRVCapability(SpvCapabilityVariablePointers); + requireSPIRVVariablePointersCapability(); } break; } } } + void requireSPIRVVariablePointersCapability() + { + ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_variable_pointers")); + requireSPIRVCapability(SpvCapabilityVariablePointers); + } + + bool isWorkgroupPointerType(IRInst* type) + { + if (auto ptrType = as(type)) + { + return ptrType->getAddressSpace() == AddressSpace::GroupShared; + } + return false; + } + + void requireFunctionTypeCapabilitiesIfNeeded(IRFuncType* funcType) + { + if (isWorkgroupPointerType(funcType->getResultType())) + { + requireSPIRVVariablePointersCapability(); + return; + } + + for (UInt pp = 0; pp < funcType->getParamCount(); ++pp) + { + if 
(isWorkgroupPointerType(funcType->getParamType(pp))) + { + requireSPIRVVariablePointersCapability(); + return; + } + } + } + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpExecutionMode Dictionary> m_executionModes; template diff --git a/tests/language-feature/pointer/ptr-to-groupshared.slang b/tests/language-feature/pointer/ptr-to-groupshared.slang index fab0afecabc..8b40106b141 100644 --- a/tests/language-feature/pointer/ptr-to-groupshared.slang +++ b/tests/language-feature/pointer/ptr-to-groupshared.slang @@ -1,6 +1,7 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -Xslang -g0 //TEST:SIMPLE(filecheck=SPIRV): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 -//TEST:SIMPLE(filecheck=SPIRV_VARPTR): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 -capability SPV_KHR_variable_pointers +//TEST:SIMPLE(filecheck=SPIRV_VARPTR): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -capability SPV_KHR_variable_pointers -DTEST_NOINLINE_GROUPSHARED_PTR +//TEST:SIMPLE(filecheck=SPIRV_NOINLINE): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -DTEST_NOINLINE_GROUPSHARED_PTR // By default slang-test uses `-g` and it requires `VariablePointers`, which // doesn't produce the correct result due to the bug on the graphics driver. 
@@ -12,13 +13,17 @@ // CHECK: 1 // CHECK-NEXT: 2 // CHECK-NEXT: 0 -// SPIRV-NOT: OpCapability VariablePointers +// SPIRV: OpCapability VariablePointers +// SPIRV: OpExtension "SPV_KHR_variable_pointers" // SPIRV: OpMemoryModel -// SPIRV-NOT: OpTypeFunction %void %_ptr_Workgroup +// SPIRV: OpTypeFunction %void %_ptr_Workgroup // SPIRV_VARPTR: OpCapability VariablePointers // SPIRV_VARPTR: OpExtension "SPV_KHR_variable_pointers" // SPIRV_VARPTR: OpMemoryModel // SPIRV_VARPTR: OpTypeFunction %void %_ptr_Workgroup +// SPIRV_NOINLINE: OpCapability VariablePointers +// SPIRV_NOINLINE: OpExtension "SPV_KHR_variable_pointers" +// SPIRV_NOINLINE: OpTypeFunction %void %_ptr_Workgroup struct Data { @@ -42,3 +47,19 @@ void computeMain(uint3 group_thread_id: SV_GroupThreadID) shared = Data(1, 2); foo(__getAddress(shared)); } + +#ifdef TEST_NOINLINE_GROUPSHARED_PTR +[noinline] +void noinlineFoo(Ptr ptr) +{ + outputBuffer[0] = ptr.value1; + outputBuffer[1] = ptr.value2; +} + +[numthreads(3, 1, 1)] +void computeMainNoInline(uint3 group_thread_id: SV_GroupThreadID) +{ + shared = Data(1, 2); + noinlineFoo(__getAddress(shared)); +} +#endif From 2a97eae651bf36cffbf571c60167defa9c4fe415 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 18:37:40 -0700 Subject: [PATCH 74/80] Check memory model for groupshared variable pointers --- tests/language-feature/pointer/ptr-to-groupshared.slang | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/language-feature/pointer/ptr-to-groupshared.slang b/tests/language-feature/pointer/ptr-to-groupshared.slang index 8b40106b141..2ff35de57e0 100644 --- a/tests/language-feature/pointer/ptr-to-groupshared.slang +++ b/tests/language-feature/pointer/ptr-to-groupshared.slang @@ -23,6 +23,7 @@ // SPIRV_VARPTR: OpTypeFunction %void %_ptr_Workgroup // SPIRV_NOINLINE: OpCapability VariablePointers // SPIRV_NOINLINE: OpExtension "SPV_KHR_variable_pointers" +// SPIRV_NOINLINE: OpMemoryModel // 
SPIRV_NOINLINE: OpTypeFunction %void %_ptr_Workgroup struct Data From e61856a5f7f5d7702866bde9113ef5aafaa5030a Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 20:21:42 -0700 Subject: [PATCH 75/80] Fix SPIR-V workgroup pointer capabilities --- source/slang/slang-emit-spirv.cpp | 12 ++++++++++-- .../pointer/ptr-to-groupshared.slang | 14 +++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 166694eae3d..08dba0543ee 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -2770,7 +2770,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex // { auto funcType = static_cast(inst); - requireFunctionTypeCapabilitiesIfNeeded(funcType); return emitOpTypeFunction( inst, funcType->getResultType(), @@ -3952,6 +3951,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex /// Emit the given `irFunc` to SPIR-V SpvInst* emitFunc(IRFunc* irFunc) { + requireFunctionTypeCapabilitiesIfNeeded(irFunc->getDataType()); + // [2.4: Logical Layout of a Module] // // > All function declarations ("declarations" are functions @@ -8190,7 +8191,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitGetOffsetPtr(SpvInstParent* parent, IRInst* inst) { - requireVariableBufferCapabilityIfNeeded(inst->getDataType()); + requirePtrAccessChainCapabilityIfNeeded(inst->getDataType()); return emitOpPtrAccessChain( parent, @@ -11301,6 +11302,13 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } + void requirePtrAccessChainCapabilityIfNeeded(IRInst* type) + { + requireVariableBufferCapabilityIfNeeded(type); + if (isWorkgroupPointerType(type)) + requireSPIRVVariablePointersCapability(); + } + void requireSPIRVVariablePointersCapability() { 
ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_variable_pointers")); diff --git a/tests/language-feature/pointer/ptr-to-groupshared.slang b/tests/language-feature/pointer/ptr-to-groupshared.slang index 2ff35de57e0..2554c083222 100644 --- a/tests/language-feature/pointer/ptr-to-groupshared.slang +++ b/tests/language-feature/pointer/ptr-to-groupshared.slang @@ -1,5 +1,5 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -Xslang -g0 -//TEST:SIMPLE(filecheck=SPIRV): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 +//TEST:SIMPLE(filecheck=SPIRV_INLINE): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 //TEST:SIMPLE(filecheck=SPIRV_VARPTR): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -capability SPV_KHR_variable_pointers -DTEST_NOINLINE_GROUPSHARED_PTR //TEST:SIMPLE(filecheck=SPIRV_NOINLINE): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -DTEST_NOINLINE_GROUPSHARED_PTR @@ -8,15 +8,14 @@ // Tracked by github issue #9061 //DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -// Tests if we handle passing groupshared address-space pointers correctly to a function -// when that data-type needs legalization (Data -> Data_natural due to `lower-buffer-element-type`). +// Tests if we handle groupshared address-space pointers correctly when that data-type needs +// legalization (Data -> Data_natural due to `lower-buffer-element-type`). The active runtime case +// stays force-inlined because VariablePointers produces wrong results on the CI Vulkan driver. 
// CHECK: 1 // CHECK-NEXT: 2 // CHECK-NEXT: 0 -// SPIRV: OpCapability VariablePointers -// SPIRV: OpExtension "SPV_KHR_variable_pointers" -// SPIRV: OpMemoryModel -// SPIRV: OpTypeFunction %void %_ptr_Workgroup +// SPIRV_INLINE-NOT: OpCapability VariablePointers +// SPIRV_INLINE: OpMemoryModel // SPIRV_VARPTR: OpCapability VariablePointers // SPIRV_VARPTR: OpExtension "SPV_KHR_variable_pointers" // SPIRV_VARPTR: OpMemoryModel @@ -36,6 +35,7 @@ struct Data uniform int* outputBuffer; groupshared Data shared; +[ForceInline] void foo(Ptr ptr) { outputBuffer[0] = ptr.value1; From 98a681f2fb3e6bef69d89a6c1107fa032cd404de Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 20:35:17 -0700 Subject: [PATCH 76/80] Use OpPtrAccessChain base type for capabilities --- source/slang/slang-emit-spirv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 08dba0543ee..d7223cb4eeb 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -8191,7 +8191,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitGetOffsetPtr(SpvInstParent* parent, IRInst* inst) { - requirePtrAccessChainCapabilityIfNeeded(inst->getDataType()); + requirePtrAccessChainCapabilityIfNeeded(inst->getOperand(0)->getDataType()); return emitOpPtrAccessChain( parent, From 59b7cf8d8277b74dc1060db3e4da57c4cc2207ae Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sat, 6 Jun 2026 21:33:49 -0700 Subject: [PATCH 77/80] Address latest PR review gaps --- .../user-guide/a2-01-spirv-target-specific.md | 2 +- .../a3-02-reference-capability-atoms.md | 6 ++- source/slang/slang-capabilities.capdef | 6 ++- source/slang/slang-emit-spirv.cpp | 17 ++----- ...byte-address-half-atomics-capability.slang | 47 +++++++++++++++++++ .../pointer/ptr-to-groupshared.slang | 6 +++ 6 
files changed, 66 insertions(+), 18 deletions(-) diff --git a/docs/user-guide/a2-01-spirv-target-specific.md b/docs/user-guide/a2-01-spirv-target-specific.md index a3d1b7f604e..103c1b8f8d6 100644 --- a/docs/user-guide/a2-01-spirv-target-specific.md +++ b/docs/user-guide/a2-01-spirv-target-specific.md @@ -171,7 +171,7 @@ GLSL 4.6 with [GLSL_EXT_shader_atomic_float2](https://github.com/KhronosGroup/GL SPIR-V 1.5 with [SPV_EXT_shader_atomic_float_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_add.asciidoc) and [SPV_EXT_shader_atomic_float_min_max](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_min_max.asciidoc) can use atomic operations for 32-bit float type and 64-bit float type. SPIR-V 1.5 with [SPV_EXT_shader_atomic_float16_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float16_add.asciidoc) can use atomic operations for 16-bit float type. -SPIR-V 1.5 with [SPV_NV_shader_atomic_fp16_vector](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_shader_atomic_fp16_vector.asciidoc) can use vector atomic add/sub/min/max/exchange operations for 16-bit float vector types with 2 or 4 components. +SPIR-V 1.5 with [SPV_NV_shader_atomic_fp16_vector](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_shader_atomic_fp16_vector.asciidoc) can use vector atomic add/min/max/exchange operations for 16-bit float vector types with 2 or 4 components. Vector atomic sub is emitted as a negated vector atomic add. 
| | 32-bit integer | 64-bit integer | 32-bit float | 64-bit float | 16-bit float | 16-bit float vector | | ------ | -------------- | --------------- | --------------------- | ---------------- | ---------------- | ----------------------- | diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index ea1f56c50e6..1afb511349b 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -702,7 +702,8 @@ Extensions > Represents the SPIR-V extension for ray tracing motion blur. `SPV_NV_shader_atomic_fp16_vector` -> Represents the SPIR-V extension for vector atomic float 16 add/sub/min/max/exchange operations. +> Represents the SPIR-V extension for vector atomic float 16 add/min/max/exchange operations. +> Vector atomic sub is emitted as a negated vector atomic add. `SPV_NV_shader_image_footprint` > Represents the SPIR-V extension for shader image footprint. @@ -727,7 +728,8 @@ Extensions > Represents the SPIR-V capability for atomic float 16 min/max operations. `spvAtomicFloat16VectorNV` -> Represents the SPIR-V capability for vector atomic float 16 add/sub/min/max/exchange operations. +> Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. +> Vector atomic sub is emitted as a negated vector atomic add. > Implies scalar atomic float 16 add support. `spvAtomicFloat32AddEXT` diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 1ca4414890c..85d12a4bf94 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -543,7 +543,8 @@ def SPV_EXT_shader_atomic_float_add : _spirv_1_0; /// [EXT] def SPV_EXT_shader_atomic_float16_add : SPV_EXT_shader_atomic_float_add; -/// Represents the SPIR-V extension for vector atomic float 16 add/sub/min/max/exchange operations. 
+/// Represents the SPIR-V extension for vector atomic float 16 add/min/max/exchange operations. +/// Vector atomic sub is emitted as a negated vector atomic add. /// [EXT] def SPV_NV_shader_atomic_fp16_vector : _spirv_1_0; @@ -704,7 +705,8 @@ def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; /// [EXT] def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; -/// Represents the SPIR-V capability for vector atomic float 16 add/sub/min/max/exchange operations. +/// Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. +/// Vector atomic sub is emitted as a negated vector atomic add. /// Implies scalar atomic float 16 add support. /// [EXT] def spvAtomicFloat16VectorNV : SPV_NV_shader_atomic_fp16_vector + spvAtomicFloat16AddEXT; diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index d7223cb4eeb..1bd23a3199e 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4448,20 +4448,11 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex void ensureAtomicCapability(IRInst* atomicInst, SpvOp op) { - IRType* atomicValueType = atomicInst->getDataType(); + IRType* atomicValueType = getAtomicOperationValueType(atomicInst); + if (!atomicValueType) + return; + auto typeOp = atomicValueType->getOp(); - if (typeOp == kIROp_VoidType) - { - auto ptrType = atomicInst->getOperand(0)->getDataType(); - IRBuilder builder(atomicInst); - if (auto valType = tryGetPointedToType(&builder, ptrType)) - { - if (auto atomicType = as(valType)) - valType = atomicType->getElementType(); - atomicValueType = valType; - typeOp = valType->getOp(); - } - } switch (op) { diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 74f2bfd18a1..2ba17352e43 100644 --- 
a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -14,6 +14,9 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_SUB_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_SUB //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_ATOMIC +//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_SUB_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_SUB +//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_MIN_MAX +//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX //DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly 
-restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH @@ -36,6 +39,9 @@ //TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC //TEST:SIMPLE(filecheck=VECTOR_SUB): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_SUB //TEST:SIMPLE(filecheck=VECTOR4): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_ATOMIC +//TEST:SIMPLE(filecheck=VECTOR4_SUB): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_SUB +//TEST:SIMPLE(filecheck=VECTOR4_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_MIN_MAX +//TEST:SIMPLE(filecheck=VECTOR4_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_EXCHANGE //TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX //TEST:SIMPLE(filecheck=VECTOR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_EXCHANGE //TEST:SIMPLE(filecheck=SCALAR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_DIRECT_SCALAR_EXCHANGE @@ -188,6 +194,20 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) __atomic_add(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); 
// DIRECT_VECTOR4_NO_VECTOR: entry point uses capabilities not in specified profile // DIRECT_VECTOR4_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' +#elif defined(TEST_DIRECT_VECTOR4_SUB) + __atomic_sub(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); +// DIRECT_VECTOR4_SUB_NO_VECTOR: entry point uses capabilities not in specified profile +// DIRECT_VECTOR4_SUB_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' +#elif defined(TEST_DIRECT_VECTOR4_MIN_MAX) + __atomic_min(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); +// DIRECT_VECTOR4_MIN_MAX_NO_VECTOR: entry point uses capabilities not in specified profile +// DIRECT_VECTOR4_MIN_MAX_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' + __atomic_max(vector4Buffer[1], half4(5.0h, 6.0h, 7.0h, 8.0h)); +#elif defined(TEST_DIRECT_VECTOR4_EXCHANGE) + half4 originalValue = __atomic_exchange(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); +// DIRECT_VECTOR4_EXCHANGE_NO_VECTOR: entry point uses capabilities not in specified profile +// DIRECT_VECTOR4_EXCHANGE_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' + outputBuffer[0] = float(originalValue.x + originalValue.y + originalValue.z + originalValue.w); #elif defined(TEST_DIRECT_VECTOR_MIN_MAX) __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); // DIRECT_MIN_MAX_NO_VECTOR: entry point uses capabilities not in specified profile @@ -274,6 +294,12 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // SCALAR-NOT: OpCapability AtomicFloat16VectorNV // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// IGNORE_CAPS-NOT: entry point uses capabilities not in specified profile +// IGNORE_CAPS: OpCapability AtomicFloat16AddEXT +// IGNORE_CAPS: OpExtension "SPV_EXT_shader_atomic_float16_add" +// IGNORE_CAPS-COUNT-5: OpAtomicFAddEXT %half +// IGNORE_CAPS-NOT: OpAtomicFAddEXT %v2half + // VECTOR_F16-NOT: OpCapability AtomicFloat16AddEXT // VECTOR_F16-NOT: OpExtension 
"SPV_EXT_shader_atomic_float16_add" // VECTOR_F16: OpCapability AtomicFloat16VectorNV @@ -319,6 +345,27 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // VECTOR4-NOT: OpCapability AtomicFloat16AddEXT // VECTOR4-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4_SUB-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4_SUB-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4_SUB: OpCapability AtomicFloat16VectorNV +// VECTOR4_SUB: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR4_SUB-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v4half {{%[A-Za-z0-9_]+}} {{%[A-Za-z0-9_]+}} {{%[A-Za-z0-9_]+}} {{%[A-Za-z0-9_]+}} +// VECTOR4_SUB: [[NEG_VALUE:%[0-9]+]] = OpFNegate %v4half [[VALUE]] +// VECTOR4_SUB: OpAtomicFAddEXT %v4half {{%[0-9]+}} %uint_1 %uint_0 [[NEG_VALUE]] + +// VECTOR4_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4_MIN_MAX: OpCapability AtomicFloat16VectorNV +// VECTOR4_MIN_MAX: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR4_MIN_MAX: OpAtomicFMinEXT %v4half +// VECTOR4_MIN_MAX: OpAtomicFMaxEXT %v4half + +// VECTOR4_EXCHANGE-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4_EXCHANGE-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4_EXCHANGE: OpCapability AtomicFloat16VectorNV +// VECTOR4_EXCHANGE: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR4_EXCHANGE: OpAtomicExchange %v4half + // VECTOR_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT // VECTOR_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" // VECTOR_MIN_MAX: OpCapability AtomicFloat16VectorNV diff --git a/tests/language-feature/pointer/ptr-to-groupshared.slang b/tests/language-feature/pointer/ptr-to-groupshared.slang index 2554c083222..7d0c874f5f4 100644 --- a/tests/language-feature/pointer/ptr-to-groupshared.slang +++ b/tests/language-feature/pointer/ptr-to-groupshared.slang @@ -2,6 +2,7 @@ 
//TEST:SIMPLE(filecheck=SPIRV_INLINE): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 //TEST:SIMPLE(filecheck=SPIRV_VARPTR): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -capability SPV_KHR_variable_pointers -DTEST_NOINLINE_GROUPSHARED_PTR //TEST:SIMPLE(filecheck=SPIRV_NOINLINE): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -DTEST_NOINLINE_GROUPSHARED_PTR +//TEST:SIMPLE(filecheck=SPIRV_NOINLINE_RESTRICTIVE): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -restrictive-capability-check -capability spirv_1_5 -DTEST_NOINLINE_GROUPSHARED_PTR // By default slang-test uses `-g` and it requires `VariablePointers`, which // doesn't produce the correct result due to the bug on the graphics driver. @@ -24,6 +25,11 @@ // SPIRV_NOINLINE: OpExtension "SPV_KHR_variable_pointers" // SPIRV_NOINLINE: OpMemoryModel // SPIRV_NOINLINE: OpTypeFunction %void %_ptr_Workgroup +// SPIRV_NOINLINE_RESTRICTIVE-NOT: entry point uses capabilities +// SPIRV_NOINLINE_RESTRICTIVE: OpCapability VariablePointers +// SPIRV_NOINLINE_RESTRICTIVE: OpExtension "SPV_KHR_variable_pointers" +// SPIRV_NOINLINE_RESTRICTIVE: OpMemoryModel +// SPIRV_NOINLINE_RESTRICTIVE: OpTypeFunction %void %_ptr_Workgroup struct Data { From abd537c8f9d74754f275ea9349efe7dfaa6b3666 Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:49:12 -0700 Subject: [PATCH 78/80] Keep atomic validation name --- source/slang/slang-ir-spirv-legalize.cpp | 7 ++++++- source/slang/slang-ir-validate.cpp | 8 ++++++-- source/slang/slang-ir-validate.h | 5 +++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index c76ee6ff965..9ee2876afef 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -2660,7 
+2660,12 @@ struct SPIRVLegalizationContext : public SourceEmitterBase // For SPIR-V, we don't skip this validation, because we might then be generating // invalid SPIR-V. bool skipFuncParamValidation = false; - validateSPIRVAtomicOperations(skipFuncParamValidation, m_sink, m_module->getModuleInst()); + bool validateSPIRVAtomics = true; + validateAtomicOperations( + skipFuncParamValidation, + m_sink, + m_module->getModuleInst(), + validateSPIRVAtomics); } void updateFunctionTypes() diff --git a/source/slang/slang-ir-validate.cpp b/source/slang/slang-ir-validate.cpp index d9139900161..9a960f6af30 100644 --- a/source/slang/slang-ir-validate.cpp +++ b/source/slang/slang-ir-validate.cpp @@ -628,9 +628,13 @@ void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink validateAtomicOperationsImpl(skipFuncParamValidation, sink, inst, false); } -void validateSPIRVAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst) +void validateAtomicOperations( + bool skipFuncParamValidation, + DiagnosticSink* sink, + IRInst* inst, + bool validateSPIRVAtomics) { - validateAtomicOperationsImpl(skipFuncParamValidation, sink, inst, true); + validateAtomicOperationsImpl(skipFuncParamValidation, sink, inst, validateSPIRVAtomics); } static void validateVectorOrMatrixElementType( diff --git a/source/slang/slang-ir-validate.h b/source/slang/slang-ir-validate.h index 85843aeaa69..534775a9291 100644 --- a/source/slang/slang-ir-validate.h +++ b/source/slang/slang-ir-validate.h @@ -80,10 +80,11 @@ class [[nodiscard]] IRValidationScope // lead back to in/inout parameters that we can't validate. 
void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst); -void validateSPIRVAtomicOperations( +void validateAtomicOperations( bool skipFuncParamValidation, DiagnosticSink* sink, - IRInst* inst); + IRInst* inst, + bool validateSPIRVAtomics); // Overload that takes IRModule* first for use with SLANG_PASS macro void validateAtomicOperations(IRModule* module, bool skipFuncParamValidation, DiagnosticSink* sink); From a9e194391ce80ac743af54158236aa316e6b938d Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:59:46 -0700 Subject: [PATCH 79/80] Address atomic capability review --- source/slang/slang-emit-spirv.cpp | 1 - source/slang/slang-ir-validate.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 1bd23a3199e..a77a95e0f34 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -4438,7 +4438,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex return; auto elementCount = elementCountInst->getValue(); - SLANG_ASSERT(elementCount == 2 || elementCount == 4); if (elementCount != 2 && elementCount != 4) return; diff --git a/source/slang/slang-ir-validate.h b/source/slang/slang-ir-validate.h index 534775a9291..c0e8bea4085 100644 --- a/source/slang/slang-ir-validate.h +++ b/source/slang/slang-ir-validate.h @@ -80,6 +80,8 @@ class [[nodiscard]] IRValidationScope // lead back to in/inout parameters that we can't validate. void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst); +// If 'validateSPIRVAtomics' is true, also reject fp16 vector atomic operations +// and widths that cannot be represented by the SPIR-V NV vector atomic extension. 
void validateAtomicOperations( bool skipFuncParamValidation, DiagnosticSink* sink, From 6e92f2d6c6c18f86e12e6247395474607f9ce88c Mon Sep 17 00:00:00 2001 From: Jay Kwak <82421531+jkwak-work@users.noreply.github.com> Date: Sun, 7 Jun 2026 09:09:03 -0700 Subject: [PATCH 80/80] Remove fp16 vector capability diagnostics --- source/slang/slang-ir-spirv-legalize.cpp | 130 ------------------ ...byte-address-half-atomics-capability.slang | 76 +--------- 2 files changed, 2 insertions(+), 204 deletions(-) diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index 9ee2876afef..bdbb7032a88 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -2838,135 +2838,6 @@ static bool hasExplicitInterlockInst(IRFunc* func) return false; } -static bool isSupportedSPIRVFp16VectorAtomic(IRInst* inst) -{ - switch (inst->getOp()) - { - case kIROp_AtomicExchange: - case kIROp_AtomicAdd: - case kIROp_AtomicSub: - case kIROp_AtomicMin: - case kIROp_AtomicMax: - break; - default: - return false; - } - - auto valueType = getAtomicOperationValueType(inst); - auto vectorType = as(valueType); - if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) - return false; - - auto elementCountInst = as(vectorType->getElementCount()); - if (!elementCountInst) - return false; - - auto elementCount = elementCountInst->getValue(); - return elementCount == 2 || elementCount == 4; -} - -static bool shouldDiagnoseImplicitCapabilityUse(CompilerOptionSet& optionSet) -{ - if (optionSet.getBoolOption(CompilerOptionName::IgnoreCapabilities)) - return false; - - return optionSet.getBoolOption(CompilerOptionName::RestrictiveCapabilityCheck) || - isSpecificProfileOrCapabilityRequested(optionSet); -} - -static void diagnoseCapabilityUseForEntryPoint( - SPIRVEmitSharedContext* context, - IRFunc* entryPoint, - CapabilityName capabilityName, - Dictionary>& diagnosedCapabilityUses) -{ - auto entryPointDecor = 
entryPoint->findDecoration(); - if (!entryPointDecor) - return; - - CapabilitySet stageTargetCaps = context->m_targetProgram->getTargetReq()->getTargetCaps(); - CapabilitySet stageCapabilitySet = entryPointDecor->getProfile().getCapabilityName(); - CapabilitySet required(capabilityName); - stageTargetCaps.join(stageCapabilitySet); - required.join(stageCapabilitySet); - - if (stageTargetCaps.atLeastOneSetImpliedInOther(required) == - CapabilitySet::ImpliesReturnFlags::Implied) - return; - - CapabilityAtomSet addedAtoms{}; - if (auto stageCapSet = stageTargetCaps.getAtomSets()) - { - if (auto requiredSet = required.getAtomSets()) - CapabilityAtomSet::calcSubtract(addedAtoms, (*requiredSet), (*stageCapSet)); - } - - StringBuilder capsSb; - printDiagnosticArg(capsSb, addedAtoms); - String missingCapsStr = capsSb.toString(); - if (!diagnosedCapabilityUses[entryPoint].add(missingCapsStr)) - return; - - StringBuilder entryPointSb; - printDiagnosticArg(entryPointSb, entryPoint); - - auto& optionSet = context->m_targetProgram->getOptionSet(); - auto entryPointProfileName = entryPointDecor->getProfile().getName(); - maybeDiagnoseWarningOrError( - context->m_sink, - optionSet, - DiagnosticCategory::Capability, - Diagnostics::ProfileImplicitlyUpgraded{ - .entryPoint = entryPointSb.toString(), - .profile = entryPointProfileName, - .capabilities = missingCapsStr, - .location = entryPoint->sourceLoc, - }, - Diagnostics::ProfileImplicitlyUpgradedRestrictive{ - .entryPoint = entryPointSb.toString(), - .profile = entryPointProfileName, - .capabilities = missingCapsStr, - .location = entryPoint->sourceLoc, - }); -} - -static void diagnoseSPIRVAtomicCapabilityUses(SPIRVEmitSharedContext* context, IRModule* module) -{ - auto& optionSet = context->m_targetProgram->getOptionSet(); - if (!shouldDiagnoseImplicitCapabilityUse(optionSet)) - return; - - Dictionary> diagnosedCapabilityUses; - for (auto globalInst : module->getGlobalInsts()) - { - auto func = as(globalInst); - if (!func) - 
continue; - - auto entryPoints = context->m_referencingEntryPoints.tryGetValue(func); - if (!entryPoints) - continue; - - for (auto block : func->getBlocks()) - { - for (auto childInst : block->getChildren()) - { - if (!isSupportedSPIRVFp16VectorAtomic(childInst)) - continue; - - for (auto entryPoint : *entryPoints) - { - diagnoseCapabilityUseForEntryPoint( - context, - entryPoint, - CapabilityName::spvAtomicFloat16VectorNV, - diagnosedCapabilityUses); - } - } - } - } -} - void insertFragmentShaderInterlock(SPIRVEmitSharedContext* context, IRModule* module) { HashSet fragmentShaders; @@ -3129,7 +3000,6 @@ void legalizeIRForSPIRV( eliminateDeadCode(module); buildEntryPointReferenceGraph(context->m_referencingEntryPoints, module); - diagnoseSPIRVAtomicCapabilityUses(context, module); insertFragmentShaderInterlock(context, module); } diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang index 2ba17352e43..fc288772d49 100644 --- a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -5,31 +5,17 @@ //DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 //TEST:SIMPLE(filecheck=VECTOR_F16): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -// Direct half-vector `__atomic_*` checks. `DIRECT` selects direct structured-buffer -// atomics; `NO_VECTOR` means only the scalar fp16 atomic capability is enabled. +// Direct half-vector `__atomic_*` checks. // RHI does not yet expose a feature gate for VK_NV_shader_atomic_float16_vector, so these // runtime checks stay disabled while the active tests below cover SPIR-V codegen/diagnostics. 
//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT //DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT4):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT_VECTOR4 -//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_ATOMIC -//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_SUB_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_SUB -//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_ATOMIC -//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_SUB_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_SUB -//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_MIN_MAX -//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_VECTOR4_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR4_EXCHANGE -//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_MIN_MAX_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute 
-emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_MIN_MAX -//DIAGNOSTIC_TEST:SIMPLE(diag=DIRECT_EXCHANGE_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_DIRECT_VECTOR_EXCHANGE //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH //DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_COMPARE_EXCHANGE,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_COMPARE_EXCHANGE -// Emulated fp16 vector checks. These paths are emitted as vector atomics, so -// they must diagnose missing spvAtomicFloat16VectorNV when only scalar fp16 add is enabled. +// Emulated fp16 vector checks. // Runtime coverage for the pointer-form helpers would require SPIR-V variable pointers, // which existing tests keep disabled on current GCP runners. 
-//DIAGNOSTIC_TEST:SIMPLE(diag=BUFFER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability cuda_hlsl_spirv+sm_5_0+spvAtomicFloat16AddEXT -DTEST_BUFFER_EMULATED_NO_VECTOR -//DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_EMULATED_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_EMULATED -//DIAGNOSTIC_TEST:SIMPLE(diag=POINTER_F16X2_NO_VECTOR,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_POINTER_F16X2 -//DIAGNOSTIC_TEST:SIMPLE(diag=MULTI_ENTRY_NO_VECTOR,non-exhaustive): -target spirv -entry computeMainA -entry computeMainB -emit-spirv-directly -restrictive-capability-check -capability spvAtomicFloat16AddEXT -DTEST_MULTI_ENTRY_HELPER_NO_VECTOR //TEST:SIMPLE(filecheck=POINTER_EMULATED): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -capability spvAtomicFloat16VectorNV -DTEST_POINTER_EMULATED //TEST:SIMPLE(filecheck=POINTER_F16X2): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_POINTER_F16X2 @@ -65,37 +51,6 @@ RWStructuredBuffer unsupportedVectorBuffer; //TEST_INPUT:ubuffer(stride=4, count=1):name=uintBuffer RWStructuredBuffer uintBuffer; -#ifdef TEST_MULTI_ENTRY_HELPER_NO_VECTOR -void sharedVectorAtomic() -{ - __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); -} - -void callSharedVectorAtomic() -{ - sharedVectorAtomic(); -} - -[shader("compute")] -[numthreads(1, 1, 1)] -void computeMainA(uint3 dispatchThreadID : SV_DispatchThreadID) -{ - sharedVectorAtomic(); - callSharedVectorAtomic(); -// MULTI_ENTRY_NO_VECTOR: entry point 'computeMainA' uses capabilities -// MULTI_ENTRY_NO_VECTOR-SAME: Missing capabilities are: 
'spvAtomicFloat16VectorNV' -// MULTI_ENTRY_NO_VECTOR: entry point 'computeMainB' uses capabilities -// MULTI_ENTRY_NO_VECTOR-SAME: Missing capabilities are: 'spvAtomicFloat16VectorNV' -// MULTI_ENTRY_NO_VECTOR-NOT: entry point ' -} - -[shader("compute")] -[numthreads(1, 1, 1)] -void computeMainB(uint3 dispatchThreadID : SV_DispatchThreadID) -{ - callSharedVectorAtomic(); -} -#else [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { @@ -184,39 +139,23 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // RUNTIME_DIRECT4-NEXT: 12.000000 #elif defined(TEST_DIRECT_VECTOR_ATOMIC) __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); -// DIRECT_NO_VECTOR: entry point uses capabilities not in specified profile -// DIRECT_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' #elif defined(TEST_DIRECT_VECTOR_SUB) __atomic_sub(vectorBuffer[0], half2(1.0h, 2.0h)); -// DIRECT_SUB_NO_VECTOR: entry point uses capabilities not in specified profile -// DIRECT_SUB_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' #elif defined(TEST_DIRECT_VECTOR4_ATOMIC) __atomic_add(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); -// DIRECT_VECTOR4_NO_VECTOR: entry point uses capabilities not in specified profile -// DIRECT_VECTOR4_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' #elif defined(TEST_DIRECT_VECTOR4_SUB) __atomic_sub(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); -// DIRECT_VECTOR4_SUB_NO_VECTOR: entry point uses capabilities not in specified profile -// DIRECT_VECTOR4_SUB_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' #elif defined(TEST_DIRECT_VECTOR4_MIN_MAX) __atomic_min(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); -// DIRECT_VECTOR4_MIN_MAX_NO_VECTOR: entry point uses capabilities not in specified profile -// DIRECT_VECTOR4_MIN_MAX_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' __atomic_max(vector4Buffer[1], half4(5.0h, 6.0h, 7.0h, 8.0h)); #elif 
defined(TEST_DIRECT_VECTOR4_EXCHANGE) half4 originalValue = __atomic_exchange(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); -// DIRECT_VECTOR4_EXCHANGE_NO_VECTOR: entry point uses capabilities not in specified profile -// DIRECT_VECTOR4_EXCHANGE_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' outputBuffer[0] = float(originalValue.x + originalValue.y + originalValue.z + originalValue.w); #elif defined(TEST_DIRECT_VECTOR_MIN_MAX) __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); -// DIRECT_MIN_MAX_NO_VECTOR: entry point uses capabilities not in specified profile -// DIRECT_MIN_MAX_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' __atomic_max(vectorBuffer[1], half2(3.0h, 4.0h)); #elif defined(TEST_DIRECT_VECTOR_EXCHANGE) half2 originalValue = __atomic_exchange(vectorBuffer[0], half2(1.0h, 2.0h)); -// DIRECT_EXCHANGE_NO_VECTOR: entry point uses capabilities not in specified profile -// DIRECT_EXCHANGE_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' outputBuffer[0] = float(originalValue.x + originalValue.y); #elif defined(TEST_DIRECT_SCALAR_EXCHANGE) uint originalValue = __atomic_exchange(uintBuffer[0], 1u); @@ -239,12 +178,6 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) // Half-vector atomic load/store are not source-expressible today: Atomic requires // scalar IAtomicable types, while half2/half4 do not conform to that interface. The // validator rejects those IR ops defensively if a later lowering path creates them. 
-#elif defined(TEST_BUFFER_EMULATED_NO_VECTOR) - half originalValue; - tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); -// BUFFER_EMULATED_NO_VECTOR: entry point uses capabilities not in specified profile -// BUFFER_EMULATED_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' - outputBuffer[0] = float(originalValue); #elif defined(TEST_EMULATED) half originalValue; tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); @@ -254,14 +187,10 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) #elif defined(TEST_POINTER_EMULATED) half originalValue; InterlockedAddF16Emulated(&halfBuffer[0], 1.0h, originalValue); -// POINTER_EMULATED_NO_VECTOR: entry point uses capabilities not in specified profile -// POINTER_EMULATED_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' outputBuffer[0] = float(originalValue); #elif defined(TEST_POINTER_F16X2) half2 originalValue; InterlockedAddF16x2(&vectorBuffer[0], half2(1.0h, 2.0h), originalValue); -// POINTER_F16X2_NO_VECTOR: entry point uses capabilities not in specified profile -// POINTER_F16X2_NO_VECTOR: Missing capabilities are: 'spvAtomicFloat16VectorNV' outputBuffer[0] = float(originalValue.x + originalValue.y); #else half originalValue; @@ -282,7 +211,6 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) outputBuffer[4] = float(originalValue); #endif } -#endif // SCALAR-NOT: OpCapability AtomicFloat16VectorNV // SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector"