diff --git a/docs/command-line-slangc-reference.md b/docs/command-line-slangc-reference.md index 6508a617860..a34d8c1ddf9 100644 --- a/docs/command-line-slangc-reference.md +++ b/docs/command-line-slangc-reference.md @@ -1314,6 +1314,7 @@ A capability describes an optional feature that a target may or may not support. * `SPV_EXT_descriptor_indexing` : enables the SPV_EXT_descriptor_indexing extension * `SPV_EXT_shader_atomic_float_add` : enables the SPV_EXT_shader_atomic_float_add extension * `SPV_EXT_shader_atomic_float16_add` : enables the SPV_EXT_shader_atomic_float16_add extension +* `SPV_NV_shader_atomic_fp16_vector` : enables the SPV_NV_shader_atomic_fp16_vector extension * `SPV_EXT_shader_atomic_float_min_max` : enables the SPV_EXT_shader_atomic_float_min_max extension * `SPV_EXT_mesh_shader` : enables the SPV_EXT_mesh_shader extension * `SPV_EXT_demote_to_helper_invocation` : enables the SPV_EXT_demote_to_helper_invocation extension @@ -1351,6 +1352,7 @@ A capability describes an optional feature that a target may or may not support. * `spvDeviceGroup` * `spvAtomicFloat32AddEXT` * `spvAtomicFloat16AddEXT` +* `spvAtomicFloat16VectorNV` * `spvAtomicFloat64AddEXT` * `spvInt64Atomics` * `spvAtomicFloat32MinMaxEXT` diff --git a/docs/user-guide/a2-01-spirv-target-specific.md b/docs/user-guide/a2-01-spirv-target-specific.md index 9193e9fc6ee..103c1b8f8d6 100644 --- a/docs/user-guide/a2-01-spirv-target-specific.md +++ b/docs/user-guide/a2-01-spirv-target-specific.md @@ -170,13 +170,14 @@ GLSL 4.6 with [GLSL_EXT_shader_atomic_float](https://github.com/KhronosGroup/GLS GLSL 4.6 with [GLSL_EXT_shader_atomic_float2](https://github.com/KhronosGroup/GLSL/blob/main/extensions/ext/GLSL_EXT_shader_atomic_float2.txt) can use atomic operations for 16-bit float type. SPIR-V 1.5 with [SPV_EXT_shader_atomic_float_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_add.asciidoc) and [SPV_EXT_shader_atomic_float_min_max](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float_min_max.asciidoc) can use atomic operations for 32-bit float type and 64-bit float type. -SPIR-V 1.5 with [SPV_EXT_shader_atomic_float16_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float16_add.asciidoc) can use atomic operations for 16-bit float type - -| | 32-bit integer | 64-bit integer | 32-bit float | 64-bit float | 16-bit float | -| ------ | -------------- | --------------- | --------------------- | ---------------- | ---------------- | -| HLSL | Yes (SM5.0) | Yes (SM6.6) | Only bit-wise (SM6.6) | No | No | -| GLSL | Yes (GL4.3) | Yes (GL4.4+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | -| SPIR-V | Yes | Yes | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | +SPIR-V 1.5 with [SPV_EXT_shader_atomic_float16_add](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_shader_atomic_float16_add.asciidoc) can use atomic operations for 16-bit float type. +SPIR-V 1.5 with [SPV_NV_shader_atomic_fp16_vector](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/NV/SPV_NV_shader_atomic_fp16_vector.asciidoc) can use vector atomic add/min/max/exchange operations for 16-bit float vector types with 2 or 4 components. Vector atomic sub is emitted as a negated vector atomic add. + +| | 32-bit integer | 64-bit integer | 32-bit float | 64-bit float | 16-bit float | 16-bit float vector | +| ------ | -------------- | --------------- | --------------------- | ---------------- | ---------------- | ----------------------- | +| HLSL | Yes (SM5.0) | Yes (SM6.6) | Only bit-wise (SM6.6) | No | No | No | +| GLSL | Yes (GL4.3) | Yes (GL4.4+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (GL4.6+ext) | Yes (GL_NV ext) | +| SPIR-V | Yes | Yes | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV1.5+ext) | Yes (SPV_NV ext) | ## ConstantBuffer, StructuredBuffer and ByteAddressBuffer diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index c2262fc581e..1afb511349b 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -701,6 +701,10 @@ Extensions `SPV_NV_ray_tracing_motion_blur` > Represents the SPIR-V extension for ray tracing motion blur. +`SPV_NV_shader_atomic_fp16_vector` +> Represents the SPIR-V extension for vector atomic float 16 add/min/max/exchange operations. +> Vector atomic sub is emitted as a negated vector atomic add. + `SPV_NV_shader_image_footprint` > Represents the SPIR-V extension for shader image footprint. @@ -723,6 +727,11 @@ Extensions `spvAtomicFloat16MinMaxEXT` > Represents the SPIR-V capability for atomic float 16 min/max operations. +`spvAtomicFloat16VectorNV` +> Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. +> Vector atomic sub is emitted as a negated vector atomic add. +> Implies scalar atomic float 16 add support. + `spvAtomicFloat32AddEXT` > Represents the SPIR-V capability for atomic float 32 add operations. diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index bf35b722294..dbfb5ebe934 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -6491,9 +6491,13 @@ $} /// @param byteAddress The address at which to perform the atomic add operation. /// @param fp16x2Value Two 16-bit floating point values are packed into a 32-bit unsigned integer. /// @return The 2 16-bit floating point values packed into a 32-bit unsigned integer. + /// @remarks For SPIR-V, this helper requires `SPV_NV_shader_atomic_fp16_vector` + /// and emits a `half2` `OpAtomicFAdd`; the packed fp16x2 representation matches + /// the NVAPI HLSL ABI, but the underlying operation is a vector atomic. [__requiresNVAPI] [ForceInline] - [require(cuda_hlsl_spirv)] + [require(cuda_hlsl, sm_5_0)] + [require(spirv, spvAtomicFloat16VectorNV)] uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value) { __target_switch @@ -6511,14 +6515,17 @@ $} /// @param byteAddress The address at which to perform the atomic add operation. /// @param value The value to add to the value at `byteAddress`. /// @param originalValue The original value at `byteAddress` before the add operation. - /// @remarks For SPIR-V, this function maps to `OpAtomicFAdd` and requires `SPV_EXT_shader_atomic_float16_add` extension. + /// @remarks For SPIR-V, this function requires `SPV_EXT_shader_atomic_float16_add` + /// and maps to `OpAtomicFAdd` on a `half`. When `SPV_NV_shader_atomic_fp16_vector` + /// is available, it uses the half-vector atomic path instead. /// /// For HLSL, this function translates to an NVAPI call /// due to lack of native HLSL intrinsic for floating point atomic add. For CUDA, this function /// maps to `atomicAdd`. [__requiresNVAPI] [ForceInline] - [require(cuda_hlsl_spirv, sm_5_0)] + [require(cuda_hlsl, sm_5_0)] + [require(spirv, spvAtomicFloat16AddEXT)] void InterlockedAddF16(uint byteAddress, half value, out half originalValue) { __target_switch @@ -6536,6 +6543,20 @@ $} originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16)); } return; + case spvAtomicFloat16VectorNV: + { + let buf = __getEquivalentStructuredBuffer(this); + if ((byteAddress & 2) == 0) + { + originalValue = __atomic_add(buf[byteAddress/4], half2(value, half(0.0))).x; + } + else + { + originalValue = __atomic_add(buf[byteAddress/4], half2(half(0.0), value)).y; + } + return; + } + case spvAtomicFloat16AddEXT: default: { let buf = __getEquivalentStructuredBuffer(this); diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 3af42a7dc2a..85d12a4bf94 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -543,6 +543,11 @@ def SPV_EXT_shader_atomic_float_add : _spirv_1_0; /// [EXT] def SPV_EXT_shader_atomic_float16_add : SPV_EXT_shader_atomic_float_add; +/// Represents the SPIR-V extension for vector atomic float 16 add/min/max/exchange operations. +/// Vector atomic sub is emitted as a negated vector atomic add. +/// [EXT] +def SPV_NV_shader_atomic_fp16_vector : _spirv_1_0; + /// Represents the SPIR-V extension for atomic float min/max operations. /// [EXT] def SPV_EXT_shader_atomic_float_min_max : _spirv_1_0; @@ -700,6 +705,12 @@ def spvAtomicFloat32AddEXT : SPV_EXT_shader_atomic_float_add; /// [EXT] def spvAtomicFloat16AddEXT : SPV_EXT_shader_atomic_float16_add; +/// Represents the SPIR-V capability for vector atomic float 16 add/min/max/exchange operations. +/// Vector atomic sub is emitted as a negated vector atomic add. +/// Implies scalar atomic float 16 add support. +/// [EXT] +def spvAtomicFloat16VectorNV : SPV_NV_shader_atomic_fp16_vector + spvAtomicFloat16AddEXT; + /// Represents the SPIR-V capability for atomic float 64 add operations. /// [EXT] def spvAtomicFloat64AddEXT : SPV_EXT_shader_atomic_float_add; @@ -1261,7 +1272,7 @@ alias GL_NV_ray_tracing_motion_blur = _GL_NV_ray_tracing_motion_blur | spvRayTra /// Represents the GL_NV_shader_atomic_fp16_vector extension. /// [EXT] -alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | _spirv_1_0; +alias GL_NV_shader_atomic_fp16_vector = _GL_NV_shader_atomic_fp16_vector + _GL_NV_gpu_shader5 | spvAtomicFloat16VectorNV; /// Represents the GL_NV_shader_invocation_reorder extension (NVIDIA-specific). /// [EXT] diff --git a/source/slang/slang-check-shader.cpp b/source/slang/slang-check-shader.cpp index 7091312e409..4dbc6ce79c0 100644 --- a/source/slang/slang-check-shader.cpp +++ b/source/slang/slang-check-shader.cpp @@ -1956,28 +1956,6 @@ void validateEntryPoint(EntryPoint* entryPoint, DiagnosticSink* sink) else { auto& targetOptionSet = target->getOptionSet(); - bool specificProfileRequested = - targetOptionSet.hasOption(CompilerOptionName::Profile) && - (targetOptionSet.getIntOption(CompilerOptionName::Profile) != - SLANG_PROFILE_UNKNOWN); - bool specificCapabilityRequested = false; - for (auto atomVal : targetOptionSet.getArray(CompilerOptionName::Capability)) - { - switch (atomVal.kind) - { - case CompilerOptionValueKind::Int: - if (atomVal.intValue != SLANG_CAPABILITY_UNKNOWN) - specificCapabilityRequested = true; - break; - case CompilerOptionValueKind::String: - // User made a specific capability request - specificCapabilityRequested = true; - break; - } - if (specificCapabilityRequested) - break; - } - if (auto declaredCapsMod = entryPointFuncDecl->findModifier()) { @@ -1988,7 +1966,7 @@ void validateEntryPoint(EntryPoint* entryPoint, DiagnosticSink* sink) } // Only attempt to error if a specific profile or capability is requested - if ((specificCapabilityRequested || specificProfileRequested) && + if (isSpecificProfileOrCapabilityRequested(targetOptionSet) && targetCaps.atLeastOneSetImpliedInOther( CapabilitySet{entryPointFuncDecl->inferredCapabilityRequirements}) == CapabilitySet::ImpliesReturnFlags::NotImplied) diff --git a/source/slang/slang-compiler.h b/source/slang/slang-compiler.h index 4b3d67b223b..af45a64af85 100644 --- a/source/slang/slang-compiler.h +++ b/source/slang/slang-compiler.h @@ -213,6 +213,32 @@ enum class DiagnosticCategory None = 0, Capability = 1 << 0, }; + +inline bool isSpecificProfileRequested(CompilerOptionSet& optionSet) +{ + return optionSet.hasOption(CompilerOptionName::Profile) && + (optionSet.getIntOption(CompilerOptionName::Profile) != SLANG_PROFILE_UNKNOWN); +} + +inline bool isSpecificCapabilityRequested(CompilerOptionSet& optionSet) +{ + for (auto atomVal : optionSet.getArray(CompilerOptionName::Capability)) + { + if ((atomVal.kind == CompilerOptionValueKind::Int && + atomVal.intValue != SLANG_CAPABILITY_UNKNOWN) || + atomVal.kind == CompilerOptionValueKind::String) + { + return true; + } + } + return false; +} + +inline bool isSpecificProfileOrCapabilityRequested(CompilerOptionSet& optionSet) +{ + return isSpecificProfileRequested(optionSet) || isSpecificCapabilityRequested(optionSet); +} + template bool maybeDiagnose( DiagnosticSink* sink, diff --git a/source/slang/slang-diagnostics.lua b/source/slang/slang-diagnostics.lua index 9e9e7a34dfd..d56e11a042a 100644 --- a/source/slang/slang-diagnostics.lua +++ b/source/slang/slang-diagnostics.lua @@ -4808,6 +4808,20 @@ warning( span { loc = "location", message = "Slang's SPIR-V backend only supports SPIR-V version 1.3 and later. Use `-emit-spirv-via-glsl` option to produce SPIR-V 1.0 through 1.2." } ) +err( + "spirv-fp16-vector-atomic-unsupported-width", + 50013, + "invalid SPIR-V fp16 vector atomic width", + span { loc = "location", message = "SPIR-V fp16 vector atomics only support half2 and half4." } +) + +err( + "spirv-fp16-vector-atomic-unsupported-operation", + 50014, + "invalid SPIR-V fp16 vector atomic operation", + span { loc = "location", message = "SPIR-V fp16 vector atomics only support add, sub, min, max, and exchange operations." } +) + err( "invalid-mesh-stage-output-topology", 50060, diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 50e26c501ff..a77a95e0f34 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -2768,10 +2768,13 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex // with the result-type operand coming first, // followed by operand sfor all the parameter types. // - return emitOpTypeFunction( - inst, - static_cast(inst)->getResultType(), - static_cast(inst)->getParamTypes()); + { + auto funcType = static_cast(inst); + return emitOpTypeFunction( + inst, + funcType->getResultType(), + funcType->getParamTypes()); + } case kIROp_RateQualifiedType: { @@ -3948,6 +3951,8 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex /// Emit the given `irFunc` to SPIR-V SpvInst* emitFunc(IRFunc* irFunc) { + requireFunctionTypeCapabilitiesIfNeeded(irFunc->getDataType()); + // [2.4: Logical Layout of a Module] // // > All function declarations ("declarations" are functions @@ -4412,22 +4417,53 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex } } + static bool isFp16VectorAtomicType(IRType* valueType) + { + auto vectorType = as(valueType); + if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) + return false; + + return true; + } + + void maybeRequireFp16VectorAtomicCapability(IRType* valueType) + { + if (!isFp16VectorAtomicType(valueType)) + return; + + auto vectorType = as(valueType); + auto elementCountInst = as(vectorType->getElementCount()); + SLANG_ASSERT(elementCountInst); + if (!elementCountInst) + return; + + auto elementCount = elementCountInst->getValue(); + if (elementCount != 2 && elementCount != 4) + return; + + ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); + requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); + } + void ensureAtomicCapability(IRInst* atomicInst, SpvOp op) { - auto typeOp = atomicInst->getDataType()->getOp(); - if (typeOp == kIROp_VoidType) - { - auto ptrType = atomicInst->getOperand(0)->getDataType(); - IRBuilder builder(atomicInst); - if (auto valType = tryGetPointedToType(&builder, ptrType)) - { - if (auto atomicType = as(valType)) - valType = atomicType->getElementType(); - typeOp = valType->getOp(); - } - } + IRType* atomicValueType = getAtomicOperationValueType(atomicInst); + if (!atomicValueType) + return; + + auto typeOp = atomicValueType->getOp(); + switch (op) { + case SpvOpAtomicLoad: + case SpvOpAtomicStore: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + SLANG_ASSERT(!isFp16VectorAtomicType(atomicValueType)); + break; + case SpvOpAtomicExchange: + maybeRequireFp16VectorAtomicCapability(atomicValueType); + break; case SpvOpAtomicFAddEXT: { switch (typeOp) @@ -4445,12 +4481,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16AddEXT); break; case kIROp_VectorType: - if (as(atomicInst->getDataType())->getElementType()->getOp() == - kIROp_HalfType) - { - ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); - requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); - } + maybeRequireFp16VectorAtomicCapability(atomicValueType); break; } } @@ -4473,12 +4504,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityAtomicFloat16MinMaxEXT); break; case kIROp_VectorType: - if (as(atomicInst->getDataType())->getElementType()->getOp() == - kIROp_HalfType) - { - ensureExtensionDeclaration(toSlice("SPV_NV_shader_atomic_fp16_vector")); - requireSPIRVCapability(SpvCapabilityAtomicFloat16VectorNV); - } + maybeRequireFp16VectorAtomicCapability(atomicValueType); break; } } @@ -5354,6 +5380,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(1), ptr); + ensureAtomicCapability(inst, SpvOpAtomicLoad); result = emitOpAtomicLoad( parent, inst, @@ -5361,7 +5388,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex ptr, memoryScope, memorySemantics); - ensureAtomicCapability(inst, SpvOpAtomicLoad); } else { @@ -5383,9 +5409,9 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); + ensureAtomicCapability(inst, SpvOpAtomicStore); result = emitOpAtomicStore(parent, inst, ptr, memoryScope, memorySemantics, val); - ensureAtomicCapability(inst, SpvOpAtomicStore); } else { @@ -5407,6 +5433,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex const auto memoryScope = emitIntConstant(IRIntegerValue{SpvScopeDevice}, builder.getUIntType()); const auto memorySemantics = emitMemorySemanticMask(inst->getOperand(2), ptr); + ensureAtomicCapability(inst, SpvOpAtomicExchange); result = emitOpAtomicExchange( parent, inst, @@ -5415,7 +5442,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex memoryScope, memorySemantics, val); - ensureAtomicCapability(inst, SpvOpAtomicExchange); } else { @@ -5435,6 +5461,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex emitMemorySemanticMask(inst->getOperand(3), inst->getOperand(0)); const auto memorySemanticsUnequal = emitMemorySemanticMask(inst->getOperand(4), inst->getOperand(0)); + ensureAtomicCapability(inst, SpvOpAtomicCompareExchange); result = emitOpAtomicCompareExchange( parent, inst, @@ -5445,7 +5472,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex memorySemanticsUnequal, inst->getOperand(2), inst->getOperand(1)); - ensureAtomicCapability(inst, SpvOpAtomicCompareExchange); } break; case kIROp_AtomicAdd: @@ -5466,11 +5492,13 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex emitMemorySemanticMask(inst->getOperand(2), inst->getOperand(0)); bool negateOperand = false; auto spvOp = getSpvAtomicOp(inst, negateOperand); + ensureAtomicCapability(inst, spvOp); auto operand = inst->getOperand(1); if (negateOperand) { builder.setInsertBefore(inst); auto negatedOperand = builder.emitNeg(inst->getDataType(), operand); + emitLocalInst(parent, negatedOperand); operand = negatedOperand; } result = emitOpAtomicOp( @@ -5482,7 +5510,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex memoryScope, memorySemantics, operand); - ensureAtomicCapability(inst, spvOp); } break; case kIROp_ControlBarrier: @@ -7577,6 +7604,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitParam(SpvInstParent* parent, IRInst* inst) { + requireVariableBufferCapabilityIfNeeded(inst->getDataType()); auto paramSpvInst = emitOpFunctionParameter(parent, inst, inst->getFullType()); maybeEmitName(paramSpvInst, inst); maybeEmitPointerDecoration(paramSpvInst, inst); @@ -8153,7 +8181,7 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex SpvInst* emitGetOffsetPtr(SpvInstParent* parent, IRInst* inst) { - requireVariableBufferCapabilityIfNeeded(inst->getDataType()); + requirePtrAccessChainCapabilityIfNeeded(inst->getOperand(0)->getDataType()); return emitOpPtrAccessChain( parent, @@ -11254,13 +11282,56 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex requireSPIRVCapability(SpvCapabilityVariablePointersStorageBuffer); break; case AddressSpace::GroupShared: - ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_variable_pointers")); - requireSPIRVCapability(SpvCapabilityVariablePointers); + if (m_targetRequest->getTargetCaps().implies( + CapabilityAtom::SPV_KHR_variable_pointers)) + { + requireSPIRVVariablePointersCapability(); + } break; } } } + void requirePtrAccessChainCapabilityIfNeeded(IRInst* type) + { + requireVariableBufferCapabilityIfNeeded(type); + if (isWorkgroupPointerType(type)) + requireSPIRVVariablePointersCapability(); + } + + void requireSPIRVVariablePointersCapability() + { + ensureExtensionDeclaration(UnownedStringSlice("SPV_KHR_variable_pointers")); + requireSPIRVCapability(SpvCapabilityVariablePointers); + } + + bool isWorkgroupPointerType(IRInst* type) + { + if (auto ptrType = as(type)) + { + return ptrType->getAddressSpace() == AddressSpace::GroupShared; + } + return false; + } + + void requireFunctionTypeCapabilitiesIfNeeded(IRFuncType* funcType) + { + if (isWorkgroupPointerType(funcType->getResultType())) + { + requireSPIRVVariablePointersCapability(); + return; + } + + for (UInt pp = 0; pp < funcType->getParamCount(); ++pp) + { + if (isWorkgroupPointerType(funcType->getParamType(pp))) + { + requireSPIRVVariablePointersCapability(); + return; + } + } + } + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpExecutionMode Dictionary> m_executionModes; template @@ -11345,6 +11416,8 @@ SlangResult emitSPIRVFromIR( SPIRVEmitContext context(irModule, codeGenContext->getTargetProgram(), sink); legalizeIRForSPIRV(&context, irModule, irEntryPoints, codeGenContext); + if (sink->getErrorCount() != 0) + return SLANG_FAIL; #if 0 { diff --git a/source/slang/slang-ir-spirv-legalize.cpp b/source/slang/slang-ir-spirv-legalize.cpp index d4c54dd7c75..bdbb7032a88 100644 --- a/source/slang/slang-ir-spirv-legalize.cpp +++ b/source/slang/slang-ir-spirv-legalize.cpp @@ -1,6 +1,7 @@ // slang-ir-spirv-legalize.cpp #include "slang-ir-spirv-legalize.h" +#include "slang-compiler.h" #include "slang-emit-base.h" #include "slang-ir-call-graph.h" #include "slang-ir-clone.h" @@ -2659,7 +2660,12 @@ struct SPIRVLegalizationContext : public SourceEmitterBase // For SPIR-V, we don't skip this validation, because we might then be generating // invalid SPIR-V. bool skipFuncParamValidation = false; - validateAtomicOperations(skipFuncParamValidation, m_sink, m_module->getModuleInst()); + bool validateSPIRVAtomics = true; + validateAtomicOperations( + skipFuncParamValidation, + m_sink, + m_module->getModuleInst(), + validateSPIRVAtomics); } void updateFunctionTypes() diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp index 0a9514b9ab7..aa6882d464d 100644 --- a/source/slang/slang-ir-util.cpp +++ b/source/slang/slang-ir-util.cpp @@ -68,6 +68,19 @@ IRType* getMatrixElementType(IRType* type) return type; } +IRType* getAtomicOperationValueType(IRInst* inst) +{ + auto valueType = inst->getDataType(); + if (valueType && valueType->getOp() != kIROp_VoidType) + return valueType; + + IRBuilder builder(inst); + auto ptrValueType = tryGetPointedToType(&builder, inst->getOperand(0)->getDataType()); + if (auto atomicType = as(ptrValueType)) + return atomicType->getElementType(); + return ptrValueType; +} + Dictionary buildInterfaceRequirementDict(IRInterfaceType* interfaceType) { Dictionary result; diff --git a/source/slang/slang-ir-util.h b/source/slang/slang-ir-util.h index 5ddc8eb19dc..a3e9a349852 100644 --- a/source/slang/slang-ir-util.h +++ b/source/slang/slang-ir-util.h @@ -137,6 +137,9 @@ IRType* getVectorOrCoopMatrixElementType(IRType* type); // If `type` is a matrix, returns its element type. Otherwise, return `type`. IRType* getMatrixElementType(IRType* type); +// Returns the value type operated on by an atomic instruction. +IRType* getAtomicOperationValueType(IRInst* inst); + // True if type is a resource backing memory bool isResourceType(IRType* type); bool isOpaqueType(IRType* type, IRType** outLeafOpaqueHandleType); diff --git a/source/slang/slang-ir-validate.cpp b/source/slang/slang-ir-validate.cpp index fe68fc58717..9a960f6af30 100644 --- a/source/slang/slang-ir-validate.cpp +++ b/source/slang/slang-ir-validate.cpp @@ -526,7 +526,66 @@ static bool isValidAtomicDest(bool skipFuncParamValidation, IRInst* dst) return false; } -void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst) +static IRVectorType* getFp16VectorAtomicType(IRInst* inst) +{ + auto valueType = getAtomicOperationValueType(inst); + auto vectorType = as(valueType); + if (!vectorType || vectorType->getElementType()->getOp() != kIROp_HalfType) + return nullptr; + return vectorType; +} + +static void validateSPIRVFp16VectorAtomicOperation(DiagnosticSink* sink, IRInst* inst) +{ + auto vectorType = getFp16VectorAtomicType(inst); + if (!vectorType) + return; + + switch (inst->getOp()) + { + case kIROp_AtomicLoad: + case kIROp_AtomicStore: + case kIROp_AtomicCompareExchange: + sink->diagnose( + Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{.location = inst->sourceLoc}); + return; + + case kIROp_AtomicExchange: + case kIROp_AtomicAdd: + case kIROp_AtomicSub: + case kIROp_AtomicMin: + case kIROp_AtomicMax: + { + auto elementCountInst = as(vectorType->getElementCount()); + if (!elementCountInst) + { + sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = inst->sourceLoc}); + return; + } + + auto elementCount = elementCountInst->getValue(); + if (elementCount != 2 && elementCount != 4) + { + sink->diagnose(Diagnostics::SpirvFp16VectorAtomicUnsupportedWidth{ + .location = inst->sourceLoc}); + return; + } + } + return; + + default: + sink->diagnose( + Diagnostics::SpirvFp16VectorAtomicUnsupportedOperation{.location = inst->sourceLoc}); + return; + } +} + +static void validateAtomicOperationsImpl( + bool skipFuncParamValidation, + DiagnosticSink* sink, + IRInst* inst, + bool validateSPIRVAtomics) { switch (inst->getOp()) { @@ -549,6 +608,8 @@ void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink sink->diagnose(Diagnostics::InvalidAtomicDestinationPointer{ .location = inst->sourceLoc, }); + if (validateSPIRVAtomics) + validateSPIRVFp16VectorAtomicOperation(sink, inst); } break; @@ -558,10 +619,24 @@ void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink for (auto child : inst->getModifiableChildren()) { - validateAtomicOperations(skipFuncParamValidation, sink, child); + validateAtomicOperationsImpl(skipFuncParamValidation, sink, child, validateSPIRVAtomics); } } +void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst) +{ + validateAtomicOperationsImpl(skipFuncParamValidation, sink, inst, false); +} + +void validateAtomicOperations( + bool skipFuncParamValidation, + DiagnosticSink* sink, + IRInst* inst, + bool validateSPIRVAtomics) +{ + validateAtomicOperationsImpl(skipFuncParamValidation, sink, inst, validateSPIRVAtomics); +} + static void validateVectorOrMatrixElementType( DiagnosticSink* sink, SourceLoc sourceLoc, diff --git a/source/slang/slang-ir-validate.h b/source/slang/slang-ir-validate.h index f7ce0b05d8c..c0e8bea4085 100644 --- a/source/slang/slang-ir-validate.h +++ b/source/slang/slang-ir-validate.h @@ -80,6 +80,14 @@ class [[nodiscard]] IRValidationScope // lead back to in/inout parameters that we can't validate. void validateAtomicOperations(bool skipFuncParamValidation, DiagnosticSink* sink, IRInst* inst); +// If 'validateSPIRVAtomics' is true, also reject fp16 vector atomic operations +// and widths that cannot be represented by the SPIR-V NV vector atomic extension. +void validateAtomicOperations( + bool skipFuncParamValidation, + DiagnosticSink* sink, + IRInst* inst, + bool validateSPIRVAtomics); + // Overload that takes IRModule* first for use with SLANG_PASS macro void validateAtomicOperations(IRModule* module, bool skipFuncParamValidation, DiagnosticSink* sink); diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp index 7bebfbbf3cf..34eb2c9b9bd 100644 --- a/source/slang/slang-type-layout.cpp +++ b/source/slang/slang-type-layout.cpp @@ -3499,21 +3499,7 @@ static void maybePromoteDescriptorHandleCapability(TargetRequest* targetReq) return; auto& targetOptionSet = targetReq->getOptionSet(); - bool specificProfileRequested = - targetOptionSet.hasOption(CompilerOptionName::Profile) && - (targetOptionSet.getIntOption(CompilerOptionName::Profile) != SLANG_PROFILE_UNKNOWN); - bool specificCapabilityRequested = false; - for (auto atomVal : targetOptionSet.getArray(CompilerOptionName::Capability)) - { - if ((atomVal.kind == CompilerOptionValueKind::Int && - atomVal.intValue != SLANG_CAPABILITY_UNKNOWN) || - atomVal.kind == CompilerOptionValueKind::String) - { - specificCapabilityRequested = true; - break; - } - } - if (!specificProfileRequested && !specificCapabilityRequested) + if (!isSpecificProfileOrCapabilityRequested(targetOptionSet)) { auto targetCaps = targetReq->getTargetCaps(); targetCaps.addUnexpandedCapabilites(CapabilityName::descriptor_handle); diff --git a/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang new file mode 100644 index 00000000000..fc288772d49 --- /dev/null +++ b/tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics-capability.slang @@ -0,0 +1,367 @@ +// Scalar/vector fp16 add checks. `NO_FP16_ATOMIC` has no fp16 atomic capability; +// `VECTOR_F16` has the vector capability, which implies scalar spvAtomicFloat16AddEXT. +//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_SCALAR):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16AddEXT -render-feature atomic-half -output-using-type -xslang -DTEST_RUNTIME_SCALAR +//TEST:SIMPLE(filecheck=SCALAR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT +//DIAGNOSTIC_TEST:SIMPLE(diag=NO_FP16_ATOMIC,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -capability spirv_1_5 +//TEST:SIMPLE(filecheck=VECTOR_F16): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV + +// Direct half-vector `__atomic_*` checks. +// RHI does not yet expose a feature gate for VK_NV_shader_atomic_float16_vector, so these +// runtime checks stay disabled while the active tests below cover SPIR-V codegen/diagnostics. +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME_DIRECT4):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type -xslang -DTEST_RUNTIME_DIRECT_VECTOR4 +//DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_WIDTH,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_WIDTH +//DIAGNOSTIC_TEST:SIMPLE(diag=UNSUPPORTED_VECTOR_COMPARE_EXCHANGE,non-exhaustive): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_UNSUPPORTED_VECTOR_COMPARE_EXCHANGE + +// Emulated fp16 vector checks. +// Runtime coverage for the pointer-form helpers would require SPIR-V variable pointers, +// which existing tests keep disabled on current GCP runners. +//TEST:SIMPLE(filecheck=POINTER_EMULATED): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation -capability spvAtomicFloat16VectorNV -DTEST_POINTER_EMULATED +//TEST:SIMPLE(filecheck=POINTER_F16X2): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_POINTER_F16X2 + +// Positive codegen checks for ignored capabilities, scalar/vector SPIR-V, and CUDA. +//TEST:SIMPLE(filecheck=IGNORE_CAPS): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -restrictive-capability-check -ignore-capabilities -capability spirv_1_5 +//TEST:SIMPLE(filecheck=BOTH): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16AddEXT -capability spvAtomicFloat16VectorNV +//TEST:SIMPLE(filecheck=VECTOR): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_ATOMIC +//TEST:SIMPLE(filecheck=VECTOR_SUB): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_SUB +//TEST:SIMPLE(filecheck=VECTOR4): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_ATOMIC +//TEST:SIMPLE(filecheck=VECTOR4_SUB): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_SUB +//TEST:SIMPLE(filecheck=VECTOR4_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_MIN_MAX +//TEST:SIMPLE(filecheck=VECTOR4_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR4_EXCHANGE +//TEST:SIMPLE(filecheck=VECTOR_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_MIN_MAX +//TEST:SIMPLE(filecheck=VECTOR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -capability spvAtomicFloat16VectorNV -DTEST_DIRECT_VECTOR_EXCHANGE +//TEST:SIMPLE(filecheck=SCALAR_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_DIRECT_SCALAR_EXCHANGE +//TEST:SIMPLE(filecheck=SCALAR_HALF_EXCHANGE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_DIRECT_SCALAR_HALF_EXCHANGE +//TEST:SIMPLE(filecheck=EMULATED): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DTEST_EMULATED +//TEST:SIMPLE(filecheck=CUDA): -target cuda -entry computeMain -stage compute -capability cuda_sm_7_0 + +//TEST_INPUT:ubuffer(stride=4, count=8):name=tmpBuffer +RWByteAddressBuffer tmpBuffer; +//TEST_INPUT:ubuffer(stride=4, count=32):out,name=outputBuffer +RWStructuredBuffer outputBuffer; +//TEST_INPUT:ubuffer(stride=2, count=4):name=halfBuffer +RWStructuredBuffer halfBuffer; +//TEST_INPUT:ubuffer(stride=4, count=4):name=vectorBuffer +RWStructuredBuffer vectorBuffer; +//TEST_INPUT:ubuffer(stride=8, count=4):name=vector4Buffer +RWStructuredBuffer vector4Buffer; +#ifdef TEST_UNSUPPORTED_VECTOR_WIDTH +RWStructuredBuffer unsupportedVectorBuffer; +#endif +//TEST_INPUT:ubuffer(stride=4, count=1):name=uintBuffer +RWStructuredBuffer uintBuffer; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ +#ifdef TEST_RUNTIME_SCALAR + half originalLow; + half originalHigh; + tmpBuffer.Store(0, 2.0h); + tmpBuffer.Store(2, 4.0h); + tmpBuffer.InterlockedAddF16(0, 1.0h, originalLow); + tmpBuffer.InterlockedAddF16(2, 3.0h, originalHigh); + + outputBuffer[0] = float(originalLow); + outputBuffer[1] = float(originalHigh); + outputBuffer[2] = float(tmpBuffer.Load(0)); + outputBuffer[3] = float(tmpBuffer.Load(2)); + // RUNTIME_SCALAR: type: float + // RUNTIME_SCALAR-NEXT: 2.000000 + // RUNTIME_SCALAR-NEXT: 4.000000 + // RUNTIME_SCALAR-NEXT: 3.000000 + // RUNTIME_SCALAR-NEXT: 7.000000 +#elif defined(TEST_RUNTIME_DIRECT) + vectorBuffer[0] = half2(2.0h, 4.0h); + vectorBuffer[1] = half2(5.0h, 1.0h); + vectorBuffer[2] = half2(5.0h, 1.0h); + vectorBuffer[3] = half2(8.0h, 9.0h); + + half2 originalAdd = __atomic_add(vectorBuffer[0], half2(1.0h, 3.0h)); + half2 originalMin = __atomic_min(vectorBuffer[1], half2(3.0h, 2.0h)); + half2 originalMax = __atomic_max(vectorBuffer[2], half2(3.0h, 2.0h)); + half2 originalExchange = __atomic_exchange(vectorBuffer[3], half2(6.0h, 7.0h)); + + outputBuffer[0] = float(originalAdd.x); + outputBuffer[1] = float(originalAdd.y); + outputBuffer[2] = float(vectorBuffer[0].x); + outputBuffer[3] = float(vectorBuffer[0].y); + outputBuffer[4] = float(originalMin.x); + outputBuffer[5] = float(originalMin.y); + outputBuffer[6] = float(vectorBuffer[1].x); + outputBuffer[7] = float(vectorBuffer[1].y); + outputBuffer[8] = float(originalMax.x); + outputBuffer[9] = float(originalMax.y); + outputBuffer[10] = float(vectorBuffer[2].x); + outputBuffer[11] = float(vectorBuffer[2].y); + outputBuffer[12] = float(originalExchange.x); + outputBuffer[13] = float(originalExchange.y); + outputBuffer[14] = float(vectorBuffer[3].x); + outputBuffer[15] = float(vectorBuffer[3].y); + // RUNTIME_DIRECT: type: float + // RUNTIME_DIRECT-NEXT: 2.000000 + // RUNTIME_DIRECT-NEXT: 4.000000 + // RUNTIME_DIRECT-NEXT: 3.000000 + // RUNTIME_DIRECT-NEXT: 7.000000 + // RUNTIME_DIRECT-NEXT: 5.000000 + // RUNTIME_DIRECT-NEXT: 1.000000 + // RUNTIME_DIRECT-NEXT: 3.000000 + // RUNTIME_DIRECT-NEXT: 1.000000 + // RUNTIME_DIRECT-NEXT: 5.000000 + // RUNTIME_DIRECT-NEXT: 1.000000 + // RUNTIME_DIRECT-NEXT: 5.000000 + // RUNTIME_DIRECT-NEXT: 2.000000 + // RUNTIME_DIRECT-NEXT: 8.000000 + // RUNTIME_DIRECT-NEXT: 9.000000 + // RUNTIME_DIRECT-NEXT: 6.000000 + // RUNTIME_DIRECT-NEXT: 7.000000 +#elif defined(TEST_RUNTIME_DIRECT_VECTOR4) + vector4Buffer[0] = half4(2.0h, 4.0h, 6.0h, 8.0h); + + half4 originalAdd = __atomic_add(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); + + outputBuffer[0] = float(originalAdd.x); + outputBuffer[1] = float(originalAdd.y); + outputBuffer[2] = float(originalAdd.z); + outputBuffer[3] = float(originalAdd.w); + outputBuffer[4] = float(vector4Buffer[0].x); + outputBuffer[5] = float(vector4Buffer[0].y); + outputBuffer[6] = float(vector4Buffer[0].z); + outputBuffer[7] = float(vector4Buffer[0].w); + // RUNTIME_DIRECT4: type: float + // RUNTIME_DIRECT4-NEXT: 2.000000 + // RUNTIME_DIRECT4-NEXT: 4.000000 + // RUNTIME_DIRECT4-NEXT: 6.000000 + // RUNTIME_DIRECT4-NEXT: 8.000000 + // RUNTIME_DIRECT4-NEXT: 3.000000 + // RUNTIME_DIRECT4-NEXT: 6.000000 + // RUNTIME_DIRECT4-NEXT: 9.000000 + // RUNTIME_DIRECT4-NEXT: 12.000000 +#elif defined(TEST_DIRECT_VECTOR_ATOMIC) + __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +#elif defined(TEST_DIRECT_VECTOR_SUB) + __atomic_sub(vectorBuffer[0], half2(1.0h, 2.0h)); +#elif defined(TEST_DIRECT_VECTOR4_ATOMIC) + __atomic_add(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); +#elif defined(TEST_DIRECT_VECTOR4_SUB) + __atomic_sub(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); +#elif defined(TEST_DIRECT_VECTOR4_MIN_MAX) + __atomic_min(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); + __atomic_max(vector4Buffer[1], half4(5.0h, 6.0h, 7.0h, 8.0h)); +#elif defined(TEST_DIRECT_VECTOR4_EXCHANGE) + half4 originalValue = __atomic_exchange(vector4Buffer[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); + outputBuffer[0] = float(originalValue.x + originalValue.y + originalValue.z + originalValue.w); +#elif defined(TEST_DIRECT_VECTOR_MIN_MAX) + __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); + __atomic_max(vectorBuffer[1], half2(3.0h, 4.0h)); +#elif defined(TEST_DIRECT_VECTOR_EXCHANGE) + half2 originalValue = __atomic_exchange(vectorBuffer[0], half2(1.0h, 2.0h)); + outputBuffer[0] = float(originalValue.x + originalValue.y); +#elif defined(TEST_DIRECT_SCALAR_EXCHANGE) + uint originalValue = __atomic_exchange(uintBuffer[0], 1u); + outputBuffer[0] = float(originalValue); +#elif defined(TEST_DIRECT_SCALAR_HALF_EXCHANGE) + half originalValue = __atomic_exchange(halfBuffer[0], 1.0h); + outputBuffer[0] = float(originalValue); +#elif defined(TEST_UNSUPPORTED_VECTOR_WIDTH) + __atomic_add(unsupportedVectorBuffer[0], half3(1.0h, 2.0h, 3.0h)); +// UNSUPPORTED_VECTOR_WIDTH: error[E50013]: invalid SPIR-V fp16 vector atomic width +// UNSUPPORTED_VECTOR_WIDTH: SPIR-V fp16 vector atomics only support half2 and half4. +#elif defined(TEST_UNSUPPORTED_VECTOR_COMPARE_EXCHANGE) + half2 originalValue = __atomic_compare_exchange( + vectorBuffer[0], + half2(1.0h, 2.0h), + half2(3.0h, 4.0h)); +// UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: error[E50014]: invalid SPIR-V fp16 vector atomic operation +// UNSUPPORTED_VECTOR_COMPARE_EXCHANGE: SPIR-V fp16 vector atomics only support add, sub, min, max, and exchange operations. + outputBuffer[0] = float(originalValue.x + originalValue.y); +// Half-vector atomic load/store are not source-expressible today: Atomic requires +// scalar IAtomicable types, while half2/half4 do not conform to that interface. The +// validator rejects those IR ops defensively if a later lowering path creates them. +#elif defined(TEST_EMULATED) + half originalValue; + tmpBuffer.InterlockedAddF16Emulated(0, 1.0h, originalValue); + outputBuffer[0] = float(originalValue); + tmpBuffer.InterlockedAddF16Emulated(2, 1.0h, originalValue); + outputBuffer[1] = float(originalValue); +#elif defined(TEST_POINTER_EMULATED) + half originalValue; + InterlockedAddF16Emulated(&halfBuffer[0], 1.0h, originalValue); + outputBuffer[0] = float(originalValue); +#elif defined(TEST_POINTER_F16X2) + half2 originalValue; + InterlockedAddF16x2(&vectorBuffer[0], half2(1.0h, 2.0h), originalValue); + outputBuffer[0] = float(originalValue.x + originalValue.y); +#else + half originalValue; + tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue); +// NO_FP16_ATOMIC: entry point uses capabilities not in specified profile +// NO_FP16_ATOMIC: Missing capabilities are: 'spvAtomicFloat16AddEXT' +// IGNORE_CAPS-NOT: entry point uses capabilities not in specified profile + outputBuffer[0] = float(originalValue); + tmpBuffer.InterlockedAddF16(2, 1.0h, originalValue); + outputBuffer[1] = float(originalValue); + tmpBuffer.InterlockedAddF16(4, 1.0h, originalValue); + outputBuffer[2] = float(originalValue); + tmpBuffer.InterlockedAddF16(6, 1.0h, originalValue); + outputBuffer[3] = float(originalValue); + + uint dynamicByteAddress = (dispatchThreadID.x & 1) * 2; + tmpBuffer.InterlockedAddF16(dynamicByteAddress, 1.0h, originalValue); + outputBuffer[4] = float(originalValue); +#endif +} + +// SCALAR-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// SCALAR: OpCapability AtomicFloat16AddEXT +// SCALAR: OpExtension "SPV_EXT_shader_atomic_float16_add" +// SCALAR-NOT: OpAtomicFAddEXT %v2half +// SCALAR-COUNT-5: OpAtomicFAddEXT %half +// SCALAR-NOT: OpAtomicFAddEXT %v2half +// SCALAR-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" + +// IGNORE_CAPS-NOT: entry point uses capabilities not in specified profile +// IGNORE_CAPS: OpCapability AtomicFloat16AddEXT +// IGNORE_CAPS: OpExtension "SPV_EXT_shader_atomic_float16_add" +// IGNORE_CAPS-COUNT-5: OpAtomicFAddEXT %half +// IGNORE_CAPS-NOT: OpAtomicFAddEXT %v2half + +// VECTOR_F16-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_F16-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_F16: OpCapability AtomicFloat16VectorNV +// VECTOR_F16: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_F16-NOT: OpAtomicFAddEXT %half +// VECTOR_F16-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 +// VECTOR_F16-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 +// VECTOR_F16: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] +// VECTOR_F16: OpCompositeExtract %half [[LOW_ATOMIC]] 0 +// VECTOR_F16: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] +// VECTOR_F16: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 +// VECTOR_F16-COUNT-4: OpAtomicFAddEXT %v2half +// VECTOR_F16-NOT: OpAtomicFAddEXT %half + +// BOTH-NOT: OpCapability AtomicFloat16AddEXT +// BOTH-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// BOTH: OpCapability AtomicFloat16VectorNV +// BOTH: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// BOTH-NOT: OpAtomicFAddEXT %half +// BOTH-COUNT-6: OpAtomicFAddEXT %v2half +// BOTH-NOT: OpAtomicFAddEXT %half +// BOTH-NOT: OpAtomicCompareExchange + +// VECTOR-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR: OpCapability AtomicFloat16VectorNV +// VECTOR: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR: OpAtomicFAddEXT %v2half + +// VECTOR_SUB-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_SUB-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_SUB: OpCapability AtomicFloat16VectorNV +// VECTOR_SUB: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_SUB-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v2half {{%[A-Za-z0-9_]+}} {{%[A-Za-z0-9_]+}} +// VECTOR_SUB: [[NEG_VALUE:%[0-9]+]] = OpFNegate %v2half [[VALUE]] +// VECTOR_SUB: OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[NEG_VALUE]] + +// VECTOR4-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4: OpCapability AtomicFloat16VectorNV +// VECTOR4: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR4: OpAtomicFAddEXT %v4half +// VECTOR4-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" + +// VECTOR4_SUB-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4_SUB-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4_SUB: OpCapability AtomicFloat16VectorNV +// VECTOR4_SUB: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR4_SUB-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v4half {{%[A-Za-z0-9_]+}} {{%[A-Za-z0-9_]+}} {{%[A-Za-z0-9_]+}} {{%[A-Za-z0-9_]+}} +// VECTOR4_SUB: [[NEG_VALUE:%[0-9]+]] = OpFNegate %v4half [[VALUE]] +// VECTOR4_SUB: OpAtomicFAddEXT %v4half {{%[0-9]+}} %uint_1 %uint_0 [[NEG_VALUE]] + +// VECTOR4_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4_MIN_MAX: OpCapability AtomicFloat16VectorNV +// VECTOR4_MIN_MAX: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR4_MIN_MAX: OpAtomicFMinEXT %v4half +// VECTOR4_MIN_MAX: OpAtomicFMaxEXT %v4half + +// VECTOR4_EXCHANGE-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR4_EXCHANGE-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR4_EXCHANGE: OpCapability AtomicFloat16VectorNV +// VECTOR4_EXCHANGE: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR4_EXCHANGE: OpAtomicExchange %v4half + +// VECTOR_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_MIN_MAX: OpCapability AtomicFloat16VectorNV +// VECTOR_MIN_MAX: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_MIN_MAX: OpAtomicFMinEXT %v2half +// VECTOR_MIN_MAX: OpAtomicFMaxEXT %v2half +// VECTOR_MIN_MAX-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_MIN_MAX-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" + +// VECTOR_EXCHANGE-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_EXCHANGE-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// VECTOR_EXCHANGE: OpCapability AtomicFloat16VectorNV +// VECTOR_EXCHANGE: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// VECTOR_EXCHANGE: OpAtomicExchange %v2half +// VECTOR_EXCHANGE-NOT: OpCapability AtomicFloat16AddEXT +// VECTOR_EXCHANGE-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" + +// SCALAR_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// SCALAR_EXCHANGE: OpAtomicExchange %uint +// SCALAR_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" + +// SCALAR_HALF_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR_HALF_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// SCALAR_HALF_EXCHANGE: OpAtomicExchange %half +// SCALAR_HALF_EXCHANGE-NOT: OpCapability AtomicFloat16VectorNV +// SCALAR_HALF_EXCHANGE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" + +// EMULATED-NOT: OpCapability AtomicFloat16AddEXT +// EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// EMULATED: OpCapability AtomicFloat16VectorNV +// EMULATED: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// EMULATED-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 +// EMULATED-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 +// EMULATED-NOT: OpAtomicFAddEXT +// EMULATED: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] +// EMULATED: OpCompositeExtract %half [[LOW_ATOMIC]] 0 +// EMULATED: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] +// EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 +// EMULATED-NOT: OpAtomicFAddEXT + +// POINTER_EMULATED-DAG: OpCapability VariablePointersStorageBuffer +// POINTER_EMULATED-DAG: OpCapability AtomicFloat16VectorNV +// POINTER_EMULATED-DAG: OpExtension "SPV_KHR_variable_pointers" +// POINTER_EMULATED-DAG: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POINTER_EMULATED-NOT: OpCapability AtomicFloat16AddEXT +// POINTER_EMULATED-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// POINTER_EMULATED-DAG: [[LOW:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x0p_0 +// POINTER_EMULATED-DAG: [[HIGH:%[0-9]+]] = OpConstantComposite %v2half %half_0x0p_0 %half_0x1p_0 +// POINTER_EMULATED: [[LOW_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[LOW]] +// POINTER_EMULATED: OpCompositeExtract %half [[LOW_ATOMIC]] 0 +// POINTER_EMULATED: [[HIGH_ATOMIC:%[0-9]+]] = OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[HIGH]] +// POINTER_EMULATED: OpCompositeExtract %half [[HIGH_ATOMIC]] 1 +// POINTER_EMULATED-NOT: OpAtomicFAddEXT + +// POINTER_F16X2-DAG: OpCapability VariablePointersStorageBuffer +// POINTER_F16X2-DAG: OpCapability AtomicFloat16VectorNV +// POINTER_F16X2-DAG: OpExtension "SPV_KHR_variable_pointers" +// POINTER_F16X2-DAG: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POINTER_F16X2-NOT: OpCapability AtomicFloat16AddEXT +// POINTER_F16X2-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// POINTER_F16X2-DAG: [[VALUE:%[0-9]+]] = OpConstantComposite %v2half %half_0x1p_0 %half_0x1p_1 +// POINTER_F16X2: OpAtomicFAddEXT %v2half {{%[0-9]+}} %uint_1 %uint_0 [[VALUE]] +// POINTER_F16X2-NOT: OpAtomicFAddEXT + +// CUDA-NOT: atomicAdd((half2 * +// CUDA-NOT: atomicAdd((__half2 * +// CUDA-COUNT-5: atomicAdd((&{{.*}}), __half(1.0)) +// CUDA-NOT: atomicAdd((half2 * +// CUDA-NOT: atomicAdd((__half2 * diff --git a/tests/language-feature/pointer/ptr-to-groupshared.slang b/tests/language-feature/pointer/ptr-to-groupshared.slang index c4b6ae4ac8b..7d0c874f5f4 100644 --- a/tests/language-feature/pointer/ptr-to-groupshared.slang +++ b/tests/language-feature/pointer/ptr-to-groupshared.slang @@ -1,15 +1,35 @@ //TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -Xslang -g0 +//TEST:SIMPLE(filecheck=SPIRV_INLINE): -target spirv-asm -entry computeMain -stage compute -emit-spirv-directly -g0 +//TEST:SIMPLE(filecheck=SPIRV_VARPTR): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -capability SPV_KHR_variable_pointers -DTEST_NOINLINE_GROUPSHARED_PTR +//TEST:SIMPLE(filecheck=SPIRV_NOINLINE): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -DTEST_NOINLINE_GROUPSHARED_PTR +//TEST:SIMPLE(filecheck=SPIRV_NOINLINE_RESTRICTIVE): -target spirv-asm -entry computeMainNoInline -stage compute -emit-spirv-directly -g0 -restrictive-capability-check -capability spirv_1_5 -DTEST_NOINLINE_GROUPSHARED_PTR // By default slang-test uses `-g` and it requires `VariablePointers`, which // doesn't produce the correct result due to the bug on the graphics driver. // Tracked by github issue #9061 //DISABLE_TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -output-using-type -emit-spirv-directly -// Tests if we handle passing groupshared address-space pointers correctly to a function -// when that data-type needs legalization (Data -> Data_natural due to `lower-buffer-element-type`). +// Tests if we handle groupshared address-space pointers correctly when that data-type needs +// legalization (Data -> Data_natural due to `lower-buffer-element-type`). The active runtime case +// stays force-inlined because VariablePointers produces wrong results on the CI Vulkan driver. // CHECK: 1 // CHECK-NEXT: 2 // CHECK-NEXT: 0 +// SPIRV_INLINE-NOT: OpCapability VariablePointers +// SPIRV_INLINE: OpMemoryModel +// SPIRV_VARPTR: OpCapability VariablePointers +// SPIRV_VARPTR: OpExtension "SPV_KHR_variable_pointers" +// SPIRV_VARPTR: OpMemoryModel +// SPIRV_VARPTR: OpTypeFunction %void %_ptr_Workgroup +// SPIRV_NOINLINE: OpCapability VariablePointers +// SPIRV_NOINLINE: OpExtension "SPV_KHR_variable_pointers" +// SPIRV_NOINLINE: OpMemoryModel +// SPIRV_NOINLINE: OpTypeFunction %void %_ptr_Workgroup +// SPIRV_NOINLINE_RESTRICTIVE-NOT: entry point uses capabilities +// SPIRV_NOINLINE_RESTRICTIVE: OpCapability VariablePointers +// SPIRV_NOINLINE_RESTRICTIVE: OpExtension "SPV_KHR_variable_pointers" +// SPIRV_NOINLINE_RESTRICTIVE: OpMemoryModel +// SPIRV_NOINLINE_RESTRICTIVE: OpTypeFunction %void %_ptr_Workgroup struct Data { @@ -21,6 +41,7 @@ struct Data uniform int* outputBuffer; groupshared Data shared; +[ForceInline] void foo(Ptr ptr) { outputBuffer[0] = ptr.value1; @@ -33,3 +54,19 @@ void computeMain(uint3 group_thread_id: SV_GroupThreadID) shared = Data(1, 2); foo(__getAddress(shared)); } + +#ifdef TEST_NOINLINE_GROUPSHARED_PTR +[noinline] +void noinlineFoo(Ptr ptr) +{ + outputBuffer[0] = ptr.value1; + outputBuffer[1] = ptr.value2; +} + +[numthreads(3, 1, 1)] +void computeMainNoInline(uint3 group_thread_id: SV_GroupThreadID) +{ + shared = Data(1, 2); + noinlineFoo(__getAddress(shared)); +} +#endif diff --git a/tests/spirv/atomic-float16-vector.slang b/tests/spirv/atomic-float16-vector.slang index f9602a948a7..f73610f0234 100644 --- a/tests/spirv/atomic-float16-vector.slang +++ b/tests/spirv/atomic-float16-vector.slang @@ -1,22 +1,89 @@ -//TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute -emit-spirv-directly - -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):out,name=outputBuffer -RWStructuredBuffer outputBuffer; - -//TEST_INPUT:ubuffer(data=[0 0 0 0], stride=4):name=workBuffer -RWStructuredBuffer workBuffer; - -[numthreads(1, 1, 1)] -void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) -{ - half2 originalValue; - - // Direct atomic operation on half2 should trigger the SPV_NV_shader_atomic_fp16_vector extension - originalValue = __atomic_add(workBuffer[0], half2(1.0h, 2.0h)); - - outputBuffer[0] = float(originalValue.x); - outputBuffer[1] = float(originalValue.y); -} - -// CHECK: OpCapability AtomicFloat16VectorNV -// CHECK: OpExtension "SPV_NV_shader_atomic_fp16_vector" +//TEST:SIMPLE(filecheck=CHECK):-target spirv -entry computeMain -stage compute -emit-spirv-directly +// RHI does not yet expose a feature gate for VK_NV_shader_atomic_float16_vector, so this runtime +// check stays disabled while the active SIMPLE test verifies SPIR-V capability and instruction use. +//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=RUNTIME):-vk -compute -entry computeMain -emit-spirv-directly -capability spvAtomicFloat16VectorNV -output-using-type + +//TEST_INPUT:ubuffer(stride=4, count=12):out,name=outputBuffer +RWStructuredBuffer outputBuffer; + +//TEST_INPUT:ubuffer(stride=4, count=4):name=workBuffer +RWStructuredBuffer workBuffer; +//TEST_INPUT:ubuffer(stride=8, count=4):name=workBuffer4 +RWStructuredBuffer workBuffer4; + +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + workBuffer[0] = half2(2.0h, 4.0h); + workBuffer[1] = half2(5.0h, 1.0h); + workBuffer[2] = half2(5.0h, 1.0h); + workBuffer[3] = half2(8.0h, 9.0h); + + workBuffer4[0] = half4(1.0h, 2.0h, 3.0h, 4.0h); + workBuffer4[1] = half4(5.0h, 1.0h, 7.0h, 2.0h); + workBuffer4[2] = half4(5.0h, 1.0h, 7.0h, 2.0h); + workBuffer4[3] = half4(8.0h, 9.0h, 10.0h, 11.0h); + + half2 originalAdd; + half2 originalMin; + half2 originalMax; + half2 originalExchange; + half4 originalAdd4; + half4 originalMin4; + half4 originalMax4; + half4 originalExchange4; + + // Direct atomic operation on half2 should trigger the SPV_NV_shader_atomic_fp16_vector extension + originalAdd = __atomic_add(workBuffer[0], half2(1.0h, 2.0h)); + originalMin = __atomic_min(workBuffer[1], half2(1.0h, 2.0h)); + originalMax = __atomic_max(workBuffer[2], half2(1.0h, 2.0h)); + originalExchange = __atomic_exchange(workBuffer[3], half2(3.0h, 4.0h)); + originalAdd4 = __atomic_add(workBuffer4[0], half4(1.0h, 2.0h, 3.0h, 4.0h)); + originalMin4 = __atomic_min(workBuffer4[1], half4(1.0h, 2.0h, 3.0h, 4.0h)); + originalMax4 = __atomic_max(workBuffer4[2], half4(1.0h, 2.0h, 3.0h, 4.0h)); + originalExchange4 = __atomic_exchange(workBuffer4[3], half4(5.0h, 6.0h, 7.0h, 8.0h)); + + half4 original4Sum = originalAdd4 + originalMin4 + originalMax4 + originalExchange4; + half4 final4Sum = workBuffer4[0] + workBuffer4[1] + workBuffer4[2] + workBuffer4[3]; + + outputBuffer[0] = float(originalAdd.x + originalMin.x + originalMax.x + originalExchange.x); + outputBuffer[1] = float(originalAdd.y + originalMin.y + originalMax.y + originalExchange.y); + outputBuffer[2] = float(original4Sum.x); + outputBuffer[3] = float(original4Sum.y); + outputBuffer[4] = float(original4Sum.z); + outputBuffer[5] = float(original4Sum.w); + outputBuffer[6] = float(workBuffer[0].x + workBuffer[1].x + workBuffer[2].x + workBuffer[3].x); + outputBuffer[7] = float(workBuffer[0].y + workBuffer[1].y + workBuffer[2].y + workBuffer[3].y); + outputBuffer[8] = float(final4Sum.x); + outputBuffer[9] = float(final4Sum.y); + outputBuffer[10] = float(final4Sum.z); + outputBuffer[11] = float(final4Sum.w); + + // RUNTIME: type: float + // RUNTIME-NEXT: 20.000000 + // RUNTIME-NEXT: 15.000000 + // RUNTIME-NEXT: 19.000000 + // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 27.000000 + // RUNTIME-NEXT: 19.000000 + // RUNTIME-NEXT: 12.000000 + // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 13.000000 + // RUNTIME-NEXT: 23.000000 + // RUNTIME-NEXT: 22.000000 +} + +// CHECK-NOT: OpCapability AtomicFloat16AddEXT +// CHECK: OpCapability AtomicFloat16VectorNV +// CHECK-NOT: OpCapability AtomicFloat16AddEXT +// CHECK: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// CHECK-NOT: OpExtension "SPV_EXT_shader_atomic_float16_add" +// CHECK-COUNT-1: OpAtomicFAddEXT %v2half +// CHECK-COUNT-1: OpAtomicFMinEXT %v2half +// CHECK-COUNT-1: OpAtomicFMaxEXT %v2half +// CHECK-COUNT-1: OpAtomicExchange %v2half +// CHECK-COUNT-1: OpAtomicFAddEXT %v4half +// CHECK-COUNT-1: OpAtomicFMinEXT %v4half +// CHECK-COUNT-1: OpAtomicFMaxEXT %v4half +// CHECK-COUNT-1: OpAtomicExchange %v4half diff --git a/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang new file mode 100644 index 00000000000..487ed0d2dd8 --- /dev/null +++ b/tests/spirv/gl-nv-shader-atomic-fp16-vector-compatibility.slang @@ -0,0 +1,44 @@ +//TEST:SIMPLE(filecheck=NEGATIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly +//TEST:SIMPLE(filecheck=POSITIVE): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC +//TEST:SIMPLE(filecheck=POSITIVE_MIN_MAX): -target spirv -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_MIN_MAX +//DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_WARN,non-exhaustive): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC +//DIAGNOSTIC_TEST:SIMPLE(diag=POSITIVE_ERR,non-exhaustive): -target spirv -profile spirv_1_5 -restrictive-capability-check -entry computeMain -stage compute -emit-spirv-directly -DPOSITIVE_VECTOR_ATOMIC +//TEST:SIMPLE(filecheck=POSITIVE_IGNORE_CAPS): -target spirv -profile spirv_1_5 -entry computeMain -stage compute -emit-spirv-directly -ignore-capabilities -DPOSITIVE_VECTOR_ATOMIC + +RWStructuredBuffer vectorBuffer; + +[require(GL_NV_shader_atomic_fp16_vector)] +[numthreads(1, 1, 1)] +void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) +{ +#ifdef POSITIVE_VECTOR_ATOMIC + __atomic_add(vectorBuffer[0], half2(1.0h, 2.0h)); +// POSITIVE_WARN: profile implicitly upgraded +// POSITIVE_WARN: spvAtomicFloat16VectorNV +// POSITIVE_ERR: error[E41013]: entry point uses capabilities not in specified profile +// POSITIVE_ERR: spvAtomicFloat16VectorNV +#elif defined(POSITIVE_VECTOR_MIN_MAX) + __atomic_min(vectorBuffer[0], half2(1.0h, 2.0h)); + __atomic_max(vectorBuffer[1], half2(3.0h, 4.0h)); +#else + vectorBuffer[0] = half2(1.0h, 2.0h); +#endif +} + +// NEGATIVE-NOT: OpCapability AtomicFloat16VectorNV +// NEGATIVE-NOT: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// NEGATIVE: OpMemoryModel + +// POSITIVE: OpCapability AtomicFloat16VectorNV +// POSITIVE: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POSITIVE: OpAtomicFAddEXT %v2half + +// POSITIVE_IGNORE_CAPS-NOT: warning[E41012] +// POSITIVE_IGNORE_CAPS: OpCapability AtomicFloat16VectorNV +// POSITIVE_IGNORE_CAPS: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POSITIVE_IGNORE_CAPS: OpAtomicFAddEXT %v2half + +// POSITIVE_MIN_MAX: OpCapability AtomicFloat16VectorNV +// POSITIVE_MIN_MAX: OpExtension "SPV_NV_shader_atomic_fp16_vector" +// POSITIVE_MIN_MAX: OpAtomicFMinEXT %v2half +// POSITIVE_MIN_MAX: OpAtomicFMaxEXT %v2half