From 126ee79cc7c021648992edf8e4f494ec7ee5e78a Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 6 May 2025 15:29:14 -0600 Subject: [PATCH] [SM6.9] Restore testing for native vector intrinsics Several intrinsics that were enabled for native vectors late got their testing removed as it expected scalarized forms. This adds tests for pow, modf, and abs in their native vector intrinsic forms. It removes native vector intrinsics for derivative operations as they require more scalarization removal due to their convergent markers and restores the scalarized testing for them. The 1024 size was removed from longvec-intrinsics as the verbose way that constant vectors are represented in the disassembly made the test take significantly longer. --- lib/DXIL/DxilOperations.cpp | 16 +++--- .../hlsl/types/longvec-intrinsics.hlsl | 50 ++++++++++++++++++- ...longvec-trivial-scalarized-intrinsics.hlsl | 7 +++ ...ongvec-trivial-unary-float-intrinsics.hlsl | 2 + utils/hct/hctdb.py | 8 +-- 5 files changed, 70 insertions(+), 13 deletions(-) diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 95e8dfaeba..a66dfc68d4 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -765,32 +765,32 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { "unary", Attribute::ReadNone, 1, - {{0x403}}, - {{0x3}}}, // Overloads: hf [[tmp]], @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec2]]) ; Log(value) + // CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp2]]) ; Exp(value) + hRes += pow(hVec2, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec2]]) ; Log(value) + // CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp2]]) ; Exp(value) + fRes += pow(fVec2, fVec1); + + vector hVal; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 29, <[[NUM]] x half> [[hvec1]]) ; Round_z(value) + // CHECK: fsub fast <[[NUM]] x half> [[hvec1]], [[tmp]] + hRes *= modf(hVec1, hVal); + hRes += hVal; + + vector fVal; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 29, <[[NUM]] x float> [[fvec1]]) ; Round_z(value) + // CHECK: fsub fast <[[NUM]] x float> [[fvec1]], [[tmp]] + fRes *= modf(fVec1, fVal); + fRes += fVal; + // CHECK-NOT: extractelement // CHECK-NOT: insertelement // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x half> [[hvec2]], [[hvec1]] @@ -227,6 +256,25 @@ void main() { // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] fRes += smoothstep(fVec1, fVec2, fVec3); + // Note that Fabs is tested in longvec-trivial-unary-float-intrinsics. + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i16> zeroinitializer, [[svec1]] + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[tmp]]) ; IMax(a,b) + sRes += abs(sVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i32> zeroinitializer, [[ivec1]] + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[tmp]]) ; IMax(a,b) + iRes += abs(iVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i64> zeroinitializer, [[lvec1]] + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[tmp]]) ; IMax(a,b) + lRes += abs(lVec1); + // Intrinsics that expand into llvm ops. // CHECK-NOT: extractelement diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl index 6ebb511b00..37fb1d2e15 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -9,6 +9,13 @@ // RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=fwidth -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD // RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD // RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl index 91ab631a7e..9cc3d23b66 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -1,3 +1,5 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs -DOP=6 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs -DOP=6 -DNUM=1022 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 57f2574005..7954faf2af 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -2629,7 +2629,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2647,7 +2647,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2665,7 +2665,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2683,7 +2683,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf<", + "hf", "rn", [ db_dxil_param(