From 126ee79cc7c021648992edf8e4f494ec7ee5e78a Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Tue, 6 May 2025 15:29:14 -0600
Subject: [PATCH] [SM6.9] Restore testing for native vector intrinsics

Several intrinsics that were enabled for native vectors late got their
testing removed as it expected scalarized forms. This adds tests for
pow, modf, and abs in their native vector intrinsic forms. It removes
native vector intrinsics for derivative operations as they require more
scalarization removal due to their convergent markers and restores the
scalarized testing for them. The 1024 size was removed from
longvec-intrinsics as the verbose way that constant vectors are
represented in the disassembly made the test take significantly longer.
---
 lib/DXIL/DxilOperations.cpp                   | 16 +++---
 .../hlsl/types/longvec-intrinsics.hlsl        | 50 ++++++++++++++++++-
 ...longvec-trivial-scalarized-intrinsics.hlsl |  7 +++
 ...ongvec-trivial-unary-float-intrinsics.hlsl |  2 +
 utils/hct/hctdb.py                            |  8 +--
 5 files changed, 70 insertions(+), 13 deletions(-)
diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp
index 95e8dfaeba..a66dfc68d4 100644
--- a/lib/DXIL/DxilOperations.cpp
+++ b/lib/DXIL/DxilOperations.cpp
@@ -765,32 +765,32 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
      "unary",
      Attribute::ReadNone,
      1,
-     {{0x403}},
-     {{0x3}}}, // Overloads: hf<hf
+     {{0x3}},
+     {{0x0}}}, // Overloads: hf
     {OC::DerivCoarseY,
      "DerivCoarseY",
      OCC::Unary,
      "unary",
      Attribute::ReadNone,
      1,
-     {{0x403}},
-     {{0x3}}}, // Overloads: hf<hf
+     {{0x3}},
+     {{0x0}}}, // Overloads: hf
     {OC::DerivFineX,
      "DerivFineX",
      OCC::Unary,
      "unary",
      Attribute::ReadNone,
      1,
-     {{0x403}},
-     {{0x3}}}, // Overloads: hf<hf
+     {{0x3}},
+     {{0x0}}}, // Overloads: hf
     {OC::DerivFineY,
      "DerivFineY",
      OCC::Unary,
      "unary",
      Attribute::ReadNone,
      1,
-     {{0x403}},
-     {{0x3}}}, // Overloads: hf<hf
+     {{0x3}},
+     {{0x0}}}, // Overloads: hf
 
     // Pixel shader
     {OC::EvalSnapped,
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl
index 0b7f0d6b2f..f13772970b 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl
@@ -2,7 +2,6 @@
 // RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7   %s | FileCheck %s
 // RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s
 // RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=256 %s | FileCheck %s
-// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=1024 %s | FileCheck %s
 
 // Test vector-enabled non-trivial intrinsics that take parameters of various types.
 
@@ -203,6 +202,36 @@ void main() {
   // CHECK: fmul fast <[[NUM]] x float> [[tmp]], <float 0x3FE62E4300000000
   fRes += log(fVec1);
 
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec2]])  ; Log(value)
+  // CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]]
+  // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp2]])  ; Exp(value)
+  hRes += pow(hVec2, hVec1);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec2]])  ; Log(value)
+  // CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]]
+  // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp2]])  ; Exp(value)
+  fRes += pow(fVec2, fVec1);
+
+  vector<half, NUM> hVal;
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 29, <[[NUM]] x half> [[hvec1]])  ; Round_z(value)
+  // CHECK: fsub fast <[[NUM]] x half> [[hvec1]], [[tmp]]
+  hRes *= modf(hVec1, hVal);
+  hRes += hVal;
+
+  vector<float, NUM> fVal;
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 29, <[[NUM]] x float> [[fvec1]])  ; Round_z(value)
+  // CHECK: fsub fast <[[NUM]] x float> [[fvec1]], [[tmp]]
+  fRes *= modf(fVec1, fVal);
+  fRes += fVal;
+
   // CHECK-NOT: extractelement
   // CHECK-NOT: insertelement
   // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x half> [[hvec2]], [[hvec1]]
@@ -227,6 +256,25 @@ void main() {
   // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]]
   fRes += smoothstep(fVec1, fVec2, fVec3);
 
+  // Note that Fabs is tested in longvec-trivial-unary-float-intrinsics.
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i16> zeroinitializer, [[svec1]]
+  // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[tmp]])  ; IMax(a,b)
+  sRes += abs(sVec1);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i32> zeroinitializer, [[ivec1]]
+  // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[tmp]])  ; IMax(a,b)
+  iRes += abs(iVec1);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i64> zeroinitializer, [[lvec1]]
+  // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[tmp]])  ; IMax(a,b)
+  lRes += abs(lVec1);
+
   // Intrinsics that expand into llvm ops.
 
   // CHECK-NOT: extractelement
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl
index 6ebb511b00..37fb1d2e15 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl
@@ -9,6 +9,13 @@
 // RUN: %dxc -DFUNC=countbits   -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
 // RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
 // RUN: %dxc -DFUNC=firstbitlow  -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
+// RUN: %dxc -DFUNC=ddx         -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
+// RUN: %dxc -DFUNC=ddx_coarse  -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
+// RUN: %dxc -DFUNC=ddx_fine    -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
+// RUN: %dxc -DFUNC=ddy         -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
+// RUN: %dxc -DFUNC=ddy_coarse  -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
+// RUN: %dxc -DFUNC=ddy_fine    -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
+// RUN: %dxc -DFUNC=fwidth      -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY
 // RUN: %dxc -DFUNC=QuadReadLaneAt         -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD
 // RUN: %dxc -DFUNC=QuadReadAcrossX        -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD
 // RUN: %dxc -DFUNC=QuadReadAcrossY        -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl
index 91ab631a7e..9cc3d23b66 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl
@@ -1,3 +1,5 @@
+// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs  -DOP=6 -DNUM=7    %s | FileCheck %s
+// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs  -DOP=6 -DNUM=1022 %s | FileCheck %s
 // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate  -DOP=7 -DNUM=7    %s | FileCheck %s
 // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate  -DOP=7 -DNUM=1022 %s | FileCheck %s
 // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos  -DOP=12 -DNUM=7    %s | FileCheck %s
diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py
index 57f2574005..7954faf2af 100644
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -2629,7 +2629,7 @@ def UFI(name, **mappings):
             next_op_idx,
             "Unary",
             "computes the rate of change of components per stamp",
-            "hf<",
+            "hf",
             "rn",
             [
                 db_dxil_param(
@@ -2647,7 +2647,7 @@ def UFI(name, **mappings):
             next_op_idx,
             "Unary",
             "computes the rate of change of components per stamp",
-            "hf<",
+            "hf",
             "rn",
             [
                 db_dxil_param(
@@ -2665,7 +2665,7 @@ def UFI(name, **mappings):
             next_op_idx,
             "Unary",
             "computes the rate of change of components per pixel",
-            "hf<",
+            "hf",
             "rn",
             [
                 db_dxil_param(
@@ -2683,7 +2683,7 @@ def UFI(name, **mappings):
             next_op_idx,
             "Unary",
             "computes the rate of change of components per pixel",
-            "hf<",
+            "hf",
             "rn",
             [
                 db_dxil_param(