Skip to content

Commit 05d11d8

Browse files
author
chenqian
committed
[RISCV][ESPV] Add support for shift/src/qacc intrinsics (src.q, srcmb, srcq, srcxxp, slcxxp, vsmulas
1 parent b2a1138 commit 05d11d8

11 files changed

Lines changed: 1701 additions & 696 deletions

llvm/include/llvm/IR/IntrinsicsRISCVESPVM.td

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1549,6 +1549,22 @@ def int_riscv_esp_srcq_128_st_incp_m : Intrinsic<[llvm_ptr_ty],
15491549
[llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_ptr_ty],
15501550
[IntrWriteMem, IntrArgMemOnly]>;
15511551

1552+
// SRCXXP.2Q - Shift Right Concatenated with Register offset and Post-increment, 2 Quad registers
1553+
// Performs logical right shift on concatenated qw and qy, updates both registers and pointer
1554+
// Returns: { <16 x i8> new_qy, <16 x i8> new_qw, ptr new_ptr }
1555+
// Explicit state passing: All register updates are visible in IR through return values
1556+
// Because every update flows through operands and results, IntrHasSideEffects is not needed: the call remains optimizable and stays live as long as any of its results is used
1557+
def int_riscv_esp_srcxxp_2q_m : Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_ptr_ty], // Results: two 128-bit vectors and a pointer (new_qy, new_qw, new_ptr per the Returns note above)
1558+
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], // Operands: two 128-bit vectors, a pointer and an i32; presumably (qy, qw, ptr, offset) - TODO confirm order against the lowering
1559+
[IntrNoMem]>; // Declared as not accessing memory; register and pointer updates are modelled purely as return values
1560+
1561+
// SLCXXP.2Q - Shift Left Concatenated with Register offset and Post-increment, 2 Quad registers
1562+
// Shifts concatenated qy and qw left, updates both registers and pointer
1563+
// Returns the modified values and the updated pointer; input and output registers are the same due to constraints
1564+
def int_riscv_esp_slcxxp_2q_m : Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_ptr_ty], // Results: two 128-bit vectors and the updated pointer; NOTE(review): vector order is not stated here - confirm it matches SRCXXP.2Q
1565+
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], // Operands: two 128-bit vectors, a pointer and an i32; presumably (qy, qw, ptr, offset) - TODO confirm order against the lowering
1566+
[IntrNoMem]>; // Declared as not accessing memory; same signature shape as int_riscv_esp_srcxxp_2q_m
1567+
15521568
// SRCMB.S16.QACC - Shift amount from rs1 register (rs1[5:0])
15531569
// Returns v8i16 (saturated 16-bit signed values)
15541570
// QACC is passed as explicit phantom operand (4x128-bit) for proper data flow tracking
@@ -1563,6 +1579,20 @@ def int_riscv_esp_srcmb_s8_qacc_m : Intrinsic<[llvm_v16i8_ty],
15631579
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
15641580
[IntrNoMem]>;
15651581

1582+
// SRCMB.S16.Q.QACC - Shift amounts from qw vector (8x16-bit values)
1583+
// Returns v8i16 (saturated 16-bit signed values)
1584+
// QACC is passed as explicit phantom operand (4x128-bit) for proper data flow tracking
1585+
def int_riscv_esp_srcmb_s16_q_qacc_m : Intrinsic<[llvm_v8i16_ty], // Result: 8 x i16 vector
1586+
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v8i16_ty, llvm_i32_ty], // Operands: 4 x v16i8 QACC phantom operand, v8i16 (presumably the qw shift amounts), then an i32 whose meaning is not described here - TODO document
1587+
[IntrNoMem]>; // Declared as not accessing memory; QACC enters only through the explicit operands
1588+
1589+
// SRCMB.S8.Q.QACC - Shift amounts from qw vector (16x8-bit values)
1590+
// Returns v16i8 (saturated 8-bit signed values)
1591+
// QACC is passed as explicit phantom operand (4x128-bit) for proper data flow tracking
1592+
def int_riscv_esp_srcmb_s8_q_qacc_m : Intrinsic<[llvm_v16i8_ty], // Result: 16 x i8 vector
1593+
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], // Operands: 4 x v16i8 QACC phantom operand, v16i8 (presumably the qw shift amounts), then an i32 whose meaning is not described here - TODO document
1594+
[IntrNoMem]>; // Declared as not accessing memory; QACC enters only through the explicit operands
1595+
15661596
// SRCMB.U16.QACC - Shift amount from rs1 register (rs1[5:0])
15671597
// Returns v8i16 (saturated 16-bit unsigned values)
15681598
// QACC is passed as explicit phantom operand (4x128-bit) for proper data flow tracking
@@ -1641,6 +1671,30 @@ def int_riscv_esp_vmulas_s8_qacc_m :
16411671
llvm_v16i8_ty], // qy
16421672
[IntrNoMem]>;
16431673

1674+
// ESP.VSMULAS.S16.QACC (_m version) - Multiply-accumulate to QACC_H and QACC_L
1675+
// Returns: {v16i8, v16i8, v16i8, v16i8} - 4x128-bit QACC directly
1676+
// Parameters: v16i8, v16i8, v16i8, v16i8 (4x128-bit QACC passthru), v8i16, v8i16 (qx, qy), i32 (sel16)
1677+
// Note: No ClangBuiltin - builtin signature doesn't match intrinsic signature (builtin uses output pointers)
1678+
def int_riscv_esp_vsmulas_s16_qacc_m :
1679+
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], // Results: updated QACC as four 128-bit vectors
1680+
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, // Incoming QACC value (four 128-bit vectors, passed through explicitly)
1681+
llvm_v8i16_ty, // qx: 8 x i16 source vector
1682+
llvm_v8i16_ty, // qy: 8 x i16 source vector
1683+
llvm_i32_ty], // sel16: i32 selector; NOTE(review): valid range and immediate-ness are not specified here - confirm
1684+
[IntrNoMem]>; // Declared as not accessing memory; IntrHasSideEffects is omitted because QACC flows through operands and results
1685+
1686+
// ESP.VSMULAS.S8.QACC (_m version) - Multiply-accumulate to QACC_H and QACC_L
1687+
// Returns: {v16i8, v16i8, v16i8, v16i8} - 4x128-bit QACC directly
1688+
// Parameters: v16i8, v16i8, v16i8, v16i8 (4x128-bit QACC passthru), v16i8, v16i8 (qx, qy), i32 (sel16)
1689+
// Note: No ClangBuiltin - builtin signature doesn't match intrinsic signature (builtin uses output pointers)
1690+
def int_riscv_esp_vsmulas_s8_qacc_m :
1691+
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], // Results: updated QACC as four 128-bit vectors
1692+
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, // Incoming QACC value (four 128-bit vectors, passed through explicitly)
1693+
llvm_v16i8_ty, // qx: 16 x i8 source vector
1694+
llvm_v16i8_ty, // qy: 16 x i8 source vector
1695+
llvm_i32_ty], // sel16: i32 selector; NOTE(review): the name matches the S16 variant - confirm it should not be sel8 for this S8 form
1696+
[IntrNoMem]>; // Declared as not accessing memory; IntrHasSideEffects is omitted because QACC flows through operands and results
1697+
16441698
// ESP.VMULAS.S8.QACC.LD.IP (_m version)
16451699
def int_riscv_esp_vmulas_s8_qacc_ld_ip_m :
16461700
Intrinsic<[llvm_ptr_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], // Returns: ptr + qu + 4x128-bit QACC

0 commit comments

Comments
 (0)