Skip to content

Intel CPU supports SM3 SM4

Sun Yimin edited this page Jun 3, 2026 · 10 revisions

即将推出的 Arrow Lake 和 Lunar Lake 处理器系列，将支持 SHA512、SM3/SM4 以及 AVX-VNNI 等指令集。

image

SM3

参考模拟实现。比 ARM64-CE 的 SM3 指令简单。
实现

SM4

参考模拟实现。和 ARM64-CE 的 SM4 指令类似。
实现

指令

// --- VEX and ModRM Helper Macros ---
// Each helper takes a register number x and yields the bits for one encoding field.
// The arithmetic is only meaningful for x in 0..15 (no range check is performed).
//
// VEX_Rp: inverted R bit (bit 7 of VEX.Byte2, the byte emitted right after 0xC4).
// It extends ModRM.reg: yields 1 for x in 0..7 (no extension), 0 for x in 8..15 (+8).
#define VEX_Rp(x)  (1 - ((x) >> 3))
// VEX_Bp: inverted B bit (bit 5 of VEX.Byte2). It extends ModRM.rm (or SIB.base).
// Same inverted logic as VEX_Rp: 1 for x in 0..7, 0 for x in 8..15.
#define VEX_Bp(x)  (1 - ((x) >> 3))
// VEX_VVVV: vvvv field (bits 6:3 of VEX.Byte3), holding the first source operand (Xs1).
// All four bits are stored inverted, hence 15 - x; the caller shifts the result left by 3.
#define VEX_VVVV(x) (15 - (x))
// MODRM_REG3: ModRM.reg field (bits 5:3 of ModRM), low three bits of the destination (Xd),
// already shifted into position. The fourth register bit is carried by VEX_Rp.
#define MODRM_REG3(x) (((x) & 7) << 3)
// MODRM_RM3: ModRM.rm field (bits 2:0 of ModRM), low three bits of the second source (Xs2)
// or base register. The fourth register bit is carried by VEX_Bp.
#define MODRM_RM3(x)  ((x) & 7)

// --- Instruction Macros (Intel Syntax: Xd, Xs1, Xs2) ---
// VSM3MSG1 Xd, Xs1, Xs2  (Intel operand order: destination, source 1, source 2)
// Encoding: VEX.128.0F38.W0 DA /r, with no SIMD prefix (pp=00).
// Operand mapping: Xd -> ModRM.reg, Xs1 -> VEX.vvvv, Xs2 -> ModRM.rm (register-direct form only).
// Arguments are expected to be numeric register indices, since they feed the
// VEX_*/MODRM_* arithmetic helpers.
#define VSM3MSG1(Xd, Xs1, Xs2) \
	BYTE $0xC4; \
	/* VEX.Byte2: inverted R (from Xd) | inverted X, stored as 1 (0x40) | inverted B (from Xs2) | map 0F38 (0x02) */ \
	BYTE $((VEX_Rp(Xd) << 7) | 0x40 | (VEX_Bp(Xs2) << 5) | 0x02); \
	/* VEX.Byte3: W=0 | inverted vvvv (Xs1) | L=0 | pp=00, so only the vvvv bits are non-zero */ \
	BYTE $(VEX_VVVV(Xs1) << 3); \
	/* Opcode byte */ \
	BYTE $0xDA; \
	/* ModRM: mod=11 (0xC0) | reg = Xd | rm = Xs2 */ \
	BYTE $(0xC0 | MODRM_REG3(Xd) | MODRM_RM3(Xs2))

// VSM3MSG2 Xd, Xs1, Xs2  (Intel operand order: destination, source 1, source 2)
// Encoding: VEX.128.66.0F38.W0 DA /r, where the 66h prefix is expressed as pp=01.
// Operand mapping: Xd -> ModRM.reg, Xs1 -> VEX.vvvv, Xs2 -> ModRM.rm (register-direct form only).
// Arguments are expected to be numeric register indices, since they feed the
// VEX_*/MODRM_* arithmetic helpers.
#define VSM3MSG2(Xd, Xs1, Xs2) \
	BYTE $0xC4; \
	/* VEX.Byte2: inverted R (from Xd) | inverted X, stored as 1 (0x40) | inverted B (from Xs2) | map 0F38 (0x02) */ \
	BYTE $((VEX_Rp(Xd) << 7) | 0x40 | (VEX_Bp(Xs2) << 5) | 0x02); \
	/* VEX.Byte3: W=0 | inverted vvvv (Xs1) | L=0 | pp=01 (66h) */ \
	BYTE $((VEX_VVVV(Xs1) << 3) | 0x01); \
	/* Opcode byte */ \
	BYTE $0xDA; \
	/* ModRM: mod=11 (0xC0) | reg = Xd | rm = Xs2 */ \
	BYTE $(0xC0 | MODRM_REG3(Xd) | MODRM_RM3(Xs2))

// VSM3RNDS2 Xd, Xs1, Xs2, IMM8  (Intel operand order: destination, source 1, source 2, immediate)
// Encoding: VEX.128.66.0F3A.W0 DE /r ib. The 0F3A map is used because of the trailing imm8;
// the 66h prefix is expressed as pp=01.
// Operand mapping: Xd -> ModRM.reg, Xs1 -> VEX.vvvv, Xs2 -> ModRM.rm (register-direct form only).
// Register arguments are expected to be numeric register indices, since they feed the
// VEX_*/MODRM_* arithmetic helpers.
#define VSM3RNDS2(Xd, Xs1, Xs2, IMM8) \
	BYTE $0xC4; \
	/* VEX.Byte2: inverted R (from Xd) | inverted X, stored as 1 (0x40) | inverted B (from Xs2) | map 0F3A (0x03) */ \
	BYTE $((VEX_Rp(Xd) << 7) | 0x40 | (VEX_Bp(Xs2) << 5) | 0x03); \
	/* VEX.Byte3: W=0 | inverted vvvv (Xs1) | L=0 | pp=01 (66h) */ \
	BYTE $((VEX_VVVV(Xs1) << 3) | 0x01); \
	/* Opcode byte */ \
	BYTE $0xDE; \
	/* ModRM: mod=11 (0xC0) | rm = Xs2 | reg = Xd */ \
	BYTE $(0xC0 | MODRM_RM3(Xs2) | MODRM_REG3(Xd)); \
	/* Immediate operand, truncated to its low 8 bits */ \
	BYTE $(0xFF & (IMM8))

// VSM4KEY4 xmm1, xmm2, xmm3
// Intel Syntax: Xd (dst), Xs1 (src1), Xs2 (src2)
// Opcode: VEX.128.F3.0F38.W0 DA /r (F3h prefix, pp=10; L=0 selects the 128-bit XMM form)
// Mapping: STANDARD - Xd -> reg, Xs1 -> vvvv, Xs2 -> rm
// Arguments are expected to be numeric register indices (0..15), since they feed the
// VEX_*/MODRM_* arithmetic helpers.
#define VSM4KEY4(Xd, Xs1, Xs2) \
	BYTE $0xC4; \
	/* VEX.Byte2: [R'(Xd) X'(stored 1) B'(Xs2) m=00010(0F38)] -> Base 0x42 */ \
	/* The base must leave bit 5 clear: B' is stored inverted, so a base with bit 5 preset (0x62) */ \
	/* could never be cleared by the OR and Xs2 = 8..15 would be encoded as register 0..7. */ \
	BYTE $((0x42) | (VEX_Rp(Xd) << 7) | (VEX_Bp(Xs2) << 5)); \
	/* VEX.Byte3: [W=0 vvvv(Xs1) L=0 pp=10(F3h)] -> Base 0x02 */ \
	BYTE $((0x02) | (VEX_VVVV(Xs1) << 3)); \
	BYTE $0xDA; \
	/* ModRM: [mod=11 reg(Xd) rm(Xs2)] -> Base 0xC0 */ \
	BYTE $((0xC0) | MODRM_REG3(Xd) | MODRM_RM3(Xs2))

// VSM4RNDS4 xmm1, xmm2, xmm3
// Intel Syntax: Xd (dst), Xs1 (src1), Xs2 (src2)
// Opcode: VEX.128.F2.0F38.W0 DA /r (F2h prefix, pp=11; L=0 selects the 128-bit XMM form)
// Mapping: STANDARD - Xd -> reg, Xs1 -> vvvv, Xs2 -> rm
// Arguments are expected to be numeric register indices (0..15), since they feed the
// VEX_*/MODRM_* arithmetic helpers.
#define VSM4RNDS4(Xd, Xs1, Xs2) \
	BYTE $0xC4; \
	/* VEX.Byte2: [R'(Xd) X'(stored 1) B'(Xs2) m=00010(0F38)] -> Base 0x42 */ \
	/* The base must leave bit 5 clear: B' is stored inverted, so a base with bit 5 preset (0x62) */ \
	/* could never be cleared by the OR and Xs2 = 8..15 would be encoded as register 0..7. */ \
	BYTE $((0x42) | (VEX_Rp(Xd) << 7) | (VEX_Bp(Xs2) << 5)); \
	/* VEX.Byte3: [W=0 vvvv(Xs1) L=0 pp=11(F2h)] -> Base 0x03 */ \
	BYTE $((0x03) | (VEX_VVVV(Xs1) << 3)); \
	BYTE $0xDA; \
	/* ModRM: [mod=11 reg(Xd) rm(Xs2)] -> Base 0xC0 */ \
	BYTE $((0xC0) | MODRM_REG3(Xd) | MODRM_RM3(Xs2))

Clone this wiki locally