Skip to content

Commit

Permalink
Regenerate c2 asm kernels with msan support
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 728351275
  • Loading branch information
dsharletg authored and xnnpack-bot committed Feb 19, 2025
1 parent 0df264c commit 7804d7d
Show file tree
Hide file tree
Showing 16 changed files with 218 additions and 122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast

.intel_syntax noprefix

# Free up GP registers.
# Save register arguments for tail call to msan annotation helper.
push rdi
push rsi
push rbx
push rbp
push r15
Expand All @@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast
push r12

# Load params now to free up a GP register.
mov r13, [rsp + 80] # params
mov r13, [rsp + 96] # params
vbroadcastss zmm0, DWORD PTR [r13]
vbroadcastss zmm1, DWORD PTR [r13 + 4]

# Load c pointer.
mov r10, [rsp + 56]
mov r10, [rsp + 72]
# Load cm_stride.
mov r11, [rsp + 64]
mov r11, [rsp + 80]

# Align the stack pointer.
mov r13, rsp
Expand Down Expand Up @@ -454,17 +456,21 @@ return:
pop r15
pop rbp
pop rbx
pop rsi
pop rdi
#if XNN_HAS_FEATURE(memory_sanitizer)
jmp xnn_gemm_ukernel_msan_sizeof_c_4
#else
ret
#endif
END_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast

#ifdef __has_feature
#if __has_feature(dataflow_sanitizer)
#if XNN_HAS_FEATURE(dataflow_sanitizer)
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast.dfsan
.intel_syntax noprefix
# Placeholder body for the .dfsan-suffixed symbol of the 10x16c2 kernel. It sits
# behind the dataflow_sanitizer feature check and performs no GEMM work at all.
# We could implement this by calling a function that implements the dfsan instrumentation.
# For now, just break, so if someone tries to use this, they'll know where the problem is.
int 3  # Breakpoint trap: execution stops here, at the unimplemented stub.
ret  # Reached only if execution is resumed after the trap; returns with nothing computed.
END_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast.dfsan
#endif
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast

.intel_syntax noprefix

# Free up GP registers.
# Save register arguments for tail call to msan annotation helper.
push rdi
push rsi
push rbx
push rbp
push r15
Expand All @@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast
push r12

# Load params now to free up a GP register.
mov r13, [rsp + 80] # params
mov r13, [rsp + 96] # params
vbroadcastss zmm0, DWORD PTR [r13]
vbroadcastss zmm1, DWORD PTR [r13 + 4]

# Load c pointer.
mov r10, [rsp + 56]
mov r10, [rsp + 72]
# Load cm_stride.
mov r11, [rsp + 64]
mov r11, [rsp + 80]

# Align the stack pointer.
mov r13, rsp
Expand Down Expand Up @@ -487,17 +489,21 @@ return:
pop r15
pop rbp
pop rbx
pop rsi
pop rdi
#if XNN_HAS_FEATURE(memory_sanitizer)
jmp xnn_gemm_ukernel_msan_sizeof_c_4
#else
ret
#endif
END_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast

#ifdef __has_feature
#if __has_feature(dataflow_sanitizer)
#if XNN_HAS_FEATURE(dataflow_sanitizer)
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast.dfsan
.intel_syntax noprefix
# Placeholder body for the .dfsan-suffixed symbol of the 11x16c2 kernel. It sits
# behind the dataflow_sanitizer feature check and performs no GEMM work at all.
# We could implement this by calling a function that implements the dfsan instrumentation.
# For now, just break, so if someone tries to use this, they'll know where the problem is.
int 3  # Breakpoint trap: execution stops here, at the unimplemented stub.
ret  # Reached only if execution is resumed after the trap; returns with nothing computed.
END_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast.dfsan
#endif
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast

.intel_syntax noprefix

# Free up GP registers.
# Save register arguments for tail call to msan annotation helper.
push rdi
push rsi
push rbx
push rbp
push r15
Expand All @@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast
push r12

# Load params now to free up a GP register.
mov r13, [rsp + 80] # params
mov r13, [rsp + 96] # params
vbroadcastss zmm0, DWORD PTR [r13]
vbroadcastss zmm1, DWORD PTR [r13 + 4]

# Load c pointer.
mov r10, [rsp + 56]
mov r10, [rsp + 72]
# Load cm_stride.
mov r11, [rsp + 64]
mov r11, [rsp + 80]

# Align the stack pointer.
mov r13, rsp
Expand All @@ -52,7 +54,7 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast
mov [rsp], r13

# Allocate some space on the stack.
sub rsp, 64
sub rsp, 128

# Copy k and flip bit.
mov r11, rdx
Expand Down Expand Up @@ -134,7 +136,7 @@ tail:
vmovups ZMMWORD PTR [r10]{k1}, zmm11

return:
add rsp, 64
add rsp, 128
mov r13, [rsp]
mov rsp, r13
# Restore the callee saved registers.
Expand All @@ -144,17 +146,21 @@ return:
pop r15
pop rbp
pop rbx
pop rsi
pop rdi
#if XNN_HAS_FEATURE(memory_sanitizer)
jmp xnn_gemm_ukernel_msan_sizeof_c_4
#else
ret
#endif
END_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast

#ifdef __has_feature
#if __has_feature(dataflow_sanitizer)
#if XNN_HAS_FEATURE(dataflow_sanitizer)
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast.dfsan
.intel_syntax noprefix
# Placeholder body for the .dfsan-suffixed symbol of the 1x16c2 kernel. It sits
# behind the dataflow_sanitizer feature check and performs no GEMM work at all.
# We could implement this by calling a function that implements the dfsan instrumentation.
# For now, just break, so if someone tries to use this, they'll know where the problem is.
int 3  # Breakpoint trap: execution stops here, at the unimplemented stub.
ret  # Reached only if execution is resumed after the trap; returns with nothing computed.
END_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast.dfsan
#endif
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast

.intel_syntax noprefix

# Free up GP registers.
# Save register arguments for tail call to msan annotation helper.
push rdi
push rsi
push rbx
push rbp
push r15
Expand All @@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast
push r12

# Load params now to free up a GP register.
mov r13, [rsp + 80] # params
mov r13, [rsp + 96] # params
vbroadcastss zmm0, DWORD PTR [r13]
vbroadcastss zmm1, DWORD PTR [r13 + 4]

# Load c pointer.
mov r10, [rsp + 56]
mov r10, [rsp + 72]
# Load cm_stride.
mov r11, [rsp + 64]
mov r11, [rsp + 80]

# Align the stack pointer.
mov r13, rsp
Expand All @@ -52,7 +54,7 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast
mov [rsp], r13

# Allocate some space on the stack.
sub rsp, 64
sub rsp, 128

# Copy k and flip bit.
mov r11, rdx
Expand Down Expand Up @@ -157,7 +159,7 @@ tail:
vmovups ZMMWORD PTR [r10 + 64]{k2}, zmm12

return:
add rsp, 64
add rsp, 128
mov r13, [rsp]
mov rsp, r13
# Restore the callee saved registers.
Expand All @@ -167,17 +169,21 @@ return:
pop r15
pop rbp
pop rbx
pop rsi
pop rdi
#if XNN_HAS_FEATURE(memory_sanitizer)
jmp xnn_gemm_ukernel_msan_sizeof_c_4
#else
ret
#endif
END_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast

#ifdef __has_feature
#if __has_feature(dataflow_sanitizer)
#if XNN_HAS_FEATURE(dataflow_sanitizer)
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast.dfsan
.intel_syntax noprefix
# Placeholder body for the .dfsan-suffixed symbol of the 1x32c2 kernel. It sits
# behind the dataflow_sanitizer feature check and performs no GEMM work at all.
# We could implement this by calling a function that implements the dfsan instrumentation.
# For now, just break, so if someone tries to use this, they'll know where the problem is.
int 3  # Breakpoint trap: execution stops here, at the unimplemented stub.
ret  # Reached only if execution is resumed after the trap; returns with nothing computed.
END_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast.dfsan
#endif
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast

.intel_syntax noprefix

# Free up GP registers.
# Save register arguments for tail call to msan annotation helper.
push rdi
push rsi
push rbx
push rbp
push r15
Expand All @@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast
push r12

# Load params now to free up a GP register.
mov r13, [rsp + 80] # params
mov r13, [rsp + 96] # params
vbroadcastss zmm0, DWORD PTR [r13]
vbroadcastss zmm1, DWORD PTR [r13 + 4]

# Load c pointer.
mov r10, [rsp + 56]
mov r10, [rsp + 72]
# Load cm_stride.
mov r11, [rsp + 64]
mov r11, [rsp + 80]

# Align the stack pointer.
mov r13, rsp
Expand Down Expand Up @@ -171,17 +173,21 @@ return:
pop r15
pop rbp
pop rbx
pop rsi
pop rdi
#if XNN_HAS_FEATURE(memory_sanitizer)
jmp xnn_gemm_ukernel_msan_sizeof_c_4
#else
ret
#endif
END_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast

#ifdef __has_feature
#if __has_feature(dataflow_sanitizer)
#if XNN_HAS_FEATURE(dataflow_sanitizer)
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast.dfsan
.intel_syntax noprefix
# Placeholder body for the .dfsan-suffixed symbol of the 2x16c2 kernel. It sits
# behind the dataflow_sanitizer feature check and performs no GEMM work at all.
# We could implement this by calling a function that implements the dfsan instrumentation.
# For now, just break, so if someone tries to use this, they'll know where the problem is.
int 3  # Breakpoint trap: execution stops here, at the unimplemented stub.
ret  # Reached only if execution is resumed after the trap; returns with nothing computed.
END_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast.dfsan
#endif
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast

.intel_syntax noprefix

# Free up GP registers.
# Save register arguments for tail call to msan annotation helper.
push rdi
push rsi
push rbx
push rbp
push r15
Expand All @@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast
push r12

# Load params now to free up a GP register.
mov r13, [rsp + 80] # params
mov r13, [rsp + 96] # params
vbroadcastss zmm0, DWORD PTR [r13]
vbroadcastss zmm1, DWORD PTR [r13 + 4]

# Load c pointer.
mov r10, [rsp + 56]
mov r10, [rsp + 72]
# Load cm_stride.
mov r11, [rsp + 64]
mov r11, [rsp + 80]

# Align the stack pointer.
mov r13, rsp
Expand Down Expand Up @@ -209,17 +211,21 @@ return:
pop r15
pop rbp
pop rbx
pop rsi
pop rdi
#if XNN_HAS_FEATURE(memory_sanitizer)
jmp xnn_gemm_ukernel_msan_sizeof_c_4
#else
ret
#endif
END_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast

#ifdef __has_feature
#if __has_feature(dataflow_sanitizer)
#if XNN_HAS_FEATURE(dataflow_sanitizer)
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast.dfsan
.intel_syntax noprefix
# Placeholder body for the .dfsan-suffixed symbol of the 2x32c2 kernel. It sits
# behind the dataflow_sanitizer feature check and performs no GEMM work at all.
# We could implement this by calling a function that implements the dfsan instrumentation.
# For now, just break, so if someone tries to use this, they'll know where the problem is.
int 3  # Breakpoint trap: execution stops here, at the unimplemented stub.
ret  # Reached only if execution is resumed after the trap; returns with nothing computed.
END_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast.dfsan
#endif
#endif
Loading

0 comments on commit 7804d7d

Please sign in to comment.