Skip to content

Commit 7804d7d

Browse files
dsharletg authored and xnnpack-bot committed
Regenerate c2 asm kernels with msan support
PiperOrigin-RevId: 728351275
1 parent 0df264c commit 7804d7d

16 files changed

+218
-122
lines changed

src/f32-gemm/gen/f32-gemm-10x16c2-minmax-asm-amd64-avx512f-broadcast.S

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,10 @@
2525
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast
2626

2727
.intel_syntax noprefix
28-
2928
# Free up GP registers.
29+
# Save register arguments for tail call to msan annotation helper.
30+
push rdi
31+
push rsi
3032
push rbx
3133
push rbp
3234
push r15
@@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast
3537
push r12
3638

3739
# Load params to free up a GP register.
38-
mov r13, [rsp + 80] # params
40+
mov r13, [rsp + 96] # params
3941
vbroadcastss zmm0, DWORD PTR [r13]
4042
vbroadcastss zmm1, DWORD PTR [r13 + 4]
4143

4244
# Load c pointer.
43-
mov r10, [rsp + 56]
45+
mov r10, [rsp + 72]
4446
# Load cm_stride.
45-
mov r11, [rsp + 64]
47+
mov r11, [rsp + 80]
4648

4749
# Align the stack pointer.
4850
mov r13, rsp
@@ -454,17 +456,21 @@ return:
454456
pop r15
455457
pop rbp
456458
pop rbx
459+
pop rsi
460+
pop rdi
461+
#if XNN_HAS_FEATURE(memory_sanitizer)
462+
jmp xnn_gemm_ukernel_msan_sizeof_c_4
463+
#else
457464
ret
465+
#endif
458466
END_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast
459467

460-
#ifdef __has_feature
461-
#if __has_feature(dataflow_sanitizer)
468+
#if XNN_HAS_FEATURE(dataflow_sanitizer)
462469
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast.dfsan
463470
.intel_syntax noprefix
464471
# We could implement this by calling a function that implements the dfsan instrumentation.
465472
# For now, just break, so if someone tries to use this, they'll know where the problem is.
466473
int 3
467474
ret
468475
END_FUNCTION xnn_f32_gemm_minmax_ukernel_10x16c2__asm_amd64_avx512f_broadcast.dfsan
469-
#endif
470476
#endif

src/f32-gemm/gen/f32-gemm-11x16c2-minmax-asm-amd64-avx512f-broadcast.S

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@
2525
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast
2626

2727
.intel_syntax noprefix
28-
2928
# Free up GP registers.
29+
# Save register arguments for tail call to msan annotation helper.
30+
push rdi
31+
push rsi
3032
push rbx
3133
push rbp
3234
push r15
@@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast
3537
push r12
3638

3739
# Load params to free up a GP register.
38-
mov r13, [rsp + 80] # params
40+
mov r13, [rsp + 96] # params
3941
vbroadcastss zmm0, DWORD PTR [r13]
4042
vbroadcastss zmm1, DWORD PTR [r13 + 4]
4143

4244
# Load c pointer.
43-
mov r10, [rsp + 56]
45+
mov r10, [rsp + 72]
4446
# Load cm_stride.
45-
mov r11, [rsp + 64]
47+
mov r11, [rsp + 80]
4648

4749
# Align the stack pointer.
4850
mov r13, rsp
@@ -487,17 +489,21 @@ return:
487489
pop r15
488490
pop rbp
489491
pop rbx
492+
pop rsi
493+
pop rdi
494+
#if XNN_HAS_FEATURE(memory_sanitizer)
495+
jmp xnn_gemm_ukernel_msan_sizeof_c_4
496+
#else
490497
ret
498+
#endif
491499
END_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast
492500

493-
#ifdef __has_feature
494-
#if __has_feature(dataflow_sanitizer)
501+
#if XNN_HAS_FEATURE(dataflow_sanitizer)
495502
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast.dfsan
496503
.intel_syntax noprefix
497504
# We could implement this by calling a function that implements the dfsan instrumentation.
498505
# For now, just break, so if someone tries to use this, they'll know where the problem is.
499506
int 3
500507
ret
501508
END_FUNCTION xnn_f32_gemm_minmax_ukernel_11x16c2__asm_amd64_avx512f_broadcast.dfsan
502-
#endif
503509
#endif

src/f32-gemm/gen/f32-gemm-1x16c2-minmax-asm-amd64-avx512f-broadcast.S

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@
2525
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast
2626

2727
.intel_syntax noprefix
28-
2928
# Free up GP registers.
29+
# Save register arguments for tail call to msan annotation helper.
30+
push rdi
31+
push rsi
3032
push rbx
3133
push rbp
3234
push r15
@@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast
3537
push r12
3638

3739
# Load params to free up a GP register.
38-
mov r13, [rsp + 80] # params
40+
mov r13, [rsp + 96] # params
3941
vbroadcastss zmm0, DWORD PTR [r13]
4042
vbroadcastss zmm1, DWORD PTR [r13 + 4]
4143

4244
# Load c pointer.
43-
mov r10, [rsp + 56]
45+
mov r10, [rsp + 72]
4446
# Load cm_stride.
45-
mov r11, [rsp + 64]
47+
mov r11, [rsp + 80]
4648

4749
# Align the stack pointer.
4850
mov r13, rsp
@@ -52,7 +54,7 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast
5254
mov [rsp], r13
5355

5456
# Allocate some space on the stack.
55-
sub rsp, 64
57+
sub rsp, 128
5658

5759
# Copy k and flip bit.
5860
mov r11, rdx
@@ -134,7 +136,7 @@ tail:
134136
vmovups ZMMWORD PTR [r10]{k1}, zmm11
135137

136138
return:
137-
add rsp, 64
139+
add rsp, 128
138140
mov r13, [rsp]
139141
mov rsp, r13
140142
# Restore the callee saved registers.
@@ -144,17 +146,21 @@ return:
144146
pop r15
145147
pop rbp
146148
pop rbx
149+
pop rsi
150+
pop rdi
151+
#if XNN_HAS_FEATURE(memory_sanitizer)
152+
jmp xnn_gemm_ukernel_msan_sizeof_c_4
153+
#else
147154
ret
155+
#endif
148156
END_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast
149157

150-
#ifdef __has_feature
151-
#if __has_feature(dataflow_sanitizer)
158+
#if XNN_HAS_FEATURE(dataflow_sanitizer)
152159
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast.dfsan
153160
.intel_syntax noprefix
154161
# We could implement this by calling a function that implements the dfsan instrumentation.
155162
# For now, just break, so if someone tries to use this, they'll know where the problem is.
156163
int 3
157164
ret
158165
END_FUNCTION xnn_f32_gemm_minmax_ukernel_1x16c2__asm_amd64_avx512f_broadcast.dfsan
159-
#endif
160166
#endif

src/f32-gemm/gen/f32-gemm-1x32c2-minmax-asm-amd64-avx512f-broadcast.S

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@
2525
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast
2626

2727
.intel_syntax noprefix
28-
2928
# Free up GP registers.
29+
# Save register arguments for tail call to msan annotation helper.
30+
push rdi
31+
push rsi
3032
push rbx
3133
push rbp
3234
push r15
@@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast
3537
push r12
3638

3739
# Load params to free up a GP register.
38-
mov r13, [rsp + 80] # params
40+
mov r13, [rsp + 96] # params
3941
vbroadcastss zmm0, DWORD PTR [r13]
4042
vbroadcastss zmm1, DWORD PTR [r13 + 4]
4143

4244
# Load c pointer.
43-
mov r10, [rsp + 56]
45+
mov r10, [rsp + 72]
4446
# Load cm_stride.
45-
mov r11, [rsp + 64]
47+
mov r11, [rsp + 80]
4648

4749
# Align the stack pointer.
4850
mov r13, rsp
@@ -52,7 +54,7 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast
5254
mov [rsp], r13
5355

5456
# Allocate some space on the stack.
55-
sub rsp, 64
57+
sub rsp, 128
5658

5759
# Copy k and flip bit.
5860
mov r11, rdx
@@ -157,7 +159,7 @@ tail:
157159
vmovups ZMMWORD PTR [r10 + 64]{k2}, zmm12
158160

159161
return:
160-
add rsp, 64
162+
add rsp, 128
161163
mov r13, [rsp]
162164
mov rsp, r13
163165
# Restore the callee saved registers.
@@ -167,17 +169,21 @@ return:
167169
pop r15
168170
pop rbp
169171
pop rbx
172+
pop rsi
173+
pop rdi
174+
#if XNN_HAS_FEATURE(memory_sanitizer)
175+
jmp xnn_gemm_ukernel_msan_sizeof_c_4
176+
#else
170177
ret
178+
#endif
171179
END_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast
172180

173-
#ifdef __has_feature
174-
#if __has_feature(dataflow_sanitizer)
181+
#if XNN_HAS_FEATURE(dataflow_sanitizer)
175182
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast.dfsan
176183
.intel_syntax noprefix
177184
# We could implement this by calling a function that implements the dfsan instrumentation.
178185
# For now, just break, so if someone tries to use this, they'll know where the problem is.
179186
int 3
180187
ret
181188
END_FUNCTION xnn_f32_gemm_minmax_ukernel_1x32c2__asm_amd64_avx512f_broadcast.dfsan
182-
#endif
183189
#endif

src/f32-gemm/gen/f32-gemm-2x16c2-minmax-asm-amd64-avx512f-broadcast.S

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@
2525
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast
2626

2727
.intel_syntax noprefix
28-
2928
# Free up GP registers.
29+
# Save register arguments for tail call to msan annotation helper.
30+
push rdi
31+
push rsi
3032
push rbx
3133
push rbp
3234
push r15
@@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast
3537
push r12
3638

3739
# Load params to free up a GP register.
38-
mov r13, [rsp + 80] # params
40+
mov r13, [rsp + 96] # params
3941
vbroadcastss zmm0, DWORD PTR [r13]
4042
vbroadcastss zmm1, DWORD PTR [r13 + 4]
4143

4244
# Load c pointer.
43-
mov r10, [rsp + 56]
45+
mov r10, [rsp + 72]
4446
# Load cm_stride.
45-
mov r11, [rsp + 64]
47+
mov r11, [rsp + 80]
4648

4749
# Align the stack pointer.
4850
mov r13, rsp
@@ -171,17 +173,21 @@ return:
171173
pop r15
172174
pop rbp
173175
pop rbx
176+
pop rsi
177+
pop rdi
178+
#if XNN_HAS_FEATURE(memory_sanitizer)
179+
jmp xnn_gemm_ukernel_msan_sizeof_c_4
180+
#else
174181
ret
182+
#endif
175183
END_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast
176184

177-
#ifdef __has_feature
178-
#if __has_feature(dataflow_sanitizer)
185+
#if XNN_HAS_FEATURE(dataflow_sanitizer)
179186
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast.dfsan
180187
.intel_syntax noprefix
181188
# We could implement this by calling a function that implements the dfsan instrumentation.
182189
# For now, just break, so if someone tries to use this, they'll know where the problem is.
183190
int 3
184191
ret
185192
END_FUNCTION xnn_f32_gemm_minmax_ukernel_2x16c2__asm_amd64_avx512f_broadcast.dfsan
186-
#endif
187193
#endif

src/f32-gemm/gen/f32-gemm-2x32c2-minmax-asm-amd64-avx512f-broadcast.S

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@
2525
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast
2626

2727
.intel_syntax noprefix
28-
2928
# Free up GP registers.
29+
# Save register arguments for tail call to msan annotation helper.
30+
push rdi
31+
push rsi
3032
push rbx
3133
push rbp
3234
push r15
@@ -35,14 +37,14 @@ BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast
3537
push r12
3638

3739
# Load params to free up a GP register.
38-
mov r13, [rsp + 80] # params
40+
mov r13, [rsp + 96] # params
3941
vbroadcastss zmm0, DWORD PTR [r13]
4042
vbroadcastss zmm1, DWORD PTR [r13 + 4]
4143

4244
# Load c pointer.
43-
mov r10, [rsp + 56]
45+
mov r10, [rsp + 72]
4446
# Load cm_stride.
45-
mov r11, [rsp + 64]
47+
mov r11, [rsp + 80]
4648

4749
# Align the stack pointer.
4850
mov r13, rsp
@@ -209,17 +211,21 @@ return:
209211
pop r15
210212
pop rbp
211213
pop rbx
214+
pop rsi
215+
pop rdi
216+
#if XNN_HAS_FEATURE(memory_sanitizer)
217+
jmp xnn_gemm_ukernel_msan_sizeof_c_4
218+
#else
212219
ret
220+
#endif
213221
END_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast
214222

215-
#ifdef __has_feature
216-
#if __has_feature(dataflow_sanitizer)
223+
#if XNN_HAS_FEATURE(dataflow_sanitizer)
217224
BEGIN_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast.dfsan
218225
.intel_syntax noprefix
219226
# We could implement this by calling a function that implements the dfsan instrumentation.
220227
# For now, just break, so if someone tries to use this, they'll know where the problem is.
221228
int 3
222229
ret
223230
END_FUNCTION xnn_f32_gemm_minmax_ukernel_2x32c2__asm_amd64_avx512f_broadcast.dfsan
224-
#endif
225231
#endif

0 commit comments

Comments
 (0)