-
Notifications
You must be signed in to change notification settings - Fork 113
Feature/prefetch2 #1604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Feature/prefetch2 #1604
Changes from 94 commits
63b7ff4
191105b
5b41229
a2efb44
c815076
177c18b
eae953d
2540a1b
e686437
676c643
9c2025b
721fbd5
02a4cb9
33b5f2f
ccf7a55
0642f63
72a001f
02e7bc3
7bb5cdc
f42a507
e2df25f
3010aa6
acfaf5b
67f8ce4
946bed0
d772d5f
60894ec
9910869
b9a4d5f
23992e0
f0f9afd
cfaa705
17d349c
a5abce8
c265884
aee623d
6cfc18a
168f097
f11bd84
a2a9b24
7d17452
27b725d
2e12a2c
b67b9fb
abed9ac
50cc09a
4c9fa83
9daba3f
8427323
daa5a4f
4b0600a
bbd8ac6
1ed2db1
9de5021
30ae502
79934bb
573d0be
04b4fae
0cf1286
aaa629d
5653947
c5cd669
20a70e4
74dd488
b2e6e88
9b5545f
d7568e6
c92f3cd
35da04f
6041ec6
982f41b
60a746b
4918c98
af2be33
02baeaa
4b8352c
13a192b
274cbad
89e8886
866a389
63b97b9
bcfaa50
b95f9b4
1b73643
510b0a2
55ee7cc
9d21752
8d04ac1
ca2a85a
0bc3ad3
44b9000
96a3912
6360e16
48e870b
051dd43
16a787c
32fd0c3
9fb3260
cc6e837
3229363
35e734a
a5055cc
e38501a
a8d4a0a
73f46af
37cfc7b
e223bfa
ea36ced
3b25ff5
9b83fde
709b7f9
305884e
06413d0
dd77fc0
a265269
f92570e
3ada421
2125574
951a3ee
3c8ed1a
8c7ba4d
a510234
d460006
b0f2a86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -153,6 +153,26 @@ option(QUDA_DIRAC_COVDEV "build code for covariant derivative" ${QUDA_DIRAC_DEFA | |
| option(QUDA_DIRAC_DISTANCE_PRECONDITIONING "build code for distance preconditioned Wilson/clover Dirac operators" OFF) | ||
| set(QUDA_DOMAIN_DECOMPOSITION "0" CACHE STRING "which domain decomposition to instantiate in QUDA (1-bit number - RedBlack)") | ||
|
|
||
| option(QUDA_DSLASH_DOUBLE_STORE "store a forwards shifted copy of the gauge fields for simplified Dslash indexing" OFF) | ||
| mark_as_advanced(QUDA_DSLASH_DOUBLE_STORE) | ||
| set(QUDA_DSLASH_PREFETCH_TMA "0" CACHE STRING "enable TMA prefetching (Hopper+, 0 - disable, 1 - bulk, 2 - tensor)") | ||
| set_property(CACHE QUDA_DSLASH_PREFETCH_TMA PROPERTY STRINGS 0 1 2) | ||
| mark_as_advanced(QUDA_DSLASH_PREFETCH_TMA) | ||
| if(QUDA_DSLASH_PREFETCH_TMA GREATER 0 AND NOT QUDA_DSLASH_DOUBLE_STORE) | ||
| message(SEND_ERROR "QUDA_DSLASH_PREFETCH_TMA cannot be enabled without QUDA_DSLASH_DOUBLE_STORE") | ||
| endif() | ||
|
|
||
| set(QUDA_DSLASH_PREFETCH_DISTANCE_WILSON "0" CACHE STRING "set prefetch distance for Wilson-like fermions") | ||
| set(QUDA_DSLASH_PREFETCH_DISTANCE_STAGGERED "0" CACHE STRING "set prefetch distance for staggered-like fermions") | ||
| mark_as_advanced(QUDA_DSLASH_PREFETCH_DISTANCE_WILSON) | ||
| mark_as_advanced(QUDA_DSLASH_PREFETCH_DISTANCE_STAGGERED) | ||
| if(QUDA_DSLASH_PREFETCH_DISTANCE_WILSON GREATER 7) | ||
| message(SEND_ERROR "QUDA_DSLASH_PREFETCH_DISTANCE_WILSON is greater than pipeline length") | ||
| endif() | ||
| if(QUDA_DSLASH_PREFETCH_DISTANCE_STAGGERED GREATER 15) | ||
|
||
| message(SEND_ERROR "QUDA_DSLASH_PREFETCH_DISTANCE_STAGGERED is greater than pipeline length") | ||
| endif() | ||
|
|
||
| option(QUDA_QIO "build QIO code for binary I/O" OFF) | ||
|
|
||
| # Multi-GPU options | ||
|
|
@@ -239,7 +259,7 @@ option(QUDA_CTEST_SEP_DSLASH_POLICIES "Test Dslash policies separately in ctest | |
| option(QUDA_CTEST_DISABLE_BENCHMARKS "Disable benchmark test" ON) | ||
|
|
||
| option(QUDA_FAST_COMPILE_REDUCE "enable fast compilation in blas and reduction kernels (single warp per reduction)" OFF) | ||
| option(QUDA_FAST_COMPILE_DSLASH "enable fast compilation in dslash kernels (~20% perf impact)" OFF) | ||
| option(QUDA_FAST_COMPILE_DSLASH "enable fast compilation in coarse grid dslash kernels (significant perf impact)" OFF) | ||
|
|
||
| option(QUDA_OPENMP "enable OpenMP" OFF) | ||
| set(QUDA_CXX_STANDARD | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sticking a pin in our offline discussion about replacing the numbers with string descriptors (if I recall correctly).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
e223bfa