@@ -52,243 +52,153 @@ class NvidiaHpl(HplBase, NvidiaHPCBase):
5252
5353 workload_variable (
5454 "hpl_fct_comm_policy" ,
55+ environment_variable_name = "HPL_FCT_COMM_POLICY" ,
5556 default = "1" ,
5657 values = ["0" , "1" ],
5758 description = "Which communication library to use in the panel factorization. 0 = NVSHMEM, 1 = Host MPI" ,
5859 workload_group = "all_workloads" ,
5960 )
60- environment_variable (
61- "HPL_FCT_COMM_POLICY" ,
62- "{hpl_fct_comm_policy}" ,
63- description = "" ,
64- workload_group = "all_workloads" ,
65- )
6661
6762 workload_variable (
6863 "hpl_use_nvshmem" ,
64+ environment_variable_name = "HPL_USE_NVSHMEM" ,
6965 default = "0" ,
7066 values = ["0" , "1" ],
7167 description = "Whether to use NVSHMEM or not. 0 = Disable, 1 = Enable." ,
7268 workload_group = "all_workloads" ,
7369 )
74- environment_variable (
75- "HPL_USE_NVSHMEM" ,
76- "{hpl_use_nvshmem}" ,
77- description = "Whether or not to use NVSHMEM" ,
78- workload_group = "all_workloads" ,
79- )
8070
8171 workload_variable (
8272 "hpl_p2p_as_bcast" ,
73+ environment_variable_name = "HPL_P2P_AS_BCAST" ,
8374 default = "0" ,
8475 values = ["0" , "1" , "2" , "3" , "4" ],
8576 description = "Which communication library to use in the final solve step. 0 = ncclBcast, 1 = ncclSend/Recv, 2 = CUDA-aware MPI, 3 = host MPI, 4 = NVSHMEM" ,
8677 workload_group = "all_workloads" ,
8778 )
88- environment_variable (
89- "HPL_P2P_AS_BCAST" ,
90- "{hpl_p2p_as_bcast}" ,
91- description = "Which communication library to use in the final solve step." ,
92- workload_group = "all_workloads" ,
93- )
9479
9580 workload_variable (
9681 "hpl_nvshmem_swap" ,
82+ environment_variable_name = "HPL_NVSHMEM_SWAP" ,
9783 default = "0" ,
9884 values = ["0" , "1" ],
9985 description = "Performs row swaps using NVSHMEM instead of NCCL. 0 = Disable, 1 = Enable." ,
10086 workload_group = "all_workloads" ,
10187 )
102- environment_variable (
103- "HPL_NVSHMEM_SWAP" ,
104- "{hpl_nvshmem_swap}" ,
105- description = "Performs row swaps using NVSHMEM instead of NCCL. 0 = Disable, 1 = Enable." ,
106- workload_group = "all_workloads" ,
107- )
10888
10989 workload_variable (
11090 "hpl_chunk_size_nbs" ,
91+ environment_variable_name = "HPL_CHUNK_SIZE_NBS" ,
11192 default = "16" ,
11293 description = "Number of matrix blocks to group for computations. Needs to be > 0" ,
11394 workload_group = "all_workloads" ,
11495 )
115- environment_variable (
116- "HPL_CHUNK_SIZE_NBS" ,
117- "{hpl_chunk_size_nbs}" ,
118- description = "Number of matrix blocks to group for computations. Needs to be > 0" ,
119- workload_group = "all_workloads" ,
120- )
12196
12297 workload_variable (
12398 "hpl_dist_trsm_flag" ,
99+ environment_variable_name = "HPL_DIST_TRSM_FLAG" ,
124100 default = "1" ,
125101 values = ["0" , "1" ],
126102 description = "Perform the solve step (TRSM) in parallel, rather than on only the ranks that own part of the matrix." ,
127103 workload_group = "all_workloads" ,
128104 )
129- environment_variable (
130- "HPL_DIST_TRSM_FLAG" ,
131- "{hpl_dist_trsm_flag}" ,
132- description = "Perform the solve step (TRSM) in parallel, rather than on only the ranks that own part of the matrix." ,
133- workload_group = "all_workloads" ,
134- )
135105
136106 workload_variable (
137107 "hpl_cta_per_fct" ,
108+ environment_variable_name = "HPL_CTA_PER_FCT" ,
138109 default = "16" ,
139110 description = "Sets the number of CTAs (thread blocks) for factorization. Needs to be > 0." ,
140111 workload_group = "all_workloads" ,
141112 )
142- environment_variable (
143- "HPL_CTA_PER_FCT" ,
144- "{hpl_cta_per_fct}" ,
145- description = "Sets the number of CTAs (thread blocks) for factorization. Needs to be > 0." ,
146- workload_group = "all_workloads" ,
147- )
148113
149114 workload_variable (
150115 "hpl_alloc_hugepages" ,
116+ environment_variable_name = "HPL_ALLOC_HUGEPAGES" ,
151117 default = "0" ,
152118 values = ["0" , "1" ],
153119 description = "Use 2MB hugepages for host-side allocations. Done through the madvise syscall." ,
154120 workload_group = "all_workloads" ,
155121 )
156- environment_variable (
157- "HPL_ALLOC_HUGEPAGES" ,
158- "{hpl_alloc_hugepages}" ,
159- description = "Use 2MB hugepages for host-side allocations. Done through the madvise syscall." ,
160- workload_group = "all_workloads" ,
161- )
162122
163123 workload_variable (
164124 "warmup_end_prog" ,
125+ environment_variable_name = "WARMUP_END_PROG" ,
165126 default = "5" ,
166127 description = "Runs the main loop once before the 'real' run. Stops the warmup at x%. Values can be 1 - 100." ,
167128 workload_group = "all_workloads" ,
168129 )
169- environment_variable (
170- "WARMUP_END_PROG" ,
171- "{warmup_end_prog}" ,
172- description = "Runs the main loop once before the 'real' run. Stops the warmup at x%. Values can be 1 - 100." ,
173- workload_group = "all_workloads" ,
174- )
175130
176131 workload_variable (
177132 "test_loops" ,
133+ environment_variable_name = "TEST_LOOPS" ,
178134 default = "1" ,
179135 description = "Runs the main loop X many times" ,
180136 workload_group = "all_workloads" ,
181137 )
182- environment_variable (
183- "TEST_LOOPS" ,
184- "{test_loops}" ,
185- description = "Runs the main loop X many times" ,
186- workload_group = "all_workloads" ,
187- )
188138
189139 workload_variable (
190140 "hpl_cusolver_mp_tests" ,
141+ environment_variable_name = "HPL_CUSOLVER_MP_TESTS" ,
191142 default = "1" ,
192143 description = "Runs several tests of individual components of HPL (GEMMS, comms, etc.)" ,
193144 workload_group = "all_workloads" ,
194145 )
195- environment_variable (
196- "HPL_CUSOLVER_MP_TESTS" ,
197- "{hpl_cusolver_mp_tests}" ,
198- description = "Runs several tests of individual components of HPL (GEMMS, comms, etc.)" ,
199- workload_group = "all_workloads" ,
200- )
201146
202147 workload_variable (
203148 "hpl_cusolver_mp_tests_gemm_iters" ,
149+ environment_variable_name = "HPL_CUSOLVER_MP_TESTS_GEMM_ITERS" ,
204150 default = "128" ,
205151 description = "Number of repeat GEMM calls in tests. Needs to be > 0." ,
206152 workload_group = "all_workloads" ,
207153 )
208- environment_variable (
209- "HPL_CUSOLVER_MP_TESTS_GEMM_ITERS" ,
210- "{hpl_cusolver_mp_tests_gemm_iters}" ,
211- description = "Number of repeat GEMM calls in tests. Needs to be > 0." ,
212- workload_group = "all_workloads" ,
213- )
214154
215155 workload_variable (
216156 "hpl_ooc_mode" ,
157+ environment_variable_name = "HPL_OOC_MODE" ,
217158 default = "0" ,
219160 description = "Enables / disables out-of-core mode" ,
219160 workload_group = "all_workloads" ,
220161 )
221- environment_variable (
222- "HPL_OOC_MODE" ,
223- "{hpl_ooc_mode}" ,
224- description = "Enables / disales out-of-core mode" ,
225- workload_group = "all_workloads" ,
226- )
227162
228163 workload_variable (
229164 "hpl_ooc_max_gpu_mem" ,
165+ environment_variable_name = "HPL_OOC_MAX_GPU_MEM" ,
230166 default = "-1" ,
231167 description = "Limits the amount of GPU memory used for OOC. In GiB. Needs to be >= -1." ,
232168 workload_group = "all_workloads" ,
233169 )
234- environment_variable (
235- "HPL_OOC_MAX_GPU_MEM" ,
236- "{hpl_ooc_max_gpu_mem}" ,
237- description = "Limits the amount of GPU memory used for OOC. In GiB. Needs to be >= -1." ,
238- workload_group = "all_workloads" ,
239- )
240170
241171 workload_variable (
242172 "hpl_ooc_tile_m" ,
173+ environment_variable_name = "HPL_OOC_TILE_M" ,
243174 default = "4096" ,
244175 description = "Row blocking factor. Needs to be > 0" ,
245176 workload_group = "all_workloads" ,
246177 )
247- environment_variable (
248- "HPL_OOC_TILE_M" ,
249- "{hpl_ooc_tile_m}" ,
250- description = "Row blocking factor. Needs to be > 0" ,
251- workload_group = "all_workloads" ,
252- )
253178
254179 workload_variable (
255180 "hpl_ooc_tile_n" ,
181+ environment_variable_name = "HPL_OOC_TILE_N" ,
256182 default = "4096" ,
257183 description = "Column blocking factor. Needs to be > 0" ,
258184 workload_group = "all_workloads" ,
259185 )
260- environment_variable (
261- "HPL_OOC_TILE_N" ,
262- "{hpl_ooc_tile_n}" ,
263- description = "Column blocking factor. Needs to be > 0" ,
264- workload_group = "all_workloads" ,
265- )
266186
267187 workload_variable (
268188 "hpl_ooc_num_streams" ,
189+ environment_variable_name = "HPL_OOC_NUM_STREAMS" ,
269190 default = "3" ,
271192 description = "Number of streams used for OOC operations" ,
271192 workload_group = "all_workloads" ,
272193 )
273- environment_variable (
274- "HPL_OOC_NUM_STREAMS" ,
275- "{hpl_ooc_num_streams}" ,
276- description = "Number of streams used for OCC operations" ,
277- workload_group = "all_workloads" ,
278- )
279194
280195 workload_variable (
281196 "hpl_ooc_safe_size" ,
197+ environment_variable_name = "HPL_OOC_SAFE_SIZE" ,
282198 default = "2.0" ,
284200 description = "GPU memory (in GiB) needed for driver. This amount will not be used by HPL OOC" ,
284200 workload_group = "all_workloads" ,
285201 )
286- environment_variable (
287- "HPL_OOC_SAFE_SIZE" ,
288- "{hpl_ooc_safe_size}" ,
289- description = "GPU memory (in GiB) needed for driver. This amount will not be used by HPL OCC" ,
290- workload_group = "all_workloads" ,
291- )
292202
293203 workload_variable (
294204 "block_size" ,
0 commit comments