Merge pull request #1309 from linsword13/update-env-vars

douglasjacobsen · web-flow · commit 2a8f267ed1aa · 2025-10-29T18:16:08.000-06:00
Update to use the `environment_variable_name` option for various apps
diff --git a/var/ramble/repos/builtin/applications/nccl-tests/application.py b/var/ramble/repos/builtin/applications/nccl-tests/application.py
@@ -133,16 +133,10 @@ class NcclTests(ExecutableApplication):
         default="",
         description='How NCCL communicators should be split, if at all. "0x7" for rail-aligned, "0x0" for world-level.',
         workloads=all_workloads,
+        environment_variable_name="NCCL_TESTS_SPLIT_MASK",
         expandable=False,
     )
 
-    environment_variable(
-        "NCCL_TESTS_SPLIT_MASK",
-        "{nccl_tests_split_mask}",
-        'How NCCL communicators should be split, if at all. "0x7" for rail-aligned, "0x0" for world-level.',
-        workloads=all_workloads,
-    )
-
     # (output_name, units, group_name, regex)
     regex_parts = [
         ("Size", "B", "size", "[0-9]+"),
diff --git a/var/ramble/repos/builtin/applications/nvidia-hpl/application.py b/var/ramble/repos/builtin/applications/nvidia-hpl/application.py
@@ -52,243 +52,153 @@ class NvidiaHpl(HplBase, NvidiaHPCBase):
 
     workload_variable(
         "hpl_fct_comm_policy",
+        environment_variable_name="HPL_FCT_COMM_POLICY",
         default="1",
         values=["0", "1"],
         description="Which communication library to use in the panel factorization. 0 = NVSHMEM, 1 = Host MPI",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_FCT_COMM_POLICY",
-        "{hpl_fct_comm_policy}",
-        description="",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_use_nvshmem",
+        environment_variable_name="HPL_USE_NVSHMEM",
         default="0",
         values=["0", "1"],
         description="Whether to use NVSHMEM or not. 0 = Disable, 1 = Enable.",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_USE_NVSHMEM",
-        "{hpl_use_nvshmem}",
-        description="Whether or not to use NVSHMEM",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_p2p_as_bcast",
+        environment_variable_name="HPL_P2P_AS_BCAST",
         default="0",
         values=["0", "1", "2", "3", "4"],
         description="0 = ncclBcast, 1 = ncclSend/Recv, 2 = CUDA-aware MPI, 3 = host MPI, 4 = NVSHMEM",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_P2P_AS_BCAST",
-        "{hpl_p2p_as_bcast}",
-        description="Which communication library to use in the final solve step.",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_nvshmem_swap",
+        environment_variable_name="HPL_NVSHMEM_SWAP",
         default="0",
         values=["0", "1"],
         description="Performs row swaps using NVSHMEM instead of NCCL. 0 = Disable, 1 = Enable.",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_NVSHMEM_SWAP",
-        "{hpl_nvshmem_swap}",
-        description="Performs row swaps using NVSHMEM instead of NCCL. 0 = Disable, 1 = Enable.",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_chunk_size_nbs",
+        environment_variable_name="HPL_CHUNK_SIZE_NBS",
         default="16",
         description="Number of matrix blocks to group for computations. Needs to be > 0",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_CHUNK_SIZE_NBS",
-        "{hpl_chunk_size_nbs}",
-        description="Number of matrix blocks to group for computations. Needs to be > 0",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_dist_trsm_flag",
+        environment_variable_name="HPL_DIST_TRSM_FLAG",
         default="1",
         values=["0", "1"],
         description="Perform the solve step (TRSM) in parallel, rather than on only the ranks that own part of the matrix.",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_DIST_TRSM_FLAG",
-        "{hpl_dist_trsm_flag}",
-        description="Perform the solve step (TRSM) in parallel, rather than on only the ranks that own part of the matrix.",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_cta_per_fct",
+        environment_variable_name="HPL_CTA_PER_FCT",
         default="16",
         description="Sets the number of CTAs (thread blocks) for factorization. Needs to be > 0.",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_CTA_PER_FCT",
-        "{hpl_cta_per_fct}",
-        description="Sets the number of CTAs (thread blocks) for factorization. Needs to be > 0.",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_alloc_hugepages",
+        environment_variable_name="HPL_ALLOC_HUGEPAGES",
         default="0",
         values=["0", "1"],
         description="Use 2MB hugepages for host-side allocations. Done through the madvise syscall.",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_ALLOC_HUGEPAGES",
-        "{hpl_alloc_hugepages}",
-        description="Use 2MB hugepages for host-side allocations. Done through the madvise syscall.",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "warmup_end_prog",
+        environment_variable_name="WARMUP_END_PROG",
         default="5",
         description="Runs the main loop once before the 'real' run. Stops the warmup at x%. Values can be 1 - 100.",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "WARMUP_END_PROG",
-        "{warmup_end_prog}",
-        description="Runs the main loop once before the 'real' run. Stops the warmup at x%. Values can be 1 - 100.",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "test_loops",
+        environment_variable_name="TEST_LOOPS",
         default="1",
         description="Runs the main loop X many times",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "TEST_LOOPS",
-        "{test_loops}",
-        description="Runs the main loop X many times",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_cusolver_mp_tests",
+        environment_variable_name="HPL_CUSOLVER_MP_TESTS",
         default="1",
         description="Runs several tests of individual components of HPL (GEMMS, comms, etc.)",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_CUSOLVER_MP_TESTS",
-        "{hpl_cusolver_mp_tests}",
-        description="Runs several tests of individual components of HPL (GEMMS, comms, etc.)",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_cusolver_mp_tests_gemm_iters",
+        environment_variable_name="HPL_CUSOLVER_MP_TESTS_GEMM_ITERS",
         default="128",
         description="Number of repeat GEMM calls in tests. Needs to be > 0.",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_CUSOLVER_MP_TESTS_GEMM_ITERS",
-        "{hpl_cusolver_mp_tests_gemm_iters}",
-        description="Number of repeat GEMM calls in tests. Needs to be > 0.",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_ooc_mode",
+        environment_variable_name="HPL_OOC_MODE",
         default="0",
         description="Enables / disales out-of-core mode",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_OOC_MODE",
-        "{hpl_ooc_mode}",
-        description="Enables / disales out-of-core mode",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_ooc_max_gpu_mem",
+        environment_variable_name="HPL_OOC_MAX_GPU_MEM",
         default="-1",
         description="Limits the amount of GPU memory used for OOC. In GiB. Needs to be >= -1.",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_OOC_MAX_GPU_MEM",
-        "{hpl_ooc_max_gpu_mem}",
-        description="Limits the amount of GPU memory used for OOC. In GiB. Needs to be >= -1.",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_ooc_tile_m",
+        environment_variable_name="HPL_OOC_TILE_M",
         default="4096",
         description="Row blocking factor. Needs to be > 0",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_OOC_TILE_M",
-        "{hpl_ooc_tile_m}",
-        description="Row blocking factor. Needs to be > 0",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_ooc_tile_n",
+        environment_variable_name="HPL_OOC_TILE_N",
         default="4096",
         description="Column blocking factor. Needs to be > 0",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_OOC_TILE_N",
-        "{hpl_ooc_tile_n}",
-        description="Column blocking factor. Needs to be > 0",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_ooc_num_streams",
+        environment_variable_name="HPL_OOC_NUM_STREAMS",
         default="3",
         description="Number of streams used for OCC operations",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_OOC_NUM_STREAMS",
-        "{hpl_ooc_num_streams}",
-        description="Number of streams used for OCC operations",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "hpl_ooc_safe_size",
+        environment_variable_name="HPL_OOC_SAFE_SIZE",
         default="2.0",
         description="GPU memory (in GiB) needed for driver. This amount will not be used by HPL OCC",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "HPL_OOC_SAFE_SIZE",
-        "{hpl_ooc_safe_size}",
-        description="GPU memory (in GiB) needed for driver. This amount will not be used by HPL OCC",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "block_size",
diff --git a/var/ramble/repos/builtin/applications/py-nemo-2/application.py b/var/ramble/repos/builtin/applications/py-nemo-2/application.py
@@ -77,16 +77,11 @@ class PyNemo2(BasePyNemo):
 
     workload_variable(
         "results_mount",
+        environment_variable_name="NEMO_CONTAINER_MOUNTS",
         default="{experiment_run_dir}:{experiment_run_dir}",
         description="Container mount for results data",
         workload_group="pretraining",
     )
-    environment_variable(
-        "NEMO_CONTAINER_MOUNTS",
-        value="{results_mount}",
-        description="All container mounts in an environment variable",
-        workload_group="pretraining",
-    )
     workload_variable(
         "container_mounts",
         default="{results_mount}",
diff --git a/var/ramble/repos/builtin/applications/wrfv4/application.py b/var/ramble/repos/builtin/applications/wrfv4/application.py
@@ -197,15 +197,9 @@ class Wrfv4(ExecutableApplication):
     )
 
     with when("+wrf_tiles"):
-        environment_variable(
-            "NUM_WRF_TILES",
-            value="{num_tiles}",
-            description="Number of tiles to use in WRF domain",
-            workload_group="all_workloads",
-        )
-
         workload_variable(
             "num_tiles",
+            environment_variable_name="NUM_WRF_TILES",
             default="1",
             description="Number of tiles to use in WRF domain",
             workload_group="all_workloads",
diff --git a/var/ramble/repos/builtin/base_applications/nvidia-hpc-benchmarks/base_application.py b/var/ramble/repos/builtin/base_applications/nvidia-hpc-benchmarks/base_application.py
@@ -50,78 +50,48 @@ class NvidiaHpcBenchmarks(ExecutableApplication):
 
     workload_variable(
         "nvshmem_disable_cuda_vmm",
+        environment_variable_name="NVSHMEM_DISABLE_CUDA_VMM",
         default="1",
         description="",
         workload_group="all_workloads",
     )
-    environment_variable(
-        "NVSHMEM_DISABLE_CUDA_VMM",
-        "{nvshmem_disable_cuda_vmm}",
-        description="",
-        workload_group="all_workloads",
-    )
 
     workload_variable(
         "pmix_mca_gds",
+        environment_variable_name="PMIX_MCA_gds",
         default="^ds12",
-        description="",
-        workload_group="all_workloads",
-    )
-    environment_variable(
-        "PMIX_MCA_gds",
-        "{pmix_mca_gds}",
         description="PMIX MCA gds",
         workload_group="all_workloads",
     )
 
     workload_variable(
         "ompi_mca_btl",
+        environment_variable_name="OMPI_MCA_btl",
         default="^vader,tcp,openib,uct",
-        description="",
-        workload_group="all_workloads",
-    )
-    environment_variable(
-        "OMPI_MCA_btl",
-        "{ompi_mca_btl}",
         description="OpenMPI MCA btl",
         workload_group="all_workloads",
     )
 
     workload_variable(
         "ompi_mca_pml",
+        environment_variable_name="OMPI_MCA_pml",
         default="ucx",
-        description="",
-        workload_group="all_workloads",
-    )
-    environment_variable(
-        "OMPI_MCA_pml",
-        "{ompi_mca_pml}",
         description="OpenMPI MCA pml",
         workload_group="all_workloads",
     )
 
     workload_variable(
         "ucx_net_devices",
+        environment_variable_name="UCX_NET_DEVICES",
         default="enp6s0,enp12s0,enp134s0,enp140s0",
-        description="",
-        workload_group="all_workloads",
-    )
-    environment_variable(
-        "UCX_NET_DEVICES",
-        "{ucx_net_devices}",
         description="UCX Net Devices",
         workload_group="all_workloads",
     )
 
     workload_variable(
         "ucx_max_rndv_rails",
+        environment_variable_name="UCX_MAX_RNDV_RAILS",
         default="4",
-        description="",
-        workload_group="all_workloads",
-    )
-    environment_variable(
-        "UCX_MAX_RNDV_RAILS",
-        "{ucx_max_rndv_rails}",
         description="UCX MAximum RNDV Rails",
         workload_group="all_workloads",
     )
diff --git a/var/ramble/repos/builtin/base_applications/py-nemo/base_application.py b/var/ramble/repos/builtin/base_applications/py-nemo/base_application.py