Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ To go through the process, refer to the [guidance](collector/README.md) under th
|--------|-------------------|--------|
| h100_sxm | TRTLLM(0.20.0, 1.0.0rc3) | ✅ |
| h200_sxm | TRTLLM(0.20.0, 1.0.0rc3) | ✅ |
| h20_3e | TRTLLM(1.0.0) | ✅ |
| a100_sxm | TRTLLM(1.0.0) | ✅ |
| b200_sxm | TRTLLM(1.0.0rc6) | ✅ |
| gb200_sxm | TRTLLM(1.0.0rc6) | ✅ |

Expand Down
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
30 changes: 30 additions & 0 deletions src/aiconfigurator/systems/h20_3e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Location of this system's data files; the path is resolved relative to systems_dir.
data_dir: data/h20_3e # relative to systems_dir
# Per-GPU hardware characteristics. Trailing comments restate each raw value
# in human-readable units.
gpu:
  mem_bw: 4917000000000  # 4917 GB/s
  # Non-official correction based on observations; adjust it to match your
  # own measurements.
  mem_bw_empirical_scaling_factor: 0.8
  # 3 us. Non-official correction based on observations; adjust it to match
  # your own measurements.
  mem_empirical_constant_latency: 0.000003
  mem_capacity: 151397597184  # 141 GiB
  float16_tc_flops: 148000000000000  # 148 TFLOPS
  int8_tc_flops: 296000000000000  # 296 TFLOPS
  fp8_tc_flops: 296000000000000  # 296 TFLOPS
  power: 500  # Watt
  sm_version: 90

# Node-level topology and interconnect figures. All bandwidths are in Byte/s
# and describe a single direction.
node:
  num_gpus_per_node: 8
  inter_node_bw: 25000000000  # Byte/s per GPU, single direction, 1:1 CX7 per node
  intra_node_bw: 450000000000  # Byte/s per GPU, single direction
  pcie_bw: 64000000000  # Byte/s, single direction, PCIe 5.0
  # 10 us. Non-official correction based on observations; adjust it to match
  # your own measurements.
  p2p_latency: 0.00001

# Miscellaneous memory-overhead estimates and library versions.
misc:
  # Non-official correction based on observations; adjust it to match your
  # own measurements. Values are in bytes.
  # NOTE(review): the integer keys are presumably the number of GPUs taking
  # part in communication -- confirm against the consumer of this file.
  nccl_mem:
    1: 0
    2: 358612992  # 342 MiB
    4: 411041792  # 392 MiB
    8: 411041792  # 392 MiB
  # 3.5 GiB. Increased from 551 MB for safer deployment; this also covers
  # part of the inaccuracy in the memory calculation.
  other_mem: 3758096384
  # Quoted so the version stays a string.
  nccl_version: '2.27.3'