Skip to content

Commit b12ad88

Browse files
authored
merge trace file (#31)
1 parent 947e52f commit b12ad88

File tree

12 files changed: +109 additions, -4 deletions

examples/megatron/run_pretrain.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ elif [ "$PRIMUS_HIPBLASLT_TUNING_STAGE" -eq 2 ]; then
189189
exit 1
190190
fi
191191

192-
bash "${PRIMUS_PATH}"/examples/scripts/docker_podman_proxy.sh run --rm \
192+
bash "${PRIMUS_PATH}"/tools/docker/docker_podman_proxy.sh run --rm \
193193
--env HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES \
194194
--env GPU_MAX_HW_QUEUES=$GPU_MAX_HW_QUEUES \
195195
--env TORCH_NCCL_HIGH_PRIORITY=$TORCH_NCCL_HIGH_PRIORITY \
@@ -299,7 +299,7 @@ elif [ "$RUN_ENV" = "slurm" ]; then
299299
"
300300
fi
301301

302-
bash "${PRIMUS_PATH}"/examples/scripts/docker_podman_proxy.sh run --rm \
302+
bash "${PRIMUS_PATH}"/tools/docker/docker_podman_proxy.sh run --rm \
303303
--env SLURM_MASTER_ADDR=$SLURM_MASTER_ADDR \
304304
--env SLURM_MASTER_PORT=$SLURM_MASTER_PORT \
305305
--env SLURM_PROCID=$SLURM_PROCID \

examples/megatron/run_slurm_pretrain.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
export RUN_ENV=slurm
1010
export MODEL_CONFIG=deepseek_v2_lite
1111

12-
export PRIMUS_HIPBLASLT_TUNING_STAGE=${PRIMUS_HIPBLASLT_TUNING_STAGE:-3}
13-
export NUM_NODES=${NUM_NODES:-8}
12+
export PRIMUS_HIPBLASLT_TUNING_STAGE=${PRIMUS_HIPBLASLT_TUNING_STAGE:-0}
13+
export NUM_NODES=${NUM_NODES:-1}
1414

1515
SCRIPT_DIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
1616

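File renamed without changes (presumably examples/scripts/docker_podman_proxy.sh → tools/docker/docker_podman_proxy.sh, per the updated call sites in examples/megatron/run_pretrain.sh).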

tools/docker/start_container.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
2+
###############################################################################
3+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
4+
#
5+
# See LICENSE for license information.
6+
#################################################################################
7+
8+
PRIMUS_PATH=$(realpath "$(dirname "$0")/../..")
9+
export DOCKER_IMAGE="docker.io/rocm/megatron-lm:latest"
10+
11+
bash "${PRIMUS_PATH}"/tools/docker/docker_podman_proxy.sh run -d \
12+
--name dev_primus \
13+
--ipc=host \
14+
--network=host \
15+
--device=/dev/kfd \
16+
--device=/dev/dri \
17+
--device=/dev/infiniband \
18+
--cap-add=SYS_PTRACE \
19+
--cap-add=CAP_SYS_ADMIN \
20+
--security-opt seccomp=unconfined \
21+
--group-add video \
22+
--privileged \
23+
-v "${PRIMUS_PATH}:${PRIMUS_PATH}" \
24+
$DOCKER_IMAGE sleep infinity

tools/preflight/global_vars.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
###############################################################################
2+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# See LICENSE for license information.
5+
#################################################################################
6+
17
import os
28

39
WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 1))

tools/preflight/inter_node_comm.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
###############################################################################
2+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# See LICENSE for license information.
5+
#################################################################################
6+
17
import time
28

39
import matplotlib.pyplot as plt

tools/preflight/inter_node_comm_p2p.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
###############################################################################
2+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# See LICENSE for license information.
5+
#################################################################################
6+
17
import time
28

39
import matplotlib.pyplot as plt

tools/preflight/intra_node_comm.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
###############################################################################
2+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# See LICENSE for license information.
5+
#################################################################################
6+
17
import time
28

39
import matplotlib.pyplot as plt

tools/preflight/preflight_perf_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
###############################################################################
2+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# See LICENSE for license information.
5+
#################################################################################
6+
17
import argparse
28
import os
39

tools/preflight/square_gemm.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
###############################################################################
2+
# Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3+
#
4+
# See LICENSE for license information.
5+
#################################################################################
6+
17
import time
28

39
import matplotlib.pyplot as plt

0 commit comments

Comments
 (0)