Skip to content

Commit dbb8f97

Browse files
authored
support mtp; update readme (#11)
1 parent f06200c commit dbb8f97

File tree

8 files changed

+88
-11
lines changed

8 files changed

+88
-11
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ logs
99
local/
1010
.gitmodules
1111
output
12-
experiment
12+
experiment
13+
data

README.md

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,57 @@
11
# Primus
22

3-
```shell
4-
# install pre-commit
5-
pip install pre-commit
6-
# the first time you download the repo, it will be cached for future use
7-
cd path_to_primus && pre-commit install
3+
## Overview
4+
Primus is a training framework that supports different training and inference backends. It is designed for pre-training, post-training, and reinforcement-learning tasks.
5+
6+
## Setup Environment
7+
Use the following command to create a container:
8+
```bash
9+
# pull the public docker image
10+
docker pull rocm/megatron-lm:latest
11+
12+
# create a container
13+
docker run -d \
14+
--name=dev_username \
15+
  --network=host \
16+
--ipc=host \
17+
--device /dev/dri \
18+
--device /dev/kfd \
19+
--group-add video \
20+
--cap-add=SYS_PTRACE \
21+
--security-opt seccomp=unconfined \
22+
--shm-size=64G \
23+
rocm/megatron-lm:latest sleep infinity
24+
25+
# get into the container
26+
docker exec -it dev_username bash
827
```
28+
29+
30+
Use the following command to clone the repo:
31+
- [ ] Set Megatron-LM as a submodule repo
32+
```bash
33+
mkdir workspace && cd workspace
34+
git clone [email protected]:AMD-AIG-AIMA/Primus.git
35+
git clone [email protected]:NVIDIA/Megatron-LM.git
36+
# version 20250324
37+
cd Megatron-LM && git checkout d61821b7174bac690afbad9134bcb4983521052f
38+
```
39+
40+
## Setup Primus
41+
```bash
42+
cd workspace/Primus
43+
# Install the required dependencies using:
44+
pip install -r requirements.txt
45+
# setup the pre-commit for your repo
46+
pre-commit install
47+
```
48+
49+
## Examples
50+
```bash
51+
cd workspace/Primus
52+
# deepseek pretrain (default use deepseek_v2_lite model)
53+
./examples/deepseek/run_pretrain.sh
54+
```
55+
56+
57+

examples/deepseek/run_pretrain.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ echo "PRIMUS_PATH: $PRIMUS_PATH"
1616
echo "MEGATRON_PATH: $MEGATRON_PATH"
1717

1818
# check megatron path
19-
[[ -z "${MEGATRON_PATH}" ]] && {
20-
echo "MEGATRON_PATH path is not set"
19+
[[ ! -d "${MEGATRON_PATH}" ]] && {
20+
echo "Error: MEGATRON_PATH (${MEGATRON_PATH}) does not exist"
2121
exit 1
2222
}
2323

@@ -42,8 +42,8 @@ export NCCL_IB_HCA=rdma0:1,rdma1:1,rdma2:1,rdma3:1,rdma4:1,rdma5:1,rdma6:1,rdma7
4242
export NCCL_IB_GID_INDEX=3
4343
export NCCL_CROSS_NIC=0
4444
export HSA_ENABLE_SDMA=0
45-
export NCCL_SOCKET_IFNAME=${NCCL_SOCKET_IFNAME:-eth0}
46-
export GLOO_SOCKET_IFNAME=${GLOO_SOCKET_IFNAME:-eth0}
45+
export NCCL_SOCKET_IFNAME=${NCCL_SOCKET_IFNAME:-ens51f0}
46+
export GLOO_SOCKET_IFNAME=${GLOO_SOCKET_IFNAME:-ens51f0}
4747
export CUDA_DEVICE_MAX_CONNECTIONS=1 # Reducing to 1 ensures no PCIE traffic (even on single node)
4848
export NCCL_PROTO=Simple
4949
export RCCL_MSCCL_ENABLE=0

primus/configs/models/megatron/deepseek_v3.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ qk_head_dim: 128
2020
qk_pos_emb_head_dim: 64
2121
v_head_dim: 128
2222
kv_channels: 128
23+
# mtp
24+
mtp_num_layers: 1
25+
mtp_loss_scaling_factor: 0.1
2326
# moe
2427
moe_layer_freq: 3
2528
num_experts: 256

primus/configs/models/megatron/deepseek_v3_base.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ multi_latent_attention: true
99
# multi_latent_attention does not support apply_rope_fusion
1010
apply_rope_fusion: false
1111

12+
# mtp
13+
mtp_num_layers: null # num_nextn_predict_layers
14+
mtp_loss_scaling_factor: 0.1
15+
1216
# moe
1317
moe_layer_freq: 1
1418
moe_router_topk: 6

primus/configs/models/megatron/language_model.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ rotary_scaling_factor: 1.0 # float
8888
mscale: 1.0 # float
8989
mscale_all_dim: 1.0 # float
9090

91+
# MTP
92+
mtp_num_layers: null # int
93+
mtp_loss_scaling_factor: 0.1 # float
94+
9195
# MoE related
9296
num_experts: null
9397
moe_layer_freq: 1 # int

primus/configs/modules/megatron/trainer_base.yaml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,10 +325,24 @@ log_straggler: false
325325
disable_straggler_on_startup: false
326326
straggler_ctrlr_port: 65535
327327
straggler_minmax_count: 1
328+
# inference
328329
inference_batch_times_seqlen_threshold: -1
330+
inference_dynamic_batching: false
331+
inference_dynamic_batching_buffer_size_gb: 40.0 # float
332+
inference_dynamic_batching_buffer_guaranteed_fraction: 0.2 # float
333+
inference_dynamic_batching_buffer_overflow_factor: null # float
334+
inference_dynamic_batching_max_requests_override: null # int
335+
inference_dynamic_batching_max_tokens_override: null # int
329336
max_tokens_to_oom: 12000
330337
output_bert_embeddings: false
331-
bert_embedder_type: megatron
338+
bert_embedder_type: megatron # "megatron", "huggingface"
339+
flash_decode: false
340+
enable_cuda_graph: false
341+
cuda_graph_warmup_steps: 3 # int
342+
external_cuda_graph: false
343+
cuda_graph_scope: full # full, attn
344+
inference_max_requests: 8 # int
345+
inference_max_seq_length: 2560 # int, (prefill + decode)
332346

333347
create_attention_mask_in_dataloader: true
334348
num_dataset_builder_threads: 1
@@ -354,3 +368,4 @@ parallel_output: false
354368
enable_ft_package: false
355369
calc_ft_timeouts: false
356370
run_workload_inspector_server: false
371+
is_hybrid_model: false

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
loguru
22
wandb
3+
pre-commit

0 commit comments

Comments
 (0)