
Commit b770c5b

minor fix
1 parent 5da0471 commit b770c5b

3 files changed, +4 -12 lines changed


configs/7B_sft.py

Lines changed: 1 addition & 12 deletions
@@ -22,24 +22,15 @@
 CHECKPOINT_EVERY = 50
 ckpt = dict(
     enable_save_ckpt=False,  # enable ckpt save.
-    enable_internevo2hf_ckpt=False,  # enable ckpt save for huggingface format.
     save_ckpt_folder=SAVE_CKPT_FOLDER,  # Path to save training ckpt.
-    # load_ckpt_folder= dict(path=MODEL_ONLY_FOLDER, content=["model"], ckpt_type="normal"),
-    load_ckpt_folder="local:llm_ckpts/",
-    # 'load_ckpt_info' setting guide:
-    # 1. the 'path' indicate ckpt path,
-    # 2. the 'content' means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "hf", or other custom-defined
-    # load function such as "llama"
-    load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="internevo"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
     # with an automatic restart mechanism upon training reboot.
     # Please be aware that if `auto_resume` is not set (its default value is True), it will not load the checkpoint
     # path specified in `load_ckpt_info` by default.
     # If you want to initialize your model weights from another model, you must set `auto_resume` to False.
     # If you want to train from scratch, please set `auto_resume` to False and 'load_ckpt_info' to None.
-    auto_resume=True,
+    auto_resume=False,
     checkpoint_every=CHECKPOINT_EVERY,
     async_upload=True,  # async ckpt upload. (only work for boto3 ckpt)
     async_upload_tmp_folder="/dev/shm/internlm_tmp_ckpt/",  # path for temporarily files during asynchronous upload.
@@ -144,14 +135,12 @@
 model = dict(
     checkpoint=False,  # The proportion of layers for activation aheckpointing, the optional value are True/False/[0-1]
     num_attention_heads=NUM_ATTENTION_HEAD,
-    embed_split_hidden=True,
     vocab_size=VOCAB_SIZE,
     embed_grad_scale=1,
     parallel_output=True,
     hidden_size=HIDDEN_SIZE,
     num_layers=NUM_LAYER,
     mlp_ratio=MLP_RATIO,
-    apply_post_layer_norm=False,
     dtype="torch.bfloat16",  # Support: "torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.tf32"
     norm_type="rmsnorm",
     layer_norm_epsilon=1e-5,
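
Note: with load_ckpt_info removed and auto_resume set to False, the config now matches the "train from scratch" case described in the comments above. If you instead want to initialize SFT from an existing model, the removed keys can be restored along the lines of the sketch below; the folder paths are hypothetical placeholders, and the key semantics are assumed to behave as the comments in this config describe.

    # Sketch only: load a pretrained checkpoint at SFT start.
    # MODEL_ONLY_FOLDER / SAVE_CKPT_FOLDER are placeholder paths, not values from this commit.
    MODEL_ONLY_FOLDER = "local:llm_ckpts/pretrained/"
    SAVE_CKPT_FOLDER = "local:llm_ckpts/sft/"

    ckpt = dict(
        enable_save_ckpt=True,  # save ckpts during fine-tuning
        save_ckpt_folder=SAVE_CKPT_FOLDER,
        # 'path': ckpt location; 'content': which states to load ("model", "sampler",
        # "optimizer", "scheduler", "all"); 'ckpt_type': checkpoint format
        # ("internevo", "hf", or a custom loader such as "llama").
        load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="internevo"),
        # auto_resume must stay False so load_ckpt_info is honored instead of the
        # latest checkpoint found under save_ckpt_folder.
        auto_resume=False,
        checkpoint_every=CHECKPOINT_EVERY,  # defined above as 50
    )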

internlm/initialize/initialize_launcher.py

Lines changed: 2 additions & 0 deletions
@@ -57,6 +57,8 @@ def dispatch_hf_config_before_launch(hf: dict) -> None:
         gpc.config.model.num_experts = model_config.num_experts
     elif hasattr(model_config, "n_routed_experts"):
         gpc.config.model.num_experts = model_config.n_routed_experts
+    if hasattr(model_config, "first_k_dense_replace"):
+        gpc.config.model.first_k_dense_replace = model_config.first_k_dense_replace


 def args_sanity_check():
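
The two added lines extend the existing hasattr-based dispatch: a field is copied from the HuggingFace model config onto gpc.config.model only when the HF config actually defines it, so configs without first_k_dense_replace are unaffected. Below is a standalone illustration of that pattern; SimpleNamespace objects stand in for the real HF config and gpc.config.model, an assumption made only for this sketch.

    # Illustration of the hasattr-dispatch pattern used in dispatch_hf_config_before_launch.
    from types import SimpleNamespace

    def dispatch_moe_fields(model_config, train_model_cfg):
        # Different HF configs name the expert count differently.
        if hasattr(model_config, "num_experts"):
            train_model_cfg.num_experts = model_config.num_experts
        elif hasattr(model_config, "n_routed_experts"):
            train_model_cfg.num_experts = model_config.n_routed_experts
        # Forward first_k_dense_replace only if the HF config defines it.
        if hasattr(model_config, "first_k_dense_replace"):
            train_model_cfg.first_k_dense_replace = model_config.first_k_dense_replace

    hf_cfg = SimpleNamespace(n_routed_experts=64, first_k_dense_replace=1)
    model_cfg = SimpleNamespace()
    dispatch_moe_fields(hf_cfg, model_cfg)
    print(model_cfg.num_experts, model_cfg.first_k_dense_replace)  # -> 64 1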

train.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+internlm/launcher/launch.py
