Skip to content

Commit 20cbcfb

Browse files
feat(cpu_offload.py): support selective layers' activation cpu offload async (#391)
1 parent b0928c7 commit 20cbcfb

File tree

7 files changed

+579
-20
lines changed

7 files changed

+579
-20
lines changed

configs/7B_isp_sft.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,18 @@
151151
cur_iter=-1,
152152
)
153153

154+
# cpu_offloading = dict(
155+
# enable=True,
156+
# num_layers=3,
157+
# )
158+
# selective_checkpoint = True
159+
# selective_checkpoint_offload = False
160+
154161
use_fp32_norm = False
155162
model = dict(
156163
checkpoint=False, # The proportion of layers for activation checkpointing, the optional values are True/False/[0-1]
157164
num_attention_heads=NUM_ATTENTION_HEAD,
165+
num_kv_attention_heads=NUM_KV_ATTENTION_HEAD,
158166
embed_split_hidden=True,
159167
vocab_size=VOCAB_SIZE,
160168
embed_grad_scale=1,

0 commit comments

Comments
 (0)