File tree Expand file tree Collapse file tree 3 files changed +4
-3
lines changed
slime/backends/megatron_utils Expand file tree Collapse file tree 3 files changed +4
-3
lines changed Original file line number Diff line number Diff line change @@ -34,7 +34,7 @@ RUN git clone https://github.com/Dao-AILab/flash-attention.git && \
34 34
35 35   RUN pip install git+https://github.com/ISEEKYAN/mbridge.git@89eb10887887bc74853f89a4de258c0702932a1c --no-deps
36 36
37    - RUN pip install flash-linear-attention==0.4.0
   37 + RUN pip install flash-linear-attention==0.4.1
38 38   RUN pip install tilelang -f https://tile-ai.github.io/whl/nightly/cu128/
39 39
40 40   # TE does not have wheel on cuda 13 yet, thus need to install from source
Original file line number Diff line number Diff line change 1- nightly-dev-20260118a
1+ nightly-dev-20260119a
Original file line number Diff line number Diff line change @@ -14,7 +14,8 @@ def set_default_megatron_args(args):
14 14       # TODO: maybe change this after megatron has good fp8 support
15 15       args.bf16 = not args.fp16
16 16       # placeholders
17    -     args.seq_length = 4096
   17 +     if args.seq_length is None:
   18 +         args.seq_length = 4096
18 19       args.max_position_embeddings = args.seq_length
19 20       # TODO: revisit this when megatron(dev) have solved the optimizer-cpu-offload ckpt saving bug
20 21       args.dist_ckpt_save_pre_mcore_014 = True
You can’t perform that action at this time.
0 commit comments