Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion llm_studio/src/utils/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,6 @@ def get_ds_config(cfg: DefaultConfigProblemBase):
},
"bf16": {
"enabled": True if cfg.architecture.backbone_dtype == "bfloat16" else False,
"loss_scale_window": 100,
},
# https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
"zero_force_ds_cpu_optimizer": False,
Expand Down
6 changes: 5 additions & 1 deletion llm_studio/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,12 @@ def run_train(
model.require_backward_grad_sync = itr % cfg.training.grad_accumulation == 0

# Forward pass
# When using DeepSpeed, mixed precision is handled by the engine via
# its bf16/fp16 config, so a nested torch.autocast must not be active
# (newer DeepSpeed asserts against it).
with autocast(
enabled=cfg.environment.mixed_precision,
enabled=cfg.environment.mixed_precision
and not cfg.environment.use_deepspeed,
dtype=get_torch_dtype(cfg.environment.mixed_precision_dtype),
):
output_dict = model.forward(batch)
Expand Down
Loading