We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 997eae8 · commit 65635eb — Copy full SHA for 65635eb
src/accelerate/accelerator.py
@@ -1636,8 +1636,11 @@ def _get_tensor_address(p):
1636
return args
1637
1638
def _prepare_cp(self, *args):
1639
- if self.parallelism_config.sp_backend == "deepspeed":
1640
- # deepspeed handles cp in a different way, configured in _prepare_deepspeed
+ # Skip CP setup if SP (Sequence Parallelism) is actually enabled (sp_size > 1)
+ # CP and SP are mutually exclusive - they're different approaches for handling long sequences:
1641
+ # - CP uses Ring Attention (FSDP2-based)
1642
+ # - SP uses ALST/Ulysses (DeepSpeed-based)
1643
+ if self.parallelism_config.sp_enabled:
1644
1645
1646
from torch.distributed.tensor.experimental import context_parallel
0 commit comments