Commit 9dde296

Merge branch 'master' into async_tp
2 parents: d5247b9 + 2e7f8e5

File tree

6 files changed: +27 -9 lines

.github/workflows/nv-a6000.yml
.github/workflows/nv-flash-attn.yml
.github/workflows/nv-human-eval.yml
deepspeed/runtime/zero/stage3.py
tests/unit/runtime/half_precision/test_fp16.py
tests/unit/runtime/test_multiple_models.py

.github/workflows/nv-a6000.yml (+3 -3)

@@ -23,7 +23,7 @@ jobs:
   unit-tests:
     runs-on: [self-hosted, nvidia, a6000]
     container:
-      image: nvcr.io/nvidia/pytorch:24.09-py3
+      image: nvcr.io/nvidia/pytorch:24.12-py3
       ports:
         - 80
       options: --gpus all --shm-size "8G"
@@ -58,8 +58,8 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2' unit/ --torch_ver="2.5" --cuda_ver="12"
-          python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2_ops' unit/ --torch_ver="2.5" --cuda_ver="12"
+          python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2' unit/ --torch_ver="2.6" --cuda_ver="12"
+          python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2_ops' unit/ --torch_ver="2.6" --cuda_ver="12"
       - name: MII unit tests
         run: |
           BRANCH="main"

.github/workflows/nv-flash-attn.yml (+2 -2)

@@ -18,7 +18,7 @@ jobs:
   unit-tests:
     runs-on: [self-hosted, nvidia, a6000]
     container:
-      image: nvcr.io/nvidia/pytorch:24.09-py3
+      image: nvcr.io/nvidia/pytorch:24.12-py3
       ports:
         - 80
       options: --gpus all --shm-size "8G"
@@ -53,7 +53,7 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          python -m pytest --color=yes --durations=0 --verbose -rF unit/sequence_parallelism/test_ulysses.py --torch_ver="2.5" --cuda_ver="12"
+          python -m pytest --color=yes --durations=0 --verbose -rF unit/sequence_parallelism/test_ulysses.py --torch_ver="2.6" --cuda_ver="12"
       - name: Open GitHub issue if nightly CI fails
         if: ${{ failure() && (github.event_name == 'schedule') }}
         uses: JasonEtco/create-an-issue@v2

.github/workflows/nv-human-eval.yml (+2 -2)

@@ -11,7 +11,7 @@ jobs:
   unit-tests:
     runs-on: [self-hosted, nvidia, a6000]
     container:
-      image: nvcr.io/nvidia/pytorch:24.09-py3
+      image: nvcr.io/nvidia/pytorch:24.12-py3
       ports:
         - 80
       options: --gpus all --shm-size "8G"
@@ -50,4 +50,4 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          python -m pytest --color=yes --durations=0 --verbose -rF -m 'evaluation' -k "test_human_eval" unit/ --torch_ver="2.5" --cuda_ver="12"
+          python -m pytest --color=yes --durations=0 --verbose -rF -m 'evaluation' -k "test_human_eval" unit/ --torch_ver="2.6" --cuda_ver="12"

deepspeed/runtime/zero/stage3.py (+2 -2)

@@ -448,7 +448,7 @@ def destroy(self):
         for hook in self._leaf_module_hooks:
             hook.remove()
         print_rank_0("Removed grad acc hooks", force=False)
-        del self.__ipg_bucket_flat_buffer
+        self._release_ipg_buffers()

     def initialize_ds_offload(
         self,
@@ -967,7 +967,7 @@ def _create_fp16_sub_groups(self, params_group):

     def _release_ipg_buffers(self):
         if self.contiguous_gradients:
-            self.ipg_buffer = None
+            self.__ipg_bucket_flat_buffer = None

     def _optimizer_step(self, sub_group_id):
         param_group_id = self.sub_group_to_group_id[sub_group_id]
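Two things change here: destroy() now routes cleanup through the existing _release_ipg_buffers() helper instead of deleting the name-mangled attribute directly, and the helper now clears __ipg_bucket_flat_buffer (the attribute that actually holds the bucket) rather than the stale ipg_buffer name. A minimal sketch of why the guarded helper is safer, using a hypothetical class standing in for the real stage-3 optimizer:

# Sketch only: Stage3Like is a stand-in, not DeepSpeed's actual class.
class Stage3Like:
    def __init__(self, contiguous_gradients: bool):
        self.contiguous_gradients = contiguous_gradients
        if contiguous_gradients:
            # Double underscore triggers name mangling: the attribute is
            # stored as _Stage3Like__ipg_bucket_flat_buffer.
            self.__ipg_bucket_flat_buffer = bytearray(1024)

    def _release_ipg_buffers(self):
        # Guarded release: only touches the buffer when it can exist, and
        # assigning None drops the reference so the memory can be freed.
        if self.contiguous_gradients:
            self.__ipg_bucket_flat_buffer = None

    def destroy(self):
        # A bare `del self.__ipg_bucket_flat_buffer` would raise
        # AttributeError when contiguous_gradients is False; the helper
        # is safe in both configurations.
        self._release_ipg_buffers()

Stage3Like(contiguous_gradients=False).destroy()  # no AttributeError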

tests/unit/runtime/half_precision/test_fp16.py (+12)

@@ -357,6 +357,8 @@ def test(self, zero_stage, use_cpu_offload):
         model.backward(loss)
         model.step()

+        model.destroy()
+

 @pytest.mark.parametrize("zero_stage", [1, 2, 3])
 @pytest.mark.parametrize("use_cpu_offload", [True, False])
@@ -402,6 +404,8 @@ def test(self, zero_stage, use_cpu_offload, hidden_dim=4):
         model.backward(loss)
         model.step()

+        model.destroy()
+

 @pytest.mark.parametrize("zero_stage", [1, 2, 3])
 @pytest.mark.parametrize("use_cpu_offload", [True, False])
@@ -436,6 +440,7 @@ def test(self, zero_stage, use_cpu_offload):
                                              model=model,
                                              optimizer=optimizer,
                                              model_parameters=model.parameters())
+        model.destroy()


 @pytest.mark.parametrize("zero_stage", [1, 2, 3])
@@ -486,6 +491,8 @@ def test(self, zero_stage, use_cpu_offload):
         model.backward(loss)
         model.step()

+        model.destroy()
+

 @amp_available
 class TestAmp(DistributedTest):
@@ -615,6 +622,7 @@ def test(self, zero_stage, optimizer_constructor):
         model = SimpleModel(hidden_dim)
         client_optimizer = optimizer_constructor(params=model.parameters())
         model, _, _, _ = deepspeed.initialize(config=config_dict, model=model, optimizer=client_optimizer)
+        model.destroy()


 class TestZero2ReduceScatterOff(DistributedTest):
@@ -727,6 +735,8 @@ def test(self):
         model.backward(loss)
         model.step()

+        model.destroy()
+

 @pytest.mark.parametrize('stage', [1, 2, 3])
 class TestZeroEmptyGrad(DistributedTest):
@@ -755,3 +765,5 @@ def test(self, stage):
         loss = model(batch[0], batch[1])
         model.backward(loss)
         model.step()
+
+        model.destroy()
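The pattern added throughout this file: every test that builds an engine now ends with model.destroy(), so ZeRO hooks and gradient buffers from one parameterized case cannot leak into the next. A minimal sketch of the shape of these tests; the config below is illustrative rather than copied from the suite, and the script is meant to run under the deepspeed launcher:

# Sketch only: run via `deepspeed this_script.py`; config values are assumed.
import torch
import deepspeed

config = {
    "train_batch_size": 1,
    "optimizer": {"type": "Adam", "params": {"lr": 1e-3}},
    "fp16": {"enabled": True},
    "zero_optimization": {"stage": 3},
}

model = torch.nn.Linear(8, 8)
engine, _, _, _ = deepspeed.initialize(config=config,
                                       model=model,
                                       model_parameters=model.parameters())

x = torch.randn(1, 8, dtype=torch.half, device=engine.device)
loss = engine(x).sum()
engine.backward(loss)
engine.step()

# The addition in this commit's tests: explicit teardown after the last
# step, which removes hooks and releases the IPG bucket via the fixed
# _release_ipg_buffers() path shown above.
engine.destroy()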

tests/unit/runtime/test_multiple_models.py (+6)

@@ -42,6 +42,9 @@ def train_shared_loss(num_models, config_dict, dtype):
         for m in models:
             m.optimizer.zero_grad()

+    for m in models:
+        m.destroy()
+

 def train_independent_loss(num_models, config_dict, dtype):
     hidden_dim = 64
@@ -59,6 +62,9 @@ def train_independent_loss(num_models, config_dict, dtype):
             m.backward(loss)
             m.step()

+    for m in models:
+        m.destroy()
+

 @pytest.mark.parametrize('num_models', [1, 2, 3])
 class TestMultipleModels(DistributedTest):
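test_multiple_models.py gets the same treatment, once per engine: when several models train in one process, each engine owns its own hooks and buffers, so teardown has to loop over all of them. A hedged sketch of a reusable helper one could build around this pattern; deepspeed_engines is hypothetical and not part of the test suite:

import contextlib
import deepspeed

@contextlib.contextmanager
def deepspeed_engines(models, config):
    # Hypothetical convenience wrapper: initialize one engine per model,
    # then guarantee every engine is destroyed even if the body raises.
    engines = [
        deepspeed.initialize(config=config, model=m,
                             model_parameters=m.parameters())[0]
        for m in models
    ]
    try:
        yield engines
    finally:
        for engine in engines:
            engine.destroy()  # mirrors the loops added in this commit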
