Skip to content

Commit 0919cb2

Browse files
authored
Merge branch 'master' into master
2 parents: cf18d95 + 2e7f8e5; commit 0919cb2

File tree

3 files changed

+20
-2
lines changed

3 files changed

+20
-2
lines changed

deepspeed/runtime/zero/stage3.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -448,7 +448,7 @@ def destroy(self):
448448
for hook in self._leaf_module_hooks:
449449
hook.remove()
450450
print_rank_0("Removed grad acc hooks", force=False)
451-
del self.__ipg_bucket_flat_buffer
451+
self._release_ipg_buffers()
452452

453453
def initialize_ds_offload(
454454
self,
@@ -967,7 +967,7 @@ def _create_fp16_sub_groups(self, params_group):
967967

968968
def _release_ipg_buffers(self):
969969
if self.contiguous_gradients:
970-
self.ipg_buffer = None
970+
self.__ipg_bucket_flat_buffer = None
971971

972972
def _optimizer_step(self, sub_group_id):
973973
param_group_id = self.sub_group_to_group_id[sub_group_id]

tests/unit/runtime/half_precision/test_fp16.py

+12
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,8 @@ def test(self, zero_stage, use_cpu_offload):
357357
model.backward(loss)
358358
model.step()
359359

360+
model.destroy()
361+
360362

361363
@pytest.mark.parametrize("zero_stage", [1, 2, 3])
362364
@pytest.mark.parametrize("use_cpu_offload", [True, False])
@@ -402,6 +404,8 @@ def test(self, zero_stage, use_cpu_offload, hidden_dim=4):
402404
model.backward(loss)
403405
model.step()
404406

407+
model.destroy()
408+
405409

406410
@pytest.mark.parametrize("zero_stage", [1, 2, 3])
407411
@pytest.mark.parametrize("use_cpu_offload", [True, False])
@@ -436,6 +440,7 @@ def test(self, zero_stage, use_cpu_offload):
436440
model=model,
437441
optimizer=optimizer,
438442
model_parameters=model.parameters())
443+
model.destroy()
439444

440445

441446
@pytest.mark.parametrize("zero_stage", [1, 2, 3])
@@ -486,6 +491,8 @@ def test(self, zero_stage, use_cpu_offload):
486491
model.backward(loss)
487492
model.step()
488493

494+
model.destroy()
495+
489496

490497
@amp_available
491498
class TestAmp(DistributedTest):
@@ -615,6 +622,7 @@ def test(self, zero_stage, optimizer_constructor):
615622
model = SimpleModel(hidden_dim)
616623
client_optimizer = optimizer_constructor(params=model.parameters())
617624
model, _, _, _ = deepspeed.initialize(config=config_dict, model=model, optimizer=client_optimizer)
625+
model.destroy()
618626

619627

620628
class TestZero2ReduceScatterOff(DistributedTest):
@@ -727,6 +735,8 @@ def test(self):
727735
model.backward(loss)
728736
model.step()
729737

738+
model.destroy()
739+
730740

731741
@pytest.mark.parametrize('stage', [1, 2, 3])
732742
class TestZeroEmptyGrad(DistributedTest):
@@ -755,3 +765,5 @@ def test(self, stage):
755765
loss = model(batch[0], batch[1])
756766
model.backward(loss)
757767
model.step()
768+
769+
model.destroy()

tests/unit/runtime/test_multiple_models.py

+6
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,9 @@ def train_shared_loss(num_models, config_dict, dtype):
4242
for m in models:
4343
m.optimizer.zero_grad()
4444

45+
for m in models:
46+
m.destroy()
47+
4548

4649
def train_independent_loss(num_models, config_dict, dtype):
4750
hidden_dim = 64
@@ -59,6 +62,9 @@ def train_independent_loss(num_models, config_dict, dtype):
5962
m.backward(loss)
6063
m.step()
6164

65+
for m in models:
66+
m.destroy()
67+
6268

6369
@pytest.mark.parametrize('num_models', [1, 2, 3])
6470
class TestMultipleModels(DistributedTest):

0 commit comments

Comments (0)