Skip to content

Commit 3f2cbff

Browse files
iden-kalemaj authored and facebook-github-bot committed
Use SimpleDistributedPerLayerOptimizer in hooks mode
Summary: We use SimpleDistributedPerLayerOptimizer instead of DistributedPerLayerOptimizer. The latter causes an issue when switching to `register_full_backward_hook`. The issue arises because DistributedPerLayerOptimizer uses per-parameter hooks on top of the per-module hooks. During the backward pass, the per-parameter hooks fire before the per-module hooks. Per-sample gradients are computed when the per-module hooks fire, and an error occurs when the per-parameter hooks try to access the per-sample gradients before they are computed. Forcing the order in which hooks are called is not possible with PyTorch. Differential Revision: D72420168
1 parent 6c2cde9 commit 3f2cbff

File tree

2 files changed

+3
-12
lines changed

2 files changed

+3
-12
lines changed

opacus/optimizers/__init__.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828

2929
__all__ = [
3030
"AdaClipDPOptimizer",
31-
"DistributedPerLayerOptimizer",
3231
"DistributedDPOptimizer",
3332
"DPOptimizer",
3433
"DPOptimizerFastGradientClipping",
@@ -55,9 +54,7 @@ def get_optimizer_class(clipping: str, distributed: bool, grad_sample_mode: str
5554
elif clipping == "per_layer" and distributed is False:
5655
return DPPerLayerOptimizer
5756
elif clipping == "per_layer" and distributed is True:
58-
if grad_sample_mode == "hooks":
59-
return DistributedPerLayerOptimizer
60-
elif grad_sample_mode == "ew":
57+
if grad_sample_mode == "hooks" or grad_sample_mode == "ew":
6158
return SimpleDistributedPerLayerOptimizer
6259
else:
6360
raise ValueError(f"Unexpected grad_sample_mode: {grad_sample_mode}")

opacus/tests/multigpu_gradcheck.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,7 @@
2626
from opacus import PrivacyEngine
2727
from opacus.distributed import DifferentiallyPrivateDistributedDataParallel as DPDDP
2828
from opacus.grad_sample import GradSampleModuleFastGradientClipping
29-
from opacus.optimizers.ddp_perlayeroptimizer import (
30-
DistributedPerLayerOptimizer,
31-
SimpleDistributedPerLayerOptimizer,
32-
)
29+
from opacus.optimizers.ddp_perlayeroptimizer import SimpleDistributedPerLayerOptimizer
3330
from opacus.optimizers.ddpoptimizer import DistributedDPOptimizer
3431
from opacus.optimizers.ddpoptimizer_fast_gradient_clipping import (
3532
DistributedDPOptimizerFastGradientClipping,
@@ -165,10 +162,7 @@ def demo_basic(rank, weight, world_size, dp, clipping, grad_sample_mode):
165162
grad_sample_mode=grad_sample_mode,
166163
)
167164
if clipping == "per_layer":
168-
assert isinstance(
169-
optimizer,
170-
(DistributedPerLayerOptimizer, SimpleDistributedPerLayerOptimizer),
171-
)
165+
assert isinstance(optimizer, SimpleDistributedPerLayerOptimizer)
172166
else:
173167
assert isinstance(optimizer, DistributedDPOptimizer)
174168

0 commit comments

Comments (0)