Commit 44e09d5

add weight standardization behind a feature flag, which may work well with group norm
1 parent 3480666 commit 44e09d5
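
For context: weight standardization (Qiao et al., arXiv:1903.10520) normalizes each convolutional filter to zero mean and unit variance over its fan-in before the convolution is applied. Since neither it nor group normalization depends on batch statistics, the two are reported to work well together. Below is a minimal standalone sketch of the computation this commit introduces; it mirrors the diff's math but is illustrative, not the repository code:

```python
import torch

# weight standardization, sketched on a raw tensor:
# normalize each output filter over its (in_channels, kh, kw) fan-in
weight = torch.randn(64, 32, 3, 3)   # (out_channels, in_channels, kh, kw)
flat = weight.flatten(1)             # (out_channels, fan_in)

mean = flat.mean(dim = -1).view(-1, 1, 1, 1)
var = flat.var(dim = -1, unbiased = False).view(-1, 1, 1, 1)

standardized = (weight - mean) * (var + 1e-5).rsqrt()

# each filter now has (approximately) zero mean and unit variance
assert standardized.flatten(1).mean(dim = -1).abs().max() < 1e-4
```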

3 files changed (+41, -6 lines)

README.md

+10
@@ -1264,4 +1264,14 @@ For detailed information on training the diffusion prior, please refer to the [d
 }
 ```
 
+```bibtex
+@article{Qiao2019WeightS,
+    title   = {Weight Standardization},
+    author  = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Loddon Yuille},
+    journal = {ArXiv},
+    year    = {2019},
+    volume  = {abs/1903.10520}
+}
+```
+
 *Creating noise from data is easy; creating data from noise is generative modeling.* - <a href="https://arxiv.org/abs/2011.13456">Yang Song's paper</a>

dalle2_pytorch/dalle2_pytorch.py

+30 -5
@@ -1451,6 +1451,26 @@ def Downsample(dim, *, dim_out = None):
     dim_out = default(dim_out, dim)
     return nn.Conv2d(dim, dim_out, 4, 2, 1)
 
+class WeightStandardizedConv2d(nn.Conv2d):
+    """
+    https://arxiv.org/abs/1903.10520
+    weight standardization purportedly works synergistically with group normalization
+    """
+    def forward(self, x):
+        eps = 1e-5 if x.dtype == torch.float32 else 1e-3
+
+        weight = self.weight
+        flattened_weights = rearrange(weight, 'o ... -> o (...)')
+
+        mean = reduce(weight, 'o ... -> o 1 1 1', 'mean')
+
+        var = torch.var(flattened_weights, dim = -1, unbiased = False)
+        var = rearrange(var, 'o -> o 1 1 1')
+
+        weight = (weight - mean) * (var + eps).rsqrt()
+
+        return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
+
 class SinusoidalPosEmb(nn.Module):
     def __init__(self, dim):
         super().__init__()
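
The new layer subclasses nn.Conv2d and standardizes only inside forward, so the stored parameter is left untouched and eps adapts to the input dtype (looser in half precision). A quick sanity-check sketch, assuming the class is imported from the module above:

```python
import torch
from dalle2_pytorch.dalle2_pytorch import WeightStandardizedConv2d

conv = WeightStandardizedConv2d(32, 64, 3, padding = 1)  # drop-in nn.Conv2d replacement
x = torch.randn(1, 32, 16, 16)

out = conv(x)
print(out.shape)  # torch.Size([1, 64, 16, 16])

# conv.weight itself is not standardized; normalization is applied on the fly
print(conv.weight.flatten(1).mean(dim = -1).abs().max())  # generally nonzero
```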
@@ -1469,10 +1489,13 @@ def __init__(
         self,
         dim,
         dim_out,
-        groups = 8
+        groups = 8,
+        weight_standardization = False
     ):
         super().__init__()
-        self.project = nn.Conv2d(dim, dim_out, 3, padding = 1)
+        conv_klass = nn.Conv2d if not weight_standardization else WeightStandardizedConv2d
+
+        self.project = conv_klass(dim, dim_out, 3, padding = 1)
         self.norm = nn.GroupNorm(groups, dim_out)
         self.act = nn.SiLU()
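With the flag in place, Block simply swaps the class of its projection conv. A small illustrative check (hypothetical usage, since Block is an internal building block of the Unet):

```python
from dalle2_pytorch.dalle2_pytorch import Block

block = Block(32, 64, weight_standardization = True)
print(type(block.project).__name__)  # WeightStandardizedConv2d

block = Block(32, 64)                # flag defaults to False
print(type(block.project).__name__)  # Conv2d
```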

@@ -1496,6 +1519,7 @@ def __init__(
         cond_dim = None,
         time_cond_dim = None,
         groups = 8,
+        weight_standardization = False,
         cosine_sim_cross_attn = False
     ):
         super().__init__()
@@ -1521,8 +1545,8 @@ def __init__(
             )
         )
 
-        self.block1 = Block(dim, dim_out, groups = groups)
-        self.block2 = Block(dim_out, dim_out, groups = groups)
+        self.block1 = Block(dim, dim_out, groups = groups, weight_standardization = weight_standardization)
+        self.block2 = Block(dim_out, dim_out, groups = groups, weight_standardization = weight_standardization)
         self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()
 
     def forward(self, x, time_emb = None, cond = None):
@@ -1747,6 +1771,7 @@ def __init__(
         init_dim = None,
         init_conv_kernel_size = 7,
         resnet_groups = 8,
+        resnet_weight_standardization = False,
         num_resnet_blocks = 2,
         init_cross_embed = True,
         init_cross_embed_kernel_sizes = (3, 7, 15),
@@ -1894,7 +1919,7 @@ def __init__(
 
         # prepare resnet klass
 
-        resnet_block = partial(ResnetBlock, cosine_sim_cross_attn = cosine_sim_cross_attn)
+        resnet_block = partial(ResnetBlock, cosine_sim_cross_attn = cosine_sim_cross_attn, weight_standardization = resnet_weight_standardization)
 
         # give memory efficient unet an initial resnet block
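
At the user-facing level the feature is opt-in through the new Unet argument, which the partial above forwards into every ResnetBlock. A hypothetical configuration sketch (the surrounding arguments follow the README's Unet examples; only the last line is new in this commit):

```python
from dalle2_pytorch import Unet

unet = Unet(
    dim = 128,
    image_embed_dim = 512,
    cond_dim = 128,
    channels = 3,
    dim_mults = (1, 2, 4, 8),
    resnet_weight_standardization = True  # new in this commit; defaults to False
)
```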

dalle2_pytorch/version.py

+1 -1

@@ -1 +1 @@
-__version__ = '1.6.5'
+__version__ = '1.7.0'
