
Commit a9c336d

Backend paddle: add MfNN net; add regularizer; add optimizers

1 parent 8275aeb · commit a9c336d

File tree

13 files changed: +201 −18 lines changed

deepxde/backend/backend.py

Lines changed: 9 additions & 0 deletions

@@ -502,3 +502,12 @@ def sparse_dense_matmul(x, y):
     Returns:
         Tensor: The multiplication result.
     """
+
+def l1_decay(x):
+    """Implement the L1 weight decay regularization."""
+
+def l2_decay(x):
+    """Implement the L2 weight decay regularization."""
+
+def l1_l2_decay(x, y):
+    """Implement the L1 and L2 weight decay regularization."""

deepxde/backend/paddle/tensor.py

Lines changed: 6 additions & 0 deletions

@@ -229,3 +229,9 @@ def matmul(x, y):

 def sparse_dense_matmul(x, y):
     return paddle.sparse.matmul(x, y)
+
+def l1_decay(x):
+    return paddle.regularizer.L1Decay(coeff=x)
+
+def l2_decay(x):
+    return paddle.regularizer.L2Decay(coeff=x)
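
In Paddle these regularizer objects are not added to the loss by hand; they are passed to an optimizer's weight_decay argument, which is how the optimizer changes later in this commit consume them. A minimal standalone sketch (the layer and coefficient are illustrative):

    import paddle

    linear = paddle.nn.Linear(4, 1)
    # L2 weight decay applied through the optimizer, using the same object l2_decay(1e-4) returns.
    opt = paddle.optimizer.Adam(
        learning_rate=1e-3,
        parameters=linear.parameters(),
        weight_decay=paddle.regularizer.L2Decay(coeff=1e-4),
    )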

deepxde/backend/tensorflow_compat_v1/tensor.py

Lines changed: 9 additions & 0 deletions

@@ -245,3 +245,12 @@ def matmul(x, y):

 def sparse_dense_matmul(x, y):
     return tf.sparse.sparse_dense_matmul(x, y)
+
+def l1_decay(x):
+    return tf.keras.regularizers.L1(l1=x)
+
+def l2_decay(x):
+    return tf.keras.regularizers.L2(l2=x)
+
+def l1_l2_decay(x, y):
+    return tf.keras.regularizers.L1L2(l1=x, l2=y)
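
For the tensorflow.compat.v1 backend the returned objects are standard Keras regularizers, i.e. callables that map a weight tensor to a scalar penalty. A small hedged sketch:

    import tensorflow.compat.v1 as tf

    reg = tf.keras.regularizers.L2(l2=1e-4)
    w = tf.Variable(tf.ones([3, 3]))
    penalty = reg(w)  # scalar tensor equal to 1e-4 * sum(w ** 2)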

deepxde/data/mf.py

Lines changed: 3 additions & 2 deletions

@@ -1,7 +1,8 @@
 import numpy as np

 from .data import Data
-from ..backend import tf
+from .. import backend as bkd
+from .. import config
 from ..utils import run_if_any_none, standardize


@@ -116,7 +117,7 @@ def losses_train(self, targets, outputs, loss_fn, inputs, model, aux=None):
         return [loss_lo, loss_hi]

     def losses_test(self, targets, outputs, loss_fn, inputs, model, aux=None):
-        return [0, loss_fn(targets[1], outputs[1])]
+        return [bkd.as_tensor(0, dtype=config.real(bkd.lib)), loss_fn(targets[1], outputs[1])]

     @run_if_any_none("X_train", "y_train")
     def train_next_batch(self, batch_size=None):
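
The low-fidelity test loss is a constant zero; returning it as a typed tensor instead of the Python int 0 keeps every entry of the loss list a backend tensor in the configured precision. A sketch of the idiom used above:

    from deepxde import backend as bkd
    from deepxde import config

    # Zero loss as a backend tensor in the configured float precision (float32 by default).
    zero_loss = bkd.as_tensor(0, dtype=config.real(bkd.lib))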

deepxde/model.py

Lines changed: 6 additions & 1 deletion

@@ -506,8 +506,13 @@ def outputs_losses_test(inputs, targets, auxiliary_vars):
         trainable_variables = (
             list(self.net.parameters()) + self.external_trainable_variables
         )
+        regularizer = getattr(self.net, 'regularizer', None)
+        if regularizer is not None:
+            weight_decay = self.net.regularizer_value if self.opt_name == "adamw" else self.net.regularizer
+        else:
+            weight_decay = None
         self.opt = optimizers.get(
-            trainable_variables, self.opt_name, learning_rate=lr, decay=decay
+            trainable_variables, self.opt_name, learning_rate=lr, decay=decay, weight_decay=weight_decay,
         )

         def train_step(inputs, targets, auxiliary_vars):
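
With this wiring, a network's regularization setting reaches the Paddle optimizer automatically at compile time: "adamw" receives the raw factor (regularizer_value), while every other optimizer receives the paddle.regularizer object. A hedged end-to-end sketch under the paddle backend (the dataset, layer sizes, and factor are illustrative):

    import numpy as np
    import deepxde as dde

    # Toy regression dataset, only to make the example compilable.
    X = np.random.rand(100, 2).astype("float32")
    y = np.sum(X, axis=1, keepdims=True)
    data = dde.data.DataSet(X_train=X, y_train=y, X_test=X, y_test=y)

    net = dde.nn.FNN([2, 32, 32, 1], "tanh", "Glorot uniform", regularization=["l2", 1e-4])
    model = dde.Model(data, net)
    model.compile("adam", lr=1e-3)     # weight_decay = paddle.regularizer.L2Decay(coeff=1e-4)
    # model.compile("adamw", lr=1e-3)  # weight_decay = [1e-4]; AdamW receives the scalar 1e-4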

deepxde/nn/paddle/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -4,6 +4,7 @@
     "DeepONet",
     "DeepONetCartesianProd",
     "FNN",
+    "MfNN",
     "MsFFN",
     "PFNN",
     "STMsFFN",

@@ -12,3 +13,4 @@
 from .deeponet import DeepONet, DeepONetCartesianProd
 from .fnn import FNN, PFNN
 from .msffn import MsFFN, STMsFFN
+from .mfnn import MfNN

deepxde/nn/paddle/fnn.py

Lines changed: 15 additions & 1 deletion

@@ -3,12 +3,20 @@
 from .nn import NN
 from .. import activations
 from .. import initializers
+from .. import regularizers


 class FNN(NN):
     """Fully-connected neural network."""

-    def __init__(self, layer_sizes, activation, kernel_initializer):
+    def __init__(
+        self,
+        layer_sizes,
+        activation,
+        kernel_initializer,
+        regularization=None,
+        dropout_rate=0.0,
+    ):
         super().__init__()
         if isinstance(activation, list):
             if not (len(layer_sizes) - 1) == len(activation):

@@ -20,12 +28,16 @@ def __init__(self, layer_sizes, activation, kernel_initializer):
         self.activation = activations.get(activation)
         initializer = initializers.get(kernel_initializer)
         initializer_zero = initializers.get("zeros")
+        self.regularizer = regularizers.get(regularization)
+        self.regularizer_value = regularization[1:] if regularization is not None else None
+        self.dropout_rate = dropout_rate

         self.linears = paddle.nn.LayerList()
         for i in range(1, len(layer_sizes)):
             self.linears.append(paddle.nn.Linear(layer_sizes[i - 1], layer_sizes[i]))
             initializer(self.linears[-1].weight)
             initializer_zero(self.linears[-1].bias)
+        self.dropout = paddle.nn.Dropout(p=dropout_rate) if dropout_rate > 0.0 else None

     def forward(self, inputs):
         x = inputs

@@ -37,6 +49,8 @@ def forward(self, inputs):
                 if isinstance(self.activation, list)
                 else self.activation(linear(x))
             )
+            if self.dropout is not None:
+                x = self.dropout(x)
         x = self.linears[-1](x)
         if self._output_transform is not None:
             x = self._output_transform(inputs, x)
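
The Paddle FNN now takes the same regularization argument as the other backends plus a dropout_rate; dropout is applied after the activation of every hidden layer. A minimal construction sketch, assuming the paddle backend is selected (sizes and rate are illustrative):

    import deepxde as dde

    # Dropout with p=0.1 after each hidden activation.
    net = dde.nn.FNN([2, 32, 32, 1], "tanh", "Glorot uniform", dropout_rate=0.1)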

deepxde/nn/paddle/mfnn.py

Lines changed: 119 additions & 0 deletions

@@ -0,0 +1,119 @@
+import paddle
+
+from .nn import NN
+from .. import activations
+from .. import initializers
+from .. import regularizers
+from ... import config
+
+
+class MfNN(NN):
+    """Multifidelity neural networks."""
+
+    def __init__(
+        self,
+        layer_sizes_low_fidelity,
+        layer_sizes_high_fidelity,
+        activation,
+        kernel_initializer,
+        regularization=None,
+        residue=False,
+        trainable_low_fidelity=True,
+        trainable_high_fidelity=True,
+    ):
+        super().__init__()
+        self.layer_size_lo = layer_sizes_low_fidelity
+        self.layer_size_hi = layer_sizes_high_fidelity
+
+        self.activation = activations.get(activation)
+        self.activation_tanh = activations.get("tanh")
+        self.initializer = initializers.get(kernel_initializer)
+        self.initializer_zero = initializers.get("zeros")
+        self.trainable_lo = trainable_low_fidelity
+        self.trainable_hi = trainable_high_fidelity
+        self.residue = residue
+        self.regularizer = regularizers.get(regularization)
+        self.regularizer_value = regularization[1:] if regularization is not None else None
+
+        # low fidelity
+        self.linears_lo = self.init_dense(self.layer_size_lo, self.trainable_lo)
+
+        # high fidelity
+        # linear part
+        self.linears_hi_l = paddle.nn.Linear(
+            in_features=self.layer_size_lo[0] + self.layer_size_lo[-1],
+            out_features=self.layer_size_hi[-1],
+            weight_attr=paddle.ParamAttr(initializer=self.initializer),
+            bias_attr=paddle.ParamAttr(initializer=self.initializer_zero),
+        )
+        if not self.trainable_hi:
+            for param in self.linears_hi_l.parameters():
+                param.stop_gradient = False
+        # nonlinear part
+        self.layer_size_hi = [self.layer_size_lo[0] + self.layer_size_lo[-1]] + self.layer_size_hi
+        self.linears_hi = self.init_dense(self.layer_size_hi, self.trainable_hi)
+        # linear + nonlinear
+        if not self.residue:
+            alpha = self.init_alpha(0.0, self.trainable_hi)
+            self.add_parameter("alpha", alpha)
+        else:
+            alpha1 = self.init_alpha(0.0, self.trainable_hi)
+            alpha2 = self.init_alpha(0.0, self.trainable_hi)
+            self.add_parameter("alpha1", alpha1)
+            self.add_parameter("alpha2", alpha2)
+
+    def init_dense(self, layer_size, trainable):
+        linears = paddle.nn.LayerList()
+        for i in range(len(layer_size) - 1):
+            linear = paddle.nn.Linear(
+                in_features=layer_size[i],
+                out_features=layer_size[i + 1],
+                weight_attr=paddle.ParamAttr(initializer=self.initializer),
+                bias_attr=paddle.ParamAttr(initializer=self.initializer_zero),
+            )
+            if not trainable:
+                for param in linear.parameters():
+                    param.stop_gradient = False
+            linears.append(linear)
+        return linears
+
+    def init_alpha(self, value, trainable):
+        alpha = paddle.create_parameter(
+            shape=[1],
+            dtype=config.real(paddle),
+            default_initializer=paddle.nn.initializer.Constant(value),
+        )
+        alpha.stop_gradient = not trainable
+        return alpha
+
+    def forward(self, inputs):
+        x = inputs.astype(config.real(paddle))
+        # low fidelity
+        y = x
+        for i, linear in enumerate(self.linears_lo):
+            y = linear(y)
+            if i != len(self.linears_lo) - 1:
+                y = self.activation(y)
+        y_lo = y
+
+        # high fidelity
+        x_hi = paddle.concat([x, y_lo], axis=1)
+        # linear
+        y_hi_l = self.linears_hi_l(x_hi)
+        # nonlinear
+        y = x_hi
+        for i, linear in enumerate(self.linears_hi):
+            y = linear(y)
+            if i != len(self.linears_hi) - 1:
+                y = self.activation(y)
+        y_hi_nl = y
+        # linear + nonlinear
+        if not self.residue:
+            alpha = self.activation_tanh(self.alpha)
+            y_hi = y_hi_l + alpha * y_hi_nl
+        else:
+            alpha1 = self.activation_tanh(self.alpha1)
+            alpha2 = self.activation_tanh(self.alpha2)
+            y_hi = y_lo + 0.1 * (alpha1 * y_hi_l + alpha2 * y_hi_nl)
+
+        return y_lo, y_hi
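
MfNN mirrors the multifidelity network already available for the tensorflow.compat.v1 backend: the low-fidelity subnetwork maps the input x to y_lo, and the high-fidelity part combines a linear and a nonlinear correction of [x, y_lo], blended by trainable alpha parameters, into y_hi. A hedged construction sketch under the paddle backend (layer sizes are illustrative):

    import deepxde as dde

    # Low-fidelity subnetwork 1 -> 20 -> 20 -> 1; high-fidelity correction -> 10 -> 10 -> 1.
    net = dde.nn.MfNN(
        [1, 20, 20, 1],
        [10, 10, 1],
        "tanh",
        "Glorot uniform",
    )
    # Typically paired with dde.data.MfDataSet or dde.data.MfFunc, whose
    # (low-fidelity, high-fidelity) targets match the network's (y_lo, y_hi) outputs.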

deepxde/nn/regularizers.py

Lines changed: 13 additions & 9 deletions

@@ -1,4 +1,5 @@
-from ..backend import tf
+from .. import backend as bkd
+from ..backend import backend_name


 def get(identifier):

@@ -22,12 +23,15 @@ def get(identifier):
     if not factor:
         raise ValueError("Regularization factor must be provided.")

-    if name == "l1":
-        return tf.keras.regularizers.L1(l1=factor[0])
-    if name == "l2":
-        return tf.keras.regularizers.L2(l2=factor[0])
-    if name in ("l1l2", "l1+l2"):
-        if len(factor) < 2:
-            raise ValueError("L1L2 regularizer requires both L1/L2 penalties.")
-        return tf.keras.regularizers.L1L2(l1=factor[0], l2=factor[1])
+    try:
+        if name == "l1":
+            return bkd.l1_decay(factor[0])
+        if name == "l2":
+            return bkd.l2_decay(factor[0])
+        if name in ("l1l2", "l1+l2"):
+            # TODO: only supported by 'tensorflow.compat.v1' now.
+            if len(factor) < 2:
+                return bkd.l1_l2_decay(factor[0], factor[1])
+    except Exception:
+        print(f"{name} regularization to be implemented for backend {backend_name} now.")
     raise ValueError(f"Unknown regularizer name: {name}")
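
get() now dispatches to whichever backend is active instead of hard-coding tf.keras. The identifier is a sequence whose first entry is the regularizer name and whose remaining entries are the factors. A small hedged sketch:

    from deepxde.nn import regularizers

    # ["l2", 1e-4] -> paddle.regularizer.L2Decay under paddle,
    #                 tf.keras.regularizers.L2 under tensorflow.compat.v1.
    reg = regularizers.get(["l2", 1e-4])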

deepxde/optimizers/paddle/optimizers.py

Lines changed: 16 additions & 2 deletions

@@ -19,12 +19,14 @@ def is_external_optimizer(optimizer):
     return optimizer in ["L-BFGS", "L-BFGS-B"]


-def get(params, optimizer, learning_rate=None, decay=None):
+def get(params, optimizer, learning_rate=None, decay=None, weight_decay=None):
     """Retrieves an Optimizer instance."""
     if isinstance(optimizer, paddle.optimizer.Optimizer):
         return optimizer

     if optimizer in ["L-BFGS", "L-BFGS-B"]:
+        if weight_decay is not None:
+            raise ValueError("L-BFGS optimizer doesn't support weight_decay")
         if learning_rate is not None or decay is not None:
             print("Warning: learning rate is ignored for {}".format(optimizer))
         optim = paddle.optimizer.LBFGS(

@@ -46,5 +48,17 @@ def get(params, optimizer, learning_rate=None, decay=None):
     learning_rate = _get_lr_scheduler(learning_rate, decay)

     if optimizer == "adam":
-        return paddle.optimizer.Adam(learning_rate=learning_rate, parameters=params)
+        return paddle.optimizer.Adam(learning_rate=learning_rate, parameters=params, weight_decay=weight_decay)
+    elif optimizer == "sgd":
+        return paddle.optimizer.SGD(learning_rate=learning_rate, parameters=params, weight_decay=weight_decay)
+    elif optimizer == "rmsprop":
+        return paddle.optimizer.RMSProp(
+            learning_rate=learning_rate, parameters=params, weight_decay=weight_decay,
+        )
+    elif optimizer == "adamw":
+        if weight_decay[0] == 0:
+            raise ValueError("AdamW optimizer requires non-zero weight decay")
+        return paddle.optimizer.AdamW(
+            learning_rate=learning_rate, parameters=params, weight_decay=weight_decay[0],
+        )
     raise NotImplementedError(f"{optimizer} to be implemented for backend Paddle.")
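
The Paddle optimizer factory now accepts weight_decay and covers "sgd", "rmsprop", and "adamw" in addition to "adam". A hedged sketch of calling it directly, assuming the paddle backend is selected (the layer and factor are illustrative):

    import paddle
    from deepxde.optimizers.paddle import optimizers

    net = paddle.nn.Linear(4, 1)
    # Adam/SGD/RMSProp take a regularizer object (or a float) ...
    opt = optimizers.get(
        net.parameters(), "adam", learning_rate=1e-3,
        weight_decay=paddle.regularizer.L2Decay(coeff=1e-4),
    )
    # ... while AdamW expects the factor wrapped in a sequence and requires it to be non-zero.
    opt_w = optimizers.get(net.parameters(), "adamw", learning_rate=1e-3, weight_decay=[1e-4])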
