
Commit 897e24e

Merge pull request #26 from emiliocoutinho/tfp_lbfgs

ADAM training on batches, fixed residual implementation for SA-PINN, added testing suite for distributed, non-distributed, adaptive, and baseline PINN examples

2 parents: 0e20fae + 7cde784

12 files changed: +1483 −36 lines

tensordiffeq/fit.py

Lines changed: 16 additions & 20 deletions
@@ -14,33 +14,21 @@
 os.environ["TF_GPU_THREAD_MODE"] = "gpu_private"


-def fit(obj, tf_iter=0, newton_iter=0, batch_sz=None, newton_eager=True):
-    # obj.u_model = neural_net(obj.layer_sizes)
-    # obj.build_loss()
-    # Can adjust batch size for collocation points, here we set it to N_f
-    if batch_sz is not None:
-        obj.batch_sz = batch_sz
-    else:
-        obj.batch_sz = obj.X_f_len
-        # obj.batch_sz = len(obj.x_f)
-
-    N_f = obj.X_f_len
-    # N_f = len(obj.x_f)
-    n_batches = int(N_f // obj.batch_sz)
+def fit(obj, tf_iter=0, newton_iter=0, newton_eager=True):
+
     start_time = time.time()
-    # obj.tf_optimizer = tf.keras.optimizers.Adam(lr=0.005, beta_1=.99)
-    # obj.tf_optimizer_weights = tf.keras.optimizers.Adam(lr=0.005, beta_1=.99)

     # these cant be tf.functions on initialization since the distributed strategy requires its own
     # graph using grad and adaptgrad, so they cant be compiled as tf.functions until we know dist/non-dist
     obj.grad = tf.function(obj.grad)
     if obj.verbose: print_screen(obj)
+
     print("Starting Adam training")
     # tf.profiler.experimental.start('../cache/tblogdir1')
     train_op_fn = train_op_inner(obj)
     with trange(tf_iter) as t:
         for epoch in t:
-            loss_value = train_op_fn(n_batches, obj)
+            loss_value = train_op_fn(obj)
             # Description will be displayed on the left
             t.set_description('Adam epoch %i' % (epoch + 1))
             # Postfix will be displayed on the right,
@@ -53,6 +41,7 @@ def fit(obj, tf_iter=0, newton_iter=0, batch_sz=None, newton_eager=True):

     # tf.profiler.experimental.start('../cache/tblogdir1')
     if newton_iter > 0:
+        obj.n_batches = 1
         print("Starting L-BFGS training")
         if newton_eager:
             print("Executing eager-mode L-BFGS")
@@ -92,10 +81,14 @@ def lbfgs_op(func, init_params, newton_iter):


 def train_op_inner(obj):
-    @tf.function()
-    def apply_grads(n_batches, obj=obj):
-        for _ in range(n_batches):
+    @tf.function
+    def apply_grads(obj=obj):
+        if obj.n_batches > 1:
+            obj.batch_indx_map = np.random.choice(obj.X_f_len[0], size=obj.X_f_len[0], replace=False)
+
+        for i in range(obj.n_batches):
             # unstack = tf.unstack(obj.u_model.trainable_variables, axis = 2)
+            obj.batch = i
             obj.variables = obj.u_model.trainable_variables
             obj.variables = obj.u_model.trainable_variables
             if obj.isAdaptive:
@@ -112,7 +105,10 @@ def apply_grads(n_batches, obj=obj):
             else:
                 loss_value, grads = obj.grad()
             obj.tf_optimizer.apply_gradients(zip(grads, obj.u_model.trainable_variables))
-        return loss_value
+
+        obj.batch = None
+
+        return loss_value

     return apply_grads

tensordiffeq/models.py

Lines changed: 41 additions & 16 deletions
@@ -48,6 +48,8 @@ def compile(self, layer_sizes, f_model, domain, bcs, isAdaptive=False,
         # self.X_f_in = np.asarray(tmp)
         self.X_f_in = [tf.cast(np.reshape(vec, (-1, 1)), tf.float32) for i, vec in enumerate(self.domain.X_f.T)]
         self.u_model = neural_net(self.layer_sizes)
+        self.batch = None
+        self.batch_indx_map = None
         self.lambdas = self.dict_adaptive = self.lambdas_map = None
         self.isAdaptive = isAdaptive

@@ -83,11 +85,12 @@ def update_loss(self):
         #####################################
         # Check if adaptive is allowed
         if self.isAdaptive:
-            idx_lambda_bcs = self.lambdas_map['bcs'][0]
+            if len(self.lambdas_map['bcs']) > 0:
+                idx_lambda_bcs = self.lambdas_map['bcs'][0]

         for counter_bc, bc in enumerate(self.bcs):
             loss_bc = 0.
-            # Check if the current BS is adaptive
+            # Check if the current BC is adaptive
             if self.isAdaptive:
                 isBC_adaptive = self.dict_adaptive["BCs"][counter_bc]
             else:
@@ -142,7 +145,16 @@ def update_loss(self):
         # Residual Equations
         #####################################
         # pass thorough the forward method
-        f_u_preds = self.f_model(self.u_model, *self.X_f_in)
+        if self.n_batches > 1:
+            # The collocation points will be split based on the batch_indx_map
+            # generated on the beginning of this epoch on models.train_op_inner.apply_grads
+            X_batch = []
+            for x_in in self.X_f_in:
+                indx_on_batch = self.batch_indx_map[self.batch * self.batch_sz:(self.batch + 1) * self.batch_sz]
+                X_batch.append(tf.gather(x_in, indx_on_batch))
+            f_u_preds = self.f_model(self.u_model, *X_batch)
+        else:
+            f_u_preds = self.f_model(self.u_model, *self.X_f_in)

         # If it is only one residual, just convert it to a tuple of one element
         if not isinstance(f_u_preds, tuple):
@@ -153,13 +165,23 @@ def update_loss(self):
             # Check if the current Residual is adaptive
             if self.isAdaptive:
                 isRes_adaptive = self.dict_adaptive["residual"][counter_res]
-                idx_lambda_res = self.lambdas_map['residual'][0]
                 if isRes_adaptive:
+                    idx_lambda_res = self.lambdas_map['residual'][0]
+                    lambdas2loss = self.lambdas[idx_lambda_res]
+
+                    if self.n_batches > 1:
+                        # select lambdas on minebatch
+                        lambdas2loss = tf.gather(lambdas2loss, indx_on_batch)
+
                     if self.g is not None:
-                        loss_r = g_MSE(f_u_pred, constant(0.0), self.g(self.lambdas[idx_lambda_res]))
+                        loss_r = g_MSE(f_u_pred, constant(0.0), self.g(lambdas2loss))
                     else:
-                        loss_r = MSE(f_u_pred, constant(0.0), self.lambdas[idx_lambda_res])
+                        loss_r = MSE(f_u_pred, constant(0.0), lambdas2loss)
                     idx_lambda_res += 1
+                else:
+                    # In the case where the model is Adaptive but the residual
+                    # is not adaptive, the residual loss should be computed.
+                    loss_r = MSE(f_u_pred, constant(0.0))
             else:
                 loss_r = MSE(f_u_pred, constant(0.0))

@@ -177,8 +199,18 @@ def grad(self):
         return loss_value, grads

     def fit(self, tf_iter=0, newton_iter=0, batch_sz=None, newton_eager=True):
-        if self.isAdaptive and (batch_sz is not None):
-            raise Exception("Currently we dont support minibatching for adaptive PINNs")
+
+        # Can adjust batch size for collocation points, here we set it to N_f
+        N_f = self.X_f_len[0]
+        self.batch_sz = batch_sz if batch_sz is not None else N_f
+        self.n_batches = N_f // self.batch_sz
+
+        if self.isAdaptive and self.dist:
+            raise Exception("Currently we dont support distributed training for adaptive PINNs")
+
+        if self.n_batches > 1 and self.dist:
+            raise Exception("Currently we dont support distributed minibatching training")
+
         if self.dist:
             BUFFER_SIZE = len(self.X_f_in[0])
             EPOCHS = tf_iter
@@ -194,13 +226,6 @@ def fit(self, tf_iter=0, newton_iter=0, batch_sz=None, newton_eager=True):

             print("Number of GPU devices: {}".format(self.strategy.num_replicas_in_sync))

-            self.batch_sz = batch_sz if batch_sz is not None else len(self.X_f_in[0])
-            # weights_idx = tensor(list(range(len(self.x_f))), dtype=tf.int32)
-            # print(weights_idx)
-            # print(tf.gather(self.col_weights, weights_idx))
-            N_f = len(self.X_f_in[0])
-            self.n_batches = N_f // self.batch_sz
-
             BATCH_SIZE_PER_REPLICA = self.batch_sz
             GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * self.strategy.num_replicas_in_sync

@@ -229,7 +254,7 @@ def fit(self, tf_iter=0, newton_iter=0, batch_sz=None, newton_eager=True):
             fit_dist(self, tf_iter=tf_iter, newton_iter=newton_iter, batch_sz=batch_sz, newton_eager=newton_eager)

         else:
-            fit(self, tf_iter=tf_iter, newton_iter=newton_iter, batch_sz=batch_sz, newton_eager=newton_eager)
+            fit(self, tf_iter=tf_iter, newton_iter=newton_iter, newton_eager=newton_eager)

     # L-BFGS implementation from https://github.com/pierremtb/PINNs-TF2.0
     def get_loss_and_flat_grad(self):
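The SA-PINN fix above hinges on one detail: the same index slice drives both tf.gather calls, so each self-adaptive weight stays paired with its own collocation point inside a minibatch. A minimal sketch of that alignment (the toy tensors below are illustrative, not the solver's own attributes):

import numpy as np
import tensorflow as tf

N_f, batch_sz = 8, 4
x_f = tf.reshape(tf.range(N_f, dtype=tf.float32), (-1, 1))             # stand-in collocation coordinates
lambdas = 10.0 * tf.reshape(tf.range(N_f, dtype=tf.float32), (-1, 1))  # stand-in self-adaptive weights

batch_indx_map = np.random.choice(N_f, size=N_f, replace=False)
indx_on_batch = batch_indx_map[0 * batch_sz:(0 + 1) * batch_sz]        # slice for batch 0

x_batch = tf.gather(x_f, indx_on_batch)            # points fed to f_model
lambdas_batch = tf.gather(lambdas, indx_on_batch)  # matching adaptive weights for the loss

# Row k of x_batch and row k of lambdas_batch refer to the same collocation point.
print(tf.concat([x_batch, lambdas_batch], axis=1).numpy())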

tensordiffeq/test/AC2test.py

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
+import pytest
+from tensordiffeq.boundaries import *
+import scipy.io
+import math
+import tensordiffeq as tdq
+from tensordiffeq.models import CollocationSolverND
+
+def main(args):
+
+    if args is None:
+        args = {'layer_sizes': [2, 21, 21, 21, 21, 1],
+
+                'run_functions_eagerly': False,
+                'epoch_adam': 20,
+                'epoch_lbfgs': 20,
+                'lbfgs_eager': False,
+                'isAdaptive': True,
+                'dist_training': False,
+                'dict_adaptive': {"residual": [True],
+                                  "BCs": [False, False]},
+                'N_x': 100,
+                'N_t': 50,
+                'N_f': 5000,
+                'batch_sz': 200,
+                }
+
+    layer_sizes = args['layer_sizes']
+    run_functions_eagerly = args['run_functions_eagerly']
+    epoch_adam = args['epoch_adam']
+    epoch_lbfgs = args['epoch_lbfgs']
+    lbfgs_eager = args['lbfgs_eager']
+    isAdaptive = args['isAdaptive']
+    dist_training = args['dist_training']
+    dict_adaptive = args['dict_adaptive']
+    N_x = args['N_x']
+    N_t = args['N_t']
+    N_f = args['N_f']
+    batch_sz = args['batch_sz']
+
+
+    tf.config.run_functions_eagerly(run_functions_eagerly)
+
+    Domain = DomainND(["x", "t"], time_var='t')
+
+    Domain.add("x", [-1.0, 1.0], N_x)
+    Domain.add("t", [0.0, 1.0], N_t)
+
+    Domain.generate_collocation_points(N_f)
+
+
+    def func_ic(x):
+        return x ** 2 * np.cos(math.pi * x)
+
+
+    # Conditions to be considered at the boundaries for the periodic BC
+    def deriv_model(u_model, x, t):
+        u = u_model(tf.concat([x, t], 1))
+        u_x = tf.gradients(u, x)[0]
+
+        return u, u_x
+
+
+    init = IC(Domain, [func_ic], var=[['x']])
+    x_periodic = periodicBC(Domain, ['x'], [deriv_model])
+
+    BCs = [init, x_periodic]
+
+
+    def f_model(u_model, x, t):
+        u = u_model(tf.concat([x, t], 1))
+        u_x = tf.gradients(u, x)
+        u_xx = tf.gradients(u_x, x)
+        u_t = tf.gradients(u, t)
+        c1 = tdq.utils.constant(.0001)
+        c2 = tdq.utils.constant(5.0)
+        f_u = u_t - c1 * u_xx + c2 * u * u * u - c2 * u
+        return f_u
+
+    ## Which loss functions will have adaptive weights
+    # "residual" should a tuple for the case of multiple residual equation
+    # BCs have to follow the same order as the previously defined BCs list
+    dict_adaptive = dict_adaptive
+
+    ## Weights initialization
+    # dictionary with keys "residual" and "BCs". Values must be a tuple with dimension
+    # equal to the number of residuals and boundary conditions, respectively
+
+    if dict_adaptive["residual"][0] == False:
+        init_residual = None
+    else:
+        init_residual = tf.random.uniform([N_f, 1])
+
+    if dict_adaptive["BCs"][0] == False:
+        init_IC = None
+    else:
+        init_IC = 100 * tf.random.uniform([N_x, 1])
+
+    if dict_adaptive["BCs"][1] == False:
+        init_BC = None
+    else:
+        init_BC = tf.random.uniform([N_t, 1])
+
+    init_weights = {"residual": [init_residual],
+                    "BCs": [init_IC, init_BC]}
+
+
+
+    model = CollocationSolverND()
+    model.compile(layer_sizes, f_model, Domain, BCs, isAdaptive=isAdaptive,
+                  dict_adaptive=dict_adaptive, init_weights=init_weights, dist=dist_training)
+
+    model.fit(tf_iter=epoch_adam, newton_iter=epoch_lbfgs, newton_eager=lbfgs_eager, batch_sz=batch_sz)
+
+    return
+
+if __name__ == "__main__":
+    main(args=None)
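Since main() takes an optional args dict, the same test can be driven with other configurations by overriding individual keys. A hypothetical driver (the import path and the shortened epoch counts are assumptions, not part of this commit):

from tensordiffeq.test import AC2test  # import path is an assumption about the package layout

quick_args = {'layer_sizes': [2, 21, 21, 21, 21, 1],
              'run_functions_eagerly': False,
              'epoch_adam': 10,
              'epoch_lbfgs': 0,        # newton_iter=0 skips the L-BFGS stage
              'lbfgs_eager': False,
              'isAdaptive': True,
              'dist_training': False,
              'dict_adaptive': {"residual": [True], "BCs": [False, False]},
              'N_x': 100, 'N_t': 50, 'N_f': 5000,
              'batch_sz': 500}         # 5000 // 500 = 10 Adam minibatches per epoch

AC2test.main(quick_args)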
