Commit 8bc9141

Backend Paddle: Add LBFGS optimizer (#1095)

1 parent fc978ad

9 files changed: +78 −24 lines

deepxde/model.py (47 additions, 2 deletions)

```diff
@@ -472,11 +472,24 @@ def train_step(inputs, targets, auxiliary_vars):
             if self.lr_scheduler is not None:
                 self.lr_scheduler.step()

+        def train_step_lbfgs(inputs, targets, auxiliary_vars):
+            def closure():
+                losses = outputs_losses_train(inputs, targets, auxiliary_vars)[1]
+                total_loss = paddle.sum(losses)
+                self.opt.clear_grad()
+                total_loss.backward()
+                return total_loss
+
+            self.opt.step(closure)
         # Callables
         self.outputs = outputs
         self.outputs_losses_train = outputs_losses_train
         self.outputs_losses_test = outputs_losses_test
-        self.train_step = train_step
+        self.train_step = (
+            train_step
+            if not optimizers.is_external_optimizer(self.opt_name)
+            else train_step_lbfgs
+        )

     def _outputs(self, training, inputs):
         if backend_name == "tensorflow.compat.v1":
```
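With this change, compiling with "L-BFGS" on the paddle backend dispatches training through the closure-based `train_step_lbfgs` above. A minimal usage sketch, not part of the commit: a hypothetical 1D Poisson problem, assuming the paddle backend is active (e.g. run with `DDE_BACKEND=paddle`):

```python
import deepxde as dde


def pde(x, y):
    # Residual of y'' = 2 on the interval (-1, 1).
    dy_xx = dde.grad.hessian(y, x)
    return dy_xx - 2


geom = dde.geometry.Interval(-1, 1)
bc = dde.icbc.DirichletBC(geom, lambda x: 0, lambda x, on_boundary: on_boundary)
data = dde.data.PDE(geom, pde, bc, num_domain=16, num_boundary=2)
net = dde.nn.FNN([1] + [20] * 2 + [1], "tanh", "Glorot normal")

model = dde.Model(data, net)
model.compile("L-BFGS")  # external optimizer, so train_step_lbfgs is selected
losshistory, train_state = model.train()  # iteration budget comes from LBFGS_options
```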
```diff
@@ -599,7 +612,7 @@ def train(
             elif backend_name == "pytorch":
                 self._train_pytorch_lbfgs()
             elif backend_name == "paddle":
-                raise NotImplementedError("L-BFGS will be implemented soon in PaddlePaddle")
+                self._train_paddle_lbfgs()
         else:
             if iterations is None:
                 raise ValueError("No iterations for {}.".format(self.opt_name))
```
```diff
@@ -740,6 +753,38 @@ def _train_pytorch_lbfgs(self):
             if self.stop_training:
                 break

+    def _train_paddle_lbfgs(self):
+        prev_n_iter = 0
+
+        while prev_n_iter < optimizers.LBFGS_options["maxiter"]:
+            self.callbacks.on_epoch_begin()
+            self.callbacks.on_batch_begin()
+
+            self.train_state.set_data_train(
+                *self.data.train_next_batch(self.batch_size)
+            )
+            self._train_step(
+                self.train_state.X_train,
+                self.train_state.y_train,
+                self.train_state.train_aux_vars,
+            )
+
+            n_iter = self.opt.state_dict()["state"]["n_iter"]
+            if prev_n_iter == n_iter:
+                # Converged
+                break
+
+            self.train_state.epoch += n_iter - prev_n_iter
+            self.train_state.step += n_iter - prev_n_iter
+            prev_n_iter = n_iter
+            self._test()
+
+            self.callbacks.on_batch_end()
+            self.callbacks.on_epoch_end()
+
+            if self.stop_training:
+                break
+
     def _test(self):
         (
             self.train_state.y_pred_train,
```
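The outer loop calls `opt.step(closure)` once per pass and reads the optimizer's internal iteration counter to detect convergence: paddle's L-BFGS runs up to `max_iter` inner iterations per `step()` call, so if `n_iter` stops advancing, no further progress was made. A standalone sketch of the same pattern on a toy least-squares problem (the `state_dict()["state"]["n_iter"]` access mirrors the commit; the toy setup is illustrative):

```python
import paddle
from paddle.incubate.optimizer import LBFGS

# Toy problem: recover w_true from y = x @ w_true.
x = paddle.randn([64, 3])
w_true = paddle.to_tensor([[1.0], [-2.0], [0.5]])
y = x @ w_true
w = paddle.create_parameter(shape=[3, 1], dtype="float32")

opt = LBFGS(
    lr=1,
    max_iter=100,  # inner iterations per step() call, cf. iter_per_step
    history_size=50,
    line_search_fn="strong_wolfe",
    parameters=[w],
)


def closure():
    loss = paddle.mean((x @ w - y) ** 2)
    opt.clear_grad()
    loss.backward()
    return loss


# Outer loop in the style of _train_paddle_lbfgs: stop once the
# optimizer's iteration counter no longer advances (converged).
prev_n_iter = 0
for _ in range(10):
    opt.step(closure)
    n_iter = opt.state_dict()["state"]["n_iter"]
    if n_iter == prev_n_iter:
        break
    prev_n_iter = n_iter
```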

deepxde/optimizers/config.py (7 additions, 6 deletions)

```diff
@@ -21,23 +21,24 @@ def set_LBFGS_options(
     - TensorFlow 1.x: `scipy.optimize.minimize <https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html#optimize-minimize-lbfgsb>`_
     - TensorFlow 2.x: `tfp.optimizer.lbfgs_minimize <https://www.tensorflow.org/probability/api_docs/python/tfp/optimizer/lbfgs_minimize>`_
     - PyTorch: `torch.optim.LBFGS <https://pytorch.org/docs/stable/generated/torch.optim.LBFGS.html>`_
+    - Paddle: `paddle.incubate.optimizer.LBFGS <https://www.paddlepaddle.org.cn/documentation/docs/en/develop/api/paddle/incubate/optimizer/LBFGS_en.html>`_

     I find empirically that torch.optim.LBFGS and scipy.optimize.minimize are better than
     tfp.optimizer.lbfgs_minimize in terms of the final loss value.

     Args:
-        maxcor (int): `maxcor` (scipy), `num_correction_pairs` (tfp), `history_size` (torch).
+        maxcor (int): `maxcor` (scipy), `num_correction_pairs` (tfp), `history_size` (torch), `history_size` (paddle).
            The maximum number of variable metric corrections used to define the limited
            memory matrix. (The limited memory BFGS method does not store the full
            hessian but uses this many terms in an approximation to it.)
-        ftol (float): `ftol` (scipy), `f_relative_tolerance` (tfp), `tolerance_change` (torch).
+        ftol (float): `ftol` (scipy), `f_relative_tolerance` (tfp), `tolerance_change` (torch), `tolerance_change` (paddle).
            The iteration stops when `(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol`.
-        gtol (float): `gtol` (scipy), `tolerance` (tfp), `tolerance_grad` (torch).
+        gtol (float): `gtol` (scipy), `tolerance` (tfp), `tolerance_grad` (torch), `tolerance_grad` (paddle).
            The iteration will stop when `max{|proj g_i | i = 1, ..., n} <= gtol` where
            `pg_i` is the i-th component of the projected gradient.
-        maxiter (int): `maxiter` (scipy), `max_iterations` (tfp), `max_iter` (torch).
+        maxiter (int): `maxiter` (scipy), `max_iterations` (tfp), `max_iter` (torch), `max_iter` (paddle).
            Maximum number of iterations.
-        maxfun (int): `maxfun` (scipy), `max_eval` (torch).
+        maxfun (int): `maxfun` (scipy), `max_eval` (torch), `max_eval` (paddle).
            Maximum number of function evaluations. If ``None``, `maxiter` * 1.25.
        maxls (int): `maxls` (scipy), `max_line_search_iterations` (tfp).
            Maximum number of line search steps (per iteration).
@@ -62,7 +63,7 @@ def set_LBFGS_options(


     # Backend-dependent options
-    if backend_name == "pytorch":
+    if backend_name in ["pytorch", "paddle"]:
         # number of iterations per optimization call
         LBFGS_options["iter_per_step"] = min(1000, LBFGS_options["maxiter"])
         LBFGS_options["fun_per_step"] = (
```

deepxde/optimizers/paddle/optimizers.py (19 additions, 5 deletions)

```diff
@@ -1,6 +1,9 @@
 __all__ = ["get", "is_external_optimizer"]

 import paddle
+from paddle.incubate.optimizer import LBFGS
+
+from ..config import LBFGS_options


 def _get_lr_scheduler(lr, decay):
@@ -22,10 +25,21 @@ def get(params, optimizer, learning_rate=None, decay=None):
     if isinstance(optimizer, paddle.optimizer.Optimizer):
         return optimizer

-    if is_external_optimizer(optimizer):
-        # TODO: add support for L-BFGS and L-BFGS-B
-        raise NotImplementedError(f"{optimizer} is not implemented in PaddlePaddle")
-
+    if optimizer in ["L-BFGS", "L-BFGS-B"]:
+        if learning_rate is not None or decay is not None:
+            print("Warning: learning rate is ignored for {}".format(optimizer))
+        optim = LBFGS(
+            lr=1,
+            max_iter=LBFGS_options["iter_per_step"],
+            max_eval=LBFGS_options["fun_per_step"],
+            tolerance_grad=LBFGS_options["gtol"],
+            tolerance_change=LBFGS_options["ftol"],
+            history_size=LBFGS_options["maxcor"],
+            line_search_fn='strong_wolfe',
+            parameters=params,
+        )
+        return optim
+
     if learning_rate is None:
         raise ValueError("No learning rate for {}.".format(optimizer))

@@ -34,4 +48,4 @@ def get(params, optimizer, learning_rate=None, decay=None):

     if optimizer == "adam":
         return paddle.optimizer.Adam(learning_rate=learning_rate, parameters=params)
-    raise NotImplementedError(f"{optimizer} is not implemented in PaddlePaddle")
+    raise NotImplementedError(f"{optimizer} to be implemented for backend Paddle.")
```

examples/pinn_forward/Beltrami_flow.py (1 addition, 1 deletion)

```diff
@@ -1,4 +1,4 @@
-"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch"""
+"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch, paddle"""
 import deepxde as dde
 import numpy as np
```
examples/pinn_forward/Burgers_RAR.py (1 addition, 1 deletion)

```diff
@@ -1,4 +1,4 @@
-"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch"""
+"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch, paddle"""
 import deepxde as dde
 import numpy as np
```
examples/pinn_forward/Kovasznay_flow.py (1 addition, 1 deletion)

```diff
@@ -1,4 +1,4 @@
-"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch"""
+"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch, paddle"""
 import deepxde as dde
 import numpy as np
```
examples/pinn_forward/Lotka_Volterra.py (0 additions, 6 deletions)

```diff
@@ -77,13 +77,7 @@ def input_transform(t):
 # def input_transform(t):
 #     return paddle.concat(
 #         (
-#             t,
 #             paddle.sin(t),
-#             paddle.sin(2 * t),
-#             paddle.sin(3 * t),
-#             paddle.sin(4 * t),
-#             paddle.sin(5 * t),
-#             paddle.sin(6 * t),
 #         ),
 #         axis=1,
 #     )
```
examples/pinn_forward/Poisson_Lshape.py (1 addition, 1 deletion)

```diff
@@ -1,4 +1,4 @@
-"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch, jax"""
+"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch, jax, paddle"""
 import deepxde as dde
```
examples/pinn_forward/heat.py (1 addition, 1 deletion)

```diff
@@ -1,4 +1,4 @@
-"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch"""
+"""Backend supported: tensorflow.compat.v1, tensorflow, pytorch, paddle"""
 import deepxde as dde
 import numpy as np
```
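The docstring updates above advertise paddle support; each of these scripts can now be run with the paddle backend selected. A sketch of selecting the backend programmatically, equivalent to `DDE_BACKEND=paddle python heat.py`:

```python
import os

# deepxde reads DDE_BACKEND at import time, so set it first.
os.environ["DDE_BACKEND"] = "paddle"

import deepxde as dde  # noqa: E402  (imported after the env var on purpose)
```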