
Commit af0083b

Merge pull request #31 from lululxvi/master
latest update
2 parents cd06627 + 99c3626 commit af0083b

13 files changed: +357 lines, −57 lines

deepxde/backend/jax/tensor.py

Lines changed: 4 additions & 0 deletions
@@ -165,6 +165,10 @@ def reduce_max(input_tensor):
     return jnp.max(input_tensor)
 
 
+def norm(tensor, ord=None, axis=None, keepdims=False):
+    return jnp.linalg.norm(tensor, ord=ord, axis=axis, keepdims=keepdims)
+
+
 def zeros(shape, dtype):
     return jnp.zeros(shape, dtype=dtype)

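The new backend op is a thin wrapper around `jnp.linalg.norm`. A minimal sketch of calling it through the backend module, assuming the JAX backend is selected and that `deepxde.backend` re-exports it like the other tensor ops:

    import jax.numpy as jnp
    from deepxde import backend as bkd

    x = jnp.array([[3.0, 4.0], [1.0, 0.0]])
    print(bkd.norm(x, axis=1))                        # per-row L2 norms: [5. 1.]
    print(bkd.norm(x, ord=1, axis=1, keepdims=True))  # per-row L1 norms, shape (2, 1)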
deepxde/backend/paddle/__init__.py

Lines changed: 17 additions & 1 deletion
@@ -1 +1,17 @@
-from .tensor import *  # pylint: disable=redefined-builtin
+import os
+
+from .tensor import *  # pylint: disable=redefined-builtin
+
+# enable prim if specified
+enable_prim_value = os.getenv("PRIM")
+enable_prim = enable_prim_value.lower() in ['1', 'true', 'yes', 'on'] if enable_prim_value else False
+if enable_prim:
+    # Mostly for compiler running with dy2st.
+    from paddle.framework import core
+
+    core.set_prim_eager_enabled(True)
+    # The following protected member access is required.
+    # There is no alternative public API available now.
+    # pylint: disable=protected-access
+    core._set_prim_all_enabled(True)
+    print("Prim mode is enabled.")

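Because the PRIM check runs when the paddle backend module is imported, the environment variable has to be set before DeepXDE is imported. A minimal sketch, where the accepted values are the ones listed in the snippet above and DDE_BACKEND is DeepXDE's usual backend selector:

    import os

    # Must be set before `import deepxde`, since the check above runs at import time.
    os.environ["PRIM"] = "1"             # any of "1", "true", "yes", "on"
    os.environ["DDE_BACKEND"] = "paddle"

    import deepxde as dde                # prints "Prim mode is enabled."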
deepxde/config.py

Lines changed: 21 additions & 2 deletions
@@ -40,6 +40,8 @@
 
 # Default float type
 real = Real(32)
+# Using mixed precision
+mixed = False
 # Random seed
 random_seed = None
 if backend_name == "jax":
@@ -71,11 +73,14 @@ def default_float():
 def set_default_float(value):
     """Sets the default float type.
 
-    The default floating point type is 'float32'.
+    The default floating point type is 'float32'. Mixed precision uses the method in the paper:
+    `J. Hayford, J. Goldman-Wetzler, E. Wang, & L. Lu. Speeding up and reducing memory usage for scientific machine learning via mixed precision.
+    Computer Methods in Applied Mechanics and Engineering, 428, 117093, 2024 <https://doi.org/10.1016/j.cma.2024.117093>`_.
 
     Args:
-        value (String): 'float16', 'float32', or 'float64'.
+        value (String): 'float16', 'float32', 'float64', or 'mixed' (mixed precision).
     """
+    global mixed
     if value == "float16":
         print("Set the default float type to float16")
         real.set_float16()
@@ -85,6 +90,20 @@ def set_default_float(value):
     elif value == "float64":
         print("Set the default float type to float64")
         real.set_float64()
+    elif value == "mixed":
+        print("Set the float type to mixed precision of float16 and float32")
+        mixed = True
+        if backend_name == "tensorflow":
+            real.set_float16()
+            tf.keras.mixed_precision.set_global_policy("mixed_float16")
+            return  # don't try to set it again below
+        if backend_name == "pytorch":
+            # Use float16 during the forward and backward passes, but store in float32
+            real.set_float32()
+        else:
+            raise ValueError(
+                f"{backend_name} backend does not currently support mixed precision."
+            )
     else:
         raise ValueError(f"{value} not supported in deepXDE")
     if backend_name in ["tensorflow.compat.v1", "tensorflow"]:

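With this change, mixed precision is requested the same way as a concrete dtype. A minimal usage sketch, assuming the tensorflow or pytorch backend is active (other backends raise the ValueError added above):

    import deepxde as dde

    # Call before building the network and dde.Model so that layers pick up the float16 policy.
    dde.config.set_default_float("mixed")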
deepxde/data/mf.py

Lines changed: 15 additions & 11 deletions
@@ -1,7 +1,8 @@
 import numpy as np
 
 from .data import Data
-from ..backend import tf
+from .. import backend as bkd
+from .. import config
 from ..utils import run_if_any_none, standardize
 
 
@@ -83,20 +84,20 @@ def __init__(
         standardize=False,
     ):
         if X_lo_train is not None:
-            self.X_lo_train = X_lo_train
-            self.X_hi_train = X_hi_train
-            self.y_lo_train = y_lo_train
-            self.y_hi_train = y_hi_train
-            self.X_hi_test = X_hi_test
-            self.y_hi_test = y_hi_test
+            self.X_lo_train = X_lo_train.astype(config.real(np))
+            self.X_hi_train = X_hi_train.astype(config.real(np))
+            self.y_lo_train = y_lo_train.astype(config.real(np))
+            self.y_hi_train = y_hi_train.astype(config.real(np))
+            self.X_hi_test = X_hi_test.astype(config.real(np))
+            self.y_hi_test = y_hi_test.astype(config.real(np))
         elif fname_lo_train is not None:
-            data = np.loadtxt(fname_lo_train)
+            data = np.loadtxt(fname_lo_train).astype(config.real(np))
             self.X_lo_train = data[:, col_x]
             self.y_lo_train = data[:, col_y]
-            data = np.loadtxt(fname_hi_train)
+            data = np.loadtxt(fname_hi_train).astype(config.real(np))
             self.X_hi_train = data[:, col_x]
             self.y_hi_train = data[:, col_y]
-            data = np.loadtxt(fname_hi_test)
+            data = np.loadtxt(fname_hi_test).astype(config.real(np))
             self.X_hi_test = data[:, col_x]
             self.y_hi_test = data[:, col_y]
         else:
@@ -116,7 +117,10 @@ def losses_train(self, targets, outputs, loss_fn, inputs, model, aux=None):
         return [loss_lo, loss_hi]
 
     def losses_test(self, targets, outputs, loss_fn, inputs, model, aux=None):
-        return [0, loss_fn(targets[1], outputs[1])]
+        return [
+            bkd.as_tensor(0, dtype=config.real(bkd.lib)),
+            loss_fn(targets[1], outputs[1]),
+        ]
 
     @run_if_any_none("X_train", "y_train")
     def train_next_batch(self, batch_size=None):

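The added casts make the stored arrays follow `config.real(np)` rather than whatever dtype the inputs or `np.loadtxt` happen to carry. A hedged sketch of constructing the multifidelity dataset from in-memory arrays, assuming the existing `dde.data.MfDataSet` class; the data below is synthetic and only illustrative:

    import numpy as np
    import deepxde as dde

    # float64 inputs are now cast to dde.config.real(np) (float32 by default) inside the dataset.
    X_lo = np.linspace(0, 1, 51)[:, None]
    X_hi = np.linspace(0, 1, 15)[:, None]
    y_lo = np.sin(8 * np.pi * X_lo)
    y_hi = np.sin(8 * np.pi * X_hi) ** 2

    data = dde.data.MfDataSet(
        X_lo_train=X_lo, y_lo_train=y_lo,
        X_hi_train=X_hi, y_hi_train=y_hi,
        X_hi_test=X_hi, y_hi_test=y_hi,
    )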
deepxde/model.py

Lines changed: 5 additions & 1 deletion
@@ -374,7 +374,11 @@ def closure():
                 total_loss.backward()
                 return total_loss
 
-            self.opt.step(closure)
+            def closure_mixed():
+                with torch.autocast(device_type=torch.get_default_device().type, dtype=torch.float16):
+                    return closure()
+
+            self.opt.step(closure if not config.mixed else closure_mixed)
             if self.lr_scheduler is not None:
                 self.lr_scheduler.step()

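For the PyTorch backend, the change applies mixed precision by wrapping the existing optimizer closure in `torch.autocast`, so the forward and backward passes run in reduced precision while the parameters stay in float32. A self-contained sketch of the same wrapping pattern outside DeepXDE; the model and data here are placeholders:

    import torch

    model = torch.nn.Linear(2, 1)
    opt = torch.optim.LBFGS(model.parameters())
    x, y = torch.randn(64, 2), torch.randn(64, 1)

    def closure():
        opt.zero_grad()
        loss = torch.nn.functional.mse_loss(model(x), y)
        loss.backward()
        return loss

    def closure_mixed():
        # float16 autocast requires a GPU; bfloat16 is the low-precision type supported on CPU.
        device_type = "cuda" if torch.cuda.is_available() else "cpu"
        dtype = torch.float16 if device_type == "cuda" else torch.bfloat16
        with torch.autocast(device_type=device_type, dtype=dtype):
            return closure()

    opt.step(closure_mixed)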
deepxde/nn/paddle/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -4,11 +4,13 @@
     "DeepONet",
     "DeepONetCartesianProd",
     "FNN",
+    "MfNN",
     "MsFFN",
     "PFNN",
     "STMsFFN",
 ]
 
 from .deeponet import DeepONet, DeepONetCartesianProd
 from .fnn import FNN, PFNN
+from .mfnn import MfNN
 from .msffn import MsFFN, STMsFFN

deepxde/nn/paddle/mfnn.py

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
+import paddle
+
+from .nn import NN
+from .. import activations
+from .. import initializers
+from .. import regularizers
+from ... import config
+
+
+class MfNN(NN):
+    """Multifidelity neural networks."""
+
+    def __init__(
+        self,
+        layer_sizes_low_fidelity,
+        layer_sizes_high_fidelity,
+        activation,
+        kernel_initializer,
+        regularization=None,
+        residue=False,
+        trainable_low_fidelity=True,
+        trainable_high_fidelity=True,
+    ):
+        super().__init__()
+        self.layer_size_lo = layer_sizes_low_fidelity
+        self.layer_size_hi = layer_sizes_high_fidelity
+
+        self.activation = activations.get(activation)
+        self.initializer = initializers.get(kernel_initializer)
+        self.trainable_lo = trainable_low_fidelity
+        self.trainable_hi = trainable_high_fidelity
+        self.residue = residue
+        self.regularizer = regularizers.get(regularization)
+
+        # low fidelity
+        self.linears_lo = self._init_dense(self.layer_size_lo, self.trainable_lo)
+
+        # high fidelity
+        # linear part
+        self.linears_hi_l = paddle.nn.Linear(
+            in_features=self.layer_size_lo[0] + self.layer_size_lo[-1],
+            out_features=self.layer_size_hi[-1],
+            weight_attr=paddle.ParamAttr(initializer=self.initializer),
+        )
+        if not self.trainable_hi:
+            for param in self.linears_hi_l.parameters():
+                param.stop_gradient = False
+        # nonlinear part
+        self.layer_size_hi = [
+            self.layer_size_lo[0] + self.layer_size_lo[-1]
+        ] + self.layer_size_hi
+        self.linears_hi = self._init_dense(self.layer_size_hi, self.trainable_hi)
+        # linear + nonlinear
+        if not self.residue:
+            alpha = self._init_alpha(0.0, self.trainable_hi)
+            self.add_parameter("alpha", alpha)
+        else:
+            alpha1 = self._init_alpha(0.0, self.trainable_hi)
+            alpha2 = self._init_alpha(0.0, self.trainable_hi)
+            self.add_parameter("alpha1", alpha1)
+            self.add_parameter("alpha2", alpha2)
+
+    def _init_dense(self, layer_size, trainable):
+        linears = paddle.nn.LayerList()
+        for i in range(len(layer_size) - 1):
+            linear = paddle.nn.Linear(
+                in_features=layer_size[i],
+                out_features=layer_size[i + 1],
+                weight_attr=paddle.ParamAttr(initializer=self.initializer),
+            )
+            if not trainable:
+                for param in linear.parameters():
+                    param.stop_gradient = False
+            linears.append(linear)
+        return linears
+
+    def _init_alpha(self, value, trainable):
+        alpha = paddle.create_parameter(
+            shape=[1],
+            dtype=config.real(paddle),
+            default_initializer=paddle.nn.initializer.Constant(value),
+        )
+        alpha.stop_gradient = not trainable
+        return alpha
+
+    def forward(self, inputs):
+        # low fidelity
+        y = inputs
+        for i, linear in enumerate(self.linears_lo):
+            y = linear(y)
+            if i != len(self.linears_lo) - 1:
+                y = self.activation(y)
+        y_lo = y
+
+        # high fidelity
+        x_hi = paddle.concat([inputs, y_lo], axis=1)
+        # linear
+        y_hi_l = self.linears_hi_l(x_hi)
+        # nonlinear
+        y = x_hi
+        for i, linear in enumerate(self.linears_hi):
+            y = linear(y)
+            if i != len(self.linears_hi) - 1:
+                y = self.activation(y)
+        y_hi_nl = y
+        # linear + nonlinear
+        if not self.residue:
+            alpha = paddle.tanh(self.alpha)
+            y_hi = y_hi_l + alpha * y_hi_nl
+        else:
+            alpha1 = paddle.tanh(self.alpha1)
+            alpha2 = paddle.tanh(self.alpha2)
+            y_hi = y_lo + 0.1 * (alpha1 * y_hi_l + alpha2 * y_hi_nl)
+
+        return y_lo, y_hi

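A hedged sketch of training the new paddle `MfNN` on a multifidelity dataset, assuming `DDE_BACKEND=paddle` and that the class is exposed as `dde.nn.MfNN` per the `__init__.py` change above; layer sizes, optimizer, and iteration count are illustrative, and `data` refers to an `MfDataSet` like the one sketched earlier:

    import deepxde as dde

    net = dde.nn.MfNN(
        [1, 20, 20, 1],   # low-fidelity subnetwork: 1 input, two hidden layers, 1 output
        [10, 10, 1],      # nonlinear high-fidelity subnetwork; its input is concat([x, y_lo])
        "tanh",
        "Glorot uniform",
    )
    model = dde.Model(data, net)
    model.compile("adam", lr=0.001)
    model.train(iterations=10000)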
deepxde/zcs/operator.py

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 class PDEOperatorCartesianProd(BasePDEOperatorCartesianProd):
     """Derived `PDEOperatorCartesianProd` class for ZCS support."""
 
-    def _losses(self, outputs, loss_fn, inputs, model, num_func):
+    def _losses(self, outputs, loss_fn, inputs, model, num_func, aux):
         # PDE
         f = []
         if self.pde.pde is not None:

docs/user/faq.rst

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,8 @@ General usage
   | **A**: `#5`_
 - | **Q**: By default, DeepXDE uses ``float32``. How can I use ``float64``?
   | **A**: `#28`_
+- | **Q**: How can I use mixed precision training?
+  | **A**: Use ``dde.config.set_default_float("mixed")`` with the ``tensorflow`` or ``pytorch`` backends. See `this paper <https://doi.org/10.1016/j.cma.2024.117093>`_ for more information.
 - | **Q**: I want to set the global random seeds.
   | **A**: `#353`_
 - | **Q**: GPU.
