|
1 | | -__all__ = ["set_LBFGS_options", "set_hvd_opt_options"] |
| 1 | +__all__ = ["set_LBFGS_options", "set_NNCG_options", "set_hvd_opt_options"] |
2 | 2 |
|
3 | 3 | from ..backend import backend_name |
4 | 4 | from ..config import hvd |
5 | 5 |
|
6 | 6 | LBFGS_options = {} |
| 7 | +NNCG_options = {} |
7 | 8 | if hvd is not None: |
8 | 9 | hvd_opt_options = {} |
9 | 10 |
|
@@ -60,6 +61,60 @@ def set_LBFGS_options( |
60 | 61 | LBFGS_options["maxls"] = maxls |
61 | 62 |
|
62 | 63 |
|
def set_NNCG_options(
    lr=1,
    rank=50,
    mu=1e-1,
    updatefreq=20,
    chunksz=1,
    cgtol=1e-16,
    cgmaxiter=1000,
    lsfun="armijo",
    verbose=False,
):
    """Store the hyperparameters of the NysNewtonCG (NNCG) optimizer.

    Every argument is recorded under its own name in the module-level
    ``NNCG_options`` dictionary, replacing any value stored there before.
    The NNCG optimizer only supports PyTorch.

    Args:
        lr (float):
            Learning rate (applied before the line search).
        rank (int):
            Rank of the preconditioner matrix used in preconditioned conjugate
            gradient.
        mu (float):
            Hessian damping parameter.
        updatefreq (int):
            How often the preconditioner matrix in preconditioned conjugate
            gradient is updated. NNCG itself does not consume this value;
            it is used in _train_pytorch_nncg in deepxde/model.py.
        chunksz (int):
            Number of Hessian-vector products to compute in parallel when
            constructing the preconditioner. If `chunksz` is 1, the
            Hessian-vector products are computed serially.
        cgtol (float):
            Convergence tolerance for the conjugate gradient method. The
            iteration stops when `||r||_2 <= cgtol`, where `r` is the residual.
            This is an absolute tolerance, not a relative one.
        cgmaxiter (int):
            Maximum number of iterations for the conjugate gradient method.
        lsfun (str):
            The line search function used to find the step size. The default
            value is "armijo". The other option is None.
        verbose (bool):
            If `True`, prints the eigenvalues of the Nyström approximation of
            the Hessian.
    """
    # A single keyword-form update writes the same keys, in the same order,
    # as assigning them one at a time.
    NNCG_options.update(
        lr=lr,
        rank=rank,
        mu=mu,
        updatefreq=updatefreq,
        chunksz=chunksz,
        cgtol=cgtol,
        cgmaxiter=cgmaxiter,
        lsfun=lsfun,
        verbose=verbose,
    )
| 116 | + |
| 117 | + |
63 | 118 | def set_hvd_opt_options( |
64 | 119 | compression=None, |
65 | 120 | op=None, |
@@ -91,6 +146,7 @@ def set_hvd_opt_options( |
91 | 146 |
|
92 | 147 |
|
93 | 148 | set_LBFGS_options() |
| 149 | +set_NNCG_options() |
94 | 150 | if hvd is not None: |
95 | 151 | set_hvd_opt_options() |
96 | 152 |
|
|
0 commit comments