
Commit 34d744f

Merge pull request #99 from thomaspinder/Fix-fit_batches-and-add-history
Remove TensorFlow dependency, Fix fit batches and add history
2 parents: 99ef16f + 9b56623

File tree

8 files changed, +511 -59 lines

README.md

Lines changed: 4 additions & 4 deletions
@@ -34,10 +34,10 @@ We have recently set up a Slack channel where we hope to facilitate discussions

  - [**Conjugate Inference**](https://gpjax.readthedocs.io/en/latest/nbs/regression.html)
  - [**Classification with MCMC**](https://gpjax.readthedocs.io/en/latest/nbs/classification.html)
- - [**Sparse Variational Inference**](https://gpjax.readthedocs.io/en/latest/nbs/sparse_regression.html)
+ - [**Sparse Variational Inference**](https://gpjax.readthedocs.io/en/latest/nbs/uncollapsed_vi.html)
  - [**BlackJax Integration**](https://gpjax.readthedocs.io/en/latest/nbs/classification.html)
  - [**Laplace Approximations**](https://gpjax.readthedocs.io/en/latest/nbs/classification.html#Laplace-approximation)
- - [**TensorFlow Probability Integration**](https://gpjax.readthedocs.io/en/latest/nbs/tfp_intergation.html)
+ - [**TensorFlow Probability Integration**](https://gpjax.readthedocs.io/en/latest/nbs/tfp_integration.html)
  - [**Inference on Non-Euclidean Spaces**](https://gpjax.readthedocs.io/en/latest/nbs/kernels.html#Custom-Kernel)
  - [**Inference on Graphs**](https://gpjax.readthedocs.io/en/latest/nbs/graph_kernels.html)
  - [**Learning Gaussian Process Barycentres**](https://gpjax.readthedocs.io/en/latest/nbs/graph_kernels.html)

@@ -139,7 +139,7 @@ pip install gpjax

  ### Development version

- To install the latest, possibly unstable, version, the following steps should be followed. It is by no means compulsory, but we do advise that you do all of the below inside a virtual environment.
+ To install the latest (possibly unstable) version, the following steps should be followed. It is by no means compulsory, but we do advise that you do all of the below inside a virtual environment.

  ```bash
  git clone https://github.com/thomaspinder/GPJax.git

@@ -156,7 +156,7 @@ python -m pytest tests/

  ## Citing GPJax

- If you use GPJax in your research, please cite our [JOSS paper](https://joss.theoj.org/papers/10.21105/joss.04455#). A sample Bibtex file is
+ If you use GPJax in your research, please cite our [JOSS paper](https://joss.theoj.org/papers/10.21105/joss.04455#). Sample Bibtex is given below:
  ```
  @article{Pinder2022,
    doi = {10.21105/joss.04455},

docs/_static/GP.svg

Lines changed: 434 additions & 0 deletions
(SVG image file; contents not rendered in the diff view)

docs/index.rst

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ Welcome to GPJax!

  GPJax is a didactic Gaussian process library that supports GPU acceleration and just-in-time compilation. We seek to provide a flexible API as close as possible to how the underlying mathematics is written on paper to enable researchers to rapidly prototype and develop new ideas.


- .. image:: ./_static/GP.pdf
+ .. image:: ./_static/GP.svg
     :width: 800
     :alt: Gaussian process posterior.
     :align: center

docs/nbs/uncollapsed_vi.ipynb

Lines changed: 4 additions & 4 deletions
@@ -255,7 +255,7 @@

  "    train_data = D, \n",
  "    optax_optim = optimiser,\n",
  "    n_iters=4000,\n",
- "    seed = 42,\n",
+ "    key = jr.PRNGKey(42),\n",
  "    batch_size= 128\n",
  ")\n",
  "learned_params, training_history = inference_state.unpack()\n",

@@ -322,7 +322,7 @@

  "custom_cell_magics": "kql"
  },
  "kernelspec": {
- "display_name": "Python 3.9.7 ('gpjax')",
+ "display_name": "Python 3.10.0 ('base')",
  "language": "python",
  "name": "python3"
  },

@@ -336,11 +336,11 @@

  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.9.7"
+ "version": "3.10.0"
  },
  "vscode": {
  "interpreter": {
- "hash": "920091140e6b97de16b405af485d142952a229f5dad61a888f46227f5acb94cf"
+ "hash": "3d597f4c481aa0f25dceb95d2a0067e73c0966dcbd003d741d821a7208527ecf"
  }
  }
  },
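For readers following along, the notebook cell above now passes a JAX PRNG key where an integer seed was previously accepted. A minimal sketch of the new usage is given below; `negative_elbo`, `params`, `trainables`, and the dataset `D` are placeholders standing in for objects built earlier in the tutorial, and only the argument names visible in the diffs on this page are taken from the source.

```python
import jax.random as jr
import optax

from gpjax.abstractions import fit_batches

# `negative_elbo`, `params`, `trainables` and the dataset `D` are assumed to
# have been constructed in earlier notebook cells; they are placeholders here.
optimiser = optax.adam(learning_rate=0.01)

inference_state = fit_batches(
    negative_elbo,
    params,
    trainables,
    train_data=D,
    optax_optim=optimiser,
    n_iters=4000,
    key=jr.PRNGKey(42),  # a PRNG key now replaces the old integer seed
    batch_size=128,
)
learned_params, training_history = inference_state.unpack()
```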

gpjax/abstractions.py

Lines changed: 38 additions & 25 deletions
@@ -4,15 +4,14 @@

  import jax.numpy as jnp
  import jax.random as jr
  import optax
- from chex import PRNGKey, dataclass
+ from chex import dataclass
  from jax import lax
  from jax.experimental import host_callback
  from jaxtyping import f64
  from tqdm.auto import tqdm

  from .parameters import trainable_params
- from .types import Dataset
- from .utils import convert_seed
+ from .types import Dataset, PRNGKeyType


  @dataclass(frozen=True)

@@ -82,11 +81,14 @@ def _progress_bar_scan(func):

      """Decorator that adds a progress bar to `body_fun` used in `lax.scan`."""

      def wrapper_progress_bar(carry, x):
-         i = x
+         if type(x) is tuple:
+             iter_num, *_ = x
+         else:
+             iter_num = x
          result = func(carry, x)
          *_, loss_val = result
-         _update_progress_bar(loss_val, i)
-         return close_tqdm(result, i)
+         _update_progress_bar(loss_val, iter_num)
+         return close_tqdm(result, iter_num)

      return wrapper_progress_bar

@@ -119,16 +121,18 @@ def loss(params):

          params = trainable_params(params, trainables)
          return objective(params)

+     iter_nums = jnp.arange(n_iters)
+
      @progress_bar_scan(n_iters, log_rate)
-     def step(carry, i):
+     def step(carry, iter_num):
          params, opt_state = carry
          loss_val, loss_gradient = jax.value_and_grad(loss)(params)
          updates, opt_state = optax_optim.update(loss_gradient, opt_state, params)
          params = optax.apply_updates(params, updates)
          carry = params, opt_state
          return carry, loss_val

-     (params, _), history = jax.lax.scan(step, (params, opt_state), jnp.arange(n_iters))
+     (params, _), history = jax.lax.scan(step, (params, opt_state), iter_nums)
      inf_state = InferenceState(params=params, history=history)
      return inf_state

@@ -139,7 +143,7 @@ def fit_batches(

      trainables: tp.Dict,
      train_data: Dataset,
      optax_optim,
-     seed: tp.Union[int, PRNGKey],
+     key: PRNGKeyType,
      batch_size: int,
      n_iters: tp.Optional[int] = 100,
      log_rate: tp.Optional[int] = 10,

@@ -152,7 +156,7 @@

          trainables (dict): Boolean dictionary of same structure as 'params' that determines which parameters should be trained.
          train_data (Dataset): The training dataset.
          optax_optim (GradientTransformation): The Optax optimiser that is to be used for learning a parameter set.
-         seed (int): The random seed for the mini-batch sampling.
+         key (PRNGKeyType): The PRNG key for the mini-batch sampling.
          batch_size(int): The batch_size.
          n_iters (int, optional): The number of optimisation steps to run. Defaults to 100.
          log_rate (int, optional): How frequently the objective function's value should be printed. Defaults to 10.

@@ -162,33 +166,42 @@

      opt_state = optax_optim.init(params)

-     prng = convert_seed(seed)
-
-     x, y, n = train_data.X, train_data.y, train_data.n
-
      def loss(params, batch):
          params = trainable_params(params, trainables)
          return objective(params, batch)

-     @progress_bar_scan(n_iters, log_rate)
-     def step(carry, _):
-         params, opt_state, prng = carry
+     keys = jax.random.split(key, n_iters)
+     iter_nums = jnp.arange(n_iters)

-         indicies = jr.choice(prng, n, (batch_size,), replace=True)
+     @progress_bar_scan(n_iters, log_rate)
+     def step(carry, iter_num__and__key):
+         iter_num, key = iter_num__and__key
+         params, opt_state = carry

-         batch = Dataset(X=x[indicies], y=y[indicies])
+         batch = get_batch(train_data, batch_size, key)

          loss_val, loss_gradient = jax.value_and_grad(loss)(params, batch)
          updates, opt_state = optax_optim.update(loss_gradient, opt_state, params)
          params = optax.apply_updates(params, updates)

-         prng, _ = jr.split(prng)
-
-         carry = params, opt_state, prng
+         carry = params, opt_state
          return carry, loss_val

-     (params, _, _), history = jax.lax.scan(
-         step, (params, opt_state, prng), jnp.arange(n_iters)
-     )
+     (params, _), history = jax.lax.scan(step, (params, opt_state), (iter_nums, keys))
      inf_state = InferenceState(params=params, history=history)
      return inf_state
+
+
+ def get_batch(train_data: Dataset, batch_size: int, key: PRNGKeyType) -> Dataset:
+     """Batch the data into mini-batches.
+     Args:
+         train_data (Dataset): The training dataset.
+         batch_size (int): The batch size.
+     Returns:
+         Dataset: The batched dataset.
+     """
+     x, y, n = train_data.X, train_data.y, train_data.n
+
+     indicies = jr.choice(key, n, (batch_size,), replace=True)
+
+     return Dataset(X=x[indicies], y=y[indicies])
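The heart of this change is that mini-batching is now handled functionally: the caller's key is split into one subkey per optimisation step, `lax.scan` is carried over the pair of iteration numbers and keys, and each step draws its batch with `jr.choice`, as in the new `get_batch`. Below is a self-contained sketch of that pattern outside of GPJax; the toy data, linear model, and learning rate are illustrative assumptions, not taken from the source.

```python
import jax
import jax.numpy as jnp
import jax.random as jr

# Stand-ins for a dataset and a per-batch loss; names here are illustrative only.
X = jnp.linspace(-3.0, 3.0, 50).reshape(-1, 1)
y = jnp.sin(X)
n, batch_size, n_iters = X.shape[0], 8, 100


def batch_loss(w, xb, yb):
    # A toy linear-model loss standing in for the GP objective.
    return jnp.mean((xb @ w - yb) ** 2)


key = jr.PRNGKey(42)
keys = jr.split(key, n_iters)  # one subkey per optimisation step
iter_nums = jnp.arange(n_iters)


def step(carry, iter_num_and_key):
    _, step_key = iter_num_and_key
    w = carry
    # Sample a mini-batch with replacement, mirroring get_batch's jr.choice call.
    idx = jr.choice(step_key, n, (batch_size,), replace=True)
    loss_val, grad = jax.value_and_grad(batch_loss)(w, X[idx], y[idx])
    w = w - 0.01 * grad  # plain SGD standing in for the Optax update
    return w, loss_val


w0 = jnp.zeros((1, 1))
w_final, history = jax.lax.scan(step, w0, (iter_nums, keys))
print(history.shape)  # (100,): the per-step loss history returned by the scan
```

Because every subkey is generated up front, the scan body stays pure and the whole loop remains jit-compatible, while the returned `history` gives the loss at every step.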

gpjax/utils.py

Lines changed: 0 additions & 11 deletions
@@ -79,14 +79,3 @@ def array_to_dict(parameter_array) -> tp.Dict:

          return jax.tree_util.tree_unflatten(flattened_pytree[1], parameter_array)

      return dict_to_array, array_to_dict
-
-
- def convert_seed(seed: tp.Union[int, PRNGKey]) -> PRNGKey:
-     """Ensure that seeds type."""
-
-     if isinstance(seed, int):
-         rng = jr.PRNGKey(seed)
-     else:  # key is of type PRNGKey
-         rng = seed
-
-     return rng
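With `convert_seed` removed (along with its test further below), callers are expected to construct a `jax.random.PRNGKey` themselves. If downstream code still wants to accept either an integer seed or a ready-made key, a small user-side shim along these lines could stand in for the removed helper; `as_prng_key` is a hypothetical name and not part of GPJax.

```python
import typing as tp

import jax.numpy as jnp
import jax.random as jr


def as_prng_key(seed: tp.Union[int, jnp.ndarray]) -> jnp.ndarray:
    """Hypothetical user-side replacement for the removed convert_seed:
    accept an integer seed or pass an existing PRNG key through unchanged."""
    return jr.PRNGKey(seed) if isinstance(seed, int) else seed


key = as_prng_key(42)                # equivalent to jr.PRNGKey(42)
same_key = as_prng_key(jr.PRNGKey(42))  # existing keys are returned as-is
```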

tests/test_abstractions.py

Lines changed: 30 additions & 6 deletions
@@ -5,7 +5,7 @@

  import gpjax as gpx
  from gpjax import RBF, Dataset, Gaussian, Prior, initialise, transform
- from gpjax.abstractions import InferenceState, fit, fit_batches
+ from gpjax.abstractions import InferenceState, fit, fit_batches, get_batch


  @pytest.mark.parametrize("n_iters", [10])

@@ -67,16 +67,40 @@ def test_batch_fitting(n_iters, nb, ndata):

      D = Dataset(X=x, y=y)

      optimiser = optax.adam(learning_rate=0.1)
-     seed = 42
-     print("-" * 80)
-     print(params)
-     print("-" * 80)
+     key = jr.PRNGKey(42)
      inference_state = fit_batches(
-         objective, params, trainable_status, D, optimiser, seed, nb, n_iters
+         objective, params, trainable_status, D, optimiser, key, nb, n_iters
      )
      optimised_params, history = inference_state.params, inference_state.history
      optimised_params = transform(optimised_params, constrainer)
      assert isinstance(inference_state, InferenceState)
      assert isinstance(optimised_params, dict)
      assert isinstance(history, jnp.ndarray)
      assert history.shape[0] == n_iters
+
+
+ @pytest.mark.parametrize("batch_size", [1, 2, 50])
+ @pytest.mark.parametrize("ndim", [1, 2, 3])
+ @pytest.mark.parametrize("ndata", [50])
+ @pytest.mark.parametrize("key", [jr.PRNGKey(123)])
+ def test_get_batch(ndata, ndim, batch_size, key):
+     x = jnp.sort(
+         jr.uniform(key=key, minval=-2.0, maxval=2.0, shape=(ndata, ndim)), axis=0
+     )
+     y = jnp.sin(x) + jr.normal(key=key, shape=x.shape) * 0.1
+     D = Dataset(X=x, y=y)
+
+     B = get_batch(D, batch_size, key)
+
+     assert B.n == batch_size
+     assert B.X.shape[1:] == x.shape[1:]
+     assert B.y.shape[1:] == y.shape[1:]
+
+     # test no caching of batches:
+     key, subkey = jr.split(key)
+     Bnew = get_batch(D, batch_size, subkey)
+     assert Bnew.n == batch_size
+     assert Bnew.X.shape[1:] == x.shape[1:]
+     assert Bnew.y.shape[1:] == y.shape[1:]
+     assert (Bnew.X != B.X).all()
+     assert (Bnew.y != B.y).all()

tests/test_utilities.py

Lines changed: 0 additions & 8 deletions
@@ -1,13 +1,10 @@

  import jax.numpy as jnp
- import jax.random as jr
  import pytest
- from chex import PRNGKey

  from gpjax.utils import (
      I,
      as_constant,
      concat_dictionaries,
-     convert_seed,
      dict_array_coercion,
      merge_dictionaries,
      sort_dictionary,

@@ -68,8 +65,3 @@ def test_array_coercion(d):

      assert array_to_dict(dict_to_array(params)) == params
      assert isinstance(dict_to_array(params), list)
      assert isinstance(array_to_dict(dict_to_array(params)), dict)
-
-
- @pytest.mark.parametrize("seed", [1, 2, jr.PRNGKey(42)])
- def convert_seed(seed):
-     assert isinstance(convert_seed(seed), PRNGKey)
