Merge pull request #300 from JaxGaussianProcesses/citations

thomaspinder · web-flow · commit 6f7db6d4adf0 · 2023-06-12T18:16:01.000+01:00
Add cite functionality
diff --git a/docs/examples/collapsed_vi.py b/docs/examples/collapsed_vi.py
@@ -105,7 +105,22 @@
 # <strong data-cite="titsias2009">Titsias (2009)</strong>.
 
 # %%
-elbo = jit(gpx.CollapsedELBO(negative=True))
+elbo = gpx.CollapsedELBO(negative=True)
+
+# %% [markdown]
+# For researchers, GPJax has the capacity to print the bibtex citation for objects such
+# as the ELBO through the `cite()` function.
+
+# %%
+print(gpx.cite(elbo))
+
+# %% [markdown]
+# JIT-compiling expensive-to-compute functions such as the ELBO is
+# advisable. This can be achieved by wrapping the function in `jax.jit()`.
+
+# %%
+
+elbo = jit(elbo)
 
 # %% [markdown]
 # We now train our model akin to a Gaussian process regression model via the `fit`
diff --git a/docs/examples/graph_kernels.py b/docs/examples/graph_kernels.py
@@ -134,9 +134,23 @@
 
 # %%
 likelihood = gpx.Gaussian(num_datapoints=D.n)
-prior = gpx.Prior(mean_function=gpx.Zero(), kernel=gpx.GraphKernel(laplacian=L))
+kernel = gpx.GraphKernel(laplacian=L)
+prior = gpx.Prior(mean_function=gpx.Zero(), kernel=kernel)
 posterior = prior * likelihood
 
+# %% [markdown]
+#
+# For researchers and the curious reader, GPJax provides the ability to print the
+# bibtex citation for objects such as the graph kernel through the `cite()` function.
+
+# %%
+print(gpx.cite(kernel))
+
+# %% [markdown]
+#
+# With a posterior defined, we can now optimise the model's hyperparameters.
+
+# %%
 opt_posterior, training_history = gpx.fit(
     model=posterior,
     objective=jit(gpx.ConjugateMLL(negative=True)),
diff --git a/docs/examples/regression.py b/docs/examples/regression.py
@@ -179,7 +179,7 @@
 # these parameters by optimising the marginal log-likelihood (MLL).
 
 # %%
-negative_mll = jit(gpx.objectives.ConjugateMLL(negative=True))
+negative_mll = gpx.objectives.ConjugateMLL(negative=True)
 negative_mll(posterior, train_data=D)
 
 
@@ -188,6 +188,20 @@
 #     ox.adam(learning_rate=0.01),
 #     ox.masked(ox.set_to_zero(), static_tree)
 #     )
+# %% [markdown]
+# For researchers, GPJax has the capacity to print the bibtex citation for objects such
+# as the marginal log-likelihood through the `cite()` function.
+
+# %%
+print(gpx.cite(negative_mll))
+
+# %% [markdown]
+# JIT-compiling expensive-to-compute functions such as the marginal log-likelihood is
+# advisable. This can be achieved by wrapping the function in `jax.jit()`.
+
+# %%
+negative_mll = jit(negative_mll)
+
 # %% [markdown]
 # Since most optimisers (including here) minimise a given function, we have realised
 # the negative marginal log-likelihood and just-in-time (JIT) compiled this to
diff --git a/docs/examples/uncollapsed_vi.py b/docs/examples/uncollapsed_vi.py
@@ -227,7 +227,22 @@
 # its negative.
 
 # %%
-negative_elbo = jit(gpx.ELBO(negative=True))
+negative_elbo = gpx.ELBO(negative=True)
+
+# %% [markdown]
+# For researchers, GPJax has the capacity to print the bibtex citation for objects such
+# as the ELBO through the `cite()` function.
+
+# %%
+print(gpx.cite(negative_elbo))
+
+# %% [markdown]
+# JIT-compiling expensive-to-compute functions such as the ELBO is
+# advisable. This can be achieved by wrapping the function in `jax.jit()`.
+
+# %%
+
+negative_elbo = jit(negative_elbo)
 
 # %% [markdown]
 # ### Mini-batching
diff --git a/docs/refs.bib b/docs/refs.bib
@@ -25,9 +25,9 @@ @book{rasmussen2006gaussian
 }
 
 @article{hensman2013gaussian,
-  title   = {Gaussian processes for big data},
+  title   = {{G}aussian processes for big data},
   author  = {Hensman, James and Fusi, Nicolo and Lawrence, Neil D},
-  journal = {arXiv preprint arXiv:1309.6835},
+  journal = {Uncertainty in Artificial Intelligence},
   year    = {2013}
 }
 
diff --git a/gpjax/__init__.py b/gpjax/__init__.py
@@ -16,6 +16,7 @@
     Module,
     param_field,
 )
+from gpjax.citation import cite
 from gpjax.dataset import Dataset
 from gpjax.fit import fit
 from gpjax.gps import (
@@ -77,6 +78,7 @@
 __all__ = [
     "Module",
     "param_field",
+    "cite",
     "kernels",
     "fit",
     "Prior",
diff --git a/gpjax/citation.py b/gpjax/citation.py
@@ -0,0 +1,196 @@
+from dataclasses import (
+    dataclass,
+    fields,
+)
+
+from beartype.typing import (
+    Dict,
+    Union,
+)
+from jaxlib.xla_extension import PjitFunction
+from plum import dispatch
+
+from gpjax.kernels import (
+    RFF,
+    ArcCosine,
+    GraphKernel,
+    Matern12,
+    Matern32,
+    Matern52,
+)
+from gpjax.objectives import (
+    ELBO,
+    CollapsedELBO,
+    ConjugateMLL,
+    LogPosteriorDensity,
+    NonConjugateMLL,
+)
+
+MaternKernels = Union[Matern12, Matern32, Matern52]
+MLLs = Union[ConjugateMLL, NonConjugateMLL, LogPosteriorDensity]
+CitationType = Union[str, Dict[str, str]]
+
+
+@dataclass(repr=False)
+class AbstractCitation:
+    """Base record for a bibliographic reference rendered as a BibTeX entry.
+
+    Subclasses add further fields and a ``citation_type`` attribute naming the
+    BibTeX entry type. Every dataclass field except ``citation_type``,
+    ``citation_key`` and ``notes`` is written out as ``name = {value},``.
+    """
+
+    citation_key: Union[str, None] = None
+    authors: Union[str, None] = None
+    title: Union[str, None] = None
+    year: Union[str, None] = None
+
+    def as_str(self) -> str:
+        """Return the citation formatted as a BibTeX entry.
+
+        Fields whose value is ``None`` are omitted. When the instance has no
+        ``citation_type`` attribute the generic ``misc`` entry type is used.
+        """
+        # Only subclasses declare `citation_type`; fall back instead of raising
+        # AttributeError for the base class and the message-only citations.
+        entry_type = getattr(self, "citation_type", "misc")
+        excluded = ("citation_type", "citation_key", "notes")
+        lines = [f"@{entry_type}{{{self.citation_key},"]
+        for field in fields(self):
+            name = field.name
+            value = getattr(self, name)
+            if name in excluded or value is None:
+                continue
+            lines.append(f"{name} = {{{value}}},")
+        lines.append("}")
+        return "\n".join(lines)
+
+    def __repr__(self) -> str:
+        """Return the quoted form of ``str(self)``."""
+        # Go through `__str__` so subclasses that override it with a plain
+        # message get a matching repr rather than a BibTeX rendering.
+        return repr(str(self))
+
+    def __str__(self) -> str:
+        """Return the BibTeX entry produced by :meth:`as_str`."""
+        return self.as_str()
+
+
+class NullCitation(AbstractCitation):
+    """Fallback citation returned when no reference is registered for an object."""
+
+    def __str__(self) -> str:
+        """Return a message explaining that no citation is available."""
+        message = (
+            "No citation available. If you think this is an error, please open a pull"
+            " request."
+        )
+        return message
+
+
+class JittedFnCitation(AbstractCitation):
+    """Placeholder citation handed back for jit-compiled callables."""
+
+    def __str__(self) -> str:
+        """Return a message explaining that jitted objects cannot be cited."""
+        message = "Citation not available for jitted objects."
+        return message
+
+
+# NOTE(review): plain `@dataclass` generates a new `__repr__` for this class, so
+# the custom `__repr__` defined on `AbstractCitation` is not the one used here;
+# confirm that is intended.
+@dataclass
+class PhDThesisCitation(AbstractCitation):
+    """Citation record whose default BibTeX entry type is ``phdthesis``."""
+
+    school: str = None
+    institution: str = None
+    # Entry type written after the "@" by `AbstractCitation.as_str`.
+    citation_type: str = "phdthesis"
+
+
+# NOTE(review): plain `@dataclass` generates a new `__repr__` for this class, so
+# the custom `__repr__` defined on `AbstractCitation` is not the one used here;
+# confirm that is intended.
+@dataclass
+class PaperCitation(AbstractCitation):
+    """Citation record whose default BibTeX entry type is ``inproceedings``."""
+
+    booktitle: str = None
+    # Entry type written after the "@" by `AbstractCitation.as_str`; callers may
+    # override it per instance.
+    citation_type: str = "inproceedings"
+
+
+# NOTE(review): plain `@dataclass` generates a new `__repr__` for this class, so
+# the custom `__repr__` defined on `AbstractCitation` is not the one used here;
+# confirm that is intended.
+@dataclass
+class BookCitation(AbstractCitation):
+    """Citation record whose default BibTeX entry type is ``book``."""
+
+    publisher: str = None
+    volume: str = None
+    # Entry type written after the "@" by `AbstractCitation.as_str`.
+    citation_type: str = "book"
+
+
+####################
+# Default citation
+####################
+@dispatch
+def cite(tree) -> NullCitation:
+    """Fallback overload: return a `NullCitation` for any unregistered object."""
+    fallback = NullCitation()
+    return fallback
+
+
+####################
+# Jitted function citation
+####################
+@dispatch
+def cite(tree: PjitFunction) -> JittedFnCitation:
+    """Return a `JittedFnCitation` placeholder for a jit-compiled function."""
+    placeholder = JittedFnCitation()
+    return placeholder
+
+
+####################
+# Kernel citations
+####################
+@dispatch
+def cite(tree: MaternKernels) -> PhDThesisCitation:
+    """Return the thesis citation shared by the Matern12/32/52 kernels."""
+    thesis_title = (
+        "Spatial variation : Stochastic models and their application to some"
+        " problems in forest surveys and other sampling investigations"
+    )
+    university = "Stockholm University"
+    return PhDThesisCitation(
+        citation_key="matern1960SpatialV",
+        authors="Bertil Matérn",
+        title=thesis_title,
+        year="1960",
+        school=university,
+        institution=university,
+    )
+
+
+@dispatch
+def cite(tree: ArcCosine) -> PaperCitation:
+    """Return the paper citation for the arc-cosine kernel."""
+    entry = {
+        "citation_key": "cho2009kernel",
+        "authors": "Cho, Youngmin and Saul, Lawrence",
+        "title": "Kernel Methods for Deep Learning",
+        "year": "2009",
+        "booktitle": "Advances in Neural Information Processing Systems",
+    }
+    return PaperCitation(**entry)
+
+
+@dispatch
+def cite(tree: GraphKernel) -> PaperCitation:
+    """Return the paper citation for the graph kernel."""
+    author_list = (
+        "Borovitskiy, Viacheslav and Azangulov, Iskander and Terenin, Alexander and"
+        " Mostowsky, Peter and Deisenroth, Marc and Durrande, Nicolas"
+    )
+    venue = "International Conference on Artificial Intelligence and Statistics"
+    return PaperCitation(
+        citation_key="borovitskiy2021matern",
+        authors=author_list,
+        title="Matérn Gaussian Processes on Graphs",
+        year="2021",
+        booktitle=venue,
+    )
+
+
+@dispatch
+def cite(tree: RFF) -> PaperCitation:
+    """Return the paper citation for random Fourier features."""
+    # NOTE(review): the entry type is overridden to "article" while the venue is
+    # stored in `booktitle`; confirm which BibTeX entry type is intended.
+    reference = PaperCitation(
+        citation_type="article",
+        citation_key="rahimi2007random",
+        authors="Rahimi, Ali and Recht, Benjamin",
+        title="Random features for large-scale kernel machines",
+        year="2007",
+        booktitle="Advances in neural information processing systems",
+    )
+    return reference
+
+
+####################
+# Objective citations
+####################
+@dispatch
+def cite(tree: MLLs) -> BookCitation:
+    """Return the book citation shared by the marginal log-likelihood objectives."""
+    entry = {
+        "citation_key": "rasmussen2006gaussian",
+        "authors": "Rasmussen, Carl Edward and Williams, Christopher K",
+        "title": "Gaussian Processes for Machine Learning",
+        "year": "2006",
+        "publisher": "MIT press Cambridge, MA",
+        "volume": "2",
+    }
+    return BookCitation(**entry)
+
+
+@dispatch
+def cite(tree: CollapsedELBO) -> PaperCitation:
+    """Return the paper citation for the collapsed ELBO objective."""
+    venue = "International Conference on Artificial Intelligence and Statistics"
+    reference = PaperCitation(
+        citation_key="titsias2009variational",
+        authors="Titsias, Michalis",
+        title="Variational learning of inducing variables in sparse Gaussian processes",
+        year="2009",
+        booktitle=venue,
+    )
+    return reference
+
+
+@dispatch
+def cite(tree: ELBO) -> PaperCitation:
+    """Return the paper citation for the (uncollapsed) ELBO objective."""
+    # NOTE(review): the entry type is overridden to "article" while the venue is
+    # stored in `booktitle`; confirm which BibTeX entry type is intended.
+    reference = PaperCitation(
+        citation_type="article",
+        citation_key="hensman2013gaussian",
+        authors="Hensman, James and Fusi, Nicolo and Lawrence, Neil D",
+        title="Gaussian Processes for Big Data",
+        year="2013",
+        booktitle="Uncertainty in Artificial Intelligence",
+    )
+    return reference
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,7 @@ tensorflow-probability = "^0.19.0"
 orbax-checkpoint = "^0.2.0"
 beartype = "^0.13.1"
 jaxlib = "0.4.7" # Temporary fix: https://github.com/google/jax/issues/15951
+plum-dispatch = "^2.1.0"
 
 [tool.poetry.group.test.dependencies]
 pytest = "^7.2.2"
@@ -160,11 +161,13 @@ convention = "numpy"
 "gpjax/__init__.py" = ['I', 'F401', 'E402', 'D104']
 "gpjax/progress_bar.py" = ["TCH004"]
 "gpjax/scan.py" = ["PLR0913"]
+"gpjax/citation.py" = ["F811"]
 "tests/test_base/test_module.py" = ["PLR0915"]
 "tests/test_linops/test_linear_operator.py" = ["PLR0913"]
 "tests/test_objectives.py" = ["PLR0913"]
 "docs/examples/barycentres.py" = ["PLR0913"]
 
+
 [tool.isort]
 profile = "black"
 combine_as_imports = true
diff --git a/tests/test_citations.py b/tests/test_citations.py

Original file line number	Diff line number	Diff line change
`@@ -25,9 +25,9 @@ @book{rasmussen2006gaussian`
`25`	`25`	`}`
`26`	`26`
`27`	`27`	`@article{hensman2013gaussian,`
`28`		`- title = {Gaussian processes for big data},`
	`28`	`+ title = {{G}aussian processes for big data},`
`29`	`29`	`author = {Hensman, James and Fusi, Nicolo and Lawrence, Neil D},`
`30`		`- journal = {arXiv preprint arXiv:1309.6835},`
	`30`	`+ journal = {Uncertainty in Artificial Intelligence},`
`31`	`31`	`year = {2013}`
`32`	`32`	`}`
`33`	`33`