Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion docs/api/energy.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ In this case, the expectations in the definition of the energy score can be repl
sample means over the ensemble members, yielding the following representation of the energy
score when evaluating an ensemble forecast $F_{ens}$ with $M$ members.

Next to the full version of the energy score, `scoringrules` provides [weighted versions](#weighted-versions) and [estimators with lower computational complexity](#estimators-for-the-energy-score) for the energy score.


::: scoringrules.energy_score

<h2>Weighted versions</h2>
## Weighted versions

The energy score provides a measure of overall forecast performance. However, it is often
the case that certain outcomes are of more interest than others, making it desirable to
Expand Down Expand Up @@ -85,3 +87,11 @@ energy scores can easily be computed for ensemble forecasts by
replacing the expectations with sample means over the ensemble members.

<br/><br/>

## Estimators for the Energy Score

The following two estimators can be used if computational power is scarce respectively the dimension of the forecasting setting is large (Ziel and Berk, 2019)[@ziel2019multivariate]. Note that, on the flip-side, lower computational complexity goes hand-in-hand with lower precision for the approximation of the ES.

::: scoringrules.energy_score_iid

::: scoringrules.energy_score_kband
7 changes: 7 additions & 0 deletions docs/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,10 @@ @article{winkler1972decision
year={1972},
publisher={Taylor \& Francis}
}

@article{ziel2019multivariate,
title={Multivariate forecasting evaluation: On sensitive and strictly proper scoring rules},
author={Ziel, Florian and Berk, Kevin},
journal={arXiv preprint arXiv:1910.07325},
year={2019}
}
4 changes: 4 additions & 0 deletions scoringrules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
)
from scoringrules._energy import (
energy_score,
energy_score_iid,
energy_score_kband,
owenergy_score,
twenergy_score,
vrenergy_score,
Expand Down Expand Up @@ -48,6 +50,8 @@
"brier_score",
"error_spread_score",
"energy_score",
"energy_score_iid",
"energy_score_kband",
"owenergy_score",
"twenergy_score",
"vrenergy_score",
Expand Down
132 changes: 132 additions & 0 deletions scoringrules/_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,135 @@ def vrenergy_score(
return energy.vrnrg(
observations, forecasts, obs_weights, fct_weights, backend=backend
)


def energy_score_iid(
observations: "Array",
forecasts: "Array",
/,
m_axis: int = -2,
v_axis: int = -1,
*,
shuffle: bool = True,
seed: int = 42,
backend: "Backend" = None,
) -> "Array":
r"""Compute the Energy Score for a finite multivariate ensemble via the IID estimator.

The Energy Score is a multivariate scoring rule expressed as

$$\text{ES}^\text{iid}(F_{ens}, \mathbf{y})= \frac{1}{M} \sum_{m=1}^{M} \| \mathbf{x}_{m} -
\mathbf{y} \| - \frac{1}{K} \sum_{m=1}^{K} \| \mathbf{x}_{m} - \mathbf{x}_{K+m} \| $$

where $\mathbf{X}$ are independent samples from $F$, $K = \text{floor}(M / 2)$
and $||\cdot||$ is the euclidean norm over the input dimensions (the variables).

To avoid the influence of possible structure during the generation of the samples, we
shuffle the forecast array once before we calculate the $ES^\text{iid}$.

Parameters
----------
observations: Array
The observed values, where the variables dimension is by default the last axis.
forecasts: Array
The predicted forecast ensemble, where the ensemble dimension is by default
represented by the second last axis and the variables dimension by the last axis.
m_axis: int
The axis corresponding to the ensemble dimension on the forecasts array. Defaults to -2.
v_axis: int
The axis corresponding to the variables dimension on the forecasts array (or the observations
array with an extra dimension on `m_axis`). Defaults to -1.
shuffle: bool
Whether to shuffle the forecasts once along the ensemble axis `m_axis`.
seed: int
The random seed for shuffling.
backend: str
The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.

Returns
-------
energy_score: Array of shape (...)
The computed Energy Score.
"""
backend = backend if backend is not None else backends._active

B = backends.active if backend is None else backends[backend]

observations, forecasts = multivariate_array_check(
observations, forecasts, m_axis, v_axis, backend=backend
)

if shuffle:
forecasts = B.shuffle(forecasts, axis=m_axis, seed=seed)

if backend == "numba":
return energy._energy_score_iid_gufunc(observations, forecasts)

return energy.iidnrg(observations, forecasts, backend=backend)


def energy_score_kband(
observations: "Array",
forecasts: "Array",
K: int,
/,
m_axis: int = -2,
v_axis: int = -1,
*,
shuffle: bool = True,
seed: int = 42,
backend: "Backend" = None,
) -> "Array":
r"""Compute the Energy Score for a finite multivariate ensemble via the k-Band estimator.

The Energy Score is a multivariate scoring rule expressed as

$$\text{ES}^{k-\text{band}}(F_{ens}, \mathbf{y})= \frac{1}{M} \sum_{m=1}^{M} \| \mathbf{x}_{m} -
\mathbf{y} \| - \frac{1}{MK} \sum_{m=1}^{M} \sum_{k=1}^{K} \| \mathbf{x}_{m} - \mathbf{x}_{m+k} \| $$

where $\mathbf{X}$ are independent samples from $F$, $K$ is a user-choosen integer
and $||\cdot||$ is the euclidean norm over the input dimensions (the variables). Let us note
that we define $\mathbf{x}_{M+k} = \mathbf{x}_{k}$.

To avoid the influence of possible structure during the generation of the samples, we
shuffle the forecast array once before we calculate the $ES^\text{iid}$.

Parameters
----------
observations: Array
The observed values, where the variables dimension is by default the last axis.
forecasts: Array
The predicted forecast ensemble, where the ensemble dimension is by default
represented by the second last axis and the variables dimension by the last axis.
K : int
The number of resamples taken to estimate the energy score.
m_axis: int
The axis corresponding to the ensemble dimension on the forecasts array. Defaults to -2.
v_axis: int
The axis corresponding to the variables dimension on the forecasts array (or the observations
array with an extra dimension on `m_axis`). Defaults to -1.
shuffle: bool
Whether to shuffle the forecasts once along the ensemble axis `m_axis`.
seed: int
The random seed for shuffling.
backend: str
The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.

Returns
-------
energy_score: Array of shape (...)
The computed Energy Score.
"""
B = backends.active if backend is None else backends[backend]

observations, forecasts = multivariate_array_check(
observations, forecasts, m_axis, v_axis, backend=backend
)

if shuffle:
forecasts = B.shuffle(forecasts, axis=m_axis, seed=seed)

if backend == "numba":
return energy._energy_score_kband_gufunc(observations, forecasts, K)

return energy.kbnrg(observations, forecasts, K, backend=backend)
12 changes: 12 additions & 0 deletions scoringrules/backend/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,18 @@ def max(
) -> "Array":
"""Return the maximum value of an input array ``x``."""

@abc.abstractmethod
def roll(self, x: "Array", shift: int, axis: int | None = None) -> "Array":
"""Roll the values of array ``x`` by shift along axis."""

@abc.abstractmethod
def shuffle(self, x: "Array", axis: int | None = 0, seed: int = 42) -> "Array":
"""
Shuffle the values of array ``x`` by along axis.

Note that the seed is set on the function call. Hence, calling the function multiple times with the default seed will result in identical outputs.
"""

@abc.abstractmethod
def moveaxis(
self,
Expand Down
8 changes: 8 additions & 0 deletions scoringrules/backend/jax.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ def max(
) -> "Array":
return jnp.max(x, axis=axis, keepdims=keepdims)

def roll(self, x: "Array", shift: int, axis: int | None = None) -> "Array":
return jnp.roll(x, shift=shift, axis=axis)

def shuffle(self, x: "Array", axis: int = 0, seed: int = 42) -> "Array":
return jax.random.permutation(
jax.random.key(42), x, axis=axis, independent=False
)

def moveaxis(
self,
x: "Array",
Expand Down
7 changes: 7 additions & 0 deletions scoringrules/backend/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ def max(
) -> "NDArray":
return np.max(x, axis=axis, keepdims=keepdims)

def roll(self, x: "NDArray", shift: int, axis: int | None = None) -> "NDArray":
return np.roll(x, shift=shift, axis=axis)

def shuffle(self, x: "NDArray", axis: int = 0, seed: int = 42) -> "NDArray":
generator = np.random.default_rng(seed)
return generator.permutation(x, axis=axis)

def moveaxis(
self,
x: "NDArray",
Expand Down
20 changes: 20 additions & 0 deletions scoringrules/backend/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,26 @@ def max(
) -> "Tensor":
return tf.math.reduce_max(x, axis=axis, keepdims=keepdims)

def roll(
self,
x: "Tensor",
shift: int,
axis: int | None = None,
) -> "Tensor":
return tf.roll(x, shift=shift, axis=axis)

def shuffle(self, x: "Tensor", axis: int = 0, seed: int = 42) -> "Tensor":
# tensorflow does not allow to shuffle along a certain axis
# Hence we move the axis to shuffle in the 0st spot, shuffle, move back
tf.random.set_seed(seed)
return tf.experimental.numpy.moveaxis(
tf.random.shuffle(
tf.experimental.numpy.moveaxis(x, source=axis, destination=0)
),
source=0,
destination=axis,
)

def moveaxis(
self,
x: "Tensor",
Expand Down
13 changes: 13 additions & 0 deletions scoringrules/backend/torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,19 @@ def max(
) -> "Tensor":
return torch.max(x, axis=axis, keepdim=keepdims)

def roll(self, x: "Tensor", shift: int, axis: int | None = None) -> "Tensor":
torch.roll(x, shifts=shift, dims=axis)

def shuffle(self, x: "Tensor", axis: int = 0, seed: int = 42) -> "Tensor":
torch.manual_seed(seed=seed)
return torch.moveaxis(
torch.moveaxis(x, source=axis, destination=0)[
torch.randperm(x.shape[axis])
],
source=0,
destination=axis,
)

def moveaxis(
self,
x: "Tensor",
Expand Down
8 changes: 8 additions & 0 deletions scoringrules/core/energy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
from ._gufuncs import (
_energy_score_gufunc,
_energy_score_iid_gufunc,
_energy_score_kband_gufunc,
_owenergy_score_gufunc,
_vrenergy_score_gufunc,
)
from ._score import energy_score as nrg
from ._score import energy_score_iid as iidnrg
from ._score import energy_score_kband as kbnrg
from ._score import owenergy_score as ownrg
from ._score import vrenergy_score as vrnrg

__all__ = [
"nrg",
"ownrg",
"vrnrg",
"iidnrg",
"kbnrg",
"_energy_score_gufunc",
"_owenergy_score_gufunc",
"_vrenergy_score_gufunc",
"_energy_score_kband_gufunc",
"_energy_score_iid_gufunc",
]
52 changes: 52 additions & 0 deletions scoringrules/core/energy/_gufuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,55 @@ def _vrenergy_score_gufunc(
wabs_y = np.linalg.norm(obs) * ow

out[0] = e_1 / M - 0.5 * e_2 / (M**2) + (wabs_x - wabs_y) * (wbar - ow)


@guvectorize(
[
"void(float32[:], float32[:,:], float32[:])",
"void(float64[:], float64[:,:], float64[:])",
],
"(d), (m, d)->()",
)
def _energy_score_iid_gufunc(
obs: np.ndarray,
fct: np.ndarray,
out: np.ndarray,
):
"""IID estimator for the energy score."""
M = fct.shape[0]
K = int(M // 2)
e_1 = 0
e_2 = 0
for i in range(M):
e_1 += float(np.linalg.norm(fct[i] - obs))
for i in range(K):
e_2 += float(np.linalg.norm(fct[i] - fct[K + i]))
out[0] = e_1 / M - 0.5 / K * e_2


@guvectorize(
[
"void(float32[:], float32[:,:], int32, float32[:])",
"void(float64[:], float64[:,:], int64, float64[:])",
],
"(d), (m, d), ()->()",
)
def _energy_score_kband_gufunc(
obs: np.ndarray,
fct: np.ndarray,
K: int,
out: np.ndarray,
):
"""K-Band estimator for the energy score."""
M = fct.shape[0]

e_1 = 0
e_2 = 0

for i in range(M):
e_1 += float(np.linalg.norm(fct[i] - obs))
for k in range(1, K + 1):
idx_rolled = np.roll(np.arange(M), k)
for i in range(M):
e_2 += float(np.linalg.norm(fct[i] - fct[idx_rolled[i]]))
out[0] = e_1 / M - 0.5 / (M * K) * e_2
Loading