Skip to content

Commit e91c71a

Browse files
authored
Merge pull request #474 from bashtage/forecast-exog
ENH: Allow forecasting with exogenous variables
2 parents c6b0574 + 91142fe commit e91c71a

File tree

11 files changed

+1407
-72
lines changed

11 files changed

+1407
-72
lines changed

.github/workflows/generate-documentation.yml

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ jobs:
4040
run: |
4141
pushd doc
4242
make html
43+
make html
4344
popd
4445
- name: Deploy documentation
4546
env:

arch/bootstrap/multiple_comparison.py

-13
Original file line numberDiff line numberDiff line change
@@ -379,11 +379,6 @@ class StepM(MultipleComparison):
379379
studentization. Default is False. Note that this can be slow since
380380
the procedure requires k extra bootstraps.
381381
382-
References
383-
----------
384-
Romano, J. P., & Wolf, M. (2005). "Stepwise multiple testing as formalized
385-
data snooping." Econometrica, 73(4), 1237-1282.
386-
387382
Notes
388383
-----
389384
The size controls the Family Wise Error Rate (FWER) since this is a
@@ -526,14 +521,6 @@ class SPA(MultipleComparison, metaclass=DocStringInheritor):
526521
studentization. Default is False. Note that this can be slow since
527522
the procedure requires k extra bootstraps.
528523
529-
References
530-
----------
531-
White, H. (2000). "A reality check for data snooping." Econometrica 68,
532-
no. 5, 1097-1126.
533-
534-
Hansen, P. R. (2005). "A test for superior predictive ability."
535-
Journal of Business & Economic Statistics, 23(4)
536-
537524
Notes
538525
-----
539526
The three p-value correspond to different re-centering decisions.

arch/tests/test_examples.py

+14
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,20 @@
1616

1717
kernels = jupyter_client.kernelspec.find_kernel_specs()
1818
SKIP = False
19+
20+
if sys.platform.startswith("win") and sys.version_info >= (3, 8): # pragma: no cover
21+
import asyncio
22+
23+
try:
24+
from asyncio import WindowsSelectorEventLoopPolicy
25+
except ImportError:
26+
pass # Can't assign a policy which doesn't exist.
27+
else:
28+
if not isinstance(
29+
asyncio.get_event_loop_policy(), WindowsSelectorEventLoopPolicy
30+
):
31+
asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy())
32+
1933
except ImportError: # pragma: no cover
2034
pytestmark = pytest.mark.skip(reason="Required packages not available")
2135

arch/tests/univariate/test_forecast.py

+182-24
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,19 @@ def setup_class(cls):
9494

9595
def test_ar_forecasting(self):
9696
params = np.array([0.9])
97-
forecasts = _ar_forecast(self.zero_mean, 5, 0, 0.0, params)
97+
forecasts = _ar_forecast(
98+
self.zero_mean, 5, 0, 0.0, params, np.empty(0), np.empty(0)
99+
)
98100
expected = np.zeros((1000, 5))
99101
expected[:, 0] = 0.9 * self.zero_mean.values
100102
for i in range(1, 5):
101103
expected[:, i] = 0.9 * expected[:, i - 1]
102104
assert_allclose(forecasts, expected)
103105

104106
params = np.array([0.5, -0.3, 0.2])
105-
forecasts = _ar_forecast(self.zero_mean, 5, 2, 0.0, params)
107+
forecasts = _ar_forecast(
108+
self.zero_mean, 5, 2, 0.0, params, np.empty(0), np.empty(0)
109+
)
106110
expected = np.zeros((998, 8))
107111
expected[:, 0] = self.zero_mean.iloc[0:-2]
108112
expected[:, 1] = self.zero_mean.iloc[1:-1]
@@ -395,6 +399,15 @@ def test_fit_options(self):
395399
res = am.fit(first_obs=100)
396400
res.forecast(reindex=False)
397401

402+
def test_ar1_forecast_simulation_one(self):
403+
# Bug found when simulation=1
404+
am = arch_model(self.ar1, mean="AR", vol="GARCH", lags=[1])
405+
res = am.fit(disp="off")
406+
forecast = res.forecast(
407+
horizon=10, method="simulation", reindex=False, simulations=1
408+
)
409+
assert forecast.simulations.variances.shape == (1, 1, 10)
410+
398411
def test_ar1_forecast_simulation(self):
399412
am = arch_model(self.ar1, mean="AR", vol="GARCH", lags=[1])
400413
res = am.fit(disp="off")
@@ -584,28 +597,6 @@ def test_holdback_ar(self):
584597
res = mod.fit(disp="off")
585598
assert_allclose(res_holdback.params, res.params, rtol=1e-4, atol=1e-4)
586599

587-
def test_forecast_exogenous_regressors(self):
588-
y = 10 * self.rng.randn(1000, 1)
589-
x = self.rng.randn(1000, 2)
590-
am = HARX(y=y, x=x, lags=[1, 2])
591-
res = am.fit()
592-
fcasts = res.forecast(horizon=1, start=1, reindex=False)
593-
594-
const, har01, har02, ex0, ex1, _ = res.params
595-
y_01 = y[1:-1]
596-
y_02 = (y[1:-1] + y[0:-2]) / 2
597-
x0 = x[2:, :1]
598-
x1 = x[2:, 1:2]
599-
direct = const + har01 * y_01 + har02 * y_02 + ex0 * x0 + ex1 * x1
600-
direct = np.vstack(([[np.nan]], direct, [[np.nan]]))
601-
direct = pd.DataFrame(direct, columns=["h.1"])
602-
assert_allclose(np.asarray(direct)[1:], fcasts.mean)
603-
604-
fcasts2 = res.forecast(horizon=2, start=1, reindex=False)
605-
assert fcasts2.mean.shape == (999, 2)
606-
assert fcasts2.mean.isnull().all()["h.2"]
607-
assert_frame_equal(fcasts.mean, fcasts2.mean[["h.1"]])
608-
609600

610601
@pytest.mark.slow
611602
@pytest.mark.parametrize("first_obs", [None, 250])
@@ -672,3 +663,170 @@ def test_arx_no_lags():
672663
res = mod.fit(disp="off")
673664
assert res.params.shape[0] == 4
674665
assert "lags" not in mod._model_description(include_lags=False)
666+
667+
668+
EXOG_PARAMS = product(
669+
["pandas", "dict", "numpy"], [(10,), (1, 10), (1, 1, 10), (2, 1, 10)], [True, False]
670+
)
671+
672+
673+
@pytest.fixture(scope="function", params=EXOG_PARAMS)
674+
def exog_format(request):
675+
xtyp, shape, full = request.param
676+
rng = RandomState(123456)
677+
x_fcast = rng.standard_normal(shape)
678+
orig = x_fcast.copy()
679+
nobs = SP500.shape[0]
680+
if full:
681+
if x_fcast.ndim == 2:
682+
_x = np.full((nobs, shape[1]), np.nan)
683+
_x[-1:] = x_fcast
684+
x_fcast = _x
685+
elif x_fcast.ndim == 3:
686+
_x = np.full((shape[0], nobs, shape[-1]), np.nan)
687+
_x[:, -1:] = x_fcast
688+
x_fcast = _x
689+
else:
690+
# No full 1d
691+
return None, None
692+
if xtyp == "pandas":
693+
if x_fcast.ndim == 3:
694+
return None, None
695+
if x_fcast.ndim == 1:
696+
x_fcast = pd.Series(x_fcast)
697+
else:
698+
x_fcast = pd.DataFrame(x_fcast)
699+
x_fcast.index = SP500.index[-x_fcast.shape[0] :]
700+
elif xtyp == "dict":
701+
if x_fcast.ndim == 3:
702+
keys = [f"x{i}" for i in range(1, x_fcast.shape[0] + 1)]
703+
x_fcast = {k: x_fcast[i] for i, k in enumerate(keys)}
704+
else:
705+
x_fcast = {"x1": x_fcast}
706+
return x_fcast, orig
707+
708+
709+
def test_x_reformat_1var(exog_format):
710+
# (10,)
711+
# (1,10)
712+
# (n, 10)
713+
# (1,1,10)
714+
# (1,n,10)
715+
# {"x1"} : (10,)
716+
# {"x1"} : (1,10)
717+
# {"x1"} : (n,10)
718+
exog, ref = exog_format
719+
if exog is None:
720+
return
721+
if isinstance(exog, dict):
722+
nexog = len(exog)
723+
else:
724+
if np.ndim(exog) == 3:
725+
nexog = exog.shape[0]
726+
else:
727+
nexog = 1
728+
cols = [f"x{i}" for i in range(1, nexog + 1)]
729+
rng = RandomState(12345)
730+
x = pd.DataFrame(
731+
rng.standard_normal((SP500.shape[0], nexog)), columns=cols, index=SP500.index
732+
)
733+
mod = ARX(SP500, lags=1, x=x)
734+
res = mod.fit()
735+
fcasts = res.forecast(horizon=10, x=exog, reindex=False)
736+
ref = res.forecast(horizon=10, x=ref, reindex=False)
737+
assert_allclose(fcasts.mean, ref.mean)
738+
739+
740+
@pytest.mark.parametrize("nexog", [1, 2])
741+
def test_x_forecasting(nexog):
742+
rng = RandomState(12345)
743+
mod = arch_model(None, mean="ARX", lags=2)
744+
data = mod.simulate([0.1, 1.2, -0.6, 0.1, 0.1, 0.8], nobs=1000)
745+
cols = [f"x{i}" for i in range(1, nexog + 1)]
746+
x = pd.DataFrame(
747+
rng.standard_normal((data.data.shape[0], nexog)),
748+
columns=cols,
749+
index=data.data.index,
750+
)
751+
b = np.array([0.25, 0.5]) if x.shape[1] == 2 else np.array([0.25])
752+
y = data.data + x @ b
753+
y.name = "y"
754+
mod = arch_model(y, x, mean="ARX", lags=2)
755+
res = mod.fit(disp="off")
756+
x_fcast = np.zeros((x.shape[1], 1, 10))
757+
for i in range(x_fcast.shape[0]):
758+
x_fcast[i] = np.arange(100 * i, 100 * i + 10)
759+
760+
forecasts = res.forecast(x=x_fcast, horizon=10, reindex=False)
761+
direct = np.zeros(12)
762+
direct[:2] = y.iloc[-2:]
763+
p0, p1, p2 = res.params[:3]
764+
b0 = res.params[3]
765+
b1 = res.params[4] if x.shape[1] == 2 else 0.0
766+
for i in range(10):
767+
direct[i + 2] = p0 + p1 * direct[i + 1] + p2 * direct[i]
768+
direct[i + 2] += b0 * (i)
769+
direct[i + 2] += b1 * (100 + i)
770+
assert_allclose(forecasts.mean.iloc[0], direct[2:])
771+
772+
773+
@pytest.mark.parametrize("nexog", [1, 2])
774+
def test_x_forecasting_simulation_smoke(nexog):
775+
rng = RandomState(12345)
776+
mod = arch_model(None, mean="ARX", lags=2)
777+
data = mod.simulate([0.1, 1.2, -0.6, 0.1, 0.1, 0.8], nobs=1000)
778+
cols = [f"x{i}" for i in range(1, nexog + 1)]
779+
x = pd.DataFrame(
780+
rng.standard_normal((data.data.shape[0], nexog)),
781+
columns=cols,
782+
index=data.data.index,
783+
)
784+
b = np.array([0.25, 0.5]) if x.shape[1] == 2 else np.array([0.25])
785+
y = data.data + x @ b
786+
y.name = "y"
787+
mod = arch_model(y, x, mean="ARX", lags=2)
788+
res = mod.fit(disp="off")
789+
x_fcast = np.zeros((x.shape[1], 1, 10))
790+
for i in range(x_fcast.shape[0]):
791+
x_fcast[i] = np.arange(100 * i, 100 * i + 10)
792+
793+
res.forecast(
794+
x=x_fcast, horizon=10, reindex=False, method="simulation", simulations=10
795+
)
796+
797+
798+
def test_x_exceptions():
799+
res = ARX(SP500, lags=1).fit(disp="off")
800+
with pytest.raises(TypeError, match="x is not None but"):
801+
res.forecast(reindex=False, x=SP500)
802+
x = SP500.copy()
803+
x[:] = np.random.standard_normal(SP500.shape)
804+
res = ARX(SP500, lags=1, x=x).fit(disp="off")
805+
with pytest.raises(TypeError, match="x is None but the model"):
806+
res.forecast(reindex=False)
807+
res = ARX(SP500, lags=1, x=x).fit(disp="off")
808+
with pytest.raises(ValueError, match="x must have the same"):
809+
res.forecast(reindex=False, x={})
810+
with pytest.raises(ValueError, match="x must have the same"):
811+
res.forecast(reindex=False, x={"x0": x, "x1": x})
812+
with pytest.raises(KeyError, match="The keys of x must exactly"):
813+
res.forecast(reindex=False, x={"z": x})
814+
with pytest.raises(ValueError, match="The arrays contained in the dictionary"):
815+
_x = np.asarray(x).reshape((1, x.shape[0], 1))
816+
res.forecast(reindex=False, x={"x0": _x})
817+
x2 = pd.concat([x, x], 1)
818+
x2.columns = ["x0", "x1"]
819+
x2.iloc[:, 1] = np.random.standard_normal(SP500.shape)
820+
res = ARX(SP500, lags=1, x=x2).fit(disp="off")
821+
with pytest.raises(ValueError, match="The shapes of the arrays contained"):
822+
res.forecast(reindex=False, x={"x0": x2.iloc[:, 0], "x1": x2.iloc[10:, 1:]})
823+
with pytest.raises(ValueError, match="1- and 2-dimensional x values"):
824+
res.forecast(reindex=False, x=x2)
825+
with pytest.raises(ValueError, match="The leading dimension of x"):
826+
_x2 = np.asarray(x2)
827+
_x2 = _x2.reshape((1, -1, 2))
828+
res.forecast(reindex=False, x=_x2)
829+
with pytest.raises(ValueError, match="The number of values passed"):
830+
res.forecast(reindex=False, x=np.empty((2, SP500.shape[0], 3)))
831+
with pytest.raises(ValueError, match="The shape of x does not satisfy the"):
832+
res.forecast(reindex=False, x=np.empty((2, SP500.shape[0] // 2, 1)))

0 commit comments

Comments
 (0)