Skip to content

Commit 05f44bd

Browse files
authored
Fix Bug in Resid / Predict when Model is Empty (#597)
* flatten residual * add tests for empty models * bump version, build lock file * update tests * add test typo * update tests * fix test typo * cleanups * fix resid for weights * only test predict when fepois without fixef * fix bug in predict method with WLS
1 parent 5ae158a commit 05f44bd

File tree

7 files changed

+145
-92
lines changed

7 files changed

+145
-92
lines changed

.coverage

-52 KB
Binary file not shown.

poetry.lock

Lines changed: 83 additions & 58 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyfixest/estimation/FixestMulti_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def _estimate_all_models(
329329
# special case: sometimes it is useful to fit models as
330330
# "Y ~ 0 | f1 + f2" to demean Y and to use the predict() method
331331
if FIT._X_is_empty:
332-
FIT._u_hat = Y.to_numpy() - Yd_array
332+
FIT._u_hat = Yd_array
333333
else:
334334
FIT.get_fit()
335335

pyfixest/estimation/feols_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,7 +1465,7 @@ def predict(
14651465
)
14661466

14671467
if newdata is None:
1468-
return self._Y_untransformed.to_numpy().flatten() - self._u_hat.flatten()
1468+
return self._Y_untransformed.to_numpy().flatten() - self.resid()
14691469

14701470
newdata = _polars_to_pandas(newdata).reset_index(drop=False)
14711471

@@ -1803,7 +1803,7 @@ def resid(self) -> np.ndarray:
18031803
np.ndarray
18041804
A np.ndarray with the residuals of the estimated regression model.
18051805
"""
1806-
return self._u_hat
1806+
return self._u_hat.flatten() / np.sqrt(self._weights).flatten()
18071807

18081808
def ritest(
18091809
self,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "pyfixest"
3-
version = "0.24.1"
3+
version = "0.24.2"
44

55
description = "Fast high dimensional fixed effect estimation following syntax of the fixest R package. Supports OLS, IV and Poisson regression and a range of inference procedures (HC1-3, CRV1 & CRV3, wild bootstrap, randomization inference, simultaneous CIs, Romano-Wolf's multiple testing correction). Additionally, supports (some of) the regression based new Difference-in-Differences Estimators (Did2s, Linear Projections)."
66
authors = ["Alexander Fischer <[email protected]>", "Styfen Schär"]

tests/test_vs_fixest.py

Lines changed: 56 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@
8181
("Y~X1|f2^f3"),
8282
("Y~X1|f1 + f2^f3"),
8383
("Y~X1|f2^f3^f1"),
84+
# empty models
85+
("Y ~ 1 | f1"),
86+
("Y ~ 1 | f1 + f2"),
87+
("Y ~ 0 | f1"),
88+
("Y ~ 0 | f1 + f2"),
8489
]
8590

8691
iv_fmls = [
@@ -122,15 +127,11 @@ def check_absolute_diff(x1, x2, tol, msg=None):
122127
@pytest.mark.parametrize("error_type", ["2"])
123128
@pytest.mark.parametrize("dropna", [False])
124129
@pytest.mark.parametrize("inference", ["iid", "hetero", {"CRV1": "group_id"}])
125-
# @pytest.mark.parametrize("inference", ["iid", {"CRV1": "group_id"}])
126130
@pytest.mark.parametrize("weights", [None, "weights"])
127131
@pytest.mark.parametrize("f3_type", ["str", "object", "int", "categorical", "float"])
128132
@pytest.mark.parametrize("fml", ols_fmls + ols_but_not_poisson_fml)
129133
@pytest.mark.parametrize("adj", [False, True])
130-
# see here for why not test against cluster_adj = True
131-
# it triggers the N / (N-1) correction, not sure why
132-
# https://github.com/lrberge/fixest/issues/518#issuecomment-2227365516
133-
@pytest.mark.parametrize("cluster_adj", [False])
134+
@pytest.mark.parametrize("cluster_adj", [False, True])
134135
def test_single_fit_feols(
135136
N,
136137
seed,
@@ -197,9 +198,8 @@ def test_single_fit_feols(
197198
py_confint = mod.confint().xs("X1").values
198199
py_nobs = mod._N
199200
py_vcov = mod._vcov[0, 0]
200-
201-
py_resid = mod._u_hat.flatten() # noqa: F841
202-
# TODO: test residuals
201+
py_resid = mod.resid()
202+
py_predict = mod.predict()
203203

204204
df_X1 = _get_r_df(r_fixest)
205205

@@ -209,16 +209,32 @@ def test_single_fit_feols(
209209
r_tstat = df_X1["statistic"]
210210
r_confint = df_X1[["conf.low", "conf.high"]].values.astype(np.float64)
211211
r_nobs = int(stats.nobs(r_fixest)[0])
212-
r_resid = r_fixest.rx2("working_residuals") # noqa: F841
213212
r_vcov = stats.vcov(r_fixest)[0, 0]
213+
r_resid = stats.residuals(r_fixest)
214+
r_predict = stats.predict(r_fixest)
215+
216+
if not mod._X_is_empty:
217+
if inference == "iid" and adj and cluster_adj:
218+
check_absolute_diff(py_nobs, r_nobs, 1e-08, "py_nobs != r_nobs")
219+
check_absolute_diff(py_coef, r_coef, 1e-08, "py_coef != r_coef")
220+
221+
check_absolute_diff(py_vcov, r_vcov, 1e-08, "py_vcov != r_vcov")
222+
check_absolute_diff(py_se, r_se, 1e-08, "py_se != r_se")
223+
check_absolute_diff(py_pval, r_pval, 1e-08, "py_pval != r_pval")
224+
check_absolute_diff(py_tstat, r_tstat, 1e-07, "py_tstat != r_tstat")
225+
check_absolute_diff(py_confint, r_confint, 1e-08, "py_confint != r_confint")
226+
227+
# residuals are invariant to the vcov type
228+
if inference == "iid" and adj and not cluster_adj:
229+
check_absolute_diff(
230+
(py_resid)[0:5], (r_resid)[0:5], 1e-07, "py_resid != r_resid"
231+
)
232+
check_absolute_diff(
233+
py_predict[0:5], r_predict[0:5], 1e-07, "py_predict != r_predict"
234+
)
214235

215-
check_absolute_diff(py_nobs, r_nobs, 1e-08, "py_nobs != r_nobs")
216-
check_absolute_diff(py_coef, r_coef, 1e-08, "py_coef != r_coef")
217-
check_absolute_diff(py_vcov, r_vcov, 1e-08, "py_vcov != r_vcov")
218-
check_absolute_diff(py_se, r_se, 1e-08, "py_se != r_se")
219-
check_absolute_diff(py_pval, r_pval, 1e-08, "py_pval != r_pval")
220-
check_absolute_diff(py_tstat, r_tstat, 1e-07, "py_tstat != r_tstat")
221-
check_absolute_diff(py_confint, r_confint, 1e-08, "py_confint != r_confint")
236+
if mod._X_is_empty:
237+
assert mod._beta_hat.size == 0
222238

223239
if not weights:
224240
py_r2 = mod._r2
@@ -296,9 +312,7 @@ def test_single_fit_fepois(
296312
py_nobs = mod._N
297313
py_vcov = mod._vcov[0, 0]
298314
py_deviance = mod.deviance
299-
300-
py_resid = mod._u_hat.flatten() # noqa: F841
301-
# TODO: test residuals
315+
py_resid = mod.resid()
302316

303317
df_X1 = _get_r_df(r_fixest)
304318

@@ -308,19 +322,29 @@ def test_single_fit_fepois(
308322
r_tstat = df_X1["statistic"]
309323
r_confint = df_X1[["conf.low", "conf.high"]].values.astype(np.float64)
310324
r_nobs = int(stats.nobs(r_fixest)[0])
311-
r_resid = r_fixest.rx2("working_residuals") # noqa: F841
325+
r_resid = stats.residuals(r_fixest)
312326
r_vcov = stats.vcov(r_fixest)[0, 0]
313327
r_deviance = r_fixest.rx2("deviance")
314328

315-
check_absolute_diff(py_nobs, r_nobs, 1e-08, "py_nobs != r_nobs")
316-
check_absolute_diff(py_coef, r_coef, 1e-08, "py_coef != r_coef")
329+
if inference == "iid" and adj and cluster_adj:
330+
check_absolute_diff(py_nobs, r_nobs, 1e-08, "py_nobs != r_nobs")
331+
check_absolute_diff(py_coef, r_coef, 1e-08, "py_coef != r_coef")
332+
check_absolute_diff((py_resid)[0:5], (r_resid)[0:5], 1e-07, "py_coef != r_coef")
333+
317334
check_absolute_diff(py_vcov, r_vcov, 1e-06, "py_vcov != r_vcov")
318335
check_absolute_diff(py_se, r_se, 1e-06, "py_se != r_se")
319336
check_absolute_diff(py_pval, r_pval, 1e-06, "py_pval != r_pval")
320337
check_absolute_diff(py_tstat, r_tstat, 1e-06, "py_tstat != r_tstat")
321338
check_absolute_diff(py_confint, r_confint, 1e-06, "py_confint != r_confint")
322339
check_absolute_diff(py_deviance, r_deviance, 1e-08, "py_deviance != r_deviance")
323340

341+
if not mod._has_fixef:
342+
py_predict = mod.predict()
343+
r_predict = stats.predict(r_fixest)
344+
check_absolute_diff(
345+
py_predict[0:5], r_predict[0:5], 1e-07, "py_predict != r_predict"
346+
)
347+
324348

325349
@pytest.mark.parametrize("N", [1000])
326350
@pytest.mark.parametrize("seed", [76540251])
@@ -398,9 +422,8 @@ def test_single_fit_iv(
398422
py_confint = mod.confint().xs("X1").values
399423
py_nobs = mod._N
400424
py_vcov = mod._vcov[0, 0]
401-
402-
py_resid = mod._u_hat.flatten() # noqa: F841
403-
# TODO: test residuals
425+
py_resid = mod.resid()
426+
py_predict = mod.predict()
404427

405428
df_X1 = _get_r_df(r_fixest)
406429

@@ -410,11 +433,16 @@ def test_single_fit_iv(
410433
r_tstat = df_X1["statistic"]
411434
r_confint = df_X1[["conf.low", "conf.high"]].values.astype(np.float64)
412435
r_nobs = int(stats.nobs(r_fixest)[0])
413-
r_resid = r_fixest.rx2("working_residuals") # noqa: F841
436+
r_resid = stats.resid(r_fixest)
437+
r_predict = stats.predict(r_fixest)
414438
r_vcov = stats.vcov(r_fixest)[0, 0]
415439

416-
check_absolute_diff(py_nobs, r_nobs, 1e-08, "py_nobs != r_nobs")
417-
check_absolute_diff(py_coef, r_coef, 1e-08, "py_coef != r_coef")
440+
if inference == "iid" and adj and cluster_adj:
441+
check_absolute_diff(py_nobs, r_nobs, 1e-08, "py_nobs != r_nobs")
442+
check_absolute_diff(py_coef, r_coef, 1e-08, "py_coef != r_coef")
443+
check_absolute_diff(py_predict[0:5], r_predict[0:5], 1e-07, "py_coef != r_coef")
444+
check_absolute_diff((py_resid)[0:5], (r_resid)[0:5], 1e-07, "py_coef != r_coef")
445+
418446
check_absolute_diff(py_vcov, r_vcov, 1e-07, "py_vcov != r_vcov")
419447
check_absolute_diff(py_se, r_se, 1e-07, "py_se != r_se")
420448
check_absolute_diff(py_pval, r_pval, 1e-06, "py_pval != r_pval")

tests/texfiles/test.tex

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
\midrule
1313
f1 & x & x \\
1414
\midrule
15-
$R^2$ & 0.489 & - \\
16-
S.E. type & by: f1 & by: f1+f2 \\
1715
Observations & 1994 & 997 \\
16+
S.E. type & by: f1 & by: f1+f2 \\
17+
$R^2$ & 0.489 & - \\
1818
\bottomrule
1919
\end{tabular}
2020
\footnotesize Significance levels: $*$ p $<$ 0.05, $**$ p $<$ 0.01, $***$ p $<$ 0.001. Format of coefficient cell: Coefficient

0 commit comments

Comments
 (0)