Add log-likelihood, pseudo R2 and Pearson chi2 to Fepois, and test the first two against fixest (#1083)

Ariadnaaz · s3alfisc · web-flow · commit fe5b074630ad · 2025-11-26T19:08:07.000+01:00
* Implement log-likelihood, Pearson chi2 and pseudo R2

* Included log-likelihood and pseudo R-squared in tests against fixest

* rename and fix tests

* run pre commit

* update changelog

---------

Co-authored-by: Alexander Fischer <alexander-fischer1801@t-online.de>
diff --git a/docs/changelog.qmd b/docs/changelog.qmd
@@ -11,7 +11,12 @@ fit2 = pf.feols("Y ~ X1 + X2", data = df)
 fit3 = pf.feols("Y ~ X1 + X2 | f1", data = df)
 ```
 
-## PyFixest 0.40.0
+## PyFixest 0.41.0 (In Development)
+
+- Adds the following statistics to the `Fepois` class: `_loglik`, `_loglik_null`, `_pseudo_r2`, `_pearson_chi2`.
+
+
+## PyFixest 0.40.1
 
 ### Breaking Changes for compatibility with `fixest` 0.13
 
diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 import pandas as pd
+from scipy.special import gammaln
 
 from pyfixest.errors import (
     NonConvergenceError,
@@ -338,6 +339,20 @@ def get_fit(self) -> None:
         self._u_hat_working = resid
         self._u_hat_response = self._Y - np.exp(eta)
 
+        y = self._Y.flatten()
+        self._y_hat_null = np.full_like(y, np.mean(y), dtype=float)
+
+        self._loglik = np.sum(
+            y * np.log(self._Y_hat_response) - self._Y_hat_response - gammaln(y + 1)
+        )
+        self._loglik_null = np.sum(
+            y * np.log(self._y_hat_null) - self._y_hat_null - gammaln(y + 1)
+        )
+        self._pseudo_r2 = 1 - (self._loglik / self._loglik_null)
+        self._pearson_chi2 = np.sum(
+            (y - self._Y_hat_response) ** 2 / self._Y_hat_response
+        )
+
         self._Y = WZ
         self._X = WX
         self._Z = self._X
diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py
@@ -555,6 +555,9 @@ def test_single_fit_fepois(data_fepois, dropna, inference, f3_type, fml, k_adj,
     py_df_k = int(mod._df_k)
     py_df_t = int(mod._df_t)
     py_n_coefs = mod.coef().values.size
+    py_loglik = mod._loglik
+    py_loglik_null = mod._loglik_null
+    py_pseudo_r2 = mod._pseudo_r2
 
     df_X1 = _get_r_df(r_fixest)
     ro.globalenv["r_fixest"] = r_fixest
@@ -572,6 +575,9 @@ def test_single_fit_fepois(data_fepois, dropna, inference, f3_type, fml, k_adj,
     r_df_k = int(ro.r('attr(r_fixest$cov.scaled, "df.K")')[0])
     r_df_t = int(ro.r('attr(r_fixest$cov.scaled, "df.t")')[0])
     r_n_coefs = int(df_X1["n_coef"])
+    r_loglik = float(ro.r("r_fixest$loglik"))
+    r_loglik_null = float(ro.r("r_fixest$ll_null"))
+    r_pseudo_r2 = float(ro.r('fixest::r2(r_fixest)["pr2"]'))
 
     if inference == "iid" and k_adj and G_adj:
         check_absolute_diff(py_nobs, r_nobs, 1e-08, "py_nobs != r_nobs")
@@ -600,6 +606,11 @@ def test_single_fit_fepois(data_fepois, dropna, inference, f3_type, fml, k_adj,
     check_absolute_diff(py_tstat, r_tstat, 1e-06, "py_tstat != r_tstat")
     check_absolute_diff(py_confint, r_confint, 1e-06, "py_confint != r_confint")
     check_absolute_diff(py_deviance, r_deviance, 1e-08, "py_deviance != r_deviance")
+    check_absolute_diff(py_loglik, r_loglik, 1e-08, "py_ll != r_loglik")
+    check_absolute_diff(
+        py_loglik_null, r_loglik_null, 1e-08, "py_loglik_null != r_loglik_null"
+    )
+    check_absolute_diff(py_pseudo_r2, r_pseudo_r2, 1e-08, "py_pseudo_r2 != r_pseudo_r2")
 
     if not mod._has_fixef:
         py_predict_response = mod.predict(type="response")