Update qplot: add nrow/ncol subplot layout; rename quantile regression fit methods

s3alfisc · s3alfisc · commit 33f57bbf1dd3 · 2025-05-20T21:30:16.000+02:00
diff --git a/pyfixest/estimation/quantreg_.py b/pyfixest/estimation/quantreg_.py
@@ -89,20 +89,42 @@ def to_array(self):
 
     def get_fit(self) -> None:
         """Fit a quantile regression model using the interior point method."""
-        self._beta_hat = self.fit_qreg(X=self._X, Y=self._Y, q=self._quantile)
+        self._beta_hat = self.fit_qreg_fn(X=self._X, Y=self._Y, q=self._quantile)
         self._u_hat = self._Y.flatten() - self._X @ self._beta_hat
         self._hessian = self._X.T @ self._X
         self._bread = np.linalg.inv(self._hessian)
 
-    def fit_qreg_ip(
+    def fit_qreg_fn(self, X: np.ndarray, Y: np.ndarray, q: float) -> np.ndarray:
+        """Fit a quantile regression model using the Frisch-Newton Interior Point Solver."""
+        N, _ = X.shape
+
+        beta_hat, has_converged = frisch_newton_solver(
+            A=X.T,
+            b=(1 - q) * X.T @ np.ones(N),
+            c=-Y,
+            u=np.ones(N),
+            q=q,
+            tol=1e-6,
+            max_iter=50,
+            backoff=0.9995,
+        )
+
+        if not has_converged:
+            warnings.warn(
+                "The Frisch-Newton Interior Point solver has not converged after 50 iterations."
+            )
+
+        return -beta_hat.flatten()
+
+    def fit_qreg_pfn(
         self,
         X: np.ndarray,
         Y: np.ndarray,
         q: float,
         rng: np.random.Generator,
         beta_init: Optional[np.ndarray] = None,
     ) -> np.ndarray:
-        """Fit a quantile regression model using the interior point method."""
+        """Fit a quantile regression model using preprocessing and the Frisch-Newton Interior Point Solver."""
         N, k = self._X.shape
         has_converged = False
         compute_beta_init = beta_init is None
@@ -118,7 +140,7 @@ def fit_qreg_ip(
             if compute_beta_init:
                 # get initial sample
                 idx_init = rng.choice(N, size=n_init, replace=False)
-                beta_hat_init = self.fit_qreg(X[idx_init, :], Y[idx_init], q=q)
+                beta_hat_init = self.fit_qreg_fn(X[idx_init, :], Y[idx_init], q=q)
 
             else:
                 beta_hat_init = beta_init
@@ -151,7 +173,7 @@ def fit_qreg_ip(
 
             while not has_converged and n_bad_fixups < max_bad_fixups:
                 # solve the modified problem
-                beta_hat = self.fit_qreg(X=X_sub, Y=Y_sub, q=q)
+                beta_hat = self.fit_qreg_fn(X=X_sub, Y=Y_sub, q=q)
                 r = Y.flatten() - X @ beta_hat
 
                 # count wrong predictions and get their indices
@@ -173,28 +195,6 @@ def fit_qreg_ip(
 
         return beta_hat
 
-    def fit_qreg(self, X: np.ndarray, Y: np.ndarray, q: float) -> np.ndarray:
-        """Fit a quantile regression model and return the coefficients."""
-        N, k = X.shape
-
-        beta_hat, has_converged = frisch_newton_solver(
-            A=X.T,
-            b=(1 - q) * X.T @ np.ones(N),
-            c=-Y,
-            u=np.ones(N),
-            q=q,
-            tol=1e-6,
-            max_iter=50,
-            backoff=0.9995,
-        )
-
-        if not has_converged:
-            warnings.warn(
-                "The Frisch-Newton Interior Point solver has not converged after 50 iterations."
-            )
-
-        return -beta_hat.flatten()
-
     def _vcov_iid(self) -> np.ndarray:
         raise NotImplementedError(
             """vcov = 'iid' for quantile regression is not yet implemented. "
@@ -205,11 +205,11 @@ def _vcov_iid(self) -> np.ndarray:
     def _vcov_nid(self) -> np.ndarray:
         "Compute nonparametric IID (NID) vcov matrix using the Hall-Sheather bandwidth."
         h = get_hall_sheather_bandwidth(q=self._quantile, N=self._N)
-        beta_hat_plus = self.fit_qreg(X=self._X, Y=self._Y, q=self._quantile + h)
-        # beta_hat_plus = self.fit_qreg_ip(X = self._X, Y = self._Y, q = self._quantile + h, rng = self._rng)
+        beta_hat_plus = self.fit_qreg_fn(X=self._X, Y=self._Y, q=self._quantile + h)
+        # beta_hat_plus = self.fit_qreg_pfn(X = self._X, Y = self._Y, q = self._quantile + h, rng = self._rng)
         yhat_plus = self._X @ beta_hat_plus
-        beta_hat_minus = self.fit_qreg(X=self._X, Y=self._Y, q=self._quantile - h)
-        # beta_hat_minus = self.fit_qreg_ip(X = self._X, Y = self._Y, q = self._quantile - h, rng = self._rng)
+        beta_hat_minus = self.fit_qreg_fn(X=self._X, Y=self._Y, q=self._quantile - h)
+        # beta_hat_minus = self.fit_qreg_pfn(X = self._X, Y = self._Y, q = self._quantile - h, rng = self._rng)
         yhat_minus = self._X @ beta_hat_minus
 
         s = (yhat_plus - yhat_minus) / (2 * h)
@@ -269,7 +269,7 @@ def frisch_newton_solver(
     b = b.flatten()
     u = u.flatten()
 
-    x = (1 - 0.5) * np.ones(n)
+    x = (1 - q) * np.ones(n)
     s = u - x
     d = c.copy()
     d_plus = np.maximum(d, 0)
@@ -285,7 +285,6 @@ def frisch_newton_solver(
 
     # 6) Quick sanity checks (optional)
     if True:
-        # import pdb; pdb.set_trace()
         assert np.all(z > 0)
         assert np.all(x > 0)
         assert np.all(s > 0)
@@ -331,8 +330,8 @@ def step_length(a: tuple, b: tuple, backoff: float = 0.9995):
         dw_aff = -w - (w / s) * ds_aff
 
         # Step lengths (eq. (9))
-        alpha_p_aff = step_length(a=(x, dx_aff), b=(s, ds_aff))
-        alpha_d_aff = step_length(a=(z, dz_aff), b=(w, dw_aff))
+        alpha_p_aff = step_length(a=(x, dx_aff), b=(s, ds_aff), backoff=backoff)
+        alpha_d_aff = step_length(a=(z, dz_aff), b=(w, dw_aff), backoff=backoff)
 
         # 6) Compute mu_new  and centering sigma  (eq (10))
         x_pred = x + alpha_p_aff * dx_aff
@@ -359,8 +358,8 @@ def step_length(a: tuple, b: tuple, backoff: float = 0.9995):
         dw_cor = -(w / s) * ds_cor + (mu_targ - ds_aff * dw_aff) / s
 
         # 9) Final step lengths (corrector) — eq (12)
-        alpha_p_cor = step_length(a=(x, dx_cor), b=(s, ds_cor))
-        alpha_d_cor = step_length(a=(z, dz_cor), b=(w, dw_cor))
+        alpha_p_cor = step_length(a=(x, dx_cor), b=(s, ds_cor), backoff=backoff)
+        alpha_d_cor = step_length(a=(z, dz_cor), b=(w, dw_cor), backoff=backoff)
 
         # 10) Update all variables / corrector step
         # Update
diff --git a/pyfixest/report/visualize.py b/pyfixest/report/visualize.py
@@ -1,5 +1,6 @@
 from typing import Optional, Union
 
+import math
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -416,6 +417,8 @@ def qplot(
     models: ModelInputType,
     rename_models: Optional[dict] = None,
     figsize: Optional[tuple] = None,
+    ncol: Optional[int] = None,
+    nrow: Optional[int] = None,
 ):
     """
     Plot regression quantiles.
@@ -428,7 +431,10 @@ def qplot(
         The size of the figure. If None, the default size is used.
     rename_models : dict, optional
         A dictionary to rename the models. The keys are the original model names and the values the new names.
-
+    ncol : int, optional
+        Number of columns of subplots. Default is None. Cannot be set jointly with nrow; if both are None, a single row is used.
+    nrow : int, optional
+        Number of rows of subplots. Default is None. Cannot be set jointly with ncol; if both are None, a single row is used.
     Returns
     -------
     object
@@ -455,9 +461,12 @@ def qplot(
         df_all = pd.concat([df_all, df], axis=0)
 
     df_all.reset_index(inplace=True)
+
     return _qplot(
         data=df_all,
         figsize=figsize,
+        nrow=nrow,
+        ncol=ncol,
     )
 
 
@@ -698,53 +707,71 @@ def _coefplot_matplotlib(
     return f
 
 
-def _qplot(data: pd.DataFrame, figsize) -> plt.Figure:
+def _qplot(data: pd.DataFrame, nrow: Optional[int]=None, ncol: Optional[int]=None, figsize:tuple[int]=(10, 6)):
     """
-    Plot quantile regression coefficients with 95% confidence intervals.
-    Each coefficient gets its own panel, quantiles on the x-axis, and
-    coefficient estimates with error bars on the y-axis.
+    Plot point estimates ± confidence intervals by quantile,
+    with one subplot per coefficient.
 
     Parameters
     ----------
-    data: pd.DataFrame
-        Input data sets
-        Expects `data` to have columns:
-        - 'Coefficient'   (e.g. 'Intercept', 'X1', 'X2')
-        - 'quantile'      (numeric, e.g. 0.1, 0.5, 0.9)
-        - 'Estimate'      (point estimate)
-        - '2.5%'          (lower bound of 95% CI)
-        - '97.5%'         (upper bound of 95% CI)
-    figsize: tuple
-        tuple with the figsize of the matplotlib plt.
+    data : pandas.DataFrame
+        Must contain columns ['Coefficient', 'quantile', 'Estimate', '2.5%', '97.5%'].
+    nrow : int, optional
+        Number of rows of subplots. If both nrow and ncol are None, defaults to 1.
+    ncol : int, optional
+        Number of columns of subplots. At most one of nrow/ncol may be set; the other is derived from the number of coefficients.
+    figsize : tuple, optional
+        Figure size passed to plt.subplots().
+
+    Raises
+    ------
+    ValueError
+        If both nrow and ncol are specified.
     """
-    figsize = set_figsize(figsize, plot_backend="matplotlib")
-    df = pd.DataFrame(data)
-    coeffs = df.Coefficient.unique()
-
-    fig, axes = plt.subplots(
-        # nrows=4,
-        ncols=4,
-        sharey=True,
-        figsize=figsize,
-    )
-
-    for ax, coef in zip(axes, coeffs):
-        sub = df[df["Coefficient"] == coef].sort_values("quantile")
-        x = sub["quantile"]
-        y = sub["Estimate"]
-        lower_err = y - sub["2.5%"]
-        upper_err = sub["97.5%"] - y
-
-        ax.errorbar(x, y, yerr=[lower_err, upper_err], fmt="o-", capsize=5)
+    # --- default layout: one row if neither is specified ---
+    if nrow is None and ncol is None:
+        nrow = 1
+
+    # --- error if both specified ---
+    if (nrow is not None) and (ncol is not None):
+        raise ValueError("Specify only one of nrow or ncol, not both.")
+
+    # --- determine number of panels ---
+    coeffs = list(data['Coefficient'].unique())
+    K = len(coeffs)
+
+    # compute rows × cols
+    if nrow is not None:
+        rows = nrow
+        cols = math.ceil(K / rows)
+    else:
+        cols = ncol
+        rows = math.ceil(K / cols)
+
+    # --- make subplots ---
+    fig, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
+    axes = axes.flatten()
+
+    # --- plot each coefficient in its own panel ---
+    for i, coef in enumerate(coeffs):
+        ax = axes[i]
+        sub = data[data['Coefficient'] == coef].sort_values('quantile')
+        q    = sub['quantile'].values
+        est  = sub['Estimate'].values
+        lo   = est - sub['2.5%'].values
+        hi   = sub['97.5%'].values - est
+
+        ax.errorbar(q, est, yerr=[lo, hi], fmt='o-')
         ax.set_title(coef)
-        ax.set_xlabel("Quantile")
-        ax.set_xticks(x)
-        ax.grid(True)
+        ax.set_xlabel('Quantile')
+        ax.set_ylabel('Estimate')
+
+    # --- hide any unused axes ---
+    for j in range(K, rows * cols):
+        axes[j].set_visible(False)
 
-    axes[0].set_ylabel("Coefficient estimate")
-    fig.suptitle("Quantile Regression Coefficients with 95% CIs", y=1.02)
     plt.tight_layout()
-    return fig
+    return fig, axes
 
 
 def _get_model_df(