Switch default solver to scipy.linalg.solve() [#846 issue] (#904)

AronNemeth · s3alfisc · web-flow · commit c40b7cc3e8f2 · 2025-05-19T21:21:50.000+02:00
* Solvers: switching to scipy.linalg.solve as default

* update test/solvers

* Update tests_solvers: new test matrix is symmetric and positive definite -> Cholesky works

---------

Co-authored-by: Alexander Fischer <alexander-fischer1801@t-online.de>
diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
@@ -201,7 +201,11 @@ def _estimate_all_models(
         self,
         vcov: Union[str, dict[str, str], None],
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
         ],
         demeaner_backend: Literal["numba", "jax"] = "numba",
         collin_tol: float = 1e-6,
diff --git a/pyfixest/estimation/estimation.py b/pyfixest/estimation/estimation.py
@@ -34,7 +34,7 @@ def feols(
     store_data: bool = True,
     lean: bool = False,
     weights_type: WeightsTypeOptions = "aweights",
-    solver: SolverOptions = "np.linalg.solve",
+    solver: SolverOptions = "scipy.linalg.solve",
     demeaner_backend: DemeanerBackendOptions = "numba",
     use_compression: bool = False,
     reps: int = 100,
@@ -118,8 +118,9 @@ def feols(
         see this blog post: https://notstatschat.rbind.io/2020/08/04/weights-in-statistics/.
 
     solver : SolverOptions, optional.
-        The solver to use for the regression. Can be either "np.linalg.solve" or
-        "np.linalg.lstsq". Defaults to "np.linalg.solve".
+        The solver to use for the regression. Can be "np.linalg.lstsq",
+        "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax".
+        Defaults to "scipy.linalg.solve".
 
     demeaner_backend: DemeanerBackendOptions, optional
         The backend to use for demeaning. Can be either "numba" or "jax". Defaults to "numba".
@@ -510,7 +511,7 @@ def fepois(
     iwls_maxiter: int = 25,
     collin_tol: float = 1e-10,
     separation_check: Optional[list[str]] = None,
-    solver: SolverOptions = "np.linalg.solve",
+    solver: SolverOptions = "scipy.linalg.solve",
     demeaner_backend: DemeanerBackendOptions = "numba",
     drop_intercept: bool = False,
     i_ref1=None,
@@ -569,8 +570,9 @@ def fepois(
         Either "fe" or "ir". Executes "fe" by default (when None).
 
     solver : SolverOptions, optional.
-        The solver to use for the regression. Can be either "np.linalg.solve" or
-        "np.linalg.lstsq". Defaults to "np.linalg.solve".
+        The solver to use for the regression. Can be "np.linalg.lstsq",
+        "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax".
+        Defaults to "scipy.linalg.solve".
 
     demeaner_backend: DemeanerBackendOptions, optional
         The backend to use for demeaning. Can be either "numba" or "jax".
@@ -737,7 +739,7 @@ def feglm(
     iwls_maxiter: int = 25,
     collin_tol: float = 1e-10,
     separation_check: Optional[list[str]] = None,
-    solver: SolverOptions = "np.linalg.solve",
+    solver: SolverOptions = "scipy.linalg.solve",
     drop_intercept: bool = False,
     i_ref1=None,
     copy_data: bool = True,
@@ -799,8 +801,9 @@ def feglm(
         Either "fe" or "ir". Executes "fe" by default (when None).
 
     solver : SolverOptions, optional.
-        The solver to use for the regression. Can be either "np.linalg.solve" or
-        "np.linalg.lstsq". Defaults to "np.linalg.solve".
+        The solver to use for the regression. Can be "np.linalg.lstsq",
+        "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax".
+        Defaults to "scipy.linalg.solve".
 
     drop_intercept : bool, optional
         Whether to drop the intercept from the model, by default False.
diff --git a/pyfixest/estimation/fegaussian_.py b/pyfixest/estimation/fegaussian_.py
@@ -26,7 +26,11 @@ def __init__(
         tol: float,
         maxiter: int,
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
         ],
         store_data: bool = True,
         copy_data: bool = True,
diff --git a/pyfixest/estimation/feglm_.py b/pyfixest/estimation/feglm_.py
@@ -33,7 +33,11 @@ def __init__(
         tol: float,
         maxiter: int,
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
         ],
         store_data: bool = True,
         copy_data: bool = True,
diff --git a/pyfixest/estimation/feiv_.py b/pyfixest/estimation/feiv_.py
@@ -40,8 +40,9 @@ class Feiv(Feols):
         Names of the coefficients of Z.
     collin_tol : float
         Tolerance for collinearity check.
-    solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"],
-        default is 'np.linalg.solve'. Solver to use for the estimation.
+    solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve",
+        "scipy.sparse.linalg.lsqr", "jax"],
+        default is "scipy.linalg.solve". Solver to use for the estimation.
     demeaner_backend: Literal["numba", "jax"]
         The backend used for demeaning.
     weights_name : Optional[str]
@@ -144,8 +145,12 @@ def __init__(
         fixef_tol: float,
         lookup_demeaned_data: dict[str, pd.DataFrame],
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
-        ] = "np.linalg.solve",
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
+        ] = "scipy.linalg.solve",
         demeaner_backend: Literal["numba", "jax"] = "numba",
         store_data: bool = True,
         copy_data: bool = True,
diff --git a/pyfixest/estimation/felogit_.py b/pyfixest/estimation/felogit_.py
@@ -26,7 +26,11 @@ def __init__(
         tol: float,
         maxiter: int,
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
         ],
         store_data: bool = True,
         copy_data: bool = True,
diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py
@@ -91,8 +91,9 @@ class Feols:
         Type of the weights variable. Either "aweights" for analytic weights or
         "fweights" for frequency weights.
     solver : str, optional.
-        The solver to use for the regression. Can be either "np.linalg.solve" or
-        "np.linalg.lstsq". Defaults to "np.linalg.solve".
+        The solver to use for the regression. Can be "np.linalg.lstsq",
+        "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax".
+        Defaults to "scipy.linalg.solve".
     context : int or Mapping[str, Any]
         A dictionary containing additional context variables to be used by
         formulaic during the creation of the model matrix. This can include
@@ -204,8 +205,9 @@ class Feols:
         Adjusted R-squared value of the model.
     _adj_r2_within : float
         Adjusted R-squared value computed on demeaned dependent variable.
-    _solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"],
-        default is 'np.linalg.solve'. Solver to use for the estimation.
+    _solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve",
+        "scipy.sparse.linalg.lsqr", "jax"],
+        default is "scipy.linalg.solve". Solver to use for the estimation.
     _demeaner_backend: Literal["numba", "jax"]
         The backend used for demeaning.
     _data: pd.DataFrame
@@ -234,8 +236,12 @@ def __init__(
         fixef_tol: float,
         lookup_demeaned_data: dict[str, pd.DataFrame],
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
-        ] = "np.linalg.solve",
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
+        ] = "scipy.linalg.solve",
         demeaner_backend: Literal["numba", "jax"] = "numba",
         store_data: bool = True,
         copy_data: bool = True,
diff --git a/pyfixest/estimation/feols_compressed_.py b/pyfixest/estimation/feols_compressed_.py
@@ -86,7 +86,11 @@ def __init__(
         fixef_tol: float,
         lookup_demeaned_data: dict[str, pd.DataFrame],
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
         ],
         demeaner_backend: Literal["numba", "jax"] = "numba",
         store_data: bool = True,
diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py
@@ -51,8 +51,10 @@ class Fepois(Feols):
         Maximum number of iterations for the IRLS algorithm.
     tol : Optional[float], default=1e-08
         Tolerance level for the convergence of the IRLS algorithm.
-    solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"],
-        default is 'np.linalg.solve'. Solver to use for the estimation.
+    solver : str, optional.
+        The solver to use for the regression. Can be "np.linalg.lstsq",
+        "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax".
+        Defaults to "scipy.linalg.solve".
     demeaner_backend: Literal["numba", "jax"]
         The backend used for demeaning.
     fixef_tol: float, default = 1e-08.
@@ -86,8 +88,12 @@ def __init__(
         tol: float,
         maxiter: int,
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
-        ] = "np.linalg.solve",
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
+        ] = "scipy.linalg.solve",
         demeaner_backend: Literal["numba", "jax"] = "numba",
         context: Union[int, Mapping[str, Any]] = 0,
         store_data: bool = True,
diff --git a/pyfixest/estimation/feprobit_.py b/pyfixest/estimation/feprobit_.py
@@ -28,7 +28,11 @@ def __init__(
         tol: float,
         maxiter: int,
         solver: Literal[
-            "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
+            "np.linalg.lstsq",
+            "np.linalg.solve",
+            "scipy.linalg.solve",
+            "scipy.sparse.linalg.lsqr",
+            "jax",
         ],
         store_data: bool = True,
         copy_data: bool = True,
diff --git a/pyfixest/estimation/literals.py b/pyfixest/estimation/literals.py
@@ -5,7 +5,11 @@
 WeightsTypeOptions = Literal["aweights", "fweights"]
 FixedRmOptions = Literal["singleton", "none"]
 SolverOptions = Literal[
-    "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
+    "np.linalg.lstsq",
+    "np.linalg.solve",
+    "scipy.linalg.solve",
+    "scipy.sparse.linalg.lsqr",
+    "jax",
 ]
 DemeanerBackendOptions = Literal["numba", "jax"]
 PredictionErrorOptions = Literal["prediction"]
diff --git a/pyfixest/estimation/solvers.py b/pyfixest/estimation/solvers.py
@@ -1,4 +1,5 @@
 import numpy as np
+from scipy.linalg import solve
 from scipy.sparse.linalg import lsqr
 from typing_extensions import Literal
 
@@ -7,7 +8,11 @@ def solve_ols(
     tZX: np.ndarray,
     tZY: np.ndarray,
     solver: Literal[
-        "np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"
+        "np.linalg.lstsq",
+        "np.linalg.solve",
+        "scipy.linalg.solve",
+        "scipy.sparse.linalg.lsqr",
+        "jax",
     ],
 ) -> np.ndarray:
     """
@@ -17,8 +22,8 @@ def solve_ols(
     ----------
     tZX (array-like): Z'X.
     tZY (array-like): Z'Y.
-    solver (str): The solver to use. Supported solvers are"np.linalg.lstsq",
-    "np.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax".
+    solver (str): The solver to use. Supported solvers are "np.linalg.lstsq",
+    "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax".
 
     Returns
     -------
@@ -32,6 +37,8 @@ def solve_ols(
         return np.linalg.lstsq(tZX, tZY, rcond=None)[0].flatten()
     elif solver == "np.linalg.solve":
         return np.linalg.solve(tZX, tZY).flatten()
+    elif solver == "scipy.linalg.solve":
+        return solve(tZX, tZY, assume_a="pos").flatten()
     elif solver == "scipy.sparse.linalg.lsqr":
         return lsqr(tZX, tZY)[0].flatten()
     elif solver == "jax":
diff --git a/tests/test_solvers.py b/tests/test_solvers.py
@@ -6,11 +6,11 @@
 
 def test_solve_ols_simple_2x2():
     # Test case 1: Simple 2x2 system
-    tZX = np.array([[1, 2], [3, 4]])
-    tZY = np.array([5, 6])
-    solver = "np.linalg.lstsq"
+    tZX = np.array([[4, 2], [2, 3]])
+    tZY = np.array([10, 8])
+    solver = "scipy.linalg.solve"
     solution = solve_ols(tZX, tZY, solver)
-    assert np.allclose(solution, np.array([-4.0, 4.5]))
+    assert np.allclose(solution, np.array([1.75, 1.5]))
     # Verify solution satisfies the system
     assert np.allclose(tZX @ solution, tZY)
 
@@ -19,21 +19,33 @@ def test_solve_ols_identity():
     # Test case 2: Identity matrix
     tZX = np.eye(2)
     tZY = np.array([1, 2])
-    solver = "np.linalg.lstsq"
+    solver = "scipy.linalg.solve"
     assert np.allclose(solve_ols(tZX, tZY, solver), tZY)
 
 
 @pytest.mark.parametrize(
     argnames="solver",
-    argvalues=["np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"],
-    ids=["np.linalg.lstsq", "np.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"],
+    argvalues=[
+        "scipy.linalg.solve",
+        "np.linalg.lstsq",
+        "np.linalg.solve",
+        "scipy.sparse.linalg.lsqr",
+        "jax",
+    ],
+    ids=[
+        "scipy.linalg.solve",
+        "np.linalg.lstsq",
+        "np.linalg.solve",
+        "scipy.sparse.linalg.lsqr",
+        "jax",
+    ],
 )
 def test_solve_ols_different_solvers(solver):
     # Test case 3: Test different solvers give same result
-    tZX = np.array([[1, 2], [3, 4]])
-    tZY = np.array([5, 6])
+    tZX = np.array([[4, 2], [2, 3]])
+    tZY = np.array([10, 8])
     solution = solve_ols(tZX, tZY, solver)
-    assert np.allclose(solution, np.array([-4.0, 4.5]))
+    assert np.allclose(solution, np.array([1.75, 1.5]))
     # Verify solution satisfies the system
     assert np.allclose(tZX @ solution, tZY)