diff --git a/examples/solve_sparse_system.py b/examples/solve_sparse_system.py new file mode 100644 index 0000000..c9a17cb --- /dev/null +++ b/examples/solve_sparse_system.py @@ -0,0 +1,79 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + +import xobjects as xo +import scipy.sparse as sp +import numpy as np +from xobjects.sparse._test_helpers import rel_residual + +''' +The goal of this example is to provide a short user guide for the xo.sparse module. + +The xo.sparse module can be used to solve sparse linear systems of equations: + A*x = b +where A is a sparse matrix. + +Currently this module only supports CPU and Cupy contexts. This module contains a +variety of solvers for different contexts, with consistent APIs. The intended use +is to reuse the same LHS for many solves, so the solvers work as follows: + +solver(A) # Performs decomposition/factorization +solver.solve(b) # Solves Ax = b using precomputed factors + +For optimal performance across backends b should be a column-major (F Contiguous) +array or vector. + +The intended interface for this module is: + +xo.sparse.factorized_sparse_solver() + +The function includes detailed documentation for usage, but in short, it returns +the best performing solver based on the context and available modules. If the +context is not explicitly defined, it is inferred based on the input matrix. + +This is how modules that build upon this functionality within xsuite should interact +with the xo.sparse module, so that cross-platform compatibility is guaranteed. + +For development and convenience purposes xo.sparse provides the: +xo.sparse.solvers module + +which provides the following: +xo.sparse.solvers.CPU. + - scipysplu : Alias for scipy SuperLU + - KLUSuperLU : Alias for PyKLU +xo.sparse.solvers.CPU. + - cuDSS : nvmath.sparse.advanced.DirectSolver Wrapper with a SuperLU-like interface + - cachedSpSM : Rewrite of cupy's SuperLU to cache the SpSM analysis step + offering massive speedups compared to cupy splu when the only + available backend is SpSM + - cupysplu : Alias for scipy SuperLU +''' + +# Example: solve small matrix system +n = 5 +# Create matrix +main = 2.0 + np.abs(np.random.standard_normal(n)) +lower = np.random.standard_normal(n-1) +upper = np.random.standard_normal(n-1) +A = sp.diags( + diagonals=[lower, main, upper], + offsets=[-1, 0, 1], + format="csc" +) + +# Create solver: +print("Solver selection process: ") +solver = xo.sparse.factorized_sparse_solver(A, verbose = True) + +# Generate random vector to solve: +b = np.random.standard_normal(n) + +# Solve system +x = solver.solve(b) + +# Calculate relative residual to assess solver: +res = rel_residual(A,x,b) +print("Relative residual of solution ", res) +print("Residual should be small (<10^-12)") diff --git a/tests/test_sparse_solvers.py b/tests/test_sparse_solvers.py new file mode 100644 index 0000000..eb43633 --- /dev/null +++ b/tests/test_sparse_solvers.py @@ -0,0 +1,199 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. 
# +# ########################################### # + +import numpy as np +import scipy.sparse as sp +import xobjects as xo +from xobjects.test_helpers import fix_random_seed +from xobjects.context import ModuleNotAvailableError +import warnings +import pytest + +''' +The following tests rely on computing the relative residual of the solution +The relative residual can be defined as: + || A * x - b || + η = --------------------- + ||b|| + +Typically, the expected value for this quantity is: +* Ideally: 1e-12 - 1e-14 +* Ill-conditioned systems: 1e-9 - 1e-10 + +In this module, we evaluate the residual as follows: +* We compare the residual of the reference solver (scipy) with ABS_TOL +* We compare the residual of the KLU solver with ABS_TOL +* We ensure that the residual of the KLU Solver is within TOLERANCE_FACTOR + of the reference solver. + +For reference, the machine precision for FP64 is ~2.2e-16 (PRECISION) + +Note: The completely random sparse system is more prone to failing the tests +due to numerical noise, often requires looser tolerances. Still worth including +but if testing larger systems, could potentially be omitted. +''' + +# ---- Helper functions ---- +def issymmetric(A, tol=0): + if A.shape[0] != A.shape[1]: + return False + diff = A - A.T + if tol == 0: + return diff.nnz == 0 + else: + # tolerance-based check + return abs(diff).max() <= tol + + +def assert_residual_ok(res_ref, res_solver, + abs_tol=1e-12, + factor=10): + """ + Check that our solver's residual is both: + - absolutely small enough (abs_tol), + - not catastrophically worse than the reference (factor * res_ref). + """ + # sanity: reference solver itself should be good + assert res_ref < abs_tol, f"Reference residual too large: {res_ref}" + + # absolute bound + assert res_solver < abs_tol, ( + f"Residual {res_solver} exceeds absolute tolerance {abs_tol}" + ) + + # relative bound vs reference + assert res_solver <= factor * res_ref, ( + f"Residual {res_solver} not within factor {factor} of " + f"reference residual {res_ref}" + ) + +# ---- Tests ---- +cpu_tests = [ + ("scipySLU", xo.ContextCpu()), + ("PyKLU", xo.ContextCpu()), + ] + +cupy_tests = [] +try: + cupy_tests = [ + ("cuDSS", xo.ContextCupy()), + ("CachedSLU", xo.ContextCupy()), + ("cupySLU", xo.ContextCupy()), + ] +except ModuleNotAvailableError: + warnings.warn("!!!ContextCupy unavailable. 
" + "Skipping tests for Cupy Solvers!!!") + +PARAMS = cpu_tests + cupy_tests + +SPARSE_SYSTEM_SIZE = 2000 # (n,n) matrix +NUM_BATCHES = 10 +PRECISION = np.finfo(float).eps +ABS_TOL = 1e-12 +TOLERANCE_FACTOR = 2 + +def batch_vectors_as_matrix(vector_list): + return np.asfortranarray(np.moveaxis(np.array(vector_list),0,-1)) + +@fix_random_seed(1337) +def make_random_sparse_system(n, nbatches, density=0.01): + A = sp.random( + n, n, + density=density, + format="csc", + random_state=np.random, + data_rvs=np.random.standard_normal + ) + # Make it nonsingular & better conditioned: + # Add something on the diagonal so pivots aren't tiny/zero + A = A + sp.eye(n, format="csc") * 5.0 # tweak factor as you like + b_array = [] + if nbatches == 0: + b = np.random.standard_normal(n) + b_array.append(b) + else: + for i in range(nbatches): + b = np.cos(2*i/(nbatches-1)*np.pi) * np.random.standard_normal(n) + b_array.append(b) + b = batch_vectors_as_matrix(b_array) + solver = sp.linalg.splu(A) + x = solver.solve(b) + return (A, b, x, b_array) + +@fix_random_seed(1337) +def make_tridiagonal_system(n, nbatches): + main = 2.0 + np.abs(np.random.standard_normal(n)) + lower = np.random.standard_normal(n-1) + upper = np.random.standard_normal(n-1) + A = sp.diags( + diagonals=[lower, main, upper], + offsets=[-1, 0, 1], + format="csc" + ) + b_array = [] + if nbatches == 0: + b = np.random.standard_normal(n) + b_array.append(b) + else: + for i in range(nbatches): + b = np.cos(2*i/(nbatches-1)*np.pi) * np.random.standard_normal(n) + b_array.append(b) + b = batch_vectors_as_matrix(b_array) + solver = sp.linalg.splu(A) + x = solver.solve(b) + return (A, b, x, b_array) + +random_system = make_random_sparse_system(SPARSE_SYSTEM_SIZE, 0) +tridiag_system = make_tridiagonal_system(SPARSE_SYSTEM_SIZE, 0) + +@pytest.mark.parametrize("test_solver,test_context", PARAMS) +@pytest.mark.parametrize("sparse_system", [random_system, tridiag_system]) +def test_vector_solve(test_solver, test_context, sparse_system): + A_sp, b_sp, x_sp, _ = sparse_system + assert not issymmetric(A_sp) + + scipy_residual = xo.sparse.rel_residual(A_sp,x_sp,b_sp) + + if "Cpu" in str(test_context): + A = test_context.splike_lib.sparse.csc_matrix(A_sp) + if "Cupy" in str(test_context): + A = test_context.splike_lib.sparse.csr_matrix(A_sp) + b_test = test_context.nparray_to_context_array(b_sp) + b = test_context.nplike_lib.asfortranarray(b_test) + solver = xo.sparse.factorized_sparse_solver(A, + force_solver = test_solver, + context = test_context + ) + x = solver.solve(b) + + solver_residual = xo.sparse.rel_residual(A,x,b) + assert_residual_ok(scipy_residual,solver_residual, + abs_tol = ABS_TOL, factor = TOLERANCE_FACTOR) + +random_system = make_random_sparse_system(SPARSE_SYSTEM_SIZE, NUM_BATCHES) +tridiag_system = make_tridiagonal_system(SPARSE_SYSTEM_SIZE, NUM_BATCHES) + +@pytest.mark.parametrize("test_solver,test_context", PARAMS) +@pytest.mark.parametrize("sparse_system", [random_system, tridiag_system]) +def test_batched_solve(test_solver, test_context, sparse_system): + A_sp, b_sp, x_sp, _ = sparse_system + assert not issymmetric(A_sp) + scipy_residual = xo.sparse.rel_residual(A_sp,x_sp,b_sp) + if "Cpu" in str(test_context): + A = test_context.splike_lib.sparse.csc_matrix(A_sp) + if "Cupy" in str(test_context): + A = test_context.splike_lib.sparse.csr_matrix(A_sp) + b_test = test_context.nparray_to_context_array(b_sp) + b = test_context.nplike_lib.asfortranarray(b_test) + solver = xo.sparse.factorized_sparse_solver(A, + n_batches = NUM_BATCHES, + 
force_solver = test_solver, + context = test_context + ) + x = solver.solve(b) + + solver_residual = xo.sparse.rel_residual(A,x,b) + assert_residual_ok(scipy_residual,solver_residual, + abs_tol = ABS_TOL, factor = TOLERANCE_FACTOR) \ No newline at end of file diff --git a/xobjects/__init__.py b/xobjects/__init__.py index 0d9f709..8c2f5a7 100644 --- a/xobjects/__init__.py +++ b/xobjects/__init__.py @@ -19,6 +19,7 @@ from .string import String from .struct import Struct, Field from .ref import Ref, UnionRef +from . import sparse from .context_cpu import ContextCpu from .context_pyopencl import ContextPyopencl diff --git a/xobjects/context.py b/xobjects/context.py index effd95a..bbd57de 100644 --- a/xobjects/context.py +++ b/xobjects/context.py @@ -203,6 +203,10 @@ def __init__(self, message="Module not available"): def __getattr__(self, attr): raise NameError(self.message) +class ModuleNotAvailableError(ModuleNotFoundError): + """Raised when an optional dependency is missing.""" + pass + class XContext(ABC): minimum_alignment = 1 diff --git a/xobjects/context_cupy.py b/xobjects/context_cupy.py index 3838376..d1b3b87 100644 --- a/xobjects/context_cupy.py +++ b/xobjects/context_cupy.py @@ -18,6 +18,7 @@ classes_from_kernels, sort_classes, sources_from_classes, + ModuleNotAvailableError, ) from .linkedarray import BaseLinkedArray from .specialize_source import specialize_source @@ -398,6 +399,11 @@ def __init__( default_shared_mem_size_bytes=0, device=None, ): + if not _enabled: + raise ModuleNotAvailableError( + "cupy is not installed. " "ContextCupy is not available!" + ) + if device is not None: cupy.cuda.Device(device).use() diff --git a/xobjects/context_pyopencl.py b/xobjects/context_pyopencl.py index c61e21e..196eb17 100644 --- a/xobjects/context_pyopencl.py +++ b/xobjects/context_pyopencl.py @@ -18,6 +18,7 @@ classes_from_kernels, sort_classes, sources_from_classes, + ModuleNotAvailableError, ) from .linkedarray import BaseLinkedArray from .specialize_source import specialize_source @@ -127,6 +128,11 @@ def __init__( """ + if not _enabled: + raise ModuleNotAvailableError( + "pyopencl is not installed. ContextPyopencl is not available!" + ) + super().__init__() # TODO assume one device only diff --git a/xobjects/sparse/__init__.py b/xobjects/sparse/__init__.py new file mode 100644 index 0000000..9a4cca2 --- /dev/null +++ b/xobjects/sparse/__init__.py @@ -0,0 +1,3 @@ +from ._sparse import factorized_sparse_solver, rel_residual +from . import solvers +__all__ = ["factorized_sparse_solver","solvers", "rel_residual"] diff --git a/xobjects/sparse/_sparse.py b/xobjects/sparse/_sparse.py new file mode 100644 index 0000000..80f80f6 --- /dev/null +++ b/xobjects/sparse/_sparse.py @@ -0,0 +1,345 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. 
# +# ########################################### # + +import scipy.sparse +import numpy.linalg as npl +from numpy import ndarray as nparray +from typing import Optional, Literal, Union +from ..context import XContext +from ..context_cpu import ContextCpu +from ..context_cupy import ContextCupy +from ..context_pyopencl import ContextPyopencl +from .solvers._abstract_solver import SuperLUlikeSolver +from ..general import _print + +try: + from cupy import ndarray as cparray + import cupyx.scipy.sparse + from cupyx import cusparse + _cupy_available = True +except: + _cupy_available = False + pass + +def factorized_sparse_solver(A: Union[scipy.sparse.csr_matrix, + scipy.sparse.csc_matrix], + n_batches: int = 0, + force_solver: Optional[ + Literal["scipySLU", + "PyKLU", + "cuDSS", + "CachedSLU", + "cupySLU"] + ] = None, + solverKwargs: dict = None, + context: Union[ + Literal["cpu", + "cupy", + "pyopencl"], + XContext + ] = None, + verbose: bool = False + ) -> SuperLUlikeSolver: + """ + Build and return a factorized sparse linear solver on CPU or GPU. + + This function inspects the provided sparse matrix and execution context + and returns a *factorized* solver object (SuperLU-like). The solver can + then be reused to efficiently solve multiple linear systems with the same + matrix `A`. + + The actual backend is chosen automatically based on: + * The requested/derived `context` ("cpu", "cupy", "pyopencl"), + * The availability of optional libraries (PyKLU, cuDSS, CuPy/CUSPARSE), + * And the optional `force_solver` argument. + + On CPU: + * Default: try PyKLU, fall back to `scipy.sparse.linalg.splu`. + * You may force `"scipySLU"` or `"PyKLU"` explicitly. + + On CUDA/CuPy: + * Default: try cuDSS (`DirectSolverSuperLU`), then fall back to: + - `cupyx.scipy.sparse.linalg.splu` if CUSPARSE `csrsm2` is available, or + - a cached SuperLU-based solver (`luLU`) and finally `cupyx.splu`. + * You may force `"cuDSS"`, `"CachedSLU"`, or `"cupySLU"` explicitly. + + PyOpenCL: + * Currently not supported and will raise `NotImplementedError`. + + Parameters + ---------- + A : scipy.sparse.spmatrix or cupyx.scipy.sparse.spmatrix + Sparse system matrix to factorize. + + The matrix is internally converted to the format expected by the + chosen backend: + + * CPU context: converted to CSC (`scipy.sparse.csc_matrix`). + * CuPy/GPU context: converted to CSR (`cupyx.scipy.sparse.csr_matrix`). + + For **best performance**, you should pass `A` already in the + preferred format to avoid extra conversions: + + * If the context is (or will usually be) **CPU**, provide `A` + as a CSC matrix (`A.tocsc()`). + * If the context is **GPU/CuPy**, provide `A` as a CSR matrix + (`A.tocsr()` or `cupyx.scipy.sparse.csr_matrix`). + + If `context` is `None`, it is still inferred from the type of `A` + and the availability of CuPy, e.g.: + + * SciPy sparse or NumPy array → `"cpu"`, + * CuPy sparse or CuPy array → `"cupy"`, + * otherwise a `TypeError` is raised. + + n_batches : int, optional + Controls the expected shape of the right-hand side (RHS) for GPU + solvers and hence whether solves are treated as single or batched: + + * If ``n_batches == 0`` (default), the solver is configured for + single-RHS solves and expects a vector RHS of shape ``(n,)``. + * If ``n_batches > 0``, the solver is configured for batched solves + and expects a 2D RHS array of shape ``(n, n_batches)`` (i.e. + ``nrhs = n_batches``). + + This argument is primarily used by CUDA-based solvers (e.g. 
cuDSS and + cached SuperLU) to preconfigure internal data structures for batched + solves. It has no effect for CPU-based solvers. + + force_solver : {"scipySLU", "PyKLU", "cuDSS", "CachedSLU", "cupySLU"}, optional + If provided, forces the use of a specific backend instead of the + automatic selection: + + * `"scipySLU"` : Use `scipy.sparse.linalg.splu` (CPU). + * `"PyKLU"` : Use the `PyKLU.Klu` solver (CPU). + * `"cuDSS"` : Use CUDA/cuDSS-based `DirectSolverSuperLU` (GPU). + * `"CachedSLU"`: Use CUDA cached spsm SuperLU (`luLU`) (GPU). + * `"cupySLU"` : Use `cupyx.scipy.sparse.linalg.splu` (GPU). + + Using a solver that does not match the current `context` will result + in a `ValueError`. + + solverKwargs : dict, optional + Extra keyword arguments forwarded to the underlying solver constructor. + If `None`, an empty dict is used. + + Some backends make use of `permc_spec` (matrix permutation strategy). + When not explicitly provided and appropriate, this function sets + `permc_spec="MMD_AT_PLUS_A"` as a sensible default for the matrices that + will typically be encountered in an xobjects workflow. + + context : {"cpu", "cupy", "pyopencl"} or XContext, optional + Execution context. Can be either: + + * A string: + - `"cpu"`: Use CPU-based solvers (SciPy / PyKLU). + - `"cupy"`: Use CuPy/CUDA-based solvers. + - `"pyopencl"`: PyOpenCL context (currently unsupported). + * A context object instance: + - `ContextCpu` + - `ContextCupy` + - `ContextPyopencl` + + If `None`, the context is inferred from `A` as described above. + + verbose : bool, optional + If `True`, prints debug messages describing the solver-selection + process, fallbacks, and the final solver that is returned. + + Returns + ------- + SuperLUlikeSolver + A factorized solver object compatible with SciPy’s `splu`-like + interface (i.e. typically exposing a `solve` method and related + accessors). The exact concrete type depends on the backend: + * CPU: + - `scipy.sparse.linalg.SuperLU` (for `"scipySLU"`), + - `PyKLU.Klu` (for `"PyKLU"`). + * CUDA/CuPy: + - `DirectSolverSuperLU` (cuDSS), + - `luLU` (cached spsm SuperLU), + - `cupyx.scipy.sparse.linalg.SuperLU` (for `"cupySLU"`). + + All returned solver objects implement a `solve(b)` method. + For **optimal performance across all backends**, the right-hand + side `b` should be passed as a **Fortran-contiguous (column-major)** + array: + + * For a single RHS: shape ``(n,)`` or ``(n, 1)`` (Fortran contiguous). + * For multiple RHSs: shape ``(n, nrhs)`` with **Fortran layout**. + + If `b` is not Fortran-contiguous, the solver will internally copy or + transform it, which can incur extra overhead—especially on CUDA/GPU + backends and in batched-solve scenarios. + + Raises + ------ + TypeError + If the type of `A` is unsupported when inferring the context. + + AssertionError + If `A` does not match the required type for the chosen context + + ModuleNotFoundError + If a requested solver backend depends on a module that is not + installed (e.g. CuPy, PyKLU, cuDSS), and no fallback is available. + + RuntimeError + If a requested GPU solver fails during initialization. + + NotImplementedError + If `context` is `"pyopencl"` or `ContextPyopencl`, since no sparse + solver is currently implemented for that backend. + + ValueError + If an invalid `context` string is provided, or if `force_solver` + does not match any known solver for the active context. + + Notes + ----- + - For best performance on CPU, PyKLU is preferred when available. 
+ - For CuPy/CUDA, cuDSS is preferred when available, and this function + will automatically fall back to other solvers if cuDSS is not present + or fails at runtime. + - The returned solver is *factorized* and should be reused to solve + multiple right-hand sides efficiently. + + Examples + -------- + Factorize a SciPy sparse matrix on CPU with automatic solver selection: + + >>> A = scipy.sparse.random(1000, 1000, density=0.01, format="csr") + >>> solver = factorized_sparse_solver(A) + >>> x = solver.solve(b) + + Explicitly request the SciPy SuperLU solver on CPU: + + >>> solver = factorized_sparse_solver(A, force_solver="scipySLU") + + Using CuPy and cuDSS (requires CuPy and cuDSS bindings): + + >>> A_gpu = cupyx.scipy.sparse.csr_matrix(A) + >>> solver = factorized_sparse_solver(A_gpu, context="cupy") + >>> x_gpu = solver.solve(b_gpu) + """ + if solverKwargs is None: + solverKwargs = {} + if context is None: + dbugprint(verbose, "No context provided. " \ + "Context will be inferred from matrix") + if isinstance(A, scipy.sparse.spmatrix) or isinstance(A, nparray): + context = 'cpu' + elif (_cupy_available and + (isinstance(A, cupyx.scipy.sparse.spmatrix) + or isinstance(A, cparray))): + context = 'cupy' + else: + raise TypeError("Unsupported type for A") + if context == "cpu" or isinstance(context, ContextCpu): + assert isinstance(A, scipy.sparse.spmatrix), ( + "When using CPU context A must be a scipy.sparse matrix" + ) + A = A.tocsc() # CPU Solvers require csc format + if 'permc_spec' not in solverKwargs: + solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} + if force_solver is None: + dbugprint(verbose, "No solver requested. " \ + "Picking best solver for CPU Context") + try: + dbugprint(verbose, "Attempting to use PyKLU") + import PyKLU + solver = PyKLU.Klu(A) + dbugprint(verbose, "PyKLU succeeded") + except (ModuleNotFoundError, RuntimeError) as e: + dbugprint(verbose, "PyKLU failed. " \ + "Falling back to scipy.splu \n" + f"Encountered error: {e}") + + solver = scipy.sparse.linalg.splu(A,**solverKwargs) + elif force_solver == "scipySLU": + solver = scipy.sparse.linalg.splu(A,**solverKwargs) + elif force_solver == "PyKLU": + import PyKLU + solver = PyKLU.Klu(A) + else: + raise ValueError("Unrecognized CPU Sparse solver. Available options: " + "scipySLU, PyKLU") + + elif context == "cupy" or isinstance(context, ContextCupy): + if not _cupy_available: + raise ModuleNotFoundError("No cupy module found. " \ + "ContextCupy unavailable") + assert isinstance(A ,cupyx.scipy.sparse.spmatrix), ( + "When using ContextCupy, input must be " + "cupyx.scipy.sparse matrix") + + A = A.tocsr() # GPU solvers require csr format + if force_solver is not None and force_solver != "cuDSS": + if 'permc_spec' not in solverKwargs: + solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} + if force_solver is None: + dbugprint(verbose, "No solver requested. " \ + "Picking best solver for Cupy Context") + import warnings + try: + dbugprint(verbose, "Attempting to use cuDSS Solver") + from .solvers.CUDA._cuDSSLU import DirectSolverSuperLU + solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) + dbugprint(verbose, "cuDSS succeeded") + except (ModuleNotFoundError, RuntimeError) as e: + dbugprint(verbose, "cuDSS failed. \n" + f"Encountered Error: {e}") + warnings.warn("cuDSS not available. 
Performance will be degraded") + if 'permc_spec' not in solverKwargs: + solverKwargs = solverKwargs | {"permc_spec":"MMD_AT_PLUS_A"} + if cusparse.check_availability('csrsm2'): + dbugprint(verbose, "csrsm2 available. Using cupyx.splu solver") + solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) + else: + try: + dbugprint(verbose, "csrms2 unavailable. " \ + "Attempting to use CachedSuperLU (spsm)") + from .solvers.CUDA._luLU import luLU + solver = luLU(A, n_batches = n_batches, **solverKwargs) + dbugprint(verbose, "CachedSuperLU succeeded") + except RuntimeError as e: + dbugprint(verbose, "CachedSuperLU failed. \n" + f"Encountered error: {e} \n" + "Falling back to cupyx.splu with spsm") + solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) + elif force_solver == "cuDSS": + from .solvers.CUDA._cuDSSLU import DirectSolverSuperLU + solver = DirectSolverSuperLU(A, n_batches = n_batches, **solverKwargs) + elif force_solver == "CachedSLU": + from .solvers.CUDA._luLU import luLU + solver = luLU(A, n_batches = n_batches, **solverKwargs) + elif force_solver == "cupySLU": + solver = cupyx.scipy.sparse.linalg.splu(A, **solverKwargs) + else: + raise ValueError("Unrecognized CUDA Sparse solver. Available options: " + "cuDSS, CachedSLU, cupySLU") + elif context == "pyopencl" or isinstance(context, ContextPyopencl): + raise NotImplementedError("No sparse solver is currently available " + "for PyOpenCL context") + else: + raise ValueError("Invalid context. Available contexts are: " \ + "cpu, cupy, pyopencl") + dbugprint(verbose, "Returning solver: " + str(solver)) + return solver + +def dbugprint(verbose: bool, text: str): + if verbose: + _print("[xo.sparse] "+text) + +def rel_residual(A,x,b): + if hasattr(A, "get"): + A = A.get() + if hasattr(x, "get"): + x = x.get() + if hasattr(b, "get"): + b = b.get() + assert scipy.sparse.issparse(A), ("A must be a sparse matrix") + + return npl.norm(A@x - b) / (npl.norm(b)) \ No newline at end of file diff --git a/xobjects/sparse/solvers/CPU/__init__.py b/xobjects/sparse/solvers/CPU/__init__.py new file mode 100644 index 0000000..07b2c79 --- /dev/null +++ b/xobjects/sparse/solvers/CPU/__init__.py @@ -0,0 +1,11 @@ +from scipy.sparse.linalg import splu as scipysplu +try: + from PyKLU import Klu as KLU +except (ModuleNotFoundError,ImportError) as e: + def KLU(*args, _import_err=e, **kwargs): + from ....context import ModuleNotAvailableError + raise ModuleNotAvailableError( + "KLU is not available. Could not import required backend." + ) from _import_err + +__all__ = ["scipysplu", "KLU"] \ No newline at end of file diff --git a/xobjects/sparse/solvers/CUDA/__init__.py b/xobjects/sparse/solvers/CUDA/__init__.py new file mode 100644 index 0000000..536fe49 --- /dev/null +++ b/xobjects/sparse/solvers/CUDA/__init__.py @@ -0,0 +1,26 @@ +try: + from ._cuDSSLU import DirectSolverSuperLU as cuDSS +except (ModuleNotFoundError,ImportError) as e: + def cuDSS(*args, _import_err=e, **kwargs): + from ....context import ModuleNotAvailableError + raise ModuleNotAvailableError( + "cuDSS is not available. Could not import required backend." + ) from _import_err +try: + from ._luLU import luLU as cachedSpSM +except (ModuleNotFoundError,ImportError) as e: + def cachedSpSM(*args, _import_err=e, **kwargs): + from ....context import ModuleNotAvailableError + raise ModuleNotAvailableError( + "cachedSpSM is not available. Could not import required backend." 
+ ) from _import_err +try: + from cupyx.scipy.sparse.linalg import splu as cupysplu +except (ModuleNotFoundError,ImportError) as e: + def cupysplu(*args, _import_err=e, **kwargs): + from ....context import ModuleNotAvailableError + raise ModuleNotAvailableError( + "cupysplu is not available. Could not import required backend." + ) from _import_err + +__all__ = ["cuDSS", "cachedSpSM", "cupysplu"] \ No newline at end of file diff --git a/xobjects/sparse/solvers/CUDA/_cuDSSLU.py b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py new file mode 100644 index 0000000..31ded5e --- /dev/null +++ b/xobjects/sparse/solvers/CUDA/_cuDSSLU.py @@ -0,0 +1,106 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + +import cupy as cp +import cupyx.scipy.sparse as sp +import nvmath +from typing import Literal, Union +import warnings + +class DirectSolverSuperLU(nvmath.sparse.advanced.DirectSolver): + """cuDSS-based direct solver; reuse factors for many RHS (SuperLU-style).""" + + def __init__(self, A: sp.csr_matrix, *, + n_batches: int = 0, + assume_general: bool = True, + **kwargs): + # 1) Validate A + if not isinstance(A, sp.csr_matrix): + raise TypeError("A must be cupyx.scipy.sparse.csr_matrix") + if A.shape[0] != A.shape[1]: + raise ValueError("A must be square") + if A.indices.dtype != cp.int32 or A.indptr.dtype != cp.int32: + A = sp.csr_matrix((A.data, + A.indices.astype(cp.int32), + A.indptr.astype(cp.int32)), + shape=A.shape) + # 2) Sample RHS to initialize parent + if n_batches == 0: + b_sample = cp.zeros(A.shape[0], dtype=A.dtype) + else: + b_sample = cp.zeros((A.shape[0],n_batches), + dtype=A.dtype, order = 'F') + self.b_dtype = A.dtype + self.b_shape = b_sample.shape + super().__init__(A, b_sample, **kwargs) + + # 3) Optional: configure plan + pc = self.plan_config + if assume_general: + # LU path (cuDSS will often infer this, but we can be explicit if available) + try: + pc.matrix_type = nvmath.sparse.advanced.DirectSolverMatrixType.GENERAL + except AttributeError: + pass # older wheels infer type; safe to ignore + + # 4) Build & factorize once; warmup solve to build internal caches + super().plan() + self.fac_info = super().factorize() + # Warmup on zeros (uses parent solve()) + super().reset_operands(b=b_sample) + super().solve() + + def _prepare_rhs(self, b): + # Accept 1-D or 2-D; ensure dtype/device and layout + if not isinstance(b, cp.ndarray): + raise TypeError('b must be cupy.ndarray') + if b.dtype != self.b_dtype: + raise TypeError(f"RHS dtype {b.dtype} does not match " + f"matrix dtype {self.b_dtype}") + if b.ndim == 1: + return b # vector is fine + if not b.flags.f_contiguous: + warnings.warn("cuDSS expects a column-major ('F' ordered) " \ + "matrix for the RHS. RHS was autmatically converted " + "by the solver.") + return cp.asfortranarray(b) + + def solve(self, b): + """Solve A x = b using cached factors. Accepts (n,) or (n,k) RHS.""" + assert b.shape == self.b_shape, ( + "Cached solver can only accept RHS with the same shape " + f"as the initialized value. {self.b_shape}. 
" + "The initialized RHS shape can be changed by initializing " + "using a different value for n_batches" + ) + b = self._prepare_rhs(b) + super().reset_operands(b=b) + return super().solve() + + # Optional helpers for SuperLU-like API + def __call__(self, b): # x = solver(b) + return self.solve(b) + + def refactorize_values(self, A_new): + """Refactorize when numerical values change but sparsity pattern is the same.""" + if not isinstance(A_new, sp.csr_matrix): + raise TypeError("A_new must be CSR") + if A_new.shape != self.operands.a_shape: + raise ValueError("Shape mismatch") + if A_new.indices.dtype != cp.int32 or A_new.indptr.dtype != cp.int32: + A_new = sp.csr_matrix((A_new.data, A_new.indices.astype(cp.int32), + A_new.indptr.astype(cp.int32)), shape=A_new.shape) + super().reset_operands(A=A_new) + self.fac_info = self.factorize() + + def close(self): + """Explicitly free resources when not using a context manager.""" + try: + self.__exit__(None, None, None) # DirectSolver is context-manageable + except Exception: + pass + + def __dealloc__(self): + self.close() \ No newline at end of file diff --git a/xobjects/sparse/solvers/CUDA/_luLU.py b/xobjects/sparse/solvers/CUDA/_luLU.py new file mode 100644 index 0000000..25665ec --- /dev/null +++ b/xobjects/sparse/solvers/CUDA/_luLU.py @@ -0,0 +1,327 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. # +# ########################################### # + +import cupy as _cupy +import numpy as _numpy +import cupyx.scipy.sparse +from cupyx.scipy.sparse.linalg import SuperLU +from cupy_backends.cuda.api import driver as _driver +from cupy_backends.cuda.api import runtime as _runtime +from cupy_backends.cuda.libs import cusparse as _cusparse +from cupy._core import _dtype +from cupy.cuda import device as _device +from cupy.cuda import stream as _stream +from cupyx.cusparse import _dtype_to_IndexType, SpMatDescriptor, DnMatDescriptor, check_availability +import scipy.sparse.linalg +try: + import scipy.sparse.linalg + scipy_available = True +except ImportError: + scipy_available = False + +class CachedAbSolver: + + def __init__(self, a, b, alpha=1.0, lower=True, unit_diag=False, transa=False): + if not check_availability('spsm'): + raise RuntimeError('spsm is not available.') + + # Canonicalise transa + if transa is False: + transa = 'N' + elif transa is True: + transa = 'T' + elif transa not in 'NTH': + raise ValueError(f'Unknown transa (actual: {transa})') + + # Check A's type and sparse format + if cupyx.scipy.sparse.isspmatrix_csr(a): + pass + elif cupyx.scipy.sparse.isspmatrix_csc(a): + if transa == 'N': + a = a.T + transa = 'T' + elif transa == 'T': + a = a.T + transa = 'N' + elif transa == 'H': + a = a.conj().T + transa = 'N' + lower = not lower + elif cupyx.scipy.sparse.isspmatrix_coo(a): + pass + else: + raise ValueError('a must be CSR, CSC or COO sparse matrix') + assert a.has_canonical_format + + # Check B's ndim + if b.ndim == 1: + is_b_vector = True + b = b.reshape(-1, 1) + elif b.ndim == 2: + is_b_vector = False + else: + raise ValueError('b.ndim must be 1 or 2') + self.is_b_vector = is_b_vector + + # Check shapes + if not (a.shape[0] == a.shape[1] == b.shape[0]): + raise ValueError('mismatched shape') + + # Check dtypes + dtype = a.dtype + if dtype.char not in 'fdFD': + raise TypeError('Invalid dtype (actual: {})'.format(dtype)) + if dtype != b.dtype: + raise TypeError('dtype mismatch') + self.dtype = dtype + + # Prepare fill mode + if lower is True: + 
fill_mode = _cusparse.CUSPARSE_FILL_MODE_LOWER + elif lower is False: + fill_mode = _cusparse.CUSPARSE_FILL_MODE_UPPER + else: + raise ValueError('Unknown lower (actual: {})'.format(lower)) + self.fill_mode = fill_mode + + # Prepare diag type + if unit_diag is False: + diag_type = _cusparse.CUSPARSE_DIAG_TYPE_NON_UNIT + elif unit_diag is True: + diag_type = _cusparse.CUSPARSE_DIAG_TYPE_UNIT + else: + raise ValueError('Unknown unit_diag (actual: {})'.format(unit_diag)) + self.diag_type = diag_type + self.transa = transa + # Prepare op_a + if transa == 'N': + op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE + elif transa == 'T': + op_a = _cusparse.CUSPARSE_OPERATION_TRANSPOSE + else: # transa == 'H' + if dtype.char in 'fd': + op_a = _cusparse.CUSPARSE_OPERATION_TRANSPOSE + else: + op_a = _cusparse.CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE + self.op_a = op_a + # Prepare op_b + self.op_b = self._get_opb(b) + + # Allocate space for matrix C. Note that it is known cusparseSpSM requires + # the output matrix zero initialized. + m, _ = a.shape + if self.op_b == _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE: + _, n = b.shape + else: + n, _ = b.shape + c_shape = m, n + self.c_shape = c_shape + + self._perform_analysis(a, b, alpha=alpha) + + def _perform_analysis(self, a, b, alpha=1.0): + """Solves a sparse triangular linear system op(a) * x = alpha * op(b). + + Args: + a (cupyx.scipy.sparse.csr_matrix or cupyx.scipy.sparse.coo_matrix): + Sparse matrix with dimension ``(M, M)``. + b (cupy.ndarray): Dense matrix with dimension ``(M, K)``. + alpha (float or complex): Coefficient. + lower (bool): + True: ``a`` is lower triangle matrix. + False: ``a`` is upper triangle matrix. + unit_diag (bool): + True: diagonal part of ``a`` has unit elements. + False: diagonal part of ``a`` has non-unit elements. + transa (bool or str): True, False, 'N', 'T' or 'H'. + 'N' or False: op(a) == ``a``. + 'T' or True: op(a) == ``a.T``. + 'H': op(a) == ``a.conj().T``. 
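+
+        Note: only the cuSPARSE SpSM *analysis* phase runs here; the matrix
+        descriptor, the SpSM descriptor and the workspace buffer are cached on
+        the instance so that subsequent ``solve`` calls reuse this analysis.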
+ """ + + c = _cupy.zeros(self.c_shape, dtype=a.dtype, order='f') + + # Prepare descriptors and other parameters + self.handle = _device.get_cusparse_handle() + self.mat_a = SpMatDescriptor.create(a) + mat_b = DnMatDescriptor.create(b) + mat_c = DnMatDescriptor.create(c) + self.spsm_descr = _cusparse.spSM_createDescr() + self.alpha = _numpy.array(alpha, dtype=c.dtype).ctypes + self.cuda_dtype = _dtype.to_cuda_dtype(c.dtype) + self.algo = _cusparse.CUSPARSE_SPSM_ALG_DEFAULT + + try: + # Specify Lower|Upper fill mode + self.mat_a.set_attribute(_cusparse.CUSPARSE_SPMAT_FILL_MODE, self.fill_mode) + + # Specify Unit|Non-Unit diagonal type + self.mat_a.set_attribute(_cusparse.CUSPARSE_SPMAT_DIAG_TYPE, self.diag_type) + + # Allocate the workspace needed by the succeeding phases + buff_size = _cusparse.spSM_bufferSize( + self.handle, self.op_a, self.op_b, self.alpha.data, self.mat_a.desc, mat_b.desc, + mat_c.desc, self.cuda_dtype, self.algo, self.spsm_descr) + self.buff = _cupy.empty(buff_size, dtype=_cupy.int8) + + # Perform the analysis phase + _cusparse.spSM_analysis( + self.handle, self.op_a, self.op_b, self.alpha.data, self.mat_a.desc, mat_b.desc, + mat_c.desc, self.cuda_dtype, self.algo, self.spsm_descr, self.buff.data.ptr) + except Exception as e: + raise RuntimeError('spSM_analysis failed.') from e + + def _get_opb(self, b): + # Prepare op_b + if b._f_contiguous: + op_b = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE + elif b._c_contiguous: + if _cusparse.get_build_version() < 11701: # earlier than CUDA 11.6 + raise ValueError('b must be F-contiguous.') + b = b.T + op_b = _cusparse.CUSPARSE_OPERATION_TRANSPOSE + else: + raise ValueError('b must be F-contiguous or C-contiguous.') + return op_b + + def solve(self, b): + assert b.dtype == self.dtype + assert self._get_opb(b) == self.op_b + + if b.ndim == 1: + is_b_vector = True + b = b.reshape(-1, 1) + elif b.ndim == 2: + is_b_vector = False + else: + raise ValueError('b.ndim must be 1 or 2') + self.is_b_vector = is_b_vector + + c = _cupy.zeros(self.c_shape, dtype=self.dtype, order='f') + mat_b = DnMatDescriptor.create(b) + mat_c = DnMatDescriptor.create(c) + try: + # Executes the solve phase + _cusparse.spSM_solve( + self.handle, self.op_a, self.op_b, self.alpha.data, self.mat_a.desc, mat_b.desc, + mat_c.desc, self.cuda_dtype, self.algo, self.spsm_descr, self.buff.data.ptr) + finally: + _cupy.cuda.get_current_stream().synchronize() + # Reshape back if B was a vector + if self.is_b_vector: + c = c.reshape(-1) + return c + + # def __del__(self): + # # Destroy matrix descriptor + # print("Deleting solver obj...") + # _cusparse.spSM_destroyDescr(self.spsm_descr) + +class luLU(SuperLU): + + def __init__(self, A, + trans = 'N', + permc_spec=None, + n_batches: int = 0, + diag_pivot_thresh=None, + relax=None, + panel_size=None, + options={}): + if not check_availability('spsm'): + raise RuntimeError('spsm is not available.') + if not scipy_available: + raise RuntimeError('scipy is not available') + if not cupyx.scipy.sparse.isspmatrix(A): + raise TypeError('A must be cupyx.scipy.sparse.spmatrix') + if A.shape[0] != A.shape[1]: + raise ValueError('A must be a square matrix (A.shape: {})' + .format(A.shape)) + if A.dtype.char not in 'fdFD': + raise TypeError('Invalid dtype (actual: {})'.format(A.dtype)) + + a = A.get().tocsc() + a_slu = scipy.sparse.linalg.splu( + a, permc_spec=permc_spec, diag_pivot_thresh=diag_pivot_thresh, + relax=relax, panel_size=panel_size, options=options) + super().__init__(a_slu) + self.b_dtype = self.L.dtype + 
self._init_solvers(trans=trans, n_batches = n_batches) + + def _init_solvers(self,trans = 'N', n_batches = 0): + if n_batches == 0: + b_sample = _cupy.zeros(self.shape[0], + dtype=self.b_dtype) + else: + b_sample = _cupy.zeros((self.shape[0],n_batches), + dtype=self.b_dtype, order = 'F') + self.trans = trans + self.b_shape = b_sample.shape + self.Lsolver = CachedAbSolver(self.L, b_sample, lower=True, transa=self.trans) + self.Usolver = CachedAbSolver(self.U, b_sample, lower=False, transa=self.trans) + # self.Usolver = CachedAbSolver(self.U.T, b_sample, lower=True, transa="T") #Can improve performance at times + + def solve(self, rhs, trans='N'): + """Solves linear system of equations with one or several right-hand sides. + + Args: + rhs (cupy.ndarray): Right-hand side(s) of equation with dimension + ``(M)`` or ``(M, K)``. + trans (str): 'N', 'T' or 'H'. + 'N': Solves ``A * x = rhs``. + 'T': Solves ``A.T * x = rhs``. + 'H': Solves ``A.conj().T * x = rhs``. + + Returns: + cupy.ndarray: + Solution vector(s) + """ # NOQA + from cupyx import cusparse + if trans != self.trans: + raise AssertionError("Solve function assumes cached configuration. " \ + "Rebuild cache by calling _init_solvers with desired configuration.") + if not isinstance(rhs, _cupy.ndarray): + raise TypeError('ojb must be cupy.ndarray') + if rhs.ndim not in (1, 2): + raise ValueError('rhs.ndim must be 1 or 2 (actual: {})'. + format(rhs.ndim)) + if rhs.shape[0] != self.shape[0]: + raise ValueError('shape mismatch (self.shape: {}, rhs.shape: {})' + .format(self.shape, rhs.shape)) + assert rhs.shape == self.b_shape, ( + "Cached solver can only accept RHS with the same shape " + f"as the initialized value. {self.b_shape}. " + "The initialized RHS shape can be changed by initializing " + "using a different value for n_batches" + ) + if trans not in ('N', 'T', 'H'): + raise ValueError('trans must be \'N\', \'T\', or \'H\'') + + x = rhs.astype(self.L.dtype) + if trans == 'N': + if self.perm_r is not None: + if x.ndim == 2 and x._f_contiguous: + x = x.T[:, self._perm_r_rev].T # want to keep f-order + else: + x = x[self._perm_r_rev] + x = self.Lsolver.solve(x) + x = self.Usolver.solve(x) + if self.perm_c is not None: + x = x[self.perm_c] + else: + if self.perm_c is not None: + if x.ndim == 2 and x._f_contiguous: + x = x.T[:, self._perm_c_rev].T # want to keep f-order + else: + x = x[self._perm_c_rev] + x = self.Usolver.solve(x) + x = self.Lsolver.solve(x) + if self.perm_r is not None: + x = x[self.perm_r] + + if not x._f_contiguous: + # For compatibility with SciPy + x = x.copy(order='F') + return x + \ No newline at end of file diff --git a/xobjects/sparse/solvers/__init__.py b/xobjects/sparse/solvers/__init__.py new file mode 100644 index 0000000..3721b50 --- /dev/null +++ b/xobjects/sparse/solvers/__init__.py @@ -0,0 +1,3 @@ +from . import CPU +from . import CUDA +__all__ = ["CPU", "CUDA"] diff --git a/xobjects/sparse/solvers/_abstract_solver.py b/xobjects/sparse/solvers/_abstract_solver.py new file mode 100644 index 0000000..9acfe10 --- /dev/null +++ b/xobjects/sparse/solvers/_abstract_solver.py @@ -0,0 +1,16 @@ +# copyright ################################# # +# This file is part of the Xobjects Package. # +# Copyright (c) CERN, 2021. 
# +# ########################################### # + +from abc import ABC, abstractmethod + +class SuperLUlikeSolver(ABC): + + @abstractmethod + def __init__(self, A): + pass + + @abstractmethod + def solve(self,b): + return x \ No newline at end of file diff --git a/xobjects/test_helpers.py b/xobjects/test_helpers.py index 00d244d..7db897c 100644 --- a/xobjects/test_helpers.py +++ b/xobjects/test_helpers.py @@ -98,7 +98,8 @@ def wrapper(*args, **kwargs): rng_state = np.random.get_state() try: np.random.seed(seed) - test_function(*args, **kwargs) + #Return value of function instead of None + return test_function(*args, **kwargs) finally: np.random.set_state(rng_state)