Skip to content

Commit dad6d84

Browse files
committed
Harden DataFrameLike for production
* Raise a clear ImportError naming the install command when the optional narwhals package is missing, instead of a bare ModuleNotFoundError (declaration-time fail-fast, matching how DataFrame fails on missing pandas).
* Document the serialization asymmetry (backend-neutral records out, pandas in) and that cuDF/Modin are Narwhals-supported but not run in CI (cuDF is GPU-only, Modin's pinned deps conflict with the test environment).
* Annotate the inherited in-place ordered defaulting as deliberate DataFrame parity.
* Add a skip-guarded Modin test and add narwhals + polars to the type-check environment so pyright validates the Narwhals API rather than skipping an unresolved import.
1 parent 7a8a300 commit dad6d84

3 files changed

Lines changed: 50 additions & 6 deletions

File tree

param/parameters.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3506,10 +3506,17 @@ def _get_narwhals():
35063506
"""Import and return the optional ``narwhals`` dependency.
35073507
35083508
Deferred so ``param`` keeps no hard dependency on ``narwhals`` (the same
3509-
way pandas is deferred for :class:`DataFrame`). Raises ``ModuleNotFoundError``
3510-
naming ``narwhals`` if it is not installed.
3509+
way pandas is deferred for :class:`DataFrame`). Raises a clear
3510+
``ImportError`` naming the feature and the install command if ``narwhals``
3511+
is not available.
35113512
"""
3512-
import narwhals
3513+
try:
3514+
import narwhals
3515+
except ModuleNotFoundError as e:
3516+
raise ImportError(
3517+
"param.DataFrameLike requires the optional 'narwhals' package. "
3518+
"Install it with: pip install narwhals"
3519+
) from e
35133520
return narwhals
35143521

35153522

@@ -3518,14 +3525,18 @@ class DataFrameLike(ClassSelector[t.Any]):
35183525
Parameter whose value is any dataframe-like object that Narwhals recognises.
35193526
35203527
Unlike :class:`DataFrame`, which is restricted to ``pandas.DataFrame``,
3521-
``DataFrameLike`` accepts pandas, Polars, PyArrow, cuDF, Modin and any
3522-
other backend supported by `Narwhals <https://narwhals-dev.github.io>`_.
3528+
``DataFrameLike`` accepts any object supported by
3529+
`Narwhals <https://narwhals-dev.github.io>`_. pandas, Polars and PyArrow
3530+
are exercised in this project's test suite; Modin and cuDF are supported
3531+
through the same Narwhals code path but are not run in CI (Modin's pinned
3532+
dependencies conflict with the test environment and cuDF is GPU-only).
35233533
The value is passed through unchanged, so reading the parameter returns
35243534
the original native object (no Narwhals wrapper). Authors who want a
35253535
backend-agnostic API can call ``narwhals.from_native`` on the value
35263536
themselves.
35273537
3528-
Narwhals is an optional dependency, imported on instantiation. The
3538+
Narwhals is an optional dependency, imported on instantiation; a clear
3539+
``ImportError`` with the install command is raised if it is missing. The
35293540
structure of the frame can be constrained by the rows and columns
35303541
arguments:
35313542
@@ -3544,6 +3555,13 @@ class DataFrameLike(ClassSelector[t.Any]):
35443555
``allow_lazy=True`` to also accept lazy frames (Polars ``LazyFrame``,
35453556
Dask, DuckDB). Row-count validation is skipped for lazy frames so the
35463557
frame is never implicitly collected.
3558+
3559+
Serialization is intentionally backend-neutral: ``serialize`` emits a
3560+
list of records via Narwhals (a lazy frame is collected at this point),
3561+
and ``deserialize`` reconstructs a ``pandas.DataFrame`` because JSON
3562+
carries no backend information. Round-tripping therefore does not
3563+
preserve a non-pandas backend; callers needing another backend can
3564+
rebuild from the records form.
35473565
"""
35483566

35493567
__slots__ = ['rows', 'columns', 'ordered', 'allow_lazy']
@@ -3671,6 +3689,10 @@ def _validate(self, val):
36713689
and all(isinstance(v, (type(None), numbers.Number)) for v in self.columns)): # Numeric bounds tuple
36723690
_length_bounds_check(self, self.columns, len(cols), 'columns')
36733691
elif isinstance(self.columns, (list, set)):
3692+
# Mirrors DataFrame._validate exactly (including this in-place
3693+
# ``ordered`` defaulting) so the two classes behave identically;
3694+
# cleaning up the slot mutation is deferred to a cross-cutting
3695+
# change that touches both.
36743696
self.ordered = isinstance(self.columns, list) if self.ordered is None else self.ordered
36753697
difference = set(self.columns) - {str(el) for el in cols}
36763698
if difference:

pixi.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,10 @@ lint-install = 'pre-commit install'
209209
[feature.type.dependencies]
210210
ty = "==0.0.34"
211211
mypy = "*"
212+
narwhals = "*"
212213
numpy = "*"
213214
pandas = "*"
215+
polars = "*"
214216
pyrefly = "*"
215217
pyright = "*"
216218
IPython = "*"

tests/testdataframelike.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,17 @@
3030
except ModuleNotFoundError:
3131
pa = None
3232

33+
try:
34+
import modin.pandas as mpd
35+
except Exception:
36+
# modin import can fail for reasons beyond ModuleNotFoundError (missing
37+
# execution engine), so guard broadly and skip rather than error.
38+
mpd = None
39+
3340
skip_no_pandas = pytest.mark.skipif(pd is None, reason="pandas not available")
3441
skip_no_polars = pytest.mark.skipif(pl is None, reason="polars not available")
3542
skip_no_pyarrow = pytest.mark.skipif(pa is None, reason="pyarrow not available")
43+
skip_no_modin = pytest.mark.skipif(mpd is None, reason="modin not available")
3644

3745

3846
class TestDataFrameLikeDefaults(unittest.TestCase):
@@ -77,6 +85,18 @@ class P(param.Parameterized):
7785
src = pa.table({'a': [1, 2]})
7886
self.assertIs(P(df=src).df, src)
7987

88+
@skip_no_modin
89+
def test_modin(self):
90+
class P(param.Parameterized):
91+
df = param.DataFrameLike(
92+
default=pd.DataFrame({'a': [1]}), rows=2, columns={'a'})
93+
src = mpd.DataFrame({'a': [1, 2]})
94+
p = P(df=src)
95+
self.assertIs(p.df, src)
96+
self.assertIsInstance(p.df, mpd.DataFrame)
97+
with self.assertRaises(ValueError):
98+
P(df=mpd.DataFrame({'a': [1]})) # rows=2 mismatch
99+
80100

81101
@skip_no_pandas
82102
class TestDataFrameLikeRejects(unittest.TestCase):

0 commit comments

Comments
 (0)