-
Notifications
You must be signed in to change notification settings - Fork 142
fix: unify ColumnNotFound
for duckdb
and pyspark
#2493
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
6f7a574
8fe45e6
45f09e0
6d58bc2
be834b3
19d6e24
f0a9821
c1cabd1
917d073
5d2972d
4080101
8442596
937a123
0544f30
f41c037
bb17703
df18f34
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I split the test into lazy and eager to simplify a bit the if-else statements. I hope it is a bit more readable ? |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
from tests.utils import POLARS_VERSION | ||
from tests.utils import Constructor | ||
from tests.utils import ConstructorEager | ||
from tests.utils import ConstructorLazy | ||
from tests.utils import assert_equal_data | ||
|
||
|
||
|
@@ -83,48 +84,69 @@ def test_comparison_with_list_error_message() -> None: | |
nw.from_native(pd.Series([[1, 2, 3]]), series_only=True) == [1, 2, 3] # noqa: B015 | ||
|
||
|
||
def test_missing_columns( | ||
constructor: Constructor, request: pytest.FixtureRequest | ||
) -> None: | ||
if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): | ||
request.applymarker(pytest.mark.xfail) | ||
def test_missing_columns_eager(constructor_eager: ConstructorEager) -> None: | ||
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} | ||
df = nw.from_native(constructor(data)) | ||
df = nw.from_native(constructor_eager(data)) | ||
selected_columns = ["a", "e", "f"] | ||
msg = ( | ||
r"The following columns were not found: \[.*\]" | ||
r"\n\nHint: Did you mean one of these columns: \['a', 'b', 'z'\]?" | ||
) | ||
if "polars" in str(constructor): | ||
# In the lazy case, Polars only errors when we call `collect`, | ||
# and we have no way to recover exactly which columns the user | ||
# tried selecting. So, we just emit their message (which varies | ||
# across versions...) | ||
msg = "e|f" | ||
if isinstance(df, nw.LazyFrame): | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.select(selected_columns).collect() | ||
else: | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.select(selected_columns) | ||
if POLARS_VERSION >= (1,): | ||
# Old Polars versions wouldn't raise an error | ||
# at all here | ||
if isinstance(df, nw.LazyFrame): | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.drop(selected_columns, strict=True).collect() | ||
else: | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.drop(selected_columns, strict=True) | ||
else: # pragma: no cover | ||
pass | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.select(selected_columns) | ||
if "polars" in str(constructor_eager) and POLARS_VERSION < (1,): # pragma: no cover | ||
# Old Polars versions wouldn't raise an error at all here | ||
pass | ||
else: | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.select(selected_columns) | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.drop(selected_columns, strict=True) | ||
if "polars" in str(constructor_eager): | ||
msg = r"\n\nHint: Did you mean one of these columns: \['a', 'b', 'z'\]?" | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.select(nw.col("fdfa")) | ||
Comment on lines
+105
to
+106
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. before this was not tested for polars |
||
|
||
|
||
def test_missing_columns_lazy( | ||
constructor_lazy: ConstructorLazy, request: pytest.FixtureRequest | ||
) -> None: | ||
constructor_id = str(request.node.callspec.id) | ||
if any(id_ == constructor_id for id_ in ("sqlframe", "pyspark[connect]")): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Do you have an idea on how to deal with these? |
||
# These backend raise errors at collect | ||
request.applymarker(pytest.mark.xfail) | ||
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} | ||
df = nw.from_native(constructor_lazy(data)) | ||
selected_columns = ["a", "e", "f"] | ||
|
||
def maybe_collect(df: nw.LazyFrame[Any]) -> nw.DataFrame[Any] | nw.LazyFrame[Any]: | ||
if constructor_id == "polars[lazy]": | ||
# In the lazy case, Polars only errors when we call `collect`, | ||
# and we have no way to recover exactly which columns the user | ||
# tried selecting. So, we just emit their message (which varies | ||
# across versions...) | ||
return df.collect() | ||
return df | ||
|
||
if constructor_id == "polars[lazy]": | ||
msg = r"^e|\"(e|f)\"" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before it was |
||
elif any(id_ == constructor_id for id_ in ("duckdb", "pyspark")): | ||
msg = r"\n\nHint: Did you mean one of these columns: \['a', 'b', 'z'\]?" | ||
else: | ||
msg = ( | ||
r"The following columns were not found: \[.*\]" | ||
r"\n\nHint: Did you mean one of these columns: \['a', 'b', 'z'\]?" | ||
) | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
maybe_collect(df.select(selected_columns)) | ||
if constructor_id == "polars[lazy]" and POLARS_VERSION < (1,): # pragma: no cover | ||
# Old Polars versions wouldn't raise an error at all here | ||
pass | ||
else: | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
df.select(nw.col("fdfa")) | ||
maybe_collect(df.drop(selected_columns, strict=True)) | ||
if "polars" in str(constructor_lazy): | ||
msg = r"^fdfa" | ||
with pytest.raises(ColumnNotFoundError, match=msg): | ||
maybe_collect(df.select(nw.col("fdfa"))) | ||
|
||
|
||
def test_left_to_right_broadcasting(constructor: Constructor) -> None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we risk catching other errors with this?
BinderException
might be a bit broad, shall we also match onstr(e)
before raisingColumnNotFoundError
?we should probably also do this for:
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@MarcoGorelli this has reminded me about an issue with
SparkLikeLazyFrame
narwhals/narwhals/_spark_like/dataframe.py
Lines 152 to 164 in b7001e4
This one is a bigger problem because it captures
CTRL+C
, so you can't easily stop the test suite while it's runningThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point @MarcoGorelli !
Regarding,
simple_select
we are already catching a blindException
:narwhals/narwhals/dataframe.py
Lines 168 to 180 in 2a16835
And should already be caught by the first test:
narwhals/tests/frame/select_test.py
Lines 104 to 109 in 2a16835
Maybe we should have a
_missing_column_exception
property inBaseFrame
that should be specified for each backend. So here we can catch the exact exception here.What do you think? I could do that in a separate PR to see how it looks like
Regarding
filter
andwith_column
, I think we are not testing what happens if we use not existing columns in these methods yet, or am I missing something? I can make another small PR for just this