Skip to content

Commit 27c5caa

Browse files
wtn and claude committed
fix(python): Suggest str.contains for string containment in map_elements
Co-authored-by: Claude <noreply@anthropic.com>
1 parent ca3ae9a commit 27c5caa

File tree

2 files changed

+70
-5
lines changed

2 files changed

+70
-5
lines changed

py-polars/src/polars/_utils/udfs.py

Lines changed: 37 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -696,12 +696,44 @@ def _expr(self, value: StackEntry, col: str, param_name: str, depth: int) -> str
696696
not_ = "" if op == "is" else "not_"
697697
return f"{e1}.is_{not_}null()"
698698
elif op in ("in", "not in"):
699-
not_ = "" if op == "in" else "~"
700-
return (
701-
f"{not_}({e1}.is_in({e2}))"
702-
if " " in e1
703-
else f"{not_}{e1}.is_in({e2})"
699+
e2_stripped = e2.lstrip()
700+
is_collection_literal = e2_stripped.startswith(
701+
("(", "[", "{", "frozenset(")
704702
)
703+
704+
is_collection_variable = False
705+
if not is_collection_literal and not e2.startswith(
706+
("pl.col(", "'")
707+
):
708+
if not self._caller_variables:
709+
self._caller_variables = _get_all_caller_variables()
710+
var_value = self._caller_variables.get(e2)
711+
if isinstance(var_value, (list, tuple, set, frozenset)):
712+
is_collection_variable = True
713+
714+
if is_collection_literal or is_collection_variable:
715+
not_ = "" if op == "in" else "~"
716+
return (
717+
f"{not_}({e1}.is_in({e2}))"
718+
if " " in e1
719+
else f"{not_}{e1}.is_in({e2})"
720+
)
721+
else:
722+
e2_is_col = e2.startswith("pl.col(")
723+
e1_is_col = e1.startswith("pl.col(")
724+
725+
if e2_is_col:
726+
needle = f"pl.lit({e1})" if not e1_is_col else e1
727+
haystack = e2
728+
else:
729+
needle = e1
730+
haystack = f"pl.lit({e2})"
731+
732+
contains_expr = f"{haystack}.str.contains({needle})"
733+
734+
if op == "not in":
735+
return f"~{contains_expr}"
736+
return contains_expr
705737
elif op == "replace_strict":
706738
if not self._caller_variables:
707739
self._caller_variables = _get_all_caller_variables()

py-polars/tests/unit/operations/map/test_inefficient_map_warning.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,9 @@
2121
MY_CONSTANT = 3
2222
MY_DICT = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e"}
2323
MY_LIST = [1, 2, 3]
24+
MY_STRING = "qwerty"
25+
MY_SUBSTRING = "we"
26+
MY_COLLECTION = [2, 3, 4]
2427

2528
# column_name, function, expected_suggestion
2629
TEST_CASES = [
@@ -67,12 +70,39 @@
6770
),
6871
("a", "lambda x: x in (2, 3, 4)", 'pl.col("a").is_in((2, 3, 4))', None),
6972
("a", "lambda x: x not in (2, 3, 4)", '~pl.col("a").is_in((2, 3, 4))', None),
73+
("a", "lambda x: x in MY_COLLECTION", 'pl.col("a").is_in(MY_COLLECTION)', None),
7074
(
7175
"a",
7276
"lambda x: x in (1, 2, 3, 4, 3) and x % 2 == 0 and x > 0",
7377
'pl.col("a").is_in((1, 2, 3, 4, 3)) & ((pl.col("a") % 2) == 0) & (pl.col("a") > 0)',
7478
None,
7579
),
80+
# ---------------------------------------------
81+
# string containment with 'in' operator
82+
# ---------------------------------------------
83+
(
84+
"b",
85+
"lambda x: x in MY_STRING",
86+
'pl.lit(MY_STRING).str.contains(pl.col("b"))',
87+
None,
88+
),
89+
(
90+
"b",
91+
"lambda x: MY_SUBSTRING in x",
92+
'pl.col("b").str.contains(pl.lit(MY_SUBSTRING))',
93+
None,
94+
),
95+
("b", 'lambda x: "A" in x', 'pl.col("b").str.contains(pl.lit("A"))', None),
96+
(
97+
"b",
98+
"lambda x: x not in MY_STRING",
99+
'~pl.lit(MY_STRING).str.contains(pl.col("b"))',
100+
None,
101+
),
102+
("b", "lambda x: x in x", 'pl.col("b").str.contains(pl.col("b"))', None),
103+
# ---------------------------------------------
104+
# constants
105+
# ---------------------------------------------
76106
("a", "lambda x: MY_CONSTANT + x", 'MY_CONSTANT + pl.col("a")', None),
77107
(
78108
"a",
@@ -310,6 +340,9 @@
310340
"MY_CONSTANT": MY_CONSTANT,
311341
"MY_DICT": MY_DICT,
312342
"MY_LIST": MY_LIST,
343+
"MY_STRING": MY_STRING,
344+
"MY_SUBSTRING": MY_SUBSTRING,
345+
"MY_COLLECTION": MY_COLLECTION,
313346
"cosh": cosh,
314347
"datetime": datetime,
315348
"dt": dt,

0 commit comments

Comments
 (0)