From 67dcc224749a0ec24a7d6a861ba05ece612217b2 Mon Sep 17 00:00:00 2001 From: bxff <51504045+bxff@users.noreply.github.com> Date: Sun, 1 Mar 2026 00:19:08 +0530 Subject: [PATCH 1/2] [ruff] Fix false positive for `re.split` with empty string pattern (`RUF055`) Closes #23629 `re.split("", s)` succeeds and returns `['', 'a', 'b', 'c', '']`, but the suggested fix `s.split("")` raises `ValueError: empty separator`. This adds a guard to skip the diagnostic when the separator pattern is an empty string or bytes literal for `re.split` calls. --- .../resources/test/fixtures/ruff/RUF055_0.py | 5 +++++ .../resources/test/fixtures/ruff/RUF055_3.py | 5 ++++- .../ruff/rules/unnecessary_regular_expression.rs | 12 ++++++++++++ ...es__ruff__tests__preview__RUF055_RUF055_0.py.snap | 8 ++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py index 608ea2ef22862c..d56da3e484fa7e 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py @@ -98,3 +98,8 @@ def dashrepl(matchobj): re.sub(r'abc', "", s) re.sub(r"""abc""", "", s) re.sub(r'''abc''', "", s) + +# Empty pattern: re.split("", s) should not be flagged because +# str.split("") raises ValueError while re.split("", s) succeeds +re.split("", s) +re.split(r"", s) diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_3.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_3.py index 13c93f53de85f5..b69ab635b51d3a 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_3.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_3.py @@ -21,4 +21,7 @@ re.match(rb"ab[c]", b_src) re.search(rb"ab[c]", b_src) re.fullmatch(rb"ab[c]", b_src) -re.split(rb"ab[c]", b_src) \ No newline at end of file +re.split(rb"ab[c]", b_src) + +# Empty pattern: re.split(rb"", b_src) should not be flagged 
+re.split(rb"", b_src) \ No newline at end of file diff --git a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs index 5ccc516ee3de9a..4ee1c9a247faea 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs @@ -118,6 +118,18 @@ pub(crate) fn unnecessary_regular_expression(checker: &Checker, call: &ExprCall) return; } + // `str.split("")` raises `ValueError: empty separator` while `re.split("", s)` succeeds, + // so skip the diagnostic for `re.split` with an empty pattern. + if matches!(re_func.kind, ReFuncKind::Split) { + let is_empty = match &literal { + Literal::Str(str_lit) => str_lit.value.to_str().is_empty(), + Literal::Bytes(bytes_lit) => bytes_lit.value.iter().all(|part| part.is_empty()), + }; + if is_empty { + return; + } + } + // Now we know the pattern is a string literal with no metacharacters, so // we can proceed with the str method replacement. 
let new_expr = re_func.replacement(); diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap index c9a75f25c296b4..5a20f5c18f779a 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap @@ -243,6 +243,7 @@ help: Replace with `s.replace(r'abc', "")` 98 + s.replace(r'abc', "") 99 | re.sub(r"""abc""", "", s) 100 | re.sub(r'''abc''', "", s) +101 | RUF055 [*] Plain string pattern passed to `re` function --> RUF055_0.py:99:1 @@ -260,6 +261,8 @@ help: Replace with `s.replace(r"""abc""", "")` - re.sub(r"""abc""", "", s) 99 + s.replace(r"""abc""", "") 100 | re.sub(r'''abc''', "", s) +101 | +102 | # Empty pattern: re.split("", s) should not be flagged because RUF055 [*] Plain string pattern passed to `re` function --> RUF055_0.py:100:1 @@ -268,6 +271,8 @@ RUF055 [*] Plain string pattern passed to `re` function 99 | re.sub(r"""abc""", "", s) 100 | re.sub(r'''abc''', "", s) | ^^^^^^^^^^^^^^^^^^^^^^^^^ +101 | +102 | # Empty pattern: re.split("", s) should not be flagged because | help: Replace with `s.replace(r'''abc''', "")` 97 | # these double as tests for preserving raw string quoting style @@ -275,3 +280,6 @@ help: Replace with `s.replace(r'''abc''', "")` 99 | re.sub(r"""abc""", "", s) - re.sub(r'''abc''', "", s) 100 + s.replace(r'''abc''', "") +101 | +102 | # Empty pattern: re.split("", s) should not be flagged because +103 | # str.split("") raises ValueError while re.split("", s) succeeds From 1dbae392ca845c02558f6e7480ccb358f48a13b5 Mon Sep 17 00:00:00 2001 From: bxff <51504045+bxff@users.noreply.github.com> Date: Mon, 2 Mar 2026 22:04:33 +0530 Subject: [PATCH 2/2] refactor: add Literal::is_empty and simplify split 
check Address review feedback from ntBre: - Add is_empty() method to Literal enum using direct .is_empty() on StringLiteralValue and BytesLiteralValue - Simplify the call site to a single-line condition --- .../rules/unnecessary_regular_expression.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs index 4ee1c9a247faea..2600371cf3b29c 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs @@ -120,14 +120,8 @@ pub(crate) fn unnecessary_regular_expression(checker: &Checker, call: &ExprCall) // `str.split("")` raises `ValueError: empty separator` while `re.split("", s)` succeeds, // so skip the diagnostic for `re.split` with an empty pattern. - if matches!(re_func.kind, ReFuncKind::Split) { - let is_empty = match &literal { - Literal::Str(str_lit) => str_lit.value.to_str().is_empty(), - Literal::Bytes(bytes_lit) => bytes_lit.value.iter().all(|part| part.is_empty()), - }; - if is_empty { - return; - } + if matches!(re_func.kind, ReFuncKind::Split) && literal.is_empty() { + return; } // Now we know the pattern is a string literal with no metacharacters, so @@ -374,6 +368,15 @@ enum Literal<'a> { Bytes(&'a ExprBytesLiteral), } +impl Literal<'_> { + fn is_empty(&self) -> bool { + match self { + Literal::Str(str_lit) => str_lit.value.is_empty(), + Literal::Bytes(bytes_lit) => bytes_lit.value.is_empty(), + } + } +} + /// Try to resolve `name` to either a string or bytes literal in `semantic`. fn resolve_literal<'a>(name: &'a Expr, semantic: &'a SemanticModel) -> Option<Literal<'a>> { if let Some(str_lit) = resolve_string_literal(name, semantic) {