Skip to content

Commit a3711d1

Browse files
pythongh-124130: Fix a bug in matching regular expression \B in empty string (pythonGH-127007)
1 parent 8d16919 commit a3711d1

File tree

5 files changed

+15
-25
lines changed

5 files changed

+15
-25
lines changed

Doc/library/re.rst

+2-5
Original file line numberDiff line numberDiff line change
@@ -572,11 +572,8 @@ character ``'$'``.
572572
Word boundaries are determined by the current locale
573573
if the :py:const:`~re.LOCALE` flag is used.
574574

575-
.. note::
576-
577-
Note that ``\B`` does not match an empty string, which differs from
578-
RE implementations in other programming languages such as Perl.
579-
This behavior is kept for compatibility reasons.
575+
.. versionchanged:: next
576+
``\B`` now matches an empty input string.
580577

581578
.. index:: single: \d; in regular expressions
582579

Doc/whatsnew/3.14.rst

+4
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,10 @@ Other language changes
245245
making it a :term:`generic type`.
246246
(Contributed by Brian Schubert in :gh:`126012`.)
247247

248+
* ``\B`` in :mod:`regular expressions <re>` now matches an empty input string.
249+
Now it is always the opposite of ``\b``.
250+
(Contributed by Serhiy Storchaka in :gh:`124130`.)
251+
248252
* iOS and macOS apps can now be configured to redirect ``stdout`` and
249253
``stderr`` content to the system log. (Contributed by Russell Keith-Magee in
250254
:gh:`127592`.)

Lib/test/test_re.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -978,18 +978,15 @@ def test_word_boundaries(self):
978978
self.assertIsNone(re.fullmatch(br".+\B", b"abc", re.LOCALE))
979979
self.assertIsNone(re.fullmatch(r".+\B", "ьюя"))
980980
self.assertTrue(re.fullmatch(r".+\B", "ьюя", re.ASCII))
981-
# However, an empty string contains no word boundaries, and also no
982-
# non-boundaries.
981+
# However, an empty string contains no word boundaries.
983982
self.assertIsNone(re.search(r"\b", ""))
984983
self.assertIsNone(re.search(r"\b", "", re.ASCII))
985984
self.assertIsNone(re.search(br"\b", b""))
986985
self.assertIsNone(re.search(br"\b", b"", re.LOCALE))
987-
# This one is questionable and different from the perlre behaviour,
988-
# but describes current behavior.
989-
self.assertIsNone(re.search(r"\B", ""))
990-
self.assertIsNone(re.search(r"\B", "", re.ASCII))
991-
self.assertIsNone(re.search(br"\B", b""))
992-
self.assertIsNone(re.search(br"\B", b"", re.LOCALE))
986+
self.assertTrue(re.search(r"\B", ""))
987+
self.assertTrue(re.search(r"\B", "", re.ASCII))
988+
self.assertTrue(re.search(br"\B", b""))
989+
self.assertTrue(re.search(br"\B", b"", re.LOCALE))
993990
# A single word-character string has two boundaries, but no
994991
# non-boundary gaps.
995992
self.assertEqual(len(re.findall(r"\b", "a")), 2)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix a bug in matching the regular expression ``\B`` against an empty input string.
2+
Now it is always the opposite of ``\b``.
3+
To get the old behavior, use ``(?!\A\Z)\B``.
4+
To get the new behavior in older Python versions, use ``(?!\b)``.

Modules/_sre/sre_lib.h

-12
Original file line numberDiff line numberDiff line change
@@ -42,53 +42,41 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
4242
return ((void*) ptr == state->end);
4343

4444
case SRE_AT_BOUNDARY:
45-
if (state->beginning == state->end)
46-
return 0;
4745
thatp = ((void*) ptr > state->beginning) ?
4846
SRE_IS_WORD((int) ptr[-1]) : 0;
4947
thisp = ((void*) ptr < state->end) ?
5048
SRE_IS_WORD((int) ptr[0]) : 0;
5149
return thisp != thatp;
5250

5351
case SRE_AT_NON_BOUNDARY:
54-
if (state->beginning == state->end)
55-
return 0;
5652
thatp = ((void*) ptr > state->beginning) ?
5753
SRE_IS_WORD((int) ptr[-1]) : 0;
5854
thisp = ((void*) ptr < state->end) ?
5955
SRE_IS_WORD((int) ptr[0]) : 0;
6056
return thisp == thatp;
6157

6258
case SRE_AT_LOC_BOUNDARY:
63-
if (state->beginning == state->end)
64-
return 0;
6559
thatp = ((void*) ptr > state->beginning) ?
6660
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
6761
thisp = ((void*) ptr < state->end) ?
6862
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
6963
return thisp != thatp;
7064

7165
case SRE_AT_LOC_NON_BOUNDARY:
72-
if (state->beginning == state->end)
73-
return 0;
7466
thatp = ((void*) ptr > state->beginning) ?
7567
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
7668
thisp = ((void*) ptr < state->end) ?
7769
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
7870
return thisp == thatp;
7971

8072
case SRE_AT_UNI_BOUNDARY:
81-
if (state->beginning == state->end)
82-
return 0;
8373
thatp = ((void*) ptr > state->beginning) ?
8474
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
8575
thisp = ((void*) ptr < state->end) ?
8676
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
8777
return thisp != thatp;
8878

8979
case SRE_AT_UNI_NON_BOUNDARY:
90-
if (state->beginning == state->end)
91-
return 0;
9280
thatp = ((void*) ptr > state->beginning) ?
9381
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
9482
thisp = ((void*) ptr < state->end) ?

0 commit comments

Comments
 (0)