Skip to content

Commit e839d73

Browse files
authored
perf: simplify Specifier regex (#1106)
Examining the specifier regex, two potential optimizations seem worthwhile: 1. The lookbehind assertions (both positive and negative) can be eliminated if we combine the operator regex with the version regex -- this avoids crawling the same characters of a string forwards and backwards. 2. Do more string operations, like `strip()`, in Python rather than having the regex engine do this work. These ideas are somewhat intertwined, in that getting rid of the lookbehinds only really works by eliminating the named capture groups (`operator` and `version`) in use, and removing those capture groups from the regex requires that we do more string operations. Interestingly, this also results in the regex stored on `Specifier` matching its secondary usage in `_tokenizer`, which was previously very slightly misaligned (in that the group captures in the regex were unused there).
1 parent e3af78f commit e839d73

File tree

2 files changed

+18
-20
lines changed

2 files changed

+18
-20
lines changed

src/packaging/_tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def __str__(self) -> str:
7575
re.VERBOSE,
7676
),
7777
"SPECIFIER": re.compile(
78-
Specifier._operator_regex_str + Specifier._version_regex_str,
78+
Specifier._specifier_regex_str,
7979
re.VERBOSE | re.IGNORECASE,
8080
),
8181
"AT": re.compile(r"\@"),

src/packaging/specifiers.py

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -169,18 +169,15 @@ class Specifier(BaseSpecifier):
169169

170170
__slots__ = ("_prereleases", "_spec", "_spec_version", "_wildcard_split")
171171

172-
_operator_regex_str = r"""
173-
(?P<operator>(~=|==|!=|<=|>=|<|>|===))
174-
"""
175-
_version_regex_str = r"""
176-
(?P<version>
172+
_specifier_regex_str = r"""
173+
(?:
177174
(?:
178175
# The identity operators allow for an escape hatch that will
179176
# do an exact string match of the version you wish to install.
180177
# This will not be parsed by PEP 440 and we cannot determine
181178
# any semantic meaning from it. This operator is discouraged
182179
# but included entirely as an escape hatch.
183-
(?<====) # Only match for the identity operator
180+
=== # Only match for the identity operator
184181
\s*
185182
[^\s;)]* # The arbitrary version can be just about anything,
186183
# we match everything except for whitespace, a
@@ -192,7 +189,7 @@ class Specifier(BaseSpecifier):
192189
# The (non)equality operators allow for wild card and local
193190
# versions to be specified so we have to define these two
194191
# operators separately to enable that.
195-
(?<===|!=) # Only match for equals and not equals
192+
(?:==|!=) # Only match for equals and not equals
196193
197194
\s*
198195
v?
@@ -221,7 +218,7 @@ class Specifier(BaseSpecifier):
221218
(?:
222219
# The compatible operator requires at least two digits in the
223220
# release segment.
224-
(?<=~=) # Only match for the compatible operator
221+
(?:~=) # Only match for the compatible operator
225222
226223
\s*
227224
v?
@@ -244,9 +241,7 @@ class Specifier(BaseSpecifier):
244241
# (non)equality operators do. Specifically they do not allow
245242
# local versions to be specified nor do they allow the prefix
246243
# matching wild cards.
247-
(?<!==|!=|~=) # We have special cases for these
248-
# operators so we want to make sure they
249-
# don't match here.
244+
(?:<=|>=|<|>)
250245
251246
\s*
252247
v?
@@ -267,8 +262,7 @@ class Specifier(BaseSpecifier):
267262
"""
268263

269264
_regex = re.compile(
270-
r"\s*" + _operator_regex_str + _version_regex_str + r"\s*",
271-
re.VERBOSE | re.IGNORECASE,
265+
r"\s*" + _specifier_regex_str + r"\s*", re.VERBOSE | re.IGNORECASE
272266
)
273267

274268
_operators: Final = {
@@ -295,14 +289,18 @@ def __init__(self, spec: str = "", prereleases: bool | None = None) -> None:
295289
:raises InvalidSpecifier:
296290
If the given specifier is invalid (i.e. bad syntax).
297291
"""
298-
match = self._regex.fullmatch(spec)
299-
if not match:
292+
if not self._regex.fullmatch(spec):
300293
raise InvalidSpecifier(f"Invalid specifier: {spec!r}")
301294

302-
self._spec: tuple[str, str] = (
303-
match.group("operator").strip(),
304-
match.group("version").strip(),
305-
)
295+
spec = spec.strip()
296+
if spec.startswith("==="):
297+
operator, version = spec[:3], spec[3:].strip()
298+
elif spec.startswith(("~=", "==", "!=", "<=", ">=")):
299+
operator, version = spec[:2], spec[2:].strip()
300+
else:
301+
operator, version = spec[:1], spec[1:].strip()
302+
303+
self._spec: tuple[str, str] = (operator, version)
306304

307305
# Store whether or not this Specifier should accept prereleases
308306
self._prereleases = prereleases

0 commit comments

Comments
 (0)