Skip to content

Commit 42ff1dd

Browse files
authored
Make default module mapping logic consistent and easy to understand (#20513)
This was motivated by trying to add some additional module and stub-related mappings. While doing that, I realized the patterns only applied to implementation modules only, and realized that a refactor would be the easiest way forward, while also making the logic easier to step through. I haven't added any of those module mappings in this PR since I wanted this to only be the logic change. ## Changes * Make the replacement functions more generic (allow injecting any separator) * Better type hinting - more complete types for generics, also made return values more specific (return values and variables should be hinted as specifically as possible; functions can choose to accept more generic types as arguments) * Made impl and type package mappings behave identically * The previous logic seemed to have potential for bugs - it does a fallback on each call, has some hackiness around checking for a stubs package, nested conditions, etc. Simplified the module lookup logic. Now it's just: * check if it's in the standard module mapping * else check if it's in the type stub module mapping * else check if the patterns for type stub modules match - this needs to be done first because a stub module might match an implementation module pattern (example, `python-pkg-types`) * else check if the patterns for impl modules match * else use the fallback * Update tests to ensure symmetry between standard modules and type stub modules * Added new test for `test_generate_type_stub_mappings_from_pattern_matches_para`, which is the main new logic here * All other changes created by `pants fmt` ## Testing * Added and modified doctest examples, and `python src/python/pants/backend/python/dependency_inference/default_module_mapping.py` passes * `pants test src/python/pants/backend/python/dependency_inference/::` passes * Ran this build on our monorepo, which uses quite a tangle of every feature here * CI
1 parent 62828b7 commit 42ff1dd

File tree

3 files changed

+240
-109
lines changed

3 files changed

+240
-109
lines changed

src/python/pants/backend/python/dependency_inference/default_module_mapping.py

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,50 +5,76 @@
55
# https://www.python.org/dev/peps/pep-0503/#normalized-names.
66

77
import re
8+
from enum import Enum
89
from functools import partial
9-
from typing import Dict, Iterable, Match, Tuple
10+
from typing import Callable, Dict, List, Match, Tuple
1011

1112

12-
def all_hyphen_to_dot(m: Match[str]) -> str:
13-
"""Convert all hyphens to dots e.g. azure-foo-bar -> azure.foo.bar.
13+
class PackageSeparator(Enum):
14+
DOT = "."
15+
UNDERSCORE = "_"
16+
NONE = ""
1417

15-
>>> all_hyphen_to_dot(re.match(r"^azure-.+", "azure-foo-bar"))
18+
19+
def all_hyphen_to_separator(m: Match[str], separator: PackageSeparator) -> str:
20+
"""Convert all hyphens to a package separator e.g. azure-foo-bar -> azure.foo.bar or
21+
azure_foo_bar.
22+
23+
>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.DOT)
1624
'azure.foo.bar'
25+
>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.UNDERSCORE)
26+
'azure_foo_bar'
27+
>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.NONE)
28+
'azurefoobar'
1729
"""
18-
return m.string.replace("-", ".")
30+
return m.string.replace("-", separator.value)
1931

2032

21-
def first_group_hyphen_to_underscore(m: Match[str]) -> str:
33+
def first_group_hyphen_to_separator(m: Match[str], separator: PackageSeparator) -> str:
2234
"""Convert the first group(regex match group) of hyphens to underscores. Only returns the first
2335
group and must contain at least one group.
2436
25-
>>> first_group_hyphen_to_underscore(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"))
37+
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.UNDERSCORE)
2638
'admin_cursor_paginator'
39+
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.DOT)
40+
'admin.cursor.paginator'
41+
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.NONE)
42+
'admincursorpaginator'
2743
"""
2844
if m.re.groups == 0 or not m.groups():
2945
raise ValueError(f"expected at least one group in the pattern{m.re.pattern} but got none.")
30-
return str(m.groups()[0]).replace("-", "_")
46+
return str(m.groups()[0]).replace("-", separator.value)
3147

3248

3349
def two_groups_hyphens_two_replacements_with_suffix(
3450
m: Match[str],
35-
first_group_replacement: str = ".",
36-
second_group_replacement: str = "",
51+
first_group_replacement: PackageSeparator = PackageSeparator.DOT,
52+
second_group_replacement: PackageSeparator = PackageSeparator.NONE,
3753
custom_suffix: str = "",
3854
) -> str:
3955
"""take two groups, and by default, the first will have '-' replaced with '.', the second will
4056
have '-' replaced with '' e.g. google-cloud-foo-bar -> group1(google.cloud.)group2(foobar)
4157
4258
>>> two_groups_hyphens_two_replacements_with_suffix(re.match(r"^(google-cloud-)([^.]+)", "google-cloud-foo-bar"))
4359
'google.cloud.foobar'
60+
>>> two_groups_hyphens_two_replacements_with_suffix(re.match(r"^(google-cloud-)([^.]+)", "google-cloud-foo-bar"), first_group_replacement=PackageSeparator.UNDERSCORE, second_group_replacement=PackageSeparator.DOT)
61+
'google_cloud_foo.bar'
4462
"""
4563
if m.re.groups < 2 or not m.groups():
4664
raise ValueError(f"expected at least two groups in the pattern{m.re.pattern}.")
47-
prefix = m.string[m.start(1) : m.end(1)].replace("-", first_group_replacement)
48-
suffix = m.string[m.start(2) : m.end(2)].replace("-", second_group_replacement)
65+
prefix = m.string[m.start(1) : m.end(1)].replace("-", first_group_replacement.value)
66+
suffix = m.string[m.start(2) : m.end(2)].replace("-", second_group_replacement.value)
4967
return f"{prefix}{suffix}{custom_suffix}"
5068

5169

70+
# common replacement methods
71+
all_hyphen_to_dot = partial(all_hyphen_to_separator, separator=PackageSeparator.DOT)
72+
all_hyphen_to_underscore = partial(all_hyphen_to_separator, separator=PackageSeparator.UNDERSCORE)
73+
first_group_hyphen_to_dot = partial(first_group_hyphen_to_separator, separator=PackageSeparator.DOT)
74+
first_group_hyphen_to_underscore = partial(
75+
first_group_hyphen_to_separator, separator=PackageSeparator.UNDERSCORE
76+
)
77+
5278
"""
5379
A mapping of Patterns and their replacements. will be used with `re.sub`.
5480
The match is either a string or a function`(str) -> str`; that takes a re.Match and returns
@@ -57,7 +83,7 @@ def two_groups_hyphens_two_replacements_with_suffix(
5783
then if an import in the python code is google.cloud.foo, then the package of
5884
google-cloud-foo will be used.
5985
"""
60-
DEFAULT_MODULE_PATTERN_MAPPING: Dict[re.Pattern, Iterable] = {
86+
DEFAULT_MODULE_PATTERN_MAPPING: Dict[re.Pattern, List[Callable[[Match[str]], str]]] = {
6187
re.compile(r"""^azure-.+"""): [all_hyphen_to_dot],
6288
re.compile(r"""^django-((.+(-.+)?))"""): [first_group_hyphen_to_underscore],
6389
# See https://github.com/googleapis/google-cloud-python#libraries for all Google cloud
@@ -67,13 +93,16 @@ def two_groups_hyphens_two_replacements_with_suffix(
6793
for custom_suffix in ("", "_v1", "_v2", "_v3")
6894
],
6995
re.compile(r"""^(opentelemetry-instrumentation-)([^.]+)"""): [
70-
partial(two_groups_hyphens_two_replacements_with_suffix, second_group_replacement="_"),
96+
partial(
97+
two_groups_hyphens_two_replacements_with_suffix,
98+
second_group_replacement=PackageSeparator.UNDERSCORE,
99+
),
71100
],
72-
re.compile(r"""^(oslo-.+)"""): [first_group_hyphen_to_underscore],
101+
re.compile(r"""^oslo-.+"""): [all_hyphen_to_underscore],
73102
re.compile(r"""^python-(.+)"""): [first_group_hyphen_to_underscore],
74103
}
75104

76-
DEFAULT_MODULE_MAPPING: Dict[str, Tuple] = {
105+
DEFAULT_MODULE_MAPPING: Dict[str, Tuple[str, ...]] = {
77106
"absl-py": ("absl",),
78107
"acryl-datahub": ("datahub",),
79108
"ansicolors": ("colors",),
@@ -180,7 +209,14 @@ def two_groups_hyphens_two_replacements_with_suffix(
180209
"websocket-client": ("websocket",),
181210
}
182211

183-
DEFAULT_TYPE_STUB_MODULE_MAPPING = {
212+
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING: Dict[re.Pattern, List[Callable[[Match[str]], str]]] = {
213+
re.compile(r"""^stubs[_-](.+)"""): [first_group_hyphen_to_underscore],
214+
re.compile(r"""^types[_-](.+)"""): [first_group_hyphen_to_underscore],
215+
re.compile(r"""^(.+)[_-]stubs"""): [first_group_hyphen_to_underscore],
216+
re.compile(r"""^(.+)[_-]types"""): [first_group_hyphen_to_underscore],
217+
}
218+
219+
DEFAULT_TYPE_STUB_MODULE_MAPPING: Dict[str, Tuple[str, ...]] = {
184220
"djangorestframework-types": ("rest_framework",),
185221
"lark-stubs": ("lark",),
186222
"types-beautifulsoup4": ("bs4",),

src/python/pants/backend/python/dependency_inference/module_mapper.py

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
DEFAULT_MODULE_MAPPING,
2121
DEFAULT_MODULE_PATTERN_MAPPING,
2222
DEFAULT_TYPE_STUB_MODULE_MAPPING,
23+
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING,
2324
)
2425
from pants.backend.python.subsystems.setup import PythonSetup
2526
from pants.backend.python.target_types import (
@@ -233,7 +234,10 @@ class FirstPartyPythonTargetsMappingMarker(FirstPartyPythonMappingImplMarker):
233234
pass
234235

235236

236-
@rule(desc="Creating map of first party Python targets to Python modules", level=LogLevel.DEBUG)
237+
@rule(
238+
desc="Creating map of first party Python targets to Python modules",
239+
level=LogLevel.DEBUG,
240+
)
237241
async def map_first_party_python_targets_to_modules(
238242
_: FirstPartyPythonTargetsMappingMarker,
239243
all_python_targets: AllPythonTargets,
@@ -312,35 +316,25 @@ def providers_for_module(
312316

313317

314318
@functools.cache
315-
def generate_mappings_from_pattern(proj_name: str) -> Iterable[str]:
316-
"""Generate an iterable of possible module mappings from a project name using a regex pattern.
319+
def generate_mappings_from_pattern(proj_name: str, is_type_stub: bool) -> Tuple[str, ...]:
320+
"""Generate a tuple of possible module mappings from a project name using a regex pattern.
317321
318322
e.g. google-cloud-foo -> [google.cloud.foo, google.cloud.foo_v1, google.cloud.foo_v2]
319323
Should eliminate the need to "manually" add a mapping for every service
320324
proj_name: The project name to generate mappings for e.g google-cloud-datastream
321325
"""
326+
pattern_mappings = (
327+
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING if is_type_stub else DEFAULT_MODULE_PATTERN_MAPPING
328+
)
322329
pattern_values = []
323-
for match_pattern, replace_patterns in DEFAULT_MODULE_PATTERN_MAPPING.items():
330+
for match_pattern, replace_patterns in pattern_mappings.items():
324331
if match_pattern.match(proj_name) is not None:
325332
pattern_values = [
326333
match_pattern.sub(replace_pattern, proj_name)
327334
for replace_pattern in replace_patterns
328335
]
329336
break # stop after the first match in the rare chance that there are multiple matches
330-
return pattern_values
331-
332-
333-
@functools.cache
334-
def generate_mappings(proj_name: str, fallback_value: str) -> Iterable[str]:
335-
"""Will try the default mapping first and if no mapping is found, try the pattern match.
336-
337-
If those fail, use the fallback_value
338-
"""
339-
return (
340-
DEFAULT_MODULE_MAPPING.get(proj_name)
341-
or generate_mappings_from_pattern(proj_name)
342-
or (fallback_value,)
343-
)
337+
return tuple(pattern_values)
344338

345339

346340
@rule(desc="Creating map of third party targets to Python modules", level=LogLevel.DEBUG)
@@ -355,23 +349,23 @@ async def map_third_party_modules_to_addresses(
355349
for tgt in all_python_targets.third_party:
356350
resolve = tgt[PythonRequirementResolveField].normalized_value(python_setup)
357351

358-
def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None:
352+
def add_modules(modules: Iterable[str], *, is_type_stub: bool) -> None:
359353
for module in modules:
360354
resolves_to_modules_to_providers[resolve][module].append(
361355
ModuleProvider(
362356
tgt.address,
363-
ModuleProviderType.TYPE_STUB if type_stub else ModuleProviderType.IMPL,
357+
ModuleProviderType.TYPE_STUB if is_type_stub else ModuleProviderType.IMPL,
364358
)
365359
)
366360

367361
explicit_modules = tgt.get(PythonRequirementModulesField).value
368362
if explicit_modules:
369-
add_modules(explicit_modules)
363+
add_modules(explicit_modules, is_type_stub=False)
370364
continue
371365

372366
explicit_stub_modules = tgt.get(PythonRequirementTypeStubModulesField).value
373367
if explicit_stub_modules:
374-
add_modules(explicit_stub_modules, type_stub=True)
368+
add_modules(explicit_stub_modules, is_type_stub=True)
375369
continue
376370

377371
# Else, fall back to defaults.
@@ -382,21 +376,24 @@ def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None:
382376
proj_name = canonicalize_project_name(req.project_name)
383377
fallback_value = req.project_name.strip().lower().replace("-", "_")
384378

385-
in_stubs_map = proj_name in DEFAULT_TYPE_STUB_MODULE_MAPPING
386-
starts_with_prefix = fallback_value.startswith(("types_", "stubs_"))
387-
ends_with_prefix = fallback_value.endswith(("_types", "_stubs"))
388-
if proj_name not in DEFAULT_MODULE_MAPPING and (
389-
in_stubs_map or starts_with_prefix or ends_with_prefix
390-
):
391-
if in_stubs_map:
392-
stub_modules = DEFAULT_TYPE_STUB_MODULE_MAPPING[proj_name]
393-
else:
394-
stub_modules = (
395-
fallback_value[6:] if starts_with_prefix else fallback_value[:-6],
396-
)
397-
add_modules(stub_modules, type_stub=True)
379+
modules_to_add: Tuple[str, ...]
380+
is_type_stub: bool
381+
if proj_name in DEFAULT_MODULE_MAPPING:
382+
modules_to_add = DEFAULT_MODULE_MAPPING[proj_name]
383+
is_type_stub = False
384+
elif proj_name in DEFAULT_TYPE_STUB_MODULE_MAPPING:
385+
modules_to_add = DEFAULT_TYPE_STUB_MODULE_MAPPING[proj_name]
386+
is_type_stub = True
387+
# check for stubs first, since stub packages may also match impl package patterns
388+
elif modules_to_add := generate_mappings_from_pattern(proj_name, is_type_stub=True):
389+
is_type_stub = True
390+
elif modules_to_add := generate_mappings_from_pattern(proj_name, is_type_stub=False):
391+
is_type_stub = False
398392
else:
399-
add_modules(generate_mappings(proj_name, fallback_value))
393+
modules_to_add = (fallback_value,)
394+
is_type_stub = False
395+
396+
add_modules(modules_to_add, is_type_stub=is_type_stub)
400397

401398
return ThirdPartyPythonModuleMapping(
402399
FrozenDict(

0 commit comments

Comments
 (0)