Skip to content

Commit 1550337

Browse files
committed
make default module mapping logic consistent and easy to understand
1 parent c4e2344 commit 1550337

File tree

3 files changed

+197
-83
lines changed

3 files changed

+197
-83
lines changed

src/python/pants/backend/python/dependency_inference/default_module_mapping.py

+64-13
Original file line numberDiff line numberDiff line change
@@ -5,50 +5,89 @@
55
# https://www.python.org/dev/peps/pep-0503/#normalized-names.
66

77
import re
8+
from enum import Enum
89
from functools import partial
910
from typing import Dict, Iterable, Match, Tuple
1011

1112

12-
def all_hyphen_to_dot(m: Match[str]) -> str:
13-
"""Convert all hyphens to dots e.g. azure-foo-bar -> azure.foo.bar.
13+
class PackageSeparator(Enum):
14+
DOT = "."
15+
UNDERSCORE = "_"
16+
NONE = ""
1417

15-
>>> all_hyphen_to_dot(re.match(r"^azure-.+", "azure-foo-bar"))
18+
19+
def all_hyphen_to_separator(m: Match[str], separator: PackageSeparator) -> str:
20+
"""Convert all hyphens to a package separator e.g. azure-foo-bar -> azure.foo.bar or
21+
azure_foo_bar.
22+
23+
>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.DOT)
1624
'azure.foo.bar'
25+
>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.UNDERSCORE)
26+
'azure_foo_bar'
27+
>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.NONE)
28+
'azurefoobar'
1729
"""
18-
return m.string.replace("-", ".")
30+
return m.string.replace("-", separator.value)
1931

2032

21-
def first_group_hyphen_to_underscore(m: Match[str]) -> str:
33+
def first_group_hyphen_to_separator(m: Match[str], separator: PackageSeparator) -> str:
2234
"""Convert the first group(regex match group) of hyphens to underscores. Only returns the first
2335
group and must contain at least one group.
2436
25-
>>> first_group_hyphen_to_underscore(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"))
37+
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.UNDERSCORE)
2638
'admin_cursor_paginator'
39+
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.DOT)
40+
'admin.cursor.paginator'
41+
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.NONE)
42+
'admincursorpaginator'
2743
"""
2844
if m.re.groups == 0 or not m.groups():
2945
raise ValueError(f"expected at least one group in the pattern{m.re.pattern} but got none.")
30-
return str(m.groups()[0]).replace("-", "_")
46+
return str(m.groups()[0]).replace("-", separator.value)
3147

3248

3349
def two_groups_hyphens_two_replacements_with_suffix(
3450
m: Match[str],
35-
first_group_replacement: str = ".",
36-
second_group_replacement: str = "",
51+
first_group_replacement: PackageSeparator = PackageSeparator.DOT,
52+
second_group_replacement: PackageSeparator = PackageSeparator.NONE,
3753
custom_suffix: str = "",
3854
) -> str:
3955
"""take two groups, and by default, the first will have '-' replaced with '.', the second will
4056
have '-' replaced with '' e.g. google-cloud-foo-bar -> group1(google.cloud.)group2(foobar)
4157
4258
>>> two_groups_hyphens_two_replacements_with_suffix(re.match(r"^(google-cloud-)([^.]+)", "google-cloud-foo-bar"))
4359
'google.cloud.foobar'
60+
>>> two_groups_hyphens_two_replacements_with_suffix(re.match(r"^(google-cloud-)([^.]+)", "google-cloud-foo-bar"), first_group_replacement=PackageSeparator.UNDERSCORE, second_group_replacement=PackageSeparator.DOT)
61+
'google_cloud_foo.bar'
4462
"""
4563
if m.re.groups < 2 or not m.groups():
4664
raise ValueError(f"expected at least two groups in the pattern{m.re.pattern}.")
47-
prefix = m.string[m.start(1) : m.end(1)].replace("-", first_group_replacement)
48-
suffix = m.string[m.start(2) : m.end(2)].replace("-", second_group_replacement)
65+
prefix = m.string[m.start(1) : m.end(1)].replace("-", first_group_replacement.value)
66+
suffix = m.string[m.start(2) : m.end(2)].replace("-", second_group_replacement.value)
4967
return f"{prefix}{suffix}{custom_suffix}"
5068

5169

70+
def only_preserve_group(m: Match[str]) -> str:
71+
"""Preseve the first and only group.
72+
73+
>>> only_preserve_group(re.match(r"^prefix_(.+)", "prefix_foo"))
74+
'foo'
75+
>>> only_preserve_group(re.match(r"^(.+)_suffix", "bar_suffix"))
76+
'bar'
77+
"""
78+
if m.re.groups != 1 or not m.groups():
79+
raise ValueError(f"expected exactly one group in the pattern{m.re.pattern}.")
80+
return m.group(1)
81+
82+
83+
# common replacement methods
84+
all_hyphen_to_dot = partial(all_hyphen_to_separator, separator=PackageSeparator.DOT)
85+
all_hyphen_to_underscore = partial(all_hyphen_to_separator, separator=PackageSeparator.UNDERSCORE)
86+
first_group_hyphen_to_dot = partial(first_group_hyphen_to_separator, separator=PackageSeparator.DOT)
87+
first_group_hyphen_to_underscore = partial(
88+
first_group_hyphen_to_separator, separator=PackageSeparator.UNDERSCORE
89+
)
90+
5291
"""
5392
A mapping of Patterns and their replacements. will be used with `re.sub`.
5493
The match is either a string or a function`(str) -> str`; that takes a re.Match and returns
@@ -67,9 +106,13 @@ def two_groups_hyphens_two_replacements_with_suffix(
67106
for custom_suffix in ("", "_v1", "_v2", "_v3")
68107
],
69108
re.compile(r"""^(opentelemetry-instrumentation-)([^.]+)"""): [
70-
partial(two_groups_hyphens_two_replacements_with_suffix, second_group_replacement="_"),
109+
partial(
110+
two_groups_hyphens_two_replacements_with_suffix,
111+
second_group_replacement="_",
112+
),
71113
],
72-
re.compile(r"""^(oslo-.+)"""): [first_group_hyphen_to_underscore],
114+
re.compile(r"""^oslo-.+"""): [all_hyphen_to_underscore],
115+
re.compile(r"""^python-(.+)"""): [first_group_hyphen_to_underscore],
73116
re.compile(r"""^python-(.+)"""): [first_group_hyphen_to_underscore],
74117
}
75118

@@ -159,6 +202,14 @@ def two_groups_hyphens_two_replacements_with_suffix(
159202
"websocket-client": ("websocket",),
160203
}
161204

205+
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING: Dict[re.Pattern, Iterable] = {
206+
re.compile(r"""^mypy-boto3-.+"""): [all_hyphen_to_underscore],
207+
re.compile(r"""^stubs_(.+)"""): [only_preserve_group],
208+
re.compile(r"""^types_(.+)"""): [only_preserve_group],
209+
re.compile(r"""^(.+)_stubs"""): [only_preserve_group],
210+
re.compile(r"""^(.+)_types"""): [only_preserve_group],
211+
}
212+
162213
DEFAULT_TYPE_STUB_MODULE_MAPPING = {
163214
"djangorestframework-types": ("rest_framework",),
164215
"lark-stubs": ("lark",),

src/python/pants/backend/python/dependency_inference/module_mapper.py

+27-32
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
DEFAULT_MODULE_MAPPING,
2121
DEFAULT_MODULE_PATTERN_MAPPING,
2222
DEFAULT_TYPE_STUB_MODULE_MAPPING,
23+
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING,
2324
)
2425
from pants.backend.python.subsystems.setup import PythonSetup
2526
from pants.backend.python.target_types import (
@@ -233,7 +234,10 @@ class FirstPartyPythonTargetsMappingMarker(FirstPartyPythonMappingImplMarker):
233234
pass
234235

235236

236-
@rule(desc="Creating map of first party Python targets to Python modules", level=LogLevel.DEBUG)
237+
@rule(
238+
desc="Creating map of first party Python targets to Python modules",
239+
level=LogLevel.DEBUG,
240+
)
237241
async def map_first_party_python_targets_to_modules(
238242
_: FirstPartyPythonTargetsMappingMarker,
239243
all_python_targets: AllPythonTargets,
@@ -312,15 +316,18 @@ def providers_for_module(
312316

313317

314318
@functools.cache
315-
def generate_mappings_from_pattern(proj_name: str) -> Iterable[str]:
319+
def generate_mappings_from_pattern(proj_name: str, is_type_stub: bool) -> Iterable[str]:
316320
"""Generate an iterable of possible module mappings from a project name using a regex pattern.
317321
318322
e.g. google-cloud-foo -> [google.cloud.foo, google.cloud.foo_v1, google.cloud.foo_v2]
319323
Should eliminate the need to "manually" add a mapping for every service
320324
proj_name: The project name to generate mappings for e.g google-cloud-datastream
321325
"""
326+
pattern_mappings = (
327+
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING if is_type_stub else DEFAULT_MODULE_PATTERN_MAPPING
328+
)
322329
pattern_values = []
323-
for match_pattern, replace_patterns in DEFAULT_MODULE_PATTERN_MAPPING.items():
330+
for match_pattern, replace_patterns in pattern_mappings.items():
324331
if match_pattern.match(proj_name) is not None:
325332
pattern_values = [
326333
match_pattern.sub(replace_pattern, proj_name)
@@ -330,19 +337,6 @@ def generate_mappings_from_pattern(proj_name: str) -> Iterable[str]:
330337
return pattern_values
331338

332339

333-
@functools.cache
334-
def generate_mappings(proj_name: str, fallback_value: str) -> Iterable[str]:
335-
"""Will try the default mapping first and if no mapping is found, try the pattern match.
336-
337-
If those fail, use the fallback_value
338-
"""
339-
return (
340-
DEFAULT_MODULE_MAPPING.get(proj_name)
341-
or generate_mappings_from_pattern(proj_name)
342-
or (fallback_value,)
343-
)
344-
345-
346340
@rule(desc="Creating map of third party targets to Python modules", level=LogLevel.DEBUG)
347341
async def map_third_party_modules_to_addresses(
348342
all_python_targets: AllPythonTargets,
@@ -355,12 +349,12 @@ async def map_third_party_modules_to_addresses(
355349
for tgt in all_python_targets.third_party:
356350
resolve = tgt[PythonRequirementResolveField].normalized_value(python_setup)
357351

358-
def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None:
352+
def add_modules(modules: Iterable[str], *, is_type_stub: bool = False) -> None:
359353
for module in modules:
360354
resolves_to_modules_to_providers[resolve][module].append(
361355
ModuleProvider(
362356
tgt.address,
363-
ModuleProviderType.TYPE_STUB if type_stub else ModuleProviderType.IMPL,
357+
ModuleProviderType.TYPE_STUB if is_type_stub else ModuleProviderType.IMPL,
364358
)
365359
)
366360

@@ -382,21 +376,22 @@ def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None:
382376
proj_name = canonicalize_project_name(req.project_name)
383377
fallback_value = req.project_name.strip().lower().replace("-", "_")
384378

385-
in_stubs_map = proj_name in DEFAULT_TYPE_STUB_MODULE_MAPPING
386-
starts_with_prefix = fallback_value.startswith(("types_", "stubs_"))
387-
ends_with_prefix = fallback_value.endswith(("_types", "_stubs"))
388-
if proj_name not in DEFAULT_MODULE_MAPPING and (
389-
in_stubs_map or starts_with_prefix or ends_with_prefix
390-
):
391-
if in_stubs_map:
392-
stub_modules = DEFAULT_TYPE_STUB_MODULE_MAPPING[proj_name]
393-
else:
394-
stub_modules = (
395-
fallback_value[6:] if starts_with_prefix else fallback_value[:-6],
396-
)
397-
add_modules(stub_modules, type_stub=True)
379+
modules_to_add: Tuple[str]
380+
is_type_stub: bool
381+
if proj_name in DEFAULT_MODULE_MAPPING:
382+
modules_to_add = DEFAULT_MODULE_MAPPING[proj_name]
383+
is_type_stub = False
384+
elif proj_name in DEFAULT_TYPE_STUB_MODULE_MAPPING:
385+
modules_to_add = DEFAULT_TYPE_STUB_MODULE_MAPPING[proj_name]
386+
is_type_stub = True
387+
elif modules_to_add := generate_mappings_from_pattern(proj_name, is_type_stub=False):
388+
is_type_stub = False
398389
else:
399-
add_modules(generate_mappings(proj_name, fallback_value))
390+
modules_to_add = generate_mappings_from_pattern(proj_name, is_type_stub=True)
391+
is_type_stub = True
392+
393+
modules_to_add = modules_to_add or (fallback_value,)
394+
add_modules(modules_to_add, is_type_stub=is_type_stub)
400395

401396
return ThirdPartyPythonModuleMapping(
402397
FrozenDict(

0 commit comments

Comments
 (0)