Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make default module mapping logic consistent and easy to understand #20513

Merged
merged 4 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,76 @@
# https://www.python.org/dev/peps/pep-0503/#normalized-names.

import re
from enum import Enum
from functools import partial
from typing import Dict, Iterable, Match, Tuple
from typing import Callable, Dict, List, Match, Tuple


def all_hyphen_to_dot(m: Match[str]) -> str:
"""Convert all hyphens to dots e.g. azure-foo-bar -> azure.foo.bar.
class PackageSeparator(Enum):
DOT = "."
UNDERSCORE = "_"
NONE = ""

>>> all_hyphen_to_dot(re.match(r"^azure-.+", "azure-foo-bar"))

def all_hyphen_to_separator(m: Match[str], separator: PackageSeparator) -> str:
"""Convert all hyphens to a package separator e.g. azure-foo-bar -> azure.foo.bar or
azure_foo_bar.

>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.DOT)
'azure.foo.bar'
>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.UNDERSCORE)
'azure_foo_bar'
>>> all_hyphen_to_separator(re.match(r"^azure-.+", "azure-foo-bar"), PackageSeparator.NONE)
'azurefoobar'
"""
return m.string.replace("-", ".")
return m.string.replace("-", separator.value)


def first_group_hyphen_to_underscore(m: Match[str]) -> str:
def first_group_hyphen_to_separator(m: Match[str], separator: PackageSeparator) -> str:
"""Convert the first group(regex match group) of hyphens to underscores. Only returns the first
group and must contain at least one group.

>>> first_group_hyphen_to_underscore(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"))
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.UNDERSCORE)
'admin_cursor_paginator'
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.DOT)
'admin.cursor.paginator'
>>> first_group_hyphen_to_separator(re.match(r"^django-((.+(-.+)?))", "django-admin-cursor-paginator"), separator=PackageSeparator.NONE)
'admincursorpaginator'
"""
if m.re.groups == 0 or not m.groups():
raise ValueError(f"expected at least one group in the pattern{m.re.pattern} but got none.")
return str(m.groups()[0]).replace("-", "_")
return str(m.groups()[0]).replace("-", separator.value)


def two_groups_hyphens_two_replacements_with_suffix(
m: Match[str],
first_group_replacement: str = ".",
second_group_replacement: str = "",
first_group_replacement: PackageSeparator = PackageSeparator.DOT,
second_group_replacement: PackageSeparator = PackageSeparator.NONE,
custom_suffix: str = "",
) -> str:
"""take two groups, and by default, the first will have '-' replaced with '.', the second will
have '-' replaced with '' e.g. google-cloud-foo-bar -> group1(google.cloud.)group2(foobar)

>>> two_groups_hyphens_two_replacements_with_suffix(re.match(r"^(google-cloud-)([^.]+)", "google-cloud-foo-bar"))
'google.cloud.foobar'
>>> two_groups_hyphens_two_replacements_with_suffix(re.match(r"^(google-cloud-)([^.]+)", "google-cloud-foo-bar"), first_group_replacement=PackageSeparator.UNDERSCORE, second_group_replacement=PackageSeparator.DOT)
'google_cloud_foo.bar'
"""
if m.re.groups < 2 or not m.groups():
raise ValueError(f"expected at least two groups in the pattern{m.re.pattern}.")
prefix = m.string[m.start(1) : m.end(1)].replace("-", first_group_replacement)
suffix = m.string[m.start(2) : m.end(2)].replace("-", second_group_replacement)
prefix = m.string[m.start(1) : m.end(1)].replace("-", first_group_replacement.value)
suffix = m.string[m.start(2) : m.end(2)].replace("-", second_group_replacement.value)
return f"{prefix}{suffix}{custom_suffix}"


# common replacement methods
all_hyphen_to_dot = partial(all_hyphen_to_separator, separator=PackageSeparator.DOT)
all_hyphen_to_underscore = partial(all_hyphen_to_separator, separator=PackageSeparator.UNDERSCORE)
first_group_hyphen_to_dot = partial(first_group_hyphen_to_separator, separator=PackageSeparator.DOT)
first_group_hyphen_to_underscore = partial(
first_group_hyphen_to_separator, separator=PackageSeparator.UNDERSCORE
)

"""
A mapping of Patterns and their replacements. will be used with `re.sub`.
The match is either a string or a function`(str) -> str`; that takes a re.Match and returns
Expand All @@ -57,7 +83,7 @@ def two_groups_hyphens_two_replacements_with_suffix(
then if an import in the python code is google.cloud.foo, then the package of
google-cloud-foo will be used.
"""
DEFAULT_MODULE_PATTERN_MAPPING: Dict[re.Pattern, Iterable] = {
DEFAULT_MODULE_PATTERN_MAPPING: Dict[re.Pattern, List[Callable[[Match[str]], str]]] = {
re.compile(r"""^azure-.+"""): [all_hyphen_to_dot],
re.compile(r"""^django-((.+(-.+)?))"""): [first_group_hyphen_to_underscore],
# See https://github.com/googleapis/google-cloud-python#libraries for all Google cloud
Expand All @@ -67,13 +93,16 @@ def two_groups_hyphens_two_replacements_with_suffix(
for custom_suffix in ("", "_v1", "_v2", "_v3")
],
re.compile(r"""^(opentelemetry-instrumentation-)([^.]+)"""): [
partial(two_groups_hyphens_two_replacements_with_suffix, second_group_replacement="_"),
partial(
two_groups_hyphens_two_replacements_with_suffix,
second_group_replacement=PackageSeparator.UNDERSCORE,
),
],
re.compile(r"""^(oslo-.+)"""): [first_group_hyphen_to_underscore],
re.compile(r"""^oslo-.+"""): [all_hyphen_to_underscore],
re.compile(r"""^python-(.+)"""): [first_group_hyphen_to_underscore],
}

DEFAULT_MODULE_MAPPING: Dict[str, Tuple] = {
DEFAULT_MODULE_MAPPING: Dict[str, Tuple[str, ...]] = {
"absl-py": ("absl",),
"acryl-datahub": ("datahub",),
"ansicolors": ("colors",),
Expand Down Expand Up @@ -159,7 +188,14 @@ def two_groups_hyphens_two_replacements_with_suffix(
"websocket-client": ("websocket",),
}

DEFAULT_TYPE_STUB_MODULE_MAPPING = {
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING: Dict[re.Pattern, List[Callable[[Match[str]], str]]] = {
re.compile(r"""^stubs[_-](.+)"""): [first_group_hyphen_to_underscore],
re.compile(r"""^types[_-](.+)"""): [first_group_hyphen_to_underscore],
re.compile(r"""^(.+)[_-]stubs"""): [first_group_hyphen_to_underscore],
re.compile(r"""^(.+)[_-]types"""): [first_group_hyphen_to_underscore],
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these do the work of fallback[6:]/fallback[:6] before

}

DEFAULT_TYPE_STUB_MODULE_MAPPING: Dict[str, Tuple[str, ...]] = {
"djangorestframework-types": ("rest_framework",),
"lark-stubs": ("lark",),
"types-beautifulsoup4": ("bs4",),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
DEFAULT_MODULE_MAPPING,
DEFAULT_MODULE_PATTERN_MAPPING,
DEFAULT_TYPE_STUB_MODULE_MAPPING,
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING,
)
from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.target_types import (
Expand Down Expand Up @@ -233,7 +234,10 @@ class FirstPartyPythonTargetsMappingMarker(FirstPartyPythonMappingImplMarker):
pass


@rule(desc="Creating map of first party Python targets to Python modules", level=LogLevel.DEBUG)
@rule(
desc="Creating map of first party Python targets to Python modules",
level=LogLevel.DEBUG,
)
async def map_first_party_python_targets_to_modules(
_: FirstPartyPythonTargetsMappingMarker,
all_python_targets: AllPythonTargets,
Expand Down Expand Up @@ -312,35 +316,25 @@ def providers_for_module(


@functools.cache
def generate_mappings_from_pattern(proj_name: str) -> Iterable[str]:
"""Generate an iterable of possible module mappings from a project name using a regex pattern.
def generate_mappings_from_pattern(proj_name: str, is_type_stub: bool) -> Tuple[str, ...]:
"""Generate a tuple of possible module mappings from a project name using a regex pattern.

e.g. google-cloud-foo -> [google.cloud.foo, google.cloud.foo_v1, google.cloud.foo_v2]
Should eliminate the need to "manually" add a mapping for every service
proj_name: The project name to generate mappings for e.g google-cloud-datastream
"""
pattern_mappings = (
DEFAULT_TYPE_STUB_MODULE_PATTERN_MAPPING if is_type_stub else DEFAULT_MODULE_PATTERN_MAPPING
)
pattern_values = []
for match_pattern, replace_patterns in DEFAULT_MODULE_PATTERN_MAPPING.items():
for match_pattern, replace_patterns in pattern_mappings.items():
if match_pattern.match(proj_name) is not None:
pattern_values = [
match_pattern.sub(replace_pattern, proj_name)
for replace_pattern in replace_patterns
]
break # stop after the first match in the rare chance that there are multiple matches
return pattern_values


@functools.cache
def generate_mappings(proj_name: str, fallback_value: str) -> Iterable[str]:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed anymore; essentially, the only thing that should care about the fallback logic should be the main loop. The rest should just deal with their own domains.

"""Will try the default mapping first and if no mapping is found, try the pattern match.

If those fail, use the fallback_value
"""
return (
DEFAULT_MODULE_MAPPING.get(proj_name)
or generate_mappings_from_pattern(proj_name)
or (fallback_value,)
)
return tuple(pattern_values)


@rule(desc="Creating map of third party targets to Python modules", level=LogLevel.DEBUG)
Expand All @@ -355,23 +349,23 @@ async def map_third_party_modules_to_addresses(
for tgt in all_python_targets.third_party:
resolve = tgt[PythonRequirementResolveField].normalized_value(python_setup)

def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None:
def add_modules(modules: Iterable[str], *, is_type_stub: bool) -> None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed default to ensure explicitness of something critical

for module in modules:
resolves_to_modules_to_providers[resolve][module].append(
ModuleProvider(
tgt.address,
ModuleProviderType.TYPE_STUB if type_stub else ModuleProviderType.IMPL,
ModuleProviderType.TYPE_STUB if is_type_stub else ModuleProviderType.IMPL,
)
)

explicit_modules = tgt.get(PythonRequirementModulesField).value
if explicit_modules:
add_modules(explicit_modules)
add_modules(explicit_modules, is_type_stub=False)
continue

explicit_stub_modules = tgt.get(PythonRequirementTypeStubModulesField).value
if explicit_stub_modules:
add_modules(explicit_stub_modules, type_stub=True)
add_modules(explicit_stub_modules, is_type_stub=True)
continue

# Else, fall back to defaults.
Expand All @@ -382,21 +376,24 @@ def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None:
proj_name = canonicalize_project_name(req.project_name)
fallback_value = req.project_name.strip().lower().replace("-", "_")

in_stubs_map = proj_name in DEFAULT_TYPE_STUB_MODULE_MAPPING
starts_with_prefix = fallback_value.startswith(("types_", "stubs_"))
ends_with_prefix = fallback_value.endswith(("_types", "_stubs"))
if proj_name not in DEFAULT_MODULE_MAPPING and (
in_stubs_map or starts_with_prefix or ends_with_prefix
):
if in_stubs_map:
stub_modules = DEFAULT_TYPE_STUB_MODULE_MAPPING[proj_name]
else:
stub_modules = (
fallback_value[6:] if starts_with_prefix else fallback_value[:-6],
)
add_modules(stub_modules, type_stub=True)
modules_to_add: Tuple[str, ...]
is_type_stub: bool
if proj_name in DEFAULT_MODULE_MAPPING:
modules_to_add = DEFAULT_MODULE_MAPPING[proj_name]
is_type_stub = False
elif proj_name in DEFAULT_TYPE_STUB_MODULE_MAPPING:
modules_to_add = DEFAULT_TYPE_STUB_MODULE_MAPPING[proj_name]
is_type_stub = True
# check for stubs first, since stub packages may also match impl package patterns
elif modules_to_add := generate_mappings_from_pattern(proj_name, is_type_stub=True):
is_type_stub = True
elif modules_to_add := generate_mappings_from_pattern(proj_name, is_type_stub=False):
is_type_stub = False
else:
add_modules(generate_mappings(proj_name, fallback_value))
modules_to_add = (fallback_value,)
is_type_stub = False

add_modules(modules_to_add, is_type_stub=is_type_stub)

return ThirdPartyPythonModuleMapping(
FrozenDict(
Expand Down
Loading
Loading