-
-
Notifications
You must be signed in to change notification settings - Fork 663
Allow processes to occupy more than one slot in the execution semaphore #21960
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b0e8d81
d879caf
b00354b
0801e88
a10c2b5
4d02801
963d336
5e5a0cb
485de73
8b94f22
63e1f80
226b9a2
0f63089
052fbf7
fad38d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
from collections.abc import Iterable, Mapping | ||
from dataclasses import dataclass, field | ||
from enum import Enum | ||
from typing import Literal | ||
|
||
from pants.engine.engine_aware import SideEffecting | ||
from pants.engine.fs import EMPTY_DIGEST, Digest, FileDigest | ||
|
@@ -50,6 +51,57 @@ class ProcessCacheScope(Enum): | |
PER_SESSION = "per_session" | ||
|
||
|
||
@dataclass(frozen=True) | ||
class ProcessConcurrency: | ||
kind: Literal["exactly", "range", "exclusive"] | ||
min: int | None = None | ||
max: int | None = None | ||
|
||
def __post_init__(self): | ||
if self.min is not None and self.min < 1: | ||
raise ValueError(f"min concurrency must be >= 1, got {self.min}") | ||
if self.max is not None and self.max < 1: | ||
raise ValueError(f"max concurrency must be >= 1, got {self.max}") | ||
if self.min is not None and self.max is not None and self.min > self.max: | ||
raise ValueError( | ||
f"min concurrency must be <= max concurrency, got {self.min} and {self.max}" | ||
) | ||
if self.kind == "exactly" and self.min != self.max: | ||
raise ValueError( | ||
f"exactly concurrency must have min and max equal, got {self.min} and {self.max}" | ||
) | ||
|
||
@staticmethod | ||
def range(max: int, min: int = 1): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor tweaks on the python side since last time @benjyw The ordering of the arguments has been reversed for Iff you use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, this could be a little confusing, but since we raise a ValueError if max < min, it's probably fine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do you think there's anything we could change to clarify it? Or leave that for a follow up after users are exposed to it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. one idea: make it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, let's leave it for now |
||
"""The amount of parallelism that this process is capable of given its inputs. This value | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. copied from |
||
does not directly set the number of cores allocated to the process: that is computed based | ||
on availability, and provided as a template value in the arguments of the process. | ||
|
||
When set, a `{pants_concurrency}` variable will be templated into the `argv` of the process. | ||
|
||
Processes which set this value may be preempted (i.e. canceled and restarted) for a short | ||
period after starting if available resources have changed (because other processes have | ||
started or finished). | ||
""" | ||
return ProcessConcurrency("range", min, max) | ||
|
||
@staticmethod | ||
def exactly(count: int): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you need a specific number of cores |
||
"""A specific number of cores required to run the process. | ||
|
||
The process will wait until the specified number of cores are available. | ||
""" | ||
return ProcessConcurrency("exactly", count, count) | ||
|
||
@staticmethod | ||
def exclusive(): | ||
"""Exclusive access to all cores. | ||
|
||
No other processes will be scheduled to run while this process is running. | ||
""" | ||
return ProcessConcurrency("exclusive") | ||
|
||
|
||
@dataclass(frozen=True) | ||
class Process: | ||
argv: tuple[str, ...] | ||
|
@@ -67,6 +119,7 @@ class Process: | |
jdk_home: str | None | ||
execution_slot_variable: str | None | ||
concurrency_available: int | ||
concurrency: ProcessConcurrency | None | ||
Comment on lines
121
to
+122
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For now I would check that at most one of them is set, and error if both are. Then we can do a separate deprecation cycle for the `concurrency_available` field. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To clarify, that check can be in a followup PR (and deprecation in a future followup after that). It doesn't have to be in this one. |
||
cache_scope: ProcessCacheScope | ||
remote_cache_speculation_delay_millis: int | ||
attempt: int | ||
|
@@ -89,6 +142,7 @@ def __init__( | |
jdk_home: str | None = None, | ||
execution_slot_variable: str | None = None, | ||
concurrency_available: int = 0, | ||
concurrency: ProcessConcurrency | None = None, | ||
cache_scope: ProcessCacheScope = ProcessCacheScope.SUCCESSFUL, | ||
remote_cache_speculation_delay_millis: int = 0, | ||
attempt: int = 0, | ||
|
@@ -146,12 +200,20 @@ def __init__( | |
object.__setattr__(self, "jdk_home", jdk_home) | ||
object.__setattr__(self, "execution_slot_variable", execution_slot_variable) | ||
object.__setattr__(self, "concurrency_available", concurrency_available) | ||
object.__setattr__(self, "concurrency", concurrency) | ||
object.__setattr__(self, "cache_scope", cache_scope) | ||
object.__setattr__( | ||
self, "remote_cache_speculation_delay_millis", remote_cache_speculation_delay_millis | ||
) | ||
object.__setattr__(self, "attempt", attempt) | ||
|
||
def __post_init__(self) -> None: | ||
if self.concurrency_available and self.concurrency: | ||
raise ValueError( | ||
"Cannot specify both concurrency_available and concurrency. " | ||
"Only one concurrency setting may be used at a time." | ||
) | ||
|
||
|
||
@dataclass(frozen=True) | ||
class ProcessWithRetries: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
InteractiveProcessResult, | ||
Process, | ||
ProcessCacheScope, | ||
ProcessConcurrency, | ||
ProcessResult, | ||
) | ||
from pants.testutil.rule_runner import QueryRule, RuleRunner, mock_console | ||
|
@@ -411,3 +412,102 @@ def test_workspace_execution_support() -> None: | |
assert result3.stderr.decode() == "this-goes-to-stderr\n" | ||
snapshot = rule_runner.request(Snapshot, [result3.output_digest]) | ||
assert snapshot.files == ("capture-this-file",) | ||
|
||
|
||
@pytest.mark.parametrize(
    "concurrency",
    [
        ProcessConcurrency.exactly(1),
        ProcessConcurrency.exactly(2),
        ProcessConcurrency.exclusive(),
    ],
)
def test_concurrency(rule_runner: RuleRunner, concurrency: ProcessConcurrency) -> None:
    """A process with an `exactly` or `exclusive` concurrency setting runs and echoes its label."""
    # The label doubles as the echoed argument and the process description.
    label = f"concurrency-test-{concurrency.kind}-{concurrency.min}-{concurrency.max}"
    echo = Process(
        description=label,
        concurrency=concurrency,
        argv=("/bin/echo", label),
    )
    result = rule_runner.request(ProcessResult, [echo])
    assert result.stdout.decode() == label + "\n"
    assert result.stderr == b""
|
||
|
||
@pytest.mark.parametrize(
    "concurrency",
    [
        ProcessConcurrency.range(1, min=1),
        ProcessConcurrency.range(max=2),
        ProcessConcurrency.range(max=2, min=1),
        # Values larger than num cores still work (they get clamped to num cores)
        ProcessConcurrency.range(max=10000),
        ProcessConcurrency.range(min=100, max=200),
    ],
)
def test_concurrency_range(rule_runner: RuleRunner, concurrency: ProcessConcurrency) -> None:
    """A process with a `range` concurrency setting runs when argv carries the template."""
    label = f"concurrency-test-{concurrency.kind}-{concurrency.min}-{concurrency.max}"
    # NOTE(review): per the PR discussion the argv itself (not just the description) has to be
    # unique for every case -- presumably so results are not shared between cases; confirm.
    # range concurrency must be templated with {pants_concurrency}
    templated_arg = label + " {pants_concurrency}"
    echo = Process(
        description=label,
        concurrency=concurrency,
        argv=("/bin/echo", templated_arg),
    )
    result = rule_runner.request(ProcessResult, [echo])
    # Only the prefix is compared; the substituted concurrency value is not pinned here.
    assert result.stdout.decode().startswith(label)
    assert result.stderr == b""
|
||
|
||
def test_concurrency_templating(rule_runner: RuleRunner) -> None:
    """`{pants_concurrency}` in argv is rendered as `1` when the range is capped at one."""
    single_core = ProcessConcurrency.range(max=1)
    echo = Process(
        description="concurrency-test",
        concurrency=single_core,
        argv=("/bin/echo", "concurrency: {pants_concurrency}"),
    )
    result = rule_runner.request(ProcessResult, [echo])
    assert result.stdout == b"concurrency: 1\n"
    assert result.stderr == b""
|
||
|
||
def test_concurrency_enum() -> None:
    """Check the fields and equality semantics of every `ProcessConcurrency` factory."""
    exactly_one = ProcessConcurrency.exactly(1)
    min_one = ProcessConcurrency.range(1, min=1)
    max_one = ProcessConcurrency.range(max=1)
    min_one_max_two = ProcessConcurrency.range(min=1, max=2)
    up_to_two = ProcessConcurrency.range(2)
    exclusive = ProcessConcurrency.exclusive()

    # Each factory records the expected kind.
    assert exactly_one.kind == "exactly"
    assert up_to_two.kind == "range"
    assert min_one.kind == "range"
    assert max_one.kind == "range"
    assert min_one_max_two.kind == "range"
    assert exclusive.kind == "exclusive"

    # Bounds: `exactly` pins both, `range` defaults min to 1, `exclusive` leaves both unset.
    assert exactly_one.min == 1
    assert exactly_one.max == 1
    assert up_to_two.min == 1
    assert up_to_two.max == 2
    assert min_one.min == 1
    assert min_one.max == 1
    assert max_one.min == 1
    assert max_one.max == 1
    assert min_one_max_two.min == 1
    assert min_one_max_two.max == 2
    assert exclusive.min is None
    assert exclusive.max is None

    # Values built the same way compare equal, however the arguments were spelled.
    assert exactly_one == ProcessConcurrency.exactly(1)
    assert min_one == ProcessConcurrency.range(1, min=1)
    assert max_one == ProcessConcurrency.range(max=1)
    assert min_one_max_two == ProcessConcurrency.range(min=1, max=2)
    assert exclusive == ProcessConcurrency.exclusive()
    assert up_to_two == min_one_max_two
    assert min_one == max_one

    # Different bounds or kinds compare unequal.
    assert min_one != min_one_max_two
    assert max_one != min_one_max_two
    assert min_one != exclusive
    assert max_one != exclusive
    assert min_one_max_two != exclusive
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.