Print reduced pants test ... command to rerun, when on CI (#20747)
This augments the existing test summary output to show an appropriate
invocation for rerunning any failed tests. This is controlled by the new
`[test].show_rerun_command` option, which defaults to:

- off for "local" dev: rerunning the exact same command will generally
rerun only the failures anyway, thanks to caching
- on in CI: people often won't be sharing a cache with the CI machines,
so rerunning some bulk `pants test ::` command may need to chug through
all the successful tests too (see the sketch just below)

For instance, given three tests, where two fail:

```
✓ //:good-but-slow succeeded in 123.00s (run locally).
✕ //:bad1 failed in 2.00s (run locally).
✕ path/to:bad2 failed in 3.00s (run locally).

To rerun the failing tests, use:

    pants test //:bad1 path/to:bad2
```

If this appears in CI, a dev can copy and paste that invocation to run
just those two bad tests locally, without having to spend time running
`//:good-but-slow`. Currently, without this suggested invocation, the
dev has to copy each target address out of the individual summary lines
one by one.
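
As a rough standalone sketch of how that hint gets assembled (it mirrors the
new `_format_test_rerun_command` in the diff below; the literal `pants test`
here stands in for the real `bin_name()`/goal-name lookup):

```python
import shlex

def rerun_hint(failing_addresses: list[str]) -> str:
    # Quote each failing address so targets with spaces or shell
    # metacharacters stay copy-paste-able, then sort for stable output.
    quoted = sorted(shlex.quote(addr) for addr in failing_addresses)
    invocation = " ".join(["pants test", *quoted])
    return f"To rerun the failing tests, use:\n\n    {invocation}"

print(rerun_hint(["//:bad1", "path/to:bad2"]))
# To rerun the failing tests, use:
#
#     pants test //:bad1 path/to:bad2
```

The quoting is what keeps addresses with spaces or shell metacharacters
paste-able; the new `special_characters_require_quoting` test case covers
exactly that.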

With a lot of failures, the line might be very long, but I think that's
okay: it should still be copy-paste-able just fine.

Potentially it'd be good to do this for other goals too (e.g. after
`pants fix ::`, something like `To fix the problematic files, try: pants fix ...`),
but I'm not sure we have generic infrastructure that would make this
easy, and those goals will usually be faster than tests.
huonw authored Apr 4, 2024
1 parent e23dc0a commit 7d850cf
Showing 2 changed files with 168 additions and 30 deletions.
37 changes: 37 additions & 0 deletions src/python/pants/core/goals/test.py
@@ -5,6 +5,8 @@

import itertools
import logging
import os
import shlex
from abc import ABC, ABCMeta
from dataclasses import dataclass, field
from enum import Enum
@@ -663,6 +665,24 @@ class EnvironmentAware:
),
)

show_rerun_command = BoolOption(
default="CI" in os.environ,
advanced=True,
help=softwrap(
f"""
If tests fail, show an appropriate `{bin_name()} {name} ...` invocation to rerun just
those tests.
This is to make it easy to run those tests on a new machine (for instance, run tests
locally if they fail in CI): caching of successful tests means that rerunning the exact
same command on the same machine will already automatically only rerun the failures.
This defaults to `True` when running in CI (as determined by the `CI` environment
variable being set) but `False` elsewhere.
"""
),
)

def report_dir(self, distdir: DistDir) -> PurePath:
return PurePath(self._report_dir.format(distdir=distdir.relpath))

@@ -943,6 +963,10 @@ async def run_tests(
f"Wrote extra output from test `{result.addresses[0]}` to `{path_prefix}`."
)

rerun_command = _format_test_rerun_command(results)
if rerun_command and test_subsystem.show_rerun_command:
console.print_stderr(f"\n{rerun_command}")

if test_subsystem.report:
report_dir = test_subsystem.report_dir(distdir)
merged_reports = await Get(
@@ -1066,6 +1090,19 @@ def _format_test_summary(result: TestResult, run_id: RunId, console: Console) ->
return f"{sigil} {result.description} {status}{attempt_msg} {elapsed_print}{source_desc}."


def _format_test_rerun_command(results: Iterable[TestResult]) -> None | str:
failures = [result for result in results if result.exit_code not in (None, 0)]
if not failures:
return None

# format an invocation like `pants test path/to/first:address path/to/second:address ...`
addresses = sorted(shlex.quote(str(addr)) for result in failures for addr in result.addresses)
goal = f"{bin_name()} {TestSubsystem.name}"
invocation = " ".join([goal, *addresses])

return f"To rerun the failing tests, use:\n\n {invocation}"


@dataclass(frozen=True)
class TestExtraEnv:
env: EnvironmentVars
161 changes: 131 additions & 30 deletions src/python/pants/core/goals/test_test.py
@@ -33,6 +33,7 @@
TestResult,
TestSubsystem,
TestTimeoutField,
_format_test_rerun_command,
_format_test_summary,
build_runtime_package_dependencies,
run_tests,
@@ -52,6 +53,7 @@
EMPTY_DIGEST,
EMPTY_FILE_DIGEST,
Digest,
FileDigest,
MergeDigests,
Snapshot,
Workspace,
@@ -111,6 +113,31 @@ def make_process_result_metadata(
)


def make_test_result(
addresses: Iterable[Address],
exit_code: None | int,
stdout_bytes: bytes = b"",
stdout_digest: FileDigest = EMPTY_FILE_DIGEST,
stderr_bytes: bytes = b"",
stderr_digest: FileDigest = EMPTY_FILE_DIGEST,
coverage_data: CoverageData | None = None,
output_setting: ShowOutput = ShowOutput.NONE,
result_metadata: None | ProcessResultMetadata = None,
) -> TestResult:
"""Create a TestResult with default values for most fields."""
return TestResult(
addresses=tuple(addresses),
exit_code=exit_code,
stdout_bytes=stdout_bytes,
stdout_digest=stdout_digest,
stderr_bytes=stderr_bytes,
stderr_digest=stderr_digest,
coverage_data=coverage_data,
output_setting=output_setting,
result_metadata=result_metadata,
)


class MockMultipleSourcesField(MultipleSourcesField):
pass

@@ -178,13 +205,9 @@ def skipped(_: Iterable[Address]) -> bool:
@classmethod
def test_result(cls, field_sets: Iterable[MockTestFieldSet]) -> TestResult:
addresses = [field_set.address for field_set in field_sets]
return TestResult(
return make_test_result(
addresses,
exit_code=cls.exit_code(addresses),
stdout_bytes=b"",
stdout_digest=EMPTY_FILE_DIGEST,
stderr_bytes=b"",
stderr_digest=EMPTY_FILE_DIGEST,
addresses=tuple(addresses),
coverage_data=MockCoverageData(addresses),
output_setting=ShowOutput.ALL,
result_metadata=None if cls.skipped(addresses) else make_process_result_metadata("ran"),
@@ -247,6 +270,7 @@ def run_test_rule(
report_dir: str = TestSubsystem.default_report_path,
output: ShowOutput = ShowOutput.ALL,
valid_targets: bool = True,
show_rerun_command: bool = False,
run_id: RunId = RunId(999),
) -> tuple[int, str]:
test_subsystem = create_goal_subsystem(
@@ -261,6 +285,7 @@
extra_env_vars=[],
shard="",
batch_size=1,
show_rerun_command=show_rerun_command,
)
debug_adapter_subsystem = create_subsystem(
DebugAdapterSubsystem,
@@ -408,7 +433,39 @@ def test_skipped_target_noops(rule_runner: PythonRuleRunner) -> None:
assert stderr.strip() == ""


def test_summary(rule_runner: PythonRuleRunner) -> None:
@pytest.mark.parametrize(
("show_rerun_command", "expected_stderr"),
[
(
False,
# the summary is for humans, so we test it literally, to make sure the formatting is good
dedent(
"""\
✓ //:good succeeded in 1.00s (memoized).
✕ //:bad failed in 1.00s (memoized).
"""
),
),
(
True,
dedent(
"""\
✓ //:good succeeded in 1.00s (memoized).
✕ //:bad failed in 1.00s (memoized).
To rerun the failing tests, use:
pants test //:bad
"""
),
),
],
)
def test_summary(
rule_runner: PythonRuleRunner, show_rerun_command: bool, expected_stderr: str
) -> None:
good_address = Address("", target_name="good")
bad_address = Address("", target_name="bad")
skipped_address = Address("", target_name="skipped")
@@ -417,15 +474,10 @@ def test_summary(rule_runner: PythonRuleRunner) -> None:
rule_runner,
request_type=ConditionallySucceedsRequest,
targets=[make_target(good_address), make_target(bad_address), make_target(skipped_address)],
show_rerun_command=show_rerun_command,
)
assert exit_code == ConditionallySucceedsRequest.exit_code((bad_address,))
assert stderr == dedent(
"""\
✓ //:good succeeded in 1.00s (memoized).
✕ //:bad failed in 1.00s (memoized).
"""
)
assert stderr == expected_stderr


def _assert_test_summary(
@@ -436,15 +488,11 @@
result_metadata: ProcessResultMetadata | None,
) -> None:
assert expected == _format_test_summary(
TestResult(
make_test_result(
[Address(spec_path="", target_name="dummy_address")],
exit_code=exit_code,
stdout_bytes=b"",
stderr_bytes=b"",
stdout_digest=EMPTY_FILE_DIGEST,
stderr_digest=EMPTY_FILE_DIGEST,
addresses=(Address(spec_path="", target_name="dummy_address"),),
output_setting=ShowOutput.FAILED,
result_metadata=result_metadata,
output_setting=ShowOutput.FAILED,
),
RunId(run_id),
Console(use_colors=False),
@@ -493,6 +541,64 @@ def test_format_summary_memoized_remote(rule_runner: PythonRuleRunner) -> None:
)


@pytest.mark.parametrize(
("results", "expected"),
[
pytest.param([], None, id="no_results"),
pytest.param(
[make_test_result([Address("", target_name="t1")], exit_code=0)], None, id="one_success"
),
pytest.param(
[make_test_result([Address("", target_name="t2")], exit_code=None)],
None,
id="one_no_run",
),
pytest.param(
[make_test_result([Address("", target_name="t3")], exit_code=1)],
"To rerun the failing tests, use:\n\n pants test //:t3",
id="one_failure",
),
pytest.param(
[
make_test_result([Address("", target_name="t1")], exit_code=0),
make_test_result([Address("", target_name="t2")], exit_code=None),
make_test_result([Address("", target_name="t3")], exit_code=1),
],
"To rerun the failing tests, use:\n\n pants test //:t3",
id="one_of_each",
),
pytest.param(
[
make_test_result([Address("path/to", target_name="t1")], exit_code=1),
make_test_result([Address("another/path", target_name="t2")], exit_code=2),
make_test_result([Address("", target_name="t3")], exit_code=3),
],
"To rerun the failing tests, use:\n\n pants test //:t3 another/path:t2 path/to:t1",
id="multiple_failures",
),
pytest.param(
[
make_test_result(
[
Address(
"path with spaces",
target_name="$*",
parameters=dict(key="value"),
generated_name="gn",
)
],
exit_code=1,
)
],
"To rerun the failing tests, use:\n\n pants test 'path with spaces:$*#gn@key=value'",
id="special_characters_require_quoting",
),
],
)
def test_format_rerun_command(results: list[TestResult], expected: None | str) -> None:
assert expected == _format_test_rerun_command(results)


def test_debug_target(rule_runner: PythonRuleRunner) -> None:
exit_code, _ = run_test_rule(
rule_runner,
@@ -597,14 +703,12 @@ def assert_streaming_output(
expected_message: str,
result_metadata: ProcessResultMetadata = make_process_result_metadata("dummy"),
) -> None:
result = TestResult(
result = make_test_result(
addresses=(Address("demo_test"),),
exit_code=exit_code,
stdout_bytes=stdout.encode(),
stdout_digest=EMPTY_FILE_DIGEST,
stderr_bytes=stderr.encode(),
stderr_digest=EMPTY_FILE_DIGEST,
output_setting=output_setting,
addresses=(Address("demo_test"),),
result_metadata=result_metadata,
)
assert result.level() == expected_level
@@ -720,14 +824,11 @@ def assert_timeout_calculated(


def test_non_utf8_output() -> None:
test_result = TestResult(
test_result = make_test_result(
[],
exit_code=1, # "test error" so stdout/stderr are output in message
stdout_bytes=b"\x80\xBF", # invalid UTF-8 as required by the test
stdout_digest=EMPTY_FILE_DIGEST, # incorrect but we do not check in this test
stderr_bytes=b"\x80\xBF", # invalid UTF-8 as required by the test
stderr_digest=EMPTY_FILE_DIGEST, # incorrect but we do not check in this test
addresses=(),
output_setting=ShowOutput.ALL,
result_metadata=None,
)
assert test_result.message() == "failed (exit code 1).\n��\n��\n\n"
