Print reduced pants test ... command to rerun, when on CI (#20747)
This augments the existing test summary output to show an appropriate
invocation for rerunning any failed tests. This is controlled by the new
`[test].show_rerun_command` option, which defaults to:

- off for "local" dev: rerunning the exact same command will generally
rerun only the failures anyway, thanks to caching
- on in CI: people often won't be sharing a cache with the CI machines,
so rerunning some bulk `pants test ::` command may need to chug through
all the successful tests too (see the sketch just below)

For instance, given three tests, where two fail:

```
✓ //:good-but-slow succeeded in 123.00s (run locally).
✕ //:bad1 failed in 2.00s (run locally).
✕ path/to:bad2 failed in 3.00s (run locally).

To rerun the failing tests, use:

    pants test //:bad1 path/to:bad2
```

If this appears in CI, a dev can copy and paste that invocation to run
just those two bad tests locally, without having to spend time running
`//:good-but-slow`. Currently, without this suggested invocation, the
dev has to copy each target address out of the individual summary lines
one by one.
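
As a rough standalone sketch of how that hint gets assembled (it mirrors the
new `_format_test_rerun_command` in the diff below; the literal `pants test`
here stands in for the real `bin_name()`/goal-name lookup):

```python
import shlex

def rerun_hint(failing_addresses: list[str]) -> str:
    # Quote each failing address so targets with spaces or shell
    # metacharacters stay copy-paste-able, then sort for stable output.
    quoted = sorted(shlex.quote(addr) for addr in failing_addresses)
    invocation = " ".join(["pants test", *quoted])
    return f"To rerun the failing tests, use:\n\n    {invocation}"

print(rerun_hint(["//:bad1", "path/to:bad2"]))
# To rerun the failing tests, use:
#
#     pants test //:bad1 path/to:bad2
```

The quoting is what keeps addresses with spaces or shell metacharacters
paste-able; the new `special_characters_require_quoting` test case covers
exactly that.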

With a lot of failures, the line might be very long, but I think that's
okay: it should still be copy-paste-able just fine.

Potentially it'd be good to do this for other goals too (e.g. after
`pants fix ::`, something like `To fix the problematic files, try: pants fix ...`),
but I'm not sure we have generic infrastructure that would make this
easy, and those goals will usually be faster than tests.
huonw authored Apr 4, 2024
1 parent e23dc0a commit 7d850cf
Showing 2 changed files with 168 additions and 30 deletions.
37 changes: 37 additions & 0 deletions src/python/pants/core/goals/test.py
@@ -5,6 +5,8 @@

import itertools
import logging
import os
import shlex
from abc import ABC, ABCMeta
from dataclasses import dataclass, field
from enum import Enum
@@ -663,6 +665,24 @@ class EnvironmentAware:
),
)

show_rerun_command = BoolOption(
default="CI" in os.environ,
advanced=True,
help=softwrap(
f"""
If tests fail, show an appropriate `{bin_name()} {name} ...` invocation to rerun just
those tests.
This is to make it easy to run those tests on a new machine (for instance, run tests
locally if they fail in CI): caching of successful tests means that rerunning the exact
same command on the same machine will already automatically only rerun the failures.
This defaults to `True` when running in CI (as determined by the `CI` environment
variable being set) but `False` elsewhere.
"""
),
)

def report_dir(self, distdir: DistDir) -> PurePath:
return PurePath(self._report_dir.format(distdir=distdir.relpath))

@@ -943,6 +963,10 @@ async def run_tests(
f"Wrote extra output from test `{result.addresses[0]}` to `{path_prefix}`."
)

rerun_command = _format_test_rerun_command(results)
if rerun_command and test_subsystem.show_rerun_command:
console.print_stderr(f"\n{rerun_command}")

if test_subsystem.report:
report_dir = test_subsystem.report_dir(distdir)
merged_reports = await Get(
@@ -1066,6 +1090,19 @@ def _format_test_summary(result: TestResult, run_id: RunId, console: Console) ->
return f"{sigil} {result.description} {status}{attempt_msg} {elapsed_print}{source_desc}."


def _format_test_rerun_command(results: Iterable[TestResult]) -> None | str:
failures = [result for result in results if result.exit_code not in (None, 0)]
if not failures:
return None

# format an invocation like `pants test path/to/first:address path/to/second:address ...`
addresses = sorted(shlex.quote(str(addr)) for result in failures for addr in result.addresses)
goal = f"{bin_name()} {TestSubsystem.name}"
invocation = " ".join([goal, *addresses])

return f"To rerun the failing tests, use:\n\n {invocation}"


@dataclass(frozen=True)
class TestExtraEnv:
env: EnvironmentVars
161 changes: 131 additions & 30 deletions src/python/pants/core/goals/test_test.py
@@ -33,6 +33,7 @@
TestResult,
TestSubsystem,
TestTimeoutField,
_format_test_rerun_command,
_format_test_summary,
build_runtime_package_dependencies,
run_tests,
@@ -52,6 +53,7 @@
EMPTY_DIGEST,
EMPTY_FILE_DIGEST,
Digest,
FileDigest,
MergeDigests,
Snapshot,
Workspace,
@@ -111,6 +113,31 @@ def make_process_result_metadata(
)


def make_test_result(
addresses: Iterable[Address],
exit_code: None | int,
stdout_bytes: bytes = b"",
stdout_digest: FileDigest = EMPTY_FILE_DIGEST,
stderr_bytes: bytes = b"",
stderr_digest: FileDigest = EMPTY_FILE_DIGEST,
coverage_data: CoverageData | None = None,
output_setting: ShowOutput = ShowOutput.NONE,
result_metadata: None | ProcessResultMetadata = None,
) -> TestResult:
"""Create a TestResult with default values for most fields."""
return TestResult(
addresses=tuple(addresses),
exit_code=exit_code,
stdout_bytes=stdout_bytes,
stdout_digest=stdout_digest,
stderr_bytes=stderr_bytes,
stderr_digest=stderr_digest,
coverage_data=coverage_data,
output_setting=output_setting,
result_metadata=result_metadata,
)


class MockMultipleSourcesField(MultipleSourcesField):
pass

@@ -178,13 +205,9 @@ def skipped(_: Iterable[Address]) -> bool:
@classmethod
def test_result(cls, field_sets: Iterable[MockTestFieldSet]) -> TestResult:
addresses = [field_set.address for field_set in field_sets]
return TestResult(
return make_test_result(
addresses,
exit_code=cls.exit_code(addresses),
stdout_bytes=b"",
stdout_digest=EMPTY_FILE_DIGEST,
stderr_bytes=b"",
stderr_digest=EMPTY_FILE_DIGEST,
addresses=tuple(addresses),
coverage_data=MockCoverageData(addresses),
output_setting=ShowOutput.ALL,
result_metadata=None if cls.skipped(addresses) else make_process_result_metadata("ran"),
@@ -247,6 +270,7 @@ def run_test_rule(
report_dir: str = TestSubsystem.default_report_path,
output: ShowOutput = ShowOutput.ALL,
valid_targets: bool = True,
show_rerun_command: bool = False,
run_id: RunId = RunId(999),
) -> tuple[int, str]:
test_subsystem = create_goal_subsystem(
@@ -261,6 +285,7 @@
extra_env_vars=[],
shard="",
batch_size=1,
show_rerun_command=show_rerun_command,
)
debug_adapter_subsystem = create_subsystem(
DebugAdapterSubsystem,
@@ -408,7 +433,39 @@ def test_skipped_target_noops(rule_runner: PythonRuleRunner) -> None:
assert stderr.strip() == ""


def test_summary(rule_runner: PythonRuleRunner) -> None:
@pytest.mark.parametrize(
("show_rerun_command", "expected_stderr"),
[
(
False,
# the summary is for humans, so we test it literally, to make sure the formatting is good
dedent(
"""\
✓ //:good succeeded in 1.00s (memoized).
✕ //:bad failed in 1.00s (memoized).
"""
),
),
(
True,
dedent(
"""\
✓ //:good succeeded in 1.00s (memoized).
✕ //:bad failed in 1.00s (memoized).
To rerun the failing tests, use:
pants test //:bad
"""
),
),
],
)
def test_summary(
rule_runner: PythonRuleRunner, show_rerun_command: bool, expected_stderr: str
) -> None:
good_address = Address("", target_name="good")
bad_address = Address("", target_name="bad")
skipped_address = Address("", target_name="skipped")
@@ -417,15 +474,10 @@ def test_summary(rule_runner: PythonRuleRunner) -> None:
rule_runner,
request_type=ConditionallySucceedsRequest,
targets=[make_target(good_address), make_target(bad_address), make_target(skipped_address)],
show_rerun_command=show_rerun_command,
)
assert exit_code == ConditionallySucceedsRequest.exit_code((bad_address,))
assert stderr == dedent(
"""\
✓ //:good succeeded in 1.00s (memoized).
✕ //:bad failed in 1.00s (memoized).
"""
)
assert stderr == expected_stderr


def _assert_test_summary(
@@ -436,15 +488,11 @@
result_metadata: ProcessResultMetadata | None,
) -> None:
assert expected == _format_test_summary(
TestResult(
make_test_result(
[Address(spec_path="", target_name="dummy_address")],
exit_code=exit_code,
stdout_bytes=b"",
stderr_bytes=b"",
stdout_digest=EMPTY_FILE_DIGEST,
stderr_digest=EMPTY_FILE_DIGEST,
addresses=(Address(spec_path="", target_name="dummy_address"),),
output_setting=ShowOutput.FAILED,
result_metadata=result_metadata,
output_setting=ShowOutput.FAILED,
),
RunId(run_id),
Console(use_colors=False),
@@ -493,6 +541,64 @@ def test_format_summary_memoized_remote(rule_runner: PythonRuleRunner) -> None:
)


@pytest.mark.parametrize(
("results", "expected"),
[
pytest.param([], None, id="no_results"),
pytest.param(
[make_test_result([Address("", target_name="t1")], exit_code=0)], None, id="one_success"
),
pytest.param(
[make_test_result([Address("", target_name="t2")], exit_code=None)],
None,
id="one_no_run",
),
pytest.param(
[make_test_result([Address("", target_name="t3")], exit_code=1)],
"To rerun the failing tests, use:\n\n pants test //:t3",
id="one_failure",
),
pytest.param(
[
make_test_result([Address("", target_name="t1")], exit_code=0),
make_test_result([Address("", target_name="t2")], exit_code=None),
make_test_result([Address("", target_name="t3")], exit_code=1),
],
"To rerun the failing tests, use:\n\n pants test //:t3",
id="one_of_each",
),
pytest.param(
[
make_test_result([Address("path/to", target_name="t1")], exit_code=1),
make_test_result([Address("another/path", target_name="t2")], exit_code=2),
make_test_result([Address("", target_name="t3")], exit_code=3),
],
"To rerun the failing tests, use:\n\n pants test //:t3 another/path:t2 path/to:t1",
id="multiple_failures",
),
pytest.param(
[
make_test_result(
[
Address(
"path with spaces",
target_name="$*",
parameters=dict(key="value"),
generated_name="gn",
)
],
exit_code=1,
)
],
"To rerun the failing tests, use:\n\n pants test 'path with spaces:$*#gn@key=value'",
id="special_characters_require_quoting",
),
],
)
def test_format_rerun_command(results: list[TestResult], expected: None | str) -> None:
assert expected == _format_test_rerun_command(results)


def test_debug_target(rule_runner: PythonRuleRunner) -> None:
exit_code, _ = run_test_rule(
rule_runner,
@@ -597,14 +703,12 @@ def assert_streaming_output(
expected_message: str,
result_metadata: ProcessResultMetadata = make_process_result_metadata("dummy"),
) -> None:
result = TestResult(
result = make_test_result(
addresses=(Address("demo_test"),),
exit_code=exit_code,
stdout_bytes=stdout.encode(),
stdout_digest=EMPTY_FILE_DIGEST,
stderr_bytes=stderr.encode(),
stderr_digest=EMPTY_FILE_DIGEST,
output_setting=output_setting,
addresses=(Address("demo_test"),),
result_metadata=result_metadata,
)
assert result.level() == expected_level
@@ -720,14 +824,11 @@ def assert_timeout_calculated(


def test_non_utf8_output() -> None:
test_result = TestResult(
test_result = make_test_result(
[],
exit_code=1, # "test error" so stdout/stderr are output in message
stdout_bytes=b"\x80\xBF", # invalid UTF-8 as required by the test
stdout_digest=EMPTY_FILE_DIGEST, # incorrect but we do not check in this test
stderr_bytes=b"\x80\xBF", # invalid UTF-8 as required by the test
stderr_digest=EMPTY_FILE_DIGEST, # incorrect but we do not check in this test
addresses=(),
output_setting=ShowOutput.ALL,
result_metadata=None,
)
assert test_result.message() == "failed (exit code 1).\n��\n��\n\n"
