Skip to content

Commit e9d9941

Browse files
stats: add output_file option to output the stats to a file (#20512)
Currently, when running a Pants goal with `--stats-log` enabled, the stats are shown at the end of the goal's output. This makes it difficult to distinguish between the goal output and the stats, and one cannot redirect only the stats data to a file because it is just part of the goal output. This PR adds a new option to save the stats to a file so that they can be parsed later or archived. As a next step, I'd like to see structured data produced. JSON seems sensible, so that one can munge it to obtain data in any desired format, such as [Prometheus](https://prometheus.io/docs/practices/naming/). The logged stats data is not really suitable for parsing. Co-authored-by: Huon Wilson <[email protected]>
1 parent fea6313 commit e9d9941

File tree

2 files changed

+78
-7
lines changed

2 files changed

+78
-7
lines changed

src/python/pants/goal/stats_aggregator.py

+47-7
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
from __future__ import annotations
55

66
import base64
7+
import datetime
78
import logging
89
from collections import Counter
910
from dataclasses import dataclass
11+
from pathlib import Path
12+
from typing import Optional
1013

1114
from pants.engine.internals.scheduler import Workunit
1215
from pants.engine.rules import collect_rules, rule
@@ -17,9 +20,10 @@
1720
WorkunitsCallbackFactoryRequest,
1821
)
1922
from pants.engine.unions import UnionRule
20-
from pants.option.option_types import BoolOption
23+
from pants.option.option_types import BoolOption, StrOption
2124
from pants.option.subsystem import Subsystem
2225
from pants.util.collections import deep_getsizeof
26+
from pants.util.dirutil import safe_open
2327
from pants.util.strutil import softwrap
2428

2529
logger = logging.getLogger(__name__)
@@ -55,13 +59,33 @@ class StatsAggregatorSubsystem(Subsystem):
5559
),
5660
advanced=True,
5761
)
62+
output_file = StrOption(
63+
default=None,
64+
metavar="<path>",
65+
help="Output the stats to this file. If unspecified, outputs to stdout.",
66+
)
67+
68+
69+
def _log_or_write_to_file(output_file: Optional[str], lines: list[str]) -> None:
70+
"""Send text to the stdout or write to the output file."""
71+
if lines:
72+
text = "\n".join(lines)
73+
if output_file:
74+
with safe_open(output_file, "a") as fh:
75+
fh.write(text)
76+
logger.info(f"Wrote Pants stats to {output_file}")
77+
else:
78+
logger.info(text)
5879

5980

6081
class StatsAggregatorCallback(WorkunitsCallback):
61-
def __init__(self, *, log: bool, memory: bool, has_histogram_module: bool) -> None:
82+
def __init__(
83+
self, *, log: bool, memory: bool, output_file: Optional[str], has_histogram_module: bool
84+
) -> None:
6285
super().__init__()
6386
self.log = log
6487
self.memory = memory
88+
self.output_file = output_file
6589
self.has_histogram_module = has_histogram_module
6690

6791
@property
@@ -80,6 +104,15 @@ def __call__(
80104
if not finished:
81105
return
82106

107+
output_lines = []
108+
if self.output_file:
109+
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
110+
# have an empty line between stats of different Pants invocations
111+
space = "\n\n" if Path(self.output_file).exists() else ""
112+
output_lines.append(
113+
f"{space}{timestamp} Command: {context.run_tracker.run_information().get('cmd_line')}"
114+
)
115+
83116
if self.log:
84117
# Capture global counters.
85118
counters = Counter(context.get_metrics())
@@ -93,7 +126,7 @@ def __call__(
93126
counter_lines = "\n".join(
94127
f" {name}: {count}" for name, count in sorted(counters.items())
95128
)
96-
logger.info(f"Counters:\n{counter_lines}")
129+
output_lines.append(f"Counters:\n{counter_lines}")
97130

98131
if self.memory:
99132
ids: set[int] = set()
@@ -115,18 +148,23 @@ def __call__(
115148
memory_lines = "\n".join(
116149
f" {size}\t\t{count}\t\t{name}" for size, count, name in sorted(entries)
117150
)
118-
logger.info(f"Memory summary (total size in bytes, count, name):\n{memory_lines}")
151+
output_lines.append(
152+
f"Memory summary (total size in bytes, count, name):\n{memory_lines}"
153+
)
119154

120155
if not (self.log and self.has_histogram_module):
156+
_log_or_write_to_file(self.output_file, output_lines)
121157
return
158+
122159
from hdrh.histogram import HdrHistogram # pants: no-infer-dep
123160

124161
histograms = context.get_observation_histograms()["histograms"]
125162
if not histograms:
126-
logger.info("No observation histogram were recorded.")
163+
output_lines.append("No observation histogram were recorded.")
164+
_log_or_write_to_file(self.output_file, output_lines)
127165
return
128166

129-
logger.info("Observation histogram summaries:")
167+
output_lines.append("Observation histogram summaries:")
130168
for name, encoded_histogram in histograms.items():
131169
# Note: The Python library for HDR Histogram will only decode compressed histograms
132170
# that are further encoded with base64. See
@@ -138,7 +176,7 @@ def __call__(
138176
[25, 50, 75, 90, 95, 99]
139177
).items()
140178
)
141-
logger.info(
179+
output_lines.append(
142180
f"Summary of `{name}` observation histogram:\n"
143181
f" min: {histogram.get_min_value()}\n"
144182
f" max: {histogram.get_max_value()}\n"
@@ -148,6 +186,7 @@ def __call__(
148186
f" sum: {int(histogram.get_mean_value() * histogram.total_count)}\n"
149187
f"{percentile_to_vals}"
150188
)
189+
_log_or_write_to_file(self.output_file, output_lines)
151190

152191

153192
@dataclass(frozen=True)
@@ -178,6 +217,7 @@ def construct_callback(
178217
StatsAggregatorCallback(
179218
log=subsystem.log,
180219
memory=subsystem.memory_summary,
220+
output_file=subsystem.output_file,
181221
has_histogram_module=has_histogram_module,
182222
)
183223
if subsystem.log or subsystem.memory_summary

src/python/pants/goal/stats_aggregator_integration_test.py

+31
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from __future__ import annotations
55

66
import re
7+
from pathlib import Path
78

89
from pants.testutil.pants_integration_test import run_pants, setup_tmpdir
910

@@ -46,3 +47,33 @@ def test_warn_if_no_histograms() -> None:
4647
assert "Counters:" in result.stderr
4748
assert "Please run with `--plugins=hdrhistogram`" in result.stderr
4849
assert "Observation histogram summaries:" not in result.stderr
50+
51+
52+
def test_writing_to_output_file() -> None:
    """Two Pants runs pointed at the same `--stats-output-file` must both show up in it."""
    # Options common to both invocations; only the goal (and its specs) differs.
    shared_args = [
        "--backend-packages=['pants.backend.python']",
        "--stats-log",
        "--stats-memory-summary",
        "--stats-output-file=stats.txt",
    ]
    argv1 = [*shared_args, "roots"]
    argv2 = [*shared_args, "list", "::"]

    with setup_tmpdir({"src/py/app.py": "print(0)\n", "src/py/BUILD": "python_sources()"}):
        for argv in (argv1, argv2):
            run_pants(argv).assert_success()

        output_file_contents = Path("stats.txt").read_text()

        # Each section heading is expected once per run.
        for heading in ("Counters:", "Memory summary"):
            assert output_file_contents.count(heading) == 2

        for goal in ("roots", "list"):
            assert goal in output_file_contents

        # The full command line of each run is expected in the file.
        for argv in (argv1, argv2):
            assert " ".join(argv) in output_file_contents

0 commit comments

Comments
 (0)