Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: add output_file option to output the stats to a file #20512

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 47 additions & 7 deletions src/python/pants/goal/stats_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
from __future__ import annotations

import base64
import datetime
import logging
from collections import Counter
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

from pants.engine.internals.scheduler import Workunit
from pants.engine.rules import collect_rules, rule
Expand All @@ -17,9 +20,10 @@
WorkunitsCallbackFactoryRequest,
)
from pants.engine.unions import UnionRule
from pants.option.option_types import BoolOption
from pants.option.option_types import BoolOption, StrOption
from pants.option.subsystem import Subsystem
from pants.util.collections import deep_getsizeof
from pants.util.dirutil import safe_open
from pants.util.strutil import softwrap

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -55,13 +59,33 @@ class StatsAggregatorSubsystem(Subsystem):
),
advanced=True,
)
output_file = StrOption(
default=None,
metavar="<path>",
help="Output the stats to this file. If unspecified, outputs to stdout.",
)


def _log_or_write_to_file(output_file: Optional[str], lines: list[str]) -> None:
    """Emit the collected stats lines through the logger or append them to a file.

    The lines are joined with newlines into a single payload. When `output_file` is
    truthy, the payload is appended to that file and a short confirmation is logged.
    Otherwise the payload itself is logged at INFO level. Nothing is emitted when
    `lines` is empty.
    """
    if not lines:
        return

    payload = "\n".join(lines)
    if not output_file:
        logger.info(payload)
        return

    # Opened in append mode, so existing file content is kept and the payload is added after it.
    with safe_open(output_file, "a") as stats_file:
        stats_file.write(payload)
    logger.info(f"Wrote Pants stats to {output_file}")


class StatsAggregatorCallback(WorkunitsCallback):
def __init__(self, *, log: bool, memory: bool, has_histogram_module: bool) -> None:
def __init__(
self, *, log: bool, memory: bool, output_file: Optional[str], has_histogram_module: bool
) -> None:
super().__init__()
self.log = log
self.memory = memory
self.output_file = output_file
self.has_histogram_module = has_histogram_module

@property
Expand All @@ -80,6 +104,15 @@ def __call__(
if not finished:
return

output_lines = []
if self.output_file:
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# have an empty line between stats of different Pants invocations
space = "\n\n" if Path(self.output_file).exists() else ""
output_lines.append(
f"{space}{timestamp} Command: {context.run_tracker.run_information().get('cmd_line')}"
)

if self.log:
# Capture global counters.
counters = Counter(context.get_metrics())
Expand All @@ -93,7 +126,7 @@ def __call__(
counter_lines = "\n".join(
f" {name}: {count}" for name, count in sorted(counters.items())
)
logger.info(f"Counters:\n{counter_lines}")
output_lines.append(f"Counters:\n{counter_lines}")

if self.memory:
ids: set[int] = set()
Expand All @@ -115,18 +148,23 @@ def __call__(
memory_lines = "\n".join(
f" {size}\t\t{count}\t\t{name}" for size, count, name in sorted(entries)
)
logger.info(f"Memory summary (total size in bytes, count, name):\n{memory_lines}")
output_lines.append(
f"Memory summary (total size in bytes, count, name):\n{memory_lines}"
)

if not (self.log and self.has_histogram_module):
_log_or_write_to_file(self.output_file, output_lines)
return

from hdrh.histogram import HdrHistogram # pants: no-infer-dep

histograms = context.get_observation_histograms()["histograms"]
if not histograms:
logger.info("No observation histogram were recorded.")
output_lines.append("No observation histogram were recorded.")
_log_or_write_to_file(self.output_file, output_lines)
return

logger.info("Observation histogram summaries:")
output_lines.append("Observation histogram summaries:")
for name, encoded_histogram in histograms.items():
# Note: The Python library for HDR Histogram will only decode compressed histograms
# that are further encoded with base64. See
Expand All @@ -138,7 +176,7 @@ def __call__(
[25, 50, 75, 90, 95, 99]
).items()
)
logger.info(
output_lines.append(
f"Summary of `{name}` observation histogram:\n"
f" min: {histogram.get_min_value()}\n"
f" max: {histogram.get_max_value()}\n"
Expand All @@ -148,6 +186,7 @@ def __call__(
f" sum: {int(histogram.get_mean_value() * histogram.total_count)}\n"
f"{percentile_to_vals}"
)
_log_or_write_to_file(self.output_file, output_lines)


@dataclass(frozen=True)
Expand Down Expand Up @@ -178,6 +217,7 @@ def construct_callback(
StatsAggregatorCallback(
log=subsystem.log,
memory=subsystem.memory_summary,
output_file=subsystem.output_file,
has_histogram_module=has_histogram_module,
)
if subsystem.log or subsystem.memory_summary
Expand Down
31 changes: 31 additions & 0 deletions src/python/pants/goal/stats_aggregator_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import re
from pathlib import Path

from pants.testutil.pants_integration_test import run_pants, setup_tmpdir

Expand Down Expand Up @@ -46,3 +47,33 @@ def test_warn_if_no_histograms() -> None:
assert "Counters:" in result.stderr
assert "Please run with `--plugins=hdrhistogram`" in result.stderr
assert "Observation histogram summaries:" not in result.stderr


def test_writing_to_output_file() -> None:
    """Two runs pointed at the same `--stats-output-file` must both be recorded in it."""
    stats_flags = [
        "--backend-packages=['pants.backend.python']",
        "--stats-log",
        "--stats-memory-summary",
        "--stats-output-file=stats.txt",
    ]
    roots_argv = [*stats_flags, "roots"]
    list_argv = [*stats_flags, "list", "::"]
    sources = {"src/py/app.py": "print(0)\n", "src/py/BUILD": "python_sources()"}

    with setup_tmpdir(sources):
        # Run sequentially against the same output file so the second run appends to the first.
        for argv in (roots_argv, list_argv):
            run_pants(argv).assert_success()

        written = Path("stats.txt").read_text()

        # Each section header should appear once per invocation.
        for header in ("Counters:", "Memory summary"):
            assert written.count(header) == 2

        # Both goal names should show up somewhere in the file.
        for goal in ("roots", "list"):
            assert goal in written

        # The full command line of each invocation is recorded in its header line.
        for argv in (roots_argv, list_argv):
            assert " ".join(argv) in written
Loading