feat: improving heartbeat message (#348)

alexahaushalter · web-flow · commit e56ecb9d9cb5 · 2026-04-03T13:07:32.000-05:00
<!-- SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION
& AFFILIATES. All rights reserved. -->
<!-- SPDX-License-Identifier: Apache-2.0 -->

<!-- Thank you for contributing to Safe Synthesizer! -->

# Summary
Improve the heartbeat message by adding an explanation that it is normal
to have long stretches with no new records.

## Pre-Review Checklist

<!-- These checks should be completed before a PR is reviewed, -->
<!-- but you can submit a draft early to indicate that the issue is
being worked on. -->

Ensure that the following pass:

- [x] `make format && make check` or via prek validation.
- [x] `make test` passes locally
- [x] `make test-e2e` passes locally
- [ ] `make test-ci-container` passes locally (recommended)
- [ ] GPU CI status check passes -- comment `/sync` on this PR to
trigger a run (auto-triggers on ready-for-review)

## Pre-Merge Checklist

<!-- These checks need to be completed before a PR is merged, -->
<!-- but as PRs often change significantly during review, -->
<!-- it's OK for them to be incomplete when review is first requested.
-->

- [ ] New or updated tests for any fix or new behavior
- [ ] Updated documentation for new features and behaviors, including
docstrings for API docs.

## Other Notes

<!-- Please add the issue number that should be closed when this PR is
merged. -->
- Closes #<issue>

---------

Signed-off-by: Alexa Haushalter <ahaushalter@nvidia.com>
diff --git a/docs/user-guide/troubleshooting.md b/docs/user-guide/troubleshooting.md
@@ -278,8 +278,10 @@ to cause long generation times:
 `max_tokens` scales with context window: each generation prompt is allowed
 up to `max_seq_length` output tokens (`12,288` for SmolLM3). If the model
 produces long outputs before the stop condition fires, each prompt in the
-batch takes proportionally longer. A heartbeat log (`"Generation in
-progress"`) is emitted every 60 seconds to confirm the pipeline is alive.
+batch takes proportionally longer. Every 60 seconds a heartbeat line is
+logged starting with `Generation in progress`, followed by the note
+`Long stretches with no new records are normal.` New records only appear
+after a full batch of prompts finishes, so such gaps are expected.
 
 Long-tail batch latency: vLLM processes all prompts in a batch
 simultaneously, but `llm.generate()` blocks until every prompt completes.
diff --git a/src/nemo_safe_synthesizer/generation/vllm_backend.py b/src/nemo_safe_synthesizer/generation/vllm_backend.py
@@ -532,6 +532,7 @@ def generate(
             "Generation",
             logger_name=__name__,
             target_records=self.config.generation.num_records,
+            progress_note=("Long stretches with no new records are normal."),
         ):
             while batches.num_valid_records < self.config.generation.num_records:
                 # Generate a batch from prompts and process the responses.
diff --git a/src/nemo_safe_synthesizer/observability.py b/src/nemo_safe_synthesizer/observability.py
@@ -955,6 +955,7 @@ def heartbeat(
     interval: float = 60.0,
     *,
     logger_name: str | None = None,
+    progress_note: str | None = None,
     **extra_fields,
 ) -> Generator[None, None, None]:
     """Context manager that logs a periodic heartbeat during a long-running operation.
@@ -964,6 +965,8 @@ def heartbeat(
         interval: Seconds between heartbeat log messages.
         logger_name: Logger name (pass ``__name__`` so heartbeat logs attribute
             to the calling module).
+        progress_note: Optional sentence appended only to periodic ``... in progress``
+            lines (so ``message`` can stay short for ``... complete`` / ``... failed``).
         **extra_fields: Additional structured fields passed to the logger
             (e.g. ``model="SmolLM3"``).
     """
@@ -978,7 +981,10 @@ def _extra() -> dict:
 
     def _run() -> None:
         while not stop.wait(timeout=interval):
-            _logger.info(f"{message} in progress", extra={"ctx": _extra()})
+            event = f"{message} in progress"
+            if progress_note:
+                event = f"{event}. {progress_note}"
+            _logger.info(event, extra={"ctx": _extra()})
 
     thread = threading.Thread(target=_run, daemon=True)
     thread.start()
diff --git a/tests/observability/test_observability.py b/tests/observability/test_observability.py
@@ -638,6 +638,19 @@ def test_heartbeat_logs_progress_on_long_operation(self, caplog):
         assert "Slow op in progress" in caplog.text
         assert "Slow op complete" in caplog.text
 
+    def test_heartbeat_progress_note_on_periodic_logs_only(self, caplog):
+        caplog.set_level(logging.INFO)
+        message = "Generation"
+        progress_note = "Records update only after each batch finishes."
+        with heartbeat(message, interval=0.05, progress_note=progress_note):
+            time.sleep(0.15)
+
+        assert f"{message} in progress. {progress_note}" in caplog.text
+        assert f"{message} complete" in caplog.text
+        for record in caplog.records:
+            if "complete" in record.getMessage():
+                assert progress_note not in record.getMessage()
+
     def test_heartbeat_includes_extra_fields(self, caplog):
         caplog.set_level(logging.INFO)
         with heartbeat("Loading", interval=0.05, model="test-model"):