Skip to content

Commit 7d57055

Browse files
claude-nightshift[bot], Nightshift Scout, and claude
authored
[nightshift] 20260422 multi-cleanup (#5044)
> *dcebae8d — haiku* > Dead code falls away > Stale comments drift with the breeze > Logs hoist at module root ## Summary Nightshift cleanup sweep across `lib/iris` and `lib/zephyr`. Two scouts produced changes; two filed `no_change` with notes recorded below for follow-up. ### lib/iris/src/iris (applied — 7505fd9) - Hoisted the `from iris.rpc import logging_pb2` import and the `_STR_TO_ENUM` mapping in `iris/logging.py` out of the `str_to_log_level` function body to module scope. The deferred import had no corresponding circular dependency (`iris.rpc.logging_pb2` does not import `iris.logging`) and every file that imports `str_to_log_level` already imports `logging_pb2` directly. - Widened the `str_to_log_level` parameter type to `str | None` and folded the falsy-check into the function, allowing four call sites to drop their redundant `if level_name else 0` guards. - Fixed a stale code comment in `cluster/types.py` that pointed at `iris.logging` for `LevelPrefixFormatter` / `_LEVEL_PREFIX` — those helpers live in `rigging.log_setup`. ### lib/zephyr/src/zephyr (applied — a3d9e7c) - Removed the dead `scatter_manifest_dir` parameter from `_regroup_result_refs`, its two call sites, and the two local variables (`output_stage_name`, `join_output_stage_name`) that only existed to construct the now-unused path. The coordinator-side scatter manifest was removed in #4853 but this plumbing was left behind. ### lib/levanter/src/levanter (no_change — findings only) Scout flagged but could not commit (sandbox denied file writes): - **Correctness bug**: `levanter/schedule.py:value_at_step` iterates the schedule forward and returns the first entry whose `start <= step`, which always yields the earliest segment's value for any schedule with more than one `ScheduleStep`. The call site `levanter/trainer.py:batch_axis_at_step` depends on this, so a training run configuring `train_batch_size` as an `IntSchedule` gets the wrong batch size after the first segment boundary. 
Regressed in PR #887 / commit 83f3f34 (renamed `until` → `start` without reversing the iteration order). - `utils/stat_utils.py:RunningMean.add` recomputes `self.total + total` twice (harmless duplicate). - `shapes.py:conforms` appears unreferenced across `lib/`. - `utils/background_iterable.py:80-87` has tautological `except StopIteration: raise` / `except Exception as e: raise e` blocks flagged by the AGENTS.md LLM-pitfall list. ### lib/marin/src/marin (no_change — findings only) Scout identified dead code but was also blocked by the sandbox: - `asdict_excluding` in `utils.py` (~27 LOC + self-referential tests) — last real caller was removed when speedrun code was deleted in #4541. - `lib/marin/src/marin/cluster/ray.py` (959 LOC, entirely unused) — already being removed in open PR #5028, so not duplicated here. - `evaluation/utils.py` uses `print` instead of `logger` in `download_from_gcs` / `upload_to_gcs`. - `core/runtime.py` still calls `datetime.utcnow()` (deprecated in 3.12). ## Test plan - [x] `./infra/pre-commit.py --all-files --fix` — clean - [x] `uv run pytest -x lib/iris/tests/test_logging.py` — 19 passed - [x] `uv run pytest -x lib/zephyr/tests/test_execution.py` — 42 passed --------- Co-authored-by: Nightshift Scout <nightshift@example.com> Co-authored-by: Nightshift Scout <nightshift-scout@anthropic.com> Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 53bf959 commit 7d57055

7 files changed

Lines changed: 22 additions & 28 deletions

File tree

lib/iris/src/iris/cluster/log_store/duckdb_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ def get_logs(
525525
regular expression and matched with DuckDB's ``regexp_matches()``.
526526
Otherwise it is treated as an exact key lookup.
527527
"""
528-
min_level_enum = str_to_log_level(min_level) if min_level else 0
528+
min_level_enum = str_to_log_level(min_level)
529529
is_pattern = bool(REGEX_META_RE.search(key))
530530

531531
if not is_pattern:

lib/iris/src/iris/cluster/log_store/mem_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def get_logs(
103103
tail: bool = False,
104104
min_level: str = "",
105105
) -> LogReadResult:
106-
min_level_enum = str_to_log_level(min_level) if min_level else 0
106+
min_level_enum = str_to_log_level(min_level)
107107
is_pattern = bool(REGEX_META_RE.search(key))
108108

109109
if is_pattern:

lib/iris/src/iris/cluster/providers/k8s/tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ def _build_pod_manifest(
525525

526526
def _kubectl_log_line_to_log_entry(kll: KubectlLogLine, attempt_id: int) -> logging_pb2.LogEntry:
527527
level_name = parse_log_level(kll.data)
528-
level = str_to_log_level(level_name) if level_name else 0
528+
level = str_to_log_level(level_name)
529529
entry = logging_pb2.LogEntry(source=kll.stream, data=kll.data, attempt_id=attempt_id, level=level)
530530
entry.timestamp.CopyFrom(timestamp_to_proto(Timestamp.from_seconds(kll.timestamp.timestamp())))
531531
return entry

lib/iris/src/iris/cluster/types.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,9 +481,9 @@ def to_proto(self) -> job_pb2.ResourceSpecProto:
481481
482482
# Reinitialize logging with the unified Iris format.
483483
# Uses single-letter level prefix: I=INFO, W=WARNING, E=ERROR, D=DEBUG, C=CRITICAL.
484-
# NOTE: This duplicates LevelPrefixFormatter and _LEVEL_PREFIX from iris.logging
484+
# NOTE: This duplicates LevelPrefixFormatter and _LEVEL_PREFIX from rigging.log_setup
485485
# because CALLABLE_RUNNER executes inside an isolated task container that may not
486-
# have the iris package installed (e.g. user-provided Docker images).
486+
# have the rigging package installed (e.g. user-provided Docker images).
487487
_LEVEL_PREFIX = {"DEBUG": "D", "INFO": "I", "WARNING": "W", "ERROR": "E", "CRITICAL": "C"}
488488
489489
class _LevelPrefixFormatter(logging.Formatter):

lib/iris/src/iris/cluster/worker/task_attempt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -898,7 +898,7 @@ def _monitor_loop(
898898
def _make_log_entry(self, *, source: str, data: str) -> logging_pb2.LogEntry:
899899
"""Build a LogEntry proto from a source/data pair, parsing the level prefix."""
900900
level_name = parse_log_level(data)
901-
level = str_to_log_level(level_name) if level_name else 0
901+
level = str_to_log_level(level_name)
902902
entry = logging_pb2.LogEntry(source=source, data=data, level=level)
903903
entry.timestamp.epoch_ms = Timestamp.now().epoch_ms()
904904
return entry

lib/iris/src/iris/logging.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,23 @@
88
retains only the functions that depend on ``iris.rpc.logging_pb2``.
99
"""
1010

11+
from iris.rpc import logging_pb2
1112

12-
def str_to_log_level(level_name: str) -> int:
13+
_STR_TO_ENUM = {
14+
"DEBUG": logging_pb2.LOG_LEVEL_DEBUG,
15+
"INFO": logging_pb2.LOG_LEVEL_INFO,
16+
"WARNING": logging_pb2.LOG_LEVEL_WARNING,
17+
"ERROR": logging_pb2.LOG_LEVEL_ERROR,
18+
"CRITICAL": logging_pb2.LOG_LEVEL_CRITICAL,
19+
}
20+
21+
22+
def str_to_log_level(level_name: str | None) -> int:
1323
"""Convert a canonical level name (e.g. "INFO") to the LogLevel proto enum value.
1424
15-
Returns LOG_LEVEL_UNKNOWN (0) for unrecognized names.
16-
Uses lazy import to avoid pulling in protobuf at module load time.
25+
Returns ``LOG_LEVEL_UNKNOWN`` (0) for ``None``, empty strings, and
26+
unrecognized names.
1727
"""
18-
from iris.rpc import logging_pb2
19-
20-
_STR_TO_ENUM = {
21-
"DEBUG": logging_pb2.LOG_LEVEL_DEBUG,
22-
"INFO": logging_pb2.LOG_LEVEL_INFO,
23-
"WARNING": logging_pb2.LOG_LEVEL_WARNING,
24-
"ERROR": logging_pb2.LOG_LEVEL_ERROR,
25-
"CRITICAL": logging_pb2.LOG_LEVEL_CRITICAL,
26-
}
27-
return (
28-
_STR_TO_ENUM.get(level_name.upper(), logging_pb2.LOG_LEVEL_UNKNOWN)
29-
if level_name
30-
else logging_pb2.LOG_LEVEL_UNKNOWN
31-
)
28+
if not level_name:
29+
return logging_pb2.LOG_LEVEL_UNKNOWN
30+
return _STR_TO_ENUM.get(level_name.upper(), logging_pb2.LOG_LEVEL_UNKNOWN)

lib/zephyr/src/zephyr/execution.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -866,7 +866,6 @@ def run_pipeline(
866866

867867
# Build and submit tasks
868868
tasks = _compute_tasks_from_shards(shards, stage, aux_per_shard, stage_name=stage_label)
869-
output_stage_name = tasks[0].stage_name if tasks else stage_label
870869
logger.info("[%s] Starting stage %s with %d tasks", self._execution_id, stage_label, len(tasks))
871870
self._start_stage(stage_label, tasks, is_last_stage=(stage_idx == last_worker_stage_idx))
872871

@@ -881,7 +880,6 @@ def run_pipeline(
881880
len(shards),
882881
output_shard_count=stage.output_shards,
883882
is_scatter=stage_is_scatter,
884-
scatter_manifest_dir=f"{self._chunk_prefix}/{self._execution_id}/{output_stage_name}",
885883
)
886884

887885
# Flatten final results — each shard may involve I/O (unpickling from
@@ -923,7 +921,6 @@ def _compute_join_aux(
923921

924922
join_stage_label = f"join-right-{parent_stage_idx}-{i}-stage{stage_idx}"
925923
right_tasks = _compute_tasks_from_shards(right_refs, right_stage, stage_name=join_stage_label)
926-
join_output_stage_name = right_tasks[0].stage_name if right_tasks else join_stage_label
927924
self._start_stage(join_stage_label, right_tasks)
928925
self._wait_for_stage()
929926
raw = self._collect_results()
@@ -933,7 +930,6 @@ def _compute_join_aux(
933930
len(right_refs),
934931
output_shard_count=right_stage.output_shards,
935932
is_scatter=right_is_scatter,
936-
scatter_manifest_dir=f"{self._chunk_prefix}/{self._execution_id}/{join_output_stage_name}",
937933
)
938934

939935
if len(shard_refs) != len(right_refs):
@@ -1313,7 +1309,6 @@ def _regroup_result_refs(
13131309
input_shard_count: int,
13141310
output_shard_count: int | None = None,
13151311
is_scatter: bool = False,
1316-
scatter_manifest_dir: str = "",
13171312
) -> list[Shard]:
13181313
"""Regroup worker output refs by output shard index without loading data.
13191314

0 commit comments

Comments (0)