|
33 | 33 | import sys |
34 | 34 | import tarfile |
35 | 35 | import tempfile |
| 36 | +import threading |
36 | 37 | import time |
37 | 38 | import traceback |
38 | 39 | from dataclasses import dataclass, field |
@@ -155,6 +156,7 @@ class DeployOptions: |
155 | 156 | install_cnpg_operator: bool = False |
156 | 157 | helm_timeout: str = "15m" |
157 | 158 | helm_debug: bool = False |
| 159 | + watch_pods: bool = False |
158 | 160 | recreate_secrets: bool = False |
159 | 161 | run_tests: bool = False |
160 | 162 | dry_run: bool = False |
@@ -455,6 +457,194 @@ def _run_logged( |
455 | 457 | return subprocess.CompletedProcess(cmd, proc.returncode, stdout_text, stderr_text) |
456 | 458 |
|
457 | 459 |
|
def _short_pod_text(value: str, *, limit: int = 140) -> str:
    """Return a compact single-line pod status detail.

    Runs of whitespace (including newlines) are collapsed to single spaces.
    If the collapsed text is longer than ``limit`` it is cut and suffixed with
    ``...`` so the result is exactly ``limit`` characters long.

    Args:
        value: Raw text, possibly spanning several lines.
        limit: Maximum length of the returned string.

    Returns:
        The collapsed text, never longer than ``max(limit, 0)`` characters.
    """
    compact = " ".join(value.split())
    if len(compact) <= limit:
        return compact
    if limit < 3:
        # There is no room for an ellipsis, and ``limit - 3`` would become a
        # negative slice bound that trims from the end instead of capping the
        # length. Hard-truncate so the limit is still honoured.
        return compact[: max(limit, 0)]
    return f"{compact[: limit - 3]}..."
| 466 | + |
| 467 | + |
def _pod_ready_counts(pod: Any) -> tuple[int, int]:
    """Return ``(ready, total)`` counts over the pod's regular container statuses.

    A pod with no status, or with no container statuses, yields ``(0, 0)``.
    """
    container_statuses = []
    if pod.status and pod.status.container_statuses:
        container_statuses = pod.status.container_statuses
    ready_count = len([entry for entry in container_statuses if entry.ready])
    return ready_count, len(container_statuses)
| 473 | + |
| 474 | + |
def _pod_restart_count(pod: Any) -> int:
    """Return the summed restart count across init and regular containers.

    Missing status, missing status lists and missing per-container counts are
    all treated as zero.
    """
    pod_status = pod.status
    if not pod_status:
        return 0
    restarts = 0
    for group in (pod_status.init_container_statuses, pod_status.container_statuses):
        for entry in group or []:
            restarts += entry.restart_count or 0
    return restarts
| 481 | + |
| 482 | + |
def _pod_is_ready(pod: Any) -> bool:
    """Return whether the pod should be treated as ready.

    A pod in phase ``Succeeded`` counts as ready. Otherwise the first ``Ready``
    condition decides. When no such condition exists, the pod is ready only if
    it has at least one container status and all of them are ready.
    """
    pod_status = pod.status
    if pod_status:
        if pod_status.phase == "Succeeded":
            return True
        ready_condition = next(
            (cond for cond in pod_status.conditions or [] if cond.type == "Ready"),
            None,
        )
        if ready_condition is not None:
            return ready_condition.status == "True"
    # No Ready condition reported: fall back to per-container readiness.
    ready_count, total_count = _pod_ready_counts(pod)
    return ready_count == total_count and total_count > 0
| 492 | + |
| 493 | + |
| 494 | +def _container_waiting_details(statuses: list[Any] | None, *, prefix: str) -> list[str]: |
| 495 | + details: list[str] = [] |
| 496 | + for status in statuses or []: |
| 497 | + if status.ready: |
| 498 | + continue |
| 499 | + name = status.name or "<unknown>" |
| 500 | + state = status.state |
| 501 | + if state and state.waiting: |
| 502 | + reason = state.waiting.reason or "Waiting" |
| 503 | + message = state.waiting.message or "" |
| 504 | + text = f"{prefix} {name}: {reason}" |
| 505 | + if message: |
| 506 | + text = f"{text} ({_short_pod_text(message)})" |
| 507 | + details.append(text) |
| 508 | + elif state and state.terminated: |
| 509 | + reason = state.terminated.reason or "Terminated" |
| 510 | + details.append(f"{prefix} {name}: {reason} exitCode={state.terminated.exit_code}") |
| 511 | + elif state and state.running: |
| 512 | + details.append(f"{prefix} {name}: Running but not ready") |
| 513 | + else: |
| 514 | + details.append(f"{prefix} {name}: status unknown") |
| 515 | + return details |
| 516 | + |
| 517 | + |
def _pod_waiting_details(pod: Any) -> list[str]:
    """Return human-readable reasons why ``pod`` is not ready yet.

    Lines are collected in this order: pod conditions that are not ``True``
    (``Ready`` and ``ContainersReady`` are skipped), then init container
    details, then regular container details. If none of those produce
    anything, the pod-level reason/message is used. The final fallback is a
    single ``"waiting for readiness"`` entry, so the result is never empty.
    """
    fallback = ["waiting for readiness"]
    pod_status = pod.status
    if not pod_status:
        return fallback

    skipped_types = {"Ready", "ContainersReady"}
    notes: list[str] = []
    for cond in pod_status.conditions or []:
        if cond.status == "True" or cond.type in skipped_types:
            continue
        line = f"condition {cond.type}={cond.status}"
        if cond.reason:
            line += f" ({cond.reason})"
        if cond.message:
            line += f": {_short_pod_text(cond.message)}"
        notes.append(line)

    notes += _container_waiting_details(pod_status.init_container_statuses, prefix="init")
    notes += _container_waiting_details(pod_status.container_statuses, prefix="container")
    if notes:
        return notes

    # Nothing specific found: fall back to the pod-level reason/message.
    if pod_status.reason or pod_status.message:
        summary = pod_status.reason or "Waiting"
        if pod_status.message:
            summary = f"{summary}: {_short_pod_text(pod_status.message)}"
        return [summary]
    return fallback
| 550 | + |
| 551 | + |
def _unready_pod_summary_lines(k8s: K8sClient, namespace: str, *, limit: int = 8) -> list[str]:
    """Return concise status lines for pods that are not ready.

    Args:
        k8s: Client wrapper whose ``v1.list_namespaced_pod`` lists the pods.
        namespace: Namespace to inspect.
        limit: Maximum number of unready pods to describe individually.

    Returns:
        One of:
        - ``["pod summary unavailable: ..."]`` when listing pods fails,
        - ``["no pods found yet"]`` when the namespace has no pods,
        - ``["all pods ready"]`` when every pod is ready,
        - otherwise a header line with the unready count, followed by one line
          per unready pod (sorted by name, at most ``limit`` of them) carrying
          up to three waiting details each.
    """
    try:
        pods = k8s.v1.list_namespaced_pod(namespace).items
    except Exception as exc:
        # Best-effort diagnostics: a failed listing must not abort the caller.
        # The error text is compacted like every other detail here, because
        # client errors may be long and multi-line and this line is re-logged.
        return [f"pod summary unavailable: {_short_pod_text(str(exc))}"]

    if not pods:
        return ["no pods found yet"]

    unready = sorted(
        (pod for pod in pods if not _pod_is_ready(pod)),
        key=lambda pod: pod.metadata.name or "",
    )
    if not unready:
        return ["all pods ready"]

    header = f"{len(unready)} pod(s) not ready"
    if len(unready) > limit:
        header += f"; showing first {limit}"
    lines = [header]
    for pod in unready[:limit]:
        name = pod.metadata.name or "<unknown>"
        ready, total = _pod_ready_counts(pod)
        phase = pod.status.phase if pod.status and pod.status.phase else "Unknown"
        restarts = _pod_restart_count(pod)
        # Cap the per-pod detail list so each pod stays on one readable line.
        details = "; ".join(_pod_waiting_details(pod)[:3])
        lines.append(f"{name} ready={ready}/{total} phase={phase} restarts={restarts}: {details}")
    return lines
| 580 | + |
| 581 | + |
def _emit_pod_summary(lines: list[str], step: DeployStep, callback: DeployCallback) -> None:
    """Record each summary line on the step and forward it to the callback.

    Every line is prefixed with ``[pods] `` before it is appended to
    ``step.output`` and passed to ``callback.on_log``.
    """
    for tagged in (f"[pods] {line}" for line in lines):
        step.output.append(tagged)
        callback.on_log(tagged)
| 587 | + |
| 588 | + |
def _poll_pod_summary(
    k8s: K8sClient,
    namespace: str,
    step: DeployStep,
    callback: DeployCallback,
    stop_event: threading.Event,
    *,
    interval: float,
    heartbeat_interval: float,
) -> None:
    """Poll and emit changed pod readiness summaries until stopped.

    A summary is emitted when it differs from the previous one. While pods are
    still blocking, it is also re-emitted once ``heartbeat_interval`` seconds
    have passed since the last emission.

    Args:
        k8s: Client wrapper used to list pods.
        namespace: Namespace to inspect.
        step: Step whose ``output`` receives the emitted lines.
        callback: Receiver of the emitted lines via ``on_log``.
        stop_event: Set by the caller to end polling.
        interval: Seconds to wait between polls.
        heartbeat_interval: Minimum seconds between repeats of an unchanged
            summary that still reports blockers.
    """
    last_signature = ""
    last_emit = 0.0
    while not stop_event.is_set():
        lines = _unready_pod_summary_lines(k8s, namespace)
        if stop_event.is_set():
            # Listing pods can block for a while. If the caller asked us to
            # stop in the meantime, drop this result rather than writing
            # stale lines into a step the caller may already have finished.
            break
        signature = "\n".join(lines)
        now = time.monotonic()
        has_blockers = lines != ["all pods ready"]
        heartbeat_due = has_blockers and now - last_emit >= heartbeat_interval
        if signature != last_signature or heartbeat_due:
            _emit_pod_summary(lines, step, callback)
            last_signature = signature
            last_emit = now
        # Returns early when the event is set; the loop condition then exits.
        stop_event.wait(interval)
| 613 | + |
| 614 | + |
def _run_logged_with_pod_summary(
    cmd: list[str],
    k8s: K8sClient | None,
    namespace: str,
    step: DeployStep,
    callback: DeployCallback,
    *,
    check: bool = True,
    timeout: int | None = 600,
    poll_interval: float = 10.0,
    heartbeat_interval: float = 60.0,
) -> subprocess.CompletedProcess[str]:
    """Run a command while periodically logging unready pod summaries.

    Without a Kubernetes client the command is run through ``_run_logged``
    after a notice is logged. Otherwise a daemon thread polls pod readiness
    while the command runs. The thread is signalled to stop and joined for up
    to two seconds once the command returns or raises.
    """
    if k8s is None:
        callback.on_log("Pod readiness summaries unavailable: Kubernetes client is not initialized")
        return _run_logged(cmd, step, callback, check=check, timeout=timeout)

    callback.on_log("Streaming pod readiness summaries while Helm waits...")
    stop_event = threading.Event()
    poller = threading.Thread(
        target=_poll_pod_summary,
        args=(k8s, namespace, step, callback, stop_event),
        kwargs=dict(interval=poll_interval, heartbeat_interval=heartbeat_interval),
        daemon=True,
    )
    poller.start()
    try:
        completed = _run_logged(cmd, step, callback, check=check, timeout=timeout)
    finally:
        # Always stop the poller, including when the command fails or times out.
        stop_event.set()
        poller.join(timeout=2)
    return completed
| 646 | + |
| 647 | + |
458 | 648 | def _format_elapsed(seconds: float) -> str: |
459 | 649 | """Return a compact elapsed-time string for long-running command status.""" |
460 | 650 | seconds_int = max(0, int(seconds)) |
@@ -2077,7 +2267,17 @@ def _helm_install(self) -> None: |
2077 | 2267 |
|
2078 | 2268 | debug_suffix = " --debug" if "--debug" in helm_args else "" |
2079 | 2269 | self.callback.on_log(f"Running: helm upgrade --install {release}{debug_suffix} ...") |
2080 | | - _run_logged(helm_args, step, self.callback, timeout=1200) |
| 2270 | + if self.options.watch_pods: |
| 2271 | + _run_logged_with_pod_summary( |
| 2272 | + helm_args, |
| 2273 | + self._k8s, |
| 2274 | + ns, |
| 2275 | + step, |
| 2276 | + self.callback, |
| 2277 | + timeout=1200, |
| 2278 | + ) |
| 2279 | + else: |
| 2280 | + _run_logged(helm_args, step, self.callback, timeout=1200) |
2081 | 2281 | self._finish_step(step) |
2082 | 2282 |
|
2083 | 2283 | def _patch_gateway(self) -> None: |
|
0 commit comments