Skip to content

Commit 6d681c5

Browse files
committed
fix(workbenches): extend the notebook_pod fixture
* Adds a configurable timeout parameter.
* Adds a dependency that requires the PVC to exist before workbench creation.
* Includes basic pod diagnostics directly in the log output, e.g.:

```
E Pod diagnostics:
E Pod phase=Pending, reason=None, message=None
E Condition PodScheduled: status=False, reason=Unschedulable, message=0/6 nodes are \ available: persistentvolumeclaim "test-auth-notebook" not found. not found
```
1 parent 39d3462 commit 6d681c5

1 file changed

Lines changed: 104 additions & 11 deletions

File tree

tests/workbenches/conftest.py

Lines changed: 104 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections.abc import Generator
2+
from typing import Any
23

34
import pytest
45
from kubernetes.dynamic import DynamicClient
@@ -8,6 +9,7 @@
89
from ocp_resources.pod import Pod
910
from pytest_testconfig import config as py_config
1011
from simple_logger.logger import get_logger
12+
from timeout_sampler import TimeoutExpiredError
1113

1214
from tests.workbenches.utils import get_username
1315
from utilities import constants
@@ -18,6 +20,73 @@
1820
LOGGER = get_logger(name=__name__)
1921

2022

23+
def _read_obj_field(obj: Any, field_name: str, default: Any = None) -> Any:
    """Safely read an attribute or key from a k8s dynamic object.

    Plain dicts are read by key; any other object is read by attribute.
    A value of ``None`` is treated the same as a missing field, so ``default``
    is returned in both cases.

    Args:
        obj: A dict, an attribute-style object, or ``None``.
        field_name: Key (for dicts) or attribute name (for other objects).
        default: Value returned when the field is missing or ``None``.

    Returns:
        The field value, or ``default`` when the field is absent or ``None``.
    """
    if isinstance(obj, dict):
        value = obj.get(field_name)
    else:
        value = getattr(obj, field_name, None)

    # Attribute-style wrappers may answer unknown attributes with None instead
    # of raising AttributeError (the kubernetes dynamic client's ResourceField
    # is understood to behave this way), which would bypass getattr's default.
    # Only None is replaced: falsy-but-real values (False, 0, "") are kept.
    return default if value is None else value
28+
29+
30+
def _format_container_status(container_status: Any, status_prefix: str) -> str:
    """Render a single container status as a one-line diagnostic string.

    The line reports the container name, readiness, restart count and the
    first populated state among ``waiting``, ``terminated`` and ``running``
    (checked in that order), with that state's reason and message if present.

    Args:
        container_status: Container status entry (dict or attribute-style object).
        status_prefix: Label placed at the start of the line, e.g. "Container".

    Returns:
        The formatted one-line summary.
    """
    container_name = _read_obj_field(obj=container_status, field_name="name", default="<unknown>")
    is_ready = _read_obj_field(obj=container_status, field_name="ready", default=False)
    restarts = _read_obj_field(obj=container_status, field_name="restartCount", default=0)
    container_state = _read_obj_field(obj=container_status, field_name="state", default={})

    state_summary = "unknown"
    message_suffix = ""
    for candidate in ("waiting", "terminated", "running"):
        state_entry = _read_obj_field(obj=container_state, field_name=candidate, default=None)
        if state_entry:
            # Only the first populated state is reported.
            state_reason = _read_obj_field(obj=state_entry, field_name="reason", default=None)
            state_message = _read_obj_field(obj=state_entry, field_name="message", default=None)
            if state_reason:
                state_summary = f"{candidate}({state_reason})"
            else:
                state_summary = candidate
            if state_message:
                message_suffix = f", message={state_message}"
            break

    return (
        f"{status_prefix} '{container_name}': ready={is_ready}, "
        f"restarts={restarts}, state={state_summary}{message_suffix}"
    )
53+
54+
55+
def _collect_notebook_pod_diagnostics(notebook_pod: Pod) -> str:
    """Summarize a pod's status as newline-separated diagnostic lines.

    The output starts with the pod phase/reason/message, followed by one line
    per pod condition, one per init container status and one per container
    status, in that order.

    Args:
        notebook_pod: Pod whose ``instance`` status is inspected.

    Returns:
        The diagnostic lines joined with newlines, suitable for an assertion message.
    """
    status = _read_obj_field(obj=notebook_pod.instance, field_name="status", default=None)
    phase = _read_obj_field(obj=status, field_name="phase", default="Unknown")
    reason = _read_obj_field(obj=status, field_name="reason", default=None)
    message = _read_obj_field(obj=status, field_name="message", default=None)

    report = [f"Pod phase={phase}, reason={reason}, message={message}"]

    # "or []" guards against a field that is present but empty/None.
    for condition in _read_obj_field(obj=status, field_name="conditions", default=[]) or []:
        condition_type = _read_obj_field(obj=condition, field_name="type", default="<unknown>")
        condition_status = _read_obj_field(obj=condition, field_name="status", default="Unknown")
        condition_reason = _read_obj_field(obj=condition, field_name="reason", default="")
        condition_message = _read_obj_field(obj=condition, field_name="message", default="")
        report.append(
            f"Condition {condition_type}: status={condition_status}, "
            f"reason={condition_reason}, message={condition_message}"
        )

    # Init containers are listed before regular containers.
    status_sections = (
        ("initContainerStatuses", "Init container"),
        ("containerStatuses", "Container"),
    )
    for status_field, prefix in status_sections:
        for entry in _read_obj_field(obj=status, field_name=status_field, default=[]) or []:
            report.append(_format_container_status(container_status=entry, status_prefix=prefix))

    return "\n".join(report)
88+
89+
2190
@pytest.fixture(scope="function")
2291
def users_persistent_volume_claim(
2392
request: pytest.FixtureRequest, unprivileged_model_namespace: Namespace, unprivileged_client: DynamicClient
@@ -106,8 +175,13 @@ def default_notebook(
106175
request: pytest.FixtureRequest,
107176
admin_client: DynamicClient,
108177
notebook_image: str,
178+
users_persistent_volume_claim: PersistentVolumeClaim,
109179
) -> Generator[Notebook]:
110-
"""Returns a new Notebook CR for a given namespace, name, and image"""
180+
"""Returns a new Notebook CR for a given namespace, name, and image.
181+
182+
The PVC fixture dependency guarantees the Notebook is created only after
183+
the user PVC exists, avoiding pod scheduling races on claim lookup.
184+
"""
111185
namespace = request.param["namespace"]
112186
name = request.param["name"]
113187

@@ -222,6 +296,7 @@ def default_notebook(
222296

223297
@pytest.fixture(scope="function")
224298
def notebook_pod(
299+
request: pytest.FixtureRequest,
225300
unprivileged_client: DynamicClient,
226301
default_notebook: Notebook,
227302
) -> Pod:
@@ -231,10 +306,11 @@ def notebook_pod(
231306
This fixture:
232307
- Creates a Pod object for the notebook
233308
- Waits for pod to exist
234-
- Waits for pod to reach Ready state (10-minute timeout)
309+
- Waits for pod to reach Ready state (configurable timeout)
235310
- Provides detailed diagnostics on failure
236311
237312
Args:
313+
request: Optional fixture params. Supports {"timeout": <seconds>} via indirect parametrization.
238314
unprivileged_client: Client for interacting with the cluster
239315
default_notebook: The notebook CR to get the pod for
240316
@@ -244,10 +320,13 @@ def notebook_pod(
244320
Raises:
245321
AssertionError: If pod fails to reach Ready state or is not created
246322
"""
323+
params = getattr(request, "param", {})
324+
pod_ready_timeout = params.get("timeout", Timeout.TIMEOUT_10MIN)
325+
247326
# Error messages
248327
_ERR_POD_NOT_READY = (
249-
"Pod '{pod_name}-0' failed to reach Ready state within 10 minutes.\n"
250-
"Pod Phase: {pod_phase}\n"
328+
"Pod '{pod_name}-0' failed to reach Ready state within {timeout_seconds} seconds.\n"
329+
"Pod diagnostics:\n{pod_diagnostics}\n"
251330
"Original Error: {original_error}\n"
252331
"Pod information collected to must-gather directory for debugging."
253332
)
@@ -265,18 +344,32 @@ def notebook_pod(
265344
notebook_pod.wait_for_condition(
266345
condition=Pod.Condition.READY,
267346
status=Pod.Condition.Status.TRUE,
268-
timeout=Timeout.TIMEOUT_10MIN,
347+
timeout=pod_ready_timeout,
269348
)
270-
except (TimeoutError, RuntimeError) as e:
271-
if notebook_pod.exists:
349+
except (TimeoutError, TimeoutExpiredError, RuntimeError) as e:
350+
try:
351+
pod_exists = notebook_pod.exists
352+
except Exception as exists_error: # noqa: BLE001
353+
LOGGER.warning(f"Failed to verify pod existence after timeout: {exists_error}")
354+
pod_exists = False
355+
356+
if pod_exists:
272357
# Collect pod information for debugging purposes (YAML + logs saved to must-gather dir)
273-
collect_pod_information(notebook_pod)
274-
pod_status = notebook_pod.instance.status
275-
pod_phase = pod_status.phase
358+
try:
359+
collect_pod_information(notebook_pod)
360+
except Exception as collect_error: # noqa: BLE001
361+
LOGGER.warning(f"Failed to collect pod artifacts: {collect_error}")
362+
363+
try:
364+
pod_diagnostics = _collect_notebook_pod_diagnostics(notebook_pod=notebook_pod)
365+
except Exception as diagnostics_error: # noqa: BLE001
366+
pod_diagnostics = f"<failed to collect pod diagnostics: {diagnostics_error}>"
367+
276368
raise AssertionError(
277369
_ERR_POD_NOT_READY.format(
278370
pod_name=default_notebook.name,
279-
pod_phase=pod_phase,
371+
timeout_seconds=pod_ready_timeout,
372+
pod_diagnostics=pod_diagnostics,
280373
original_error=e,
281374
)
282375
) from e

0 commit comments

Comments
 (0)