Skip to content

Commit 577cba2

Browse files
sameerdattav, Copilot, andreyvelich
authored
chore: Confirm that a public ConfigMap exists to check version (#250)
* Confirm that a public ConfigMap exists to check version Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> * python 3.9 fix Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Surya Sameer Datta Vaddadi <137607947+sameerdattav@users.noreply.github.com> * Better exception handling Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Surya Sameer Datta Vaddadi <137607947+sameerdattav@users.noreply.github.com> * Addressing comments Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> * Update kubeflow/trainer/backends/kubernetes/backend.py Co-authored-by: Andrey Velichkevich <andrey.velichkevich@gmail.com> Signed-off-by: Surya Sameer Datta Vaddadi <137607947+sameerdattav@users.noreply.github.com> * Refactored tests into a single function and followed agents.md Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> * CI friendly edit Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> * pre-commit format checked Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> * Modified according to new updates Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> * Ran pre-commit locally to fix formatting Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> * unix2dos CLAUDE.md Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> * Revert CLAUDE.md Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> --------- Signed-off-by: Surya Sameer Datta Vaddadi <f20220373@goa.bits-pilani.ac.in> Signed-off-by: Surya Sameer Datta Vaddadi <137607947+sameerdattav@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Andrey Velichkevich <andrey.velichkevich@gmail.com>
1 parent d1bfed8 commit 577cba2

File tree

2 files changed

+134
-3
lines changed

2 files changed

+134
-3
lines changed

kubeflow/trainer/backends/kubernetes/backend.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import copy
1717
import logging
1818
import multiprocessing
19+
import os
1920
import random
2021
import re
2122
import string
@@ -56,6 +57,33 @@ def __init__(self, cfg: KubernetesBackendConfig):
5657

5758
self.namespace = cfg.namespace
5859

60+
# Perform control-plane version metadata verification.
61+
self.verify_backend()
62+
63+
def verify_backend(self) -> None:
    """Check that the Trainer control plane publishes version metadata.

    Reads the ``kubeflow-trainer-public`` ConfigMap from the system
    namespace and looks up its ``kubeflow_trainer_version`` key. The
    namespace comes from the ``KUBEFLOW_SYSTEM_NAMESPACE`` environment
    variable and defaults to ``kubeflow-system``.

    The version value itself is discarded: no compatibility check is
    performed. Any exception raised by the lookup is caught and reported
    as a warning instead of being propagated to the caller.
    """
    config_map_name = "kubeflow-trainer-public"
    system_namespace = os.getenv("KUBEFLOW_SYSTEM_NAMESPACE", "kubeflow-system")

    try:
        config_map = self.core_api.read_namespaced_config_map(
            name=config_map_name,
            namespace=system_namespace,
        )
        # Only the presence of the key matters; the value is thrown away.
        _ = config_map.data["kubeflow_trainer_version"]
    except Exception as e:  # noqa: BLE001
        # Deliberately broad: this check is best-effort and must not block
        # backend construction.
        logger.warning(
            "Trainer control-plane version info is not available: "
            f"unable to read 'kubeflow_trainer_version' from ConfigMap "
            f"'{config_map_name}' in namespace '{system_namespace}': {e}"
        )
86+
5987
def list_runtimes(self) -> list[types.Runtime]:
6088
result = []
6189
try:

kubeflow/trainer/backends/kubernetes/backend_test.py

Lines changed: 106 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""
16-
Unit tests for the KubernetesBackend class in the Kubeflow Trainer SDK.
15+
"""Unit tests for the KubernetesBackend class in the Kubeflow Trainer SDK.
1716
1817
This module uses pytest and unittest.mock to simulate Kubernetes API interactions.
19-
It tests KubernetesBackend's behavior across job listing, resource creation etc
18+
It tests KubernetesBackend's behavior across job listing, resource creation etc.
2019
"""
2120

2221
from dataclasses import asdict
2322
import datetime
23+
import logging
2424
import multiprocessing
2525
import random
2626
import string
@@ -268,6 +268,22 @@ def get_custom_trainer_container(
268268
)
269269

270270

271+
def _build_core_api_mock(
272+
config_map_data: Optional[dict] = None,
273+
error: Optional[Exception] = None,
274+
):
275+
"""Helper to construct a CoreV1Api mock for version checks."""
276+
277+
core_api = Mock()
278+
279+
if error is not None:
280+
core_api.read_namespaced_config_map.side_effect = error
281+
else:
282+
core_api.read_namespaced_config_map.return_value = Mock(data=config_map_data)
283+
284+
return core_api
285+
286+
271287
def get_builtin_trainer(
272288
args: list[str],
273289
) -> models.TrainerV1alpha1Trainer:
@@ -653,6 +669,93 @@ def get_train_job_data_type(
653669
)
654670

655671

672+
def _run_verify_backend_with_core_api(core_api: Mock) -> tuple[list[str], int]:
    """Build a KubernetesBackend around ``core_api`` and collect its log output.

    The Kubernetes client entry points are patched so that constructing the
    backend touches no real cluster and ``CoreV1Api()`` yields ``core_api``.
    While the backend is constructed, a temporary handler is attached to the
    backend module's logger, whose level is set to WARNING. The handler and
    the previous level are restored afterwards, even if construction fails.

    Args:
        core_api: Mock returned in place of ``kubernetes.client.CoreV1Api``.

    Returns:
        A tuple of the captured log messages and the number of times
        ``core_api.read_namespaced_config_map`` was called.
    """

    class _RecordCollector(logging.Handler):
        """Logging handler that keeps every record it receives in memory."""

        def __init__(self) -> None:
            super().__init__()
            self.records: list[logging.LogRecord] = []

        def emit(self, record: logging.LogRecord) -> None:  # type: ignore[override]
            self.records.append(record)

    backend_logger = logging.getLogger("kubeflow.trainer.backends.kubernetes.backend")
    collector = _RecordCollector()
    backend_logger.addHandler(collector)
    saved_level = backend_logger.level
    backend_logger.setLevel(logging.WARNING)

    try:
        with (
            patch("kubernetes.config.load_kube_config", return_value=None),
            patch("kubeflow.common.utils.is_running_in_k8s", return_value=False),
            patch("kubernetes.client.ApiClient", return_value=Mock()),
            patch("kubernetes.client.CustomObjectsApi", return_value=Mock()),
            patch("kubernetes.client.CoreV1Api", return_value=core_api),
        ):
            # verify_backend() runs as part of __init__.
            KubernetesBackend(KubernetesBackendConfig())
    finally:
        # Undo the logger changes so other tests see the original setup.
        backend_logger.removeHandler(collector)
        backend_logger.setLevel(saved_level)

    return (
        [rec.getMessage() for rec in collector.records],
        core_api.read_namespaced_config_map.call_count,
    )
707+
708+
709+
@pytest.mark.parametrize(
    "test_case",
    [
        TestCase(
            name="version metadata present",
            expected_status=SUCCESS,
            config={
                "core_api": _build_core_api_mock(
                    config_map_data={"kubeflow_trainer_version": "1.2.3"},
                ),
                "expect_warning": False,
            },
        ),
        TestCase(
            name="ConfigMap read error logs warning",
            expected_status=SUCCESS,
            config={
                "core_api": _build_core_api_mock(
                    config_map_data=None,
                    error=Exception("ConfigMap not found"),
                ),
                "expect_warning": True,
                "must_contain": [
                    "Trainer control-plane version info is not available",
                    "kubeflow-trainer-public",
                    "ConfigMap not found",
                ],
            },
        ),
    ],
)
def test_verify_backend(test_case):
    """Exercise verify_backend with and without readable version metadata.

    Each case supplies a pre-configured CoreV1Api mock in ``config`` and
    states whether a warning is expected, plus any fragments the combined
    warning text must contain.
    """
    print("Executing test:", test_case.name)

    scenario = test_case.config
    warnings, call_count = _run_verify_backend_with_core_api(scenario["core_api"])
    combined = "\n".join(warnings)

    # The ConfigMap must be read at least once while constructing the backend.
    assert call_count >= 1

    if scenario.get("expect_warning", False):
        assert warnings, "Expected warning logs but found none"
        for text in scenario.get("must_contain", []):
            assert text in combined
    else:
        assert "Trainer control-plane version info is not available" not in combined

    print("test execution complete")
757+
758+
656759
# --------------------------
657760
# Tests
658761
# --------------------------

0 commit comments

Comments
 (0)