Skip to content

Commit bc2d8c0

Browse files
committed
tmp
1 parent fa972d4 commit bc2d8c0

File tree

3 files changed

+207
-99
lines changed

3 files changed

+207
-99
lines changed

tests/workbenches/notebook-controller/conftest.py

Lines changed: 47 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Generator, Any, Dict
1+
from typing import Generator, Any
22

33
import pytest
44
from kubernetes.dynamic import DynamicClient
@@ -12,30 +12,32 @@
1212
LocalQueue,
1313
ClusterQueue,
1414
ResourceFlavor,
15+
backup_configmap_data,
16+
update_kueue_config_frameworks,
17+
restart_kueue_deployment,
18+
create_kueue_config_patch,
1519
)
1620
from ocp_resources.namespace import Namespace
1721
from ocp_resources.config_map import ConfigMap
18-
from ocp_resources.deployment import Deployment
1922
from ocp_resources.persistent_volume_claim import PersistentVolumeClaim
2023
from ocp_resources.resource import ResourceEditor
21-
from utilities.constants import Labels, Annotations
24+
from simple_logger.logger import get_logger
25+
from utilities.constants import Labels
2226
from utilities.infra import create_ns
2327
from utilities.kueue_detection import (
2428
detect_kueue_installation_scenario,
2529
should_patch_kueue_config,
2630
should_restart_kueue_deployment,
2731
)
28-
import yaml
29-
import logging
3032

31-
LOGGER = logging.getLogger(__name__)
33+
LOGGER = get_logger(name=__name__)
3234

3335

3436
def kueue_resource_groups_for_notebooks(
3537
flavor_name: str,
3638
cpu_quota: str,
3739
memory_quota: str,
38-
) -> list[Dict[str, Any]]:
40+
) -> list[dict[str, Any]]:
3941
"""Create resource groups configuration for Kueue with notebook-specific resources"""
4042
return [
4143
{
@@ -170,103 +172,53 @@ def patched_kueue_manager_config(
170172
yield None
171173
return
172174

173-
namespace = py_config["applications_namespace"]
174-
config_map_name = "kueue-manager-config"
175+
applications_namespace = py_config["applications_namespace"]
175176

176177
# Get the existing ConfigMap
177178
try:
178179
config_map = ConfigMap(
179180
client=admin_client,
180-
name=config_map_name,
181-
namespace=namespace,
181+
name="kueue-manager-config",
182+
namespace=applications_namespace,
182183
ensure_exists=True,
183184
)
184185
except Exception as e:
185-
LOGGER.warning(f"Could not find kueue-manager-config ConfigMap: {e}")
186-
LOGGER.info("This is expected for Red Hat build of Kueue operator scenario")
187-
yield None
188-
return
189-
190-
# Store original data and annotations for restoration
191-
original_data = config_map.instance.data.copy() if config_map.instance.data else {}
192-
original_annotations = (
193-
config_map.instance.metadata.annotations.copy() if config_map.instance.metadata.annotations else {}
194-
)
186+
LOGGER.error(f"Could not find kueue-manager-config ConfigMap: {e}")
187+
LOGGER.error(f"This is unexpected for scenario '{scenario}' - the ConfigMap should exist")
188+
raise RuntimeError(
189+
f"kueue-manager-config ConfigMap not found in scenario '{scenario}'. "
190+
f"This indicates a system configuration issue that needs investigation."
191+
) from e
195192

196-
LOGGER.info("Storing original kueue-manager-config data for restoration")
193+
# Backup original data and annotations using utility function
194+
original_data, original_annotations = backup_configmap_data(config_map=config_map)
197195

198-
# Get current config data
196+
# Get current config data and update it
199197
current_data = config_map.instance.data or {}
200198
config_yaml = current_data.get("controller_manager_config.yaml", "{}")
201199

202-
# Parse the existing configuration
203-
try:
204-
config_dict = yaml.safe_load(config_yaml) or {}
205-
except yaml.YAMLError:
206-
config_dict = {}
207-
208-
# Ensure integrations section exists
209-
if "integrations" not in config_dict:
210-
config_dict["integrations"] = {}
211-
212-
if "frameworks" not in config_dict["integrations"]:
213-
config_dict["integrations"]["frameworks"] = []
214-
215-
# Add pod and statefulset if not already present
216-
frameworks = config_dict["integrations"]["frameworks"]
217-
if "pod" not in frameworks:
218-
frameworks.append("pod")
219-
if "statefulset" not in frameworks:
220-
frameworks.append("statefulset")
221-
222-
# Convert back to YAML
223-
updated_config_yaml = yaml.dump(config_dict, default_flow_style=False)
224-
updated_data = {**current_data, "controller_manager_config.yaml": updated_config_yaml}
225-
226-
# Apply the patch with both data and metadata annotations
227-
patch = {"metadata": {"annotations": {Annotations.OpenDataHubIo.MANAGED: "false"}}, "data": updated_data}
228-
229-
def restart_kueue_deployment(reason: str):
230-
"""Helper function to restart the kueue-controller-manager deployment"""
231-
if not should_restart_kueue_deployment(scenario):
232-
LOGGER.info(f"Skipping kueue-controller-manager deployment restart for scenario: {scenario}")
233-
return
234-
235-
LOGGER.info(f"Restarting kueue-controller-manager deployment - {reason}")
236-
237-
try:
238-
kueue_deployment = Deployment(
239-
client=admin_client,
240-
name="kueue-controller-manager",
241-
namespace=namespace,
242-
ensure_exists=True,
243-
)
244-
245-
# Get current replica count before restart
246-
current_replicas = kueue_deployment.replicas
247-
if current_replicas is None:
248-
current_replicas = 1
249-
LOGGER.info(f"Current kueue-controller-manager replicas: {current_replicas}")
250-
251-
# Restart deployment by scaling to 0 and back to original count
252-
LOGGER.info("Scaling kueue-controller-manager deployment to 0 replicas...")
253-
kueue_deployment.scale_replicas(replica_count=0)
254-
kueue_deployment.wait_for_replicas(deployed=False)
255-
LOGGER.info("kueue-controller-manager deployment scaled down to 0 replicas")
256-
257-
# Now scale back up to original count
258-
LOGGER.info(f"Scaling kueue-controller-manager deployment back to {current_replicas} replicas...")
259-
kueue_deployment.scale_replicas(replica_count=current_replicas)
260-
kueue_deployment.wait_for_replicas(deployed=True)
261-
262-
LOGGER.info(f"kueue-controller-manager deployment restart completed - {reason}")
263-
except Exception as e:
264-
LOGGER.warning(f"Could not restart kueue-controller-manager deployment: {e}")
265-
LOGGER.info("This is expected for Red Hat build of Kueue operator scenario")
200+
# Update configuration to add pod and statefulset frameworks
201+
updated_config_yaml = update_kueue_config_frameworks(
202+
config_yaml=config_yaml,
203+
frameworks_to_add=["pod", "statefulset"],
204+
)
205+
206+
# Create patch using utility function
207+
patch = create_kueue_config_patch(
208+
current_data=current_data,
209+
updated_config_yaml=updated_config_yaml,
210+
managed_annotation="false",
211+
)
266212

267213
with ResourceEditor(patches={config_map: patch}):
268214
# After patching the ConfigMap, restart the deployment to pick up new configuration
269-
restart_kueue_deployment(reason="to apply patched configuration")
215+
restart_kueue_deployment(
216+
client=admin_client,
217+
namespace=applications_namespace,
218+
reason="to apply patched configuration",
219+
should_restart_func=should_restart_kueue_deployment,
220+
scenario=scenario,
221+
)
270222
yield config_map
271223

272224
# Teardown: Restore original configuration and restart deployment
@@ -277,5 +229,11 @@ def restart_kueue_deployment(reason: str):
277229

278230
with ResourceEditor(patches={config_map: restore_patch}):
279231
# Restart deployment to pick up the restored original configuration
280-
restart_kueue_deployment(reason="to restore original configuration")
232+
restart_kueue_deployment(
233+
client=admin_client,
234+
namespace=applications_namespace,
235+
reason="to restore original configuration",
236+
should_restart_func=should_restart_kueue_deployment,
237+
scenario=scenario,
238+
)
281239
LOGGER.info("Original kueue-manager-config configuration restored successfully")

utilities/kueue_detection.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ def detect_kueue_installation_scenario(client: DynamicClient) -> KueueInstallati
111111
return KueueInstallationScenario.RHOAI_MANAGED
112112

113113
# Scenario 2: Red Hat build of Kueue operator + DSC Kueue = Unmanaged
114-
# TODO: shall we also apply in case it's removed completely?
115114
elif rh_kueue_operator_installed and dsc_kueue_state == DscComponents.ManagementState.UNMANAGED:
116115
LOGGER.info("Detected scenario: Red Hat build of Kueue operator with RHOAI kueue component unmanaged")
117116
return KueueInstallationScenario.RHOAI_UNMANAGED

0 commit comments

Comments (0)