@@ -282,9 +282,16 @@ def _docker_server_platform() -> str:
282282 return f"linux/{ arch } "
283283
284284
285+ def _docker_image_id (image : str ) -> str :
286+ """Return Docker's local image ID for an image reference."""
287+ result = _run (["docker" , "image" , "inspect" , "--format" , "{{.Id}}" , image ], check = True )
288+ return result .stdout .strip ()
289+
290+
285291_DEFAULT_IMAGE_REGISTRY = "nvcr.io/nvidian/cfa"
286292_CNPG_OPERATOR_IMAGE_TAG = "1.29.0"
287293_PROMETHEUS_OPERATOR_CRDS_CHART_VERSION = "28.0.1"
294+ _KIND_PRELOAD_IMAGES_ENV = "NVCM_KIND_PRELOAD_IMAGES"
288295_KIND_PRELOAD_IMAGES = (LOADER_POD_IMAGE ,)
289296_KIND_PRELOAD_PLATFORM_IMAGES : dict [str , dict [str , str ]] = {
290297 LOADER_POD_IMAGE : {
@@ -299,6 +306,36 @@ def _kind_preload_source_image(image: str, platform_name: str) -> str:
299306 return _KIND_PRELOAD_PLATFORM_IMAGES .get (image , {}).get (platform_name , image )
300307
301308
309+ def _env_kind_preload_images () -> list [str ]:
310+ """Return additional Kind preload images from the environment."""
311+ value = os .environ .get (_KIND_PRELOAD_IMAGES_ENV , "" )
312+ return [part for part in value .replace ("," , " " ).split () if part ]
313+
314+
315+ def _kind_preload_images (config : NVConfigManagerInstallConfig ) -> list [str ]:
316+ """Return the effective ordered list of images to preload into Kind."""
317+ images : list [str ] = []
318+ seen : set [str ] = set ()
319+ for image in (
320+ * _KIND_PRELOAD_IMAGES ,
321+ * config .images .kind_preload_images ,
322+ * _env_kind_preload_images (),
323+ ):
324+ image = image .strip ()
325+ if not image or image in seen :
326+ continue
327+ seen .add (image )
328+ images .append (image )
329+ return images
330+
331+
332+ def _kind_node_names (cluster : str ) -> list [str ]:
333+ """Return Docker container names for nodes in a Kind cluster."""
334+ result = _run (["kind" , "get" , "nodes" , "--name" , cluster ], check = True )
335+ nodes = [line .strip () for line in result .stdout .splitlines () if line .strip ()]
336+ return nodes or [f"{ cluster } -control-plane" ]
337+
338+
302339def _strip_image_registry (repository : str ) -> str :
303340 """Return repository path with any registry host removed."""
304341 first , sep , rest = repository .partition ("/" )
@@ -457,6 +494,90 @@ def _run_logged(
457494 return subprocess .CompletedProcess (cmd , proc .returncode , stdout_text , stderr_text )
458495
459496
497+ def _run_logged_pipe (
498+ source_cmd : list [str ],
499+ sink_cmd : list [str ],
500+ step : DeployStep ,
501+ callback : DeployCallback ,
502+ * ,
503+ check : bool = True ,
504+ timeout : int | None = 600 ,
505+ ) -> subprocess .CompletedProcess [str ]:
506+ """Run source_cmd with stdout piped to sink_cmd, streaming command output."""
507+ source_str = " " .join (source_cmd [:4 ]) + ("..." if len (source_cmd ) > 4 else "" )
508+ sink_str = " " .join (sink_cmd [:4 ]) + ("..." if len (sink_cmd ) > 4 else "" )
509+ callback .on_log (f"$ { source_str } | { sink_str } " )
510+
511+ source = subprocess .Popen (source_cmd , stdout = subprocess .PIPE , stderr = subprocess .PIPE )
512+ sink = subprocess .Popen (
513+ sink_cmd ,
514+ stdin = source .stdout ,
515+ stdout = subprocess .PIPE ,
516+ stderr = subprocess .PIPE ,
517+ )
518+ if source .stdout :
519+ source .stdout .close ()
520+
521+ stdout_lines : list [str ] = []
522+ stderr_lines : list [str ] = []
523+ source_stderr_lines : list [str ] = []
524+ deadline = time .monotonic () + timeout if timeout else None
525+ sel = selectors .DefaultSelector ()
526+ try :
527+ if source .stderr :
528+ sel .register (source .stderr , selectors .EVENT_READ , "source-stderr" )
529+ if sink .stdout :
530+ sel .register (sink .stdout , selectors .EVENT_READ , "sink-stdout" )
531+ if sink .stderr :
532+ sel .register (sink .stderr , selectors .EVENT_READ , "sink-stderr" )
533+
534+ while sel .get_map ():
535+ for proc , cmd in ((source , source_cmd ), (sink , sink_cmd )):
536+ _check_deadline (deadline , proc , cmd , timeout )
537+ wait = None if deadline is None else max (0.0 , deadline - time .monotonic ())
538+ for key , _ in sel .select (timeout = wait ):
539+ line_bytes = key .fileobj .readline () # type: ignore[union-attr]
540+ if not line_bytes :
541+ sel .unregister (key .fileobj )
542+ continue
543+ line = line_bytes .decode (errors = "replace" ).rstrip ("\n " )
544+ step .output .append (line )
545+ callback .on_log (line )
546+ if key .data == "sink-stdout" :
547+ stdout_lines .append (line )
548+ elif key .data == "sink-stderr" :
549+ stderr_lines .append (line )
550+ else :
551+ source_stderr_lines .append (line )
552+
553+ source .wait ()
554+ sink .wait ()
555+ except BaseException :
556+ source .kill ()
557+ sink .kill ()
558+ source .wait ()
559+ sink .wait ()
560+ raise
561+ finally :
562+ sel .close ()
563+
564+ stdout_text = "\n " .join (stdout_lines )
565+ stderr_text = "\n " .join (stderr_lines )
566+ source_stderr_text = "\n " .join (source_stderr_lines )
567+
568+ if check and sink .returncode != 0 :
569+ raise subprocess .CalledProcessError (sink .returncode , sink_cmd , stdout_text , stderr_text )
570+ if check and source .returncode != 0 :
571+ raise subprocess .CalledProcessError (
572+ source .returncode ,
573+ source_cmd ,
574+ "" ,
575+ source_stderr_text ,
576+ )
577+
578+ return subprocess .CompletedProcess (sink_cmd , sink .returncode , stdout_text , stderr_text )
579+
580+
460581def _short_pod_text (value : str , * , limit : int = 140 ) -> str :
461582 """Return a compact single-line pod status detail."""
462583 value = " " .join (value .split ())
@@ -1193,10 +1314,13 @@ def _load_kind(self) -> None:
11931314 timeout = 300 ,
11941315 )
11951316 platform_name = _docker_server_platform ()
1196- for img in _KIND_PRELOAD_IMAGES :
1317+ for img in _kind_preload_images ( self . config ) :
11971318 # Pull arch-scoped official images when available. Docker can keep
11981319 # multi-platform tags as manifest lists even after --platform,
1199- # which makes kind's image import fail on missing platform content.
1320+ # which makes kind's default --all-platforms import fail on missing
1321+ # platform content. Import helper images directly into node
1322+ # containerd with the selected platform instead. Avoid ctr's
1323+ # digest refs here; the tag is what Kubernetes needs to resolve.
12001324 source_img = _kind_preload_source_image (img , platform_name )
12011325 self .callback .on_log (
12021326 f"Pulling helper image { source_img } for platform { platform_name } ..."
@@ -1207,23 +1331,50 @@ def _load_kind(self) -> None:
12071331 self .callback ,
12081332 timeout = 300 ,
12091333 )
1210- if source_img != img :
1211- self .callback .on_log (f"Tagging helper image { source_img } as { img } ..." )
1212- _run_logged (
1213- ["docker" , "tag" , source_img , img ],
1214- step ,
1215- self .callback ,
1216- timeout = 300 ,
1217- )
1218- self .callback .on_log (f"Loading helper image { img } into Kind cluster { cluster } ..." )
1334+ image_id = _docker_image_id (source_img )
1335+ self .callback .on_log (f"Tagging selected-platform helper image { image_id } as { img } ..." )
12191336 _run_logged (
1220- ["kind " , "load " , "docker-image" , img , "--name" , cluster ],
1337+ ["docker " , "tag " , image_id , img ],
12211338 step ,
12221339 self .callback ,
12231340 timeout = 300 ,
12241341 )
1342+ self ._load_kind_helper_image (img , cluster , platform_name , step )
12251343 self ._finish_step (step )
12261344
1345+ def _load_kind_helper_image (
1346+ self ,
1347+ image : str ,
1348+ cluster : str ,
1349+ platform_name : str ,
1350+ step : DeployStep ,
1351+ ) -> None :
1352+ """Load a helper image into Kind node containerd for one platform."""
1353+ nodes = _kind_node_names (cluster )
1354+ for node in nodes :
1355+ self .callback .on_log (f"Loading helper image { image } into Kind node { node } ..." )
1356+ _run_logged_pipe (
1357+ ["docker" , "save" , image ],
1358+ [
1359+ "docker" ,
1360+ "exec" ,
1361+ "--privileged" ,
1362+ "-i" ,
1363+ node ,
1364+ "ctr" ,
1365+ "--namespace=k8s.io" ,
1366+ "images" ,
1367+ "import" ,
1368+ "--platform" ,
1369+ platform_name ,
1370+ "--snapshotter=overlayfs" ,
1371+ "-" ,
1372+ ],
1373+ step ,
1374+ self .callback ,
1375+ timeout = 300 ,
1376+ )
1377+
12271378 def _operator_bundle_root (self ) -> Path | None :
12281379 """Return the airgap bundle root if local charts/manifests are available."""
12291380 chart_dir = Path (self .options .chart_dir ).expanduser ()
0 commit comments