opendatahub-io · zdtsw · Mar 4, 2026 · Feb 25, 2026 · coderabbitai · Feb 25, 2026
diff --git a/validation/Containerfile b/validation/Containerfile
@@ -1,13 +1,18 @@
-ARG BASEIMAGE=registry.fedoraproject.org/fedora:latest
-FROM ${BASEIMAGE}
+FROM registry.access.redhat.com/ubi9/ubi-minimal:9.5
 
-RUN source /etc/os-release && \
-    if [ "${PLATFORM_ID}" == "platform:el9" ]; then dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm; fi && \
-    if [ "${PLATFORM_ID}" == "platform:el10" ]; then dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-10.noarch.rpm; fi
+# Enable EPEL, then install the Python runtime and the script's dependencies.
+# NOTE(review): the EPEL release RPM is installed with rpm because microdnf
+# resolves package names from enabled repositories and is not expected to
+# accept an RPM URL; shadow-utils is added because useradd (used below) does
+# not appear to be shipped in the minimal base image -- confirm with a build.
+RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    microdnf install -y python3 python3-configargparse python3-kubernetes shadow-utils && \
+    microdnf clean all
 
-RUN dnf install -y python3-configargparse python3-kubernetes python3-pip python3-build
+COPY llmd_xks_preflight.py /opt/llmd-xks-preflight/
 
-COPY . /root/src
-RUN python3 -m build /root/src -w -o /root/src && python3 -m pip install --no-deps /root/src/*.whl && rm -rf /root/src
+# Create a system account with UID 1001 in group 0 and run the tool as that user
+RUN useradd -r -u 1001 -g 0 preflight
+USER 1001
 
-ENTRYPOINT ["/usr/local/bin/llmd-xks-preflight"]
+ENTRYPOINT ["python3", "/opt/llmd-xks-preflight/llmd_xks_preflight.py"]
diff --git a/validation/Makefile b/validation/Makefile
@@ -1,57 +1,69 @@
 # Configurable settings
-MAX_LINE_LENGTH ?= 120
 CONTAINER_REPO ?= localhost/llmd-xks-checks
 CONTAINER_TAG ?= latest
-CONTAINER_TOOL ?= podman
+# Prefer podman when it is on PATH, otherwise fall back to docker
+CONTAINER_TOOL ?= $(shell command -v podman >/dev/null 2>&1 && echo podman || echo docker)
+# ?= defines a recursively-expanded variable, so the probe above would be
+# re-run on every reference; pin the resolved value once at parse time
+CONTAINER_TOOL := $(CONTAINER_TOOL)
 HOST_KUBECONFIG ?= ~/.kube/config
-FROM ?= registry.fedoraproject.org/fedora:latest
 
-.PHONY: help container run push lint pep8-fix
+# SUITE can be set to "cluster" or "operators", defaults to "all"
+SUITE ?= all
+
+# Volume mount options: always read-only, plus the SELinux relabel flag (Z)
+# when the container tool is podman
+VOLUME_OPTS ?= $(shell [ "$(CONTAINER_TOOL)" = "podman" ] && echo ":ro,Z" || echo ":ro")
+# Same reasoning as CONTAINER_TOOL: evaluate the shell test only once
+VOLUME_OPTS := $(VOLUME_OPTS)
+
+# CONFIG can be set to a config file path to mount into the container
+CONFIG ?=
+# Config mount and argument (only if CONFIG is set)
+CONFIG_MOUNT ?= $(if $(CONFIG),--volume $(CONFIG):/tmp/config.conf$(VOLUME_OPTS),)
+CONFIG_ARG ?= $(if $(CONFIG),--config /tmp/config.conf,)
+
+.PHONY: help image run push lint pep8-fix
 
 help:
 	@echo "Available targets:"
-	@echo "  container     Build a container image from the current directory"
-	@echo "  run           Run the container image with all tests"
-	@echo "  run-cluster   Run the container image with cluster readiness tests"
-	@echo "  run-operators Run the container image with operators readiness tests"
-	@echo "  push          Push the container image to the container registry"
+	@echo "  image         Build a container image from the current directory"
+	@echo "  run           Run the image with tests (use SUITE=cluster|operators|all)"
+	@echo "  push          Push the image to the container registry"
 	@echo "  lint          Check code for PEP8 compliance"
 	@echo "  pep8-fix      Automatically fix PEP8 compliance issues"
 	@echo ""
 	@echo "Configuration settings (all can be overridden by using environment variables):"
-	@echo "  MAX_LINE_LENGTH=$(MAX_LINE_LENGTH) Python linter line length"
 	@echo "  CONTAINER_REPO=$(CONTAINER_REPO) Container repository tag to use for build and run"
 	@echo "  CONTAINER_TAG=$(CONTAINER_TAG) Container tag to use for build and run"
 	@echo "  CONTAINER_TOOL=$(CONTAINER_TOOL) Container tool to use for build and run"
 	@echo "  HOST_KUBECONFIG=$(HOST_KUBECONFIG) Path to kubeconfig for container run"
-	@echo "  FROM=$(FROM) Base image to use for the container build"
+	@echo "  SUITE=$(SUITE) Test suite to run (all, cluster, operators)"
+	@echo "  CONFIG=$(CONFIG) Path to config file to mount into the container"
 
 
 # Build a container image from the current directory
-container:
-	$(CONTAINER_TOOL) build $(FROM:%=--build-arg BASEIMAGE=%) --tag $(CONTAINER_REPO):$(CONTAINER_TAG) .
+image:
+	$(CONTAINER_TOOL) build --tag $(CONTAINER_REPO):$(CONTAINER_TAG) .
 
-# Run the container image with all tests
+# Run the selected SUITE in the container; the host kubeconfig is mounted at /tmp/kubeconfig and passed via KUBECONFIG
 run:
-	$(CONTAINER_TOOL) run --rm -it --volume $(HOST_KUBECONFIG):/root/.kube/config:ro,Z $(CONTAINER_REPO):$(CONTAINER_TAG)
-
-# Run the container image with cluster readiness tests
-run-cluster:
-	$(CONTAINER_TOOL) run --rm -it --volume $(HOST_KUBECONFIG):/root/.kube/config:ro,Z $(CONTAINER_REPO):$(CONTAINER_TAG) -s cluster
-
-# Run the container image with operators readiness tests
-run-operators:
-	$(CONTAINER_TOOL) run --rm -it --volume $(HOST_KUBECONFIG):/root/.kube/config:ro,Z $(CONTAINER_REPO):$(CONTAINER_TAG) -s operators
+	$(CONTAINER_TOOL) run --rm -it --volume $(HOST_KUBECONFIG):/tmp/kubeconfig$(VOLUME_OPTS) $(CONFIG_MOUNT) -e KUBECONFIG=/tmp/kubeconfig $(CONTAINER_REPO):$(CONTAINER_TAG) -s $(SUITE) $(CONFIG_ARG)
 
 
 # Push the container image to the container registry
 push:
 	$(CONTAINER_TOOL) push $(CONTAINER_REPO):$(CONTAINER_TAG)
 
+# Linting settings
+MAX_LINE_LENGTH ?= 120
+
 # Check code for PEP8 compliance
 lint:
+	@command -v flake8 >/dev/null 2>&1 || pip install flake8
 	flake8 --max-line-length=$(MAX_LINE_LENGTH) --exclude=build .
 
 # Automatically fix PEP8 compliance issues
 pep8-fix:
+	@command -v autopep8 >/dev/null 2>&1 || pip install autopep8
 	autopep8 --max-line-length=$(MAX_LINE_LENGTH) --in-place --recursive .
diff --git a/validation/README.md b/validation/README.md
@@ -15,6 +15,7 @@ A CLI application for running validation checks against Kubernetes clusters in t
 | Cloud provider | Managed K8s Service |
 | -------------- | ------------------- |
 | [Azure](https://azure.microsoft.com) | [AKS](https://azure.microsoft.com/en-us/products/kubernetes-service) |
+<!-- | [CoreWeave](https://coreweave.com)   | [CKS](https://coreweave.com/products/coreweave-kubernetes-service) | (coming soon) -->
 
 
 ## Container image build
@@ -24,33 +25,29 @@ This tool can be packaged and run as a container image and a Containerfile is pr
 In order to build a container locally:
 
 ```bash
-make container
+make image
 ```
 
-By default, the container is built on top of latest Fedora container image. If you have an **entitled Red Hat Enterprise Linux system**, you can use UBI9 (Universal Basic Image) as the base:
+The container image is built on top of the Red Hat UBI 9 minimal base image (`registry.access.redhat.com/ubi9/ubi-minimal:9.5`).
 
-```bash
-FROM=registry.access.redhat.com/ubi9:latest make container
-```
-
-Notes:
-  * currently, only UBI version 9 (based on Red Hat Enterprise Linux 9) is supported
-  * while the base image itself can be pulled without registration, the container image will not build without a valid Red Hat entitlement -- if you are running a registered RHEL system, the entitlement is automatically passed to the container at build time
-
-Regardless of base image, the resulting container image repository (name) and tag can be customized by using `CONTAINER_REPO` and `CONTAINER_TAG` environment variables:
+The resulting container image repository (name) and tag can be customized by using `CONTAINER_REPO` and `CONTAINER_TAG` environment variables:
 
 ```bash
-CONTAINER_REPO=quay.io/myusername/llm-d-xks-preflight CONTAINER_TAG=mytag make container
-FROM=registry.access.redhat.com/ubi9:latest CONTAINER_REPO=quay.io/myusername/llm-d-xks-preflight CONTAINER_TAG=mytag make container
+CONTAINER_REPO=quay.io/myusername/llm-d-xks-preflight CONTAINER_TAG=mytag make image
 ```
 
 ## Container image run
 
 After building the container image as described above, a helper script to run the validations against a Kubernetes cluster is available:
 
 ```bash
-# using defaults
+# run all tests
 make run
+
+# run specific test suite (cluster or operators)
+SUITE=cluster make run
+SUITE=operators make run
+
 # if the image name and tag have been customized
 CONTAINER_REPO=quay.io/myusername/llm-d-xks-preflight CONTAINER_TAG=mytag make run
 ```
@@ -118,7 +115,7 @@ The application automatically looks for config files in the following locations
 
 You can also specify a custom config file:
 ```bash
-python llmd_xks_checks.py --config /path/to/config.conf
+CONFIG=/path/to/config.conf make run
 ```
 
 Example config file:
@@ -132,4 +129,5 @@ cloud_provider = azure
 
 - `LLMD_XKS_LOG_LEVEL`: Log level (same choices as `--log-level`)
 - `LLMD_XKS_CLOUD_PROVIDER`: Cloud provider (choices: auto, azure)
+- `LLMD_XKS_SUITE`: Test suite to run (choices: all (default), cluster, operators)
 - `KUBECONFIG`: Path to kubeconfig file (standard Kubernetes environment variable)
diff --git a/validation/llmd_xks_checks.py → validation/llmd_xks_preflight.py b/validation/llmd_xks_checks.py → validation/llmd_xks_preflight.py
@@ -40,90 +40,90 @@ def __init__(self, **kwargs):
         self.crds_cache = None
 
         self.tests = {
-                "cluster": {
-                    "description": "Cluster readiness tests",
-                    "tests": [
-                        {
-                            "name": "instance_type",
-                            "function": self.test_instance_type,
-                            "description": "Test if the cluster has at least one supported instance type",
-                            "suggested_action": "Provision a cluster with at least one supported instance type",
-                            "result": False
-                        },
-                        {
-                            "name": "gpu_availability",
-                            "function": self.test_gpu_availability,
-                            "description": "Test if the cluster has GPU drivers",
-                            "suggested_action": "Provision a cluster with at least one supported GPU driver",
-                            "result": False
-                        },
-                        ]
+            "cluster": {
+                "description": "Cluster readiness tests",
+                "tests": [
+                    {
+                        "name": "instance_type",
+                        "function": self.test_instance_type,
+                        "description": "Test if the cluster has at least one supported instance type",
+                        "suggested_action": "Provision a cluster with at least one supported instance type",
+                        "result": False
                     },
-                "operators": {
-                    "description": "Operators readiness tests",
-                    "tests": [
-                        {
-                            "name": "crd_certmanager",
-                            "function": self.test_crd_certmanager,
-                            "description": "test if the cluster has the cert-manager crds",
-                            "suggested_action": "install cert-manager",
-                            "result": False
-                        },
-                        {
-                            "name": "operator_certmanager",
-                            "function": self.test_operator_certmanager,
-                            "description": "test if the cert-manager operator is running properly",
-                            "suggested_action": "install or verify cert-manager deployment",
-                            "result": False
-                        },
-                        {
-                            "name": "crd_sailoperator",
-                            "function": self.test_crd_sailoperator,
-                            "description": "test if the cluster has the sailoperator crds",
-                            "suggested_action": "install sail-operator",
-                            "result": False
-                        },
-                        {
-                            "name": "operator_sail",
-                            "function": self.test_operator_sail,
-                            "description": "test if the sail operator is running properly",
-                            "suggested_action": "install or verify sail operator deployment",
-                            "result": False
-                        },
-                        {
-                            "name": "crd_lwsoperator",
-                            "function": self.test_crd_lwsoperator,
-                            "description": "test if the cluster has the lws-operator crds",
-                            "suggested_action": "install lws-operator",
-                            "result": False,
-                            "optional": True
-                        },
-                        {
-                            "name": "operator_lws",
-                            "function": self.test_operator_lws,
-                            "description": "test if the lws-operator is running properly",
-                            "suggested_action": "install or verify lws operator deployment",
-                            "result": False,
-                            "optional": True
-                        },
-                        {
-                            "name": "crd_kserve",
-                            "function": self.test_crd_kserve,
-                            "description": "test if the cluster has the kserve crds",
-                            "suggested_action": "install kserve",
-                            "result": False,
-                            "optional": False
-                        },
-                        {
-                            "name": "operator_kserve",
-                            "function": self.test_operator_kserve,
-                            "description": "test if the kserve controller is running properly",
-                            "suggested_action": "install or verify kserve deployment",
-                            "result": False,
-                        },
-                    ]
-                    }
+                    {
+                        "name": "gpu_availability",
+                        "function": self.test_gpu_availability,
+                        "description": "Test if the cluster has GPU drivers",
+                        "suggested_action": "Provision a cluster with at least one supported GPU driver",
+                        "result": False
+                    },
+                ]
+            },
+            "operators": {
+                "description": "Operators readiness tests",
+                "tests": [
+                    {
+                        "name": "crd_certmanager",
+                        "function": self.test_crd_certmanager,
+                        "description": "test if the cluster has the cert-manager crds",
+                        "suggested_action": "install cert-manager",
+                        "result": False
+                    },
+                    {
+                        "name": "operator_certmanager",
+                        "function": self.test_operator_certmanager,
+                        "description": "test if the cert-manager operator is running properly",
+                        "suggested_action": "install or verify cert-manager deployment",
+                        "result": False
+                    },
+                    {
+                        "name": "crd_sailoperator",
+                        "function": self.test_crd_sailoperator,
+                        "description": "test if the cluster has the sailoperator crds",
+                        "suggested_action": "install sail-operator",
+                        "result": False
+                    },
+                    {
+                        "name": "operator_sail",
+                        "function": self.test_operator_sail,
+                        "description": "test if the sail operator is running properly",
+                        "suggested_action": "install or verify sail operator deployment",
+                        "result": False
+                    },
+                    {
+                        "name": "crd_lwsoperator",
+                        "function": self.test_crd_lwsoperator,
+                        "description": "test if the cluster has the lws-operator crds",
+                        "suggested_action": "install lws-operator",
+                        "result": False,
+                        "optional": True
+                    },
+                    {
+                        "name": "operator_lws",
+                        "function": self.test_operator_lws,
+                        "description": "test if the lws-operator is running properly",
+                        "suggested_action": "install or verify lws operator deployment",
+                        "result": False,
+                        "optional": True
+                    },
+                    {
+                        "name": "crd_kserve",
+                        "function": self.test_crd_kserve,
+                        "description": "test if the cluster has the kserve crds",
+                        "suggested_action": "install kserve",
+                        "result": False,
+                        "optional": False
+                    },
+                    {
+                        "name": "operator_kserve",
+                        "function": self.test_operator_kserve,
+                        "description": "test if the kserve controller is running properly",
+                        "suggested_action": "install or verify kserve deployment",
+                        "result": False,
+                    },
+                ]
             }
+        }
 
     def _log_init(self):
         logger = logging.getLogger(__name__)
@@ -167,7 +167,7 @@ def _test_crds_present(self, required_crds):
     def _deployment_ready(self, namespace_name, deployment_name):
         try:
             deployment = self.k8s_client.AppsV1Api().read_namespaced_deployment(
-                    name=deployment_name, namespace=namespace_name)
+                name=deployment_name, namespace=namespace_name)
         except Exception as e:
             self.logger.error(f"{e}")
             return False
@@ -278,13 +278,13 @@ def nvidia_driver_present(node):
                     return True
                 else:
                     self.logger.warning(
-                            f"No allocatable NVIDIA GPUs on node {node.metadata.name}"
-                            " - no NVIDIA GPU drivers present")
+                        f"No allocatable NVIDIA GPUs on node {node.metadata.name}"
+                        " - no NVIDIA GPU drivers present")
                     return False
             else:
                 self.logger.warning(
-                        f"No NVIDIA GPU drivers present on node {node.metadata.name}"
-                        " - no NVIDIA GPU accelerators present")
+                    f"No NVIDIA GPU drivers present on node {node.metadata.name}"
+                    " - no NVIDIA GPU accelerators present")
                 return False
         gpu_found = False
         accelerators = {
@@ -463,7 +463,7 @@ def cli_arguments():
         default="all",
         env_var="LLMD_XKS_SUITE",
         help="Test suite to execute"
-        )
+    )
 
     return parser.parse_args()
 

diff --git a/validation/pyproject.toml b/validation/pyproject.toml
@@ -26,7 +26,7 @@ dependencies = [
 "Bug Tracker" = "https://github.com/kwozyman/llmd-xks-preflight/issues"
 
 [tool.hatch.build.targets.wheel]
-packages = ["llmd_xks_checks.py"]
+packages = ["llmd_xks_preflight.py"]
 
 [project.scripts]
-llmd-xks-preflight = "llmd_xks_checks:main"
+llmd-xks-preflight = "llmd_xks_preflight:main"