fix: fix deployments comments

kfirtoledo · kfirtoledo · commit 480b2d6593fa · 2025-06-23T13:13:33.000+03:00
Signed-off-by: Kfir Toledo <kfir.toledo@ibm.com>
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
@@ -19,7 +19,8 @@ Documentation for developing the inference scheduler.
 
 ## Kind Development Environment
 
-> **WARNING**: This currently requires you to have manually built the vllm
+> [!WARNING]
+> This currently requires you to have manually built the vllm
 > simulator separately on your local system. In a future iteration this will
 > be handled automatically and will not be required. The tag for the simulator
 > currently needs to be `v0.1.0`.
@@ -116,46 +117,50 @@ kubectl rollout restart deployment food-review-endpoint-picker
 
 ## Kubernetes Development Environment
 
-A Kubernetes (or OpenShift) cluster can be used for development and testing.
-There is a cluster-level infrastructure deployment that needs to be managed,
-and then development environments can be created on a per-namespace basis to
-enable sharing the cluster with multiple developers (or feel free to just use
-the `default` namespace if the cluster is private/personal).
+A Kubernetes cluster can be used for development and testing.
+The setup can be split in two:
+
+- cluster-level infrastructure deployment (e.g., CRDs), and
+- deployment of development environments on a per-namespace basis
+
+This enables cluster sharing by multiple developers. In case of private/personal
+clusters, the `default` namespace can be used directly.
 
 ### Setup - Infrastructure
 
-> **WARNING**: In shared cluster situations you should probably not be
-> running this unless you're the cluster admin and you're _certain_ it's you
+> [!CAUTION]
+> In shared cluster situations you should probably not be
+> running this unless you're the cluster admin and you're _certain_ you're the one
 > that should be running this, as this can be disruptive to other developers
 > in the cluster.
 
 The following will deploy all the infrastructure-level requirements (e.g. CRDs,
-Operators, etc) to support the namespace-level development environments:
+Operators, etc.) to support the namespace-level development environments:
 
 Install GIE CRDs:
 
 ```bash
-VERSION=v0.3.0
-kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/$VERSION/manifests.yaml
+kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml
 ```
 
-Install Kgateway:
+Install kgateway:
 ```bash
 KGTW_VERSION=v2.0.2
 helm upgrade -i --create-namespace --namespace kgateway-system --version $KGTW_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds
 helm upgrade -i --namespace kgateway-system --version $KGTW_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true
 ```
 
-For more details you can find in Gateway API inference [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/)
+For more details, see the Gateway API Inference Extension [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/).
 
 ### Setup - Developer Environment
 
-> **WARNING**: This setup is currently very manual in regards to container
+> [!NOTE]
+> This setup is currently very manual with regard to container
 > images for the VLLM simulator and the EPP. It is expected that you build and
 > push images for both to your own private registry. In future iterations, we
 > will be providing automation around this to make it simpler.
 
-To deploy a development environment to the cluster you'll need to explicitly
+To deploy a development environment to the cluster, you'll need to explicitly
 provide a namespace. This can be `default` if this is your personal cluster,
 but on a shared cluster you should pick something unique. For example:
 
@@ -175,7 +180,8 @@ Set the default namespace for kubectl commands
 kubectl config set-context --current --namespace="${NAMESPACE}"
 ```
 
-> NOTE: If you are using OpenShift (oc CLI), use the following instead: `oc project "${NAMESPACE}"`
+> [!NOTE]
+> If you are using OpenShift (oc CLI), you can use the following instead: `oc project "${NAMESPACE}"`
 
 - Set Hugging Face token variable:
 
@@ -186,26 +192,26 @@ export HF_TOKEN="<HF_TOKEN>"
 Download the `llm-d-kv-cache-manager` repository (the instllation script and Helm chart to install the vLLM environment):
 
 ```bash
-cd .. & git clone git@github.com:llm-d/llm-d-kv-cache-manager.git
+cd .. && git clone git@github.com:llm-d/llm-d-kv-cache-manager.git
 ```
+
 If you prefer to clone it into the `/tmp` directory, make sure to update the `VLLM_CHART_DIR` environment variable:
 `export VLLM_CHART_DIR=<tmp_dir>/llm-d-kv-cache-manager/vllm-setup-helm`
 
-
-
 Once all this is set up, you can deploy the environment:
 
 ```bash
 make env-dev-kubernetes
 ```
 
 This will deploy the entire stack to whatever namespace you chose.
-**Note:** The model and images of each componet can  be replaced. See [Environment Configuration](#environment-configuration) for model settings.
+> [!NOTE]
+> The model and images of each component can be replaced. See [Environment Configuration](#environment-configuration) for model settings.
 
-You can test by exposing the inference `Gateway` via port-forward:
+You can test by exposing the `inference gateway` via port-forward:
 
 ```bash
-kubectl port-forward service/inference-gateway 8080:80
+kubectl port-forward service/inference-gateway 8080:80 -n "${NAMESPACE}"
 ```
 
 And making requests with `curl`:
@@ -215,6 +221,9 @@ curl -s -w '\n' http://localhost:8080/v1/completions -H 'Content-Type: applicati
   -d '{"model":"meta-llama/Llama-3.1-8B-Instruct","prompt":"hi","max_tokens":10,"temperature":0}' | jq
 ```
 
+> [!NOTE]
+> If the response is empty or contains an error, jq may output a cryptic error. You can run the command without jq to debug raw responses.
+
 #### Environment Configurateion
 
 **1. Setting the EPP image and tag:**
@@ -234,7 +243,7 @@ You can optionally set the vllm replicas:
 export VLLM_REPLICA_COUNT=2
 ```
 
-**3. Setting the model name and label:**
+**3. Setting the model name:**
 
 You can replace the model name that will be used in the system.
 
@@ -244,41 +253,36 @@ export MODEL_NAME="${MODEL_NAME:-mistralai/Mistral-7B-Instruct-v0.2}"
 
 **4. Additional environment settings:**
 
-More Setting of environment variables can be found in the `scripts/kubernetes-dev-env.sh`.
-
-
+More environment variable settings can be found in the `scripts/kubernetes-dev-env.sh`.
 
 #### Development Cycle
 
-> **WARNING**: This is a very manual process at the moment. We expect to make
+> [!WARNING]
+> This is a very manual process at the moment. We expect to make
 > this more automated in future iterations.
 
 Make your changes locally and commit them. Then select an image tag based on
-the `git` SHA:
+the `git` SHA and set your private registry:
 
 ```bash
 export EPP_TAG=$(git rev-parse HEAD)
+export IMAGE_REGISTRY="quay.io/my-id"
 ```
 
-Build the image:
+Build the image and tag the image for your private registry:
 
 ```bash
-DEV_VERSION=$EPP_TAG make image-build
+make image-build
 ```
 
-Tag the image for your private registry and push it:
+and push it:
 
 ```bash
-$CONTAINER_RUNTIME tag quay.io/llm-d/llm-d-gateway-api-inference-extension/epp:$TAG \
-    <MY_REGISTRY>/<MY_IMAGE>:$EPP_TAG
-$CONTAINER_RUNTIME push <MY_REGISTRY>/<MY_IMAGE>:$EPP_TAG
+ make image-push
 ```
 
-> **NOTE**: `$CONTAINER_RUNTIME` can be configured or replaced with whatever your
-> environment's standard container runtime is (e.g. `podman`, `docker`).
-
-Then you can re-deploy the environment with the new changes (don't forget all
-the required env vars):
+You can now re-deploy the environment with your changes (don't forget all
+the required environment variables):
 
 ```bash
 make env-dev-kubernetes
@@ -299,3 +303,19 @@ If you also want to remove the namespace entirely, run:
 ```sh
 kubectl delete namespace ${NAMESPACE}
 ```
+
+To uninstall the development infrastructure, first
+uninstall the GIE CRDs:
+
+```sh
+kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/latest/download/manifests.yaml --ignore-not-found
+```
+
+Uninstall kgateway:
+
+```sh
+helm uninstall kgateway -n kgateway-system
+helm uninstall kgateway-crds -n kgateway-system
+```
+
+For more details, see the Gateway API Inference Extension [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/).
diff --git a/Makefile b/Makefile
@@ -301,7 +301,7 @@ clean-env-dev-kind:      ## Cleanup kind setup (delete cluster $(KIND_CLUSTER_NA
 
 
 # Kubernetes Development Environment - Deploy
-# This target deploys the GIE stack in a specific namespace for development and testing.
+# This target deploys the inference scheduler stack in a specific namespace for development and testing.
 .PHONY: env-dev-kubernetes
 env-dev-kubernetes: check-kubectl check-kustomize check-envsubst
 	IMAGE_REGISTRY=$(IMAGE_REGISTRY) ./scripts/kubernetes-dev-env.sh 2>&1
diff --git a/deploy/environments/dev/kubernetes-kgateway/kustomization.yaml b/deploy/environments/dev/kubernetes-kgateway/kustomization.yaml
@@ -9,7 +9,7 @@ resources:
 - gateway-parameters.yaml
 
 images:
-- name: quay.io/llm-d/gateway-api-inference-extension
+- name: ghcr.io/llm-d/gateway-api-inference-extension
   newName: ${EPP_IMAGE}
   newTag: ${EPP_TAG}
 
diff --git a/scripts/kubernetes-dev-env.sh b/scripts/kubernetes-dev-env.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
 
-# This shell script deploys a Kubernetes or OpenShift cluster with an
+# This shell script deploys a Kubernetes cluster with an
 # KGateway-based Gateway API implementation fully configured. It deploys the
-# vllm simulator, which it exposes with a Gateway -> HTTPRoute -> InferencePool.
+# vllm, which it exposes with a Gateway -> HTTPRoute -> InferencePool.
 # The Gateway is configured with the a filter for the ext_proc endpoint picker.
 
 set -eux
@@ -73,7 +73,7 @@ export EPP_IMAGE="${EPP_IMAGE:-llm-d-inference-scheduler}"
 # EPP image tag
 export EPP_TAG="${EPP_TAG:-v0.1.0}"
 
-# Whether Prompt/Document (P/D) mode is enabled for this deployment
+# Whether P/D mode is enabled for this deployment
 export PD_ENABLED="\"${PD_ENABLED:-false}\""
 
 # Token length threshold to trigger P/D logic
@@ -123,7 +123,7 @@ export VLLM_DEPLOYMENT_NAME="${VLLM_HELM_RELEASE_NAME}-${MODEL_NAME_SAFE}"
 
 kubectl create namespace ${NAMESPACE} 2>/dev/null || true
 
-# Hack to deal with KGateways broken OpenShift support
+# Hack to better deal with kgateway on OpenShift
 export PROXY_UID=$(kubectl get namespace ${NAMESPACE} -o json | jq -e -r '.metadata.annotations["openshift.io/sa.scc.uid-range"]' | perl -F'/' -lane 'print $F[0]+1');
 
 # Detect if the cluster is OpenShift by checking for the 'route.openshift.io' API group
@@ -140,7 +140,7 @@ if [[ "$CLEAN" == "true" ]]; then
   kustomize build deploy/environments/dev/kubernetes-kgateway | envsubst > temp_delet.yaml
   kustomize build deploy/environments/dev/kubernetes-kgateway | envsubst | kubectl -n "${NAMESPACE}" delete --ignore-not-found=true -f -
   # Delete vllm resources.
-  helm uninstall vllm --namespace c3
+  helm uninstall vllm --namespace ${NAMESPACE}
   exit 0
 fi
 
@@ -156,7 +156,7 @@ fi
 # Run Helm upgrade/install vllm
 echo "INFO: Deploying vLLM Environment in namespace ${NAMESPACE}, ${POOL_NAME}"
 helm upgrade --install "$VLLM_HELM_RELEASE_NAME" "$VLLM_CHART_DIR" \
-  --namespace c3 \
+  --namespace="$NAMESPACE" \
   --set secret.create=true \
   --set secret.hfTokenValue="$HF_TOKEN2" \
   --set vllm.poolLabelValue="$POOL_NAME" \