DolevAdas
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/ci-helmfile-dry-run.yaml b/.github/workflows/ci-kustomize-dry-run.yaml
.github/workflows/ci-helmfile-dry-run.yaml renamed to .github/workflows/ci-kustomize-dry-run.yaml
Lines changed: 78 additions & 61 deletions
diff --git a/.github/workflows/ci-pr-test.yaml b/.github/workflows/ci-pr-test.yaml
Lines changed: 18 additions & 18 deletions
diff --git a/.github/workflows/e2e-accelerator-test.yaml b/.github/workflows/e2e-accelerator-test.yaml
Lines changed: 7 additions & 7 deletions
@@ -20,7 +20,7 @@ body:
       label: Feature Area
       description: Which area of llm-d does this feature relate to?
       options:
-        - Inference Scheduling
+        - Optimized Baseline
         - Prefill/Decode Disaggregation
         - KV Cache / Prefix Caching
         - Wide Expert-Parallelism
 
@@ -1,15 +1,21 @@
-name: CI Helmfile Dry Run
+name: CI Guide Dry Run
 
 on:
   pull_request:
     branches: [main]
     paths:
-      - guides/**/helmfile.yaml.gotmpl
-      - guides/**/values*.yaml
+      - .github/workflows/ci-helmfile-dry-run.yaml  # NOTE(review): pre-rename filename of this workflow; the new name is listed further down, so this entry looks dead - confirm and drop
+      - .github/scripts/**
+      - guides/recipes/**  # shared recipes consumed by the per-guide dry-run jobs
+      - guides/optimized-baseline/**
+      - guides/pd-disaggregation/**
+      - guides/precise-prefix-cache-aware/**
+      - guides/simulated-accelerators/**
+      - guides/workload-autoscaling/**
       - guides/prereq/**
-      - helpers/**
+      - helpers/client-setup/**  # narrowed from helpers/**
       - docs/monitoring/scripts/install-prometheus-grafana.sh
-      - .github/workflows/ci-helmfile-dry-run.yaml
+      - .github/workflows/ci-kustomize-dry-run.yaml  # this workflow file under its new name
   workflow_dispatch:
 
 permissions:
@@ -19,7 +25,7 @@ jobs:
   detect-changes:
     runs-on: ubuntu-latest
     outputs:
-      inference-scheduling: ${{ steps.filter.outputs.inference-scheduling }}
+      optimized-baseline: ${{ steps.filter.outputs.optimized-baseline }}  # name must match the filter key defined in the filter step
       pd-disaggregation: ${{ steps.filter.outputs.pd-disaggregation }}
       precise-prefix-cache-aware: ${{ steps.filter.outputs.precise-prefix-cache-aware }}
       simulated-accelerators: ${{ steps.filter.outputs.simulated-accelerators }}
@@ -30,14 +36,14 @@ jobs:
         id: filter
         with:
           filters: |
-            inference-scheduling:
+            optimized-baseline:
-              - .github/workflows/ci-helmfile-dry-run.yaml
+              - .github/workflows/ci-kustomize-dry-run.yaml
-              - helpers/client-setup/install-deps.sh
-              - guides/prereq/gateway-provider/common-configurations/**
+              - .github/scripts/install-gke-crds.sh
               - guides/prereq/gateway-provider/install-gateway-provider-dependencies.sh
-              - guides/prereq/gateway-provider/*.helmfile.yaml
               - docs/monitoring/scripts/install-prometheus-grafana.sh
-              - guides/inference-scheduling/**
+              - guides/recipes/modelserver/**
+              - guides/recipes/scheduler/**
+              - guides/optimized-baseline/**
             pd-disaggregation:
-              - .github/workflows/ci-helmfile-dry-run.yaml
+              - .github/workflows/ci-kustomize-dry-run.yaml
               - helpers/client-setup/install-deps.sh
@@ -71,86 +77,97 @@ jobs:
               - docs/monitoring/scripts/install-prometheus-grafana.sh
               - guides/workload-autoscaling/**
 
-  dry-run-inference-scheduling:
+  dry-run-optimized-baseline:
     needs: detect-changes
-    if: needs.detect-changes.outputs.inference-scheduling == 'true'
+    if: needs.detect-changes.outputs.optimized-baseline == 'true'
     runs-on: ubuntu-latest
     timeout-minutes: 20
     steps:
       - uses: actions/checkout@v6
         with:
           persist-credentials: false
 
-      - name: Install client dependencies
-        run: helpers/client-setup/install-deps.sh
-
       - name: Create kind cluster
         uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc  # v1.14.0
         with:
-          cluster_name: helmfile-dry-run
+          cluster_name: kustomize-dry-run
           node_image: kindest/node:v1.35.0  # 1.35+ required for stable DRA (resource.k8s.io/v1)
           version: v0.31.0
           wait: 120s
 
       - name: Install Gateway API and GAIE CRDs
         run: guides/prereq/gateway-provider/install-gateway-provider-dependencies.sh > /dev/null
 
-      - name: Install gateway providers
-        run: |
-          cd guides/prereq/gateway-provider
-          for f in *.helmfile.yaml; do
-            # Skip kgateway - it is deprecated and its older CRDs would
-            # overwrite the agentgateway installation.
-            [[ "$f" == "kgateway.helmfile.yaml" ]] && continue
-            echo "=== Installing gateway provider: $f ==="
-            helmfile apply -f "$f" > /dev/null
-          done
-
-      - name: Wait for gateway control planes
-        run: |
-          for ns in istio-system agentgateway-system; do
-            if kubectl get namespace "$ns" &>/dev/null; then
-              echo "Waiting for pods in $ns..."
-              kubectl wait --for=condition=ready pod \
-                --selector=app.kubernetes.io/managed-by=Helm \
-                --namespace "$ns" \
-                --timeout=300s 2>/dev/null || true
-            fi
-          done
-
       - name: Install monitoring CRDs
         run: docs/monitoring/scripts/install-prometheus-grafana.sh --crds-only
 
+      - name: Install LWS CRDs  # LeaderWorkerSet CRD only, pinned to the lws v0.7.0 tag in the URL below
+        run: |
+          kubectl apply --server-side -f \
+            https://raw.githubusercontent.com/kubernetes-sigs/lws/v0.7.0/config/crd/bases/leaderworkerset.x-k8s.io_leaderworkersets.yaml
+
       - name: Install GKE CRDs
         run: .github/scripts/install-gke-crds.sh
 
-      - name: Dry-run all environments
+      - name: Dry-run scheduler charts  # renders each scheduler chart with helm, then validates it with a server-side dry-run
         run: |
-          cd guides/inference-scheduling
-          # Extract only the YAML before the --- separator; the rest contains
-          # Go template syntax that yq cannot parse.
-          envs=$(awk '/^---/{exit} {print}' helmfile.yaml.gotmpl | yq -r '.environments | keys | .[]')
-          for env_name in $envs; do
+          CHART_VERSION=v1.4.0  # passed to `helm template --version` for every chart in the loop
+          failed=0  # set to 1 on any failure so the remaining charts are still attempted
+          for chart in standalone inferencepool; do  # chart names under the gateway-api-inference-extension OCI path
             echo ""
             echo "========================================"
-            echo "inference-scheduling: environment=$env_name"
+            echo "optimized-baseline: scheduler (${chart})"
             echo "========================================"
-            echo "--- helmfile template ---"
-            helmfile template -e "$env_name" > /tmp/rendered.yaml
-            echo "--- creating namespaces ---"
-            # Collect namespaces from helmfile release definitions and from the
-            # rendered manifests (some resources reference namespaces outside the
-            # release namespace, e.g. monitoring).
-            {
-              helmfile list -e "$env_name" 2>/dev/null | awk 'NR>1 {print $2}'
-              grep -E '^\s+namespace:\s' /tmp/rendered.yaml | awk '{print $2}' | tr -d '"'"'"
-            } | sort -u | while read -r ns; do
-                kubectl create namespace "$ns" --dry-run=client -o yaml | kubectl apply -f - 2>/dev/null || true
-              done
+            echo "--- helm template ---"
+            if ! helm template optimized-baseline-scheduler \
+              "oci://registry.k8s.io/gateway-api-inference-extension/charts/${chart}" \
+              -f guides/recipes/scheduler/base.values.yaml \
+              -f guides/recipes/scheduler/features/monitoring.values.yaml \
+              -f guides/optimized-baseline/scheduler/optimized-baseline.values.yaml \
+              --version "${CHART_VERSION}" > /tmp/rendered.yaml; then
+              echo "FAIL: helm template failed for ${chart}"
+              failed=1
+              continue  # nothing valid was rendered, so skip the kubectl dry-run for this chart
+            fi
             echo "--- kubectl apply --dry-run=server ---"
-            kubectl apply --dry-run=server -f /tmp/rendered.yaml
-            echo "PASS: inference-scheduling / $env_name"
+            if kubectl apply --dry-run=server -f /tmp/rendered.yaml; then  # NOTE(review): the previous step pre-created namespaces found in the rendered output; this one does not - confirm the charts need none
+              echo "PASS: optimized-baseline / scheduler (${chart})"
+            else
+              echo "FAIL: optimized-baseline / scheduler (${chart})"
+              failed=1
+            fi
+          done
+          exit $failed  # non-zero when any chart failed to render or to dry-run
+
+      - name: Kustomize build and dry-run all model server overlays  # builds every modelserver/<dir>/<subdir> that has a kustomization.yaml and validates it server-side
+        run: |
+          failed=0  # NOTE(review): nothing counts how many overlays were checked, so the step also exits 0 when no kustomization.yaml is found
+          for overlay_dir in guides/optimized-baseline/modelserver/*/; do
+            # Each accelerator dir may have one or more server subdirs (vllm, sglang)
+            for kust_dir in "${overlay_dir}"*/; do
+              [ -f "${kust_dir}kustomization.yaml" ] || continue  # skip subdirs that are not kustomize roots
+              name="${kust_dir#guides/optimized-baseline/modelserver/}"  # drop the common prefix for log output
+              name="${name%/}"  # drop the trailing slash left by the glob
+              echo ""
+              echo "========================================"
+              echo "optimized-baseline: ${name}"
+              echo "========================================"
+              echo "--- kustomize build ---"
+              if ! kustomize build "${kust_dir}" > /tmp/rendered.yaml; then
+                echo "FAIL: kustomize build failed for ${name}"
+                failed=1
+                continue  # nothing valid was rendered, so skip the kubectl dry-run for this overlay
+              fi
+              echo "--- kubectl apply --dry-run=server ---"
+              if kubectl apply --dry-run=server -f /tmp/rendered.yaml; then
+                echo "PASS: optimized-baseline / ${name}"
+              else
+                echo "FAIL: optimized-baseline / ${name}"
+                failed=1
+              fi
+            done
           done
+          exit $failed  # non-zero when any overlay failed to build or to dry-run
 
   dry-run-pd-disaggregation:
     needs: detect-changes
 
@@ -69,10 +69,10 @@ jobs:
     needs: [fork-gate]
     runs-on: ubuntu-latest
     outputs:
-      xpu-inference-scheduling: ${{ steps.filter.outputs.xpu-inference-scheduling }}
+      xpu-optimized-baseline: ${{ steps.filter.outputs.xpu-optimized-baseline }}
       xpu-pd-disaggregation: ${{ steps.filter.outputs.xpu-pd-disaggregation }}
       xpu-prefix-cache: ${{ steps.filter.outputs.xpu-prefix-cache }}
-      hpu-inference-scheduling: ${{ steps.filter.outputs.hpu-inference-scheduling }}
+      hpu-optimized-baseline: ${{ steps.filter.outputs.hpu-optimized-baseline }}
       hpu-pd-disaggregation: ${{ steps.filter.outputs.hpu-pd-disaggregation }}
     steps:
       - uses: actions/checkout@v6
@@ -82,14 +82,14 @@ jobs:
         id: filter
         with:
           filters: |
-            xpu-inference-scheduling:
+            xpu-optimized-baseline:
               - .github/workflows/ci-pr-test.yaml
-              - .github/workflows/e2e-inference-scheduling-xpu.yaml
+              - .github/workflows/e2e-optimized-baseline-xpu.yaml
               - .github/scripts/e2e/**
               - docker/common-versions
               - guides/**/values_xpu.yaml
-              - guides/inference-scheduling/ms-inference-scheduling/values_xpu.yaml
-              - guides/inference-scheduling/helmfile.yaml.gotmpl
+              - guides/optimized-baseline/ms-optimized-baseline/values_xpu.yaml
+              - guides/optimized-baseline/helmfile.yaml.gotmpl
             xpu-pd-disaggregation:
               - .github/workflows/ci-pr-test.yaml
               - .github/workflows/e2e-pd-xpu.yaml
@@ -107,14 +107,14 @@ jobs:
               - guides/**/values_xpu.yaml
               - guides/precise-prefix-cache-aware/**/values_xpu.yaml
               - guides/precise-prefix-cache-aware/helmfile.yaml.gotmpl
-            hpu-inference-scheduling:
+            hpu-optimized-baseline:
               - .github/workflows/ci-pr-test.yaml
-              - .github/workflows/e2e-inference-scheduling-hpu.yaml
+              - .github/workflows/e2e-optimized-baseline-hpu.yaml
               - .github/scripts/e2e/**
               - docker/Dockerfile.hpu
               - guides/**/values-hpu.yaml
-              - guides/inference-scheduling/ms-inference-scheduling/values-hpu.yaml
-              - guides/inference-scheduling/helmfile.yaml.gotmpl
+              - guides/optimized-baseline/ms-optimized-baseline/values-hpu.yaml
+              - guides/optimized-baseline/helmfile.yaml.gotmpl
             hpu-pd-disaggregation:
               - .github/workflows/ci-pr-test.yaml
               - .github/workflows/e2e-pd-hpu.yaml
@@ -135,17 +135,17 @@ jobs:
       ref: ${{ github.event.pull_request.head.sha || '' }}
     secrets: inherit
 
-  # Call the XPU Inference Scheduling test workflow
-  test-xpu-inference-scheduling:
+  # Run the XPU optimized-baseline e2e tests through the reusable workflow named in `uses:` below
+  test-xpu-optimized-baseline:
     needs: [detect-changes, build-images]
     # Run tests if:
     # 1. It's a manual workflow_dispatch, OR
     # 2. Related files changed AND (build succeeded OR build was skipped)
     if: >-
       ${{ (github.event_name == 'workflow_dispatch' ||
-      needs.detect-changes.outputs.xpu-inference-scheduling == 'true') &&
+      needs.detect-changes.outputs.xpu-optimized-baseline == 'true') &&
       (needs.build-images.result == 'success' || needs.build-images.result == 'skipped') }}
-    uses: ./.github/workflows/e2e-inference-scheduling-xpu.yaml
+    uses: ./.github/workflows/e2e-optimized-baseline-xpu.yaml  # NOTE(review): confirm the reusable workflow file is renamed to this path in the same change
     with:
       pr_or_branch: ${{ github.event.inputs.pr_or_branch || github.event.pull_request.number }}
       # Only use custom image tag if user provides it, or if XPU image was actually built in this PR
@@ -204,17 +204,17 @@ jobs:
         format('pr-{0}', github.event.pull_request.number) || '') }}
     secrets: inherit
 
-  # Call the HPU Inference Scheduling test workflow
-  test-hpu-inference-scheduling:
+  # Run the HPU optimized-baseline e2e tests through the reusable workflow named in `uses:` below
+  test-hpu-optimized-baseline:
     needs: [detect-changes, build-images]
     # Run tests if:
     # 1. It's a manual workflow_dispatch, OR
     # 2. Related files changed AND (build succeeded OR build was skipped)
     if: >-
       ${{ (github.event_name == 'workflow_dispatch' ||
-      needs.detect-changes.outputs.hpu-inference-scheduling == 'true') &&
+      needs.detect-changes.outputs.hpu-optimized-baseline == 'true') &&
       (needs.build-images.result == 'success' || needs.build-images.result == 'skipped') }}
-    uses: ./.github/workflows/e2e-inference-scheduling-hpu.yaml
+    uses: ./.github/workflows/e2e-optimized-baseline-hpu.yaml  # NOTE(review): confirm the reusable workflow file is renamed to this path in the same change
     with:
       pr_or_branch: ${{ github.event.inputs.pr_or_branch || github.event.pull_request.number }}
       # Only use custom image tag if user provides it, or if HPU image was actually built in this PR
 
@@ -340,15 +340,15 @@ jobs:
               - gaie-kv-events
               - ms-kv-events
 
-          - name: inference-scheduling
-            namespace: llm-d-inference-scheduling
-            guide_dir: inference-scheduling
+          - name: optimized-baseline
+            namespace: llm-d-optimized-baseline
+            guide_dir: optimized-baseline  # presumably the directory name under guides/ - verify against the step that consumes it
             log_file: llmd-inference-installer.log
-            helm_log_file: inference-scheduling-deployment.log
+            helm_log_file: optimized-baseline-deployment.log
             release_names:
-              - infra-inference-scheduling
-              - gaie-inference-scheduling
-              - ms-inference-scheduling
+              - infra-optimized-baseline  # NOTE(review): these three names assume the guide still installs infra-/gaie-/ms- helm releases - confirm against the renamed guide
+              - gaie-optimized-baseline
+              - ms-optimized-baseline
 
     env:
       INSTANCE_IP: ${{ needs.setup.outputs.instance_ip }}