diff --git a/.github/hack/install-odh.sh b/.github/hack/install-odh.sh
index 524cce30e..20d9aa2dd 100755
--- a/.github/hack/install-odh.sh
+++ b/.github/hack/install-odh.sh
@@ -208,7 +208,9 @@ EOF
fi
fi
-# 7. Apply DataScienceCluster (modelsAsService Unmanaged - MaaS deployed separately)
+# 7. Apply DataScienceCluster (KServe + ModelsAsService Managed)
+# The manifest filename retains "unmanaged" for backward compat; contents include
+# modelsAsService.managementState: Managed so the operator deploys maas-controller.
echo "7. Applying DataScienceCluster..."
if kubectl get datasciencecluster -A --no-headers 2>/dev/null | grep -q .; then
echo " DataScienceCluster already exists, skipping"
diff --git a/.gitignore b/.gitignore
index 0265312dd..dca324b3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,6 +56,9 @@ apps/backend/.env
CLAUDE.md
.cursor/
+# Git worktrees
+.worktrees/
+
# Docs build and site directories
docs/build/
docs/site/
diff --git a/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_externalmodels.yaml b/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_externalmodels.yaml
index 5f8d0e305..5d0f36605 100644
--- a/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_externalmodels.yaml
+++ b/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_externalmodels.yaml
@@ -82,15 +82,8 @@ spec:
pattern: ^[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)*$
type: string
provider:
- description: |-
- Provider identifies the API format and auth type for the external model.
- The allowed values are: "openai", "anthropic", "azure-openai", "vertex" and "bedrock-openai".
- enum:
- - openai
- - anthropic
- - azure-openai
- - vertex
- - bedrock-openai
+ description: Provider identifies the API format and auth type for
+ the external model.
maxLength: 63
minLength: 1
type: string
diff --git a/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_maassubscriptions.yaml b/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_maassubscriptions.yaml
index df4cc0d40..1eb7f89b4 100644
--- a/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_maassubscriptions.yaml
+++ b/deployment/base/maas-controller/crd/bases/maas.opendatahub.io_maassubscriptions.yaml
@@ -83,8 +83,11 @@ spec:
description: TokenRateLimit defines a token rate limit
properties:
limit:
- description: Limit is the maximum number of tokens allowed
+ description: |-
+ Limit is the maximum number of tokens allowed within the window.
+ Must be between 1 and 1,000,000,000 (1 billion).
format: int64
+ maximum: 1000000000
minimum: 1
type: integer
window:
diff --git a/deployment/base/maas-controller/rbac/clusterrole.yaml b/deployment/base/maas-controller/rbac/clusterrole.yaml
index c771d3497..89c4660ad 100644
--- a/deployment/base/maas-controller/rbac/clusterrole.yaml
+++ b/deployment/base/maas-controller/rbac/clusterrole.yaml
@@ -21,7 +21,6 @@ rules:
resources:
- endpoints
- pods
- - secrets
verbs:
- get
- list
@@ -35,6 +34,21 @@ rules:
- get
- list
- watch
+- apiGroups:
+ - ""
+ resources:
+ - secrets
+ verbs:
+ - list
+ - watch
+- apiGroups:
+ - ""
+ resourceNames:
+ - maas-db-config
+ resources:
+ - secrets
+ verbs:
+ - get
- apiGroups:
- ""
resources:
diff --git a/deployment/base/payload-processing/rbac/clusterrole.yaml b/deployment/base/payload-processing/rbac/clusterrole.yaml
index 3f71577af..dc15a0ab2 100644
--- a/deployment/base/payload-processing/rbac/clusterrole.yaml
+++ b/deployment/base/payload-processing/rbac/clusterrole.yaml
@@ -13,5 +13,5 @@ rules:
verbs: ["get", "list", "watch"]
# model-provider-resolver plugin: watches ExternalModel CRDs across namespaces
- apiGroups: ["maas.opendatahub.io"]
- resources: ["maasmodelrefs", "externalmodels"]
+ resources: ["externalmodels"]
verbs: ["get", "list", "watch"]
diff --git a/deployment/components/observability/observability/dashboards/usage-dashboard.yaml b/deployment/components/observability/observability/dashboards/usage-dashboard.yaml
index 94ecc68a0..02fe707bf 100644
--- a/deployment/components/observability/observability/dashboards/usage-dashboard.yaml
+++ b/deployment/components/observability/observability/dashboards/usage-dashboard.yaml
@@ -81,7 +81,29 @@ spec:
datasource:
kind: PrometheusDatasource
name: kuadrant-prometheus-datasource
- query: 'count(count by (user) (increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range]) > 0)) or vector(0)'
+ query: |-
+ count(
+ count by (user) (
+ (
+ (
+ sum by (user, subscription, limitador_namespace) (
+ increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ +
+ sum by (user, subscription, limitador_namespace) (
+ increase(limited_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ )
+ or
+ sum by (user, subscription, limitador_namespace) (
+ increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ )
+ * on(user, subscription, limitador_namespace)
+ (0 * max by (user, subscription, limitador_namespace) (max_over_time(authorized_hits{model=~"$model"}[$__range])) + 1)
+ > 0
+ )
+ ) or vector(0)
seriesNameFormat: Users
successRate:
kind: Panel
@@ -105,7 +127,41 @@ spec:
datasource:
kind: PrometheusDatasource
name: kuadrant-prometheus-datasource
- query: '((sum(increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range]))) / ((sum(increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])) + (sum(increase(limited_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])) or vector(0))) > 0)) or vector(1)'
+ query: |-
+ (
+ (
+ sum(
+ sum by (user, subscription, limitador_namespace) (
+ increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ * on(user, subscription, limitador_namespace)
+ (0 * max by (user, subscription, limitador_namespace) (max_over_time(authorized_hits{model=~"$model"}[$__range])) + 1)
+ )
+ )
+ /
+ (
+ (
+ sum(
+ sum by (user, subscription, limitador_namespace) (
+ increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ * on(user, subscription, limitador_namespace)
+ (0 * max by (user, subscription, limitador_namespace) (max_over_time(authorized_hits{model=~"$model"}[$__range])) + 1)
+ )
+ +
+ (
+ sum(
+ sum by (user, subscription, limitador_namespace) (
+ increase(limited_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ * on(user, subscription, limitador_namespace)
+ (0 * max by (user, subscription, limitador_namespace) (max_over_time(authorized_hits{model=~"$model"}[$__range])) + 1)
+ )
+ or vector(0)
+ )
+ ) > 0
+ )
+ ) or vector(1)
seriesNameFormat: Success Rate
tokenConsumptionByUser:
kind: Panel
@@ -180,7 +236,15 @@ spec:
query: |-
round(
sum by (user, subscription, model) (
- sum by (user, subscription, limitador_namespace) (increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range]))
+ (
+ (
+ sum by (user, subscription, limitador_namespace) (increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range]))
+ +
+ sum by (user, subscription, limitador_namespace) (increase(limited_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range]))
+ )
+ or
+ sum by (user, subscription, limitador_namespace) (increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range]))
+ )
* on(user, subscription, limitador_namespace) group_left(model)
(0 * max by (user, subscription, limitador_namespace, model) (max_over_time(authorized_hits{model=~"$model"}[$__range])) + 1)
)
@@ -228,7 +292,15 @@ spec:
datasource:
kind: PrometheusDatasource
name: kuadrant-prometheus-datasource
- query: 'sum(increase(limited_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])) or vector(0)'
+ query: |-
+ sum(
+ sum by (user, subscription, limitador_namespace) (
+ increase(limited_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ * on(user, subscription, limitador_namespace)
+ (0 * max by (user, subscription, limitador_namespace) (max_over_time(authorized_hits{model=~"$model"}[$__range])) + 1)
+ )
+ or vector(0)
seriesNameFormat: Errors
totalRequests:
kind: Panel
@@ -253,7 +325,28 @@ spec:
datasource:
kind: PrometheusDatasource
name: kuadrant-prometheus-datasource
- query: '(sum(increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])) or vector(0)) + (sum(increase(limited_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])) or vector(0))'
+ query: |-
+ (
+ sum(
+ sum by (user, subscription, limitador_namespace) (
+ increase(authorized_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ * on(user, subscription, limitador_namespace)
+ (0 * max by (user, subscription, limitador_namespace) (max_over_time(authorized_hits{model=~"$model"}[$__range])) + 1)
+ )
+ or vector(0)
+ )
+ +
+ (
+ sum(
+ sum by (user, subscription, limitador_namespace) (
+ increase(limited_calls{user!="", user=~"$user", subscription=~"$subscription"}[$__range])
+ )
+ * on(user, subscription, limitador_namespace)
+ (0 * max by (user, subscription, limitador_namespace) (max_over_time(authorized_hits{model=~"$model"}[$__range])) + 1)
+ )
+ or vector(0)
+ )
seriesNameFormat: Requests
totalTokens:
kind: Panel
diff --git a/docs/content/advanced-administration/observability.md b/docs/content/advanced-administration/observability.md
index 37cda60ad..f93c8ddc7 100644
--- a/docs/content/advanced-administration/observability.md
+++ b/docs/content/advanced-administration/observability.md
@@ -144,18 +144,19 @@ The observability stack consists of:
There are two ways to enable deployment-based observability:
-1. **Operator-managed** (recommended): Enable via ModelsAsService CR
+1. **Operator-managed** (recommended): Enable via Tenant CR
2. **Kustomize-based**: Deploy manifests directly
### Option 1: Operator-Managed Telemetry
-When using the ODH/RHOAI operator, telemetry can be enabled via the ModelsAsService CR:
+When using the ODH/RHOAI operator, telemetry can be enabled via the Tenant CR (self-bootstrapped by `maas-controller` in the `models-as-a-service` namespace):
```yaml
-apiVersion: components.platform.opendatahub.io/v1alpha1
-kind: ModelsAsService
+apiVersion: maas.opendatahub.io/v1alpha1
+kind: Tenant
metadata:
- name: default-modelsasservice
+ name: default-tenant
+ namespace: models-as-a-service
spec:
telemetry:
enabled: true # Enable TelemetryPolicy and Istio Telemetry
@@ -169,11 +170,11 @@ spec:
Or patch an existing CR:
```bash
-kubectl patch modelsasservice default-modelsasservice --type=merge \
+kubectl patch tenant default-tenant -n models-as-a-service --type=merge \
-p '{"spec":{"telemetry":{"enabled":true}}}'
```
-**What the operator creates when `telemetry.enabled: true`:**
+**What the Tenant reconciler creates when `telemetry.enabled: true`:**
| Resource | Namespace | Purpose |
|----------|-----------|---------|
@@ -181,13 +182,13 @@ kubectl patch modelsasservice default-modelsasservice --type=merge \
| Istio Telemetry (`latency-per-subscription`) | Gateway namespace | Adds `subscription` label to gateway latency metrics |
!!! note "Prerequisites for Operator-Managed Telemetry"
- The operator-managed telemetry feature requires:
+ The Tenant reconciler's telemetry feature requires:
- **OpenShift Service Mesh (Istio)** 2.4+ — for Istio Telemetry CRD
- **Kuadrant/RHCL** — for TelemetryPolicy CRD and AuthPolicy header injection
- **Gateway deployed** — Telemetry targets the gateway via selector
- The operator checks for CRD availability before creating resources. If a CRD is not present, that resource is silently skipped.
+ The Tenant reconciler checks for CRD availability before creating resources. If a CRD is not present, that resource is silently skipped.
!!! warning "AuthPolicy Header Dependency"
The Istio Telemetry reads the `subscription` value from the `X-MaaS-Subscription` header, which must be injected by AuthPolicy:
diff --git a/docs/content/concepts/architecture.md b/docs/content/concepts/architecture.md
index 220d01f5d..ca89699c3 100644
--- a/docs/content/concepts/architecture.md
+++ b/docs/content/concepts/architecture.md
@@ -97,7 +97,7 @@ graph TB
6. Only the hash and metadata (username, groups, name, `subscription` — the MaaSSubscription name bound at mint, `expiresAt`) are stored in PostgreSQL.
7. The plaintext key is returned to the user **only in this minting response** (show-once), along with `expiresAt`; it is **not** exposed again on later reads. The diagram below stops at storage and does not show the HTTP response back to the user.
-Every key expires. With **operator-managed** MaaS, the cluster operator sets the maximum lifetime on the **`ModelsAsService`** CR: **`spec.apiKeys.maxExpirationDays`** (see [ModelsAsService CR](../install/maas-setup.md#modelsasservice-cr)). **`maas-api`** applies that cap as **`API_KEY_MAX_EXPIRATION_DAYS`** (for example 90 days by default when defaults apply). Omit **`expiresIn`** on create to use that maximum, or set a shorter **`expiresIn`** (e.g., `30d`, `90d`, `1h`) within the configured cap. The response always includes **`expiresAt`** (RFC3339).
+Every key expires. With **operator-managed** MaaS, the cluster operator sets the maximum lifetime on the **`Tenant`** CR: **`spec.apiKeys.maxExpirationDays`** (see [Tenant CR](../install/maas-setup.md#tenant-cr)). **`maas-api`** applies that cap as **`API_KEY_MAX_EXPIRATION_DAYS`** (for example 90 days by default when defaults apply). Omit **`expiresIn`** on create to use that maximum, or set a shorter **`expiresIn`** (e.g., `30d`, `90d`, `1h`) within the configured cap. The response always includes **`expiresAt`** (RFC3339).
```mermaid
graph TB
diff --git a/docs/content/configuration-and-management/maas-controller-overview.md b/docs/content/configuration-and-management/maas-controller-overview.md
index 0d1d6e343..fbf91341d 100644
--- a/docs/content/configuration-and-management/maas-controller-overview.md
+++ b/docs/content/configuration-and-management/maas-controller-overview.md
@@ -2,18 +2,18 @@
This document describes the **MaaS Controller**: what was built, how it fits into the Models-as-a-Service (MaaS) stack, and how the pieces work together. It is intended for presentations, onboarding, and technical deep-dives.
-!!! todo "Documentation cleanup"
- TODO: Clean up this documentation.
-
---
## 1. What Is the MaaS Controller?
-The **MaaS Controller** is a Kubernetes controller that provides a **subscription-style control plane** for Models-as-a-Service. It lets platform operators define:
+The **MaaS Controller** is a Kubernetes controller with two main responsibilities:
+
+1. **Tenant reconciler** — deploys and manages the MaaS platform workloads (`maas-api`, gateway policies, telemetry, DestinationRule) via the **`Tenant`** CR (`maas.opendatahub.io/v1alpha1`). On startup the controller self-bootstraps a `default-tenant` CR in the `models-as-a-service` namespace if one does not exist. The Tenant reconciler renders embedded kustomize manifests at runtime and applies them via Server-Side Apply (SSA).
-- **Which models** are exposed through MaaS (via **MaaSModelRef**).
-- **Who can access** those models (via **MaaSAuthPolicy**).
-- **Per-user/per-group token rate limits** for those models (via **MaaSSubscription**).
+2. **Subscription reconcilers** — let platform operators define:
+ - **Which models** are exposed through MaaS (via **MaaSModelRef**).
+ - **Who can access** those models (via **MaaSAuthPolicy**).
+ - **Per-user/per-group token rate limits** for those models (via **MaaSSubscription**).
The controller does not run inference. It **reconciles** your high-level MaaS CRs into the underlying Gateway API and Kuadrant resources (HTTPRoutes, AuthPolicies, TokenRateLimitPolicies) that enforce routing, authentication, and rate limiting at the gateway.
@@ -23,6 +23,10 @@ The controller does not run inference. It **reconciles** your high-level MaaS CR
```mermaid
flowchart TB
+ subgraph Platform["Platform lifecycle"]
+ Tenant["Tenant CR\n(default-tenant)"]
+ end
+
subgraph Operator["Platform operator"]
MaaSModelRef["MaaSModelRef"]
MaaSAuthPolicy["MaaSAuthPolicy"]
@@ -30,11 +34,18 @@ flowchart TB
end
subgraph Controller["maas-controller"]
+ TenantReconciler["Tenant\nReconciler"]
ModelReconciler["MaaSModelRef\nReconciler"]
AuthReconciler["MaaSAuthPolicy\nReconciler"]
SubReconciler["MaaSSubscription\nReconciler"]
end
+ subgraph PlatformWorkloads["Platform Workloads"]
+ MaaSAPI["maas-api\n(Deployment, Service, HTTPRoute)"]
+ GatewayPolicies["Gateway default policies\n(AuthPolicy, TokenRateLimitPolicy)"]
+ Telemetry["TelemetryPolicy\nIstio Telemetry"]
+ end
+
subgraph GatewayStack["Gateway API + Kuadrant"]
HTTPRoute["HTTPRoute"]
AuthPolicy["AuthPolicy\n(Kuadrant)"]
@@ -45,6 +56,11 @@ flowchart TB
LLMIS["LLMInferenceService\n(KServe)"]
end
+ Tenant --> TenantReconciler
+ TenantReconciler --> MaaSAPI
+ TenantReconciler --> GatewayPolicies
+ TenantReconciler --> Telemetry
+
MaaSModelRef --> ModelReconciler
MaaSAuthPolicy --> AuthReconciler
MaaSSubscription --> SubReconciler
@@ -58,12 +74,66 @@ flowchart TB
HTTPRoute --> LLMIS
```
-**Summary:** You declare intent with MaaS CRs; the controller turns that into Gateway/Kuadrant resources that attach to the same HTTPRoute and backend (e.g. KServe LLMInferenceService).
+**Summary:** The controller has two sides: the **Tenant reconciler** deploys and manages the MaaS platform workloads (maas-api, gateway policies, telemetry) from the `Tenant` CR; the **subscription reconcilers** turn MaaS CRs into Gateway/Kuadrant resources that attach to per-model HTTPRoutes and backends (e.g. KServe LLMInferenceService).
The **MaaS API** GET /v1/models endpoint uses MaaSModelRef CRs as its primary source: it reads them cluster-wide (all namespaces), then **validates access** by probing each model’s `/v1/models` endpoint with the client’s **Authorization header** (passed through as-is). Only models that return 2xx or 405 are included. So the catalogue returned to the client is the set of MaaSModelRef objects the controller reconciles, filtered to those the client can actually access. No token exchange is performed; the header is forwarded as-is.
---
+## 2.1. Tenant Resource Layout
+
+The `Tenant` CR is namespace-scoped (lives in `models-as-a-service`). It owns resources across three scopes — same-namespace children use standard `ownerReference`, while cluster-scoped and cross-namespace children use **tracking labels** (Kubernetes rejects cross-namespace and namespaced-to-cluster ownerRefs).
+
+```mermaid
+graph TB
+ subgraph "models-as-a-service namespace"
+ Tenant["Tenant CR
default-tenant"]
+ API["maas-api Deployment"]
+ CM["ConfigMaps"]
+ SVC["Services"]
+ SA["ServiceAccounts"]
+ NP["NetworkPolicies"]
+ HR["HTTPRoutes"]
+ AP2["maas-api AuthPolicy"]
+ end
+
+ subgraph "openshift-ingress namespace"
+ AP["gateway AuthPolicy"]
+ DR["DestinationRule"]
+ TP["TelemetryPolicy"]
+ IT["Istio Telemetry"]
+ end
+
+ subgraph "Cluster-scoped"
+ CR["ClusterRoles"]
+ CRB["ClusterRoleBindings"]
+ end
+
+ Tenant -->|ownerRef| API
+ Tenant -->|ownerRef| CM
+ Tenant -->|ownerRef| SVC
+ Tenant -->|ownerRef| SA
+ Tenant -->|ownerRef| NP
+ Tenant -->|ownerRef| HR
+ Tenant -->|ownerRef| AP2
+ Tenant -.->|tracking labels| CR
+ Tenant -.->|tracking labels| CRB
+ Tenant -.->|tracking labels| AP
+ Tenant -.->|tracking labels| DR
+ Tenant -.->|tracking labels| TP
+ Tenant -.->|tracking labels| IT
+
+ style Tenant fill:#4a90d9,color:#fff
+ style AP fill:#f5a623,color:#fff
+ style DR fill:#f5a623,color:#fff
+ style TP fill:#f5a623,color:#fff
+ style IT fill:#f5a623,color:#fff
+```
+
+**Solid arrows** = standard ownerReference (automatic GC). **Dashed arrows** = tracking labels (finalizer-based cleanup). **Orange resources** = cross-namespace children that require tracking labels.
+
+---
+
## 3. Request Flow (End-to-End)
```mermaid
@@ -149,19 +219,22 @@ flowchart TB
subgraph Cluster["Kubernetes cluster"]
subgraph maas_controller["maas-controller (Deployment)"]
Manager["Controller Manager"]
+ TenantReconciler["Tenant\nReconciler"]
ModelReconciler["MaaSModelRef\nReconciler"]
AuthReconciler["MaaSAuthPolicy\nReconciler"]
SubReconciler["MaaSSubscription\nReconciler"]
end
- CRDs["CRDs: MaaSModelRef,\nMaaSAuthPolicy,\nMaaSSubscription"]
+ CRDs["CRDs: Tenant,\nMaaSModelRef,\nMaaSAuthPolicy,\nMaaSSubscription"]
RBAC["RBAC: ClusterRole,\nServiceAccount, etc."]
end
Watch["Watch MaaS CRs,\nGateway API, Kuadrant,\nLLMInferenceService"]
+ Manager --> TenantReconciler
Manager --> ModelReconciler
Manager --> AuthReconciler
Manager --> SubReconciler
+ TenantReconciler --> Watch
ModelReconciler --> Watch
AuthReconciler --> Watch
SubReconciler --> Watch
@@ -169,7 +242,7 @@ flowchart TB
RBAC --> maas_controller
```
-- Single binary: **manager** runs three reconcilers.
+- Single binary: **manager** runs four reconcilers (Tenant + three subscription reconcilers).
- Registers **Kubernetes core**, **Gateway API**, **KServe (v1alpha1)**, and **MaaS (v1alpha1)** schemes; uses **unstructured** for Kuadrant resources.
- Reads/writes MaaS CRs, HTTPRoutes, Gateways, AuthPolicies, TokenRateLimitPolicies, and LLMInferenceServices (read-only for model metadata/routes).
@@ -216,7 +289,8 @@ flowchart LR
Deploy --> Examples
```
-- **Namespaces**: MaaS API and controller default to **opendatahub** (configurable). MaaSAuthPolicy and MaaSSubscription default to **models-as-a-service** (configurable). MaaSModelRef must live in the **same namespace** as the model it references (e.g. **llm**).
+- **Namespaces**: The controller defaults to **opendatahub** (configurable). The **Tenant** CR — and the `maas-api` it deploys via same-namespace ownerReference — plus MaaSAuthPolicy and MaaSSubscription default to **models-as-a-service** (configurable). MaaSModelRef must live in the **same namespace** as the model it references (e.g. **llm**).
+- **Self-bootstrap**: On startup, `maas-controller` creates a `default-tenant` CR in the `models-as-a-service` namespace if one does not exist. The Tenant reconciler then deploys `maas-api` and gateway policies via SSA.
- **Install**: `./scripts/deploy.sh` installs the full stack including the controller. Optionally run `./scripts/install-examples.sh` for sample MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription.
---
@@ -263,10 +337,10 @@ Model workloads (vLLM, Llama.cpp, etc.) do not require strong identity claims in
| Topic | Summary |
|-------|---------|
-| **What** | MaaS Controller = control plane that reconciles MaaSModelRef, MaaSAuthPolicy, and MaaSSubscription into Gateway API and Kuadrant resources. |
-| **Where** | Single controller in `opendatahub`; MaaSAuthPolicy / MaaSSubscription default to `models-as-a-service`; MaaSModelRef and generated Kuadrant policies target their model’s namespace. |
-| **How** | Three reconcilers watch MaaS CRs (and related resources); each creates/updates HTTPRoutes, AuthPolicies, or TokenRateLimitPolicies. |
+| **What** | MaaS Controller = control plane with a **Tenant reconciler** (deploys maas-api and gateway policies from a `Tenant` CR) and **subscription reconcilers** (reconcile MaaSModelRef, MaaSAuthPolicy, MaaSSubscription into Gateway API and Kuadrant resources). |
+| **Where** | Single controller in `opendatahub`; `Tenant` CR / MaaSAuthPolicy / MaaSSubscription default to `models-as-a-service`; MaaSModelRef and generated Kuadrant policies target their model’s namespace. |
+| **How** | Four reconcilers: Tenant reconciler deploys platform workloads via SSA; three subscription reconcilers watch MaaS CRs (and related resources) and create/update HTTPRoutes, AuthPolicies, or TokenRateLimitPolicies. |
| **Identity bridge** | AuthPolicy exposes all user groups as a comma-separated `groups_str`; TokenRateLimitPolicy uses `groups_str.split(",").exists(...)` for subscription matching (the “string trick”). |
-| **Deploy** | Run `./scripts/deploy.sh`; optionally install examples. |
+| **Deploy** | Run `./scripts/deploy.sh`; controller self-bootstraps `default-tenant`; optionally install examples. |
This overview should be enough to explain what was created and how it works in talks or written docs.
diff --git a/docs/content/install/maas-setup.md b/docs/content/install/maas-setup.md
index 3ddb7276b..355f5f9fe 100644
--- a/docs/content/install/maas-setup.md
+++ b/docs/content/install/maas-setup.md
@@ -139,7 +139,7 @@ After creating the database Secret and Gateways, create or update your DataScien
=== "Managed (Recommended)"
- The operator deploys and manages the MaaS API. Create or update your DataScienceCluster with `modelsAsService` in Managed state:
+ The operator deploys `maas-controller`, which self-bootstraps a `default-tenant` CR and reconciles the MaaS platform workloads (maas-api, gateway policies, telemetry). Create or update your DataScienceCluster with `modelsAsService` in Managed state:
```yaml
kubectl apply -f - <= 0, "should check for preferred_username")
+ assert.True(t, userUsernameIdx >= 0, "should check for user.username")
+ assert.True(t, preferredIdx < userUsernameIdx,
+ "should check preferred_username (OIDC) before user.username (K8s)")
+
+ // For groups: should check auth.identity.groups before auth.identity.user.groups
+ identityGroupsIdx := findSubstring(celGroups, "auth.identity.groups")
+ userGroupsIdx := findSubstring(celGroups, "auth.identity.user.groups")
+ assert.True(t, identityGroupsIdx >= 0, "should check for auth.identity.groups")
+ assert.True(t, userGroupsIdx >= 0, "should check for auth.identity.user.groups")
+ assert.True(t, identityGroupsIdx < userGroupsIdx,
+ "should check auth.identity.groups (OIDC) before auth.identity.user.groups (K8s)")
+}
+
+// Helper function to find substring index
+func findSubstring(s, substr string) int {
+ for i := 0; i <= len(s)-len(substr); i++ {
+ if s[i:i+len(substr)] == substr {
+ return i
+ }
+ }
+ return -1
+}
diff --git a/maas-controller/pkg/controller/maas/maasmodelref_controller.go b/maas-controller/pkg/controller/maas/maasmodelref_controller.go
index da65d6db8..e4cd53da6 100644
--- a/maas-controller/pkg/controller/maas/maasmodelref_controller.go
+++ b/maas-controller/pkg/controller/maas/maasmodelref_controller.go
@@ -84,7 +84,6 @@ func (r *MaaSModelRefReconciler) gatewayNamespace() string {
//+kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=gateways,verbs=get;list;watch
//+kubebuilder:rbac:groups=kuadrant.io,resources=authpolicies,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=serving.kserve.io,resources=llminferenceservices,verbs=get;list;watch
-//+kubebuilder:rbac:groups="",resources=secrets,verbs=get
const maasModelFinalizer = "maas.opendatahub.io/model-cleanup"
diff --git a/maas-controller/pkg/controller/maas/maassubscription_controller.go b/maas-controller/pkg/controller/maas/maassubscription_controller.go
index 8e7371ce0..3a0828173 100644
--- a/maas-controller/pkg/controller/maas/maassubscription_controller.go
+++ b/maas-controller/pkg/controller/maas/maassubscription_controller.go
@@ -20,8 +20,10 @@ import (
"context"
"errors"
"fmt"
+ "regexp"
"slices"
"sort"
+ "strconv"
"strings"
"github.com/go-logr/logr"
@@ -66,8 +68,59 @@ const (
// modelRefIndexKey is the field index key for looking up MaaSSubscriptions by model reference.
// The index value format is "namespace/name" of the model.
modelRefIndexKey = "spec.modelRef"
+
+ // maxTokenRateLimit caps the token limit to prevent Kuadrant validation failures.
+ // Values above this are unreasonable for any practical rate-limiting scenario.
+ maxTokenRateLimit int64 = 1_000_000_000 // 1 billion tokens
+
+ // maxWindowSeconds caps the window duration to 366 days (one leap year) to prevent
+ // unreasonably large windows from reaching Kuadrant. 8784h fits the CRD pattern
+ // ^[1-9]\d{0,3}(s|m|h)$.
+ maxWindowSeconds int64 = 366 * 24 * 3600 // 366 days (leap year) in seconds
)
+var windowPattern = regexp.MustCompile(`^[1-9]\d{0,3}(s|m|h)$`)
+
+// validateTokenRateLimit checks if a token rate limit has reasonable values that
+// Kuadrant will accept. Returns an error describing the issue if invalid.
+func validateTokenRateLimit(limit int64, window string) error {
+ if limit <= 0 {
+ return fmt.Errorf("token limit %d must be positive", limit)
+ }
+ if limit > maxTokenRateLimit {
+ return fmt.Errorf("token limit %d exceeds maximum allowed value %d", limit, maxTokenRateLimit)
+ }
+
+ matches := windowPattern.FindStringSubmatch(window)
+ if len(matches) != 2 {
+ return fmt.Errorf("invalid window format %q: expected a positive number followed by s, m, or h (e.g. \"1h\", \"30m\")", window)
+ }
+
+ // Extract numeric part (everything except the last character).
+ unit := matches[1]
+ numStr := window[:len(window)-len(unit)]
+ value, err := strconv.ParseInt(numStr, 10, 64)
+ if err != nil {
+ return fmt.Errorf("invalid window numeric value %q: %w", numStr, err)
+ }
+
+ var seconds int64
+ switch unit {
+ case "s":
+ seconds = value
+ case "m":
+ seconds = value * 60
+ case "h":
+ seconds = value * 3600
+ }
+
+ if seconds > maxWindowSeconds {
+ return fmt.Errorf("window %q (%d seconds) exceeds maximum allowed duration (%d seconds)", window, seconds, maxWindowSeconds)
+ }
+
+ return nil
+}
+
// ConditionSpecPriorityDuplicate is set True when another MaaSSubscription shares the same spec.priority
// (API key mint and selector use deterministic tie-break; admins should set distinct priorities).
const ConditionSpecPriorityDuplicate = "SpecPriorityDuplicate"
@@ -459,18 +512,41 @@ func (r *MaaSSubscriptionReconciler) reconcileTRLPForModel(ctx context.Context,
continue
}
var rates []any
+ var hasInvalidLimits bool
if len(mRef.TokenRateLimits) > 0 {
for _, trl := range mRef.TokenRateLimits {
+ if err := validateTokenRateLimit(trl.Limit, trl.Window); err != nil {
+ log.Error(err, "Skipping subscription with invalid token rate limit — fix the spec to include it in TRLP",
+ "subscription", sub.Name, "model", modelNamespace+"/"+modelName,
+ "limit", trl.Limit, "window", trl.Window)
+ hasInvalidLimits = true
+ break
+ }
rates = append(rates, map[string]any{"limit": trl.Limit, "window": trl.Window})
}
} else {
rates = append(rates, map[string]any{"limit": int64(100), "window": "1m"})
}
+ if hasInvalidLimits {
+ // Skip this subscription to prevent poisoning the aggregated TRLP.
+ // The subscription is already marked Degraded/Failed by validateModelRefs(),
+ // and maas-api's subscription selector rejects non-Active subscriptions,
+ // so the invalid subscription cannot be used for API key minting.
+ continue
+ }
subs = append(subs, subInfo{sub: sub, mRef: mRef, rates: rates})
break
}
}
+ // If all subscriptions were skipped due to invalid limits, treat as no effective
+ // subscriptions — delete the TRLP instead of writing one with empty limits.
+ if len(subs) == 0 && len(allSubs) > 0 {
+ log.Info("All subscriptions for model have invalid rate limits — deleting TRLP",
+ "model", modelNamespace+"/"+modelName, "invalidCount", len(allSubs))
+ return r.deleteModelTRLP(ctx, log, modelNamespace, modelName)
+ }
+
// Trust auth.identity.selected_subscription_key from AuthPolicy.
// AuthPolicy has already validated subscription selection via /v1/subscriptions/select,
// which handles:
diff --git a/maas-controller/pkg/controller/maas/maassubscription_controller_test.go b/maas-controller/pkg/controller/maas/maassubscription_controller_test.go
index 608d46866..cda455fc6 100644
--- a/maas-controller/pkg/controller/maas/maassubscription_controller_test.go
+++ b/maas-controller/pkg/controller/maas/maassubscription_controller_test.go
@@ -1248,7 +1248,7 @@ func TestMaaSSubscriptionReconciler_WindowValuesInTRLP(t *testing.T) {
{"seconds", "30s"}, // short window, typical for burst limits
{"minutes", "5m"}, // default-like value used across the codebase
{"hours", "24h"}, // common replacement for the now-removed "1d"
- {"max digits", "9999h"}, // upper bound of the 4-digit numeric cap
+ {"max digits", "8784h"}, // upper bound: 366 days (leap year) in hours
}
for _, tc := range tests {
diff --git a/maas-controller/pkg/controller/maas/tenant_controller.go b/maas-controller/pkg/controller/maas/tenant_controller.go
index 28280285c..bb6b06972 100644
--- a/maas-controller/pkg/controller/maas/tenant_controller.go
+++ b/maas-controller/pkg/controller/maas/tenant_controller.go
@@ -57,7 +57,8 @@ type TenantReconciler struct {
// +kubebuilder:rbac:groups=maas.opendatahub.io,resources=tenants/finalizers,verbs=update
// +kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=gateways,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;patch;delete
-// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
+// +kubebuilder:rbac:groups="",resources=secrets,resourceNames=maas-db-config,verbs=get
+// +kubebuilder:rbac:groups="",resources=secrets,verbs=list;watch
// +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch;create;patch;delete
// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;patch;delete
// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;patch;delete
diff --git a/scripts/README.md b/scripts/README.md
index 6fdc9016c..67275864f 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -37,7 +37,7 @@ Automated deployment script for OpenShift clusters supporting both operator-base
- `--operator-type ` - Which operator to install (default: odh)
- `--deployment-mode ` - Deployment method (default: operator)
- `--namespace ` - Target namespace for deployment
-- `--external-oidc` - Enable external OIDC on the `maas-api` AuthPolicy (kustomize mode only; in operator mode, configure `spec.externalOIDC` on the `ModelsAsService` CR)
+- `--external-oidc` - Enable external OIDC on the `maas-api` AuthPolicy (kustomize mode only; in operator mode, configure `spec.externalOIDC` on the `Tenant` CR)
- `--enable-keycloak` - Deploy a Keycloak instance for external OIDC testing
- `--enable-tls-backend` - Enable TLS backend (default)
- `--disable-tls-backend` - Disable TLS backend
@@ -155,8 +155,8 @@ Results:
External OIDC can be enabled in two ways:
-**Operator mode:** Edit the `ModelsAsService` CR to add `spec.externalOIDC` with
-`issuerUrl` and `clientId`. The operator patches the AuthPolicy automatically.
+**Operator mode:** Edit the `Tenant` CR to add `spec.externalOIDC` with
+`issuerUrl` and `clientId`. The Tenant reconciler patches the AuthPolicy automatically.
**Kustomize mode:** Use `--external-oidc` with env vars:
```bash
diff --git a/test/e2e/fixtures/trlp-test/llm/llmis.yaml b/test/e2e/fixtures/trlp-test/llm/llmis.yaml
index 722c133b6..dc23446b3 100644
--- a/test/e2e/fixtures/trlp-test/llm/llmis.yaml
+++ b/test/e2e/fixtures/trlp-test/llm/llmis.yaml
@@ -17,7 +17,7 @@ spec:
template:
containers:
- name: main
- image: "ghcr.io/llm-d/llm-d-inference-sim:v0.7.1"
+ image: "ghcr.io/llm-d/llm-d-inference-sim:v0.8.2"
imagePullPolicy: Always
command: ["/app/llm-d-inference-sim"]
args:
@@ -27,6 +27,7 @@ spec:
- test/e2e-trlp-test-model
- --mode
- random
+ - --no-mm-encoder-only
- --ssl-certfile
- /var/run/kserve/tls/tls.crt
- --ssl-keyfile
@@ -42,6 +43,11 @@ spec:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
+ - name: POD_IP
+ valueFrom:
+ fieldRef:
+ apiVersion: v1
+ fieldPath: status.podIP
ports:
- name: https
containerPort: 8000
diff --git a/test/e2e/scripts/auth_utils.sh b/test/e2e/scripts/auth_utils.sh
index 39f8bf489..736c5eb85 100755
--- a/test/e2e/scripts/auth_utils.sh
+++ b/test/e2e/scripts/auth_utils.sh
@@ -16,6 +16,7 @@
# maasauthpolicies.yaml - MaaSAuthPolicy definitions
# maassubscriptions.yaml - MaaSSubscription definitions
# externalmodels.yaml - ExternalModel definitions
+# tenants.yaml - Tenant definitions
# pod-logs/ - Per-pod logs from the deployment namespace
#
# Usage:
@@ -127,6 +128,7 @@ MAAS_CRDS=(
"maasauthpolicies.maas.opendatahub.io"
"maassubscriptions.maas.opendatahub.io"
"externalmodels.maas.opendatahub.io"
+ "tenants.maas.opendatahub.io"
)
collect_maas_crs() {
@@ -208,6 +210,7 @@ collect_cluster_state() {
echo "--- MaaS CRs ---"
kubectl get maasmodelrefs -n "$DEPLOYMENT_NAMESPACE" 2>/dev/null || true
kubectl get maasauthpolicies,maassubscriptions -n "$MAAS_SUBSCRIPTION_NAMESPACE" 2>/dev/null || true
+ kubectl get tenants -n "$MAAS_SUBSCRIPTION_NAMESPACE" 2>/dev/null || true
echo ""
echo "--- HTTPRoutes ---"
kubectl get httproutes -A 2>/dev/null | head -30 || true
@@ -324,6 +327,8 @@ run_auth_debug_report() {
_run "MaaSSubscriptions" "kubectl get maassubscriptions -n $MAAS_SUBSCRIPTION_NAMESPACE -o wide 2>/dev/null || true"
_run "MaaSSubscription status details" "kubectl get maassubscriptions -n $MAAS_SUBSCRIPTION_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}: {.status.phase} - {.status.conditions[?(@.type==\"Ready\")].message}{\"\\n\"}{end}' 2>/dev/null || true"
_run "MaaSModelRefs (all namespaces)" "kubectl get maasmodelrefs -A -o wide 2>/dev/null || true"
+ _run "Tenants" "kubectl get tenants -n $MAAS_SUBSCRIPTION_NAMESPACE -o wide 2>/dev/null || true"
+ _run "Tenant status details" "kubectl get tenants -n $MAAS_SUBSCRIPTION_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}: {.status.conditions[?(@.type==\"Ready\")].status} - {.status.conditions[?(@.type==\"Ready\")].message}{\"\\n\"}{end}' 2>/dev/null || true"
_append ""
_section "Test User Information"
diff --git a/test/e2e/tests/test_external_oidc.py b/test/e2e/tests/test_external_oidc.py
index c9ae93173..9c1a3e886 100644
--- a/test/e2e/tests/test_external_oidc.py
+++ b/test/e2e/tests/test_external_oidc.py
@@ -78,6 +78,41 @@ def test_invalid_oidc_token_gets_401(self, maas_api_base_url: str):
)
assert response.status_code == 401, f"Expected 401 for invalid OIDC token, got {response.status_code}: {response.text}"
+ def test_oidc_token_can_list_models(self, maas_api_base_url: str):
+ """Test that an OIDC token (not a minted API key) can list models via /v1/models.
+
+ This tests the OIDC support on model AuthPolicies, not just maas-api AuthPolicy.
+ When OIDC is enabled on model AuthPolicies, users can discover available models
+ without first minting an API key.
+ """
+ token = _request_oidc_token()
+ headers = {"Authorization": f"Bearer {token}"}
+
+ models_response = requests.get(
+ f"{maas_api_base_url}/v1/models",
+ headers=headers,
+ timeout=45,
+ verify=TLS_VERIFY,
+ )
+ assert models_response.status_code == 200, (
+ f"OIDC token failed to list models: {models_response.status_code} {models_response.text}"
+ )
+
+ response_json = models_response.json()
+ assert response_json.get("object") == "list", f"Expected object=list, got: {response_json}"
+
+ items = response_json.get("data", [])
+ # Note: May be empty if the OIDC user has no group access to any subscriptions
+ # We only assert the request succeeds (200), not that models are returned
+ print(f"[oidc] OIDC token listed {len(items)} model(s) from /v1/models")
+
+ # If models are returned, verify they have subscription information
+ if items:
+ first_model = items[0]
+ assert "id" in first_model, "Model should have id field"
+ assert "subscriptions" in first_model, "Model should have subscriptions array"
+ print(f"[oidc] First model: {first_model['id']} with {len(first_model.get('subscriptions', []))} subscription(s)")
+
def test_minted_api_key_can_list_models_and_infer(self, maas_api_base_url: str):
token = _request_oidc_token()
api_key = _create_oidc_api_key(maas_api_base_url, token)["key"]
@@ -112,3 +147,67 @@ def test_minted_api_key_can_list_models_and_infer(self, maas_api_base_url: str):
)
print(f"[oidc] inference succeeded for {model_id} at {time.time()}")
+
+ def test_oidc_user_without_group_access_gets_empty_list(self, maas_api_base_url: str):
+ """Test that an OIDC user with no group access to any subscriptions gets an empty list.
+
+ This validates that group-based access control works correctly for OIDC tokens.
+ A user without group membership in any subscription should get 200 OK with an
+ empty model list (not 401 or 403).
+
+ Note: This test requires a user configured in the OIDC provider that has no
+ group memberships matching any subscription groups. If all OIDC users have
+ access (e.g., via system:authenticated), this test will be skipped.
+ """
+ # This test requires environment configuration for a user without group access
+ # Skip if not configured
+ username_no_access = os.environ.get("OIDC_USERNAME_NO_ACCESS", "")
+ password_no_access = os.environ.get("OIDC_PASSWORD_NO_ACCESS", "")
+
+ if not username_no_access or not password_no_access:
+ pytest.skip("OIDC_USERNAME_NO_ACCESS and OIDC_PASSWORD_NO_ACCESS not configured")
+
+ # Get token for user without group access
+ token_url = _required_env("OIDC_TOKEN_URL")
+ client_id = _required_env("OIDC_CLIENT_ID")
+
+ response = requests.post(
+ token_url,
+ data={
+ "grant_type": "password",
+ "client_id": client_id,
+ "username": username_no_access,
+ "password": password_no_access,
+ },
+ timeout=30,
+ verify=TLS_VERIFY,
+ )
+ assert response.status_code == 200, f"OIDC token request failed: {response.status_code} {response.text}"
+
+ token = response.json().get("access_token")
+ assert token, "OIDC token response missing access_token"
+
+ # Request models with token from user with no group access
+ models_response = requests.get(
+ f"{maas_api_base_url}/v1/models",
+ headers={"Authorization": f"Bearer {token}"},
+ timeout=45,
+ verify=TLS_VERIFY,
+ )
+
+ # Should get 200 OK, not 401/403
+ assert models_response.status_code == 200, (
+ f"Expected 200 for user without access, got {models_response.status_code}: {models_response.text}"
+ )
+
+ response_json = models_response.json()
+ assert response_json.get("object") == "list", f"Expected object=list, got: {response_json}"
+
+ items = response_json.get("data", [])
+ # Should return empty list (not null)
+ assert isinstance(items, list), f"Expected data to be a list, got {type(items)}"
+ assert len(items) == 0, (
+ f"Expected empty list for user without group access, got {len(items)} model(s)"
+ )
+
+ print("[oidc] User without group access correctly received empty list (200 OK)")