Skip to content

Commit ac4ff76

Browse files
authored
Merge pull request #7 from nerdalert/tokenratelimit
Add TokenRateLimitPolicy
2 parents 51c7fb3 + d70e9d1 commit ac4ff76

16 files changed

+3538
-0
lines changed

deployment/kuadrant-openshift/01-kserve-config-openshift.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,27 @@ data:
2525
"cpuRequest": "200m",
2626
"cpuLimit": "1"
2727
}
28+
# Three entries added beneath the ConfigMap's `data:` mapping. Each value is a
# JSON document stored as a literal block scalar, and all three carry the same
# image and resource settings.
# NOTE(review): the enclosing `data:` key is outside this view - indent these
# keys to match it when applying.

# logger: image kserve/agent:v0.15.2, requests 100m CPU / 100Mi memory,
# limits 1 CPU / 1Gi memory.
logger: |
  {
    "image": "kserve/agent:v0.15.2",
    "memoryRequest": "100Mi",
    "memoryLimit": "1Gi",
    "cpuRequest": "100m",
    "cpuLimit": "1"
  }

# batcher: same image and resource values as `logger`.
batcher: |
  {
    "image": "kserve/agent:v0.15.2",
    "memoryRequest": "100Mi",
    "memoryLimit": "1Gi",
    "cpuRequest": "100m",
    "cpuLimit": "1"
  }

# agent: same image and resource values as `logger`.
agent: |
  {
    "image": "kserve/agent:v0.15.2",
    "memoryRequest": "100Mi",
    "memoryLimit": "1Gi",
    "cpuRequest": "100m",
    "cpuLimit": "1"
  }
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Gateway-level Token Rate Limiting Policy for OpenShift
2+
# Automatically tracks tokens from response bodies (usage.total_tokens)
3+
# Uses the same user groups as the auth policy configuration
4+
---
# TokenRateLimitPolicy targeting the `inference-gateway` Gateway in the `llm`
# namespace. It declares two named limits; each one is guarded by a predicate
# on the caller's groups and uses `auth.identity.userid` as its counter
# expression.
apiVersion: kuadrant.io/v1alpha1
kind: TokenRateLimitPolicy
metadata:
  name: gateway-token-rate-limits
  namespace: llm
spec:
  targetRef:
    group: gateway.networking.k8s.io
    kind: Gateway
    name: inference-gateway
  limits:
    # Limit of 10000 per 1m window, applied when the comma-separated
    # `auth.identity.groups` value contains the entry "free".
    free-user-tokens:
      rates:
        - limit: 10000
          window: 1m
      when:
        - predicate: |
            auth.identity.groups.split(",").exists(g, g == "free")
      counters:
        - expression: auth.identity.userid

    # Limit of 50000 per 1m window, applied when the comma-separated
    # `auth.identity.groups` value contains the entry "premium".
    premium-user-tokens:
      rates:
        - limit: 50000
          window: 1m
      when:
        - predicate: |
            auth.identity.groups.split(",").exists(g, g == "premium")
      counters:
        - expression: auth.identity.userid
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# Service and ServiceMonitor for Token Rate Limiting Metrics from Inference Gateway
2+
# Configures Prometheus scraping of Istio Envoy proxy metrics with token usage relabeling
3+
---
# Service `inference-gateway-envoy-metrics` in the `llm` namespace.
# It selects pods by the gateway-name label and exposes TCP port 15090 under
# the port name `http-envoy-metrics`.
apiVersion: v1
kind: Service
metadata:
  name: inference-gateway-envoy-metrics
  namespace: llm
  labels:
    app.kubernetes.io/component: inference-gateway
    gateway.networking.k8s.io/gateway-name: inference-gateway
spec:
  selector:
    gateway.networking.k8s.io/gateway-name: inference-gateway
  ports:
    - name: http-envoy-metrics
      protocol: TCP
      port: 15090
      targetPort: 15090
20+
---
# ServiceMonitor `inference-gateway-envoy-metrics` in the `llm` namespace.
# It selects Services in `llm` carrying both labels under `matchLabels` and
# scrapes the `http-envoy-metrics` port at /stats/prometheus every 15s.
#
# The metric relabelings run in the order listed. For each metric family
# (token_usage, authorized_calls, limited_calls) the user, group and namespace
# fragments embedded in the metric name are first copied into labels of the
# same name; the metric is then renamed to the bare family name. The rename
# rules must stay after the extraction rules that read the long name.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: inference-gateway-envoy-metrics
  namespace: llm
  labels:
    environment: base
    project: models-aas-observability
    app.kubernetes.io/component: inference-gateway
    gateway.networking.k8s.io/gateway-name: inference-gateway
spec:
  namespaceSelector:
    matchNames:
      - llm
  selector:
    matchLabels:
      app.kubernetes.io/component: inference-gateway
      gateway.networking.k8s.io/gateway-name: inference-gateway
  endpoints:
    - port: http-envoy-metrics
      path: /stats/prometheus
      interval: 15s
      scrapeTimeout: 10s
      honorLabels: true
      metricRelabelings:
        # --- token_usage_with_user_and_group: name fragments -> labels ---
        # Capture group 1 -> `user`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'token_usage_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+).*'
          targetLabel: user
          replacement: '$1'
        # Capture group 2 -> `group`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'token_usage_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+).*'
          targetLabel: group
          replacement: '$2'
        # Capture group 3 -> `namespace`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'token_usage_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+).*'
          targetLabel: namespace
          replacement: '$3'
        # Strip everything after the family name from the metric name.
        - action: replace
          sourceLabels:
            - __name__
          regex: '(token_usage_with_user_and_group)__.*$'
          targetLabel: __name__
          replacement: '$1'

        # --- authorized_calls_with_user_and_group: name fragments -> labels ---
        # Capture group 1 -> `user`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'authorized_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: user
          replacement: '$1'
        # Capture group 2 -> `group`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'authorized_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: group
          replacement: '$2'
        # Capture group 3 -> `namespace`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'authorized_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: namespace
          replacement: '$3'

        # --- limited_calls_with_user_and_group: name fragments -> labels ---
        # Capture group 1 -> `user`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'limited_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: user
          replacement: '$1'
        # Capture group 2 -> `group`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'limited_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: group
          replacement: '$2'
        # Capture group 3 -> `namespace`
        - action: replace
          sourceLabels:
            - __name__
          regex: 'limited_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: namespace
          replacement: '$3'

        # --- Rename both call metrics to their bare family names ---
        - action: replace
          sourceLabels:
            - __name__
          regex: '(authorized_calls_with_user_and_group)__.*$'
          targetLabel: __name__
          replacement: '$1'
        - action: replace
          sourceLabels:
            - __name__
          regex: '(limited_calls_with_user_and_group)__.*$'
          targetLabel: __name__
          replacement: '$1'
110+

0 commit comments

Comments
 (0)