# Service and ServiceMonitor for Token Rate Limiting Metrics from Inference Gateway
# Configures Prometheus scraping of Istio Envoy proxy metrics with token usage relabeling
---
# Service that exposes port 15090 of the pods carrying the
# gateway.networking.k8s.io/gateway-name=inference-gateway label, under the
# named port "http-envoy-metrics".
apiVersion: v1
kind: Service
metadata:
  name: inference-gateway-envoy-metrics
  namespace: llm
  labels:
    app.kubernetes.io/component: inference-gateway
    gateway.networking.k8s.io/gateway-name: inference-gateway
spec:
  selector:
    gateway.networking.k8s.io/gateway-name: inference-gateway
  ports:
    # Named port; scrape configurations can reference it by this name.
    - name: http-envoy-metrics
      port: 15090
      targetPort: 15090
      protocol: TCP
---
# ServiceMonitor that scrapes /stats/prometheus on the "http-envoy-metrics"
# port of Services in the "llm" namespace labelled as the inference gateway,
# then rewrites the scraped series: user / group / namespace values embedded
# in the metric name are copied into labels, and the metric name is shortened
# to its fixed prefix.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: inference-gateway-envoy-metrics
  namespace: llm
  labels:
    environment: base
    project: models-aas-observability
    app.kubernetes.io/component: inference-gateway
    gateway.networking.k8s.io/gateway-name: inference-gateway
spec:
  namespaceSelector:
    matchNames: [llm]
  selector:
    # Both labels must be present on the target Service.
    matchLabels:
      app.kubernetes.io/component: inference-gateway
      gateway.networking.k8s.io/gateway-name: inference-gateway
  endpoints:
    - port: http-envoy-metrics
      path: /stats/prometheus
      interval: 15s
      scrapeTimeout: 10s
      honorLabels: true
      # Rules run in order. Each "replace" rule writes a single target label,
      # which is why every metric family needs one rule per extracted label.
      # The label-extraction rules must stay ahead of the rename rules,
      # because the rename discards the part of __name__ they read from.
      #
      # NOTE(review): the capture classes include "_" and "-", so a user or
      # group value that itself contains the "___group___" / "___namespace__"
      # separator would be split differently than intended - confirm the
      # producer cannot emit such values.
      # NOTE(review): the third rule of each family overwrites the "namespace"
      # label on matching series - confirm this is intended.
      metricRelabelings:
        # Extract labels from token_usage_* metrics
        - action: replace
          sourceLabels: [__name__]
          regex: 'token_usage_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+).*'
          targetLabel: user
          replacement: '$1'
        - action: replace
          sourceLabels: [__name__]
          regex: 'token_usage_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+).*'
          targetLabel: group
          replacement: '$2'
        - action: replace
          sourceLabels: [__name__]
          regex: 'token_usage_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+).*'
          targetLabel: namespace
          replacement: '$3'
        # Rename token usage metrics to clean names
        - action: replace
          sourceLabels: [__name__]
          regex: '(token_usage_with_user_and_group)__.*$'
          targetLabel: __name__
          replacement: '$1'
        # Extract labels from authorized_* metrics
        # (unlike the token_usage pattern, these have no trailing ".*")
        - action: replace
          sourceLabels: [__name__]
          regex: 'authorized_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: user
          replacement: '$1'
        - action: replace
          sourceLabels: [__name__]
          regex: 'authorized_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: group
          replacement: '$2'
        - action: replace
          sourceLabels: [__name__]
          regex: 'authorized_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: namespace
          replacement: '$3'
        # Extract labels from limited_* metrics
        - action: replace
          sourceLabels: [__name__]
          regex: 'limited_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: user
          replacement: '$1'
        - action: replace
          sourceLabels: [__name__]
          regex: 'limited_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: group
          replacement: '$2'
        - action: replace
          sourceLabels: [__name__]
          regex: 'limited_calls_with_user_and_group__user___([A-Za-z0-9_-]+)___group___([A-Za-z0-9_-]+)___namespace__([A-Za-z0-9_-]+)'
          targetLabel: namespace
          replacement: '$3'
        # Rename call metrics to clean names
        - action: replace
          sourceLabels: [__name__]
          regex: '(authorized_calls_with_user_and_group)__.*$'
          targetLabel: __name__
          replacement: '$1'
        - action: replace
          sourceLabels: [__name__]
          regex: '(limited_calls_with_user_and_group)__.*$'
          targetLabel: __name__
          replacement: '$1'