workload-variant-autoscaler/config/samples/keda/scaledobject.yaml at abe1f689adca2a6a93f55c362347524a052e886c · opendatahub-io/workload-variant-autoscaler · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Example KEDA ScaledObject that scales a Deployment from the WVA
# desired-replicas metric (wva_desired_replicas) scraped by Prometheus.
# Requires VariantAutoscaling for sample-deployment (see variantautoscaling-integration.yaml).
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: sample-deployment-scaler
  namespace: llm-d-sim
  labels:
    app: sample-deployment
    scaler: keda-workload-variant-autoscaler
spec:
  # Workload whose replica count this ScaledObject manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: sample-deployment
  # Timing fields below are expressed in seconds.
  pollingInterval: 5
  cooldownPeriod: 30
  initialCooldownPeriod: 30
  # NOTE(review): minReplicaCount is not set, so KEDA's default applies
  # (0 per the KEDA docs, i.e. scale-to-zero is allowed) - confirm intended.
  maxReplicaCount: 10
  # Replica count to fall back to after `failureThreshold` consecutive
  # failures to read the trigger metric.
  # NOTE(review): KEDA documents fallback as depending on the trigger's
  # metricType (see the trigger below) - verify for the KEDA version in use.
  fallback:
    failureThreshold: 3
    replicas: 2
    behavior: "currentReplicasIfHigher"
  advanced:
    restoreToOriginalReplicaCount: false
    horizontalPodAutoscalerConfig:
      # Explicit name for the HPA object KEDA creates for this ScaledObject.
      name: wva-keda-hpa-sample-deployment
      # Standard HPA v2 scaling behavior: no stabilization window in either
      # direction, so the HPA follows the metric without smoothing.
      behavior:
        scaleDown:
          stabilizationWindowSeconds: 0
          policies:
          - type: Percent
            value: 100
            periodSeconds: 30
          - type: Pods
            value: 5
            periodSeconds: 15
        scaleUp:
          stabilizationWindowSeconds: 0
          policies:
          - type: Percent
            value: 100
            periodSeconds: 30
          - type: Pods
            value: 5
            periodSeconds: 15
  triggers:
  - type: prometheus
    name: wva-desired-replicas
    # metricType is a trigger-level field (sibling of `metadata`), not a
    # scaler metadata key; it was previously nested under `metadata`.
    # With AverageValue and threshold '1', the HPA target is one unit of the
    # metric per pod, so the replica count tracks the metric value.
    metricType: AverageValue
    metadata:
      serverAddress: https://kube-prometheus-stack-prometheus.workload-variant-autoscaler-monitoring.svc.cluster.local:9090
      # Selects the desired-replicas series for this variant and namespace.
      query: |
        wva_desired_replicas{
          variant_name="sample-deployment",
          exported_namespace="llm-d-sim"
        }
      threshold: '1'
      activationThreshold: '0'
      # WARNING: disables TLS certificate verification for the Prometheus
      # endpoint. Acceptable for a sample; use a trusted CA in production.
      unsafeSsl: "true"