Skip to content

Commit 725f8fa

Browse files
committed
feat: add MetricsCollector interface, mock collector, e2e tests, and Helm NOTES.txt
- Extract MetricsCollector interface in pkg/gpu/collector.go for testability
- Add MockCollector in pkg/gpu/mock.go for GPU-free testing
- Update GPUExternalScaler to depend on interface instead of concrete type
- Add comprehensive e2e integration tests in tests/e2e/:
  - gRPC health check verification
  - IsActive with activation thresholds and profiles
  - GetMetricSpec for all configurations
  - GetMetrics across single/multi-GPU, all metric types
  - Scale-out/scale-in simulation
  - All pre-built profiles validation
- Add Helm chart NOTES.txt with post-install instructions
- Add test-e2e and helm-test Makefile targets

Signed-off-by: pmady <pavan4devops@gmail.com>
1 parent 8d0fb43 commit 725f8fa

6 files changed

Lines changed: 625 additions & 3 deletions

File tree

Makefile

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: build proto test lint clean docker-build docker-push docker-release deploy undeploy help
1+
.PHONY: build proto test test-e2e lint clean docker-build docker-push docker-release deploy undeploy helm-lint helm-template helm-test help
22

33
BINARY_NAME := keda-gpu-scaler
44
IMAGE_REPO := ghcr.io/pmady/keda-gpu-scaler
@@ -20,6 +20,9 @@ proto: ## Generate protobuf Go code
2020
test: ## Run unit tests
2121
go test -v -race ./pkg/...
2222

23+
test-e2e: ## Run e2e integration tests (no GPU required — uses mock collector)
24+
go test -v -tags=e2e -race ./tests/e2e/...
25+
2326
lint: ## Run linter
2427
golangci-lint run ./...
2528

@@ -52,3 +55,9 @@ helm-lint: ## Lint Helm chart
5255

5356
helm-template: ## Render Helm templates
5457
helm template keda-gpu-scaler deploy/helm/keda-gpu-scaler
58+
59+
helm-test: ## Validate Helm chart renders correctly with default and custom values
60+
helm lint deploy/helm/keda-gpu-scaler
61+
helm template keda-gpu-scaler deploy/helm/keda-gpu-scaler > /dev/null
62+
helm template keda-gpu-scaler deploy/helm/keda-gpu-scaler --set grpc.port=50051 --set logLevel=debug > /dev/null
63+
@echo "Helm chart validation passed"
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
keda-gpu-scaler has been deployed as a DaemonSet on GPU nodes.
2+
3+
Service: {{ include "keda-gpu-scaler.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local
4+
gRPC Port: {{ .Values.grpc.port }}
5+
6+
To use with KEDA, create a ScaledObject with an external trigger:
7+
8+
triggers:
9+
- type: external
10+
metadata:
11+
scalerAddress: "{{ include "keda-gpu-scaler.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.grpc.port }}"
12+
profile: "vllm-inference"
13+
14+
Available profiles: vllm-inference, triton-inference, training, batch
15+
16+
Verify the scaler pods are running on GPU nodes:
17+
18+
kubectl get pods -n {{ .Release.Namespace }} -l {{ include "keda-gpu-scaler.selectorLabels" . | replace ": " "=" | replace "\n" "," }}
19+
20+
Check gRPC health:
21+
22+
kubectl exec -n {{ .Release.Namespace }} <pod-name> -- grpc_health_probe -addr=:{{ .Values.grpc.port }}
23+
24+
Documentation: https://github.com/pmady/keda-gpu-scaler

pkg/gpu/collector.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
Copyright 2026 The keda-gpu-scaler Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package gpu
18+
19+
// MetricsCollector is the interface for collecting GPU metrics.
20+
// The real implementation uses NVML; tests can inject a mock.
21+
type MetricsCollector interface {
22+
CollectAll() ([]Metrics, error)
23+
CollectDevice(index int) (Metrics, error)
24+
DeviceCount() (int, error)
25+
Close() error
26+
}

pkg/gpu/mock.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
Copyright 2026 The keda-gpu-scaler Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package gpu
18+
19+
import "fmt"
20+
21+
// MockCollector implements MetricsCollector with configurable GPU metrics.
22+
// Used for unit tests, integration tests, and e2e tests without GPU hardware.
23+
type MockCollector struct {
24+
Devices []Metrics
25+
}
26+
27+
// NewMockCollector creates a mock collector with the given device metrics.
28+
func NewMockCollector(devices []Metrics) *MockCollector {
29+
return &MockCollector{Devices: devices}
30+
}
31+
32+
func (m *MockCollector) CollectAll() ([]Metrics, error) {
33+
return m.Devices, nil
34+
}
35+
36+
func (m *MockCollector) CollectDevice(index int) (Metrics, error) {
37+
if index < 0 || index >= len(m.Devices) {
38+
return Metrics{}, fmt.Errorf("device index %d out of range (0-%d)", index, len(m.Devices)-1)
39+
}
40+
return m.Devices[index], nil
41+
}
42+
43+
func (m *MockCollector) DeviceCount() (int, error) {
44+
return len(m.Devices), nil
45+
}
46+
47+
func (m *MockCollector) Close() error {
48+
return nil
49+
}

pkg/scaler/server.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@ import (
3232
// GPUExternalScaler implements the KEDA ExternalScaler gRPC interface.
3333
type GPUExternalScaler struct {
3434
pb.UnimplementedExternalScalerServer
35-
collector *gpu.Collector
35+
collector gpu.MetricsCollector
3636
logger *zap.Logger
3737
}
3838

3939
// NewGPUExternalScaler creates a new GPU external scaler server.
40-
func NewGPUExternalScaler(collector *gpu.Collector, logger *zap.Logger) *GPUExternalScaler {
40+
func NewGPUExternalScaler(collector gpu.MetricsCollector, logger *zap.Logger) *GPUExternalScaler {
4141
return &GPUExternalScaler{
4242
collector: collector,
4343
logger: logger,

0 commit comments

Comments
 (0)