Skip to content

Commit 2f39412

Browse files
authored
Merge pull request #139 from amaanx86/138-add-probes-and-resource-limits
Issue#138 add probes and resource limits
2 parents 2eb08b9 + 11faf4a commit 2f39412

File tree

5 files changed

+126
-1
lines changed

5 files changed

+126
-1
lines changed

helm/oci-native-ingress-controller/templates/deployment.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,24 @@ spec:
8686
protocol: TCP
8787
- name: metrics-server
8888
containerPort: 2223
89+
readinessProbe:
90+
httpGet:
91+
path: /healthz/ready
92+
port: metrics-server
93+
scheme: HTTP
94+
initialDelaySeconds: 30
95+
periodSeconds: 10
96+
timeoutSeconds: 5
97+
failureThreshold: 3
98+
livenessProbe:
99+
httpGet:
100+
path: /healthz/live
101+
port: metrics-server
102+
scheme: HTTP
103+
initialDelaySeconds: 60
104+
periodSeconds: 20
105+
timeoutSeconds: 5
106+
failureThreshold: 3
89107
resources:
90108
{{- toYaml .Values.resources | nindent 12 }}
91109
volumeMounts:

helm/oci-native-ingress-controller/values.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ objectSelector:
120120
matchLabels:
121121
# key: value
122122

123+
# Metrics server configuration
124+
# Health probes for operational reliability and Cloud Guard compliance
125+
# Readiness probe: HTTP GET /healthz/ready on metrics-server port (initialDelaySeconds: 30)
126+
# Liveness probe: HTTP GET /healthz/live on metrics-server port (initialDelaySeconds: 60)
123127
metrics:
124128
backend: prometheus
125129
port: 2223

main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,5 +279,10 @@ func setupInformers(informerFactory informers.SharedInformerFactory, ctx context
279279

280280
klog.Fatal("failed to sync informers")
281281
}
282+
283+
// Mark caches as synced for health checks
284+
healthChecker := server.GetHealthChecker()
285+
healthChecker.SetCachesSynced(true)
286+
282287
return ingressClassInformer, ingressInformer, serviceInformer, secretInformer, endpointInformer, podInformer, nodeInformer, serviceAccountInformer
283288
}

pkg/server/health.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
*
3+
* * OCI Native Ingress Controller
4+
* *
5+
* * Copyright (c) 2023 Oracle America, Inc. and its affiliates.
6+
* * Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
7+
*
8+
*/
9+
10+
package server
11+
12+
import (
13+
"encoding/json"
14+
"net/http"
15+
"sync"
16+
"sync/atomic"
17+
)
18+
19+
// HealthChecker holds the two startup flags reported by the health
// endpoints. Both flags are atomics, so setters and HTTP handlers may be
// called concurrently without additional locking.
type HealthChecker struct {
	cachesSynced     atomic.Bool
	controllersReady atomic.Bool
}

// HealthStatus is the JSON document written by the health endpoints.
type HealthStatus struct {
	Status           string `json:"status"`
	CachesSynced     bool   `json:"cachesSynced"`
	ControllersReady bool   `json:"controllersReady"`
}

var (
	// globalHealthChecker is the process-wide instance handed out by
	// GetHealthChecker.
	globalHealthChecker *HealthChecker
	// globalHealthCheckerOnce guards the lazy creation of
	// globalHealthChecker so concurrent first callers share one instance.
	globalHealthCheckerOnce sync.Once
)

// NewHealthChecker returns a HealthChecker with both flags set to false.
func NewHealthChecker() *HealthChecker {
	return &HealthChecker{}
}

// GetHealthChecker returns the shared process-wide HealthChecker, creating
// it on first use. It is safe to call from multiple goroutines.
func GetHealthChecker() *HealthChecker {
	globalHealthCheckerOnce.Do(func() {
		// Keep an instance that was assigned directly before the first call.
		if globalHealthChecker == nil {
			globalHealthChecker = NewHealthChecker()
		}
	})
	return globalHealthChecker
}

// SetCachesSynced records whether the caches are considered synced.
func (hc *HealthChecker) SetCachesSynced(synced bool) {
	hc.cachesSynced.Store(synced)
}

// SetControllersReady records whether the controllers are considered ready.
func (hc *HealthChecker) SetControllersReady(ready bool) {
	hc.controllersReady.Store(ready)
}

// HandleReadiness serves the readiness endpoint. It answers 200 with status
// "healthy" only when both the caches-synced and controllers-ready flags are
// set; otherwise it answers 503 with status "unhealthy". The body is always
// a JSON-encoded HealthStatus.
func (hc *HealthChecker) HandleReadiness(w http.ResponseWriter, r *http.Request) {
	status := HealthStatus{
		Status:           "unhealthy",
		CachesSynced:     hc.cachesSynced.Load(),
		ControllersReady: hc.controllersReady.Load(),
	}

	code := http.StatusServiceUnavailable
	if status.CachesSynced && status.ControllersReady {
		status.Status = "healthy"
		code = http.StatusOK
	}

	writeHealthStatus(w, code, status)
}

// HandleLiveness serves the liveness endpoint. It always answers 200 with
// status "alive"; the flag values are included in the body for information
// only and do not affect the response code.
func (hc *HealthChecker) HandleLiveness(w http.ResponseWriter, r *http.Request) {
	status := HealthStatus{
		Status:           "alive",
		CachesSynced:     hc.cachesSynced.Load(),
		ControllersReady: hc.controllersReady.Load(),
	}

	writeHealthStatus(w, http.StatusOK, status)
}

// writeHealthStatus writes status as a JSON response with the given code.
func writeHealthStatus(w http.ResponseWriter, code int, status HealthStatus) {
	// Marshal before touching the response so an encoding failure can still
	// be reported with a proper 500.
	body, err := json.Marshal(status)
	if err != nil {
		http.Error(w, "failed to encode health status", http.StatusInternalServerError)
		return
	}

	// Headers must be set before WriteHeader; anything set afterwards is
	// silently dropped by net/http.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(code)

	// Trailing newline keeps the body identical to json.Encoder output.
	// A write error here means the client went away; nothing to recover.
	_, _ = w.Write(append(body, '\n'))
}

pkg/server/server.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ package server
1212
import (
1313
"context"
1414
"crypto/tls"
15-
"github.com/oracle/oci-native-ingress-controller/pkg/task/certificatecleanup"
1615
"net/http"
1716
"os"
1817
"time"
1918

19+
"github.com/oracle/oci-native-ingress-controller/pkg/task/certificatecleanup"
20+
2021
ctrcache "sigs.k8s.io/controller-runtime/pkg/cache"
2122
"sigs.k8s.io/controller-runtime/pkg/webhook"
2223

@@ -163,6 +164,13 @@ func SetUpControllers(opts types.IngressOpts, ingressClassInformer networkinginf
163164
)
164165
go certificateCleanUpTask.Run(ctx.Done())
165166
}
167+
168+
// Mark controllers as ready for health checks
169+
go func() {
170+
time.Sleep(2 * time.Second) // Give controllers a moment to start
171+
GetHealthChecker().SetControllersReady(true)
172+
klog.Info("Controllers marked as ready for health checks")
173+
}()
166174
}
167175
}
168176

@@ -234,5 +242,10 @@ func SetupMetricsServer(metricsBackend string, metricsPort int, mux *http.ServeM
234242
}
235243
metric.RegisterMetrics(reg, mux)
236244

245+
// Register health check endpoints
246+
hc := GetHealthChecker()
247+
mux.HandleFunc("/healthz/ready", hc.HandleReadiness)
248+
mux.HandleFunc("/healthz/live", hc.HandleLiveness)
249+
237250
return reg, nil
238251
}

0 commit comments

Comments
 (0)