Skip to content

Commit f068aff

Browse files
paigerube14Paige Patton
authored and committed
adding pod network back
Signed-off-by: Paige Patton <[email protected]>
1 parent 37ca4bb commit f068aff

36 files changed

+1026
-494
lines changed

.github/workflows/tests.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ jobs:
9999
echo "test_pod_network_filter" >> ./CI/tests/functional_tests
100100
echo "test_pod_server" >> ./CI/tests/functional_tests
101101
echo "test_time" >> ./CI/tests/functional_tests
102+
echo "test_cerberus" >> ./CI/tests/functional_tests
103+
echo "test_cerberus_unhealthy" >> ./CI/tests/functional_tests
102104
# echo "test_pvc" >> ./CI/tests/functional_tests
103105
104106
# Push on main only steps + all other functional to collect coverage

CI/templates/mock_cerberus.yaml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
apiVersion: v1
2+
kind: ConfigMap
3+
metadata:
4+
name: mock-cerberus-server
5+
namespace: default
6+
data:
7+
server.py: |
8+
#!/usr/bin/env python3
9+
from http.server import HTTPServer, BaseHTTPRequestHandler
10+
import json
11+
12+
class MockCerberusHandler(BaseHTTPRequestHandler):
    """HTTP handler imitating a cerberus server that reports a healthy cluster.

    Routes served by ``do_GET``:
      * ``/``           -> 200, ``text/plain`` body ``True``
      * ``/history...`` -> 200, JSON body with an empty ``failures`` list
      * anything else   -> 404 with no body
    """

    def do_GET(self):
        """Answer a GET request according to ``self.path``."""
        if self.path == '/':
            # Return True to indicate cluster is healthy
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            self.wfile.write(b'True')
        elif self.path.startswith('/history'):
            # Return empty history (no failures)
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            response = {
                "history": {
                    "failures": []
                }
            }
            self.wfile.write(json.dumps(response).encode())
        else:
            self.send_response(404)
            self.end_headers()

    def log_message(self, format, *args):
        """Print one access-log line to stdout, prefixed with ``[MockCerberus]``.

        The line is flushed immediately: when stdout is not a terminal Python
        block-buffers it, so without the flush the request lines may not be
        visible to whoever is reading the process output while the server is
        still running.
        """
        print(f"[MockCerberus] {format % args}", flush=True)
37+
38+
if __name__ == '__main__':
39+
server = HTTPServer(('0.0.0.0', 8080), MockCerberusHandler)
40+
print("[MockCerberus] Starting mock cerberus server on port 8080...")
41+
server.serve_forever()
42+
---
43+
apiVersion: v1
44+
kind: Pod
45+
metadata:
46+
name: mock-cerberus
47+
namespace: default
48+
labels:
49+
app: mock-cerberus
50+
spec:
51+
containers:
52+
- name: mock-cerberus
53+
image: python:3.9-slim
54+
command: ["python3", "/app/server.py"]
55+
ports:
56+
- containerPort: 8080
57+
name: http
58+
volumeMounts:
59+
- name: server-script
60+
mountPath: /app
61+
volumes:
62+
- name: server-script
63+
configMap:
64+
name: mock-cerberus-server
65+
defaultMode: 0755
66+
---
67+
apiVersion: v1
68+
kind: Service
69+
metadata:
70+
name: mock-cerberus
71+
namespace: default
72+
spec:
73+
selector:
74+
app: mock-cerberus
75+
ports:
76+
- protocol: TCP
77+
port: 8080
78+
targetPort: 8080
79+
type: ClusterIP
CI/templates/mock_cerberus_unhealthy.yaml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
apiVersion: v1
2+
kind: ConfigMap
3+
metadata:
4+
name: mock-cerberus-unhealthy-server
5+
namespace: default
6+
data:
7+
server.py: |
8+
#!/usr/bin/env python3
9+
from http.server import HTTPServer, BaseHTTPRequestHandler
10+
import json
11+
12+
class MockCerberusUnhealthyHandler(BaseHTTPRequestHandler):
    """HTTP handler imitating a cerberus server that reports an unhealthy cluster.

    Routes served by ``do_GET``:
      * ``/``           -> 200, ``text/plain`` body ``False``
      * ``/history...`` -> 200, JSON body listing one fixed node failure
      * anything else   -> 404 with no body
    """

    def do_GET(self):
        """Answer a GET request according to ``self.path``."""
        if self.path == '/':
            # Return False to indicate cluster is unhealthy
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            self.wfile.write(b'False')
        elif self.path.startswith('/history'):
            # Return history with failures
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            response = {
                "history": {
                    "failures": [
                        {
                            "component": "node",
                            "name": "test-node",
                            "timestamp": "2024-01-01T00:00:00Z"
                        }
                    ]
                }
            }
            self.wfile.write(json.dumps(response).encode())
        else:
            self.send_response(404)
            self.end_headers()

    def log_message(self, format, *args):
        """Print one access-log line to stdout, prefixed with ``[MockCerberusUnhealthy]``.

        The line is flushed immediately: when stdout is not a terminal Python
        block-buffers it, so without the flush the request lines may not be
        visible to whoever is reading the process output while the server is
        still running.
        """
        print(f"[MockCerberusUnhealthy] {format % args}", flush=True)
43+
44+
if __name__ == '__main__':
45+
server = HTTPServer(('0.0.0.0', 8080), MockCerberusUnhealthyHandler)
46+
print("[MockCerberusUnhealthy] Starting mock cerberus unhealthy server on port 8080...")
47+
server.serve_forever()
48+
---
49+
apiVersion: v1
50+
kind: Pod
51+
metadata:
52+
name: mock-cerberus-unhealthy
53+
namespace: default
54+
labels:
55+
app: mock-cerberus-unhealthy
56+
spec:
57+
containers:
58+
- name: mock-cerberus-unhealthy
59+
image: python:3.9-slim
60+
command: ["python3", "/app/server.py"]
61+
ports:
62+
- containerPort: 8080
63+
name: http
64+
volumeMounts:
65+
- name: server-script
66+
mountPath: /app
67+
volumes:
68+
- name: server-script
69+
configMap:
70+
name: mock-cerberus-unhealthy-server
71+
defaultMode: 0755
72+
---
73+
apiVersion: v1
74+
kind: Service
75+
metadata:
76+
name: mock-cerberus-unhealthy
77+
namespace: default
78+
spec:
79+
selector:
80+
app: mock-cerberus-unhealthy
81+
ports:
82+
- protocol: TCP
83+
port: 8080
84+
targetPort: 8080
85+
type: ClusterIP

CI/tests/test_cerberus.sh

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
set -xeEo pipefail
2+
3+
source CI/tests/common.sh
4+
5+
trap error ERR
6+
trap finish EXIT
7+
8+
#######################################
# Functional test: run kraken with cerberus enabled against a mock cerberus
# server that reports a healthy cluster.
# Globals:
#   scenario_type, scenario_file, post_config - exported before the config
#     template is rendered (presumably referenced by the template; confirm
#     against CI/config/common_test_config.yaml).
# Arguments:
#   None
# Outputs:
#   Progress banners, the rendered cerberus config section and the mock
#   server logs on stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 when the mock service exposes no usable ClusterIP.
#   Any other failing command aborts the script when errexit is active.
#######################################
function functional_test_cerberus {
  local -r manifest="CI/templates/mock_cerberus.yaml"
  local -r config="CI/config/cerberus_test_config.yaml"
  local mock_cerberus_ip

  echo "========================================"
  echo "Starting Cerberus Functional Test"
  echo "========================================"

  # Deploy mock cerberus server
  echo "Deploying mock cerberus server..."
  kubectl apply -f "$manifest"

  # Wait for mock cerberus pod to be ready
  echo "Waiting for mock cerberus to be ready..."
  kubectl wait --for=condition=ready pod -l app=mock-cerberus --timeout=300s

  # Verify mock cerberus service is accessible
  echo "Verifying mock cerberus service..."
  mock_cerberus_ip=$(kubectl get service mock-cerberus -o jsonpath='{.spec.clusterIP}')
  echo "Mock Cerberus IP: $mock_cerberus_ip"

  # Without an address the URL written below would be "http://:8080" and
  # kraken would fail later with a far less obvious error.
  if [[ -z "$mock_cerberus_ip" || "$mock_cerberus_ip" == "None" ]]; then
    echo "ERROR: service mock-cerberus has no ClusterIP" >&2
    return 1
  fi

  # Test cerberus endpoint from within the cluster.
  # Best-effort smoke check: a failure here is reported but does not abort.
  kubectl run cerberus-test --image=curlimages/curl:latest --rm -i --restart=Never -- \
    curl -s http://mock-cerberus.default.svc.cluster.local:8080/ || echo "Cerberus test curl completed"

  # Configure scenario for pod disruption with cerberus enabled
  export scenario_type="pod_disruption_scenarios"
  export scenario_file="scenarios/kind/pod_etcd.yml"
  export post_config=""

  # Generate config with cerberus enabled
  envsubst < CI/config/common_test_config.yaml > "$config"

  # Enable cerberus in the config (using yq jq-wrapper syntax with -i -y)
  yq -y -i '.cerberus.cerberus_enabled = true' "$config"
  yq -y -i ".cerberus.cerberus_url = \"http://${mock_cerberus_ip}:8080\"" "$config"

  echo "========================================"
  echo "Cerberus Configuration:"
  yq '.cerberus' "$config"
  echo "========================================"

  # Run kraken with cerberus enabled
  echo "Running kraken with cerberus integration..."
  python3 -m coverage run -a run_kraken.py -c "$config"

  # Show the mock server's request log so a reader can see whether cerberus
  # was queried (nothing is asserted on the log contents).
  echo "Checking mock cerberus logs..."
  kubectl logs -l app=mock-cerberus --tail=50

  # Cleanup (best-effort: a failed delete must not fail the test)
  echo "Cleaning up mock cerberus..."
  kubectl delete -f "$manifest" || true

  echo "========================================"
  echo "Cerberus functional test: Success"
  echo "========================================"
}
63+
64+
functional_test_cerberus
CI/tests/test_cerberus_unhealthy.sh

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
set -xeEo pipefail
2+
3+
source CI/tests/common.sh
4+
5+
trap error ERR
6+
trap finish EXIT
7+
8+
#######################################
# Functional test: run kraken with cerberus enabled against a mock cerberus
# server that reports an unhealthy cluster, with kraken.exit_on_failure
# set to false.
# Globals:
#   scenario_type, scenario_file, post_config - exported before the config
#     template is rendered (presumably referenced by the template; confirm
#     against CI/config/common_test_config.yaml).
# Arguments:
#   None
# Outputs:
#   Progress banners, the rendered config values and the mock server logs
#   on stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 when the mock service exposes no usable ClusterIP.
#   A non-zero kraken exit is only reported, never propagated.
#######################################
function functional_test_cerberus_unhealthy {
  local -r manifest="CI/templates/mock_cerberus_unhealthy.yaml"
  local -r config="CI/config/cerberus_unhealthy_test_config.yaml"
  local mock_cerberus_ip
  local exit_code=0

  echo "========================================"
  echo "Starting Cerberus Unhealthy Test"
  echo "========================================"

  # Deploy mock cerberus unhealthy server
  echo "Deploying mock cerberus unhealthy server..."
  kubectl apply -f "$manifest"

  # Wait for mock cerberus unhealthy pod to be ready
  echo "Waiting for mock cerberus unhealthy to be ready..."
  kubectl wait --for=condition=ready pod -l app=mock-cerberus-unhealthy --timeout=300s

  # Verify mock cerberus service is accessible
  echo "Verifying mock cerberus unhealthy service..."
  mock_cerberus_ip=$(kubectl get service mock-cerberus-unhealthy -o jsonpath='{.spec.clusterIP}')
  echo "Mock Cerberus Unhealthy IP: $mock_cerberus_ip"

  # Without an address the URL written below would be "http://:8080" and
  # kraken would fail later with a far less obvious error.
  if [[ -z "$mock_cerberus_ip" || "$mock_cerberus_ip" == "None" ]]; then
    echo "ERROR: service mock-cerberus-unhealthy has no ClusterIP" >&2
    return 1
  fi

  # Test cerberus endpoint from within the cluster (should return False).
  # Best-effort smoke check: a failure here is reported but does not abort.
  kubectl run cerberus-unhealthy-test --image=curlimages/curl:latest --rm -i --restart=Never -- \
    curl -s http://mock-cerberus-unhealthy.default.svc.cluster.local:8080/ || echo "Cerberus unhealthy test curl completed"

  # Configure scenario for pod disruption with cerberus enabled
  export scenario_type="pod_disruption_scenarios"
  export scenario_file="scenarios/kind/pod_etcd.yml"
  export post_config=""

  # Generate config with cerberus enabled
  envsubst < CI/config/common_test_config.yaml > "$config"

  # Enable cerberus in the config but DON'T exit_on_failure (so the test can verify the behavior)
  # Using yq jq-wrapper syntax with -i -y
  yq -y -i '.cerberus.cerberus_enabled = true' "$config"
  yq -y -i ".cerberus.cerberus_url = \"http://${mock_cerberus_ip}:8080\"" "$config"
  yq -y -i '.kraken.exit_on_failure = false' "$config"

  echo "========================================"
  echo "Cerberus Unhealthy Configuration:"
  yq '.cerberus' "$config"
  echo "exit_on_failure:"
  yq '.kraken.exit_on_failure' "$config"
  echo "========================================"

  # Run kraken with cerberus unhealthy (should detect unhealthy but not exit due to exit_on_failure=false)
  echo "Running kraken with cerberus unhealthy integration..."

  # We expect this to complete (not exit 1) because exit_on_failure is false,
  # but cerberus should log that the cluster is unhealthy.
  # NOTE(review): a non-zero kraken exit is deliberately tolerated here and
  # only reported, so this test cannot fail on kraken's exit status.
  python3 -m coverage run -a run_kraken.py -c "$config" || exit_code=$?
  if (( exit_code != 0 )); then
    echo "Kraken exited with code: $exit_code"
    # Exit code 1 is what kraken would return for an unhealthy cluster if
    # exit_on_failure were true; with it set to false this is unexpected.
    if (( exit_code == 1 )); then
      echo "WARNING: Kraken exited with 1, which may indicate cerberus detected unhealthy cluster"
    fi
  fi

  # Show the mock server's request log so a reader can see whether cerberus
  # was queried (nothing is asserted on the log contents).
  echo "Checking mock cerberus unhealthy logs..."
  kubectl logs -l app=mock-cerberus-unhealthy --tail=50

  # Cleanup (best-effort: a failed delete must not fail the test)
  echo "Cleaning up mock cerberus unhealthy..."
  kubectl delete -f "$manifest" || true

  echo "========================================"
  echo "Cerberus unhealthy functional test: Success"
  echo "========================================"
}
78+
79+
functional_test_cerberus_unhealthy

0 commit comments

Comments
 (0)