Skip to content

Commit dacc0d3

Browse files
Add Upstreams tab (#15)
1 parent 0ba6501 commit dacc0d3

19 files changed

+1668
-28
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Caddy exposes rich metrics through its admin API and Prometheus endpoint, but re
2525
- Sorting, filtering, and full-screen ASCII graphs (CPU, RPS, RSS)
2626
- Config Inspector tab: browse the live Caddy JSON config as a collapsible tree
2727
- Certificates tab: TLS certificate monitoring with expiry tracking, color-coded warnings, and likely auto-renewal indication
28+
- Upstreams tab: reverse proxy upstream health monitoring with per-upstream status, auto-detected when `reverse_proxy` is configured
2829
- Automatic Caddy restart detection
2930

3031
**FrankenPHP Introspection**

docs/caddy-dashboard.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,26 @@ Navigation in the Caddy Config tab:
6666
| `E` | Collapse all nodes |
6767
| `r` | Refresh config from Caddy |
6868

69+
## Upstreams
70+
71+
The **Upstreams** tab appears automatically when Caddy exposes `caddy_reverse_proxy_upstreams_healthy` metrics, which happens when at least one `reverse_proxy` handler is configured.
72+
73+
The table shows one row per upstream:
74+
75+
| Column | Description |
76+
|--------|-------------|
77+
| **Upstream** | Upstream address (host:port) |
78+
| **Check** | Active health check URI and interval (e.g. `/health @5s`), extracted from Caddy config |
79+
| **LB** | Load balancing policy (e.g. `round_robin`, `least_conn`), extracted from Caddy config |
80+
| **Health** | Health status: `● healthy` or `○ down` |
81+
| **Down** | Duration since the upstream went down (e.g. `5s`, `2m30s`, `1h5m`) |
82+
83+
A `!` suffix on the health status indicates a state change since the previous poll (e.g. an upstream just went down or recovered).
84+
85+
The Check and LB columns are populated from the Caddy config when the tab first appears. Press `r` to refresh the config data.
86+
87+
Press `s`/`S` to sort by address or health status. Press `/` to filter by address or handler name.
88+
6989
## Graphs
7090

7191
Press `g` to toggle full-screen graphs showing:

docs/json-output.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ Each line is a JSON object with the following fields:
8181
| `hosts[].statusCodes` | Status code → rate (req/s) |
8282
| `hosts[].methodRates` | HTTP method → rate (req/s) |
8383
| `hosts[].avgRequestSize` | Average request body size in bytes (omitted when 0) |
84+
| `upstreams` | Reverse proxy upstream health (omitted when no `reverse_proxy` is configured) |
85+
| `upstreams[].address` | Upstream address (host:port) |
86+
| `upstreams[].handler` | Reverse proxy handler name (omitted when Caddy doesn't expose the label) |
87+
| `upstreams[].healthy` | Whether the upstream is healthy |
88+
| `upstreams[].healthChanged` | Whether health status changed since last poll (omitted when false) |
8489

8590
## Single Snapshot
8691

internal/app/json.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ type jsonOutput struct {
2020
Errors []string `json:"errors,omitempty"`
2121
Derived *jsonDerived `json:"derived,omitempty"`
2222
Hosts []jsonHost `json:"hosts,omitempty"`
23+
Upstreams []jsonUpstream `json:"upstreams,omitempty"`
24+
}
25+
26+
// jsonUpstream is the JSON-line representation of one reverse proxy
// upstream's health. Handler and HealthChanged use omitempty so they are
// dropped when Caddy doesn't expose the handler label / when the health
// state did not change since the previous poll.
type jsonUpstream struct {
	Address       string `json:"address"`
	Handler       string `json:"handler,omitempty"`
	Healthy       bool   `json:"healthy"`
	HealthChanged bool   `json:"healthChanged,omitempty"`
}
2432

2533
type jsonThreadsResponse struct {
@@ -146,6 +154,15 @@ func buildJSONOutput(snap *fetcher.Snapshot, state *model.State) jsonOutput {
146154
out.Hosts = append(out.Hosts, jh)
147155
}
148156

157+
for _, ud := range state.UpstreamDerived {
158+
out.Upstreams = append(out.Upstreams, jsonUpstream{
159+
Address: ud.Address,
160+
Handler: ud.Handler,
161+
Healthy: ud.Healthy,
162+
HealthChanged: ud.HealthChanged,
163+
})
164+
}
165+
149166
sanitizeForJSON(&out)
150167

151168
return out

internal/app/status.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,16 @@ func formatStatusLine(state *model.State, hasFrankenPHP bool) string {
174174
parts = append(parts, fmt.Sprintf("up %s", model.FormatUptime(snap.Process.Uptime)))
175175
}
176176

177+
if upCount := len(snap.Metrics.Upstreams); upCount > 0 {
178+
healthy := 0
179+
for _, u := range snap.Metrics.Upstreams {
180+
if u.Healthy >= 1 {
181+
healthy++
182+
}
183+
}
184+
parts = append(parts, fmt.Sprintf("%d/%d upstreams healthy", healthy, upCount))
185+
}
186+
177187
if hasFrankenPHP {
178188
total := d.TotalBusy + d.TotalIdle
179189
fpPart := fmt.Sprintf("FrankenPHP %d/%d busy", d.TotalBusy, total)

internal/fetcher/fetcher.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,19 @@ type WorkerMetrics struct {
3737
QueueDepth float64 `json:"queueDepth"`
3838
}
3939

40+
// UpstreamMetrics represents a single Caddy reverse proxy upstream health entry.
//
// Address is the dial target (e.g. "backend1:80"). It is not unique on its own
// when Caddy exports the same address from multiple handlers: in that case the
// parser disambiguates by combining Address and Handler, so consumers that need
// a stable identity should use both fields together. Handler is empty when
// Caddy omits the label (the common case for caddy_reverse_proxy_upstreams_healthy).
// Healthy mirrors the gauge value: 1.0 when healthy, 0.0 when down.
type UpstreamMetrics struct {
	Address string  `json:"address"`
	Handler string  `json:"handler,omitempty"`
	Healthy float64 `json:"healthy"`
}
52+
4053
type HostMetrics struct {
4154
Host string `json:"host"`
4255
RequestsTotal float64 `json:"requestsTotal"`
@@ -75,6 +88,9 @@ type MetricsSnapshot struct {
7588
// Per-host Caddy HTTP metrics
7689
Hosts map[string]*HostMetrics `json:"hosts,omitempty"`
7790

91+
// Caddy reverse proxy upstream health
92+
Upstreams map[string]*UpstreamMetrics `json:"upstreams,omitempty"`
93+
7894
// Go runtime process metrics (from standard Prometheus collector)
7995
ProcessCPUSecondsTotal float64 `json:"processCpuSecondsTotal,omitempty"`
8096
ProcessRSSBytes float64 `json:"processRssBytes,omitempty"`
@@ -121,6 +137,19 @@ type CertificateInfo struct {
121137
AutoRenew bool
122138
}
123139

140+
// ReverseProxyConfig describes one reverse_proxy handler as extracted from
// the live Caddy JSON config: its load-balancing policy, active health
// check settings, and the upstreams it dials.
type ReverseProxyConfig struct {
	Handler        string
	LBPolicy       string
	HealthURI      string
	HealthInterval string
	Upstreams      []ReverseProxyUpstreamConfig
}

// ReverseProxyUpstreamConfig describes a single configured upstream of a
// reverse_proxy handler (configuration, not live health).
// MaxRequests presumably maps to Caddy's per-upstream max_requests limit —
// TODO confirm against the config extraction code.
type ReverseProxyUpstreamConfig struct {
	Address     string
	MaxRequests int
}
152+
124153
type Fetcher interface {
125154
Fetch(ctx context.Context) (*Snapshot, error)
126155
}

internal/fetcher/prometheus.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ func parsePrometheusMetrics(r io.Reader) (snap MetricsSnapshot, err error) {
8080
snap.ConfigLastReloadSuccessTimestamp = scalarValue(families, "caddy_config_last_reload_success_timestamp_seconds")
8181

8282
snap.Hosts = perHostMetrics(families)
83+
snap.Upstreams = upstreamMetrics(families)
8384

8485
// Fallback: if HTTP metrics exist but no host labels, aggregate as a single "*" entry
8586
if snap.HasHTTPMetrics && len(snap.Hosts) == 0 {
@@ -395,6 +396,36 @@ func perHostMetrics(families map[string]*dto.MetricFamily) map[string]*HostMetri
395396
return hosts
396397
}
397398

399+
func upstreamMetrics(families map[string]*dto.MetricFamily) map[string]*UpstreamMetrics {
400+
fam, ok := families["caddy_reverse_proxy_upstreams_healthy"]
401+
if !ok {
402+
return nil
403+
}
404+
405+
upstreams := make(map[string]*UpstreamMetrics)
406+
for _, m := range fam.GetMetric() {
407+
addr := labelValue(m, "upstream")
408+
if addr == "" {
409+
continue
410+
}
411+
handler := labelValue(m, "handler")
412+
key := addr
413+
if handler != "" {
414+
key = addr + "/" + handler
415+
}
416+
upstreams[key] = &UpstreamMetrics{
417+
Address: addr,
418+
Handler: handler,
419+
Healthy: metricValue(m),
420+
}
421+
}
422+
423+
if len(upstreams) == 0 {
424+
return nil
425+
}
426+
return upstreams
427+
}
428+
398429
func (s *MetricsSnapshot) getOrCreateWorker(name string) *WorkerMetrics {
399430
wm, ok := s.Workers[name]
400431
if !ok {

internal/fetcher/prometheus_test.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,3 +734,74 @@ frankenphp_busy_threads 5
734734
assert.Equal(t, float64(0), snap.ProcessRSSBytes)
735735
assert.Equal(t, float64(0), snap.ProcessStartTimeSeconds)
736736
}
737+
738+
// sampleUpstreamMetrics is a Prometheus exposition fixture for upstream
// health parsing. Caddy exports caddy_reverse_proxy_upstreams_healthy with
// only two labels: upstream and handler. The current release even omits
// handler entirely, so this fixture exercises both shapes.
const sampleUpstreamMetrics = `# HELP caddy_reverse_proxy_upstreams_healthy Health status of reverse proxy upstreams
# TYPE caddy_reverse_proxy_upstreams_healthy gauge
caddy_reverse_proxy_upstreams_healthy{upstream="10.0.0.1:8080"} 1
caddy_reverse_proxy_upstreams_healthy{upstream="10.0.0.2:8080"} 1
caddy_reverse_proxy_upstreams_healthy{upstream="10.0.0.3:8080"} 0
caddy_reverse_proxy_upstreams_healthy{handler="reverse_proxy_1",upstream="api.internal:9090"} 1
`
748+
749+
func TestParsePrometheusMetrics_Upstreams(t *testing.T) {
750+
snap, err := parsePrometheusMetrics(strings.NewReader(sampleUpstreamMetrics))
751+
require.NoError(t, err)
752+
require.Len(t, snap.Upstreams, 4)
753+
754+
u1 := snap.Upstreams["10.0.0.1:8080"]
755+
require.NotNil(t, u1)
756+
assert.Equal(t, "10.0.0.1:8080", u1.Address)
757+
assert.Empty(t, u1.Handler, "current Caddy omits the handler label")
758+
assert.Equal(t, float64(1), u1.Healthy)
759+
760+
u3 := snap.Upstreams["10.0.0.3:8080"]
761+
require.NotNil(t, u3)
762+
assert.Equal(t, float64(0), u3.Healthy)
763+
764+
api := snap.Upstreams["api.internal:9090/reverse_proxy_1"]
765+
require.NotNil(t, api)
766+
assert.Equal(t, "api.internal:9090", api.Address)
767+
assert.Equal(t, "reverse_proxy_1", api.Handler)
768+
assert.Equal(t, float64(1), api.Healthy)
769+
}
770+
771+
func TestParsePrometheusMetrics_NoUpstreams(t *testing.T) {
772+
snap, err := parsePrometheusMetrics(strings.NewReader(sampleMetrics))
773+
require.NoError(t, err)
774+
assert.Nil(t, snap.Upstreams)
775+
}
776+
777+
func TestParsePrometheusMetrics_UpstreamsWithHandlerLabel(t *testing.T) {
778+
input := `# TYPE caddy_reverse_proxy_upstreams_healthy gauge
779+
caddy_reverse_proxy_upstreams_healthy{handler="rp_0",upstream="a:80"} 1
780+
caddy_reverse_proxy_upstreams_healthy{handler="rp_1",upstream="a:80"} 0
781+
`
782+
snap, err := parsePrometheusMetrics(strings.NewReader(input))
783+
require.NoError(t, err)
784+
require.Len(t, snap.Upstreams, 2, "same address with different handlers should be distinct entries")
785+
786+
u0 := snap.Upstreams["a:80/rp_0"]
787+
require.NotNil(t, u0)
788+
assert.Equal(t, "rp_0", u0.Handler)
789+
assert.Equal(t, float64(1), u0.Healthy)
790+
791+
u1 := snap.Upstreams["a:80/rp_1"]
792+
require.NotNil(t, u1)
793+
assert.Equal(t, "rp_1", u1.Handler)
794+
assert.Equal(t, float64(0), u1.Healthy)
795+
}
796+
797+
func TestParsePrometheusMetrics_UpstreamsEmptyLabel(t *testing.T) {
798+
input := `# HELP caddy_reverse_proxy_upstreams_healthy Health status
799+
# TYPE caddy_reverse_proxy_upstreams_healthy gauge
800+
caddy_reverse_proxy_upstreams_healthy{handler="rp",upstream=""} 1
801+
caddy_reverse_proxy_upstreams_healthy{handler="rp",upstream="valid:8080"} 1
802+
`
803+
snap, err := parsePrometheusMetrics(strings.NewReader(input))
804+
require.NoError(t, err)
805+
require.Len(t, snap.Upstreams, 1)
806+
assert.Contains(t, snap.Upstreams, "valid:8080/rp")
807+
}

0 commit comments

Comments
 (0)