Skip to content

Commit 604196d

Browse files
Add Upstreams tab
1 parent 096d82a commit 604196d

22 files changed

+1607
-28
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Caddy exposes rich metrics through its admin API and Prometheus endpoint, but re
2525
- Sorting, filtering, and full-screen ASCII graphs (CPU, RPS, RSS)
2626
- Config Inspector tab: browse the live Caddy JSON config as a collapsible tree
2727
- Certificates tab: TLS certificate monitoring with expiry tracking, color-coded warnings, and likely auto-renewal indication
28+
- Upstreams tab: reverse proxy upstream health monitoring with per-upstream status, auto-detected when `reverse_proxy` is configured
2829
- Automatic Caddy restart detection
2930

3031
**FrankenPHP Introspection**

docs/caddy-dashboard.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,26 @@ Navigation in the Caddy Config tab:
6666
| `E` | Collapse all nodes |
6767
| `r` | Refresh config from Caddy |
6868

69+
## Upstreams
70+
71+
The **Upstreams** tab appears automatically when Caddy exposes `caddy_reverse_proxy_upstreams_healthy` metrics, which happens when at least one `reverse_proxy` handler is configured.
72+
73+
The table shows one row per upstream:
74+
75+
| Column | Description |
76+
|--------|-------------|
77+
| **Upstream** | Upstream address (host:port) |
78+
| **Check** | Active health check URI and interval (e.g. `/ @2s`), extracted from Caddy config |
79+
| **LB** | Load balancing policy (e.g. `round_robin`, `least_conn`), extracted from Caddy config |
80+
| **Health** | Health status: `● healthy` or `○ down` |
81+
| **Down** | Duration since the upstream went down (e.g. `5s`, `2m30s`, `1h5m`) |
82+
83+
A `!` suffix on the health status indicates a state change since the previous poll (e.g. an upstream just went down or recovered).
84+
85+
The Check and LB columns are populated from the Caddy config when the tab first appears. Press `r` to refresh the config data.
86+
87+
Press `s`/`S` to sort by address or health status. Press `/` to filter by address or handler name.
88+
6989
## Graphs
7090

7191
Press `g` to toggle full-screen graphs showing:

docs/json-output.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ Each line is a JSON object with the following fields:
8181
| `hosts[].statusCodes` | Status code → rate (req/s) |
8282
| `hosts[].methodRates` | HTTP method → rate (req/s) |
8383
| `hosts[].avgRequestSize` | Average request body size in bytes (omitted when 0) |
84+
| `upstreams` | Reverse proxy upstream health (omitted when no `reverse_proxy` is configured) |
85+
| `upstreams[].address` | Upstream address (host:port) |
86+
| `upstreams[].handler` | Reverse proxy handler name |
87+
| `upstreams[].healthy` | Whether the upstream is healthy |
88+
| `upstreams[].healthChanged` | Whether health status changed since last poll (omitted when false) |
8489

8590
## Single Snapshot
8691

docs/prometheus-export.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ This re-reads `--ca-cert`, `--client-cert`, and `--client-key` files and applies
9090
| `ember_host_status_rate` | gauge | `host`, `class` (`2xx`, `3xx`, `4xx`, `5xx`) | Request rate by status class |
9191
| `ember_host_error_rate` | gauge | `host` | Middleware error rate (handler-level errors, distinct from HTTP status codes) |
9292

93+
### Caddy Reverse Proxy Upstreams
94+
95+
| Metric | Type | Labels | Description |
96+
|--------|------|--------|-------------|
97+
| `caddy_reverse_proxy_upstream_healthy` | gauge | `upstream`, `handler` | Whether the upstream is healthy (1) or down (0) |
98+
99+
These metrics are only emitted when Caddy has at least one `reverse_proxy` handler configured.
100+
93101
### Caddy Config Reload
94102

95103
| Metric | Type | Description |

internal/app/json.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ type jsonOutput struct {
2020
Errors []string `json:"errors,omitempty"`
2121
Derived *jsonDerived `json:"derived,omitempty"`
2222
Hosts []jsonHost `json:"hosts,omitempty"`
23+
Upstreams []jsonUpstream `json:"upstreams,omitempty"`
24+
}
25+
26+
// jsonUpstream is the NDJSON/snapshot representation of a single
// reverse proxy upstream's health state, built from
// model.UpstreamDerived in buildJSONOutput.
type jsonUpstream struct {
	// Address is the upstream dial target (host:port).
	Address string `json:"address"`
	// Handler is the reverse proxy handler name; may be empty when
	// Caddy omits the label.
	Handler string `json:"handler"`
	// Healthy reports whether the upstream is currently healthy.
	Healthy bool `json:"healthy"`
	// HealthChanged is set when health flipped since the previous
	// poll; omitted from output when false.
	HealthChanged bool `json:"healthChanged,omitempty"`
}
2432

2533
type jsonThreadsResponse struct {
@@ -146,6 +154,15 @@ func buildJSONOutput(snap *fetcher.Snapshot, state *model.State) jsonOutput {
146154
out.Hosts = append(out.Hosts, jh)
147155
}
148156

157+
for _, ud := range state.UpstreamDerived {
158+
out.Upstreams = append(out.Upstreams, jsonUpstream{
159+
Address: ud.Address,
160+
Handler: ud.Handler,
161+
Healthy: ud.Healthy,
162+
HealthChanged: ud.HealthChanged,
163+
})
164+
}
165+
149166
sanitizeForJSON(&out)
150167

151168
return out

internal/app/status.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,16 @@ func formatStatusLine(state *model.State, hasFrankenPHP bool) string {
174174
parts = append(parts, fmt.Sprintf("up %s", model.FormatUptime(snap.Process.Uptime)))
175175
}
176176

177+
if upCount := len(snap.Metrics.Upstreams); upCount > 0 {
178+
healthy := 0
179+
for _, u := range snap.Metrics.Upstreams {
180+
if u.Healthy >= 1 {
181+
healthy++
182+
}
183+
}
184+
parts = append(parts, fmt.Sprintf("%d/%d upstreams healthy", healthy, upCount))
185+
}
186+
177187
if hasFrankenPHP {
178188
total := d.TotalBusy + d.TotalIdle
179189
fpPart := fmt.Sprintf("FrankenPHP %d/%d busy", d.TotalBusy, total)

internal/exporter/exporter.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ func Handler(holder *StateHolder, prefix ...string) http.HandlerFunc {
5151
writeThreadMemory(w, &s, p)
5252
writeWorkerMetrics(w, &s, p)
5353
writeHostMetrics(w, &s, p)
54+
writeUpstreamMetrics(w, &s, p)
5455
writeErrorMetrics(w, &s, p)
5556
writePercentiles(w, &s, p)
5657
writeProcessMetrics(w, &s, p)
@@ -245,6 +246,32 @@ func sortedHostNames(hosts []model.HostDerived) []model.HostDerived {
245246
return sorted
246247
}
247248

249+
func writeUpstreamMetrics(w http.ResponseWriter, s *model.State, prefix string) {
250+
if len(s.UpstreamDerived) == 0 {
251+
return
252+
}
253+
254+
sorted := make([]model.UpstreamDerived, len(s.UpstreamDerived))
255+
copy(sorted, s.UpstreamDerived)
256+
slices.SortFunc(sorted, func(a, b model.UpstreamDerived) int {
257+
if c := cmp.Compare(a.Address, b.Address); c != 0 {
258+
return c
259+
}
260+
return cmp.Compare(a.Handler, b.Handler)
261+
})
262+
263+
name := prefixed(prefix, "caddy_reverse_proxy_upstream_healthy")
264+
fmt.Fprintf(w, "# HELP %s Whether the upstream is healthy\n", name)
265+
fmt.Fprintf(w, "# TYPE %s gauge\n", name)
266+
for _, u := range sorted {
267+
v := 0
268+
if u.Healthy {
269+
v = 1
270+
}
271+
fmt.Fprintf(w, "%s{upstream=\"%s\",handler=\"%s\"} %d\n", name, escapeLabelValue(u.Address), escapeLabelValue(u.Handler), v)
272+
}
273+
}
274+
248275
func writeErrorMetrics(w http.ResponseWriter, s *model.State, prefix string) {
249276
hasErrors := false
250277
for _, hd := range s.HostDerived {

internal/exporter/exporter_test.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,3 +672,83 @@ func TestBasicAuth_InvalidUser(t *testing.T) {
672672

673673
assert.Equal(t, http.StatusUnauthorized, rec.Code)
674674
}
675+
676+
func stateWithUpstreams(upstreams []model.UpstreamDerived) model.State {
677+
s := stateWithThreads(nil, nil)
678+
s.UpstreamDerived = upstreams
679+
return s
680+
}
681+
682+
func TestHandler_UpstreamMetrics(t *testing.T) {
683+
upstreams := []model.UpstreamDerived{
684+
{Address: "10.0.0.1:8080", Handler: "rp", Healthy: true},
685+
{Address: "10.0.0.2:8080", Handler: "rp", Healthy: false},
686+
{Address: "api.internal:9090", Handler: "rp_1", Healthy: true},
687+
}
688+
holder := &StateHolder{}
689+
holder.Store(stateWithUpstreams(upstreams))
690+
691+
body := get(holder).Body.String()
692+
693+
assert.Contains(t, body, "# HELP caddy_reverse_proxy_upstream_healthy")
694+
assert.Contains(t, body, "# TYPE caddy_reverse_proxy_upstream_healthy gauge")
695+
assert.Contains(t, body, `caddy_reverse_proxy_upstream_healthy{upstream="10.0.0.1:8080",handler="rp"} 1`)
696+
assert.Contains(t, body, `caddy_reverse_proxy_upstream_healthy{upstream="10.0.0.2:8080",handler="rp"} 0`)
697+
assert.Contains(t, body, `caddy_reverse_proxy_upstream_healthy{upstream="api.internal:9090",handler="rp_1"} 1`)
698+
}
699+
700+
func TestHandler_UpstreamMetrics_SkippedWhenEmpty(t *testing.T) {
701+
holder := &StateHolder{}
702+
holder.Store(stateWithThreads(nil, nil))
703+
704+
body := get(holder).Body.String()
705+
assert.NotContains(t, body, "caddy_reverse_proxy_upstream_healthy")
706+
}
707+
708+
func TestHandler_UpstreamMetrics_SortedDeterministic(t *testing.T) {
709+
upstreams := []model.UpstreamDerived{
710+
{Address: "z.internal:8080", Handler: "rp", Healthy: true},
711+
{Address: "a.internal:8080", Handler: "rp", Healthy: true},
712+
{Address: "m.internal:8080", Handler: "rp", Healthy: false},
713+
}
714+
holder := &StateHolder{}
715+
holder.Store(stateWithUpstreams(upstreams))
716+
717+
body1 := get(holder).Body.String()
718+
body2 := get(holder).Body.String()
719+
require.Equal(t, body1, body2, "output should be deterministic")
720+
}
721+
722+
func TestHandler_UpstreamMetrics_ValidPrometheus(t *testing.T) {
723+
upstreams := []model.UpstreamDerived{
724+
{Address: "10.0.0.1:8080", Handler: "rp", Healthy: true},
725+
{Address: "10.0.0.2:8080", Handler: "rp", Healthy: false},
726+
}
727+
holder := &StateHolder{}
728+
holder.Store(stateWithUpstreams(upstreams))
729+
730+
rec := get(holder)
731+
require.Equal(t, http.StatusOK, rec.Code)
732+
733+
parser := expfmt.NewTextParser(prommodel.UTF8Validation)
734+
families, err := parser.TextToMetricFamilies(rec.Body)
735+
require.NoError(t, err, "output must be valid Prometheus text format")
736+
assert.Contains(t, families, "caddy_reverse_proxy_upstream_healthy")
737+
}
738+
739+
func TestHandler_UpstreamMetrics_WithPrefix(t *testing.T) {
740+
upstreams := []model.UpstreamDerived{
741+
{Address: "10.0.0.1:8080", Handler: "rp", Healthy: true},
742+
}
743+
holder := &StateHolder{}
744+
holder.Store(stateWithUpstreams(upstreams))
745+
746+
rec := getWithPrefix(holder, "prod")
747+
require.Equal(t, http.StatusOK, rec.Code)
748+
749+
parser := expfmt.NewTextParser(prommodel.UTF8Validation)
750+
families, err := parser.TextToMetricFamilies(rec.Body)
751+
require.NoError(t, err)
752+
assert.Contains(t, families, "prod_caddy_reverse_proxy_upstream_healthy")
753+
assert.NotContains(t, families, "caddy_reverse_proxy_upstream_healthy")
754+
}

internal/fetcher/fetcher.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,19 @@ type WorkerMetrics struct {
3737
QueueDepth float64 `json:"queueDepth"`
3838
}
3939

40+
// UpstreamMetrics represents a single Caddy reverse proxy upstream health entry.
41+
// Address is the dial target (e.g. "backend1:80"). It is not unique on its own
42+
// when Caddy exports the same address from multiple handlers: in that case the
43+
// parser disambiguates by combining Address and Handler, so consumers that need
44+
// a stable identity should use both fields together. Handler is empty when
45+
// Caddy omits the label (the common case for caddy_reverse_proxy_upstreams_healthy).
46+
// Healthy is 1.0 when healthy, 0.0 when down.
47+
type UpstreamMetrics struct {
48+
Address string `json:"address"`
49+
Handler string `json:"handler,omitempty"`
50+
Healthy float64 `json:"healthy"`
51+
}
52+
4053
type HostMetrics struct {
4154
Host string `json:"host"`
4255
RequestsTotal float64 `json:"requestsTotal"`
@@ -75,6 +88,9 @@ type MetricsSnapshot struct {
7588
// Per-host Caddy HTTP metrics
7689
Hosts map[string]*HostMetrics `json:"hosts,omitempty"`
7790

91+
// Caddy reverse proxy upstream health
92+
Upstreams map[string]*UpstreamMetrics `json:"upstreams,omitempty"`
93+
7894
// Go runtime process metrics (from standard Prometheus collector)
7995
ProcessCPUSecondsTotal float64 `json:"processCpuSecondsTotal,omitempty"`
8096
ProcessRSSBytes float64 `json:"processRssBytes,omitempty"`
@@ -121,6 +137,19 @@ type CertificateInfo struct {
121137
AutoRenew bool
122138
}
123139

140+
// ReverseProxyConfig describes a single reverse_proxy handler as
// extracted from the Caddy JSON config: its load-balancing policy,
// active health check settings, and configured upstreams. Fields are
// empty when the corresponding setting is absent from the config.
type ReverseProxyConfig struct {
	// Handler is the reverse proxy handler name.
	Handler string
	// LBPolicy is the load balancing policy (e.g. "round_robin").
	LBPolicy string
	// HealthURI is the active health check URI, if configured.
	HealthURI string
	// HealthInterval is the active health check interval, if configured.
	HealthInterval string
	// Upstreams lists the upstreams configured for this handler.
	Upstreams []ReverseProxyUpstreamConfig
}

// ReverseProxyUpstreamConfig is a single upstream entry within a
// reverse_proxy handler's configuration.
type ReverseProxyUpstreamConfig struct {
	// Address is the upstream dial target (host:port).
	Address string
	// MaxRequests is the configured max concurrent requests limit;
	// 0 presumably means unlimited — confirm against the config parser.
	MaxRequests int
}
152+
124153
type Fetcher interface {
125154
Fetch(ctx context.Context) (*Snapshot, error)
126155
}

internal/fetcher/prometheus.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ func parsePrometheusMetrics(r io.Reader) (snap MetricsSnapshot, err error) {
2626
}
2727

2828
snap = MetricsSnapshot{
29-
Workers: make(map[string]*WorkerMetrics),
30-
Hosts: make(map[string]*HostMetrics),
29+
Workers: make(map[string]*WorkerMetrics),
30+
Hosts: make(map[string]*HostMetrics),
31+
Upstreams: make(map[string]*UpstreamMetrics),
3132
}
3233

3334
snap.TotalThreads = scalarValue(families, "frankenphp_total_threads")
@@ -80,6 +81,7 @@ func parsePrometheusMetrics(r io.Reader) (snap MetricsSnapshot, err error) {
8081
snap.ConfigLastReloadSuccessTimestamp = scalarValue(families, "caddy_config_last_reload_success_timestamp_seconds")
8182

8283
snap.Hosts = perHostMetrics(families)
84+
snap.Upstreams = upstreamMetrics(families)
8385

8486
// Fallback: if HTTP metrics exist but no host labels, aggregate as a single "*" entry
8587
if snap.HasHTTPMetrics && len(snap.Hosts) == 0 {
@@ -395,6 +397,36 @@ func perHostMetrics(families map[string]*dto.MetricFamily) map[string]*HostMetri
395397
return hosts
396398
}
397399

400+
func upstreamMetrics(families map[string]*dto.MetricFamily) map[string]*UpstreamMetrics {
401+
fam, ok := families["caddy_reverse_proxy_upstreams_healthy"]
402+
if !ok {
403+
return nil
404+
}
405+
406+
upstreams := make(map[string]*UpstreamMetrics)
407+
for _, m := range fam.GetMetric() {
408+
addr := labelValue(m, "upstream")
409+
if addr == "" {
410+
continue
411+
}
412+
handler := labelValue(m, "handler")
413+
key := addr
414+
if handler != "" {
415+
key = addr + "/" + handler
416+
}
417+
upstreams[key] = &UpstreamMetrics{
418+
Address: addr,
419+
Handler: handler,
420+
Healthy: metricValue(m),
421+
}
422+
}
423+
424+
if len(upstreams) == 0 {
425+
return nil
426+
}
427+
return upstreams
428+
}
429+
398430
func (s *MetricsSnapshot) getOrCreateWorker(name string) *WorkerMetrics {
399431
wm, ok := s.Workers[name]
400432
if !ok {

0 commit comments

Comments
 (0)