Skip to content

Commit fa2dd40

Browse files
Expose topology summaries on health endpoints
1 parent aac6c73 commit fa2dd40

4 files changed

Lines changed: 95 additions & 5 deletions

File tree

README.md

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -495,8 +495,8 @@ workflow-task command payload.
495495
## API Overview
496496

497497
### System
498-
- `GET /api/health` — Health check
499-
- `GET /api/ready` — Readiness check for migrations, default namespace, cache, auth config, and workflow v2 rollout-safety health
498+
- `GET /api/health` — Health check plus a machine-readable topology summary for the current node
499+
- `GET /api/ready` — Readiness check for migrations, default namespace, cache, auth config, workflow v2 rollout-safety health, and the current node topology summary
500500
- `GET /api/cluster/info` — Server capabilities, role topology, coordination-health summary, and version
501501
- `GET /api/system/health` — Full rollout-safety health snapshot for the requested namespace, including check status, categories, routing-drain state, operator metrics, and structural limits
502502
- `GET /api/system/metrics` — Server metrics including bounded stuck workflow-task diagnostics
@@ -703,7 +703,12 @@ class. Nodes that do not host the server's current HTTP control surface return
703703
`503` with `reason: "topology_role_unavailable"` on role-gated routes instead
704704
of pretending to be interchangeable HTTP peers. `GET /api/cluster/info`,
705705
`/api/health`, and `/api/ready` stay available for discovery and liveness even
706-
on scheduler-only, execution-only, or matching-only nodes.
706+
on scheduler-only, execution-only, or matching-only nodes. The unauthenticated
707+
health and readiness probes publish the current node's topology summary
708+
(`schema`, `version`, `current_shape`, `current_process_class`,
709+
`current_roles`, `execution_mode`, and `matching_role`) so operators can
710+
identify split-role nodes without authenticating into the broader
711+
`/api/cluster/info` manifest.
707712

708713
Those runtime-serving write and poll routes also fail closed on bootstrap
709714
blockers. If database connectivity or workflow-table migrations are not ready,

app/Http/Controllers/Api/HealthController.php

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ public function check(): JsonResponse
4747
'checks' => [
4848
'database' => $dbHealthy ? 'ok' : 'unavailable',
4949
],
50+
'topology' => ServerTopology::healthSummary(),
5051
], $dbHealthy ? 200 : 503);
5152
}
5253

@@ -59,6 +60,7 @@ public function ready(): JsonResponse
5960
'status' => $ready ? 'ready' : 'not_ready',
6061
'timestamp' => now()->toIso8601String(),
6162
'checks' => $snapshot['checks'],
63+
'topology' => ServerTopology::healthSummary(),
6264
], $ready ? 200 : 503);
6365
}
6466

app/Support/ServerTopology.php

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,24 @@ public static function info(): array
6262
];
6363
}
6464

65+
/**
* Build the topology summary for the current node.
*
* Combines the SCHEMA and VERSION constants with the shape, process class,
* and roles reported by currentNode(), plus the values returned by
* executionMode() and matchingRole(). The health and readiness controller
* actions embed this array under their "topology" response key.
*
66+
* @return array<string, mixed>
67+
*/
68+
public static function healthSummary(): array
69+
{
70+
$currentNode = self::currentNode();
71+
72+
return [
73+
'schema' => self::SCHEMA,
74+
'version' => self::VERSION,
75+
'current_shape' => $currentNode['shape'],
76+
'current_process_class' => $currentNode['process_class'],
77+
'current_roles' => $currentNode['roles'],
78+
'execution_mode' => self::executionMode(),
79+
'matching_role' => self::matchingRole(),
80+
];
81+
}
82+
6583
/**
6684
* @param array<string, array{process_classes: list<array{name: string, roles: list<string>}>}>|null $shapeAssignments
6785
* @return array{shape: string, process_class: string, roles: list<string>}

tests/Feature/HealthControllerTest.php

Lines changed: 67 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
namespace Tests\Feature;
66

77
use App\Models\WorkflowNamespace;
8+
use App\Support\ServerTopology;
89
use Illuminate\Foundation\Testing\RefreshDatabase;
910
use Illuminate\Support\Facades\DB;
1011
use Illuminate\Support\Facades\Schema;
@@ -23,7 +24,33 @@ public function test_health_check_returns_serving_when_database_is_available():
2324
$response->assertOk()
2425
->assertJsonPath('status', 'serving')
2526
->assertJsonPath('checks.database', 'ok')
26-
->assertJsonStructure(['status', 'timestamp', 'checks' => ['database']]);
27+
->assertJsonPath('topology.schema', ServerTopology::SCHEMA)
28+
->assertJsonPath('topology.version', ServerTopology::VERSION)
29+
->assertJsonPath('topology.current_shape', 'standalone_server')
30+
->assertJsonPath('topology.current_process_class', 'server_http_node')
31+
->assertJsonPath('topology.execution_mode', 'remote_worker_protocol')
32+
->assertJsonPath('topology.matching_role.shape', 'in_worker')
33+
->assertJsonStructure([
34+
'status',
35+
'timestamp',
36+
'checks' => ['database'],
37+
'topology' => [
38+
'schema',
39+
'version',
40+
'current_shape',
41+
'current_process_class',
42+
'current_roles',
43+
'execution_mode',
44+
'matching_role' => [
45+
'queue_wake_enabled',
46+
'shape',
47+
'wake_owner',
48+
'task_dispatch_mode',
49+
'partition_primitives',
50+
'backpressure_model',
51+
],
52+
],
53+
]);
2754
}
2855

2956
public function test_health_check_returns_degraded_when_database_is_unavailable(): void
@@ -95,7 +122,45 @@ public function test_readiness_check_returns_ready_when_bootstrap_state_is_avail
95122
->assertJsonPath('checks.cache.status', 'ok')
96123
->assertJsonPath('checks.auth.status', 'ok')
97124
->assertJsonPath('checks.workflow_v2.status', 'ok')
98-
->assertJsonPath('checks.workflow_v2.http_status', 200);
125+
->assertJsonPath('checks.workflow_v2.http_status', 200)
126+
->assertJsonPath('topology.schema', ServerTopology::SCHEMA)
127+
->assertJsonPath('topology.version', ServerTopology::VERSION)
128+
->assertJsonPath('topology.current_shape', 'standalone_server')
129+
->assertJsonPath('topology.current_process_class', 'server_http_node')
130+
->assertJsonPath('topology.execution_mode', 'remote_worker_protocol')
131+
->assertJsonPath('topology.matching_role.task_dispatch_mode', 'poll');
132+
}
133+
134+
public function test_public_health_endpoints_publish_topology_for_split_execution_nodes(): void
135+
{
// Checks that /api/health and /api/ready both report the configured
// split topology inside their "topology" payload.
136+
// The README lists the default namespace among the readiness checks,
// so create it before asserting that /api/ready responds OK.
WorkflowNamespace::query()->create([
137+
'name' => 'default',
138+
'description' => 'Default namespace',
139+
'retention_days' => 30,
140+
'status' => 'active',
141+
]);
142+
143+
// Override the topology config so this node reports as the
// execution_node process class of the split_control_execution shape.
config([
144+
'server.topology.shape' => 'split_control_execution',
145+
'server.topology.process_class' => 'execution_node',
146+
]);
147+
148+
$this->getJson('/api/health')
149+
->assertOk()
150+
->assertJsonPath('topology.schema', ServerTopology::SCHEMA)
151+
->assertJsonPath('topology.current_shape', 'split_control_execution')
152+
->assertJsonPath('topology.current_process_class', 'execution_node')
153+
->assertJsonPath('topology.current_roles.0', 'execution_plane')
154+
->assertJsonPath('topology.execution_mode', 'remote_worker_protocol')
155+
->assertJsonPath('topology.matching_role.backpressure_model', 'lease_ownership');
156+
157+
$this->getJson('/api/ready')
158+
->assertOk()
159+
->assertJsonPath('topology.schema', ServerTopology::SCHEMA)
160+
->assertJsonPath('topology.current_shape', 'split_control_execution')
161+
->assertJsonPath('topology.current_process_class', 'execution_node')
162+
->assertJsonPath('topology.current_roles.0', 'execution_plane')
163+
->assertJsonPath('topology.matching_role.shape', 'in_worker');
99164
}
100165

101166
public function test_readiness_check_warns_when_existing_create_table_migration_only_needs_adoption(): void

0 commit comments

Comments
 (0)