Skip to content

Commit 3acd7a5

Browse files
authored
feat(controller,ui): experimental Envoy credential injector behind per-instance opt-in flag (#346)
Signed-off-by: Tomas Pilar <thomas7pilar@gmail.com>
1 parent a92ed23 commit 3acd7a5

37 files changed

Lines changed: 2054 additions & 242 deletions

File tree

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{{- if and .Values.controller .Values.controller.envoyMitm .Values.controller.envoyMitm.enabled }}
2+
{{/*
3+
The humr MITM CA. Self-signed via the bootstrap issuer; the resulting Secret
4+
in cert-manager's cluster-resource-namespace backs the humr-mitm-ca-issuer
5+
ClusterIssuer.
6+
7+
cert-manager only honours `ca:` ClusterIssuers that reference a Secret in the
8+
namespace passed to its --cluster-resource-namespace flag (default
9+
`cert-manager`). Override .Values.controller.envoyMitm.caSecretNamespace to
10+
match if your install differs.
11+
*/}}
12+
apiVersion: cert-manager.io/v1
13+
kind: Certificate
14+
metadata:
15+
name: {{ .Values.controller.envoyMitm.caSecretName }}
16+
namespace: {{ .Values.controller.envoyMitm.caSecretNamespace }}
17+
labels:
18+
{{- include "humr.labels" . | nindent 4 }}
19+
app.kubernetes.io/component: envoy-mitm
20+
spec:
21+
isCA: true
22+
commonName: humr MITM CA
23+
secretName: {{ .Values.controller.envoyMitm.caSecretName }}
24+
duration: {{ .Values.controller.envoyMitm.caDuration }}
25+
renewBefore: {{ .Values.controller.envoyMitm.caRenewBefore }}
26+
privateKey:
27+
algorithm: ECDSA
28+
size: 256
29+
issuerRef:
30+
name: {{ .Values.controller.envoyMitm.bootstrapIssuerName }}
31+
kind: ClusterIssuer
32+
group: cert-manager.io
33+
{{- end }}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{{- if and .Values.controller .Values.controller.envoyMitm .Values.controller.envoyMitm.enabled }}
2+
{{/*
3+
Cluster issuers backing the experimental Envoy credential-injector path
4+
(ADR-033). The bootstrap issuer signs the humr-mitm-ca Certificate; the CA
5+
issuer then signs per-instance leaf certs that Envoy uses to terminate the
6+
agent's TLS for credential injection.
7+
8+
ClusterIssuers are cluster-scoped — names collide globally. Override via
9+
.Values.controller.envoyMitm.{bootstrapIssuerName,caIssuerName} if needed.
10+
*/}}
11+
apiVersion: cert-manager.io/v1
12+
kind: ClusterIssuer
13+
metadata:
14+
name: {{ .Values.controller.envoyMitm.bootstrapIssuerName }}
15+
labels:
16+
{{- include "humr.labels" . | nindent 4 }}
17+
app.kubernetes.io/component: envoy-mitm
18+
spec:
19+
selfSigned: {}
20+
---
21+
apiVersion: cert-manager.io/v1
22+
kind: ClusterIssuer
23+
metadata:
24+
name: {{ .Values.controller.envoyMitm.caIssuerName }}
25+
labels:
26+
{{- include "humr.labels" . | nindent 4 }}
27+
app.kubernetes.io/component: envoy-mitm
28+
spec:
29+
ca:
30+
secretName: {{ .Values.controller.envoyMitm.caSecretName }}
31+
{{- end }}

deploy/helm/humr/templates/controller/deployment.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ spec:
7777
value: {{ .Values.controller.idleTimeout | default "1h" | quote }}
7878
- name: HUMR_TERMINATION_GRACE_PERIOD
7979
value: {{ .Values.controller.terminationGracePeriod | default 5 | quote }}
80+
- name: ENVOY_IMAGE
81+
value: {{ .Values.controller.envoyImage | default "envoyproxy/envoy-distroless:v1.32.0" | quote }}
82+
- name: ENVOY_PORT
83+
value: {{ .Values.controller.envoyPort | default 10000 | quote }}
8084
- name: POD_NAME
8185
valueFrom:
8286
fieldRef:

deploy/helm/humr/templates/controller/rbac.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ rules:
6060
- apiGroups: ["coordination.k8s.io"]
6161
resources: ["leases"]
6262
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
63+
{{- if and .Values.controller .Values.controller.envoyMitm .Values.controller.envoyMitm.enabled }}
64+
- apiGroups: ["cert-manager.io"]
65+
resources: ["certificates"]
66+
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
67+
{{- end }}
6368
---
6469
apiVersion: rbac.authorization.k8s.io/v1
6570
{{- if $clusterScoped }}

deploy/helm/humr/values.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,33 @@ controller:
267267
idleTimeout: "1h"
268268
# -- Termination grace period in seconds for agent pods
269269
terminationGracePeriod: 5
270+
# -- Image for the experimental Envoy credential-injector sidecar (ADR-033). Renders
271+
# only on instances with `experimentalCredentialInjector: true`.
272+
envoyImage: envoyproxy/envoy-distroless:v1.32.0
273+
# -- Port the Envoy sidecar listens on inside the agent pod (proxy on 127.0.0.1).
274+
envoyPort: 10000
275+
# -- TLS interception support for the experimental credential injector. When enabled,
276+
# the chart provisions a self-signed CA via cert-manager and a CA ClusterIssuer
277+
# (`humr-mitm-ca-issuer`) that the controller uses to mint per-instance leaf
278+
# certs for Envoy to terminate the agent's TLS and inject credential headers.
279+
# Requires cert-manager to be installed in the cluster (humr's cluster:install
280+
# task does this).
281+
envoyMitm:
282+
enabled: true
283+
# -- Names of the bootstrap and CA cluster issuers. Cluster-scoped, so consider
284+
# collisions with other charts.
285+
bootstrapIssuerName: humr-selfsigned-bootstrap
286+
caIssuerName: humr-mitm-ca-issuer
287+
# -- Where the CA Secret lives. Must equal cert-manager's
288+
# --cluster-resource-namespace flag (default `cert-manager`).
289+
caSecretNamespace: cert-manager
290+
caSecretName: humr-mitm-ca
291+
# -- CA validity. cert-manager will renew before expiry automatically.
292+
caDuration: 87600h # 10y
293+
caRenewBefore: 720h # 30d
294+
# -- Per-instance leaf cert validity.
295+
leafDuration: 2160h # 90d
296+
leafRenewBefore: 360h # 15d
270297
resources:
271298
requests:
272299
cpu: 100m

deploy/helm/tasks.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ run = "helm lint ."
77

88
["helm:check:render"]
99
dir = "{{config_root}}/deploy/helm/humr"
10-
run = "helm template humr . | kubeconform -strict -summary -skip Certificate,Issuer"
10+
run = "helm template humr . | kubeconform -strict -summary -skip Certificate,Issuer,ClusterIssuer"
1111

1212
["helm:setup"]
1313
alias = "helm:build"

docs/architecture/platform-topology.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Platform topology
22

3-
Last verified: 2026-04-27
3+
Last verified: 2026-04-28
44

55
## Motivated by
66

@@ -12,6 +12,7 @@ Last verified: 2026-04-27
1212
- [ADR-012 — Runtime lifetime](../adrs/012-runtime-lifetime.md) — single-use spawn/hibernate model
1313
- [ADR-022 — Harness API server](../adrs/022-harness-api-server.md) — separate port with a restricted, internal-only surface
1414
- [ADR-023 — Harness-agnostic agent base image](../adrs/023-harness-agnostic-base-image.md) — `AGENT_COMMAND` contract
15+
- [ADR-033 — Envoy-based credential gateway](../adrs/033-envoy-credential-gateway.md) — experimental per-pod sidecar replaces OneCLI on the wire
1516

1617
## Overview
1718

@@ -104,5 +105,5 @@ For each `agent-instance`, the controller reconciles a StatefulSet (replicas 0 w
104105
- **Spec/status ownership.** Controller never writes `spec.yaml`; api-server never writes `status.yaml`. Write contention between the two is impossible by convention.
105106
- **Relay-only ACP.** All ACP traffic is proxied through the api-server. Agent pods do not accept ACP connections from outside the cluster and the UI never dials pods directly.
106107
- **Two-port api-server.** The public port is user-authenticated; the harness port is cluster-internal and has no user authentication. They do not share routes.
107-
- **Credential isolation.** Agent pods never hold real upstream credentials — only a delegated OneCLI access token. Upstream tokens are injected on the wire by OneCLI. See [security-and-credentials](security-and-credentials.md).
108+
- **Credential isolation.** Agent containers never hold real upstream credentials. By default, egress flows through OneCLI's MITM gateway, which swaps a delegated OneCLI access token for the real upstream credential on the wire. Instances that opt in via the experimental `experimentalCredentialInjector` flag (ADR-033) take a different path: an Envoy sidecar in the pod intercepts agent TLS using a per-instance leaf cert and injects the credential header from a Secret mounted only into the sidecar — the credential is present in the pod, but the agent container itself still never sees it. See [security-and-credentials](security-and-credentials.md).
108109
- **Atomic triggers.** Trigger files are delivered via write-temp + rename so the agent's trigger watcher never reads a partial file.

docs/architecture/security-and-credentials.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Security and credentials
22

3-
Last verified: 2026-04-27
3+
Last verified: 2026-04-28
44

55
## Motivated by
66

@@ -11,6 +11,7 @@ Last verified: 2026-04-27
1111
- [ADR-024 — Connector-declared envs and per-agent overrides](../adrs/024-connector-declared-envs.md) — env composition at pod start; the credential owner declares the env names, not the platform
1212
- [ADR-027 — Slack per-turn user impersonation](../adrs/027-slack-user-impersonation.md) — foreign repliers fork the instance into a per-turn Job; foreign-registration tokens are minted by the api-server and inlined into the fork ConfigMap
1313
- [ADR-028 — Configurable injection on generic secrets](../adrs/028-generic-secret-injection-config.md) — generic secrets carry their own host/path/header injection rules
14+
- [ADR-033 — Envoy-based credential gateway](../adrs/033-envoy-credential-gateway.md) — replaces OneCLI with an Envoy sidecar; rolling out behind a per-instance experimental flag
1415

1516
## Overview
1617

@@ -126,6 +127,19 @@ Pod env at start is the composition of platform defaults, connector-declared env
126127

127128
OneCLI does not yet support HITL approval mid-request — the gateway either has a matching grant or it doesn't. ADR-005 calls HITL out as a future requirement; ADR-010 keeps the door open to replacing OneCLI with an in-house gateway if upstream HITL doesn't land. There is no enforcement point at which a user can approve a single outbound call today; granular control is per-secret (host/path/header) at provisioning time, not per-request.
128129

130+
### Experimental: Envoy credential injector
131+
132+
[ADR-033](../adrs/033-envoy-credential-gateway.md) replaces OneCLI with a per-pod Envoy sidecar. The full migration is gated behind a per-instance opt-in flag (`experimentalCredentialInjector`) that is off by default; instances without the flag keep the OneCLI path described above unchanged. When the flag is on for an instance:
133+
134+
- The agent container's egress is proxied to a sidecar `envoy` container on `127.0.0.1`. There is no `ONECLI_ACCESS_TOKEN` and no cross-namespace traffic to the OneCLI gateway.
135+
- The agent container has **no** mounts of any credential `Secret` and runs with `automountServiceAccountToken: false` — the credential boundary lives at the container, not the pod.
136+
- The owner's user-typed credentials (generic + Anthropic) are written to per-`(owner, connection)` K8s `Secret`s by the api-server when the user creates them. Existing OneCLI-only secrets are not migrated; the experimental sidecar only sees secrets created after the flag was introduced.
137+
- The Envoy bootstrap config is rendered into a per-instance ConfigMap by the controller; topology changes (route edits, new credentials, header config) trigger a pod roll. Credential-value updates flow through kubelet's `Secret` volume sync without a restart.
138+
- NetworkPolicy drops the OneCLI peer and allows direct egress on TCP 443/80 from the sidecar (the gateway again decides per-host whether a credential is injected).
139+
- The OneCLI `GH_TOKEN=humr:sentinel` is **not** set on this path. Tooling can read `HUMR_GH_TOKEN_AVAILABLE` (`"true"`/`"false"`) from the agent env or the `humr.ai/gh-token-available` pod annotation to detect whether a GitHub credential Secret was attached, instead of failing on a 401 mid-request.
140+
141+
OAuth app connections, HITL `ext_authz`, the refresh-token loop, and `gVisor`/RuntimeClass enforcement are out of scope for the first slice and are tracked as follow-ups.
142+
129143
## Per-instance access token and pod identity
130144

131145
The per-instance access token is what scopes a pod's outbound traffic to a specific user's grants. The provisioning sequence:

packages/api-server-api/src/modules/instances/router.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ export const instancesRouter = t.router({
2626
secretRef: z.string().optional(),
2727
description: z.string().optional(),
2828
allowedUserEmails: z.array(z.email()).optional(),
29+
experimentalCredentialInjector: z.boolean().optional(),
2930
}))
3031
.mutation(async ({ ctx, input }) => ctx.instances.create(input)),
3132

@@ -35,6 +36,7 @@ export const instancesRouter = t.router({
3536
env: z.array(envVarSchema).optional(),
3637
secretRef: z.string().optional(),
3738
allowedUserEmails: z.array(z.email()).optional(),
39+
experimentalCredentialInjector: z.boolean().optional(),
3840
}))
3941
.mutation(async ({ ctx, input }) => {
4042
const inst = await ctx.instances.update(input);

packages/api-server-api/src/modules/instances/types.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export interface Instance {
2828
error?: string;
2929
channels: ChannelConfig[];
3030
allowedUserEmails: string[];
31+
experimentalCredentialInjector?: boolean;
3132
}
3233

3334
export interface CreateInstanceInput {
@@ -37,13 +38,15 @@ export interface CreateInstanceInput {
3738
secretRef?: string;
3839
description?: string;
3940
allowedUserEmails?: string[];
41+
experimentalCredentialInjector?: boolean;
4042
}
4143

4244
export interface UpdateInstanceInput {
4345
id: string;
4446
env?: EnvVar[];
4547
secretRef?: string;
4648
allowedUserEmails?: string[];
49+
experimentalCredentialInjector?: boolean;
4750
}
4851

4952
export type ConnectSlackError =

0 commit comments

Comments
 (0)