Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions deploy/helm/humr/templates/cert-manager/ca-certificate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{- if and .Values.controller .Values.controller.envoyMitm .Values.controller.envoyMitm.enabled }}
{{/*
The humr MITM CA Certificate.

Rendered only when .Values.controller.envoyMitm.enabled is set. The certificate
is issued by the self-signed bootstrap ClusterIssuer
(.Values.controller.envoyMitm.bootstrapIssuerName); the Secret it produces in
cert-manager's cluster-resource-namespace backs the humr-mitm-ca-issuer
ClusterIssuer.

cert-manager only honours `ca:` ClusterIssuers that reference a Secret in the
namespace passed to its --cluster-resource-namespace flag (default
`cert-manager`). Override .Values.controller.envoyMitm.caSecretNamespace to
match if your install differs.

Every value-driven scalar below is piped through `quote` so that an override
which looks like a boolean, number or null is still rendered as a YAML string.
*/}}
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  # The Certificate and the Secret it writes deliberately share one name.
  name: {{ .Values.controller.envoyMitm.caSecretName | quote }}
  namespace: {{ .Values.controller.envoyMitm.caSecretNamespace | quote }}
  labels:
    {{- include "humr.labels" . | nindent 4 }}
    app.kubernetes.io/component: envoy-mitm
spec:
  isCA: true
  commonName: humr MITM CA
  # Secret referenced by the CA ClusterIssuer's `ca.secretName`.
  secretName: {{ .Values.controller.envoyMitm.caSecretName | quote }}
  duration: {{ .Values.controller.envoyMitm.caDuration | quote }}
  renewBefore: {{ .Values.controller.envoyMitm.caRenewBefore | quote }}
  privateKey:
    algorithm: ECDSA
    size: 256
  issuerRef:
    name: {{ .Values.controller.envoyMitm.bootstrapIssuerName | quote }}
    kind: ClusterIssuer
    group: cert-manager.io
{{- end }}
31 changes: 31 additions & 0 deletions deploy/helm/humr/templates/cert-manager/issuer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{{- if and .Values.controller .Values.controller.envoyMitm .Values.controller.envoyMitm.enabled }}
{{/*
Cluster issuers backing the experimental Envoy credential-injector path
(ADR-033). The bootstrap issuer signs the humr-mitm-ca Certificate; the CA
issuer then signs per-instance leaf certs that Envoy uses to terminate the
agent's TLS for credential injection.

ClusterIssuers are cluster-scoped — names collide globally. Override via
.Values.controller.envoyMitm.{bootstrapIssuerName,caIssuerName} if needed.

Value-driven names are piped through `quote` so that an override which looks
like a boolean, number or null is still rendered as a YAML string.
*/}}
# Self-signed bootstrap issuer: exists only to sign the MITM CA Certificate.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: {{ .Values.controller.envoyMitm.bootstrapIssuerName | quote }}
  labels:
    {{- include "humr.labels" . | nindent 4 }}
    app.kubernetes.io/component: envoy-mitm
spec:
  selfSigned: {}
---
# CA issuer: signs per-instance leaf certs from the CA key pair held in the
# Secret named by caSecretName.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: {{ .Values.controller.envoyMitm.caIssuerName | quote }}
  labels:
    {{- include "humr.labels" . | nindent 4 }}
    app.kubernetes.io/component: envoy-mitm
spec:
  ca:
    secretName: {{ .Values.controller.envoyMitm.caSecretName | quote }}
{{- end }}
4 changes: 4 additions & 0 deletions deploy/helm/humr/templates/controller/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ spec:
value: {{ .Values.controller.idleTimeout | default "1h" | quote }}
- name: HUMR_TERMINATION_GRACE_PERIOD
value: {{ .Values.controller.terminationGracePeriod | default 5 | quote }}
- name: ENVOY_IMAGE
value: {{ .Values.controller.envoyImage | default "envoyproxy/envoy-distroless:v1.32.0" | quote }}
- name: ENVOY_PORT
value: {{ .Values.controller.envoyPort | default 10000 | quote }}
- name: POD_NAME
valueFrom:
fieldRef:
Expand Down
5 changes: 5 additions & 0 deletions deploy/helm/humr/templates/controller/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ rules:
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
{{- if and .Values.controller .Values.controller.envoyMitm .Values.controller.envoyMitm.enabled }}
# Rendered only when controller.envoyMitm.enabled is set: grants this role
# get/list/watch/create/update/patch/delete on cert-manager.io Certificates.
# NOTE(review): presumably for the per-instance leaf certs the controller
# mints for the Envoy sidecar — confirm against the controller code.
- apiGroups: ["cert-manager.io"]
resources: ["certificates"]
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
{{- end }}
---
apiVersion: rbac.authorization.k8s.io/v1
{{- if $clusterScoped }}
Expand Down
27 changes: 27 additions & 0 deletions deploy/helm/humr/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,33 @@ controller:
idleTimeout: "1h"
# -- Termination grace period in seconds for agent pods
terminationGracePeriod: 5
# -- Image for the experimental Envoy credential-injector sidecar (ADR-033). The sidecar
# is added only to instances with `experimentalCredentialInjector: true`.
envoyImage: envoyproxy/envoy-distroless:v1.32.0
# -- Port the Envoy sidecar listens on inside the agent pod (proxy on 127.0.0.1).
envoyPort: 10000
# -- TLS interception support for the experimental credential injector. When enabled,
# the chart provisions a self-signed CA via cert-manager and a CA ClusterIssuer
# (`humr-mitm-ca-issuer`) that the controller uses to mint per-instance leaf
# certs for Envoy to terminate the agent's TLS and inject credential headers.
# Requires cert-manager to be installed in the cluster (humr's cluster:install
# task does this).
envoyMitm:
enabled: true
# -- Names of the bootstrap and CA cluster issuers. Cluster-scoped, so consider
# collisions with other charts.
bootstrapIssuerName: humr-selfsigned-bootstrap
caIssuerName: humr-mitm-ca-issuer
# -- Where the CA Secret lives. Must equal cert-manager's
# --cluster-resource-namespace flag (default `cert-manager`).
caSecretNamespace: cert-manager
caSecretName: humr-mitm-ca
# -- CA validity. cert-manager will renew before expiry automatically.
caDuration: 87600h # 10y
caRenewBefore: 720h # 30d
# -- Per-instance leaf cert validity.
leafDuration: 2160h # 90d
leafRenewBefore: 360h # 15d
resources:
requests:
cpu: 100m
Expand Down
2 changes: 1 addition & 1 deletion deploy/helm/tasks.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ run = "helm lint ."

["helm:check:render"]
dir = "{{config_root}}/deploy/helm/humr"
run = "helm template humr . | kubeconform -strict -summary -skip Certificate,Issuer"
run = "helm template humr . | kubeconform -strict -summary -skip Certificate,Issuer,ClusterIssuer"

["helm:setup"]
alias = "helm:build"
Expand Down
5 changes: 3 additions & 2 deletions docs/architecture/platform-topology.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Platform topology

Last verified: 2026-04-27
Last verified: 2026-04-28

## Motivated by

Expand All @@ -12,6 +12,7 @@ Last verified: 2026-04-27
- [ADR-012 — Runtime lifetime](../adrs/012-runtime-lifetime.md) — single-use spawn/hibernate model
- [ADR-022 — Harness API server](../adrs/022-harness-api-server.md) — separate port with a restricted, internal-only surface
- [ADR-023 — Harness-agnostic agent base image](../adrs/023-harness-agnostic-base-image.md) — `AGENT_COMMAND` contract
- [ADR-033 — Envoy-based credential gateway](../adrs/033-envoy-credential-gateway.md) — experimental per-pod sidecar replaces OneCLI on the wire

## Overview

Expand Down Expand Up @@ -104,5 +105,5 @@ For each `agent-instance`, the controller reconciles a StatefulSet (replicas 0 w
- **Spec/status ownership.** Controller never writes `spec.yaml`; api-server never writes `status.yaml`. Write contention between the two is impossible by convention.
- **Relay-only ACP.** All ACP traffic is proxied through the api-server. Agent pods do not accept ACP connections from outside the cluster and the UI never dials pods directly.
- **Two-port api-server.** The public port is user-authenticated; the harness port is cluster-internal and has no user authentication. They do not share routes.
- **Credential isolation.** Agent pods never hold real upstream credentials — only a delegated OneCLI access token. Upstream tokens are injected on the wire by OneCLI. See [security-and-credentials](security-and-credentials.md).
- **Credential isolation.** Agent pods never hold real upstream credentials. By default, egress flows through OneCLI's MITM gateway, which swaps a delegated OneCLI access token for the real upstream credential on the wire. Instances with the experimental `experimentalCredentialInjector` flag (ADR-033) take a different path: an Envoy sidecar in the pod intercepts agent TLS using a per-instance leaf cert and injects the credential header from a Secret mounted only into the sidecar — the agent container itself still never sees the upstream credential. See [security-and-credentials](security-and-credentials.md).
- **Atomic triggers.** Trigger files are delivered via write-temp + rename so the agent's trigger watcher never reads a partial file.
16 changes: 15 additions & 1 deletion docs/architecture/security-and-credentials.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Security and credentials

Last verified: 2026-04-27
Last verified: 2026-04-28

## Motivated by

Expand All @@ -11,6 +11,7 @@ Last verified: 2026-04-27
- [ADR-024 — Connector-declared envs and per-agent overrides](../adrs/024-connector-declared-envs.md) — env composition at pod start; the credential owner declares the env names, not the platform
- [ADR-027 — Slack per-turn user impersonation](../adrs/027-slack-user-impersonation.md) — foreign repliers fork the instance into a per-turn Job; foreign-registration tokens are minted by the api-server and inlined into the fork ConfigMap
- [ADR-028 — Configurable injection on generic secrets](../adrs/028-generic-secret-injection-config.md) — generic secrets carry their own host/path/header injection rules
- [ADR-033 — Envoy-based credential gateway](../adrs/033-envoy-credential-gateway.md) — replaces OneCLI with an Envoy sidecar; rolling out behind a per-instance experimental flag

## Overview

Expand Down Expand Up @@ -126,6 +127,19 @@ Pod env at start is the composition of platform defaults, connector-declared env

OneCLI does not yet support HITL approval mid-request — the gateway either has a matching grant or it doesn't. ADR-005 calls HITL out as a future requirement; ADR-010 keeps the door open to replacing OneCLI with an in-house gateway if upstream HITL doesn't land. There is no enforcement point at which a user can approve a single outbound call today; granular control is per-secret (host/path/header) at provisioning time, not per-request.

### Experimental: Envoy credential injector

[ADR-033](../adrs/033-envoy-credential-gateway.md) replaces OneCLI with a per-pod Envoy sidecar. The migration is gated behind a per-instance opt-in flag (`experimentalCredentialInjector`); instances that leave the flag off (the default) keep the OneCLI path described above unchanged. When the flag is on for an instance:

- The agent container's egress is proxied to a sidecar `envoy` container on `127.0.0.1`. There is no `ONECLI_ACCESS_TOKEN` and no cross-namespace traffic to the OneCLI gateway.
- The agent container has **no** mounts of any credential `Secret` and runs with `automountServiceAccountToken: false` — the credential boundary lives at the container, not the pod.
- The owner's user-typed credentials (generic + Anthropic) are written to per-`(owner, connection)` K8s `Secret`s by the api-server when the user creates them. Existing OneCLI-only secrets are not migrated; the experimental sidecar only sees secrets created after the flag was introduced.
- The Envoy bootstrap config is rendered into a per-instance ConfigMap by the controller; topology changes (route edits, new credentials, header config) trigger a pod roll. Credential-value updates flow through kubelet's `Secret` volume sync without a restart.
- NetworkPolicy drops the OneCLI peer and allows direct egress on TCP 443/80 from the sidecar (as on the OneCLI path, the gateway — here the Envoy sidecar — decides per host whether a credential is injected).
- The OneCLI `GH_TOKEN=humr:sentinel` is **not** set on this path. Tooling can read `HUMR_GH_TOKEN_AVAILABLE` (`"true"`/`"false"`) from the agent env or the `humr.ai/gh-token-available` pod annotation to detect whether a GitHub credential Secret was attached, instead of failing on a 401 mid-request.

OAuth app connections, HITL `ext_authz`, the refresh-token loop, and `gVisor`/RuntimeClass enforcement are out of scope for the first slice and are tracked as follow-ups.

## Per-instance access token and pod identity

The per-instance access token is what scopes a pod's outbound traffic to a specific user's grants. The provisioning sequence:
Expand Down
2 changes: 2 additions & 0 deletions packages/api-server-api/src/modules/instances/router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export const instancesRouter = t.router({
secretRef: z.string().optional(),
description: z.string().optional(),
allowedUserEmails: z.array(z.email()).optional(),
experimentalCredentialInjector: z.boolean().optional(),
}))
.mutation(async ({ ctx, input }) => ctx.instances.create(input)),

Expand All @@ -35,6 +36,7 @@ export const instancesRouter = t.router({
env: z.array(envVarSchema).optional(),
secretRef: z.string().optional(),
allowedUserEmails: z.array(z.email()).optional(),
experimentalCredentialInjector: z.boolean().optional(),
}))
.mutation(async ({ ctx, input }) => {
const inst = await ctx.instances.update(input);
Expand Down
3 changes: 3 additions & 0 deletions packages/api-server-api/src/modules/instances/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export interface Instance {
error?: string;
channels: ChannelConfig[];
allowedUserEmails: string[];
experimentalCredentialInjector?: boolean;
}

export interface CreateInstanceInput {
Expand All @@ -37,13 +38,15 @@ export interface CreateInstanceInput {
secretRef?: string;
description?: string;
allowedUserEmails?: string[];
experimentalCredentialInjector?: boolean;
}

/**
 * Payload accepted when updating an instance.
 * Only `id` is required; every other field is optional.
 */
export interface UpdateInstanceInput {
/** Identifier of the instance to update. */
id: string;
/** Environment variables for the instance. */
env?: EnvVar[];
/** Secret reference for the instance. NOTE(review): exact semantics not visible here — confirm. */
secretRef?: string;
/** E-mail addresses of users allowed on the instance. */
allowedUserEmails?: string[];
/** Per-instance opt-in flag for the experimental Envoy credential injector (ADR-033). */
experimentalCredentialInjector?: boolean;
}

export type ConnectSlackError =
Expand Down
14 changes: 13 additions & 1 deletion packages/api-server/src/__tests__/unit/instance-assembly.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest";
import { computeState, type InfraInstance } from "../../modules/agents/domain/instance-assembly.js";
import { assembleInstance, computeState, type InfraInstance } from "../../modules/agents/domain/instance-assembly.js";

function infra(overrides: Partial<InfraInstance> = {}): InfraInstance {
return {
Expand All @@ -21,3 +21,15 @@ describe("computeState", () => {
expect(computeState(infra({ currentState: "running", podReady: true }))).toBe("running");
});
});

// Round-trip checks: assembleInstance must carry experimentalCredentialInjector
// from the infra record onto the Instance it builds, and must not invent a
// value when the infra record does not set one.
describe("assembleInstance — experimentalCredentialInjector round-trip", () => {
  it("threads the flag through to the assembled Instance", () => {
    const flaggedInfra = infra({ experimentalCredentialInjector: true });
    const { experimentalCredentialInjector } = assembleInstance(flaggedInfra, [], []);

    expect(experimentalCredentialInjector).toBe(true);
  });

  it("leaves the field undefined when not set on infra", () => {
    const plainInfra = infra();
    const assembled = assembleInstance(plainInfra, [], []);

    expect(assembled.experimentalCredentialInjector).toBeUndefined();
  });
});
Loading
Loading