Skip to content

Commit 93aa9aa

Browse files
committed
Add route-controller DaemonSet to prevent routing loops on local clusters
The route-controller runs as a DaemonSet on every node with host networking and NET_ADMIN privileges. It discovers the cluster's service CIDRs (via the SERVICE_CIDRS env var or the Kubernetes ServiceCIDR API on k8s 1.33+) and installs iptables FORWARD chain DROP rules for each CIDR at startup. Any forwarded packet destined for an IP with no active kube-proxy DNAT rule is dropped rather than escaping via the node's default route, breaking the routing loop that Telepresence can cause on local clusters. A kernel RTN_BLACKHOLE route for the subnet was considered but rejected: it fails connect()/sendmsg() before any iptables hook fires, which would break kube-apiserver → mutating-webhook calls. The FORWARD chain only affects forwarded (pod) traffic, not locally-generated host traffic, so active services are unaffected because kube-proxy DNAT rewrites the destination before the FORWARD chain is reached. The DaemonSet is automatically enabled when image.registry is "local" or starts with "localhost:", which are the conventional registry values for local clusters (Kind, minikube, k3d, Docker Desktop). It can be force-enabled or force-disabled with routeController.enabled=true/false (null = auto-detect, the default). Signed-off-by: Thomas Hallgren <thomas@tada.se>
1 parent 39cdf1b commit 93aa9aa

File tree

14 files changed

+505
-6
lines changed

14 files changed

+505
-6
lines changed

CHANGELOG.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,15 @@ items:
5252
the need for elevated privileges when using Telepresence. Currently available for amd64 architecture only
5353
due to dependency constraints.
5454
docs: install/client
55+
- type: feature
56+
title: Add route-controller DaemonSet to prevent routing loops on local clusters
57+
body: >-
58+
A new optional <code>route-controller</code> DaemonSet can be deployed alongside the traffic-manager
on local Kubernetes clusters (Kind, minikube, k3d, Docker Desktop) to prevent routing loops caused by
deleted or non-existent service ClusterIPs. It installs an iptables <code>FORWARD</code> chain
<code>DROP</code> rule for each service CIDR on every node, so forwarded packets destined for a
ClusterIP with no active kube-proxy DNAT rule are dropped instead of escaping via the node's default
route. It is enabled automatically when <code>image.registry</code> indicates a local cluster, and can
be forced on or off with <code>routeController.enabled</code> in the Helm chart.
docs: reference/route-controller
5564
- type: feature
5665
title: Automatic cache cleanup on version change
5766
body: >-
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# syntax = docker/dockerfile:1.3

# Copyright 2024 Datawire. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build stage: cross-compile the route-controller binary on the build
# platform for the target platform (TARGETOS/TARGETARCH).
FROM --platform=$BUILDPLATFORM golang:alpine AS routecontroller-build

WORKDIR telepresence
# A COPY with multiple sources requires a directory destination ending in "/".
COPY go.mod go.sum ./
COPY cmd/routecontroller/ cmd/routecontroller/
COPY pkg/ pkg/
COPY build-output/version.txt .

ARG TARGETOS
ARG TARGETARCH

# CGO_ENABLED=0 yields a static binary: golang:alpine ships no C toolchain,
# and cross-compilation with cgo would require a per-target CC anyway.
RUN \
    --mount=type=cache,target=/root/.cache/go-build \
    --mount=type=cache,target=/go/pkg/mod \
    CGO_ENABLED=0 GOOS=$TARGETOS GOARCH=$TARGETARCH go build -o /usr/local/bin/route-controller -trimpath -ldflags="-s -w" ./cmd/routecontroller/

# Runtime stage: a minimal image with the iptables binaries the controller
# execs to manage FORWARD chain rules.
FROM alpine AS routecontroller

RUN apk add --no-cache ca-certificates iptables ip6tables

COPY --from=routecontroller-build /usr/local/bin/route-controller /usr/local/bin/route-controller

ENTRYPOINT ["/usr/local/bin/route-controller"]
CMD []

build-aux/main.mk

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,20 @@ save-tel2-image: tel2-image
326326
push-client-image: client-image ## (Build) Push the client container image to $(TELEPRESENCE_REGISTRY)
327327
docker push $(CLIENT_IMAGE_FQN)
328328

329+
# Fully-qualified tag for the route-controller DaemonSet image.
ROUTECONTROLLER_IMAGE_FQN=$(TELEPRESENCE_REGISTRY)/route-controller:$(TELEPRESENCE_SEMVER)

.PHONY: routecontroller-image
routecontroller-image: images-deps ## (Build) Build the route-controller DaemonSet image
	$(eval PLATFORM_ARG := $(if $(TELEPRESENCE_ROUTECONTROLLER_IMAGE_PLATFORM), --platform=$(TELEPRESENCE_ROUTECONTROLLER_IMAGE_PLATFORM),))
	docker build $(PLATFORM_ARG) --target routecontroller --tag route-controller --tag $(ROUTECONTROLLER_IMAGE_FQN) \
		-f build-aux/docker/images/Dockerfile.routecontroller .

.PHONY: push-routecontroller-image
push-routecontroller-image: routecontroller-image ## (Build) Push the route-controller DaemonSet image to $(TELEPRESENCE_REGISTRY)
	docker push $(ROUTECONTROLLER_IMAGE_FQN)
340+
329341
.PHONY: push-images
330-
push-images: push-tel2-image push-client-image
342+
push-images: push-tel2-image push-client-image push-routecontroller-image
331343

332344
.PHONY: helm-chart
333345
helm-chart: $(BUILDDIR)/telepresence-oss-chart.tgz
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{{- /* Scope on .Values.routeController BEFORE dereferencing .enabled: evaluating
       .Values.routeController.enabled while routeController is nil is a
       "nil pointer evaluating" template error. */}}
{{- with .Values.routeController }}
{{- $localCluster := or (eq $.Values.image.registry "local") (hasPrefix "localhost:" $.Values.image.registry) }}
{{- /* enabled: null (kind "invalid") means auto-detect from the registry. */}}
{{- $rcEnabled := ternary $localCluster .enabled (kindIs "invalid" .enabled) }}
{{- if $rcEnabled }}
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: route-controller
  namespace: {{ include "traffic-manager.namespace" $ }}
  labels:
    {{ include "telepresence.labels" $ | nindent 4 }}
spec:
  selector:
    matchLabels:
      app: route-controller
  template:
    metadata:
      labels:
        app: route-controller
    spec:
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      serviceAccountName: route-controller
      # The FORWARD DROP rules must exist on every node, including tainted
      # control-plane nodes (common on Kind/minikube/k3d multi-node setups);
      # without tolerations those nodes would be skipped.
      tolerations:
      - operator: Exists
      containers:
      - name: route-controller
        image: "{{ coalesce .image.registry $.Values.image.registry }}/{{ .image.name }}:{{ $.Chart.AppVersion }}"
        imagePullPolicy: {{ coalesce .image.pullPolicy $.Values.image.pullPolicy }}
        env:
        - name: LOG_LEVEL
          value: {{ coalesce .logLevel $.Values.logLevel }}
        {{- with .serviceCIDRs }}
        - name: SERVICE_CIDRS
          value: {{ join "," . }}
        {{- end }}
        securityContext:
          capabilities:
            # NET_ADMIN is required to modify the host's iptables rules.
            add: ["NET_ADMIN"]
        resources:
          requests:
            cpu: 10m
            memory: 32Mi
          limits:
            memory: 64Mi
{{- end }}
{{- end }}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{{- /* Scope on .Values.routeController BEFORE dereferencing .enabled: evaluating
       .Values.routeController.enabled while routeController is nil is a
       "nil pointer evaluating" template error. */}}
{{- with .Values.routeController }}
{{- $localCluster := or (eq $.Values.image.registry "local") (hasPrefix "localhost:" $.Values.image.registry) }}
{{- $rcEnabled := ternary $localCluster .enabled (kindIs "invalid" .enabled) }}
{{- if $rcEnabled }}
{{- /* Cluster-scoped resources carry the namespace in their name so that two
       installations in different namespaces do not collide on one
       ClusterRole/ClusterRoleBinding. */}}
{{- $clusterScopedName := printf "route-controller-%s" (include "traffic-manager.namespace" $) }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: route-controller
  namespace: {{ include "traffic-manager.namespace" $ }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ $clusterScopedName }}
rules:
# The controller only reads ServiceCIDRs once at startup (get/list, no watch).
- apiGroups: ["networking.k8s.io"]
  resources: ["servicecidrs"]
  verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ $clusterScopedName }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ $clusterScopedName }}
subjects:
- kind: ServiceAccount
  name: route-controller
  namespace: {{ include "traffic-manager.namespace" $ }}
{{- end }}
{{- end }}

charts/telepresence-oss/values.schema.yaml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,53 @@ properties:
507507
description: Number or replicas for the traffic-manager. The Traffic Manager only support running with one replica at the moment.
508508
const: 1
509509

510+
routeController:
511+
description: >-
512+
Configuration for the route-controller DaemonSet that installs iptables FORWARD chain DROP rules
for the cluster's service CIDRs, preventing routing loops caused by deleted or non-existent
service ClusterIPs on local clusters.
514+
type: object
515+
additionalProperties: false
516+
properties:
517+
enabled:
518+
description: >-
519+
Enable or disable the route-controller DaemonSet. When null (the default), the
520+
route-controller is automatically enabled for local clusters, detected by
521+
image.registry being "local" or starting with "localhost:". Set to true to
522+
force-enable or false to force-disable regardless of registry.
523+
anyOf:
524+
- type: "null"
525+
- type: boolean
526+
image:
527+
type: object
528+
additionalProperties: false
529+
properties:
530+
name:
531+
description: The name of the route-controller image
532+
type: string
533+
pullPolicy:
534+
description: Pull policy for the route-controller image. Empty string inherits from image.pullPolicy.
535+
anyOf:
536+
- type: string
537+
const: ""
538+
- $ref: "#/$defs/pullPolicy"
539+
registry:
540+
description: The registry for the route-controller image
541+
type: string
542+
logLevel:
543+
description: Log level for the route-controller. Empty string inherits from logLevel.
544+
anyOf:
545+
- type: string
546+
const: ""
547+
- $ref: "#/$defs/logLevel"
548+
serviceCIDRs:
549+
description: >-
550+
Service CIDRs for which iptables FORWARD chain DROP rules are installed at startup.
If empty, the controller queries the ServiceCIDR API (k8s >= 1.33) automatically.
For older clusters, set this explicitly (e.g. ["10.96.0.0/12"]).
553+
type: array
554+
items:
555+
type: string
556+
510557
resources:
511558
description: Define resource requests and limits for the Traffic Manger
512559
$ref: "#/$defs/resourceRequirements"

charts/telepresence-oss/values.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,3 +198,31 @@ intercept:
198198
# Once this timeout is exceeded, the intercept no longer blocks conflicting intercepts and may be
199199
# automatically removed when another client attempts to create a conflicting intercept.
200200
inactiveBlockTimeout: 10m
201+
202+
################################################################################
203+
## Route Controller Configuration
204+
################################################################################
205+
# routeController installs a DaemonSet that adds iptables FORWARD chain DROP
# rules for the cluster's service CIDRs on every node, preventing routing
# loops on local clusters (Kind, minikube, k3d, Docker Desktop) where packets
# to deleted or never-assigned service IPs would otherwise fall through to
# the node's default route.
209+
routeController:
210+
# enabled controls the route-controller DaemonSet:
211+
# null (default) - auto-enable when image.registry is "local" or starts with "localhost:"
212+
# true - always enable
213+
# false - always disable, even on local clusters
214+
enabled: ~
215+
image:
216+
# registry and pullPolicy default to the traffic-manager image settings when not set
217+
registry: ""
218+
name: route-controller
219+
pullPolicy: ""
220+
# logLevel defaults to the traffic-manager logLevel when not set
221+
logLevel: ""
222+
# serviceCIDRs is a list of service CIDRs (e.g. ["10.96.0.0/12"]) for which
# the route-controller installs iptables FORWARD chain DROP rules at startup.
# Active services are unaffected because kube-proxy's DNAT rewrites the
# destination before the FORWARD chain is reached; only ClusterIPs with no
# DNAT rule are dropped.
# If empty, the controller queries the ServiceCIDR API (k8s >= 1.33); on
# older clusters, set this explicitly or no subnet-level protection is
# installed.
228+
serviceCIDRs: []

cmd/routecontroller/main.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net"
7+
"os"
8+
"strings"
9+
10+
"github.com/coreos/go-iptables/iptables"
11+
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
12+
"k8s.io/client-go/kubernetes"
13+
"k8s.io/client-go/rest"
14+
15+
"github.com/telepresenceio/clog"
16+
tplog "github.com/telepresenceio/telepresence/v2/pkg/log"
17+
"github.com/telepresenceio/telepresence/v2/pkg/sigctx"
18+
)
19+
20+
// subnetRule tracks an iptables FORWARD DROP rule installed for a service CIDR.
// The protocol-specific (IPv4 or IPv6) handle is kept with the CIDR so the rule
// can later be deleted through the same handle that created it.
type subnetRule struct {
	// ipt is the iptables handle for the CIDR's address family.
	ipt *iptables.IPTables
	// cidr is the normalized CIDR string used as the rule's "-d" match.
	cidr string
}
25+
26+
func main() {
27+
ctx := context.Background()
28+
logLevel := os.Getenv("LOG_LEVEL")
29+
ctx = tplog.MakeBaseLogger(ctx, os.Stdout, logLevel)
30+
31+
if err := sigctx.DoWithSignalHandler(ctx, run); err != nil {
32+
clog.Error(ctx, err)
33+
os.Exit(1)
34+
}
35+
}
36+
37+
// run is the controller's main loop. It builds an in-cluster Kubernetes client,
// installs iptables FORWARD DROP rules for the discovered service CIDRs, and
// then blocks until ctx is cancelled (e.g. by a termination signal), at which
// point the deferred cleanup removes the rules again.
func run(ctx context.Context) error {
	cfg, err := rest.InClusterConfig()
	if err != nil {
		return fmt.Errorf("failed to get in-cluster config: %w", err)
	}

	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return fmt.Errorf("failed to create clientset: %w", err)
	}

	// The rules stay in place for the lifetime of this process; removing them
	// on shutdown leaves a node without a route-controller in its original state.
	rules := installSubnetBlackholes(ctx, cs)
	defer removeSubnetBlackholes(ctx, rules)

	clog.Info(ctx, "Route controller started")
	<-ctx.Done()
	return nil
}
55+
56+
// discoverServiceCIDRs returns the service CIDRs for which iptables FORWARD DROP rules
57+
// will be installed. It first checks the SERVICE_CIDRS environment variable
58+
// (comma-separated list of CIDRs). If that is not set, it queries the Kubernetes
59+
// ServiceCIDR API (available in k8s 1.33+). If neither is available it returns nil and
60+
// no subnet-level protection is installed; set SERVICE_CIDRS explicitly in that case.
61+
func discoverServiceCIDRs(ctx context.Context, cs *kubernetes.Clientset) []string {
62+
if envCIDRs := os.Getenv("SERVICE_CIDRS"); envCIDRs != "" {
63+
cidrs := strings.Split(envCIDRs, ",")
64+
clog.Infof(ctx, "Using service CIDRs from SERVICE_CIDRS env: %v", cidrs)
65+
return cidrs
66+
}
67+
68+
list, err := cs.NetworkingV1().ServiceCIDRs().List(ctx, meta.ListOptions{})
69+
if err != nil {
70+
clog.Warnf(ctx, "ServiceCIDR API unavailable: %v; set SERVICE_CIDRS to enable subnet blackholing", err)
71+
return nil
72+
}
73+
var cidrs []string
74+
for _, item := range list.Items {
75+
cidrs = append(cidrs, item.Spec.CIDRs...)
76+
}
77+
clog.Infof(ctx, "Discovered service CIDRs from cluster API: %v", cidrs)
78+
return cidrs
79+
}
80+
81+
// installSubnetBlackholes installs iptables FORWARD chain DROP rules for each service CIDR.
//
// An iptables rule in the FORWARD chain (rather than a kernel blackhole route) is used
// because RTN_BLACKHOLE routes fail connect()/sendmsg() at the socket level before
// any iptables hook can fire, which breaks locally-generated traffic such as
// kube-apiserver → mutating-webhook calls.
//
// The FORWARD chain only affects traffic forwarded through the host (i.e. pod traffic via
// veth pairs). Locally-generated host traffic is never subject to the FORWARD chain.
//
// For active services, kube-proxy's PREROUTING DNAT fires before the FORWARD chain and
// rewrites the destination from ClusterIP to a pod IP, so the DROP rule (which matches
// on the original service CIDR) does not apply. For deleted or never-assigned ClusterIPs
// no DNAT rule exists, the FORWARD chain sees the original ClusterIP, and the DROP fires.
//
// Failures for a single CIDR are logged and skipped so the remaining CIDRs still get
// their protection. The returned rules (including any found already present) are handed
// to removeSubnetBlackholes for cleanup on shutdown.
func installSubnetBlackholes(ctx context.Context, cs *kubernetes.Clientset) []subnetRule {
	var rules []subnetRule
	for _, cidr := range discoverServiceCIDRs(ctx, cs) {
		// CIDRs from the SERVICE_CIDRS env var may carry surrounding whitespace.
		cidr = strings.TrimSpace(cidr)
		_, network, err := net.ParseCIDR(cidr)
		if err != nil {
			clog.Errorf(ctx, "Failed to parse service CIDR %q: %v", cidr, err)
			continue
		}

		// An address with no 4-byte form is IPv6 and needs the ip6tables handle.
		proto := iptables.ProtocolIPv4
		if network.IP.To4() == nil {
			proto = iptables.ProtocolIPv6
		}

		ipt, err := iptables.NewWithProtocol(proto)
		if err != nil {
			clog.Errorf(ctx, "Failed to initialise iptables for %s: %v", network, err)
			continue
		}

		// Idempotency across restarts: only insert when an identical rule is absent.
		exists, err := ipt.Exists("filter", "FORWARD", "-d", network.String(), "-j", "DROP")
		if err != nil {
			clog.Errorf(ctx, "Failed to check iptables FORWARD rule for %s: %v", network, err)
			continue
		}
		if !exists {
			// Insert at position 1 so the DROP precedes any other FORWARD rules.
			if err := ipt.Insert("filter", "FORWARD", 1, "-d", network.String(), "-j", "DROP"); err != nil {
				clog.Errorf(ctx, "Failed to add iptables FORWARD DROP for service CIDR %s: %v", network, err)
				continue
			}
			clog.Infof(ctx, "Added iptables FORWARD DROP for service CIDR %s", network)
		} else {
			clog.Debugf(ctx, "iptables FORWARD DROP for %s already present", network)
		}

		// Pre-existing rules are tracked too, so shutdown always cleans up.
		rules = append(rules, subnetRule{ipt: ipt, cidr: network.String()})
	}
	return rules
}
135+
136+
func removeSubnetBlackholes(ctx context.Context, rules []subnetRule) {
137+
for _, rule := range rules {
138+
if err := rule.ipt.Delete("filter", "FORWARD", "-d", rule.cidr, "-j", "DROP"); err != nil {
139+
clog.Debugf(ctx, "Failed to remove iptables FORWARD DROP for %s: %v", rule.cidr, err)
140+
} else {
141+
clog.Infof(ctx, "Removed iptables FORWARD DROP for service CIDR %s", rule.cidr)
142+
}
143+
}
144+
}

docs/helm/values.schema.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)