Skip to content

Commit 3635608

Browse files
committed
Replace rootlessport with pesto for rootless bridge port forwarding
Pesto dynamically updates pasta's forwarding table via UNIX socket, preserving source IPs that rootlessport's userspace proxy destroyed. Strip HostIP from port mappings passed to netavark in rootless mode (portMappingsForNetavark) because pesto handles host-side address binding via pasta. Netavark's DNAT rules inside the rootless netns must not restrict on destination address since pasta's splice delivers traffic with a different address than the user-specified HostIP. Pesto receives the original HostIP so pasta binds correctly on the host. Fixes: https://redhat.atlassian.net/browse/RUN-2214 Fixes: #8193 Fixes: https://redhat.atlassian.net/browse/RUN-3587 Signed-off-by: Jan Rodák <hony.com@seznam.cz>
1 parent 548bfb3 commit 3635608

6 files changed

Lines changed: 477 additions & 26 deletions

File tree

libpod/networking_common.go

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOpt
5959
DNSServers: nameservers,
6060
ContainerHostname: c.NetworkHostname(),
6161
}
62-
opts.PortMappings = c.convertPortMappings()
62+
opts.PortMappings = portMappingsForNetavark(c.convertPortMappings())
6363

6464
// If the container requested special network options use this instead of the config.
6565
// This is the case for container restore or network reload.
@@ -71,6 +71,25 @@ func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOpt
7171
return opts
7272
}
7373

74+
// portMappingsForNetavark strips HostIP from port mappings in rootless mode.
75+
//
76+
// Pesto handles host-side address binding; netavark creates DNAT rules inside
77+
// the rootless netns. If HostIP is kept, netavark adds an "ip daddr <HostIP>"
78+
// constraint to the DNAT rule, but pasta's splice delivers traffic with a
79+
// different destination address (the host virtual IP), so the rule never
80+
// matches and the connection resets. Rootful is unaffected (no-op).
81+
func portMappingsForNetavark(ports []types.PortMapping) []types.PortMapping {
82+
if !rootless.IsRootless() || len(ports) == 0 {
83+
return ports
84+
}
85+
stripped := make([]types.PortMapping, len(ports))
86+
copy(stripped, ports)
87+
for i := range stripped {
88+
stripped[i].HostIP = ""
89+
}
90+
return stripped
91+
}
92+
7493
// setUpNetwork will set up the networks, on error it will also tear down the
7594
// networks. If rootless it will join/create the rootless network namespace.
7695
func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) {
@@ -110,11 +129,25 @@ func (r *Runtime) teardownNetwork(ctr *Container) error {
110129
return err
111130
}
112131

113-
if !ctr.config.NetMode.IsPasta() && len(networks) > 0 {
114-
netOpts := ctr.getNetworkOptions(networks)
115-
return r.teardownNetworkBackend(ctr.state.NetNS, netOpts)
132+
if len(networks) == 0 {
133+
return nil
116134
}
117-
return nil
135+
136+
// --net=pasta: per-container pasta cleans up when it exits, nothing to tear down.
137+
if ctr.config.NetMode.IsPasta() {
138+
return nil
139+
}
140+
141+
// Bridge mode: update pesto before netavark so pasta stops forwarding
142+
// ports for this container before the bridge/nftables rules are removed.
143+
if rootless.IsRootless() && ctr.config.NetMode.IsBridge() && len(ctr.config.PortMappings) > 0 {
144+
if err := r.teardownRootlessPortMappingViaPesto(ctr); err != nil {
145+
logrus.Warnf("pesto port cleanup failed for container %s: %v", ctr.ID(), err)
146+
}
147+
}
148+
149+
netOpts := ctr.getNetworkOptions(networks)
150+
return r.teardownNetworkBackend(ctr.state.NetNS, netOpts)
118151
}
119152

120153
// isBridgeNetMode checks if the given network mode is bridge.
@@ -402,7 +435,7 @@ func (c *Container) NetworkDisconnect(nameOrID, netName string, _ bool) error {
402435
ContainerID: c.config.ID,
403436
ContainerName: getNetworkPodName(c),
404437
}
405-
opts.PortMappings = c.convertPortMappings()
438+
opts.PortMappings = portMappingsForNetavark(c.convertPortMappings())
406439
opts.Networks = map[string]types.PerNetworkOptions{
407440
netName: networks[netName],
408441
}
@@ -421,11 +454,10 @@ func (c *Container) NetworkDisconnect(nameOrID, netName string, _ bool) error {
421454
return err
422455
}
423456

424-
// Reload ports when there are still connected networks, maybe we removed the network interface with the child ip.
425-
// Reloading without connected networks does not make sense, so we can skip this step.
426-
if rootless.IsRootless() && len(networkStatus) > 0 {
427-
if err := c.reloadRootlessRLKPortMapping(); err != nil {
428-
return err
457+
// Update pesto's forwarding table after disconnect so pasta reflects the new network state.
458+
if rootless.IsRootless() && len(networkStatus) > 0 && len(c.config.PortMappings) > 0 {
459+
if err := c.runtime.setupRootlessPortMappingViaPesto(c); err != nil {
460+
logrus.Warnf("pesto port reload failed after network disconnect for container %s: %v", c.ID(), err)
429461
}
430462
}
431463

@@ -542,7 +574,7 @@ func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNe
542574
ContainerID: c.config.ID,
543575
ContainerName: getNetworkPodName(c),
544576
}
545-
opts.PortMappings = c.convertPortMappings()
577+
opts.PortMappings = portMappingsForNetavark(c.convertPortMappings())
546578
opts.Networks = map[string]types.PerNetworkOptions{
547579
netName: netOpts,
548580
}
@@ -575,11 +607,10 @@ func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNe
575607
return err
576608
}
577609

578-
// The first network needs a port reload to set the correct child ip for the rootlessport process.
579-
// Adding a second network does not require a port reload because the child ip is still valid.
580-
if rootless.IsRootless() && len(networks) == 0 {
581-
if err := c.reloadRootlessRLKPortMapping(); err != nil {
582-
return err
610+
// Update pesto's forwarding table after connect so pasta reflects the new network state.
611+
if rootless.IsRootless() && len(c.config.PortMappings) > 0 {
612+
if err := c.runtime.setupRootlessPortMappingViaPesto(c); err != nil {
613+
logrus.Warnf("pesto port reload failed after network connect for container %s: %v", c.ID(), err)
583614
}
584615
}
585616

libpod/networking_freebsd.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,3 +224,11 @@ func (c *Container) inspectJoinedNetworkNS(_ string) (q types.StatusBlock, retEr
224224
// reloadRootlessRLKPortMapping is a stub in this file: it performs no work
// and always returns an "unsupported" error.
func (c *Container) reloadRootlessRLKPortMapping() error {
	const msg = "unsupported (*Container).reloadRootlessRLKPortMapping"
	return errors.New(msg)
}
227+
228+
func (r *Runtime) setupRootlessPortMappingViaPesto(_ *Container) error {
229+
return errors.New("unsupported setupRootlessPortMappingViaPesto on FreeBSD")
230+
}
231+
232+
func (r *Runtime) teardownRootlessPortMappingViaPesto(_ *Container) error {
233+
return errors.New("unsupported teardownRootlessPortMappingViaPesto on FreeBSD")
234+
}

libpod/networking_linux.go

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,11 @@ func (r *Runtime) configureNetNS(ctr *Container, ctrNS string) (status map[strin
5959
}
6060
}()
6161

62-
// set up rootless port forwarder when rootless with ports and the network status is empty,
63-
// if this is called from network reload the network status will not be empty and we should
64-
// not set up port because they are still active
65-
if rootless.IsRootless() && len(ctr.config.PortMappings) > 0 && ctr.getNetworkStatus() == nil {
66-
// set up port forwarder for rootless netns
67-
// make sure to fix this in container.handleRestartPolicy() as well
68-
// Important we have to call this after r.setUpNetwork() so that
69-
// we can use the proper netStatus
70-
err = r.setupRootlessPortMappingViaRLK(ctr, ctrNS, netStatus)
62+
// Set up port forwarding for rootless bridge networks via pesto.
63+
// Pesto replaces the forwarding table on the running pasta instance,
64+
// preserving source IPs.
65+
if rootless.IsRootless() && len(ctr.config.PortMappings) > 0 {
66+
err = r.setupRootlessPortMappingViaPesto(ctr)
7167
}
7268
return netStatus, err
7369
}

libpod/networking_pesto_linux.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
//go:build !remote
2+
3+
// Pesto integration for rootless bridge network port forwarding.
4+
//
5+
// A shared pasta instance in the rootless netns (-c pasta.sock) handles
6+
// host-side port forwarding. On container start/stop, pesto replaces
7+
// pasta's forwarding table with the aggregate ports of all running bridge
8+
// containers. Pasta forwards via kernel splice (localhost) or TAP
9+
// (external), preserving source IPs. The container sees the real client's
10+
// address instead of a proxy or bridge gateway address.
11+
//
12+
// Container start:
13+
// - netavark sets up bridge + DNAT
14+
// - pesto updates table
15+
//
16+
// Container stop:
17+
// - pesto updates table without stopped container's ports
18+
// - netavark tears down bridge/DNAT
19+
//
20+
// Limitations:
21+
// - IPv4 only (netavark DNAT is IPv4; pesto binds 0.0.0.0 by default)
22+
// - Full table replacement per change (brief gap, possible races)
23+
// - gatherAllRootlessBridgePorts reads all containers from DB (no locks)
24+
25+
package libpod
26+
27+
import (
28+
"github.com/containers/podman/v6/libpod/define"
29+
"go.podman.io/common/libnetwork/pasta"
30+
"go.podman.io/common/libnetwork/types"
31+
)
32+
33+
// TODO: When pesto gains --add, --clear, --delete flags, switch from full
34+
// table replacement to incremental updates to avoid brief port interruptions
35+
// and reduce overhead with many containers and the risk of race conditions.
36+
37+
func (r *Runtime) pestoSocketPath() string {
38+
info, err := r.network.RootlessNetnsInfo()
39+
if err != nil || info == nil {
40+
return ""
41+
}
42+
return info.PestoSocketPath
43+
}
44+
45+
// setupRootlessPortMappingViaPesto configures port forwarding for a rootless
46+
// bridge container by updating the shared pasta instance's forwarding table.
47+
func (r *Runtime) setupRootlessPortMappingViaPesto(ctr *Container) error {
48+
allPorts, err := r.gatherAllRootlessBridgePorts(ctr, true)
49+
if err != nil {
50+
return err
51+
}
52+
if len(allPorts) == 0 {
53+
return nil
54+
}
55+
56+
if err := pasta.PestoSetupPorts(r.config, r.pestoSocketPath(), allPorts); err != nil {
57+
return err
58+
}
59+
return nil
60+
}
61+
62+
// teardownRootlessPortMappingViaPesto removes a container's ports from pasta's forwarding table.
63+
func (r *Runtime) teardownRootlessPortMappingViaPesto(ctr *Container) error {
64+
remainingPorts, err := r.gatherAllRootlessBridgePorts(ctr, false)
65+
if err != nil {
66+
return err
67+
}
68+
return pasta.PestoTeardownPorts(r.config, r.pestoSocketPath(), remainingPorts)
69+
}
70+
71+
// gatherAllRootlessBridgePorts collects port mappings from all running
72+
// rootless bridge containers. When includeCtr is true, ctr's own ports
73+
// are included; when false they are excluded.
74+
func (r *Runtime) gatherAllRootlessBridgePorts(ctr *Container, includeCtr bool) ([]types.PortMapping, error) {
75+
var allPorts []types.PortMapping
76+
77+
ctrs, err := r.state.AllContainers(true)
78+
if err != nil {
79+
return nil, err
80+
}
81+
for _, c := range ctrs {
82+
if c.ID() == ctr.ID() {
83+
continue
84+
}
85+
if c.state.State != define.ContainerStateRunning {
86+
continue
87+
}
88+
if !c.config.NetMode.IsBridge() {
89+
continue
90+
}
91+
allPorts = append(allPorts, c.convertPortMappings()...)
92+
}
93+
if includeCtr {
94+
allPorts = append(allPorts, ctr.convertPortMappings()...)
95+
}
96+
return allPorts, nil
97+
}

test/e2e/common_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,6 +1629,52 @@ func testPortConnection(port int) {
16291629
Expect(err).ToNot(HaveOccurred())
16301630
}
16311631

1632+
// sendMessageToPort sends a message to the given tcp port
1633+
func sendMessageToPort(port int, message string) {
1634+
GinkgoHelper()
1635+
conn, err := net.DialTimeout("tcp", net.JoinHostPort("localhost", strconv.Itoa(port)), 5*time.Second)
1636+
Expect(err).ToNot(HaveOccurred(), "should connect to published port")
1637+
1638+
tcpConn := conn.(*net.TCPConn)
1639+
_, err = tcpConn.Write([]byte(message))
1640+
Expect(err).ToNot(HaveOccurred())
1641+
1642+
err = tcpConn.CloseWrite()
1643+
Expect(err).ToNot(HaveOccurred())
1644+
1645+
err = tcpConn.SetReadDeadline(time.Now().Add(5 * time.Second))
1646+
Expect(err).ToNot(HaveOccurred())
1647+
_, _ = io.Copy(io.Discard, tcpConn)
1648+
tcpConn.Close()
1649+
}
1650+
1651+
// startNCContainer starts a detached container running nc (netcat) listening
1652+
// on the given port, waits for it to be ready, and returns the container name.
1653+
func (p *PodmanTestIntegration) startNCContainer(name string, listenPort int, extraArgs ...string) string {
1654+
GinkgoHelper()
1655+
portStr := strconv.Itoa(listenPort)
1656+
args := append([]string{"run", "-d", "--name", name}, extraArgs...)
1657+
args = append(args, ALPINE, "sh", "-c", "nc -l -n -v -p "+portStr+" 2>&1")
1658+
p.PodmanExitCleanly(args...)
1659+
p.WaitForContainerLog(name, "listening")
1660+
return name
1661+
}
1662+
1663+
// WaitForContainerLog polls container logs until the given substring appears
1664+
// in either stdout or stderr. Fails the test if not found within the timeout.
1665+
func (p *PodmanTestIntegration) WaitForContainerLog(ctrName string, substr string) {
1666+
GinkgoHelper()
1667+
for range 10 {
1668+
logs := p.Podman([]string{"logs", ctrName})
1669+
logs.WaitWithDefaultTimeout()
1670+
if strings.Contains(logs.ErrorToString(), substr) || strings.Contains(logs.OutputToString(), substr) {
1671+
return
1672+
}
1673+
time.Sleep(500 * time.Millisecond)
1674+
}
1675+
Fail(fmt.Sprintf("timed out waiting for %q in logs of container %s", substr, ctrName))
1676+
}
1677+
16321678
func createNetworkName(name string) string {
16331679
return name + stringid.GenerateRandomID()[:10]
16341680
}

0 commit comments

Comments
 (0)