Skip to content

Commit d67f02c

Browse files
shailend-ggvisor-bot
authored andcommitted
Fix a bug around IFLA_NET_NS_FD
IFLA_NET_NS_FD is supplied in an RTM_SETLINK message from userland to move a link to a new netns. The same message might mutate other attributes of the link. Thus far we were applying those changes to a different link in the source netns. PiperOrigin-RevId: 834406670
1 parent 2d946ce commit d67f02c

File tree

7 files changed

+276
-96
lines changed

7 files changed

+276
-96
lines changed

pkg/sentry/socket/netstack/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ package(
99
declare_mutex(
1010
name = "netstack_link_mutex",
1111
out = "netstack_link_mutex.go",
12+
nested_lock_names = ["destStack"],
1213
package = "netstack",
1314
prefix = "netstackLink",
1415
)
@@ -55,6 +56,7 @@ go_library(
5556
"//pkg/sentry/socket/netfilter",
5657
"//pkg/sentry/socket/netlink/nlmsg",
5758
"//pkg/sentry/socket/netstack/packetmmap",
59+
"//pkg/sentry/uniqueid",
5860
"//pkg/sentry/vfs",
5961
"//pkg/sync",
6062
"//pkg/sync/locking",

pkg/sentry/socket/netstack/stack.go

Lines changed: 143 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,18 @@ type Stack struct {
4747
// linkMu serializes link creation, modification and deletion.
4848
// It is a rough parallel to the per-netns rtnl_mutex in Linux.
4949
linkMu netstackLinkMutex `state:"nosave"`
50+
51+
// id is a unique identifier for this stack, it is currently only
52+
// used for a deterministic lock ordering.
53+
id uint64
54+
}
55+
56+
// NewStack creates a new netstack.Stack which wraps the given tcpip.Stack.
57+
func NewStack(s *stack.Stack, id uint64) *Stack {
58+
return &Stack{
59+
Stack: s,
60+
id: id,
61+
}
5062
}
5163

5264
// AddInterfaceEventSubscriber implements inet.InterfaceEventPublisher.AddInterfaceEventSubscriber.
@@ -222,63 +234,132 @@ func (s *Stack) SetInterface(ctx context.Context, msg *nlmsg.Message) *syserr.Er
222234
// Netstack interfaces are always up.
223235
}
224236

225-
s.linkMu.Lock()
226-
defer s.linkMu.Unlock()
227-
return s.setLinkLocked(ctx, tcpip.NICID(ifinfomsg.Index), attrs)
237+
dstNs, err := s.lockSrcAndDst(ctx, attrs)
238+
if err != nil {
239+
return err
240+
}
241+
defer s.unlockSrcAndDst(ctx, dstNs)
242+
return s.setLinkLocked(ctx, tcpip.NICID(ifinfomsg.Index), attrs, dstNs)
243+
}
244+
245+
// If the linkAttrs map contains IFLA_NET_NS_FD, locks the source and
246+
// destination stacks and returns the destination netns, which must
247+
// be passed into unlockSrcAndDst by the caller.
248+
//
249+
// If instead the linkAttrs map does not contain IFLA_NET_NS_FD, or if it
250+
// points to the same netns as the source stack, only locks the source stack
251+
// and returns nil. And if the netns fd is invalid, returns an error.
252+
func (s *Stack) lockSrcAndDst(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesView) (*inet.Namespace, *syserr.Error) {
253+
if linkAttrs == nil {
254+
s.linkMu.Lock()
255+
return nil, nil
256+
}
257+
v, ok := linkAttrs[linux.IFLA_NET_NS_FD]
258+
if !ok {
259+
s.linkMu.Lock()
260+
return nil, nil
261+
}
262+
263+
fd, ok := v.Uint32()
264+
if !ok {
265+
return nil, syserr.ErrInvalidArgument
266+
}
267+
f := inet.NamespaceByFDFromContext(ctx)
268+
if f == nil {
269+
return nil, syserr.ErrInvalidArgument
270+
}
271+
ns, err := f(int32(fd)) // ns.DecRef() is called in unlockSrcAndDst().
272+
if err != nil {
273+
return nil, syserr.FromError(err)
274+
}
275+
276+
dst := ns.Stack().(*Stack)
277+
if s == dst {
278+
ns.DecRef(ctx)
279+
s.linkMu.Lock()
280+
return nil, nil
281+
}
282+
283+
if s.id < dst.id {
284+
s.linkMu.Lock()
285+
dst.linkMu.NestedLock(netstackLinkLockDeststack)
286+
} else {
287+
dst.linkMu.Lock()
288+
s.linkMu.NestedLock(netstackLinkLockDeststack)
289+
}
290+
return ns, nil
291+
}
292+
293+
func (s *Stack) unlockSrcAndDst(ctx context.Context, ns *inet.Namespace) {
294+
if ns == nil {
295+
s.linkMu.Unlock()
296+
return
297+
}
298+
dst := ns.Stack().(*Stack)
299+
if s == dst {
300+
// This cannot happen because lockSrcAndDst() returns nil in this case.
301+
panic("unlockSrcAndDst called with dst == src")
302+
}
303+
304+
if s.id < dst.id {
305+
dst.linkMu.NestedUnlock(netstackLinkLockDeststack)
306+
s.linkMu.Unlock()
307+
} else {
308+
s.linkMu.NestedUnlock(netstackLinkLockDeststack)
309+
dst.linkMu.Unlock()
310+
}
311+
ns.DecRef(ctx)
228312
}
229313

230-
// precondition: s.linkLock is held.
231-
func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map[uint16]nlmsg.BytesView) *syserr.Error {
232-
oldNicInfo, ok := s.Stack.SingleNICInfo(id)
314+
// precondition: s.linkLock is held. If dstNs is not nil, dstNs.Stack.linkMu is
315+
// also held.
316+
func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map[uint16]nlmsg.BytesView, dstNs *inet.Namespace) *syserr.Error {
317+
src := s
318+
changed := false
319+
nicInfo, ok := src.Stack.SingleNICInfo(id)
233320
if !ok {
234321
return syserr.ErrUnknownNICID
235322
}
236323

237-
// IFLA_NET_NS_FD has to be handled first, because other parameters may be reset.
238-
if v, ok := linkAttrs[linux.IFLA_NET_NS_FD]; ok {
239-
fd, ok := v.Uint32()
240-
if !ok {
241-
return syserr.ErrInvalidArgument
242-
}
243-
f := inet.NamespaceByFDFromContext(ctx)
244-
if f == nil {
245-
return syserr.ErrInvalidArgument
246-
}
247-
ns, err := f(int32(fd))
324+
dst := src
325+
if dstNs != nil {
326+
dst = dstNs.Stack().(*Stack)
327+
}
328+
if dst != src {
329+
var err tcpip.Error
330+
oldID := id
331+
id, err = src.Stack.SetNICStack(id, dst.Stack)
248332
if err != nil {
249-
return syserr.FromError(err)
333+
return syserr.TranslateNetstackError(err)
250334
}
251-
defer ns.DecRef(ctx)
252-
peer := ns.Stack().(*Stack)
253-
if peer.Stack != s.Stack {
254-
var err tcpip.Error
255-
oldID := id
256335

257-
id, err = s.Stack.SetNICStack(id, peer.Stack)
258-
if err != nil {
259-
return syserr.TranslateNetstackError(err)
260-
}
336+
src.sendDeleteEvent(ctx, oldID, nicInfo) // inform about exit from old ns
337+
changed = true // inform about entry into new ns
261338

262-
s.sendDeleteEvent(ctx, oldID, oldNicInfo) // inform about exit from old ns
263-
peer.sendChangeEvent(ctx, id) // inform about entry into new ns
264-
// TODO: Once we support IFLA_LINK_NETNSID, we need to call sendChangeEvent on
265-
// the peer interface if this interface is part of a veth pair.
339+
nicInfo, ok = dst.Stack.SingleNICInfo(id)
340+
if !ok {
341+
// Because we hold dst.linkMu, this should never happen.
342+
ctx.Warningf("Newly rehomed NIC %d not found in new stack %p", id, dst.Stack)
343+
return syserr.ErrUnknownNICID
266344
}
345+
src = dst
346+
347+
// TODO (b/465141970): Once we support IFLA_LINK_NETNSID, we need to call sendChangeEvent on
348+
// the peer interface if this interface is part of a veth pair.
267349
}
268350

269-
changed := false
270351
for t, v := range linkAttrs {
271352
switch t {
272353
case linux.IFLA_MASTER:
273354
master, ok := v.Uint32()
274355
if !ok {
275356
return syserr.ErrInvalidArgument
276357
}
277-
if mid, ok := s.Stack.GetNICCoordinatorID(id); ok && mid == tcpip.NICID(master) {
358+
if mid, ok := src.Stack.GetNICCoordinatorID(id); ok && mid == tcpip.NICID(master) {
278359
continue
279360
}
280361
if master != 0 {
281-
if err := s.Stack.SetNICCoordinator(id, tcpip.NICID(master)); err != nil {
362+
if err := src.Stack.SetNICCoordinator(id, tcpip.NICID(master)); err != nil {
282363
return syserr.TranslateNetstackError(err)
283364
}
284365
changed = true
@@ -288,18 +369,18 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map
288369
return syserr.ErrInvalidArgument
289370
}
290371
addr := tcpip.LinkAddress(v)
291-
if oldNicInfo.LinkAddress == addr {
372+
if nicInfo.LinkAddress == addr {
292373
continue
293374
}
294-
if err := s.Stack.SetNICAddress(id, addr); err != nil {
375+
if err := src.Stack.SetNICAddress(id, addr); err != nil {
295376
return syserr.TranslateNetstackError(err)
296377
}
297378
changed = true
298379
case linux.IFLA_IFNAME:
299-
if oldNicInfo.Name == v.String() {
380+
if nicInfo.Name == v.String() {
300381
continue
301382
}
302-
if err := s.Stack.SetNICName(id, v.String()); err != nil {
383+
if err := src.Stack.SetNICName(id, v.String()); err != nil {
303384
return syserr.TranslateNetstackError(err)
304385
}
305386
changed = true
@@ -308,10 +389,10 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map
308389
if !ok {
309390
return syserr.ErrInvalidArgument
310391
}
311-
if oldNicInfo.MTU == mtu {
392+
if nicInfo.MTU == mtu {
312393
continue
313394
}
314-
if err := s.Stack.SetNICMTU(id, mtu); err != nil {
395+
if err := src.Stack.SetNICMTU(id, mtu); err != nil {
315396
return syserr.TranslateNetstackError(err)
316397
}
317398
changed = true
@@ -321,7 +402,7 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map
321402
}
322403

323404
if changed {
324-
s.sendChangeEvent(ctx, id)
405+
src.sendChangeEvent(ctx, id)
325406
}
326407
return nil
327408
}
@@ -378,7 +459,10 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
378459
}
379460
}
380461

381-
s.linkMu.Lock()
462+
dstNs, sysErr := s.lockSrcAndDst(ctx, linkAttrs)
463+
if sysErr != nil {
464+
return sysErr
465+
}
382466
ep, peerEP := veth.NewPair(defaultMTU, veth.DefaultBacklogSize)
383467
id := s.Stack.NextNICID()
384468
peerID := peerStack.Stack.NextNICID()
@@ -389,21 +473,25 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
389473
Name: ifname,
390474
})
391475
if err != nil {
392-
s.linkMu.Unlock()
476+
s.unlockSrcAndDst(ctx, dstNs)
393477
return syserr.TranslateNetstackError(err)
394478
}
395-
if err := s.setLinkLocked(ctx, id, linkAttrs); err != nil {
396-
s.linkMu.Unlock()
479+
if err := s.setLinkLocked(ctx, id, linkAttrs, dstNs); err != nil {
480+
s.unlockSrcAndDst(ctx, dstNs)
397481
peerEP.Close()
398482
return err
399483
}
400-
s.linkMu.Unlock()
484+
s.unlockSrcAndDst(ctx, dstNs)
401485

402486
if peerName == "" {
403487
peerName = fmt.Sprintf("veth%d", peerID)
404488
}
405-
peerStack.linkMu.Lock()
406-
defer peerStack.linkMu.Unlock()
489+
peerDstNs, sysErr := peerStack.lockSrcAndDst(ctx, peerLinkAttrs)
490+
if sysErr != nil {
491+
return sysErr
492+
}
493+
defer peerStack.unlockSrcAndDst(ctx, peerDstNs)
494+
407495
err = peerStack.Stack.CreateNICWithOptions(peerID, packetsocket.New(ethernet.New(peerEP)), stack.NICOptions{
408496
Name: peerName,
409497
})
@@ -412,7 +500,7 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
412500
return syserr.TranslateNetstackError(err)
413501
}
414502
if peerLinkAttrs != nil {
415-
if err := peerStack.setLinkLocked(ctx, peerID, peerLinkAttrs); err != nil {
503+
if err := peerStack.setLinkLocked(ctx, peerID, peerLinkAttrs, peerDstNs); err != nil {
416504
peerStack.Stack.RemoveNIC(peerID)
417505
peerEP.Close()
418506
return err
@@ -423,8 +511,11 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
423511
}
424512

425513
func (s *Stack) newBridge(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesView, linkInfoAttrs map[uint16]nlmsg.BytesView) *syserr.Error {
426-
s.linkMu.Lock()
427-
defer s.linkMu.Unlock()
514+
dstNs, sysErr := s.lockSrcAndDst(ctx, linkAttrs)
515+
if sysErr != nil {
516+
return sysErr
517+
}
518+
defer s.unlockSrcAndDst(ctx, dstNs)
428519

429520
ifname := ""
430521

@@ -439,7 +530,7 @@ func (s *Stack) newBridge(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesV
439530
if err != nil {
440531
return syserr.TranslateNetstackError(err)
441532
}
442-
if err := s.setLinkLocked(ctx, id, linkAttrs); err != nil {
533+
if err := s.setLinkLocked(ctx, id, linkAttrs, dstNs); err != nil {
443534
return err
444535
}
445536

runsc/boot/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ go_library(
107107
"//pkg/sentry/strace",
108108
"//pkg/sentry/time",
109109
"//pkg/sentry/unimpl:unimplemented_syscall_go_proto",
110+
"//pkg/sentry/uniqueid",
110111
"//pkg/sentry/usage",
111112
"//pkg/sentry/vfs",
112113
"//pkg/sentry/watchdog",

0 commit comments

Comments
 (0)