@@ -47,6 +47,18 @@ type Stack struct {
4747 // linkMu serializes link creation, modification and deletion.
4848 // It is a rough parallel to the per-netns rtnl_mutex in Linux.
4949 linkMu netstackLinkMutex `state:"nosave"`
50+
51+ // id is a unique identifier for this stack, it is currently only
52+ // used for a deterministic lock ordering.
53+ id uint64
54+ }
55+
56+ // NewStack creates a new netstack.Stack which wraps the given tcpip.Stack.
57+ func NewStack (s * stack.Stack , id uint64 ) * Stack {
58+ return & Stack {
59+ Stack : s ,
60+ id : id ,
61+ }
5062}
5163
5264// AddInterfaceEventSubscriber implements inet.InterfaceEventPublisher.AddInterfaceEventSubscriber.
@@ -222,63 +234,132 @@ func (s *Stack) SetInterface(ctx context.Context, msg *nlmsg.Message) *syserr.Er
222234 // Netstack interfaces are always up.
223235 }
224236
225- s .linkMu .Lock ()
226- defer s .linkMu .Unlock ()
227- return s .setLinkLocked (ctx , tcpip .NICID (ifinfomsg .Index ), attrs )
237+ dstNs , err := s .lockSrcAndDst (ctx , attrs )
238+ if err != nil {
239+ return err
240+ }
241+ defer s .unlockSrcAndDst (ctx , dstNs )
242+ return s .setLinkLocked (ctx , tcpip .NICID (ifinfomsg .Index ), attrs , dstNs )
243+ }
244+
245+ // If the linkAttrs map contains IFLA_NET_NS_FD, locks the source and
246+ // destination stacks and returns the destination netns, which must
247+ // be passed into unlockSrcAndDst by the caller.
248+ //
249+ // If instead the linkAttrs map does not contain IFLA_NET_NS_FD, or if it
250+ // points to the same netns as the source stack, only locks the source stack
251+ // and returns nil. And if the netns fd is invalid, returns an error.
252+ func (s * Stack ) lockSrcAndDst (ctx context.Context , linkAttrs map [uint16 ]nlmsg.BytesView ) (* inet.Namespace , * syserr.Error ) {
253+ if linkAttrs == nil {
254+ s .linkMu .Lock ()
255+ return nil , nil
256+ }
257+ v , ok := linkAttrs [linux .IFLA_NET_NS_FD ]
258+ if ! ok {
259+ s .linkMu .Lock ()
260+ return nil , nil
261+ }
262+
263+ fd , ok := v .Uint32 ()
264+ if ! ok {
265+ return nil , syserr .ErrInvalidArgument
266+ }
267+ f := inet .NamespaceByFDFromContext (ctx )
268+ if f == nil {
269+ return nil , syserr .ErrInvalidArgument
270+ }
271+ ns , err := f (int32 (fd )) // ns.DecRef() is called in unlockSrcAndDst().
272+ if err != nil {
273+ return nil , syserr .FromError (err )
274+ }
275+
276+ dst := ns .Stack ().(* Stack )
277+ if s == dst {
278+ ns .DecRef (ctx )
279+ s .linkMu .Lock ()
280+ return nil , nil
281+ }
282+
283+ if s .id < dst .id {
284+ s .linkMu .Lock ()
285+ dst .linkMu .NestedLock (netstackLinkLockDeststack )
286+ } else {
287+ dst .linkMu .Lock ()
288+ s .linkMu .NestedLock (netstackLinkLockDeststack )
289+ }
290+ return ns , nil
291+ }
292+
293+ func (s * Stack ) unlockSrcAndDst (ctx context.Context , ns * inet.Namespace ) {
294+ if ns == nil {
295+ s .linkMu .Unlock ()
296+ return
297+ }
298+ dst := ns .Stack ().(* Stack )
299+ if s == dst {
300+ // This cannot happen because lockSrcAndDst() returns nil in this case.
301+ panic ("unlockSrcAndDst called with dst == src" )
302+ }
303+
304+ if s .id < dst .id {
305+ dst .linkMu .NestedUnlock (netstackLinkLockDeststack )
306+ s .linkMu .Unlock ()
307+ } else {
308+ s .linkMu .NestedUnlock (netstackLinkLockDeststack )
309+ dst .linkMu .Unlock ()
310+ }
311+ ns .DecRef (ctx )
228312}
229313
230- // precondition: s.linkLock is held.
231- func (s * Stack ) setLinkLocked (ctx context.Context , id tcpip.NICID , linkAttrs map [uint16 ]nlmsg.BytesView ) * syserr.Error {
232- oldNicInfo , ok := s .Stack .SingleNICInfo (id )
314+ // precondition: s.linkLock is held. If dstNs is not nil, dstNs.Stack.linkMu is
315+ // also held.
316+ func (s * Stack ) setLinkLocked (ctx context.Context , id tcpip.NICID , linkAttrs map [uint16 ]nlmsg.BytesView , dstNs * inet.Namespace ) * syserr.Error {
317+ src := s
318+ changed := false
319+ nicInfo , ok := src .Stack .SingleNICInfo (id )
233320 if ! ok {
234321 return syserr .ErrUnknownNICID
235322 }
236323
237- // IFLA_NET_NS_FD has to be handled first, because other parameters may be reset.
238- if v , ok := linkAttrs [linux .IFLA_NET_NS_FD ]; ok {
239- fd , ok := v .Uint32 ()
240- if ! ok {
241- return syserr .ErrInvalidArgument
242- }
243- f := inet .NamespaceByFDFromContext (ctx )
244- if f == nil {
245- return syserr .ErrInvalidArgument
246- }
247- ns , err := f (int32 (fd ))
324+ dst := src
325+ if dstNs != nil {
326+ dst = dstNs .Stack ().(* Stack )
327+ }
328+ if dst != src {
329+ var err tcpip.Error
330+ oldID := id
331+ id , err = src .Stack .SetNICStack (id , dst .Stack )
248332 if err != nil {
249- return syserr .FromError (err )
333+ return syserr .TranslateNetstackError (err )
250334 }
251- defer ns .DecRef (ctx )
252- peer := ns .Stack ().(* Stack )
253- if peer .Stack != s .Stack {
254- var err tcpip.Error
255- oldID := id
256335
257- id , err = s .Stack .SetNICStack (id , peer .Stack )
258- if err != nil {
259- return syserr .TranslateNetstackError (err )
260- }
336+ src .sendDeleteEvent (ctx , oldID , nicInfo ) // inform about exit from old ns
337+ changed = true // inform about entry into new ns
261338
262- s .sendDeleteEvent (ctx , oldID , oldNicInfo ) // inform about exit from old ns
263- peer .sendChangeEvent (ctx , id ) // inform about entry into new ns
264- // TODO: Once we support IFLA_LINK_NETNSID, we need to call sendChangeEvent on
265- // the peer interface if this interface is part of a veth pair.
339+ nicInfo , ok = dst .Stack .SingleNICInfo (id )
340+ if ! ok {
341+ // Because we hold dst.linkMu, this should never happen.
342+ ctx .Warningf ("Newly rehomed NIC %d not found in new stack %p" , id , dst .Stack )
343+ return syserr .ErrUnknownNICID
266344 }
345+ src = dst
346+
347+ // TODO (b/465141970): Once we support IFLA_LINK_NETNSID, we need to call sendChangeEvent on
348+ // the peer interface if this interface is part of a veth pair.
267349 }
268350
269- changed := false
270351 for t , v := range linkAttrs {
271352 switch t {
272353 case linux .IFLA_MASTER :
273354 master , ok := v .Uint32 ()
274355 if ! ok {
275356 return syserr .ErrInvalidArgument
276357 }
277- if mid , ok := s .Stack .GetNICCoordinatorID (id ); ok && mid == tcpip .NICID (master ) {
358+ if mid , ok := src .Stack .GetNICCoordinatorID (id ); ok && mid == tcpip .NICID (master ) {
278359 continue
279360 }
280361 if master != 0 {
281- if err := s .Stack .SetNICCoordinator (id , tcpip .NICID (master )); err != nil {
362+ if err := src .Stack .SetNICCoordinator (id , tcpip .NICID (master )); err != nil {
282363 return syserr .TranslateNetstackError (err )
283364 }
284365 changed = true
@@ -288,18 +369,18 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map
288369 return syserr .ErrInvalidArgument
289370 }
290371 addr := tcpip .LinkAddress (v )
291- if oldNicInfo .LinkAddress == addr {
372+ if nicInfo .LinkAddress == addr {
292373 continue
293374 }
294- if err := s .Stack .SetNICAddress (id , addr ); err != nil {
375+ if err := src .Stack .SetNICAddress (id , addr ); err != nil {
295376 return syserr .TranslateNetstackError (err )
296377 }
297378 changed = true
298379 case linux .IFLA_IFNAME :
299- if oldNicInfo .Name == v .String () {
380+ if nicInfo .Name == v .String () {
300381 continue
301382 }
302- if err := s .Stack .SetNICName (id , v .String ()); err != nil {
383+ if err := src .Stack .SetNICName (id , v .String ()); err != nil {
303384 return syserr .TranslateNetstackError (err )
304385 }
305386 changed = true
@@ -308,10 +389,10 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map
308389 if ! ok {
309390 return syserr .ErrInvalidArgument
310391 }
311- if oldNicInfo .MTU == mtu {
392+ if nicInfo .MTU == mtu {
312393 continue
313394 }
314- if err := s .Stack .SetNICMTU (id , mtu ); err != nil {
395+ if err := src .Stack .SetNICMTU (id , mtu ); err != nil {
315396 return syserr .TranslateNetstackError (err )
316397 }
317398 changed = true
@@ -321,7 +402,7 @@ func (s *Stack) setLinkLocked(ctx context.Context, id tcpip.NICID, linkAttrs map
321402 }
322403
323404 if changed {
324- s .sendChangeEvent (ctx , id )
405+ src .sendChangeEvent (ctx , id )
325406 }
326407 return nil
327408}
@@ -378,7 +459,10 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
378459 }
379460 }
380461
381- s .linkMu .Lock ()
462+ dstNs , sysErr := s .lockSrcAndDst (ctx , linkAttrs )
463+ if sysErr != nil {
464+ return sysErr
465+ }
382466 ep , peerEP := veth .NewPair (defaultMTU , veth .DefaultBacklogSize )
383467 id := s .Stack .NextNICID ()
384468 peerID := peerStack .Stack .NextNICID ()
@@ -389,21 +473,25 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
389473 Name : ifname ,
390474 })
391475 if err != nil {
392- s .linkMu . Unlock ( )
476+ s .unlockSrcAndDst ( ctx , dstNs )
393477 return syserr .TranslateNetstackError (err )
394478 }
395- if err := s .setLinkLocked (ctx , id , linkAttrs ); err != nil {
396- s .linkMu . Unlock ( )
479+ if err := s .setLinkLocked (ctx , id , linkAttrs , dstNs ); err != nil {
480+ s .unlockSrcAndDst ( ctx , dstNs )
397481 peerEP .Close ()
398482 return err
399483 }
400- s .linkMu . Unlock ( )
484+ s .unlockSrcAndDst ( ctx , dstNs )
401485
402486 if peerName == "" {
403487 peerName = fmt .Sprintf ("veth%d" , peerID )
404488 }
405- peerStack .linkMu .Lock ()
406- defer peerStack .linkMu .Unlock ()
489+ peerDstNs , sysErr := peerStack .lockSrcAndDst (ctx , peerLinkAttrs )
490+ if sysErr != nil {
491+ return sysErr
492+ }
493+ defer peerStack .unlockSrcAndDst (ctx , peerDstNs )
494+
407495 err = peerStack .Stack .CreateNICWithOptions (peerID , packetsocket .New (ethernet .New (peerEP )), stack.NICOptions {
408496 Name : peerName ,
409497 })
@@ -412,7 +500,7 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
412500 return syserr .TranslateNetstackError (err )
413501 }
414502 if peerLinkAttrs != nil {
415- if err := peerStack .setLinkLocked (ctx , peerID , peerLinkAttrs ); err != nil {
503+ if err := peerStack .setLinkLocked (ctx , peerID , peerLinkAttrs , peerDstNs ); err != nil {
416504 peerStack .Stack .RemoveNIC (peerID )
417505 peerEP .Close ()
418506 return err
@@ -423,8 +511,11 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
423511}
424512
425513func (s * Stack ) newBridge (ctx context.Context , linkAttrs map [uint16 ]nlmsg.BytesView , linkInfoAttrs map [uint16 ]nlmsg.BytesView ) * syserr.Error {
426- s .linkMu .Lock ()
427- defer s .linkMu .Unlock ()
514+ dstNs , sysErr := s .lockSrcAndDst (ctx , linkAttrs )
515+ if sysErr != nil {
516+ return sysErr
517+ }
518+ defer s .unlockSrcAndDst (ctx , dstNs )
428519
429520 ifname := ""
430521
@@ -439,7 +530,7 @@ func (s *Stack) newBridge(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesV
439530 if err != nil {
440531 return syserr .TranslateNetstackError (err )
441532 }
442- if err := s .setLinkLocked (ctx , id , linkAttrs ); err != nil {
533+ if err := s .setLinkLocked (ctx , id , linkAttrs , dstNs ); err != nil {
443534 return err
444535 }
445536
0 commit comments