diff --git a/.changelog/27167.txt b/.changelog/27167.txt new file mode 100644 index 00000000000..9a005b5da1c --- /dev/null +++ b/.changelog/27167.txt @@ -0,0 +1,3 @@ +```release-note:bug +server: Fix a potential race condition when reloading the server agent when changing TLS configuration +``` diff --git a/nomad/rpc.go b/nomad/rpc.go index d2e7a359f13..4a997aa2b32 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -301,8 +301,8 @@ func (r *rpcHandler) handleConn(ctx context.Context, conn net.Conn, rpcCtx *RPCC } // Enforce TLS if EnableRPC is set - if r.srv.config.TLSConfig.EnableRPC && !rpcCtx.TLS && pool.RPCType(buf[0]) != pool.RpcTLS { - if !r.srv.config.TLSConfig.RPCUpgradeMode { + if r.srv.rpcTLSEnabled.Load() && !rpcCtx.TLS && pool.RPCType(buf[0]) != pool.RpcTLS { + if !r.srv.rpcTLSUpgradeMode.Load() { r.logger.Warn("non-TLS connection attempted with RequireTLS set", "remote_addr", conn.RemoteAddr()) conn.Close() return diff --git a/nomad/server.go b/nomad/server.go index 912bc2c1b22..5ac3b7af8b4 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -105,8 +105,22 @@ const ( // Server is Nomad server which manages the job queues, // schedulers, and notification bus for agents. type Server struct { + + // config holds the server configuration. No fields are not mutated after + // initialization except for the TLS config, so it is safe to access them + // without a lock. Any fields that become reloadable, must be carefully + // assessed for concurrent access. config *Config + // rpcTLSEnabled and rpcTLSUpgradeMode are used by the RPC connection + // handler to determine TLS enforcement. The values can change at runtime + // due to config reloading. + // + // The setRPCTLSAtomics helper function should be used to set these values + // as it correctly handles nil configs. + rpcTLSEnabled atomic.Bool + rpcTLSUpgradeMode atomic.Bool + logger log.InterceptLogger // Connection pool to other Nomad servers @@ -350,6 +364,8 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigFunc // Create the server s := &Server{ config: config, + rpcTLSEnabled: atomic.Bool{}, + rpcTLSUpgradeMode: atomic.Bool{}, consulCatalog: consulCatalog, connPool: pool.NewPool(logger, serverRPCCache, serverMaxStreams, tlsWrap, config.RPCSessionConfig), logger: logger, @@ -372,6 +388,10 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigFunc lockDelayTimer: lock.NewDelayTimer(), } + // Store the server TLS RPC settings that will be used by the connection + // handler. + s.setRPCTLSAtomics(config.TLSConfig) + s.shutdownCtx, s.shutdownCancel = context.WithCancel(context.Background()) s.shutdownCh = s.shutdownCtx.Done() @@ -690,16 +710,19 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { s.tlsWrap = tlsWrap s.tlsWrapLock.Unlock() - // Keeping configuration in sync is important for other places that require - // access to config information, such as rpc.go, where we decide on what kind - // of network connections to accept depending on the server configuration + // Ensure the config object is updated. This is done to keep things in sync + // only; all access to TLS enforcement should be done via the atomics which + // are updated below. s.config.TLSConfig = newTLSConfig + // Update the RPC TLS atomics used for connection handling. + s.setRPCTLSAtomics(newTLSConfig) + // Kill any old listeners s.rpcCancel() // Update the authenticator, so any changes in TLS verification are applied. - s.auth.SetVerifyTLS(s.config.TLSConfig != nil && s.config.TLSConfig.EnableRPC && s.config.TLSConfig.VerifyServerHostname) + s.auth.SetVerifyTLS(s.config.TLSConfig != nil && s.rpcTLSEnabled.Load() && s.config.TLSConfig.VerifyServerHostname) s.rpcTLS = incomingTLS s.connPool.ReloadTLS(tlsWrap) @@ -2088,6 +2111,16 @@ func (s *Server) setReplyQueryMeta(stateStore *state.StateStore, table string, r return nil } +// setRPCTLSAtomics is a helper function to set the atomic values for RPC TLS +// handling based on the provided config. +// +// This should be used whenever the RPC TLS config is changed to ensure as it +// correctly handles nil configs which might otherwise be missed. +func (s *Server) setRPCTLSAtomics(cfg *config.TLSConfig) { + s.rpcTLSEnabled.Store(cfg != nil && cfg.EnableRPC) + s.rpcTLSUpgradeMode.Store(cfg != nil && cfg.RPCUpgradeMode) +} + // Region returns the region of the server func (s *Server) Region() string { return s.config.Region diff --git a/nomad/server_test.go b/nomad/server_test.go index ea490c6e48b..2a933b6fcfa 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -9,6 +9,7 @@ import ( "path" "runtime" "strings" + "sync/atomic" "testing" "time" @@ -441,7 +442,7 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS_OnlyRPC(t *testing.T) { err := s1.reloadTLSConnections(newTLSConfig) assert.Nil(err) - assert.True(s1.config.TLSConfig.EnableRPC) + must.True(t, s1.rpcTLSEnabled.Load()) assert.True(s1.config.TLSConfig.CertificateInfoIsEqual(newTLSConfig)) codec := rpcClient(t, s1) @@ -702,3 +703,30 @@ func TestServer_PreventRaftDowngrade(t *testing.T) { // Downgrading Raft should prevent the server from starting. require.Error(t, err) } + +func TestServer_setRPCTLSAtomics(t *testing.T) { + ci.Parallel(t) + + testServer := &Server{ + rpcTLSEnabled: atomic.Bool{}, + rpcTLSUpgradeMode: atomic.Bool{}, + } + + testServer.setRPCTLSAtomics(nil) + must.False(t, testServer.rpcTLSEnabled.Load()) + must.False(t, testServer.rpcTLSUpgradeMode.Load()) + + testServer.setRPCTLSAtomics(&config.TLSConfig{ + EnableRPC: true, + RPCUpgradeMode: false, + }) + must.True(t, testServer.rpcTLSEnabled.Load()) + must.False(t, testServer.rpcTLSUpgradeMode.Load()) + + testServer.setRPCTLSAtomics(&config.TLSConfig{ + EnableRPC: true, + RPCUpgradeMode: true, + }) + must.True(t, testServer.rpcTLSEnabled.Load()) + must.True(t, testServer.rpcTLSUpgradeMode.Load()) +}