Graceful startup/shutdown + zero-downtime deployments (#469)

metachris · web-flow · commit c15287785b25 · 2023-06-20T16:19:56.000+02:00
* graceful startup + shutdown

* doc: example config

* opt-in api not sending bids during shutdown
diff --git a/README.md b/README.md
@@ -118,21 +118,23 @@ redis-cli DEL boost-relay/sepolia:validators-registration boost-relay/sepolia:va
 
 #### General
 
-* `ACTIVE_VALIDATOR_HOURS` - number of hours to track active proposers in redis (default: 3)
-* `API_TIMEOUT_READ_MS` - http read timeout in milliseconds (default: 1500)
-* `API_TIMEOUT_READHEADER_MS` - http read header timeout in milliseconds (default: 600)
-* `API_TIMEOUT_WRITE_MS` - http write timeout in milliseconds (default: 10000)
-* `API_TIMEOUT_IDLE_MS` - http idle timeout in milliseconds (default: 3000)
-* `API_MAX_HEADER_BYTES` - http maximum header byted (default: 60kb)
-* `BLOCKSIM_MAX_CONCURRENT` - maximum number of concurrent block-sim requests (0 for no maximum, default: 4)
-* `BLOCKSIM_TIMEOUT_MS` - builder block submission validation request timeout (default: 3000)
+* `ACTIVE_VALIDATOR_HOURS` - number of hours to track active proposers in redis (default: `3`)
+* `API_MAX_HEADER_BYTES` - http maximum header bytes (default: `60_000`)
+* `API_TIMEOUT_READ_MS` - http read timeout in milliseconds (default: `1_500`)
+* `API_TIMEOUT_READHEADER_MS` - http read header timeout in milliseconds (default: `600`)
+* `API_TIMEOUT_WRITE_MS` - http write timeout in milliseconds (default: `10_000`)
+* `API_TIMEOUT_IDLE_MS` - http idle timeout in milliseconds (default: `3_000`)
+* `API_SHUTDOWN_WAIT_SEC` - how long to wait on shutdown before stopping server, to allow draining of requests (default: `30`)
+* `API_SHUTDOWN_STOP_SENDING_BIDS` - whether API should stop sending bids during shutdown (only useful in single-instance/testnet setups, default: `false`)
+* `BLOCKSIM_MAX_CONCURRENT` - maximum number of concurrent block-sim requests (0 for no maximum, default: `4`)
+* `BLOCKSIM_TIMEOUT_MS` - builder block submission validation request timeout (default: `3000`)
 * `DB_DONT_APPLY_SCHEMA` - disable applying DB schema on startup (useful for connecting data API to read-only replica)
 * `DB_TABLE_PREFIX` - prefix to use for db tables (default uses `dev`)
-* `GETPAYLOAD_RETRY_TIMEOUT_MS` - getPayload retry getting a payload if first try failed (default: 100)
+* `GETPAYLOAD_RETRY_TIMEOUT_MS` - getPayload retry getting a payload if first try failed (default: `100`)
 * `MEMCACHED_URIS` - optional comma separated list of memcached endpoints, typically used as secondary storage alongside Redis
-* `MEMCACHED_EXPIRY_SECONDS` - item expiry timeout when using memcache (default: 45)
-* `MEMCACHED_CLIENT_TIMEOUT_MS` - client timeout in milliseconds (default: 250)
-* `MEMCACHED_MAX_IDLE_CONNS` - client max idle conns (default: 10)
+* `MEMCACHED_EXPIRY_SECONDS` - item expiry timeout when using memcache (default: `45`)
+* `MEMCACHED_CLIENT_TIMEOUT_MS` - client timeout in milliseconds (default: `250`)
+* `MEMCACHED_MAX_IDLE_CONNS` - client max idle conns (default: `10`)
 * `NUM_ACTIVE_VALIDATOR_PROCESSORS` - proposer API - number of goroutines to listen to the active validators channel
 * `NUM_VALIDATOR_REG_PROCESSORS` - proposer API - number of goroutines to listen to the validator registration channel
 * `NO_HEADER_USERAGENTS` - proposer API - comma separated list of user agents for which no bids should be returned
@@ -181,7 +183,7 @@ By default, the execution payloads for all block submission are stored in Redis
 to provide redundant data availability for getPayload responses. But the database table is not pruned automatically,
 because it takes a lot of resources to rebuild the indexes (and a better option is using `TRUNCATE`).
 
-Storing all the payloads in the database can lead to terrabytes of data in this particular table. Now it's also possible
+Storing all the payloads in the database can lead to terabytes of data in this particular table. Now it's also possible
 to use memcached as a second data availability layer. Using memcached is optional and disabled by default.
 
 To enable memcached, you just need to supply the memcached URIs either via environment variable (i.e.
diff --git a/cmd/api.go b/cmd/api.go
@@ -139,7 +139,7 @@ var apiCmd = &cobra.Command{
 		}
 
 		log.Info("Setting up datastore...")
-		ds, err := datastore.NewDatastore(log, redis, mem, db)
+		ds, err := datastore.NewDatastore(redis, mem, db)
 		if err != nil {
 			log.WithError(err).Fatalf("Failed setting up prod datastore")
 		}
diff --git a/common/common.go b/common/common.go
@@ -19,6 +19,10 @@ var (
 	DurationPerEpoch = DurationPerSlot * time.Duration(SlotsPerEpoch)
 )
 
+// SlotToEpoch converts a slot number into an epoch number by integer-dividing
+// the slot by SlotsPerEpoch.
+func SlotToEpoch(slot uint64) uint64 {
+	epoch := slot / SlotsPerEpoch
+	return epoch
+}
+
 // HTTPServerTimeouts are various timeouts for requests to the mev-boost HTTP server
 type HTTPServerTimeouts struct {
 	Read       time.Duration // Timeout for body reads. None if 0.
diff --git a/common/test_utils.go b/common/test_utils.go
@@ -1,7 +1,9 @@
 package common
 
 import (
+	"bytes"
 	"compress/gzip"
+	"encoding/base64"
 	"encoding/json"
 	"io"
 	"os"
@@ -98,3 +100,16 @@ func LoadGzippedJSON(t *testing.T, filename string, dst any) {
 	err := json.Unmarshal(b, dst)
 	require.NoError(t, err)
 }
+
+// MustB64Gunzip decodes the standard-base64 string s, gunzips the decoded bytes
+// and returns the decompressed data. It panics if s is not valid base64, if the
+// decoded bytes do not start a valid gzip stream, or if decompression fails.
+func MustB64Gunzip(s string) []byte {
+	b, err := base64.StdEncoding.DecodeString(s)
+	if err != nil {
+		// Surface malformed base64 directly instead of a confusing gzip error later.
+		panic(err)
+	}
+	gzreader, err := gzip.NewReader(bytes.NewReader(b))
+	if err != nil {
+		panic(err)
+	}
+	output, err := io.ReadAll(gzreader)
+	if err != nil {
+		panic(err)
+	}
+	return output
+}
diff --git a/common/utils.go b/common/utils.go
@@ -11,8 +11,10 @@ import (
 	"math/big"
 	"net/http"
 	"os"
+	"strconv"
 	"strings"
 	"testing"
+	"time"
 
 	"github.com/attestantio/go-builder-client/api/capella"
 	v1 "github.com/attestantio/go-builder-client/api/v1"
@@ -233,3 +235,15 @@ func CreateTestBlockSubmission(t *testing.T, builderPubkey string, value *big.In
 
 	return payload, getPayloadResponse, getHeaderResponse
 }
+
+// GetEnvDurationSec reads the environment variable key, interprets its value as a
+// whole number of seconds and returns it as a time.Duration. If the variable is not
+// set, or its value is not a valid integer, defaultValueSec (in seconds) is returned.
+func GetEnvDurationSec(key string, defaultValueSec int) time.Duration {
+	if value, ok := os.LookupEnv(key); ok {
+		// Use the parsed value only when parsing succeeded; otherwise fall through to the default.
+		if val, err := strconv.Atoi(value); err == nil {
+			return time.Duration(val) * time.Second
+		}
+	}
+	return time.Duration(defaultValueSec) * time.Second
+}
diff --git a/datastore/datastore.go b/datastore/datastore.go
@@ -35,8 +35,6 @@ type GetPayloadResponseKey struct {
 
 // Datastore provides a local memory cache with a Redis and DB backend
 type Datastore struct {
-	log *logrus.Entry
-
 	redis     *RedisCache
 	memcached *Memcached
 	db        database.IDatabaseService
@@ -46,11 +44,13 @@ type Datastore struct {
 	knownValidatorsLock       sync.RWMutex
 	knownValidatorsIsUpdating uberatomic.Bool
 	knownValidatorsLastSlot   uberatomic.Uint64
+
+	// Used for proposer-API readiness check
+	KnownValidatorsWasUpdated uberatomic.Bool
 }
 
-func NewDatastore(log *logrus.Entry, redisCache *RedisCache, memcached *Memcached, db database.IDatabaseService) (ds *Datastore, err error) {
+func NewDatastore(redisCache *RedisCache, memcached *Memcached, db database.IDatabaseService) (ds *Datastore, err error) {
 	ds = &Datastore{
-		log:                     log.WithField("component", "datastore"),
 		db:                      db,
 		memcached:               memcached,
 		redis:                   redisCache,
@@ -66,7 +66,7 @@ func NewDatastore(log *logrus.Entry, redisCache *RedisCache, memcached *Memcache
 // For the CL client this is an expensive operation and takes a bunch of resources.
 // This is why we schedule the requests for slot 4 and 20 of every epoch, 6 seconds
 // into the slot (on suggestion of @potuz). It's also run once at startup.
-func (ds *Datastore) RefreshKnownValidators(beaconClient beaconclient.IMultiBeaconClient, slot uint64) {
+func (ds *Datastore) RefreshKnownValidators(log *logrus.Entry, beaconClient beaconclient.IMultiBeaconClient, slot uint64) {
 	// Ensure there's only one at a time
 	if isAlreadyUpdating := ds.knownValidatorsIsUpdating.Swap(true); isAlreadyUpdating {
 		return
@@ -75,19 +75,19 @@ func (ds *Datastore) RefreshKnownValidators(beaconClient beaconclient.IMultiBeac
 
 	headSlotPos := common.SlotPos(slot) // 1-based position in epoch (32 slots, 1..32)
 	lastUpdateSlot := ds.knownValidatorsLastSlot.Load()
-	log := ds.log.WithFields(logrus.Fields{
-		"method":         "RefreshKnownValidators",
-		"headSlot":       slot,
-		"headSlotPos":    headSlotPos,
-		"lastUpdateSlot": lastUpdateSlot,
+	log = log.WithFields(logrus.Fields{
+		"datastoreMethod": "RefreshKnownValidators",
+		"headSlot":        slot,
+		"headSlotPos":     headSlotPos,
+		"lastUpdateSlot":  lastUpdateSlot,
 	})
 
 	// Only proceed if slot newer than last updated
 	if slot <= lastUpdateSlot {
 		return
 	}
 
-	// 	// Minimum amount of slots between updates
+	// Minimum amount of slots between updates
 	slotsSinceLastUpdate := slot - lastUpdateSlot
 	if slotsSinceLastUpdate < 6 {
 		return
@@ -143,6 +143,7 @@ func (ds *Datastore) RefreshKnownValidators(beaconClient beaconclient.IMultiBeac
 	ds.knownValidatorsByIndex = knownValidatorsByIndex
 	ds.knownValidatorsLock.Unlock()
 
+	ds.KnownValidatorsWasUpdated.Store(true)
 	log.Infof("known validators updated")
 }
 
@@ -189,13 +190,8 @@ func (ds *Datastore) SaveValidatorRegistration(entry types.SignedValidatorRegist
 }
 
 // GetGetPayloadResponse returns the getPayload response from memory or Redis or Database
-func (ds *Datastore) GetGetPayloadResponse(slot uint64, proposerPubkey, blockHash string) (*common.VersionedExecutionPayload, error) {
-	log := ds.log.WithFields(logrus.Fields{
-		"method":         "GetGetPayloadResponse",
-		"slot":           slot,
-		"proposerPubkey": proposerPubkey,
-		"blockHash":      blockHash,
-	})
+func (ds *Datastore) GetGetPayloadResponse(log *logrus.Entry, slot uint64, proposerPubkey, blockHash string) (*common.VersionedExecutionPayload, error) {
+	log = log.WithField("datastoreMethod", "GetGetPayloadResponse")
 	_proposerPubkey := strings.ToLower(proposerPubkey)
 	_blockHash := strings.ToLower(blockHash)
 
diff --git a/datastore/datastore_test.go b/datastore/datastore_test.go
@@ -18,15 +18,15 @@ func setupTestDatastore(t *testing.T, mockDB *database.MockDB) *Datastore {
 	redisDs, err := NewRedisCache("", redisTestServer.Addr(), "")
 	require.NoError(t, err)
 
-	ds, err := NewDatastore(common.TestLog, redisDs, nil, mockDB)
+	ds, err := NewDatastore(redisDs, nil, mockDB)
 	require.NoError(t, err)
 
 	return ds
 }
 
 func TestGetPayloadFailure(t *testing.T) {
 	ds := setupTestDatastore(t, &database.MockDB{})
-	_, err := ds.GetGetPayloadResponse(1, "a", "b")
+	_, err := ds.GetGetPayloadResponse(common.TestLog, 1, "a", "b")
 	require.Error(t, ErrExecutionPayloadNotFound, err)
 }
 
@@ -44,7 +44,7 @@ func TestGetPayloadDatabaseFallback(t *testing.T) {
 		},
 	}
 	ds := setupTestDatastore(t, mockDB)
-	payload, err := ds.GetGetPayloadResponse(1, "a", "b")
+	payload, err := ds.GetGetPayloadResponse(common.TestLog, 1, "a", "b")
 	require.NoError(t, err)
 	require.Equal(t, "0x1bafdc454116b605005364976b134d761dd736cb4788d25c835783b46daeb121", payload.Capella.Capella.BlockHash.String())
 }
diff --git a/docs/docs/20230619-startup-shutdown-zero-downtime-deployments.md b/docs/docs/20230619-startup-shutdown-zero-downtime-deployments.md
@@ -0,0 +1,115 @@
+# On graceful service startup and shutdown, and zero-downtime deployments
+
+2023-06-19, by [@metachris](https://twitter.com/metachris)
+
+---
+
+This document explains the details of API service startup and shutdown behavior, in particular related to:
+- Proposer API
+  - Needing data before being able to handle `getPayload` requests (known validators)
+  - Draining getPayload and other requests before shutting down
+- Zero-downtime deployments
+
+---
+
+### TL;DR
+
+- We've added two endpoints: `/livez` and `/readyz` (per [k8s docs](https://kubernetes.io/docs/reference/using-api/health-checks/)):
+- On startup:
+    - `/livez` is immediately available and positive, and will stay so until the service is shut down
+    - `/readyz` starts negative, until all information is loaded to safely process requests (known validators for the proposer API)
+    - Configure your orchestration tooling to route traffic to the service only if and when `/readyz` is positive!
+- On shutdown:
+    - `/readyz` returns a negative result
+    - Wait a little and drain all requests
+    - Stop the webserver, and stop the program
+- See also: https://kubernetes.io/docs/reference/using-api/health-checks/
+
+---
+
+### Kubernetes background about health-checks
+
+There are three types of health-checks (probes): [k8s docs](https://kubernetes.io/docs/reference/using-api/health-checks/)
+
+1. Startup probe
+2. Liveness probe (`/livez`)
+3. Readiness probe (`/readyz`)
+
+(All of these can be HTTP requests or commands)
+
+1. startup check:
+    - only for the startup phase
+    - confirm that pod has started
+    - if it fails, k8s will destroy and recreate
+2. liveness check:
+    - indicates whether the service is alive. if `false`, then k8s should destroy & recreate the pods
+    - based on rules, timeouts, etc
+    - status exposed via `/livez`
+3. readiness check:
+    - Applications may be temporarily unable to serve traffic.
+    - An application might need to load large data or configuration files during startup or depend on external services after startup.
+    - In such cases, you don't want to kill the application, but you don't want to send it requests either.
+    - https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes
+    - status exposed via `/readyz`
+    - if that is `false`, then k8s will stop sending traffic to that pod but doesn't touch it otherwise
+
+---
+
+### API Startup + Shutdown Sequence
+
+The proposer API needs to load all known validators before serving traffic; otherwise, there's a risk of missed slots due to `getPayload` not having all the information it needs to succeed.
+
+**Correct startup sequence:**
+1. Service starts
+2. Does minimal initial checks
+3. Starts HTTP server (`live=true`, `ready=false`)
+4. Updates known validators from CL client (can take 10-30 sec)
+5. Sets `ready=true`, and starts receiving traffic
+
+At this point, the pod is operational and can service traffic.
+
+**Correct shutdown sequence:**
+
+1. Shutdown initiated (through signals `syscall.SIGINT` or `syscall.SIGTERM`)
+2. Set `ready=false` to stop receiving new traffic
+3. Wait some time
+4. Drain pending requests
+5. Shut down (setting `live=false` is not necessary anymore)
+
+
+---
+
+### Example k8s + AWS configuration
+
+```yaml
+ metadata:
+   name: boost-relay-api-proposer
++  annotations:
++    alb.ingress.kubernetes.io/healthcheck-interval-seconds: 10
++    alb.ingress.kubernetes.io/healthcheck-path: /readyz
++    alb.ingress.kubernetes.io/healthcheck-port: 8080
+ spec:
+  template:
+    spec:
+      containers:
+        - name: boost-relay-api-proposer
++          livenessProbe:
++            httpGet:
++              path: /livez
++              port: 8080
++              initialDelaySeconds: 5
++          readinessProbe:
++            httpGet:
++              path: /readyz
++              port: 8080
++              initialDelaySeconds: 30
+```
+
+---
+
+See also:
+
+- https://kubernetes.io/docs/reference/using-api/health-checks/
+- https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
+- https://komodor.com/blog/kubernetes-health-checks-everything-you-need-to-know/
+- https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.2/guide/ingress/annotations/
diff --git a/services/api/optimistic_test.go b/services/api/optimistic_test.go
@@ -118,7 +118,7 @@ func startTestBackend(t *testing.T) (*phase0.BLSPubKey, *bls.SecretKey, *testBac
 	require.NoError(t, err)
 	mockRedis, err := datastore.NewRedisCache("", redisTestServer.Addr(), "")
 	require.NoError(t, err)
-	mockDS, err := datastore.NewDatastore(backend.relay.log, mockRedis, nil, mockDB)
+	mockDS, err := datastore.NewDatastore(mockRedis, nil, mockDB)
 	require.NoError(t, err)
 
 	backend.relay.datastore = mockDS
diff --git a/services/api/service.go b/services/api/service.go
diff --git a/services/api/service_test.go b/services/api/service_test.go
diff --git a/services/api/types.go b/services/api/types.go

Original file line number	Diff line number	Diff line change
`@@ -139,7 +139,7 @@ var apiCmd = &cobra.Command{`
`139`	`139`	`}`
`140`	`140`
`141`	`141`	`log.Info("Setting up datastore...")`
`142`		`- ds, err := datastore.NewDatastore(log, redis, mem, db)`
	`142`	`+ ds, err := datastore.NewDatastore(redis, mem, db)`
`143`	`143`	`if err != nil {`
`144`	`144`	`log.WithError(err).Fatalf("Failed setting up prod datastore")`
`145`	`145`	`}`
Original file line number	Diff line number	Diff line change
`@@ -18,15 +18,15 @@ func setupTestDatastore(t testing.T, mockDB database.MockDB) *Datastore {`
`18`	`18`	`redisDs, err := NewRedisCache("", redisTestServer.Addr(), "")`
`19`	`19`	`require.NoError(t, err)`
`20`	`20`
`21`		`- ds, err := NewDatastore(common.TestLog, redisDs, nil, mockDB)`
	`21`	`+ ds, err := NewDatastore(redisDs, nil, mockDB)`
`22`	`22`	`require.NoError(t, err)`
`23`	`23`
`24`	`24`	`return ds`
`25`	`25`	`}`
`26`	`26`
`27`	`27`	`func TestGetPayloadFailure(t *testing.T) {`
`28`	`28`	`ds := setupTestDatastore(t, &database.MockDB{})`
`29`		`- _, err := ds.GetGetPayloadResponse(1, "a", "b")`
	`29`	`+ _, err := ds.GetGetPayloadResponse(common.TestLog, 1, "a", "b")`
`30`	`30`	`require.Error(t, ErrExecutionPayloadNotFound, err)`
`31`	`31`	`}`
`32`	`32`
`@@ -44,7 +44,7 @@ func TestGetPayloadDatabaseFallback(t *testing.T) {`
`44`	`44`	`},`
`45`	`45`	`}`
`46`	`46`	`ds := setupTestDatastore(t, mockDB)`
`47`		`- payload, err := ds.GetGetPayloadResponse(1, "a", "b")`
	`47`	`+ payload, err := ds.GetGetPayloadResponse(common.TestLog, 1, "a", "b")`
`48`	`48`	`require.NoError(t, err)`
`49`	`49`	`require.Equal(t, "0x1bafdc454116b605005364976b134d761dd736cb4788d25c835783b46daeb121", payload.Capella.Capella.BlockHash.String())`
`50`	`50`	`}`