|
| 1 | +// Package sqlretry provides a shared retry policy for transient SQL transaction |
| 2 | +// failures (serialization failures / deadlocks under SERIALIZABLE isolation). |
| 3 | +// |
| 4 | +// It is used by both SQL-backed storages — storage/sql (raw database/sql) and |
| 5 | +// storage/ent (the ent ORM) — which both run refresh-token rotation in a |
| 6 | +// SERIALIZABLE transaction and must be prepared to retry transactions the |
| 7 | +// database aborts under concurrency. |
| 8 | +package sqlretry |
| 9 | + |
| 10 | +import ( |
| 11 | + "errors" |
| 12 | + "math/rand" |
| 13 | + "time" |
| 14 | + |
| 15 | + "github.com/go-sql-driver/mysql" |
| 16 | + "github.com/lib/pq" |
| 17 | +) |
| 18 | + |
// MaxRetries caps how many times a transaction is re-run after its first
// attempt; the first attempt itself does not count toward the limit.
//
// Retrying is not optional on Postgres: applications are required to be ready
// to re-run transactions aborted with SQLSTATE 40001, see
// https://www.postgresql.org/docs/current/transaction-iso.html.
//
// A budget of 8 is comfortably enough for realistic refresh-token contention.
// Paired with jittered backoff, which spreads retrying transactions apart so
// that few of them collide at any instant, it also gets through pathological
// high-contention conformance tests while keeping worst-case latency bounded.
const MaxRetries = 8
| 29 | + |
// backoffMs lists the base delay, in milliseconds, applied before each
// successive retry. Once the retries outnumber the entries, the final entry
// keeps being used. On top of the base delay a random jitter of at most the
// same size is added, so that transactions retrying together drift apart
// instead of colliding again (thundering herd).
var backoffMs = []int{
	5,
	10,
	25,
	50,
	100,
}
| 35 | + |
// SQLSTATE codes reported by Postgres (via lib/pq) that mark a transaction as
// safe to retry.
const (
	pgErrSerializationFailure = "40001" // serialization_failure
	pgErrDeadlockDetected     = "40P01" // deadlock_detected
)

// MySQL server error numbers that mark a transaction as safe to retry.
const (
	mysqlErrLockDeadlock    = 1213 // ER_LOCK_DEADLOCK
	mysqlErrLockWaitTimeout = 1205 // ER_LOCK_WAIT_TIMEOUT
)
| 42 | + |
| 43 | +// IsSerializationFailure reports whether err is a transient transaction failure |
| 44 | +// (serialization failure or deadlock) that is safe to retry by re-running the |
| 45 | +// whole transaction. It understands both the lib/pq and go-sql-driver/mysql |
| 46 | +// error types, unwrapping the error chain with errors.As. |
| 47 | +func IsSerializationFailure(err error) bool { |
| 48 | + var pqErr *pq.Error |
| 49 | + if errors.As(err, &pqErr) { |
| 50 | + return pqErr.Code == pgErrSerializationFailure || pqErr.Code == pgErrDeadlockDetected |
| 51 | + } |
| 52 | + |
| 53 | + var myErr *mysql.MySQLError |
| 54 | + if errors.As(err, &myErr) { |
| 55 | + return myErr.Number == mysqlErrLockDeadlock || myErr.Number == mysqlErrLockWaitTimeout |
| 56 | + } |
| 57 | + |
| 58 | + return false |
| 59 | +} |
| 60 | + |
| 61 | +// Do runs fn, retrying the whole operation on transient serialization/deadlock |
| 62 | +// failures with bounded, jittered backoff. Retrying is safe only when fn opens a |
| 63 | +// fresh transaction and re-reads current state on each attempt. |
| 64 | +// |
| 65 | +// isRetryable classifies an error as transient; if nil, no retries are performed |
| 66 | +// (used by backends, such as SQLite, that don't surface serialization failures). |
| 67 | +// onRetry, if non-nil, is called before each backoff sleep with the upcoming |
| 68 | +// (1-based) attempt number and the error that triggered the retry — e.g. for |
| 69 | +// logging. |
| 70 | +func Do(fn func() error, isRetryable func(error) bool, onRetry func(attempt int, err error)) error { |
| 71 | + for attempt := 0; ; attempt++ { |
| 72 | + err := fn() |
| 73 | + if err == nil || isRetryable == nil || !isRetryable(err) || attempt >= MaxRetries { |
| 74 | + return err |
| 75 | + } |
| 76 | + |
| 77 | + if onRetry != nil { |
| 78 | + onRetry(attempt+1, err) |
| 79 | + } |
| 80 | + |
| 81 | + backoff := backoffMs[min(attempt, len(backoffMs)-1)] |
| 82 | + time.Sleep(time.Duration(backoff+rand.Intn(backoff+1)) * time.Millisecond) |
| 83 | + } |
| 84 | +} |
0 commit comments