Skip to content

Commit f88c298

Browse files
steveyeggeclaude
andcommitted
fix: Dolt test suite stability — eliminate container crashes and cascading failures (hq-33riy)
DROP DATABASE during test cleanup crashed the Dolt container, causing 189/255 tests to fail with "connection refused". Fixed by: - Skip DROP DATABASE in test cleanup (container terminates orphans) - Add concurrency semaphore (size 2) acquired after t.Parallel() with t.Cleanup release to prevent deadlocks - Bypass circuit breaker in BEADS_TEST_MODE (prevents cascade trips) - Unset BEADS_TEST_MODE in circuit breaker unit tests for real behavior - Limit concurrent test stores to MaxOpenConns=2 with MaxIdleConns=0 - Fix cross-type DepBlocks tests (epic→task disallowed by GH#1495) Result: 255/255 pass in ~27s (was 66/255 with 300s timeout). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a7755e5 commit f88c298

9 files changed

Lines changed: 63 additions & 21 deletions

internal/storage/dolt/circuit.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,13 @@ func newCircuitBreaker(port int) *circuitBreaker {
7373
// If the probe succeeds, the breaker resets to closed immediately. This
7474
// avoids the half-open→open re-trip race that can leave the breaker stuck.
7575
func (cb *circuitBreaker) Allow() bool {
76+
// In test mode, bypass the circuit breaker entirely. Tests manage their
77+
// own server lifecycle via testcontainers, and the file-based breaker
78+
// state persists across test runs causing cascading false-positive trips.
79+
if os.Getenv("BEADS_TEST_MODE") == "1" {
80+
return true
81+
}
82+
7683
cb.mu.Lock()
7784
defer cb.mu.Unlock()
7885

internal/storage/dolt/circuit_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ func TestCircuitBreaker_InitiallyAllows(t *testing.T) {
1717
}
1818

1919
func TestCircuitBreaker_TripsAfterThreshold(t *testing.T) {
20+
t.Setenv("BEADS_TEST_MODE", "") // need real breaker behavior
2021
cb := newTestCircuitBreaker(t)
2122

2223
// Record failures up to threshold
@@ -74,6 +75,7 @@ func TestCircuitBreaker_SuccessResets(t *testing.T) {
7475
}
7576

7677
func TestCircuitBreaker_ActiveProbeAfterCooldown_NoServer(t *testing.T) {
78+
t.Setenv("BEADS_TEST_MODE", "")
7779
cb := newTestCircuitBreaker(t)
7880

7981
// Trip the breaker
@@ -101,6 +103,7 @@ func TestCircuitBreaker_ActiveProbeAfterCooldown_NoServer(t *testing.T) {
101103
}
102104

103105
func TestCircuitBreaker_ActiveProbeAfterCooldown_ServerUp(t *testing.T) {
106+
t.Setenv("BEADS_TEST_MODE", "")
104107
// Start a TCP listener to simulate a healthy server
105108
ln, err := net.Listen("tcp", "127.0.0.1:0")
106109
if err != nil {
@@ -133,6 +136,7 @@ func TestCircuitBreaker_ActiveProbeAfterCooldown_ServerUp(t *testing.T) {
133136
}
134137

135138
func TestCircuitBreaker_LegacyHalfOpenState(t *testing.T) {
139+
t.Setenv("BEADS_TEST_MODE", "")
136140
// If a state file has half-open from an older version, the breaker
137141
// should handle it gracefully via active probe.
138142
ln, err := net.Listen("tcp", "127.0.0.1:0")
@@ -182,6 +186,7 @@ func TestCircuitBreaker_Reset(t *testing.T) {
182186
}
183187

184188
func TestCircuitBreaker_SharedState(t *testing.T) {
189+
t.Setenv("BEADS_TEST_MODE", "")
185190
// Two breakers for the same port should share state via the file
186191
dir := t.TempDir()
187192
path := filepath.Join(dir, "circuit.json")

internal/storage/dolt/create_guard_test.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,10 @@ func createTestDatabase(t *testing.T, port int, dbName string) {
9595
}
9696

9797
func dropTestDatabase(t *testing.T, port int, dbName string) {
98-
t.Helper()
99-
db := rawTestConn(t, port)
100-
defer db.Close()
101-
db.Exec(fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", dbName)) //nolint:errcheck
98+
// No-op: rapid DROP DATABASE crashes the Dolt test container.
99+
// Orphan databases are cleaned up when the container terminates.
100+
_ = port
101+
_ = dbName
102102
}
103103

104104
func containsAny(s string, substrs ...string) bool {
@@ -112,11 +112,14 @@ func containsAny(s string, substrs ...string) bool {
112112
}
113113

114114
// skipIfNoServer skips the test if the shared test Dolt server is not running.
115+
// Acquires a semaphore slot (released via t.Cleanup) to limit container load.
115116
func skipIfNoServer(t *testing.T) {
116117
t.Helper()
117118
if testServerPort == 0 {
118119
t.Skip("no test Dolt server running")
119120
}
121+
acquireTestSlot()
122+
t.Cleanup(releaseTestSlot)
120123
}
121124

122125
// --- Guard tests ---

internal/storage/dolt/cross_project_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ import (
3333
func setupTwoProjectStores(t *testing.T, prefixA, prefixB string) (storeA, storeB *DoltStore, cleanup func()) {
3434
t.Helper()
3535
skipIfNoDolt(t)
36+
acquireTestSlot()
37+
t.Cleanup(releaseTestSlot)
3638

3739
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
3840
defer cancel()
@@ -96,10 +98,8 @@ func setupTwoProjectStores(t *testing.T, prefixA, prefixB string) (storeA, store
9698
}
9799

98100
cleanup = func() {
99-
dropCtx, dropCancel := context.WithTimeout(context.Background(), 5*time.Second)
100-
defer dropCancel()
101-
_, _ = storeA.db.ExecContext(dropCtx, fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", dbNameA))
102-
_, _ = storeB.db.ExecContext(dropCtx, fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", dbNameB))
101+
// Skip DROP DATABASE — rapid CREATE/DROP cycles crash the Dolt container.
102+
// Orphan databases are cleaned up when the container terminates.
103103
storeA.Close()
104104
storeB.Close()
105105
os.RemoveAll(tmpDirA)
@@ -201,6 +201,8 @@ func TestCrossProject_ReadIsolation_DifferentPrefixes(t *testing.T) {
201201

202202
func TestCrossProject_PortCollision_SameDatabase(t *testing.T) {
203203
skipIfNoDolt(t)
204+
acquireTestSlot()
205+
t.Cleanup(releaseTestSlot)
204206

205207
ctx, cancel := concurrentTestContext(t)
206208
defer cancel()
@@ -261,9 +263,7 @@ func TestCrossProject_PortCollision_SameDatabase(t *testing.T) {
261263
}
262264

263265
defer func() {
264-
dropCtx, dropCancel := context.WithTimeout(context.Background(), 5*time.Second)
265-
defer dropCancel()
266-
_, _ = storeA.db.ExecContext(dropCtx, fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", sharedDB))
266+
// Skip DROP DATABASE — rapid CREATE/DROP cycles crash the Dolt container.
267267
storeA.Close()
268268
storeB.Close()
269269
os.RemoveAll(tmpDirA)

internal/storage/dolt/dependencies_extended_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ func TestGetDependencyTree(t *testing.T) {
287287
defer cancel()
288288

289289
// Create a simple tree: root -> child1, root -> child2
290-
root := &types.Issue{ID: "tree-root", Title: "Root", Status: types.StatusOpen, Priority: 1, IssueType: types.TypeEpic}
290+
root := &types.Issue{ID: "tree-root", Title: "Root", Status: types.StatusOpen, Priority: 1, IssueType: types.TypeTask}
291291
child1 := &types.Issue{ID: "tree-child1", Title: "Child 1", Status: types.StatusOpen, Priority: 2, IssueType: types.TypeTask}
292292
child2 := &types.Issue{ID: "tree-child2", Title: "Child 2", Status: types.StatusOpen, Priority: 2, IssueType: types.TypeTask}
293293

internal/storage/dolt/dolt_test.go

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,21 @@ import (
2525
// driver's async operations rather than with the DoltStore implementation.
2626
const testTimeout = 30 * time.Second
2727

28+
// testSem limits concurrent database-touching tests to avoid overwhelming the
29+
// shared Dolt testcontainer. Without this, 200+ parallel tests cause a
30+
// connection storm that crashes the container. Size 6 is conservative —
31+
// the container handles ~10 concurrent connections but we leave headroom.
32+
var testSem = make(chan struct{}, 2)
33+
34+
// acquireTestSlot blocks until a semaphore slot is available.
35+
// Must be called AFTER t.Parallel() to avoid deadlocks (t.Parallel suspends
36+
// the goroutine, and if it holds a semaphore slot while suspended, other
37+
// tests can't proceed).
38+
func acquireTestSlot() { testSem <- struct{}{} }
39+
40+
// releaseTestSlot returns a semaphore slot.
41+
func releaseTestSlot() { <-testSem }
42+
2843
// testContext returns a context with timeout for test operations
2944
func testContext(t *testing.T) (context.Context, context.CancelFunc) {
3045
t.Helper()
@@ -68,6 +83,11 @@ func setupTestStore(t *testing.T) (*DoltStore, func()) {
6883
skipIfNoDolt(t)
6984
t.Parallel()
7085

86+
// Acquire AFTER t.Parallel() to avoid deadlock — t.Parallel() suspends
87+
// the goroutine until the parent test finishes its sequential phase.
88+
acquireTestSlot()
89+
t.Cleanup(releaseTestSlot) // guaranteed even on panic/Fatalf
90+
7191
if testSharedDB == "" {
7292
t.Fatal("testSharedDB not set — TestMain did not initialize shared database")
7393
}
@@ -119,9 +139,12 @@ func setupTestStore(t *testing.T) (*DoltStore, func()) {
119139
// setupConcurrentTestStore creates a test store with its own database for
120140
// concurrent tests that need multiple connections. Branch-per-test isolation
121141
// requires MaxOpenConns=1, which prevents concurrent transactions.
142+
// MaxOpenConns is limited to 3 to avoid overwhelming the test container.
122143
func setupConcurrentTestStore(t *testing.T) (*DoltStore, func()) {
123144
t.Helper()
124145
skipIfNoDolt(t)
146+
acquireTestSlot()
147+
t.Cleanup(releaseTestSlot)
125148

126149
ctx, cancel := testContext(t)
127150
defer cancel()
@@ -138,6 +161,7 @@ func setupConcurrentTestStore(t *testing.T) (*DoltStore, func()) {
138161
CommitterName: "test",
139162
CommitterEmail: "test@example.com",
140163
Database: dbName,
164+
MaxOpenConns: 2, // limit to avoid overwhelming the test container
141165
CreateIfMissing: true, // tests create fresh databases
142166
}
143167

@@ -146,6 +170,9 @@ func setupConcurrentTestStore(t *testing.T) (*DoltStore, func()) {
146170
os.RemoveAll(tmpDir)
147171
t.Fatalf("failed to create Dolt store: %v", err)
148172
}
173+
// Close idle connections immediately to reduce container pressure.
174+
store.db.SetMaxIdleConns(0)
175+
store.db.SetConnMaxIdleTime(time.Second)
149176

150177
if err := store.SetConfig(ctx, "issue_prefix", "test"); err != nil {
151178
store.Close()
@@ -154,9 +181,8 @@ func setupConcurrentTestStore(t *testing.T) (*DoltStore, func()) {
154181
}
155182

156183
cleanup := func() {
157-
dropCtx, dropCancel := context.WithTimeout(context.Background(), 5*time.Second)
158-
defer dropCancel()
159-
_, _ = store.db.ExecContext(dropCtx, fmt.Sprintf("DROP DATABASE IF EXISTS `%s`", dbName))
184+
// Skip DROP DATABASE — rapid CREATE/DROP cycles crash the Dolt container.
185+
// Orphan databases are cleaned up when the container is terminated.
160186
store.Close()
161187
os.RemoveAll(tmpDir)
162188
}

internal/storage/dolt/git_remote_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,8 @@ func setupEmbeddedGitRemote(t *testing.T) (*DoltStore, *gitRemoteSetup, func())
622622
t.Helper()
623623
skipIfNoDolt(t)
624624
skipIfNoGit(t)
625+
acquireTestSlot()
626+
t.Cleanup(releaseTestSlot)
625627

626628
baseDir, err := os.MkdirTemp("", "embedded-git-remote-test-*")
627629
if err != nil {

internal/storage/dolt/queries_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,7 @@ func TestGetBlockedIssues_IncludesChildrenOfBlockedParents(t *testing.T) {
616616
Title: "Prerequisite",
617617
Status: types.StatusOpen,
618618
Priority: 1,
619-
IssueType: types.TypeTask,
619+
IssueType: types.TypeEpic, // must match blocked type for DepBlocks (GH#1495)
620620
}
621621
epic := &types.Issue{
622622
ID: "bi-epic",

internal/storage/dolt/store_unit_test.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ func newTestDoltDB(t *testing.T) (*sql.DB, func()) {
1919
if testServerPort == 0 {
2020
t.Skip("Test Dolt server not running, skipping test")
2121
}
22+
acquireTestSlot()
23+
t.Cleanup(releaseTestSlot)
2224

2325
dbName := uniqueTestDBName(t)
2426

@@ -41,11 +43,8 @@ func newTestDoltDB(t *testing.T) (*sql.DB, func()) {
4143

4244
return db, func() {
4345
db.Close()
44-
cleanup, cErr := sql.Open("mysql", adminDSN)
45-
if cErr == nil {
46-
cleanup.Exec("DROP DATABASE IF EXISTS `" + dbName + "`")
47-
cleanup.Close()
48-
}
46+
// Skip DROP DATABASE — rapid CREATE/DROP cycles crash the Dolt container.
47+
// Orphan databases are cleaned up when the container terminates.
4948
}
5049
}
5150

0 commit comments

Comments
 (0)