Skip to content

Commit 3e04b59

Browse files
tippmar-nr and claude authored
ci: fix container integration test failures (#3499)
* ci: add LocalStack health check and improve Docker cleanup resilience

  Add a health check to the LocalStack service in docker-compose-awssdk.yml so the test app waits for LocalStack to be ready before starting. This fixes intermittent AWS SDK container test failures (SQS, Kinesis, Firehose) caused by the test app making requests before LocalStack finished initializing.

  Also improve Docker cleanup in ContainerApplication.cs:
  - Skip manual container/network removal when compose down succeeds
  - Remove duplicate network cleanup in PrepareForStart
  - Skip network inspect during pre-start diagnostics to reduce log noise

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* ci: serialize AWS SDK container tests to prevent resource contention

  Add [Collection("AwsSdkTests")] to all AWS SDK container test classes so they run sequentially instead of in parallel. Each test spins up its own LocalStack + DynamoDB + test app via docker compose, and running 6+ LocalStack containers simultaneously on CI runners causes resource exhaustion (LocalStack exits with code 55).

  Other container tests (Kafka, Memcached, smoke tests) are unaffected and continue to run in parallel.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* ci: pin LocalStack to 4.14.0 — stable tag broken by upstream change

  The localstack/localstack:stable tag was updated on 2026-03-23 to a new CalVer release (2026.03.0) that crashes on startup with exit code 55. The LocalStack GitHub repo was archived the same day, suggesting a major licensing/model change. Pin to the last known working version (4.14.0) to restore CI stability.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* ci: serialize Memcached and Kafka container tests

  Same resource contention fix as AWS SDK tests — add [Collection] to Memcached and Kafka test classes so each pair of DotNet8/DotNet10 tests runs sequentially. Prevents two instances of the same dependency service (memcached-server, kafka-broker) from competing for resources on CI.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9700b5f commit 3e04b59

9 files changed

Lines changed: 71 additions & 41 deletions

File tree

tests/Agent/IntegrationTests/ContainerApplications/docker-compose-awssdk.yml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
services:
2525
localstack:
26-
image: localstack/localstack:stable
26+
image: localstack/localstack:4.14.0@sha256:3ebc37595918b8accb852f8048fef2aff047d465167edd655528065b07bc364a
2727
expose: # ports are only available internal to the service, not external so there's no chance for conflicts
2828
- "4566" # LocalStack Gateway
2929
- "4559" # external services port range
@@ -35,18 +35,26 @@ services:
3535
volumes:
3636
- "${LOCALSTACK_VOLUME_DIR:-./volume}:/var/lib/localstack"
3737
- "/var/run/docker.sock:/var/run/docker.sock"
38+
healthcheck:
39+
test: ["CMD", "curl", "-f", "http://localhost:4566/_localstack/health"]
40+
interval: 5s
41+
timeout: 5s
42+
retries: 10
43+
start_period: 10s
3844

3945
dynamodb:
4046
command: "-jar DynamoDBLocal.jar -inMemory"
4147
image: "amazon/dynamodb-local:latest"
4248
expose: # ports are only available internal to the service, not external so there's no chance for conflicts
4349
- "8000"
4450
working_dir: /home/dynamodblocal
45-
51+
4652
awssdktestapp:
4753
depends_on:
48-
- localstack
49-
- dynamodb
54+
localstack:
55+
condition: service_healthy
56+
dynamodb:
57+
condition: service_started
5058
container_name: ${CONTAINER_NAME}
5159
image: ${CONTAINER_NAME}
5260
platform: ${PLATFORM}

tests/Agent/IntegrationTests/ContainerIntegrationTests/Applications/ContainerApplication.cs

Lines changed: 48 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ private void CleanupContainer()
148148
Console.WriteLine($"[{AppName} {DateTime.Now}] Cleaning up container and images related to {ContainerName} container.");
149149
TestLogger?.WriteLine($"[{AppName}] Cleaning up container and images related to {ContainerName} container.");
150150

151+
var composeDownSucceeded = false;
151152
try
152153
{
153154
var downProc = Process.Start(new ProcessStartInfo
@@ -158,57 +159,63 @@ private void CleanupContainer()
158159
RedirectStandardError = true,
159160
UseShellExecute = false
160161
});
161-
downProc?.WaitForExit(30000);
162+
if (downProc?.WaitForExit(30000) == true)
163+
{
164+
composeDownSucceeded = downProc.ExitCode == 0;
165+
}
162166
}
163167
catch (Exception ex)
164168
{
165169
Console.WriteLine($"[{AppName} {DateTime.Now}] Error during compose down: {ex.Message}");
166170
}
167171

168-
// Force remove lingering container with same name if still present
169-
try
172+
// Only force-remove individual resources if compose down didn't clean up successfully
173+
if (!composeDownSucceeded)
170174
{
171-
var inspect = Process.Start(new ProcessStartInfo
172-
{
173-
FileName = "docker",
174-
Arguments = $"ps -a --filter name=^/{ContainerName}$ -q",
175-
RedirectStandardOutput = true,
176-
RedirectStandardError = true,
177-
UseShellExecute = false
178-
});
179-
var output = inspect?.StandardOutput.ReadToEnd();
180-
inspect?.WaitForExit(5000);
181-
if (!string.IsNullOrWhiteSpace(output))
175+
// Force remove lingering container with same name if still present
176+
try
182177
{
183-
var rm = Process.Start(new ProcessStartInfo
178+
var inspect = Process.Start(new ProcessStartInfo
184179
{
185180
FileName = "docker",
186-
Arguments = $"rm -f {ContainerName}",
181+
Arguments = $"ps -a --filter name=^/{ContainerName}$ -q",
187182
RedirectStandardOutput = true,
188183
RedirectStandardError = true,
189184
UseShellExecute = false
190185
});
191-
rm?.WaitForExit(10000);
186+
var output = inspect?.StandardOutput.ReadToEnd();
187+
inspect?.WaitForExit(5000);
188+
if (!string.IsNullOrWhiteSpace(output))
189+
{
190+
var rm = Process.Start(new ProcessStartInfo
191+
{
192+
FileName = "docker",
193+
Arguments = $"rm -f {ContainerName}",
194+
RedirectStandardOutput = true,
195+
RedirectStandardError = true,
196+
UseShellExecute = false
197+
});
198+
rm?.WaitForExit(10000);
199+
}
192200
}
193-
}
194-
catch { /* ignore */ }
201+
catch { /* ignore */ }
195202

196-
// Attempt removal of lingering default network (compose sometimes races on rapid successive runs)
197-
try
198-
{
199-
var networkName = $"{ContainerName.ToLower()}_default";
200-
var proc = Process.Start(new ProcessStartInfo
203+
// Attempt removal of lingering default network (compose sometimes races on rapid successive runs)
204+
try
201205
{
202-
FileName = "docker",
203-
Arguments = $"network rm {networkName}",
204-
RedirectStandardError = true,
205-
RedirectStandardOutput = true,
206-
UseShellExecute = false
207-
});
208-
proc?.WaitForExit(5000);
206+
var networkName = $"{ContainerName.ToLower()}_default";
207+
var proc = Process.Start(new ProcessStartInfo
208+
{
209+
FileName = "docker",
210+
Arguments = $"network rm {networkName}",
211+
RedirectStandardError = true,
212+
RedirectStandardOutput = true,
213+
UseShellExecute = false
214+
});
215+
proc?.WaitForExit(5000);
216+
}
217+
catch { /* ignore */ }
209218
}
210-
catch { /* ignore */ }
211-
212219

213220
#if DEBUG
214221
// Cleanup the networks with no attached containers. Mainly for testings on dev laptops - they can build up and block runs.
@@ -219,7 +226,8 @@ private void CleanupContainer()
219226
protected override void PrepareForStart()
220227
{
221228
CleanupContainer();
222-
// Remove any stale network with expected name so compose can recreate it with correct labels
229+
230+
// Remove any stale network left by a previous crashed run so compose can recreate it cleanly
223231
try
224232
{
225233
var networkName = $"{ContainerName.ToLower()}_default";
@@ -233,7 +241,7 @@ protected override void PrepareForStart()
233241
});
234242
netRm?.WaitForExit(5000);
235243
}
236-
catch { /* ignore */ }
244+
catch { /* ignore — network may not exist */ }
237245

238246
CaptureDockerState("pre-start");
239247
}
@@ -362,8 +370,11 @@ void RunAndWrite(string title, string fileName, string args, int timeoutMs = 800
362370
RunAndWrite("containers", "docker", "ps -a --filter name=containertestapp_ --format \"{{.ID}} {{.Names}} {{.Status}}\"");
363371
// List networks
364372
RunAndWrite("networks", "docker", "network ls --format \"{{.ID}} {{.Name}}\"");
365-
// Inspect the specific expected network (may fail if absent)
366-
RunAndWrite("inspect_target_network", "docker", $"network inspect {ContainerName.ToLower()}_default");
373+
// Inspect the specific expected network (skip at pre-start since the network hasn't been created yet)
374+
if (stage != "pre-start")
375+
{
376+
RunAndWrite("inspect_target_network", "docker", $"network inspect {ContainerName.ToLower()}_default");
377+
}
367378
// Compose ls (if available)
368379
RunAndWrite("compose_projects", "docker", "compose ls");
369380

tests/Agent/IntegrationTests/ContainerIntegrationTests/Tests/AwsSdk/AwsSdkDynamoDBTest.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
namespace NewRelic.Agent.ContainerIntegrationTests.Tests.AwsSdk;
1212

13+
[Collection("AwsSdkTests")]
1314
[Trait("Architecture", "amd64")]
1415
[Trait("Distro", "Ubuntu")]
1516
public class AwsSdkDynamoDBTest : NewRelicIntegrationTest<AwsSdkContainerDynamoDBTestFixture>

tests/Agent/IntegrationTests/ContainerIntegrationTests/Tests/AwsSdk/AwsSdkFirehoseTest.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
namespace NewRelic.Agent.ContainerIntegrationTests.Tests.AwsSdk;
1212

13+
[Collection("AwsSdkTests")]
1314
[Trait("Architecture", "amd64")]
1415
[Trait("Distro", "Ubuntu")]
1516
public class AwsSdkFirehoseTest : NewRelicIntegrationTest<AwsSdkContainerFirehoseTestFixture>

tests/Agent/IntegrationTests/ContainerIntegrationTests/Tests/AwsSdk/AwsSdkKinesisTest.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
namespace NewRelic.Agent.ContainerIntegrationTests.Tests.AwsSdk;
1212

13+
[Collection("AwsSdkTests")]
1314
[Trait("Architecture", "amd64")]
1415
[Trait("Distro", "Ubuntu")]
1516
public class AwsSdkKinesisTest : NewRelicIntegrationTest<AwsSdkContainerKinesisTestFixture>

tests/Agent/IntegrationTests/ContainerIntegrationTests/Tests/AwsSdk/AwsSdkMultiServiceTest.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
namespace NewRelic.Agent.ContainerIntegrationTests.Tests.AwsSdk;
1111

12+
[Collection("AwsSdkTests")]
1213
[Trait("Architecture", "amd64")]
1314
[Trait("Distro", "Ubuntu")]
1415
public class AwsSdkMultiServiceTest : NewRelicIntegrationTest<AwsSdkContainerMultiServiceTestFixture>

tests/Agent/IntegrationTests/ContainerIntegrationTests/Tests/AwsSdk/AwsSdkSQSTest.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,15 @@ public void Test()
165165
}
166166
}
167167

168+
[Collection("AwsSdkTests")]
168169
public class AwsSdkSQSTestInitializedCollections : AwsSdkSQSTestBase
169170
{
170171
public AwsSdkSQSTestInitializedCollections(AwsSdkContainerSQSTestFixture fixture, ITestOutputHelper output) : base(fixture, output, true)
171172
{
172173
}
173174
}
175+
176+
[Collection("AwsSdkTests")]
174177
public class AwsSdkSQSTestNullCollections : AwsSdkSQSTestBase
175178
{
176179
public AwsSdkSQSTestNullCollections(AwsSdkContainerSQSTestFixture fixture, ITestOutputHelper output) : base(fixture, output, false)

tests/Agent/IntegrationTests/ContainerIntegrationTests/Tests/KafkaTests.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ internal static string GenerateTopic()
134134
}
135135
}
136136

137+
[Collection("KafkaTests")]
137138
[Trait("Architecture", "amd64")]
138139
[Trait("Distro", "Ubuntu")]
139140
public class KafkaDotNet8Test : LinuxKafkaTest<KafkaDotNet8TestFixture>
@@ -143,6 +144,7 @@ public KafkaDotNet8Test(KafkaDotNet8TestFixture fixture, ITestOutputHelper outpu
143144
}
144145
}
145146

147+
[Collection("KafkaTests")]
146148
[Trait("Architecture", "amd64")]
147149
[Trait("Distro", "Ubuntu")]
148150
public class KafkaDotNet10Test : LinuxKafkaTest<KafkaDotNet10TestFixture>

tests/Agent/IntegrationTests/ContainerIntegrationTests/Tests/MemcachedTests.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ public void Test()
127127
}
128128
}
129129

130+
[Collection("MemcachedTests")]
130131
[Trait("Architecture", "amd64")]
131132
[Trait("Distro", "Ubuntu")]
132133
public class MemcachedDotNet8Test : LinuxMemcachedTest<MemcachedDotNet8TestFixture>
@@ -136,6 +137,7 @@ public MemcachedDotNet8Test(MemcachedDotNet8TestFixture fixture, ITestOutputHelp
136137
}
137138
}
138139

140+
[Collection("MemcachedTests")]
139141
[Trait("Architecture", "amd64")]
140142
[Trait("Distro", "Ubuntu")]
141143
public class MemcachedDotNet10Test : LinuxMemcachedTest<MemcachedDotNet10TestFixture>

0 commit comments

Comments
 (0)