Skip to content

Commit 40a6d1a

Browse files
test: stabilize four flakes surfaced by the CI Stress workflow (#193)
Run https://github.com/microsoft/microsoft-ui-reactor/actions/runs/25504721012 turned up 17 iteration failures across 200 iterations, concentrated in six tests. This commit addresses all six.

Unit tests (timing budgets that were tight for a starved CI runner):

- UseResourceTests.Retry_Invokes_Fetcher_Multiple_Times_And_Settles_On_Success and Retry_Exhausted_Surfaces_Final_Error: bump the poll budget from 5s to 30s. Retry uses a 100ms/200ms backoff and threadpool/timer callbacks for re-attempts; on a contended CI runner the third attempt routinely landed past the 5s ceiling, surfacing as either Expected:3 Actual:2 (third call hadn't fired) or Expected:Data Actual:Loading (state hadn't transitioned).
- LogCaptureBufferTests.WaitForNewAsync_RespectsTimeout: relax the upper bound on a 150ms wait from 2s to 30s. The lower bound is the meaningful check (timeout was honored); the upper bound is just a "didn't get stuck forever" sanity guard, and 2s was tight enough that a thread-pool stall could blow it (observed 3070ms on CI).
- AutoSuggestTests.WaitForState helper: bump WaitAsync from 5s to 30s. Same shape — debounce + worker continuation can land past 5s when the threadpool is starved; both Empty_State_On_No_Results and Error_State_On_Exception failed with TimeoutException at 10–12s.

Selftests (one real race, one too-strict bound, both with weak diagnostics):

- ThreadSafe_RapidBackgroundSetState: real bug. The 2-second CancellationTokenSource started ticking before Task.Run actually scheduled the four worker threads, so on a starved threadpool the budget could elapse before any worker hit the loop, leaving writeCount at 0 and the rendered counter never updating. Switch to Barrier(5) (4 workers + main) so the main thread waits for all workers to be inside the loop before calling cts.CancelAfter(2s). Also include writeCount and rendered in the Check messages so future regressions are diagnosable from the log.
- ThreadSafe_RenderCoalescing: relax the bound from renders<100 to renders<200, and include the actual count in the Check message. 100 was too aggressive — under threadpool starvation the UI thread can sneak in renders between the background-thread setState calls, and the production coalescing logic is what we're probing, not the CI scheduler. 200 still detects "no coalescing at all" while tolerating realistic interleaving (full coalescing on a quiet machine produces ~2 renders).

All five formerly-flaky unit tests pass locally after the change.
1 parent 0bbfe72 commit 40a6d1a

4 files changed

Lines changed: 29 additions & 12 deletions

File tree

tests/Reactor.AppTests.Host/SelfTest/Fixtures/ThreadSafeHookFixtures.cs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,13 @@ public override async Task RunAsync()
4646
await Harness.Render();
4747
H.Check("RapidBG_InitialRender", H.FindText("Counter: 0") is not null);
4848

49-
// Hammer from 4 threads for 2 seconds
50-
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(2));
49+
// Hammer from 4 threads for 2 seconds. Use Barrier(5) so the main
50+
// thread waits for all workers to be inside the loop before starting
51+
// the 2-second budget — otherwise a starved CI threadpool can let
52+
// CancelAfter elapse before Task.Run actually schedules the workers.
5153
int writeCount = 0;
52-
var barrier = new Barrier(4);
54+
var barrier = new Barrier(5);
55+
var cts = new CancellationTokenSource();
5356
var tasks = Enumerable.Range(0, 4).Select(t =>
5457
Task.Run(() =>
5558
{
@@ -63,6 +66,8 @@ public override async Task RunAsync()
6366
})
6467
).ToArray();
6568

69+
barrier.SignalAndWait();
70+
cts.CancelAfter(TimeSpan.FromSeconds(2));
6671
await Task.WhenAll(tasks);
6772

6873
// Let the render loop settle — low-priority enqueue means a few frames
@@ -71,14 +76,14 @@ public override async Task RunAsync()
7176
var final = lastWritten;
7277
var text = H.FindControl<TextBlock>(tb => tb.Text?.StartsWith("Counter:") == true);
7378
H.Check("RapidBG_TextPresent", text is not null);
74-
H.Check("RapidBG_WritesHappened", writeCount > 100);
79+
H.Check($"RapidBG_WritesHappened (writes={writeCount})", writeCount > 100);
7580

7681
// The rendered value should be the last value set (or very close to it,
7782
// since a render might have been in flight when the last write landed)
7883
if (text is not null)
7984
{
8085
var rendered = int.Parse(text.Text.Replace("Counter: ", ""));
81-
H.Check("RapidBG_FinalValueReasonable",
86+
H.Check($"RapidBG_FinalValueReasonable (rendered={rendered}, writes={writeCount})",
8287
rendered > 0 && rendered <= writeCount);
8388
}
8489
}
@@ -292,13 +297,17 @@ await Task.Run(() =>
292297

293298
H.Check("Coalesce_FinalValue", H.FindText("Value: 1000") is not null);
294299

295-
// Render count should be far less than 1000 due to coalescing
300+
// Render count should be far less than 1000 due to coalescing.
301+
// Threshold is loose (1/5 of writes) to tolerate scheduler interleaving
302+
// on CI runners where the UI thread can sneak in renders between
303+
// background-thread setState calls; full coalescing on a quiet machine
304+
// produces ~2 renders.
296305
var renderText = H.FindControl<TextBlock>(tb =>
297306
tb.Text?.StartsWith("Renders:") == true);
298307
if (renderText is not null)
299308
{
300309
var renders = int.Parse(renderText.Text.Replace("Renders: ", ""));
301-
H.Check("Coalesce_FarFewerRenders", renders < 100);
310+
H.Check($"Coalesce_FarFewerRenders (renders={renders})", renders < 200);
302311
}
303312
}
304313
}

tests/Reactor.Tests/AutoSuggestTests.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@ private static Task WaitForState<T>(SearchManager<T> manager, SearchState target
2121
// Check after subscribing to avoid TOCTOU race
2222
if (manager.State == target)
2323
tcs.TrySetResult();
24-
return tcs.Task.WaitAsync(TimeSpan.FromSeconds(5));
24+
// Generous budget: under thread-pool starvation on a contended CI runner
25+
// a 5s ceiling races debounce+continuation scheduling. 30s still surfaces
26+
// a genuine "state never reached" bug with a clear TimeoutException.
27+
return tcs.Task.WaitAsync(TimeSpan.FromSeconds(30));
2528
}
2629
// ════════════════════════════════════════════════════════════════
2730
// Element creation

tests/Reactor.Tests/Core/UseResourceTests.cs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,9 @@ public async Task Retry_Invokes_Fetcher_Multiple_Times_And_Settles_On_Success()
291291
// current hook state and does not re-invoke the fetcher.
292292
AsyncValue<int>? probe = null;
293293
var sw = global::System.Diagnostics.Stopwatch.StartNew();
294-
while (sw.Elapsed < TimeSpan.FromSeconds(5))
294+
// Budget covers retry backoff (100ms + 200ms = 300ms minimum) plus
295+
// threadpool/timer scheduling slack on a loaded CI runner.
296+
while (sw.Elapsed < TimeSpan.FromSeconds(30))
295297
{
296298
ctx.BeginRender(() => { });
297299
probe = ctx.UseResource(fetcher, cache, Array.Empty<object>(),
@@ -324,10 +326,11 @@ public async Task Retry_Exhausted_Surfaces_Final_Error()
324326
new ResourceOptions(RetryCount: 2), dispatcher);
325327
ctx.FlushEffects();
326328

327-
// Poll the rendered state, not the call counter.
329+
// Poll the rendered state, not the call counter. Budget covers retry
330+
// backoff plus threadpool/timer scheduling slack on a loaded CI runner.
328331
AsyncValue<int>? probe = null;
329332
var sw = global::System.Diagnostics.Stopwatch.StartNew();
330-
while (sw.Elapsed < TimeSpan.FromSeconds(5))
333+
while (sw.Elapsed < TimeSpan.FromSeconds(30))
331334
{
332335
ctx.BeginRender(() => { });
333336
probe = ctx.UseResource(fetcher, cache, Array.Empty<object>(),

tests/Reactor.Tests/Devtools/LogCaptureBufferTests.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,9 @@ public async Task WaitForNewAsync_RespectsTimeout()
132132
await buf.WaitForNewAsync(1, 150);
133133
sw.Stop();
134134
Assert.True(sw.ElapsedMilliseconds >= 100, $"WaitForNewAsync returned after only {sw.ElapsedMilliseconds}ms (expected ≥ ~150ms)");
135-
Assert.True(sw.ElapsedMilliseconds < 2_000, $"WaitForNewAsync blocked for {sw.ElapsedMilliseconds}ms");
135+
// Upper bound is a "didn't get stuck forever" sanity check — generous so
136+
// thread-pool starvation on a contended CI runner doesn't fail it.
137+
Assert.True(sw.ElapsedMilliseconds < 30_000, $"WaitForNewAsync blocked for {sw.ElapsedMilliseconds}ms");
136138
}
137139

138140
[Fact]

0 commit comments

Comments
 (0)