Skip to content

Commit 8a3c268

Browse files
Merge branch 'main' into users/ntripician/metadata-retry-fix
2 parents 29844bd + 562d2d5 commit 8a3c268

4 files changed

Lines changed: 235 additions & 65 deletions

File tree

Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy
2323
private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request.
2424
private const int MaxRetryCount = 120;
2525
private const int MaxServiceUnavailableRetryCount = 1;
26+
private const int MaxSessionTokenRetryCount = 2;
2627

2728
private readonly IDocumentClientRetryPolicy throttlingRetry;
2829
private readonly GlobalEndpointManager globalEndpointManager;
@@ -331,14 +332,6 @@ private async Task<ShouldRetryResult> ShouldRetryInternalAsync(
331332

332333
if (statusCode == HttpStatusCode.NotFound && subStatusCode == SubStatusCodes.ReadSessionNotAvailable)
333334
{
334-
#if !INTERNAL
335-
// Only set the hub region processing header for single master accounts
336-
// Set header only after the first retry attempt fails with 404/1002
337-
if (!this.canUseMultipleWriteLocations && this.sessionTokenRetryCount >= 1)
338-
{
339-
this.addHubRegionProcessingOnlyHeader = true;
340-
}
341-
#endif
342335
return this.ShouldRetryOnSessionNotAvailable(this.documentServiceRequest);
343336
}
344337

@@ -456,12 +449,29 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable(DocumentServiceReques
456449
}
457450
else
458451
{
452+
#if !INTERNAL
453+
// Only set the hub region processing header for single master accounts.
454+
// Set header after the second consecutive 404/1002 (count >= 2 means both
455+
// the initial request and the first retry to the write region have failed).
456+
if (this.sessionTokenRetryCount >= MaxSessionTokenRetryCount)
457+
{
458+
this.addHubRegionProcessingOnlyHeader = true;
459+
}
460+
461+
if (this.sessionTokenRetryCount > MaxSessionTokenRetryCount)
462+
{
463+
// Hub region header was set at count == MaxSessionTokenRetryCount and the
464+
// request was retried with it. If the hub still returns 404/1002, stop.
465+
return ShouldRetryResult.NoRetry();
466+
}
467+
#else
459468
if (this.sessionTokenRetryCount > 1)
460469
{
461-
// When cannot use multiple write locations, then don't retry the request if
462-
// we have already tried this request on the write location
470+
// When cannot use multiple write locations, then don't retry the request if
471+
// we have already tried this request on the write location.
463472
return ShouldRetryResult.NoRetry();
464473
}
474+
#endif
465475
else
466476
{
467477
this.retryContext = new RetryContext

Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4319,12 +4319,14 @@ private static async Task GivenItemAsyncWhenMissingMemberHandlingIsErrorThenExpe
43194319

43204320
[TestMethod]
43214321
[Owner("aavasthy")]
4322-
[Description("Forces two consecutive 404/1002 responses from the gateway and verifies ClientRetryPolicy sets the hub region header flag after the first retry fails.")]
4322+
[Description("Forces three consecutive 404/1002 responses from the gateway and verifies ClientRetryPolicy " +
4323+
"sets the hub region header flag after the second 404/1002 and sends it on the third attempt.")]
43234324
public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
43244325
{
43254326
int requestCount = 0;
43264327
int return404Count = 0;
4327-
const int maxReturn404 = 2; // Return 404/1002 twice
4328+
const int maxReturn404 = 3; // Return 404/1002 three times: initial + retry to write region + retry with hub header
4329+
bool hubHeaderPresentOnThirdRequest = false;
43284330

43294331
// Created HTTP handler to intercept requests
43304332
HttpClientHandlerHelper httpHandler = new HttpClientHandlerHelper
@@ -4338,15 +4340,23 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
43384340
{
43394341
requestCount++;
43404342

4341-
// Header should NOT be present on first retry (2nd request)
4342-
if (requestCount == 2 &&
4343-
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> firstRetryValues) &&
4344-
firstRetryValues.Any())
4343+
// Header should NOT be present on first two requests
4344+
if (requestCount <= 2 &&
4345+
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> earlyRetryValues) &&
4346+
earlyRetryValues.Any())
43454347
{
4346-
Assert.Fail("Header should NOT be present on first retry attempt.");
4348+
Assert.Fail($"Header should NOT be present on request {requestCount}.");
43474349
}
43484350

4349-
// Return fake 404/1002 for first two requests
4351+
// Header MUST be present on third request (after two consecutive 404/1002 failures)
4352+
if (requestCount == 3)
4353+
{
4354+
hubHeaderPresentOnThirdRequest =
4355+
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> hubValues)
4356+
&& hubValues.Any(v => v == bool.TrueString);
4357+
}
4358+
4359+
// Return fake 404/1002 for the configured number of requests
43504360
if (return404Count < maxReturn404)
43514361
{
43524362
return404Count++;
@@ -4399,8 +4409,7 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
43994409

44004410
try
44014411
{
4402-
// This should trigger 404/1002 twice
4403-
// In single-region emulator, after first retry fails with 404/1002, it won't retry again
4412+
// This should trigger 404/1002 three times then NoRetry
44044413
ItemResponse<ToDoActivity> response = await customContainer.ReadItemAsync<ToDoActivity>(
44054414
testItem.id,
44064415
new Cosmos.PartitionKey(testItem.pk));
@@ -4409,17 +4418,18 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
44094418
}
44104419
catch (CosmosException ex)
44114420
{
4412-
// Expected: After first retry fails with 404/1002, single master won't retry again
4421+
// Expected: After third 404/1002 (with hub header), single master stops retrying
44134422
Assert.AreEqual(HttpStatusCode.NotFound, ex.StatusCode);
44144423
Assert.AreEqual((int)SubStatusCodes.ReadSessionNotAvailable, ex.SubStatusCode);
44154424
}
44164425

44174426
// Verify the expected behavior:
4418-
// 1. Initial request (requestCount = 1) fails with 404/1002
4419-
// 2. First retry (requestCount = 2) fails with 404/1002
4420-
// 3. No more retries because single master + no additional regions
4421-
Assert.AreEqual(2, requestCount, $"Expected exactly 2 requests (initial + 1 retry) for single-region emulator, but got {requestCount}");
4422-
Assert.AreEqual(2, return404Count, "Both requests should have returned 404/1002");
4427+
// 1. Initial request (requestCount = 1) fails with 404/1002 → no hub header
4428+
// 2. First retry to write region (requestCount = 2) fails with 404/1002 → no hub header, flag set
4429+
// 3. Second retry with hub header (requestCount = 3) fails with 404/1002 → hub header present → NoRetry
4430+
Assert.AreEqual(3, requestCount, $"Expected exactly 3 requests (initial + retry to write region + retry with hub header) for single-region emulator, but got {requestCount}");
4431+
Assert.AreEqual(3, return404Count, "All three requests should have returned 404/1002");
4432+
Assert.IsTrue(hubHeaderPresentOnThirdRequest, "Hub region header must be present on the third request (after two consecutive 404/1002 failures).");
44234433
}
44244434

44254435
private async Task<T> AutoGenerateIdPatternTest<T>(Cosmos.PartitionKey pk, T itemWithoutId)

0 commit comments

Comments
 (0)