Skip to content
30 changes: 20 additions & 10 deletions Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy
private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request.
private const int MaxRetryCount = 120;
private const int MaxServiceUnavailableRetryCount = 1;
private const int MaxSessionTokenRetryCount = 2;

private readonly IDocumentClientRetryPolicy throttlingRetry;
private readonly GlobalEndpointManager globalEndpointManager;
Expand Down Expand Up @@ -331,14 +332,6 @@ private async Task<ShouldRetryResult> ShouldRetryInternalAsync(

if (statusCode == HttpStatusCode.NotFound && subStatusCode == SubStatusCodes.ReadSessionNotAvailable)
{
#if !INTERNAL
// Only set the hub region processing header for single master accounts
// Set header only after the first retry attempt fails with 404/1002
if (!this.canUseMultipleWriteLocations && this.sessionTokenRetryCount >= 1)
{
this.addHubRegionProcessingOnlyHeader = true;
}
#endif
return this.ShouldRetryOnSessionNotAvailable(this.documentServiceRequest);
}

Expand Down Expand Up @@ -456,12 +449,29 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable(DocumentServiceReques
}
else
{
#if !INTERNAL
// Only set the hub region processing header for single master accounts.
// Set the header after the second consecutive 404/1002: sessionTokenRetryCount >=
// MaxSessionTokenRetryCount (2) means both the initial request and the first retry
// to the write region have failed.
if (this.sessionTokenRetryCount >= MaxSessionTokenRetryCount)
Comment thread
kushagraThapar marked this conversation as resolved.
{
this.addHubRegionProcessingOnlyHeader = true;
Comment thread
kushagraThapar marked this conversation as resolved.
}

if (this.sessionTokenRetryCount > MaxSessionTokenRetryCount)
Comment thread
kundadebdatta marked this conversation as resolved.
Comment thread
aavasthy marked this conversation as resolved.
{
// Hub region header was set at count == MaxSessionTokenRetryCount and the
// request was retried with it. If the hub still returns 404/1002, stop.
return ShouldRetryResult.NoRetry();
}
#else
if (this.sessionTokenRetryCount > 1)
{
// When cannot use multiple write locations, then don't retry the request if
// we have already tried this request on the write location
// When cannot use multiple write locations, then don't retry the request if
// we have already tried this request on the write location.
return ShouldRetryResult.NoRetry();
}
#endif
else
{
this.retryContext = new RetryContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4319,12 +4319,14 @@ private static async Task GivenItemAsyncWhenMissingMemberHandlingIsErrorThenExpe

[TestMethod]
[Owner("aavasthy")]
[Description("Forces two consecutive 404/1002 responses from the gateway and verifies ClientRetryPolicy sets the hub region header flag after the first retry fails.")]
[Description("Forces three consecutive 404/1002 responses from the gateway and verifies ClientRetryPolicy " +
"sets the hub region header flag after the second 404/1002 and sends it on the third attempt.")]
public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
{
int requestCount = 0;
int return404Count = 0;
const int maxReturn404 = 2; // Return 404/1002 twice
const int maxReturn404 = 3; // Return 404/1002 three times: initial + retry to write region + retry with hub header
bool hubHeaderPresentOnThirdRequest = false;

// Create an HTTP handler to intercept requests
HttpClientHandlerHelper httpHandler = new HttpClientHandlerHelper
Expand All @@ -4338,15 +4340,23 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
{
requestCount++;

// Header should NOT be present on first retry (2nd request)
if (requestCount == 2 &&
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> firstRetryValues) &&
firstRetryValues.Any())
// Header should NOT be present on first two requests
if (requestCount <= 2 &&
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> earlyRetryValues) &&
earlyRetryValues.Any())
{
Assert.Fail("Header should NOT be present on first retry attempt.");
Assert.Fail($"Header should NOT be present on request {requestCount}.");
}

// Return fake 404/1002 for first two requests
// Header MUST be present on third request (after two consecutive 404/1002 failures)
if (requestCount == 3)
{
hubHeaderPresentOnThirdRequest =
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> hubValues)
&& hubValues.Any(v => v == bool.TrueString);
}

// Return fake 404/1002 for the configured number of requests
if (return404Count < maxReturn404)
{
return404Count++;
Expand Down Expand Up @@ -4399,8 +4409,7 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()

try
{
// This should trigger 404/1002 twice
// In single-region emulator, after first retry fails with 404/1002, it won't retry again
// This should trigger 404/1002 three times, after which the retry policy returns NoRetry
ItemResponse<ToDoActivity> response = await customContainer.ReadItemAsync<ToDoActivity>(
testItem.id,
new Cosmos.PartitionKey(testItem.pk));
Expand All @@ -4409,17 +4418,18 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
}
catch (CosmosException ex)
{
// Expected: After first retry fails with 404/1002, single master won't retry again
// Expected: After third 404/1002 (with hub header), single master stops retrying
Assert.AreEqual(HttpStatusCode.NotFound, ex.StatusCode);
Assert.AreEqual((int)SubStatusCodes.ReadSessionNotAvailable, ex.SubStatusCode);
}

// Verify the expected behavior:
// 1. Initial request (requestCount = 1) fails with 404/1002
// 2. First retry (requestCount = 2) fails with 404/1002
// 3. No more retries because single master + no additional regions
Assert.AreEqual(2, requestCount, $"Expected exactly 2 requests (initial + 1 retry) for single-region emulator, but got {requestCount}");
Assert.AreEqual(2, return404Count, "Both requests should have returned 404/1002");
// 1. Initial request (requestCount = 1) fails with 404/1002 → no hub header
// 2. First retry to write region (requestCount = 2) fails with 404/1002 → no hub header, flag set
// 3. Second retry with hub header (requestCount = 3) fails with 404/1002 → hub header present → NoRetry
Assert.AreEqual(3, requestCount, $"Expected exactly 3 requests (initial + retry to write region + retry with hub header) for single-region emulator, but got {requestCount}");
Assert.AreEqual(3, return404Count, "All three requests should have returned 404/1002");
Assert.IsTrue(hubHeaderPresentOnThirdRequest, "Hub region header must be present on the third request (after two consecutive 404/1002 failures).");
}

private async Task<T> AutoGenerateIdPatternTest<T>(Cosmos.PartitionKey pk, T itemWithoutId)
Expand Down
Loading
Loading