Skip to content
10 changes: 7 additions & 3 deletions Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy
private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request.
private const int MaxRetryCount = 120;
private const int MaxServiceUnavailableRetryCount = 1;
private const int MaxSessionTokenRetryCount = 2;

private readonly IDocumentClientRetryPolicy throttlingRetry;
private readonly GlobalEndpointManager globalEndpointManager;
Expand Down Expand Up @@ -456,10 +457,13 @@ private ShouldRetryResult ShouldRetryOnSessionNotAvailable(DocumentServiceReques
}
else
{
if (this.sessionTokenRetryCount > 1)
if (this.sessionTokenRetryCount > MaxSessionTokenRetryCount)
Comment thread
kundadebdatta marked this conversation as resolved.
Comment thread
aavasthy marked this conversation as resolved.
{
// When cannot use multiple write locations, then don't retry the request if
// we have already tried this request on the write location
// When cannot use multiple write locations, then don't retry the request if
// we have already tried this request on the write location.
// The count threshold is 2 (not 1) because the hub region processing header
// is set after the first retry fails (count=2) and the request must be retried
// once more with that header before giving up.
return ShouldRetryResult.NoRetry();
}
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4319,12 +4319,14 @@ private static async Task GivenItemAsyncWhenMissingMemberHandlingIsErrorThenExpe

[TestMethod]
[Owner("aavasthy")]
[Description("Forces two consecutive 404/1002 responses from the gateway and verifies ClientRetryPolicy sets the hub region header flag after the first retry fails.")]
[Description("Forces three consecutive 404/1002 responses from the gateway and verifies ClientRetryPolicy " +
"sets the hub region header flag after the second 404/1002 and sends it on the third attempt.")]
public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
{
int requestCount = 0;
int return404Count = 0;
const int maxReturn404 = 2; // Return 404/1002 twice
const int maxReturn404 = 3; // Return 404/1002 three times: initial + retry to write region + retry with hub header
bool hubHeaderPresentOnThirdRequest = false;

// Created HTTP handler to intercept requests
HttpClientHandlerHelper httpHandler = new HttpClientHandlerHelper
Expand All @@ -4338,15 +4340,23 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
{
requestCount++;

// Header should NOT be present on first retry (2nd request)
if (requestCount == 2 &&
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> firstRetryValues) &&
firstRetryValues.Any())
// Header should NOT be present on first two requests
if (requestCount <= 2 &&
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> earlyRetryValues) &&
earlyRetryValues.Any())
{
Assert.Fail("Header should NOT be present on first retry attempt.");
Assert.Fail($"Header should NOT be present on request {requestCount}.");
}

// Return fake 404/1002 for first two requests
// Header MUST be present on third request (after two consecutive 404/1002 failures)
if (requestCount == 3)
{
hubHeaderPresentOnThirdRequest =
request.Headers.TryGetValues(HubRegionHeader, out IEnumerable<string> hubValues)
&& hubValues.Any(v => v == bool.TrueString);
}

// Return fake 404/1002 for the configured number of requests
if (return404Count < maxReturn404)
{
return404Count++;
Expand Down Expand Up @@ -4399,8 +4409,7 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()

try
{
// This should trigger 404/1002 twice
// In single-region emulator, after first retry fails with 404/1002, it won't retry again
// This should trigger 404/1002 three times then NoRetry
ItemResponse<ToDoActivity> response = await customContainer.ReadItemAsync<ToDoActivity>(
testItem.id,
new Cosmos.PartitionKey(testItem.pk));
Expand All @@ -4409,17 +4418,18 @@ public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002()
}
catch (CosmosException ex)
{
// Expected: After first retry fails with 404/1002, single master won't retry again
// Expected: After third 404/1002 (with hub header), single master stops retrying
Assert.AreEqual(HttpStatusCode.NotFound, ex.StatusCode);
Assert.AreEqual((int)SubStatusCodes.ReadSessionNotAvailable, ex.SubStatusCode);
}

// Verify the expected behavior:
// 1. Initial request (requestCount = 1) fails with 404/1002
// 2. First retry (requestCount = 2) fails with 404/1002
// 3. No more retries because single master + no additional regions
Assert.AreEqual(2, requestCount, $"Expected exactly 2 requests (initial + 1 retry) for single-region emulator, but got {requestCount}");
Assert.AreEqual(2, return404Count, "Both requests should have returned 404/1002");
// 1. Initial request (requestCount = 1) fails with 404/1002 → no hub header
// 2. First retry to write region (requestCount = 2) fails with 404/1002 → no hub header, flag set
// 3. Second retry with hub header (requestCount = 3) fails with 404/1002 → hub header present → NoRetry
Assert.AreEqual(3, requestCount, $"Expected exactly 3 requests (initial + retry to write region + retry with hub header) for single-region emulator, but got {requestCount}");
Assert.AreEqual(3, return404Count, "All three requests should have returned 404/1002");
Assert.IsTrue(hubHeaderPresentOnThirdRequest, "Hub region header must be present on the third request (after two consecutive 404/1002 failures).");
}

private async Task<T> AutoGenerateIdPatternTest<T>(Cosmos.PartitionKey pk, T itemWithoutId)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -466,43 +466,16 @@ public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_BasedOnAccou

if (isSingleMaster)
{
// For single master, after one retry fails with 404/1002, it won't retry further
// But the header flag should be set for any potential future retries due to other errors
Assert.IsFalse(shouldRetry.ShouldRetry, "Single master should not retry again after first 404/1002 retry fails.");

// The header flag should be set even though no more 404/1002 retries will happen
// This ensures if the request is retried for a different reason (e.g., 503), it will have the header
}
else
{
// Multi-master can retry across multiple regions
Assert.IsTrue(shouldRetry.ShouldRetry, "Multi-master should continue retrying on 404/1002.");
}

// For single master: Verify header would be added if request is retried for other reasons (e.g., 503)
// For multi-master: Verify header is NOT added even on subsequent retries
if (isSingleMaster)
{
// Simulate a 503 error to trigger another retry
DocumentClientException serviceUnavailableException = new DocumentClientException(
message: "Simulated 503 ServiceUnavailable",
innerException: null,
statusCode: HttpStatusCode.ServiceUnavailable,
substatusCode: SubStatusCodes.Unknown,
requestUri: request.RequestContext.LocationEndpointToRoute,
responseHeaders: new DictionaryNameValueCollection());

shouldRetry = await retryPolicy.ShouldRetryAsync(serviceUnavailableException, CancellationToken.None);

if (shouldRetry.ShouldRetry)
{
// Now verify the header is present on this retry triggered by 503
retryPolicy.OnBeforeSendRequest(request);
headerValues = request.Headers.GetValues(HubRegionHeader);
Assert.IsNotNull(headerValues, "Header should be present on retry after 404/1002 flag was set.");
Assert.AreEqual(1, headerValues.Length, "Header should have exactly one value.");
Assert.AreEqual(bool.TrueString, headerValues[0], "Header value should be 'True'.");
}
// For single master, after the second 404/1002, the hub region header flag has been set
// and the retry policy should allow one more retry so the request can be sent with the header.
Assert.IsTrue(shouldRetry.ShouldRetry, "Single master should retry once more after second 404/1002 so the hub region header is sent.");

// Verify the header is now present on the retry
retryPolicy.OnBeforeSendRequest(request);
headerValues = request.Headers.GetValues(HubRegionHeader);
Assert.IsNotNull(headerValues, "Hub region header should be present on retry after second 404/1002.");
Assert.AreEqual(1, headerValues.Length, "Header should have exactly one value.");
Assert.AreEqual(bool.TrueString, headerValues[0], "Header value should be 'True'.");
Comment thread
aavasthy marked this conversation as resolved.
}
else
{
Expand All @@ -528,7 +501,100 @@ public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_BasedOnAccou
}
}
}
}
}

/// <summary>
/// Drives a single <see cref="ClientRetryPolicy"/> instance, configured for a single-master account
/// with endpoint discovery enabled, through the hub region discovery retry sequence:
///   attempt 1: hub region header absent; a simulated 404/1002 must be retried
///   attempt 2: hub region header absent; a second simulated 404/1002 must be retried
///   attempt 3: hub region header present with value "True"; a simulated 403/3 must be retried
///   attempt 4: hub region header still present with value "True"
/// </summary>
[TestMethod]
public async Task ClientRetryPolicy_HubRegionDiscovery_EndToEnd_DirectMode()
{
    // Arrange: single-master account, endpoint discovery switched on.
    const bool enableEndpointDiscovery = true;

    using GlobalEndpointManager endpointManager = this.Initialize(
        useMultipleWriteLocations: false,
        enableEndpointDiscovery: enableEndpointDiscovery,
        isPreferredLocationsListEmpty: false,
        enforceSingleMasterSingleWriteLocation: true);

    ClientRetryPolicy retryPolicy = new ClientRetryPolicy(
        endpointManager,
        this.partitionKeyRangeLocationCache,
        new RetryOptions(),
        enableEndpointDiscovery,
        isThinClientEnabled: false);

    DocumentServiceRequest request = this.CreateRequest(isReadRequest: true, isMasterResourceType: false);

    // Builds a failure against the endpoint the request is currently routed to
    // and asks the retry policy what to do with it.
    async Task<ShouldRetryResult> SimulateFailureAsync(
        string failureMessage,
        HttpStatusCode httpStatus,
        SubStatusCodes subStatus)
    {
        DocumentClientException failure = new DocumentClientException(
            message: failureMessage,
            innerException: null,
            statusCode: httpStatus,
            substatusCode: subStatus,
            requestUri: request.RequestContext.LocationEndpointToRoute,
            responseHeaders: new DictionaryNameValueCollection());

        return await retryPolicy.ShouldRetryAsync(failure, CancellationToken.None);
    }

    // ---- Attempt 1: initial request, no hub header expected ----
    retryPolicy.OnBeforeSendRequest(request);
    Assert.IsNull(
        request.Headers.GetValues(HubRegionHeader),
        "Hub region header should NOT be present on the initial request.");

    ShouldRetryResult afterFirstSessionMiss = await SimulateFailureAsync(
        "1st 404/1002",
        HttpStatusCode.NotFound,
        SubStatusCodes.ReadSessionNotAvailable);

    Assert.IsTrue(afterFirstSessionMiss.ShouldRetry, "Should retry after first 404/1002.");

    // ---- Attempt 2: first retry, still no hub header expected ----
    retryPolicy.OnBeforeSendRequest(request);
    Assert.IsNull(
        request.Headers.GetValues(HubRegionHeader),
        "Hub region header should NOT be present on the first retry (routed to write region).");

    ShouldRetryResult afterSecondSessionMiss = await SimulateFailureAsync(
        "2nd 404/1002",
        HttpStatusCode.NotFound,
        SubStatusCodes.ReadSessionNotAvailable);

    Assert.IsTrue(afterSecondSessionMiss.ShouldRetry, "Should retry after second 404/1002 (hub header flag now set).");

    // ---- Attempt 3: second retry must carry the hub header; it is answered with 403/3 ----
    retryPolicy.OnBeforeSendRequest(request);
    string[] hubHeaderOnThirdAttempt = request.Headers.GetValues(HubRegionHeader);
    Assert.IsNotNull(hubHeaderOnThirdAttempt, "Hub region header MUST be present on the retry after two consecutive 404/1002 errors.");
    Assert.AreEqual(1, hubHeaderOnThirdAttempt.Length, "Hub region header should have exactly one value.");
    Assert.AreEqual(bool.TrueString, hubHeaderOnThirdAttempt[0], "Hub region header value should be 'True'.");

    // 403/3 (WriteForbidden) stands in for the response of a non-hub region.
    ShouldRetryResult afterWriteForbidden = await SimulateFailureAsync(
        "403/3 WriteForbidden from non-hub region",
        HttpStatusCode.Forbidden,
        SubStatusCodes.WriteForbidden);

    Assert.IsTrue(afterWriteForbidden.ShouldRetry, "Should retry after 403/3 to continue hub region discovery.");

    // ---- Attempt 4: the hub header must survive the 403/3 retry ----
    retryPolicy.OnBeforeSendRequest(request);
    string[] hubHeaderOnFourthAttempt = request.Headers.GetValues(HubRegionHeader);
    Assert.IsNotNull(hubHeaderOnFourthAttempt, "Hub region header MUST persist through 403/3 retries.");
    Assert.AreEqual(bool.TrueString, hubHeaderOnFourthAttempt[0], "Hub region header value should remain 'True'.");
}

private async Task ValidateConnectTimeoutTriggersClientRetryPolicyAsync(
bool isReadRequest,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,13 @@ await BackoffRetryUtility<bool>.ExecuteAsync(
Uri expectedEndpoint = new Uri(this.databaseAccount.WriteLocationsInternal[0].Endpoint);
Assert.AreEqual(expectedEndpoint, request.RequestContext.LocationEndpointToRoute);
}
else if (retryCount == 2)
{
// Third request is the retry with the hub region header set.
// It still routes to the write endpoint (index=0, preferred=false).
Uri expectedEndpoint = new Uri(this.databaseAccount.WriteLocationsInternal[0].Endpoint);
Assert.AreEqual(expectedEndpoint, request.RequestContext.LocationEndpointToRoute);
}
else
{
Assert.Fail();
Expand All @@ -268,7 +275,7 @@ await BackoffRetryUtility<bool>.ExecuteAsync(
catch (NotFoundException)
{
DefaultTrace.TraceInformation("Received expected notFoundException");
Assert.AreEqual(2, retryCount);
Assert.AreEqual(3, retryCount);
}
}
}
Expand Down
Loading