Skip to content

Commit 038e757

Browse files
ananth7592 and Copilot committed
LocationCache: Fixes read fallback to use WriteEndpoints[0] gated behind PPAF
When ExcludeRegions filters out all preferred read regions and PPAF (per-partition automatic failover) is enabled, GetApplicableEndpoints now falls back to WriteEndpoints[0] (dynamic; tracks the current write region) instead of this.defaultEndpoint (a static, region-agnostic URI set once at initialization). The fix is gated behind isPartitionLevelFailoverEnabled (a Func<bool>), which is wired from ConnectionPolicy.EnablePartitionLevelFailover through GlobalEndpointManager so that dynamic enablement (per PR #5310) is supported. When PPAF is disabled, the original behavior (defaultEndpoint fallback) is preserved.

Fixes #5821

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 0830090 commit 038e757

3 files changed

Lines changed: 239 additions & 12 deletions

File tree

Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,8 @@ public GlobalEndpointManager(
5858
owner.ServiceEndpoint,
5959
connectionPolicy.EnableEndpointDiscovery,
6060
connectionPolicy.MaxConnectionLimit,
61-
connectionPolicy.UseMultipleWriteLocations);
61+
connectionPolicy.UseMultipleWriteLocations,
62+
isPartitionLevelFailoverEnabled: () => connectionPolicy.EnablePartitionLevelFailover);
6263

6364
this.owner = owner;
6465
this.defaultEndpoint = owner.ServiceEndpoint;

Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ internal sealed class LocationCache
2929
private readonly int connectionLimit;
3030
private readonly ConcurrentDictionary<Uri, LocationUnavailabilityInfo> locationUnavailablityInfoByEndpoint;
3131
private readonly RegionNameMapper regionNameMapper;
32+
private readonly Func<bool> isPartitionLevelFailoverEnabled;
3233

3334
private DatabaseAccountLocationsInfo locationInfo;
3435
private DateTime lastCacheUpdateTimestamp;
@@ -39,13 +40,15 @@ public LocationCache(
3940
Uri defaultEndpoint,
4041
bool enableEndpointDiscovery,
4142
int connectionLimit,
42-
bool useMultipleWriteLocations)
43+
bool useMultipleWriteLocations,
44+
Func<bool> isPartitionLevelFailoverEnabled = null)
4345
{
4446
this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint);
4547
this.defaultEndpoint = defaultEndpoint;
4648
this.enableEndpointDiscovery = enableEndpointDiscovery;
4749
this.useMultipleWriteLocations = useMultipleWriteLocations;
4850
this.connectionLimit = connectionLimit;
51+
this.isPartitionLevelFailoverEnabled = isPartitionLevelFailoverEnabled;
4952

5053
this.lockObject = new object();
5154
this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary<Uri, LocationUnavailabilityInfo>();
@@ -380,10 +383,18 @@ public ReadOnlyCollection<Uri> GetApplicableEndpoints(DocumentServiceRequest req
380383

381384
ReadOnlyCollection<string> effectivePreferredLocations = databaseAccountLocationsInfoSnapshot.EffectivePreferredLocations;
382385

386+
// For reads when PPAF is enabled, use WriteEndpoints[0] as fallback (dynamic,
387+
// tracks current write region) instead of this.defaultEndpoint (static, region-agnostic,
388+
// never updated after init). This aligns with UpdateLocationCache which already uses
389+
// WriteEndpoints[0] as the ReadEndpoints fallback, and matches Java/Python SDK behavior.
390+
Uri fallbackEndpoint = (isReadRequest && this.isPartitionLevelFailoverEnabled?.Invoke() == true)
391+
? databaseAccountLocationsInfoSnapshot.WriteEndpoints[0]
392+
: this.defaultEndpoint;
393+
383394
return GetApplicableEndpoints(
384-
isReadRequest ? this.locationInfo.AvailableReadEndpointByLocation : this.locationInfo.AvailableWriteEndpointByLocation,
395+
isReadRequest ? databaseAccountLocationsInfoSnapshot.AvailableReadEndpointByLocation : databaseAccountLocationsInfoSnapshot.AvailableWriteEndpointByLocation,
385396
effectivePreferredLocations,
386-
this.defaultEndpoint,
397+
fallbackEndpoint,
387398
request.RequestContext.ExcludeRegions);
388399
}
389400

Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs

Lines changed: 223 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1432,14 +1432,229 @@ public void VerifyRegionExcludedTest(
14321432
}
14331433

14341434
}
1435-
1436-
[TestMethod]
1437-
public void ValidateThinClientReadFallbackToWriteEndpointTest()
1438-
{
1439-
// Arrange:
1440-
Collection<AccountRegion> normalReads = new Collection<AccountRegion>()
1441-
{
1442-
new AccountRegion { Name = "ReadLocation", Endpoint = "https://readlocation.documents.azure.com" }
1435+
1436+
[TestMethod]
1437+
[Description("Validates that read fallback uses WriteEndpoints[0] when PPAF is enabled, and defaultEndpoint when PPAF is disabled. Regression test for issue #5821.")]
1438+
public void ValidateReadFallbackUsesWriteEndpointAfterHubSwitch()
1439+
{
1440+
// Arrange: Single-master account with two regions.
1441+
// Hub region (write) starts at "location1", read available at both "location1" and "location2".
1442+
Collection<AccountRegion> writeLocations = new Collection<AccountRegion>()
1443+
{
1444+
new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
1445+
};
1446+
1447+
Collection<AccountRegion> readLocations = new Collection<AccountRegion>()
1448+
{
1449+
new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
1450+
new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
1451+
};
1452+
1453+
AccountProperties initialAccount = new AccountProperties
1454+
{
1455+
ReadLocationsInternal = readLocations,
1456+
WriteLocationsInternal = writeLocations,
1457+
EnableMultipleWriteLocations = false,
1458+
};
1459+
1460+
// defaultEndpoint is region-agnostic (static, never updated)
1461+
Uri defaultEndpoint = new Uri("https://myaccount.documents.azure.com");
1462+
1463+
// PPAF enabled — read fallback should use WriteEndpoints[0]
1464+
LocationCache cache = new LocationCache(
1465+
preferredLocations: new List<string> { "location1" }.AsReadOnly(),
1466+
defaultEndpoint: defaultEndpoint,
1467+
enableEndpointDiscovery: true,
1468+
connectionLimit: 10,
1469+
useMultipleWriteLocations: false,
1470+
isPartitionLevelFailoverEnabled: () => true);
1471+
1472+
cache.OnDatabaseAccountRead(initialAccount);
1473+
1474+
// Act 1: Read with ExcludeRegions == preferred regions → all excluded → fallback to WriteEndpoints[0]
1475+
using (DocumentServiceRequest readRequest = DocumentServiceRequest.Create(
1476+
OperationType.Read,
1477+
ResourceType.Document,
1478+
AuthorizationTokenType.PrimaryMasterKey))
1479+
{
1480+
readRequest.RequestContext.ExcludeRegions = new List<string> { "location1" };
1481+
ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(readRequest, isReadRequest: true);
1482+
1483+
Assert.AreEqual(1, endpoints.Count);
1484+
Assert.AreEqual(
1485+
LocationCacheTests.Location1Endpoint,
1486+
endpoints[0],
1487+
"With PPAF enabled, read fallback should use WriteEndpoints[0], not defaultEndpoint.");
1488+
}
1489+
1490+
// Act 2: Simulate hub switch — write region moves from location1 to location2
1491+
Collection<AccountRegion> newWriteLocations = new Collection<AccountRegion>()
1492+
{
1493+
new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
1494+
};
1495+
1496+
Collection<AccountRegion> newReadLocations = new Collection<AccountRegion>()
1497+
{
1498+
new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
1499+
new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
1500+
};
1501+
1502+
AccountProperties updatedAccount = new AccountProperties
1503+
{
1504+
ReadLocationsInternal = newReadLocations,
1505+
WriteLocationsInternal = newWriteLocations,
1506+
EnableMultipleWriteLocations = false,
1507+
};
1508+
1509+
cache.OnDatabaseAccountRead(updatedAccount);
1510+
1511+
// Act 3: Same read after hub switch — WriteEndpoints[0] should now be location2
1512+
using (DocumentServiceRequest readRequest2 = DocumentServiceRequest.Create(
1513+
OperationType.Read,
1514+
ResourceType.Document,
1515+
AuthorizationTokenType.PrimaryMasterKey))
1516+
{
1517+
readRequest2.RequestContext.ExcludeRegions = new List<string> { "location1" };
1518+
ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(readRequest2, isReadRequest: true);
1519+
1520+
Assert.AreEqual(1, endpoints.Count);
1521+
Assert.AreEqual(
1522+
LocationCacheTests.Location2Endpoint,
1523+
endpoints[0],
1524+
"After hub switch, read fallback should track the new write region (location2).");
1525+
}
1526+
1527+
// Act 4: Verify write requests still use defaultEndpoint as fallback (unchanged)
1528+
using (DocumentServiceRequest writeRequest = DocumentServiceRequest.Create(
1529+
OperationType.Create,
1530+
ResourceType.Document,
1531+
AuthorizationTokenType.PrimaryMasterKey))
1532+
{
1533+
writeRequest.RequestContext.ExcludeRegions = new List<string> { "location1", "location2" };
1534+
ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(writeRequest, isReadRequest: false);
1535+
1536+
Assert.AreEqual(1, endpoints.Count);
1537+
Assert.AreEqual(
1538+
defaultEndpoint,
1539+
endpoints[0],
1540+
"Write fallback should still use defaultEndpoint.");
1541+
}
1542+
}
1543+
1544+
[TestMethod]
1545+
[Description("Validates that when PPAF is disabled, read fallback uses defaultEndpoint (original behavior).")]
1546+
public void ValidateReadFallbackUsesDefaultEndpointWhenPpafDisabled()
1547+
{
1548+
Collection<AccountRegion> writeLocations = new Collection<AccountRegion>()
1549+
{
1550+
new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
1551+
};
1552+
1553+
Collection<AccountRegion> readLocations = new Collection<AccountRegion>()
1554+
{
1555+
new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
1556+
new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
1557+
};
1558+
1559+
AccountProperties account = new AccountProperties
1560+
{
1561+
ReadLocationsInternal = readLocations,
1562+
WriteLocationsInternal = writeLocations,
1563+
EnableMultipleWriteLocations = false,
1564+
};
1565+
1566+
Uri defaultEndpoint = new Uri("https://myaccount.documents.azure.com");
1567+
1568+
// PPAF disabled — read fallback should use defaultEndpoint (original behavior)
1569+
LocationCache cache = new LocationCache(
1570+
preferredLocations: new List<string> { "location1" }.AsReadOnly(),
1571+
defaultEndpoint: defaultEndpoint,
1572+
enableEndpointDiscovery: true,
1573+
connectionLimit: 10,
1574+
useMultipleWriteLocations: false,
1575+
isPartitionLevelFailoverEnabled: () => false);
1576+
1577+
cache.OnDatabaseAccountRead(account);
1578+
1579+
using (DocumentServiceRequest readRequest = DocumentServiceRequest.Create(
1580+
OperationType.Read,
1581+
ResourceType.Document,
1582+
AuthorizationTokenType.PrimaryMasterKey))
1583+
{
1584+
readRequest.RequestContext.ExcludeRegions = new List<string> { "location1" };
1585+
ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(readRequest, isReadRequest: true);
1586+
1587+
Assert.AreEqual(1, endpoints.Count);
1588+
Assert.AreEqual(
1589+
defaultEndpoint,
1590+
endpoints[0],
1591+
"With PPAF disabled, read fallback should use defaultEndpoint.");
1592+
}
1593+
}
1594+
1595+
[TestMethod]
1596+
[Description("Validates dynamic PPAF toggle: behavior changes when PPAF is enabled/disabled at runtime.")]
1597+
public void ValidateReadFallbackReactsToDynamicPpafToggle()
1598+
{
1599+
Collection<AccountRegion> writeLocations = new Collection<AccountRegion>()
1600+
{
1601+
new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
1602+
};
1603+
1604+
Collection<AccountRegion> readLocations = new Collection<AccountRegion>()
1605+
{
1606+
new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
1607+
new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
1608+
};
1609+
1610+
AccountProperties account = new AccountProperties
1611+
{
1612+
ReadLocationsInternal = readLocations,
1613+
WriteLocationsInternal = writeLocations,
1614+
EnableMultipleWriteLocations = false,
1615+
};
1616+
1617+
Uri defaultEndpoint = new Uri("https://myaccount.documents.azure.com");
1618+
1619+
// Start with PPAF disabled, toggle dynamically
1620+
bool ppafEnabled = false;
1621+
LocationCache cache = new LocationCache(
1622+
preferredLocations: new List<string> { "location1" }.AsReadOnly(),
1623+
defaultEndpoint: defaultEndpoint,
1624+
enableEndpointDiscovery: true,
1625+
connectionLimit: 10,
1626+
useMultipleWriteLocations: false,
1627+
isPartitionLevelFailoverEnabled: () => ppafEnabled);
1628+
1629+
cache.OnDatabaseAccountRead(account);
1630+
1631+
// PPAF off → defaultEndpoint
1632+
using (DocumentServiceRequest req = DocumentServiceRequest.Create(
1633+
OperationType.Read, ResourceType.Document, AuthorizationTokenType.PrimaryMasterKey))
1634+
{
1635+
req.RequestContext.ExcludeRegions = new List<string> { "location1" };
1636+
ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(req, isReadRequest: true);
1637+
Assert.AreEqual(defaultEndpoint, endpoints[0], "PPAF off: should use defaultEndpoint.");
1638+
}
1639+
1640+
// Toggle PPAF on → WriteEndpoints[0]
1641+
ppafEnabled = true;
1642+
using (DocumentServiceRequest req = DocumentServiceRequest.Create(
1643+
OperationType.Read, ResourceType.Document, AuthorizationTokenType.PrimaryMasterKey))
1644+
{
1645+
req.RequestContext.ExcludeRegions = new List<string> { "location1" };
1646+
ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(req, isReadRequest: true);
1647+
Assert.AreEqual(LocationCacheTests.Location1Endpoint, endpoints[0], "PPAF on: should use WriteEndpoints[0].");
1648+
}
1649+
}
1650+
1651+
[TestMethod]
1652+
public void ValidateThinClientReadFallbackToWriteEndpointTest()
1653+
{
1654+
// Arrange:
1655+
Collection<AccountRegion> normalReads = new Collection<AccountRegion>()
1656+
{
1657+
new AccountRegion { Name = "ReadLocation", Endpoint = "https://readlocation.documents.azure.com" }
14431658
};
14441659

14451660
Collection<AccountRegion> normalWrites = new Collection<AccountRegion>()

0 commit comments

Comments
 (0)