From 038e75744c15377612cef850ffc11f5831c04319 Mon Sep 17 00:00:00 2001
From: Ananth Mudumba
Date: Thu, 30 Apr 2026 16:20:17 -0700
Subject: [PATCH] LocationCache: Fixes read fallback to use WriteEndpoints[0] gated behind PPAF

When ExcludeRegions filters out all preferred read regions and PPAF
(Partition Level Failover) is enabled, GetApplicableEndpoints now falls
back to WriteEndpoints[0] (dynamic, tracks current write region) instead
of this.defaultEndpoint (static, region-agnostic URI set once at init).

The fix is gated behind isPartitionLevelFailoverEnabled (Func<bool>)
wired from ConnectionPolicy.EnablePartitionLevelFailover through
GlobalEndpointManager, supporting dynamic enablement per PR #5310.
When PPAF is disabled, original behavior (defaultEndpoint fallback)
is preserved.

Fixes #5821

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../src/Routing/GlobalEndpointManager.cs      |   3 +-
 .../src/Routing/LocationCache.cs              |  17 +-
 .../LocationCacheTests.cs                     | 231 +++++++++++++++++-
 3 files changed, 239 insertions(+), 12 deletions(-)

diff --git a/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs b/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs
index 7d58e1e1f0..8cadc2f6e8 100644
--- a/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs
+++ b/Microsoft.Azure.Cosmos/src/Routing/GlobalEndpointManager.cs
@@ -58,7 +58,8 @@ public GlobalEndpointManager(
                 owner.ServiceEndpoint,
                 connectionPolicy.EnableEndpointDiscovery,
                 connectionPolicy.MaxConnectionLimit,
-                connectionPolicy.UseMultipleWriteLocations);
+                connectionPolicy.UseMultipleWriteLocations,
+                isPartitionLevelFailoverEnabled: () => connectionPolicy.EnablePartitionLevelFailover);
 
             this.owner = owner;
             this.defaultEndpoint = owner.ServiceEndpoint;
diff --git a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs
index 11a1ea4f7b..6e41a6d70a 100644
--- a/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs
+++ b/Microsoft.Azure.Cosmos/src/Routing/LocationCache.cs
@@ -29,6 +29,7 @@ internal sealed class LocationCache
         private readonly int connectionLimit;
         private readonly ConcurrentDictionary<Uri, LocationUnavailabilityInfo> locationUnavailablityInfoByEndpoint;
         private readonly RegionNameMapper regionNameMapper;
+        private readonly Func<bool> isPartitionLevelFailoverEnabled;
 
         private DatabaseAccountLocationsInfo locationInfo;
         private DateTime lastCacheUpdateTimestamp;
@@ -39,13 +40,15 @@ public LocationCache(
             Uri defaultEndpoint,
             bool enableEndpointDiscovery,
             int connectionLimit,
-            bool useMultipleWriteLocations)
+            bool useMultipleWriteLocations,
+            Func<bool> isPartitionLevelFailoverEnabled = null)
         {
             this.locationInfo = new DatabaseAccountLocationsInfo(preferredLocations, defaultEndpoint);
             this.defaultEndpoint = defaultEndpoint;
             this.enableEndpointDiscovery = enableEndpointDiscovery;
             this.useMultipleWriteLocations = useMultipleWriteLocations;
             this.connectionLimit = connectionLimit;
+            this.isPartitionLevelFailoverEnabled = isPartitionLevelFailoverEnabled;
 
             this.lockObject = new object();
             this.locationUnavailablityInfoByEndpoint = new ConcurrentDictionary<Uri, LocationUnavailabilityInfo>();
@@ -380,10 +383,18 @@ public ReadOnlyCollection<Uri> GetApplicableEndpoints(DocumentServiceRequest req
 
             ReadOnlyCollection<string> effectivePreferredLocations = databaseAccountLocationsInfoSnapshot.EffectivePreferredLocations;
 
+            // For reads when PPAF is enabled, use WriteEndpoints[0] as fallback (dynamic,
+            // tracks current write region) instead of this.defaultEndpoint (static, region-agnostic,
+            // never updated after init). This aligns with UpdateLocationCache which already uses
+            // WriteEndpoints[0] as the ReadEndpoints fallback, and matches Java/Python SDK behavior.
+            Uri fallbackEndpoint = (isReadRequest && this.isPartitionLevelFailoverEnabled?.Invoke() == true)
+                ? databaseAccountLocationsInfoSnapshot.WriteEndpoints[0]
+                : this.defaultEndpoint;
+
             return GetApplicableEndpoints(
-                isReadRequest ? this.locationInfo.AvailableReadEndpointByLocation : this.locationInfo.AvailableWriteEndpointByLocation,
+                isReadRequest ? databaseAccountLocationsInfoSnapshot.AvailableReadEndpointByLocation : databaseAccountLocationsInfoSnapshot.AvailableWriteEndpointByLocation,
                 effectivePreferredLocations,
-                this.defaultEndpoint,
+                fallbackEndpoint,
                 request.RequestContext.ExcludeRegions);
         }
 
diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs
index b8f85d4bef..797fb7b7f9 100644
--- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs
+++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/LocationCacheTests.cs
@@ -1432,14 +1432,229 @@ public void VerifyRegionExcludedTest(
             }
 
         }
-
-        [TestMethod]
-        public void ValidateThinClientReadFallbackToWriteEndpointTest()
-        {
-            // Arrange:
-            Collection<AccountRegion> normalReads = new Collection<AccountRegion>()
-            {
-                new AccountRegion { Name = "ReadLocation", Endpoint = "https://readlocation.documents.azure.com" }
+
+        [TestMethod]
+        [Description("Validates that read fallback uses WriteEndpoints[0] when PPAF is enabled, and defaultEndpoint when PPAF is disabled. Regression test for issue #5821.")]
+        public void ValidateReadFallbackUsesWriteEndpointAfterHubSwitch()
+        {
+            // Arrange: Single-master account with two regions.
+            // Hub region (write) starts at "location1", read available at both "location1" and "location2".
+            Collection<AccountRegion> writeLocations = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
+            };
+
+            Collection<AccountRegion> readLocations = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
+                new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
+            };
+
+            AccountProperties initialAccount = new AccountProperties
+            {
+                ReadLocationsInternal = readLocations,
+                WriteLocationsInternal = writeLocations,
+                EnableMultipleWriteLocations = false,
+            };
+
+            // defaultEndpoint is region-agnostic (static, never updated)
+            Uri defaultEndpoint = new Uri("https://myaccount.documents.azure.com");
+
+            // PPAF enabled — read fallback should use WriteEndpoints[0]
+            LocationCache cache = new LocationCache(
+                preferredLocations: new List<string> { "location1" }.AsReadOnly(),
+                defaultEndpoint: defaultEndpoint,
+                enableEndpointDiscovery: true,
+                connectionLimit: 10,
+                useMultipleWriteLocations: false,
+                isPartitionLevelFailoverEnabled: () => true);
+
+            cache.OnDatabaseAccountRead(initialAccount);
+
+            // Act 1: Read with ExcludeRegions == preferred regions → all excluded → fallback to WriteEndpoints[0]
+            using (DocumentServiceRequest readRequest = DocumentServiceRequest.Create(
+                OperationType.Read,
+                ResourceType.Document,
+                AuthorizationTokenType.PrimaryMasterKey))
+            {
+                readRequest.RequestContext.ExcludeRegions = new List<string> { "location1" };
+                ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(readRequest, isReadRequest: true);
+
+                Assert.AreEqual(1, endpoints.Count);
+                Assert.AreEqual(
+                    LocationCacheTests.Location1Endpoint,
+                    endpoints[0],
+                    "With PPAF enabled, read fallback should use WriteEndpoints[0], not defaultEndpoint.");
+            }
+
+            // Act 2: Simulate hub switch — write region moves from location1 to location2
+            Collection<AccountRegion> newWriteLocations = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
+            };
+
+            Collection<AccountRegion> newReadLocations = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
+                new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
+            };
+
+            AccountProperties updatedAccount = new AccountProperties
+            {
+                ReadLocationsInternal = newReadLocations,
+                WriteLocationsInternal = newWriteLocations,
+                EnableMultipleWriteLocations = false,
+            };
+
+            cache.OnDatabaseAccountRead(updatedAccount);
+
+            // Act 3: Same read after hub switch — WriteEndpoints[0] should now be location2
+            using (DocumentServiceRequest readRequest2 = DocumentServiceRequest.Create(
+                OperationType.Read,
+                ResourceType.Document,
+                AuthorizationTokenType.PrimaryMasterKey))
+            {
+                readRequest2.RequestContext.ExcludeRegions = new List<string> { "location1" };
+                ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(readRequest2, isReadRequest: true);
+
+                Assert.AreEqual(1, endpoints.Count);
+                Assert.AreEqual(
+                    LocationCacheTests.Location2Endpoint,
+                    endpoints[0],
+                    "After hub switch, read fallback should track the new write region (location2).");
+            }
+
+            // Act 4: Verify write requests still use defaultEndpoint as fallback (unchanged)
+            using (DocumentServiceRequest writeRequest = DocumentServiceRequest.Create(
+                OperationType.Create,
+                ResourceType.Document,
+                AuthorizationTokenType.PrimaryMasterKey))
+            {
+                writeRequest.RequestContext.ExcludeRegions = new List<string> { "location1", "location2" };
+                ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(writeRequest, isReadRequest: false);
+
+                Assert.AreEqual(1, endpoints.Count);
+                Assert.AreEqual(
+                    defaultEndpoint,
+                    endpoints[0],
+                    "Write fallback should still use defaultEndpoint.");
+            }
+        }
+
+        [TestMethod]
+        [Description("Validates that when PPAF is disabled, read fallback uses defaultEndpoint (original behavior).")]
+        public void ValidateReadFallbackUsesDefaultEndpointWhenPpafDisabled()
+        {
+            Collection<AccountRegion> writeLocations = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
+            };
+
+            Collection<AccountRegion> readLocations = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
+                new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
+            };
+
+            AccountProperties account = new AccountProperties
+            {
+                ReadLocationsInternal = readLocations,
+                WriteLocationsInternal = writeLocations,
+                EnableMultipleWriteLocations = false,
+            };
+
+            Uri defaultEndpoint = new Uri("https://myaccount.documents.azure.com");
+
+            // PPAF disabled — read fallback should use defaultEndpoint (original behavior)
+            LocationCache cache = new LocationCache(
+                preferredLocations: new List<string> { "location1" }.AsReadOnly(),
+                defaultEndpoint: defaultEndpoint,
+                enableEndpointDiscovery: true,
+                connectionLimit: 10,
+                useMultipleWriteLocations: false,
+                isPartitionLevelFailoverEnabled: () => false);
+
+            cache.OnDatabaseAccountRead(account);
+
+            using (DocumentServiceRequest readRequest = DocumentServiceRequest.Create(
+                OperationType.Read,
+                ResourceType.Document,
+                AuthorizationTokenType.PrimaryMasterKey))
+            {
+                readRequest.RequestContext.ExcludeRegions = new List<string> { "location1" };
+                ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(readRequest, isReadRequest: true);
+
+                Assert.AreEqual(1, endpoints.Count);
+                Assert.AreEqual(
+                    defaultEndpoint,
+                    endpoints[0],
+                    "With PPAF disabled, read fallback should use defaultEndpoint.");
+            }
+        }
+
+        [TestMethod]
+        [Description("Validates dynamic PPAF toggle: behavior changes when PPAF is enabled/disabled at runtime.")]
+        public void ValidateReadFallbackReactsToDynamicPpafToggle()
+        {
+            Collection<AccountRegion> writeLocations = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
+            };
+
+            Collection<AccountRegion> readLocations = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "location1", Endpoint = LocationCacheTests.Location1Endpoint.ToString() },
+                new AccountRegion { Name = "location2", Endpoint = LocationCacheTests.Location2Endpoint.ToString() },
+            };
+
+            AccountProperties account = new AccountProperties
+            {
+                ReadLocationsInternal = readLocations,
+                WriteLocationsInternal = writeLocations,
+                EnableMultipleWriteLocations = false,
+            };
+
+            Uri defaultEndpoint = new Uri("https://myaccount.documents.azure.com");
+
+            // Start with PPAF disabled, toggle dynamically
+            bool ppafEnabled = false;
+            LocationCache cache = new LocationCache(
+                preferredLocations: new List<string> { "location1" }.AsReadOnly(),
+                defaultEndpoint: defaultEndpoint,
+                enableEndpointDiscovery: true,
+                connectionLimit: 10,
+                useMultipleWriteLocations: false,
+                isPartitionLevelFailoverEnabled: () => ppafEnabled);
+
+            cache.OnDatabaseAccountRead(account);
+
+            // PPAF off → defaultEndpoint
+            using (DocumentServiceRequest req = DocumentServiceRequest.Create(
+                OperationType.Read, ResourceType.Document, AuthorizationTokenType.PrimaryMasterKey))
+            {
+                req.RequestContext.ExcludeRegions = new List<string> { "location1" };
+                ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(req, isReadRequest: true);
+                Assert.AreEqual(defaultEndpoint, endpoints[0], "PPAF off: should use defaultEndpoint.");
+            }
+
+            // Toggle PPAF on → WriteEndpoints[0]
+            ppafEnabled = true;
+            using (DocumentServiceRequest req = DocumentServiceRequest.Create(
+                OperationType.Read, ResourceType.Document, AuthorizationTokenType.PrimaryMasterKey))
+            {
+                req.RequestContext.ExcludeRegions = new List<string> { "location1" };
+                ReadOnlyCollection<Uri> endpoints = cache.GetApplicableEndpoints(req, isReadRequest: true);
+                Assert.AreEqual(LocationCacheTests.Location1Endpoint, endpoints[0], "PPAF on: should use WriteEndpoints[0].");
+            }
+        }
+
+        [TestMethod]
+        public void ValidateThinClientReadFallbackToWriteEndpointTest()
+        {
+            // Arrange:
+            Collection<AccountRegion> normalReads = new Collection<AccountRegion>()
+            {
+                new AccountRegion { Name = "ReadLocation", Endpoint = "https://readlocation.documents.azure.com" }
             };
 
             Collection<AccountRegion> normalWrites = new Collection<AccountRegion>()