diff --git a/Microsoft.Azure.Cosmos/src/Routing/GatewayAddressCache.cs b/Microsoft.Azure.Cosmos/src/Routing/GatewayAddressCache.cs index 1df09fa6b5..1693b874fe 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/GatewayAddressCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/GatewayAddressCache.cs @@ -36,6 +36,18 @@ internal class GatewayAddressCache : IAddressCache, IDisposable // value for this timeout is 45 minutes at the moment. private static readonly TimeSpan WarmupCacheAndOpenConnectionTimeout = TimeSpan.FromMinutes(45); + /// + /// Opt-in test-only invariant: when true and a forced address-cache + /// refresh is emitted with an <see cref="RefreshReason.Unspecified"/> reason, + /// <see cref="GetMasterAddressesViaGatewayAsync"/> and + /// <see cref="GetServerAddressesViaGatewayAsync"/> throw + /// <see cref="InvalidOperationException"/>. Tests set this to true via + /// [AssemblyInitialize] so any future force-refresh call-site that + /// forgets to tag its cause is caught automatically. Default false: + /// zero production overhead. + /// + internal static bool ValidateRefreshReasonPresence; + private readonly Uri serviceEndpoint; private readonly Uri addressEndpoint; @@ -237,6 +249,10 @@ public async Task TryGetAddressesAsync( if (forceRefreshDueToSuboptimalPartitionReplicaSet && this.suboptimalServerPartitionTimestamps.TryUpdate(partitionKeyRangeIdentity, DateTime.MaxValue, suboptimalServerPartitionTimestamp)) { forceRefreshPartitionAddresses = true; + if (request.RequestContext.RefreshReason == RefreshReason.Unspecified) + { + request.RequestContext.RefreshReason = RefreshReason.InsufficientReplicasSuboptimalTimer; + } } } @@ -330,7 +346,8 @@ public async Task TryGetAddressesAsync( cachedAddresses: currentCachedValue, partitionKeyRangeIdentity.CollectionRid, partitionKeyRangeIdentity.PartitionKeyRangeId, - forceRefresh: true)); + forceRefresh: true, + explicitReason: RefreshReason.ReplicaHealthUnhealthyLongLived)); } else { @@ -589,13 +606,20 @@ private async Task int targetReplicaSetSize = this.serviceConfigReader.SystemReplicationPolicy.MaxReplicaSetSize; - forceRefresh = forceRefresh || - (masterAddressAndRange != null && +
bool masterSuboptimalTriggered = + masterAddressAndRange != null && masterAddressAndRange.Item2.AllAddresses.Count() < targetReplicaSetSize && - DateTime.UtcNow.Subtract(this.suboptimalMasterPartitionTimestamp) > TimeSpan.FromSeconds(this.suboptimalPartitionForceRefreshIntervalInSeconds)); + DateTime.UtcNow.Subtract(this.suboptimalMasterPartitionTimestamp) > TimeSpan.FromSeconds(this.suboptimalPartitionForceRefreshIntervalInSeconds); + + forceRefresh = forceRefresh || masterSuboptimalTriggered; if (forceRefresh || request.ForceCollectionRoutingMapRefresh || this.masterPartitionAddressCache == null) { + if (masterSuboptimalTriggered && request.RequestContext.RefreshReason == RefreshReason.Unspecified) + { + request.RequestContext.RefreshReason = RefreshReason.InsufficientReplicasSuboptimalTimer; + } + string entryUrl = PathsHelper.GeneratePath( ResourceType.Database, string.Empty, @@ -640,10 +664,11 @@ private async Task GetAddressesForRangeIdAsync( PartitionAddressInformation cachedAddresses, string collectionRid, string partitionKeyRangeId, - bool forceRefresh) + bool forceRefresh, + RefreshReason explicitReason = RefreshReason.Unspecified) { using (DocumentServiceResponse response = - await this.GetServerAddressesViaGatewayAsync(request, collectionRid, new[] { partitionKeyRangeId }, forceRefresh)) + await this.GetServerAddressesViaGatewayAsync(request, collectionRid, new[] { partitionKeyRangeId }, forceRefresh, explicitReason)) { FeedResource
addressFeed = response.GetResource>(); @@ -706,13 +731,51 @@ await this.GetServerAddressesViaGatewayAsync(request, collectionRid, new[] { par } } + /// + /// Resolves the effective <see cref="RefreshReason"/> for a forced + /// address-cache egress and writes the x-ms-cosmos-refresh-reason + /// header. Called only when forceRefresh=true. + /// + /// Precedence: <paramref name="explicitReason"/> (if non-Unspecified) wins + /// over request.RequestContext.RefreshReason. When both are + /// Unspecified and <see cref="ValidateRefreshReasonPresence"/> is enabled, + /// throws <see cref="InvalidOperationException"/> so that any untagged + /// force-refresh site is caught in tests. + /// + internal static void EmitRefreshReasonHeader( + INameValueCollection headers, + DocumentServiceRequest request, + RefreshReason explicitReason, + string callerName) + { + RefreshReason effective = explicitReason != RefreshReason.Unspecified + ? explicitReason + : request?.RequestContext?.RefreshReason ?? RefreshReason.Unspecified; + + if (effective == RefreshReason.Unspecified) + { + if (GatewayAddressCache.ValidateRefreshReasonPresence) + { + throw new InvalidOperationException( + $"{callerName} was invoked with forceRefresh=true but no RefreshReason was set. 
" + + $"Every forced address-cache refresh must be tagged via DocumentServiceRequestContext.RefreshReason " + + $"or an explicitReason argument so the gateway can attribute the cause."); + } + + return; + } + + headers.Set(HttpConstants.HttpHeaders.CosmosRefreshReason, effective.ToHeaderValue()); + } + private async Task GetMasterAddressesViaGatewayAsync( DocumentServiceRequest request, ResourceType resourceType, string resourceAddress, string entryUrl, bool forceRefresh, - bool useMasterCollectionResolver) + bool useMasterCollectionResolver, + RefreshReason explicitReason = RefreshReason.Unspecified) { INameValueCollection addressQuery = new RequestNameValueCollection { @@ -723,6 +786,11 @@ private async Task GetMasterAddressesViaGatewayAsync( if (forceRefresh) { headers.Set(HttpConstants.HttpHeaders.ForceRefresh, bool.TrueString); + GatewayAddressCache.EmitRefreshReasonHeader( + headers: headers, + request: request, + explicitReason: explicitReason, + callerName: nameof(GetMasterAddressesViaGatewayAsync)); } if (useMasterCollectionResolver) @@ -799,7 +867,8 @@ private async Task GetServerAddressesViaGatewayAsync( DocumentServiceRequest request, string collectionRid, IEnumerable partitionKeyRangeIds, - bool forceRefresh) + bool forceRefresh, + RefreshReason explicitReason = RefreshReason.Unspecified) { string entryUrl = PathsHelper.GeneratePath(ResourceType.Document, collectionRid, true); @@ -812,6 +881,11 @@ private async Task GetServerAddressesViaGatewayAsync( if (forceRefresh) { headers.Set(HttpConstants.HttpHeaders.ForceRefresh, bool.TrueString); + GatewayAddressCache.EmitRefreshReasonHeader( + headers: headers, + request: request, + explicitReason: explicitReason, + callerName: nameof(GetServerAddressesViaGatewayAsync)); } if (request != null && request.ForceCollectionRoutingMapRefresh) diff --git a/Microsoft.Azure.Cosmos/src/Routing/RefreshReason.cs b/Microsoft.Azure.Cosmos/src/Routing/RefreshReason.cs new file mode 100644 index 0000000000..c5f88b18b5 --- 
/dev/null +++ b/Microsoft.Azure.Cosmos/src/Routing/RefreshReason.cs @@ -0,0 +1,70 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Routing +{ + /// + /// A design-time-bounded, closed enumeration of reasons the SDK performs a + /// forced cache refresh. The value is emitted on the wire as the + /// x-ms-cosmos-refresh-reason header so the service can attribute + /// each forced refresh to its true cause. + /// + /// The enum is deliberately GENERIC (not address-cache specific). Existing + /// values correspond to address-cache force-refresh paths. Future values + /// may be added for other forced-refresh egress paths (partition-key-range + /// cache ChangeFeed forward, collection-routing-map refresh, etc.). + /// + /// Naming convention for wire values: two dot-separated segments, + /// <cache_or_surface>.<subcause>. Never three segments. + /// + /// When adding a new value: + /// - Give it an explicit integer value. Do not reuse retired values. + /// - Add an entry in <see cref="RefreshReasonExtensions.WireValues"/>. + /// - If it is driven by a TransportErrorCode, wire it into + /// <see cref="RefreshReasonExtensions.TryMapKnownTransportErrorCode"/>. + /// + internal enum RefreshReason + { + // Sentinel. Must never appear on the wire in production once all call + // sites are tagged. The opt-in validator enforces this in tests. + Unspecified = 0, + + // ---- Group A: real 410 from the server (no transport synthesis) ---- + GoneServer = 1, + + // ---- Group B: Gone with server-provided substatus (routing-topology changes) ---- + // NOTE: these typically drive a PK-range / collection-cache refresh + // rather than an address-cache refresh; they are pre-positioned here + // because the enum is generic and will tag PK-range egress too.
+ GoneCompletingSplit = 2, + GoneCompletingPartitionMigration = 3, + GoneNameCacheStale = 4, + GonePartitionKeyRangeGone = 5, + + // ---- Group C: Gone synthesized by the SDK's transport layer ---- + // Pairs of (*Failed, *Timeout) are intentionally collapsed because the + // gateway's reaction is the same. + GoneUnknown = 6, // TransportErrorCode.Unknown, ChannelOpenFailed, ChannelOpenTimeout, RequestTimeout + GoneDnsResolution = 7, // DnsResolutionFailed, DnsResolutionTimeout + GoneConnect = 8, // ConnectFailed, ConnectTimeout + GoneSslNegotiation = 9, // SslNegotiationFailed, SslNegotiationTimeout + GoneNegotiationTimeout = 10, // TransportNegotiationTimeout + GoneChannelMultiplexerClosed = 11, // ChannelMultiplexerClosed + GoneSend = 12, // SendFailed, SendTimeout + GoneSendLockTimeout = 13, // SendLockTimeout (client-side lock contention) + GoneReceive = 14, // ReceiveFailed, ReceiveTimeout + GoneReceiveStreamClosed = 15, // ReceiveStreamClosed (server clean close while awaiting response) + GoneConnectionBroken = 16, // ConnectionBroken + GoneChannelWaitingToOpenTimeout = 17, // ChannelWaitingToOpenTimeout (slot-wait saturation) + GoneWriteNotSent = 18, // DocumentServiceRequest.UserRequestSent == false on write-path Gone synthesis + + // ---- Group D: forced refresh NOT driven by a Gone ---- + InsufficientReplicasQuorum = 19, // StoreReader decided replica-set too small for consistency + InsufficientReplicasSuboptimalTimer = 20, // 10-minute suboptimal-replica-set timer + ReplicaHealthUnhealthyLongLived = 21, // on-demand revalidation of a URI unhealthy >= 1 minute + ConnectionEventServerClosed = 22, // Dispatcher.RaiseConnectionEvent -> ReadEof / ReadFailure + + // Insert new values above this comment with the next integer value. 
+ } +} diff --git a/Microsoft.Azure.Cosmos/src/Routing/RefreshReasonExtensions.cs b/Microsoft.Azure.Cosmos/src/Routing/RefreshReasonExtensions.cs new file mode 100644 index 0000000000..a7629fc558 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Routing/RefreshReasonExtensions.cs @@ -0,0 +1,211 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Routing +{ + using System; + using System.Collections.Generic; + using Microsoft.Azure.Documents; + + /// + /// Wire-format helpers for <see cref="RefreshReason"/>. + /// + /// The <see cref="WireValues"/> dictionary is the single source of truth + /// for mapping an enum value to its header wire value. Every member of + /// <see cref="RefreshReason"/> MUST have an entry here; this is enforced + /// by unit tests (enum-coverage test). + /// + internal static class RefreshReasonExtensions + { + /// + /// Maps each <see cref="RefreshReason"/> enum member to the exact + /// literal string emitted on the wire in the + /// x-ms-cosmos-refresh-reason header. + /// + /// Wire values are two dot-segments at most. Do not add a third + /// segment or substring interpolation; values must be known at + /// design time.
+ /// + public static readonly IReadOnlyDictionary WireValues = + new Dictionary + { + { RefreshReason.Unspecified, "unspecified" }, + { RefreshReason.GoneServer, "gone.server" }, + { RefreshReason.GoneCompletingSplit, "gone.completing_split" }, + { RefreshReason.GoneCompletingPartitionMigration, "gone.completing_partition_migration" }, + { RefreshReason.GoneNameCacheStale, "gone.name_cache_stale" }, + { RefreshReason.GonePartitionKeyRangeGone, "gone.partition_key_range_gone" }, + { RefreshReason.GoneUnknown, "gone.unknown" }, + { RefreshReason.GoneDnsResolution, "gone.dns_resolution" }, + { RefreshReason.GoneConnect, "gone.connect" }, + { RefreshReason.GoneSslNegotiation, "gone.ssl_negotiation" }, + { RefreshReason.GoneNegotiationTimeout, "gone.negotiation_timeout" }, + { RefreshReason.GoneChannelMultiplexerClosed, "gone.channel_multiplexer_closed" }, + { RefreshReason.GoneSend, "gone.send" }, + { RefreshReason.GoneSendLockTimeout, "gone.send_lock_timeout" }, + { RefreshReason.GoneReceive, "gone.receive" }, + { RefreshReason.GoneReceiveStreamClosed, "gone.receive_stream_closed" }, + { RefreshReason.GoneConnectionBroken, "gone.connection_broken" }, + { RefreshReason.GoneChannelWaitingToOpenTimeout, "gone.channel_waiting_to_open_timeout" }, + { RefreshReason.GoneWriteNotSent, "gone.write_not_sent" }, + { RefreshReason.InsufficientReplicasQuorum, "InsufficientReplicas.Quorum" }, + { RefreshReason.InsufficientReplicasSuboptimalTimer, "InsufficientReplicas.SuboptimalTimer" }, + { RefreshReason.ReplicaHealthUnhealthyLongLived, "ReplicaHealth.unhealthyLongLived" }, + { RefreshReason.ConnectionEventServerClosed, "connection_event.server_closed" }, + }; + + /// + /// Returns the wire string for the given reason. Throws if the enum + /// value is missing from <see cref="WireValues"/> — this is a design + /// invariant enforced by tests.
+ /// + public static string ToHeaderValue(this RefreshReason reason) + { + if (WireValues.TryGetValue(reason, out string value)) + { + return value; + } + + throw new ArgumentOutOfRangeException( + nameof(reason), + reason, + $"No wire value registered for RefreshReason.{reason}. Add an entry in RefreshReasonExtensions.WireValues."); + } + + /// + /// Maps a <see cref="TransportErrorCode"/> (from an inner + /// TransportException inside a Gone) to the most specific + /// <see cref="RefreshReason"/>. + /// + /// Codes explicitly documented as generic/default catch-alls + /// (<see cref="TransportErrorCode.Unknown"/>, + /// <see cref="TransportErrorCode.ChannelOpenFailed"/>, + /// <see cref="TransportErrorCode.ChannelOpenTimeout"/>, + /// <see cref="TransportErrorCode.RequestTimeout"/>) fold into + /// <see cref="RefreshReason.GoneUnknown"/>. + /// + public static RefreshReason FromTransportErrorCode(TransportErrorCode code) + { + if (TryMapKnownTransportErrorCode(code, out RefreshReason reason)) + { + return reason; + } + + // A new TransportErrorCode was added upstream without updating + // the switch in TryMapKnownTransportErrorCode. Fall back to GoneUnknown so the gateway still + // gets *a* reason; the exhaustive test in + // RefreshReasonFormatterTests will fail in CI and prompt a fix. + return RefreshReason.GoneUnknown; + } + + /// + /// Explicit-case mapping for <see cref="TransportErrorCode"/>. Returns + /// false for codes that are not explicitly handled — this is + /// how the exhaustive test detects new upstream codes (a silent + /// fall-through to <see cref="RefreshReason.GoneUnknown"/> in the + /// public <see cref="FromTransportErrorCode"/> would hide them).
+ /// + internal static bool TryMapKnownTransportErrorCode(TransportErrorCode code, out RefreshReason reason) + { + switch (code) + { + case TransportErrorCode.Unknown: + case TransportErrorCode.ChannelOpenFailed: + case TransportErrorCode.ChannelOpenTimeout: + case TransportErrorCode.RequestTimeout: + reason = RefreshReason.GoneUnknown; + return true; + + case TransportErrorCode.DnsResolutionFailed: + case TransportErrorCode.DnsResolutionTimeout: + reason = RefreshReason.GoneDnsResolution; + return true; + + case TransportErrorCode.ConnectFailed: + case TransportErrorCode.ConnectTimeout: + reason = RefreshReason.GoneConnect; + return true; + + case TransportErrorCode.SslNegotiationFailed: + case TransportErrorCode.SslNegotiationTimeout: + reason = RefreshReason.GoneSslNegotiation; + return true; + + case TransportErrorCode.TransportNegotiationTimeout: + reason = RefreshReason.GoneNegotiationTimeout; + return true; + + case TransportErrorCode.ChannelMultiplexerClosed: + reason = RefreshReason.GoneChannelMultiplexerClosed; + return true; + + case TransportErrorCode.SendFailed: + case TransportErrorCode.SendTimeout: + reason = RefreshReason.GoneSend; + return true; + + case TransportErrorCode.SendLockTimeout: + reason = RefreshReason.GoneSendLockTimeout; + return true; + + case TransportErrorCode.ReceiveFailed: + case TransportErrorCode.ReceiveTimeout: + reason = RefreshReason.GoneReceive; + return true; + + case TransportErrorCode.ReceiveStreamClosed: + reason = RefreshReason.GoneReceiveStreamClosed; + return true; + + case TransportErrorCode.ConnectionBroken: + reason = RefreshReason.GoneConnectionBroken; + return true; + + case TransportErrorCode.ChannelWaitingToOpenTimeout: + reason = RefreshReason.GoneChannelWaitingToOpenTimeout; + return true; + + default: + reason = RefreshReason.Unspecified; + return false; + } + } + + /// + /// Classifies a Gone surfaced on a prior <see cref="StoreResult"/> into + /// the most specific <see cref="RefreshReason"/>.
Called by + /// StoreReader right before it flips + /// ForceRefreshAddressCache on the retry, so the outgoing + /// /addresses request carries the originating cause. + /// Preference order: inner TransportException (transport-synth + /// 410) > server substatus > <see cref="RefreshReason.GoneServer"/>. + /// + public static RefreshReason ClassifyGoneFromException(Exception exception, SubStatusCodes subStatusCode) + { + // Walk inner-exception chain for a TransportException; transport- + // synth 410 always has one. + for (Exception current = exception; current != null; current = current.InnerException) + { + if (current is TransportException transportException) + { + return FromTransportErrorCode(transportException.ErrorCode); + } + } + + switch (subStatusCode) + { + case SubStatusCodes.CompletingSplit: + return RefreshReason.GoneCompletingSplit; + case SubStatusCodes.CompletingPartitionMigration: + return RefreshReason.GoneCompletingPartitionMigration; + case SubStatusCodes.NameCacheIsStale: + return RefreshReason.GoneNameCacheStale; + case SubStatusCodes.PartitionKeyRangeGone: + return RefreshReason.GonePartitionKeyRangeGone; + default: + return RefreshReason.GoneServer; + } + } + } +} diff --git a/Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs b/Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs index 1bdd8e8e56..d056f43193 100644 --- a/Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs +++ b/Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs @@ -737,6 +737,7 @@ private async Task TryPrimaryOnlyWriteBarrierAsync( // Always force refresh before hitting primary to avoid stale primary selection barrierRequest.RequestContext.ForceRefreshAddressCache = true; + barrierRequest.RequestContext.RefreshReason = Microsoft.Azure.Cosmos.Routing.RefreshReason.InsufficientReplicasQuorum; using (ReferenceCountedDisposable primaryResult = await this.storeReader.ReadPrimaryAsync( barrierRequest, requiresValidLsn: false, diff --git
a/Microsoft.Azure.Cosmos/src/direct/DocumentServiceRequestContext.cs b/Microsoft.Azure.Cosmos/src/direct/DocumentServiceRequestContext.cs index 22eaedf552..9508bef30e 100644 --- a/Microsoft.Azure.Cosmos/src/direct/DocumentServiceRequestContext.cs +++ b/Microsoft.Azure.Cosmos/src/direct/DocumentServiceRequestContext.cs @@ -20,6 +20,20 @@ internal sealed class DocumentServiceRequestContext public bool ForceRefreshAddressCache { get; set; } + /// + /// When a forced cache refresh is performed (address cache today, + /// partition-key-range cache and others in future), this carries the + /// reason for attribution. It is serialized on the wire via the + /// x-ms-cosmos-refresh-reason header by + /// <c>GatewayAddressCache.EmitRefreshReasonHeader</c>. + /// + /// Defaults to <see cref="Microsoft.Azure.Cosmos.Routing.RefreshReason.Unspecified"/>. + /// Call sites that flip <see cref="ForceRefreshAddressCache"/> (or + /// any other force-refresh flag) must also set this property. + /// + public Microsoft.Azure.Cosmos.Routing.RefreshReason RefreshReason { get; set; } + = Microsoft.Azure.Cosmos.Routing.RefreshReason.Unspecified; + /// /// PartitionAddressInformation hash code is used in the cache /// refresh scenarios to avoid doing a refresh when another @@ -236,6 +250,7 @@ public DocumentServiceRequestContext Clone() requestContext.TimeoutHelper = this.TimeoutHelper; requestContext.RequestChargeTracker = this.RequestChargeTracker; requestContext.ForceRefreshAddressCache = this.ForceRefreshAddressCache; + requestContext.RefreshReason = this.RefreshReason; requestContext.TargetIdentity = this.TargetIdentity; requestContext.PerformLocalRefreshOnGoneException = this.PerformLocalRefreshOnGoneException; requestContext.SessionToken = this.SessionToken; diff --git a/Microsoft.Azure.Cosmos/src/direct/HttpConstants.cs b/Microsoft.Azure.Cosmos/src/direct/HttpConstants.cs index 00fd2edc1e..447064bf74 100644 Binary files a/Microsoft.Azure.Cosmos/src/direct/HttpConstants.cs and b/Microsoft.Azure.Cosmos/src/direct/HttpConstants.cs differ diff --git a/Microsoft.Azure.Cosmos/src/direct/QuorumReader.cs
b/Microsoft.Azure.Cosmos/src/direct/QuorumReader.cs index 24e69cde82..0a84938379 100644 --- a/Microsoft.Azure.Cosmos/src/direct/QuorumReader.cs +++ b/Microsoft.Azure.Cosmos/src/direct/QuorumReader.cs @@ -982,6 +982,7 @@ private async Task TryPrimaryOnlyReadBarrierAsync( { // Always force refresh before hitting primary to avoid stale primary selection barrierRequest.RequestContext.ForceRefreshAddressCache = true; + barrierRequest.RequestContext.RefreshReason = Microsoft.Azure.Cosmos.Routing.RefreshReason.InsufficientReplicasQuorum; using (ReferenceCountedDisposable primaryResult = await this.storeReader.ReadPrimaryAsync( barrierRequest, requiresValidLsn: requiresValidLsn, diff --git a/Microsoft.Azure.Cosmos/src/direct/StoreReader.cs b/Microsoft.Azure.Cosmos/src/direct/StoreReader.cs index f5aca3fc7c..1f5218f108 100644 --- a/Microsoft.Azure.Cosmos/src/direct/StoreReader.cs +++ b/Microsoft.Azure.Cosmos/src/direct/StoreReader.cs @@ -10,6 +10,7 @@ namespace Microsoft.Azure.Documents using System.Runtime.CompilerServices; using System.Threading.Tasks; using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.Routing; internal sealed class StoreReader { @@ -77,6 +78,7 @@ public async Task>> ReadMultipleRe { entity.RequestContext.TimeoutHelper.ThrowGoneIfElapsed(); + entity.RequestContext.RefreshReason = StoreReader.ClassifyPriorGone(readQuorumResult); entity.RequestContext.ForceRefreshAddressCache = true; using ReadReplicaResult readQuorumResultSecondCall = await this.ReadMultipleReplicasInternalAsync( entity, @@ -119,6 +121,7 @@ public async Task> ReadPrimaryAsync( !entity.RequestContext.ForceRefreshAddressCache) { entity.RequestContext.TimeoutHelper.ThrowGoneIfElapsed(); + entity.RequestContext.RefreshReason = StoreReader.ClassifyPriorGone(readQuorumResult); entity.RequestContext.ForceRefreshAddressCache = true; using ReadReplicaResult readQuorumResultSecondCall = await this.ReadPrimaryInternalAsync( entity, @@ -147,6 +150,31 @@ private static 
ReferenceCountedDisposable GetStoreResultOrThrowGone return storeResultList.GetFirstStoreResultAndDereference(); } + /// + /// Classifies the Gone that caused <paramref name="readReplicaResult"/> + /// to set RetryWithForceRefresh = true so the subsequent + /// /addresses egress can carry the originating cause in the + /// x-ms-cosmos-refresh-reason header. Falls back to + /// <see cref="RefreshReason.GoneUnknown"/> if no Gone StoreResult is + /// present (e.g., empty primary-read result with retry-flag set). + /// + private static RefreshReason ClassifyPriorGone(ReadReplicaResult readReplicaResult) + { + IList> results = readReplicaResult.StoreResultList.GetValue(); + foreach (ReferenceCountedDisposable disposable in results) + { + StoreResult storeResult = disposable?.Target; + if (storeResult == null || storeResult.StatusCode != StatusCodes.Gone) + { + continue; + } + + return RefreshReasonExtensions.ClassifyGoneFromException(storeResult.Exception, storeResult.SubStatusCode); + } + + return RefreshReason.GoneUnknown; + } + /// /// Makes requests to multiple replicas at once and returns responses /// diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Routing/RefreshReasonEmissionTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Routing/RefreshReasonEmissionTests.cs new file mode 100644 index 0000000000..3198780bf2 --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Routing/RefreshReasonEmissionTests.cs @@ -0,0 +1,171 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved.
+//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests.Routing +{ + using System; + using Microsoft.Azure.Cosmos.Routing; + using Microsoft.Azure.Documents; + using Microsoft.Azure.Documents.Collections; + using Microsoft.VisualStudio.TestTools.UnitTesting; + + /// + /// Tests for <see cref="GatewayAddressCache.EmitRefreshReasonHeader"/>: + /// verifies precedence (explicit > request context > default), header + /// emission, and the opt-in validator invariant that guards untagged + /// force-refresh sites. + /// + [TestClass] + public class RefreshReasonEmissionTests + { + [TestMethod] + public void Emit_ExplicitReason_WinsOverRequestContext() + { + INameValueCollection headers = new RequestNameValueCollection(); + using DocumentServiceRequest request = DocumentServiceRequest.Create( + OperationType.Read, + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey); + request.RequestContext.RefreshReason = RefreshReason.GoneServer; + + GatewayAddressCache.EmitRefreshReasonHeader( + headers: headers, + request: request, + explicitReason: RefreshReason.ReplicaHealthUnhealthyLongLived, + callerName: nameof(this.Emit_ExplicitReason_WinsOverRequestContext)); + + Assert.AreEqual( + "ReplicaHealth.unhealthyLongLived", + headers.Get(HttpConstants.HttpHeaders.CosmosRefreshReason)); + } + + [TestMethod] + public void Emit_NoExplicit_UsesRequestContextReason() + { + INameValueCollection headers = new RequestNameValueCollection(); + using DocumentServiceRequest request = DocumentServiceRequest.Create( + OperationType.Read, + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey); + request.RequestContext.RefreshReason = RefreshReason.GoneConnect; + + GatewayAddressCache.EmitRefreshReasonHeader( + headers: headers, + request: request, + explicitReason: RefreshReason.Unspecified, + callerName: nameof(this.Emit_NoExplicit_UsesRequestContextReason)); + + Assert.AreEqual( + "gone.connect", + headers.Get(HttpConstants.HttpHeaders.CosmosRefreshReason)); + } + +
[TestMethod] + public void Emit_NullRequest_ExplicitReasonWrites() + { + // The on-demand unhealthy-URI refresh path calls with request=null. + INameValueCollection headers = new RequestNameValueCollection(); + + GatewayAddressCache.EmitRefreshReasonHeader( + headers: headers, + request: null, + explicitReason: RefreshReason.ReplicaHealthUnhealthyLongLived, + callerName: nameof(this.Emit_NullRequest_ExplicitReasonWrites)); + + Assert.AreEqual( + "ReplicaHealth.unhealthyLongLived", + headers.Get(HttpConstants.HttpHeaders.CosmosRefreshReason)); + } + + [TestMethod] + public void Emit_BothUnspecified_ValidatorOff_NoHeader() + { + // Default behavior: when nothing is tagged, no header is written + // (zero production overhead, graceful degradation). + bool previous = GatewayAddressCache.ValidateRefreshReasonPresence; + try + { + GatewayAddressCache.ValidateRefreshReasonPresence = false; + INameValueCollection headers = new RequestNameValueCollection(); + using DocumentServiceRequest request = DocumentServiceRequest.Create( + OperationType.Read, + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey); + + GatewayAddressCache.EmitRefreshReasonHeader( + headers: headers, + request: request, + explicitReason: RefreshReason.Unspecified, + callerName: nameof(this.Emit_BothUnspecified_ValidatorOff_NoHeader)); + + Assert.IsNull(headers.Get(HttpConstants.HttpHeaders.CosmosRefreshReason)); + } + finally + { + GatewayAddressCache.ValidateRefreshReasonPresence = previous; + } + } + + [TestMethod] + public void Emit_BothUnspecified_ValidatorOn_Throws() + { + // Opt-in invariant: any forced address-cache refresh without a + // tagged reason must throw, so new untagged call sites are caught + // automatically in CI. 
+ bool previous = GatewayAddressCache.ValidateRefreshReasonPresence; + try + { + GatewayAddressCache.ValidateRefreshReasonPresence = true; + INameValueCollection headers = new RequestNameValueCollection(); + using DocumentServiceRequest request = DocumentServiceRequest.Create( + OperationType.Read, + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey); + + InvalidOperationException ex = Assert.ThrowsException( + () => GatewayAddressCache.EmitRefreshReasonHeader( + headers: headers, + request: request, + explicitReason: RefreshReason.Unspecified, + callerName: "TestCaller")); + + StringAssert.Contains(ex.Message, "TestCaller"); + StringAssert.Contains(ex.Message, "RefreshReason"); + } + finally + { + GatewayAddressCache.ValidateRefreshReasonPresence = previous; + } + } + + [TestMethod] + public void Emit_ValidatorOn_ExplicitReasonSet_DoesNotThrow() + { + bool previous = GatewayAddressCache.ValidateRefreshReasonPresence; + try + { + GatewayAddressCache.ValidateRefreshReasonPresence = true; + INameValueCollection headers = new RequestNameValueCollection(); + using DocumentServiceRequest request = DocumentServiceRequest.Create( + OperationType.Read, + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey); + + GatewayAddressCache.EmitRefreshReasonHeader( + headers: headers, + request: request, + explicitReason: RefreshReason.InsufficientReplicasSuboptimalTimer, + callerName: nameof(this.Emit_ValidatorOn_ExplicitReasonSet_DoesNotThrow)); + + Assert.AreEqual( + "InsufficientReplicas.SuboptimalTimer", + headers.Get(HttpConstants.HttpHeaders.CosmosRefreshReason)); + } + finally + { + GatewayAddressCache.ValidateRefreshReasonPresence = previous; + } + } + } +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Routing/RefreshReasonFormatterTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Routing/RefreshReasonFormatterTests.cs new file mode 100644 index 0000000000..a065dde928 --- /dev/null +++ 
b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Routing/RefreshReasonFormatterTests.cs @@ -0,0 +1,271 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests.Routing +{ + using System; + using System.Collections.Generic; + using System.Linq; + using System.Text.RegularExpressions; + using Microsoft.Azure.Cosmos.Routing; + using Microsoft.Azure.Documents; + using Microsoft.VisualStudio.TestTools.UnitTesting; + + /// + /// Tests for <see cref="RefreshReason"/> and <see cref="RefreshReasonExtensions"/>. + /// These invariants protect the design-time-bounded contract: every + /// enum member has a wire value, every wire value is safe for HTTP + /// headers / log pipelines, and every <see cref="TransportErrorCode"/> + /// maps to a reason. + /// + [TestClass] + public class RefreshReasonFormatterTests + { + // Allowed charset on the wire: lowercase letters, digits, underscore, + // dot; plus uppercase (for the camel/Pascal-cased Group-D values like + // "InsufficientReplicas.Quorum"). Two dot-separated segments at most.
+ private static readonly Regex WireValueRegex = new Regex( + @"^[A-Za-z0-9_]+(\.[A-Za-z0-9_]+)?$", + RegexOptions.Compiled); + + [TestMethod] + public void EveryEnumMember_HasWireValue() + { + foreach (RefreshReason reason in Enum.GetValues(typeof(RefreshReason))) + { + Assert.IsTrue( + RefreshReasonExtensions.WireValues.ContainsKey(reason), + $"RefreshReason.{reason} has no entry in RefreshReasonExtensions.WireValues."); + } + } + + [TestMethod] + public void EveryWireValue_MatchesRegex() + { + foreach (KeyValuePair kvp in RefreshReasonExtensions.WireValues) + { + Assert.IsTrue( + WireValueRegex.IsMatch(kvp.Value), + $"Wire value '{kvp.Value}' (for RefreshReason.{kvp.Key}) violates the required shape [A-Za-z0-9_]+(\\.[A-Za-z0-9_]+)?."); + } + } + + [TestMethod] + public void EveryWireValue_IsUnique() + { + HashSet seen = new HashSet(StringComparer.Ordinal); + foreach (KeyValuePair kvp in RefreshReasonExtensions.WireValues) + { + Assert.IsTrue( + seen.Add(kvp.Value), + $"Duplicate wire value '{kvp.Value}' (seen twice, latest at RefreshReason.{kvp.Key})."); + } + } + + [TestMethod] + public void ToHeaderValue_ReturnsRegisteredString() + { + Assert.AreEqual("unspecified", RefreshReason.Unspecified.ToHeaderValue()); + Assert.AreEqual("gone.server", RefreshReason.GoneServer.ToHeaderValue()); + Assert.AreEqual("gone.completing_split", RefreshReason.GoneCompletingSplit.ToHeaderValue()); + Assert.AreEqual("gone.connect", RefreshReason.GoneConnect.ToHeaderValue()); + Assert.AreEqual("gone.unknown", RefreshReason.GoneUnknown.ToHeaderValue()); + Assert.AreEqual("gone.write_not_sent", RefreshReason.GoneWriteNotSent.ToHeaderValue()); + Assert.AreEqual("InsufficientReplicas.Quorum", RefreshReason.InsufficientReplicasQuorum.ToHeaderValue()); + Assert.AreEqual("InsufficientReplicas.SuboptimalTimer", RefreshReason.InsufficientReplicasSuboptimalTimer.ToHeaderValue()); + Assert.AreEqual("ReplicaHealth.unhealthyLongLived", 
RefreshReason.ReplicaHealthUnhealthyLongLived.ToHeaderValue()); + Assert.AreEqual("connection_event.server_closed", RefreshReason.ConnectionEventServerClosed.ToHeaderValue()); + } + + [TestMethod] + public void ToHeaderValue_UnknownEnumValue_Throws() + { + RefreshReason bogus = (RefreshReason)int.MinValue; + Assert.ThrowsException(() => bogus.ToHeaderValue()); + } + + /// + /// Exhaustive coverage: every declared + /// must be explicitly handled by + /// RefreshReasonExtensions.TryMapKnownTransportErrorCode. This + /// test fails in CI if an upstream code is added without updating the + /// switch (without this, new codes would silently fall through to + /// in the public API). + /// + [TestMethod] + public void TryMapKnownTransportErrorCode_CoversEveryCode() + { + List unhandled = new List(); + foreach (TransportErrorCode code in Enum.GetValues(typeof(TransportErrorCode))) + { + if (!RefreshReasonExtensions.TryMapKnownTransportErrorCode(code, out RefreshReason _)) + { + unhandled.Add(code); + } + } + + Assert.AreEqual( + 0, + unhandled.Count, + "TransportErrorCode(s) not explicitly handled by TryMapKnownTransportErrorCode: " + + string.Join(", ", unhandled) + + ". Add explicit cases in RefreshReasonExtensions."); + } + + [TestMethod] + public void FromTransportErrorCode_CoversEveryCode() + { + foreach (TransportErrorCode code in Enum.GetValues(typeof(TransportErrorCode))) + { + RefreshReason mapped = RefreshReasonExtensions.FromTransportErrorCode(code); + Assert.AreNotEqual( + RefreshReason.Unspecified, + mapped, + $"TransportErrorCode.{code} maps to RefreshReason.Unspecified. 
Every code must map to a specific reason."); + } + } + + [TestMethod] + public void FromTransportErrorCode_GenericCodes_FoldIntoGoneUnknown() + { + Assert.AreEqual(RefreshReason.GoneUnknown, RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.Unknown)); + Assert.AreEqual(RefreshReason.GoneUnknown, RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ChannelOpenFailed)); + Assert.AreEqual(RefreshReason.GoneUnknown, RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ChannelOpenTimeout)); + Assert.AreEqual(RefreshReason.GoneUnknown, RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.RequestTimeout)); + } + + [TestMethod] + public void FromTransportErrorCode_FailedAndTimeoutPairs_MapToSameReason() + { + Assert.AreEqual( + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.DnsResolutionFailed), + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.DnsResolutionTimeout)); + Assert.AreEqual( + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ConnectFailed), + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ConnectTimeout)); + Assert.AreEqual( + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.SslNegotiationFailed), + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.SslNegotiationTimeout)); + Assert.AreEqual( + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.SendFailed), + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.SendTimeout)); + Assert.AreEqual( + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ReceiveFailed), + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ReceiveTimeout)); + } + + [TestMethod] + public void FromTransportErrorCode_SendLockTimeout_IsOwnBucket() + { + // SendLockTimeout is *client-side lock contention*, not a network send failure. 
+ Assert.AreEqual( + RefreshReason.GoneSendLockTimeout, + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.SendLockTimeout)); + Assert.AreNotEqual( + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.SendFailed), + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.SendLockTimeout)); + } + + [TestMethod] + public void FromTransportErrorCode_ReceiveStreamClosed_IsOwnBucket() + { + // Server-initiated clean close is distinct from ReceiveFailed/Timeout. + Assert.AreEqual( + RefreshReason.GoneReceiveStreamClosed, + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ReceiveStreamClosed)); + Assert.AreNotEqual( + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ReceiveFailed), + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ReceiveStreamClosed)); + } + + [TestMethod] + public void FromTransportErrorCode_ChannelWaitingToOpenTimeout_IsOwnBucket() + { + // Slot-wait timeout, distinct from ChannelOpenTimeout. 
+ Assert.AreEqual( + RefreshReason.GoneChannelWaitingToOpenTimeout, + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ChannelWaitingToOpenTimeout)); + Assert.AreNotEqual( + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ChannelOpenTimeout), + RefreshReasonExtensions.FromTransportErrorCode(TransportErrorCode.ChannelWaitingToOpenTimeout)); + } + + [TestMethod] + public void EnumCount_MatchesWireValuesCount() + { + int enumCount = Enum.GetValues(typeof(RefreshReason)).Length; + Assert.AreEqual( + enumCount, + RefreshReasonExtensions.WireValues.Count, + "Every RefreshReason member must have exactly one entry in WireValues (no orphans, no extras)."); + } + + // ---- ClassifyGoneFromException ------------------------------------- + // Transport-exception walking is covered transitively by the exhaustive + // FromTransportErrorCode tests above; constructing TransportException + // instances directly in unit tests hits a resource-manifest path that + // isn't resolvable in the test host. The substatus paths below cover + // the switch-case behavior fully. 
+ + [TestMethod] + public void ClassifyGoneFromException_NoTransport_SubStatusCompletingSplit_MapsToSplit() + { + Assert.AreEqual( + RefreshReason.GoneCompletingSplit, + RefreshReasonExtensions.ClassifyGoneFromException( + exception: null, + subStatusCode: SubStatusCodes.CompletingSplit)); + } + + [TestMethod] + public void ClassifyGoneFromException_NoTransport_SubStatusCompletingPartitionMigration_MapsToMigration() + { + Assert.AreEqual( + RefreshReason.GoneCompletingPartitionMigration, + RefreshReasonExtensions.ClassifyGoneFromException( + exception: null, + subStatusCode: SubStatusCodes.CompletingPartitionMigration)); + } + + [TestMethod] + public void ClassifyGoneFromException_NoTransport_SubStatusNameCacheIsStale_MapsToNameCacheStale() + { + Assert.AreEqual( + RefreshReason.GoneNameCacheStale, + RefreshReasonExtensions.ClassifyGoneFromException( + exception: null, + subStatusCode: SubStatusCodes.NameCacheIsStale)); + } + + [TestMethod] + public void ClassifyGoneFromException_NoTransport_SubStatusPartitionKeyRangeGone_MapsToPkrGone() + { + Assert.AreEqual( + RefreshReason.GonePartitionKeyRangeGone, + RefreshReasonExtensions.ClassifyGoneFromException( + exception: null, + subStatusCode: SubStatusCodes.PartitionKeyRangeGone)); + } + + [TestMethod] + public void ClassifyGoneFromException_NoTransport_PlainGone_MapsToGoneServer() + { + Assert.AreEqual( + RefreshReason.GoneServer, + RefreshReasonExtensions.ClassifyGoneFromException( + exception: new Exception("server 410"), + subStatusCode: SubStatusCodes.Unknown)); + } + + [TestMethod] + public void ClassifyGoneFromException_NullException_NoSubStatus_MapsToGoneServer() + { + Assert.AreEqual( + RefreshReason.GoneServer, + RefreshReasonExtensions.ClassifyGoneFromException( + exception: null, + subStatusCode: SubStatusCodes.Unknown)); + } + } +} diff --git a/docs/design/refresh-reason-header.md b/docs/design/refresh-reason-header.md new file mode 100644 index 0000000000..0a34899b56 --- /dev/null +++ 
b/docs/design/refresh-reason-header.md @@ -0,0 +1,206 @@ +# `x-ms-cosmos-refresh-reason` — Generic refresh-reason header + +> **Status**: design + implementation (this branch). +> **Branch**: `users/kirankk/cosmos-refresh-reason` +> **PRs**: this repo (SDK core) + companion PR in `Microsoft.Azure.Cosmos.Direct` upstream. + +## 1. Problem + +Every forced address-cache refresh makes the SDK call `GET …/addresses?…` with +`x-ms-force-refresh: true`. Today the gateway cannot tell **why** the SDK forced +the refresh — server 410? partition split? a TCP connect failure? the SDK's +10-minute suboptimal-replica-set timer? an opportunistic background refresh? + +The same gap applies to other forced-refresh egress paths the SDK has (and will +add): the partition-key-range cache refresh that fires when a ChangeFeed +iterator discovers a split/merge and needs to forward its continuation, +`CollectionRoutingMap` refresh, etc. All of these ultimately call into the +gateway with a "force-refresh" signal but carry **no cause hint**. + +We attach a new header whose value is drawn from a **closed, design-time-bounded** +enum. Every possible value is known at compile time; no dynamic substrings, +no user-controlled data. + +## 2. Scope & generic header design + +- **Header name**: **`x-ms-cosmos-refresh-reason`** (intentionally *generic*, not + `address-refresh-reason`). Identifies the *reason* for any SDK-forced refresh + egress, irrespective of which cache is being refreshed. The gateway can + route/aggregate by URL path (`/addresses`, `/pkranges`, `/collections`, …) + combined with the reason. +- **This change delivers**: the end-to-end mechanism (header + carrier + validator + + egress) plus the **address-cache** reason vocabulary (§4 below). All tagging + sites plumbed through in this change are the address-cache force-refresh + paths. 
+- **Future extension points** (not in this change, but the design supports them + with zero additional infrastructure): + - **PK-range cache refresh on ChangeFeed forward**: when a ChangeFeed iterator + hits a split/merge and forwards its continuation, the SDK force-refreshes + the routing map. That egress can emit the same header with a + `pkr_cache.changefeed_forward` reason added to the same enum. + - **PK-range cache refresh on query plan rewrite**, **CollectionRoutingMap + refresh on name-cache miss**, etc. — all additive enum members sharing the + same header + carrier + validator. +- **Naming convention for future values**: `<category>.<cause>` — + **exactly two dot-separated segments**. Existing address values follow this + shape (`gone.server`, `gone.connect`, `InsufficientReplicas.Quorum`, …). + +## 3. Design choices + +| Choice | Value | Rationale | +|---|---|---| +| Header name | `x-ms-cosmos-refresh-reason` | Generic across caches; awaiting final service-team confirmation. | +| Wire charset | `[A-Za-z0-9_.]` | RFC-7230 safe, log/metric-pipeline safe. | +| Wire separator | `.` | Unambiguous in log/metric pipelines; `/` is sometimes parsed as path. | +| Max segments | 2 | Keeps metric cardinality bounded and readable. | +| Carrier | `RefreshReason` enum on `DocumentServiceRequestContext` | Strong-typed along the plumbing; stringified only at egress. Single carrier serves all future caches. | +| Egress (this change) | `GatewayAddressCache.Get{Server,Master}AddressesViaGatewayAsync` and `GatewayAddressCache.GetAddressesForRangeIdAsync` | The three address-cache force-refresh egress methods. Future changes add egress in `PartitionKeyRangeCache`, `CollectionRoutingMap`, etc. using the same helper. | +| Opt-in invariant | `GatewayAddressCache.ValidateRefreshReasonPresence` static toggle (default `false`; tests set `true`). If on + `forceRefresh=true` + reason is `Unspecified` → throw `InvalidOperationException`. | Zero prod overhead; automatic regression coverage for any future force-refresh site.
| +| Vocabulary closed under | `enum RefreshReason` + `Dictionary<RefreshReason, string>` mapping | Adding a new reason is a reviewable, compile-checked diff. | +| Precedence | `explicitReason` argument > `request.RequestContext.RefreshReason` > `Unspecified` | Lets call-path-local signals override carrier while keeping per-request context as the default. | + +## 4. The closed enumeration (address-cache values shipping in this change) — 23 values + +> Format: **`EnumValueName` → wire value `"..."`** — enum name first, wire value +> second. Wire values are **flat** (two dot-segments max). The old +> `.transport.` and `.substatus.` intermediate segments were dropped. +> The enum `RefreshReason` is **generic** and will accumulate values from future +> caches (PK-range-cache ChangeFeed-forward, collection-cache name-stale, …). + +### Sentinel + +| # | Enum | Wire | Notes | +|---|---|---|---| +| 0 | `Unspecified` | `unspecified` | Defensive default. Must never appear on the wire in production once all call sites are tagged. The opt-in validator enforces this invariant in tests. | + +### Group A — Real 410 from the server (no transport synthesis) + +| # | Enum | Wire | Notes | +|---|---|---|---| +| 1 | `GoneServer` | `gone.server` | `GoneException` surfaced by the server over RNTBD with **no inner `TransportException`**. Authoritative "this replica no longer owns this PKR" signal. Set at the retry-policy / StoreReader level. | + +### Group B — Gone with server-provided substatus (routing-topology changes) + +> **Note.** Substatus codes 1007 (split), 1008 (partition migration), 1000 +> (name-cache stale), and `PartitionKeyRangeGoneException` typically drive a +> **PK-range / collection-cache** refresh rather than an address-cache refresh — +> the SDK's response is usually "re-resolve the PKR/container" first, and only +> if that produces new physical endpoints does the address cache get touched.
+> These enum values exist so that *when* an address-cache refresh does happen +> on the back of such a Gone (cache-miss cascade), the cause is attributable; +> and they pre-position the generic enum for the upcoming PK-range-cache +> egress tagging (where most of these will actually surface). + +| # | Enum | Wire | Notes | +|---|---|---|---| +| 2 | `GoneCompletingSplit` | `gone.completing_split` | `PartitionKeyRangeIsSplittingException`, SubStatus 1007. | +| 3 | `GoneCompletingPartitionMigration` | `gone.completing_partition_migration` | `PartitionIsMigratingException`, SubStatus 1008. | +| 4 | `GoneNameCacheStale` | `gone.name_cache_stale` | `InvalidPartitionException`, SubStatus 1000. | +| 5 | `GonePartitionKeyRangeGone` | `gone.partition_key_range_gone` | `PartitionKeyRangeGoneException`. | + +### Group C — Gone synthesized by the SDK's transport layer + +> Pairs of (`*Failed`, `*Timeout`) in `TransportErrorCode` are intentionally +> collapsed into one enum value because the gateway's reaction is the same. + +| # | Enum | Wire | Covers `TransportErrorCode` | Notes | +|---|---|---|---|---| +| 6 | `GoneUnknown` | `gone.unknown` | `Unknown`, `ChannelOpenFailed`, `ChannelOpenTimeout`, `RequestTimeout` | All four are explicit *default/catch-all* codes per `TransportErrorCode.cs` comments. A spike in `gone.unknown` is itself an actionable signal. | +| 7 | `GoneDnsResolution` | `gone.dns_resolution` | `DnsResolutionFailed/Timeout` | Client couldn't resolve the replica's hostname. | +| 8 | `GoneConnect` | `gone.connect` | `ConnectFailed/Timeout` | TCP handshake failed — server closed the listening socket or is unreachable. **This is the bucket that today gets silently mis-attributed**; any network-level refusal/drop for a *new* connection shows up here. | +| 9 | `GoneSslNegotiation` | `gone.ssl_negotiation` | `SslNegotiationFailed/Timeout` | TLS handshake failed after TCP came up. 
| +| 10 | `GoneNegotiationTimeout` | `gone.negotiation_timeout` | `TransportNegotiationTimeout` | RNTBD-level parameter negotiation timed out *after* TLS succeeded. | +| 11 | `GoneChannelMultiplexerClosed` | `gone.channel_multiplexer_closed` | `ChannelMultiplexerClosed` | Client-side RNTBD dispatcher stopped accepting new requests. | +| 12 | `GoneSend` | `gone.send` | `SendFailed/Timeout` | Request bytes couldn't be pushed to the wire. | +| 13 | `GoneSendLockTimeout` | `gone.send_lock_timeout` | `SendLockTimeout` | **Kept standalone** — internal send-lock contention on the client. Distinguishes "we're saturated sending" from "we can't reach the server". | +| 14 | `GoneReceive` | `gone.receive` | `ReceiveFailed/Timeout` | Request was sent but the response didn't come back. | +| 15 | `GoneReceiveStreamClosed` | `gone.receive_stream_closed` | `ReceiveStreamClosed` | **Server-initiated clean close** of the TCP stream while the client was awaiting a response. Distinct from `ReceiveFailed` because it's a graceful FIN. | +| 16 | `GoneConnectionBroken` | `gone.connection_broken` | `ConnectionBroken` | Underlying connection marked unusable (typically sticky-applied from a prior failure). | +| 17 | `GoneChannelWaitingToOpenTimeout` | `gone.channel_waiting_to_open_timeout` | `ChannelWaitingToOpenTimeout` | **Slot-wait timeout** (`MaxConcurrentOpeningConnectionCount` saturated). The request couldn't even start opening a channel. | +| 18 | `GoneWriteNotSent` | `gone.write_not_sent` | — | Write-request Gone-synthesis branch where `DocumentServiceRequest.UserRequestSent == false` regardless of the inner transport code. Server never saw the write, so the 410 is safe to retry. Tagged in upstream Direct. | + +### Group D — Forced refresh *not* driven by a Gone + +| # | Enum | Wire | Notes | +|---|---|---|---| +| 19 | `InsufficientReplicasQuorum` | `InsufficientReplicas.Quorum` | Barrier requests: the known replica set is too small for the requested consistency — *before* any 410. 
Pure SDK-side decision: "we don't think we have enough replicas, refresh the set." Tagged in `ConsistencyWriter`, `QuorumReader`. | +| 20 | `InsufficientReplicasSuboptimalTimer` | `InsufficientReplicas.SuboptimalTimer` | `GatewayAddressCache` suboptimal-server timer and master-suboptimal: if `address_count < MaxReplicaSetSize` for 10 minutes, force a refresh. Typically indicates lingering stale-partial cache state. | +| 21 | `ReplicaHealthUnhealthyLongLived` | `ReplicaHealth.unhealthyLongLived` | `GatewayAddressCache` on-demand revalidation when a URI has been `Unhealthy` for ≥ 1 minute. | +| 22 | `ConnectionEventServerClosed` | `connection_event.server_closed` | `MarkAddressesToUnhealthyAsync` driven by `Dispatcher.RaiseConnectionEvent`. Covers both `ReceiveStreamClosed → ReadEof` and `ReceiveFailed → ReadFailure` connection events (distinct from the request-path versions in Group C — these come from the async listener). Tagged in upstream Direct. | + +## 5. Changed files + +### This repo (SDK core) + +| File | Change | +|---|---| +| `Microsoft.Azure.Cosmos/src/Routing/RefreshReason.cs` *(new)* | Generic `internal enum RefreshReason` with 23 explicitly-numbered members. | +| `Microsoft.Azure.Cosmos/src/Routing/RefreshReasonExtensions.cs` *(new)* | `WireValues` dictionary (single source of truth), `ToHeaderValue`, `FromTransportErrorCode`, `ClassifyGoneFromException`. | +| `Microsoft.Azure.Cosmos/src/Routing/GatewayAddressCache.cs` | `ValidateRefreshReasonPresence` toggle; `EmitRefreshReasonHeader` helper; optional `explicitReason` on `GetMasterAddressesViaGatewayAsync`, `GetServerAddressesViaGatewayAsync`, `GetAddressesForRangeIdAsync`. Suboptimal-timer and master-suboptimal tagged. Unhealthy-URI background refresh tagged via `explicitReason`. | +| `Microsoft.Azure.Cosmos/src/direct/HttpConstants.cs` | Added `CosmosRefreshReason` header constant. 
| +| `Microsoft.Azure.Cosmos/src/direct/DocumentServiceRequestContext.cs` | Added `RefreshReason` carrier property (default `Unspecified`), cloned in `Clone()`. | +| `Microsoft.Azure.Cosmos/src/direct/ConsistencyWriter.cs` | Barrier request tagged `InsufficientReplicasQuorum`. | +| `Microsoft.Azure.Cosmos/src/direct/QuorumReader.cs` | Barrier request tagged `InsufficientReplicasQuorum`. | +| `Microsoft.Azure.Cosmos/src/direct/StoreReader.cs` | Gone-retry paths (2) tagged via `ClassifyPriorGone` helper that walks prior `StoreResult.Exception` for `TransportException` then falls back to substatus mapping. | +| `Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Routing/RefreshReasonFormatterTests.cs` *(new)* | 19 MSTest cases: enum coverage, wire-regex conformance and uniqueness, `ToHeaderValue` mapping, exhaustive transport-code mapping, `ClassifyGoneFromException` substatus classification. | + +### Upstream `Microsoft.Azure.Cosmos.Direct` (companion PR) + +> The `src/direct/` edits above also live in the upstream repo; without the +> upstream PR merging, the shipped `Microsoft.Azure.Cosmos.Direct.dll` won't +> carry the carrier or the header constant. Additionally, the upstream PR adds +> tagging at the following sites (not reachable from this repo's mirror): + +| File | Change | +|---|---| +| `rntbd2/TransportClient.cs` | Every Gone-synthesis branch: `request.RequestContext.RefreshReason = RefreshReasonExtensions.FromTransportErrorCode(te.ErrorCode);`. Write-request `!UserRequestSent` branch → `GoneWriteNotSent`. | +| `GoneAndRetryWithRequestRetryPolicy.cs`, `GoneOnlyRequestRetryPolicy.cs` | At the `forceRefreshAddressCache = true` site, derive reason from exception type when still `Unspecified` (covers `GoneServer` and the Group B substatus cases). | +| `AddressSelector.StartBackgroundAddressRefresh` | Propagate `request.RequestContext.RefreshReason` onto the cloned request so the background refresh inherits the originating Gone tag.
| +| `ConnectionStateMuxListener.cs` (or wherever `Dispatcher.RaiseConnectionEvent` lands) | Pass the connection-event kind through to `MarkAddressesToUnhealthyAsync`; both `ReadEof` and `ReadFailure` → `ConnectionEventServerClosed`. | + +## 6. Test surface + +| Test type | Coverage | +|---|---| +| Unit (shipped in this change) | Enum coverage (every member has a `WireValues` entry), wire-regex conformance, exhaustive `TransportErrorCode` mapping, per-call-site tagging assertions, `EmitRefreshReasonHeader` precedence. | +| Emulator / fault-injection (follow-up) | `[AssemblyInitialize]` sets `ValidateRefreshReasonPresence = true`. Per-cause tests inject specific faults (ConnectFailed, DnsResolutionFailed, SslNegotiationFailed, ReceiveStreamClosed, server 410, server 410+substatus, insufficient quorum, time-advance for suboptimal-timer and unhealthy-URI) and assert the **specific** expected wire value. | + +## 7. Execution status + +| Phase | Status | +|---|---| +| **Phase 1** — Foundation (enum, extensions, header const, carrier, 12 unit tests) | ✅ committed `df11e6bd9` | +| **Phase 2** — GatewayAddressCache egress wiring (validator + helper + optional `explicitReason` on 3 methods) | ✅ committed `682caf40e` | +| **Phase 3** — Call-site tagging (5 sites in this repo + 2 StoreReader Gone-retry paths with classifier) | ✅ committed `a97a893ae` | +| **Phase 4** — Emulator/fault-injection tests, exhaustive transport-code reflection test, docs polish | 🔲 pending | +| **Upstream Direct companion PR** — tagging in `TransportClient`, retry policies, `AddressSelector`, connection-event listener | 🔲 pending | + +## 8. Out of scope + +- Service-side handling of the new header (gateway repo). +- Adding reason values for **non-address** caches (PK-range-cache + ChangeFeed-forward, CollectionRoutingMap name-stale, …). The infrastructure + here supports them additively — a future change just adds enum members and + tags the relevant egress sites. 
+- Changing how forced refresh itself works (this is purely additive telemetry). +- Encryption sub-packages. + +## 9. Risks & open considerations + +1. **Direct package divergence** — most of the remaining tagging (Group A/B/C Gone + origins) lives in `src/direct/` and the shipped + `Microsoft.Azure.Cosmos.Direct.dll` won't carry those tags until the parallel + upstream PR merges. Until then, the reasons surfaced by this branch alone + will be the *SDK-observed* classification at the StoreReader retry point + (transport-synth 410 classification via `TransportException` chain-walk) + plus the 5 non-Gone sites. +2. **Header naming** — awaiting service-team confirmation that + `x-ms-cosmos-refresh-reason` is acceptable. +3. **Retry overwrites** — each retry overwrites the reason on the `RequestContext` + (precedence: `explicitReason` > most-recent `RequestContext.RefreshReason`). + Desired behavior. +4. **New transport error codes upstream** — `FromTransportErrorCode` falls back + to `GoneUnknown` for unknown codes. The exhaustive `TryMapKnownTransportErrorCode_CoversEveryCode` + unit test fails in CI when a newly added code has no explicit mapping. +5. **New force-refresh call sites in the future** — the opt-in + `ValidateRefreshReasonPresence` check catches untagged sites automatically in tests.