Skip to content

Commit dbf081c

Browse files
Merge branch 'master' into users/nalutripician/issue-5095-contacted-regions-hub-fallback
2 parents adbb741 + 3f82067 commit dbf081c

18 files changed

Lines changed: 1105 additions & 84 deletions

File tree

Microsoft.Azure.Cosmos.Samples/Usage/HttpClientFactory/Program.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,10 @@ private static void ShareHttpHandlers(
193193
{
194194
PooledConnectionLifetime = TimeSpan.FromMinutes(10), // Customize this value based on desired DNS refresh timer
195195
MaxConnectionsPerServer = 20, // Customize the maximum number of allowed connections
196-
EnableMultipleHttp2Connections = true // Recommended for thin client (HTTP/2) mode to open additional connections when stream limits are reached
196+
EnableMultipleHttp2Connections = true, // Recommended for thin client (HTTP/2) mode to open additional connections when stream limits are reached
197+
KeepAlivePingDelay = TimeSpan.FromSeconds(1), // Send HTTP/2 PING after 1s of inactivity to detect broken connections
198+
KeepAlivePingTimeout = TimeSpan.FromSeconds(2), // Mark connection dead if no PONG within 2s
199+
KeepAlivePingPolicy = HttpKeepAlivePingPolicy.Always
197200
};
198201

199202
CosmosClientOptions cosmosClientOptions = new CosmosClientOptions()

Microsoft.Azure.Cosmos/src/ChangeFeedProcessor/LeaseManagement/DocumentServiceLeaseManagerCosmos.cs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ internal sealed class DocumentServiceLeaseManagerCosmos : DocumentServiceLeaseMa
3131
private readonly AsyncLazy<TryCatch<string>> lazyContainerRid;
3232
private PartitionKeyRangeCache partitionKeyRangeCache;
3333

34+
internal static bool IsChangeFeedLeaseIdAsPartitionKeyEnabled = ConfigurationManager.IsChangeFeedLeaseIdAsPartitionKeyEnabled();
35+
3436
public DocumentServiceLeaseManagerCosmos(
3537
ContainerInternal monitoredContainer,
3638
ContainerInternal leaseContainer,
@@ -128,7 +130,9 @@ public override Task<DocumentServiceLease> CreateLeaseIfNotExistAsync(
128130
Mode = this.GetChangeFeedMode()
129131
};
130132

131-
this.requestOptionsFactory.AddPartitionKeyIfNeeded((string pk) => documentServiceLease.LeasePartitionKey = pk, Guid.NewGuid().ToString());
133+
this.requestOptionsFactory.AddPartitionKeyIfNeeded(
134+
(string pk) => documentServiceLease.LeasePartitionKey = pk,
135+
DocumentServiceLeaseManagerCosmos.GetLeasePartitionKeyValue(documentServiceLease.LeaseId));
132136

133137
return this.TryCreateDocumentServiceLeaseAsync(documentServiceLease);
134138
}
@@ -153,11 +157,20 @@ public override Task<DocumentServiceLease> CreateLeaseIfNotExistAsync(
153157
Mode = this.GetChangeFeedMode()
154158
};
155159

156-
this.requestOptionsFactory.AddPartitionKeyIfNeeded((string pk) => documentServiceLease.LeasePartitionKey = pk, Guid.NewGuid().ToString());
160+
this.requestOptionsFactory.AddPartitionKeyIfNeeded(
161+
(string pk) => documentServiceLease.LeasePartitionKey = pk,
162+
DocumentServiceLeaseManagerCosmos.GetLeasePartitionKeyValue(documentServiceLease.LeaseId));
157163

158164
return this.TryCreateDocumentServiceLeaseAsync(documentServiceLease);
159165
}
160166

167+
private static string GetLeasePartitionKeyValue(string leaseId)
168+
{
169+
return DocumentServiceLeaseManagerCosmos.IsChangeFeedLeaseIdAsPartitionKeyEnabled
170+
? leaseId
171+
: Guid.NewGuid().ToString();
172+
}
173+
161174
private string GetChangeFeedMode()
162175
{
163176
return this.options.Mode == ChangeFeedMode.AllVersionsAndDeletes

Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,19 @@ public string ApplicationName
244244
/// </summary>
245245
/// <remarks>
246246
/// This setting is only applicable in Gateway mode.
247-
/// The SDK sets EnableMultipleHttp2Connections = true on the underlying SocketsHttpHandler,
248-
/// allowing additional HTTP/2 TCP connections to be opened when the maximum concurrent streams
249-
/// limit on an existing connection is reached. This property controls the upper bound on the
250-
/// total number of connections per server endpoint.
251-
/// When using a custom <see cref="HttpClientFactory"/>, set EnableMultipleHttp2Connections
247+
/// The SDK sets the following on the underlying SocketsHttpHandler:
248+
/// <list type="bullet">
249+
/// <item><description>EnableMultipleHttp2Connections = true — allows additional HTTP/2 TCP connections
250+
/// to be opened when the maximum concurrent streams limit on an existing connection is reached.</description></item>
251+
/// <item><description>KeepAlivePingDelay = 1 second — sends HTTP/2 PING frames after 1 second
252+
/// of inactivity to detect broken connections in the pool.</description></item>
253+
/// <item><description>KeepAlivePingTimeout = 2 seconds — marks a connection as dead if no PONG
254+
/// response is received within 2 seconds.</description></item>
255+
/// <item><description>KeepAlivePingPolicy = Always — sends pings even for idle connections, which
256+
/// is critical for detecting broken connections that remain in the pool.</description></item>
257+
/// </list>
258+
/// This property controls the upper bound on the total number of connections per server endpoint.
259+
/// When using a custom <see cref="HttpClientFactory"/>, configure these properties
252260
/// directly on your SocketsHttpHandler for equivalent behavior.
253261
/// </remarks>
254262
/// <example>
@@ -268,7 +276,10 @@ public string ApplicationName
268276
/// SocketsHttpHandler handler = new SocketsHttpHandler
269277
/// {
270278
/// MaxConnectionsPerServer = 100,
271-
/// EnableMultipleHttp2Connections = true
279+
/// EnableMultipleHttp2Connections = true,
280+
/// KeepAlivePingDelay = TimeSpan.FromSeconds(1),
281+
/// KeepAlivePingTimeout = TimeSpan.FromSeconds(2),
282+
/// KeepAlivePingPolicy = HttpKeepAlivePingPolicy.Always
272283
/// };
273284
/// CosmosClientOptions options = new CosmosClientOptions()
274285
/// {
@@ -306,6 +317,24 @@ public int GatewayModeMaxConnectionLimit
306317
/// <seealso cref="CosmosClientBuilder.WithRequestTimeout(TimeSpan)"/>
307318
public TimeSpan RequestTimeout { get; set; }
308319

320+
/// <summary>
321+
/// Gets or sets the request timeout for inference service operations (e.g., semantic reranking).
322+
/// The number specifies the time to wait for a response from the inference service before the request is cancelled.
323+
/// This is a single-attempt timeout with no retries.
324+
/// </summary>
325+
/// <value>Default value is 5 seconds.</value>
326+
/// <remarks>
327+
/// This timeout is specific to inference service operations and is separate from the standard <see cref="RequestTimeout"/>.
328+
/// If the request does not complete within the specified duration, a <see cref="CosmosException"/> with status 408 (Request Timeout) is thrown.
329+
/// No retries are attempted on timeout.
330+
/// </remarks>
331+
#if PREVIEW
332+
public
333+
#else
334+
internal
335+
#endif
336+
TimeSpan InferenceRequestTimeout { get; set; } = InferenceService.DefaultInferenceRequestTimeout;
337+
309338
/// <summary>
310339
/// The SDK does a background refresh based on the time interval set to refresh the token credentials.
311340
/// This avoids latency issues because the old token is used until the new token is retrieved.

Microsoft.Azure.Cosmos/src/DocumentClientEventSource.cs

Lines changed: 81 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,51 @@ public class Keywords
3434
public const EventKeywords HttpRequestAndResponse = (EventKeywords)1;
3535
}
3636

37+
// Ordered list of headers extracted and forwarded positionally to Event(1).
38+
// IMPORTANT: The order of entries here matches the field order declared by [Event(1)]
39+
// and the `{index}` placeholders in its Message template. Do not reorder, insert, or
40+
// remove entries without updating Event(1) and the redaction logic in Request(...).
41+
// The Authorization slot is populated from the raw header and then overwritten with
42+
// "REDACTED" in the [NonEvent] wrapper before being emitted to ETW.
43+
private static readonly string[] RequestHeaderKeysToExtract =
44+
{
45+
HttpConstants.HttpHeaders.Accept,
46+
HttpConstants.HttpHeaders.Authorization, // SECURITY: redacted to "REDACTED" in Request(...) before ETW emission. Do not remove.
47+
HttpConstants.HttpHeaders.ConsistencyLevel,
48+
HttpConstants.HttpHeaders.ContentType,
49+
HttpConstants.HttpHeaders.ContentEncoding,
50+
HttpConstants.HttpHeaders.ContentLength,
51+
HttpConstants.HttpHeaders.ContentLocation,
52+
HttpConstants.HttpHeaders.Continuation,
53+
HttpConstants.HttpHeaders.EmitVerboseTracesInQuery,
54+
HttpConstants.HttpHeaders.EnableScanInQuery,
55+
HttpConstants.HttpHeaders.ETag,
56+
HttpConstants.HttpHeaders.HttpDate,
57+
HttpConstants.HttpHeaders.IfMatch,
58+
HttpConstants.HttpHeaders.IfNoneMatch,
59+
HttpConstants.HttpHeaders.IndexingDirective,
60+
HttpConstants.HttpHeaders.KeepAlive,
61+
HttpConstants.HttpHeaders.OfferType,
62+
HttpConstants.HttpHeaders.PageSize,
63+
HttpConstants.HttpHeaders.PreTriggerExclude,
64+
HttpConstants.HttpHeaders.PreTriggerInclude,
65+
HttpConstants.HttpHeaders.PostTriggerExclude,
66+
HttpConstants.HttpHeaders.PostTriggerInclude,
67+
HttpConstants.HttpHeaders.ProfileRequest,
68+
HttpConstants.HttpHeaders.ResourceTokenExpiry,
69+
HttpConstants.HttpHeaders.SessionToken,
70+
HttpConstants.HttpHeaders.SetCookie,
71+
HttpConstants.HttpHeaders.Slug,
72+
HttpConstants.HttpHeaders.UserAgent,
73+
HttpConstants.HttpHeaders.XDate
74+
};
75+
76+
// Index of the Authorization header in RequestHeaderKeysToExtract, computed once at class
77+
// initialization so the per-request redaction path in Request(...) does not pay an O(n)
78+
// Array.IndexOf lookup on every HTTP call.
79+
private static readonly int AuthorizationHeaderIndex
80+
= Array.IndexOf(RequestHeaderKeysToExtract, HttpConstants.HttpHeaders.Authorization);
81+
3782
[NonEvent]
3883
private unsafe void WriteEventCoreWithActivityId(Guid activityId, int eventId, int eventDataCount, EventSource.EventData* dataDesc)
3984
{
@@ -270,45 +315,48 @@ private unsafe void Request(
270315
}
271316
}
272317

318+
// SECURITY: The Authorization header contains a live credential (master-key HMAC
319+
// signature, resource token, or AAD Bearer access token). It must never be written
320+
// to the ETW event payload where any listener subscribing to the "DocumentDBClient"
321+
// EventSource at Verbose level (e.g., Geneva MonitoringAgent, PerfView, dotnet-trace)
322+
// would capture it. Replace the value with a fixed placeholder while preserving the
323+
// 33-field ETW manifest so existing consumers remain compatible. The IsNullOrEmpty
324+
// guard preserves the existing "" semantics for requests that never had an
325+
// Authorization header attached (e.g. internal plumbing); we only overwrite when a
326+
// real value is present. Factored out as an internal helper so the redaction logic
327+
// can be unit-tested directly without relying on ETW listener wiring (which is
328+
// inherently racy under parallel test execution).
329+
internal static void RedactSensitiveHeaderValues(string[] headerValues)
330+
{
331+
if (headerValues == null)
332+
{
333+
return;
334+
}
335+
336+
// AuthorizationHeaderIndex is computed once at class initialization (see field
337+
// above); the Debug.Assert catches any future refactor that removes Authorization
338+
// from RequestHeaderKeysToExtract.
339+
System.Diagnostics.Debug.Assert(
340+
AuthorizationHeaderIndex >= 0,
341+
"Authorization must be present in RequestHeaderKeysToExtract so the redaction below takes effect.");
342+
343+
if (AuthorizationHeaderIndex >= 0
344+
&& headerValues.Length > AuthorizationHeaderIndex
345+
&& !string.IsNullOrEmpty(headerValues[AuthorizationHeaderIndex]))
346+
{
347+
headerValues[AuthorizationHeaderIndex] = "REDACTED";
348+
}
349+
}
350+
273351
[NonEvent]
274352
public void Request(Guid activityId, Guid localId, string uri, string resourceType, HttpRequestHeaders requestHeaders)
275353
{
276354
if (this.IsEnabled(EventLevel.Verbose, Keywords.HttpRequestAndResponse))
277355
{
278-
string[] keysToExtract =
279-
{
280-
HttpConstants.HttpHeaders.Accept,
281-
HttpConstants.HttpHeaders.Authorization,
282-
HttpConstants.HttpHeaders.ConsistencyLevel,
283-
HttpConstants.HttpHeaders.ContentType,
284-
HttpConstants.HttpHeaders.ContentEncoding,
285-
HttpConstants.HttpHeaders.ContentLength,
286-
HttpConstants.HttpHeaders.ContentLocation,
287-
HttpConstants.HttpHeaders.Continuation,
288-
HttpConstants.HttpHeaders.EmitVerboseTracesInQuery,
289-
HttpConstants.HttpHeaders.EnableScanInQuery,
290-
HttpConstants.HttpHeaders.ETag,
291-
HttpConstants.HttpHeaders.HttpDate,
292-
HttpConstants.HttpHeaders.IfMatch,
293-
HttpConstants.HttpHeaders.IfNoneMatch,
294-
HttpConstants.HttpHeaders.IndexingDirective,
295-
HttpConstants.HttpHeaders.KeepAlive,
296-
HttpConstants.HttpHeaders.OfferType,
297-
HttpConstants.HttpHeaders.PageSize,
298-
HttpConstants.HttpHeaders.PreTriggerExclude,
299-
HttpConstants.HttpHeaders.PreTriggerInclude,
300-
HttpConstants.HttpHeaders.PostTriggerExclude,
301-
HttpConstants.HttpHeaders.PostTriggerInclude,
302-
HttpConstants.HttpHeaders.ProfileRequest,
303-
HttpConstants.HttpHeaders.ResourceTokenExpiry,
304-
HttpConstants.HttpHeaders.SessionToken,
305-
HttpConstants.HttpHeaders.SetCookie,
306-
HttpConstants.HttpHeaders.Slug,
307-
HttpConstants.HttpHeaders.UserAgent,
308-
HttpConstants.HttpHeaders.XDate
309-
};
356+
string[] headerValues = Helpers.ExtractValuesFromHTTPHeaders(requestHeaders, RequestHeaderKeysToExtract);
357+
358+
RedactSensitiveHeaderValues(headerValues);
310359

311-
string[] headerValues = Helpers.ExtractValuesFromHTTPHeaders(requestHeaders, keysToExtract);
312360
this.Request(activityId, localId, uri, resourceType, headerValues[0], headerValues[1], headerValues[2], headerValues[3], headerValues[4],
313361
headerValues[5], headerValues[6], headerValues[7], headerValues[8], headerValues[9], headerValues[10], headerValues[11], headerValues[12],
314362
headerValues[13], headerValues[14], headerValues[15], headerValues[16], headerValues[17], headerValues[18], headerValues[19], headerValues[20],

Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,26 @@ public CosmosClientBuilder WithRequestTimeout(TimeSpan requestTimeout)
387387
return this;
388388
}
389389

390+
/// <summary>
391+
/// Sets the request timeout for inference service operations (e.g., semantic reranking).
392+
/// This is a single-attempt timeout with no retries; if the request does not complete
393+
/// within the specified duration, a <see cref="CosmosException"/> with status 408 (Request Timeout) is thrown.
394+
/// </summary>
395+
/// <param name="inferenceRequestTimeout">A time to use as timeout for inference operations.</param>
396+
/// <value>Default value is 5 seconds.</value>
397+
/// <returns>The current <see cref="CosmosClientBuilder"/>.</returns>
398+
/// <seealso cref="CosmosClientOptions.InferenceRequestTimeout"/>
399+
#if PREVIEW
400+
public
401+
#else
402+
internal
403+
#endif
404+
CosmosClientBuilder WithInferenceRequestTimeout(TimeSpan inferenceRequestTimeout)
405+
{
406+
this.clientOptions.InferenceRequestTimeout = inferenceRequestTimeout;
407+
return this;
408+
}
409+
390410
/// <summary>
391411
/// Sets the connection mode to Direct. This is used by the client when connecting to the Azure Cosmos DB service.
392412
/// </summary>

Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,42 @@ public static HttpMessageHandler CreateSocketsHttpHandlerHelper(
183183
DefaultTrace.TraceWarning("Failed to set EnableMultipleHttp2Connections on SocketsHttpHandler: {0}", ex.Message);
184184
}
185185

186+
// Enable HTTP/2 PING keep-alive to detect broken connections.
187+
// Without this, a broken HTTP/2 connection (e.g. after a network blip or load balancer
188+
// reset) can remain in the pool indefinitely, causing persistent request failures
189+
// that only resolve after application restart.
190+
// KeepAlivePingDelay/Timeout/Policy are available on SocketsHttpHandler in .NET 5.0+.
191+
try
192+
{
193+
int pingDelayInSeconds = ConfigurationManager.GetEnvironmentVariable<int>(
194+
ConfigurationManager.Http2KeepAlivePingDelayInSeconds,
195+
defaultValue: 1);
196+
197+
int pingTimeoutInSeconds = ConfigurationManager.GetEnvironmentVariable<int>(
198+
ConfigurationManager.Http2KeepAlivePingTimeoutInSeconds,
199+
defaultValue: 2);
200+
201+
PropertyInfo keepAlivePingDelayInfo = socketHandlerType.GetProperty("KeepAlivePingDelay");
202+
keepAlivePingDelayInfo?.SetValue(socketHttpHandler, TimeSpan.FromSeconds(pingDelayInSeconds));
203+
204+
PropertyInfo keepAlivePingTimeoutInfo = socketHandlerType.GetProperty("KeepAlivePingTimeout");
205+
keepAlivePingTimeoutInfo?.SetValue(socketHttpHandler, TimeSpan.FromSeconds(pingTimeoutInSeconds));
206+
207+
// HttpKeepAlivePingPolicy.Always = 1: send pings even for idle connections,
208+
// which is critical for detecting broken connections lingering in the pool.
209+
PropertyInfo keepAlivePingPolicyInfo = socketHandlerType.GetProperty("KeepAlivePingPolicy");
210+
if (keepAlivePingPolicyInfo != null)
211+
{
212+
Type pingPolicyType = keepAlivePingPolicyInfo.PropertyType;
213+
object alwaysValue = Enum.ToObject(pingPolicyType, 1);
214+
keepAlivePingPolicyInfo.SetValue(socketHttpHandler, alwaysValue);
215+
}
216+
}
217+
catch (Exception ex)
218+
{
219+
DefaultTrace.TraceWarning("Failed to configure HTTP/2 keep-alive ping on SocketsHttpHandler: {0}", ex.Message);
220+
}
221+
186222
if (serverCertificateCustomValidationCallback != null)
187223
{
188224
//Get SslOptions Property

0 commit comments

Comments
 (0)