Skip to content

Commit 72f9deb

Browse files
Session Consistency: Adds SessionTokenMismatchRetryPolicy optimization through customer supplied region switch hints (#5128)
This PR allows application developers to configure hints through a SessionRetryOptions instance, which signals to the SDK whether to pin retries to the local region or move more quickly to a remote region when READ_SESSION_NOT_AVAILABLE errors are thrown. [] New feature (non-breaking change which adds functionality) --------- Co-authored-by: Fabian Meiswinkel <fabianm@microsoft.com>
1 parent bddf591 commit 72f9deb

9 files changed

Lines changed: 642 additions & 6 deletions

File tree

Microsoft.Azure.Cosmos/src/ConnectionPolicy.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,15 @@ internal CosmosClientTelemetryOptions CosmosClientTelemetryOptions
536536
set;
537537
}
538538

539+
/// <summary>
540+
/// Provides a SessionTokenMismatchRetryPolicy optimization through customer-supplied region switch hints.
541+
/// </summary>
542+
internal SessionRetryOptions SessionRetryOptions
543+
{
544+
get;
545+
set;
546+
}
547+
539548
/// <summary>
540549
/// GlobalEndpointManager will subscribe to this event if user updates the preferredLocations list in the Azure Cosmos DB service.
541550
/// </summary>

Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ public CosmosClientOptions()
8989
this.ConnectionProtocol = CosmosClientOptions.DefaultProtocol;
9090
this.ApiType = CosmosClientOptions.DefaultApiType;
9191
this.CustomHandlers = new Collection<RequestHandler>();
92-
this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions();
92+
this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions();
93+
this.SessionRetryOptions = new SessionRetryOptions();
9394
}
9495

9596
/// <summary>
@@ -120,7 +121,12 @@ public string ApplicationName
120121
/// <summary>
121122
/// Get or set session container for the client
122123
/// </summary>
123-
internal ISessionContainer SessionContainer { get; set; }
124+
internal ISessionContainer SessionContainer { get; set; }
125+
126+
/// <summary>
127+
/// Hints that guide SDK-internal retry policies on how early to switch retries to a different region.
128+
/// </summary>
129+
internal SessionRetryOptions SessionRetryOptions { get; private set; }
124130

125131
/// <summary>
126132
/// Gets or sets the location where the application is running. This will influence the SDK's choice for the Azure Cosmos DB service interaction.
@@ -740,6 +746,20 @@ public Func<HttpClient> HttpClientFactory
740746
/// after the threshold step time, the SDK will hedge to the third region and so on.
741747
/// </remarks>
742748
public AvailabilityStrategy AvailabilityStrategy { get; set; }
749+
750+
/// <summary>
751+
/// Gets or sets the customer-supplied region switch hint (stored in SessionRetryOptions.RemoteRegionPreferred) that optimizes SessionTokenMismatchRetryPolicy.
752+
/// </summary>
753+
#if PREVIEW
754+
public
755+
#else
756+
internal
757+
#endif
758+
bool EnableRemoteRegionPreferredForSessionRetry
759+
{
760+
get => this.SessionRetryOptions.RemoteRegionPreferred;
761+
set => this.SessionRetryOptions.RemoteRegionPreferred = value;
762+
}
743763

744764
/// <summary>
745765
/// Enable partition key level failover
@@ -1004,7 +1024,8 @@ internal virtual ConnectionPolicy GetConnectionPolicy(int clientId)
10041024
ConnectionProtocol = this.ConnectionProtocol,
10051025
UserAgentContainer = this.CreateUserAgentContainerWithFeatures(clientId),
10061026
UseMultipleWriteLocations = true,
1007-
IdleTcpConnectionTimeout = this.IdleTcpConnectionTimeout,
1027+
IdleTcpConnectionTimeout = this.IdleTcpConnectionTimeout,
1028+
SessionRetryOptions = this.SessionRetryOptions,
10081029
OpenTcpConnectionTimeout = this.OpenTcpConnectionTimeout,
10091030
MaxRequestsPerTcpConnection = this.MaxRequestsPerTcpConnection,
10101031
MaxTcpConnectionsPerEndpoint = this.MaxTcpConnectionsPerEndpoint,

Microsoft.Azure.Cosmos/src/DocumentClient.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6800,7 +6800,8 @@ private void CreateStoreModel(bool subscribeRntbdStatus)
68006800
!this.enableRntbdChannel,
68016801
this.UseMultipleWriteLocations && (this.accountServiceConfiguration.DefaultConsistencyLevel != Documents.ConsistencyLevel.Strong),
68026802
true,
6803-
enableReplicaValidation: this.isReplicaAddressValidationEnabled);
6803+
enableReplicaValidation: this.isReplicaAddressValidationEnabled,
6804+
sessionRetryOptions: this.ConnectionPolicy.SessionRetryOptions);
68046805

68056806
if (subscribeRntbdStatus)
68066807
{

Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -581,8 +581,24 @@ public CosmosClientBuilder WithSerializerOptions(CosmosSerializationOptions cosm
581581
{
582582
this.clientOptions.SerializerOptions = cosmosSerializerOptions;
583583
return this;
584-
}
585-
584+
}
585+
586+
/// <summary>
587+
/// Provides a SessionTokenMismatchRetryPolicy optimization through a customer-supplied region switch hint.
588+
/// </summary>
589+
/// <param name="enableRemoteRegionPreferredForSessionRetry">Value assigned to the client options' EnableRemoteRegionPreferredForSessionRetry property.</param>
590+
/// <returns>The <see cref="CosmosClientBuilder"/> object</returns>
591+
#if PREVIEW
592+
public
593+
#else
594+
internal
595+
#endif
596+
CosmosClientBuilder WithEnableRemoteRegionPreferredForSessionRetry(bool enableRemoteRegionPreferredForSessionRetry)
597+
{
598+
this.clientOptions.EnableRemoteRegionPreferredForSessionRetry = enableRemoteRegionPreferredForSessionRetry;
599+
return this;
600+
}
601+
586602
/// <summary>
587603
/// Set a custom JSON serializer.
588604
/// </summary>
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// ------------------------------------------------------------
2+
// Copyright (c) Microsoft Corporation. All rights reserved.
3+
// ------------------------------------------------------------
4+
5+
namespace Microsoft.Azure.Cosmos
6+
{
7+
using System;
8+
using Microsoft.Azure.Documents;
9+
10+
/// <summary>
11+
/// Implementation of the ISessionRetryOptions interface. Sealed because clients are not meant to subclass it.
12+
/// </summary>
13+
internal sealed class SessionRetryOptions : ISessionRetryOptions
14+
{
15+
/// <summary>
16+
/// Initializes a new instance of the <see cref="SessionRetryOptions"/> class.
17+
/// </summary>
18+
public SessionRetryOptions()
19+
{
20+
this.MinInRegionRetryTime = ConfigurationManager.GetMinRetryTimeInLocalRegionWhenRemoteRegionPreferred();
21+
this.MaxInRegionRetryCount = ConfigurationManager.GetMaxRetriesInLocalRegionWhenRemoteRegionPreferred();
22+
}
23+
/// <summary>
24+
/// Gets the minimum retry time for 404/1002 retries within each region for read and write operations.
25+
/// The minimum value is 100ms - this minimum is enforced to provide a way for the local region to catch up on replication lag. The default value is 500ms - as a recommendation, ensure that this value is higher than the steady-state
26+
/// replication latency between the regions you chose
27+
/// </summary>
28+
public TimeSpan MinInRegionRetryTime { get; private set; }
29+
30+
/// <summary>
31+
/// Gets the maximum number of retries within each region for read and write operations. The minimum value is 1 - the backoff time for the last in-region retry will ensure that the total retry time within the
32+
/// region is at least the min. in-region retry time.
33+
/// </summary>
34+
public int MaxInRegionRetryCount { get; private set; }
35+
36+
/// <summary>
37+
/// Hint that guides SDK-internal retry policies on how early to switch retries to a different region. If true, will retry all replicas once and add a minimum delay before switching to the next region. If false, it will
38+
/// retry in the local region up to 5s
39+
/// </summary>
40+
public bool RemoteRegionPreferred { get; set; } = false;
41+
42+
}
43+
}

Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,20 @@ internal static class ConfigurationManager
7272
/// </summary>
7373
internal static readonly string DistributedQueryGatewayModeEnabled = "AZURE_COSMOS_DISTRIBUTED_QUERY_GATEWAY_ENABLED";
7474

75+
/// <summary>
76+
/// If a client specifies a value, it is forced to be at least 100 ms; otherwise the default is 500 ms.
77+
/// </summary>
78+
internal static readonly string MinInRegionRetryTimeForWritesInMs = "AZURE_COSMOS_SESSION_TOKEN_MISMATCH_IN_REGION_RETRY_TIME_IN_MILLISECONDS";
79+
internal static readonly int DefaultMinInRegionRetryTimeForWritesInMs = 500;
80+
internal static readonly int MinMinInRegionRetryTimeForWritesInMs = 100;
81+
82+
/// <summary>
83+
/// If a client specifies a value, it is forced to be at least 1; otherwise the default is 1 (both values are currently 1, but they are kept separate so they can diverge in the future).
84+
/// </summary>
85+
internal static readonly string MaxRetriesInLocalRegionWhenRemoteRegionPreferred = "AZURE_COSMOS_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED";
86+
internal static readonly int DefaultMaxRetriesInLocalRegionWhenRemoteRegionPreferred = 1;
87+
internal static readonly int MinMaxRetriesInLocalRegionWhenRemoteRegionPreferred = 1;
88+
7589
/// <summary>
7690
/// A read-only string containing the environment variable name for enabling binary encoding. This will eventually
7791
/// be removed once binary encoding is enabled by default for both preview
@@ -96,6 +110,26 @@ public static T GetEnvironmentVariable<T>(string variable, T defaultValue)
96110
return (T)Convert.ChangeType(value, typeof(T));
97111
}
98112

113+
public static int GetMaxRetriesInLocalRegionWhenRemoteRegionPreferred()
114+
{
115+
return Math.Max(
116+
ConfigurationManager
117+
.GetEnvironmentVariable(
118+
variable: MaxRetriesInLocalRegionWhenRemoteRegionPreferred,
119+
defaultValue: DefaultMaxRetriesInLocalRegionWhenRemoteRegionPreferred),
120+
MinMaxRetriesInLocalRegionWhenRemoteRegionPreferred);
121+
}
122+
123+
public static TimeSpan GetMinRetryTimeInLocalRegionWhenRemoteRegionPreferred()
124+
{
125+
return TimeSpan.FromMilliseconds(Math.Max(
126+
ConfigurationManager
127+
.GetEnvironmentVariable(
128+
variable: MinInRegionRetryTimeForWritesInMs,
129+
defaultValue: DefaultMinInRegionRetryTimeForWritesInMs),
130+
MinMinInRegionRetryTimeForWritesInMs));
131+
}
132+
99133
/// <summary>
100134
/// Gets the boolean value of the replica validation environment variable. Note that, replica validation
101135
/// is enabled by default for the preview package and disabled for GA at the moment. The user can set the

0 commit comments

Comments
 (0)