|
2 | 2 | { |
3 | 3 | using System; |
4 | 4 | using System.Collections.Generic; |
| 5 | + using System.Diagnostics; |
5 | 6 | using System.IO; |
6 | 7 | using System.Linq; |
7 | 8 | using System.Net; |
|
12 | 13 | using System.Threading; |
13 | 14 | using System.Threading.Tasks; |
14 | 15 | using Microsoft.Azure.Cosmos.Diagnostics; |
15 | | - using Microsoft.Azure.Cosmos.FaultInjection; |
| 16 | + using Microsoft.Azure.Cosmos.FaultInjection; |
16 | 17 | using Microsoft.VisualStudio.TestTools.UnitTesting; |
17 | 18 | using Newtonsoft.Json.Linq; |
18 | 19 | using static Microsoft.Azure.Cosmos.Routing.GlobalPartitionEndpointManagerCore; |
@@ -80,7 +81,8 @@ public void TestCleanup() |
80 | 81 | finally |
81 | 82 | { |
82 | 83 | //Do not delete the resources (except MM Write test object), georeplication is slow and we want to reuse the resources |
83 | | - this.client?.Dispose(); |
| 84 | + this.client?.Dispose(); |
| 85 | + Environment.SetEnvironmentVariable(ConfigurationManager.StalePartitionUnavailabilityRefreshIntervalInSeconds, null); |
84 | 86 | } |
85 | 87 | } |
86 | 88 |
|
@@ -467,7 +469,7 @@ await this.container.DeleteItemAsync<CosmosIntegrationTestObject>( |
467 | 469 | } |
468 | 470 |
|
469 | 471 | [TestMethod] |
470 | | - [TestCategory("MultiRegion")] |
| 472 | + [TestCategory("MultiRegion")] |
471 | 473 | [DataRow(ConnectionMode.Direct, "15", "10", DisplayName = "Direct Mode - Scenario when the total iteration count is 15 and circuit breaker consecutive failure threshold is set to 10.")] |
472 | 474 | [DataRow(ConnectionMode.Direct, "25", "20", DisplayName = "Direct Mode - Scenario when the total iteration count is 25 and circuit breaker consecutive failure threshold is set to 20.")] |
473 | 475 | [DataRow(ConnectionMode.Direct, "35", "30", DisplayName = "Direct Mode - Scenario when the total iteration count is 35 and circuit breaker consecutive failure threshold is set to 30.")] |
@@ -602,7 +604,7 @@ public async Task ReadItemAsync_WithCircuitBreakerEnabledAndSingleMasterAccountA |
602 | 604 | } |
603 | 605 |
|
604 | 606 | [TestMethod] |
605 | | - [TestCategory("MultiRegion")] |
| 607 | + [TestCategory("MultiRegion")] |
606 | 608 | [DataRow(ConnectionMode.Direct, DisplayName ="Direct Mode")] |
607 | 609 | [DataRow(ConnectionMode.Gateway, DisplayName = "Gateway Mode")] |
608 | 610 | [Owner("nalutripician")] |
@@ -721,15 +723,14 @@ public async Task ReadItemAsync_WithCircuitBreakerEnabledAndTimeoutCounterOverwr |
721 | 723 | } |
722 | 724 | finally |
723 | 725 | { |
724 | | - Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelCircuitBreakerEnabled, null); |
725 | | - Environment.SetEnvironmentVariable(ConfigurationManager.CircuitBreakerConsecutiveFailureCountForReads, null); |
726 | | - |
| 726 | + Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelCircuitBreakerEnabled, null); |
| 727 | + Environment.SetEnvironmentVariable(ConfigurationManager.CircuitBreakerTimeoutCounterResetWindowInMinutes, null); |
727 | 728 | await this.TryDeleteItems(itemsList); |
728 | 729 | } |
729 | 730 | } |
730 | 731 |
|
731 | 732 | [TestMethod] |
732 | | - [TestCategory("MultiRegion")] |
| 733 | + [TestCategory("MultiRegion")] |
733 | 734 | [Owner("dkunda")] |
734 | 735 | [Timeout(70000)] |
735 | 736 | public async Task ReadItemAsync_WithCircuitBreakerEnabledAndSingleMasterAccountAndServiceUnavailableReceivedFromTwoRegions_ShouldApplyPartitionLevelOverrideToThridRegion() |
@@ -893,7 +894,7 @@ public async Task ReadItemAsync_WithCircuitBreakerEnabledAndSingleMasterAccountA |
893 | 894 | } |
894 | 895 |
|
895 | 896 | [TestMethod] |
896 | | - [TestCategory("MultiRegion")] |
| 897 | + [TestCategory("MultiRegion")] |
897 | 898 | [Owner("dkunda")] |
898 | 899 | [Timeout(70000)] |
899 | 900 | public async Task ReadItemAsync_WithNoPreferredRegionsAndCircuitBreakerEnabledAndSingleMasterAccountAndServiceUnavailableReceived_ShouldApplyPartitionLevelOverride() |
@@ -1011,7 +1012,7 @@ public async Task ReadItemAsync_WithNoPreferredRegionsAndCircuitBreakerEnabledAn |
1011 | 1012 |
|
1012 | 1013 | [TestMethod] |
1013 | 1014 | [Owner("dkunda")] |
1014 | | - [TestCategory("MultiRegion")] |
| 1015 | + [TestCategory("MultiRegion")] |
1015 | 1016 | [Timeout(70000)] |
1016 | 1017 | public async Task ReadItemAsync_WithCircuitBreakerDisabledAndSingleMasterAccountAndServiceUnavailableReceived_ShouldNotApplyPartitionLevelOverride() |
1017 | 1018 | { |
@@ -1095,14 +1096,12 @@ public async Task ReadItemAsync_WithCircuitBreakerDisabledAndSingleMasterAccount |
1095 | 1096 | finally |
1096 | 1097 | { |
1097 | 1098 | Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelCircuitBreakerEnabled, null); |
1098 | | - Environment.SetEnvironmentVariable(ConfigurationManager.CircuitBreakerConsecutiveFailureCountForReads, null); |
1099 | | - |
1100 | 1099 | await this.TryDeleteItems(itemsList); |
1101 | 1100 | } |
1102 | 1101 | } |
1103 | 1102 |
|
1104 | 1103 | [TestMethod] |
1105 | | - [Owner("dkunda")] |
| 1104 | + [Owner("dkunda")] |
1106 | 1105 | [TestCategory("MultiRegion")] |
1107 | 1106 | [Timeout(70000)] |
1108 | 1107 | public async Task CreateItemAsync_WithCircuitBreakerEnabledAndSingleMasterAccountAndServiceUnavailableReceived_ShouldNotApplyPartitionLevelOverride() |
@@ -1182,7 +1181,7 @@ public async Task CreateItemAsync_WithCircuitBreakerEnabledAndSingleMasterAccoun |
1182 | 1181 |
|
1183 | 1182 | [TestMethod] |
1184 | 1183 | [Owner("dkunda")] |
1185 | | - [TestCategory("MultiMaster")] |
| 1184 | + [TestCategory("MultiMaster")] |
1186 | 1185 | [DataRow(ConnectionMode.Direct, "15", "10", DisplayName = "Direct Mode - Scenario whtn the total iteration count is 15 and circuit breaker consecutive failure threshold is set to 10.")] |
1187 | 1186 | [DataRow(ConnectionMode.Direct, "25", "20", DisplayName = "Direct Mode - Scenario whtn the total iteration count is 25 and circuit breaker consecutive failure threshold is set to 20.")] |
1188 | 1187 | [DataRow(ConnectionMode.Direct, "35", "30", DisplayName = "Direct Mode - Scenario whtn the total iteration count is 35 and circuit breaker consecutive failure threshold is set to 30.")] |
@@ -2259,7 +2258,132 @@ public async Task ClinetOverrides0msRequestTimeoutValueForPPAF() |
2259 | 2258 | Assert.IsNotNull(strat); |
2260 | 2259 | Assert.AreNotEqual(0, strat.Threshold); |
2261 | 2260 | } |
2262 | | - |
| 2261 | + |
/// <summary>
/// Verifies that, with the partition level circuit breaker enabled and the consecutive
/// read failure threshold set to 1, Gateway mode queries keep succeeding while a response
/// delay is injected for region 1. The first query must be served by region 1 only, the
/// next queries are expected to touch both region 1 and region 2, and once the threshold
/// has been exceeded every query is expected to contact a single region.
/// </summary>
[TestMethod]
[TestCategory("MultiRegion")]
[Owner("pkolluri")]
[Timeout(70000)]
public async Task QueryItemAsync_WithCircuitBreakerEnabledMultiRegionAndServiceResponseDelay_ShouldFailOverToNextRegionAsync()
{
    // Arrange.
    // Declared outside the try block because the finally block needs it for cleanup.
    List<CosmosIntegrationTestObject> itemsList = new()
    {
        new() { Id = "smTestId2", Pk = "smpk1" },
    };

    try
    {
        // Set inside the try block so the finally block always resets these process-wide
        // settings, even when building the fault injection rule or the client throws.
        Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelCircuitBreakerEnabled, "True");
        Environment.SetEnvironmentVariable(ConfigurationManager.CircuitBreakerConsecutiveFailureCountForReads, "1");

        // Fault injection rule that delays the responses of query requests sent to region 1
        // through the gateway.
        // NOTE(review): the injected delay (70s) equals the test timeout (70000 ms); this
        // presumably relies on the SDK giving up on the delayed request much earlier - confirm.
        string serviceResponseDelayRuleId = "response-delay-rule-" + Guid.NewGuid().ToString();
        FaultInjectionRule serviceResponseDelayRuleFromRegion1 = new FaultInjectionRuleBuilder(
            id: serviceResponseDelayRuleId,
            condition:
                new FaultInjectionConditionBuilder()
                    .WithOperationType(FaultInjectionOperationType.QueryItem)
                    .WithConnectionType(FaultInjectionConnectionType.Gateway)
                    .WithRegion(region1)
                    .Build(),
            result:
                FaultInjectionResultBuilder.GetResultBuilder(FaultInjectionServerErrorType.ResponseDelay)
                    .WithDelay(TimeSpan.FromSeconds(70))
                    .Build())
            .Build();

        // The rule starts disabled so that the first query can be served by region 1.
        serviceResponseDelayRuleFromRegion1.Disable();

        FaultInjector faultInjector = new FaultInjector(
            new List<FaultInjectionRule> { serviceResponseDelayRuleFromRegion1 });

        CosmosClientOptions cosmosClientOptions = new CosmosClientOptions()
        {
            ConsistencyLevel = ConsistencyLevel.Session,
            FaultInjector = faultInjector,
            ApplicationPreferredRegions = new List<string> { region1, region2, region3 },
            ConnectionMode = ConnectionMode.Gateway,
        };

        using CosmosClient cosmosClient = new(connectionString: this.connectionString, clientOptions: cosmosClientOptions);
        Database database = cosmosClient.GetDatabase(MultiRegionSetupHelpers.dbName);
        Container container = database.GetContainer(MultiRegionSetupHelpers.containerName);

        // Act and Assert.
        await this.TryCreateItems(itemsList);

        //Must Ensure the data is replicated to all regions
        await Task.Delay(3000);

        const int totalIterations = 7;
        const int ppcbDefaultThreshold = 1;
        const int firstRegionServiceUnavailableAttempt = 1;

        // The query text does not change between attempts, so it is built once.
        string sqlQueryText = $"SELECT * FROM c WHERE c.id = '{itemsList[0].Id}'";

        bool isRegion1Available = true;
        int thresholdCounter = 0;

        for (int attemptCount = 1; attemptCount <= totalIterations; attemptCount++)
        {
            try
            {
                using FeedIterator<CosmosIntegrationTestObject> feedIterator = container.GetItemQueryIterator<CosmosIntegrationTestObject>(sqlQueryText, requestOptions: new QueryRequestOptions());

                while (feedIterator.HasMoreResults)
                {
                    FeedResponse<CosmosIntegrationTestObject> response = await feedIterator.ReadNextAsync();
                    Assert.AreEqual(HttpStatusCode.OK, response.StatusCode);

                    HashSet<string> contactedRegions = new(
                        response.Diagnostics.GetContactedRegions().Select(r => r.regionName));

                    if (isRegion1Available)
                    {
                        Assert.IsTrue(contactedRegions.Count == 1, "Assert that, when no failure happened, the query request is being served from region 1.");
                        Assert.IsTrue(contactedRegions.Contains(region1));

                        // Start delaying region 1 responses from the next request onwards.
                        if (attemptCount == firstRegionServiceUnavailableAttempt)
                        {
                            isRegion1Available = false;
                            serviceResponseDelayRuleFromRegion1.Enable();
                        }
                    }
                    else if (thresholdCounter <= ppcbDefaultThreshold)
                    {
                        // The first (ppcbDefaultThreshold + 1) responses after the fault is enabled
                        // are expected to show both region 1 and region 2.
                        Assert.IsTrue(contactedRegions.Count == 2, "Asserting that when the query request succeeds before the consecutive failure count reaches the threshold, the partition didn't fail over to the next region, and the request was retried.");
                        Assert.IsTrue(contactedRegions.Contains(region1) && contactedRegions.Contains(region2), "Asserting that both region 1 and region 2 were contacted.");
                        thresholdCounter++;
                    }
                    else
                    {
                        Assert.IsTrue(contactedRegions.Count == 1, "Asserting that when the consecutive failure count reaches the threshold, the partition was failed over to the next region, and the subsequent query request/s were successful on the next region");
                    }
                }
            }
            catch (CosmosException ce)
            {
                Assert.Fail("Query operation should succeed with successful failover to next region." + ce.Diagnostics.ToString());
            }
            catch (Exception ex) when (ex is not UnitTestAssertException)
            {
                // Assertion failures raised above are deliberately not caught here, so they
                // surface with their own message and stack trace instead of being reported
                // as an unhandled exception.
                Assert.Fail($"Unhandled Exception was thrown during Query operation call. Message: {ex.Message}");
            }
        }
    }
    finally
    {
        Environment.SetEnvironmentVariable(ConfigurationManager.PartitionLevelCircuitBreakerEnabled, null);
        Environment.SetEnvironmentVariable(ConfigurationManager.CircuitBreakerConsecutiveFailureCountForReads, null);

        await this.TryDeleteItems(itemsList);
    }
}
| 2386 | + |
2263 | 2387 | private async Task TryCreateItems(List<CosmosIntegrationTestObject> testItems) |
2264 | 2388 | { |
2265 | 2389 | foreach (CosmosIntegrationTestObject item in testItems) |
|
0 commit comments