Skip to content

Commit 2551f4c

Browse files
[Internal] Refactoring live-tests to reduce flakiness (Azure#45216)
* Refactoring FaultInjection tests * Update IncrementalChangeFeedProcessorTest.java * Update IncrementalChangeFeedProcessorTest.java * Update IncrementalChangeFeedProcessorTest.java * Fixing flaky test * Fixing flaky tests * Update ClientMetricsTest.java * Refactoring CF split tests * Fixing split awaiting * Update ClientMetricsTest.java * Fixing test flakiness * Update pom.xml * Update ClientMetricsTest.java * Update ClientMetricsTest.java * Reducing log noise level for metrics * Refactoring ClientMetricsTest to have more robust state management when a test times out * Adding logs to flaky test ClientMetricsTest.readAllItemsWithDetailMetricsWithExplicitPageSize * Test robustness improvements * Marking more tests as flaky * Changing test config * Test fixes * Fixing occasional hangs in CosmosPagedIterable when page size is specified * Test fixes * Update FaultInjectionWithAvailabilityStrategyTestsBase.java * Fixing flaky tests
1 parent 06c44de commit 2551f4c

38 files changed

+1531
-1010
lines changed

sdk/cosmos/azure-cosmos-benchmark/pom.xml

+21
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,27 @@ Licensed under the MIT License.
473473
</plugins>
474474
</build>
475475
</profile>
476+
<profile>
477+
<!-- integration tests, requires Cosmos DB endpoint with multi master support -->
478+
<id>fi-multi-master</id>
479+
<properties>
480+
<test.groups>fi-multi-master</test.groups>
481+
</properties>
482+
<build>
483+
<plugins>
484+
<plugin>
485+
<groupId>org.apache.maven.plugins</groupId>
486+
<artifactId>maven-failsafe-plugin</artifactId>
487+
<version>3.5.2</version> <!-- {x-version-update;org.apache.maven.plugins:maven-failsafe-plugin;external_dependency} -->
488+
<configuration>
489+
<suiteXmlFiles>
490+
<suiteXmlFile>src/test/resources/fi-multi-master-testng.xml</suiteXmlFile>
491+
</suiteXmlFiles>
492+
</configuration>
493+
</plugin>
494+
</plugins>
495+
</build>
496+
</profile>
476497
<profile>
477498
<!-- integration tests, requires Cosmos DB endpoint -->
478499
<id>examples</id>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<!--
2+
~ The MIT License (MIT)
3+
~ Copyright (c) 2018 Microsoft Corporation
4+
~
5+
~ Permission is hereby granted, free of charge, to any person obtaining a copy
6+
~ of this software and associated documentation files (the "Software"), to deal
7+
~ in the Software without restriction, including without limitation the rights
8+
~ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
~ copies of the Software, and to permit persons to whom the Software is
10+
~ furnished to do so, subject to the following conditions:
11+
~
12+
~ The above copyright notice and this permission notice shall be included in all
13+
~ copies or substantial portions of the Software.
14+
~
15+
~ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
~ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
~ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
~ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
~ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
~ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
~ SOFTWARE.
22+
-->
23+
<!DOCTYPE suite SYSTEM "https://testng.org/testng-1.0.dtd">
24+
<suite name="fi-multi-master">
25+
<test name="fi-multi-master" group-by-instances="true">
26+
<groups>
27+
<run>
28+
<include name="fi-multi-master"/>
29+
</run>
30+
</groups>
31+
<packages>
32+
<package name="com.azure.cosmos.*"/>
33+
</packages>
34+
</test>
35+
</suite>

sdk/cosmos/azure-cosmos-spark_3_2-12/docs/AAD-Auth.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ from cryptography.hazmat.primitives import serialization
173173
secret64 = mssparkutils.credentials.getSecret("xxx", "yyy", "zzz")
174174

175175
# Decode to bytes.
176-
secret64Bytes = bytes(secret, "utf-8")
176+
secret64Bytes = bytes(secret64, "utf-8")
177177
secretBytes = base64.b64decode(secret64)
178178

179179
# Load as a certificate: https://cryptography.io/en/latest/hazmat/primitives/asymmetric/serialization/#pkcs12.

sdk/cosmos/azure-cosmos-tests/pom.xml

+21
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,27 @@ Licensed under the MIT License.
549549
</plugins>
550550
</build>
551551
</profile>
552+
<profile>
553+
<!-- integration tests, requires Cosmos DB endpoint with multi master support -->
554+
<id>fi-multi-master</id>
555+
<properties>
556+
<test.groups>fi-multi-master</test.groups>
557+
</properties>
558+
<build>
559+
<plugins>
560+
<plugin>
561+
<groupId>org.apache.maven.plugins</groupId>
562+
<artifactId>maven-failsafe-plugin</artifactId>
563+
<version>3.5.2</version> <!-- {x-version-update;org.apache.maven.plugins:maven-failsafe-plugin;external_dependency} -->
564+
<configuration>
565+
<suiteXmlFiles>
566+
<suiteXmlFile>src/test/resources/fi-multi-master-testng.xml</suiteXmlFile>
567+
</suiteXmlFiles>
568+
</configuration>
569+
</plugin>
570+
</plugins>
571+
</build>
572+
</profile>
552573
<profile>
553574
<!-- integration tests, requires Cosmos DB endpoint with multi region support -->
554575
<id>multi-region</id>

sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/AsyncCacheNonBlockingIntegrationTest.java

+18-6
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public void afterClass() {
6363
safeClose(this.bulkClient);
6464
}
6565

66-
@Test(groups = {"split"}, timeOut = TIMEOUT * 20)
66+
@Test(groups = {"split"}, timeOut = TIMEOUT * 20, retryAnalyzer = SplitTestsRetryAnalyzer.class)
6767
public void createItem_withCacheRefresh() throws InterruptedException {
6868
String containerId = "bulksplittestcontainer_" + UUID.randomUUID();
6969
int totalRequest = getTotalRequest();
@@ -126,14 +126,26 @@ public void createItem_withCacheRefresh() throws InterruptedException {
126126

127127

128128
// Wait for the throughput update to complete so that we get the partition split
129-
while (true) {
130-
assert throughputResponse != null;
131-
if (!throughputResponse.isReplacePending()) {
132-
break;
133-
}
129+
int i = 0;
130+
// Only wait for 10 minutes for the split to complete
131+
// If backend does not finish split within 10 minutes
132+
// something is off in the backend
133+
// it could be due to limits on how many splits can be executed concurrently etc.
134+
// nothing that can really be done in the SDK
135+
while (i < 60 && throughputResponse.isReplacePending()) {
134136
logger.info("Waiting for split to complete");
135137
Thread.sleep(10 * 1000);
136138
throughputResponse = container.readThroughput().block();
139+
i++;
140+
}
141+
142+
if (throughputResponse.isReplacePending()) {
143+
throw new SplitTimeoutException(
144+
"Backend did not finish split for container '"
145+
+ getEndpoint() + "/"
146+
+ container.getDatabase().getId() + "/"
147+
+ container.getId()
148+
+ "' - skipping this test case");
137149
}
138150

139151
// Read number of partitions. Should be greater than one

sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BatchTestBase.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
1111
import com.azure.cosmos.models.CosmosBatchOperationResult;
1212
import com.azure.cosmos.models.CosmosBatchResponse;
13+
import com.azure.cosmos.models.CosmosItemRequestOptions;
1314
import com.azure.cosmos.models.CosmosItemResponse;
1415
import com.azure.cosmos.models.PartitionKey;
1516
import com.azure.cosmos.rx.TestSuiteBase;
@@ -99,7 +100,7 @@ void verifyByRead(CosmosContainer container, TestDoc doc) {
99100
void verifyByRead(CosmosContainer container, TestDoc doc, String eTag) {
100101
PartitionKey partitionKey = this.getPartitionKey(doc.getStatus());
101102

102-
CosmosItemResponse<TestDoc> response = container.readItem(doc.getId(), partitionKey, TestDoc.class);
103+
CosmosItemResponse<TestDoc> response = verifyExists(container, doc.getId(), partitionKey, TestDoc.class);
103104

104105
assertThat(response.getStatusCode()).isEqualTo(HttpResponseStatus.OK.code());
105106
assertThat(response.getItem()).isEqualTo(doc);

0 commit comments

Comments
 (0)