Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/cosmosdb/cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
### Bugs Fixed

- [#38087](https://github.com/Azure/azure-sdk-for-js/issues/38087) Made `boundingBox` optional on the `SpatialIndex` type. Bounding boxes are only required for geometry spatial indexes, not geography ones.
- Fixed cross-partition queries making a redundant `/pkranges` metadata call on every query. Queries now reuse the shared partition key range cache (worst for hybrid queries, which previously fetched ranges per component query). The cache is also made failure-safe so a transient fetch error no longer poisons later lookups.

## 4.9.3 (2026-04-20)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -625,14 +625,14 @@ export abstract class ParallelQueryExecutionContextBase implements ExecutionCont
diagnosticNode: DiagnosticNodeInternal,
): Promise<any[]> {
const partitionKeyRange = documentProducer.targetPartitionKeyRange;
// Download the new routing map
this.routingProvider = new SmartRoutingMapProvider(this.clientContext);
// Get the queryRange that relates to this partitionKeyRange
const queryRange = QueryRange.parsePartitionKeyRange(partitionKeyRange);
// Force refresh the routing map so the split partition's replacement ranges are downloaded.
return this.routingProvider.getOverlappingRanges(
this.collectionLink,
[queryRange],
diagnosticNode,
true,
);
}

Expand Down
29 changes: 25 additions & 4 deletions sdk/cosmosdb/cosmos/src/routing/partitionKeyRangeCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@ import { hashPartitionKey, binarySearchOnPartitionKeyRanges } from "../utils/has

/** @hidden */
export class PartitionKeyRangeCache {
// Resolved, known-good routing maps. Only successful fetches are published here.
private collectionRoutingMapByCollectionId: {
[key: string]: Promise<InMemoryCollectionRoutingMap>;
};
// In-flight fetches, so concurrent lookups (cold or forceRefresh) dedupe to a single request.
private pendingByCollectionId: {
[key: string]: Promise<InMemoryCollectionRoutingMap>;
};

constructor(private clientContext: ClientContext) {
this.collectionRoutingMapByCollectionId = {};
this.pendingByCollectionId = {};
}
/**
* Finds or Instantiates the requested Collection Routing Map
Expand All @@ -33,13 +39,28 @@ export class PartitionKeyRangeCache {
forceRefresh: boolean = false,
): Promise<InMemoryCollectionRoutingMap> {
const collectionId = getIdFromLink(collectionLink);
if (this.collectionRoutingMapByCollectionId[collectionId] === undefined || forceRefresh) {
this.collectionRoutingMapByCollectionId[collectionId] = this.requestCollectionRoutingMap(
const cached = this.collectionRoutingMapByCollectionId[collectionId];
if (cached !== undefined && !forceRefresh) {
return cached;
}
// Dedupe concurrent fetches (cold or forceRefresh) onto a single in-flight request. The map
// is published only on success, so a failed/in-flight fetch never poisons the cache or
// discards the last known-good map; on failure all waiters get the error and the next call
// retries. The prior good map keeps serving cache hits until the refresh resolves.
if (this.pendingByCollectionId[collectionId] === undefined) {
this.pendingByCollectionId[collectionId] = this.requestCollectionRoutingMap(
collectionLink,
diagnosticNode,
);
)
.then((map) => {
this.collectionRoutingMapByCollectionId[collectionId] = Promise.resolve(map);
return map;
})
.finally(() => {
delete this.pendingByCollectionId[collectionId];
});
}
return this.collectionRoutingMapByCollectionId[collectionId];
return this.pendingByCollectionId[collectionId];
}

/**
Expand Down
8 changes: 6 additions & 2 deletions sdk/cosmosdb/cosmos/src/routing/smartRoutingMapProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import type { ClientContext } from "../ClientContext.js";
import { Constants } from "../common/constants.js";
import type { DiagnosticNodeInternal } from "../diagnostics/DiagnosticNodeInternal.js";
import { PartitionKeyRangeCache } from "./partitionKeyRangeCache.js";
import type { PartitionKeyRangeCache } from "./partitionKeyRangeCache.js";
import { QueryRange } from "./QueryRange.js";

/** @hidden */
Expand All @@ -14,7 +14,9 @@ export class SmartRoutingMapProvider {
private partitionKeyRangeCache: PartitionKeyRangeCache;

constructor(clientContext: ClientContext) {
this.partitionKeyRangeCache = new PartitionKeyRangeCache(clientContext);
// Reuse the client-wide partition key range cache so routing map lookups are served from
// cache across queries instead of re-fetching pkranges on every query.
this.partitionKeyRangeCache = clientContext.partitionKeyRangeCache;
}
private static _secondRangeIsAfterFirstRange(range1: QueryRange, range2: QueryRange): boolean {
if (typeof range1.max === "undefined") {
Expand Down Expand Up @@ -73,6 +75,7 @@ export class SmartRoutingMapProvider {
collectionLink: string,
sortedRanges: QueryRange[],
diagnosticNode: DiagnosticNodeInternal,
forceRefresh: boolean = false,
): Promise<any[]> {
// validate if the list is non- overlapping and sorted TODO: any PartitionKeyRanges
if (!SmartRoutingMapProvider._isSortedAndNonOverlapping(sortedRanges)) {
Expand All @@ -88,6 +91,7 @@ export class SmartRoutingMapProvider {
const collectionRoutingMap = await this.partitionKeyRangeCache.onCollectionRoutingMap(
collectionLink,
diagnosticNode,
forceRefresh,
);

let index = 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { PartitionKeyRangeCache } from "../../../src/routing/index.js";
import type { ClientContext } from "../../../src/ClientContext.js";
import { createDummyDiagnosticNode } from "../../public/common/TestHelpers.js";
import { describe, it, assert } from "vitest";

describe("PartitionKeyRangeCache", () => {
const collectionLink = "dbs/testdb/colls/testcoll";
const ranges = [{ id: "0", minInclusive: "", maxExclusive: "FF" }];

// Builds a fake ClientContext whose queryPartitionKeyRanges() counts fetches and can be
// toggled to fail, so we can exercise the cache's dedupe/eviction/forceRefresh behavior.
function makeContext(): {
ctx: ClientContext;
calls: () => number;
setFail: (f: boolean) => void;
} {
let count = 0;
let fail = false;
const ctx = {
queryPartitionKeyRanges: () => ({
fetchAllInternal: async () => {
count++;
if (fail) throw new Error("transient pkranges failure");
return { resources: ranges };
},
}),
} as unknown as ClientContext;
return { ctx, calls: () => count, setFail: (f) => (fail = f) };
}

it("dedupes concurrent first-time lookups into one fetch", async () => {
const { ctx, calls } = makeContext();
const cache = new PartitionKeyRangeCache(ctx);
await Promise.all([
cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode()),
cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode()),
cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode()),
]);
assert.strictEqual(calls(), 1);
});

it("serves later lookups from cache without re-fetching", async () => {
const { ctx, calls } = makeContext();
const cache = new PartitionKeyRangeCache(ctx);
await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode());
await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode());
assert.strictEqual(calls(), 1);
});

it("evicts on failure so the next lookup retries instead of reusing a rejected promise", async () => {
const { ctx, calls, setFail } = makeContext();
const cache = new PartitionKeyRangeCache(ctx);

setFail(true);
let threw = false;
try {
await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode());
} catch {
threw = true;
}
assert.isTrue(threw);

setFail(false);
const map = await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode());
assert.isDefined(map);
assert.strictEqual(calls(), 2);
});

it("keeps the prior map and surfaces the error when a forceRefresh fails", async () => {
const { ctx, calls, setFail } = makeContext();
const cache = new PartitionKeyRangeCache(ctx);

const good = await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode());

setFail(true);
let threw = false;
try {
await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode(), true);
} catch {
threw = true;
}
assert.isTrue(threw);

// The failed refresh did not poison the cache: the prior map is still served without refetch.
const after = await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode());
assert.strictEqual(after, good);
assert.strictEqual(calls(), 2);
});

it("dedupes concurrent forceRefresh calls into one fetch and shares the outcome", async () => {
const { ctx, calls, setFail } = makeContext();
const cache = new PartitionKeyRangeCache(ctx);

const good = await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode());
assert.strictEqual(calls(), 1);

// Several simultaneous forceRefresh calls collapse to a single fetch; if it fails, all share
// the error and the prior map is kept.
setFail(true);
const refreshes = [0, 1, 2].map(() =>
cache
.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode(), true)
.catch(() => "err"),
);
const outcomes = await Promise.all(refreshes);
assert.deepStrictEqual(outcomes, ["err", "err", "err"]);
assert.strictEqual(calls(), 2);

setFail(false);
const after = await cache.onCollectionRoutingMap(collectionLink, createDummyDiagnosticNode());
assert.strictEqual(after, good);
assert.strictEqual(calls(), 2);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
// Licensed under the MIT License.

import { MockedQueryIterator } from "./MockQueryIterator.js";
import { PartitionKeyRangeCache } from "../../../src/routing/index.js";

export class MockedClientContext {
constructor(private partitionKeyRanges: unknown) {}
public partitionKeyRangeCache: PartitionKeyRangeCache;
constructor(private partitionKeyRanges: unknown) {
this.partitionKeyRangeCache = new PartitionKeyRangeCache(this as any);
}
public readPartitionKeyRanges(): MockedQueryIterator {
return new MockedQueryIterator(this.partitionKeyRanges);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { CosmosClient, ResourceType } from "../../../src/index.js";
import type { Container } from "../../../src/index.js";
import { endpoint } from "../common/_testConfig.js";
import { masterKey } from "../common/_fakeTestSecrets.js";
import { getTestContainer, removeAllDatabases } from "../common/TestHelpers.js";
import { describe, it, beforeAll, afterAll, expect } from "vitest";

/**
* The partition key range cache is shared via ClientContext, so cross-partition queries should
* fetch pkranges from the gateway once and reuse the cache on subsequent queries.
*/
describe("Partition key range cache reuse", { timeout: 60000 }, () => {
let pkRangeRequests = 0;
const client = new CosmosClient({
endpoint,
key: masterKey,
plugins: [
{
on: "request",
plugin: async (context, _diagNode, next) => {
if (context.resourceType === ResourceType.pkranges) {
pkRangeRequests++;
}
return next(context);
},
},
],
});
let container: Container;

beforeAll(async () => {
await removeAllDatabases(client);
container = await getTestContainer("pkrange-cache-reuse", client, {
partitionKey: { paths: ["/pk"] },
throughput: 12000,
});
await Promise.all(
Array.from({ length: 30 }, (_, i) =>
container.items.create({ id: `item-${i}`, pk: `pk-${i}` }),
),
);
});

afterAll(async () => {
await removeAllDatabases(client);
});

it("fetches pkranges once and serves later cross-partition queries from cache", async () => {
const options = { forceQueryPlan: true };
await container.items.query("SELECT * FROM c", options).fetchAll();
expect(pkRangeRequests).toBeGreaterThan(0);
const afterFirst = pkRangeRequests;

for (let i = 0; i < 3; i++) {
await container.items.query("SELECT * FROM c", options).fetchAll();
}

// Subsequent queries reuse the cached routing map - no extra pkranges requests.
expect(pkRangeRequests).toBe(afterFirst);
});
});
Loading