Skip to content

Commit 1aa41a4

Browse files
committed
Utilize native KVS listKeys prefix option
1 parent 0e4852b commit 1aa41a4

3 files changed

Lines changed: 57 additions & 32 deletions

File tree

packages/fs-storage/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
"access": "public"
4343
},
4444
"dependencies": {
45-
"@crawlee/fs-storage-native": "0.1.5-beta.5",
45+
"@crawlee/fs-storage-native": "0.1.5-beta.6",
4646
"@crawlee/types": "workspace:*",
4747
"@sapphire/shapeshift": "^4.0.0",
4848
"content-type": "^1.0.5",

packages/fs-storage/src/resource-clients/key-value-store.ts

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,29 +113,54 @@ export class KeyValueStoreClient extends CachedIdClient implements storage.KeyVa
113113
})
114114
.parse(options);
115115

116-
// The native iterator yields keys in lexical order and natively supports `exclusiveStartKey`
117-
// and `limit`, but not `prefix`, and it does not throw for an unknown `exclusiveStartKey`.
118-
// To preserve the historical semantics (prefix filtering and a hard error for a missing
119-
// `exclusiveStartKey`), we collect the full key list and slice it here. We also merge in any
120-
// untracked value files present on disk (native keys take precedence on collisions).
116+
// The native iterator yields keys in lexical order and natively supports `exclusiveStartKey`,
117+
// `limit`, and (since `@crawlee/fs-storage-native` 0.1.5-beta.6) `prefix`. It does not, however,
118+
// throw for an unknown `exclusiveStartKey`, nor does it know about untracked value files on disk
119+
// ("bare files"). We layer those two pieces of historical behavior on top here.
120+
const bareFiles = await this.listBareFiles();
121+
122+
// Fast path: with no untracked files on disk we can push prefix/exclusiveStartKey/limit all the
123+
// way down to the native iterator and stream the (already paginated, already filtered) result
124+
// straight through — no need to pull every key into memory just to slice it. We only preflight
125+
// the `exclusiveStartKey` so a missing one still throws, matching the historical contract.
126+
if (bareFiles.length === 0) {
127+
if (exclusiveStartKey !== undefined && !(await this.nativeClient.recordExists(exclusiveStartKey))) {
128+
throw new Error(
129+
`exclusiveStartKey "${exclusiveStartKey}" was not found in the key-value store. ` +
130+
`This is likely a bug — the key may have been deleted between paginated listKeys calls.`,
131+
);
132+
}
133+
134+
const items: storage.KeyValueStoreItemData[] = [];
135+
const iterator = await this.nativeClient.iterateKeys(exclusiveStartKey, limit, undefined, prefix);
136+
for await (const record of iterator) {
137+
items.push({ key: record.key, size: record.size ?? 0 });
138+
}
139+
return items;
140+
}
141+
142+
// Merge path (untracked value files present on disk): bare files must be interleaved with the
143+
// native keys in lexical order, so we have to materialize and sort. Push `prefix` down to the
144+
// native side anyway to keep its contribution bounded; native keys take precedence on collisions.
121145
const itemsByKey = new Map<string, storage.KeyValueStoreItemData>();
122146

123-
for (const bareFile of await this.listBareFiles()) {
147+
for (const bareFile of bareFiles) {
148+
if (prefix && !bareFile.key.startsWith(prefix)) {
149+
continue;
150+
}
124151
const size = await readFile(bareFile.filePath)
125152
.then((buffer) => buffer.byteLength)
126153
.catch(() => 0);
127154
itemsByKey.set(bareFile.key, { key: bareFile.key, size });
128155
}
129156

130-
const iterator = await this.nativeClient.iterateKeys();
157+
const iterator = await this.nativeClient.iterateKeys(undefined, undefined, undefined, prefix);
131158
for await (const record of iterator) {
132159
itemsByKey.set(record.key, { key: record.key, size: record.size ?? 0 });
133160
}
134161

135162
// Emulate the API: keys are returned in lexical order.
136-
const items = [...itemsByKey.values()].sort((a, b) => a.key.localeCompare(b.key));
137-
138-
let filteredItems = items.filter((item) => !prefix || item.key.startsWith(prefix));
163+
let filteredItems = [...itemsByKey.values()].sort((a, b) => a.key.localeCompare(b.key));
139164

140165
if (exclusiveStartKey) {
141166
const keyPos = filteredItems.findIndex((item) => item.key === exclusiveStartKey);

pnpm-lock.yaml

Lines changed: 21 additions & 21 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)