@@ -113,29 +113,54 @@ export class KeyValueStoreClient extends CachedIdClient implements storage.KeyVa
113113 } )
114114 . parse ( options ) ;
115115
116- // The native iterator yields keys in lexical order and natively supports `exclusiveStartKey`
117- // and `limit`, but not `prefix`, and it does not throw for an unknown `exclusiveStartKey`.
118- // To preserve the historical semantics (prefix filtering and a hard error for a missing
119- // `exclusiveStartKey`), we collect the full key list and slice it here. We also merge in any
120- // untracked value files present on disk (native keys take precedence on collisions).
116+ // The native iterator yields keys in lexical order and natively supports `exclusiveStartKey`,
117+ // `limit`, and (since `@crawlee/fs-storage-native` 0.1.5-beta.6) `prefix`. It does not, however,
118+ // throw for an unknown `exclusiveStartKey`, nor does it know about untracked value files on disk
119+ // ("bare files"). We layer those two pieces of historical behavior on top here.
120+ const bareFiles = await this . listBareFiles ( ) ;
121+
122+ // Fast path: with no untracked files on disk we can push prefix/exclusiveStartKey/limit all the
123+ // way down to the native iterator and stream the (already paginated, already filtered) result
124+ // straight through — no need to pull every key into memory just to slice it. We only preflight
125+ // the `exclusiveStartKey` so a missing one still throws, matching the historical contract.
126+ if ( bareFiles . length === 0 ) {
127+ if ( exclusiveStartKey !== undefined && ! ( await this . nativeClient . recordExists ( exclusiveStartKey ) ) ) {
128+ throw new Error (
129+ `exclusiveStartKey "${ exclusiveStartKey } " was not found in the key-value store. ` +
130+ `This is likely a bug — the key may have been deleted between paginated listKeys calls.` ,
131+ ) ;
132+ }
133+
134+ const items : storage . KeyValueStoreItemData [ ] = [ ] ;
135+ const iterator = await this . nativeClient . iterateKeys ( exclusiveStartKey , limit , undefined , prefix ) ;
136+ for await ( const record of iterator ) {
137+ items . push ( { key : record . key , size : record . size ?? 0 } ) ;
138+ }
139+ return items ;
140+ }
141+
142+ // Merge path (untracked value files present on disk): bare files must be interleaved with the
143+ // native keys in lexical order, so we have to materialize and sort. Push `prefix` down to the
144+ // native side anyway to keep its contribution bounded; native keys take precedence on collisions.
121145 const itemsByKey = new Map < string , storage . KeyValueStoreItemData > ( ) ;
122146
123- for ( const bareFile of await this . listBareFiles ( ) ) {
147+ for ( const bareFile of bareFiles ) {
148+ if ( prefix && ! bareFile . key . startsWith ( prefix ) ) {
149+ continue ;
150+ }
124151 const size = await readFile ( bareFile . filePath )
125152 . then ( ( buffer ) => buffer . byteLength )
126153 . catch ( ( ) => 0 ) ;
127154 itemsByKey . set ( bareFile . key , { key : bareFile . key , size } ) ;
128155 }
129156
130- const iterator = await this . nativeClient . iterateKeys ( ) ;
157+ const iterator = await this . nativeClient . iterateKeys ( undefined , undefined , undefined , prefix ) ;
131158 for await ( const record of iterator ) {
132159 itemsByKey . set ( record . key , { key : record . key , size : record . size ?? 0 } ) ;
133160 }
134161
135162 // Emulate the API: keys are returned in lexical order.
136- const items = [ ...itemsByKey . values ( ) ] . sort ( ( a , b ) => a . key . localeCompare ( b . key ) ) ;
137-
138- let filteredItems = items . filter ( ( item ) => ! prefix || item . key . startsWith ( prefix ) ) ;
163+ let filteredItems = [ ...itemsByKey . values ( ) ] . sort ( ( a , b ) => a . key . localeCompare ( b . key ) ) ;
139164
140165 if ( exclusiveStartKey ) {
141166 const keyPos = filteredItems . findIndex ( ( item ) => item . key === exclusiveStartKey ) ;
0 commit comments