|
| 1 | +import { rm } from 'node:fs/promises'; |
| 2 | +import { resolve } from 'node:path'; |
| 3 | + |
| 4 | +import { FileSystemStorageClient } from '@crawlee/fs-storage'; |
| 5 | + |
| 6 | +// `assumeSoleOwner` controls how the native `@crawlee/fs-storage-native` extension treats requests |
| 7 | +// left *in progress* by a previous run (a dangling `orderNo` lock on disk) when a queue is reopened. |
| 8 | +// The reclaim/respect-peer-lock semantics are owned by the native extension; these tests verify the |
| 9 | +// adapter's contract on top of it: the option defaults to `true`, is honored when set, and that the |
| 10 | +// resulting behavior reaches all the way down to the native queue. |
describe('FileSystemStorageClient assumeSoleOwner', () => {
    // Scratch directory for this suite. The reopen tests each use their own sub-directory of it,
    // and the whole tree is removed after every test.
    const tmpLocation = resolve(import.meta.dirname, './tmp/assume-sole-owner');

    afterEach(async () => {
        // `force: true` makes the removal a no-op when the directory does not exist.
        await rm(tmpLocation, { force: true, recursive: true });
    });

    /**
     * Seeds a queue named `default` under `dir` with two requests (unique keys `'1'` and `'2'`),
     * then fetches one of them without marking it handled and without tearing the client down.
     * Per this suite's premise that leaves a dangling in-progress lock on disk — exactly the
     * "process died mid-flight" situation.
     *
     * @param dir Directory passed to the storage client as `localDataDirectory`.
     * @returns The request that was fetched and left un-handled.
     * @throws Error when the freshly seeded queue hands out no request.
     */
    async function seedQueueWithDanglingLock(dir: string) {
        const storage = new FileSystemStorageClient({ localDataDirectory: dir });
        const queue = await storage.createRequestQueueClient({ name: 'default' });
        await queue.addBatchOfRequests([
            { url: 'http://example.com/1', uniqueKey: '1' },
            { url: 'http://example.com/2', uniqueKey: '2' },
        ]);

        const locked = await queue.fetchNextRequest();
        // An explicit guard (rather than `expect` + `!`) narrows the type for callers and fails
        // with a message that points at the seeding step.
        if (!locked) {
            throw new Error('Expected the freshly seeded queue to hand out a request to lock');
        }

        // Intentionally NO markRequestAsHandled and NO teardown/persistState — the lock is left dangling.
        return locked;
    }

    test('defaults to true', () => {
        const storage = new FileSystemStorageClient({ localDataDirectory: tmpLocation });
        expect(storage.assumeSoleOwner).toBe(true);
    });

    test('respects an explicit false', () => {
        const storage = new FileSystemStorageClient({ localDataDirectory: tmpLocation, assumeSoleOwner: false });
        expect(storage.assumeSoleOwner).toBe(false);
    });

    test('true (default): reopening preserves contents but relinquishes the dangling lock', async () => {
        const dir = resolve(tmpLocation, 'sole-owner-true');
        const locked = await seedQueueWithDanglingLock(dir);

        // Reopen the same directory as sole owner, without purging.
        const reopened = new FileSystemStorageClient({ localDataDirectory: dir, assumeSoleOwner: true });
        const queue = await reopened.createRequestQueueClient({ name: 'default' });

        // Contents preserved: both requests still present, none handled.
        const metadata = await queue.getMetadata();
        expect(metadata.totalRequestCount).toBe(2);
        expect(metadata.handledRequestCount).toBe(0);
        expect(metadata.pendingRequestCount).toBe(2);

        // Lock relinquished: BOTH requests are fetchable again, including the one locked before.
        const a = await queue.fetchNextRequest();
        const b = await queue.fetchNextRequest();
        expect([a?.uniqueKey, b?.uniqueKey].sort()).toStrictEqual(['1', '2']);

        // The previously-locked request survived with its data intact.
        const reFetched = await queue.getRequest(locked.uniqueKey);
        expect(reFetched?.url).toBe(locked.url);
    });

    test('false: reopening keeps the dangling lock (concurrency-safe mode)', async () => {
        const dir = resolve(tmpLocation, 'sole-owner-false');
        const locked = await seedQueueWithDanglingLock(dir);
        // Derive the expectation from the request that was actually locked instead of assuming
        // the first fetch always returns key '1'.
        const unlockedKey = locked.uniqueKey === '1' ? '2' : '1';

        // Reopen in concurrency-safe mode: an in-progress request is treated as a potential live peer's
        // lock and is NOT reclaimed until it expires.
        const reopened = new FileSystemStorageClient({ localDataDirectory: dir, assumeSoleOwner: false });
        const queue = await reopened.createRequestQueueClient({ name: 'default' });

        // Contents are still preserved (same checks as the sole-owner test)...
        const metadata = await queue.getMetadata();
        expect(metadata.totalRequestCount).toBe(2);
        expect(metadata.handledRequestCount).toBe(0);
        expect(metadata.pendingRequestCount).toBe(2);

        // ...but only the un-locked request is handed out; the locked one stays in progress.
        const a = await queue.fetchNextRequest();
        expect(a?.uniqueKey).toBe(unlockedKey);
        expect(await queue.fetchNextRequest()).toBeNull();
    });
});
0 commit comments