Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 211 additions & 37 deletions src/datastore/file/EncryptedFileStore.ts
Original file line number Diff line number Diff line change
@@ -1,45 +1,79 @@
import { existsSync, readFileSync, renameSync, statSync, writeFileSync } from 'fs'; // eslint-disable-line no-restricted-imports -- files being checked
import { rename, writeFile } from 'fs/promises';
import { join } from 'path';
import {
closeSync,
existsSync,
fsyncSync,
openSync,
readdirSync,
readFileSync, // eslint-disable-line no-restricted-imports -- atomic file primitives required here
renameSync,
statSync,
unlinkSync,
writeFileSync,
} from 'fs';
import { open, rename, unlink, writeFile } from 'fs/promises';
import { basename, join } from 'path';
import { Logger } from 'pino';
import { lock, LockOptions, lockSync } from 'proper-lockfile';
import { LoggerFactory } from '../../telemetry/LoggerFactory';
import { ScopedTelemetry } from '../../telemetry/ScopedTelemetry';
import { TelemetryService } from '../../telemetry/TelemetryService';
import { processId } from '../../utils/Environment';
import { sleep } from '../../utils/Retry';
import { DataStore } from '../DataStore';
import { decrypt, encrypt } from './Encryption';

const LOCK_OPTIONS_SYNC: LockOptions = { stale: 10_000 };
const LOCK_OPTIONS: LockOptions = { ...LOCK_OPTIONS_SYNC, retries: { retries: 20, minTimeout: 50, maxTimeout: 1000 } };
const STALE_MS = 30_000;
const LOCK_OPTIONS_SYNC: LockOptions = { stale: STALE_MS, realpath: false };
const LOCK_OPTIONS: LockOptions = {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: cleanupStaleTmpFiles() runs before the lock is acquired. Since tmp files are named {name}.enc.{pid}.{counter}.tmp and the cleanup matches any PID prefix for this store name, there's a small race with other language server processes (e.g. multiple VS Code windows sharing the same storageDir) — process A could delete a .tmp file that process B is about to rename.

Moving this call inside the lock block (after lockSyncWithRetry, before the existsSync check) would close that window.

...LOCK_OPTIONS_SYNC,
retries: { retries: 100, minTimeout: 25, maxTimeout: 200, factor: 1.5, randomize: true },
};

const RENAME_MAX_RETRIES = 10;
const RENAME_RETRY_DELAY_MS = 50;
const RETRIABLE_RENAME_CODES = new Set(['EPERM', 'EACCES', 'EBUSY', 'ENOENT']);

const SYNC_LOCK_MAX_ATTEMPTS = 200;
const SYNC_LOCK_RETRY_DELAY_MS = 50;

export class EncryptedFileStore implements DataStore {
  private readonly log: Logger;
  // Path of the encrypted store file: {fileDbDir}/{name}.enc
  private readonly file: string;
  private readonly dir: string;
  // Shared lockfile path so every process locking this store agrees on the target.
  private readonly lockfilePath: string;
  private content: Record<string, unknown> = {};
  private readonly telemetry: ScopedTelemetry;
  // Serializes this process's async mutations so concurrent calls cannot interleave.
  private writeQueue: Promise<void> = Promise.resolve();
  // Per-instance counter making tmp file names unique within this process.
  private tempFileCounter = 0;

  /**
   * Loads (or creates) the encrypted store under an inter-process file lock.
   * If the existing file cannot be decrypted, the store is recreated empty.
   */
  constructor(
    private readonly encryptionKey: Buffer,
    name: string,
    fileDbDir: string,
  ) {
    this.log = LoggerFactory.getLogger(`FileStore.${name}`);
    this.file = join(fileDbDir, `${name}.enc`);
    this.dir = fileDbDir;
    this.lockfilePath = join(fileDbDir, `${name}.enc.lock`);
    this.telemetry = TelemetryService.instance.get(`FileStore.${name}`);

    const release = lockSyncWithRetry(fileDbDir, { ...LOCK_OPTIONS_SYNC, lockfilePath: this.lockfilePath });
    try {
      // Must run inside the lock: tmp names only embed the owning PID, so another
      // live process sharing this dir could otherwise lose a tmp file it is about
      // to rename into place.
      this.cleanupStaleTmpFiles();
      if (existsSync(this.file)) {
        try {
          this.content = this.readFile();
        } catch (error) {
          this.log.error(error, 'Failed to decrypt file store, recreating store');
          this.telemetry.count('filestore.recreate', 1);
          this.saveSync();
        }
      } else {
        this.saveSync();
      }
    } finally {
      release();
    }
  }

Expand Down Expand Up @@ -90,35 +124,175 @@ export class EncryptedFileStore implements DataStore {
}

private async withLock<T>(operation: string, fn: () => Promise<T>): Promise<T> {
return await this.telemetry.measureAsync(
operation,
async () => {
const release = await lock(this.file, LOCK_OPTIONS);
try {
this.content = this.readFile();
return await fn();
} finally {
await release();
}
},
{ captureErrorAttributes: true },
);
const previous = this.writeQueue;
let releaseNext!: () => void;
this.writeQueue = new Promise<void>((resolve) => {
releaseNext = resolve;
});
try {
await previous;
return await this.telemetry.measureAsync(
operation,
async () => {
const release = await lock(this.dir, { ...LOCK_OPTIONS, lockfilePath: this.lockfilePath });
try {
this.content = existsSync(this.file) ? this.readFile() : {};
return await fn();
} finally {
await release();
}
},
{ captureErrorAttributes: true },
);
} finally {
releaseNext();
}
}

private readFile(): Record<string, unknown> {
return JSON.parse(decrypt(this.KEY, readFileSync(this.file))) as Record<string, unknown>;
return JSON.parse(decrypt(this.encryptionKey, readFileSync(this.file))) as Record<string, unknown>;
}

private saveSync() {
const tmp = `${this.file}.${process.pid}.tmp`;
writeFileSync(tmp, encrypt(this.KEY, JSON.stringify(this.content)));
renameSync(tmp, this.file);
const tmp = this.tmpPath();
writeFileSync(tmp, encrypt(this.encryptionKey, JSON.stringify(this.content)));
fsyncFileSync(tmp);
renameSyncWithRetry(tmp, this.file);
fsyncDirSync(this.dir);
}

private async save() {
const tmp = `${this.file}.${process.pid}.tmp`;
await writeFile(tmp, encrypt(this.KEY, JSON.stringify(this.content)));
await rename(tmp, this.file);
const tmp = this.tmpPath();
await writeFile(tmp, encrypt(this.encryptionKey, JSON.stringify(this.content)));
await fsyncFile(tmp);
await renameWithRetry(tmp, this.file);
await fsyncDir(this.dir);
}

private tmpPath(): string {
this.tempFileCounter = (this.tempFileCounter + 1) % Number.MAX_SAFE_INTEGER;
return `${this.file}.${processId()}.${this.tempFileCounter}.tmp`;
}

private cleanupStaleTmpFiles(): void {
const prefix = `${basename(this.file)}.`;
try {
for (const entry of readdirSync(this.dir)) {
if (entry.startsWith(prefix) && entry.endsWith('.tmp')) {
try {
unlinkSync(join(this.dir, entry));
} catch {
// ignore - another process may own it
}
}
}
} catch {
// ignore - dir may not exist yet
}
}
}

/** Flushes a file's written data to stable storage; the descriptor is closed even if fsync throws. */
function fsyncFileSync(filePath: string): void {
  const descriptor = openSync(filePath, 'r+');
  try {
    fsyncSync(descriptor);
  } finally {
    closeSync(descriptor);
  }
}

/** Best-effort fsync of a directory so a preceding rename becomes durable; swallows platform errors. */
function fsyncDirSync(dirPath: string): void {
  try {
    const dirDescriptor = openSync(dirPath, 'r');
    try {
      fsyncSync(dirDescriptor);
    } finally {
      closeSync(dirDescriptor);
    }
  } catch {
    // Windows cannot fsync directories - writes are still durable via NTFS journaling
  }
}

/** Async fsync of a file's data to stable storage; the handle is closed even if sync rejects. */
async function fsyncFile(filePath: string): Promise<void> {
  const fileHandle = await open(filePath, 'r+');
  try {
    await fileHandle.sync();
  } finally {
    await fileHandle.close();
  }
}

/** Async best-effort directory fsync; errors are swallowed (not possible on Windows). */
async function fsyncDir(dirPath: string): Promise<void> {
  try {
    const dirHandle = await open(dirPath, 'r');
    try {
      await dirHandle.sync();
    } finally {
      await dirHandle.close();
    }
  } catch {
    // Windows cannot fsync directories
  }
}

/**
 * renameSync with bounded retries for transient errors (see RETRIABLE_RENAME_CODES).
 * On permanent failure the tmp source is removed best-effort and the error rethrown.
 */
function renameSyncWithRetry(sourcePath: string, destinationPath: string): void {
  for (let attempt = 1; attempt <= RENAME_MAX_RETRIES; attempt++) {
    try {
      renameSync(sourcePath, destinationPath);
      return;
    } catch (error) {
      const code = (error as NodeJS.ErrnoException).code;
      const canRetry = !!code && RETRIABLE_RENAME_CODES.has(code) && attempt < RENAME_MAX_RETRIES;
      if (canRetry) {
        sleepSyncMs(RENAME_RETRY_DELAY_MS);
        continue;
      }
      try {
        unlinkSync(sourcePath);
      } catch {
        // best-effort tmp cleanup
      }
      throw error;
    }
  }
}

/**
 * lockSync with bounded retries while another process holds the lock (ELOCKED).
 * NOTE(review): each retry busy-waits via Atomics.wait, blocking the event loop
 * for up to ~10s total — tolerable only because it runs at construction time.
 */
function lockSyncWithRetry(target: string, options: LockOptions): () => void {
  let lastError: unknown;
  let attemptsLeft = SYNC_LOCK_MAX_ATTEMPTS;
  while (attemptsLeft-- > 0) {
    try {
      return lockSync(target, options);
    } catch (error) {
      // Anything other than lock contention is a real failure — surface it now.
      if ((error as NodeJS.ErrnoException).code !== 'ELOCKED') {
        throw error;
      }
      lastError = error;
      sleepSyncMs(SYNC_LOCK_RETRY_DELAY_MS);
    }
  }
  throw lastError;
}

/** Synchronous sleep: blocks the current thread (and event loop) for roughly ms milliseconds. */
function sleepSyncMs(ms: number): void {
  const signal = new Int32Array(new SharedArrayBuffer(4));
  Atomics.wait(signal, 0, 0, ms);
}

/**
 * Async rename with bounded retries for transient errors (see RETRIABLE_RENAME_CODES).
 * On permanent failure the tmp source is removed best-effort and the error rethrown.
 */
async function renameWithRetry(sourcePath: string, destinationPath: string): Promise<void> {
  for (let attempt = 1; attempt <= RENAME_MAX_RETRIES; attempt++) {
    try {
      await rename(sourcePath, destinationPath);
      return;
    } catch (error) {
      const code = (error as NodeJS.ErrnoException).code;
      const canRetry = !!code && RETRIABLE_RENAME_CODES.has(code) && attempt < RENAME_MAX_RETRIES;
      if (canRetry) {
        await sleep(RENAME_RETRY_DELAY_MS);
        continue;
      }
      try {
        await unlink(sourcePath);
      } catch {
        // best-effort tmp cleanup
      }
      throw error;
    }
  }
}

Expand Down
4 changes: 4 additions & 0 deletions src/utils/Environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,7 @@ export const isLinux = process.platform === 'linux';
export const ProcessType = `${process.platform}${process.env.BUILD_TARGET ? `-${process.env.BUILD_TARGET}` : ''}-${process.arch}`;
export const ServiceEnv = `${getNodeEnv()}-${getAwsEnv()}`;
export const Service = `${ExtensionId}-${ExtensionVersion}`;

/** Returns the current process's PID; the indirection makes call sites mockable in tests. */
export function processId(): number {
  return process.pid;
}
2 changes: 1 addition & 1 deletion src/utils/Retry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export type RetryOptions = {
totalTimeoutMs: number;
};

function sleep(ms: number): Promise<void> {
export function sleep(ms: number): Promise<void> {
return new Promise<void>((resolve) => {
setTimeout(resolve, ms);
});
Expand Down
58 changes: 57 additions & 1 deletion tst/unit/datastore/FileStore.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { execFile } from 'child_process';
import { randomUUID as v4 } from 'crypto';
import { rmSync, mkdirSync, writeFileSync, existsSync, readdirSync, readFileSync } from 'fs';
import { rmSync, mkdirSync, writeFileSync, existsSync, readdirSync, readFileSync, unlinkSync } from 'fs';
import { join } from 'path';
import { promisify } from 'util';
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
Expand Down Expand Up @@ -331,6 +331,62 @@ describe('FileStore', () => {
});
});

// Covers constructor-time removal of orphaned tmp files (cleanupStaleTmpFiles):
// leftovers matching {name}.enc.*.tmp are deleted; other files are untouched.
describe('stale tmp cleanup', () => {
it('should remove leftover tmp files from prior crashed processes on construction', () => {
const encTestDir = join(testDir, 'stale-tmp-test');
mkdirSync(encTestDir, { recursive: true });
const key = encryptionKey(2);

// Simulate leftover tmp files from a crashed process
writeFileSync(join(encTestDir, 'test.enc.9999.1.tmp'), 'stale1');
writeFileSync(join(encTestDir, 'test.enc.8888.2.tmp'), 'stale2');
// Unrelated file should survive
writeFileSync(join(encTestDir, 'other.txt'), 'keep');

// Constructing the store triggers the cleanup.
new EncryptedFileStore(key, 'test', encTestDir);

const files = readdirSync(encTestDir);
expect(files.filter((f) => f.endsWith('.tmp'))).toHaveLength(0);
expect(files).toContain('other.txt');
});

it('should not remove tmp files belonging to a different store name', () => {
const encTestDir = join(testDir, 'stale-tmp-isolation');
mkdirSync(encTestDir, { recursive: true });
const key = encryptionKey(2);

// tmp file for a different store name
writeFileSync(join(encTestDir, 'other.enc.9999.1.tmp'), 'other-store');

new EncryptedFileStore(key, 'test', encTestDir);

// Cleanup is scoped to this store's prefix, so 'other.enc.*' must survive.
const files = readdirSync(encTestDir);
expect(files).toContain('other.enc.9999.1.tmp');
});
});

// Covers withLock's re-read inside the lock: when the store file disappears
// between operations, the next write starts from an empty object and recreates it.
describe('withLock file deletion resilience', () => {
it('should handle file being deleted between writes', async () => {
const encTestDir = join(testDir, 'deleted-file-test');
mkdirSync(encTestDir, { recursive: true });
const key = encryptionKey(2);

const store = new EncryptedFileStore(key, 'test', encTestDir);
await store.put('key1', 'value1');

// Simulate another process deleting the file
unlinkSync(join(encTestDir, 'test.enc'));

// Next write should recreate the file (withLock checks existsSync)
await store.put('key2', 'value2');

// Only key2 should exist — key1 was lost with the deleted file
const store2 = new EncryptedFileStore(key, 'test', encTestDir);
expect(store2.get('key1')).toBeUndefined();
expect(store2.get('key2')).toBe('value2');
});
});

describe('concurrent operations', () => {
it('should serialize concurrent puts and preserve all data', async () => {
const promises = Array.from({ length: 10 }, (_, i) => fileStore.put(`key${i}`, `value${i}`));
Expand Down