Skip to content

perf: Optimize Header Validation #3994

Open
@PandaWorker

Description

The current implementation of header validation in the codebase uses Uint8Array to store character validity maps for HTTP tokens, URIs, and header values. While this approach works, it can be further optimized using bitmasking to reduce memory usage and improve performance.

function isValidHTTPToken (characters) {

// Lookup table indexed by character code (0-255): an entry is 1 when the
// character may appear in an HTTP token and 0 otherwise.
const TOKEN_MAP = (() => {
	const table = new Uint8Array(256);
	const allowed =
		"!#$%&'*+-.^_`|~" +
		'0123456789' +
		'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
		'abcdefghijklmnopqrstuvwxyz';
	for (let i = 0; i < allowed.length; i++) {
		table[allowed.charCodeAt(i)] = 1;
	}
	return table;
})();

// Lookup table indexed by character code (0-255): an entry is 1 for codes
// 0x21-0x7e and 0x80-0xff, and 0 for controls, space (0x20) and DEL (0x7f).
const URI_MAP = (() => {
	// Mark everything from '!' (0x21) upwards, then carve DEL back out.
	const table = new Uint8Array(256).fill(1, 0x21);
	table[0x7f] = 0;
	return table;
})();

// Lookup table indexed by character code (0-255): an entry is 1 for
// horizontal tab (0x09), 0x20-0x7e and 0x80-0xff, and 0 for every other
// control character including DEL (0x7f).
const HEADER_VALUE_MAP = (() => {
	// Mark everything from space (0x20) upwards, then patch the two exceptions.
	const table = new Uint8Array(256).fill(1, 0x20);
	table[0x09] = 1;
	table[0x7f] = 0;
	return table;
})();


/**
 * Reports whether character code `c` is flagged as a token character in
 * `TOKEN_MAP`. Anything that is not below 256 is rejected up front.
 */
export function isHTTPToken(c: number) {
	if (!(c < 256)) return false;
	return TOKEN_MAP[c] === 1;
}

/**
 * Reports whether character code `c` is flagged as allowed in
 * `HEADER_VALUE_MAP`. Anything that is not below 256 is rejected up front.
 */
export function isHeaderValueToken(c: number) {
	if (!(c < 256)) return false;
	return HEADER_VALUE_MAP[c] === 1;
}

/**
 * Reports whether character code `c` is flagged as allowed in `URI_MAP`.
 * Anything that is not below 256 is rejected up front.
 */
export function isURIToken(c: number) {
	if (!(c < 256)) return false;
	return URI_MAP[c] === 1;
}

/**
 * Checks whether `name` is a syntactically valid HTTP header (field) name.
 *
 * A field name is a `token`, defined as `1*tchar`: it needs at least one
 * character and every UTF-16 code unit must be accepted by `isHTTPToken`.
 *
 * @param name Candidate header name.
 * @returns `true` when `name` is non-empty and made only of token characters.
 */
export function isValidHeaderName(name: string) {
	const len = name.length;
	// `token = 1*tchar`: the empty string is not a valid field name.
	if (len === 0) return false;
	for (let i = 0; i < len; i++) {
		if (!isHTTPToken(name.charCodeAt(i))) return false;
	}
	return true;
}

/**
 * Checks whether every UTF-16 code unit of `value` is accepted by
 * `isHeaderValueToken`. The empty string is accepted.
 *
 * @param value Candidate header value.
 * @returns `false` at the first rejected code unit, otherwise `true`.
 */
export function isValidHeaderValue(value: string) {
	const end = value.length;
	let pos = 0;
	while (pos < end) {
		const code = value.charCodeAt(pos);
		if (!isHeaderValueToken(code)) {
			return false;
		}
		pos += 1;
	}
	return true;
}

// Regex-based variant labelled "undici" in the original snippet: matches any
// character that is NOT a horizontal tab, 0x20-0x7e or 0x80-0xff.
const headerCharRegex = /[^\t\x20-\x7e\x80-\xff]/;

/**
 * Checks a header value with `headerCharRegex`.
 *
 * @param characters Candidate header value.
 * @returns `true` when no forbidden character occurs in `characters`.
 */
export function isValidHeaderValue2(characters: string): boolean {
	const hasForbiddenChar = headerCharRegex.test(characters);
	return hasForbiddenChar === false;
}

/**
 * Reports whether character code `c` is a token character: a visible ASCII
 * character (VCHAR, 0x21-0x7e) that is not DQUOTE or one of the delimiters
 * "(),/:;<=>?@[\]{}".
 *
 * @param c Character code to classify.
 * @returns `true` for token characters, `false` for everything else.
 */
export function isTokenCharCode(c: number) {
	// VCHAR %x21-7E
	const isVisibleAscii = c >= 0x21 && c <= 0x7e;
	if (!isVisibleAscii) {
		return false;
	}

	// DQUOTE and "(),/:;<=>?@[\]{}"
	const isDelimiter =
		c === 0x22 ||
		c === 0x28 || c === 0x29 ||
		c === 0x2c || c === 0x2f ||
		c === 0x3a || c === 0x3b || c === 0x3c || c === 0x3d ||
		c === 0x3e || c === 0x3f || c === 0x40 ||
		c === 0x5b || c === 0x5c || c === 0x5d ||
		c === 0x7b || c === 0x7d;

	return !isDelimiter;
}

/**
 * Checks whether `characters` is a valid HTTP token.
 *
 * A token is defined as `1*tchar`: it needs at least one character and every
 * UTF-16 code unit must be accepted by `isTokenCharCode`.
 *
 * @param characters Candidate token.
 * @returns `true` when `characters` is non-empty and made only of token
 * characters.
 */
export function isValidHTTPToken(characters: string) {
	// `token = 1*tchar`: the empty string is not a token.
	if (characters.length === 0) {
		return false;
	}
	for (let i = 0; i < characters.length; ++i) {
		if (!isTokenCharCode(characters.charCodeAt(i))) {
			return false;
		}
	}
	return true;
}

Benchmarks

import * as utils from './constants.ts';

/**
 * Registers the table-based vs. undici-style validator benchmarks with
 * mitata, prints any single-character disagreement between the two
 * implementations, and then runs the benchmarks.
 *
 * The returned promise settles only once the benchmark run has finished, so a
 * failure inside the run rejects this promise instead of being detached.
 */
async function bench() {
	// Loaded lazily so mitata is only required when the benchmark is executed.
	const { bench, run, summary } = await import('mitata');

	// Header value validation, one single-character string per call.
	summary(() => {
		bench('isValidHeaderValue one char', () => {
			for (let index = 0; index < 512; index++) {
				utils.isValidHeaderValue(String.fromCharCode(index));
			}
		});

		bench('undici.isValidHeaderValue one char', () => {
			for (let index = 0; index < 512; index++) {
				utils.isValidHeaderValue2(String.fromCharCode(index));
			}
		});
	});

	// Header value validation on a typical valid value.
	summary(() => {
		bench('isValidHeaderValue', () => {
			for (let index = 0; index < 512; index++) {
				utils.isValidHeaderValue('accessToken=HFVTGWBNJNMDJNDJNDHBDHJDD123');
			}
		});

		bench('undici.isValidHeaderValue (re)', () => {
			for (let index = 0; index < 512; index++) {
				utils.isValidHeaderValue2('accessToken=HFVTGWBNJNMDJNDJNDHBDHJDD123');
			}
		});
	});


	// Per-character token classification.
	summary(() => {
		bench('isHTTPToken', () => {
			for (let index = 0; index < 512; index++) {
				utils.isHTTPToken(index);
			}
		});

		bench('undici.isTokenCharCode', () => {
			for (let index = 0; index < 512; index++) {
				utils.isTokenCharCode(index);
			}
		});
	});

	// Header name validation on a valid name.
	summary(() => {
		bench('isValidHeaderName', () => {
			for (let index = 0; index < 512; index++) {
				utils.isValidHeaderName('X-Authorization-User-Id');
			}
		});

		bench('undici.isValidHTTPToken', () => {
			for (let index = 0; index < 512; index++) {
				utils.isValidHTTPToken('X-Authorization-User-Id');
			}
		});
	});

	// Header name validation on a name containing a line feed.
	summary(() => {
		bench('isValidHeaderName invalid', () => {
			for (let index = 0; index < 512; index++) {
				utils.isValidHeaderName('X-Authorization-User\n-Id');
			}
		});

		bench('undici.isValidHTTPToken invalid', () => {
			for (let index = 0; index < 512; index++) {
				utils.isValidHTTPToken('X-Authorization-User\n-Id');
			}
		});
	});

	// Sanity check: both implementations must agree on every single-character
	// input in 0..255; any disagreement is printed before the benchmarks run.
	for (let index = 0; index < 256; index++) {
		const char = String.fromCharCode(index);
		const [a, b] = [utils.isValidHeaderName(char), utils.isValidHTTPToken(char)];
		const [c, d] = [utils.isValidHeaderValue(char), utils.isValidHeaderValue2(char)];

		if ((a !== b) || (c !== d)) {
			console.warn({ index, char, a, b, c, d});
		}
	}

	// `run()` is asynchronous; await it so this function does not resolve
	// before the benchmarks complete and so its errors propagate to the caller.
	await run();
}

bench();

Results

clk: ~3.04 GHz
cpu: Apple M1 Max
runtime: node 22.12.0 (arm64-darwin)

benchmark                   avg (min … max) p75   p99    (min … top 1%)
------------------------------------------- -------------------------------
isValidHeaderValue one char    1.34 µs/iter   1.38 µs    ▄        █  ▂     
                        (1.26 µs … 1.48 µs)   1.43 µs ▅█▅█▅▅▂▁▁▂▁▂█▅██▅▂▁▂▂
undici.isValidHeaderValue ..   7.40 µs/iter   7.33 µs ▂█▂                  
                        (7.26 µs … 8.19 µs)   7.82 µs ███▇▁▄▁▁▁▁▁▁▁▄▁▁▁▁▁▁▄

summary
  isValidHeaderValue one char
   5.53x faster than undici.isValidHeaderValue one char

------------------------------------------- -------------------------------
isValidHeaderValue            19.84 µs/iter  20.16 µs  █                 █ 
                      (19.41 µs … 20.27 µs)  20.20 µs ▆█▁▆▁▁▁▁▁▁▁▁▆▆▁▁▁▁▁█▆
undici.isValidHeaderValue ..  20.48 µs/iter  20.44 µs  █                   
                      (20.36 µs … 21.11 µs)  20.70 µs ▅██▅▁▅▁▁▅▁▁▁▁▁▁▁▁▁▁▁▅

summary
  isValidHeaderValue
   1.03x faster than undici.isValidHeaderValue (re)

------------------------------------------- -------------------------------
isHTTPToken                  350.50 ns/iter 373.16 ns                    █ 
                    (182.86 ns … 389.65 ns) 381.80 ns ▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▂
undici.isTokenCharCode       732.03 ns/iter 747.38 ns              █       
                    (676.33 ns … 796.38 ns) 786.35 ns ▅▂▁▁▁▁▁▁▁▁▁▁▁█▂▁▁▁▁▁▁

summary
  isHTTPToken
   2.09x faster than undici.isTokenCharCode

------------------------------------------- -------------------------------
isValidHeaderName             13.53 µs/iter  14.96 µs ▇             █      
                      (11.21 µs … 68.92 µs)  16.46 µs █▂▂▁▁▁▁▁▁▁▁▁▁▁█▂▃▁▂▁▁
undici.isValidHTTPToken       22.95 µs/iter  23.04 µs █ ▄                  
                      (21.96 µs … 83.75 µs)  31.96 µs █▂█▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

summary
  isValidHeaderName
   1.7x faster than undici.isValidHTTPToken

------------------------------------------- -------------------------------
isValidHeaderName invalid     10.34 µs/iter  10.41 µs █ █                  
                      (10.18 µs … 10.53 µs)  10.51 µs █▁█▁▁▁██▁▁▁▁███▁▁▁█▁█
undici.isValidHTTPToken in..  20.10 µs/iter  20.36 µs  ▃                  █
                      (19.62 µs … 20.82 µs)  20.37 µs ▆█▁▁▆▁▁▁▁▁▆▁▁▁▆▆▁▁▆▁█

summary
  isValidHeaderName invalid
   1.94x faster than undici.isValidHTTPToken invalid

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    enhancement: New feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions