-
-
Notifications
You must be signed in to change notification settings - Fork 60
(feat) add RE2::Set bindings #231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Conversation
I ran a small benchmark across the different implementations. This tends to perform best in situations where your patterns do not match.
Could you share the code of your benchmark? I plan to add a benchmarking utility.
'use strict';
const RE2 = require('./re2');
/**
 * Generates the regex sources used by the benchmark.
 *
 * Each source is the literal word `token<i>` followed by an optional run of
 * lowercase letters, for i = 0, 1, 2, ... while i < n.
 *
 * @param {number} n - Upper bound (exclusive) for the pattern index.
 * @returns {string[]} The generated pattern source strings, in index order.
 */
function makePatterns(n) {
  const sources = [];
  let index = 0;
  while (index < n) {
    sources.push(`token${index}(?:[a-z]+)?`);
    index += 1;
  }
  return sources;
}
/**
 * Generates `count` short input strings for the multi-input scenarios.
 *
 * Every string has the shape `xx<k> <b> <word><k> tail`, where
 * k = j % patternCount, b = j & 7, and <word> is `token` when `withHits` is
 * truthy or the truncated `tok` otherwise.
 *
 * @param {number} patternCount - Modulus used to pick the index embedded in each string.
 * @param {number} count - Number of strings to generate.
 * @param {boolean} withHits - Whether to embed the full `token<k>` word.
 * @returns {string[]} The generated inputs, in generation order.
 */
function makeInputs(patternCount, count, withHits) {
  const word = withHits ? ' token' : ' tok';
  const inputs = [];
  let j = 0;
  while (j < count) {
    const slot = j % patternCount;
    inputs.push(`xx${slot} ${j & 7}${word}${slot} tail`);
    j += 1;
  }
  return inputs;
}
/**
 * Builds one long input string that mentions `token<i>` for every
 * i in [0, patternCount).
 *
 * Segments look like `some prefix <i> token<i> suffix <i & 7>` and are
 * separated by ` | `.
 *
 * @param {number} patternCount - Number of segments to emit.
 * @returns {string} The concatenated string (empty when no segment is emitted).
 */
function makeLongAllHits(patternCount) {
  let text = '';
  for (let i = 0; i < patternCount; i += 1) {
    // The separator goes between segments only, never before the first one.
    if (i > 0) text += ' | ';
    text += `some prefix ${i} token${i} suffix ${i & 7}`;
  }
  return text;
}
/**
 * Builds one long input string that never contains the word `token`.
 *
 * Segments look like `item<i> nohit <i & 7>` and are joined with ` | `; the
 * joined text is then emitted twice back to back, with no separator between
 * the two copies.
 *
 * @param {number} patternCount - Number of segments per copy.
 * @returns {string} The doubled string (empty when no segment is emitted).
 */
function makeLongNoHits(patternCount) {
  const segments = [];
  let i = 0;
  while (i < patternCount) {
    segments.push(`item${i} nohit ${i & 7}`);
    i += 1;
  }
  const once = segments.join(' | ');
  return once + once;
}
/**
 * Runs `fn` once and reports how long the call took.
 *
 * Timing uses `process.hrtime.bigint()` (nanoseconds) and is converted to
 * milliseconds as a plain number.
 *
 * @param {() => *} fn - Zero-argument workload to run.
 * @returns {{timeMs: number, result: *}} Elapsed milliseconds and whatever `fn` returned.
 */
function measure(fn) {
  const startedAt = process.hrtime.bigint();
  const result = fn();
  const elapsedNs = process.hrtime.bigint() - startedAt;
  return { timeMs: Number(elapsedNs) / 1e6, result };
}
// Benchmark scenarios.
//   'multi'  - 4000 short inputs produced by makeInputs.
//   'single' - one long input produced by makeLongAllHits / makeLongNoHits.
// Each scenario comes in a "hits" and a "nohits" flavour for 50 and 200
// patterns. The size labels inside `desc` are rough, human-written estimates.
const configs = (() => {
  const flavour = (withHits) => (withHits ? 'hits' : 'nohits');

  // Scenario with many short input strings.
  const multi = (patterns, withHits) => ({
    name: `multi-small-${patterns}-${flavour(withHits)}`,
    patterns,
    inputs: makeInputs(patterns, 4000, withHits),
    mode: 'multi',
    desc: `4k small strings, ${withHits ? 'many' : 'no'} hits`,
  });

  // Scenario with exactly one long input string.
  const single = (patterns, withHits, approxSize) => ({
    name: `single-long-${patterns}-${flavour(withHits)}`,
    patterns,
    inputs: [withHits ? makeLongAllHits(patterns) : makeLongNoHits(patterns)],
    mode: 'single',
    desc: `${approxSize} string, ${withHits ? 'all tokens present' : 'no tokens'}`,
  });

  return [
    multi(50, true),
    multi(50, false),
    multi(200, true),
    multi(200, false),
    single(50, true, '~3 KB'),
    single(50, false, '~3 KB'),
    single(200, true, '~7 KB'),
    single(200, false, '~8 KB'),
  ];
})();
// One timing row per scenario, appended in the order of `configs`.
const results = [];
for (const cfg of configs) {
  // Three contenders built from the same pattern sources: a single RE2.Set,
  // a list of individual RE2 objects, and a list of built-in RegExp objects.
  const sources = makePatterns(cfg.patterns);
  const set = new RE2.Set(sources);
  const re2List = sources.map((src) => new RE2(src));
  const jsList = sources.map((src) => new RegExp(src));

  // Average input length, reported for context only.
  let totalLen = 0;
  for (const input of cfg.inputs) totalLen += input.length;
  const avgLen = totalLen / cfg.inputs.length;

  // Pick the three workloads for this scenario's mode.
  let runSet;
  let runRe2;
  let runJs;
  if (cfg.mode === 'multi') {
    // Count the inputs matched by at least one pattern.
    runSet = () => {
      let hits = 0;
      for (const text of cfg.inputs) {
        if (set.test(text)) hits += 1;
      }
      return hits;
    };
    runRe2 = () => {
      let hits = 0;
      for (const text of cfg.inputs) {
        for (const re of re2List) {
          if (re.test(text)) {
            hits += 1;
            break; // the first matching pattern is enough for this input
          }
        }
      }
      return hits;
    };
    runJs = () => {
      let hits = 0;
      for (const text of cfg.inputs) {
        for (const re of jsList) {
          if (re.test(text)) {
            hits += 1;
            break; // the first matching pattern is enough for this input
          }
        }
      }
      return hits;
    };
  } else {
    // Count how many patterns match the one long input.
    runSet = () => set.match(cfg.inputs[0]).length;
    runRe2 = () => re2List.reduce((total, re) => total + (re.test(cfg.inputs[0]) ? 1 : 0), 0);
    runJs = () => jsList.reduce((total, re) => total + (re.test(cfg.inputs[0]) ? 1 : 0), 0);
  }

  // Always measured in the same order: set, then RE2 list, then RegExp list.
  const setRes = measure(runSet);
  const re2Res = measure(runRe2);
  const jsRes = measure(runJs);

  results.push({
    ...cfg,
    avgLen,
    inputsCount: cfg.inputs.length,
    set: setRes.timeMs,
    re2: re2Res.timeMs,
    js: jsRes.timeMs,
    // Only the RE2.Set count is recorded; the other two counts are discarded.
    matches: setRes.result,
  });
}
// Print one table row per benchmark scenario.
// Timings are rendered with toFixed(3), so they appear as strings with three
// decimals; the average input length is rounded to an integer.
// NOTE(review): the text after `})));` on the last line is page text from the
// surrounding discussion that got fused onto the code; it is not part of the script.
console.table(results.map(r => ({
scenario: r.name,
patterns: r.patterns,
inputs: r.inputsCount,
avgInputLen: Math.round(r.avgLen),
// match count taken from the RE2.Set run
matches: r.matches,
setMs: r.set.toFixed(3),
re2Ms: r.re2.toFixed(3),
jsMs: r.js.toFixed(3),
desc: r.desc,
})));Let me know if you hit any issues. |
Added an RE2.Set binding (iterable patterns, flags/anchor parsing, match/test/toString plus flags/sources/source/size/anchor props) and exported it from the addon. Closes #43