(feat) add RE2::Set bindings #231

wrmedford · 2025-11-23T22:54:49Z

Added native RE2.Set binding (iterable patterns, flags/anchor parsing, match/test/toString plus flags/sources/source/size/anchor props) and exported it from the addon.
Hoisted regex translation/escaping into shared helpers and reused across constructors.
Documented the new API in README, expanded type definitions, and added functional + perf-oriented tests (including TS coverage).

Closes #43

wrmedford · 2025-11-23T23:45:05Z

scenario	patterns	inputs	avgLen	matches	RE2.Set (ms)	RE2 list (ms)	RegExp list (ms)	notes
multi-small-50-hits	50	4000	19	4000	3.711	8.526	1.045	4k small strings, many hits
multi-small-50-nohits	50	4000	17	0	2.116	80.921	6.809	4k small strings, no hits
multi-small-200-hits	200	4000	20	4000	2.500	7.008	0.356	4k small strings, many hits
multi-small-200-nohits	200	4000	18	0	1.632	324.565	19.840	4k small strings, no hits
single-long-50-hits	50	1	1677	50	0.095	0.294	0.046	~3 KB string, all tokens present
single-long-50-nohits	50	1	1674	0	0.012	0.246	0.038	~3 KB string, no tokens
single-long-200-hits	200	1	6977	200	0.134	2.647	0.362	~7 KB string, all tokens present
single-long-200-nohits	200	1	6974	0	0.033	3.906	0.469	~8 KB string, no tokens

I ran a small benchmark across the different implementations (RE2.Set, a list of RE2 instances, and a list of native RegExp instances). RE2.Set tends to perform best in situations where your patterns do not match.

uhop · 2025-11-24T00:00:02Z

@wrmedford: Ran a small bench across different implementations.

Could you share code of your benchmark? I plan to add a benchmarking utility.

wrmedford · 2025-11-24T00:04:17Z

@wrmedford: Ran a small bench across different implementations.

Could you share code of your benchmark? I plan to add a benchmarking utility.

'use strict';

const RE2 = require('./re2');

/**
 * Builds the list of regex sources used by every benchmark scenario.
 *
 * Pattern `i` is the literal `token<i>` followed by an optional run of
 * lowercase ASCII letters.
 *
 * @param {number} n - How many patterns to generate.
 * @returns {string[]} Pattern sources for indices 0 .. n-1.
 */
function makePatterns(n) {
  const sources = [];
  let index = 0;
  while (index < n) {
    sources.push(`token${index}(?:[a-z]+)?`);
    index += 1;
  }
  return sources;
}

/**
 * Generates the short input strings for the "multi" scenarios.
 *
 * Every string has the form `xx<k> <j & 7> <marker><k> tail`, where
 * `k = j % patternCount`. The marker is `token` when hits are requested and
 * `tok` otherwise, so the no-hit strings never contain the literal `token`.
 *
 * @param {number} patternCount - Number of patterns; used to cycle the numeric suffix.
 * @param {number} count - Number of input strings to produce.
 * @param {boolean} withHits - Whether each string should contain a `token<k>` word.
 * @returns {string[]} The generated inputs.
 */
function makeInputs(patternCount, count, withHits) {
  const marker = withHits ? 'token' : 'tok';
  const inputs = [];
  let j = 0;
  while (j < count) {
    const suffix = j % patternCount;
    inputs.push(`xx${suffix} ${j & 7} ${marker}${suffix} tail`);
    j += 1;
  }
  return inputs;
}

/**
 * Builds one long input string that contains `token<i>` for every pattern
 * index, with the segments joined by ` | `.
 *
 * @param {number} patternCount - Number of segments (one per pattern index).
 * @returns {string} The concatenated input; empty when `patternCount` is 0.
 */
function makeLongAllHits(patternCount) {
  const segments = [];
  let i = 0;
  while (i < patternCount) {
    segments.push(`some prefix ${i} token${i} suffix ${i & 7}`);
    i += 1;
  }
  return segments.join(' | ');
}

/**
 * Builds one long input string that contains no `token<i>` word at all.
 *
 * The ` | `-joined segment list is emitted twice back to back; note that no
 * separator is inserted between the two copies.
 *
 * @param {number} patternCount - Number of segments in each copy.
 * @returns {string} The doubled input; empty when `patternCount` is 0.
 */
function makeLongNoHits(patternCount) {
  const segments = [];
  let i = 0;
  while (i < patternCount) {
    segments.push(`item${i} nohit ${i & 7}`);
    i += 1;
  }
  const once = segments.join(' | ');
  return once + once;
}

/**
 * Runs `fn` once and reports how long the call took.
 *
 * Timing uses the nanosecond-resolution `process.hrtime.bigint()` clock and is
 * converted to (fractional) milliseconds.
 *
 * @param {() => *} fn - Zero-argument function to time.
 * @returns {{timeMs: number, result: *}} Elapsed milliseconds and `fn`'s return value.
 */
function measure(fn) {
  const startedAt = process.hrtime.bigint();
  const result = fn();
  const elapsedNs = process.hrtime.bigint() - startedAt;
  return { timeMs: Number(elapsedNs) / 1e6, result };
}

// Scenario factories. Both produce objects with the keys
// name, patterns, inputs, mode, desc (in that order).

// "multi" scenario: 4000 short inputs tested one by one.
const multiScenario = (patterns, withHits) => ({
  name: `multi-small-${patterns}-${withHits ? 'hits' : 'nohits'}`,
  patterns,
  inputs: makeInputs(patterns, 4000, withHits),
  mode: 'multi',
  desc: withHits ? '4k small strings, many hits' : '4k small strings, no hits',
});

// "single" scenario: one long input checked against every pattern.
// NOTE(review): `sizeLabel` is a hand-written approximation and looks larger
// than the generated strings actually are — the printed avgInputLen column is
// the measured length; confirm before quoting the labels.
const singleScenario = (patterns, withHits, sizeLabel) => ({
  name: `single-long-${patterns}-${withHits ? 'hits' : 'nohits'}`,
  patterns,
  inputs: [withHits ? makeLongAllHits(patterns) : makeLongNoHits(patterns)],
  mode: 'single',
  desc: `${sizeLabel} string, ${withHits ? 'all tokens present' : 'no tokens'}`,
});

// Benchmark matrix: {50, 200} patterns x {hits, no hits} for each mode.
const configs = [
  multiScenario(50, true),
  multiScenario(50, false),
  multiScenario(200, true),
  multiScenario(200, false),
  singleScenario(50, true, '~3 KB'),
  singleScenario(50, false, '~3 KB'),
  singleScenario(200, true, '~7 KB'),
  singleScenario(200, false, '~8 KB'),
];

// One entry per scenario, in the same order as `configs`.
const results = [];

for (const cfg of configs) {
  // Compile the same pattern sources three ways: one RE2.Set, a list of
  // individual RE2 instances, and a list of native RegExp instances.
  const sources = makePatterns(cfg.patterns);
  const set = new RE2.Set(sources);
  const re2List = sources.map((src) => new RE2(src));
  const jsList = sources.map((src) => new RegExp(src));

  // Mean input length, reported alongside the timings.
  let totalLen = 0;
  for (const text of cfg.inputs) totalLen += text.length;
  const avgLen = totalLen / cfg.inputs.length;

  let runSet;
  let runRe2;
  let runJs;

  if (cfg.mode === 'multi') {
    // Count the inputs matched by at least one pattern. The list variants
    // stop at the first matching regex for each input.
    runSet = () => {
      let hits = 0;
      for (const text of cfg.inputs) {
        if (set.test(text)) hits += 1;
      }
      return hits;
    };
    runRe2 = () => {
      let hits = 0;
      for (const text of cfg.inputs) {
        for (const re of re2List) {
          if (re.test(text)) {
            hits += 1;
            break;
          }
        }
      }
      return hits;
    };
    runJs = () => {
      let hits = 0;
      for (const text of cfg.inputs) {
        for (const re of jsList) {
          if (re.test(text)) {
            hits += 1;
            break;
          }
        }
      }
      return hits;
    };
  } else {
    // Count how many patterns match the single long input.
    runSet = () => set.match(cfg.inputs[0]).length;
    runRe2 = () =>
      re2List.reduce((hits, re) => (re.test(cfg.inputs[0]) ? hits + 1 : hits), 0);
    runJs = () =>
      jsList.reduce((hits, re) => (re.test(cfg.inputs[0]) ? hits + 1 : hits), 0);
  }

  // Each variant is timed exactly once, always in the order Set, RE2, RegExp.
  const setRes = measure(runSet);
  const re2Res = measure(runRe2);
  const jsRes = measure(runJs);

  // `matches` is taken from the RE2.Set run only; the other two counts are
  // computed but not recorded or cross-checked.
  results.push({
    ...cfg,
    avgLen,
    inputsCount: cfg.inputs.length,
    set: setRes.timeMs,
    re2: re2Res.timeMs,
    js: jsRes.timeMs,
    matches: setRes.result,
  });
}

// Print one row per scenario; timings are formatted to three decimals (ms).
console.table(
  results.map(
    ({
      name,
      patterns,
      inputsCount,
      avgLen,
      matches,
      set: setMs,
      re2: re2Ms,
      js: jsMs,
      desc,
    }) => ({
      scenario: name,
      patterns,
      inputs: inputsCount,
      avgInputLen: Math.round(avgLen),
      matches,
      setMs: setMs.toFixed(3),
      re2Ms: re2Ms.toFixed(3),
      jsMs: jsMs.toFixed(3),
      desc,
    })
  )
);

Let me know if you hit any issues.

(feat) add RE2::Set bindings

bfed0ad

uhop self-assigned this Nov 23, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

(feat) add RE2::Set bindings #231

(feat) add RE2::Set bindings #231

Uh oh!

wrmedford commented Nov 23, 2025

Uh oh!

wrmedford commented Nov 23, 2025

Uh oh!

uhop commented Nov 24, 2025

Uh oh!

wrmedford commented Nov 24, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Uh oh!

(feat) add RE2::Set bindings #231

Are you sure you want to change the base?

(feat) add RE2::Set bindings #231

Uh oh!

Conversation

wrmedford commented Nov 23, 2025

Uh oh!

wrmedford commented Nov 23, 2025

Uh oh!

uhop commented Nov 24, 2025

Uh oh!

wrmedford commented Nov 24, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants