Skip to content
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
d79d30b
test(message-parser): add benchmark suite to baseline Peggy performance
Harshit2405-2004 Mar 3, 2026
ffd0278
feat(message-parser): implement BlockSplitter PoC (Layer 1)
Harshit2405-2004 Mar 3, 2026
0c7daa2
test(message-parser): add skip-flag regression tests to document comp…
Harshit2405-2004 Mar 3, 2026
8ae0719
test(message-parser): expand benchmark suite with mixed and pathologi…
Harshit2405-2004 Mar 3, 2026
04c8fcd
refactor(message-parser): fix list segmentation and preserve syntax i…
Harshit2405-2004 Mar 3, 2026
25a81ec
refactor(message-parser): address refined review comments for BlockSp…
Harshit2405-2004 Mar 3, 2026
796b681
Merge branch 'develop' into refactor/message-parser-block-splitter
Harshit2405-2004 Mar 3, 2026
08d1dc4
refactor(message-parser): final verified fixes for BlockSplitter revi…
Harshit2405-2004 Mar 3, 2026
7874fff
refactor(message-parser): address all review comments for BlockSplitt…
Harshit2405-2004 Mar 3, 2026
2f80b11
Update packages/message-parser/benchmarks/parser.bench.ts
Harshit2405-2004 Mar 3, 2026
4bee6db
Merge branch 'develop' into test/message-parser-benchmark-suite
Harshit2405-2004 Mar 3, 2026
1bccb50
refactor(message-parser): handle mixed list ordering and restore regr…
Harshit2405-2004 Mar 5, 2026
3b3722b
chore: consolidate parser PoC
Harshit2405-2004 Mar 5, 2026
a76d0f5
Fix: Address cubic and coderabbit parser observations
Harshit2405-2004 Mar 5, 2026
1ae425c
Update packages/message-parser/tests/skip-flags-regression.spec.ts
Harshit2405-2004 Mar 5, 2026
1923a6a
Update packages/message-parser/tests/skip-flags-regression.spec.ts
Harshit2405-2004 Mar 5, 2026
81745c2
Merge branch 'develop' into feat/message-parser-poc
Harshit2405-2004 Mar 5, 2026
e753deb
Merge branch 'develop' into feat/message-parser-poc
Harshit2405-2004 Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/block-splitter-layer1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@rocket.chat/message-parser": patch
---

feat(message-parser): implement BlockSplitter PoC (Layer 1)
13 changes: 13 additions & 0 deletions packages/message-parser/benchmarks/parser.bench.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ const categories: BenchCategory[] = [
{ name: 'strike', input: '~~Hello world~~' },
{ name: 'nested', input: '**bold _italic_ and ~~strike~~**' },
{ name: 'deep nesting', input: '**bold _italic ~~strike _deep italic_~~_**' },
{ name: 'bold + italic mixed', input: '*Bold text* and _italic text_ in a message' },
{ name: 'deeply nested', input: '*bold _italic ~~strike~~ italic_ bold*' },
{ name: 'multiple', input: '**bold** normal _italic_ normal ~~strike~~ **more bold** _more italic_' },
],
},
Expand All @@ -83,6 +85,7 @@ const categories: BenchCategory[] = [
{ name: 'triple unicode (BigEmoji)', input: '😀🚀🌈', options: fullOptions },
{ name: 'in text', input: 'Hello :smile: world :heart: test :rocket: done', options: fullOptions },
{ name: 'mixed', input: 'Great job :thumbsup: 🎉 keep going :rocket:', options: fullOptions },
{ name: 'emoji heavy', input: ':smile: :wave: :rocket: :fire: :heart: :100:', options: fullOptions },
],
},
{
Expand All @@ -92,6 +95,7 @@ const categories: BenchCategory[] = [
{ name: 'multiple users', input: '@admin @user1 @moderator' },
{ name: 'channel', input: '#general' },
{ name: 'mixed', input: 'Hey @admin check #general and @user1' },
{ name: 'mentions (suggested)', input: 'Hey @john and @jane, check #general' },
],
},
{
Expand Down Expand Up @@ -141,6 +145,10 @@ const categories: BenchCategory[] = [
name: 'long with formatting',
input: '**bold** _italic_ ~~strike~~ `code` @user #channel :smile: https://example.com '.repeat(10).trim(),
},
{
name: 'unmatched markers (pathological)',
input: '*_~*_~*_~*_~*_~ hello world absolutely no closing markers anywhere at all',
},
],
},
{
Expand All @@ -158,6 +166,11 @@ const categories: BenchCategory[] = [
'**Release Notes v7.0**\n- [x] Fix #12345\n- [ ] Update docs\n\n> Important: check https://docs.rocket.chat\n\ncc @admin @devlead #releases :rocket:',
options: fullOptions,
},
{
name: 'realistic chat message',
input: 'Hello @team, please review the *important* update:\n\n1. Run `yarn build`\n2. Check #deployments\n\n*Thanks!* :rocket:',
options: fullOptions,
},
],
},
{
Expand Down
128 changes: 128 additions & 0 deletions packages/message-parser/src/BlockSplitter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/** Kinds of top-level blocks that `BlockSplitter.split` can emit. */
export enum BlockType {
	PARAGRAPH = 'PARAGRAPH',
	HEADING = 'HEADING',
	CODE = 'CODE',
	LIST = 'LIST',
	QUOTE = 'QUOTE',
}

/** A run of one or more input lines classified as a single block. */
export type Block = {
	/** Classification of the block. */
	type: BlockType;
	/**
	 * Block text. Headings carry the text after the `#` marker, code blocks carry
	 * the lines between the fences, and lists, quotes and paragraphs carry their
	 * raw lines (markers included) joined with `\n`.
	 */
	content: string;
	/** Headings only: number of leading `#` characters (1-6). */
	level?: number;
	/** Code blocks only: trimmed text following the opening fence (may be empty). */
	language?: string;
	/**
	 * Lists only: `true` when every item uses an `N.` marker, `false` when every
	 * item uses a bullet, `undefined` once both kinds have been seen.
	 */
	ordered?: boolean;
	/** Code blocks only: `true` when the input ended before a closing fence. */
	incomplete?: boolean;
};

// Patterns are hoisted so they are compiled once instead of once per input line.

/** Heading line: one to six `#`, whitespace, then at least one character of text. */
const HEADING_PATTERN = /^(#{1,6})\s+(.+)$/;
/** List item: optional indent, a `-`/`*`/`+` bullet or `N.` marker, whitespace, text. */
const LIST_ITEM_PATTERN = /^\s*([-*+]|\d+\.)\s+.+$/;
/** Distinguishes an `N.` marker from a bullet marker. */
const ORDERED_MARKER_PATTERN = /^\d+\./;
/** Line that starts with at least one whitespace character. */
const INDENT_PATTERN = /^\s+/;
/** Marker that opens and closes a code block. */
const CODE_FENCE = '```';

/**
 * Splits raw message text into coarse blocks (paragraphs, headings, code
 * blocks, lists and quotes) using a single pass over its lines.
 */
export class BlockSplitter {
	/**
	 * Splits `input` into blocks.
	 *
	 * Lines are separated on `\n` or `\r\n`. An empty line ends the open block.
	 * A whitespace-only line also ends it, except inside a list that resumes
	 * afterwards with another item or an indented line; there the whitespace
	 * line is kept as part of the list content.
	 *
	 * @param input - Raw text to split.
	 * @returns Blocks in source order; an empty array when `input` has no content.
	 */
	public static split(input: string): Block[] {
		const lines = input.split(/\r?\n/);
		const blocks: Block[] = [];
		let currentBlock: Block | null = null;

		for (let i = 0; i < lines.length; i++) {
			const line = lines[i];

			const headingMatch = line.match(HEADING_PATTERN);
			if (headingMatch) {
				// A heading is always a single-line block of its own.
				BlockSplitter.flush(blocks, currentBlock);
				blocks.push({
					type: BlockType.HEADING,
					content: headingMatch[2],
					level: headingMatch[1].length,
				});
				currentBlock = null;
				continue;
			}

			if (line.startsWith(CODE_FENCE)) {
				BlockSplitter.flush(blocks, currentBlock);
				const language = line.slice(CODE_FENCE.length).trim();
				const codeLines: string[] = [];
				i++;
				// Consume everything up to the closing fence verbatim, so headings,
				// list markers and blank lines inside the fence stay part of the code.
				while (i < lines.length && !lines[i].startsWith(CODE_FENCE)) {
					codeLines.push(lines[i]);
					i++;
				}
				blocks.push({
					type: BlockType.CODE,
					content: codeLines.join('\n'),
					language,
					// Running off the end of the input means no closing fence was found.
					incomplete: i >= lines.length,
				});
				currentBlock = null;
				continue;
			}

			if (line.trim() === '') {
				// A whitespace-only (indented) blank line does not end a list that
				// resumes right after it; a truly empty line always does.
				if (line.length > 0 && currentBlock?.type === BlockType.LIST) {
					const resumeAt = BlockSplitter.findListContinuation(lines, i + 1);
					if (resumeAt !== -1) {
						currentBlock.content += `\n${lines.slice(i, resumeAt).join('\n')}`;
						// The loop increment moves on to the continuation line itself.
						i = resumeAt - 1;
						continue;
					}
				}
				BlockSplitter.flush(blocks, currentBlock);
				currentBlock = null;
				continue;
			}

			const listMatch = line.match(LIST_ITEM_PATTERN);
			if (listMatch) {
				const isOrdered = ORDERED_MARKER_PATTERN.test(listMatch[1]);
				if (currentBlock?.type !== BlockType.LIST) {
					BlockSplitter.flush(blocks, currentBlock);
					currentBlock = {
						type: BlockType.LIST,
						content: line,
						ordered: isOrdered,
					};
				} else {
					// Mixing `N.` and bullet markers leaves the ordering undetermined.
					if (currentBlock.ordered !== undefined && currentBlock.ordered !== isOrdered) {
						currentBlock.ordered = undefined;
					}
					currentBlock.content += `\n${line}`;
				}
				continue;
			}

			// Indented non-item lines belong to the list that is currently open.
			if (currentBlock?.type === BlockType.LIST && INDENT_PATTERN.test(line)) {
				currentBlock.content += `\n${line}`;
				continue;
			}

			if (line.startsWith('>')) {
				if (currentBlock?.type !== BlockType.QUOTE) {
					BlockSplitter.flush(blocks, currentBlock);
					currentBlock = {
						type: BlockType.QUOTE,
						content: line,
					};
				} else {
					currentBlock.content += `\n${line}`;
				}
				continue;
			}

			if (currentBlock?.type !== BlockType.PARAGRAPH) {
				BlockSplitter.flush(blocks, currentBlock);
				currentBlock = {
					type: BlockType.PARAGRAPH,
					content: line,
				};
			} else {
				currentBlock.content += `\n${line}`;
			}
		}

		BlockSplitter.flush(blocks, currentBlock);
		return blocks;
	}

	/**
	 * Looks past a run of whitespace-only lines for a line that keeps a list open.
	 *
	 * @param lines - All input lines.
	 * @param from - Index of the first line to inspect.
	 * @returns Index of the next list item or indented line, or `-1` when the run
	 * is followed by an empty line, any other kind of line, or the end of input.
	 */
	private static findListContinuation(lines: string[], from: number): number {
		let next = from;
		while (next < lines.length && lines[next].length > 0 && lines[next].trim() === '') {
			next++;
		}
		if (next >= lines.length || lines[next].length === 0) {
			return -1;
		}
		const candidate = lines[next];
		return LIST_ITEM_PATTERN.test(candidate) || INDENT_PATTERN.test(candidate) ? next : -1;
	}

	/** Appends `block` to `blocks` unless there is no open block. */
	private static flush(blocks: Block[], block: Block | null) {
		if (block) {
			blocks.push(block);
		}
	}
}
141 changes: 141 additions & 0 deletions packages/message-parser/tests/blockSplitter.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import { BlockSplitter, BlockType } from '../src/BlockSplitter';

describe('BlockSplitter', () => {
	// Thin alias so every case reads as "text in, blocks out".
	const splitText = (text: string) => BlockSplitter.split(text);

	it('should split simple paragraphs', () => {
		const result = splitText('Hello\nWorld');
		expect(result).toHaveLength(1);
		const [paragraph] = result;
		expect(paragraph.type).toBe(BlockType.PARAGRAPH);
		expect(paragraph.content).toBe('Hello\nWorld');
	});

	it('should identify headings', () => {
		const result = splitText('# Heading 1\n## Heading 2\nContent');
		expect(result).toHaveLength(3);
		const [first, second] = result;
		expect(first.type).toBe(BlockType.HEADING);
		expect(first.level).toBe(1);
		expect(second.type).toBe(BlockType.HEADING);
		expect(second.level).toBe(2);
	});

	it('should identify code blocks', () => {
		const result = splitText('Pre\n```javascript\nconst a = 1;\n```\nPost');
		expect(result).toHaveLength(3);
		// The fenced section sits between the two surrounding paragraphs.
		const fenced = result[1];
		expect(fenced.type).toBe(BlockType.CODE);
		expect(fenced.language).toBe('javascript');
		expect(fenced.content).toBe('const a = 1;');
	});

	it('should handle list splitting and preserve full syntax', () => {
		const result = splitText('- item 1\n* item 2\n1. item 3');
		expect(result).toHaveLength(1);
		const [list] = result;
		expect(list.type).toBe(BlockType.LIST);
		expect(list.content).toBe('- item 1\n* item 2\n1. item 3');
	});

	it('should handle nested lists via indentation', () => {
		const result = splitText('- Level 1\n - Level 2\n - Level 3');
		expect(result).toHaveLength(1);
		expect(result[0].content).toBe('- Level 1\n - Level 2\n - Level 3');
	});

	it('should allow indented blank lines to continue a list', () => {
		const result = splitText('- item 1\n \n- item 2');
		expect(result).toHaveLength(1);
		expect(result[0].content).toBe('- item 1\n \n- item 2');
	});

	it('should correctly detect boundaries: list followed by heading', () => {
		const result = splitText('- list item\n\n# Heading');
		expect(result).toHaveLength(2);
		const [list, heading] = result;
		expect(list.type).toBe(BlockType.LIST);
		expect(heading.type).toBe(BlockType.HEADING);
	});

	it('should identify blockquotes and preserve markers', () => {
		const result = splitText('> quote line 1\n> quote line 2');
		expect(result).toHaveLength(1);
		const [quote] = result;
		expect(quote.type).toBe(BlockType.QUOTE);
		expect(quote.content).toBe('> quote line 1\n> quote line 2');
	});

	it('should support nested blockquotes', () => {
		const result = splitText('> outer\n>> inner');
		expect(result).toHaveLength(1);
		const [quote] = result;
		expect(quote.type).toBe(BlockType.QUOTE);
		expect(quote.content).toBe('> outer\n>> inner');
	});

	it('should set ordered to undefined for mixed ordered and unordered list items', () => {
		const result = splitText('- unordered\n1. ordered');
		expect(result).toHaveLength(1);
		const [list] = result;
		expect(list.type).toBe(BlockType.LIST);
		expect(list.ordered).toBeUndefined();
	});

	it('should keep ordered=true for fully ordered lists', () => {
		const result = splitText('1. first\n2. second');
		expect(result).toHaveLength(1);
		const [list] = result;
		expect(list.type).toBe(BlockType.LIST);
		expect(list.ordered).toBe(true);
	});

	it('should keep ordered=false for fully unordered lists', () => {
		const result = splitText('- first\n* second');
		expect(result).toHaveLength(1);
		const [list] = result;
		expect(list.type).toBe(BlockType.LIST);
		expect(list.ordered).toBe(false);
	});

	it('should create a new paragraph block after a list block', () => {
		const result = splitText('- list item\n\nParagraph text');
		expect(result).toHaveLength(2);
		const [list, paragraph] = result;
		expect(list.type).toBe(BlockType.LIST);
		expect(paragraph.type).toBe(BlockType.PARAGRAPH);
		expect(paragraph.content).toBe('Paragraph text');
	});

	it('should create a new paragraph block after a quote block', () => {
		const result = splitText('> blockquote\n\nParagraph text');
		expect(result).toHaveLength(2);
		const [quote, paragraph] = result;
		expect(quote.type).toBe(BlockType.QUOTE);
		expect(paragraph.type).toBe(BlockType.PARAGRAPH);
		expect(paragraph.content).toBe('Paragraph text');
	});

	it('should handle empty input correctly', () => {
		expect(splitText('')).toHaveLength(0);
	});

	it('should yield a CODE block with incomplete flag for an unclosed code fence', () => {
		const result = splitText('```js\ncode');
		expect(result).toHaveLength(1);
		const [fenced] = result;
		expect(fenced.type).toBe(BlockType.CODE);
		expect(fenced.content).toBe('code');
		expect(fenced.incomplete).toBe(true);
	});

	it('should treat a heading without a space as a paragraph', () => {
		const result = splitText('#NoSpace');
		expect(result).toHaveLength(1);
		const [paragraph] = result;
		expect(paragraph.type).toBe(BlockType.PARAGRAPH);
		expect(paragraph.content).toBe('#NoSpace');
	});
});
32 changes: 32 additions & 0 deletions packages/message-parser/tests/skip-flags-regression.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { parse } from '../src';

describe('Skip Flags Regression (Complexity Audit)', () => {
	/** Returns the wall-clock milliseconds spent in a single `parse` call. */
	function elapsedParseMs(text: string): number {
		const startedAt = performance.now();
		parse(text);
		return performance.now() - startedAt;
	}

	/** Times parsing of `text` wrapped in `depth` asterisks on each side. */
	function measureDepth(depth: number): number {
		const marker = '*'.repeat(depth);
		return elapsedParseMs(`${marker}text${marker}`);
	}

	it('should log timing data for nested formatting depths', () => {
		const maxDepth = 7;
		const times: Record<number, number> = {};
		let depth = 1;
		while (depth <= maxDepth) {
			times[depth] = measureDepth(depth);
			depth += 1;
		}

		const rows = Object.entries(times).map(([level, elapsed]) => {
			return { 'depth': level, 'time (ms)': elapsed.toFixed(4) };
		});
		console.table(rows);

		// This case only records timings: growth from depth 1 to depth 7 that is
		// clearly worse than linear confirms the complexity problem. The assertion
		// merely checks that the deepest measurement was taken.
		expect(times[maxDepth]).toBeDefined();
	});

	it('should handle pathological unmatched markers without crashing', () => {
		const unmatched = '*_~*_~*_~*_~*_~ hello'.repeat(5);
		const duration = elapsedParseMs(unmatched);
		console.log(`Pathological unmatched markers (5x): ${duration.toFixed(2)}ms`);
		// Generous ceiling: parsing must still finish within one second.
		expect(duration).toBeLessThan(1000);
	});
});