RocketChat · Harshit2405-2004 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/.changeset/block-splitter-layer1.md b/.changeset/block-splitter-layer1.md
@@ -0,0 +1,5 @@
+---
+"@rocket.chat/message-parser": patch
+---
+
+feat(message-parser): implement BlockSplitter PoC (Layer 1)
diff --git a/packages/message-parser/benchmarks/parser.bench.ts b/packages/message-parser/benchmarks/parser.bench.ts
@@ -61,6 +61,8 @@ const categories: BenchCategory[] = [
 			{ name: 'strike', input: '~~Hello world~~' },
 			{ name: 'nested', input: '**bold _italic_ and ~~strike~~**' },
 			{ name: 'deep nesting', input: '**bold _italic ~~strike _deep italic_~~_**' },
+			{ name: 'bold + italic mixed', input: '*Bold text* and _italic text_ in a message' },
+			{ name: 'deeply nested', input: '*bold _italic ~~strike~~ italic_ bold*' },
 			{ name: 'multiple', input: '**bold** normal _italic_ normal ~~strike~~ **more bold** _more italic_' },
 		],
 	},
@@ -83,6 +85,7 @@ const categories: BenchCategory[] = [
 			{ name: 'triple unicode (BigEmoji)', input: '😀🚀🌈', options: fullOptions },
 			{ name: 'in text', input: 'Hello :smile: world :heart: test :rocket: done', options: fullOptions },
 			{ name: 'mixed', input: 'Great job :thumbsup: 🎉 keep going :rocket:', options: fullOptions },
+			{ name: 'emoji heavy', input: ':smile: :wave: :rocket: :fire: :heart: :100:', options: fullOptions },
 		],
 	},
 	{
@@ -92,6 +95,7 @@ const categories: BenchCategory[] = [
 			{ name: 'multiple users', input: '@admin @user1 @moderator' },
 			{ name: 'channel', input: '#general' },
 			{ name: 'mixed', input: 'Hey @admin check #general and @user1' },
+			{ name: 'mentions (suggested)', input: 'Hey @john and @jane, check #general' },
 		],
 	},
 	{
@@ -141,6 +145,10 @@ const categories: BenchCategory[] = [
 				name: 'long with formatting',
 				input: '**bold** _italic_ ~~strike~~ `code` @user #channel :smile: https://example.com '.repeat(10).trim(),
 			},
+			{
+				name: 'unmatched markers (pathological)',
+				input: '*_~*_~*_~*_~*_~ hello world absolutely no closing markers anywhere at all',
+			},
 		],
 	},
 	{
@@ -158,6 +166,11 @@ const categories: BenchCategory[] = [
 					'**Release Notes v7.0**\n- [x] Fix #12345\n- [ ] Update docs\n\n> Important: check https://docs.rocket.chat\n\ncc @admin @devlead #releases :rocket:',
 				options: fullOptions,
 			},
+			{
+				name: 'realistic chat message',
+				input: 'Hello @team, please review the *important* update:\n\n1. Run `yarn build`\n2. Check #deployments\n\n*Thanks!* :rocket:',
+				options: fullOptions,
+			},
 		],
 	},
 	{

diff --git a/packages/message-parser/src/BlockSplitter.ts b/packages/message-parser/src/BlockSplitter.ts
@@ -0,0 +1,163 @@
+/** Kinds of top-level block that `BlockSplitter.split` can emit. */
+export enum BlockType {
+	PARAGRAPH = 'PARAGRAPH',
+	HEADING = 'HEADING',
+	CODE = 'CODE',
+	LIST = 'LIST',
+	QUOTE = 'QUOTE',
+}
+
+/**
+ * One contiguous block of the input, as produced by `BlockSplitter.split`.
+ *
+ * `content` is the block's lines joined with `\n`. List and quote blocks keep their
+ * markers; headings drop the leading `#` run; code blocks drop both fence lines.
+ */
+export type Block = {
+	type: BlockType;
+	content: string;
+	/** Heading only: number of leading `#` characters (1-6). */
+	level?: number;
+	/** Code only: trimmed text after the opening fence; empty string when absent. */
+	language?: string;
+	/** List only: whether every item so far is ordered; `undefined` once kinds are mixed. */
+	ordered?: boolean;
+	/** Code only: `true` when the input ended before a closing fence was seen. */
+	incomplete?: boolean;
+};
+
+/** One to six `#` characters, at least one whitespace character, then the heading text. */
+const HEADING_PATTERN = /^(#{1,6})\s+(.+)$/;
+/** Optional indent, a `-`, `*`, `+` or `<digits>.` marker, whitespace, then the item text. */
+const LIST_ITEM_PATTERN = /^(\s*)([-*+]|\d+\.)\s+(.+)$/;
+const ORDERED_MARKER_PATTERN = /^\d+\./;
+const INDENT_PATTERN = /^\s+/;
+const CODE_FENCE = '```';
+
+/** Line-based splitter that groups raw message text into coarse blocks. */
+export class BlockSplitter {
+	/**
+	 * Splits `input` into blocks, scanning line by line (`\n` or `\r\n` separated).
+	 *
+	 * Per line, the first matching rule wins: heading, code fence, blank line, list item,
+	 * indented list continuation, quote, paragraph. Headings and code fences always close
+	 * the open block. An empty line closes it too; a whitespace-only line does the same
+	 * unless a list is open, in which case it is kept as part of that list.
+	 *
+	 * @param input Raw text to split.
+	 * @returns Blocks in input order; empty when `input` has no non-blank lines.
+	 */
+	public static split(input: string): Block[] {
+		const lines = input.split(/\r?\n/);
+		const blocks: Block[] = [];
+		let currentBlock: Block | null = null;
+
+		// Index loop on purpose: the code-fence branch advances `i` past the fenced lines.
+		for (let i = 0; i < lines.length; i++) {
+			const line = lines[i];
+
+			const headingMatch = HEADING_PATTERN.exec(line);
+			if (headingMatch) {
+				BlockSplitter.flush(blocks, currentBlock);
+				currentBlock = null;
+				blocks.push({
+					type: BlockType.HEADING,
+					content: headingMatch[2],
+					level: headingMatch[1].length,
+				});
+				continue;
+			}
+
+			if (line.startsWith(CODE_FENCE)) {
+				BlockSplitter.flush(blocks, currentBlock);
+				currentBlock = null;
+				const language = line.slice(CODE_FENCE.length).trim();
+				const codeLines: string[] = [];
+				i++;
+				while (i < lines.length && !lines[i].startsWith(CODE_FENCE)) {
+					codeLines.push(lines[i]);
+					i++;
+				}
+				blocks.push({
+					type: BlockType.CODE,
+					content: codeLines.join('\n'),
+					language,
+					// Running off the end of the input means no closing fence was found.
+					incomplete: i >= lines.length,
+				});
+				continue;
+			}
+
+			if (line.trim() === '') {
+				// A whitespace-only line is indented, so it continues an open list just like
+				// any other indented line; only a truly empty line terminates the list.
+				if (line.length > 0 && currentBlock?.type === BlockType.LIST) {
+					currentBlock.content += `\n${line}`;
+					continue;
+				}
+				BlockSplitter.flush(blocks, currentBlock);
+				currentBlock = null;
+				continue;
+			}
+
+			const listMatch = LIST_ITEM_PATTERN.exec(line);
+			if (listMatch) {
+				const isOrdered = ORDERED_MARKER_PATTERN.test(listMatch[2]);
+				if (currentBlock?.type !== BlockType.LIST) {
+					BlockSplitter.flush(blocks, currentBlock);
+					currentBlock = {
+						type: BlockType.LIST,
+						content: line,
+						ordered: isOrdered,
+					};
+				} else {
+					// Mixing ordered and unordered items leaves the list kind undetermined.
+					if (currentBlock.ordered !== undefined && currentBlock.ordered !== isOrdered) {
+						currentBlock.ordered = undefined;
+					}
+					currentBlock.content += `\n${line}`;
+				}
+				continue;
+			}
+
+			// Indented non-item text belongs to the list that is currently open.
+			if (currentBlock?.type === BlockType.LIST && INDENT_PATTERN.test(line)) {
+				currentBlock.content += `\n${line}`;
+				continue;
+			}
+
+			if (line.startsWith('>')) {
+				if (currentBlock?.type !== BlockType.QUOTE) {
+					BlockSplitter.flush(blocks, currentBlock);
+					currentBlock = {
+						type: BlockType.QUOTE,
+						content: line,
+					};
+				} else {
+					currentBlock.content += `\n${line}`;
+				}
+				continue;
+			}
+
+			if (currentBlock?.type !== BlockType.PARAGRAPH) {
+				BlockSplitter.flush(blocks, currentBlock);
+				currentBlock = {
+					type: BlockType.PARAGRAPH,
+					content: line,
+				};
+			} else {
+				currentBlock.content += `\n${line}`;
+			}
+		}
+
+		BlockSplitter.flush(blocks, currentBlock);
+		return blocks;
+	}
+
+	/** Appends `block` to `blocks`; a `null` block (nothing open) is ignored. */
+	private static flush(blocks: Block[], block: Block | null) {
+		if (block) {
+			blocks.push(block);
+		}
+	}
+}
diff --git a/packages/message-parser/tests/blockSplitter.spec.ts b/packages/message-parser/tests/blockSplitter.spec.ts
@@ -0,0 +1,155 @@
+import { BlockSplitter, BlockType } from '../src/BlockSplitter';
+
+describe('BlockSplitter', () => {
+	it('should split simple paragraphs', () => {
+		const input = 'Hello\nWorld';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.PARAGRAPH);
+		expect(blocks[0].content).toBe('Hello\nWorld');
+	});
+
+	it('should treat CRLF line endings like LF', () => {
+		const input = 'Hello\r\nWorld';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].content).toBe('Hello\nWorld');
+	});
+
+	it('should identify headings', () => {
+		const input = '# Heading 1\n## Heading 2\nContent';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(3);
+		expect(blocks[0].type).toBe(BlockType.HEADING);
+		expect(blocks[0].level).toBe(1);
+		expect(blocks[0].content).toBe('Heading 1');
+		expect(blocks[1].type).toBe(BlockType.HEADING);
+		expect(blocks[1].level).toBe(2);
+		expect(blocks[1].content).toBe('Heading 2');
+		expect(blocks[2].type).toBe(BlockType.PARAGRAPH);
+		expect(blocks[2].content).toBe('Content');
+	});
+
+	it('should identify code blocks', () => {
+		const input = 'Pre\n```javascript\nconst a = 1;\n```\nPost';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(3);
+		expect(blocks[0].content).toBe('Pre');
+		expect(blocks[1].type).toBe(BlockType.CODE);
+		expect(blocks[1].language).toBe('javascript');
+		expect(blocks[1].content).toBe('const a = 1;');
+		expect(blocks[1].incomplete).toBe(false);
+		expect(blocks[2].content).toBe('Post');
+	});
+
+	it('should handle list splitting and preserve full syntax', () => {
+		const input = '- item 1\n* item 2\n1. item 3';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.LIST);
+		expect(blocks[0].content).toBe('- item 1\n* item 2\n1. item 3');
+	});
+
+	it('should handle nested lists via indentation', () => {
+		const input = '- Level 1\n  - Level 2\n    - Level 3';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].content).toBe('- Level 1\n  - Level 2\n    - Level 3');
+	});
+
+	it('should allow indented blank lines to continue a list', () => {
+		const input = '- item 1\n  \n- item 2';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].content).toBe('- item 1\n  \n- item 2');
+	});
+
+	it('should correctly detect boundaries: list followed by heading', () => {
+		const input = '- list item\n\n# Heading';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(2);
+		expect(blocks[0].type).toBe(BlockType.LIST);
+		expect(blocks[1].type).toBe(BlockType.HEADING);
+	});
+
+	it('should identify blockquotes and preserve markers', () => {
+		const input = '> quote line 1\n> quote line 2';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.QUOTE);
+		expect(blocks[0].content).toBe('> quote line 1\n> quote line 2');
+	});
+
+	it('should support nested blockquotes', () => {
+		const input = '> outer\n>> inner';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.QUOTE);
+		expect(blocks[0].content).toBe('> outer\n>> inner');
+	});
+
+	it('should set ordered to undefined for mixed ordered and unordered list items', () => {
+		const input = '- unordered\n1. ordered';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.LIST);
+		expect(blocks[0].ordered).toBeUndefined();
+	});
+
+	it('should keep ordered=true for fully ordered lists', () => {
+		const input = '1. first\n2. second';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.LIST);
+		expect(blocks[0].ordered).toBe(true);
+	});
+
+	it('should keep ordered=false for fully unordered lists', () => {
+		const input = '- first\n* second';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.LIST);
+		expect(blocks[0].ordered).toBe(false);
+	});
+
+	it('should create a new paragraph block after a list block', () => {
+		const input = '- list item\n\nParagraph text';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(2);
+		expect(blocks[0].type).toBe(BlockType.LIST);
+		expect(blocks[1].type).toBe(BlockType.PARAGRAPH);
+		expect(blocks[1].content).toBe('Paragraph text');
+	});
+
+	it('should create a new paragraph block after a quote block', () => {
+		const input = '> blockquote\n\nParagraph text';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(2);
+		expect(blocks[0].type).toBe(BlockType.QUOTE);
+		expect(blocks[1].type).toBe(BlockType.PARAGRAPH);
+		expect(blocks[1].content).toBe('Paragraph text');
+	});
+
+	it('should handle empty input correctly', () => {
+		const input = '';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(0);
+	});
+
+	it('should yield a CODE block with incomplete flag for an unclosed code fence', () => {
+		const input = '```js\ncode';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.CODE);
+		expect(blocks[0].content).toBe('code');
+		expect(blocks[0].incomplete).toBe(true);
+	});
+
+	it('should treat a heading without a space as a paragraph', () => {
+		const input = '#NoSpace';
+		const blocks = BlockSplitter.split(input);
+		expect(blocks.length).toBe(1);
+		expect(blocks[0].type).toBe(BlockType.PARAGRAPH);
+		expect(blocks[0].content).toBe('#NoSpace');
+	});
+});
diff --git a/packages/message-parser/tests/skip-flags-regression.spec.ts b/packages/message-parser/tests/skip-flags-regression.spec.ts
@@ -0,0 +1,32 @@
+import { parse } from '../src';
+
+describe('Skip Flags Regression (Complexity Audit)', () => {
+	const measureDepth = (depth: number) => {
+		const input = `${'*'.repeat(depth)}text${'*'.repeat(depth)}`;
+		const start = performance.now();
+		parse(input);
+		return performance.now() - start;
+	};
+
+	it('should log timing data for nested formatting depths', () => {
+		const times: Record<number, number> = {};
+		for (let d = 1; d <= 7; d++) {
+			times[d] = measureDepth(d);
+		}
+
+		console.table(Object.entries(times).map(([depth, time]) => ({ depth, 'time (ms)': time.toFixed(4) })));
+
+		// Diagnostic only: compare the logged d=7 timing with d=1 by hand to judge growth.
+		// The assertion just confirms the depth-7 run was recorded; it enforces no bound.
+		expect(times[7]).toBeDefined();
+	});
+
+	it('should handle pathological unmatched markers without crashing', () => {
+		const pathological = '*_~*_~*_~*_~*_~ hello'.repeat(5);
+		const start = performance.now();
+		parse(pathological);
+		const duration = performance.now() - start;
+		console.log(`Pathological unmatched markers (5x): ${duration.toFixed(2)}ms`);
+		expect(duration).toBeLessThan(1000); // wall-clock budget of 1000 ms for the single parse call
+	});
+});