Description
Description:
When adding numbering or images to large documents using docx, the process triggers excessive Mark-Compact events from the Node.js garbage collector during the xmlify operation. This significantly impacts performance, making XML processing much slower.
Steps to Reproduce
I created a test file demonstrating the issue for numbering, but the same problem occurs with images.
-
With numbering:
- XML processing time: ~6s
- GC Mark-Compact events: ~15
-
Without numbering:
- XML processing time: ~1.5s
- GC Mark-Compact events: ~1
Environment
- Node.js: 20
- docx version: 9.1.1
Analysis
The issue originates from next-compiler.ts at line 145, specifically due to a regular expression used for numbering. Since the XML string is large, the regex operation is likely consuming excessive memory, leading to heavy GC activity.
Expected Behavior
Would the maintainers consider an alternative approach to this regex-heavy logic to improve performance?
I’d be happy to provide additional details or help test potential solutions.
Test
File
To execute the test file please run:
- on windows
set NODE_OPTIONS="--max-old-space-size=1024" && tsx src\performance.ts
- on unix
export NODE_OPTIONS="--max-old-space-size=1024" && tsx src/performance.ts
import fs from "fs";
import v8 from 'v8';
import { AlignmentType, convertMillimetersToTwip, File, LevelFormat, Packer, Paragraph, Table, TableCell, TableRow, TextRun } from "./index.ts";
import {
PerformanceObserver,
constants as perfConstants,
} from 'node:perf_hooks'
// Number of numbering levels built by createLevels (level indices 0 through MAX_LEVEL_SIZE - 1).
const MAX_LEVEL_SIZE = 9;
/**
 * Builds the level definitions used by the numbering config of the benchmark document.
 *
 * One entry is produced per level index in [0, MAX_LEVEL_SIZE). The number
 * format cycles through decimal, upper letter, lower letter, upper roman and
 * lower roman; the left indent grows by 10 mm per level while the hanging
 * indent stays at 5 mm.
 *
 * @returns the array of level definition objects, ordered by level index.
 */
const createLevels = () => {
  const formatCycle = [
    LevelFormat.DECIMAL,
    LevelFormat.UPPER_LETTER,
    LevelFormat.LOWER_LETTER,
    LevelFormat.UPPER_ROMAN,
    LevelFormat.LOWER_ROMAN,
  ];
  return Array.from({ length: MAX_LEVEL_SIZE }, (_unused, level) => {
    const leftIndent = convertMillimetersToTwip(10 * (level + 1));
    const hangingIndent = convertMillimetersToTwip(5);
    return {
      level,
      // Wrap around once the level index exceeds the number of available formats.
      format: formatCycle[level % formatCycle.length],
      text: `%${level + 1}.`,
      alignment: AlignmentType.START,
      style: {
        paragraph: {
          indent: { left: leftIndent, hanging: hangingIndent },
        },
      },
    };
  });
}
/**
 * Creates a single Paragraph holding one TextRun per input string.
 *
 * Runs whose index is a multiple of 5 are created with `break: 1`; every
 * other run is created with `break: 0`.
 *
 * @param texts - the strings to turn into text runs, in order.
 * @returns the paragraph containing all runs.
 */
const createParagraph = (texts: string[]) => {
  const runs = texts.map((text, position) => {
    const startsGroupOfFive = position % 5 === 0
    return new TextRun({ text, break: startsGroupOfFive ? 1 : 0 })
  })
  return new Paragraph({ children: runs })
}
/**
 * Creates one Paragraph per input string, each holding a single bold TextRun.
 *
 * @param texts - the strings to wrap, one paragraph each.
 * @param instance - numbering instance passed through to each paragraph's numbering options.
 * @param useNumbering - when true, each paragraph references "default-numbering"
 *   with a level cycling through 0, 1, 2; when false, `numbering` is left undefined.
 *   The switch exists so the benchmark can compare runs with and without numbering.
 * @returns the paragraphs, in the same order as `texts`.
 */
const createParagraphWithNumbering = (texts: string[], instance: number, useNumbering: boolean): Paragraph[] => {
  const paragraphs: Paragraph[] = []
  for (const [index, text] of texts.entries()) {
    // Numbering is optional so its GC cost can be isolated by the caller.
    const numbering = useNumbering
      ? { level: index % 3, reference: "default-numbering", instance }
      : undefined
    const boldRun = new TextRun({ text, bold: true })
    paragraphs.push(new Paragraph({ children: [boldRun], numbering }))
  }
  return paragraphs
}
/**
 * Builds a 20-row by 10-column Table in which every cell contains one
 * paragraph of filler text prefixed with "<column>-<row>".
 *
 * @returns the populated table.
 */
const createTable = () => {
  const rowCount = 20
  const columnCount = 10
  const rows = Array.from({ length: rowCount }, (_row, i) => {
    const cells = Array.from({ length: columnCount }, (_cell, j) => {
      const paragraph = createParagraph([`${j}-${i} Lorem ipsum dolor sit amet. 33 itaque consequatur eum corporis omnis ut laborum dignissimos ut omnis tempora. Aut nihil dolorem et aspernatur nisi et minus repellat est vero velit.`])
      return new TableCell({ children: [paragraph] })
    })
    return new TableRow({ children: cells })
  })
  return new Table({ rows })
}
/**
 * Runs one benchmark pass: builds a large document, packs it to a buffer,
 * writes it to a temporary .docx file, deletes that file, and reports the
 * elapsed time of each phase plus the number of major GC events observed.
 *
 * The document consists of 200 groups, each made of one 50-run paragraph, one
 * table and 50 single-run paragraphs (numbered when `useNumbering` is true).
 *
 * GC events are accumulated for the whole pass and flushed when the pass ends,
 * so the function always completes, including when no GC event was delivered.
 *
 * @param useNumbering - whether the document declares the "default-numbering"
 *   config and attaches numbering to the generated paragraphs.
 * @returns a promise that resolves once the pass has finished and its summary
 *   has been logged.
 */
const test = async (useNumbering: boolean) => {
  // v8.setFlagsFromString('--trace-gc'); // for debug purposes provide v8 logs
  console.log(`Running test with numberings : ${useNumbering}`)

  const markCompactEvents: PerformanceEntry[] = []
  const collectMarkCompact = (entries: PerformanceEntry[]) => {
    for (const entry of entries) {
      const detail = entry.detail as { kind?: number } | undefined
      // Compare against the named constant rather than a hard-coded number:
      // the numeric value of "major GC" is defined by the runtime.
      if (entry.name === 'gc' && detail?.kind === perfConstants.NODE_PERFORMANCE_GC_MAJOR) {
        // console.log(`Mark-Compact GC detected! Duration: ${entry.duration}ms ${JSON.stringify(entry)}`); // for debug purpose
        markCompactEvents.push(entry)
      }
    }
  }

  // Keep observing for the whole pass; every delivered batch is accumulated.
  // No `buffered` flag: only events produced by this pass should be counted.
  const obs = new PerformanceObserver((list) => collectMarkCompact(list.getEntries()))
  obs.observe({ entryTypes: ['gc'] })

  try {
    const fileName = `./${Date.now()}.docx`
    console.time("creating doc")
    const children = []
    for (let i = 0; i < 200; i++) {
      const texts = []
      for (let j = 0; j < 50; j++) {
        texts.push(`${i} - ${j} text that will be added to a paragraph`)
      }
      const paragraph = createParagraph(texts)
      const paragraphsWithNumbering = createParagraphWithNumbering(texts, i, useNumbering)
      const table = createTable();
      children.push(paragraph, table, ...paragraphsWithNumbering)
    }
    const doc = new File({
      title: 'Performance',
      numbering: useNumbering
        ? {
            config: [{
              reference: 'default-numbering',
              levels: createLevels(),
            }]
          }
        : undefined,
      sections: [{ children }]
    })
    console.timeEnd("creating doc")

    console.time("xml process")
    const buffer = await Packer.toBuffer(doc, false);
    await fs.promises.writeFile(fileName, buffer)
    console.timeEnd('xml process')
    await fs.promises.unlink(fileName)
  } finally {
    // Pick up entries that were queued but not yet handed to the callback,
    // then stop observing even if document creation or packing failed.
    collectMarkCompact(obs.takeRecords())
    obs.disconnect()
  }

  if (markCompactEvents.length > 0) {
    console.error(`${markCompactEvents.length} mark compact garbage collector events happened during file creation process`)
  }
  console.log("===============================")
}
/**
 * Runs the benchmark twice in sequence: first with numbering enabled, then
 * with numbering disabled, so the two reports can be compared.
 */
const runTestSuite = async (): Promise<void> => {
  await test(true)
  await test(false)
}

// Handle rejection explicitly instead of leaving a floating promise, and
// signal the failure through the process exit code.
runTestSuite().catch((error: unknown) => {
  console.error(error)
  process.exitCode = 1
})
Activity