Skip to content

Commit 031db67

Browse files
authored
Merge pull request #144 from DemchaAV/perf/render-span-and-table-cell
perf(engine): paragraph operator dedup + table cell sanitize-once (F5+F6)
2 parents 8e895f6 + cc67707 commit 031db67

4 files changed

Lines changed: 185 additions & 15 deletions

File tree

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,26 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge.
6060
re-probed tens of thousands of glyph occurrences that now collapse to roughly
6161
the number of distinct characters it uses. No public API or behaviour change.
6262

63+
- **Paragraph render writes font and colour operators only when they change.** The
64+
paragraph render handler emitted a `setFont` (`Tf`) and `setNonStrokingColor`
65+
(`rg`) operator for *every* text span, even across the spans of a single-style
66+
paragraph. It now tracks the last-written `(font, size)` and colour across the
67+
paragraph's graphics-state block and re-emits only on a real change (invalidating
68+
after inline images/shapes), so a multi-span single-style paragraph carries one
69+
`Tf` + one `rg` instead of one pair per span — fewer operators for PDFBox to
70+
serialize. **Rendered output is unchanged** (the skipped operators were
71+
redundant); pinned by the visual-regression suite plus a content-stream test
72+
asserting one `Tf` across many drawn spans. No public API or behaviour change.
73+
74+
- **Table cell text is sanitized once per cell instead of three times.** Resolving
75+
a table ran each cell's lines through `sanitizeCellLines` separately in the
76+
natural-width, natural-height and resolve passes, rebuilding the list and its
77+
per-line control-character cleanup up to three times per cell. The sanitized
78+
lines are now computed once when the logical grid is built and reused by all
79+
three passes. **Output is byte-identical** (sanitization is deterministic); on a
80+
large table this removes the dominant per-cell layout allocation. No public API
81+
or behaviour change.
82+
6383
### Tests / tooling
6484

6585
- **Benchmark regression gate and measurement probe (benchmarks module, not part

src/main/java/com/demcha/compose/document/backend/fixed/pdf/handlers/PdfParagraphFragmentRenderHandler.java

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
import com.demcha.compose.font.FontLibrary;
1818
import com.demcha.compose.engine.render.pdf.PdfFont;
1919
import org.apache.pdfbox.pdmodel.PDPageContentStream;
20+
import org.apache.pdfbox.pdmodel.font.PDFont;
2021
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
2122

23+
import java.awt.Color;
2224
import java.io.IOException;
2325
import java.util.List;
2426

@@ -56,6 +58,11 @@ public void render(PlacedFragment fragment,
5658

5759
stream.saveGraphicsState();
5860
try {
61+
// Font and non-stroking colour persist across BT/ET within this one
62+
// q...Q block, so track the last-written pair and re-emit Tf/rg only
63+
// when a span actually changes them — a single-style paragraph then
64+
// emits one setFont + one setNonStrokingColor instead of one per span.
65+
TextRenderState textState = new TextRenderState();
5966
double cursorTop = contentTop;
6067
for (int lineIndex = 0; lineIndex < payload.lines().size(); lineIndex++) {
6168
ParagraphLine line = payload.lines().get(lineIndex);
@@ -71,7 +78,7 @@ public void render(PlacedFragment fragment,
7178
case LEFT -> innerX;
7279
};
7380

74-
renderLine(stream, fonts, line, lineX, baselineY, environment);
81+
renderLine(stream, fonts, line, lineX, baselineY, environment, textState);
7582

7683
cursorTop = lineTop - resolvedLineHeight - payload.lineGap();
7784
}
@@ -127,7 +134,8 @@ private void renderLine(PDPageContentStream stream,
127134
ParagraphLine line,
128135
double lineX,
129136
double baselineY,
130-
PdfRenderEnvironment environment) throws IOException {
137+
PdfRenderEnvironment environment,
138+
TextRenderState textState) throws IOException {
131139
List<ParagraphSpan> spans = line.spans();
132140
if (spans.isEmpty()) {
133141
return;
@@ -155,8 +163,10 @@ private void renderLine(PDPageContentStream stream,
155163
stream.newLineAtOffset((float) cursorX, (float) baselineY);
156164
inTextBlock = true;
157165
}
158-
stream.setFont(font.fontType(textSpan.textStyle().decoration()), (float) textSpan.textStyle().size());
159-
stream.setNonStrokingColor(textSpan.textStyle().color());
166+
textState.applyFont(stream,
167+
font.fontType(textSpan.textStyle().decoration()),
168+
(float) textSpan.textStyle().size());
169+
textState.applyColor(stream, textSpan.textStyle().color());
160170
stream.showText(text);
161171
cursorX += textSpan.width();
162172
} else if (span instanceof ParagraphImageSpan imageSpan) {
@@ -176,6 +186,10 @@ private void renderLine(PDPageContentStream stream,
176186
(float) imageBottom,
177187
(float) imageSpan.width(),
178188
(float) imageSpan.height());
189+
// An inline graphic runs its own graphics-state save/restore and
190+
// colour ops; drop the tracked font/colour so the next text span
191+
// re-emits them rather than trusting persistence across it.
192+
textState.invalidate();
179193
cursorX += imageSpan.width();
180194
} else if (span instanceof ParagraphShapeSpan shapeSpan) {
181195
if (inTextBlock) {
@@ -184,6 +198,7 @@ private void renderLine(PDPageContentStream stream,
184198
}
185199
renderShape(stream, shapeSpan, cursorX, baselineY,
186200
line.textAscent(), line.baselineOffsetFromBottom(), line.lineHeight());
201+
textState.invalidate();
187202
cursorX += shapeSpan.width();
188203
}
189204
}
@@ -287,4 +302,39 @@ private static void renderShape(PDPageContentStream stream,
287302
}
288303
}
289304

305+
/**
306+
* Tracks the font/size and non-stroking colour last written to the content
307+
* stream within one paragraph's {@code q...Q} block, so the handler emits a
308+
* {@code Tf}/{@code rg} operator only when a span actually changes them. The
309+
* common single-style paragraph then carries one of each instead of one per
310+
* span. {@link #invalidate()} forces a re-emit after anything that may disturb
311+
* the persisted text state (inline images, shapes).
312+
*/
313+
private static final class TextRenderState {
314+
private PDFont font;
315+
private float size = Float.NaN;
316+
private Color color;
317+
318+
void applyFont(PDPageContentStream stream, PDFont newFont, float newSize) throws IOException {
319+
if (newFont != font || newSize != size) {
320+
stream.setFont(newFont, newSize);
321+
font = newFont;
322+
size = newSize;
323+
}
324+
}
325+
326+
void applyColor(PDPageContentStream stream, Color newColor) throws IOException {
327+
if (!newColor.equals(color)) {
328+
stream.setNonStrokingColor(newColor);
329+
color = newColor;
330+
}
331+
}
332+
333+
void invalidate() {
334+
font = null;
335+
size = Float.NaN;
336+
color = null;
337+
}
338+
}
339+
290340
}

src/main/java/com/demcha/compose/document/layout/TableLayoutSupport.java

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,14 @@ record ResolvedTableLayout(
9595
* are skipped when iterating later source rows. {@code source} is the
9696
* original public {@link DocumentTableCell}, retained so the layout
9797
* can detect composed-content cells via
98-
* {@link DocumentTableCell#hasComposedContent()}.
98+
* {@link DocumentTableCell#hasComposedContent()}. {@code sanitizedLines} is
99+
* the cell's text with control characters cleaned, computed once here and
100+
* reused by the width, height and resolve passes instead of re-sanitizing the
101+
* content three times.
99102
*/
100103
private record LogicalCell(int startRow, int startColumn, int colSpan, int rowSpan,
101-
TableCellContent content, DocumentTableCell source) {
104+
TableCellContent content, DocumentTableCell source,
105+
List<String> sanitizedLines) {
102106
}
103107

104108
/**
@@ -207,7 +211,7 @@ static ResolvedTableLayoutWithContents resolveTableLayout(TableNode node,
207211
width,
208212
height,
209213
yOffset,
210-
sanitizeCellLines(logical.content()),
214+
logical.sanitizedLines(),
211215
style,
212216
fillInsets(stylesGrid, rowIndex, logical.startColumn(), logical.colSpan()),
213217
borderSides(stylesGrid, rowIndex, logical.startColumn(), logical.colSpan())));
@@ -298,7 +302,7 @@ private static double naturalCellHeight(LogicalCell logical,
298302
Padding padding = style.padding() == null ? Padding.zero() : style.padding();
299303
return prepared.measureResult().height() + padding.vertical();
300304
}
301-
return cellNaturalHeight(logical.content(), style, measurement);
305+
return cellNaturalHeight(logical.sanitizedLines(), style, measurement);
302306
}
303307

304308
static PreparedNode<TableNode> sliceTablePreparedNode(TableNode source,
@@ -510,8 +514,9 @@ private static List<List<LogicalCell>> buildLogicalRows(TableNode node, int colu
510514
occupied[r][c] = true;
511515
}
512516
}
517+
TableCellContent content = toTableCell(cell);
513518
logical.add(new LogicalCell(rowIndex, col, cell.colSpan(), cell.rowSpan(),
514-
toTableCell(cell), cell));
519+
content, cell, sanitizeCellLines(content)));
515520
col += cell.colSpan();
516521
}
517522
if (sourceIdx < source.size()) {
@@ -572,7 +577,7 @@ private static double[] resolveNaturalColumnWidths(TableNode node,
572577
}
573578
int col = logical.startColumn();
574579
singleCellRequired[col] = Math.max(singleCellRequired[col],
575-
cellNaturalWidth(logical.content(), stylesGrid[rowIndex][col], measurement));
580+
cellNaturalWidth(logical.sanitizedLines(), stylesGrid[rowIndex][col], measurement));
576581
}
577582
}
578583

@@ -596,7 +601,7 @@ private static double[] resolveNaturalColumnWidths(TableNode node,
596601
}
597602
int startCol = logical.startColumn();
598603
int endCol = startCol + logical.colSpan();
599-
double need = cellNaturalWidth(logical.content(), stylesGrid[rowIndex][startCol], measurement);
604+
double need = cellNaturalWidth(logical.sanitizedLines(), stylesGrid[rowIndex][startCol], measurement);
600605
double have = sumRange(widths, startCol, endCol);
601606
if (need <= have + EPS) {
602607
continue;
@@ -669,22 +674,22 @@ private static double[] resolveFinalColumnWidths(TableNode node,
669674
return finalWidths;
670675
}
671676

672-
private static double cellNaturalWidth(TableCellContent cell,
677+
private static double cellNaturalWidth(List<String> sanitizedLines,
673678
TableCellLayoutStyle style,
674679
TextMeasurementSystem measurement) {
675680
Padding padding = style.padding() == null ? Padding.zero() : style.padding();
676681
double maxWidth = 0.0;
677-
for (String line : sanitizeCellLines(cell)) {
682+
for (String line : sanitizedLines) {
678683
maxWidth = Math.max(maxWidth, measurement.textWidth(style.textStyle(), line));
679684
}
680685
return maxWidth + padding.horizontal();
681686
}
682687

683-
private static double cellNaturalHeight(TableCellContent cell,
688+
private static double cellNaturalHeight(List<String> sanitizedLines,
684689
TableCellLayoutStyle style,
685690
TextMeasurementSystem measurement) {
686691
Padding padding = style.padding() == null ? Padding.zero() : style.padding();
687-
int lineCount = Math.max(1, sanitizeCellLines(cell).size());
692+
int lineCount = Math.max(1, sanitizedLines.size());
688693
return (lineCount * measurement.lineHeight(style.textStyle()))
689694
+ ((lineCount - 1) * tableCellLineSpacing(style))
690695
+ padding.vertical();
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package com.demcha.compose.document.backend.fixed.pdf;
2+
3+
import static org.assertj.core.api.Assertions.assertThat;
4+
5+
import com.demcha.compose.GraphCompose;
6+
import com.demcha.compose.document.api.DocumentSession;
7+
import org.apache.pdfbox.Loader;
8+
import org.apache.pdfbox.contentstream.operator.Operator;
9+
import org.apache.pdfbox.pdfparser.PDFStreamParser;
10+
import org.apache.pdfbox.pdmodel.PDDocument;
11+
import org.junit.jupiter.api.Test;
12+
13+
import java.io.IOException;
14+
import java.util.List;
15+
16+
/**
17+
* Guards Finding 5: the paragraph render handler tracks the last-written font and
18+
* non-stroking colour, so a single-style paragraph that wraps into many spans
19+
* emits <b>one</b> {@code setFont} ({@code Tf}) operator for the whole paragraph
20+
* instead of one per span.
21+
*
22+
* <p>Renders a real one-page document and inspects the page content stream through
23+
* the established {@link PDFStreamParser} token pattern — the proof lives entirely
24+
* in test scope, with no instrumentation in the render handler.</p>
25+
*/
26+
class ParagraphTextStateDedupTest {
27+
28+
@Test
29+
void singleStyleParagraphEmitsOneFontOperatorAcrossManySpans() throws Exception {
30+
byte[] pdf;
31+
try (DocumentSession session = GraphCompose.document()
32+
.pageSize(400, 800)
33+
.margin(24, 24, 24, 24)
34+
.create()) {
35+
// One uniform style; long enough to wrap into many lines/spans on a
36+
// single page so the dedup is meaningful (without the guard this emits
37+
// a Tf per span).
38+
String body = ("GraphCompose lays out structured documents across pages "
39+
+ "while keeping headers and footers stable. ").repeat(8);
40+
session.pageFlow(flow -> flow.addParagraph(p -> p.text(body)));
41+
pdf = session.toPdfBytes();
42+
}
43+
44+
try (PDDocument document = Loader.loadPDF(pdf)) {
45+
assertThat(document.getNumberOfPages())
46+
.describedAs("body is sized to stay on one page so one q...Q block covers every span")
47+
.isEqualTo(1);
48+
int fontOps = operatorCount(document, "Tf");
49+
int textDraws = operatorCount(document, "Tj") + operatorCount(document, "TJ");
50+
51+
assertThat(textDraws)
52+
.describedAs("the paragraph must wrap into several drawn spans for the dedup to be meaningful")
53+
.isGreaterThanOrEqualTo(2);
54+
assertThat(fontOps)
55+
.describedAs("one setFont for the whole single-style paragraph, not one per span")
56+
.isEqualTo(1);
57+
}
58+
}
59+
60+
@Test
61+
void multiStyleParagraphReEmitsFontOnEachStyleChange() throws Exception {
62+
byte[] pdf;
63+
try (DocumentSession session = GraphCompose.document()
64+
.pageSize(400, 800)
65+
.margin(24, 24, 24, 24)
66+
.create()) {
67+
// Three consecutive runs with distinct decorations (regular / bold /
68+
// regular) on one line: the tracker must re-emit Tf at each change,
69+
// not over-dedup them into a single setFont (which would draw the bold
70+
// run in the regular font).
71+
session.pageFlow(flow -> flow.addParagraph(p ->
72+
p.rich(r -> r.plain("alpha ").bold("bravo ").plain("charlie"))));
73+
pdf = session.toPdfBytes();
74+
}
75+
76+
try (PDDocument document = Loader.loadPDF(pdf)) {
77+
assertThat(operatorCount(document, "Tf"))
78+
.describedAs("a style change within a paragraph must re-emit setFont (single-style baseline is 1)")
79+
.isGreaterThanOrEqualTo(2);
80+
}
81+
}
82+
83+
private static int operatorCount(PDDocument document, String operatorName) throws IOException {
84+
int count = 0;
85+
for (var page : document.getPages()) {
86+
List<Object> tokens = new PDFStreamParser(page).parse();
87+
for (Object token : tokens) {
88+
if (token instanceof Operator operator && operatorName.equals(operator.getName())) {
89+
count++;
90+
}
91+
}
92+
}
93+
return count;
94+
}
95+
}

0 commit comments

Comments
 (0)