Skip to content

Commit 602d1bd

Browse files
committed
perf(render): emit paragraph font/colour operators only when they change
The paragraph render handler wrote a setFont (Tf) and setNonStrokingColor (rg) operator for every text span, even across the spans of a single-style paragraph. Track the last-written (font, size) and colour across the paragraph's q...Q block and re-emit only on a real change, invalidating after inline images/shapes; a multi-span single-style paragraph now carries one Tf + one rg instead of one pair per span. Rendered output is unchanged (the skipped operators were redundant). Guarded by the visual-regression suite plus ParagraphTextStateDedupTest, which asserts a single-style paragraph emits one Tf across many drawn spans and that a multi-style paragraph re-emits on each style change. Finding 5.
1 parent 646a4ac commit 602d1bd

3 files changed

Lines changed: 160 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,17 @@ Open cycle — bug-fix / housekeeping. Entries land here as they merge.
2828
token). **Output is byte-identical** — the fit predicate is monotonic, so the
2929
search returns the same break index. No public API or behaviour change.
3030

31+
- **Paragraph render writes font and colour operators only when they change.** The
32+
paragraph render handler emitted a `setFont` (`Tf`) and `setNonStrokingColor`
33+
(`rg`) operator for *every* text span, even across the spans of a single-style
34+
paragraph. It now tracks the last-written `(font, size)` and colour across the
35+
paragraph's graphics-state block and re-emits only on a real change (invalidating
36+
after inline images/shapes), so a multi-span single-style paragraph carries one
37+
`Tf` + one `rg` instead of one pair per span — fewer operators for PDFBox to
38+
serialize. **Rendered output is unchanged** (the skipped operators were
39+
redundant); pinned by the visual-regression suite plus a content-stream test
40+
asserting one `Tf` across many drawn spans. No public API or behaviour change.
41+
3142
### Tests / tooling
3243

3344
- **Benchmark regression gate and measurement probe (benchmarks module, not part

src/main/java/com/demcha/compose/document/backend/fixed/pdf/handlers/PdfParagraphFragmentRenderHandler.java

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
import com.demcha.compose.font.FontLibrary;
1818
import com.demcha.compose.engine.render.pdf.PdfFont;
1919
import org.apache.pdfbox.pdmodel.PDPageContentStream;
20+
import org.apache.pdfbox.pdmodel.font.PDFont;
2021
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
2122

23+
import java.awt.Color;
2224
import java.io.IOException;
2325
import java.util.List;
2426

@@ -56,6 +58,11 @@ public void render(PlacedFragment fragment,
5658

5759
stream.saveGraphicsState();
5860
try {
61+
// Font and non-stroking colour persist across BT/ET within this one
62+
// q...Q block, so track the last-written pair and re-emit Tf/rg only
63+
// when a span actually changes them — a single-style paragraph then
64+
// emits one setFont + one setNonStrokingColor instead of one per span.
65+
TextRenderState textState = new TextRenderState();
5966
double cursorTop = contentTop;
6067
for (int lineIndex = 0; lineIndex < payload.lines().size(); lineIndex++) {
6168
ParagraphLine line = payload.lines().get(lineIndex);
@@ -71,7 +78,7 @@ public void render(PlacedFragment fragment,
7178
case LEFT -> innerX;
7279
};
7380

74-
renderLine(stream, fonts, line, lineX, baselineY, environment);
81+
renderLine(stream, fonts, line, lineX, baselineY, environment, textState);
7582

7683
cursorTop = lineTop - resolvedLineHeight - payload.lineGap();
7784
}
@@ -127,7 +134,8 @@ private void renderLine(PDPageContentStream stream,
127134
ParagraphLine line,
128135
double lineX,
129136
double baselineY,
130-
PdfRenderEnvironment environment) throws IOException {
137+
PdfRenderEnvironment environment,
138+
TextRenderState textState) throws IOException {
131139
List<ParagraphSpan> spans = line.spans();
132140
if (spans.isEmpty()) {
133141
return;
@@ -155,8 +163,10 @@ private void renderLine(PDPageContentStream stream,
155163
stream.newLineAtOffset((float) cursorX, (float) baselineY);
156164
inTextBlock = true;
157165
}
158-
stream.setFont(font.fontType(textSpan.textStyle().decoration()), (float) textSpan.textStyle().size());
159-
stream.setNonStrokingColor(textSpan.textStyle().color());
166+
textState.applyFont(stream,
167+
font.fontType(textSpan.textStyle().decoration()),
168+
(float) textSpan.textStyle().size());
169+
textState.applyColor(stream, textSpan.textStyle().color());
160170
stream.showText(text);
161171
cursorX += textSpan.width();
162172
} else if (span instanceof ParagraphImageSpan imageSpan) {
@@ -176,6 +186,10 @@ private void renderLine(PDPageContentStream stream,
176186
(float) imageBottom,
177187
(float) imageSpan.width(),
178188
(float) imageSpan.height());
189+
// An inline graphic runs its own graphics-state save/restore and
190+
// colour ops; drop the tracked font/colour so the next text span
191+
// re-emits them rather than trusting persistence across it.
192+
textState.invalidate();
179193
cursorX += imageSpan.width();
180194
} else if (span instanceof ParagraphShapeSpan shapeSpan) {
181195
if (inTextBlock) {
@@ -184,6 +198,7 @@ private void renderLine(PDPageContentStream stream,
184198
}
185199
renderShape(stream, shapeSpan, cursorX, baselineY,
186200
line.textAscent(), line.baselineOffsetFromBottom(), line.lineHeight());
201+
textState.invalidate();
187202
cursorX += shapeSpan.width();
188203
}
189204
}
@@ -287,4 +302,39 @@ private static void renderShape(PDPageContentStream stream,
287302
}
288303
}
289304

305+
/**
306+
* Tracks the font/size and non-stroking colour last written to the content
307+
* stream within one paragraph's {@code q...Q} block, so the handler emits a
308+
* {@code Tf}/{@code rg} operator only when a span actually changes them. The
309+
* common single-style paragraph then carries one of each instead of one per
310+
* span. {@link #invalidate()} forces a re-emit after anything that may disturb
311+
* the persisted text state (inline images, shapes).
312+
*/
313+
private static final class TextRenderState {
314+
private PDFont font;
315+
private float size = Float.NaN;
316+
private Color color;
317+
318+
void applyFont(PDPageContentStream stream, PDFont newFont, float newSize) throws IOException {
319+
if (newFont != font || newSize != size) {
320+
stream.setFont(newFont, newSize);
321+
font = newFont;
322+
size = newSize;
323+
}
324+
}
325+
326+
void applyColor(PDPageContentStream stream, Color newColor) throws IOException {
327+
if (!newColor.equals(color)) {
328+
stream.setNonStrokingColor(newColor);
329+
color = newColor;
330+
}
331+
}
332+
333+
void invalidate() {
334+
font = null;
335+
size = Float.NaN;
336+
color = null;
337+
}
338+
}
339+
290340
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package com.demcha.compose.document.backend.fixed.pdf;
2+
3+
import static org.assertj.core.api.Assertions.assertThat;
4+
5+
import com.demcha.compose.GraphCompose;
6+
import com.demcha.compose.document.api.DocumentSession;
7+
import org.apache.pdfbox.Loader;
8+
import org.apache.pdfbox.contentstream.operator.Operator;
9+
import org.apache.pdfbox.pdfparser.PDFStreamParser;
10+
import org.apache.pdfbox.pdmodel.PDDocument;
11+
import org.junit.jupiter.api.Test;
12+
13+
import java.io.IOException;
14+
import java.util.List;
15+
16+
/**
17+
* Guards Finding 5: the paragraph render handler tracks the last-written font and
18+
* non-stroking colour, so a single-style paragraph that wraps into many spans
19+
* emits <b>one</b> {@code setFont} ({@code Tf}) operator for the whole paragraph
20+
* instead of one per span.
21+
*
22+
* <p>Renders a real one-page document and inspects the page content stream through
23+
* the established {@link PDFStreamParser} token pattern — the proof lives entirely
24+
* in test scope, with no instrumentation in the render handler.</p>
25+
*/
26+
class ParagraphTextStateDedupTest {
27+
28+
@Test
29+
void singleStyleParagraphEmitsOneFontOperatorAcrossManySpans() throws Exception {
30+
byte[] pdf;
31+
try (DocumentSession session = GraphCompose.document()
32+
.pageSize(400, 800)
33+
.margin(24, 24, 24, 24)
34+
.create()) {
35+
// One uniform style; long enough to wrap into many lines/spans on a
36+
// single page so the dedup is meaningful (without the guard this emits
37+
// a Tf per span).
38+
String body = ("GraphCompose lays out structured documents across pages "
39+
+ "while keeping headers and footers stable. ").repeat(8);
40+
session.pageFlow(flow -> flow.addParagraph(p -> p.text(body)));
41+
pdf = session.toPdfBytes();
42+
}
43+
44+
try (PDDocument document = Loader.loadPDF(pdf)) {
45+
assertThat(document.getNumberOfPages())
46+
.describedAs("body is sized to stay on one page so one q...Q block covers every span")
47+
.isEqualTo(1);
48+
int fontOps = operatorCount(document, "Tf");
49+
int textDraws = operatorCount(document, "Tj") + operatorCount(document, "TJ");
50+
51+
assertThat(textDraws)
52+
.describedAs("the paragraph must wrap into several drawn spans for the dedup to be meaningful")
53+
.isGreaterThanOrEqualTo(2);
54+
assertThat(fontOps)
55+
.describedAs("one setFont for the whole single-style paragraph, not one per span")
56+
.isEqualTo(1);
57+
}
58+
}
59+
60+
@Test
61+
void multiStyleParagraphReEmitsFontOnEachStyleChange() throws Exception {
62+
byte[] pdf;
63+
try (DocumentSession session = GraphCompose.document()
64+
.pageSize(400, 800)
65+
.margin(24, 24, 24, 24)
66+
.create()) {
67+
// Three consecutive runs with distinct decorations (regular / bold /
68+
// regular) on one line: the tracker must re-emit Tf at each change,
69+
// not over-dedup them into a single setFont (which would draw the bold
70+
// run in the regular font).
71+
session.pageFlow(flow -> flow.addParagraph(p ->
72+
p.rich(r -> r.plain("alpha ").bold("bravo ").plain("charlie"))));
73+
pdf = session.toPdfBytes();
74+
}
75+
76+
try (PDDocument document = Loader.loadPDF(pdf)) {
77+
assertThat(operatorCount(document, "Tf"))
78+
.describedAs("a style change within a paragraph must re-emit setFont (single-style baseline is 1)")
79+
.isGreaterThanOrEqualTo(2);
80+
}
81+
}
82+
83+
private static int operatorCount(PDDocument document, String operatorName) throws IOException {
84+
int count = 0;
85+
for (var page : document.getPages()) {
86+
List<Object> tokens = new PDFStreamParser(page).parse();
87+
for (Object token : tokens) {
88+
if (token instanceof Operator operator && operatorName.equals(operator.getName())) {
89+
count++;
90+
}
91+
}
92+
}
93+
return count;
94+
}
95+
}

0 commit comments

Comments
 (0)