Skip to content

Commit 84ef85a

Browse files
authored
Merge pull request #164 from DemchaAV/fix/docx-list-parity
fix(docx): list export resolves markers through the shared PDF rules
2 parents a211a8a + 83d1d60 commit 84ef85a

6 files changed

Lines changed: 213 additions & 55 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,16 @@ Entries land here as they merge.
140140
trailing space — with another literal space, so every exported item read
141141
`"• text"`, and markerless lists gained a stray leading space. The export
142142
now uses `ListMarker.prefix()`, matching the fixed-layout text pipeline.
143+
- **DOCX list export fully matches the PDF list pipeline.** The semantic Word
144+
backend resolved nested-item marker fallbacks against the flat-list marker
145+
and skipped flat-item normalization, so the two outputs of one session
146+
disagreed: a nested item without an explicit marker exported as the list
147+
bullet where the PDF renders the depth cascade (`•` / `◦` / `▪` / `·`),
148+
an author-typed `"- item"` doubled up as `"• - item"`, and blank items
149+
produced marker-only paragraphs. Both rules now live in one shared place —
150+
`ListMarker.defaultForDepth(int)` and
151+
`ListMarker.normalizeItemText(String, boolean)` (`@since 1.8.0`) — and the
152+
fixed-layout pipeline and the DOCX export both call them.
143153

144154
### Documentation
145155

src/main/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackend.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,14 +159,24 @@ private void writeNode(XWPFDocument document, DocumentNode node) throws Exceptio
159159

160160
/**
161161
* Semantic list mapping: each item becomes a marker-prefixed paragraph in
162-
* the list's text style; nested items indent two spaces per depth and use
163-
* their own marker when one is set.
162+
* the list's text style. Flat items run through the same
163+
* {@code ListMarker.normalizeItemText} step as fixed-layout rendering
164+
* (author-typed markers stripped, blank items skipped); nested items
165+
* indent two spaces per depth and use their own marker when one is set,
166+
* falling back to {@code ListMarker.defaultForDepth} otherwise.
164167
*/
165168
private void writeList(XWPFDocument document,
166169
com.demcha.compose.document.node.ListNode list) {
167170
for (String item : list.items()) {
171+
// Same normalization as the fixed-layout pipeline: strip an
172+
// author-typed leading marker and skip items with no content.
173+
String normalized = com.demcha.compose.document.node.ListMarker
174+
.normalizeItemText(item, list.normalizeMarkers());
175+
if (normalized.isBlank()) {
176+
continue;
177+
}
168178
writeListLine(document, list.textStyle(),
169-
list.marker().prefix() + item, 0);
179+
list.marker().prefix() + normalized, 0);
170180
}
171181
for (com.demcha.compose.document.node.ListItem item : list.nestedItems()) {
172182
writeNestedItem(document, list, item, 0);
@@ -178,9 +188,13 @@ private void writeNestedItem(XWPFDocument document,
178188
com.demcha.compose.document.node.ListItem item,
179189
int depth) {
180190
// prefix() carries its own trailing space (and is empty for
181-
// markerless lists), matching the fixed-layout text pipeline.
191+
// markerless lists). Items without an explicit (or markerFor-baked)
192+
// marker fall back to the same depth cascade the fixed-layout
193+
// pipeline uses — never to the flat-list marker.
182194
com.demcha.compose.document.node.ListMarker marker =
183-
item.marker() != null ? item.marker() : list.marker();
195+
item.marker() != null
196+
? item.marker()
197+
: com.demcha.compose.document.node.ListMarker.defaultForDepth(depth);
184198
writeListLine(document, list.textStyle(), marker.prefix() + item.label(), depth);
185199
for (com.demcha.compose.document.node.ListItem child : item.children()) {
186200
writeNestedItem(document, list, child, depth + 1);

src/main/java/com/demcha/compose/document/layout/TextFlowSupport.java

Lines changed: 2 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ private static ListNode flattenNestedListNode(ListNode node) {
195195

196196
private static void flattenNestedItems(List<ListItem> items, int depth, List<String> output) {
197197
for (ListItem item : items) {
198-
ListMarker marker = item.marker() != null ? item.marker() : defaultMarkerForDepth(depth);
198+
ListMarker marker = item.marker() != null ? item.marker() : ListMarker.defaultForDepth(depth);
199199
StringBuilder prefix = new StringBuilder(NESTED_LIST_INDENT_UNIT.repeat(depth));
200200
if (marker.isVisible()) {
201201
// ListMarker.normalize already appends a trailing space
@@ -210,20 +210,6 @@ private static void flattenNestedItems(List<ListItem> items, int depth, List<Str
210210
}
211211
}
212212

213-
/**
214-
* Built-in marker cascade used when a nested item has no
215-
* {@code marker} override and the list builder didn't set one for
216-
* this depth via {@code markerFor(int, ListMarker)}.
217-
*/
218-
private static ListMarker defaultMarkerForDepth(int depth) {
219-
return switch (depth) {
220-
case 0 -> ListMarker.bullet(); // •
221-
case 1 -> new ListMarker("◦"); // ◦
222-
case 2 -> new ListMarker("▪"); // ▪
223-
default -> new ListMarker("·"); // ·
224-
};
225-
}
226-
227213
/**
228214
* Splits a prepared list at whole-item boundaries, falling back to
229215
* splitting the first item's lines when no whole item fits.
@@ -350,7 +336,7 @@ private static PreparedListLayout prepareListLayout(ListNode node,
350336
boolean markdownEnabled) {
351337
List<PreparedListItemLayout> items = new ArrayList<>();
352338
for (String item : node.items()) {
353-
String normalizedItem = normalizeListItem(item, node.normalizeMarkers());
339+
String normalizedItem = ListMarker.normalizeItemText(item, node.normalizeMarkers());
354340
if (normalizedItem.isBlank()) {
355341
continue;
356342
}
@@ -517,33 +503,6 @@ private static double listItemsHeight(List<PreparedListItemLayout> items, double
517503
return total;
518504
}
519505

520-
private static String normalizeListItem(String value, boolean normalizeMarkers) {
521-
String safe = value == null ? "" : value;
522-
if (!normalizeMarkers) {
523-
// Preserve raw whitespace and any author-supplied marker
524-
// characters. Used by the nested-list flatten path so the
525-
// depth-based indent prefix survives layout.
526-
return safe;
527-
}
528-
String normalized = safe.trim();
529-
if (normalized.isEmpty()) {
530-
return normalized;
531-
}
532-
if (normalized.startsWith("•")) {
533-
return normalized.substring(1).trim();
534-
}
535-
if (normalized.startsWith("- ")) {
536-
return normalized.substring(2).trim();
537-
}
538-
if (normalized.startsWith("+ ")) {
539-
return normalized.substring(2).trim();
540-
}
541-
if (normalized.startsWith("* ") && !normalized.startsWith("**")) {
542-
return normalized.substring(2).trim();
543-
}
544-
return normalized;
545-
}
546-
547506
// ------------------------------------------------------------------
548507
// Paragraph layout core
549508
// ------------------------------------------------------------------

src/main/java/com/demcha/compose/document/node/ListMarker.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,70 @@ public static ListMarker custom(String marker) {
5656
return new ListMarker(marker);
5757
}
5858

59+
/**
60+
* Returns the built-in marker for a nested-list depth, used when neither
61+
* the item itself nor {@code ListBuilder.markerFor(depth, ...)} supplied
62+
* one: {@code •} at depth 0, {@code ◦} at depth 1, {@code ▪} at depth 2,
63+
* and {@code ·} below that. Fixed-layout rendering and the semantic DOCX
64+
* export both resolve their fallback through this single cascade so the
65+
* two outputs of one session always agree.
66+
*
67+
* @param depth zero-based nesting depth
68+
* @return default marker for the depth
69+
* @since 1.8.0
70+
*/
71+
public static ListMarker defaultForDepth(int depth) {
72+
return switch (depth) {
73+
case 0 -> bullet(); // •
74+
case 1 -> new ListMarker("◦"); // ◦
75+
case 2 -> new ListMarker("▪"); // ▪
76+
default -> new ListMarker("·"); // ·
77+
};
78+
}
79+
80+
/**
81+
* Normalizes an author-supplied flat list item before a marker prefix is
82+
* applied: trims the text and strips one leading author-typed marker
83+
* ({@code •}, {@code "- "}, {@code "+ "}, or {@code "* "} — but not a
84+
* {@code **bold} run) so the typed marker does not double up with the
85+
* rendered one. When {@code normalizeMarkers} is {@code false} the value
86+
* is returned unchanged apart from null-safety. A blank result means the
87+
* item carries no renderable content and should be skipped, matching
88+
* fixed-layout rendering.
89+
*
90+
* @param value raw author-supplied item text; {@code null} is
91+
* treated as empty
92+
* @param normalizeMarkers whether author-typed markers are stripped
93+
* @return normalized item text, possibly blank
94+
* @since 1.8.0
95+
*/
96+
public static String normalizeItemText(String value, boolean normalizeMarkers) {
97+
String safe = value == null ? "" : value;
98+
if (!normalizeMarkers) {
99+
// Preserve raw whitespace and any author-supplied marker
100+
// characters. Used by the nested-list flatten path so the
101+
// depth-based indent prefix survives layout.
102+
return safe;
103+
}
104+
String normalized = safe.trim();
105+
if (normalized.isEmpty()) {
106+
return normalized;
107+
}
108+
if (normalized.startsWith("•")) {
109+
return normalized.substring(1).trim();
110+
}
111+
if (normalized.startsWith("- ")) {
112+
return normalized.substring(2).trim();
113+
}
114+
if (normalized.startsWith("+ ")) {
115+
return normalized.substring(2).trim();
116+
}
117+
if (normalized.startsWith("* ") && !normalized.startsWith("**")) {
118+
return normalized.substring(2).trim();
119+
}
120+
return normalized;
121+
}
122+
59123
/**
60124
* Returns {@code true} when this marker has non-whitespace content.
61125
*
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
package com.demcha.compose.document.backend.semantic;
2+
3+
import com.demcha.compose.GraphCompose;
4+
import com.demcha.compose.document.api.DocumentSession;
5+
import com.demcha.compose.document.node.ListMarker;
6+
import com.demcha.compose.document.style.DocumentInsets;
7+
import org.apache.poi.xwpf.usermodel.XWPFDocument;
8+
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
9+
import org.junit.jupiter.api.Test;
10+
import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
11+
12+
import java.io.ByteArrayInputStream;
13+
import java.util.List;
14+
import java.util.function.Consumer;
15+
16+
import static org.assertj.core.api.Assertions.assertThat;
17+
18+
/**
19+
* PDF ↔ DOCX list parity: the semantic Word export must resolve markers and
20+
* item text through the same shared rules as fixed-layout rendering — the
21+
* {@link ListMarker#defaultForDepth(int)} cascade for nested fallbacks and
22+
* {@link ListMarker#normalizeItemText(String, boolean)} for flat items —
23+
* so both outputs of one session agree.
24+
*/
25+
@DisabledIfSystemProperty(named = "no.poi", matches = "true",
26+
disabledReason = "DocxSemanticBackend requires poi-ooxml; the no-poi profile validates the rest of the suite without it")
27+
class DocxListParityTest {
28+
29+
@Test
30+
void nestedFallbackFollowsTheDepthCascade() throws Exception {
31+
List<String> texts = exportTexts(flow -> flow
32+
.addList(list -> list
33+
.name("Outline")
34+
.addItem("alpha", l1 -> l1
35+
.addItem("beta", l2 -> l2
36+
.addItem("gamma")))));
37+
38+
assertThat(texts).contains("• alpha", "  ◦ beta", "    ▪ gamma");
39+
}
40+
41+
@Test
42+
void explicitMarkersStillBeatTheCascade() throws Exception {
43+
List<String> texts = exportTexts(flow -> flow
44+
.addList(list -> list
45+
.name("Outline")
46+
.markerFor(1, ListMarker.custom("→"))
47+
.addItem("alpha", l1 -> l1.addItem("beta"))));
48+
49+
assertThat(texts).contains("  → beta");
50+
assertThat(texts).doesNotContain("  ◦ beta");
51+
}
52+
53+
@Test
54+
void flatItemsStripAuthorTypedMarkers() throws Exception {
55+
List<String> texts = exportTexts(flow -> flow
56+
.addList("- dashed", "• bulleted", "* starred", "+ plussed"));
57+
58+
assertThat(texts).contains("• dashed", "• bulleted", "• starred", "• plussed");
59+
assertThat(texts).noneMatch(t -> t.startsWith("• - ") || t.startsWith("• • "));
60+
}
61+
62+
@Test
63+
void boldLeadIsNotMistakenForAMarker() throws Exception {
64+
List<String> texts = exportTexts(flow -> flow
65+
.addList("**bold** lead stays intact"));
66+
67+
assertThat(texts).contains("• **bold** lead stays intact");
68+
}
69+
70+
@Test
71+
void blankFlatItemsAreDropped() throws Exception {
72+
List<String> texts = exportTexts(flow -> flow
73+
.addList("kept", "", " "));
74+
75+
assertThat(texts).contains("• kept");
76+
// No marker-only paragraphs for the blank items.
77+
assertThat(texts).noneMatch(t -> t.trim().equals("•"));
78+
}
79+
80+
@Test
81+
void normalizeMarkersFalsePreservesRawItems() throws Exception {
82+
List<String> texts = exportTexts(flow -> flow
83+
.addList(list -> list
84+
.name("Raw")
85+
.normalizeMarkers(false)
86+
.items("- raw dash survives")));
87+
88+
assertThat(texts).contains("• - raw dash survives");
89+
}
90+
91+
private static List<String> exportTexts(
92+
Consumer<com.demcha.compose.document.dsl.PageFlowBuilder> author) throws Exception {
93+
byte[] docxBytes;
94+
try (DocumentSession session = GraphCompose.document()
95+
.pageSize(595, 842)
96+
.margin(DocumentInsets.of(36))
97+
.create()) {
98+
var flow = session.dsl().pageFlow().name("Flow");
99+
author.accept(flow);
100+
flow.build();
101+
docxBytes = session.export(new DocxSemanticBackend());
102+
}
103+
try (XWPFDocument document = new XWPFDocument(new ByteArrayInputStream(docxBytes))) {
104+
return document.getParagraphs().stream()
105+
.map(XWPFParagraph::getText)
106+
.toList();
107+
}
108+
}
109+
}

src/test/java/com/demcha/compose/document/backend/semantic/DocxSemanticBackendTest.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,12 @@ void nestedListItemsIndentTwoSpacesPerDepth() throws Exception {
111111
List<String> texts = document.getParagraphs().stream()
112112
.map(XWPFParagraph::getText).toList();
113113
// Two spaces of indent per depth; without per-item markers the
114-
// semantic export falls back to the list's top-level bullet at
115-
// every level (the visual depth cascade is a layout-pass concern).
114+
// semantic export falls back to the same depth cascade the
115+
// fixed-layout pipeline uses (• ◦ ▪), so PDF and DOCX agree.
116116
assertThat(texts).contains(
117117
"• Level zero",
118-
"  • Level one",
119-
"    • Level two");
118+
"  ◦ Level one",
119+
"    ▪ Level two");
120120
}
121121
}
122122

@@ -139,10 +139,12 @@ void nestedListItemsKeepTheirCustomMarkers() throws Exception {
139139
try (XWPFDocument document = new XWPFDocument(new ByteArrayInputStream(docxBytes))) {
140140
List<String> texts = document.getParagraphs().stream()
141141
.map(XWPFParagraph::getText).toList();
142-
// The top-level custom marker and the per-depth override both
143-
// survive the export.
142+
// The per-depth override survives; the flat-list marker("→") does
143+
// not leak into nested fallbacks — depth 0 takes the cascade
144+
// bullet exactly as fixed-layout rendering does (markerFor(0, ...)
145+
// is the way to control depth 0).
144146
assertThat(texts).contains(
145-
"→ Root",
147+
"• Root",
146148
"  ‣ Child");
147149
}
148150
}

0 commit comments

Comments
 (0)