Skip to content
Open
18 changes: 18 additions & 0 deletions .changeset/ninety-dogs-grow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
"@react-pdf/pdfkit": minor
"@react-pdf/textkit": minor
---

Fix and rework the hyphenation algorithm, and allow custom word hyphenation algorithms to specify whether a hyphen should be inserted in case the word is wrapped.

**Caution**: If you have been using a custom hyphenation callback - which hasn't been working properly since at least version 2.0.21 - then you will need to change your implementation to leave a soft hyphen character (`'\u00AD'`) at the end of syllables where you want react-pdf to insert a hyphen when wrapping lines. Syllables without a final soft hyphen character will still be able to break, but will not produce a hyphen character at the end of the line.

This allows you to break correctly on normal hyphens or other special characters in your text. For example, to use the default English-language syllable breaking built into react-pdf, but also break after hyphens that occur naturally in your text (as is often the case in hyperlinks), you could use the following hyphenation callback:
```js
import { Font } from '@react-pdf/renderer';

Font.registerHyphenationCallback((word, originalHyphenationCallback) => {
return originalHyphenationCallback(word).flatMap(w => w.split(/(?<=-)/))
})
```
(`flatMap` requires at least ES2019; the lookbehind assertion `(?<=-)` requires at least ES2018)
9 changes: 9 additions & 0 deletions packages/font/tests/standard-fonts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -266,4 +266,13 @@ describe('standard fonts', () => {

expect(font.src).toBe('Helvetica-BoldOblique');
});

it('should resolve advanceWidth of soft hyphen to be zero', () => {
  // U+00AD (soft hyphen) is expected to occupy no horizontal space.
  const softHyphen = '\u00AD';

  const helvetica = new FontStore().getFont({ fontFamily: 'Helvetica' });
  const encoded = helvetica.data.encode(softHyphen);

  expect(encoded[1][0].advanceWidth).toBe(0);
});
});
1 change: 1 addition & 0 deletions packages/pdfkit/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"url": "http://badassjs.com/"
},
"scripts": {
"test": "vitest",
"clear": "rimraf ./lib ./src/font/data/*.json",
"parse:afm": "node ./src/font/data/compressData.js",
"build": "npm run clear && npm run parse:afm && rollup -c ",
Expand Down
2 changes: 1 addition & 1 deletion packages/pdfkit/src/font/afm.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ oe .notdef zcaron ydieresis
space exclamdown cent sterling
currency yen brokenbar section
dieresis copyright ordfeminine guillemotleft
logicalnot hyphen registered macron
logicalnot softhyphen registered macron
degree plusminus twosuperior threesuperior
acute mu paragraph periodcentered
cedilla onesuperior ordmasculine guillemotright
Expand Down
12 changes: 12 additions & 0 deletions packages/pdfkit/tests/font/standard.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { describe, expect, it } from 'vitest';

import StandardFont from '../../src/font.js';

describe('standard fonts', () => {
  // U+00AD (soft hyphen) is expected to occupy no horizontal space.
  const softHyphen = '\u00AD';

  it('should resolve advanceWidth of soft hyphen to be zero', () => {
    const helvetica = StandardFont.open({}, 'Helvetica', 'Helvetica', 'foobar');
    const encoded = helvetica.encode(softHyphen);

    expect(encoded[1][0].advanceWidth).toBe(0);
  });
});
20 changes: 18 additions & 2 deletions packages/textkit/src/engines/linebreaker/bestFit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@ import { Node } from './types';

const INFINITY = 10000;

/**
 * Walk forward from `start` over the uninterrupted run of glue and penalty
 * nodes that directly follows it, and return the last node of that run.
 * When the node right after `start` is neither glue nor penalty (or there is
 * no such node), the node at `start` itself is returned.
 *
 * @param nodes - Node list to scan
 * @param start - Index of the node the scan begins after
 * @returns Last glue/penalty node of the run, or the node at `start`
 */
const skipPastGlueAndPenalty = (nodes: Node[], start: number): Node => {
  // Index of the last node accepted so far; begins at the starting node.
  let last = start;

  while (last + 1 < nodes.length) {
    const kind = nodes[last + 1].type;
    if (kind !== 'glue' && kind !== 'penalty') break;
    last += 1;
  }

  return nodes[last];
};

const getNextBreakpoint = (
subnodes: Node[],
widths: number[],
Expand Down Expand Up @@ -37,6 +47,8 @@ const getNextBreakpoint = (
return 0;
};

let hyphenWidth = 0;

for (let i = 0; i < subnodes.length; i += 1) {
const node = subnodes[i];

Expand All @@ -50,7 +62,11 @@ const getNextBreakpoint = (
sum.shrink += node.shrink;
}

if (sum.width - sum.shrink > lineLength) {
const potentialEndOfLine = skipPastGlueAndPenalty(subnodes, i);
hyphenWidth =
potentialEndOfLine.type === 'penalty' ? potentialEndOfLine.width : 0;

if (sum.width - sum.shrink + hyphenWidth > lineLength) {
if (position === null) {
let j = i === 0 ? i + 1 : i;

Expand Down Expand Up @@ -78,7 +94,7 @@ const getNextBreakpoint = (
}
}

return sum.width - sum.shrink > lineLength ? position : null;
return sum.width - sum.shrink + hyphenWidth > lineLength ? position : null;
};

const applyBestFit = (nodes: Node[], widths: number[]): number[] => {
Expand Down
42 changes: 36 additions & 6 deletions packages/textkit/src/engines/linebreaker/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ import insertGlyph from '../../attributedString/insertGlyph';
import advanceWidthBetween from '../../attributedString/advanceWidthBetween';
import { AttributedString, Attributes, LayoutOptions } from '../../types';
import { Node } from './types';
import generateGlyphs from '../../layout/generateGlyphs';

const HYPHEN = 0x002d;
const SOFT_HYPHEN = '\u00AD';
const HYPHEN_CODE_POINT = 0x002d;
const TOLERANCE_STEPS = 5;
const TOLERANCE_LIMIT = 50;

Expand Down Expand Up @@ -45,23 +47,49 @@ const breakLines = (
end = prevNode.end;

line = slice(start, end, attributedString);
line = insertGlyph(line.string.length, HYPHEN, line);
if (node.width > 0) {
// A non-zero-width penalty indicates an additional hyphen should be inserted
line = insertGlyph(line.string.length, HYPHEN_CODE_POINT, line);
}
} else {
end = node.end;
line = slice(start, end, attributedString);
}

start = end;

return [...acc, line];
return [...acc, removeSoftHyphens(line)];
}, []);

// Last line
lines.push(slice(start, attributedString.string.length, attributedString));
const lastLine = slice(
start,
attributedString.string.length,
attributedString,
);
lines.push(removeSoftHyphens(lastLine));

return lines;
};

/**
 * Strip every soft hyphen character out of a line.
 * Once line breaking is done the soft hyphens have served their purpose;
 * left in the text they would only disturb rendering further down the line.
 *
 * @param line - Line to clean up
 * @returns Copy of the line without soft hyphens, merged with the result of
 *   running `generateGlyphs` on that cleaned copy
 */
const removeSoftHyphens = (line: AttributedString): AttributedString => {
  const cleanedString = line.string.split(SOFT_HYPHEN).join('');
  const cleanedLine = { ...line, string: cleanedString };

  // The text changed, so glyph data is produced again from the cleaned line.
  const regenerated = generateGlyphs()(cleanedLine);

  return { ...cleanedLine, ...regenerated };
};

/**
* Return Knuth & Plass nodes based on line and previously calculated syllables
*
Expand All @@ -78,6 +106,7 @@ const getNodes = (
let start = 0;

const hyphenWidth = 5;
const softHyphen = '\u00ad';

const { syllables } = attributedString;

Expand Down Expand Up @@ -107,7 +136,8 @@ const getNodes = (

if (syllables[index + 1] && hyphenated) {
// Add penalty node. Penalty nodes are used to represent hyphenation points.
acc.push(knuthPlass.penalty(hyphenWidth, hyphenPenalty, 1));
const penaltyWidth = s.endsWith(softHyphen) ? hyphenWidth : 0;
acc.push(knuthPlass.penalty(penaltyWidth, hyphenPenalty, 1));
}
}

Expand Down
2 changes: 1 addition & 1 deletion packages/textkit/src/engines/wordHyphenation/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const hyphenator = hyphen(pattern);
* @returns Word parts
*/
const splitHyphen = (word: string) => {
  // Split *after* each soft hyphen using a zero-width lookbehind, so every
  // part keeps its trailing soft hyphen instead of having it consumed by the
  // separator. The stale `word.split(SOFT_HYPHEN)` return that preceded this
  // line dropped the soft hyphens and made this statement unreachable.
  return word.split(new RegExp(`(?<=${SOFT_HYPHEN})`));
};

const cache: Record<string, string[]> = {};
Expand Down
2 changes: 1 addition & 1 deletion packages/textkit/src/layout/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ const layoutEngine = (engines: Engines) => {
resolveYOffset(),
resolveAttachments(),
verticalAlignment(),
wrapWords(engines, options),
generateGlyphs(),
wrapWords(engines, options),
bidiMirroring(),
preprocessRuns(engines),
);
Expand Down
9 changes: 4 additions & 5 deletions packages/textkit/src/layout/wrapWords.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@ const wrapWords = (
const syllables = [];
const fragments = [];

const hyphenateWord =
options.hyphenationCallback ||
engines.wordHyphenation?.() ||
defaultHyphenationEngine;
const builtinHyphenateWord =
engines.wordHyphenation?.() || defaultHyphenationEngine;
const hyphenateWord = options.hyphenationCallback || builtinHyphenateWord;

for (let i = 0; i < attributedString.runs.length; i += 1) {
let string = '';
Expand All @@ -44,7 +43,7 @@ const wrapWords = (

for (let j = 0; j < words.length; j += 1) {
const word = words[j];
const parts = hyphenateWord(word);
const parts = hyphenateWord(word, builtinHyphenateWord);

syllables.push(...parts);
string += parts.join('');
Expand Down
5 changes: 4 additions & 1 deletion packages/textkit/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@ export type Fragment = {
export type Paragraph = AttributedString[];

export type LayoutOptions = {
hyphenationCallback?: (word: string) => string[];
hyphenationCallback?: (
word: string | null,
originalHyphenationCallback: (word: string | null) => string[],
) => string[];
tolerance?: number;
hyphenationPenalty?: number;
expandCharFactor?: JustificationFactor;
Expand Down
Loading