Skip to content

Commit 1f6e72b

Browse files
committed
fix(compiler): support full range of entity decoding in browser builds
BREAKING CHANGE: compiler options have been adjusted. - new option `decodeEntities` is added. - `namedCharacterReferences` option has been removed. - `maxCRNameLength` option has been removed.
1 parent 8c17535 commit 1f6e72b

File tree

11 files changed

+1423
-2987
lines changed

11 files changed

+1423
-2987
lines changed

packages/compiler-core/__tests__/__snapshots__/parse.spec.ts.snap

+1,178-2,506
Large diffs are not rendered by default.

packages/compiler-core/__tests__/parse.spec.ts

+2-270
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import {
99
NodeTypes,
1010
Position,
1111
TextNode,
12-
AttributeNode,
1312
InterpolationNode
1413
} from '../src/ast'
1514

@@ -163,114 +162,6 @@ describe('compiler: parse', () => {
163162
}
164163
})
165164
})
166-
167-
test('HTML entities compatibility in text (https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state).', () => {
168-
const spy = jest.fn()
169-
const ast = baseParse('&ampersand;', {
170-
namedCharacterReferences: { amp: '&' },
171-
onError: spy
172-
})
173-
const text = ast.children[0] as TextNode
174-
175-
expect(text).toStrictEqual({
176-
type: NodeTypes.TEXT,
177-
content: '&ersand;',
178-
loc: {
179-
start: { offset: 0, line: 1, column: 1 },
180-
end: { offset: 11, line: 1, column: 12 },
181-
source: '&ampersand;'
182-
}
183-
})
184-
expect(spy.mock.calls).toMatchObject([
185-
[
186-
{
187-
code: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
188-
loc: {
189-
start: { offset: 4, line: 1, column: 5 }
190-
}
191-
}
192-
]
193-
])
194-
})
195-
196-
test('HTML entities compatibility in attribute (https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state).', () => {
197-
const spy = jest.fn()
198-
const ast = baseParse(
199-
'<div a="&ampersand;" b="&amp;ersand;" c="&amp!"></div>',
200-
{
201-
namedCharacterReferences: { amp: '&', 'amp;': '&' },
202-
onError: spy
203-
}
204-
)
205-
const element = ast.children[0] as ElementNode
206-
const text1 = (element.props[0] as AttributeNode).value
207-
const text2 = (element.props[1] as AttributeNode).value
208-
const text3 = (element.props[2] as AttributeNode).value
209-
210-
expect(text1).toStrictEqual({
211-
type: NodeTypes.TEXT,
212-
content: '&ampersand;',
213-
loc: {
214-
start: { offset: 7, line: 1, column: 8 },
215-
end: { offset: 20, line: 1, column: 21 },
216-
source: '"&ampersand;"'
217-
}
218-
})
219-
expect(text2).toStrictEqual({
220-
type: NodeTypes.TEXT,
221-
content: '&ersand;',
222-
loc: {
223-
start: { offset: 23, line: 1, column: 24 },
224-
end: { offset: 37, line: 1, column: 38 },
225-
source: '"&amp;ersand;"'
226-
}
227-
})
228-
expect(text3).toStrictEqual({
229-
type: NodeTypes.TEXT,
230-
content: '&!',
231-
loc: {
232-
start: { offset: 40, line: 1, column: 41 },
233-
end: { offset: 47, line: 1, column: 48 },
234-
source: '"&amp!"'
235-
}
236-
})
237-
expect(spy.mock.calls).toMatchObject([
238-
[
239-
{
240-
code: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
241-
loc: {
242-
start: { offset: 45, line: 1, column: 46 }
243-
}
244-
}
245-
]
246-
])
247-
})
248-
249-
test('Some control character reference should be replaced.', () => {
250-
const spy = jest.fn()
251-
const ast = baseParse('&#x86;', { onError: spy })
252-
const text = ast.children[0] as TextNode
253-
254-
expect(text).toStrictEqual({
255-
type: NodeTypes.TEXT,
256-
content: '†',
257-
loc: {
258-
start: { offset: 0, line: 1, column: 1 },
259-
end: { offset: 6, line: 1, column: 7 },
260-
source: '&#x86;'
261-
}
262-
})
263-
expect(spy.mock.calls).toMatchObject([
264-
[
265-
{
266-
code: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
267-
loc: {
268-
start: { offset: 0, line: 1, column: 1 }
269-
}
270-
}
271-
]
272-
])
273-
})
274165
})
275166

276167
describe('Interpolation', () => {
@@ -1652,12 +1543,10 @@ foo
16521543
expect(baz.loc.end).toEqual({ line: 2, column: 28, offset })
16531544
})
16541545

1655-
describe('namedCharacterReferences option', () => {
1546+
describe('decodeEntities option', () => {
16561547
test('use the given map', () => {
16571548
const ast: any = baseParse('&amp;&cups;', {
1658-
namedCharacterReferences: {
1659-
'cups;': '\u222A\uFE00' // UNION with serifs
1660-
},
1549+
decodeEntities: text => text.replace('&cups;', '\u222A\uFE00'),
16611550
onError: () => {} // Ignore errors
16621551
})
16631552

@@ -1756,60 +1645,6 @@ foo
17561645
errors: []
17571646
}
17581647
],
1759-
ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE: [
1760-
{
1761-
code: '<template>&#a;</template>',
1762-
errors: [
1763-
{
1764-
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
1765-
loc: { offset: 10, line: 1, column: 11 }
1766-
}
1767-
]
1768-
},
1769-
{
1770-
code: '<template>&#xg;</template>',
1771-
errors: [
1772-
{
1773-
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
1774-
loc: { offset: 10, line: 1, column: 11 }
1775-
}
1776-
]
1777-
},
1778-
{
1779-
code: '<template>&#99;</template>',
1780-
errors: []
1781-
},
1782-
{
1783-
code: '<template>&#xff;</template>',
1784-
errors: []
1785-
},
1786-
{
1787-
code: '<template attr="&#a;"></template>',
1788-
errors: [
1789-
{
1790-
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
1791-
loc: { offset: 16, line: 1, column: 17 }
1792-
}
1793-
]
1794-
},
1795-
{
1796-
code: '<template attr="&#xg;"></template>',
1797-
errors: [
1798-
{
1799-
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
1800-
loc: { offset: 16, line: 1, column: 17 }
1801-
}
1802-
]
1803-
},
1804-
{
1805-
code: '<template attr="&#99;"></template>',
1806-
errors: []
1807-
},
1808-
{
1809-
code: '<template attr="&#xff;"></template>',
1810-
errors: []
1811-
}
1812-
],
18131648
CDATA_IN_HTML_CONTENT: [
18141649
{
18151650
code: '<template><![CDATA[cdata]]></template>',
@@ -1825,37 +1660,6 @@ foo
18251660
errors: []
18261661
}
18271662
],
1828-
CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE: [
1829-
{
1830-
code: '<template>&#1234567;</template>',
1831-
errors: [
1832-
{
1833-
type: ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
1834-
loc: { offset: 10, line: 1, column: 11 }
1835-
}
1836-
]
1837-
}
1838-
],
1839-
CONTROL_CHARACTER_REFERENCE: [
1840-
{
1841-
code: '<template>&#0003;</template>',
1842-
errors: [
1843-
{
1844-
type: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
1845-
loc: { offset: 10, line: 1, column: 11 }
1846-
}
1847-
]
1848-
},
1849-
{
1850-
code: '<template>&#x7F;</template>',
1851-
errors: [
1852-
{
1853-
type: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
1854-
loc: { offset: 10, line: 1, column: 11 }
1855-
}
1856-
]
1857-
}
1858-
],
18591663
DUPLICATE_ATTRIBUTE: [
18601664
{
18611665
code: '<template><div id="" id=""></div></template>',
@@ -2412,36 +2216,6 @@ foo
24122216
]
24132217
}
24142218
],
2415-
MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE: [
2416-
{
2417-
code: '<template>&amp</template>',
2418-
options: { namedCharacterReferences: { amp: '&' } },
2419-
errors: [
2420-
{
2421-
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
2422-
loc: { offset: 14, line: 1, column: 15 }
2423-
}
2424-
]
2425-
},
2426-
{
2427-
code: '<template>&#40</template>',
2428-
errors: [
2429-
{
2430-
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
2431-
loc: { offset: 14, line: 1, column: 15 }
2432-
}
2433-
]
2434-
},
2435-
{
2436-
code: '<template>&#x40</template>',
2437-
errors: [
2438-
{
2439-
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
2440-
loc: { offset: 15, line: 1, column: 16 }
2441-
}
2442-
]
2443-
}
2444-
],
24452219
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES: [
24462220
{
24472221
code: '<template><div id="foo"class="bar"></div></template>',
@@ -2500,48 +2274,6 @@ foo
25002274
]
25012275
}
25022276
],
2503-
NONCHARACTER_CHARACTER_REFERENCE: [
2504-
{
2505-
code: '<template>&#xFFFE;</template>',
2506-
errors: [
2507-
{
2508-
type: ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE,
2509-
loc: { offset: 10, line: 1, column: 11 }
2510-
}
2511-
]
2512-
},
2513-
{
2514-
code: '<template>&#x1FFFF;</template>',
2515-
errors: [
2516-
{
2517-
type: ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE,
2518-
loc: { offset: 10, line: 1, column: 11 }
2519-
}
2520-
]
2521-
}
2522-
],
2523-
NULL_CHARACTER_REFERENCE: [
2524-
{
2525-
code: '<template>&#0000;</template>',
2526-
errors: [
2527-
{
2528-
type: ErrorCodes.NULL_CHARACTER_REFERENCE,
2529-
loc: { offset: 10, line: 1, column: 11 }
2530-
}
2531-
]
2532-
}
2533-
],
2534-
SURROGATE_CHARACTER_REFERENCE: [
2535-
{
2536-
code: '<template>&#xD800;</template>',
2537-
errors: [
2538-
{
2539-
type: ErrorCodes.SURROGATE_CHARACTER_REFERENCE,
2540-
loc: { offset: 10, line: 1, column: 11 }
2541-
}
2542-
]
2543-
}
2544-
],
25452277
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME: [
25462278
{
25472279
code: "<template><div a\"bc=''></div></template>",

packages/compiler-core/src/errors.ts

-21
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,7 @@ export function createCompilerError<T extends number>(
3232
export const enum ErrorCodes {
3333
// parse errors
3434
ABRUPT_CLOSING_OF_EMPTY_COMMENT,
35-
ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
3635
CDATA_IN_HTML_CONTENT,
37-
CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
38-
CONTROL_CHARACTER_REFERENCE,
3936
DUPLICATE_ATTRIBUTE,
4037
END_TAG_WITH_ATTRIBUTES,
4138
END_TAG_WITH_TRAILING_SOLIDUS,
@@ -49,12 +46,8 @@ export const enum ErrorCodes {
4946
INVALID_FIRST_CHARACTER_OF_TAG_NAME,
5047
MISSING_ATTRIBUTE_VALUE,
5148
MISSING_END_TAG_NAME,
52-
MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
5349
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
5450
NESTED_COMMENT,
55-
NONCHARACTER_CHARACTER_REFERENCE,
56-
NULL_CHARACTER_REFERENCE,
57-
SURROGATE_CHARACTER_REFERENCE,
5851
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
5952
UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
6053
UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
@@ -101,14 +94,8 @@ export const enum ErrorCodes {
10194
export const errorMessages: { [code: number]: string } = {
10295
// parse errors
10396
[ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT]: 'Illegal comment.',
104-
[ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE]:
105-
'Illegal numeric character reference: invalid character.',
10697
[ErrorCodes.CDATA_IN_HTML_CONTENT]:
10798
'CDATA section is allowed only in XML context.',
108-
[ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE]:
109-
'Illegal numeric character reference: too big.',
110-
[ErrorCodes.CONTROL_CHARACTER_REFERENCE]:
111-
'Illegal numeric character reference: control character.',
11299
[ErrorCodes.DUPLICATE_ATTRIBUTE]: 'Duplicate attribute.',
113100
[ErrorCodes.END_TAG_WITH_ATTRIBUTES]: 'End tag cannot have attributes.',
114101
[ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS]: "Illegal '/' in tags.",
@@ -124,17 +111,9 @@ export const errorMessages: { [code: number]: string } = {
124111
"Illegal tag name. Use '&lt;' to print '<'.",
125112
[ErrorCodes.MISSING_ATTRIBUTE_VALUE]: 'Attribute value was expected.',
126113
[ErrorCodes.MISSING_END_TAG_NAME]: 'End tag name was expected.',
127-
[ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE]:
128-
'Semicolon was expected.',
129114
[ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES]:
130115
'Whitespace was expected.',
131116
[ErrorCodes.NESTED_COMMENT]: "Unexpected '<!--' in comment.",
132-
[ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE]:
133-
'Illegal numeric character reference: non character.',
134-
[ErrorCodes.NULL_CHARACTER_REFERENCE]:
135-
'Illegal numeric character reference: null character.',
136-
[ErrorCodes.SURROGATE_CHARACTER_REFERENCE]:
137-
'Illegal numeric character reference: non-pair surrogate.',
138117
[ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME]:
139118
'Attribute name cannot contain U+0022 ("), U+0027 (\'), and U+003C (<).',
140119
[ErrorCodes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE]:

packages/compiler-core/src/options.ts

+1-7
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,7 @@ export interface ParserOptions {
2626
parent: ElementNode | undefined
2727
) => TextModes
2828
delimiters?: [string, string] // ['{{', '}}']
29-
30-
// Map to HTML entities. E.g., `{ "amp;": "&" }`
31-
// The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
32-
namedCharacterReferences?: Record<string, string>
33-
// this number is based on the map above, but it should be pre-computed
34-
// to avoid the cost on every parse() call.
35-
maxCRNameLength?: number
29+
decodeEntities?: (rawText: string, asAttr: boolean) => string
3630
onError?: (error: CompilerError) => void
3731
}
3832

0 commit comments

Comments
 (0)