Skip to content

Commit ca67d2e

Browse files
authored
Formatter: Preserve content in white-space significant elements (#1280)
This pull request updates the formatter to detect `white-space`-significant elements in one of two ways: either the element's `style` attribute declares the `white-space` property with a whitespace-preserving value, or its `class` attribute includes one of the Tailwind whitespace-preserving classes. Additionally, it moves some of the attribute-related AST utilities from the linter package to `core` so that the formatter can use them as well. Resolves #1095
1 parent 6433a26 commit ca67d2e

34 files changed

+567
-428
lines changed

javascript/packages/core/src/ast-utils.ts

Lines changed: 272 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ import {
1414
HTMLElementNode,
1515
HTMLOpenTagNode,
1616
HTMLCloseTagNode,
17+
HTMLAttributeNode,
1718
HTMLAttributeNameNode,
19+
HTMLAttributeValueNode,
1820
HTMLCommentNode
1921
} from "./nodes.js"
2022

@@ -24,8 +26,13 @@ import {
2426
isERBNode,
2527
isERBContentNode,
2628
isHTMLCommentNode,
29+
isHTMLElementNode,
30+
isHTMLOpenTagNode,
31+
isHTMLAttributeNameNode,
32+
isHTMLAttributeValueNode,
2733
areAllOfType,
28-
filterLiteralNodes
34+
filterLiteralNodes,
35+
filterHTMLAttributeNodes
2936
} from "./node-type-guards.js"
3037

3138
import type { Location } from "./location.js"
@@ -208,10 +215,26 @@ export function getCombinedAttributeName(attributeNameNode: HTMLAttributeNameNod
208215
}
209216

210217
/**
 * Gets the tag name of an HTML element, open tag, or close tag node.
 *
 * The name is returned exactly as stored on the node's `tag_name` token
 * (no case normalization is applied here).
 *
 * @param node - The element, open tag, or close tag node (may be null/undefined).
 * @returns `null` only when `node` itself is null/undefined. For an existing
 *   node without a `tag_name` token the empty string is returned, so the
 *   non-nullable overload really does always yield a `string`.
 */
export function getTagName(node: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode): string
export function getTagName(node: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode | null | undefined): string | null
export function getTagName(node: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode | null | undefined): string | null {
  if (!node) return null

  // Fall back to "" (not null) so the first overload's `string` return type holds.
  return node.tag_name?.value ?? ""
}
228+
229+
/**
 * Gets the tag name of an HTML element, open tag, or close tag node in lowercase.
 * Comparable to `Element.localName` in the DOM API.
 *
 * @param node - The element, open tag, or close tag node (may be null/undefined).
 * @returns The lowercased result of `getTagName`, or `null` when `getTagName` yields no name.
 */
export function getTagLocalName(node: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode): string
export function getTagLocalName(node: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode | null | undefined): string | null
export function getTagLocalName(node: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode | null | undefined): string | null {
  const tagName = getTagName(node)

  return tagName == null ? null : tagName.toLowerCase()
}
216239

217240
/**
@@ -221,6 +244,251 @@ export function isCommentNode(node: Node): node is HTMLCommentNode | ERBCommentN
221244
return isHTMLCommentNode(node) || isERBCommentNode(node)
222245
}
223246

247+
/**
 * Resolves the HTMLOpenTagNode for an element or open tag.
 *
 * - An HTMLOpenTagNode is returned unchanged.
 * - For an HTMLElementNode, its `open_tag` is returned only if it is a regular
 *   HTMLOpenTagNode; any other kind of open tag (e.g. a conditional one) yields `null`.
 * - `null`/`undefined` input, or any other node, yields `null`.
 */
export function getOpenTag(node: HTMLElementNode | HTMLOpenTagNode | null | undefined): HTMLOpenTagNode | null {
  if (!node) return null
  if (isHTMLOpenTagNode(node)) return node
  if (!isHTMLElementNode(node)) return null

  const openTag = node.open_tag

  return isHTMLOpenTagNode(openTag) ? openTag : null
}
259+
260+
/**
 * Collects the attribute nodes of an HTMLElementNode or HTMLOpenTagNode.
 *
 * @returns The HTMLAttributeNode children of the resolved open tag, or an
 *   empty array when no open tag can be resolved.
 */
export function getAttributes(node: HTMLElementNode | HTMLOpenTagNode | null | undefined): HTMLAttributeNode[] {
  const openTag = getOpenTag(node)
  if (!openTag) return []

  return filterHTMLAttributeNodes(openTag.children)
}
268+
269+
/**
 * Gets the static name of an HTMLAttributeNode.
 *
 * @param attributeNode - The attribute to read the name from.
 * @param lowercase - When true (the default) the name is lowercased;
 *   when false the result of `getStaticAttributeName` is returned as-is.
 * @returns The attribute name, or `null` when the attribute has no name node
 *   or no static name can be determined (e.g. the name contains ERB).
 */
export function getAttributeName(attributeNode: HTMLAttributeNode, lowercase = true): string | null {
  const nameNode = attributeNode.name
  if (!isHTMLAttributeNameNode(nameNode)) return null

  const name = getStaticAttributeName(nameNode)

  if (lowercase) {
    return name ? name.toLowerCase() : null
  }

  return name
}
282+
283+
/**
 * Tells whether an attribute's value consists solely of literal nodes (no ERB).
 *
 * Two call shapes are supported: pass an HTMLAttributeNode directly, or pass an
 * element/open tag together with the name of the attribute to look up.
 *
 * @returns `false` when the attribute cannot be resolved or has no value children;
 *   otherwise whether every value child is a literal node.
 */
export function hasStaticAttributeValue(attributeNode: HTMLAttributeNode | null | undefined): boolean
export function hasStaticAttributeValue(node: HTMLElementNode | HTMLOpenTagNode | null | undefined, attributeName: string): boolean
export function hasStaticAttributeValue(nodeOrAttribute: HTMLAttributeNode | HTMLElementNode | HTMLOpenTagNode | null | undefined, attributeName?: string): boolean {
  let attribute: HTMLAttributeNode | null | undefined

  if (attributeName) {
    // Element/open tag + name form: look the attribute up first.
    attribute = getAttribute(nodeOrAttribute as HTMLElementNode | HTMLOpenTagNode, attributeName)
  } else {
    attribute = nodeOrAttribute as HTMLAttributeNode | null | undefined
  }

  const children = attribute?.value?.children
  if (!children) return false

  return children.every(isLiteralNode)
}
299+
300+
/**
 * Reads an attribute's value as a plain string, provided the value is fully static.
 *
 * Two call shapes are supported: pass an HTMLAttributeNode directly, or pass an
 * element/open tag together with the name of the attribute to look up.
 *
 * @returns The concatenated content of the value's literal nodes, or `null` when
 *   the attribute cannot be resolved or its value is not static (see
 *   `hasStaticAttributeValue`).
 */
export function getStaticAttributeValue(attributeNode: HTMLAttributeNode | null | undefined): string | null
export function getStaticAttributeValue(node: HTMLElementNode | HTMLOpenTagNode | null | undefined, attributeName: string): string | null
export function getStaticAttributeValue(nodeOrAttribute: HTMLAttributeNode | HTMLElementNode | HTMLOpenTagNode | null | undefined, attributeName?: string): string | null {
  const attribute = attributeName
    ? getAttribute(nodeOrAttribute as HTMLElementNode | HTMLOpenTagNode, attributeName)
    : nodeOrAttribute as HTMLAttributeNode | null | undefined

  if (!attribute || !hasStaticAttributeValue(attribute)) return null

  const valueNode = attribute.value
  if (!valueNode) return null

  const pieces = filterLiteralNodes(valueNode.children).map(literal => literal.content)

  return pieces.join("")
}
320+
321+
/**
 * Breaks a whitespace-separated attribute value (such as `class`) into its tokens.
 *
 * Two call shapes are supported: pass the string itself, or pass an
 * element/open tag together with the name of the attribute whose static value
 * should be tokenized.
 *
 * @returns The non-empty tokens in source order; an empty array when the value
 *   is null, undefined, empty, or whitespace only.
 */
export function getTokenList(value: string | null | undefined): string[]
export function getTokenList(node: HTMLElementNode | HTMLOpenTagNode | null | undefined, attributeName: string): string[]
export function getTokenList(valueOrNode: string | HTMLElementNode | HTMLOpenTagNode | null | undefined, attributeName?: string): string[] {
  let source: string | null | undefined

  if (attributeName) {
    source = getStaticAttributeValue(valueOrNode as HTMLElementNode | HTMLOpenTagNode, attributeName)
  } else {
    source = valueOrNode as string | null | undefined
  }

  if (!source) return []

  const tokens: string[] = []

  for (const piece of source.trim().split(/\s+/)) {
    // A whitespace-only value splits into a single empty piece; skip it.
    if (piece.length > 0) tokens.push(piece)
  }

  return tokens
}
337+
338+
/**
 * Looks up an attribute by name within a list of nodes.
 *
 * Only HTMLAttributeNode entries are considered. The comparison is made between
 * the attribute's lowercased static name and the lowercased `attributeName`.
 *
 * @returns The first matching attribute, or `null` when none matches.
 */
export function findAttributeByName(attributes: Node[], attributeName: string): HTMLAttributeNode | null {
  const match = filterHTMLAttributeNodes(attributes).find(
    candidate => getAttributeName(candidate) === attributeName.toLowerCase()
  )

  return match ?? null
}
352+
353+
/**
 * Retrieves a named attribute from an HTMLElementNode or HTMLOpenTagNode.
 *
 * @returns The matching HTMLAttributeNode, or `null` when the node has no such attribute.
 */
export function getAttribute(node: HTMLElementNode | HTMLOpenTagNode | null | undefined, attributeName: string): HTMLAttributeNode | null {
  return findAttributeByName(getAttributes(node), attributeName)
}
361+
362+
/**
 * Tells whether an element or open tag carries the given attribute.
 *
 * @returns `false` for null/undefined input or when the attribute is not found.
 */
export function hasAttribute(node: HTMLElementNode | HTMLOpenTagNode | null | undefined, attributeName: string): boolean {
  return Boolean(node) && getAttribute(node, attributeName) !== null
}
370+
371+
/**
 * Tells whether an attribute's name is dynamic, as determined by `hasDynamicAttributeName`.
 * Convenience wrapper that takes the HTMLAttributeNode rather than its name node.
 *
 * @returns `false` when the attribute has no HTMLAttributeNameNode.
 */
export function hasDynamicAttributeNameOnAttribute(attributeNode: HTMLAttributeNode): boolean {
  const nameNode = attributeNode.name

  return isHTMLAttributeNameNode(nameNode) ? hasDynamicAttributeName(nameNode) : false
}
380+
381+
/**
 * Produces the combined string form of an attribute's name via `getCombinedAttributeName`.
 * Convenience wrapper that takes the HTMLAttributeNode rather than its name node.
 *
 * @returns The combined name, or an empty string when the attribute has no HTMLAttributeNameNode.
 */
export function getCombinedAttributeNameString(attributeNode: HTMLAttributeNode): string {
  const nameNode = attributeNode.name

  return isHTMLAttributeNameNode(nameNode) ? getCombinedAttributeName(nameNode) : ""
}
390+
391+
/**
 * Tells whether an attribute's value includes at least one ERB content node.
 *
 * @returns `false` when the attribute has no value children.
 */
export function hasDynamicAttributeValue(attributeNode: HTMLAttributeNode): boolean {
  const children = attributeNode.value?.children

  return children ? children.some(isERBContentNode) : false
}
399+
400+
/**
 * Exposes the child nodes of an attribute's value so callers can inspect them directly.
 *
 * @returns The value's `children`, or an empty array when the attribute has no value or no children.
 */
export function getAttributeValueNodes(attributeNode: HTMLAttributeNode): Node[] {
  const children = attributeNode.value?.children

  return children ? children : []
}
406+
407+
/**
 * Tells whether an attribute's value nodes include static content,
 * as determined by `hasStaticContent` (intended for validation).
 */
export function hasStaticAttributeValueContent(attributeNode: HTMLAttributeNode): boolean {
  const valueNodes = getAttributeValueNodes(attributeNode)

  return hasStaticContent(valueNodes)
}
413+
414+
/**
 * Extracts the static portion of an attribute value via `getStaticContentFromNodes`.
 *
 * In contrast to `getStaticAttributeValue`, a value that mixes literal text and
 * ERB is accepted here; only its literal parts contribute to the result.
 *
 * @returns The combined literal content, or `null` if the value has no literal nodes.
 */
export function getStaticAttributeValueContent(attributeNode: HTMLAttributeNode): string | null {
  const valueNodes = getAttributeValueNodes(attributeNode)

  return getStaticContentFromNodes(valueNodes)
}
422+
423+
/**
 * Gets the combined attribute value including both static text and ERB tag syntax.
 * For ERB nodes, includes the full tag syntax (e.g., "<%= foo %>").
 *
 * @param attributeNode - The attribute whose value should be rendered.
 * @returns The concatenated value, or `null` when the attribute has no value
 *   node, the value node is not an attribute value node, or it has no children.
 */
export function getAttributeValue(attributeNode: HTMLAttributeNode): string | null {
  const valueNode = attributeNode.value
  if (!valueNode) return null

  if (valueNode.type !== "AST_HTML_ATTRIBUTE_VALUE_NODE" || !valueNode.children?.length) {
    return null
  }

  let result = ""

  for (const child of valueNode.children) {
    if (isERBContentNode(child)) {
      // ERB nodes without content contribute nothing.
      if (child.content) {
        // Default missing delimiters to "" — interpolating `undefined` would
        // otherwise inject the literal text "undefined" into the result.
        const opening = child.tag_opening?.value ?? ""
        const closing = child.tag_closing?.value ?? ""

        result += `${opening}${child.content.value}${closing}`
      }
    } else if (isLiteralNode(child)) {
      result += child.content
    }
  }

  return result
}
450+
451+
/**
 * Tells whether an attribute carries an HTMLAttributeValueNode as its value.
 */
export function hasAttributeValue(attributeNode: HTMLAttributeNode): boolean {
  const valueNode = attributeNode.value

  return isHTMLAttributeValueNode(valueNode)
}
457+
458+
/**
 * Determines how an attribute value is quoted.
 *
 * @param node - Either the attribute itself or its value node.
 * @returns `null` when there is no value node; `"none"` when the value is not
 *   flagged as quoted or has no opening quote token; `"double"` when the opening
 *   quote is `"`; `"single"` for any other opening quote.
 */
export function getAttributeValueQuoteType(node: HTMLAttributeNode | HTMLAttributeValueNode): "single" | "double" | "none" | null {
  const valueNode = isHTMLAttributeValueNode(node) ? node : node.value
  if (!valueNode) return null

  if (!valueNode.quoted || !valueNode.open_quote) return "none"

  return valueNode.open_quote.value === '"' ? "double" : "single"
}
471+
472+
/**
 * Tells whether an attribute's value is flagged as quoted.
 *
 * @returns `false` when the attribute has no HTMLAttributeValueNode.
 */
export function isAttributeValueQuoted(attributeNode: HTMLAttributeNode): boolean {
  const valueNode = attributeNode.value

  return isHTMLAttributeValueNode(valueNode) ? Boolean(valueNode.quoted) : false
}
480+
481+
/**
 * Invokes `callback` once for each attribute of an element or open tag node,
 * in the order returned by `getAttributes`.
 */
export function forEachAttribute(node: HTMLElementNode | HTMLOpenTagNode, callback: (attributeNode: HTMLAttributeNode) => void): void {
  getAttributes(node).forEach(attribute => {
    callback(attribute)
  })
}
489+
490+
// --- Position Utilities ---
491+
224492
/**
225493
* Compares two positions to determine if the first comes before the second
226494
* Returns true if pos1 comes before pos2 in source order

javascript/packages/formatter/src/format-helpers.ts

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { isNode, isERBNode, getTagName, isAnyOf, isERBControlFlowNode, hasERBOutput } from "@herb-tools/core"
1+
import { isNode, isERBNode, getTagName, isAnyOf, isERBControlFlowNode, hasERBOutput, getStaticAttributeValue, getTokenList } from "@herb-tools/core"
22
import { Node, HTMLDoctypeNode, HTMLTextNode, HTMLElementNode, HTMLCommentNode, HTMLOpenTagNode, HTMLCloseTagNode, ERBIfNode, ERBContentNode, WhitespaceNode } from "@herb-tools/core"
33

44
// --- Types ---
@@ -57,6 +57,22 @@ export const CONTENT_PRESERVING_ELEMENTS = new Set([
5757
'script', 'style', 'pre', 'textarea'
5858
])
5959

60+
/**
 * Tailwind CSS utility class names that set a whitespace-preserving `white-space` value.
 * See https://tailwindcss.com/docs/white-space
 */
export const WHITESPACE_PRESERVING_CLASSES = [
  "whitespace-pre-line",
  "whitespace-pre-wrap",
  "whitespace-pre",
  "whitespace-break-spaces",
]
67+
68+
/**
 * Values of the CSS `white-space` property that are treated as whitespace-preserving.
 * See https://developer.mozilla.org/en-US/docs/Web/CSS/white-space
 */
export const WHITESPACE_PRESERVING_STYLE_VALUES = new Set([
  "pre",
  "pre-line",
  "pre-wrap",
  "break-spaces",
])
75+
6076
export const SPACEABLE_CONTAINERS = new Set([
6177
'div', 'section', 'article', 'main', 'header', 'footer', 'aside',
6278
'figure', 'details', 'summary', 'dialog', 'fieldset'
@@ -397,10 +413,31 @@ export function hasMixedTextAndInlineContent(children: Node[]): boolean {
397413
return (hasText && hasInlineElements) || (hasERBOutput(children) && hasText)
398414
}
399415

416+
/**
 * Tells whether an element is styled so that its whitespace is significant.
 *
 * An element qualifies when either:
 * - one of its static `class` tokens contains a name from
 *   `WHITESPACE_PRESERVING_CLASSES` (substring match), or
 * - its static `style` attribute contains a `white-space` declaration whose
 *   value is in `WHITESPACE_PRESERVING_STYLE_VALUES`.
 *
 * The property name is matched case-insensitively and every `white-space`
 * declaration in the attribute is inspected, not just the first. Erring on the
 * side of "preserving" is deliberate: a false positive only skips reformatting,
 * while a false negative would rewrite significant whitespace.
 *
 * @param element - The element whose `class` and `style` attributes are inspected.
 * @returns `true` when a whitespace-preserving class or style declaration is found.
 */
export function hasWhitespacePreservingStyle(element: HTMLElementNode): boolean {
  const classTokens = getTokenList(element, "class")

  if (classTokens.some(klass => WHITESPACE_PRESERVING_CLASSES.some(whitespace => klass.includes(whitespace)))) return true

  const styleValue = getStaticAttributeValue(element, "style")
  if (!styleValue) return false

  // The value stops at `;` (end of declaration) or `!` (start of `!important`).
  for (const match of styleValue.matchAll(/white-space\s*:\s*([^;!]+)/gi)) {
    const value = match[1].trim().toLowerCase()

    if (WHITESPACE_PRESERVING_STYLE_VALUES.has(value)) return true
  }

  return false
}
431+
400432
/**
 * Tells whether the content of an element must be kept as written.
 *
 * This is the case when the tag name is listed in `CONTENT_PRESERVING_ELEMENTS`,
 * or — for HTMLElementNode input only — when the element has a
 * whitespace-preserving class or inline style (see `hasWhitespacePreservingStyle`).
 *
 * The tag name is lowercased before the lookup so that upper- or mixed-case
 * tags such as `<PRE>` are recognised; a missing tag name is treated as "".
 *
 * @param element - The element, open tag, or close tag node to check.
 * @returns `true` when the node's content should be preserved.
 */
export function isContentPreserving(element: HTMLElementNode | HTMLOpenTagNode | HTMLCloseTagNode): boolean {
  const tagName = getTagName(element)?.toLowerCase() ?? ""
  if (CONTENT_PRESERVING_ELEMENTS.has(tagName)) return true

  // Attributes are only inspected on element nodes.
  if (isNode(element, HTMLElementNode)) {
    return hasWhitespacePreservingStyle(element)
  }

  return false
}
405442

406443
/**

0 commit comments

Comments
 (0)