Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(mdx-loader): refactor and fix heading to toc html value serialization #11004

Merged
merged 3 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

import {toHeadingHTMLValue} from '../utils';
import type {Heading} from 'mdast';

describe('toHeadingHTMLValue', () => {
  // Every fixture is a level-2 heading; only the inline children differ.
  function h2(children: Heading['children']): Heading {
    return {type: 'heading', depth: 2, children};
  }

  // Wraps the children in a heading and serializes it, handing over the
  // `toString` helper obtained through a dynamic import.
  async function toHtml(children: Heading['children']): Promise<string> {
    const {toString: mdastToString} = await import('mdast-util-to-string');
    return toHeadingHTMLValue(h2(children), mdastToString);
  }

  it('converts a simple heading', async () => {
    const html = toHtml([{type: 'text', value: 'Some heading text'}]);

    await expect(html).resolves.toMatchInlineSnapshot(
      `"Some heading text"`,
    );
  });

  it('converts a heading with b tag', async () => {
    const html = toHtml([
      {
        type: 'mdxJsxTextElement',
        name: 'b',
        attributes: [],
        children: [{type: 'text', value: 'Some title'}],
      },
    ]);

    await expect(html).resolves.toMatchInlineSnapshot(
      `"<b>Some title</b>"`,
    );
  });

  it('converts a heading with span tag + className', async () => {
    const html = toHtml([
      {
        type: 'mdxJsxTextElement',
        name: 'span',
        attributes: [
          {type: 'mdxJsxAttribute', name: 'className', value: 'my-class'},
        ],
        children: [{type: 'text', value: 'Some title'}],
      },
    ]);

    await expect(html).resolves.toMatchInlineSnapshot(
      `"<span class="my-class">Some title</span>"`,
    );
  });

  it('converts a heading - remove img tag', async () => {
    const html = toHtml([
      {
        type: 'mdxJsxTextElement',
        name: 'img',
        attributes: [
          {
            type: 'mdxJsxAttribute',
            name: 'src',
            value: '/img/slash-introducing.svg',
          },
          {type: 'mdxJsxAttribute', name: 'height', value: '32'},
          {type: 'mdxJsxAttribute', name: 'alt', value: 'test'},
        ],
        children: [],
      },
      // The leading space is expected to be trimmed from the output
      {type: 'text', value: ' Some title'},
    ]);

    await expect(html).resolves.toMatchInlineSnapshot(
      `"Some title"`,
    );
  });
});
83 changes: 79 additions & 4 deletions packages/docusaurus-mdx-loader/src/remark/toc/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,21 @@
* LICENSE file in the root directory of this source tree.
*/

import {toValue} from '../utils';
import type {Node} from 'unist';
import type {MdxjsEsm} from 'mdast-util-mdx';
import escapeHtml from 'escape-html';
import type {Node, Parent} from 'unist';
import type {
MdxjsEsm,
MdxJsxAttribute,
MdxJsxTextElement,
} from 'mdast-util-mdx';
import type {TOCHeading, TOCItem, TOCItems, TOCSlice} from './types';
import type {
Program,
SpreadElement,
ImportDeclaration,
ImportSpecifier,
} from 'estree';
import type {Heading, PhrasingContent} from 'mdast';

export function getImportDeclarations(program: Program): ImportDeclaration[] {
return program.body.filter(
Expand Down Expand Up @@ -118,7 +123,7 @@ export async function createTOCExportNodeAST({
const {toString} = await import('mdast-util-to-string');
const {valueToEstree} = await import('estree-util-value-to-estree');
const value: TOCItem = {
value: toValue(heading, toString),
value: toHeadingHTMLValue(heading, toString),
id: heading.data!.id!,
level: heading.depth,
};
Expand Down Expand Up @@ -172,3 +177,73 @@ export async function createTOCExportNodeAST({
},
};
}

/**
 * Serializes each child of `node` with `toHeadingHTMLValue`, concatenates the
 * fragments in order, and strips leading/trailing whitespace from the result.
 *
 * @param node Parent node whose children are serialized.
 * @param toString Plain-text serializer forwarded to `toHeadingHTMLValue`.
 * @returns The trimmed, concatenated HTML of all children.
 */
function stringifyChildren(
  node: Parent,
  toString: (param: unknown) => string, // TODO temporary, due to ESM
): string {
  let html = '';
  for (const child of node.children as PhrasingContent[]) {
    html += toHeadingHTMLValue(child, toString);
  }
  return html.trim();
}

// TODO This is really a workaround, and not super reliable
// For now we only support serializing tagName, className and content
// Can we implement the TOC with real JSX nodes instead of html strings later?
/**
 * Serializes an inline MDX JSX element to an HTML string.
 *
 * Only the tag name, a literal `className`/`class` attribute and the
 * serialized children are kept; every other attribute is dropped.
 *
 * - `<img>` elements produce an empty string.
 * - Elements without a name (JSX fragments) produce their children only.
 * - A class attribute whose value is not a literal string (expression or
 *   valueless attribute) is omitted instead of being stringified.
 *
 * @param element The JSX text element to serialize.
 * @param toString Plain-text serializer forwarded to the children.
 * @returns The HTML string for the element.
 */
function mdxJsxTextElementToHtml(
  element: MdxJsxTextElement,
  toString: (param: unknown) => string, // TODO temporary, due to ESM
): string {
  const tag = element.name;

  // See https://github.com/facebook/docusaurus/issues/11003#issuecomment-2733925363
  if (tag === 'img') {
    return '';
  }

  const content = stringifyChildren(element, toString);

  // Fragments (<>...</>) have no name: emitting "<null>...</null>" would be
  // invalid HTML, so only the children are kept
  if (!tag) {
    return content;
  }

  const attributes = element.attributes.filter(
    (child): child is MdxJsxAttribute => child.type === 'mdxJsxAttribute',
  );

  const classAttribute =
    attributes.find((attr) => attr.name === 'className') ??
    attributes.find((attr) => attr.name === 'class');

  // Only literal strings can be serialized: String() on an expression value
  // gives "[object Object]" and on a valueless attribute gives "null"
  const classValue = classAttribute?.value;
  const allAttributes =
    typeof classValue === 'string' ? ` class="${escapeHtml(classValue)}"` : '';

  return `<${tag}${allAttributes}>${content}</${tag}>`;
}

/**
 * Serializes a heading, or one of its inline descendants, to an HTML string.
 *
 * - MDX JSX text elements are delegated to `mdxJsxTextElementToHtml`.
 * - Text and inline code values are HTML-escaped.
 * - Emphasis, strong and delete nodes are wrapped in `<em>`, `<strong>` and
 *   `<del>` respectively.
 * - Headings and links contribute their children only, without a wrapper.
 * - Any other node type falls back to the provided `toString`.
 *
 * @param node The node to serialize.
 * @param toString Plain-text serializer used for unsupported node types.
 * @returns The HTML string for the node.
 */
export function toHeadingHTMLValue(
  node: PhrasingContent | Heading | MdxJsxTextElement,
  toString: (param: unknown) => string, // TODO temporary, due to ESM
): string {
  if (node.type === 'mdxJsxTextElement') {
    return mdxJsxTextElementToHtml(node as MdxJsxTextElement, toString);
  }

  // Leaf nodes: escape the raw value
  if (node.type === 'text') {
    return escapeHtml(node.value);
  }
  if (node.type === 'inlineCode') {
    return `<code>${escapeHtml(node.value)}</code>`;
  }

  // Containers rendered without a wrapping tag
  if (node.type === 'heading' || node.type === 'link') {
    return stringifyChildren(node, toString);
  }

  // Containers rendered with a wrapping tag
  if (node.type === 'emphasis') {
    return `<em>${stringifyChildren(node, toString)}</em>`;
  }
  if (node.type === 'strong') {
    return `<strong>${stringifyChildren(node, toString)}</strong>`;
  }
  if (node.type === 'delete') {
    return `<del>${stringifyChildren(node, toString)}</del>`;
  }

  return toString(node);
}
74 changes: 2 additions & 72 deletions packages/docusaurus-mdx-loader/src/remark/utils/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,8 @@
* LICENSE file in the root directory of this source tree.
*/

import escapeHtml from 'escape-html';
import type {Parent, Node} from 'unist';
import type {PhrasingContent, Heading} from 'mdast';
import type {
MdxJsxAttribute,
MdxJsxAttributeValueExpression,
MdxJsxTextElement,
} from 'mdast-util-mdx';
import type {Node} from 'unist';
import type {MdxJsxAttributeValueExpression} from 'mdast-util-mdx';

/**
* Util to transform one node type to another node type
Expand All @@ -35,70 +29,6 @@ export function transformNode<NewNode extends Node>(
return node as NewNode;
}

/**
 * Serializes each child of `node` with `toValue` and concatenates the
 * resulting fragments in order.
 *
 * @param node Parent node whose children are serialized.
 * @param toString Plain-text serializer forwarded to `toValue`.
 * @returns The concatenated HTML of all children.
 */
export function stringifyContent(
  node: Parent,
  toString: (param: unknown) => string, // TODO weird but works
): string {
  const children = node.children as PhrasingContent[];
  return children.reduce(
    (html, child) => html + toValue(child, toString),
    '',
  );
}

// TODO This is really a workaround, and not super reliable
// For now we only support serializing tagName, className and content
// Can we implement the TOC with real JSX nodes instead of html strings later?
/**
 * Serializes an inline MDX JSX element to an HTML string, keeping only the
 * tag name, a literal `className`/`class` attribute and the children.
 *
 * Elements without a name (JSX fragments) produce their children only, and a
 * class attribute whose value is not a literal string is omitted.
 *
 * @param element The JSX text element to serialize.
 * @param toString Plain-text serializer forwarded to the children.
 * @returns The HTML string for the element.
 */
function mdxJsxTextElementToHtml(
  element: MdxJsxTextElement,
  toString: (param: unknown) => string, // TODO weird but works
): string {
  const tag = element.name;
  const content = stringifyContent(element, toString);

  // A fragment has no name; avoid emitting "<null>...</null>"
  if (!tag) {
    return content;
  }

  const attributes = element.attributes.filter(
    (child): child is MdxJsxAttribute => child.type === 'mdxJsxAttribute',
  );

  const classAttribute =
    attributes.find((attr) => attr.name === 'className') ??
    attributes.find((attr) => attr.name === 'class');

  // Expression values and valueless attributes cannot be serialized as a
  // class string, so they are skipped rather than passed through String()
  const classValue = classAttribute?.value;
  const classAttributeString =
    typeof classValue === 'string' ? `class="${escapeHtml(classValue)}"` : ``;

  const allAttributes = classAttributeString ? ` ${classAttributeString}` : '';

  return `<${tag}${allAttributes}>${content}</${tag}>`;
}

/**
 * Serializes a heading, or one of its inline descendants, to an HTML string.
 *
 * Text and inline code are HTML-escaped, emphasis/strong/delete nodes are
 * wrapped in `<em>`/`<strong>`/`<del>`, headings and links contribute only
 * their children, MDX JSX text elements go through `mdxJsxTextElementToHtml`,
 * and every other node type falls back to `toString`.
 *
 * @param node The node to serialize.
 * @param toString Plain-text serializer used for unsupported node types.
 * @returns The HTML string for the node.
 */
export function toValue(
  node: PhrasingContent | Heading | MdxJsxTextElement,
  toString: (param: unknown) => string, // TODO weird but works
): string {
  switch (node.type) {
    // Leaf nodes: escaped raw value
    case 'text':
      return escapeHtml(node.value);
    case 'inlineCode':
      return `<code>${escapeHtml(node.value)}</code>`;

    // Containers without a wrapping tag
    case 'heading':
    case 'link':
      return stringifyContent(node, toString);

    // Containers with a wrapping tag
    case 'emphasis':
      return `<em>${stringifyContent(node, toString)}</em>`;
    case 'strong':
      return `<strong>${stringifyContent(node, toString)}</strong>`;
    case 'delete':
      return `<del>${stringifyContent(node, toString)}</del>`;

    case 'mdxJsxTextElement':
      return mdxJsxTextElementToHtml(node as MdxJsxTextElement, toString);

    default:
      return toString(node);
  }
}

export function assetRequireAttributeValue(
requireString: string,
hash: string,
Expand Down
Loading