From 39aef996a8c5659e2b65b33f4ec79228d6dc8da5 Mon Sep 17 00:00:00 2001 From: sebastien Date: Tue, 18 Mar 2025 17:15:14 +0100 Subject: [PATCH 1/3] refactor with iso behavior --- .../src/remark/toc/utils.ts | 77 ++++++++++++++++++- .../src/remark/utils/index.ts | 74 +----------------- 2 files changed, 75 insertions(+), 76 deletions(-) diff --git a/packages/docusaurus-mdx-loader/src/remark/toc/utils.ts b/packages/docusaurus-mdx-loader/src/remark/toc/utils.ts index 59e860119f9a..b61830a63031 100644 --- a/packages/docusaurus-mdx-loader/src/remark/toc/utils.ts +++ b/packages/docusaurus-mdx-loader/src/remark/toc/utils.ts @@ -5,9 +5,13 @@ * LICENSE file in the root directory of this source tree. */ -import {toValue} from '../utils'; -import type {Node} from 'unist'; -import type {MdxjsEsm} from 'mdast-util-mdx'; +import escapeHtml from 'escape-html'; +import type {Node, Parent} from 'unist'; +import type { + MdxjsEsm, + MdxJsxAttribute, + MdxJsxTextElement, +} from 'mdast-util-mdx'; import type {TOCHeading, TOCItem, TOCItems, TOCSlice} from './types'; import type { Program, @@ -15,6 +19,7 @@ import type { ImportDeclaration, ImportSpecifier, } from 'estree'; +import type {Heading, PhrasingContent} from 'mdast'; export function getImportDeclarations(program: Program): ImportDeclaration[] { return program.body.filter( @@ -118,7 +123,7 @@ export async function createTOCExportNodeAST({ const {toString} = await import('mdast-util-to-string'); const {valueToEstree} = await import('estree-util-value-to-estree'); const value: TOCItem = { - value: toValue(heading, toString), + value: toHeadingHTMLValue(heading, toString), id: heading.data!.id!, level: heading.depth, }; @@ -172,3 +177,67 @@ export async function createTOCExportNodeAST({ }, }; } + +function stringifyChildren( + node: Parent, + toString: (param: unknown) => string, // TODO weird but works +): string { + return (node.children as PhrasingContent[]) + .map((item) => toHeadingHTMLValue(item, toString)) + .join(''); +} + 
+// TODO This is really a workaround, and not super reliable +// For now we only support serializing tagName, className and content +// Can we implement the TOC with real JSX nodes instead of html strings later? +function mdxJsxTextElementToHtml( + element: MdxJsxTextElement, + toString: (param: unknown) => string, // TODO weird but works +): string { + const tag = element.name; + + const attributes = element.attributes.filter( + (child): child is MdxJsxAttribute => child.type === 'mdxJsxAttribute', + ); + + const classAttribute = + attributes.find((attr) => attr.name === 'className') ?? + attributes.find((attr) => attr.name === 'class'); + + const classAttributeString = classAttribute + ? `class="${escapeHtml(String(classAttribute.value))}"` + : ``; + + const allAttributes = classAttributeString ? ` ${classAttributeString}` : ''; + + const content = stringifyChildren(element, toString); + + return `<${tag}${allAttributes}>${content}</${tag}>`; +} + +export function toHeadingHTMLValue( + node: PhrasingContent | Heading | MdxJsxTextElement, + toString: (param: unknown) => string, // TODO weird but works +): string { + switch (node.type) { + case 'mdxJsxTextElement': { + return mdxJsxTextElementToHtml(node as MdxJsxTextElement, toString); + } + case 'text': + return escapeHtml(node.value); + case 'heading': + return stringifyChildren(node, toString); + case 'inlineCode': + return `<code>${escapeHtml(node.value)}</code>`; + case 'emphasis': + return `<em>${stringifyChildren(node, toString)}</em>`; + case 'strong': + return `<strong>${stringifyChildren(node, toString)}</strong>`; + case 'delete': + return `<del>${stringifyChildren(node, toString)}</del>`; + case 'link': + return stringifyChildren(node, toString); + default: + return toString(node); + } +} diff --git a/packages/docusaurus-mdx-loader/src/remark/utils/index.ts b/packages/docusaurus-mdx-loader/src/remark/utils/index.ts index e191bbfa2e86..898f0617a4d2 100644 --- a/packages/docusaurus-mdx-loader/src/remark/utils/index.ts +++ 
b/packages/docusaurus-mdx-loader/src/remark/utils/index.ts @@ -5,14 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -import escapeHtml from 'escape-html'; -import type {Parent, Node} from 'unist'; -import type {PhrasingContent, Heading} from 'mdast'; -import type { - MdxJsxAttribute, - MdxJsxAttributeValueExpression, - MdxJsxTextElement, -} from 'mdast-util-mdx'; +import type {Node} from 'unist'; +import type {MdxJsxAttributeValueExpression} from 'mdast-util-mdx'; /** * Util to transform one node type to another node type @@ -35,70 +29,6 @@ export function transformNode( return node as NewNode; } -export function stringifyContent( - node: Parent, - toString: (param: unknown) => string, // TODO weird but works -): string { - return (node.children as PhrasingContent[]) - .map((item) => toValue(item, toString)) - .join(''); -} - -// TODO This is really a workaround, and not super reliable -// For now we only support serializing tagName, className and content -// Can we implement the TOC with real JSX nodes instead of html strings later? -function mdxJsxTextElementToHtml( - element: MdxJsxTextElement, - toString: (param: unknown) => string, // TODO weird but works -): string { - const tag = element.name; - - const attributes = element.attributes.filter( - (child): child is MdxJsxAttribute => child.type === 'mdxJsxAttribute', - ); - - const classAttribute = - attributes.find((attr) => attr.name === 'className') ?? - attributes.find((attr) => attr.name === 'class'); - - const classAttributeString = classAttribute - ? `class="${escapeHtml(String(classAttribute.value))}"` - : ``; - - const allAttributes = classAttributeString ? 
` ${classAttributeString}` : ''; - - const content = stringifyContent(element, toString); - - return `<${tag}${allAttributes}>${content}</${tag}>`; -} - -export function toValue( - node: PhrasingContent | Heading | MdxJsxTextElement, - toString: (param: unknown) => string, // TODO weird but works -): string { - switch (node.type) { - case 'mdxJsxTextElement': { - return mdxJsxTextElementToHtml(node as MdxJsxTextElement, toString); - } - case 'text': - return escapeHtml(node.value); - case 'heading': - return stringifyContent(node, toString); - case 'inlineCode': - return `<code>${escapeHtml(node.value)}</code>`; - case 'emphasis': - return `<em>${stringifyContent(node, toString)}</em>`; - case 'strong': - return `<strong>${stringifyContent(node, toString)}</strong>`; - case 'delete': - return `<del>${stringifyContent(node, toString)}</del>`; - case 'link': - return stringifyContent(node, toString); - default: - return toString(node); - } -} - export function assetRequireAttributeValue( requireString: string, hash: string, From d514ea4db63ac1c6f5b8f64090a1d8adb390cdfd Mon Sep 17 00:00:00 2001 From: sebastien Date: Tue, 18 Mar 2025 17:23:50 +0100 Subject: [PATCH 2/3] Add unit tests --- .../src/remark/toc/__tests__/utils.test.ts | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 packages/docusaurus-mdx-loader/src/remark/toc/__tests__/utils.test.ts diff --git a/packages/docusaurus-mdx-loader/src/remark/toc/__tests__/utils.test.ts b/packages/docusaurus-mdx-loader/src/remark/toc/__tests__/utils.test.ts new file mode 100644 index 000000000000..480db49dab41 --- /dev/null +++ b/packages/docusaurus-mdx-loader/src/remark/toc/__tests__/utils.test.ts @@ -0,0 +1,126 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +import {toHeadingHTMLValue} from '../utils'; +import type {Heading} from 'mdast'; + +describe('toHeadingHTMLValue', () => { + async function convert(heading: Heading): Promise<string> { + const {toString} = await import('mdast-util-to-string'); + return toHeadingHTMLValue(heading, toString); + } + + it('converts a simple heading', async () => { + const heading: Heading = { + type: 'heading', + depth: 2, + children: [ + { + type: 'text', + value: 'Some heading text', + }, + ], + }; + + await expect(convert(heading)).resolves.toMatchInlineSnapshot( + `"Some heading text"`, + ); + }); + + it('converts a heading with b tag', async () => { + const heading: Heading = { + type: 'heading', + depth: 2, + children: [ + { + type: 'mdxJsxTextElement', + name: 'b', + attributes: [], + children: [ + { + type: 'text', + value: 'Some title', + }, + ], + }, + ], + }; + + await expect(convert(heading)).resolves.toMatchInlineSnapshot( + `"<b>Some title</b>"`, + ); + }); + + it('converts a heading with span tag + className', async () => { + const heading: Heading = { + type: 'heading', + depth: 2, + children: [ + { + type: 'mdxJsxTextElement', + name: 'span', + attributes: [ + { + type: 'mdxJsxAttribute', + name: 'className', + value: 'my-class', + }, + ], + children: [ + { + type: 'text', + value: 'Some title', + }, + ], + }, + ], + }; + + await expect(convert(heading)).resolves.toMatchInlineSnapshot( + `"<span class="my-class">Some title</span>"`, + ); + }); + + it('converts a heading with image', async () => { + const heading: Heading = { + type: 'heading', + depth: 2, + children: [ + { + type: 'mdxJsxTextElement', + name: 'img', + attributes: [ + { + type: 'mdxJsxAttribute', + name: 'src', + value: '/img/slash-introducing.svg', + }, + { + type: 'mdxJsxAttribute', + name: 'height', + value: '32', + }, + { + type: 'mdxJsxAttribute', + name: 'alt', + value: 'test', + }, + ], + children: [], + }, + { + type: 'text', + value: ' Some title', + }, + ], + }; + + await expect(convert(heading)).resolves.toMatchInlineSnapshot( 
+ `"<img></img> Some title"`, + ); + }); +}); From 0301d0027ff3263821d9a73e03d2e0eb1797924f Mon Sep 17 00:00:00 2001 From: sebastien Date: Tue, 18 Mar 2025 17:42:15 +0100 Subject: [PATCH 3/3] change behavior for tags --- .../src/remark/toc/__tests__/utils.test.ts | 4 ++-- .../docusaurus-mdx-loader/src/remark/toc/utils.ts | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/packages/docusaurus-mdx-loader/src/remark/toc/__tests__/utils.test.ts b/packages/docusaurus-mdx-loader/src/remark/toc/__tests__/utils.test.ts index 480db49dab41..32592d89fab6 100644 --- a/packages/docusaurus-mdx-loader/src/remark/toc/__tests__/utils.test.ts +++ b/packages/docusaurus-mdx-loader/src/remark/toc/__tests__/utils.test.ts @@ -85,7 +85,7 @@ describe('toHeadingHTMLValue', () => { ); }); - it('converts a heading with image', async () => { + it('converts a heading - remove img tag', async () => { const heading: Heading = { type: 'heading', depth: 2, @@ -120,7 +120,7 @@ describe('toHeadingHTMLValue', () => { }; await expect(convert(heading)).resolves.toMatchInlineSnapshot( - `"<img></img> Some title"`, + `"Some title"`, ); }); }); diff --git a/packages/docusaurus-mdx-loader/src/remark/toc/utils.ts b/packages/docusaurus-mdx-loader/src/remark/toc/utils.ts index b61830a63031..b8200c6e4875 100644 --- a/packages/docusaurus-mdx-loader/src/remark/toc/utils.ts +++ b/packages/docusaurus-mdx-loader/src/remark/toc/utils.ts @@ -180,11 +180,12 @@ export async function createTOCExportNodeAST({ function stringifyChildren( node: Parent, - toString: (param: unknown) => string, // TODO weird but works + toString: (param: unknown) => string, // TODO temporary, due to ESM ): string { return (node.children as PhrasingContent[]) .map((item) => toHeadingHTMLValue(item, toString)) - .join(''); + .join('') + .trim(); } // TODO This is really a workaround, and not super reliable @@ -192,10 +193,15 @@ function stringifyChildren( // Can we implement the TOC with real JSX nodes instead of html strings later? 
function mdxJsxTextElementToHtml( element: MdxJsxTextElement, - toString: (param: unknown) => string, // TODO weird but works + toString: (param: unknown) => string, // TODO temporary, due to ESM ): string { const tag = element.name; + // See https://github.com/facebook/docusaurus/issues/11003#issuecomment-2733925363 + if (tag === 'img') { + return ''; + } + const attributes = element.attributes.filter( (child): child is MdxJsxAttribute => child.type === 'mdxJsxAttribute', ); @@ -217,7 +223,7 @@ function mdxJsxTextElementToHtml( export function toHeadingHTMLValue( node: PhrasingContent | Heading | MdxJsxTextElement, - toString: (param: unknown) => string, // TODO weird but works + toString: (param: unknown) => string, // TODO temporary, due to ESM ): string { switch (node.type) { case 'mdxJsxTextElement': {