facebook
diff --git a/‎packages/lexical-code-core/flow/LexicalCodeCore.js.flow‎
Lines changed: 2 additions & 0 deletions b/‎packages/lexical-code-core/flow/LexicalCodeCore.js.flow‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎packages/lexical-code-core/src/CodeNode.ts‎
Lines changed: 1 addition & 0 deletions b/‎packages/lexical-code-core/src/CodeNode.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎packages/lexical-code-core/src/FlatStructureUtils.ts‎
Lines changed: 34 additions & 1 deletion b/‎packages/lexical-code-core/src/FlatStructureUtils.ts‎
Lines changed: 34 additions & 1 deletion
diff --git a/‎packages/lexical-code-core/src/index.ts‎
Lines changed: 1 addition & 0 deletions b/‎packages/lexical-code-core/src/index.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎packages/lexical-code-prism/flow/LexicalCodePrism.js.flow‎
Lines changed: 1 addition & 1 deletion b/‎packages/lexical-code-prism/flow/LexicalCodePrism.js.flow‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎packages/lexical-code-prism/src/CodeHighlighterPrism.ts‎
Lines changed: 38 additions & 15 deletions b/‎packages/lexical-code-prism/src/CodeHighlighterPrism.ts‎
Lines changed: 38 additions & 15 deletions
diff --git a/‎packages/lexical-code-prism/src/__tests__/unit/CodePrismNullDefaultLanguage.test.ts‎
Lines changed: 73 additions & 0 deletions b/‎packages/lexical-code-prism/src/__tests__/unit/CodePrismNullDefaultLanguage.test.ts‎
Lines changed: 73 additions & 0 deletions
diff --git a/‎packages/lexical-code-shiki/flow/LexicalCodeShiki.js.flow‎
Lines changed: 1 addition & 1 deletion b/‎packages/lexical-code-shiki/flow/LexicalCodeShiki.js.flow‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎packages/lexical-code-shiki/src/CodeHighlighterShiki.ts‎
Lines changed: 32 additions & 14 deletions b/‎packages/lexical-code-shiki/src/CodeHighlighterShiki.ts‎
Lines changed: 32 additions & 14 deletions
@@ -79,6 +79,8 @@ declare export function $outdentLeadingSpaces(
   selection: RangeSelection,
 ): boolean;
 
+declare export function $plainifyCodeContent(text: string): LexicalNode[];
+
 /**
  * CodeNode
  */
 
@@ -56,6 +56,7 @@ export type SerializedCodeNode = Spread<
 >;
 
 export const DEFAULT_CODE_LANGUAGE = 'javascript';
+/** @internal Configurable through the extensions. */
 export const getDefaultCodeLanguage = (): string => DEFAULT_CODE_LANGUAGE;
 
 function hasChildDOMNodeTag(node: Node, tagName: string) {
 
@@ -17,6 +17,8 @@ import type {
 } from 'lexical';
 
 import {
+  $createLineBreakNode,
+  $createTabNode,
   $getSiblingCaret,
   $isElementNode,
   $isLineBreakNode,
@@ -25,7 +27,10 @@ import {
 } from 'lexical';
 import invariant from 'shared/invariant';
 
-import {$isCodeHighlightNode} from './CodeHighlightNode';
+import {
+  $createCodeHighlightNode,
+  $isCodeHighlightNode,
+} from './CodeHighlightNode';
 
 function $getLastMatchingCodeNode<D extends CaretDirection>(
   anchor: CodeHighlightNode | TabNode | LineBreakNode,
@@ -222,6 +227,34 @@ export function $getEndOfCodeInLine(
   return lastNode;
 }
 
+/**
+ * Builds the unhighlighted children of a CodeNode from raw code text:
+ * every `\n` becomes a LineBreakNode, every `\t` a TabNode, and each
+ * non-empty run of text between them a CodeHighlightNode created with
+ * no highlight type (empty runs yield no node). Tokenizers that opt out
+ * of a default language use this so the block's children stay within
+ * the node types the indent / shift-lines handlers accept.
+ *
+ * @returns The created nodes, in text order.
+ */
+export function $plainifyCodeContent(text: string): LexicalNode[] {
+  const nodes: LexicalNode[] = [];
+  for (const [lineIndex, line] of text.split('\n').entries()) {
+    if (lineIndex !== 0) {
+      nodes.push($createLineBreakNode());
+    }
+    for (const [segmentIndex, segment] of line.split('\t').entries()) {
+      if (segmentIndex !== 0) {
+        nodes.push($createTabNode());
+      }
+      if (segment !== '') {
+        nodes.push($createCodeHighlightNode(segment));
+      }
+    }
+  }
+  return nodes;
+}
+
 /**
  * Strip up to `tabSize` leading spaces from a {@link CodeHighlightNode} that
  * starts a code line, to support outdenting space-indented code lines (e.g.
 
@@ -32,4 +32,5 @@ export {
   $getLastCodeNodeOfLine,
   $getStartOfCodeInLine,
   $outdentLeadingSpaces,
+  $plainifyCodeContent,
 } from './FlatStructureUtils';
@@ -20,7 +20,7 @@ export interface Token {
   content: TokenContent;
 }
 export interface Tokenizer {
-  defaultLanguage: string;
+  defaultLanguage: string | null;
   tokenize(code: string, language?: string): (string | Token)[];
 }
 declare export var PrismTokenizer: Tokenizer;
 
@@ -11,6 +11,7 @@ import type {LexicalEditor, LexicalNode, NodeKey} from 'lexical';
 import {
   $isCodeHighlightNode,
   $isCodeNode,
+  $plainifyCodeContent,
   CodeExtension,
   CodeHighlightNode,
   CodeIndentExtension,
@@ -50,20 +51,38 @@ export interface Token {
 }
 
 export interface Tokenizer {
-  defaultLanguage: string;
+  /**
+   * Language to fall back to when a {@link CodeNode} doesn't carry one.
+   * Set to `null` to opt out of the implicit fallback — code blocks
+   * without a language stay untouched (no `data-language` attribute, no
+   * syntax highlighting) so a markdown round-trip can preserve ``` with
+   * no info string.
+   */
+  defaultLanguage: string | null;
   tokenize(code: string, language?: string): (string | Token)[];
   $tokenize(codeNode: CodeNode, language?: string): LexicalNode[];
 }
 
 export const PrismTokenizer: Tokenizer = {
   $tokenize(codeNode: CodeNode, language?: string): LexicalNode[] {
-    return $getHighlightNodes(codeNode, language || this.defaultLanguage);
+    // No explicit language and a `null` default: skip Prism and split the
+    // text into plain CodeHighlightNode / LineBreakNode / TabNode children.
+    const lang = language || this.defaultLanguage;
+    return lang === null
+      ? $plainifyCodeContent(codeNode.getTextContent())
+      : $getHighlightNodes(codeNode, lang);
   },
   defaultLanguage: DEFAULT_CODE_LANGUAGE,
   tokenize(code: string, language?: string): (string | Token)[] {
+    // `Prism.tokenize` reads properties off its grammar argument, so it
+    // must never receive `undefined`. When neither `language` nor the
+    // fallback resolves to a loaded grammar, pass an empty grammar, which
+    // returns the code as a single plain-text token.
     return Prism.tokenize(
       code,
-      Prism.languages[language || ''] || Prism.languages[this.defaultLanguage],
+      Prism.languages[language || ''] ||
+        Prism.languages[this.defaultLanguage ?? ''] ||
+        {},
     );
   },
 };
@@ -119,22 +132,29 @@ function $codeNodeTransform(
   const {nodesCurrentlyHighlighting} = transformState;
   const nodeKey = node.getKey();
 
-  // When new code block inserted it might not have language selected
-  if (node.getLanguage() === undefined) {
+  // When new code block inserted it might not have language selected.
+  // Tokenizers configured with `defaultLanguage: null` opt out of the
+  // implicit fallback — leave the node unset and skip highlighting so
+  // markdown round-trips ``` (no info string) without injecting one.
+  if (node.getLanguage() === undefined && tokenizer.defaultLanguage !== null) {
     node.setLanguage(tokenizer.defaultLanguage);
   }
 
   const language = node.getLanguage() || tokenizer.defaultLanguage;
-  if (isCodeLanguageLoaded(language)) {
-    if (!node.getIsSyntaxHighlightSupported()) {
-      node.setIsSyntaxHighlightSupported(true);
-    }
-  } else {
-    if (node.getIsSyntaxHighlightSupported()) {
-      node.setIsSyntaxHighlightSupported(false);
+  if (language) {
+    if (isCodeLanguageLoaded(language)) {
+      if (!node.getIsSyntaxHighlightSupported()) {
+        node.setIsSyntaxHighlightSupported(true);
+      }
+    } else {
+      if (node.getIsSyntaxHighlightSupported()) {
+        node.setIsSyntaxHighlightSupported(false);
+      }
+      loadCodeLanguage(language, editor, nodeKey);
+      return;
     }
-    loadCodeLanguage(language, editor, nodeKey);
-    return;
+  } else if (node.getIsSyntaxHighlightSupported()) {
+    node.setIsSyntaxHighlightSupported(false);
   }
 
   if (nodesCurrentlyHighlighting.has(nodeKey)) {
@@ -161,7 +181,10 @@ function $codeNodeTransform(
       currentNode.getLanguage() || tokenizer.defaultLanguage;
     //const diffLanguageMatch = DIFF_LANGUAGE_REGEX.exec(currentLanguage);
 
-    const highlightNodes = tokenizer.$tokenize(currentNode, currentLanguage);
+    const highlightNodes = tokenizer.$tokenize(
+      currentNode,
+      currentLanguage ?? undefined,
+    );
 
     const diffRange = getDiffRange(currentNode.getChildren(), highlightNodes);
     const {from, to, nodesForReplacement} = diffRange;
 
@@ -0,0 +1,73 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+
+import type {CodeNode} from '@lexical/code';
+
+import {$createCodeNode} from '@lexical/code';
+import {CodePrismExtension, PrismTokenizer} from '@lexical/code-prism';
+import {buildEditorFromExtensions, configExtension} from '@lexical/extension';
+import {RichTextExtension} from '@lexical/rich-text';
+import {$createTextNode, $getRoot, defineExtension} from 'lexical';
+import {describe, expect, test} from 'vitest';
+
+// Builds an editor whose Prism tokenizer is PrismTokenizer with
+// `defaultLanguage` overridden to `null`.
+function createEditor() {
+  const tokenizer = {...PrismTokenizer, defaultLanguage: null};
+  const rootExtension = defineExtension({
+    dependencies: [
+      RichTextExtension,
+      configExtension(CodePrismExtension, {tokenizer}),
+    ],
+    name: 'prism-default-null',
+  });
+  return buildEditorFromExtensions(rootExtension);
+}
+
+describe('Prism defaultLanguage: null (#7235)', () => {
+  // Appends a CodeNode with no language, holding `text` as a single
+  // TextNode, to the root in one discrete update and returns it.
+  function insertCodeBlock(
+    editor: ReturnType<typeof createEditor>,
+    text: string,
+  ): CodeNode {
+    let codeNode!: CodeNode;
+    editor.update(
+      () => {
+        codeNode = $createCodeNode();
+        codeNode.append($createTextNode(text));
+        $getRoot().append(codeNode);
+      },
+      {discrete: true},
+    );
+    return codeNode;
+  }
+
+  test('leaves `__language` unset and skips highlight mutation', () => {
+    using editor = createEditor();
+    const codeNode = insertCodeBlock(editor, 'hello');
+    editor.read(() => {
+      expect(codeNode.getLanguage()).toBe(undefined);
+      // With no language the transform must leave highlighting disabled.
+      expect(codeNode.getIsSyntaxHighlightSupported()).toBe(false);
+    });
+  });
+
+  test('splits text into CodeHighlightNode + LineBreakNode + TabNode for `\\n` / `\\t` so indent + line-move handlers stay compatible', () => {
+    using editor = createEditor();
+    const codeNode = insertCodeBlock(editor, 'a\n\tb');
+    editor.read(() => {
+      expect(codeNode.getChildren().map(child => child.getType())).toEqual([
+        'code-highlight',
+        'linebreak',
+        'tab',
+        'code-highlight',
+      ]);
+    });
+  });
+});
@@ -19,7 +19,7 @@ import type {CodeNode} from '@lexical/code';
  */
 
 export type Tokenizer = {
-  defaultLanguage: string;
+  defaultLanguage: string | null;
   defaultTheme: string;
   $tokenize: (codeNode: CodeNode, language?: string) => LexicalNode[];
 }
 
@@ -11,6 +11,7 @@ import type {LexicalEditor, LexicalNode, NodeKey} from 'lexical';
 import {
   $isCodeHighlightNode,
   $isCodeNode,
+  $plainifyCodeContent,
   CodeExtension,
   CodeHighlightNode,
   CodeIndentExtension,
@@ -43,7 +44,14 @@ import {
 } from './FacadeShiki';
 
 export interface Tokenizer {
-  defaultLanguage: string;
+  /**
+   * Language to fall back to when a {@link CodeNode} doesn't carry one.
+   * Set to `null` to opt out of the implicit fallback — code blocks
+   * without a language stay untouched (no `data-language` attribute, no
+   * syntax highlighting) so a markdown round-trip can preserve ``` with
+   * no info string.
+   */
+  defaultLanguage: string | null;
   defaultTheme: string;
   $tokenize: (
     this: Tokenizer,
@@ -60,7 +68,10 @@ export const ShikiTokenizer: Tokenizer = {
     codeNode: CodeNode,
     language?: string,
   ): LexicalNode[] {
-    return $getHighlightNodes(codeNode, language || this.defaultLanguage);
+    const lang = language || this.defaultLanguage;
+    return lang === null
+      ? $plainifyCodeContent(codeNode.getTextContent())
+      : $getHighlightNodes(codeNode, lang);
   },
   defaultLanguage: DEFAULT_CODE_LANGUAGE,
   defaultTheme: DEFAULT_CODE_THEME,
@@ -126,9 +137,12 @@ function $codeNodeTransform(
   const nodeKey = node.getKey();
   const {nodesCurrentlyHighlighting} = transformState;
 
-  // When new code block inserted it might not have language selected
+  // When new code block inserted it might not have language selected.
+  // Tokenizers configured with `defaultLanguage: null` opt out of the
+  // implicit fallback — leave the node unset and skip highlighting so
+  // markdown round-trips ``` (no info string) without injecting one.
   let language = node.getLanguage();
-  if (!language) {
+  if (!language && tokenizer.defaultLanguage !== null) {
     language = tokenizer.defaultLanguage;
     node.setLanguage(language);
   }
@@ -147,16 +161,20 @@ function $codeNodeTransform(
   }
 
   // dynamic import of languages
-  if (isCodeLanguageLoaded(language)) {
-    if (!node.getIsSyntaxHighlightSupported()) {
-      node.setIsSyntaxHighlightSupported(true);
-    }
-  } else {
-    if (node.getIsSyntaxHighlightSupported()) {
-      node.setIsSyntaxHighlightSupported(false);
+  if (language) {
+    if (isCodeLanguageLoaded(language)) {
+      if (!node.getIsSyntaxHighlightSupported()) {
+        node.setIsSyntaxHighlightSupported(true);
+      }
+    } else {
+      if (node.getIsSyntaxHighlightSupported()) {
+        node.setIsSyntaxHighlightSupported(false);
+      }
+      loadCodeLanguage(language, editor, nodeKey);
+      inFlight = true;
     }
-    loadCodeLanguage(language, editor, nodeKey);
-    inFlight = true;
+  } else if (node.getIsSyntaxHighlightSupported()) {
+    node.setIsSyntaxHighlightSupported(false);
   }
 
   if (inFlight) {
@@ -184,7 +202,7 @@ function $codeNodeTransform(
     }
 
     const lang = currentNode.getLanguage() || tokenizer.defaultLanguage;
-    const highlightNodes = tokenizer.$tokenize(currentNode, lang);
+    const highlightNodes = tokenizer.$tokenize(currentNode, lang ?? undefined);
     const diffRange = getDiffRange(currentNode.getChildren(), highlightNodes);
     const {from, to, nodesForReplacement} = diffRange;
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ export interface Token {`
`20`	`20`	`content: TokenContent;`
`21`	`21`	`}`
`22`	`22`	`export interface Tokenizer {`
`23`		`- defaultLanguage: string;`
	`23`	`+ defaultLanguage: string \| null;`
`24`	`24`	`tokenize(code: string, language?: string): (string \| Token)[];`
`25`	`25`	`}`
`26`	`26`	`declare export var PrismTokenizer: Tokenizer;`
Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,7 @@ import type {CodeNode} from '@lexical/code';`
`19`	`19`	`*/`
`20`	`20`
`21`	`21`	`export type Tokenizer = {`
`22`		`- defaultLanguage: string;`
	`22`	`+ defaultLanguage: string \| null;`
`23`	`23`	`defaultTheme: string;`
`24`	`24`	`$tokenize: (codeNode: CodeNode, language?: string) => LexicalNode[];`
`25`	`25`	`}`