Skip to content

Commit 0f78b99

Browse files
Fix markup heading parsing, fix emphasis parsing (go-gitea#36284)
Fixes go-gitea#36106 and go-gitea#17958. Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
1 parent cfd7218 commit 0f78b99

15 files changed

Lines changed: 260 additions & 196 deletions

File tree

modules/htmlutil/html.go

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package htmlutil
66
import (
77
"fmt"
88
"html/template"
9+
"io"
910
"slices"
1011
"strings"
1112
)
@@ -31,7 +32,7 @@ func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int
3132
return size, class
3233
}
3334

34-
func HTMLFormat(s template.HTML, rawArgs ...any) template.HTML {
35+
func htmlFormatArgs(s template.HTML, rawArgs []any) []any {
3536
if !strings.Contains(string(s), "%") || len(rawArgs) == 0 {
3637
panic("HTMLFormat requires one or more arguments")
3738
}
@@ -50,5 +51,35 @@ func HTMLFormat(s template.HTML, rawArgs ...any) template.HTML {
5051
args[i] = template.HTMLEscapeString(fmt.Sprint(v))
5152
}
5253
}
53-
return template.HTML(fmt.Sprintf(string(s), args...))
54+
return args
55+
}
56+
57+
// HTMLFormat formats s with fmt.Sprintf and returns the result as template.HTML.
// The arguments are first passed through htmlFormatArgs, which panics when s
// contains no "%" or when no arguments are given.
// NOTE(review): htmlFormatArgs appears to HTML-escape plain values before they
// are substituted; only part of it is visible here, so confirm against its body.
func HTMLFormat(s template.HTML, rawArgs ...any) template.HTML {
58+
return template.HTML(fmt.Sprintf(string(s), htmlFormatArgs(s, rawArgs)...))
59+
}
60+
61+
// HTMLPrintf is the io.Writer counterpart of HTMLFormat: it writes s to w with
// fmt.Fprintf, using the arguments prepared by htmlFormatArgs, and returns the
// byte count and error reported by fmt.Fprintf.
// Like HTMLFormat it panics (inside htmlFormatArgs) when s contains no "%" or
// when rawArgs is empty; use HTMLPrint for a constant string.
func HTMLPrintf(w io.Writer, s template.HTML, rawArgs ...any) (int, error) {
62+
return fmt.Fprintf(w, string(s), htmlFormatArgs(s, rawArgs)...)
63+
}
64+
65+
// HTMLPrint writes s to w unchanged via io.WriteString (no formatting and no
// escaping is applied) and returns the byte count and error from that write.
func HTMLPrint(w io.Writer, s template.HTML) (int, error) {
66+
return io.WriteString(w, string(s))
67+
}
68+
69+
// HTMLPrintTag writes an opening HTML tag, e.g. `<details open="open">`, to w.
//
// tag is written verbatim, while every attribute name and value is passed
// through template.HTMLEscapeString. Attributes are emitted in ascending key
// order: Go randomizes map iteration, so ranging over attrs directly would
// produce a different attribute order (and therefore different HTML) from one
// call to the next.
//
// It returns the total number of bytes written and the first write error
// encountered; after an error the tag is left partially written.
func HTMLPrintTag(w io.Writer, tag template.HTML, attrs map[string]string) (written int, err error) {
	n, err := io.WriteString(w, "<"+string(tag))
	written += n
	if err != nil {
		return written, err
	}

	// Collect and sort the keys so the output is deterministic.
	keys := make([]string, 0, len(attrs))
	for k := range attrs {
		keys = append(keys, k)
	}
	slices.Sort(keys)

	for _, k := range keys {
		n, err = fmt.Fprintf(w, ` %s="%s"`, template.HTMLEscapeString(k), template.HTMLEscapeString(attrs[k]))
		written += n
		if err != nil {
			return written, err
		}
	}

	n, err = io.WriteString(w, ">")
	written += n
	return written, err
}

modules/markup/common/footnote.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,9 +405,9 @@ func (r *FootnoteHTMLRenderer) renderFootnoteLink(w util.BufWriter, source []byt
405405
if entering {
406406
n := node.(*FootnoteLink)
407407
is := strconv.Itoa(n.Index)
408-
_, _ = w.WriteString(`<sup id="fnref:`)
408+
_, _ = w.WriteString(`<sup id="fnref:user-content-`)
409409
_, _ = w.Write(n.Name)
410-
_, _ = w.WriteString(`"><a href="#fn:`)
410+
_, _ = w.WriteString(`"><a href="#fn:user-content-`)
411411
_, _ = w.Write(n.Name)
412412
_, _ = w.WriteString(`" class="footnote-ref" role="doc-noteref">`) // FIXME: here and below, need to keep the classes
413413
_, _ = w.WriteString(is)
@@ -419,7 +419,7 @@ func (r *FootnoteHTMLRenderer) renderFootnoteLink(w util.BufWriter, source []byt
419419
func (r *FootnoteHTMLRenderer) renderFootnoteBackLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
420420
if entering {
421421
n := node.(*FootnoteBackLink)
422-
_, _ = w.WriteString(` <a href="#fnref:`)
422+
_, _ = w.WriteString(` <a href="#fnref:user-content-`)
423423
_, _ = w.Write(n.Name)
424424
_, _ = w.WriteString(`" class="footnote-backref" role="doc-backlink">`)
425425
_, _ = w.WriteString("&#x21a9;&#xfe0e;")
@@ -431,7 +431,7 @@ func (r *FootnoteHTMLRenderer) renderFootnoteBackLink(w util.BufWriter, source [
431431
func (r *FootnoteHTMLRenderer) renderFootnote(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
432432
n := node.(*Footnote)
433433
if entering {
434-
_, _ = w.WriteString(`<li id="fn:`)
434+
_, _ = w.WriteString(`<li id="fn:user-content-`)
435435
_, _ = w.Write(n.Name)
436436
_, _ = w.WriteString(`" role="doc-endnote"`)
437437
if node.Attributes() != nil {

modules/markup/html.go

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ import (
1212
"strings"
1313
"sync"
1414

15+
"code.gitea.io/gitea/modules/htmlutil"
1516
"code.gitea.io/gitea/modules/markup/common"
17+
"code.gitea.io/gitea/modules/translation"
1618

1719
"golang.org/x/net/html"
1820
"golang.org/x/net/html/atom"
@@ -234,6 +236,49 @@ func postProcessString(ctx *RenderContext, procs []processor, content string) (s
234236
return buf.String(), nil
235237
}
236238

239+
// RenderTocHeadingItems writes a table of contents for ctx.TocHeadingItems to out.
// The output is a <details> element (opened with nodeDetailsAttrs as its
// attributes) holding a <summary> with the translated "toc" label, followed by
// nested <ul> lists with one <li><a href="#AnchorID">InnerText</a></li> per item.
// All write errors are deliberately discarded (best-effort rendering).
func RenderTocHeadingItems(ctx *RenderContext, nodeDetailsAttrs map[string]string, out io.Writer) {
240+
// Use the locale stored in the context; fall back to translation.NewLocale("")
// when the context carries none.
locale, ok := ctx.Value(translation.ContextKey).(translation.Locale)
241+
if !ok {
242+
locale = translation.NewLocale("")
243+
}
244+
_, _ = htmlutil.HTMLPrintTag(out, "details", nodeDetailsAttrs)
245+
_, _ = htmlutil.HTMLPrintf(out, "<summary>%s</summary>\n", locale.TrString("toc"))
246+
247+
// baseLevel is the smallest heading level present (starting from 6);
// items at that level go into the outermost <ul>.
baseLevel := 6
248+
for _, header := range ctx.TocHeadingItems {
249+
if header.HeadingLevel < baseLevel {
250+
baseLevel = header.HeadingLevel
251+
}
252+
}
253+
254+
currentLevel := baseLevel
255+
// indent grows/shrinks by two spaces per nesting level and is written
// before each line of list markup.
indent := []byte{' ', ' '}
256+
_, _ = htmlutil.HTMLPrint(out, "<ul>\n")
257+
for _, header := range ctx.TocHeadingItems {
258+
// Going deeper: open one <ul> per level until the item's level is reached
// (a jump of several levels opens several nested lists).
for currentLevel < header.HeadingLevel {
259+
_, _ = out.Write(indent)
260+
_, _ = htmlutil.HTMLPrint(out, "<ul>\n")
261+
indent = append(indent, ' ', ' ')
262+
currentLevel++
263+
}
264+
// Going shallower: close one </ul> per level back up to the item's level.
for currentLevel > header.HeadingLevel {
265+
indent = indent[:len(indent)-2]
266+
_, _ = out.Write(indent)
267+
_, _ = htmlutil.HTMLPrint(out, "</ul>\n")
268+
currentLevel--
269+
}
270+
_, _ = out.Write(indent)
271+
// NOTE(review): AnchorID and InnerText are presumably HTML-escaped by
// htmlutil.HTMLPrintf before substitution — confirm against htmlFormatArgs.
_, _ = htmlutil.HTMLPrintf(out, "<li><a href=\"#%s\">%s</a></li>\n", header.AnchorID, header.InnerText)
272+
}
273+
// Close any lists still open below the base level after the last item.
for currentLevel > baseLevel {
274+
indent = indent[:len(indent)-2]
275+
_, _ = out.Write(indent)
276+
_, _ = htmlutil.HTMLPrint(out, "</ul>\n")
277+
currentLevel--
278+
}
279+
_, _ = htmlutil.HTMLPrint(out, "</ul>\n</details>\n")
280+
}
281+
237282
func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error {
238283
if !ctx.usedByRender && ctx.RenderHelper != nil {
239284
defer ctx.RenderHelper.CleanUp()
@@ -284,6 +329,9 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output
284329
}
285330

286331
// Render everything to buf.
332+
if ctx.TocShowInSection == TocShowInMain && len(ctx.TocHeadingItems) > 0 {
333+
RenderTocHeadingItems(ctx, nil, output)
334+
}
287335
for _, node := range newNodes {
288336
if err := html.Render(output, node); err != nil {
289337
return fmt.Errorf("markup.postProcess: html.Render: %w", err)
@@ -314,7 +362,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod
314362
return node.NextSibling
315363
}
316364

317-
processNodeAttrID(ctx, node)
365+
processNodeHeadingAndID(ctx, node)
318366
processFootnoteNode(ctx, node) // FIXME: the footnote processing should be done in the "footnote.go" renderer directly
319367

320368
if isEmojiNode(node) {

modules/markup/html_node.go

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import (
1414
func isAnchorIDUserContent(s string) bool {
1515
// blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote
1616
// old logic: blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
17-
return strings.HasPrefix(s, "user-content-") || strings.Contains(s, ":user-content-")
17+
return strings.HasPrefix(s, "user-content-") || strings.Contains(s, ":user-content-") || isAnchorIDFootnote(s)
1818
}
1919

2020
func isAnchorIDFootnote(s string) bool {
@@ -34,7 +34,10 @@ func isHeadingTag(node *html.Node) bool {
3434
}
3535

3636
// getNodeText extracts the text content from a node and its children
37-
func getNodeText(node *html.Node) string {
37+
func getNodeText(node *html.Node, cached **string) string {
38+
if *cached != nil {
39+
return **cached
40+
}
3841
var text strings.Builder
3942
var extractText func(*html.Node)
4043
extractText = func(n *html.Node) {
@@ -46,36 +49,56 @@ func getNodeText(node *html.Node) string {
4649
}
4750
}
4851
extractText(node)
49-
return text.String()
52+
textStr := text.String()
53+
*cached = &textStr
54+
return textStr
5055
}
5156

52-
func processNodeAttrID(ctx *RenderContext, node *html.Node) {
57+
func processNodeHeadingAndID(ctx *RenderContext, node *html.Node) {
58+
// TODO: handle duplicate IDs, need to track existing IDs in the document
5359
// Add user-content- to IDs and "#" links if they don't already have them,
5460
// and convert the link href to a relative link to the host root
55-
hasID := false
61+
attrIDVal := ""
5662
for idx, attr := range node.Attr {
5763
if attr.Key == "id" {
58-
hasID = true
59-
if !isAnchorIDUserContent(attr.Val) {
60-
node.Attr[idx].Val = "user-content-" + attr.Val
64+
attrIDVal = attr.Val
65+
if !isAnchorIDUserContent(attrIDVal) {
66+
attrIDVal = "user-content-" + attrIDVal
67+
node.Attr[idx].Val = attrIDVal
6168
}
6269
}
6370
}
6471

72+
if !isHeadingTag(node) || !ctx.RenderOptions.EnableHeadingIDGeneration {
73+
return
74+
}
75+
6576
// For heading tags (h1-h6) without an id attribute, generate one from the text content.
6677
// This ensures HTML headings like <h1>Title</h1> get proper permalink anchors
6778
// matching the behavior of Markdown headings.
6879
// Only enabled for repository files and wiki pages via EnableHeadingIDGeneration option.
69-
if !hasID && isHeadingTag(node) && ctx.RenderOptions.EnableHeadingIDGeneration {
70-
text := getNodeText(node)
71-
if text != "" {
80+
var nodeTextCached *string
81+
if attrIDVal == "" {
82+
nodeText := getNodeText(node, &nodeTextCached)
83+
if nodeText != "" {
7284
// Use the same CleanValue function used by Markdown heading ID generation
73-
cleanedID := string(common.CleanValue([]byte(text)))
74-
if cleanedID != "" {
75-
node.Attr = append(node.Attr, html.Attribute{Key: "id", Val: "user-content-" + cleanedID})
85+
attrIDVal = string(common.CleanValue([]byte(nodeText)))
86+
if attrIDVal != "" {
87+
attrIDVal = "user-content-" + attrIDVal
88+
node.Attr = append(node.Attr, html.Attribute{Key: "id", Val: attrIDVal})
7689
}
7790
}
7891
}
92+
if ctx.TocShowInSection != "" {
93+
nodeText := getNodeText(node, &nodeTextCached)
94+
if nodeText != "" && attrIDVal != "" {
95+
ctx.TocHeadingItems = append(ctx.TocHeadingItems, &TocHeadingItem{
96+
HeadingLevel: int(node.Data[1] - '0'),
97+
AnchorID: attrIDVal,
98+
InnerText: nodeText,
99+
})
100+
}
101+
}
79102
}
80103

81104
func processFootnoteNode(ctx *RenderContext, node *html.Node) {

modules/markup/html_toc_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright 2026 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package markup_test
5+
6+
import (
7+
"regexp"
8+
"testing"
9+
10+
"code.gitea.io/gitea/modules/markup"
11+
"code.gitea.io/gitea/modules/markup/markdown"
12+
"code.gitea.io/gitea/modules/test"
13+
14+
"github.com/stretchr/testify/assert"
15+
)
16+
17+
// TestToCWithHTML renders a Markdown document whose front matter sets
// include_toc: true and whose headings contain inline HTML, a code span and
// Markdown emphasis. It asserts the exact rendered output: a table of contents
// whose entries use the headings' plain text and "user-content-" anchor IDs,
// followed by the headings themselves. The h2 -> h4 jump in the input checks
// that a skipped level produces two nested <ul> elements.
func TestToCWithHTML(t *testing.T) {
18+
defer test.MockVariableValue(&markup.RenderBehaviorForTesting.DisableAdditionalAttributes, true)()
19+
20+
t1 := `tag <a href="link">link</a> and <b>Bold</b>`
21+
t2 := "code block `<a>`"
22+
t3 := "markdown **bold**"
23+
input := `---
24+
include_toc: true
25+
---
26+
27+
# ` + t1 + `
28+
## ` + t2 + `
29+
#### ` + t3 + `
30+
## last
31+
`
32+
33+
renderCtx := markup.NewTestRenderContext().WithEnableHeadingIDGeneration(true)
34+
resultHTML, err := markdown.RenderString(renderCtx, input)
35+
assert.NoError(t, err)
36+
result := string(resultHTML)
37+
// Replace the rendered front matter <details> block with a newline so that
// only the ToC and the headings are compared.
re := regexp.MustCompile(`(?s)<details class="frontmatter-content">.*?</details>`)
38+
result = re.ReplaceAllString(result, "\n")
39+
expected := `<details><summary>toc</summary>
40+
<ul>
41+
<li><a href="#user-content-tag-link-and-bold" rel="nofollow">tag link and Bold</a></li>
42+
<ul>
43+
<li><a href="#user-content-code-block-a" rel="nofollow">code block &lt;a&gt;</a></li>
44+
<ul>
45+
<ul>
46+
<li><a href="#user-content-markdown-bold" rel="nofollow">markdown bold</a></li>
47+
</ul>
48+
</ul>
49+
<li><a href="#user-content-last" rel="nofollow">last</a></li>
50+
</ul>
51+
</ul>
52+
</details>
53+
54+
<h1 id="user-content-tag-link-and-bold">tag <a href="/link" rel="nofollow">link</a> and <b>Bold</b></h1>
55+
<h2 id="user-content-code-block-a">code block <code>&lt;a&gt;</code></h2>
56+
<h4 id="user-content-markdown-bold">markdown <strong>bold</strong></h4>
57+
<h2 id="user-content-last">last</h2>
58+
`
59+
assert.Equal(t, expected, result)
60+
}

modules/markup/markdown/goldmark.go

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,10 @@ func (g *ASTTransformer) applyElementDir(n ast.Node) {
4141
// Transform transforms the given AST tree.
4242
func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
4343
firstChild := node.FirstChild()
44-
tocMode := ""
4544
ctx := pc.Get(renderContextKey).(*markup.RenderContext)
4645
rc := pc.Get(renderConfigKey).(*RenderConfig)
4746

48-
tocList := make([]Header, 0, 20)
47+
tocMode := ""
4948
if rc.yamlNode != nil {
5049
metaNode := rc.toMetaNode(g)
5150
if metaNode != nil {
@@ -60,8 +59,6 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
6059
}
6160

6261
switch v := n.(type) {
63-
case *ast.Heading:
64-
g.transformHeading(ctx, v, reader, &tocList)
6562
case *ast.Paragraph:
6663
g.applyElementDir(v)
6764
case *ast.List:
@@ -79,19 +76,18 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
7976
return ast.WalkContinue, nil
8077
})
8178

82-
showTocInMain := tocMode == "true" /* old behavior, in main view */ || tocMode == "main"
83-
showTocInSidebar := !showTocInMain && tocMode != "false" // not hidden, not main, then show it in sidebar
84-
if len(tocList) > 0 && (showTocInMain || showTocInSidebar) {
85-
if showTocInMain {
86-
tocNode := createTOCNode(tocList, rc.Lang, nil)
87-
node.InsertBefore(node, firstChild, tocNode)
88-
} else {
89-
tocNode := createTOCNode(tocList, rc.Lang, map[string]string{"open": "open"})
90-
ctx.SidebarTocNode = tocNode
79+
if ctx.RenderOptions.EnableHeadingIDGeneration {
80+
showTocInMain := tocMode == "true" /* old behavior, in main view */ || tocMode == "main"
81+
showTocInSidebar := !showTocInMain && tocMode != "false" // not hidden, not main, then show it in sidebar
82+
switch {
83+
case showTocInMain:
84+
ctx.TocShowInSection = markup.TocShowInMain
85+
case showTocInSidebar:
86+
ctx.TocShowInSection = markup.TocShowInSidebar
9187
}
9288
}
9389

94-
if len(rc.Lang) > 0 {
90+
if rc.Lang != "" {
9591
node.SetAttributeString("lang", []byte(rc.Lang))
9692
}
9793
}

0 commit comments

Comments
 (0)