Skip to content

Commit 3754e9d

Browse files
authored
Improve diff highlighting (#36583)
1 parent fd89cee commit 3754e9d

File tree

2 files changed

+132
-48
lines changed

2 files changed

+132
-48
lines changed

services/gitdiff/highlightdiff.go

Lines changed: 86 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ import (
77
"bytes"
88
"html/template"
99
"strings"
10+
"unicode/utf8"
11+
12+
"code.gitea.io/gitea/modules/util"
1013

1114
"github.com/sergi/go-diff/diffmatchpatch"
1215
)
@@ -84,6 +87,10 @@ type highlightCodeDiff struct {
8487
tokenPlaceholderMap map[string]rune
8588

8689
placeholderOverflowCount int
90+
91+
diffCodeAddedOpen rune
92+
diffCodeRemovedOpen rune
93+
diffCodeClose rune
8794
}
8895

8996
func newHighlightCodeDiff() *highlightCodeDiff {
@@ -155,11 +162,12 @@ func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA
155162

156163
buf := bytes.NewBuffer(nil)
157164

158-
addedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="added-code">`)
159-
addedCodeSuffix := hcd.registerTokenAsPlaceholder(`</span><!-- added-code -->`)
160-
161-
removedCodePrefix := hcd.registerTokenAsPlaceholder(`<span class="removed-code">`)
162-
removedCodeSuffix := hcd.registerTokenAsPlaceholder(`</span><!-- removed-code -->`)
165+
if hcd.diffCodeClose == 0 {
166+
// tests can pre-set the placeholders
167+
hcd.diffCodeAddedOpen = hcd.registerTokenAsPlaceholder(`<span class="added-code">`)
168+
hcd.diffCodeRemovedOpen = hcd.registerTokenAsPlaceholder(`<span class="removed-code">`)
169+
hcd.diffCodeClose = hcd.registerTokenAsPlaceholder(`</span><!-- diff-code-close -->`)
170+
}
163171

164172
equalPartSpaceOnly := true
165173
for _, diff := range diffs {
@@ -173,21 +181,21 @@ func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA
173181

174182
// only add "added"/"removed" tags when needed:
175183
// * non-space contents appear in the DiffEqual parts (not a full-line add/del)
176-
// * placeholder map still works (not exhausted, can get removedCodeSuffix)
177-
addDiffTags := !equalPartSpaceOnly && removedCodeSuffix != 0
184+
// * placeholder map still works (not exhausted, can get the closing tag placeholder)
185+
addDiffTags := !equalPartSpaceOnly && hcd.diffCodeClose != 0
178186
if addDiffTags {
179187
for _, diff := range diffs {
180188
switch {
181189
case diff.Type == diffmatchpatch.DiffEqual:
182190
buf.WriteString(diff.Text)
183191
case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
184-
buf.WriteRune(addedCodePrefix)
192+
buf.WriteRune(hcd.diffCodeAddedOpen)
185193
buf.WriteString(diff.Text)
186-
buf.WriteRune(addedCodeSuffix)
194+
buf.WriteRune(hcd.diffCodeClose)
187195
case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
188-
buf.WriteRune(removedCodePrefix)
196+
buf.WriteRune(hcd.diffCodeRemovedOpen)
189197
buf.WriteString(diff.Text)
190-
buf.WriteRune(removedCodeSuffix)
198+
buf.WriteRune(hcd.diffCodeClose)
191199
}
192200
}
193201
} else {
@@ -203,12 +211,17 @@ func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA
203211
}
204212

205213
func (hcd *highlightCodeDiff) registerTokenAsPlaceholder(token string) rune {
214+
recovered := token
215+
if token[0] == '<' && token[1] != '<' {
216+
// when recovering a single tag, only use the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function)
217+
recovered = token[:strings.IndexByte(token, '>')+1]
218+
}
206219
placeholder, ok := hcd.tokenPlaceholderMap[token]
207220
if !ok {
208221
placeholder = hcd.nextPlaceholder()
209222
if placeholder != 0 {
210223
hcd.tokenPlaceholderMap[token] = placeholder
211-
hcd.placeholderTokenMap[placeholder] = token
224+
hcd.placeholderTokenMap[placeholder] = recovered
212225
}
213226
}
214227
return placeholder
@@ -285,49 +298,77 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) s
285298
return res.String()
286299
}
287300

301+
func (hcd *highlightCodeDiff) extractNextPlaceholder(buf []byte, lastIdx int) (idx int, placeholder rune, runeLen int, token string) {
302+
for idx = lastIdx; idx < len(buf); {
303+
placeholder, runeLen = utf8.DecodeRune(buf[idx:])
304+
if token = hcd.placeholderTokenMap[placeholder]; token != "" {
305+
break
306+
}
307+
idx += runeLen
308+
}
309+
return idx, placeholder, runeLen, token
310+
}
311+
288312
func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
289313
sb := strings.Builder{}
290314
var tagStack []string
315+
var diffCodeOpenTag string
316+
diffCodeCloseTag := hcd.placeholderTokenMap[hcd.diffCodeClose]
317+
strBytes := util.UnsafeStringToBytes(str)
318+
for idx := 0; idx < len(strBytes); {
319+
newIdx, placeholder, lastRuneLen, token := hcd.extractNextPlaceholder(strBytes, idx)
320+
if newIdx != idx {
321+
if diffCodeOpenTag != "" {
322+
sb.WriteString(diffCodeOpenTag)
323+
sb.Write(strBytes[idx:newIdx])
324+
sb.WriteString(diffCodeCloseTag)
325+
} else {
326+
sb.Write(strBytes[idx:newIdx])
327+
}
328+
} // else: no text content before, the current token is a placeholder
329+
if token == "" {
330+
break // reaches the string end, no more placeholder
331+
}
332+
idx = newIdx + lastRuneLen
291333

292-
for _, r := range str {
293-
token, ok := hcd.placeholderTokenMap[r]
294-
if !ok || token == "" {
295-
sb.WriteRune(r) // if the rune is not a placeholder, write it as it is
334+
// for HTML entity
335+
if token[0] == '&' {
336+
sb.WriteString(token)
296337
continue
297338
}
298-
var tokenToRecover string
299-
if strings.HasPrefix(token, "</") { // for closing tag
300-
// only get the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function)
301-
tokenToRecover = token[:strings.IndexByte(token, '>')+1]
302-
if len(tagStack) == 0 {
303-
continue // if no opening tag in stack yet, skip the closing tag
304-
}
305-
tagStack = tagStack[:len(tagStack)-1]
306-
} else if token[0] == '<' { // for HTML tag
307-
if token[1] == '<' {
308-
// full tag `<<span>content</span>>`, recover to `<span>content</span>`
309-
tokenToRecover = token[1 : len(token)-1]
339+
340+
// for various HTML tags
341+
var recovered string
342+
if token[1] == '<' { // full tag `<<span>content</span>>`, recover to `<span>content</span>`
343+
recovered = token[1 : len(token)-1]
344+
if diffCodeOpenTag != "" {
345+
recovered = diffCodeOpenTag + recovered + diffCodeCloseTag
346+
} // else: just use the recovered content
347+
} else if token[1] != '/' { // opening tag
348+
if placeholder == hcd.diffCodeAddedOpen || placeholder == hcd.diffCodeRemovedOpen {
349+
diffCodeOpenTag = token
310350
} else {
311-
// opening tag
312-
tokenToRecover = token
313-
tagStack = append(tagStack, token)
351+
recovered = token
352+
tagStack = append(tagStack, recovered)
314353
}
315-
} else if token[0] == '&' { // for HTML entity
316-
tokenToRecover = token
317-
} // else: impossible
318-
sb.WriteString(tokenToRecover)
354+
} else { // closing tag
355+
if placeholder == hcd.diffCodeClose {
356+
diffCodeOpenTag = "" // the highlighted diff is closed, no more diff
357+
} else if len(tagStack) != 0 {
358+
recovered = token
359+
tagStack = tagStack[:len(tagStack)-1]
360+
} // else: if no opening tag in stack yet, skip the closing tag
361+
}
362+
sb.WriteString(recovered)
319363
}
320364

321-
if len(tagStack) > 0 {
322-
// close all opening tags
323-
for i := len(tagStack) - 1; i >= 0; i-- {
324-
tagToClose := tagStack[i]
325-
// get the closing tag "</span>" from "<span class=...>" or "<span>"
326-
pos := strings.IndexAny(tagToClose, " >")
327-
if pos != -1 {
328-
sb.WriteString("</" + tagToClose[1:pos] + ">")
329-
} // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag
330-
}
365+
// close all opening tags
366+
for i := len(tagStack) - 1; i >= 0; i-- {
367+
tagToClose := tagStack[i]
368+
// get the closing tag "</span>" from "<span class=...>" or "<span>"
369+
pos := strings.IndexAny(tagToClose, " >")
370+
// pos must be positive, because the tags were pushed by us
371+
sb.WriteString("</" + tagToClose[1:pos] + ">")
331372
}
332373
return template.HTML(sb.String())
333374
}

services/gitdiff/highlightdiff_test.go

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func TestDiffWithHighlight(t *testing.T) {
6464
outDel = hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB)
6565
assert.Equal(t, `<span>line1</span>`+"\n"+`<span class="removed-code"><span>line2</span></span>`, string(outDel))
6666
outAdd = hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB)
67-
assert.Equal(t, `<span>line1</span>`+"\n"+`<span class="added-code"><span>line!</span></span>`, string(outAdd))
67+
assert.Equal(t, `<span>line1</span>`+"\n"+`<span><span class="added-code">line!</span></span>`, string(outAdd))
6868
})
6969

7070
t.Run("OpenCloseTags", func(t *testing.T) {
@@ -75,12 +75,55 @@ func TestDiffWithHighlight(t *testing.T) {
7575
assert.Empty(t, string(hcd.recoverOneDiff("C")))
7676
})
7777

78-
t.Run("ComplexDiff", func(t *testing.T) {
78+
t.Run("ComplexDiff1", func(t *testing.T) {
7979
oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`)
8080
newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot.xxx || bot.yyy`)
8181
hcd := newHighlightCodeDiff()
8282
out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode)
83-
assert.Equal(t, `<span class="added-code"><span class="nx">bot</span><span class="p">.</span></span><span class="nx">xxx</span><span class="w"> </span><span class="o">||</span><span class="w"> </span><span class="added-code"><span class="nx">bot</span><span class="p">.</span></span><span class="nx">yyy</span>`, string(out))
83+
assert.Equal(t, strings.ReplaceAll(`
84+
<span class="added-code"><span class="nx">bot</span></span>
85+
<span class="added-code"><span class="p">.</span></span>
86+
<span class="nx">xxx</span><span class="w"> </span><span class="o">||</span><span class="w"> </span>
87+
<span class="added-code"><span class="nx">bot</span></span>
88+
<span class="added-code"><span class="p">.</span></span>
89+
<span class="nx">yyy</span>`, "\n", ""), string(out))
90+
})
91+
92+
forceTokenAsPlaceholder := func(hcd *highlightCodeDiff, r rune, token string) rune {
93+
// for testing purpose only
94+
hcd.tokenPlaceholderMap[token] = r
95+
hcd.placeholderTokenMap[r] = token
96+
return r
97+
}
98+
99+
t.Run("ComplexDiff2", func(t *testing.T) {
100+
// When running "diffLineWithHighlight", the newly inserted "added-code", and "removed-code" tags may break the original layout.
101+
// The newly inserted tags can appear in any position, because the "diff" algorithm can make outputs like:
102+
// * Equal: <span>
103+
// * Insert: xx</span><span>yy
104+
// * Equal: zz</span>
105+
// Then the newly inserted tags will make this output, the tags mismatch.
106+
// * <span> <added>xx</span><span>yy</added> zz</span>
107+
// So we need to fix it to:
108+
// * <span> <added>xx</added></span> <span><added>yy</added> zz</span>
109+
hcd := newHighlightCodeDiff()
110+
hcd.diffCodeAddedOpen = forceTokenAsPlaceholder(hcd, '[', "<add>")
111+
hcd.diffCodeClose = forceTokenAsPlaceholder(hcd, ']', "</add>")
112+
forceTokenAsPlaceholder(hcd, '{', "<T>")
113+
forceTokenAsPlaceholder(hcd, '}', "</T>")
114+
assert.Equal(t, `aa<T>xx<add>yy</add>zz</T>bb`, string(hcd.recoverOneDiff("aa{xx[yy]zz}bb")))
115+
assert.Equal(t, `aa<add>xx</add><T><add>yy</add></T><add>zz</add>bb`, string(hcd.recoverOneDiff("aa[xx{yy}zz]bb")))
116+
assert.Equal(t, `aa<T>xx<add>yy</add></T><add>zz</add>bb`, string(hcd.recoverOneDiff("aa{xx[yy}zz]bb")))
117+
assert.Equal(t, `aa<add>xx</add><T><add>yy</add>zz</T>bb`, string(hcd.recoverOneDiff("aa[xx{yy]zz}bb")))
118+
assert.Equal(t, `aa<add>xx</add><T><add>yy</add><add>zz</add></T><add>bb</add>cc`, string(hcd.recoverOneDiff("aa[xx{yy][zz}bb]cc")))
119+
120+
// And do a simple test for "diffCodeRemovedOpen", it shares the same logic as "diffCodeAddedOpen"
121+
hcd = newHighlightCodeDiff()
122+
hcd.diffCodeRemovedOpen = forceTokenAsPlaceholder(hcd, '[', "<del>")
123+
hcd.diffCodeClose = forceTokenAsPlaceholder(hcd, ']', "</del>")
124+
forceTokenAsPlaceholder(hcd, '{', "<T>")
125+
forceTokenAsPlaceholder(hcd, '}', "</T>")
126+
assert.Equal(t, `aa<del>xx</del><T><del>yy</del><del>zz</del></T><del>bb</del>cc`, string(hcd.recoverOneDiff("aa[xx{yy][zz}bb]cc")))
84127
})
85128
}
86129

0 commit comments

Comments
 (0)