Skip to content

Commit b858d58

Browse files
authored
fix: UTF-8 filename truncation (#1272)
## What?

- Add UTF-8-safe truncation for sanitized attachment filenames
- Preserve filename extensions while truncating the base name on valid rune boundaries
- Add regression coverage for long CJK and emoji filenames

## Why?

The previous byte-slicing logic could cut through a multi-byte UTF-8 sequence when shortening long attachment filenames, producing invalid UTF-8 for names containing CJK characters or emoji.

Fixes #1102.
1 parent 6852183 commit b858d58

2 files changed

Lines changed: 59 additions & 2 deletions

File tree

main.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"strings"
2525
"sync"
2626
"time"
27+
"unicode/utf8"
2728

2829
tea "charm.land/bubbletea/v2"
2930
"github.com/floatpane/matcha/backend"
@@ -3008,13 +3009,29 @@ func sanitizeFilename(name string) string {
30083009
if len(name) > maxFilenameLen {
30093010
ext := filepath.Ext(name)
30103011
if len(ext) > maxFilenameLen {
3011-
ext = ext[:maxFilenameLen]
3012+
ext = truncateUTF8(ext, maxFilenameLen)
30123013
}
3013-
name = name[:maxFilenameLen-len(ext)] + ext
3014+
base := strings.TrimSuffix(name, ext)
3015+
name = truncateUTF8(base, maxFilenameLen-len(ext)) + ext
30143016
}
30153017
return name
30163018
}
30173019

3020+
// truncateUTF8 returns the longest prefix of s that is at most maxBytes bytes
// long and is valid UTF-8, so a multi-byte rune is never split.
//
// A non-positive maxBytes yields "". A string that already fits within
// maxBytes is returned unchanged, without being validated. Otherwise the
// result stops before the first malformed byte or before the first rune that
// would not fit completely inside the byte budget, whichever comes first.
func truncateUTF8(s string, maxBytes int) string {
	if maxBytes <= 0 {
		return ""
	}
	if len(s) <= maxBytes {
		return s
	}
	// Decode forward inside the byte budget only; a rune straddling the limit
	// then looks like a truncated (invalid) sequence and ends the scan. This
	// is a single pass, rather than re-validating the whole prefix after each
	// trailing rune is stripped.
	window := s[:maxBytes]
	end := 0
	for end < len(window) {
		r, size := utf8.DecodeRuneInString(window[end:])
		// RuneError with width 1 signals a malformed or incomplete encoding;
		// a genuine U+FFFD in the input decodes with width 3 and is kept.
		if r == utf8.RuneError && size == 1 {
			break
		}
		end += size
	}
	return window[:end]
}
3034+
30183035
func downloadAttachmentCmd(account *config.Account, uid uint32, msg tui.DownloadAttachmentMsg) tea.Cmd {
30193036
return func() tea.Msg {
30203037
// Download and decode the attachment using encoding provided in msg.Encoding.

main_test.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package main
2+
3+
import (
4+
"path/filepath"
5+
"strings"
6+
"testing"
7+
"unicode/utf8"
8+
)
9+
10+
func TestSanitizeFilenameTruncatesCJKOnUTF8Boundary(t *testing.T) {
11+
name := strings.Repeat("文", 100) + ".txt"
12+
13+
got := sanitizeFilename(name)
14+
15+
if !utf8.ValidString(got) {
16+
t.Fatalf("sanitizeFilename returned invalid UTF-8: %q", got)
17+
}
18+
if len(got) > 255 {
19+
t.Fatalf("sanitizeFilename returned %d bytes, want at most 255", len(got))
20+
}
21+
if filepath.Ext(got) != ".txt" {
22+
t.Fatalf("sanitizeFilename lost extension: got %q", got)
23+
}
24+
}
25+
26+
func TestSanitizeFilenameTruncatesEmojiOnUTF8Boundary(t *testing.T) {
27+
name := strings.Repeat("🚀", 80) + ".log"
28+
29+
got := sanitizeFilename(name)
30+
31+
if !utf8.ValidString(got) {
32+
t.Fatalf("sanitizeFilename returned invalid UTF-8: %q", got)
33+
}
34+
if len(got) > 255 {
35+
t.Fatalf("sanitizeFilename returned %d bytes, want at most 255", len(got))
36+
}
37+
if filepath.Ext(got) != ".log" {
38+
t.Fatalf("sanitizeFilename lost extension: got %q", got)
39+
}
40+
}

0 commit comments

Comments
 (0)