Skip to content

Commit efa8a08

Browse files
kolkov and claude committed
feat: enhance Asian character detection per iblea's suggestion
Improved Unicode width calculation for Korean and Japanese characters by adding a dedicated checkAsianCharacter helper function.

Changes:
- Add checkAsianCharacter() with comprehensive Korean/Japanese ranges:
  * Korean Hangul (unicode.Hangul)
  * Korean Hangul Jamo (0x1100-0x11FF)
  * Korean Hangul Compatibility Jamo (0x3130-0x318F)
  * Enclosed CJK Letters (0x3200-0x32FF)
  * Japanese Hiragana (unicode.Hiragana)
  * Japanese Katakana (unicode.Katakana)
- Add Miscellaneous Technical emoji range (0x2300-0x23FF) for clock symbols and similar emoji
- Add comprehensive tests for Korean/Japanese character detection
- Add TestCheckAsianCharacter for validating the helper function

Credit: Implementation based on iblea's code review suggestion on PR #563

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent 5647ced commit efa8a08

File tree

2 files changed

+47
-2
lines changed

2 files changed

+47
-2
lines changed

size.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,20 @@ func stringWidth(s string) int {
6060
return ansiWidth
6161
}
6262

63+
// checkAsianCharacter reports whether r is a Chinese, Korean, or Japanese
// character of the kind that is normally rendered two terminal cells wide.
//
// The check is based on Unicode scripts and code-point blocks, not on the
// East Asian Width property, so halfwidth forms that belong to the Hangul or
// Katakana scripts are matched as well.
func checkAsianCharacter(r rune) bool {
	// Explicit block ranges. These also cover code points inside the blocks
	// that are not assigned to any script in the unicode tables.
	switch {
	case r >= 0x1100 && r <= 0x11FF: // Hangul Jamo (conjoining jamo, ᄀ-ᇿ)
		return true
	case r >= 0x3130 && r <= 0x318F: // Hangul Compatibility Jamo (ㄱ-ㅎ, ㅏ-ㅣ)
		return true
	case r >= 0x3200 && r <= 0x32FF: // Enclosed CJK Letters and Months
		return true
	}

	// Script membership: CJK ideographs, Korean Hangul syllables, and the
	// two Japanese kana scripts.
	return unicode.In(r, unicode.Han, unicode.Hangul, unicode.Hiragana, unicode.Katakana)
}
76+
6377
// containsComplexUnicode checks if string contains emoji or complex Unicode
6478
func containsComplexUnicode(s string) bool {
6579
for _, r := range s {
@@ -68,10 +82,10 @@ func containsComplexUnicode(s string) bool {
6882
(r >= 0x1F300 && r <= 0x1F5FF) || // Misc Symbols and Pictographs
6983
(r >= 0x1F680 && r <= 0x1F6FF) || // Transport and Map Symbols
7084
(r >= 0x1F700 && r <= 0x1F77F) || // Alchemical Symbols
85+
(r >= 0x2300 && r <= 0x23FF) || // Miscellaneous Technical (clocks, etc.)
7186
(r >= 0x2600 && r <= 0x26FF) || // Miscellaneous Symbols
7287
(r >= 0x2700 && r <= 0x27BF) || // Dingbats
73-
unicode.Is(unicode.Han, r) || // CJK characters
74-
r > 0x3000 { // Other wide characters
88+
checkAsianCharacter(r) { // Asian characters (CJK, Korean, Japanese)
7589
return true
7690
}
7791
}

size_emoji_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ func TestComplexUnicodeDetection(t *testing.T) {
6969
{"中文", true, "Chinese characters"},
7070
{"Hello World", false, "ASCII with space"},
7171
{"测试 Test", true, "Mixed Chinese and ASCII"},
72+
{"안녕하세요", true, "Korean Hangul"},
73+
{"こんにちは", true, "Japanese Hiragana"},
74+
{"カタカナ", true, "Japanese Katakana"},
75+
{"한글 Test", true, "Mixed Korean and ASCII"},
76+
{"ひらがな Test", true, "Mixed Japanese Hiragana and ASCII"},
7277
}
7378

7479
for _, tt := range tests {
@@ -79,4 +84,30 @@ func TestComplexUnicodeDetection(t *testing.T) {
7984
}
8085
})
8186
}
87+
}
88+
89+
func TestCheckAsianCharacter(t *testing.T) {
90+
tests := []struct {
91+
input rune
92+
expected bool
93+
name string
94+
}{
95+
{'A', false, "ASCII letter"},
96+
{'中', true, "Chinese character"},
97+
{'한', true, "Korean Hangul"},
98+
{'ㄱ', true, "Korean Jamo"},
99+
{'あ', true, "Japanese Hiragana"},
100+
{'カ', true, "Japanese Katakana"},
101+
{'1', false, "ASCII digit"},
102+
{' ', false, "Space"},
103+
}
104+
105+
for _, tt := range tests {
106+
t.Run(tt.name, func(t *testing.T) {
107+
got := checkAsianCharacter(tt.input)
108+
if got != tt.expected {
109+
t.Errorf("checkAsianCharacter(%q) = %v, want %v", tt.input, got, tt.expected)
110+
}
111+
})
112+
}
82113
}

0 commit comments

Comments
 (0)