Skip to content

Commit 3d613c8

Browse files
committed
Clean up Unicode codepoint helper functions
1 parent 13341d1 commit 3d613c8

File tree

2 files changed

+41
-32
lines changed

2 files changed

+41
-32
lines changed

src/terminal/terminal.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ void Emulator::execute( const Parser::Execute* act )
5757

5858
bool Emulator::try_combine( wchar_t ch, int chwidth )
5959
{
60-
bool zero_width = chwidth == 0;
60+
bool zero_width = chwidth == 0 || is_unicode_zwj( ch );
6161
bool force_wide = ch == 0xFE0F; // VS16
6262
static constexpr std::string_view zwj = "\u200D";
6363

@@ -128,9 +128,13 @@ void Emulator::print( const Parser::Print* act )
128128

129129
/*
130130
* Check for printing ISO 8859-1 first, it's a cheap way to detect
131-
* some common narrow characters.
131+
* some common narrow characters. Otherwise, check for Unicode width overrides: code points
132+
* that wcwidth does not report as width 2 but that should be treated as wide anyway.
132133
*/
133-
const int chwidth = ch == L'\0' ? -1 : ( Cell::isprint_iso8859_1( ch ) ? 1 : mosh_wcwidth( ch ) );
134+
const int chwidth = ch == L'\0' ? -1
135+
: Cell::isprint_iso8859_1( ch ) ? 1
136+
: is_unicode_wide_override( ch ) ? 2
137+
: mosh_wcwidth( ch );
134138

135139
// attempt to combine with previous cell if necessary
136140
if ( !Cell::isprint_iso8859_1( ch ) && try_combine( ch, chwidth ) ) {

src/util/char_utils.h

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,40 @@
55

66
static int mosh_wcwidth( uint32_t c )
77
{
8-
// ZWJ is a combining character
9-
if ( c == 0x0200D ) {
10-
return 0;
8+
int width = widechar_wcwidth( c );
9+
if ( width >= 0 ) {
10+
return width;
1111
}
1212

13+
/* https://github.com/ridiculousfish/widecharwidth/tree/master#c-usage */
14+
switch ( width ) {
15+
case widechar_nonprint:
16+
return -1;
17+
case widechar_combining:
18+
return 0;
19+
case widechar_ambiguous:
20+
return 1;
21+
case widechar_private_use:
22+
return 1;
23+
case widechar_unassigned:
24+
return 1;
25+
case widechar_non_character:
26+
return -1;
27+
case widechar_widened_in_9:
28+
return 2;
29+
default:
30+
return -1;
31+
}
32+
}
33+
34+
static bool is_unicode_zwj( uint32_t c )
35+
{
36+
// ZWJ (U+200D ZERO WIDTH JOINER) is a combining character
37+
return c == 0x0200D;
38+
}
39+
40+
static bool is_unicode_wide_override( uint32_t c )
41+
{
1342
// Regional indicator symbols (U+1F1E6..U+1F1FF) are wide
1443
switch ( c ) {
1544
case 0x1F1E6:
@@ -38,33 +67,9 @@ static int mosh_wcwidth( uint32_t c )
3867
case 0x1F1FD:
3968
case 0x1F1FE:
4069
case 0x1F1FF:
41-
return 2;
42-
}
43-
44-
int width = widechar_wcwidth( c );
45-
if ( width >= 0 ) {
46-
return width;
47-
}
48-
49-
/* https://github.com/ridiculousfish/widecharwidth/tree/master#c-usage */
50-
switch ( width ) {
51-
case widechar_nonprint:
52-
return -1;
53-
case widechar_combining:
54-
return 0;
55-
case widechar_ambiguous:
56-
return 1;
57-
case widechar_private_use:
58-
return 1;
59-
case widechar_unassigned:
60-
return 1;
61-
case widechar_non_character:
62-
return -1;
63-
case widechar_widened_in_9:
64-
return 2;
65-
default:
66-
return -1;
70+
return true;
6771
}
72+
return false;
6873
}
6974

7075
static bool is_unicode_modifier( wchar_t ch )

0 commit comments

Comments
 (0)