Skip to content

Commit 4c049cf

Browse files
committed
unicode: overhaul the way unicode codepoints are combined
Not all Unicode codepoints that should be combined into previous codepoints to create new graphemes are considered zero width. Specifically, regional indicators by themselves are valid graphemes, but when combined, they create various regional graphemes. Additionally, there are a few emoji modifiers (for skin tone) that are colors by themselves, but modify the previous grapheme if it is combinable and _become_ zero width if so. ZWJ (the zero-width joiner) is also not itself considered a combining character, but it can cause two non-zero-width codepoints to be combined, and may increase the grapheme's width from one to two. VS16 can also modify a grapheme to force it to be rendered as an emoji, which can increase its width from one to two. Considering all of this, the way mosh combines Unicode codepoints into cells needed some work. Now, it'll always check whether it's possible to combine with the previous cell and do so if it makes sense. This largely follows tmux's general way of looking for combining opportunities. Handling Hangul Jamo (Korean) combining, which has its own rules, is still a TODO.
1 parent 08280ad commit 4c049cf

File tree

5 files changed

+178
-71
lines changed

5 files changed

+178
-71
lines changed

src/terminal/terminal.cc

Lines changed: 73 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
#include <cassert>
3434
#include <cstdlib>
3535
#include <cstring>
36-
#include <typeinfo>
3736

3837
#include <unistd.h>
3938

@@ -56,6 +55,69 @@ void Emulator::execute( const Parser::Execute* act )
5655
dispatch.dispatch( CONTROL, act, &fb );
5756
}
5857

58+
bool Emulator::try_combine( wchar_t ch, int chwidth )
59+
{
60+
bool zero_width = chwidth == 0;
61+
bool force_wide = ch == 0xFE0F; // VS16
62+
63+
Cell* combining_cell = fb.get_combining_cell();
64+
if ( !combining_cell ) {
65+
return zero_width;
66+
}
67+
68+
if ( !zero_width ) {
69+
// TODO: check Hangul Jamo state here
70+
if ( is_unicode_modifier( ch ) ) {
71+
if ( combining_cell->size() < 2 ) {
72+
return false;
73+
}
74+
force_wide = true;
75+
} else if ( combining_cell->size() < 3
76+
|| combining_cell->get_contents().rfind( "\342\200\215" ) == std::string::npos ) {
77+
return false;
78+
}
79+
}
80+
81+
if ( combining_cell->empty() ) {
82+
/* cell starts with combining character */
83+
/* ... but isn't necessarily the target for a new
84+
base character [e.g. start of line], if the
85+
combining character has been cleared with
86+
a sequence like ED ("J") or EL ("K") */
87+
assert( !combining_cell->get_wide() );
88+
combining_cell->set_fallback( true );
89+
fb.ds.move_col( 1, true, true );
90+
}
91+
92+
if ( !combining_cell->full() ) {
93+
combining_cell->append( ch );
94+
95+
if ( force_wide && !combining_cell->get_wide() ) {
96+
// have to move this emoji to the next line
97+
if ( fb.ds.auto_wrap_mode && fb.ds.next_print_will_wrap ) {
98+
fb.get_mutable_row( -1 )->set_wrap( false );
99+
fb.ds.move_col( 0 );
100+
fb.move_rows_autoscroll( 1 );
101+
*fb.get_mutable_cell() = *combining_cell;
102+
fb.reset_cell( combining_cell );
103+
fb.ds.move_col( 1, true, true );
104+
combining_cell = fb.get_combining_cell();
105+
}
106+
combining_cell->set_wide( true );
107+
if ( fb.ds.insert_mode ) {
108+
fb.insert_cell( fb.ds.get_cursor_row(), fb.ds.get_cursor_col() );
109+
} else if ( fb.ds.get_cursor_col() < fb.ds.get_width() ) {
110+
fb.reset_cell( fb.get_mutable_cell() );
111+
}
112+
fb.get_mutable_cell()->set_wide_padding( true );
113+
fb.ds.move_col( 1, true, true );
114+
}
115+
return true;
116+
}
117+
118+
return false;
119+
}
120+
59121
void Emulator::print( const Parser::Print* act )
60122
{
61123
assert( act->char_present );
@@ -68,6 +130,11 @@ void Emulator::print( const Parser::Print* act )
68130
*/
69131
const int chwidth = ch == L'\0' ? -1 : ( Cell::isprint_iso8859_1( ch ) ? 1 : mosh_wcwidth( ch ) );
70132

133+
// attempt to combine with previous cell if necessary
134+
if ( !Cell::isprint_iso8859_1( ch ) && try_combine( ch, chwidth ) ) {
135+
return;
136+
}
137+
71138
Cell* this_cell = fb.get_mutable_cell();
72139

73140
switch ( chwidth ) {
@@ -109,54 +176,17 @@ void Emulator::print( const Parser::Print* act )
109176
fb.apply_renditions_to_cell( this_cell );
110177

111178
if ( chwidth == 2 && fb.ds.get_cursor_col() + 1 < fb.ds.get_width() ) { /* erase overlapped cell */
112-
fb.reset_cell( fb.get_mutable_cell( fb.ds.get_cursor_row(), fb.ds.get_cursor_col() + 1 ) );
179+
Cell* overlapped = fb.get_mutable_cell( fb.ds.get_cursor_row(), fb.ds.get_cursor_col() + 1 );
180+
fb.reset_cell( overlapped );
181+
overlapped->set_wide_padding( true );
113182
}
114183

115184
fb.ds.move_col( chwidth, true, true );
116185

117186
break;
118187
case 0: /* combining character */
119-
{
120-
Cell* combining_cell = fb.get_combining_cell(); /* can be null if we were resized */
121-
if ( combining_cell == NULL ) { /* character is now offscreen */
122-
break;
123-
}
124-
125-
if ( combining_cell->empty() ) {
126-
/* cell starts with combining character */
127-
/* ... but isn't necessarily the target for a new
128-
base character [e.g. start of line], if the
129-
combining character has been cleared with
130-
a sequence like ED ("J") or EL ("K") */
131-
assert( !combining_cell->get_wide() );
132-
combining_cell->set_fallback( true );
133-
fb.ds.move_col( 1, true, true );
134-
}
135-
if ( !combining_cell->full() ) {
136-
combining_cell->append( ch );
137-
// VS16 causes the previous codepoint to be rendered as its emoji representation
138-
// which could cause it to change from 1 to 2 characters wide
139-
if ( ch == 0xFE0F && !combining_cell->get_wide() ) {
140-
// have to move this emoji to the next line
141-
if ( fb.ds.auto_wrap_mode && fb.ds.next_print_will_wrap ) {
142-
fb.get_mutable_row( -1 )->set_wrap( false );
143-
fb.ds.move_col( 0 );
144-
fb.move_rows_autoscroll( 1 );
145-
*fb.get_mutable_cell() = *combining_cell;
146-
fb.reset_cell( combining_cell );
147-
fb.ds.move_col( 1, true, true );
148-
combining_cell = fb.get_combining_cell();
149-
}
150-
combining_cell->set_wide( true );
151-
if ( fb.ds.insert_mode ) {
152-
fb.insert_cell( fb.ds.get_cursor_row(), fb.ds.get_cursor_col() );
153-
} else if ( fb.ds.get_cursor_col() < fb.ds.get_width() ) {
154-
fb.reset_cell( fb.get_mutable_cell() );
155-
}
156-
fb.ds.move_col( 1, true, true );
157-
}
158-
}
159-
} break;
188+
// handled above
189+
break;
160190
case -1: /* unprintable character */
161191
break;
162192
default:

src/terminal/terminal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ class Emulator
7474
void OSC_end( const Parser::OSC_End* act );
7575
void resize( size_t s_width, size_t s_height );
7676

77+
bool try_combine(wchar_t ch, int chwidth);
78+
7779
public:
7880
Emulator( size_t s_width, size_t s_height );
7981

src/terminal/terminalframebuffer.cc

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@
3939
using namespace Terminal;
4040

4141
Cell::Cell( color_type background_color )
42-
: contents(), renditions( background_color ), wide( false ), fallback( false ), wrap( false )
42+
: contents(), renditions( background_color ), wide( false ), fallback( false ), wrap( false ),
43+
wide_padding( false )
4344
{}
4445

4546
void Cell::reset( color_type background_color )
@@ -49,6 +50,7 @@ void Cell::reset( color_type background_color )
4950
wide = false;
5051
fallback = false;
5152
wrap = false;
53+
wide_padding = false;
5254
}
5355

5456
void DrawState::reinitialize_tabs( unsigned int start )
@@ -60,9 +62,8 @@ void DrawState::reinitialize_tabs( unsigned int start )
6062
}
6163

6264
DrawState::DrawState( int s_width, int s_height )
63-
: width( s_width ), height( s_height ), cursor_col( 0 ), cursor_row( 0 ), combining_char_col( 0 ),
64-
combining_char_row( 0 ), default_tabs( true ), tabs( s_width ), scrolling_region_top_row( 0 ),
65-
scrolling_region_bottom_row( height - 1 ), renditions( 0 ), save(),
65+
: width( s_width ), height( s_height ), cursor_col( 0 ), cursor_row( 0 ), default_tabs( true ), tabs( s_width ),
66+
scrolling_region_top_row( 0 ), scrolling_region_bottom_row( height - 1 ), renditions( 0 ), save(),
6667
cursor_style( Terminal::CursorStyle::BLINKING_BLOCK ), next_print_will_wrap( false ), origin_mode( false ),
6768
auto_wrap_mode( true ), insert_mode( false ), cursor_visible( true ), reverse_video( false ),
6869
bracketed_paste( false ), mouse_reporting_mode( MOUSE_REPORTING_NONE ), mouse_focus_event( false ),
@@ -115,8 +116,6 @@ void Framebuffer::scroll( int N )
115116

116117
void DrawState::new_grapheme( void )
117118
{
118-
combining_char_col = cursor_col;
119-
combining_char_row = cursor_row;
120119
}
121120

122121
void DrawState::snap_cursor_to_border( void )
@@ -191,13 +190,23 @@ void Framebuffer::move_rows_autoscroll( int rows )
191190

192191
Cell* Framebuffer::get_combining_cell( void )
193192
{
194-
if ( ( ds.get_combining_char_col() < 0 ) || ( ds.get_combining_char_row() < 0 )
195-
|| ( ds.get_combining_char_col() >= ds.get_width() )
196-
|| ( ds.get_combining_char_row() >= ds.get_height() ) ) {
193+
int cursor_x = ds.get_cursor_col();
194+
if ( cursor_x < 1 ) {
197195
return NULL;
198-
} /* can happen if a resize came in between */
196+
}
197+
198+
int n = 1;
199+
Cell* last = get_mutable_cell( -1, cursor_x - n );
200+
if ( cursor_x != 1 && ( last->get_wide_padding() ) ) {
201+
n = 2;
202+
last = get_mutable_cell( -1, cursor_x - n );
203+
}
204+
205+
if ( last && ( last->get_width() != n || last->get_wide_padding() ) ) {
206+
return NULL;
207+
}
199208

200-
return get_mutable_cell( ds.get_combining_char_row(), ds.get_combining_char_col() );
209+
return last;
201210
}
202211

203212
void DrawState::set_tab( void )
@@ -441,11 +450,6 @@ void DrawState::resize( int s_width, int s_height )
441450
snap_cursor_to_border();
442451

443452
/* saved cursor will be snapped to border on restore */
444-
445-
/* invalidate combining char cell if necessary */
446-
if ( ( combining_char_col >= width ) || ( combining_char_row >= height ) ) {
447-
combining_char_col = combining_char_row = -1;
448-
}
449453
}
450454

451455
Renditions::Renditions( color_type s_background )
@@ -750,5 +754,10 @@ bool Cell::compare( const Cell& other ) const
750754
fprintf( stderr, "wrap: %d vs. %d\n", wrap, other.wrap );
751755
}
752756

757+
if ( wide_padding != other.wide_padding ) {
758+
ret = true;
759+
fprintf( stderr, "wide_padding: %d vs. %d\n", wide_padding, other.wide_padding );
760+
}
761+
753762
return ret;
754763
}

src/terminal/terminalframebuffer.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ class Cell
124124
unsigned int wide : 1; /* 0 = narrow, 1 = wide */
125125
unsigned int fallback : 1; /* first character is combining character */
126126
unsigned int wrap : 1;
127+
unsigned int wide_padding : 1;
127128

128129
private:
129130
Cell();
@@ -136,18 +137,20 @@ class Cell
136137
bool operator==( const Cell& x ) const
137138
{
138139
return ( ( contents == x.contents ) && ( fallback == x.fallback ) && ( wide == x.wide )
139-
&& ( renditions == x.renditions ) && ( wrap == x.wrap ) );
140+
&& ( renditions == x.renditions ) && ( wrap == x.wrap ) && ( wide_padding == x.wide_padding ) );
140141
}
141142

142143
bool operator!=( const Cell& x ) const { return !operator==( x ); }
143144

144145
/* Accessors for contents field */
145146
std::string debug_contents( void ) const;
146147

148+
std::string const& get_contents() const { return contents; }
147149
bool empty( void ) const { return contents.empty(); }
148150
/* 32 seems like a reasonable limit on combining characters */
149151
bool full( void ) const { return contents.size() >= 32; }
150152
void clear( void ) { contents.clear(); }
153+
std::size_t size() const { return contents.size(); }
151154

152155
bool is_blank( void ) const
153156
{
@@ -220,6 +223,8 @@ class Cell
220223
void set_renditions( const Renditions& r ) { renditions = r; }
221224
bool get_wide( void ) const { return wide; }
222225
void set_wide( bool w ) { wide = w; }
226+
bool get_wide_padding( void ) const { return wide_padding; }
227+
void set_wide_padding( bool w ) { wide_padding = w; }
223228
unsigned int get_width( void ) const { return wide + 1; }
224229
bool get_fallback( void ) const { return fallback; }
225230
void set_fallback( bool f ) { fallback = f; }
@@ -278,7 +283,6 @@ class DrawState
278283
void snap_cursor_to_border( void );
279284

280285
int cursor_col, cursor_row;
281-
int combining_char_col, combining_char_row;
282286

283287
bool default_tabs;
284288
std::vector<bool> tabs;
@@ -332,8 +336,6 @@ class DrawState
332336

333337
int get_cursor_col( void ) const { return cursor_col; }
334338
int get_cursor_row( void ) const { return cursor_row; }
335-
int get_combining_char_col( void ) const { return combining_char_col; }
336-
int get_combining_char_row( void ) const { return combining_char_row; }
337339
int get_width( void ) const { return width; }
338340
int get_height( void ) const { return height; }
339341

0 commit comments

Comments
 (0)