1
1
//! Text cursor changes/interaction, without modifying the text.
2
2
3
3
use epaint:: text:: { cursor:: CCursor , Galley } ;
4
+ use unicode_segmentation:: UnicodeSegmentation ;
4
5
5
6
use crate :: { epaint, NumExt , Rect , Response , Ui } ;
6
7
@@ -166,7 +167,7 @@ fn select_line_at(text: &str, ccursor: CCursor) -> CCursorRange {
166
167
167
168
pub fn ccursor_next_word ( text : & str , ccursor : CCursor ) -> CCursor {
168
169
CCursor {
169
- index : next_word_boundary_char_index ( text. chars ( ) , ccursor. index ) ,
170
+ index : next_word_boundary_char_index ( text, ccursor. index ) ,
170
171
prefer_next_row : false ,
171
172
}
172
173
}
@@ -180,9 +181,10 @@ fn ccursor_next_line(text: &str, ccursor: CCursor) -> CCursor {
180
181
181
182
pub fn ccursor_previous_word ( text : & str , ccursor : CCursor ) -> CCursor {
182
183
let num_chars = text. chars ( ) . count ( ) ;
184
+ let reversed: String = text. graphemes ( true ) . rev ( ) . collect ( ) ;
183
185
CCursor {
184
186
index : num_chars
185
- - next_word_boundary_char_index ( text . chars ( ) . rev ( ) , num_chars - ccursor. index ) ,
187
+ - next_word_boundary_char_index ( & reversed , num_chars - ccursor. index ) . min ( num_chars ) ,
186
188
prefer_next_row : true ,
187
189
}
188
190
}
@@ -196,22 +198,25 @@ fn ccursor_previous_line(text: &str, ccursor: CCursor) -> CCursor {
196
198
}
197
199
}
198
200
199
- fn next_word_boundary_char_index ( it : impl Iterator < Item = char > , mut index : usize ) -> usize {
200
- let mut it = it. skip ( index) ;
201
- if let Some ( _first) = it. next ( ) {
202
- index += 1 ;
203
-
204
- if let Some ( second) = it. next ( ) {
205
- index += 1 ;
206
- for next in it {
207
- if is_word_char ( next) != is_word_char ( second) {
208
- break ;
209
- }
210
- index += 1 ;
211
- }
201
+ fn next_word_boundary_char_index ( text : & str , index : usize ) -> usize {
202
+ for word in text. split_word_bound_indices ( ) {
203
+ // Splitting considers contiguous whitespace as one word, such words must be skipped,
204
+ // this handles cases for example ' abc' (a space and a word), the cursor is at the beginning
205
+ // (before space) - this jumps at the end of 'abc' (this is consistent with text editors
206
+ // or browsers)
207
+ let ci = char_index_from_byte_index ( text, word. 0 ) ;
208
+ if ci > index && !skip_word ( word. 1 ) {
209
+ return ci;
212
210
}
213
211
}
214
- index
212
+
213
+ char_index_from_byte_index ( text, text. len ( ) )
214
+ }
215
+
216
+ fn skip_word ( text : & str ) -> bool {
217
+ // skip words that contain anything other than alphanumeric characters and underscore
218
+ // (i.e. whitespace, dashes, etc.)
219
+ !text. chars ( ) . any ( |c| !is_word_char ( c) )
215
220
}
216
221
217
222
fn next_line_boundary_char_index ( it : impl Iterator < Item = char > , mut index : usize ) -> usize {
@@ -233,7 +238,7 @@ fn next_line_boundary_char_index(it: impl Iterator<Item = char>, mut index: usiz
233
238
}
234
239
235
240
/// Returns `true` if `c` counts as part of a word: an underscore or any Unicode
/// alphanumeric character (not just the ASCII ones).
pub fn is_word_char(c: char) -> bool {
    c == '_' || c.is_alphanumeric()
}
238
243
239
244
fn is_linebreak ( c : char ) -> bool {
@@ -270,6 +275,16 @@ pub fn byte_index_from_char_index(s: &str, char_index: usize) -> usize {
270
275
s. len ( )
271
276
}
272
277
278
/// Converts a byte offset into `input` to the corresponding char index.
///
/// Returns the index of the char that starts at `byte_index`. If no char starts there
/// (the offset is at or past the end of `input`, or falls inside a multi-byte char),
/// the number of chars in `input` is returned, i.e. the char index of the end of the text.
pub fn char_index_from_byte_index(input: &str, byte_index: usize) -> usize {
    input
        .char_indices()
        .position(|(bi, _)| bi == byte_index)
        // The fallback must be counted in chars: the byte offset of the last char plus one
        // is only the right answer for pure ASCII text.
        .unwrap_or_else(|| input.chars().count())
}
287
+
273
288
pub fn slice_char_range ( s : & str , char_range : std:: ops:: Range < usize > ) -> & str {
274
289
assert ! (
275
290
char_range. start <= char_range. end,
@@ -293,3 +308,38 @@ pub fn cursor_rect(galley: &Galley, cursor: &CCursor, row_height: f32) -> Rect {
293
308
294
309
cursor_pos
295
310
}
311
+
312
#[cfg(test)]
mod test {
    use crate::text_selection::text_cursor_state::next_word_boundary_char_index;

    /// Asserts, for every `(start, expected)` pair, that searching `text` from char index
    /// `start` yields the boundary `expected`.
    fn assert_boundaries(text: &str, cases: &[(usize, usize)]) {
        for &(start, expected) in cases {
            assert_eq!(
                next_word_boundary_char_index(text, start),
                expected,
                "text = {:?}, start = {}",
                text,
                start
            );
        }
    }

    #[test]
    fn test_next_word_boundary_char_index() {
        // ASCII only
        assert_boundaries(
            "abc d3f g_h i-j",
            &[(1, 3), (3, 7), (9, 11), (12, 13), (13, 15), (15, 15)],
        );

        // Empty text, including a start index past its end
        assert_boundaries("", &[(0, 0), (1, 0)]);

        // Unicode graphemes, some of which consist of multiple Unicode characters.
        // Note that a Unicode character is not always what is traditionally considered a
        // character, so the values below are correct even if they do not look that way at
        // first. Handling of and around emojis is somewhat odd and is not consistent
        // across text editors and browsers.
        assert_boundaries(
            "❤️👍 skvělá knihovna 👍❤️",
            &[
                (0, 2),
                // this does not skip the space between thumbs-up and 'skvělá'
                (2, 3),
                (6, 10),
                (9, 10),
                (12, 19),
                (15, 19),
                (19, 20),
                (20, 21),
            ],
        );
    }
}
0 commit comments