Skip to content

Commit d85bd69

Browse files
committed
perf: support incremental document changes
to reduce allocations
1 parent d8f3540 commit d85bd69

File tree

4 files changed

+298
-8
lines changed

4 files changed

+298
-8
lines changed

.vscode/tasks.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,18 @@
4545
"kind": "build",
4646
"isDefault": true
4747
}
48+
},
49+
{
50+
"type": "cargo",
51+
"command": "test",
52+
"problemMatcher": [
53+
"$rustc"
54+
],
55+
"group": {
56+
"kind": "test",
57+
"isDefault": true
58+
},
59+
"label": "rust: cargo test"
4860
}
4961
]
5062
}

crates/typos-lsp/src/lsp.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,7 @@ impl LanguageServer for Backend<'static, 'static> {
103103
// only support UTF-16 positions for now, which is the default when unspecified
104104
position_encoding: Some(PositionEncodingKind::UTF16),
105105
text_document_sync: Some(TextDocumentSyncCapability::Kind(
106-
// TODO: should we support incremental?
107-
TextDocumentSyncKind::FULL,
106+
TextDocumentSyncKind::INCREMENTAL,
108107
)),
109108
code_action_provider: Some(CodeActionProviderCapability::Options(
110109
CodeActionOptions {

crates/typos-lsp/src/state.rs

Lines changed: 284 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use matchit::Router;
33
use std::collections::HashMap;
44
use std::path::{Path, PathBuf};
55
use tower_lsp_server::lsp_types::{
6-
DiagnosticSeverity, TextDocumentContentChangeEvent, Uri, WorkspaceFolder,
6+
DiagnosticSeverity, Position, TextDocumentContentChangeEvent, Uri, WorkspaceFolder,
77
};
88
use tower_lsp_server::UriExt;
99

@@ -35,14 +35,70 @@ impl Document {
3535

3636
pub fn update(&mut self, version: i32, changes: Vec<TextDocumentContentChangeEvent>) {
3737
for change in changes {
38-
if change.range.is_some() {
39-
tracing::warn!("Incremental document updates are not supported");
40-
return;
38+
if let Some(range) = change.range {
39+
if let (Some(start), Some(end)) = (
40+
self.position_to_offset(range.start),
41+
self.position_to_offset(range.end),
42+
) {
43+
self.text.replace_range(start..end, &change.text);
44+
} else {
45+
tracing::warn!("Invalid range in document update: {:?}", range);
46+
}
47+
} else {
48+
self.text = change.text;
4149
}
42-
self.text = change.text;
4350
}
4451
self.version = version;
4552
}
53+
54+
fn position_to_offset(&self, position: Position) -> Option<usize> {
55+
// translates an LSP Position (0-indexed line and UTF-16 character offset) into a byte offset in the document string.
56+
// the reverse operation of AccumulatePosition::pos
57+
let mut offset = 0;
58+
// Split inclusive ensures we keep newlines, which counts towards offset.
59+
let mut lines = self.text.split_inclusive('\n');
60+
61+
for _ in 0..position.line {
62+
let line = lines.next()?; // if we go out of bounds exit position_to_offset returning None (shouldn't happen)
63+
offset += line.len();
64+
}
65+
66+
// Now we are on the correct line.
67+
let line = match lines.next() {
68+
Some(l) => l,
69+
// We are past the last line
70+
None => {
71+
return if position.character == 0 {
72+
// Insertion point on new line after end of document (EOF)
73+
Some(offset)
74+
} else {
75+
// Invalid
76+
None
77+
};
78+
}
79+
};
80+
81+
// Find char offset
82+
let mut utf16_pos = 0;
83+
// iterate over UTF-8 chars in the line
84+
// i = char index, ie: byte offset
85+
for (i, c) in line.char_indices() {
86+
if utf16_pos == position.character {
87+
return Some(offset + i);
88+
}
89+
utf16_pos += c.len_utf16() as u32;
90+
if utf16_pos > position.character {
91+
return None;
92+
}
93+
}
94+
95+
// Check if at the end of the line
96+
if utf16_pos == position.character {
97+
Some(offset + line.len())
98+
} else {
99+
None
100+
}
101+
}
46102
}
47103

48104
impl<'s> BackendState<'s> {
@@ -140,6 +196,7 @@ pub fn uri_path_sanitised(uri: &Uri) -> String {
140196
#[cfg(test)]
141197
mod tests {
142198
use super::*;
199+
use tower_lsp_server::lsp_types::Range;
143200

144201
#[test]
145202
fn test_update_document_full() {
@@ -158,4 +215,226 @@ mod tests {
158215
assert_eq!(doc.text, "hello world");
159216
assert_eq!(doc.version, 1);
160217
}
218+
219+
#[test]
fn test_update_document_incremental_insert() {
    let mut doc = Document::new(1, "hello world".to_string());

    // An empty range at column 11 (end of the only line) is a pure insertion of "!".
    let insertion_point = Range {
        start: Position {
            line: 0,
            character: 11,
        },
        end: Position {
            line: 0,
            character: 11,
        },
    };
    let edit = TextDocumentContentChangeEvent {
        range: Some(insertion_point),
        range_length: None,
        text: "!".to_string(),
    };

    doc.update(2, vec![edit]);

    assert_eq!(doc.text, "hello world!");
    assert_eq!(doc.version, 2);
}
244+
245+
#[test]
fn test_update_document_incremental_delete() {
    let mut doc = Document::new(1, "hello world".to_string());

    // Columns 6..11 of line 0 cover "world"; replacing them with nothing deletes it.
    let word_span = Range {
        start: Position {
            line: 0,
            character: 6,
        },
        end: Position {
            line: 0,
            character: 11,
        },
    };
    let edit = TextDocumentContentChangeEvent {
        range: Some(word_span),
        range_length: None,
        text: String::new(),
    };

    doc.update(2, vec![edit]);

    assert_eq!(doc.text, "hello ");
    assert_eq!(doc.version, 2);
}
270+
271+
#[test]
fn test_update_document_incremental_replace() {
    let mut doc = Document::new(1, "hello world".to_string());

    // Swap "world" (columns 6..11 of line 0) for "there".
    let word_span = Range {
        start: Position {
            line: 0,
            character: 6,
        },
        end: Position {
            line: 0,
            character: 11,
        },
    };
    let edit = TextDocumentContentChangeEvent {
        range: Some(word_span),
        range_length: None,
        text: "there".to_string(),
    };

    doc.update(2, vec![edit]);

    assert_eq!(doc.text, "hello there");
    assert_eq!(doc.version, 2);
}
296+
297+
#[test]
fn test_update_document_multiline() {
    let mut doc = Document::new(1, "line 1\nline 2\nline 3".to_string());

    // Rewrite the middle line only: "line 2" (line 1, columns 0..6) becomes "line two".
    let second_line = Range {
        start: Position {
            line: 1,
            character: 0,
        },
        end: Position {
            line: 1,
            character: 6,
        },
    };
    let edit = TextDocumentContentChangeEvent {
        range: Some(second_line),
        range_length: None,
        text: "line two".to_string(),
    };

    doc.update(2, vec![edit]);

    assert_eq!(doc.text, "line 1\nline two\nline 3");
    assert_eq!(doc.version, 2);
}
322+
323+
#[test]
fn test_position_to_offset_complex() {
    // '𐐀' is 4 bytes (0xF0 0x90 0x90 0x80) in UTF-8 and 2 code units in UTF-16.
    let doc = Document::new(1, "a𐐀b\r\nc".to_string());

    // Each entry is (line, UTF-16 column, expected byte offset).
    let cases: [(u32, u32, Option<usize>); 13] = [
        // Line 0: "a𐐀b\r\n"
        (0, 0, Some(0)), // 'a'
        (0, 1, Some(1)), // '𐐀' starts at byte 1 and takes 2 UTF-16 units
        (0, 2, None),    // middle of the surrogate pair
        (0, 3, Some(5)), // 'b' at byte 1 + 4
        (0, 4, Some(6)), // '\r'
        (0, 5, Some(7)), // '\n'
        (0, 6, Some(8)), // just after '\n', effectively the start of the next line
        (0, 7, None),    // out of bounds on line 0
        // Line 1: "c"
        (1, 0, Some(8)), // start of line 1
        (1, 1, Some(9)), // after 'c', end-of-line insertion point
        (1, 2, None),    // out of bounds on line 1
        // Line 2 does not exist (EOF)
        (2, 0, Some(9)), // valid EOF insertion point
        (2, 1, None),    // any other column on a non-existent line
    ];

    for &(line, character, expected) in &cases {
        // Carry the coordinates in the compared tuple so a failure names the case.
        assert_eq!(
            (
                line,
                character,
                doc.position_to_offset(Position { line, character })
            ),
            (line, character, expected)
        );
    }
}
161440
}

crates/typos-lsp/tests/integration_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ async fn test_initialize_e2e() {
2727
],
2828
},
2929
"positionEncoding": "utf-16",
30-
"textDocumentSync": 1,
30+
"textDocumentSync": 2,
3131
"workspace": {
3232
"workspaceFolders": { "changeNotifications": true, "supported": true }
3333
}

0 commit comments

Comments
 (0)