perf: support incremental document changes

tekumara · tekumara · commit d85bd6933011 · 2026-01-01T08:30:02.000+11:00
to reduce allocations
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
@@ -45,6 +45,18 @@
 				"kind": "build",
 				"isDefault": true
 			}
+		},
+		{
+			"type": "cargo",
+			"command": "test",
+			"problemMatcher": [
+				"$rustc"
+			],
+			"group": {
+				"kind": "test",
+				"isDefault": true
+			},
+			"label": "rust: cargo test"
 		}
 	]
 }
diff --git a/crates/typos-lsp/src/lsp.rs b/crates/typos-lsp/src/lsp.rs
@@ -103,8 +103,7 @@ impl LanguageServer for Backend<'static, 'static> {
                 // only support UTF-16 positions for now, which is the default when unspecified
                 position_encoding: Some(PositionEncodingKind::UTF16),
                 text_document_sync: Some(TextDocumentSyncCapability::Kind(
-                    // TODO: should we support incremental?
-                    TextDocumentSyncKind::FULL,
+                    TextDocumentSyncKind::INCREMENTAL,
                 )),
                 code_action_provider: Some(CodeActionProviderCapability::Options(
                     CodeActionOptions {
diff --git a/crates/typos-lsp/src/state.rs b/crates/typos-lsp/src/state.rs
@@ -3,7 +3,7 @@ use matchit::Router;
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use tower_lsp_server::lsp_types::{
-    DiagnosticSeverity, TextDocumentContentChangeEvent, Uri, WorkspaceFolder,
+    DiagnosticSeverity, Position, TextDocumentContentChangeEvent, Uri, WorkspaceFolder,
 };
 use tower_lsp_server::UriExt;
 
@@ -35,14 +35,70 @@ impl Document {
 
     pub fn update(&mut self, version: i32, changes: Vec<TextDocumentContentChangeEvent>) {
+        // Applies `changes` in order and then records `version`. Each ranged change is
+        // resolved against `self.text` as left by the changes applied before it.
         for change in changes {
-            if change.range.is_some() {
-                tracing::warn!("Incremental document updates are not supported");
-                return;
+            if let Some(range) = change.range {
+                match (
+                    self.position_to_offset(range.start),
+                    self.position_to_offset(range.end),
+                ) {
+                    // `String::replace_range` panics when start > end, so a reversed range
+                    // is rejected here like any other invalid range instead of crashing.
+                    (Some(start), Some(end)) if start <= end => {
+                        self.text.replace_range(start..end, &change.text);
+                    }
+                    _ => {
+                        tracing::warn!("Invalid range in document update: {:?}", range);
+                    }
+                }
+            } else {
+                // No range: the change carries the complete new document text.
+                self.text = change.text;
             }
-            self.text = change.text;
         }
         self.version = version;
     }
+
+    fn position_to_offset(&self, position: Position) -> Option<usize> {
+        // Maps an LSP Position (0-indexed line, UTF-16 code unit column) to a byte offset in
+        // `self.text`; the reverse operation of AccumulatePosition::pos.
+        // Returns None when the column is past the end of the line, falls inside a surrogate
+        // pair, or the line does not exist (column 0 right after the last line is accepted).
+
+        // `split_inclusive` keeps each '\n' attached to its line, so the summed lengths of the
+        // skipped lines give the byte offset at which the target line starts.
+        let mut remaining = self.text.split_inclusive('\n');
+        let mut line_start = 0;
+        for _ in 0..position.line {
+            // Fewer lines than requested (shouldn't happen): bail out with None.
+            line_start += remaining.next()?.len();
+        }
+
+        let Some(target) = remaining.next() else {
+            // No such line in the split: only column 0 is valid, as the EOF insertion point.
+            return (position.character == 0).then_some(line_start);
+        };
+
+        // Walk the line's chars, tracking how many UTF-16 units precede each one.
+        let mut units_seen: u32 = 0;
+        for (byte_idx, ch) in target.char_indices() {
+            match units_seen.cmp(&position.character) {
+                std::cmp::Ordering::Equal => return Some(line_start + byte_idx),
+                // Overshot: the column points into the middle of a surrogate pair.
+                std::cmp::Ordering::Greater => return None,
+                std::cmp::Ordering::Less => units_seen += ch.len_utf16() as u32,
+            }
+        }
+
+        // The column may still address the spot right after the line's final char.
+        (units_seen == position.character).then_some(line_start + target.len())
+    }
 }
 
 impl<'s> BackendState<'s> {
@@ -140,6 +196,7 @@ pub fn uri_path_sanitised(uri: &Uri) -> String {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use tower_lsp_server::lsp_types::Range;
 
     #[test]
     fn test_update_document_full() {
@@ -158,4 +215,226 @@ mod tests {
         assert_eq!(doc.text, "hello world");
         assert_eq!(doc.version, 1);
     }
+
+    #[test]
+    fn test_update_document_incremental_insert() {
+        // An empty range at column 11 (one past the final 'd') appends "!" to the line.
+        let append_bang = TextDocumentContentChangeEvent {
+            range: Some(Range::new(Position::new(0, 11), Position::new(0, 11))),
+            range_length: None,
+            text: String::from("!"),
+        };
+
+        let mut doc = Document::new(1, String::from("hello world"));
+        doc.update(2, vec![append_bang]);
+
+        assert_eq!(doc.text, "hello world!");
+        assert_eq!(doc.version, 2);
+    }
+
+    #[test]
+    fn test_update_document_incremental_delete() {
+        // Replacing columns 6..11 ("world") with an empty string deletes that word.
+        let delete_world = TextDocumentContentChangeEvent {
+            range: Some(Range::new(Position::new(0, 6), Position::new(0, 11))),
+            range_length: None,
+            text: String::new(),
+        };
+
+        let mut doc = Document::new(1, String::from("hello world"));
+        doc.update(2, vec![delete_world]);
+
+        assert_eq!(doc.text, "hello ");
+        assert_eq!(doc.version, 2);
+    }
+
+    #[test]
+    fn test_update_document_incremental_replace() {
+        // Columns 6..11 hold "world"; the change swaps it for "there".
+        let world_to_there = TextDocumentContentChangeEvent {
+            range: Some(Range::new(Position::new(0, 6), Position::new(0, 11))),
+            range_length: None,
+            text: String::from("there"),
+        };
+
+        let mut doc = Document::new(1, String::from("hello world"));
+        doc.update(2, vec![world_to_there]);
+
+        assert_eq!(doc.text, "hello there");
+        assert_eq!(doc.version, 2);
+    }
+
+    #[test]
+    fn test_update_document_multiline() {
+        // The edit targets the second line only (line index 1, columns 0..6 = "line 2").
+        let rewrite_second_line = TextDocumentContentChangeEvent {
+            range: Some(Range::new(Position::new(1, 0), Position::new(1, 6))),
+            range_length: None,
+            text: String::from("line two"),
+        };
+
+        let mut doc = Document::new(1, String::from("line 1\nline 2\nline 3"));
+        doc.update(2, vec![rewrite_second_line]);
+
+        assert_eq!(doc.text, "line 1\nline two\nline 3");
+        assert_eq!(doc.version, 2);
+    }
+
+    #[test]
+    fn test_position_to_offset_complex() {
+        // '𐐀' is 4 bytes (0xF0 0x90 0x90 0x80) in UTF-8 and 2 code units in UTF-16.
+        let doc = Document::new(1, "a𐐀b\r\nc".to_string());
+
+        // Each entry: (line, UTF-16 character, expected byte offset).
+        let cases: &[(u32, u32, Option<usize>)] = &[
+            // Line 0: "a𐐀b\r\n"
+            (0, 0, Some(0)), // 'a'
+            (0, 1, Some(1)), // '𐐀' starts at byte 1 and takes 2 UTF-16 units
+            (0, 2, None),    // invalid: middle of the surrogate pair
+            (0, 3, Some(5)), // 'b' at byte 1 + 4
+            (0, 4, Some(6)), // '\r'
+            (0, 5, Some(7)), // '\n'
+            (0, 6, Some(8)), // after '\n', effectively the start of the next line
+            (0, 7, None),    // out of bounds on line 0
+            // Line 1: "c"
+            (1, 0, Some(8)), // start of line 1
+            (1, 1, Some(9)), // after 'c', end of line insertion point
+            (1, 2, None),    // out of bounds on line 1
+            // Line 2 does not exist, it's EOF
+            (2, 0, Some(9)), // valid EOF insertion point
+            (2, 1, None),    // invalid char on non-existent line
+        ];
+
+        for &(line, character, expected) in cases {
+            assert_eq!(
+                doc.position_to_offset(Position { line, character }),
+                expected,
+                "line {}, character {}",
+                line,
+                character
+            );
+        }
+    }
 }
diff --git a/crates/typos-lsp/tests/integration_test.rs b/crates/typos-lsp/tests/integration_test.rs
@@ -27,7 +27,7 @@ async fn test_initialize_e2e() {
                     ],
                 },
                 "positionEncoding": "utf-16",
-                "textDocumentSync": 1,
+                "textDocumentSync": 2,
                 "workspace": {
                   "workspaceFolders": { "changeNotifications": true, "supported": true }
                 }

Original file line number	Diff line number	Diff line change
`@@ -45,6 +45,18 @@`
`45`	`45`	`"kind": "build",`
`46`	`46`	`"isDefault": true`
`47`	`47`	`}`
	`48`	`+ },`
	`49`	`+ {`
	`50`	`+ "type": "cargo",`
	`51`	`+ "command": "test",`
	`52`	`+ "problemMatcher": [`
	`53`	`+ "$rustc"`
	`54`	`+ ],`
	`55`	`+ "group": {`
	`56`	`+ "kind": "test",`
	`57`	`+ "isDefault": true`
	`58`	`+ },`
	`59`	`+ "label": "rust: cargo test"`
`48`	`60`	`}`
`49`	`61`	`]`
`50`	`62`	`}`
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,7 @@ async fn test_initialize_e2e() {`
`27`	`27`	`],`
`28`	`28`	`},`
`29`	`29`	`"positionEncoding": "utf-16",`
`30`		`- "textDocumentSync": 1,`
	`30`	`+ "textDocumentSync": 2,`
`31`	`31`	`"workspace": {`
`32`	`32`	`"workspaceFolders": { "changeNotifications": true, "supported": true }`
`33`	`33`	`}`