diff --git a/Cargo.lock b/Cargo.lock index cabb98a70b0e..3f1f2fe45329 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -30,7 +30,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", - "getrandom 0.2.15", "once_cell", "version_check", "zerocopy", @@ -1319,14 +1318,13 @@ dependencies = [ name = "helix-core" version = "25.1.1" dependencies = [ - "ahash", "anyhow", "arc-swap", "bitflags", "chrono", "encoding_rs", + "foldhash", "globset", - "hashbrown 0.14.5", "helix-loader", "helix-parsec", "helix-stdx", @@ -1347,7 +1345,7 @@ dependencies = [ "smartstring", "textwrap", "toml", - "tree-sitter", + "tree-house", "unicode-general-category", "unicode-segmentation", "unicode-width 0.1.12", @@ -1391,14 +1389,13 @@ dependencies = [ "cc", "etcetera", "helix-stdx", - "libloading", "log", "once_cell", "serde", "tempfile", "threadpool", "toml", - "tree-sitter", + "tree-house", ] [[package]] @@ -2665,13 +2662,31 @@ dependencies = [ ] [[package]] -name = "tree-sitter" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" +name = "tree-house" +version = "0.1.0-beta.2" +source = "git+https://github.com/helix-editor/tree-house#1fa65eca36fdbb2837e0655bfda53ed627fc25c0" dependencies = [ - "cc", + "arc-swap", + "hashbrown 0.15.2", + "kstring", + "once_cell", "regex", + "regex-cursor", + "ropey", + "slab", + "tree-house-bindings", +] + +[[package]] +name = "tree-house-bindings" +version = "0.1.0-beta.2" +source = "git+https://github.com/helix-editor/tree-house#1fa65eca36fdbb2837e0655bfda53ed627fc25c0" +dependencies = [ + "cc", + "libloading", + "regex-cursor", + "ropey", + "thiserror 2.0.12", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 667a83967726..81d445aa27ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ package.helix-tui.opt-level = 2 
package.helix-term.opt-level = 2 [workspace.dependencies] -tree-sitter = { version = "0.22" } +tree-house = { git = "https://github.com/helix-editor/tree-house", default-features = false } nucleo = "0.5.0" slotmap = "1.0.7" thiserror = "2.0" diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 82715b7efa48..5ea6d1659b34 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -28,3 +28,4 @@ - [Adding textobject queries](./guides/textobject.md) - [Adding indent queries](./guides/indent.md) - [Adding injection queries](./guides/injection.md) + - [Adding rainbow bracket queries](./guides/rainbow_bracket_queries.md) diff --git a/book/src/editor.md b/book/src/editor.md index 1e5c2a507749..42e59c3fc6c9 100644 --- a/book/src/editor.md +++ b/book/src/editor.md @@ -61,6 +61,7 @@ | `end-of-line-diagnostics` | Minimum severity of diagnostics to render at the end of the line. Set to `disable` to disable entirely. Refer to the setting about `inline-diagnostics` for more details | "disable" | `clipboard-provider` | Which API to use for clipboard interaction. One of `pasteboard` (MacOS), `wayland`, `x-clip`, `x-sel`, `win-32-yank`, `termux`, `tmux`, `windows`, `termcode`, `none`, or a custom command set. | Platform and environment specific. | | `editor-config` | Whether to read settings from [EditorConfig](https://editorconfig.org) files | `true` | +| `rainbow-brackets` | Whether to render rainbow colors for matching brackets. Requires tree-sitter `rainbows.scm` queries for the language. 
| `false` | ### `[editor.clipboard-provider]` Section diff --git a/book/src/generated/lang-support.md b/book/src/generated/lang-support.md index f6aee3fe3410..3c1796fd4c0a 100644 --- a/book/src/generated/lang-support.md +++ b/book/src/generated/lang-support.md @@ -1,260 +1,261 @@ -| Language | Syntax Highlighting | Treesitter Textobjects | Auto Indent | Default language servers | -| --- | --- | --- | --- | --- | -| ada | ✓ | ✓ | | `ada_language_server` | -| adl | ✓ | ✓ | ✓ | | -| agda | ✓ | | | | -| amber | ✓ | | | | -| astro | ✓ | | | `astro-ls` | -| awk | ✓ | ✓ | | `awk-language-server` | -| bash | ✓ | ✓ | ✓ | `bash-language-server` | -| bass | ✓ | | | `bass` | -| beancount | ✓ | | | `beancount-language-server` | -| bibtex | ✓ | | | `texlab` | -| bicep | ✓ | | | `bicep-langserver` | -| bitbake | ✓ | | | `bitbake-language-server` | -| blade | ✓ | | | | -| blueprint | ✓ | | | `blueprint-compiler` | -| c | ✓ | ✓ | ✓ | `clangd` | -| c-sharp | ✓ | ✓ | | `OmniSharp` | -| cabal | | | | `haskell-language-server-wrapper` | -| cairo | ✓ | ✓ | ✓ | `cairo-language-server` | -| capnp | ✓ | | ✓ | | -| cel | ✓ | | | | -| circom | ✓ | | | `circom-lsp` | -| clojure | ✓ | | | `clojure-lsp` | -| cmake | ✓ | ✓ | ✓ | `cmake-language-server` | -| codeql | ✓ | ✓ | | `codeql` | -| comment | ✓ | | | | -| common-lisp | ✓ | | ✓ | `cl-lsp` | -| cpon | ✓ | | ✓ | | -| cpp | ✓ | ✓ | ✓ | `clangd` | -| crystal | ✓ | ✓ | | `crystalline` | -| css | ✓ | | ✓ | `vscode-css-language-server` | -| csv | ✓ | | | | -| cue | ✓ | | | `cuelsp` | -| cylc | ✓ | ✓ | ✓ | | -| d | ✓ | ✓ | ✓ | `serve-d` | -| dart | ✓ | ✓ | ✓ | `dart` | -| dbml | ✓ | | | | -| devicetree | ✓ | | | | -| dhall | ✓ | ✓ | | `dhall-lsp-server` | -| diff | ✓ | | | | -| djot | ✓ | | | | -| docker-compose | ✓ | ✓ | ✓ | `docker-compose-langserver`, `yaml-language-server` | -| dockerfile | ✓ | ✓ | | `docker-langserver` | -| dot | ✓ | | | `dot-language-server` | -| dtd | ✓ | | | | -| dune | ✓ | | | | -| earthfile | ✓ | ✓ | ✓ | `earthlyls` | -| 
edoc | ✓ | | | | -| eex | ✓ | | | | -| ejs | ✓ | | | | -| elisp | ✓ | | | | -| elixir | ✓ | ✓ | ✓ | `elixir-ls` | -| elm | ✓ | ✓ | | `elm-language-server` | -| elvish | ✓ | | | `elvish` | -| env | ✓ | ✓ | | | -| erb | ✓ | | | | -| erlang | ✓ | ✓ | | `erlang_ls`, `elp` | -| esdl | ✓ | | | | -| fga | ✓ | ✓ | ✓ | | -| fidl | ✓ | | | | -| fish | ✓ | ✓ | ✓ | `fish-lsp` | -| forth | ✓ | | | `forth-lsp` | -| fortran | ✓ | | ✓ | `fortls` | -| fsharp | ✓ | | | `fsautocomplete` | -| gas | ✓ | ✓ | | `asm-lsp` | -| gdscript | ✓ | ✓ | ✓ | | -| gemini | ✓ | | | | -| gherkin | ✓ | | | | -| ghostty | ✓ | | | | -| git-attributes | ✓ | | | | -| git-commit | ✓ | ✓ | | | -| git-config | ✓ | ✓ | | | -| git-ignore | ✓ | | | | -| git-rebase | ✓ | | | | -| gjs | ✓ | ✓ | ✓ | `typescript-language-server`, `vscode-eslint-language-server`, `ember-language-server` | -| gleam | ✓ | ✓ | | `gleam` | -| glimmer | ✓ | | | `ember-language-server` | -| glsl | ✓ | ✓ | ✓ | `glsl_analyzer` | -| gn | ✓ | | | | -| go | ✓ | ✓ | ✓ | `gopls`, `golangci-lint-langserver` | -| godot-resource | ✓ | ✓ | | | -| gomod | ✓ | | | `gopls` | -| gotmpl | ✓ | | | `gopls` | -| gowork | ✓ | | | `gopls` | -| gpr | ✓ | | | `ada_language_server` | -| graphql | ✓ | ✓ | | `graphql-lsp` | -| gren | ✓ | ✓ | | | -| groovy | ✓ | | | | -| gts | ✓ | ✓ | ✓ | `typescript-language-server`, `vscode-eslint-language-server`, `ember-language-server` | -| hare | ✓ | | | | -| haskell | ✓ | ✓ | | `haskell-language-server-wrapper` | -| haskell-persistent | ✓ | | | | -| hcl | ✓ | ✓ | ✓ | `terraform-ls` | -| heex | ✓ | ✓ | | `elixir-ls` | -| helm | ✓ | | | `helm_ls` | -| hocon | ✓ | ✓ | ✓ | | -| hoon | ✓ | | | | -| hosts | ✓ | | | | -| html | ✓ | | | `vscode-html-language-server`, `superhtml` | -| hurl | ✓ | ✓ | ✓ | | -| hyprlang | ✓ | | ✓ | `hyprls` | -| idris | | | | `idris2-lsp` | -| iex | ✓ | | | | -| ini | ✓ | | | | -| ink | ✓ | | | | -| inko | ✓ | ✓ | ✓ | | -| janet | ✓ | | | | -| java | ✓ | ✓ | ✓ | `jdtls` | -| javascript | ✓ | ✓ | ✓ | 
`typescript-language-server` | -| jinja | ✓ | | | | -| jjdescription | ✓ | | | | -| jq | ✓ | ✓ | | `jq-lsp` | -| jsdoc | ✓ | | | | -| json | ✓ | ✓ | ✓ | `vscode-json-language-server` | -| json5 | ✓ | | | | -| jsonc | ✓ | | ✓ | `vscode-json-language-server` | -| jsonnet | ✓ | | | `jsonnet-language-server` | -| jsx | ✓ | ✓ | ✓ | `typescript-language-server` | -| julia | ✓ | ✓ | ✓ | `julia` | -| just | ✓ | ✓ | ✓ | | -| kdl | ✓ | ✓ | ✓ | | -| koka | ✓ | | ✓ | `koka` | -| kotlin | ✓ | ✓ | ✓ | `kotlin-language-server` | -| koto | ✓ | ✓ | ✓ | `koto-ls` | -| latex | ✓ | ✓ | | `texlab` | -| ld | ✓ | | ✓ | | -| ldif | ✓ | | | | -| lean | ✓ | | | `lean` | -| ledger | ✓ | | | | -| llvm | ✓ | ✓ | ✓ | | -| llvm-mir | ✓ | ✓ | ✓ | | -| llvm-mir-yaml | ✓ | | ✓ | | -| log | ✓ | | | | -| lpf | ✓ | | | | -| lua | ✓ | ✓ | ✓ | `lua-language-server` | -| mail | ✓ | ✓ | | | -| make | ✓ | | ✓ | | -| markdoc | ✓ | | | `markdoc-ls` | -| markdown | ✓ | | | `marksman`, `markdown-oxide` | -| markdown.inline | ✓ | | | | -| matlab | ✓ | ✓ | ✓ | | -| mermaid | ✓ | | | | -| meson | ✓ | | ✓ | `mesonlsp` | -| mint | | | | `mint` | -| mojo | ✓ | ✓ | ✓ | `magic` | -| move | ✓ | | | | -| msbuild | ✓ | | ✓ | | -| nasm | ✓ | ✓ | | `asm-lsp` | -| nestedtext | ✓ | ✓ | ✓ | | -| nginx | ✓ | | | | -| nickel | ✓ | | ✓ | `nls` | -| nim | ✓ | ✓ | ✓ | `nimlangserver` | -| nix | ✓ | ✓ | ✓ | `nil`, `nixd` | -| nu | ✓ | | | `nu` | -| nunjucks | ✓ | | | | -| ocaml | ✓ | | ✓ | `ocamllsp` | -| ocaml-interface | ✓ | | | `ocamllsp` | -| odin | ✓ | ✓ | ✓ | `ols` | -| ohm | ✓ | ✓ | ✓ | | -| opencl | ✓ | ✓ | ✓ | `clangd` | -| openscad | ✓ | | | `openscad-lsp` | -| org | ✓ | | | | -| pascal | ✓ | ✓ | | `pasls` | -| passwd | ✓ | | | | -| pem | ✓ | | | | -| perl | ✓ | ✓ | ✓ | `perlnavigator` | -| pest | ✓ | ✓ | ✓ | `pest-language-server` | -| php | ✓ | ✓ | ✓ | `intelephense` | -| php-only | ✓ | | | | -| pkgbuild | ✓ | ✓ | ✓ | `termux-language-server`, `bash-language-server` | -| pkl | ✓ | | ✓ | `pkl-lsp` | -| po | ✓ | ✓ | | | 
-| pod | ✓ | | | | -| ponylang | ✓ | ✓ | ✓ | | -| powershell | ✓ | | | | -| prisma | ✓ | ✓ | | `prisma-language-server` | -| prolog | | | | `swipl` | -| protobuf | ✓ | ✓ | ✓ | `buf`, `pb`, `protols` | -| prql | ✓ | | | | -| purescript | ✓ | ✓ | | `purescript-language-server` | -| python | ✓ | ✓ | ✓ | `ruff`, `jedi-language-server`, `pylsp` | -| qml | ✓ | | ✓ | `qmlls` | -| quint | ✓ | | | `quint-language-server` | -| r | ✓ | | | `R` | -| racket | ✓ | | ✓ | `racket` | -| regex | ✓ | | | | -| rego | ✓ | | | `regols` | -| rescript | ✓ | ✓ | | `rescript-language-server` | -| rmarkdown | ✓ | | ✓ | `R` | -| robot | ✓ | | | `robotframework_ls` | -| ron | ✓ | | ✓ | | -| rst | ✓ | | | | -| ruby | ✓ | ✓ | ✓ | `ruby-lsp`, `solargraph` | -| rust | ✓ | ✓ | ✓ | `rust-analyzer` | -| sage | ✓ | ✓ | | | -| scala | ✓ | ✓ | ✓ | `metals` | -| scheme | ✓ | | ✓ | | -| scss | ✓ | | | `vscode-css-language-server` | -| slint | ✓ | ✓ | ✓ | `slint-lsp` | -| smali | ✓ | | ✓ | | -| smithy | ✓ | | | `cs` | -| sml | ✓ | | | | -| snakemake | ✓ | | ✓ | `pylsp` | -| solidity | ✓ | ✓ | | `solc` | -| sourcepawn | ✓ | ✓ | | `sourcepawn-studio` | -| spade | ✓ | | ✓ | `spade-language-server` | -| spicedb | ✓ | | | | -| sql | ✓ | ✓ | | | -| sshclientconfig | ✓ | | | | -| starlark | ✓ | ✓ | ✓ | `starpls` | -| strace | ✓ | | | | -| supercollider | ✓ | | | | -| svelte | ✓ | | ✓ | `svelteserver` | -| sway | ✓ | ✓ | ✓ | `forc` | -| swift | ✓ | ✓ | | `sourcekit-lsp` | -| t32 | ✓ | | | | -| tablegen | ✓ | ✓ | ✓ | | -| tact | ✓ | ✓ | ✓ | | -| task | ✓ | | | | -| tcl | ✓ | | ✓ | | -| teal | ✓ | | | `teal-language-server` | -| templ | ✓ | | | `templ` | -| tera | ✓ | | | | -| textproto | ✓ | ✓ | ✓ | | -| tfvars | ✓ | | ✓ | `terraform-ls` | -| thrift | ✓ | | | | -| tlaplus | ✓ | | | | -| todotxt | ✓ | | | | -| toml | ✓ | ✓ | | `taplo` | -| tsq | ✓ | | | `ts_query_ls` | -| tsx | ✓ | ✓ | ✓ | `typescript-language-server` | -| twig | ✓ | | | | -| typescript | ✓ | ✓ | ✓ | `typescript-language-server` | -| typespec | ✓ | 
✓ | ✓ | `tsp-server` | -| typst | ✓ | | | `tinymist` | -| ungrammar | ✓ | | | | -| unison | ✓ | ✓ | ✓ | | -| uxntal | ✓ | | | | -| v | ✓ | ✓ | ✓ | `v-analyzer` | -| vala | ✓ | ✓ | | `vala-language-server` | -| vento | ✓ | | | | -| verilog | ✓ | ✓ | | `svlangserver` | -| vhdl | ✓ | | | `vhdl_ls` | -| vhs | ✓ | | | | -| vue | ✓ | | | `vue-language-server` | -| wast | ✓ | | | | -| wat | ✓ | | | `wat_server` | -| webc | ✓ | | | | -| werk | ✓ | | | | -| wgsl | ✓ | | | `wgsl-analyzer` | -| wit | ✓ | | ✓ | | -| wren | ✓ | ✓ | ✓ | | -| xit | ✓ | | | | -| xml | ✓ | | ✓ | | -| xtc | ✓ | | | | -| yaml | ✓ | ✓ | ✓ | `yaml-language-server`, `ansible-language-server` | -| yara | ✓ | | | `yls` | -| yuck | ✓ | | | | -| zig | ✓ | ✓ | ✓ | `zls` | +| Language | Syntax Highlighting | Treesitter Textobjects | Auto Indent | Rainbow Brackets | Default language servers | +| --- | --- | --- | --- | --- | --- | +| ada | ✓ | ✓ | | | `ada_language_server` | +| adl | ✓ | ✓ | ✓ | | | +| agda | ✓ | | | | | +| amber | ✓ | | | | | +| astro | ✓ | | | | `astro-ls` | +| awk | ✓ | ✓ | | | `awk-language-server` | +| bash | ✓ | ✓ | ✓ | ✓ | `bash-language-server` | +| bass | ✓ | | | | `bass` | +| beancount | ✓ | | | | `beancount-language-server` | +| bibtex | ✓ | | | | `texlab` | +| bicep | ✓ | | | | `bicep-langserver` | +| bitbake | ✓ | | | | `bitbake-language-server` | +| blade | ✓ | | | | | +| blueprint | ✓ | | | | `blueprint-compiler` | +| c | ✓ | ✓ | ✓ | ✓ | `clangd` | +| c-sharp | ✓ | ✓ | | | `OmniSharp` | +| cabal | | | | | `haskell-language-server-wrapper` | +| cairo | ✓ | ✓ | ✓ | | `cairo-language-server` | +| capnp | ✓ | | ✓ | | | +| cel | ✓ | | | | | +| circom | ✓ | | | | `circom-lsp` | +| clojure | ✓ | | | ✓ | `clojure-lsp` | +| cmake | ✓ | ✓ | ✓ | | `cmake-language-server` | +| codeql | ✓ | ✓ | | | `codeql` | +| comment | ✓ | | | | | +| common-lisp | ✓ | | ✓ | ✓ | `cl-lsp` | +| cpon | ✓ | | ✓ | | | +| cpp | ✓ | ✓ | ✓ | ✓ | `clangd` | +| crystal | ✓ | ✓ | | | `crystalline` | +| css | ✓ | | ✓ 
| ✓ | `vscode-css-language-server` | +| csv | ✓ | | | | | +| cue | ✓ | | | | `cuelsp` | +| cylc | ✓ | ✓ | ✓ | | | +| d | ✓ | ✓ | ✓ | | `serve-d` | +| dart | ✓ | ✓ | ✓ | | `dart` | +| dbml | ✓ | | | | | +| devicetree | ✓ | | | | | +| dhall | ✓ | ✓ | | | `dhall-lsp-server` | +| diff | ✓ | | | | | +| djot | ✓ | | | | | +| docker-compose | ✓ | ✓ | ✓ | | `docker-compose-langserver`, `yaml-language-server` | +| dockerfile | ✓ | ✓ | | | `docker-langserver` | +| dot | ✓ | | | | `dot-language-server` | +| dtd | ✓ | | | | | +| dune | ✓ | | | | | +| earthfile | ✓ | ✓ | ✓ | | `earthlyls` | +| edoc | ✓ | | | | | +| eex | ✓ | | | | | +| ejs | ✓ | | | | | +| elisp | ✓ | | | | | +| elixir | ✓ | ✓ | ✓ | ✓ | `elixir-ls` | +| elm | ✓ | ✓ | | | `elm-language-server` | +| elvish | ✓ | | | | `elvish` | +| env | ✓ | ✓ | | | | +| erb | ✓ | | | | | +| erlang | ✓ | ✓ | | ✓ | `erlang_ls`, `elp` | +| esdl | ✓ | | | | | +| fga | ✓ | ✓ | ✓ | | | +| fidl | ✓ | | | | | +| fish | ✓ | ✓ | ✓ | | `fish-lsp` | +| forth | ✓ | | | | `forth-lsp` | +| fortran | ✓ | | ✓ | | `fortls` | +| fsharp | ✓ | | | | `fsautocomplete` | +| gas | ✓ | ✓ | | | `asm-lsp` | +| gdscript | ✓ | ✓ | ✓ | | | +| gemini | ✓ | | | | | +| gherkin | ✓ | | | | | +| ghostty | ✓ | | | | | +| git-attributes | ✓ | | | | | +| git-commit | ✓ | ✓ | | | | +| git-config | ✓ | ✓ | | | | +| git-ignore | ✓ | | | | | +| git-rebase | ✓ | | | | | +| gjs | ✓ | ✓ | ✓ | | `typescript-language-server`, `vscode-eslint-language-server`, `ember-language-server` | +| gleam | ✓ | ✓ | | ✓ | `gleam` | +| glimmer | ✓ | | | | `ember-language-server` | +| glsl | ✓ | ✓ | ✓ | | `glsl_analyzer` | +| gn | ✓ | | | | | +| go | ✓ | ✓ | ✓ | ✓ | `gopls`, `golangci-lint-langserver` | +| godot-resource | ✓ | ✓ | | | | +| gomod | ✓ | | | | `gopls` | +| gotmpl | ✓ | | | | `gopls` | +| gowork | ✓ | | | | `gopls` | +| gpr | ✓ | | | | `ada_language_server` | +| graphql | ✓ | ✓ | | | `graphql-lsp` | +| gren | ✓ | ✓ | | | | +| groovy | ✓ | | | | | +| gts | ✓ | ✓ | ✓ | | 
`typescript-language-server`, `vscode-eslint-language-server`, `ember-language-server` | +| hare | ✓ | | | | | +| haskell | ✓ | ✓ | | | `haskell-language-server-wrapper` | +| haskell-persistent | ✓ | | | | | +| hcl | ✓ | ✓ | ✓ | | `terraform-ls` | +| heex | ✓ | ✓ | | | `elixir-ls` | +| helm | ✓ | | | | `helm_ls` | +| hocon | ✓ | ✓ | ✓ | | | +| hoon | ✓ | | | | | +| hosts | ✓ | | | | | +| html | ✓ | | | ✓ | `vscode-html-language-server`, `superhtml` | +| hurl | ✓ | ✓ | ✓ | | | +| hyprlang | ✓ | | ✓ | | `hyprls` | +| idris | | | | | `idris2-lsp` | +| iex | ✓ | | | | | +| ini | ✓ | | | | | +| ink | ✓ | | | | | +| inko | ✓ | ✓ | ✓ | | | +| janet | ✓ | | | | | +| java | ✓ | ✓ | ✓ | ✓ | `jdtls` | +| javascript | ✓ | ✓ | ✓ | ✓ | `typescript-language-server` | +| jinja | ✓ | | | | | +| jjdescription | ✓ | | | | | +| jq | ✓ | ✓ | | | `jq-lsp` | +| jsdoc | ✓ | | | | | +| json | ✓ | ✓ | ✓ | ✓ | `vscode-json-language-server` | +| json5 | ✓ | | | | | +| jsonc | ✓ | | ✓ | | `vscode-json-language-server` | +| jsonnet | ✓ | | | | `jsonnet-language-server` | +| jsx | ✓ | ✓ | ✓ | ✓ | `typescript-language-server` | +| julia | ✓ | ✓ | ✓ | | `julia` | +| just | ✓ | ✓ | ✓ | | | +| kdl | ✓ | ✓ | ✓ | | | +| koka | ✓ | | ✓ | | `koka` | +| kotlin | ✓ | ✓ | ✓ | | `kotlin-language-server` | +| koto | ✓ | ✓ | ✓ | | `koto-ls` | +| latex | ✓ | ✓ | | | `texlab` | +| ld | ✓ | | ✓ | | | +| ldif | ✓ | | | | | +| lean | ✓ | | | | `lean` | +| ledger | ✓ | | | | | +| llvm | ✓ | ✓ | ✓ | | | +| llvm-mir | ✓ | ✓ | ✓ | | | +| llvm-mir-yaml | ✓ | | ✓ | | | +| log | ✓ | | | | | +| lpf | ✓ | | | | | +| lua | ✓ | ✓ | ✓ | | `lua-language-server` | +| mail | ✓ | ✓ | | | | +| make | ✓ | | ✓ | | | +| markdoc | ✓ | | | | `markdoc-ls` | +| markdown | ✓ | | | | `marksman`, `markdown-oxide` | +| markdown-rustdoc | ✓ | | | | | +| markdown.inline | ✓ | | | | | +| matlab | ✓ | ✓ | ✓ | | | +| mermaid | ✓ | | | | | +| meson | ✓ | | ✓ | | `mesonlsp` | +| mint | | | | | `mint` | +| mojo | ✓ | ✓ | ✓ | | `magic` | +| move | ✓ 
| | | | | +| msbuild | ✓ | | ✓ | | | +| nasm | ✓ | ✓ | | | `asm-lsp` | +| nestedtext | ✓ | ✓ | ✓ | | | +| nginx | ✓ | | | | | +| nickel | ✓ | | ✓ | | `nls` | +| nim | ✓ | ✓ | ✓ | | `nimlangserver` | +| nix | ✓ | ✓ | ✓ | ✓ | `nil`, `nixd` | +| nu | ✓ | | | | `nu` | +| nunjucks | ✓ | | | | | +| ocaml | ✓ | | ✓ | | `ocamllsp` | +| ocaml-interface | ✓ | | | | `ocamllsp` | +| odin | ✓ | ✓ | ✓ | | `ols` | +| ohm | ✓ | ✓ | ✓ | | | +| opencl | ✓ | ✓ | ✓ | | `clangd` | +| openscad | ✓ | | | | `openscad-lsp` | +| org | ✓ | | | | | +| pascal | ✓ | ✓ | | | `pasls` | +| passwd | ✓ | | | | | +| pem | ✓ | | | | | +| perl | ✓ | ✓ | ✓ | | `perlnavigator` | +| pest | ✓ | ✓ | ✓ | | `pest-language-server` | +| php | ✓ | ✓ | ✓ | | `intelephense` | +| php-only | ✓ | | | | | +| pkgbuild | ✓ | ✓ | ✓ | | `termux-language-server`, `bash-language-server` | +| pkl | ✓ | | ✓ | | `pkl-lsp` | +| po | ✓ | ✓ | | | | +| pod | ✓ | | | | | +| ponylang | ✓ | ✓ | ✓ | | | +| powershell | ✓ | | | | | +| prisma | ✓ | ✓ | | | `prisma-language-server` | +| prolog | | | | | `swipl` | +| protobuf | ✓ | ✓ | ✓ | | `buf`, `pb`, `protols` | +| prql | ✓ | | | | | +| purescript | ✓ | ✓ | | | `purescript-language-server` | +| python | ✓ | ✓ | ✓ | ✓ | `ruff`, `jedi-language-server`, `pylsp` | +| qml | ✓ | | ✓ | | `qmlls` | +| quint | ✓ | | | | `quint-language-server` | +| r | ✓ | | | | `R` | +| racket | ✓ | | ✓ | ✓ | `racket` | +| regex | ✓ | | | ✓ | | +| rego | ✓ | | | | `regols` | +| rescript | ✓ | ✓ | | | `rescript-language-server` | +| rmarkdown | ✓ | | ✓ | | `R` | +| robot | ✓ | | | | `robotframework_ls` | +| ron | ✓ | | ✓ | | | +| rst | ✓ | | | | | +| ruby | ✓ | ✓ | ✓ | ✓ | `ruby-lsp`, `solargraph` | +| rust | ✓ | ✓ | ✓ | ✓ | `rust-analyzer` | +| sage | ✓ | ✓ | | | | +| scala | ✓ | ✓ | ✓ | | `metals` | +| scheme | ✓ | | ✓ | ✓ | | +| scss | ✓ | | | ✓ | `vscode-css-language-server` | +| slint | ✓ | ✓ | ✓ | | `slint-lsp` | +| smali | ✓ | | ✓ | | | +| smithy | ✓ | | | | `cs` | +| sml | ✓ | | | | | +| snakemake | ✓ 
| | ✓ | | `pylsp` | +| solidity | ✓ | ✓ | | | `solc` | +| sourcepawn | ✓ | ✓ | | | `sourcepawn-studio` | +| spade | ✓ | | ✓ | | `spade-language-server` | +| spicedb | ✓ | | | | | +| sql | ✓ | ✓ | | | | +| sshclientconfig | ✓ | | | | | +| starlark | ✓ | ✓ | ✓ | ✓ | `starpls` | +| strace | ✓ | | | | | +| supercollider | ✓ | | | | | +| svelte | ✓ | | ✓ | | `svelteserver` | +| sway | ✓ | ✓ | ✓ | | `forc` | +| swift | ✓ | ✓ | | | `sourcekit-lsp` | +| t32 | ✓ | | | | | +| tablegen | ✓ | ✓ | ✓ | | | +| tact | ✓ | ✓ | ✓ | | | +| task | ✓ | | | | | +| tcl | ✓ | | ✓ | | | +| teal | ✓ | | | | `teal-language-server` | +| templ | ✓ | | | | `templ` | +| tera | ✓ | | | | | +| textproto | ✓ | ✓ | ✓ | | | +| tfvars | ✓ | | ✓ | | `terraform-ls` | +| thrift | ✓ | | | | | +| tlaplus | ✓ | | | | | +| todotxt | ✓ | | | | | +| toml | ✓ | ✓ | | ✓ | `taplo` | +| tsq | ✓ | | | | `ts_query_ls` | +| tsx | ✓ | ✓ | ✓ | ✓ | `typescript-language-server` | +| twig | ✓ | | | | | +| typescript | ✓ | ✓ | ✓ | ✓ | `typescript-language-server` | +| typespec | ✓ | ✓ | ✓ | | `tsp-server` | +| typst | ✓ | | | | `tinymist` | +| ungrammar | ✓ | | | | | +| unison | ✓ | ✓ | ✓ | | | +| uxntal | ✓ | | | | | +| v | ✓ | ✓ | ✓ | | `v-analyzer` | +| vala | ✓ | ✓ | | | `vala-language-server` | +| vento | ✓ | | | | | +| verilog | ✓ | ✓ | | | `svlangserver` | +| vhdl | ✓ | | | | `vhdl_ls` | +| vhs | ✓ | | | | | +| vue | ✓ | | | | `vue-language-server` | +| wast | ✓ | | | | | +| wat | ✓ | | | | `wat_server` | +| webc | ✓ | | | | | +| werk | ✓ | | | | | +| wgsl | ✓ | | | | `wgsl-analyzer` | +| wit | ✓ | | ✓ | | | +| wren | ✓ | ✓ | ✓ | | | +| xit | ✓ | | | | | +| xml | ✓ | | ✓ | ✓ | | +| xtc | ✓ | | | | | +| yaml | ✓ | ✓ | ✓ | ✓ | `yaml-language-server`, `ansible-language-server` | +| yara | ✓ | | | | `yls` | +| yuck | ✓ | | | | | +| zig | ✓ | ✓ | ✓ | | `zls` | diff --git a/book/src/guides/README.md b/book/src/guides/README.md index c25768e68961..e53983d60fb1 100644 --- a/book/src/guides/README.md +++ 
b/book/src/guides/README.md @@ -1,4 +1,4 @@ # Guides This section contains guides for adding new language server configurations, -tree-sitter grammars, textobject queries, and other similar items. +tree-sitter grammars, textobject and rainbow bracket queries, and other similar items. diff --git a/book/src/guides/rainbow_bracket_queries.md b/book/src/guides/rainbow_bracket_queries.md new file mode 100644 index 000000000000..1cba6a9907d1 --- /dev/null +++ b/book/src/guides/rainbow_bracket_queries.md @@ -0,0 +1,132 @@ +# Adding Rainbow Bracket Queries + +Helix uses `rainbows.scm` tree-sitter query files to provide rainbow bracket +functionality. + +Tree-sitter queries are documented in the tree-sitter online documentation. +If you're writing queries for the first time, be sure to check out the section +on [syntax highlighting queries] and on [query syntax]. + +Rainbow queries have two captures: `@rainbow.scope` and `@rainbow.bracket`. +`@rainbow.scope` should capture any node that increases the nesting level +while `@rainbow.bracket` should capture any bracket nodes. Put another way: +`@rainbow.scope` switches to the next rainbow color for all nodes in the tree +under it while `@rainbow.bracket` paints captured nodes with the current +rainbow color. + +For an example, let's add rainbow queries for the tree-sitter query (TSQ) +language itself. These queries will go into a +`runtime/queries/tsq/rainbows.scm` file in the repository root. + +First we'll add the `@rainbow.bracket` captures. TSQ only has parentheses and +square brackets: + +```tsq +["(" ")" "[" "]"] @rainbow.bracket +``` + +The ordering of the nodes within the alternation (square brackets) is not +taken into consideration. + +> Note: Why are these nodes quoted? Most syntax highlights capture text +> surrounded by parentheses. These are _named nodes_ and correspond to the +> names of rules in the grammar. 
Brackets are usually written in tree-sitter > grammars as literal strings, for example: > > ```js > { > // ... > arguments: seq("(", repeat($.argument), ")"), > // ... > } > ``` > > Nodes written as literal strings in tree-sitter grammars may be captured > in queries with those same literal strings. + +Then we'll add `@rainbow.scope` captures. The easiest way to do this is to +view the `grammar.js` file in the tree-sitter grammar's repository. For TSQ, +that file is [here][tsq grammar.js]. As we scroll down the `grammar.js`, we +see that the `(alternation)` (L36), `(group)` (L57), `(named_node)` (L59), +`(predicate)` (L87) and `(wildcard_node)` (L97) nodes all contain literal +parentheses or square brackets in their definitions. These nodes are all +direct parents of brackets and happen to also be the nodes we want to change +to the next rainbow color, so we capture them as `@rainbow.scope`. + +```tsq +[ + (group) + (named_node) + (wildcard_node) + (predicate) + (alternation) +] @rainbow.scope +``` + +This strategy works as a rule of thumb for most programming and configuration +languages. Markup languages can be trickier and may take additional +experimentation to find the correct nodes to use for scopes and brackets. + +The `:tree-sitter-subtree` command shows the syntax tree under the primary +selection in S-expression format and can be a useful tool for determining how +to write a query. + +### Properties + +The `rainbow.include-children` property may be applied to `@rainbow.scope` +captures. By default, all `@rainbow.bracket` captures must be direct descendants +of a node captured with `@rainbow.scope` in a syntax tree in order to be +highlighted. The `rainbow.include-children` property disables that check and +allows `@rainbow.bracket` captures to be highlighted if they are direct or +indirect descendants of some node captured with `@rainbow.scope`. + +For example, this property is used in the HTML rainbow queries.
+ +For a document like `<a>link</a>`, the syntax tree is: + +```tsq +(element ; <a>link</a> + (start_tag ; <a> + (tag_name)) ; a + (text) ; link + (end_tag ; </a> + (tag_name))) ; a +``` + +If we want to highlight the `<`, `>` and `</` nodes with rainbow colors, we capture them as `@rainbow.bracket`, and ` Configuration { diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 04ce9a28dd23..a1e2c86405d8 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -1,14 +1,17 @@ use std::{borrow::Cow, collections::HashMap, iter}; use helix_stdx::rope::RopeSliceExt; -use tree_sitter::{Query, QueryCursor, QueryPredicateArg}; use crate::{ chars::{char_is_line_ending, char_is_whitespace}, graphemes::{grapheme_width, tab_width_at}, - syntax::{IndentationHeuristic, LanguageConfiguration, RopeProvider, Syntax}, - tree_sitter::Node, - Position, Rope, RopeSlice, Tendril, + syntax::{self, config::IndentationHeuristic}, + tree_sitter::{ + self, + query::{InvalidPredicateError, UserPredicate}, + Capture, Grammar, InactiveQueryCursor, Node, Pattern, Query, QueryMatch, RopeInput, + }, + Position, Rope, RopeSlice, Syntax, Tendril, }; /// Enum representing indentation style. @@ -279,18 +282,164 @@ fn add_indent_level( /// Return true if only whitespace comes before the node on its line. /// If given, new_line_byte_pos is treated the same way as any existing newline.
-fn is_first_in_line(node: Node, text: RopeSlice, new_line_byte_pos: Option) -> bool { - let mut line_start_byte_pos = text.line_to_byte(node.start_position().row); +fn is_first_in_line(node: &Node, text: RopeSlice, new_line_byte_pos: Option) -> bool { + let line = text.byte_to_line(node.start_byte() as usize); + let mut line_start_byte_pos = text.line_to_byte(line) as u32; if let Some(pos) = new_line_byte_pos { if line_start_byte_pos < pos && pos <= node.start_byte() { line_start_byte_pos = pos; } } - text.byte_slice(line_start_byte_pos..node.start_byte()) + text.byte_slice(line_start_byte_pos as usize..node.start_byte() as usize) .chars() .all(|c| c.is_whitespace()) } +#[derive(Debug, Default)] +pub struct IndentQueryPredicates { + not_kind_eq: Option<(Capture, Box)>, + same_line: Option<(Capture, Capture, bool)>, + one_line: Option<(Capture, bool)>, +} + +impl IndentQueryPredicates { + fn are_satisfied( + &self, + match_: &QueryMatch, + text: RopeSlice, + new_line_byte_pos: Option, + ) -> bool { + if let Some((capture, not_expected_kind)) = self.not_kind_eq.as_ref() { + if !match_ + .nodes_for_capture(*capture) + .next() + .is_some_and(|node| node.kind() != not_expected_kind.as_ref()) + { + return false; + } + } + + if let Some((capture1, capture2, negated)) = self.same_line { + let n1 = match_.nodes_for_capture(capture1).next(); + let n2 = match_.nodes_for_capture(capture2).next(); + let satisfied = n1.zip(n2).is_some_and(|(n1, n2)| { + let n1_line = get_node_start_line(text, n1, new_line_byte_pos); + let n2_line = get_node_start_line(text, n2, new_line_byte_pos); + let same_line = n1_line == n2_line; + same_line != negated + }); + + if !satisfied { + return false; + } + } + + if let Some((capture, negated)) = self.one_line { + let node = match_.nodes_for_capture(capture).next(); + let satisfied = node.is_some_and(|node| { + let start_line = get_node_start_line(text, node, new_line_byte_pos); + let end_line = get_node_end_line(text, node, new_line_byte_pos); + 
let one_line = end_line == start_line; + one_line != negated + }); + + if !satisfied { + return false; + } + } + + true + } +} + +#[derive(Debug)] +pub struct IndentQuery { + query: Query, + properties: HashMap, + predicates: HashMap, + indent_capture: Option, + indent_always_capture: Option, + outdent_capture: Option, + outdent_always_capture: Option, + align_capture: Option, + anchor_capture: Option, + extend_capture: Option, + extend_prevent_once_capture: Option, +} + +impl IndentQuery { + pub fn new(grammar: Grammar, source: &str) -> Result { + let mut properties = HashMap::new(); + let mut predicates: HashMap = HashMap::new(); + let query = Query::new(grammar, source, |pattern, predicate| match predicate { + UserPredicate::SetProperty { key: "scope", val } => { + let scope = match val { + Some("all") => IndentScope::All, + Some("tail") => IndentScope::Tail, + Some(other) => { + return Err(format!("unknown scope (#set! scope \"{other}\")").into()) + } + None => return Err("missing scope value (#set! scope ...)".into()), + }; + + properties.insert(pattern, scope); + + Ok(()) + } + UserPredicate::Other(predicate) => { + let name = predicate.name(); + match name { + "not-kind-eq?" => { + predicate.check_arg_count(2)?; + let capture = predicate.capture_arg(0)?; + let not_expected_kind = predicate.str_arg(1)?; + + predicates.entry(pattern).or_default().not_kind_eq = + Some((capture, not_expected_kind.to_string().into_boxed_str())); + Ok(()) + } + "same-line?" | "not-same-line?" => { + predicate.check_arg_count(2)?; + let capture1 = predicate.capture_arg(0)?; + let capture2 = predicate.capture_arg(1)?; + let negated = name == "not-same-line?"; + + predicates.entry(pattern).or_default().same_line = + Some((capture1, capture2, negated)); + Ok(()) + } + "one-line?" | "not-one-line?" 
=> { + predicate.check_arg_count(1)?; + let capture = predicate.capture_arg(0)?; + let negated = name == "not-one-line?"; + + predicates.entry(pattern).or_default().one_line = Some((capture, negated)); + Ok(()) + } + _ => Err(InvalidPredicateError::unknown(UserPredicate::Other( + predicate, + ))), + } + } + _ => Err(InvalidPredicateError::unknown(predicate)), + })?; + + Ok(Self { + properties, + predicates, + indent_capture: query.get_capture("indent"), + indent_always_capture: query.get_capture("indent.always"), + outdent_capture: query.get_capture("outdent"), + outdent_always_capture: query.get_capture("outdent.always"), + align_capture: query.get_capture("align"), + anchor_capture: query.get_capture("anchor"), + extend_capture: query.get_capture("extend"), + extend_prevent_once_capture: query.get_capture("extend.prevent-once"), + query, + }) + } +} + /// The total indent for some line of code. /// This is usually constructed in one of 2 ways: /// - Successively add indent captures to get the (added) indent from a single line @@ -453,16 +602,16 @@ struct IndentQueryResult<'a> { extend_captures: HashMap>, } -fn get_node_start_line(node: Node, new_line_byte_pos: Option) -> usize { - let mut node_line = node.start_position().row; +fn get_node_start_line(text: RopeSlice, node: &Node, new_line_byte_pos: Option) -> usize { + let mut node_line = text.byte_to_line(node.start_byte() as usize); // Adjust for the new line that will be inserted if new_line_byte_pos.is_some_and(|pos| node.start_byte() >= pos) { node_line += 1; } node_line } -fn get_node_end_line(node: Node, new_line_byte_pos: Option) -> usize { - let mut node_line = node.end_position().row; +fn get_node_end_line(text: RopeSlice, node: &Node, new_line_byte_pos: Option) -> usize { + let mut node_line = text.byte_to_line(node.end_byte() as usize); // Adjust for the new line that will be inserted (with a strict inequality since end_byte is exclusive) if new_line_byte_pos.is_some_and(|pos| node.end_byte() > pos) { 
node_line += 1; @@ -471,175 +620,98 @@ fn get_node_end_line(node: Node, new_line_byte_pos: Option) -> usize { } fn query_indents<'a>( - query: &Query, + query: &IndentQuery, syntax: &Syntax, - cursor: &mut QueryCursor, text: RopeSlice<'a>, - range: std::ops::Range, - new_line_byte_pos: Option, + range: std::ops::Range, + new_line_byte_pos: Option, ) -> IndentQueryResult<'a> { let mut indent_captures: HashMap> = HashMap::new(); let mut extend_captures: HashMap> = HashMap::new(); + + let mut cursor = InactiveQueryCursor::new(); cursor.set_byte_range(range); + let mut cursor = cursor.execute_query( + &query.query, + &syntax.tree().root_node(), + RopeInput::new(text), + ); // Iterate over all captures from the query - for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) { + while let Some(m) = cursor.next_match() { // Skip matches where not all custom predicates are fulfilled - if !query.general_predicates(m.pattern_index).iter().all(|pred| { - match pred.operator.as_ref() { - "not-kind-eq?" => match (pred.args.first(), pred.args.get(1)) { - ( - Some(QueryPredicateArg::Capture(capture_idx)), - Some(QueryPredicateArg::String(kind)), - ) => { - let node = m.nodes_for_capture_index(*capture_idx).next(); - match node { - Some(node) => node.kind()!=kind.as_ref(), - _ => true, - } - } - _ => { - panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string"); - } - }, - "same-line?" | "not-same-line?" 
=> { - match (pred.args.first(), pred.args.get(1)) { - ( - Some(QueryPredicateArg::Capture(capt1)), - Some(QueryPredicateArg::Capture(capt2)) - ) => { - let n1 = m.nodes_for_capture_index(*capt1).next(); - let n2 = m.nodes_for_capture_index(*capt2).next(); - match (n1, n2) { - (Some(n1), Some(n2)) => { - let n1_line = get_node_start_line(n1, new_line_byte_pos); - let n2_line = get_node_start_line(n2, new_line_byte_pos); - let same_line = n1_line == n2_line; - same_line==(pred.operator.as_ref()=="same-line?") - } - _ => true, - } - } - _ => { - panic!("Invalid indent query: Arguments to \"{}\" must be 2 captures", pred.operator); - } - } - } - "one-line?" | "not-one-line?" => match pred.args.first() { - Some(QueryPredicateArg::Capture(capture_idx)) => { - let node = m.nodes_for_capture_index(*capture_idx).next(); - - match node { - Some(node) => { - let (start_line, end_line) = (get_node_start_line(node,new_line_byte_pos), get_node_end_line(node, new_line_byte_pos)); - let one_line = end_line == start_line; - one_line != (pred.operator.as_ref() == "not-one-line?") - }, - _ => true, - } - } - _ => { - panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string"); - } - }, - _ => { - panic!( - "Invalid indent query: Unknown predicate (\"{}\")", - pred.operator - ); - } - } - }) { + if query + .predicates + .get(&m.pattern()) + .is_some_and(|preds| !preds.are_satisfied(&m, text, new_line_byte_pos)) + { continue; } // A list of pairs (node_id, indent_capture) that are added by this match. // They cannot be added to indent_captures immediately since they may depend on other captures (such as an @anchor). 
let mut added_indent_captures: Vec<(usize, IndentCapture)> = Vec::new(); // The row/column position of the optional anchor in this query - let mut anchor: Option = None; - for capture in m.captures { - let capture_name = query.capture_names()[capture.index as usize]; - let capture_type = match capture_name { - "indent" => IndentCaptureType::Indent, - "indent.always" => IndentCaptureType::IndentAlways, - "outdent" => IndentCaptureType::Outdent, - "outdent.always" => IndentCaptureType::OutdentAlways, - // The alignment will be updated to the correct value at the end, when the anchor is known. - "align" => IndentCaptureType::Align(RopeSlice::from("")), - "anchor" => { - if anchor.is_some() { - log::error!("Invalid indent query: Encountered more than one @anchor in the same match.") - } else { - anchor = Some(capture.node); - } - continue; - } - "extend" => { - extend_captures - .entry(capture.node.id()) - .or_insert_with(|| Vec::with_capacity(1)) - .push(ExtendCapture::Extend); - continue; - } - "extend.prevent-once" => { - extend_captures - .entry(capture.node.id()) - .or_insert_with(|| Vec::with_capacity(1)) - .push(ExtendCapture::PreventOnce); - continue; - } - _ => { - // Ignore any unknown captures (these may be needed for predicates such as #match?) 
- continue; + let mut anchor: Option<&Node> = None; + for matched_node in m.matched_nodes() { + let node_id = matched_node.node.id(); + let capture = Some(matched_node.capture); + let capture_type = if capture == query.indent_capture { + IndentCaptureType::Indent + } else if capture == query.indent_always_capture { + IndentCaptureType::IndentAlways + } else if capture == query.outdent_capture { + IndentCaptureType::Outdent + } else if capture == query.outdent_always_capture { + IndentCaptureType::OutdentAlways + } else if capture == query.align_capture { + IndentCaptureType::Align(RopeSlice::from("")) + } else if capture == query.anchor_capture { + if anchor.is_some() { + log::error!("Invalid indent query: Encountered more than one @anchor in the same match.") + } else { + anchor = Some(&matched_node.node); } + continue; + } else if capture == query.extend_capture { + extend_captures + .entry(node_id) + .or_insert_with(|| Vec::with_capacity(1)) + .push(ExtendCapture::Extend); + continue; + } else if capture == query.extend_prevent_once_capture { + extend_captures + .entry(node_id) + .or_insert_with(|| Vec::with_capacity(1)) + .push(ExtendCapture::PreventOnce); + continue; + } else { + // Ignore any unknown captures (these may be needed for predicates such as #match?) 
+ continue; }; - let scope = capture_type.default_scope(); - let mut indent_capture = IndentCapture { + + // Apply additional settings for this capture + let scope = query + .properties + .get(&m.pattern()) + .copied() + .unwrap_or_else(|| capture_type.default_scope()); + let indent_capture = IndentCapture { capture_type, scope, }; - // Apply additional settings for this capture - for property in query.property_settings(m.pattern_index) { - match property.key.as_ref() { - "scope" => { - indent_capture.scope = match property.value.as_deref() { - Some("all") => IndentScope::All, - Some("tail") => IndentScope::Tail, - Some(s) => { - panic!("Invalid indent query: Unknown value for \"scope\" property (\"{}\")", s); - } - None => { - panic!( - "Invalid indent query: Missing value for \"scope\" property" - ); - } - } - } - _ => { - panic!( - "Invalid indent query: Unknown property \"{}\"", - property.key - ); - } - } - } - added_indent_captures.push((capture.node.id(), indent_capture)) + added_indent_captures.push((node_id, indent_capture)) } for (node_id, mut capture) in added_indent_captures { // Set the anchor for all align queries. if let IndentCaptureType::Align(_) = capture.capture_type { - let anchor = match anchor { - None => { - log::error!( - "Invalid indent query: @align requires an accompanying @anchor." 
- ); - continue; - } - Some(anchor) => anchor, + let Some(anchor) = anchor else { + log::error!("Invalid indent query: @align requires an accompanying @anchor."); + continue; }; + let line = text.byte_to_line(anchor.start_byte() as usize); + let line_start = text.line_to_byte(line); capture.capture_type = IndentCaptureType::Align( - text.line(anchor.start_position().row) - .byte_slice(0..anchor.start_position().column), + text.byte_slice(line_start..anchor.start_byte() as usize), ); } indent_captures @@ -691,13 +763,15 @@ fn extend_nodes<'a>( // - the cursor is on the same line as the end of the node OR // - the line that the cursor is on is more indented than the // first line of the node - if deepest_preceding.end_position().row == line { + if text.byte_to_line(deepest_preceding.end_byte() as usize) == line { extend_node = true; } else { let cursor_indent = indent_level_for_line(text.line(line), tab_width, indent_width); let node_indent = indent_level_for_line( - text.line(deepest_preceding.start_position().row), + text.line( + text.byte_to_line(deepest_preceding.start_byte() as usize), + ), tab_width, indent_width, ); @@ -714,7 +788,7 @@ fn extend_nodes<'a>( if node_captured && stop_extend { stop_extend = false; } else if extend_node && !stop_extend { - *node = deepest_preceding; + *node = deepest_preceding.clone(); break; } // If the tree contains a syntax error, `deepest_preceding` may not @@ -731,17 +805,17 @@ fn extend_nodes<'a>( /// - The indent captures for all relevant nodes. #[allow(clippy::too_many_arguments)] fn init_indent_query<'a, 'b>( - query: &Query, + query: &IndentQuery, syntax: &'a Syntax, text: RopeSlice<'b>, tab_width: usize, indent_width: usize, line: usize, - byte_pos: usize, - new_line_byte_pos: Option, + byte_pos: u32, + new_line_byte_pos: Option, ) -> Option<(Node<'a>, HashMap>>)> { // The innermost tree-sitter node which is considered for the indent - // computation. It may change if some predeceding node is extended + // computation. 
It may change if some preceding node is extended let mut node = syntax .tree() .root_node() @@ -751,37 +825,25 @@ fn init_indent_query<'a, 'b>( // The query range should intersect with all nodes directly preceding // the position of the indent query in case one of them is extended. let mut deepest_preceding = None; // The deepest node preceding the indent query position - let mut tree_cursor = node.walk(); - for child in node.children(&mut tree_cursor) { + for child in node.children() { if child.byte_range().end <= byte_pos { - deepest_preceding = Some(child); + deepest_preceding = Some(child.clone()); } } deepest_preceding = deepest_preceding.map(|mut prec| { // Get the deepest directly preceding node while prec.child_count() > 0 { - prec = prec.child(prec.child_count() - 1).unwrap(); + prec = prec.child(prec.child_count() - 1).unwrap().clone(); } prec }); let query_range = deepest_preceding + .as_ref() .map(|prec| prec.byte_range().end - 1..byte_pos + 1) .unwrap_or(byte_pos..byte_pos + 1); - crate::syntax::PARSER.with(|ts_parser| { - let mut ts_parser = ts_parser.borrow_mut(); - let mut cursor = ts_parser.cursors.pop().unwrap_or_default(); - let query_result = query_indents( - query, - syntax, - &mut cursor, - text, - query_range, - new_line_byte_pos, - ); - ts_parser.cursors.push(cursor); - (query_result, deepest_preceding) - }) + let query_result = query_indents(query, syntax, text, query_range, new_line_byte_pos); + (query_result, deepest_preceding) }; let extend_captures = query_result.extend_captures; @@ -839,7 +901,7 @@ fn init_indent_query<'a, 'b>( /// ``` #[allow(clippy::too_many_arguments)] pub fn treesitter_indent_for_pos<'a>( - query: &Query, + query: &IndentQuery, syntax: &Syntax, tab_width: usize, indent_width: usize, @@ -848,7 +910,7 @@ pub fn treesitter_indent_for_pos<'a>( pos: usize, new_line: bool, ) -> Option> { - let byte_pos = text.char_to_byte(pos); + let byte_pos = text.char_to_byte(pos) as u32; let new_line_byte_pos = 
new_line.then_some(byte_pos); let (mut node, mut indent_captures) = init_indent_query( query, @@ -868,7 +930,7 @@ pub fn treesitter_indent_for_pos<'a>( let mut indent_for_line_below = Indentation::default(); loop { - let is_first = is_first_in_line(node, text, new_line_byte_pos); + let is_first = is_first_in_line(&node, text, new_line_byte_pos); // Apply all indent definitions for this node. // Since we only iterate over each node once, we can remove the @@ -891,8 +953,8 @@ pub fn treesitter_indent_for_pos<'a>( } if let Some(parent) = node.parent() { - let node_line = get_node_start_line(node, new_line_byte_pos); - let parent_line = get_node_start_line(parent, new_line_byte_pos); + let node_line = get_node_start_line(text, &node, new_line_byte_pos); + let parent_line = get_node_start_line(text, &parent, new_line_byte_pos); if node_line != parent_line { // Don't add indent for the line below the line of the query @@ -914,8 +976,9 @@ pub fn treesitter_indent_for_pos<'a>( } else { // Only add the indentation for the line below if that line // is not after the line that the indentation is calculated for. 
- if (node.start_position().row < line) - || (new_line && node.start_position().row == line && node.start_byte() < byte_pos) + let node_start_line = text.byte_to_line(node.start_byte() as usize); + if node_start_line < line + || (new_line && node_start_line == line && node.start_byte() < byte_pos) { result.add_line(indent_for_line_below); } @@ -930,7 +993,7 @@ pub fn treesitter_indent_for_pos<'a>( /// This is done either using treesitter, or if that's not available by copying the indentation from the current line #[allow(clippy::too_many_arguments)] pub fn indent_for_newline( - language_config: Option<&LanguageConfiguration>, + loader: &syntax::Loader, syntax: Option<&Syntax>, indent_heuristic: &IndentationHeuristic, indent_style: &IndentStyle, @@ -947,7 +1010,7 @@ pub fn indent_for_newline( Some(syntax), ) = ( indent_heuristic, - language_config.and_then(|config| config.indent_query()), + syntax.and_then(|syntax| loader.indent_query(syntax.root_language())), syntax, ) { if let Some(indent) = treesitter_indent_for_pos( @@ -1015,10 +1078,10 @@ pub fn indent_for_newline( indent_style.as_str().repeat(indent_level) } -pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> { +pub fn get_scopes<'a>(syntax: Option<&'a Syntax>, text: RopeSlice, pos: usize) -> Vec<&'a str> { let mut scopes = Vec::new(); if let Some(syntax) = syntax { - let pos = text.char_to_byte(pos); + let pos = text.char_to_byte(pos) as u32; let mut node = match syntax .tree() .root_node() diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 3fcddfcd189a..09865ca40456 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -53,7 +53,7 @@ pub use smartstring::SmartString; pub type Tendril = SmartString; #[doc(inline)] -pub use {regex, tree_sitter}; +pub use {regex, tree_house::tree_sitter}; pub use position::{ char_idx_at_visual_offset, coords_at_pos, pos_at_coords, softwrapped_dimensions, @@ -73,3 +73,5 @@ pub use line_ending::{LineEnding, 
NATIVE_LINE_ENDING}; pub use transaction::{Assoc, Change, ChangeSet, Deletion, Operation, Transaction}; pub use uri::Uri; + +pub use tree_house::Language; diff --git a/helix-core/src/match_brackets.rs b/helix-core/src/match_brackets.rs index 7520d3e4646a..7f2891f334b7 100644 --- a/helix-core/src/match_brackets.rs +++ b/helix-core/src/match_brackets.rs @@ -1,7 +1,7 @@ use std::iter; +use crate::tree_sitter::Node; use ropey::RopeSlice; -use tree_sitter::Node; use crate::movement::Direction::{self, Backward, Forward}; use crate::Syntax; @@ -75,7 +75,7 @@ fn find_pair( pos_: usize, traverse_parents: bool, ) -> Option { - let pos = doc.char_to_byte(pos_); + let pos = doc.char_to_byte(pos_) as u32; let root = syntax.tree_for_byte_range(pos, pos).root_node(); let mut node = root.descendant_for_byte_range(pos, pos)?; @@ -128,7 +128,7 @@ fn find_pair( if find_pair_end(doc, sibling.prev_sibling(), start_char, end_char, Backward) .is_some() { - return doc.try_byte_to_char(sibling.start_byte()).ok(); + return doc.try_byte_to_char(sibling.start_byte() as usize).ok(); } } } else if node.is_named() { @@ -144,9 +144,9 @@ fn find_pair( if node.child_count() != 0 { return None; } - let node_start = doc.byte_to_char(node.start_byte()); - find_matching_bracket_plaintext(doc.byte_slice(node.byte_range()), pos_ - node_start) - .map(|pos| pos + node_start) + let node_start = doc.byte_to_char(node.start_byte() as usize); + let node_text = doc.byte_slice(node.start_byte() as usize..node.end_byte() as usize); + find_matching_bracket_plaintext(node_text, pos_ - node_start).map(|pos| pos + node_start) } /// Returns the position of the matching bracket under cursor. 
@@ -304,7 +304,7 @@ fn as_char(doc: RopeSlice, node: &Node) -> Option<(usize, char)> { if node.byte_range().len() != 1 { return None; } - let pos = doc.try_byte_to_char(node.start_byte()).ok()?; + let pos = doc.try_byte_to_char(node.start_byte() as usize).ok()?; Some((pos, doc.char(pos))) } diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index e446d8cc425d..09a99db2575f 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -1,7 +1,6 @@ -use std::{cmp::Reverse, iter}; +use std::{borrow::Cow, cmp::Reverse, iter}; use ropey::iter::Chars; -use tree_sitter::{Node, QueryCursor}; use crate::{ char_idx_at_visual_offset, @@ -13,9 +12,10 @@ use crate::{ }, line_ending::rope_is_line_ending, position::char_idx_at_visual_block_offset, - syntax::LanguageConfiguration, + syntax, text_annotations::TextAnnotations, textobject::TextObject, + tree_sitter::Node, visual_offset_from_block, Range, RopeSlice, Selection, Syntax, }; @@ -560,21 +560,23 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo /// Finds the range of the next or previous textobject in the syntax sub-tree of `node`. /// Returns the range in the forwards direction. 
+#[allow(clippy::too_many_arguments)] pub fn goto_treesitter_object( slice: RopeSlice, range: Range, object_name: &str, dir: Direction, - slice_tree: Node, - lang_config: &LanguageConfiguration, + slice_tree: &Node, + syntax: &Syntax, + loader: &syntax::Loader, count: usize, ) -> Range { + let textobject_query = loader.textobject_query(syntax.root_language()); let get_range = move |range: Range| -> Option { let byte_pos = slice.char_to_byte(range.cursor(slice)); let cap_name = |t: TextObject| format!("{}.{}", object_name, t); - let mut cursor = QueryCursor::new(); - let nodes = lang_config.textobject_query()?.capture_nodes_any( + let nodes = textobject_query?.capture_nodes_any( &[ &cap_name(TextObject::Movement), &cap_name(TextObject::Around), @@ -582,7 +584,6 @@ pub fn goto_treesitter_object( ], slice_tree, slice, - &mut cursor, )?; let node = match dir { @@ -617,14 +618,15 @@ pub fn goto_treesitter_object( last_range } -fn find_parent_start(mut node: Node) -> Option { +fn find_parent_start<'tree>(node: &Node<'tree>) -> Option> { let start = node.start_byte(); + let mut node = Cow::Borrowed(node); while node.start_byte() >= start || !node.is_named() { - node = node.parent()?; + node = Cow::Owned(node.parent()?); } - Some(node) + Some(node.into_owned()) } pub fn move_parent_node_end( @@ -635,8 +637,8 @@ pub fn move_parent_node_end( movement: Movement, ) -> Selection { selection.transform(|range| { - let start_from = text.char_to_byte(range.from()); - let start_to = text.char_to_byte(range.to()); + let start_from = text.char_to_byte(range.from()) as u32; + let start_to = text.char_to_byte(range.to()) as u32; let mut node = match syntax.named_descendant_for_byte_range(start_from, start_to) { Some(node) => node, @@ -654,18 +656,18 @@ pub fn move_parent_node_end( // moving forward, we always want to move one past the end of the // current node, so use the end byte of the current node, which is an exclusive // end of the range - Direction::Forward => 
text.byte_to_char(node.end_byte()), + Direction::Forward => text.byte_to_char(node.end_byte() as usize), // moving backward, we want the cursor to land on the start char of // the current node, or if it is already at the start of a node, to traverse up to // the parent Direction::Backward => { - let end_head = text.byte_to_char(node.start_byte()); + let end_head = text.byte_to_char(node.start_byte() as usize); // if we're already on the beginning, look up to the parent if end_head == range.cursor(text) { - node = find_parent_start(node).unwrap_or(node); - text.byte_to_char(node.start_byte()) + node = find_parent_start(&node).unwrap_or(node); + text.byte_to_char(node.start_byte() as usize) } else { end_head } diff --git a/helix-core/src/object.rs b/helix-core/src/object.rs index 17a393caf277..e0c02d0a905e 100644 --- a/helix-core/src/object.rs +++ b/helix-core/src/object.rs @@ -4,8 +4,8 @@ pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) let cursor = &mut syntax.walk(); selection.transform(|range| { - let from = text.char_to_byte(range.from()); - let to = text.char_to_byte(range.to()); + let from = text.char_to_byte(range.from()) as u32; + let to = text.char_to_byte(range.to()) as u32; let byte_range = from..to; cursor.reset_to_byte_range(from, to); @@ -17,8 +17,8 @@ pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) } let node = cursor.node(); - let from = text.byte_to_char(node.start_byte()); - let to = text.byte_to_char(node.end_byte()); + let from = text.byte_to_char(node.start_byte() as usize); + let to = text.byte_to_char(node.end_byte() as usize); Range::new(to, from).with_direction(range.direction()) }) @@ -53,10 +53,10 @@ pub fn select_next_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio } pub fn select_all_siblings(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { - selection.transform_iter(|range| { - let mut cursor = syntax.walk(); + let mut cursor = 
syntax.walk(); + selection.transform_iter(move |range| { let (from, to) = range.into_byte_range(text); - cursor.reset_to_byte_range(from, to); + cursor.reset_to_byte_range(from as u32, to as u32); if !cursor.goto_parent_with(|parent| parent.child_count() > 1) { return vec![range].into_iter(); @@ -67,21 +67,18 @@ pub fn select_all_siblings(syntax: &Syntax, text: RopeSlice, selection: Selectio } pub fn select_all_children(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { - selection.transform_iter(|range| { - let mut cursor = syntax.walk(); + let mut cursor = syntax.walk(); + selection.transform_iter(move |range| { let (from, to) = range.into_byte_range(text); - cursor.reset_to_byte_range(from, to); + cursor.reset_to_byte_range(from as u32, to as u32); select_children(&mut cursor, text, range).into_iter() }) } -fn select_children<'n>( - cursor: &'n mut TreeCursor<'n>, - text: RopeSlice, - range: Range, -) -> Vec { +fn select_children(cursor: &mut TreeCursor, text: RopeSlice, range: Range) -> Vec { let children = cursor - .named_children() + .children() + .filter(|child| child.is_named()) .map(|child| Range::from_node(child, text, range.direction())) .collect::>(); @@ -98,7 +95,7 @@ pub fn select_prev_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio text, selection, |cursor| { - while !cursor.goto_prev_sibling() { + while !cursor.goto_previous_sibling() { if !cursor.goto_parent() { break; } @@ -121,16 +118,16 @@ where let cursor = &mut syntax.walk(); selection.transform(|range| { - let from = text.char_to_byte(range.from()); - let to = text.char_to_byte(range.to()); + let from = text.char_to_byte(range.from()) as u32; + let to = text.char_to_byte(range.to()) as u32; cursor.reset_to_byte_range(from, to); motion(cursor); let node = cursor.node(); - let from = text.byte_to_char(node.start_byte()); - let to = text.byte_to_char(node.end_byte()); + let from = text.byte_to_char(node.start_byte() as usize); + let to = 
text.byte_to_char(node.end_byte() as usize); Range::new(from, to).with_direction(direction.unwrap_or_else(|| range.direction())) }) diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs index cea0b60714b4..3f888c57a853 100644 --- a/helix-core/src/position.rs +++ b/helix-core/src/position.rs @@ -89,11 +89,6 @@ impl From<(usize, usize)> for Position { } } -impl From for tree_sitter::Point { - fn from(pos: Position) -> Self { - Self::new(pos.row, pos.col) - } -} /// Convert a character index to (line, column) coordinates. /// /// column in `char` count which can be used for row:column display in diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index 1db2d619e614..5bde08e31ba7 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -9,13 +9,13 @@ use crate::{ }, line_ending::get_line_ending, movement::Direction, + tree_sitter::Node, Assoc, ChangeSet, RopeSlice, }; use helix_stdx::range::is_subset; use helix_stdx::rope::{self, RopeSliceExt}; use smallvec::{smallvec, SmallVec}; use std::{borrow::Cow, iter, slice}; -use tree_sitter::Node; /// A single selection range. 
/// @@ -76,8 +76,8 @@ impl Range { } pub fn from_node(node: Node, text: RopeSlice, direction: Direction) -> Self { - let from = text.byte_to_char(node.start_byte()); - let to = text.byte_to_char(node.end_byte()); + let from = text.byte_to_char(node.start_byte() as usize); + let to = text.byte_to_char(node.end_byte() as usize); Range::new(from, to).with_direction(direction) } diff --git a/helix-core/src/snippets/active.rs b/helix-core/src/snippets/active.rs index 98007ab68caf..1c10b76d20a8 100644 --- a/helix-core/src/snippets/active.rs +++ b/helix-core/src/snippets/active.rs @@ -1,6 +1,6 @@ use std::ops::{Index, IndexMut}; -use hashbrown::HashSet; +use foldhash::HashSet; use helix_stdx::range::{is_exact_subset, is_subset}; use helix_stdx::Range; use ropey::Rope; @@ -35,7 +35,7 @@ impl ActiveSnippet { let snippet = Self { ranges: snippet.ranges, tabstops: snippet.tabstops, - active_tabstops: HashSet::new(), + active_tabstops: HashSet::default(), current_tabstop: TabstopIdx(0), }; (snippet.tabstops.len() != 1).then_some(snippet) diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 677cdfa0b673..f3630a29522c 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,2709 +1,982 @@ -mod tree_cursor; - -use crate::{ - auto_pairs::AutoPairs, - chars::char_is_line_ending, - diagnostic::Severity, - regex::Regex, - transaction::{ChangeSet, Operation}, - RopeSlice, Tendril, -}; - -use ahash::RandomState; -use arc_swap::{ArcSwap, Guard}; -use bitflags::bitflags; -use globset::GlobSet; -use hashbrown::raw::RawTable; -use helix_stdx::rope::{self, RopeSliceExt}; -use slotmap::{DefaultKey as LayerId, HopSlotMap}; +pub mod config; use std::{ borrow::Cow, - cell::RefCell, - collections::{HashMap, HashSet, VecDeque}, - fmt::{self, Display, Write}, - hash::{Hash, Hasher}, - mem::replace, - path::{Path, PathBuf}, - str::FromStr, + collections::HashMap, + fmt, iter, + ops::{self, RangeBounds}, + path::Path, sync::Arc, + time::Duration, }; -use 
once_cell::sync::{Lazy, OnceCell}; -use serde::{ser::SerializeSeq, Deserialize, Serialize}; +use anyhow::{Context, Result}; +use arc_swap::{ArcSwap, Guard}; +use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration}; +use foldhash::HashSet; +use helix_loader::grammar::get_language; +use helix_stdx::rope::RopeSliceExt as _; +use once_cell::sync::OnceCell; +use ropey::RopeSlice; +use tree_house::{ + highlighter, + query_iter::{QueryIter, QueryIterEvent}, + tree_sitter::{ + query::{InvalidPredicateError, UserPredicate}, + Capture, Grammar, InactiveQueryCursor, InputEdit, Node, Pattern, Query, RopeInput, Tree, + }, + Error, InjectionLanguageMarker, LanguageConfig as SyntaxConfig, Layer, +}; -use helix_loader::grammar::{get_language, load_runtime_file}; +use crate::{indent::IndentQuery, tree_sitter, ChangeSet, Language}; -pub use tree_cursor::TreeCursor; +pub use tree_house::{ + highlighter::{Highlight, HighlightEvent}, + Error as HighlighterError, LanguageLoader, TreeCursor, TREE_SITTER_MATCH_LIMIT, +}; -fn deserialize_regex<'de, D>(deserializer: D) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - Option::::deserialize(deserializer)? - .map(|buf| rope::Regex::new(&buf).map_err(serde::de::Error::custom)) - .transpose() -} +#[derive(Debug)] +pub struct LanguageData { + config: Arc, + syntax: OnceCell>, + indent_query: OnceCell>, + textobject_query: OnceCell>, + rainbow_query: OnceCell>, +} + +impl LanguageData { + fn new(config: LanguageConfiguration) -> Self { + Self { + config: Arc::new(config), + syntax: OnceCell::new(), + indent_query: OnceCell::new(), + textobject_query: OnceCell::new(), + rainbow_query: OnceCell::new(), + } + } -fn deserialize_lsp_config<'de, D>(deserializer: D) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - Option::::deserialize(deserializer)? 
- .map(|toml| toml.try_into().map_err(serde::de::Error::custom)) - .transpose() -} + pub fn config(&self) -> &Arc { + &self.config + } -fn deserialize_tab_width<'de, D>(deserializer: D) -> Result -where - D: serde::Deserializer<'de>, -{ - usize::deserialize(deserializer).and_then(|n| { - if n > 0 && n <= 16 { - Ok(n) - } else { - Err(serde::de::Error::custom( - "tab width must be a value from 1 to 16 inclusive", - )) + /// Loads the grammar and compiles the highlights, injections and locals for the language. + /// This function should only be used by this module or the xtask crate. + pub fn compile_syntax_config( + config: &LanguageConfiguration, + loader: &Loader, + ) -> Result> { + let name = &config.language_id; + let parser_name = config.grammar.as_deref().unwrap_or(name); + let Some(grammar) = get_language(parser_name)? else { + log::info!("Skipping syntax config for '{name}' because the parser's shared library does not exist"); + return Ok(None); + }; + let highlight_query_text = read_query(name, "highlights.scm"); + let injection_query_text = read_query(name, "injections.scm"); + let local_query_text = read_query(name, "locals.scm"); + let config = SyntaxConfig::new( + grammar, + &highlight_query_text, + &injection_query_text, + &local_query_text, + ) + .with_context(|| format!("Failed to compile highlights for '{name}'"))?; + + reconfigure_highlights(&config, &loader.scopes()); + + Ok(Some(config)) + } + + fn syntax_config(&self, loader: &Loader) -> Option<&SyntaxConfig> { + self.syntax + .get_or_init(|| { + Self::compile_syntax_config(&self.config, loader) + .map_err(|err| { + log::error!("{err:#}"); + }) + .ok() + .flatten() + }) + .as_ref() + } + + /// Compiles the indents.scm query for a language. + /// This function should only be used by this module or the xtask crate. 
+ pub fn compile_indent_query( + grammar: Grammar, + config: &LanguageConfiguration, + ) -> Result> { + let name = &config.language_id; + let text = read_query(name, "indents.scm"); + if text.is_empty() { + return Ok(None); } - }) -} + let indent_query = IndentQuery::new(grammar, &text) + .with_context(|| format!("Failed to compile indents.scm query for '{name}'"))?; + Ok(Some(indent_query)) + } -pub fn deserialize_auto_pairs<'de, D>(deserializer: D) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - Ok(Option::::deserialize(deserializer)?.and_then(AutoPairConfig::into)) -} + fn indent_query(&self, loader: &Loader) -> Option<&IndentQuery> { + self.indent_query + .get_or_init(|| { + let grammar = self.syntax_config(loader)?.grammar; + Self::compile_indent_query(grammar, &self.config) + .map_err(|err| { + log::error!("{err}"); + }) + .ok() + .flatten() + }) + .as_ref() + } + + /// Compiles the textobjects.scm query for a language. + /// This function should only be used by this module or the xtask crate. + pub fn compile_textobject_query( + grammar: Grammar, + config: &LanguageConfiguration, + ) -> Result> { + let name = &config.language_id; + let text = read_query(name, "textobjects.scm"); + if text.is_empty() { + return Ok(None); + } + let query = Query::new(grammar, &text, |_, _| Ok(())) + .with_context(|| format!("Failed to compile textobjects.scm queries for '{name}'"))?; + Ok(Some(TextObjectQuery::new(query))) + } + + fn textobject_query(&self, loader: &Loader) -> Option<&TextObjectQuery> { + self.textobject_query + .get_or_init(|| { + let grammar = self.syntax_config(loader)?.grammar; + Self::compile_textobject_query(grammar, &self.config) + .map_err(|err| { + log::error!("{err}"); + }) + .ok() + .flatten() + }) + .as_ref() + } + + /// Compiles the rainbows.scm query for a language. + /// This function should only be used by this module or the xtask crate. 
+ pub fn compile_rainbow_query( + grammar: Grammar, + config: &LanguageConfiguration, + ) -> Result> { + let name = &config.language_id; + let text = read_query(name, "rainbows.scm"); + if text.is_empty() { + return Ok(None); + } + let rainbow_query = RainbowQuery::new(grammar, &text) + .with_context(|| format!("Failed to compile rainbows.scm query for '{name}'"))?; + Ok(Some(rainbow_query)) + } + + fn rainbow_query(&self, loader: &Loader) -> Option<&RainbowQuery> { + self.rainbow_query + .get_or_init(|| { + let grammar = self.syntax_config(loader)?.grammar; + Self::compile_rainbow_query(grammar, &self.config) + .map_err(|err| { + log::error!("{err}"); + }) + .ok() + .flatten() + }) + .as_ref() + } -fn default_timeout() -> u64 { - 20 + fn reconfigure(&self, scopes: &[String]) { + if let Some(Some(config)) = self.syntax.get() { + reconfigure_highlights(config, scopes); + } + } } -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct Configuration { - pub language: Vec, - #[serde(default)] - pub language_server: HashMap, +fn reconfigure_highlights(config: &SyntaxConfig, recognized_names: &[String]) { + config.configure(move |capture_name| { + let capture_parts: Vec<_> = capture_name.split('.').collect(); + + let mut best_index = None; + let mut best_match_len = 0; + for (i, recognized_name) in recognized_names.iter().enumerate() { + let mut len = 0; + let mut matches = true; + for (i, part) in recognized_name.split('.').enumerate() { + match capture_parts.get(i) { + Some(capture_part) if *capture_part == part => len += 1, + _ => { + matches = false; + break; + } + } + } + if matches && len > best_match_len { + best_index = Some(i); + best_match_len = len; + } + } + best_index.map(|idx| Highlight::new(idx as u32)) + }); } -// largely based on tree-sitter/cli/src/loader.rs -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case", deny_unknown_fields)] -pub struct LanguageConfiguration { - #[serde(rename = "name")] 
- pub language_id: String, // c-sharp, rust, tsx - #[serde(rename = "language-id")] - // see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem - pub language_server_language_id: Option, // csharp, rust, typescriptreact, for the language-server - pub scope: String, // source.rust - pub file_types: Vec, // filename extension or ends_with? - #[serde(default)] - pub shebangs: Vec, // interpreter(s) associated with language - #[serde(default)] - pub roots: Vec, // these indicate project roots <.git, Cargo.toml> - #[serde( - default, - skip_serializing, - deserialize_with = "from_comment_tokens", - alias = "comment-token" - )] - pub comment_tokens: Option>, - #[serde( - default, - skip_serializing, - deserialize_with = "from_block_comment_tokens" - )] - pub block_comment_tokens: Option>, - pub text_width: Option, - pub soft_wrap: Option, - - #[serde(default)] - pub auto_format: bool, - - #[serde(skip_serializing_if = "Option::is_none")] - pub formatter: Option, - - /// If set, overrides `editor.path-completion`. 
- pub path_completion: Option, - - #[serde(default)] - pub diagnostic_severity: Severity, - - pub grammar: Option, // tree-sitter grammar name, defaults to language_id - - // content_regex - #[serde(default, skip_serializing, deserialize_with = "deserialize_regex")] - pub injection_regex: Option, - // first_line_regex - // - #[serde(skip)] - pub(crate) highlight_config: OnceCell>>, - // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583 - #[serde( - default, - skip_serializing_if = "Vec::is_empty", - serialize_with = "serialize_lang_features", - deserialize_with = "deserialize_lang_features" - )] - pub language_servers: Vec, - #[serde(skip_serializing_if = "Option::is_none")] - pub indent: Option, - - #[serde(skip)] - pub(crate) indent_query: OnceCell>, - #[serde(skip)] - pub(crate) textobject_query: OnceCell>, - #[serde(skip_serializing_if = "Option::is_none")] - pub debugger: Option, - - /// Automatic insertion of pairs to parentheses, brackets, - /// etc. Defaults to true. Optionally, this can be a list of 2-tuples - /// to specify a list of characters to pair. This overrides the - /// global setting. - #[serde(default, skip_serializing, deserialize_with = "deserialize_auto_pairs")] - pub auto_pairs: Option, - - pub rulers: Option>, // if set, override editor's rulers - - /// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`. - /// Falling back to the current working directory if none are configured. - pub workspace_lsp_roots: Option>, - #[serde(default)] - pub persistent_diagnostic_sources: Vec, +pub fn read_query(lang: &str, query_filename: &str) -> String { + tree_house::read_query(lang, |language| { + helix_loader::grammar::load_runtime_file(language, query_filename).unwrap_or_default() + }) } -#[derive(Debug, PartialEq, Eq, Hash)] -pub enum FileType { - /// The extension of the file, either the `Path::extension` or the full - /// filename if the file does not have an extension. 
- Extension(String), - /// A Unix-style path glob. This is compared to the file's absolute path, so - /// it can be used to detect files based on their directories. If the glob - /// is not an absolute path and does not already start with a glob pattern, - /// a glob pattern will be prepended to it. - Glob(globset::Glob), +#[derive(Debug, Default)] +pub struct Loader { + languages: Vec, + languages_by_extension: HashMap, + languages_by_shebang: HashMap, + languages_glob_matcher: FileTypeGlobMatcher, + language_server_configs: HashMap, + scopes: ArcSwap>, } -impl Serialize for FileType { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - use serde::ser::SerializeMap; +pub type LoaderError = globset::Error; - match self { - FileType::Extension(extension) => serializer.serialize_str(extension), - FileType::Glob(glob) => { - let mut map = serializer.serialize_map(Some(1))?; - map.serialize_entry("glob", glob.glob())?; - map.end() +impl Loader { + pub fn new(config: Configuration) -> Result { + let mut languages = Vec::with_capacity(config.language.len()); + let mut languages_by_extension = HashMap::new(); + let mut languages_by_shebang = HashMap::new(); + let mut file_type_globs = Vec::new(); + + for mut config in config.language { + let language = Language(languages.len() as u32); + config.language = Some(language); + + for file_type in &config.file_types { + match file_type { + FileType::Extension(extension) => { + languages_by_extension.insert(extension.clone(), language); + } + FileType::Glob(glob) => { + file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language)); + } + }; + } + for shebang in &config.shebangs { + languages_by_shebang.insert(shebang.clone(), language); } + + languages.push(LanguageData::new(config)); } + + Ok(Self { + languages, + languages_by_extension, + languages_by_shebang, + languages_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, + language_server_configs: config.language_server, + scopes: 
ArcSwap::from_pointee(Vec::new()), + }) } -} -impl<'de> Deserialize<'de> for FileType { - fn deserialize(deserializer: D) -> Result - where - D: serde::de::Deserializer<'de>, - { - struct FileTypeVisitor; + pub fn languages(&self) -> impl ExactSizeIterator { + self.languages + .iter() + .enumerate() + .map(|(idx, data)| (Language(idx as u32), data)) + } - impl<'de> serde::de::Visitor<'de> for FileTypeVisitor { - type Value = FileType; + pub fn language_configs(&self) -> impl ExactSizeIterator { + self.languages.iter().map(|language| &*language.config) + } - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("string or table") - } + pub fn language(&self, lang: Language) -> &LanguageData { + &self.languages[lang.idx()] + } - fn visit_str(self, value: &str) -> Result - where - E: serde::de::Error, - { - Ok(FileType::Extension(value.to_string())) - } + pub fn language_for_name(&self, name: impl PartialEq) -> Option { + self.languages.iter().enumerate().find_map(|(idx, config)| { + (name == config.config.language_id).then_some(Language(idx as u32)) + }) + } - fn visit_map(self, mut map: M) -> Result - where - M: serde::de::MapAccess<'de>, - { - match map.next_entry::()? { - Some((key, mut glob)) if key == "glob" => { - // If the glob isn't an absolute path or already starts - // with a glob pattern, add a leading glob so we - // properly match relative paths. 
- if !glob.starts_with('/') && !glob.starts_with("*/") { - glob.insert_str(0, "*/"); - } - - globset::Glob::new(glob.as_str()) - .map(FileType::Glob) - .map_err(|err| { - serde::de::Error::custom(format!("invalid `glob` pattern: {}", err)) - }) + pub fn language_for_scope(&self, scope: &str) -> Option { + self.languages.iter().enumerate().find_map(|(idx, config)| { + (scope == config.config.scope).then_some(Language(idx as u32)) + }) + } + + pub fn language_for_match(&self, text: RopeSlice) -> Option { + // PERF: If the name matches up with the id, then this saves the need to do expensive regex. + let shortcircuit = self.language_for_name(text); + if shortcircuit.is_some() { + return shortcircuit; + } + + // If the name did not match up with a known id, then match on injection regex. + + let mut best_match_length = 0; + let mut best_match_position = None; + for (idx, data) in self.languages.iter().enumerate() { + if let Some(injection_regex) = &data.config.injection_regex { + if let Some(mat) = injection_regex.find(text.regex_input()) { + let length = mat.end() - mat.start(); + if length > best_match_length { + best_match_position = Some(idx); + best_match_length = length; } - Some((key, _value)) => Err(serde::de::Error::custom(format!( - "unknown key in `file-types` list: {}", - key - ))), - None => Err(serde::de::Error::custom( - "expected a `suffix` key in the `file-types` entry", - )), } } } - deserializer.deserialize_any(FileTypeVisitor) + best_match_position.map(|i| Language(i as u32)) } -} -fn from_comment_tokens<'de, D>(deserializer: D) -> Result>, D::Error> -where - D: serde::Deserializer<'de>, -{ - #[derive(Deserialize)] - #[serde(untagged)] - enum CommentTokens { - Multiple(Vec), - Single(String), - } - Ok( - Option::::deserialize(deserializer)?.map(|tokens| match tokens { - CommentTokens::Single(val) => vec![val], - CommentTokens::Multiple(vals) => vals, - }), - ) -} + pub fn language_for_filename(&self, path: &Path) -> Option { + // Find all the 
language configurations that match this file name + // or a suffix of the file name. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct BlockCommentToken { - pub start: String, - pub end: String, -} + // TODO: content_regex handling conflict resolution + self.languages_glob_matcher + .language_for_path(path) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| self.languages_by_extension.get(extension).copied()) + }) + } -impl Default for BlockCommentToken { - fn default() -> Self { - BlockCommentToken { - start: "/*".to_string(), - end: "*/".to_string(), - } + pub fn language_for_shebang(&self, text: RopeSlice) -> Option { + let shebang: Cow = text.into(); + self.languages_by_shebang.get(shebang.as_ref()).copied() } -} -fn from_block_comment_tokens<'de, D>( - deserializer: D, -) -> Result>, D::Error> -where - D: serde::Deserializer<'de>, -{ - #[derive(Deserialize)] - #[serde(untagged)] - enum BlockCommentTokens { - Multiple(Vec), - Single(BlockCommentToken), - } - Ok( - Option::::deserialize(deserializer)?.map(|tokens| match tokens { - BlockCommentTokens::Single(val) => vec![val], - BlockCommentTokens::Multiple(vals) => vals, - }), - ) -} + pub fn indent_query(&self, lang: Language) -> Option<&IndentQuery> { + self.language(lang).indent_query(self) + } -#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] -#[serde(rename_all = "kebab-case")] -pub enum LanguageServerFeature { - Format, - GotoDeclaration, - GotoDefinition, - GotoTypeDefinition, - GotoReference, - GotoImplementation, - // Goto, use bitflags, combining previous Goto members? - SignatureHelp, - Hover, - DocumentHighlight, - Completion, - CodeAction, - WorkspaceCommand, - DocumentSymbols, - WorkspaceSymbols, - // Symbols, use bitflags, see above? 
- Diagnostics, - RenameSymbol, - InlayHints, - DocumentColors, -} + pub fn textobject_query(&self, lang: Language) -> Option<&TextObjectQuery> { + self.language(lang).textobject_query(self) + } -impl Display for LanguageServerFeature { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use LanguageServerFeature::*; - let feature = match self { - Format => "format", - GotoDeclaration => "goto-declaration", - GotoDefinition => "goto-definition", - GotoTypeDefinition => "goto-type-definition", - GotoReference => "goto-reference", - GotoImplementation => "goto-implementation", - SignatureHelp => "signature-help", - Hover => "hover", - DocumentHighlight => "document-highlight", - Completion => "completion", - CodeAction => "code-action", - WorkspaceCommand => "workspace-command", - DocumentSymbols => "document-symbols", - WorkspaceSymbols => "workspace-symbols", - Diagnostics => "diagnostics", - RenameSymbol => "rename-symbol", - InlayHints => "inlay-hints", - DocumentColors => "document-colors", - }; - write!(f, "{feature}",) + fn rainbow_query(&self, lang: Language) -> Option<&RainbowQuery> { + self.language(lang).rainbow_query(self) } -} -#[derive(Debug, Serialize, Deserialize)] -#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] -enum LanguageServerFeatureConfiguration { - #[serde(rename_all = "kebab-case")] - Features { - #[serde(default, skip_serializing_if = "HashSet::is_empty")] - only_features: HashSet, - #[serde(default, skip_serializing_if = "HashSet::is_empty")] - except_features: HashSet, - name: String, - }, - Simple(String), -} + pub fn language_server_configs(&self) -> &HashMap { + &self.language_server_configs + } -#[derive(Debug, Default)] -pub struct LanguageServerFeatures { - pub name: String, - pub only: HashSet, - pub excluded: HashSet, -} + pub fn scopes(&self) -> Guard>> { + self.scopes.load() + } + + pub fn set_scopes(&self, scopes: Vec) { + self.scopes.store(Arc::new(scopes)); -impl LanguageServerFeatures { - pub fn 
has_feature(&self, feature: LanguageServerFeature) -> bool { - (self.only.is_empty() || self.only.contains(&feature)) && !self.excluded.contains(&feature) + // Reconfigure existing grammars + for data in &self.languages { + data.reconfigure(&self.scopes()); + } } } -fn deserialize_lang_features<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - let raw: Vec = Deserialize::deserialize(deserializer)?; - let res = raw - .into_iter() - .map(|config| match config { - LanguageServerFeatureConfiguration::Simple(name) => LanguageServerFeatures { - name, - ..Default::default() - }, - LanguageServerFeatureConfiguration::Features { - only_features, - except_features, - name, - } => LanguageServerFeatures { - name, - only: only_features, - excluded: except_features, - }, - }) - .collect(); - Ok(res) -} -fn serialize_lang_features( - map: &Vec, - serializer: S, -) -> Result -where - S: serde::Serializer, -{ - let mut serializer = serializer.serialize_seq(Some(map.len()))?; - for features in map { - let features = if features.only.is_empty() && features.excluded.is_empty() { - LanguageServerFeatureConfiguration::Simple(features.name.to_owned()) - } else { - LanguageServerFeatureConfiguration::Features { - only_features: features.only.clone(), - except_features: features.excluded.clone(), - name: features.name.to_owned(), +impl LanguageLoader for Loader { + fn language_for_marker(&self, marker: InjectionLanguageMarker) -> Option { + match marker { + InjectionLanguageMarker::Name(name) => self.language_for_name(name), + InjectionLanguageMarker::Match(text) => self.language_for_match(text), + InjectionLanguageMarker::Filename(text) => { + let path: Cow = text.into(); + self.language_for_filename(Path::new(path.as_ref())) } - }; - serializer.serialize_element(&features)?; + InjectionLanguageMarker::Shebang(text) => self.language_for_shebang(text), + } } - serializer.end() -} -fn deserialize_required_root_patterns<'de, D>(deserializer: D) 
-> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - let patterns = Vec::::deserialize(deserializer)?; - if patterns.is_empty() { - return Ok(None); + fn get_config(&self, lang: Language) -> Option<&SyntaxConfig> { + self.languages[lang.idx()].syntax_config(self) } - let mut builder = globset::GlobSetBuilder::new(); - for pattern in patterns { - let glob = globset::Glob::new(&pattern).map_err(serde::de::Error::custom)?; - builder.add(glob); - } - builder.build().map(Some).map_err(serde::de::Error::custom) } -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct LanguageServerConfiguration { - pub command: String, - #[serde(default)] - #[serde(skip_serializing_if = "Vec::is_empty")] - pub args: Vec, - #[serde(default, skip_serializing_if = "HashMap::is_empty")] - pub environment: HashMap, - #[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")] - pub config: Option, - #[serde(default = "default_timeout")] - pub timeout: u64, - #[serde( - default, - skip_serializing, - deserialize_with = "deserialize_required_root_patterns" - )] - pub required_root_patterns: Option, +#[derive(Debug)] +struct FileTypeGlob { + glob: globset::Glob, + language: Language, } -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct FormatterConfiguration { - pub command: String, - #[serde(default)] - #[serde(skip_serializing_if = "Vec::is_empty")] - pub args: Vec, +impl FileTypeGlob { + pub fn new(glob: globset::Glob, language: Language) -> Self { + Self { glob, language } + } } -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub struct AdvancedCompletion { - pub name: Option, - pub completion: Option, - pub default: Option, +#[derive(Debug)] +struct FileTypeGlobMatcher { + matcher: globset::GlobSet, + file_types: Vec, } -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case", 
untagged)] -pub enum DebugConfigCompletion { - Named(String), - Advanced(AdvancedCompletion), +impl Default for FileTypeGlobMatcher { + fn default() -> Self { + Self { + matcher: globset::GlobSet::empty(), + file_types: Default::default(), + } + } } -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(untagged)] -pub enum DebugArgumentValue { - String(String), - Array(Vec), - Boolean(bool), -} +impl FileTypeGlobMatcher { + fn new(file_types: Vec) -> Result { + let mut builder = globset::GlobSetBuilder::new(); + for file_type in &file_types { + builder.add(file_type.glob.clone()); + } -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub struct DebugTemplate { - pub name: String, - pub request: String, - #[serde(default)] - pub completion: Vec, - pub args: HashMap, -} + Ok(Self { + matcher: builder.build()?, + file_types, + }) + } -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub struct DebugAdapterConfig { - pub name: String, - pub transport: String, - #[serde(default)] - pub command: String, - #[serde(default)] - pub args: Vec, - pub port_arg: Option, - pub templates: Vec, - #[serde(default)] - pub quirks: DebuggerQuirks, + fn language_for_path(&self, path: &Path) -> Option { + self.matcher + .matches(path) + .iter() + .filter_map(|idx| self.file_types.get(*idx)) + .max_by_key(|file_type| file_type.glob.glob().len()) + .map(|file_type| file_type.language) + } } -// Different workarounds for adapters' differences -#[derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize)] -pub struct DebuggerQuirks { - #[serde(default)] - pub absolute_paths: bool, +#[derive(Debug)] +pub struct Syntax { + inner: tree_house::Syntax, } -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct IndentationConfiguration { - #[serde(deserialize_with = "deserialize_tab_width")] - pub tab_width: usize, - pub unit: 
String, -} +const PARSE_TIMEOUT: Duration = Duration::from_millis(500); // half a second is pretty generous -/// How the indentation for a newly inserted line should be determined. -/// If the selected heuristic is not available (e.g. because the current -/// language has no tree-sitter indent queries), a simpler one will be used. -#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum IndentationHeuristic { - /// Just copy the indentation of the line that the cursor is currently on. - Simple, - /// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line. - TreeSitter, - /// Use tree-sitter indent queries to compute the expected difference in indentation between the new line - /// and the line before. Add this to the actual indentation level of the line before. - #[default] - Hybrid, -} +impl Syntax { + pub fn new(source: RopeSlice, language: Language, loader: &Loader) -> Result { + let inner = tree_house::Syntax::new(source, language, PARSE_TIMEOUT, loader)?; + Ok(Self { inner }) + } -/// Configuration for auto pairs -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)] -pub enum AutoPairConfig { - /// Enables or disables auto pairing. False means disabled. True means to use the default pairs. - Enable(bool), + pub fn update( + &mut self, + old_source: RopeSlice, + source: RopeSlice, + changeset: &ChangeSet, + loader: &Loader, + ) -> Result<(), Error> { + let edits = generate_edits(old_source, changeset); + if edits.is_empty() { + Ok(()) + } else { + self.inner.update(source, PARSE_TIMEOUT, &edits, loader) + } + } - /// The mappings of pairs. 
- Pairs(HashMap), -} + pub fn layer(&self, layer: Layer) -> &tree_house::LayerData { + self.inner.layer(layer) + } -impl Default for AutoPairConfig { - fn default() -> Self { - AutoPairConfig::Enable(true) + pub fn root_layer(&self) -> Layer { + self.inner.root() } -} -impl From<&AutoPairConfig> for Option { - fn from(auto_pair_config: &AutoPairConfig) -> Self { - match auto_pair_config { - AutoPairConfig::Enable(false) => None, - AutoPairConfig::Enable(true) => Some(AutoPairs::default()), - AutoPairConfig::Pairs(pairs) => Some(AutoPairs::new(pairs.iter())), - } + pub fn layer_for_byte_range(&self, start: u32, end: u32) -> Layer { + self.inner.layer_for_byte_range(start, end) } -} -impl From for Option { - fn from(auto_pairs_config: AutoPairConfig) -> Self { - (&auto_pairs_config).into() + pub fn root_language(&self) -> Language { + self.layer(self.root_layer()).language } -} -impl FromStr for AutoPairConfig { - type Err = std::str::ParseBoolError; + pub fn tree(&self) -> &Tree { + self.inner.tree() + } - // only do bool parsing for runtime setting - fn from_str(s: &str) -> Result { - let enable: bool = s.parse()?; - Ok(AutoPairConfig::Enable(enable)) + pub fn tree_for_byte_range(&self, start: u32, end: u32) -> &Tree { + self.inner.tree_for_byte_range(start, end) } -} -#[derive(Debug)] -pub struct TextObjectQuery { - pub query: Query, -} + pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option { + self.inner.named_descendant_for_byte_range(start, end) + } -#[derive(Debug)] -pub enum CapturedNode<'a> { - Single(Node<'a>), - /// Guaranteed to be not empty - Grouped(Vec>), -} + pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option { + self.inner.descendant_for_byte_range(start, end) + } -impl CapturedNode<'_> { - pub fn start_byte(&self) -> usize { - match self { - Self::Single(n) => n.start_byte(), - Self::Grouped(ns) => ns[0].start_byte(), - } + pub fn walk(&self) -> TreeCursor { + self.inner.walk() } - pub fn end_byte(&self) 
-> usize { - match self { - Self::Single(n) => n.end_byte(), - Self::Grouped(ns) => ns.last().unwrap().end_byte(), - } + pub fn highlighter<'a>( + &'a self, + source: RopeSlice<'a>, + loader: &'a Loader, + range: impl RangeBounds, + ) -> Highlighter<'a> { + Highlighter::new(&self.inner, source, loader, range) } - pub fn byte_range(&self) -> std::ops::Range { - self.start_byte()..self.end_byte() + pub fn query_iter<'a, QueryLoader, LayerState, Range>( + &'a self, + source: RopeSlice<'a>, + loader: QueryLoader, + range: Range, + ) -> QueryIter<'a, 'a, QueryLoader, LayerState> + where + QueryLoader: FnMut(Language) -> Option<&'a Query> + 'a, + LayerState: Default, + Range: RangeBounds, + { + QueryIter::new(&self.inner, source, loader, range) } -} -/// The maximum number of in-progress matches a TS cursor can consider at once. -/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`. -/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here. -/// -/// -/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually). -/// However, this causes performance issues for medium to large files. -/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc). -/// -/// -/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream -/// (see and ). -/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance. -/// -/// -/// Neovim chose 64 for this value somewhat arbitrarily (). -/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions. -/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high. 
-const TREE_SITTER_MATCH_LIMIT: u32 = 256; + pub fn rainbow_highlights( + &self, + source: RopeSlice, + rainbow_length: usize, + loader: &Loader, + range: impl RangeBounds, + ) -> OverlayHighlights { + struct RainbowScope<'tree> { + end: u32, + node: Option>, + highlight: Highlight, + } -impl TextObjectQuery { - /// Run the query on the given node and return sub nodes which match given - /// capture ("function.inside", "class.around", etc). - /// - /// Captures may contain multiple nodes by using quantifiers (+, *, etc), - /// and support for this is partial and could use improvement. - /// - /// ```query - /// (comment)+ @capture - /// - /// ; OR - /// ( - /// (comment)* - /// . - /// (function) - /// ) @capture - /// ``` - pub fn capture_nodes<'a>( - &'a self, - capture_name: &str, - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - self.capture_nodes_any(&[capture_name], node, slice, cursor) - } - - /// Find the first capture that exists out of all given `capture_names` - /// and return sub nodes that match this capture. 
- pub fn capture_nodes_any<'a>( - &'a self, - capture_names: &[&str], - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - let capture_idx = capture_names - .iter() - .find_map(|cap| self.query.capture_index_for_name(cap))?; - - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let nodes = cursor - .captures(&self.query, node, RopeProvider(slice)) - .filter_map(move |(mat, _)| { - let nodes: Vec<_> = mat - .captures - .iter() - .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node)) - .collect(); - - if nodes.len() > 1 { - Some(CapturedNode::Grouped(nodes)) - } else { - nodes.into_iter().map(CapturedNode::Single).next() - } - }); - - Some(nodes) - } -} - -pub fn read_query(language: &str, filename: &str) -> String { - static INHERITS_REGEX: Lazy = - Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap()); - - let query = load_runtime_file(language, filename).unwrap_or_default(); - - // replaces all "; inherits (,)*" with the queries of the given language(s) - INHERITS_REGEX - .replace_all(&query, |captures: ®ex::Captures| { - captures[1] - .split(',') - .fold(String::new(), |mut output, language| { - // `write!` to a String cannot fail. 
- write!(output, "\n{}\n", read_query(language, filename)).unwrap(); - output - }) - }) - .to_string() -} - -impl LanguageConfiguration { - fn initialize_highlight(&self, scopes: &[String]) -> Option> { - let highlights_query = read_query(&self.language_id, "highlights.scm"); - // always highlight syntax errors - // highlights_query += "\n(ERROR) @error"; - - let injections_query = read_query(&self.language_id, "injections.scm"); - let locals_query = read_query(&self.language_id, "locals.scm"); - - if highlights_query.is_empty() { - None - } else { - let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) - .map_err(|err| { - log::error!( - "Failed to load tree-sitter parser for language {:?}: {:#}", - self.language_id, - err - ) - }) - .ok()?; - let config = HighlightConfiguration::new( - language, - &highlights_query, - &injections_query, - &locals_query, - ) - .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. 
This query could not be parsed: {:?}", self.language_id, err)) - .ok()?; - - config.configure(scopes); - Some(Arc::new(config)) - } - } - - pub fn reconfigure(&self, scopes: &[String]) { - if let Some(Some(config)) = self.highlight_config.get() { - config.configure(scopes); - } - } - - pub fn highlight_config(&self, scopes: &[String]) -> Option> { - self.highlight_config - .get_or_init(|| self.initialize_highlight(scopes)) - .clone() - } - - pub fn is_highlight_initialized(&self) -> bool { - self.highlight_config.get().is_some() - } - - pub fn indent_query(&self) -> Option<&Query> { - self.indent_query - .get_or_init(|| self.load_query("indents.scm")) - .as_ref() - } - - pub fn textobject_query(&self) -> Option<&TextObjectQuery> { - self.textobject_query - .get_or_init(|| { - self.load_query("textobjects.scm") - .map(|query| TextObjectQuery { query }) - }) - .as_ref() - } - - pub fn scope(&self) -> &str { - &self.scope - } - - fn load_query(&self, kind: &str) -> Option { - let query_text = read_query(&self.language_id, kind); - if query_text.is_empty() { - return None; - } - let lang = &self.highlight_config.get()?.as_ref()?.language; - Query::new(lang, &query_text) - .map_err(|e| { - log::error!( - "Failed to parse {} queries for {}: {}", - kind, - self.language_id, - e - ) - }) - .ok() - } -} -#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] -#[serde(default, rename_all = "kebab-case", deny_unknown_fields)] -pub struct SoftWrap { - /// Soft wrap lines that exceed viewport width. Default to off - // NOTE: Option on purpose because the struct is shared between language config and global config. - // By default the option is None so that the language config falls back to the global config unless explicitly set. - pub enable: Option, - /// Maximum space left free at the end of the line. - /// This space is used to wrap text at word boundaries. If that is not possible within this limit - /// the word is simply split at the end of the line. 
- /// - /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views. - /// - /// Default to 20 - pub max_wrap: Option, - /// Maximum number of indentation that can be carried over from the previous line when softwrapping. - /// If a line is indented further then this limit it is rendered at the start of the viewport instead. - /// - /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views. - /// - /// Default to 40 - pub max_indent_retain: Option, - /// Indicator placed at the beginning of softwrapped lines - /// - /// Defaults to ↪ - pub wrap_indicator: Option, - /// Softwrap at `text_width` instead of viewport width if it is shorter - pub wrap_at_text_width: Option, -} - -#[derive(Debug)] -struct FileTypeGlob { - glob: globset::Glob, - language_id: usize, -} - -impl FileTypeGlob { - fn new(glob: globset::Glob, language_id: usize) -> Self { - Self { glob, language_id } - } -} - -#[derive(Debug)] -struct FileTypeGlobMatcher { - matcher: globset::GlobSet, - file_types: Vec, -} - -impl FileTypeGlobMatcher { - fn new(file_types: Vec) -> Result { - let mut builder = globset::GlobSetBuilder::new(); - for file_type in &file_types { - builder.add(file_type.glob.clone()); - } - - Ok(Self { - matcher: builder.build()?, - file_types, - }) - } - - fn language_id_for_path(&self, path: &Path) -> Option<&usize> { - self.matcher - .matches(path) - .iter() - .filter_map(|idx| self.file_types.get(*idx)) - .max_by_key(|file_type| file_type.glob.glob().len()) - .map(|file_type| &file_type.language_id) - } -} - -// Expose loader as Lazy<> global since it's always static? - -#[derive(Debug)] -pub struct Loader { - // highlight_names ? 
- language_configs: Vec>, - language_config_ids_by_extension: HashMap, // Vec - language_config_ids_glob_matcher: FileTypeGlobMatcher, - language_config_ids_by_shebang: HashMap, - - language_server_configs: HashMap, - - scopes: ArcSwap>, -} - -pub type LoaderError = globset::Error; - -impl Loader { - pub fn new(config: Configuration) -> Result { - let mut language_configs = Vec::new(); - let mut language_config_ids_by_extension = HashMap::new(); - let mut language_config_ids_by_shebang = HashMap::new(); - let mut file_type_globs = Vec::new(); - - for config in config.language { - // get the next id - let language_id = language_configs.len(); - - for file_type in &config.file_types { - // entry().or_insert(Vec::new).push(language_id); - match file_type { - FileType::Extension(extension) => { - language_config_ids_by_extension.insert(extension.clone(), language_id); - } - FileType::Glob(glob) => { - file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language_id)); - } - }; - } - for shebang in &config.shebangs { - language_config_ids_by_shebang.insert(shebang.clone(), language_id); - } - - language_configs.push(Arc::new(config)); - } - - Ok(Self { - language_configs, - language_config_ids_by_extension, - language_config_ids_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, - language_config_ids_by_shebang, - language_server_configs: config.language_server, - scopes: ArcSwap::from_pointee(Vec::new()), - }) - } - - pub fn language_config_for_file_name(&self, path: &Path) -> Option> { - // Find all the language configurations that match this file name - // or a suffix of the file name. 
- let configuration_id = self - .language_config_ids_glob_matcher - .language_id_for_path(path) - .or_else(|| { - path.extension() - .and_then(|extension| extension.to_str()) - .and_then(|extension| self.language_config_ids_by_extension.get(extension)) - }); - - configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) - - // TODO: content_regex handling conflict resolution - } - - pub fn language_config_for_shebang( - &self, - source: RopeSlice, - ) -> Option> { - let line = Cow::from(source.line(0)); - static SHEBANG_REGEX: Lazy = - Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap()); - let configuration_id = SHEBANG_REGEX - .captures(&line) - .and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1])); - - configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) - } - - pub fn language_config_for_scope(&self, scope: &str) -> Option> { - self.language_configs - .iter() - .find(|config| config.scope == scope) - .cloned() - } - - pub fn language_config_for_language_id( - &self, - id: impl PartialEq, - ) -> Option> { - self.language_configs - .iter() - .find(|config| id.eq(&config.language_id)) - .cloned() - } - - /// Unlike `language_config_for_language_id`, which only returns Some for an exact id, this - /// function will perform a regex match on the given string to find the closest language match. - pub fn language_config_for_name(&self, slice: RopeSlice) -> Option> { - // PERF: If the name matches up with the id, then this saves the need to do expensive regex. - let shortcircuit = self.language_config_for_language_id(slice); - if shortcircuit.is_some() { - return shortcircuit; - } - - // If the name did not match up with a known id, then match on injection regex. 
- - let mut best_match_length = 0; - let mut best_match_position = None; - for (i, configuration) in self.language_configs.iter().enumerate() { - if let Some(injection_regex) = &configuration.injection_regex { - if let Some(mat) = injection_regex.find(slice.regex_input()) { - let length = mat.end() - mat.start(); - if length > best_match_length { - best_match_position = Some(i); - best_match_length = length; - } - } - } - } - - best_match_position.map(|i| self.language_configs[i].clone()) - } - - pub fn language_configuration_for_injection_string( - &self, - capture: &InjectionLanguageMarker, - ) -> Option> { - match capture { - InjectionLanguageMarker::LanguageId(id) => self.language_config_for_language_id(*id), - InjectionLanguageMarker::Name(name) => self.language_config_for_name(*name), - InjectionLanguageMarker::Filename(file) => { - let path_str: Cow = (*file).into(); - self.language_config_for_file_name(Path::new(path_str.as_ref())) - } - InjectionLanguageMarker::Shebang(shebang) => { - let shebang_str: Cow = (*shebang).into(); - self.language_config_ids_by_shebang - .get(shebang_str.as_ref()) - .and_then(|&id| self.language_configs.get(id).cloned()) - } - } - } - - pub fn language_configs(&self) -> impl Iterator> { - self.language_configs.iter() - } - - pub fn language_server_configs(&self) -> &HashMap { - &self.language_server_configs - } - - pub fn set_scopes(&self, scopes: Vec) { - self.scopes.store(Arc::new(scopes)); - - // Reconfigure existing grammars - for config in self - .language_configs - .iter() - .filter(|cfg| cfg.is_highlight_initialized()) - { - config.reconfigure(&self.scopes()); - } - } - - pub fn scopes(&self) -> Guard>> { - self.scopes.load() - } -} - -pub struct TsParser { - parser: tree_sitter::Parser, - pub cursors: Vec, -} - -// could also just use a pool, or a single instance? -thread_local! 
{ - pub static PARSER: RefCell = RefCell::new(TsParser { - parser: Parser::new(), - cursors: Vec::new(), - }) -} - -#[derive(Debug)] -pub struct Syntax { - layers: HopSlotMap, - root: LayerId, - loader: Arc>, -} - -fn byte_range_to_str(range: std::ops::Range, source: RopeSlice) -> Cow { - Cow::from(source.byte_slice(range)) -} - -impl Syntax { - pub fn new( - source: RopeSlice, - config: Arc, - loader: Arc>, - ) -> Option { - let root_layer = LanguageLayer { - tree: None, - config, - depth: 0, - flags: LayerUpdateFlags::empty(), - ranges: vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - parent: None, - }; - - // track scope_descriptor: a Vec of scopes for item in tree - - let mut layers = HopSlotMap::default(); - let root = layers.insert(root_layer); - - let mut syntax = Self { - root, - layers, - loader, - }; - - let res = syntax.update(source, source, &ChangeSet::new(source)); - - if res.is_err() { - log::error!("TS parser failed, disabling TS for the current buffer: {res:?}"); - return None; - } - Some(syntax) - } - - pub fn update( - &mut self, - old_source: RopeSlice, - source: RopeSlice, - changeset: &ChangeSet, - ) -> Result<(), Error> { - let mut queue = VecDeque::new(); - queue.push_back(self.root); - - let loader = self.loader.load(); - let scopes = loader.scopes.load(); - let injection_callback = |language: &InjectionLanguageMarker| { - loader - .language_configuration_for_injection_string(language) - .and_then(|language_config| language_config.highlight_config(&scopes)) - }; - - // Convert the changeset into tree sitter edits. - let edits = generate_edits(old_source, changeset); - - // This table allows inverse indexing of `layers`. - // That is by hashing a `Layer` you can find - // the `LayerId` of an existing equivalent `Layer` in `layers`. 
- // - // It is used to determine if a new layer exists for an injection - // or if an existing layer needs to be updated. - let mut layers_table = RawTable::with_capacity(self.layers.len()); - let layers_hasher = RandomState::new(); - // Use the edits to update all layers markers - fn point_add(a: Point, b: Point) -> Point { - if b.row > 0 { - Point::new(a.row.saturating_add(b.row), b.column) - } else { - Point::new(0, a.column.saturating_add(b.column)) - } - } - fn point_sub(a: Point, b: Point) -> Point { - if a.row > b.row { - Point::new(a.row.saturating_sub(b.row), a.column) - } else { - Point::new(0, a.column.saturating_sub(b.column)) - } - } - - for (layer_id, layer) in self.layers.iter_mut() { - // The root layer always covers the whole range (0..usize::MAX) - if layer.depth == 0 { - layer.flags = LayerUpdateFlags::MODIFIED; - continue; - } - - if !edits.is_empty() { - for range in &mut layer.ranges { - // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 - for edit in edits.iter().rev() { - let is_pure_insertion = edit.old_end_byte == edit.start_byte; - - // if edit is after range, skip - if edit.start_byte > range.end_byte { - // TODO: || (is_noop && edit.start_byte == range.end_byte) - continue; - } - - // if edit is before range, shift entire range by len - if edit.old_end_byte < range.start_byte { - range.start_byte = - edit.new_end_byte + (range.start_byte - edit.old_end_byte); - range.start_point = point_add( - edit.new_end_position, - point_sub(range.start_point, edit.old_end_position), - ); - - range.end_byte = edit - .new_end_byte - .saturating_add(range.end_byte - edit.old_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - - layer.flags |= LayerUpdateFlags::MOVED; - } - // if the edit starts in the space before and extends into the range - else if edit.start_byte < range.start_byte { - 
range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - // If the edit is an insertion at the start of the tree, shift - else if edit.start_byte == range.start_byte && is_pure_insertion { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - layer.flags |= LayerUpdateFlags::MOVED; - } else { - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - } - } - } - - let hash = layers_hasher.hash_one(layer); - // Safety: insert_no_grow is unsafe because it assumes that the table - // has enough capacity to hold additional elements. - // This is always the case as we reserved enough capacity above. - unsafe { layers_table.insert_no_grow(hash, layer_id) }; - } - - PARSER.with(|ts_parser| { - let ts_parser = &mut ts_parser.borrow_mut(); - ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours - let mut cursor = ts_parser.cursors.pop().unwrap_or_default(); - // TODO: might need to set cursor range - cursor.set_byte_range(0..usize::MAX); - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let source_slice = source.slice(..); - - while let Some(layer_id) = queue.pop_front() { - let layer = &mut self.layers[layer_id]; - - // Mark the layer as touched - layer.flags |= LayerUpdateFlags::TOUCHED; - - // If a tree already exists, notify it of changes. 
- if let Some(tree) = &mut layer.tree { - if layer - .flags - .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED) - { - for edit in edits.iter().rev() { - // Apply the edits in reverse. - // If we applied them in order then edit 1 would disrupt the positioning of edit 2. - tree.edit(edit); - } - } - - if layer.flags.contains(LayerUpdateFlags::MODIFIED) { - // Re-parse the tree. - layer.parse(&mut ts_parser.parser, source)?; - } - } else { - // always parse if this layer has never been parsed before - layer.parse(&mut ts_parser.parser, source)?; - } - - // Switch to an immutable borrow. - let layer = &self.layers[layer_id]; - - // Process injections. - let matches = cursor.matches( - &layer.config.injections_query, - layer.tree().root_node(), - RopeProvider(source_slice), - ); - let mut combined_injections = vec![ - (None, Vec::new(), IncludedChildren::default()); - layer.config.combined_injections_patterns.len() - ]; - let mut injections = Vec::new(); - let mut last_injection_end = 0; - for mat in matches { - let (injection_capture, content_node, included_children) = layer - .config - .injection_for_match(&layer.config.injections_query, &mat, source_slice); - - // in case this is a combined injection save it for more processing later - if let Some(combined_injection_idx) = layer - .config - .combined_injections_patterns - .iter() - .position(|&pattern| pattern == mat.pattern_index) - { - let entry = &mut combined_injections[combined_injection_idx]; - if injection_capture.is_some() { - entry.0 = injection_capture; - } - if let Some(content_node) = content_node { - if content_node.start_byte() >= last_injection_end { - entry.1.push(content_node); - last_injection_end = content_node.end_byte(); - } - } - entry.2 = included_children; - continue; - } - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. 
- mat.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. - if let (Some(injection_capture), Some(content_node)) = - (injection_capture, content_node) - { - if let Some(config) = (injection_callback)(&injection_capture) { - let ranges = - intersect_ranges(&layer.ranges, &[content_node], included_children); - - if !ranges.is_empty() { - if content_node.start_byte() < last_injection_end { - continue; - } - last_injection_end = content_node.end_byte(); - injections.push((config, ranges)); - } - } - } - } - - for (lang_name, content_nodes, included_children) in combined_injections { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { - if let Some(config) = (injection_callback)(&lang_name) { - let ranges = - intersect_ranges(&layer.ranges, &content_nodes, included_children); - if !ranges.is_empty() { - injections.push((config, ranges)); - } - } - } - } - - let depth = layer.depth + 1; - // TODO: can't inline this since matches borrows self.layers - for (config, ranges) in injections { - let parent = Some(layer_id); - let new_layer = LanguageLayer { - tree: None, - config, - depth, - ranges, - flags: LayerUpdateFlags::empty(), - parent: None, - }; - - // Find an identical existing layer - let layer = layers_table - .get(layers_hasher.hash_one(&new_layer), |&it| { - self.layers[it] == new_layer - }) - .copied(); - - // ...or insert a new one. - let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer)); - self.layers[layer_id].parent = parent; - - queue.push_back(layer_id); - } - - // TODO: pre-process local scopes at this time, rather than highlight? - // would solve problems with locals not working across boundaries - } - - // Return the cursor back in the pool. 
- ts_parser.cursors.push(cursor); - - // Reset all `LayerUpdateFlags` and remove all untouched layers - self.layers.retain(|_, layer| { - replace(&mut layer.flags, LayerUpdateFlags::empty()) - .contains(LayerUpdateFlags::TOUCHED) - }); - - Ok(()) - }) - } - - pub fn tree(&self) -> &Tree { - self.layers[self.root].tree() - } - - /// Iterate over the highlighted regions for a given slice of source code. - pub fn highlight_iter<'a>( - &'a self, - source: RopeSlice<'a>, - range: Option>, - cancellation_flag: Option<&'a AtomicUsize>, - ) -> impl Iterator> + 'a { - let mut layers = self - .layers - .iter() - .filter_map(|(_, layer)| { - // TODO: if range doesn't overlap layer range, skip it - - // Reuse a cursor from the pool if available. - let mut cursor = PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.pop().unwrap_or_default() - }); - - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. 
- let cursor_ref = unsafe { - mem::transmute::<&mut tree_sitter::QueryCursor, &mut tree_sitter::QueryCursor>( - &mut cursor, - ) - }; - - // if reusing cursors & no range this resets to whole range - cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); - cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let mut captures = cursor_ref - .captures( - &layer.config.query, - layer.tree().root_node(), - RopeProvider(source), - ) - .peekable(); - - // If there's no captures, skip the layer - captures.peek()?; - - Some(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - _tree: None, - captures: RefCell::new(captures), - config: layer.config.as_ref(), // TODO: just reuse `layer` - depth: layer.depth, // TODO: just reuse `layer` - }) - }) - .collect::>(); - - layers.sort_unstable_by_key(|layer| layer.sort_key()); - - let mut result = HighlightIter { + let mut scope_stack = Vec::::new(); + let mut highlights = Vec::new(); + let mut query_iter = self.query_iter::<_, (), _>( source, - byte_offset: range.map_or(0, |r| r.start), - cancellation_flag, - iter_count: 0, - layers, - next_event: None, - last_highlight_range: None, - }; - result.sort_layers(); - result - } - - pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree { - let mut container_id = self.root; - - for (layer_id, layer) in self.layers.iter() { - if layer.depth > self.layers[container_id].depth - && layer.contains_byte_range(start, end) - { - container_id = layer_id; - } - } - - self.layers[container_id].tree() - } - - pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { - self.tree_for_byte_range(start, end) - .root_node() - .named_descendant_for_byte_range(start, end) - } - - pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { - self.tree_for_byte_range(start, end) - .root_node() - 
.descendant_for_byte_range(start, end) - } - - pub fn walk(&self) -> TreeCursor<'_> { - // data structure to find the smallest range that contains a point - // when some of the ranges in the structure can overlap. - TreeCursor::new(&self.layers, self.root) - } - - // Commenting - // comment_strings_for_pos - // is_commented - - // Indentation - // suggested_indent_for_line_at_buffer_row - // suggested_indent_for_buffer_row - // indent_level_for_line - - // TODO: Folding -} - -bitflags! { - /// Flags that track the status of a layer - /// in the `Sytaxn::update` function - #[derive(Debug)] - struct LayerUpdateFlags : u32{ - const MODIFIED = 0b001; - const MOVED = 0b010; - const TOUCHED = 0b100; - } -} - -#[derive(Debug)] -pub struct LanguageLayer { - // mode - // grammar - pub config: Arc, - pub(crate) tree: Option, - pub ranges: Vec, - pub depth: u32, - flags: LayerUpdateFlags, - parent: Option, -} - -/// This PartialEq implementation only checks if that -/// two layers are theoretically identical (meaning they highlight the same text range with the same language). -/// It does not check whether the layers have the same internal treesitter -/// state. -impl PartialEq for LanguageLayer { - fn eq(&self, other: &Self) -> bool { - self.depth == other.depth - && self.config.language == other.config.language - && self.ranges == other.ranges - } -} - -/// Hash implementation belongs to PartialEq implementation above. -/// See its documentation for details. 
-impl Hash for LanguageLayer { - fn hash(&self, state: &mut H) { - self.depth.hash(state); - self.config.language.hash(state); - self.ranges.hash(state); - } -} - -impl LanguageLayer { - pub fn tree(&self) -> &Tree { - // TODO: no unwrap - self.tree.as_ref().unwrap() - } - - fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> { - parser - .set_included_ranges(&self.ranges) - .map_err(|_| Error::InvalidRanges)?; - - parser - .set_language(&self.config.language) - .map_err(|_| Error::InvalidLanguage)?; - - // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; - let tree = parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - &chunk.as_bytes()[byte - start_byte..] - } else { - // out of range - &[] - } - }, - self.tree.as_ref(), - ) - .ok_or(Error::Cancelled)?; - // unsafe { ts_parser.parser.set_cancellation_flag(None) }; - self.tree = Some(tree); - Ok(()) - } - - /// Whether the layer contains the given byte range. - /// - /// If the layer has multiple ranges (i.e. combined injections), the - /// given range is considered contained if it is within the start and - /// end bytes of the first and last ranges **and** if the given range - /// starts or ends within any of the layer's ranges. 
- fn contains_byte_range(&self, start: usize, end: usize) -> bool { - let layer_start = self - .ranges - .first() - .expect("ranges should not be empty") - .start_byte; - let layer_end = self - .ranges - .last() - .expect("ranges should not be empty") - .end_byte; - - layer_start <= start - && layer_end >= end - && self.ranges.iter().any(|range| { - let byte_range = range.start_byte..range.end_byte; - byte_range.contains(&start) || byte_range.contains(&end) - }) - } -} - -pub(crate) fn generate_edits( - old_text: RopeSlice, - changeset: &ChangeSet, -) -> Vec { - use Operation::*; - let mut old_pos = 0; - - let mut edits = Vec::new(); - - if changeset.changes.is_empty() { - return edits; - } - - let mut iter = changeset.changes.iter().peekable(); - - // TODO; this is a lot easier with Change instead of Operation. - - fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { - let byte = text.char_to_byte(pos); // <- attempted to index past end - let line = text.char_to_line(pos); - let line_start_byte = text.line_to_byte(line); - let col = byte - line_start_byte; - - (byte, Point::new(line, col)) - } - - fn traverse(point: Point, text: &Tendril) -> Point { - let Point { - mut row, - mut column, - } = point; - - // TODO: there should be a better way here. 
- let mut chars = text.chars().peekable(); - while let Some(ch) = chars.next() { - if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { - row += 1; - column = 0; - } else { - column += 1; - } - } - Point { row, column } - } - - while let Some(change) = iter.next() { - let len = match change { - Delete(i) | Retain(i) => *i, - Insert(_) => 0, - }; - let mut old_end = old_pos + len; - - match change { - Retain(_) => {} - Delete(_) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); - let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - - // deletion - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte, // old_pos to byte - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: start_position, // old pos to coords - }); - } - Insert(s) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); - - // a subsequent delete means a replace, consume it - if let Some(Delete(len)) = iter.peek() { - old_end = old_pos + len; - let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - - iter.next(); - - // replacement - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } else { - // insert - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte: start_byte, // same - new_end_byte: start_byte + s.len(), // old_pos + s.len() - start_position, // old pos to coords - old_end_position: start_position, // same - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } - } - } - 
old_pos = old_end; - } - edits -} - -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{iter, mem, ops, str}; -use tree_sitter::{ - Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, - QueryMatch, Range, TextProvider, Tree, -}; - -const CANCELLATION_CHECK_INTERVAL: usize = 100; - -/// Indicates which highlight should be applied to a region of source code. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct Highlight(pub usize); - -/// Represents the reason why syntax highlighting failed. -#[derive(Debug, PartialEq, Eq)] -pub enum Error { - Cancelled, - InvalidLanguage, - InvalidRanges, - Unknown, -} - -/// Represents a single step in rendering a syntax-highlighted document. -#[derive(Copy, Clone, Debug)] -pub enum HighlightEvent { - Source { start: usize, end: usize }, - HighlightStart(Highlight), - HighlightEnd, -} - -/// Contains the data needed to highlight code written in a particular language. -/// -/// This struct is immutable and can be shared between threads. 
-#[derive(Debug)] -pub struct HighlightConfiguration { - pub language: Grammar, - pub query: Query, - injections_query: Query, - combined_injections_patterns: Vec, - highlights_pattern_index: usize, - highlight_indices: ArcSwap>>, - non_local_variable_patterns: Vec, - injection_content_capture_index: Option, - injection_language_capture_index: Option, - injection_filename_capture_index: Option, - injection_shebang_capture_index: Option, - local_scope_capture_index: Option, - local_def_capture_index: Option, - local_def_value_capture_index: Option, - local_ref_capture_index: Option, -} - -#[derive(Debug)] -struct LocalDef<'a> { - name: Cow<'a, str>, - value_range: ops::Range, - highlight: Option, -} - -#[derive(Debug)] -struct LocalScope<'a> { - inherits: bool, - range: ops::Range, - local_defs: Vec>, -} - -#[derive(Debug)] -struct HighlightIter<'a> { - source: RopeSlice<'a>, - byte_offset: usize, - cancellation_flag: Option<&'a AtomicUsize>, - layers: Vec>, - iter_count: usize, - next_event: Option, - last_highlight_range: Option<(usize, usize, u32)>, -} - -// Adapter to convert rope chunks to bytes -pub struct ChunksBytes<'a> { - chunks: ropey::iter::Chunks<'a>, -} -impl<'a> Iterator for ChunksBytes<'a> { - type Item = &'a [u8]; - fn next(&mut self) -> Option { - self.chunks.next().map(str::as_bytes) - } -} - -pub struct RopeProvider<'a>(pub RopeSlice<'a>); -impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> { - type I = ChunksBytes<'a>; - - fn text(&mut self, node: Node) -> Self::I { - let fragment = self.0.byte_slice(node.start_byte()..node.end_byte()); - ChunksBytes { - chunks: fragment.chunks(), - } - } -} - -struct HighlightIterLayer<'a> { - _tree: Option, - cursor: QueryCursor, - captures: RefCell, &'a [u8]>>>, - config: &'a HighlightConfiguration, - highlight_end_stack: Vec, - scope_stack: Vec>, - depth: u32, -} - -impl fmt::Debug for HighlightIterLayer<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - 
f.debug_struct("HighlightIterLayer").finish() - } -} - -impl HighlightConfiguration { - /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting - /// queries. - /// - /// # Parameters - /// - /// * `language` - The Tree-sitter `Grammar` that should be used for parsing. - /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This - /// should be non-empty, otherwise no syntax highlights will be added. - /// * `injections_query` - A string containing tree patterns for injecting other languages - /// into the document. This can be empty if no injections are desired. - /// * `locals_query` - A string containing tree patterns for tracking local variable - /// definitions and references. This can be empty if local variable tracking is not needed. - /// - /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. - pub fn new( - language: Grammar, - highlights_query: &str, - injection_query: &str, - locals_query: &str, - ) -> Result { - // Concatenate the query strings, keeping track of the start offset of each section. - let mut query_source = String::new(); - query_source.push_str(locals_query); - let highlights_query_offset = query_source.len(); - query_source.push_str(highlights_query); - - // Construct a single query by concatenating the three query strings, but record the - // range of pattern indices that belong to each individual string. 
- let query = Query::new(&language, &query_source)?; - let mut highlights_pattern_index = 0; - for i in 0..(query.pattern_count()) { - let pattern_offset = query.start_byte_for_pattern(i); - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; - } - } - - let injections_query = Query::new(&language, injection_query)?; - let combined_injections_patterns = (0..injections_query.pattern_count()) - .filter(|&i| { - injections_query - .property_settings(i) - .iter() - .any(|s| &*s.key == "injection.combined") - }) - .collect(); - - // Find all of the highlighting patterns that are disabled for nodes that - // have been identified as local variables. - let non_local_variable_patterns = (0..query.pattern_count()) - .map(|i| { - query - .property_predicates(i) - .iter() - .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") - }) - .collect(); - - // Store the numeric ids for all of the special captures. - let mut injection_content_capture_index = None; - let mut injection_language_capture_index = None; - let mut injection_filename_capture_index = None; - let mut injection_shebang_capture_index = None; - let mut local_def_capture_index = None; - let mut local_def_value_capture_index = None; - let mut local_ref_capture_index = None; - let mut local_scope_capture_index = None; - for (i, name) in query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "local.definition" => local_def_capture_index = i, - "local.definition-value" => local_def_value_capture_index = i, - "local.reference" => local_ref_capture_index = i, - "local.scope" => local_scope_capture_index = i, - _ => {} - } - } - - for (i, name) in injections_query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, - "injection.filename" => injection_filename_capture_index = i, - "injection.shebang" 
=> injection_shebang_capture_index = i, - _ => {} - } - } - - let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); - Ok(Self { - language, - query, - injections_query, - combined_injections_patterns, - highlights_pattern_index, - highlight_indices, - non_local_variable_patterns, - injection_content_capture_index, - injection_language_capture_index, - injection_filename_capture_index, - injection_shebang_capture_index, - local_scope_capture_index, - local_def_capture_index, - local_def_value_capture_index, - local_ref_capture_index, - }) - } - - /// Get a slice containing all of the highlight names used in the configuration. - pub fn names(&self) -> &[&str] { - self.query.capture_names() - } - - /// Set the list of recognized highlight names. - /// - /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated - /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of - /// these queries can choose to recognize highlights with different levels of specificity. - /// For example, the string `function.builtin` will match against `function.builtin.constructor` - /// but will not match `function.method.builtin` and `function.method`. - /// - /// When highlighting, results are returned as `Highlight` values, which contain the index - /// of the matched highlight this list of highlight names. 
- pub fn configure(&self, recognized_names: &[String]) { - let mut capture_parts = Vec::new(); - let indices: Vec<_> = self - .query - .capture_names() - .iter() - .map(move |capture_name| { - capture_parts.clear(); - capture_parts.extend(capture_name.split('.')); - - let mut best_index = None; - let mut best_match_len = 0; - for (i, recognized_name) in recognized_names.iter().enumerate() { - let mut len = 0; - let mut matches = true; - for (i, part) in recognized_name.split('.').enumerate() { - match capture_parts.get(i) { - Some(capture_part) if *capture_part == part => len += 1, - _ => { - matches = false; - break; - } - } - } - if matches && len > best_match_len { - best_index = Some(i); - best_match_len = len; - } - } - best_index.map(Highlight) - }) - .collect(); - - self.highlight_indices.store(Arc::new(indices)); - } - - fn injection_pair<'a>( - &self, - query_match: &QueryMatch<'a, 'a>, - source: RopeSlice<'a>, - ) -> (Option>, Option>) { - let mut injection_capture = None; - let mut content_node = None; - - for capture in query_match.captures { - let index = Some(capture.index); - if index == self.injection_language_capture_index { - injection_capture = Some(InjectionLanguageMarker::Name( - source.byte_slice(capture.node.byte_range()), - )); - } else if index == self.injection_filename_capture_index { - injection_capture = Some(InjectionLanguageMarker::Filename( - source.byte_slice(capture.node.byte_range()), - )); - } else if index == self.injection_shebang_capture_index { - let node_slice = source.byte_slice(capture.node.byte_range()); - - // some languages allow space and newlines before the actual string content - // so a shebang could be on either the first or second line - let lines = if let Ok(end) = node_slice.try_line_to_byte(2) { - node_slice.byte_slice(..end) - } else { - node_slice - }; - - static SHEBANG_REGEX: Lazy = - Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); - - injection_capture = SHEBANG_REGEX - 
.captures_iter(lines.regex_input()) - .map(|cap| { - let cap = lines.byte_slice(cap.get_group(1).unwrap().range()); - InjectionLanguageMarker::Shebang(cap) - }) - .next() - } else if index == self.injection_content_capture_index { - content_node = Some(capture.node); - } - } - (injection_capture, content_node) - } - - fn injection_for_match<'a>( - &self, - query: &'a Query, - query_match: &QueryMatch<'a, 'a>, - source: RopeSlice<'a>, - ) -> ( - Option>, - Option>, - IncludedChildren, - ) { - let (mut injection_capture, content_node) = self.injection_pair(query_match, source); - - let mut included_children = IncludedChildren::default(); - for prop in query.property_settings(query_match.pattern_index) { - match prop.key.as_ref() { - // In addition to specifying the language name via the text of a - // captured node, it can also be hard-coded via a `#set!` predicate - // that sets the injection.language key. - "injection.language" if injection_capture.is_none() => { - injection_capture = prop - .value - .as_deref() - .map(InjectionLanguageMarker::LanguageId); - } - - // By default, injections do not include the *children* of an - // `injection.content` node - only the ranges that belong to the - // node itself. This can be changed using a `#set!` predicate that - // sets the `injection.include-children` key. - "injection.include-children" => included_children = IncludedChildren::All, - - // Some queries might only exclude named children but include unnamed - // children in their `injection.content` node. This can be enabled using - // a `#set!` predicate that sets the `injection.include-unnamed-children` key. - "injection.include-unnamed-children" => { - included_children = IncludedChildren::Unnamed - } - _ => {} - } - } - - (injection_capture, content_node, included_children) - } -} - -impl HighlightIterLayer<'_> { - // First, sort scope boundaries by their byte offset in the document. At a - // given position, emit scope endings before scope beginnings. 
Finally, emit - // scope boundaries from deeper layers first. - fn sort_key(&self) -> Option<(usize, bool, isize)> { - let depth = -(self.depth as isize); - let next_start = self - .captures - .borrow_mut() - .peek() - .map(|(m, i)| m.captures[*i].node.start_byte()); - let next_end = self.highlight_end_stack.last().cloned(); - match (next_start, next_end) { - (Some(start), Some(end)) => { - if start < end { - Some((start, true, depth)) - } else { - Some((end, false, depth)) - } - } - (Some(i), None) => Some((i, true, depth)), - (None, Some(j)) => Some((j, false, depth)), - _ => None, - } - } -} - -#[derive(Clone)] -enum IncludedChildren { - None, - All, - Unnamed, -} - -impl Default for IncludedChildren { - fn default() -> Self { - Self::None - } -} - -// Compute the ranges that should be included when parsing an injection. -// This takes into account three things: -// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. -// * `nodes` - Every injection takes place within a set of nodes. The injection ranges -// are the ranges of those nodes. -// * `includes_children` - For some injections, the content nodes' children should be -// excluded from the nested document, so that only the content nodes' *own* content -// is reparsed. For other injections, the content nodes' entire ranges should be -// reparsed, including the ranges of their children. 
-fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - included_children: IncludedChildren, -) -> Vec { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), - }; - - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| match included_children { - IncludedChildren::None => Some(child.range()), - IncludedChildren::All => None, - IncludedChildren::Unnamed => { - if child.is_named() { - Some(child.range()) - } else { - None - } - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; + |lang| loader.rainbow_query(lang).map(|q| &q.query), + range, + ); - if range.end_byte < parent_range.start_byte { + while let Some(event) = query_iter.next() { + let QueryIterEvent::Match(mat) = event else { continue; - } - - while parent_range.start_byte <= range.end_byte { - if parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } + }; - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: 
parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } + let rainbow_query = loader + .rainbow_query(query_iter.current_language()) + .expect("language must have a rainbow query to emit matches"); - if let Some(next_range) = parent_range_iter.next() { - parent_range = next_range; - } else { - return result; - } + let byte_range = mat.node.byte_range(); + // Pop any scopes that end before this capture begins. + while scope_stack + .last() + .is_some_and(|scope| byte_range.start >= scope.end) + { + scope_stack.pop(); } - } - } - result -} -impl HighlightIter<'_> { - fn emit_event( - &mut self, - offset: usize, - event: Option, - ) -> Option> { - let result; - if self.byte_offset < offset { - result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: offset, - })); - self.byte_offset = offset; - self.next_event = event; - } else { - result = event.map(Ok); - } - self.sort_layers(); - result - } - - fn sort_layers(&mut self) { - while !self.layers.is_empty() { - if let Some(sort_key) = self.layers[0].sort_key() { - let mut i = 0; - while i + 1 < self.layers.len() { - if let Some(next_offset) = self.layers[i + 1].sort_key() { - if next_offset < sort_key { - i += 1; - continue; - } + let capture = Some(mat.capture); + if capture == rainbow_query.scope_capture { + scope_stack.push(RainbowScope { + end: byte_range.end, + node: if rainbow_query + .include_children_patterns + .contains(&mat.pattern) + { + None } else { - let layer = self.layers.remove(i + 1); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); + Some(mat.node.clone()) + }, + highlight: Highlight::new((scope_stack.len() % rainbow_length) as u32), + }); + } else if capture == rainbow_query.bracket_capture { + if let 
Some(scope) = scope_stack.last() { + if !scope + .node + .as_ref() + .is_some_and(|node| mat.node.parent().as_ref() != Some(node)) + { + let start = source + .byte_to_char(source.floor_char_boundary(byte_range.start as usize)); + let end = + source.byte_to_char(source.ceil_char_boundary(byte_range.end as usize)); + highlights.push((scope.highlight, start..end)); } - break; } - if i > 0 { - self.layers[0..(i + 1)].rotate_left(1); - } - break; - } else { - let layer = self.layers.remove(0); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); } } + + OverlayHighlights::Heterogenous { highlights } } } -impl Iterator for HighlightIter<'_> { - type Item = Result; - - fn next(&mut self) -> Option { - 'main: loop { - // If we've already determined the next highlight boundary, just return it. - if let Some(e) = self.next_event.take() { - return Some(Ok(e)); - } - - // Periodically check for cancellation, returning `Cancelled` error if the - // cancellation flag was flipped. - if let Some(cancellation_flag) = self.cancellation_flag { - self.iter_count += 1; - if self.iter_count >= CANCELLATION_CHECK_INTERVAL { - self.iter_count = 0; - if cancellation_flag.load(Ordering::Relaxed) != 0 { - return Some(Err(Error::Cancelled)); - } - } - } - - // If none of the layers have any more highlight boundaries, terminate. - if self.layers.is_empty() { - let len = self.source.len_bytes(); - return if self.byte_offset < len { - let result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: len, - })); - self.byte_offset = len; - result - } else { - None - }; - } +pub type Highlighter<'a> = highlighter::Highlighter<'a, 'a, Loader>; - // Get the next capture from whichever layer has the earliest highlight boundary. 
- let range; - let layer = &mut self.layers[0]; - let captures = layer.captures.get_mut(); - if let Some((next_match, capture_index)) = captures.peek() { - let next_capture = next_match.captures[*capture_index]; - range = next_capture.node.byte_range(); - - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - } - // If there are no more captures, then emit any remaining highlight end events. - // And if there are none of those, then just advance to the end of the document. - else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } else { - return self.emit_event(self.source.len_bytes(), None); - }; +fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec { + use crate::Operation::*; + use tree_sitter::Point; - let (mut match_, capture_index) = captures.next().unwrap(); - let mut capture = match_.captures[capture_index]; + let mut old_pos = 0; - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } + let mut edits = Vec::new(); - // If this capture is for tracking local variables, then process the - // local variable info. - let mut reference_highlight = None; - let mut definition_highlight = None; - while match_.pattern_index < layer.config.highlights_pattern_index { - // If the node represents a local scope, push a new local scope onto - // the scope stack. 
- if Some(capture.index) == layer.config.local_scope_capture_index { - definition_highlight = None; - let mut scope = LocalScope { - inherits: true, - range: range.clone(), - local_defs: Vec::new(), - }; - for prop in layer.config.query.property_settings(match_.pattern_index) { - if let "local.scope-inherits" = prop.key.as_ref() { - scope.inherits = - prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); - } - } - layer.scope_stack.push(scope); - } - // If the node represents a definition, add a new definition to the - // local scope at the top of the scope stack. - else if Some(capture.index) == layer.config.local_def_capture_index { - reference_highlight = None; - let scope = layer.scope_stack.last_mut().unwrap(); - - let mut value_range = 0..0; - for capture in match_.captures { - if Some(capture.index) == layer.config.local_def_value_capture_index { - value_range = capture.node.byte_range(); - } - } + if changeset.changes.is_empty() { + return edits; + } - let name = byte_range_to_str(range.clone(), self.source); - scope.local_defs.push(LocalDef { - name, - value_range, - highlight: None, - }); - definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight); - } - // If the node represents a reference, then try to find the corresponding - // definition in the scope stack. - else if Some(capture.index) == layer.config.local_ref_capture_index - && definition_highlight.is_none() - { - definition_highlight = None; - let name = byte_range_to_str(range.clone(), self.source); - for scope in layer.scope_stack.iter().rev() { - if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { - if def.name == name && range.start >= def.value_range.end { - Some(def.highlight) - } else { - None - } - }) { - reference_highlight = highlight; - break; - } - if !scope.inherits { - break; - } - } - } + let mut iter = changeset.changes.iter().peekable(); - // Continue processing any additional matches for the same node. 
- if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } + // TODO; this is a lot easier with Change instead of Operation. + while let Some(change) = iter.next() { + let len = match change { + Delete(i) | Retain(i) => *i, + Insert(_) => 0, + }; + let mut old_end = old_pos + len; - self.sort_layers(); - continue 'main; - } + match change { + Retain(_) => {} + Delete(_) => { + let start_byte = old_text.char_to_byte(old_pos) as u32; + let old_end_byte = old_text.char_to_byte(old_end) as u32; - // Otherwise, this capture must represent a highlight. - // If this exact range has already been highlighted by an earlier pattern, or by - // a different layer, then skip over this one. - if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { - if range.start == last_start && range.end == last_end && layer.depth < last_depth { - self.sort_layers(); - continue 'main; - } + // deletion + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte, // old_pos to byte + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, + }); } + Insert(s) => { + let start_byte = old_text.char_to_byte(old_pos) as u32; - // If the current node was found to be a local variable, then skip over any - // highlighting patterns that are disabled for local variables. 
- if definition_highlight.is_some() || reference_highlight.is_some() { - while layer.config.non_local_variable_patterns[match_.pattern_index] { - match_.remove(); - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } + // a subsequent delete means a replace, consume it + if let Some(Delete(len)) = iter.peek() { + old_end = old_pos + len; + let old_end_byte = old_text.char_to_byte(old_end) as u32; - self.sort_layers(); - continue 'main; - } - } + iter.next(); - // Use the last capture found for the current node, skipping over any - // highlight patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. - while let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - match_.remove(); - capture = next_capture; - match_ = captures.next().unwrap().0; + // replacement + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte + s.len() as u32, // old_pos to byte + s.len() + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, + }); } else { - break; + // insert + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte: start_byte, // same + new_end_byte: start_byte + s.len() as u32, // old_pos + s.len() + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, + }); } } + } + old_pos = old_end; + } + edits +} - let current_highlight = layer.config.highlight_indices.load()[capture.index as usize]; - - // If this node represents a local definition, then store the current - // 
highlight value on the local scope entry representing this node. - if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; - } +/// A set of "overlay" highlights and ranges they apply to. +/// +/// As overlays, the styles for the given `Highlight`s are merged on top of the syntax highlights. +#[derive(Debug)] +pub enum OverlayHighlights { + /// All highlights use a single `Highlight`. + /// + /// Note that, currently, all ranges are assumed to be non-overlapping. This could change in + /// the future though. + Homogeneous { + highlight: Highlight, + ranges: Vec>, + }, + /// A collection of different highlights for given ranges. + /// + /// Note that the ranges **must be non-overlapping**. + Heterogenous { + highlights: Vec<(Highlight, ops::Range)>, + }, +} - // Emit a scope start event and push the node's end position to the stack. - if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); - } +impl OverlayHighlights { + pub fn single(highlight: Highlight, range: ops::Range) -> Self { + Self::Homogeneous { + highlight, + ranges: vec![range], + } + } - self.sort_layers(); + fn is_empty(&self) -> bool { + match self { + Self::Homogeneous { ranges, .. } => ranges.is_empty(), + Self::Heterogenous { highlights } => highlights.is_empty(), } } } -#[derive(Debug, Clone)] -pub enum InjectionLanguageMarker<'a> { - /// The language is specified by `LanguageConfiguration`'s `language_id` field. +#[derive(Debug)] +struct Overlay { + highlights: OverlayHighlights, + /// The position of the highlighter into the Vec of ranges of the overlays. /// - /// This marker is used when a pattern sets the `injection.language` property, for example - /// `(#set! injection.language "rust")`. 
- LanguageId(&'a str), - /// The language is specified in the document and captured by `@injection.language`. + /// Used by the `OverlayHighlighter`. + idx: usize, + /// The currently active highlight (and the ending character index) for this overlay. /// - /// This is used for markdown code fences for example. While the `LanguageId` variant can be - /// looked up by finding the language config that sets an `language_id`, this variant contains - /// text from the document being highlighted, so the text is checked against each language's - /// `injection_regex`. - Name(RopeSlice<'a>), - Filename(RopeSlice<'a>), - Shebang(RopeSlice<'a>), + /// Used by the `OverlayHighlighter`. + active_highlight: Option<(Highlight, usize)>, } -const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; - -pub struct Merge { - iter: I, - spans: Box)>>, +impl Overlay { + fn new(highlights: OverlayHighlights) -> Option { + (!highlights.is_empty()).then_some(Self { + highlights, + idx: 0, + active_highlight: None, + }) + } - next_event: Option, - next_span: Option<(usize, std::ops::Range)>, + fn current(&self) -> Option<(Highlight, ops::Range)> { + match &self.highlights { + OverlayHighlights::Homogeneous { highlight, ranges } => ranges + .get(self.idx) + .map(|range| (*highlight, range.clone())), + OverlayHighlights::Heterogenous { highlights } => highlights.get(self.idx).cloned(), + } + } - queue: Vec, + fn start(&self) -> Option { + match &self.highlights { + OverlayHighlights::Homogeneous { ranges, .. } => { + ranges.get(self.idx).map(|range| range.start) + } + OverlayHighlights::Heterogenous { highlights } => highlights + .get(self.idx) + .map(|(_highlight, range)| range.start), + } + } } -/// Merge a list of spans into the highlight event stream. 
-pub fn merge>( - iter: I, - spans: Vec<(usize, std::ops::Range)>, -) -> Merge { - let spans = Box::new(spans.into_iter()); - let mut merge = Merge { - iter, - spans, - next_event: None, - next_span: None, - queue: Vec::new(), - }; - merge.next_event = merge.iter.next(); - merge.next_span = merge.spans.next(); - merge +/// A collection of highlights to apply when rendering which merge on top of syntax highlights. +#[derive(Debug)] +pub struct OverlayHighlighter { + overlays: Vec, + next_highlight_start: usize, + next_highlight_end: usize, } -impl> Iterator for Merge { - type Item = HighlightEvent; - fn next(&mut self) -> Option { - use HighlightEvent::*; - if let Some(event) = self.queue.pop() { - return Some(event); +impl OverlayHighlighter { + pub fn new(overlays: impl IntoIterator) -> Self { + let overlays: Vec<_> = overlays.into_iter().filter_map(Overlay::new).collect(); + let next_highlight_start = overlays + .iter() + .filter_map(|overlay| overlay.start()) + .min() + .unwrap_or(usize::MAX); + + Self { + overlays, + next_highlight_start, + next_highlight_end: usize::MAX, } + } - loop { - match (self.next_event, &self.next_span) { - // this happens when range is partially or fully offscreen - (Some(Source { start, .. }), Some((span, range))) if start > range.start => { - if start > range.end { - self.next_span = self.spans.next(); - } else { - self.next_span = Some((*span, start..range.end)); - }; + /// The current position in the overlay highlights. + /// + /// This method is meant to be used when treating this type as a cursor over the overlay + /// highlights. + /// + /// `usize::MAX` is returned when there are no more overlay highlights. 
+ pub fn next_event_offset(&self) -> usize { + self.next_highlight_start.min(self.next_highlight_end) + } + + pub fn advance(&mut self) -> (HighlightEvent, impl Iterator + '_) { + let mut refresh = false; + let prev_stack_size = self + .overlays + .iter() + .filter(|overlay| overlay.active_highlight.is_some()) + .count(); + let pos = self.next_event_offset(); + + if self.next_highlight_end == pos { + for overlay in self.overlays.iter_mut() { + if overlay + .active_highlight + .is_some_and(|(_highlight, end)| end == pos) + { + overlay.active_highlight.take(); } - _ => break, } + + refresh = true; } - match (self.next_event, &self.next_span) { - (Some(HighlightStart(i)), _) => { - self.next_event = self.iter.next(); - Some(HighlightStart(i)) - } - (Some(HighlightEnd), _) => { - self.next_event = self.iter.next(); - Some(HighlightEnd) - } - (Some(Source { start, end }), Some((_, range))) if start < range.start => { - let intersect = range.start.min(end); - let event = Source { - start, - end: intersect, + while self.next_highlight_start == pos { + let mut activated_idx = usize::MAX; + for (idx, overlay) in self.overlays.iter_mut().enumerate() { + let Some((highlight, range)) = overlay.current() else { + continue; }; + if range.start != self.next_highlight_start { + continue; + } - if end == intersect { - // the event is complete - self.next_event = self.iter.next(); - } else { - // subslice the event - self.next_event = Some(Source { - start: intersect, - end, - }); - }; + // If this overlay has a highlight at this start index, set its active highlight + // and increment the cursor position within the overlay. 
+ overlay.active_highlight = Some((highlight, range.end)); + overlay.idx += 1; - Some(event) + activated_idx = activated_idx.min(idx); } - (Some(Source { start, end }), Some((span, range))) if start == range.start => { - let intersect = range.end.min(end); - let event = HighlightStart(Highlight(*span)); - - // enqueue in reverse order - self.queue.push(HighlightEnd); - self.queue.push(Source { - start, - end: intersect, - }); - if end == intersect { - // the event is complete - self.next_event = self.iter.next(); - } else { - // subslice the event - self.next_event = Some(Source { - start: intersect, - end, - }); - }; + // If `self.next_highlight_start == pos` that means that some overlay was ready to + // emit a highlight, so `activated_idx` must have been set to an existing index. + assert!( + (0..self.overlays.len()).contains(&activated_idx), + "expected an overlay to highlight (at pos {pos}, there are {} overlays)", + self.overlays.len() + ); + + // If any overlays are active after the (lowest) one which was just activated, the + // highlights need to be refreshed. + refresh |= self.overlays[activated_idx..] + .iter() + .any(|overlay| overlay.active_highlight.is_some()); + + self.next_highlight_start = self + .overlays + .iter() + .filter_map(|overlay| overlay.start()) + .min() + .unwrap_or(usize::MAX); + } - if intersect == range.end { - self.next_span = self.spans.next(); - } else { - self.next_span = Some((*span, intersect..range.end)); - } + self.next_highlight_end = self + .overlays + .iter() + .filter_map(|overlay| Some(overlay.active_highlight?.1)) + .min() + .unwrap_or(usize::MAX); - Some(event) - } - (Some(event), None) => { - self.next_event = self.iter.next(); - Some(event) - } - // Can happen if cursor at EOF and/or diagnostic reaches past the end. - // We need to actually emit events for the cursor-at-EOF situation, - // even though the range is past the end of the text. 
This needs to be - // handled appropriately by the drawing code by not assuming that - // all `Source` events point to valid indices in the rope. - (None, Some((span, range))) => { - let event = HighlightStart(Highlight(*span)); - self.queue.push(HighlightEnd); - self.queue.push(Source { - start: range.start, - end: range.end, - }); - self.next_span = self.spans.next(); - Some(event) - } - (None, None) => None, - e => unreachable!("{:?}", e), + let (event, start) = if refresh { + (HighlightEvent::Refresh, 0) + } else { + (HighlightEvent::Push, prev_stack_size) + }; + + ( + event, + self.overlays + .iter() + .flat_map(|overlay| overlay.active_highlight) + .map(|(highlight, _end)| highlight) + .skip(start), + ) + } +} + +#[derive(Debug)] +pub enum CapturedNode<'a> { + Single(Node<'a>), + /// Guaranteed to be not empty + Grouped(Vec>), +} + +impl CapturedNode<'_> { + pub fn start_byte(&self) -> usize { + match self { + Self::Single(n) => n.start_byte() as usize, + Self::Grouped(ns) => ns[0].start_byte() as usize, + } + } + + pub fn end_byte(&self) -> usize { + match self { + Self::Single(n) => n.end_byte() as usize, + Self::Grouped(ns) => ns.last().unwrap().end_byte() as usize, } } + + pub fn byte_range(&self) -> ops::Range { + self.start_byte()..self.end_byte() + } } -fn node_is_visible(node: &Node) -> bool { - node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) +#[derive(Debug)] +pub struct TextObjectQuery { + query: Query, } -fn format_anonymous_node_kind(kind: &str) -> Cow { - if kind.contains('"') { - Cow::Owned(kind.replace('"', "\\\"")) - } else { - Cow::Borrowed(kind) +impl TextObjectQuery { + pub fn new(query: Query) -> Self { + Self { query } + } + + /// Run the query on the given node and return sub nodes which match given + /// capture ("function.inside", "class.around", etc). 
+ /// + /// Captures may contain multiple nodes by using quantifiers (+, *, etc), + /// and support for this is partial and could use improvement. + /// + /// ```query + /// (comment)+ @capture + /// + /// ; OR + /// ( + /// (comment)* + /// . + /// (function) + /// ) @capture + /// ``` + pub fn capture_nodes<'a>( + &'a self, + capture_name: &str, + node: &Node<'a>, + slice: RopeSlice<'a>, + ) -> Option>> { + self.capture_nodes_any(&[capture_name], node, slice) + } + + /// Find the first capture that exists out of all given `capture_names` + /// and return sub nodes that match this capture. + pub fn capture_nodes_any<'a>( + &'a self, + capture_names: &[&str], + node: &Node<'a>, + slice: RopeSlice<'a>, + ) -> Option>> { + let capture = capture_names + .iter() + .find_map(|cap| self.query.get_capture(cap))?; + + let mut cursor = InactiveQueryCursor::new(); + cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + let mut cursor = cursor.execute_query(&self.query, node, RopeInput::new(slice)); + let capture_node = iter::from_fn(move || { + let (mat, _) = cursor.next_matched_node()?; + Some(mat.nodes_for_capture(capture).cloned().collect()) + }) + .filter_map(move |nodes: Vec<_>| { + if nodes.len() > 1 { + Some(CapturedNode::Grouped(nodes)) + } else { + nodes.into_iter().map(CapturedNode::Single).next() + } + }); + Some(capture_node) } } @@ -2719,6 +992,18 @@ pub fn pretty_print_tree(fmt: &mut W, node: Node) -> fmt::Result } } +fn node_is_visible(node: &Node) -> bool { + node.is_missing() || (node.is_named() && node.grammar().node_kind_is_visible(node.kind_id())) +} + +fn format_anonymous_node_kind(kind: &str) -> Cow { + if kind.contains('"') { + Cow::Owned(kind.replace('"', "\\\"")) + } else { + Cow::Borrowed(kind) + } +} + fn pretty_print_tree_impl( fmt: &mut W, cursor: &mut tree_sitter::TreeCursor, @@ -2767,11 +1052,66 @@ fn pretty_print_tree_impl( Ok(()) } +/// Finds the child of `node` which contains the given byte range. 
+ +pub fn child_for_byte_range<'a>(node: &Node<'a>, range: ops::Range) -> Option> { + for child in node.children() { + let child_range = child.byte_range(); + + if range.start >= child_range.start && range.end <= child_range.end { + return Some(child); + } + } + + None +} + +#[derive(Debug)] +pub struct RainbowQuery { + query: Query, + include_children_patterns: HashSet, + scope_capture: Option, + bracket_capture: Option, +} + +impl RainbowQuery { + fn new(grammar: Grammar, source: &str) -> Result { + let mut include_children_patterns = HashSet::default(); + + let query = Query::new(grammar, source, |pattern, predicate| match predicate { + UserPredicate::SetProperty { + key: "rainbow.include-children", + val, + } => { + if val.is_some() { + return Err( + "property 'rainbow.include-children' does not take an argument".into(), + ); + } + include_children_patterns.insert(pattern); + Ok(()) + } + _ => Err(InvalidPredicateError::unknown(predicate)), + })?; + + Ok(Self { + include_children_patterns, + scope_capture: query.get_capture("rainbow.scope"), + bracket_capture: query.get_capture("rainbow.bracket"), + query, + }) + } +} + #[cfg(test)] mod test { + use once_cell::sync::Lazy; + use super::*; use crate::{Rope, Transaction}; + static LOADER: Lazy = Lazy::new(|| crate::config::user_lang_loader().unwrap()); + #[test] fn test_textobject_queries() { let query_str = r#" @@ -2786,29 +1126,16 @@ mod test { "#, ); - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - let language = get_language("rust").unwrap(); - - let query = Query::new(&language, query_str).unwrap(); - let textobject = TextObjectQuery { query }; - let mut cursor = QueryCursor::new(); - - let config = HighlightConfiguration::new(language, "", "", "").unwrap(); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let language = 
LOADER.language_for_name("rust").unwrap(); + let grammar = LOADER.get_config(language).unwrap().grammar; + let query = Query::new(grammar, query_str, |_, _| Ok(())).unwrap(); + let textobject = TextObjectQuery::new(query); + let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); let root = syntax.tree().root_node(); - let mut test = |capture, range| { + let test = |capture, range| { let matches: Vec<_> = textobject - .capture_nodes(capture, root, source.slice(..), &mut cursor) + .capture_nodes(capture, &root, source.slice(..)) .unwrap() .collect(); @@ -2827,83 +1154,9 @@ mod test { // test("multiple_nodes_grouped", 1..37); } - #[test] - fn test_parser() { - let highlight_names: Vec = [ - "attribute", - "constant", - "function.builtin", - "function", - "keyword", - "operator", - "property", - "punctuation", - "punctuation.bracket", - "punctuation.delimiter", - "string", - "string.special", - "tag", - "type", - "type.builtin", - "variable", - "variable.builtin", - "variable.parameter", - ] - .iter() - .cloned() - .map(String::from) - .collect(); - - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - - let language = get_language("rust").unwrap(); - let config = HighlightConfiguration::new( - language, - &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm") - .unwrap(), - &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm") - .unwrap(), - "", // locals.scm - ) - .unwrap(); - config.configure(&highlight_names); - - let source = Rope::from_str( - " - struct Stuff {} - fn main() {} - ", - ); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); - let tree = syntax.tree(); - let root = tree.root_node(); - assert_eq!(root.kind(), "source_file"); - - assert_eq!( - root.to_sexp(), - concat!( - "(source_file ", - "(struct_item name: (type_identifier) body: 
(field_declaration_list)) ", - "(function_item name: (identifier) parameters: (parameters) body: (block)))" - ) - ); - - let struct_node = root.child(0).unwrap(); - assert_eq!(struct_node.kind(), "struct_item"); - } - #[test] fn test_input_edits() { - use tree_sitter::InputEdit; + use tree_sitter::{InputEdit, Point}; let doc = Rope::from("hello world!\ntest 123"); let transaction = Transaction::change( @@ -2920,17 +1173,17 @@ mod test { start_byte: 6, old_end_byte: 11, new_end_byte: 10, - start_position: Point { row: 0, column: 6 }, - old_end_position: Point { row: 0, column: 11 }, - new_end_position: Point { row: 0, column: 10 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO }, InputEdit { start_byte: 12, old_end_byte: 17, new_end_byte: 12, - start_position: Point { row: 0, column: 12 }, - old_end_position: Point { row: 1, column: 4 }, - new_end_position: Point { row: 0, column: 12 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO } ] ); @@ -2949,9 +1202,9 @@ mod test { start_byte: 8, old_end_byte: 8, new_end_byte: 14, - start_position: Point { row: 0, column: 8 }, - old_end_position: Point { row: 0, column: 8 }, - new_end_position: Point { row: 0, column: 14 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO }] ); } @@ -2965,26 +1218,13 @@ mod test { end: usize, ) { let source = Rope::from_str(source); - - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - let language = get_language(language_name).unwrap(); - - let config = HighlightConfiguration::new(language, "", "", "").unwrap(); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let language = LOADER.language_for_name(language_name).unwrap(); + let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); let root = syntax .tree() 
.root_node() - .descendant_for_byte_range(start, end) + .descendant_for_byte_range(start as u32, end as u32) .unwrap(); let mut output = String::new(); @@ -3052,14 +1292,4 @@ mod test { source.len(), ); } - - #[test] - fn test_load_runtime_file() { - // Test to make sure we can load some data from the runtime directory. - let contents = load_runtime_file("rust", "indents.scm").unwrap(); - assert!(!contents.is_empty()); - - let results = load_runtime_file("rust", "does-not-exist"); - assert!(results.is_err()); - } } diff --git a/helix-core/src/syntax/config.rs b/helix-core/src/syntax/config.rs new file mode 100644 index 000000000000..2152a70b0992 --- /dev/null +++ b/helix-core/src/syntax/config.rs @@ -0,0 +1,619 @@ +use crate::{auto_pairs::AutoPairs, diagnostic::Severity, Language}; + +use globset::GlobSet; +use helix_stdx::rope; +use serde::{ser::SerializeSeq as _, Deserialize, Serialize}; + +use std::{ + collections::{HashMap, HashSet}, + fmt::{self, Display}, + path::PathBuf, + str::FromStr, +}; + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct Configuration { + pub language: Vec, + #[serde(default)] + pub language_server: HashMap, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub struct LanguageConfiguration { + #[serde(skip)] + pub(super) language: Option, + + #[serde(rename = "name")] + pub language_id: String, // c-sharp, rust, tsx + #[serde(rename = "language-id")] + // see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem + pub language_server_language_id: Option, // csharp, rust, typescriptreact, for the language-server + pub scope: String, // source.rust + pub file_types: Vec, // filename extension or ends_with? 
+ #[serde(default)] + pub shebangs: Vec, // interpreter(s) associated with language + #[serde(default)] + pub roots: Vec, // these indicate project roots <.git, Cargo.toml> + #[serde( + default, + skip_serializing, + deserialize_with = "from_comment_tokens", + alias = "comment-token" + )] + pub comment_tokens: Option>, + #[serde( + default, + skip_serializing, + deserialize_with = "from_block_comment_tokens" + )] + pub block_comment_tokens: Option>, + pub text_width: Option, + pub soft_wrap: Option, + + #[serde(default)] + pub auto_format: bool, + + #[serde(skip_serializing_if = "Option::is_none")] + pub formatter: Option, + + /// If set, overrides `editor.path-completion`. + pub path_completion: Option, + + #[serde(default)] + pub diagnostic_severity: Severity, + + pub grammar: Option, // tree-sitter grammar name, defaults to language_id + + // content_regex + #[serde(default, skip_serializing, deserialize_with = "deserialize_regex")] + pub injection_regex: Option, + // first_line_regex + // + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + serialize_with = "serialize_lang_features", + deserialize_with = "deserialize_lang_features" + )] + pub language_servers: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub indent: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub debugger: Option, + + /// Automatic insertion of pairs to parentheses, brackets, + /// etc. Defaults to true. Optionally, this can be a list of 2-tuples + /// to specify a list of characters to pair. This overrides the + /// global setting. + #[serde(default, skip_serializing, deserialize_with = "deserialize_auto_pairs")] + pub auto_pairs: Option, + + pub rulers: Option>, // if set, override editor's rulers + + /// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`. + /// Falling back to the current working directory if none are configured. 
+ pub workspace_lsp_roots: Option>, + #[serde(default)] + pub persistent_diagnostic_sources: Vec, + /// Overrides the `editor.rainbow-brackets` config key for the language. + pub rainbow_brackets: Option, +} + +impl LanguageConfiguration { + pub fn language(&self) -> Language { + // This value must be set by `super::Loader::new`. + self.language.unwrap() + } +} + +#[derive(Debug, PartialEq, Eq, Hash)] +pub enum FileType { + /// The extension of the file, either the `Path::extension` or the full + /// filename if the file does not have an extension. + Extension(String), + /// A Unix-style path glob. This is compared to the file's absolute path, so + /// it can be used to detect files based on their directories. If the glob + /// is not an absolute path and does not already start with a glob pattern, + /// a glob pattern will be prepended to it. + Glob(globset::Glob), +} + +impl Serialize for FileType { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeMap; + + match self { + FileType::Extension(extension) => serializer.serialize_str(extension), + FileType::Glob(glob) => { + let mut map = serializer.serialize_map(Some(1))?; + map.serialize_entry("glob", glob.glob())?; + map.end() + } + } + } +} + +impl<'de> Deserialize<'de> for FileType { + fn deserialize(deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + struct FileTypeVisitor; + + impl<'de> serde::de::Visitor<'de> for FileTypeVisitor { + type Value = FileType; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("string or table") + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + Ok(FileType::Extension(value.to_string())) + } + + fn visit_map(self, mut map: M) -> Result + where + M: serde::de::MapAccess<'de>, + { + match map.next_entry::()? 
{ + Some((key, mut glob)) if key == "glob" => { + // If the glob isn't an absolute path or already starts + // with a glob pattern, add a leading glob so we + // properly match relative paths. + if !glob.starts_with('/') && !glob.starts_with("*/") { + glob.insert_str(0, "*/"); + } + + globset::Glob::new(glob.as_str()) + .map(FileType::Glob) + .map_err(|err| { + serde::de::Error::custom(format!("invalid `glob` pattern: {}", err)) + }) + } + Some((key, _value)) => Err(serde::de::Error::custom(format!( + "unknown key in `file-types` list: {}", + key + ))), + None => Err(serde::de::Error::custom( + "expected a `suffix` key in the `file-types` entry", + )), + } + } + } + + deserializer.deserialize_any(FileTypeVisitor) + } +} + +fn from_comment_tokens<'de, D>(deserializer: D) -> Result>, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum CommentTokens { + Multiple(Vec), + Single(String), + } + Ok( + Option::::deserialize(deserializer)?.map(|tokens| match tokens { + CommentTokens::Single(val) => vec![val], + CommentTokens::Multiple(vals) => vals, + }), + ) +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct BlockCommentToken { + pub start: String, + pub end: String, +} + +impl Default for BlockCommentToken { + fn default() -> Self { + BlockCommentToken { + start: "/*".to_string(), + end: "*/".to_string(), + } + } +} + +fn from_block_comment_tokens<'de, D>( + deserializer: D, +) -> Result>, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum BlockCommentTokens { + Multiple(Vec), + Single(BlockCommentToken), + } + Ok( + Option::::deserialize(deserializer)?.map(|tokens| match tokens { + BlockCommentTokens::Single(val) => vec![val], + BlockCommentTokens::Multiple(vals) => vals, + }), + ) +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "kebab-case")] +pub enum LanguageServerFeature { + Format, + 
GotoDeclaration, + GotoDefinition, + GotoTypeDefinition, + GotoReference, + GotoImplementation, + // Goto, use bitflags, combining previous Goto members? + SignatureHelp, + Hover, + DocumentHighlight, + Completion, + CodeAction, + WorkspaceCommand, + DocumentSymbols, + WorkspaceSymbols, + // Symbols, use bitflags, see above? + Diagnostics, + RenameSymbol, + InlayHints, + DocumentColors, +} + +impl Display for LanguageServerFeature { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use LanguageServerFeature::*; + let feature = match self { + Format => "format", + GotoDeclaration => "goto-declaration", + GotoDefinition => "goto-definition", + GotoTypeDefinition => "goto-type-definition", + GotoReference => "goto-reference", + GotoImplementation => "goto-implementation", + SignatureHelp => "signature-help", + Hover => "hover", + DocumentHighlight => "document-highlight", + Completion => "completion", + CodeAction => "code-action", + WorkspaceCommand => "workspace-command", + DocumentSymbols => "document-symbols", + WorkspaceSymbols => "workspace-symbols", + Diagnostics => "diagnostics", + RenameSymbol => "rename-symbol", + InlayHints => "inlay-hints", + DocumentColors => "document-colors", + }; + write!(f, "{feature}",) + } +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] +enum LanguageServerFeatureConfiguration { + #[serde(rename_all = "kebab-case")] + Features { + #[serde(default, skip_serializing_if = "HashSet::is_empty")] + only_features: HashSet, + #[serde(default, skip_serializing_if = "HashSet::is_empty")] + except_features: HashSet, + name: String, + }, + Simple(String), +} + +#[derive(Debug, Default)] +pub struct LanguageServerFeatures { + pub name: String, + pub only: HashSet, + pub excluded: HashSet, +} + +impl LanguageServerFeatures { + pub fn has_feature(&self, feature: LanguageServerFeature) -> bool { + (self.only.is_empty() || self.only.contains(&feature)) && 
!self.excluded.contains(&feature) + } +} + +fn deserialize_lang_features<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let raw: Vec = Deserialize::deserialize(deserializer)?; + let res = raw + .into_iter() + .map(|config| match config { + LanguageServerFeatureConfiguration::Simple(name) => LanguageServerFeatures { + name, + ..Default::default() + }, + LanguageServerFeatureConfiguration::Features { + only_features, + except_features, + name, + } => LanguageServerFeatures { + name, + only: only_features, + excluded: except_features, + }, + }) + .collect(); + Ok(res) +} +fn serialize_lang_features( + map: &Vec, + serializer: S, +) -> Result +where + S: serde::Serializer, +{ + let mut serializer = serializer.serialize_seq(Some(map.len()))?; + for features in map { + let features = if features.only.is_empty() && features.excluded.is_empty() { + LanguageServerFeatureConfiguration::Simple(features.name.to_owned()) + } else { + LanguageServerFeatureConfiguration::Features { + only_features: features.only.clone(), + except_features: features.excluded.clone(), + name: features.name.to_owned(), + } + }; + serializer.serialize_element(&features)?; + } + serializer.end() +} + +fn deserialize_required_root_patterns<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let patterns = Vec::::deserialize(deserializer)?; + if patterns.is_empty() { + return Ok(None); + } + let mut builder = globset::GlobSetBuilder::new(); + for pattern in patterns { + let glob = globset::Glob::new(&pattern).map_err(serde::de::Error::custom)?; + builder.add(glob); + } + builder.build().map(Some).map_err(serde::de::Error::custom) +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct LanguageServerConfiguration { + pub command: String, + #[serde(default)] + #[serde(skip_serializing_if = "Vec::is_empty")] + pub args: Vec, + #[serde(default, skip_serializing_if = 
"HashMap::is_empty")] + pub environment: HashMap, + #[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")] + pub config: Option, + #[serde(default = "default_timeout")] + pub timeout: u64, + #[serde( + default, + skip_serializing, + deserialize_with = "deserialize_required_root_patterns" + )] + pub required_root_patterns: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct FormatterConfiguration { + pub command: String, + #[serde(default)] + #[serde(skip_serializing_if = "Vec::is_empty")] + pub args: Vec, +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct AdvancedCompletion { + pub name: Option, + pub completion: Option, + pub default: Option, +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case", untagged)] +pub enum DebugConfigCompletion { + Named(String), + Advanced(AdvancedCompletion), +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(untagged)] +pub enum DebugArgumentValue { + String(String), + Array(Vec), + Boolean(bool), +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct DebugTemplate { + pub name: String, + pub request: String, + #[serde(default)] + pub completion: Vec, + pub args: HashMap, +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct DebugAdapterConfig { + pub name: String, + pub transport: String, + #[serde(default)] + pub command: String, + #[serde(default)] + pub args: Vec, + pub port_arg: Option, + pub templates: Vec, + #[serde(default)] + pub quirks: DebuggerQuirks, +} + +// Different workarounds for adapters' differences +#[derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize)] +pub struct DebuggerQuirks { + #[serde(default)] + pub absolute_paths: bool, +} + 
+#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct IndentationConfiguration { + #[serde(deserialize_with = "deserialize_tab_width")] + pub tab_width: usize, + pub unit: String, +} + +/// How the indentation for a newly inserted line should be determined. +/// If the selected heuristic is not available (e.g. because the current +/// language has no tree-sitter indent queries), a simpler one will be used. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum IndentationHeuristic { + /// Just copy the indentation of the line that the cursor is currently on. + Simple, + /// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line. + TreeSitter, + /// Use tree-sitter indent queries to compute the expected difference in indentation between the new line + /// and the line before. Add this to the actual indentation level of the line before. + #[default] + Hybrid, +} + +/// Configuration for auto pairs +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)] +pub enum AutoPairConfig { + /// Enables or disables auto pairing. False means disabled. True means to use the default pairs. + Enable(bool), + + /// The mappings of pairs. 
+ Pairs(HashMap), +} + +impl Default for AutoPairConfig { + fn default() -> Self { + AutoPairConfig::Enable(true) + } +} + +impl From<&AutoPairConfig> for Option { + fn from(auto_pair_config: &AutoPairConfig) -> Self { + match auto_pair_config { + AutoPairConfig::Enable(false) => None, + AutoPairConfig::Enable(true) => Some(AutoPairs::default()), + AutoPairConfig::Pairs(pairs) => Some(AutoPairs::new(pairs.iter())), + } + } +} + +impl From for Option { + fn from(auto_pairs_config: AutoPairConfig) -> Self { + (&auto_pairs_config).into() + } +} + +impl FromStr for AutoPairConfig { + type Err = std::str::ParseBoolError; + + // only do bool parsing for runtime setting + fn from_str(s: &str) -> Result { + let enable: bool = s.parse()?; + Ok(AutoPairConfig::Enable(enable)) + } +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(default, rename_all = "kebab-case", deny_unknown_fields)] +pub struct SoftWrap { + /// Soft wrap lines that exceed viewport width. Default to off + // NOTE: Option on purpose because the struct is shared between language config and global config. + // By default the option is None so that the language config falls back to the global config unless explicitly set. + pub enable: Option, + /// Maximum space left free at the end of the line. + /// This space is used to wrap text at word boundaries. If that is not possible within this limit + /// the word is simply split at the end of the line. + /// + /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views. + /// + /// Default to 20 + pub max_wrap: Option, + /// Maximum number of indentation that can be carried over from the previous line when softwrapping. + /// If a line is indented further then this limit it is rendered at the start of the viewport instead. + /// + /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views. 
+ /// + /// Default to 40 + pub max_indent_retain: Option, + /// Indicator placed at the beginning of softwrapped lines + /// + /// Defaults to ↪ + pub wrap_indicator: Option, + /// Softwrap at `text_width` instead of viewport width if it is shorter + pub wrap_at_text_width: Option, +} + +fn deserialize_regex<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + Option::::deserialize(deserializer)? + .map(|buf| rope::Regex::new(&buf).map_err(serde::de::Error::custom)) + .transpose() +} + +fn deserialize_lsp_config<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + Option::::deserialize(deserializer)? + .map(|toml| toml.try_into().map_err(serde::de::Error::custom)) + .transpose() +} + +fn deserialize_tab_width<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + usize::deserialize(deserializer).and_then(|n| { + if n > 0 && n <= 16 { + Ok(n) + } else { + Err(serde::de::Error::custom( + "tab width must be a value from 1 to 16 inclusive", + )) + } + }) +} + +pub fn deserialize_auto_pairs<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + Ok(Option::::deserialize(deserializer)?.and_then(AutoPairConfig::into)) +} + +fn default_timeout() -> u64 { + 20 +} diff --git a/helix-core/src/syntax/tree_cursor.rs b/helix-core/src/syntax/tree_cursor.rs deleted file mode 100644 index d82ea74dbfff..000000000000 --- a/helix-core/src/syntax/tree_cursor.rs +++ /dev/null @@ -1,264 +0,0 @@ -use std::{cmp::Reverse, ops::Range}; - -use super::{LanguageLayer, LayerId}; - -use slotmap::HopSlotMap; -use tree_sitter::Node; - -/// The byte range of an injection layer. -/// -/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges. -/// This allows us to sort the ranges ahead of time in order to efficiently find a range that -/// contains a point with maximum depth. 
-#[derive(Debug)] -struct InjectionRange { - start: usize, - end: usize, - layer_id: LayerId, - depth: u32, -} - -pub struct TreeCursor<'a> { - layers: &'a HopSlotMap, - root: LayerId, - current: LayerId, - injection_ranges: Vec, - // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but - // that returns very surprising results in testing. - cursor: Node<'a>, -} - -impl<'a> TreeCursor<'a> { - pub(super) fn new(layers: &'a HopSlotMap, root: LayerId) -> Self { - let mut injection_ranges = Vec::new(); - - for (layer_id, layer) in layers.iter() { - // Skip the root layer - if layer.parent.is_none() { - continue; - } - for byte_range in layer.ranges.iter() { - let range = InjectionRange { - start: byte_range.start_byte, - end: byte_range.end_byte, - layer_id, - depth: layer.depth, - }; - injection_ranges.push(range); - } - } - - injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth))); - - let cursor = layers[root].tree().root_node(); - - Self { - layers, - root, - current: root, - injection_ranges, - cursor, - } - } - - pub fn node(&self) -> Node<'a> { - self.cursor - } - - pub fn goto_parent(&mut self) -> bool { - if let Some(parent) = self.node().parent() { - self.cursor = parent; - return true; - } - - // If we are already on the root layer, we cannot ascend. - if self.current == self.root { - return false; - } - - // Ascend to the parent layer. - let range = self.node().byte_range(); - let parent_id = self.layers[self.current] - .parent - .expect("non-root layers have a parent"); - self.current = parent_id; - let root = self.layers[self.current].tree().root_node(); - self.cursor = root - .descendant_for_byte_range(range.start, range.end) - .unwrap_or(root); - - true - } - - pub fn goto_parent_with

(&mut self, predicate: P) -> bool - where - P: Fn(&Node) -> bool, - { - while self.goto_parent() { - if predicate(&self.node()) { - return true; - } - } - - false - } - - /// Finds the injection layer that has exactly the same range as the given `range`. - fn layer_id_of_byte_range(&self, search_range: Range) -> Option { - let start_idx = self - .injection_ranges - .partition_point(|range| range.end < search_range.end); - - self.injection_ranges[start_idx..] - .iter() - .take_while(|range| range.end == search_range.end) - .find_map(|range| (range.start == search_range.start).then_some(range.layer_id)) - } - - fn goto_first_child_impl(&mut self, named: bool) -> bool { - // Check if the current node's range is an exact injection layer range. - if let Some(layer_id) = self - .layer_id_of_byte_range(self.node().byte_range()) - .filter(|&layer_id| layer_id != self.current) - { - // Switch to the child layer. - self.current = layer_id; - self.cursor = self.layers[self.current].tree().root_node(); - return true; - } - - let child = if named { - self.cursor.named_child(0) - } else { - self.cursor.child(0) - }; - - if let Some(child) = child { - // Otherwise descend in the current tree. 
- self.cursor = child; - true - } else { - false - } - } - - pub fn goto_first_child(&mut self) -> bool { - self.goto_first_child_impl(false) - } - - pub fn goto_first_named_child(&mut self) -> bool { - self.goto_first_child_impl(true) - } - - fn goto_next_sibling_impl(&mut self, named: bool) -> bool { - let sibling = if named { - self.cursor.next_named_sibling() - } else { - self.cursor.next_sibling() - }; - - if let Some(sibling) = sibling { - self.cursor = sibling; - true - } else { - false - } - } - - pub fn goto_next_sibling(&mut self) -> bool { - self.goto_next_sibling_impl(false) - } - - pub fn goto_next_named_sibling(&mut self) -> bool { - self.goto_next_sibling_impl(true) - } - - fn goto_prev_sibling_impl(&mut self, named: bool) -> bool { - let sibling = if named { - self.cursor.prev_named_sibling() - } else { - self.cursor.prev_sibling() - }; - - if let Some(sibling) = sibling { - self.cursor = sibling; - true - } else { - false - } - } - - pub fn goto_prev_sibling(&mut self) -> bool { - self.goto_prev_sibling_impl(false) - } - - pub fn goto_prev_named_sibling(&mut self) -> bool { - self.goto_prev_sibling_impl(true) - } - - /// Finds the injection layer that contains the given start-end range. - fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId { - let start_idx = self - .injection_ranges - .partition_point(|range| range.end < end); - - self.injection_ranges[start_idx..] - .iter() - .take_while(|range| range.start < end || range.depth > 1) - .find_map(|range| (range.start <= start).then_some(range.layer_id)) - .unwrap_or(self.root) - } - - pub fn reset_to_byte_range(&mut self, start: usize, end: usize) { - self.current = self.layer_id_containing_byte_range(start, end); - let root = self.layers[self.current].tree().root_node(); - self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root); - } - - /// Returns an iterator over the children of the node the TreeCursor is on - /// at the time this is called. 
- pub fn children(&'a mut self) -> ChildIter<'a> { - let parent = self.node(); - - ChildIter { - cursor: self, - parent, - named: false, - } - } - - /// Returns an iterator over the named children of the node the TreeCursor is on - /// at the time this is called. - pub fn named_children(&'a mut self) -> ChildIter<'a> { - let parent = self.node(); - - ChildIter { - cursor: self, - parent, - named: true, - } - } -} - -pub struct ChildIter<'n> { - cursor: &'n mut TreeCursor<'n>, - parent: Node<'n>, - named: bool, -} - -impl<'n> Iterator for ChildIter<'n> { - type Item = Node<'n>; - - fn next(&mut self) -> Option { - // first iteration, just visit the first child - if self.cursor.node() == self.parent { - self.cursor - .goto_first_child_impl(self.named) - .then(|| self.cursor.node()) - } else { - self.cursor - .goto_next_sibling_impl(self.named) - .then(|| self.cursor.node()) - } - } -} diff --git a/helix-core/src/text_annotations.rs b/helix-core/src/text_annotations.rs index 9704c3d6b892..0f492b8be2e5 100644 --- a/helix-core/src/text_annotations.rs +++ b/helix-core/src/text_annotations.rs @@ -5,7 +5,7 @@ use std::ops::Range; use std::ptr::NonNull; use crate::doc_formatter::FormattedGrapheme; -use crate::syntax::Highlight; +use crate::syntax::{Highlight, OverlayHighlights}; use crate::{Position, Tendril}; /// An inline annotation is continuous text shown @@ -300,10 +300,7 @@ impl<'a> TextAnnotations<'a> { } } - pub fn collect_overlay_highlights( - &self, - char_range: Range, - ) -> Vec<(usize, Range)> { + pub fn collect_overlay_highlights(&self, char_range: Range) -> OverlayHighlights { let mut highlights = Vec::new(); self.reset_pos(char_range.start); for char_idx in char_range { @@ -311,11 +308,11 @@ impl<'a> TextAnnotations<'a> { // we don't know the number of chars the original grapheme takes // however it doesn't matter as highlight boundaries are automatically // aligned to grapheme boundaries in the rendering code - highlights.push((highlight.0, 
char_idx..char_idx + 1)) + highlights.push((highlight, char_idx..char_idx + 1)); } } - highlights + OverlayHighlights::Heterogenous { highlights } } /// Add new inline annotations. diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs index 7576b3a78668..008228f43280 100644 --- a/helix-core/src/textobject.rs +++ b/helix-core/src/textobject.rs @@ -1,13 +1,12 @@ use std::fmt::Display; use ropey::RopeSlice; -use tree_sitter::{Node, QueryCursor}; use crate::chars::{categorize_char, char_is_whitespace, CharCategory}; use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary}; use crate::line_ending::rope_is_line_ending; use crate::movement::Direction; -use crate::syntax::LanguageConfiguration; +use crate::syntax; use crate::Range; use crate::{surround, Syntax}; @@ -260,18 +259,18 @@ pub fn textobject_treesitter( range: Range, textobject: TextObject, object_name: &str, - slice_tree: Node, - lang_config: &LanguageConfiguration, + syntax: &Syntax, + loader: &syntax::Loader, _count: usize, ) -> Range { + let root = syntax.tree().root_node(); + let textobject_query = loader.textobject_query(syntax.root_language()); let get_range = move || -> Option { let byte_pos = slice.char_to_byte(range.cursor(slice)); let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner - let mut cursor = QueryCursor::new(); - let node = lang_config - .textobject_query()? - .capture_nodes(&capture_name, slice_tree, slice, &mut cursor)? + let node = textobject_query? + .capture_nodes(&capture_name, &root, slice)? 
.filter(|node| node.byte_range().contains(&byte_pos)) .min_by_key(|node| node.byte_range().len())?; diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs index 56b4d2ba966e..ab733f931238 100644 --- a/helix-core/tests/indent.rs +++ b/helix-core/tests/indent.rs @@ -1,12 +1,11 @@ -use arc_swap::ArcSwap; use helix_core::{ indent::{indent_level_for_line, treesitter_indent_for_pos, IndentStyle}, - syntax::{Configuration, Loader}, + syntax::{config::Configuration, Loader}, Syntax, }; use helix_stdx::rope::RopeSliceExt; use ropey::Rope; -use std::{ops::Range, path::PathBuf, process::Command, sync::Arc}; +use std::{ops::Range, path::PathBuf, process::Command}; #[test] fn test_treesitter_indent_rust() { @@ -196,17 +195,12 @@ fn test_treesitter_indent( runtime.push("../runtime"); std::env::set_var("HELIX_RUNTIME", runtime.to_str().unwrap()); - let language_config = loader.language_config_for_scope(lang_scope).unwrap(); + let language = loader.language_for_scope(lang_scope).unwrap(); + let language_config = loader.language(language).config(); let indent_style = IndentStyle::from_str(&language_config.indent.as_ref().unwrap().unit); - let highlight_config = language_config.highlight_config(&[]).unwrap(); let text = doc.slice(..); - let syntax = Syntax::new( - text, - highlight_config, - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); - let indent_query = language_config.indent_query().unwrap(); + let syntax = Syntax::new(text, language, &loader).unwrap(); + let indent_query = loader.indent_query(language).unwrap(); for i in 0..doc.len_lines() { let line = text.line(i); diff --git a/helix-dap/src/client.rs b/helix-dap/src/client.rs index 6aa656e17006..1529b6f93cff 100644 --- a/helix-dap/src/client.rs +++ b/helix-dap/src/client.rs @@ -4,7 +4,7 @@ use crate::{ types::*, Error, Result, }; -use helix_core::syntax::DebuggerQuirks; +use helix_core::syntax::config::DebuggerQuirks; use serde_json::Value; diff --git a/helix-loader/Cargo.toml 
b/helix-loader/Cargo.toml index 493d8b30ec0c..dcd87e3aca94 100644 --- a/helix-loader/Cargo.toml +++ b/helix-loader/Cargo.toml @@ -21,7 +21,6 @@ anyhow = "1" serde = { version = "1.0", features = ["derive"] } toml = "0.8" etcetera = "0.10" -tree-sitter.workspace = true once_cell = "1.21" log = "0.4" @@ -32,5 +31,4 @@ cc = { version = "1" } threadpool = { version = "1.0" } tempfile.workspace = true -[target.'cfg(not(target_arch = "wasm32"))'.dependencies] -libloading = "0.8" +tree-house.workspace = true diff --git a/helix-loader/src/grammar.rs b/helix-loader/src/grammar.rs index dcf440312409..f04edf3fa948 100644 --- a/helix-loader/src/grammar.rs +++ b/helix-loader/src/grammar.rs @@ -9,7 +9,7 @@ use std::{ sync::mpsc::channel, }; use tempfile::TempPath; -use tree_sitter::Language; +use tree_house::tree_sitter::Grammar; #[cfg(unix)] const DYLIB_EXTENSION: &str = "so"; @@ -61,28 +61,21 @@ const BUILD_TARGET: &str = env!("BUILD_TARGET"); const REMOTE_NAME: &str = "origin"; #[cfg(target_arch = "wasm32")] -pub fn get_language(name: &str) -> Result { +pub fn get_language(name: &str) -> Result> { unimplemented!() } #[cfg(not(target_arch = "wasm32"))] -pub fn get_language(name: &str) -> Result { - use libloading::{Library, Symbol}; +pub fn get_language(name: &str) -> Result> { let mut rel_library_path = PathBuf::new().join("grammars").join(name); rel_library_path.set_extension(DYLIB_EXTENSION); let library_path = crate::runtime_file(&rel_library_path); + if !library_path.exists() { + return Ok(None); + } - let library = unsafe { Library::new(&library_path) } - .with_context(|| format!("Error opening dynamic library {:?}", library_path))?; - let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_")); - let language = unsafe { - let language_fn: Symbol Language> = library - .get(language_fn_name.as_bytes()) - .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; - language_fn() - }; - std::mem::forget(library); - Ok(language) + let grammar = 
unsafe { Grammar::new(name, &library_path) }?; + Ok(Some(grammar)) } fn ensure_git_is_available() -> Result<()> { diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs index f2b78a118ae9..83799ac75422 100644 --- a/helix-lsp/src/client.rs +++ b/helix-lsp/src/client.rs @@ -10,7 +10,7 @@ use crate::lsp::{ DidChangeWorkspaceFoldersParams, OneOf, PositionEncodingKind, SignatureHelp, Url, WorkspaceFolder, WorkspaceFoldersChangeEvent, }; -use helix_core::{find_workspace, syntax::LanguageServerFeature, ChangeSet, Rope}; +use helix_core::{find_workspace, syntax::config::LanguageServerFeature, ChangeSet, Rope}; use helix_loader::VERSION_AND_GIT_HASH; use helix_stdx::path; use parking_lot::Mutex; diff --git a/helix-lsp/src/lib.rs b/helix-lsp/src/lib.rs index ba41cbc5aa3c..0c89ee79b501 100644 --- a/helix-lsp/src/lib.rs +++ b/helix-lsp/src/lib.rs @@ -12,7 +12,7 @@ pub use jsonrpc::Call; pub use lsp::{Position, Url}; use futures_util::stream::select_all::SelectAll; -use helix_core::syntax::{ +use helix_core::syntax::config::{ LanguageConfiguration, LanguageServerConfiguration, LanguageServerFeatures, }; use helix_stdx::path; diff --git a/helix-term/src/application.rs b/helix-term/src/application.rs index 3bc32439565d..df968daf44c6 100644 --- a/helix-term/src/application.rs +++ b/helix-term/src/application.rs @@ -389,8 +389,9 @@ impl Application { let lang_loader = helix_core::config::user_lang_loader()?; self.editor.syn_loader.store(Arc::new(lang_loader)); + let loader = self.editor.syn_loader.load(); for document in self.editor.documents.values_mut() { - document.detect_language(self.editor.syn_loader.clone()); + document.detect_language(&loader); let diagnostics = Editor::doc_diagnostics( &self.editor.language_servers, &self.editor.diagnostics, diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 2e15dcdcc77c..cff5b8aecf0a 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -34,7 +34,7 @@ use helix_core::{ regex::{self, 
Regex}, search::{self, CharMatcher}, selection, surround, - syntax::{BlockCommentToken, LanguageServerFeature}, + syntax::config::{BlockCommentToken, LanguageServerFeature}, text_annotations::{Overlay, TextAnnotations}, textobject, unicode::width::UnicodeWidthChar, @@ -3482,12 +3482,12 @@ fn insert_with_indent(cx: &mut Context, cursor_fallback: IndentFallbackPos) { enter_insert_mode(cx); let (view, doc) = current!(cx.editor); + let loader = cx.editor.syn_loader.load(); let text = doc.text().slice(..); let contents = doc.text(); let selection = doc.selection(view.id); - let language_config = doc.language_config(); let syntax = doc.syntax(); let tab_width = doc.tab_width(); @@ -3503,7 +3503,7 @@ fn insert_with_indent(cx: &mut Context, cursor_fallback: IndentFallbackPos) { let line_end_index = cursor_line_start; let indent = indent::indent_for_newline( - language_config, + &loader, syntax, &doc.config.load().indent_heuristic, &doc.indent_style, @@ -3613,6 +3613,7 @@ fn open(cx: &mut Context, open: Open, comment_continuation: CommentContinuation) enter_insert_mode(cx); let config = cx.editor.config(); let (view, doc) = current!(cx.editor); + let loader = cx.editor.syn_loader.load(); let text = doc.text().slice(..); let contents = doc.text(); @@ -3662,7 +3663,7 @@ fn open(cx: &mut Context, open: Open, comment_continuation: CommentContinuation) let indent = match line.first_non_whitespace_char() { Some(pos) if continue_comment_token.is_some() => line.slice(..pos).to_string(), _ => indent::indent_for_newline( - doc.language_config(), + &loader, doc.syntax(), &config.indent_heuristic, &doc.indent_style, @@ -4126,6 +4127,7 @@ pub mod insert { pub fn insert_newline(cx: &mut Context) { let config = cx.editor.config(); let (view, doc) = current_ref!(cx.editor); + let loader = cx.editor.syn_loader.load(); let text = doc.text().slice(..); let line_ending = doc.line_ending.as_str(); @@ -4171,7 +4173,7 @@ pub mod insert { let indent = match line.first_non_whitespace_char() { 
Some(pos) if continue_comment_token.is_some() => line.slice(..pos).to_string(), _ => indent::indent_for_newline( - doc.language_config(), + &loader, doc.syntax(), &config.indent_heuristic, &doc.indent_style, @@ -5728,19 +5730,14 @@ fn goto_ts_object_impl(cx: &mut Context, object: &'static str, direction: Direct let count = cx.count(); let motion = move |editor: &mut Editor| { let (view, doc) = current!(editor); - if let Some((lang_config, syntax)) = doc.language_config().zip(doc.syntax()) { + let loader = editor.syn_loader.load(); + if let Some(syntax) = doc.syntax() { let text = doc.text().slice(..); let root = syntax.tree().root_node(); let selection = doc.selection(view.id).clone().transform(|range| { let new_range = movement::goto_treesitter_object( - text, - range, - object, - direction, - root, - lang_config, - count, + text, range, object, direction, &root, syntax, &loader, count, ); if editor.mode == Mode::Select { @@ -5828,21 +5825,15 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) { if let Some(ch) = event.char() { let textobject = move |editor: &mut Editor| { let (view, doc) = current!(editor); + let loader = editor.syn_loader.load(); let text = doc.text().slice(..); let textobject_treesitter = |obj_name: &str, range: Range| -> Range { - let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) { - Some(t) => t, - None => return range, + let Some(syntax) = doc.syntax() else { + return range; }; textobject::textobject_treesitter( - text, - range, - objtype, - obj_name, - syntax.tree().root_node(), - lang_config, - count, + text, range, objtype, obj_name, syntax, &loader, count, ) }; diff --git a/helix-term/src/commands/dap.rs b/helix-term/src/commands/dap.rs index 83dd936cdff2..4f20af4afed4 100644 --- a/helix-term/src/commands/dap.rs +++ b/helix-term/src/commands/dap.rs @@ -5,7 +5,7 @@ use crate::{ ui::{self, overlay::overlaid, Picker, Popup, Prompt, PromptEvent, Text}, }; use dap::{StackFrame, Thread, 
ThreadStates}; -use helix_core::syntax::{DebugArgumentValue, DebugConfigCompletion, DebugTemplate}; +use helix_core::syntax::config::{DebugArgumentValue, DebugConfigCompletion, DebugTemplate}; use helix_dap::{self as dap, Client}; use helix_lsp::block_on; use helix_view::editor::Breakpoint; diff --git a/helix-term/src/commands/lsp.rs b/helix-term/src/commands/lsp.rs index 8377f7c71773..9c55c830c30a 100644 --- a/helix-term/src/commands/lsp.rs +++ b/helix-term/src/commands/lsp.rs @@ -14,7 +14,7 @@ use tui::{text::Span, widgets::Row}; use super::{align_view, push_jump, Align, Context, Editor}; use helix_core::{ - diagnostic::DiagnosticProvider, syntax::LanguageServerFeature, + diagnostic::DiagnosticProvider, syntax::config::LanguageServerFeature, text_annotations::InlineAnnotation, Selection, Uri, }; use helix_stdx::path; diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index 4e912127c3d6..d1573a78877c 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -1670,16 +1670,14 @@ fn tree_sitter_highlight_name( _args: Args, event: PromptEvent, ) -> anyhow::Result<()> { - fn find_highlight_at_cursor( - cx: &mut compositor::Context<'_>, - ) -> Option { - use helix_core::syntax::HighlightEvent; + use helix_core::syntax::Highlight; - let (view, doc) = current!(cx.editor); + fn find_highlight_at_cursor(editor: &Editor) -> Option { + let (view, doc) = current_ref!(editor); let syntax = doc.syntax()?; let text = doc.text().slice(..); let cursor = doc.selection(view.id).primary().cursor(text); - let byte = text.char_to_byte(cursor); + let byte = text.char_to_byte(cursor) as u32; let node = syntax.descendant_for_byte_range(byte, byte)?; // Query the same range as the one used in syntax highlighting. 
let range = { @@ -1689,25 +1687,22 @@ fn tree_sitter_highlight_name( let last_line = text.len_lines().saturating_sub(1); let height = view.inner_area(doc).height; let last_visible_line = (row + height as usize).saturating_sub(1).min(last_line); - let start = text.line_to_byte(row.min(last_line)); - let end = text.line_to_byte(last_visible_line + 1); + let start = text.line_to_byte(row.min(last_line)) as u32; + let end = text.line_to_byte(last_visible_line + 1) as u32; start..end }; - let mut highlight = None; + let loader = editor.syn_loader.load(); + let mut highlighter = syntax.highlighter(text, &loader, range); - for event in syntax.highlight_iter(text, Some(range), None) { - match event.unwrap() { - HighlightEvent::Source { start, end } - if start == node.start_byte() && end == node.end_byte() => - { - return highlight; - } - HighlightEvent::HighlightStart(hl) => { - highlight = Some(hl); - } - _ => (), + while highlighter.next_event_offset() != u32::MAX { + let start = highlighter.next_event_offset(); + highlighter.advance(); + let end = highlighter.next_event_offset(); + + if start <= node.start_byte() && end >= node.end_byte() { + return highlighter.active_highlights().next_back(); } } @@ -1718,11 +1713,11 @@ fn tree_sitter_highlight_name( return Ok(()); } - let Some(highlight) = find_highlight_at_cursor(cx) else { + let Some(highlight) = find_highlight_at_cursor(cx.editor) else { return Ok(()); }; - let content = cx.editor.theme.scope(highlight.0).to_string(); + let content = cx.editor.theme.scope(highlight).to_string(); let callback = async move { let call: job::Callback = Callback::EditorCompositor(Box::new( @@ -2080,10 +2075,11 @@ fn language(cx: &mut compositor::Context, args: Args, event: PromptEvent) -> any let doc = doc_mut!(cx.editor); + let loader = cx.editor.syn_loader.load(); if &args[0] == DEFAULT_LANGUAGE_NAME { - doc.set_language(None, None) + doc.set_language(None, &loader) } else { - doc.set_language_by_language_id(&args[0], 
cx.editor.syn_loader.clone())?; + doc.set_language_by_language_id(&args[0], &loader)?; } doc.detect_indent_and_line_ending(); @@ -2189,8 +2185,8 @@ fn tree_sitter_subtree( if let Some(syntax) = doc.syntax() { let primary_selection = doc.selection(view.id).primary(); let text = doc.text(); - let from = text.char_to_byte(primary_selection.from()); - let to = text.char_to_byte(primary_selection.to()); + let from = text.char_to_byte(primary_selection.from()) as u32; + let to = text.char_to_byte(primary_selection.to()) as u32; if let Some(selected_node) = syntax.descendant_for_byte_range(from, to) { let mut contents = String::from("```tsq\n"); helix_core::syntax::pretty_print_tree(&mut contents, selected_node)?; diff --git a/helix-term/src/handlers/completion.rs b/helix-term/src/handlers/completion.rs index 20fac514e170..5017399bd078 100644 --- a/helix-term/src/handlers/completion.rs +++ b/helix-term/src/handlers/completion.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use helix_core::chars::char_is_word; use helix_core::completion::CompletionProvider; -use helix_core::syntax::LanguageServerFeature; +use helix_core::syntax::config::LanguageServerFeature; use helix_event::{register_hook, TaskHandle}; use helix_lsp::lsp; use helix_stdx::rope::RopeSliceExt; diff --git a/helix-term/src/handlers/completion/request.rs b/helix-term/src/handlers/completion/request.rs index 26f252a4a9bb..51a3129a8498 100644 --- a/helix-term/src/handlers/completion/request.rs +++ b/helix-term/src/handlers/completion/request.rs @@ -5,7 +5,7 @@ use std::time::Duration; use arc_swap::ArcSwap; use futures_util::Future; use helix_core::completion::CompletionProvider; -use helix_core::syntax::LanguageServerFeature; +use helix_core::syntax::config::LanguageServerFeature; use helix_event::{cancelable_future, TaskController, TaskHandle}; use helix_lsp::lsp; use helix_lsp::lsp::{CompletionContext, CompletionTriggerKind}; diff --git a/helix-term/src/handlers/document_colors.rs 
b/helix-term/src/handlers/document_colors.rs index 956cecbfbb31..f46ef2ac1f5d 100644 --- a/helix-term/src/handlers/document_colors.rs +++ b/helix-term/src/handlers/document_colors.rs @@ -1,7 +1,7 @@ use std::{collections::HashSet, time::Duration}; use futures_util::{stream::FuturesOrdered, StreamExt}; -use helix_core::{syntax::LanguageServerFeature, text_annotations::InlineAnnotation}; +use helix_core::{syntax::config::LanguageServerFeature, text_annotations::InlineAnnotation}; use helix_event::{cancelable_future, register_hook}; use helix_lsp::lsp; use helix_view::{ diff --git a/helix-term/src/handlers/signature_help.rs b/helix-term/src/handlers/signature_help.rs index 33c9e16ce6b8..8a0c9754c6f0 100644 --- a/helix-term/src/handlers/signature_help.rs +++ b/helix-term/src/handlers/signature_help.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use std::time::Duration; -use helix_core::syntax::LanguageServerFeature; +use helix_core::syntax::config::LanguageServerFeature; use helix_event::{cancelable_future, register_hook, send_blocking, TaskController, TaskHandle}; use helix_lsp::lsp::{self, SignatureInformation}; use helix_stdx::rope::RopeSliceExt; diff --git a/helix-term/src/health.rs b/helix-term/src/health.rs index 05e2f9c7e07f..a2b53dbc69e6 100644 --- a/helix-term/src/health.rs +++ b/helix-term/src/health.rs @@ -12,11 +12,17 @@ pub enum TsFeature { Highlight, TextObject, AutoIndent, + RainbowBracket, } impl TsFeature { pub fn all() -> &'static [Self] { - &[Self::Highlight, Self::TextObject, Self::AutoIndent] + &[ + Self::Highlight, + Self::TextObject, + Self::AutoIndent, + Self::RainbowBracket, + ] } pub fn runtime_filename(&self) -> &'static str { @@ -24,6 +30,7 @@ impl TsFeature { Self::Highlight => "highlights.scm", Self::TextObject => "textobjects.scm", Self::AutoIndent => "indents.scm", + Self::RainbowBracket => "rainbows.scm", } } @@ -32,6 +39,7 @@ impl TsFeature { Self::Highlight => "Syntax Highlighting", Self::TextObject => "Treesitter Textobjects", 
Self::AutoIndent => "Auto Indent", + Self::RainbowBracket => "Rainbow Brackets", } } @@ -40,6 +48,7 @@ impl TsFeature { Self::Highlight => "Highlight", Self::TextObject => "Textobject", Self::AutoIndent => "Indent", + Self::RainbowBracket => "Rainbow", } } } diff --git a/helix-term/src/ui/document.rs b/helix-term/src/ui/document.rs index 8423ae8e437a..d71c47a167d2 100644 --- a/helix-term/src/ui/document.rs +++ b/helix-term/src/ui/document.rs @@ -3,8 +3,7 @@ use std::cmp::min; use helix_core::doc_formatter::{DocumentFormatter, GraphemeSource, TextFormat}; use helix_core::graphemes::Grapheme; use helix_core::str_utils::char_to_byte_idx; -use helix_core::syntax::Highlight; -use helix_core::syntax::HighlightEvent; +use helix_core::syntax::{self, HighlightEvent, Highlighter, OverlayHighlights}; use helix_core::text_annotations::TextAnnotations; use helix_core::{visual_offset_from_block, Position, RopeSlice}; use helix_stdx::rope::RopeSliceExt; @@ -17,61 +16,6 @@ use tui::buffer::Buffer as Surface; use crate::ui::text_decorations::DecorationManager; -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -enum StyleIterKind { - /// base highlights (usually emitted by TS), byte indices (potentially not codepoint aligned) - BaseHighlights, - /// overlay highlights (emitted by custom code from selections), char indices - Overlay, -} - -/// A wrapper around a HighlightIterator -/// that merges the layered highlights to create the final text style -/// and yields the active text style and the char_idx where the active -/// style will have to be recomputed. 
-/// -/// TODO(ropey2): hopefully one day helix and ropey will operate entirely -/// on byte ranges and we can remove this -struct StyleIter<'a, H: Iterator> { - text_style: Style, - active_highlights: Vec, - highlight_iter: H, - kind: StyleIterKind, - text: RopeSlice<'a>, - theme: &'a Theme, -} - -impl> Iterator for StyleIter<'_, H> { - type Item = (Style, usize); - fn next(&mut self) -> Option<(Style, usize)> { - while let Some(event) = self.highlight_iter.next() { - match event { - HighlightEvent::HighlightStart(highlights) => { - self.active_highlights.push(highlights) - } - HighlightEvent::HighlightEnd => { - self.active_highlights.pop(); - } - HighlightEvent::Source { mut end, .. } => { - let style = self - .active_highlights - .iter() - .fold(self.text_style, |acc, span| { - acc.patch(self.theme.highlight(span.0)) - }); - if self.kind == StyleIterKind::BaseHighlights { - // Move the end byte index to the nearest character boundary (rounding up) - // and convert it to a character index. 
- end = self.text.byte_to_char(self.text.ceil_char_boundary(end)); - } - return Some((style, end)); - } - } - } - None - } -} - #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub struct LinePos { /// Indicates whether the given visual line @@ -90,8 +34,8 @@ pub fn render_document( doc: &Document, offset: ViewPosition, doc_annotations: &TextAnnotations, - syntax_highlight_iter: impl Iterator, - overlay_highlight_iter: impl Iterator, + syntax_highlighter: Option>, + overlay_highlights: Vec, theme: &Theme, decorations: DecorationManager, ) { @@ -108,8 +52,8 @@ pub fn render_document( offset.anchor, &doc.text_format(viewport.width, Some(theme)), doc_annotations, - syntax_highlight_iter, - overlay_highlight_iter, + syntax_highlighter, + overlay_highlights, theme, decorations, ) @@ -122,8 +66,8 @@ pub fn render_text( anchor: usize, text_fmt: &TextFormat, text_annotations: &TextAnnotations, - syntax_highlight_iter: impl Iterator, - overlay_highlight_iter: impl Iterator, + syntax_highlighter: Option>, + overlay_highlights: Vec, theme: &Theme, mut decorations: DecorationManager, ) { @@ -133,22 +77,8 @@ pub fn render_text( let mut formatter = DocumentFormatter::new_at_prev_checkpoint(text, text_fmt, text_annotations, anchor); - let mut syntax_styles = StyleIter { - text_style: renderer.text_style, - active_highlights: Vec::with_capacity(64), - highlight_iter: syntax_highlight_iter, - kind: StyleIterKind::BaseHighlights, - theme, - text, - }; - let mut overlay_styles = StyleIter { - text_style: Style::default(), - active_highlights: Vec::with_capacity(64), - highlight_iter: overlay_highlight_iter, - kind: StyleIterKind::Overlay, - theme, - text, - }; + let mut syntax_highlighter = SyntaxHighlighter::new(syntax_highlighter, text, theme); + let mut overlay_highlighter = OverlayHighlighter::new(overlay_highlights, theme); let mut last_line_pos = LinePos { first_visual_line: false, @@ -158,12 +88,6 @@ pub fn render_text( let mut last_line_end = 0; let mut is_in_indent_area = true; 
let mut last_line_indent_level = 0; - let mut syntax_style_span = syntax_styles - .next() - .unwrap_or_else(|| (Style::default(), usize::MAX)); - let mut overlay_style_span = overlay_styles - .next() - .unwrap_or_else(|| (Style::default(), usize::MAX)); let mut reached_view_top = false; loop { @@ -207,21 +131,17 @@ pub fn render_text( } // acquire the correct grapheme style - while grapheme.char_idx >= syntax_style_span.1 { - syntax_style_span = syntax_styles - .next() - .unwrap_or((Style::default(), usize::MAX)); + while grapheme.char_idx >= syntax_highlighter.pos { + syntax_highlighter.advance(); } - while grapheme.char_idx >= overlay_style_span.1 { - overlay_style_span = overlay_styles - .next() - .unwrap_or((Style::default(), usize::MAX)); + while grapheme.char_idx >= overlay_highlighter.pos { + overlay_highlighter.advance(); } let grapheme_style = if let GraphemeSource::VirtualText { highlight } = grapheme.source { let mut style = renderer.text_style; if let Some(highlight) = highlight { - style = style.patch(theme.highlight(highlight.0)); + style = style.patch(theme.highlight(highlight)); } GraphemeStyle { syntax_style: style, @@ -229,8 +149,8 @@ pub fn render_text( } } else { GraphemeStyle { - syntax_style: syntax_style_span.0, - overlay_style: overlay_style_span.0, + syntax_style: syntax_highlighter.style, + overlay_style: overlay_highlighter.style, } }; decorations.decorate_grapheme(renderer, &grapheme); @@ -549,3 +469,98 @@ impl<'a> TextRenderer<'a> { ) } } + +struct SyntaxHighlighter<'h, 'r, 't> { + inner: Option>, + text: RopeSlice<'r>, + /// The character index of the next highlight event, or `usize::MAX` if the highlighter is + /// finished. 
+ pos: usize, + theme: &'t Theme, + style: Style, +} + +impl<'h, 'r, 't> SyntaxHighlighter<'h, 'r, 't> { + fn new(inner: Option>, text: RopeSlice<'r>, theme: &'t Theme) -> Self { + let mut highlighter = Self { + inner, + text, + pos: 0, + theme, + style: Style::default(), + }; + highlighter.update_pos(); + highlighter + } + + fn update_pos(&mut self) { + self.pos = self + .inner + .as_ref() + .and_then(|highlighter| { + let next_byte_idx = highlighter.next_event_offset(); + (next_byte_idx != u32::MAX).then(|| { + // Move the byte index to the nearest character boundary (rounding up) and + // convert it to a character index. + self.text + .byte_to_char(self.text.ceil_char_boundary(next_byte_idx as usize)) + }) + }) + .unwrap_or(usize::MAX); + } + + fn advance(&mut self) { + let Some(highlighter) = self.inner.as_mut() else { + return; + }; + + let (event, highlights) = highlighter.advance(); + let base = match event { + HighlightEvent::Refresh => Style::default(), + HighlightEvent::Push => self.style, + }; + + self.style = highlights.fold(base, |acc, highlight| { + acc.patch(self.theme.highlight(highlight)) + }); + self.update_pos(); + } +} + +struct OverlayHighlighter<'t> { + inner: syntax::OverlayHighlighter, + pos: usize, + theme: &'t Theme, + style: Style, +} + +impl<'t> OverlayHighlighter<'t> { + fn new(overlays: Vec, theme: &'t Theme) -> Self { + let inner = syntax::OverlayHighlighter::new(overlays); + let mut highlighter = Self { + inner, + pos: 0, + theme, + style: Style::default(), + }; + highlighter.update_pos(); + highlighter + } + + fn update_pos(&mut self) { + self.pos = self.inner.next_event_offset(); + } + + fn advance(&mut self) { + let (event, highlights) = self.inner.advance(); + let base = match event { + HighlightEvent::Refresh => Style::default(), + HighlightEvent::Push => self.style, + }; + + self.style = highlights.fold(base, |acc, highlight| { + acc.patch(self.theme.highlight(highlight)) + }); + self.update_pos(); + } +} diff --git 
a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index 6be5657477bc..1f0ff4b3ee44 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -17,7 +17,7 @@ use helix_core::{ diagnostic::NumberOrString, graphemes::{next_grapheme_boundary, prev_grapheme_boundary}, movement::Direction, - syntax::{self, HighlightEvent}, + syntax::{self, OverlayHighlights}, text_annotations::TextAnnotations, unicode::width::UnicodeWidthStr, visual_offset_from_block, Change, Position, Range, Selection, Transaction, @@ -31,7 +31,7 @@ use helix_view::{ keyboard::{KeyCode, KeyModifiers}, Document, Editor, Theme, View, }; -use std::{mem::take, num::NonZeroUsize, path::PathBuf, rc::Rc}; +use std::{mem::take, num::NonZeroUsize, ops, path::PathBuf, rc::Rc}; use tui::{buffer::Buffer as Surface, text::Span}; @@ -87,6 +87,7 @@ impl EditorView { let area = view.area; let theme = &editor.theme; let config = editor.config(); + let loader = editor.syn_loader.load(); let view_offset = doc.view_offset(view.id); @@ -115,51 +116,45 @@ impl EditorView { decorations.add_decoration(line_decoration); } - let syntax_highlights = - Self::doc_syntax_highlights(doc, view_offset.anchor, inner.height, theme); + let syntax_highlighter = + Self::doc_syntax_highlighter(doc, view_offset.anchor, inner.height, &loader); + let mut overlays = Vec::new(); - let mut overlay_highlights = - Self::empty_highlight_iter(doc, view_offset.anchor, inner.height); - let overlay_syntax_highlights = Self::overlay_syntax_highlights( + overlays.push(Self::overlay_syntax_highlights( doc, view_offset.anchor, inner.height, &text_annotations, - ); - if !overlay_syntax_highlights.is_empty() { - overlay_highlights = - Box::new(syntax::merge(overlay_highlights, overlay_syntax_highlights)); - } + )); - for diagnostic in Self::doc_diagnostics_highlights(doc, theme) { - // Most of the `diagnostic` Vecs are empty most of the time. Skipping - // a merge for any empty Vec saves a significant amount of work. 
- if diagnostic.is_empty() { - continue; + if doc + .language_config() + .and_then(|config| config.rainbow_brackets) + .unwrap_or(config.rainbow_brackets) + { + if let Some(overlay) = + Self::doc_rainbow_highlights(doc, view_offset.anchor, inner.height, theme, &loader) + { + overlays.push(overlay); } - overlay_highlights = Box::new(syntax::merge(overlay_highlights, diagnostic)); } + Self::doc_diagnostics_highlights_into(doc, theme, &mut overlays); + if is_focused { if let Some(tabstops) = Self::tabstop_highlights(doc, theme) { - overlay_highlights = Box::new(syntax::merge(overlay_highlights, tabstops)); + overlays.push(tabstops); } - let highlights = syntax::merge( - overlay_highlights, - Self::doc_selection_highlights( - editor.mode(), - doc, - view, - theme, - &config.cursor_shape, - self.terminal_focused, - ), - ); - let focused_view_elements = Self::highlight_focused_view_elements(view, doc, theme); - if focused_view_elements.is_empty() { - overlay_highlights = Box::new(highlights) - } else { - overlay_highlights = Box::new(syntax::merge(highlights, focused_view_elements)) + overlays.push(Self::doc_selection_highlights( + editor.mode(), + doc, + view, + theme, + &config.cursor_shape, + self.terminal_focused, + )); + if let Some(overlay) = Self::highlight_focused_view_elements(view, doc, theme) { + overlays.push(overlay); } } @@ -207,8 +202,8 @@ impl EditorView { doc, view_offset, &text_annotations, - syntax_highlights, - overlay_highlights, + syntax_highlighter, + overlays, theme, decorations, ); @@ -287,57 +282,23 @@ impl EditorView { start..end } - pub fn empty_highlight_iter( - doc: &Document, - anchor: usize, - height: u16, - ) -> Box> { - let text = doc.text().slice(..); - let row = text.char_to_line(anchor.min(text.len_chars())); - - // Calculate viewport byte ranges: - // Saturating subs to make it inclusive zero indexing. 
- let range = Self::viewport_byte_range(text, row, height); - Box::new( - [HighlightEvent::Source { - start: text.byte_to_char(range.start), - end: text.byte_to_char(range.end), - }] - .into_iter(), - ) - } - - /// Get syntax highlights for a document in a view represented by the first line + /// Get the syntax highlighter for a document in a view represented by the first line /// and column (`offset`) and the last line. This is done instead of using a view /// directly to enable rendering syntax highlighted docs anywhere (eg. picker preview) - pub fn doc_syntax_highlights<'doc>( - doc: &'doc Document, + pub fn doc_syntax_highlighter<'editor>( + doc: &'editor Document, anchor: usize, height: u16, - _theme: &Theme, - ) -> Box + 'doc> { + loader: &'editor syntax::Loader, + ) -> Option> { + let syntax = doc.syntax()?; let text = doc.text().slice(..); let row = text.char_to_line(anchor.min(text.len_chars())); - let range = Self::viewport_byte_range(text, row, height); + let range = range.start as u32..range.end as u32; - match doc.syntax() { - Some(syntax) => { - let iter = syntax - // TODO: range doesn't actually restrict source, just highlight range - .highlight_iter(text.slice(..), Some(range), None) - .map(|event| event.unwrap()); - - Box::new(iter) - } - None => Box::new( - [HighlightEvent::Source { - start: range.start, - end: range.end, - }] - .into_iter(), - ), - } + let highlighter = syntax.highlighter(text, loader, range); + Some(highlighter) } pub fn overlay_syntax_highlights( @@ -345,7 +306,7 @@ impl EditorView { anchor: usize, height: u16, text_annotations: &TextAnnotations, - ) -> Vec<(usize, std::ops::Range)> { + ) -> OverlayHighlights { let text = doc.text().slice(..); let row = text.char_to_line(anchor.min(text.len_chars())); @@ -355,36 +316,51 @@ impl EditorView { text_annotations.collect_overlay_highlights(range) } + pub fn doc_rainbow_highlights( + doc: &Document, + anchor: usize, + height: u16, + theme: &Theme, + loader: &syntax::Loader, + ) -> 
Option { + let syntax = doc.syntax()?; + let text = doc.text().slice(..); + let row = text.char_to_line(anchor.min(text.len_chars())); + let visible_range = Self::viewport_byte_range(text, row, height); + let start = syntax::child_for_byte_range( + &syntax.tree().root_node(), + visible_range.start as u32..visible_range.end as u32, + ) + .map_or(visible_range.start as u32, |node| node.start_byte()); + let range = start..visible_range.end as u32; + + Some(syntax.rainbow_highlights(text, theme.rainbow_length(), loader, range)) + } + /// Get highlight spans for document diagnostics - pub fn doc_diagnostics_highlights( + pub fn doc_diagnostics_highlights_into( doc: &Document, theme: &Theme, - ) -> [Vec<(usize, std::ops::Range)>; 7] { + overlay_highlights: &mut Vec, + ) { use helix_core::diagnostic::{DiagnosticTag, Range, Severity}; let get_scope_of = |scope| { theme - .find_scope_index_exact(scope) - // get one of the themes below as fallback values - .or_else(|| theme.find_scope_index_exact("diagnostic")) - .or_else(|| theme.find_scope_index_exact("ui.cursor")) - .or_else(|| theme.find_scope_index_exact("ui.selection")) - .expect( - "at least one of the following scopes must be defined in the theme: `diagnostic`, `ui.cursor`, or `ui.selection`", - ) + .find_highlight_exact(scope) + // get one of the themes below as fallback values + .or_else(|| theme.find_highlight_exact("diagnostic")) + .or_else(|| theme.find_highlight_exact("ui.cursor")) + .or_else(|| theme.find_highlight_exact("ui.selection")) + .expect( + "at least one of the following scopes must be defined in the theme: `diagnostic`, `ui.cursor`, or `ui.selection`", + ) }; - // basically just queries the theme color defined in the config - let hint = get_scope_of("diagnostic.hint"); - let info = get_scope_of("diagnostic.info"); - let warning = get_scope_of("diagnostic.warning"); - let error = get_scope_of("diagnostic.error"); - let r#default = get_scope_of("diagnostic"); // this is a bit redundant but should be 
fine - // Diagnostic tags - let unnecessary = theme.find_scope_index_exact("diagnostic.unnecessary"); - let deprecated = theme.find_scope_index_exact("diagnostic.deprecated"); + let unnecessary = theme.find_highlight_exact("diagnostic.unnecessary"); + let deprecated = theme.find_highlight_exact("diagnostic.deprecated"); - let mut default_vec: Vec<(usize, std::ops::Range)> = Vec::new(); + let mut default_vec = Vec::new(); let mut info_vec = Vec::new(); let mut hint_vec = Vec::new(); let mut warning_vec = Vec::new(); @@ -392,31 +368,30 @@ impl EditorView { let mut unnecessary_vec = Vec::new(); let mut deprecated_vec = Vec::new(); - let push_diagnostic = - |vec: &mut Vec<(usize, std::ops::Range)>, scope, range: Range| { - // If any diagnostic overlaps ranges with the prior diagnostic, - // merge the two together. Otherwise push a new span. - match vec.last_mut() { - Some((_, existing_range)) if range.start <= existing_range.end => { - // This branch merges overlapping diagnostics, assuming that the current - // diagnostic starts on range.start or later. If this assertion fails, - // we will discard some part of `diagnostic`. This implies that - // `doc.diagnostics()` is not sorted by `diagnostic.range`. - debug_assert!(existing_range.start <= range.start); - existing_range.end = range.end.max(existing_range.end) - } - _ => vec.push((scope, range.start..range.end)), + let push_diagnostic = |vec: &mut Vec>, range: Range| { + // If any diagnostic overlaps ranges with the prior diagnostic, + // merge the two together. Otherwise push a new span. + match vec.last_mut() { + Some(existing_range) if range.start <= existing_range.end => { + // This branch merges overlapping diagnostics, assuming that the current + // diagnostic starts on range.start or later. If this assertion fails, + // we will discard some part of `diagnostic`. This implies that + // `doc.diagnostics()` is not sorted by `diagnostic.range`. 
+ debug_assert!(existing_range.start <= range.start); + existing_range.end = range.end.max(existing_range.end) } - }; + _ => vec.push(range.start..range.end), + } + }; for diagnostic in doc.diagnostics() { // Separate diagnostics into different Vecs by severity. - let (vec, scope) = match diagnostic.severity { - Some(Severity::Info) => (&mut info_vec, info), - Some(Severity::Hint) => (&mut hint_vec, hint), - Some(Severity::Warning) => (&mut warning_vec, warning), - Some(Severity::Error) => (&mut error_vec, error), - _ => (&mut default_vec, r#default), + let vec = match diagnostic.severity { + Some(Severity::Info) => &mut info_vec, + Some(Severity::Hint) => &mut hint_vec, + Some(Severity::Warning) => &mut warning_vec, + Some(Severity::Error) => &mut error_vec, + _ => &mut default_vec, }; // If the diagnostic has tags and a non-warning/error severity, skip rendering @@ -429,34 +404,59 @@ impl EditorView { Some(Severity::Warning | Severity::Error) ) { - push_diagnostic(vec, scope, diagnostic.range); + push_diagnostic(vec, diagnostic.range); } for tag in &diagnostic.tags { match tag { DiagnosticTag::Unnecessary => { - if let Some(scope) = unnecessary { - push_diagnostic(&mut unnecessary_vec, scope, diagnostic.range) + if unnecessary.is_some() { + push_diagnostic(&mut unnecessary_vec, diagnostic.range) } } DiagnosticTag::Deprecated => { - if let Some(scope) = deprecated { - push_diagnostic(&mut deprecated_vec, scope, diagnostic.range) + if deprecated.is_some() { + push_diagnostic(&mut deprecated_vec, diagnostic.range) } } } } } - [ - default_vec, - unnecessary_vec, - deprecated_vec, - info_vec, - hint_vec, - warning_vec, - error_vec, - ] + overlay_highlights.push(OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic"), + ranges: default_vec, + }); + if let Some(highlight) = unnecessary { + overlay_highlights.push(OverlayHighlights::Homogeneous { + highlight, + ranges: unnecessary_vec, + }); + } + if let Some(highlight) = deprecated { + 
overlay_highlights.push(OverlayHighlights::Homogeneous { + highlight, + ranges: deprecated_vec, + }); + } + overlay_highlights.extend([ + OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic.info"), + ranges: info_vec, + }, + OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic.hint"), + ranges: hint_vec, + }, + OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic.warning"), + ranges: warning_vec, + }, + OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic.error"), + ranges: error_vec, + }, + ]); } /// Get highlight spans for selections in a document view. @@ -467,7 +467,7 @@ impl EditorView { theme: &Theme, cursor_shape_config: &CursorShapeConfig, is_terminal_focused: bool, - ) -> Vec<(usize, std::ops::Range)> { + ) -> OverlayHighlights { let text = doc.text().slice(..); let selection = doc.selection(view.id); let primary_idx = selection.primary_index(); @@ -476,34 +476,34 @@ impl EditorView { let cursor_is_block = cursorkind == CursorKind::Block; let selection_scope = theme - .find_scope_index_exact("ui.selection") + .find_highlight_exact("ui.selection") .expect("could not find `ui.selection` scope in the theme!"); let primary_selection_scope = theme - .find_scope_index_exact("ui.selection.primary") + .find_highlight_exact("ui.selection.primary") .unwrap_or(selection_scope); let base_cursor_scope = theme - .find_scope_index_exact("ui.cursor") + .find_highlight_exact("ui.cursor") .unwrap_or(selection_scope); let base_primary_cursor_scope = theme - .find_scope_index("ui.cursor.primary") + .find_highlight("ui.cursor.primary") .unwrap_or(base_cursor_scope); let cursor_scope = match mode { - Mode::Insert => theme.find_scope_index_exact("ui.cursor.insert"), - Mode::Select => theme.find_scope_index_exact("ui.cursor.select"), - Mode::Normal => theme.find_scope_index_exact("ui.cursor.normal"), + Mode::Insert => theme.find_highlight_exact("ui.cursor.insert"), + Mode::Select => 
theme.find_highlight_exact("ui.cursor.select"), + Mode::Normal => theme.find_highlight_exact("ui.cursor.normal"), } .unwrap_or(base_cursor_scope); let primary_cursor_scope = match mode { - Mode::Insert => theme.find_scope_index_exact("ui.cursor.primary.insert"), - Mode::Select => theme.find_scope_index_exact("ui.cursor.primary.select"), - Mode::Normal => theme.find_scope_index_exact("ui.cursor.primary.normal"), + Mode::Insert => theme.find_highlight_exact("ui.cursor.primary.insert"), + Mode::Select => theme.find_highlight_exact("ui.cursor.primary.select"), + Mode::Normal => theme.find_highlight_exact("ui.cursor.primary.normal"), } .unwrap_or(base_primary_cursor_scope); - let mut spans: Vec<(usize, std::ops::Range)> = Vec::new(); + let mut spans = Vec::new(); for (i, range) in selection.iter().enumerate() { let selection_is_primary = i == primary_idx; let (cursor_scope, selection_scope) = if selection_is_primary { @@ -563,7 +563,7 @@ impl EditorView { } } - spans + OverlayHighlights::Heterogenous { highlights: spans } } /// Render brace match, etc (meant for the focused view only) @@ -571,41 +571,24 @@ impl EditorView { view: &View, doc: &Document, theme: &Theme, - ) -> Vec<(usize, std::ops::Range)> { + ) -> Option { // Highlight matching braces - if let Some(syntax) = doc.syntax() { - let text = doc.text().slice(..); - use helix_core::match_brackets; - let pos = doc.selection(view.id).primary().cursor(text); - - if let Some(pos) = - match_brackets::find_matching_bracket(syntax, doc.text().slice(..), pos) - { - // ensure col is on screen - if let Some(highlight) = theme.find_scope_index_exact("ui.cursor.match") { - return vec![(highlight, pos..pos + 1)]; - } - } - } - Vec::new() + let syntax = doc.syntax()?; + let highlight = theme.find_highlight_exact("ui.cursor.match")?; + let text = doc.text().slice(..); + let pos = doc.selection(view.id).primary().cursor(text); + let pos = helix_core::match_brackets::find_matching_bracket(syntax, text, pos)?; + 
Some(OverlayHighlights::single(highlight, pos..pos + 1)) } - pub fn tabstop_highlights( - doc: &Document, - theme: &Theme, - ) -> Option)>> { + pub fn tabstop_highlights(doc: &Document, theme: &Theme) -> Option { let snippet = doc.active_snippet.as_ref()?; - let highlight = theme.find_scope_index_exact("tabstop")?; - let mut highlights = Vec::new(); + let highlight = theme.find_highlight_exact("tabstop")?; + let mut ranges = Vec::new(); for tabstop in snippet.tabstops() { - highlights.extend( - tabstop - .ranges - .iter() - .map(|range| (highlight, range.start..range.end)), - ); + ranges.extend(tabstop.ranges.iter().map(|range| range.start..range.end)); } - (!highlights.is_empty()).then_some(highlights) + Some(OverlayHighlights::Homogeneous { highlight, ranges }) } /// Render bufferline at the top diff --git a/helix-term/src/ui/lsp/signature_help.rs b/helix-term/src/ui/lsp/signature_help.rs index 2dee812417af..87a3eb95f21e 100644 --- a/helix-term/src/ui/lsp/signature_help.rs +++ b/helix-term/src/ui/lsp/signature_help.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use arc_swap::ArcSwap; -use helix_core::syntax; +use helix_core::syntax::{self, OverlayHighlights}; use helix_view::graphics::{Margin, Rect, Style}; use helix_view::input::Event; use tui::buffer::Buffer; @@ -102,13 +102,12 @@ impl Component for SignatureHelp { .unwrap_or_else(|| &self.signatures[0]); let active_param_span = signature.active_param_range.map(|(start, end)| { - vec![( - cx.editor - .theme - .find_scope_index_exact("ui.selection") - .unwrap(), - start..end, - )] + let highlight = cx + .editor + .theme + .find_highlight_exact("ui.selection") + .unwrap(); + OverlayHighlights::single(highlight, start..end) }); let signature = self @@ -120,7 +119,7 @@ impl Component for SignatureHelp { signature.signature.as_str(), &self.language, Some(&cx.editor.theme), - Arc::clone(&self.config_loader), + &self.config_loader.load(), active_param_span, ); @@ -178,7 +177,7 @@ impl Component for SignatureHelp { 
signature.signature.as_str(), &self.language, None, - Arc::clone(&self.config_loader), + &self.config_loader.load(), None, ); let (sig_width, sig_height) = diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs index fe581b5adfa6..ae58d75e8f2e 100644 --- a/helix-term/src/ui/markdown.rs +++ b/helix-term/src/ui/markdown.rs @@ -10,8 +10,8 @@ use std::sync::Arc; use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd}; use helix_core::{ - syntax::{self, HighlightEvent, InjectionLanguageMarker, Syntax}, - RopeSlice, + syntax::{self, HighlightEvent, OverlayHighlights}, + RopeSlice, Syntax, }; use helix_view::{ graphics::{Margin, Rect, Style}, @@ -32,8 +32,12 @@ pub fn highlighted_code_block<'a>( text: &str, language: &str, theme: Option<&Theme>, - config_loader: Arc>, - additional_highlight_spans: Option)>>, + loader: &syntax::Loader, + // Optional overlay highlights to mix in with the syntax highlights. + // + // Note that `OverlayHighlights` is typically used with char indexing but the only caller + // which passes this parameter currently passes **byte indices** instead. 
+ additional_highlight_spans: Option, ) -> Text<'a> { let mut spans = Vec::new(); let mut lines = Vec::new(); @@ -48,67 +52,74 @@ pub fn highlighted_code_block<'a>( }; let ropeslice = RopeSlice::from(text); - let syntax = config_loader - .load() - .language_configuration_for_injection_string(&InjectionLanguageMarker::Name( - language.into(), - )) - .and_then(|config| config.highlight_config(theme.scopes())) - .and_then(|config| Syntax::new(ropeslice, config, Arc::clone(&config_loader))); - - let syntax = match syntax { - Some(s) => s, - None => return styled_multiline_text(text, code_style), + let Some(syntax) = loader + .language_for_match(RopeSlice::from(language)) + .and_then(|lang| Syntax::new(ropeslice, lang, loader).ok()) + else { + return styled_multiline_text(text, code_style); }; - let highlight_iter = syntax - .highlight_iter(ropeslice, None, None) - .map(|e| e.unwrap()); - let highlight_iter: Box> = - if let Some(spans) = additional_highlight_spans { - Box::new(helix_core::syntax::merge(highlight_iter, spans)) - } else { - Box::new(highlight_iter) - }; - - let mut highlights = Vec::new(); - for event in highlight_iter { - match event { - HighlightEvent::HighlightStart(span) => { - highlights.push(span); + let mut syntax_highlighter = syntax.highlighter(ropeslice, loader, ..); + let mut syntax_highlight_stack = Vec::new(); + let mut overlay_highlight_stack = Vec::new(); + let mut overlay_highlighter = syntax::OverlayHighlighter::new(additional_highlight_spans); + let mut pos = 0; + + while pos < ropeslice.len_bytes() as u32 { + if pos == syntax_highlighter.next_event_offset() { + let (event, new_highlights) = syntax_highlighter.advance(); + if event == HighlightEvent::Refresh { + syntax_highlight_stack.clear(); } - HighlightEvent::HighlightEnd => { - highlights.pop(); + syntax_highlight_stack.extend(new_highlights); + } else if pos == overlay_highlighter.next_event_offset() as u32 { + let (event, new_highlights) = overlay_highlighter.advance(); + if event 
== HighlightEvent::Refresh { + overlay_highlight_stack.clear(); } - HighlightEvent::Source { start, end } => { - let style = highlights - .iter() - .fold(text_style, |acc, span| acc.patch(theme.highlight(span.0))); - - let mut slice = &text[start..end]; - // TODO: do we need to handle all unicode line endings - // here, or is just '\n' okay? - while let Some(end) = slice.find('\n') { - // emit span up to newline - let text = &slice[..end]; - let text = text.replace('\t', " "); // replace tabs - let span = Span::styled(text, style); - spans.push(span); - - // truncate slice to after newline - slice = &slice[end + 1..]; - - // make a new line - let spans = std::mem::take(&mut spans); - lines.push(Spans::from(spans)); - } + overlay_highlight_stack.extend(new_highlights) + } - // if there's anything left, emit it too - if !slice.is_empty() { - let span = Span::styled(slice.replace('\t', " "), style); - spans.push(span); - } - } + let start = pos; + pos = syntax_highlighter + .next_event_offset() + .min(overlay_highlighter.next_event_offset() as u32); + if pos == u32::MAX { + pos = ropeslice.len_bytes() as u32; + } + if pos == start { + continue; + } + assert!(pos > start); + + let style = syntax_highlight_stack + .iter() + .chain(overlay_highlight_stack.iter()) + .fold(text_style, |acc, highlight| { + acc.patch(theme.highlight(*highlight)) + }); + + let mut slice = &text[start as usize..pos as usize]; + // TODO: do we need to handle all unicode line endings + // here, or is just '\n' okay? 
+ while let Some(end) = slice.find('\n') { + // emit span up to newline + let text = &slice[..end]; + let text = text.replace('\t', " "); // replace tabs + let span = Span::styled(text, style); + spans.push(span); + + // truncate slice to after newline + slice = &slice[end + 1..]; + + // make a new line + let spans = std::mem::take(&mut spans); + lines.push(Spans::from(spans)); + } + + if !slice.is_empty() { + let span = Span::styled(slice.replace('\t', " "), style); + spans.push(span); } } @@ -286,7 +297,7 @@ impl Markdown { &text, language, theme, - Arc::clone(&self.config_loader), + &self.config_loader.load(), None, ); lines.extend(tui_text.lines.into_iter()); diff --git a/helix-term/src/ui/mod.rs b/helix-term/src/ui/mod.rs index a76adbe211d8..5b13263bbf11 100644 --- a/helix-term/src/ui/mod.rs +++ b/helix-term/src/ui/mod.rs @@ -372,7 +372,7 @@ pub mod completers { use super::Utf8PathBuf; use crate::ui::prompt::Completion; use helix_core::fuzzy::fuzzy_match; - use helix_core::syntax::LanguageServerFeature; + use helix_core::syntax::config::LanguageServerFeature; use helix_view::document::SCRATCH_BUFFER_NAME; use helix_view::theme; use helix_view::{editor::Config, Editor}; diff --git a/helix-term/src/ui/picker.rs b/helix-term/src/ui/picker.rs index a6ce91a67712..7abdfce845be 100644 --- a/helix-term/src/ui/picker.rs +++ b/helix-term/src/ui/picker.rs @@ -624,7 +624,14 @@ impl Picker { if content_type.is_binary() { return Ok(CachedPreview::Binary); } - Document::open(&path, None, None, editor.config.clone()).map_or( + Document::open( + &path, + None, + false, + editor.config.clone(), + editor.syn_loader.clone(), + ) + .map_or( Err(std::io::Error::new( std::io::ErrorKind::NotFound, "Cannot open document", @@ -933,21 +940,18 @@ impl Picker { } } - let syntax_highlights = EditorView::doc_syntax_highlights( + let loader = cx.editor.syn_loader.load(); + + let syntax_highlighter = + EditorView::doc_syntax_highlighter(doc, offset.anchor, area.height, &loader); + let mut 
overlay_highlights = Vec::new(); + + EditorView::doc_diagnostics_highlights_into( doc, - offset.anchor, - area.height, &cx.editor.theme, + &mut overlay_highlights, ); - let mut overlay_highlights = - EditorView::empty_highlight_iter(doc, offset.anchor, area.height); - for spans in EditorView::doc_diagnostics_highlights(doc, &cx.editor.theme) { - if spans.is_empty() { - continue; - } - overlay_highlights = Box::new(helix_core::syntax::merge(overlay_highlights, spans)); - } let mut decorations = DecorationManager::default(); if let Some((start, end)) = range { @@ -977,7 +981,7 @@ impl Picker { offset, // TODO: compute text annotations asynchronously here (like inlay hints) &TextAnnotations::default(), - syntax_highlights, + syntax_highlighter, overlay_highlights, &cx.editor.theme, decorations, diff --git a/helix-term/src/ui/picker/handlers.rs b/helix-term/src/ui/picker/handlers.rs index 040fffa8840f..9a3af9b3209c 100644 --- a/helix-term/src/ui/picker/handlers.rs +++ b/helix-term/src/ui/picker/handlers.rs @@ -70,23 +70,21 @@ impl AsyncHook return; } - let Some(language_config) = doc.detect_language_config(&editor.syn_loader.load()) - else { + let loader = editor.syn_loader.load(); + let Some(language_config) = doc.detect_language_config(&loader) else { return; }; - doc.language = Some(language_config.clone()); + let language = language_config.language(); + doc.language = Some(language_config); let text = doc.text().clone(); - let loader = editor.syn_loader.clone(); tokio::task::spawn_blocking(move || { - let Some(syntax) = language_config - .highlight_config(&loader.load().scopes()) - .and_then(|highlight_config| { - helix_core::Syntax::new(text.slice(..), highlight_config, loader) - }) - else { - log::info!("highlighting picker item failed"); - return; + let syntax = match helix_core::Syntax::new(text.slice(..), language, &loader) { + Ok(syntax) => syntax, + Err(err) => { + log::info!("highlighting picker preview failed: {err}"); + return; + } }; 
job::dispatch_blocking(move |editor, compositor| { diff --git a/helix-term/src/ui/prompt.rs b/helix-term/src/ui/prompt.rs index 03adeb05bbf5..56255860a799 100644 --- a/helix-term/src/ui/prompt.rs +++ b/helix-term/src/ui/prompt.rs @@ -529,7 +529,7 @@ impl Prompt { &self.line, language, Some(&cx.editor.theme), - loader.clone(), + &loader.load(), None, ) .into(); diff --git a/helix-term/tests/integration.rs b/helix-term/tests/integration.rs index 5e418cebdabd..469242e403d6 100644 --- a/helix-term/tests/integration.rs +++ b/helix-term/tests/integration.rs @@ -2,7 +2,7 @@ mod test { mod helpers; - use helix_core::{syntax::AutoPairConfig, Selection}; + use helix_core::{syntax::config::AutoPairConfig, Selection}; use helix_term::config::Config; use indoc::indoc; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 41c9ee1ef6e4..2d8d333da493 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -9,7 +9,7 @@ use helix_core::diagnostic::DiagnosticProvider; use helix_core::doc_formatter::TextFormat; use helix_core::encoding::Encoding; use helix_core::snippets::{ActiveSnippet, SnippetRenderCtx}; -use helix_core::syntax::{Highlight, LanguageServerFeature}; +use helix_core::syntax::config::LanguageServerFeature; use helix_core::text_annotations::{InlineAnnotation, Overlay}; use helix_event::TaskController; use helix_lsp::util::lsp_pos_to_pos; @@ -38,7 +38,7 @@ use helix_core::{ history::{History, State, UndoKind}, indent::{auto_detect_indent_style, IndentStyle}, line_ending::auto_detect_line_ending, - syntax::{self, LanguageConfiguration}, + syntax::{self, config::LanguageConfiguration}, ChangeSet, Diagnostic, LineEnding, Range, Rope, RopeBuilder, Selection, Syntax, Transaction, }; @@ -207,12 +207,17 @@ pub struct Document { // NOTE: ideally this would live on the handler for color swatches. This is blocked on a // large refactor that would make `&mut Editor` available on the `DocumentDidChange` event. 
pub color_swatch_controller: TaskController, + + // NOTE: this field should eventually go away - we should use the Editor's syn_loader instead + // of storing a copy on every doc. Then we can remove the surrounding `Arc` and use the + // `ArcSwap` directly. + syn_loader: Arc>, } #[derive(Debug, Clone, Default)] pub struct DocumentColorSwatches { pub color_swatches: Vec, - pub colors: Vec, + pub colors: Vec, pub color_swatches_padding: Vec, } @@ -677,6 +682,7 @@ impl Document { text: Rope, encoding_with_bom_info: Option<(&'static Encoding, bool)>, config: Arc>, + syn_loader: Arc>, ) -> Self { let (encoding, has_bom) = encoding_with_bom_info.unwrap_or((encoding::UTF_8, false)); let line_ending = config.load().default_line_ending.into(); @@ -719,13 +725,17 @@ impl Document { jump_labels: HashMap::new(), color_swatches: None, color_swatch_controller: TaskController::new(), + syn_loader, } } - pub fn default(config: Arc>) -> Self { + pub fn default( + config: Arc>, + syn_loader: Arc>, + ) -> Self { let line_ending: LineEnding = config.load().default_line_ending.into(); let text = Rope::from(line_ending.as_str()); - Self::from(text, None, config) + Self::from(text, None, config, syn_loader) } // TODO: async fn? @@ -734,8 +744,9 @@ impl Document { pub fn open( path: &Path, mut encoding: Option<&'static Encoding>, - config_loader: Option>>, + detect_language: bool, config: Arc>, + syn_loader: Arc>, ) -> Result { // If the path is not a regular file (e.g.: /dev/random) it should not be opened. 
if path.metadata().is_ok_and(|metadata| !metadata.is_file()) { @@ -761,12 +772,13 @@ impl Document { (Rope::from(line_ending.as_str()), encoding, false) }; - let mut doc = Self::from(rope, Some((encoding, has_bom)), config); + let loader = syn_loader.load(); + let mut doc = Self::from(rope, Some((encoding, has_bom)), config, syn_loader); // set the path and try detecting the language doc.set_path(Some(path)); - if let Some(loader) = config_loader { - doc.detect_language(loader); + if detect_language { + doc.detect_language(&loader); } doc.editor_config = editor_config; @@ -1102,22 +1114,20 @@ impl Document { } /// Detect the programming language based on the file type. - pub fn detect_language(&mut self, config_loader: Arc>) { - let loader = config_loader.load(); - self.set_language( - self.detect_language_config(&loader), - Some(Arc::clone(&config_loader)), - ); + pub fn detect_language(&mut self, loader: &syntax::Loader) { + self.set_language(self.detect_language_config(loader), loader); } /// Detect the programming language based on the file type. pub fn detect_language_config( &self, - config_loader: &syntax::Loader, - ) -> Option> { - config_loader - .language_config_for_file_name(self.path.as_ref()?) - .or_else(|| config_loader.language_config_for_shebang(self.text().slice(..))) + loader: &syntax::Loader, + ) -> Option> { + let language = loader + .language_for_filename(self.path.as_ref()?) + .or_else(|| loader.language_for_shebang(self.text().slice(..)))?; + + Some(loader.language(language).config().clone()) } /// Detect the indentation used in the file, or otherwise defaults to the language indentation @@ -1256,35 +1266,36 @@ impl Document { /// if it exists. 
pub fn set_language( &mut self, - language_config: Option>, - loader: Option>>, + language_config: Option>, + loader: &syntax::Loader, ) { - if let (Some(language_config), Some(loader)) = (language_config, loader) { - if let Some(highlight_config) = - language_config.highlight_config(&(*loader).load().scopes()) - { - self.syntax = Syntax::new(self.text.slice(..), highlight_config, loader); - } - - self.language = Some(language_config); - } else { - self.syntax = None; - self.language = None; - }; + self.language = language_config; + self.syntax = self.language.as_ref().and_then(|config| { + Syntax::new(self.text.slice(..), config.language(), loader) + .map_err(|err| { + // `NoRootConfig` means that there was an issue loading the language/syntax + // config for the root language of the document. An error must have already + // been logged by `LanguageData::syntax_config`. + if err != syntax::HighlighterError::NoRootConfig { + log::warn!("Error building syntax for '{}': {err}", self.display_name()); + } + }) + .ok() + }); } /// Set the programming language for the file if you know the language but don't have the - /// [`syntax::LanguageConfiguration`] for it. + /// [`syntax::config::LanguageConfiguration`] for it. 
pub fn set_language_by_language_id( &mut self, language_id: &str, - config_loader: Arc>, + loader: &syntax::Loader, ) -> anyhow::Result<()> { - let language_config = (*config_loader) - .load() - .language_config_for_language_id(language_id) + let language = loader + .language_for_name(language_id) .ok_or_else(|| anyhow!("invalid language id: {}", language_id))?; - self.set_language(Some(language_config), Some(config_loader)); + let config = loader.language(language).config().clone(); + self.set_language(Some(config), loader); Ok(()) } @@ -1403,14 +1414,14 @@ impl Document { // update tree-sitter syntax tree if let Some(syntax) = &mut self.syntax { - // TODO: no unwrap - let res = syntax.update( + let loader = self.syn_loader.load(); + if let Err(err) = syntax.update( old_doc.slice(..), self.text.slice(..), transaction.changes(), - ); - if res.is_err() { - log::error!("TS parser failed, disabling TS for the current buffer: {res:?}"); + &loader, + ) { + log::error!("TS parser failed, disabling TS for the current buffer: {err}"); self.syntax = None; } } @@ -2218,8 +2229,7 @@ impl Document { viewport_width, wrap_indicator: wrap_indicator.into_boxed_str(), wrap_indicator_highlight: theme - .and_then(|theme| theme.find_scope_index("ui.virtual.wrap")) - .map(Highlight), + .and_then(|theme| theme.find_highlight("ui.virtual.wrap")), soft_wrap_at_text_width, } } @@ -2299,6 +2309,7 @@ mod test { text, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); let view = ViewId::default(); doc.set_selection(view, Selection::single(0, 0)); @@ -2337,6 +2348,7 @@ mod test { text, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); let view = ViewId::default(); doc.set_selection(view, Selection::single(5, 5)); @@ -2450,9 +2462,12 @@ mod test { #[test] fn test_line_ending() { assert_eq!( - 
Document::default(Arc::new(ArcSwap::new(Arc::new(Config::default())))) - .text() - .to_string(), + Document::default( + Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())) + ) + .text() + .to_string(), helix_core::NATIVE_LINE_ENDING.as_str() ); } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index dfade86baf01..e3d3db19813b 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -46,7 +46,10 @@ pub use helix_core::diagnostic::Severity; use helix_core::{ auto_pairs::AutoPairs, diagnostic::DiagnosticProvider, - syntax::{self, AutoPairConfig, IndentationHeuristic, LanguageServerFeature, SoftWrap}, + syntax::{ + self, + config::{AutoPairConfig, IndentationHeuristic, LanguageServerFeature, SoftWrap}, + }, Change, LineEnding, Position, Range, Selection, Uri, NATIVE_LINE_ENDING, }; use helix_dap as dap; @@ -370,6 +373,8 @@ pub struct Config { /// Whether to read settings from [EditorConfig](https://editorconfig.org) files. Defaults to /// `true`. pub editor_config: bool, + /// Whether to render rainbow colors for matching brackets. Defaults to `false`. + pub rainbow_brackets: bool, } #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Eq, PartialOrd, Ord)] @@ -1017,6 +1022,7 @@ impl Default for Config { end_of_line_diagnostics: DiagnosticFilter::Disable, clipboard_provider: ClipboardProvider::default(), editor_config: true, + rainbow_brackets: false, } } } @@ -1355,7 +1361,7 @@ impl Editor { fn set_theme_impl(&mut self, theme: Theme, preview: ThemeAction) { // `ui.selection` is the only scope required to be able to render a theme. 
- if theme.find_scope_index_exact("ui.selection").is_none() { + if theme.find_highlight_exact("ui.selection").is_none() { self.set_error("Invalid theme: `ui.selection` required"); return; } @@ -1471,9 +1477,9 @@ impl Editor { } pub fn refresh_doc_language(&mut self, doc_id: DocumentId) { - let loader = self.syn_loader.clone(); + let loader = self.syn_loader.load(); let doc = doc_mut!(self, &doc_id); - doc.detect_language(loader); + doc.detect_language(&loader); doc.detect_editor_config(); doc.detect_indent_and_line_ending(); self.refresh_language_servers(doc_id); @@ -1509,12 +1515,12 @@ impl Editor { if let helix_lsp::Error::ExecutableNotFound(err) = err { // Silence by default since some language servers might just not be installed log::debug!( - "Language server not found for `{}` {} {}", language.scope(), lang, err, + "Language server not found for `{}` {} {}", language.scope, lang, err, ); } else { log::error!( "Failed to initialize the language servers for `{}` - `{}` {{ {} }}", - language.scope(), + language.scope, lang, err ); @@ -1733,7 +1739,10 @@ impl Editor { } pub fn new_file(&mut self, action: Action) -> DocumentId { - self.new_file_from_document(action, Document::default(self.config.clone())) + self.new_file_from_document( + action, + Document::default(self.config.clone(), self.syn_loader.clone()), + ) } pub fn new_file_from_stdin(&mut self, action: Action) -> Result { @@ -1742,6 +1751,7 @@ impl Editor { helix_core::Rope::default(), Some((encoding, has_bom)), self.config.clone(), + self.syn_loader.clone(), ); let doc_id = self.new_file_from_document(action, doc); let doc = doc_mut!(self, &doc_id); @@ -1770,8 +1780,9 @@ impl Editor { let mut doc = Document::open( &path, None, - Some(self.syn_loader.clone()), + true, self.config.clone(), + self.syn_loader.clone(), )?; let diagnostics = @@ -1866,7 +1877,12 @@ impl Editor { .iter() .map(|(&doc_id, _)| doc_id) .next() - .unwrap_or_else(|| self.new_document(Document::default(self.config.clone()))); + 
.unwrap_or_else(|| { + self.new_document(Document::default( + self.config.clone(), + self.syn_loader.clone(), + )) + }); let view = View::new(doc_id, self.config().gutters.clone()); let view_id = self.tree.insert(view); let doc = doc_mut!(self, &doc_id); diff --git a/helix-view/src/gutter.rs b/helix-view/src/gutter.rs index 665a78bcc6f0..c2cbc0da500e 100644 --- a/helix-view/src/gutter.rs +++ b/helix-view/src/gutter.rs @@ -1,6 +1,6 @@ use std::fmt::Write; -use helix_core::syntax::LanguageServerFeature; +use helix_core::syntax::config::LanguageServerFeature; use crate::{ editor::GutterType, @@ -334,7 +334,7 @@ mod tests { use crate::graphics::Rect; use crate::DocumentId; use arc_swap::ArcSwap; - use helix_core::Rope; + use helix_core::{syntax, Rope}; #[test] fn test_default_gutter_widths() { @@ -346,6 +346,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); assert_eq!(view.gutters.layout.len(), 5); @@ -371,6 +372,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); assert_eq!(view.gutters.layout.len(), 1); @@ -389,6 +391,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); assert_eq!(view.gutters.layout.len(), 2); @@ -411,6 +414,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); let rope = Rope::from_str("a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np"); @@ -418,6 +422,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); assert_eq!(view.gutters.layout.len(), 2); diff --git a/helix-view/src/theme.rs b/helix-view/src/theme.rs index af8f03bca050..6a18d321e19c 100644 --- a/helix-view/src/theme.rs +++ 
b/helix-view/src/theme.rs @@ -227,6 +227,7 @@ pub struct Theme { // tree-sitter highlight styles are stored in a Vec to optimize lookups scopes: Vec, highlights: Vec