From 9c4d567736dc4716a68dd8def4cd835cc807d983 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Thu, 20 Feb 2025 17:08:47 -0500 Subject: [PATCH 1/7] syntax: Move config types to a separate module --- helix-core/src/comment.rs | 3 +- helix-core/src/config.rs | 2 +- helix-core/src/indent.rs | 5 +- helix-core/src/movement.rs | 2 +- helix-core/src/syntax.rs | 619 +----------------- helix-core/src/syntax/config.rs | 616 +++++++++++++++++ helix-core/src/textobject.rs | 2 +- helix-core/tests/indent.rs | 2 +- helix-dap/src/client.rs | 2 +- helix-lsp/src/client.rs | 2 +- helix-lsp/src/lib.rs | 2 +- helix-term/src/commands.rs | 2 +- helix-term/src/commands/dap.rs | 2 +- helix-term/src/commands/lsp.rs | 2 +- helix-term/src/handlers/completion.rs | 2 +- helix-term/src/handlers/completion/request.rs | 2 +- helix-term/src/handlers/document_colors.rs | 2 +- helix-term/src/handlers/signature_help.rs | 2 +- helix-term/src/ui/mod.rs | 2 +- helix-term/tests/integration.rs | 2 +- helix-view/src/document.rs | 12 +- helix-view/src/editor.rs | 5 +- helix-view/src/gutter.rs | 2 +- xtask/src/helpers.rs | 2 +- 24 files changed, 657 insertions(+), 639 deletions(-) create mode 100644 helix-core/src/syntax/config.rs diff --git a/helix-core/src/comment.rs b/helix-core/src/comment.rs index 6bb1f300c213..5985cac7805a 100644 --- a/helix-core/src/comment.rs +++ b/helix-core/src/comment.rs @@ -4,7 +4,8 @@ use smallvec::SmallVec; use crate::{ - syntax::BlockCommentToken, Change, Range, Rope, RopeSlice, Selection, Tendril, Transaction, + syntax::config::BlockCommentToken, Change, Range, Rope, RopeSlice, Selection, Tendril, + Transaction, }; use helix_stdx::rope::RopeSliceExt; use std::borrow::Cow; diff --git a/helix-core/src/config.rs b/helix-core/src/config.rs index 27cd4e297e34..559aa2cb8492 100644 --- a/helix-core/src/config.rs +++ b/helix-core/src/config.rs @@ -1,4 +1,4 @@ -use crate::syntax::{Configuration, Loader, LoaderError}; +use crate::syntax::{config::Configuration, Loader, LoaderError}; /// Language configuration based on built-in languages.toml. 
pub fn default_lang_config() -> Configuration { diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 04ce9a28dd23..52369bb7bd87 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -6,7 +6,10 @@ use tree_sitter::{Query, QueryCursor, QueryPredicateArg}; use crate::{ chars::{char_is_line_ending, char_is_whitespace}, graphemes::{grapheme_width, tab_width_at}, - syntax::{IndentationHeuristic, LanguageConfiguration, RopeProvider, Syntax}, + syntax::{ + config::{IndentationHeuristic, LanguageConfiguration}, + RopeProvider, Syntax, + }, tree_sitter::Node, Position, Rope, RopeSlice, Tendril, }; diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index e446d8cc425d..2a1fa94f29ff 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -13,7 +13,7 @@ use crate::{ }, line_ending::rope_is_line_ending, position::char_idx_at_visual_block_offset, - syntax::LanguageConfiguration, + syntax::config::LanguageConfiguration, text_annotations::TextAnnotations, textobject::TextObject, visual_offset_from_block, Range, RopeSlice, Selection, Syntax, diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 677cdfa0b673..6a2c28d1e126 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,9 +1,8 @@ +pub mod config; mod tree_cursor; use crate::{ - auto_pairs::AutoPairs, chars::char_is_line_ending, - diagnostic::Severity, regex::Regex, transaction::{ChangeSet, Operation}, RopeSlice, Tendril, @@ -12,7 +11,7 @@ use crate::{ use ahash::RandomState; use arc_swap::{ArcSwap, Guard}; use bitflags::bitflags; -use globset::GlobSet; +use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration}; use hashbrown::raw::RawTable; use helix_stdx::rope::{self, RopeSliceExt}; use slotmap::{DefaultKey as LayerId, HopSlotMap}; @@ -20,595 +19,20 @@ use slotmap::{DefaultKey as LayerId, HopSlotMap}; use std::{ borrow::Cow, cell::RefCell, - collections::{HashMap, HashSet, VecDeque}, - fmt::{self, Display, Write}, + collections::{HashMap, VecDeque}, + fmt::{self, Write}, hash::{Hash, Hasher}, mem::replace, - path::{Path, PathBuf}, - str::FromStr, + path::Path, sync::Arc, }; -use once_cell::sync::{Lazy, OnceCell}; -use serde::{ser::SerializeSeq, Deserialize, Serialize}; +use once_cell::sync::Lazy; use helix_loader::grammar::{get_language, load_runtime_file}; pub use tree_cursor::TreeCursor; -fn deserialize_regex<'de, D>(deserializer: D) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - Option::::deserialize(deserializer)? - .map(|buf| rope::Regex::new(&buf).map_err(serde::de::Error::custom)) - .transpose() -} - -fn deserialize_lsp_config<'de, D>(deserializer: D) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - Option::::deserialize(deserializer)? 
- .map(|toml| toml.try_into().map_err(serde::de::Error::custom)) - .transpose() -} - -fn deserialize_tab_width<'de, D>(deserializer: D) -> Result -where - D: serde::Deserializer<'de>, -{ - usize::deserialize(deserializer).and_then(|n| { - if n > 0 && n <= 16 { - Ok(n) - } else { - Err(serde::de::Error::custom( - "tab width must be a value from 1 to 16 inclusive", - )) - } - }) -} - -pub fn deserialize_auto_pairs<'de, D>(deserializer: D) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - Ok(Option::::deserialize(deserializer)?.and_then(AutoPairConfig::into)) -} - -fn default_timeout() -> u64 { - 20 -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct Configuration { - pub language: Vec, - #[serde(default)] - pub language_server: HashMap, -} - -// largely based on tree-sitter/cli/src/loader.rs -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case", deny_unknown_fields)] -pub struct LanguageConfiguration { - #[serde(rename = "name")] - pub language_id: String, // c-sharp, rust, tsx - #[serde(rename = "language-id")] - // see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem - pub language_server_language_id: Option, // csharp, rust, typescriptreact, for the language-server - pub scope: String, // source.rust - pub file_types: Vec, // filename extension or ends_with? - #[serde(default)] - pub shebangs: Vec, // interpreter(s) associated with language - #[serde(default)] - pub roots: Vec, // these indicate project roots <.git, Cargo.toml> - #[serde( - default, - skip_serializing, - deserialize_with = "from_comment_tokens", - alias = "comment-token" - )] - pub comment_tokens: Option>, - #[serde( - default, - skip_serializing, - deserialize_with = "from_block_comment_tokens" - )] - pub block_comment_tokens: Option>, - pub text_width: Option, - pub soft_wrap: Option, - - #[serde(default)] - pub auto_format: bool, - - #[serde(skip_serializing_if = "Option::is_none")] - pub formatter: Option, - - /// If set, overrides `editor.path-completion`. - pub path_completion: Option, - - #[serde(default)] - pub diagnostic_severity: Severity, - - pub grammar: Option, // tree-sitter grammar name, defaults to language_id - - // content_regex - #[serde(default, skip_serializing, deserialize_with = "deserialize_regex")] - pub injection_regex: Option, - // first_line_regex - // - #[serde(skip)] - pub(crate) highlight_config: OnceCell>>, - // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583 - #[serde( - default, - skip_serializing_if = "Vec::is_empty", - serialize_with = "serialize_lang_features", - deserialize_with = "deserialize_lang_features" - )] - pub language_servers: Vec, - #[serde(skip_serializing_if = "Option::is_none")] - pub indent: Option, - - #[serde(skip)] - pub(crate) indent_query: OnceCell>, - #[serde(skip)] - pub(crate) textobject_query: OnceCell>, - #[serde(skip_serializing_if = "Option::is_none")] - pub debugger: Option, - - /// Automatic insertion of pairs to parentheses, brackets, - /// etc. Defaults to true. Optionally, this can be a list of 2-tuples - /// to specify a list of characters to pair. This overrides the - /// global setting. - #[serde(default, skip_serializing, deserialize_with = "deserialize_auto_pairs")] - pub auto_pairs: Option, - - pub rulers: Option>, // if set, override editor's rulers - - /// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`. 
- /// Falling back to the current working directory if none are configured. - pub workspace_lsp_roots: Option>, - #[serde(default)] - pub persistent_diagnostic_sources: Vec, -} - -#[derive(Debug, PartialEq, Eq, Hash)] -pub enum FileType { - /// The extension of the file, either the `Path::extension` or the full - /// filename if the file does not have an extension. - Extension(String), - /// A Unix-style path glob. This is compared to the file's absolute path, so - /// it can be used to detect files based on their directories. If the glob - /// is not an absolute path and does not already start with a glob pattern, - /// a glob pattern will be prepended to it. - Glob(globset::Glob), -} - -impl Serialize for FileType { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - use serde::ser::SerializeMap; - - match self { - FileType::Extension(extension) => serializer.serialize_str(extension), - FileType::Glob(glob) => { - let mut map = serializer.serialize_map(Some(1))?; - map.serialize_entry("glob", glob.glob())?; - map.end() - } - } - } -} - -impl<'de> Deserialize<'de> for FileType { - fn deserialize(deserializer: D) -> Result - where - D: serde::de::Deserializer<'de>, - { - struct FileTypeVisitor; - - impl<'de> serde::de::Visitor<'de> for FileTypeVisitor { - type Value = FileType; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("string or table") - } - - fn visit_str(self, value: &str) -> Result - where - E: serde::de::Error, - { - Ok(FileType::Extension(value.to_string())) - } - - fn visit_map(self, mut map: M) -> Result - where - M: serde::de::MapAccess<'de>, - { - match map.next_entry::()? { - Some((key, mut glob)) if key == "glob" => { - // If the glob isn't an absolute path or already starts - // with a glob pattern, add a leading glob so we - // properly match relative paths. 
- if !glob.starts_with('/') && !glob.starts_with("*/") { - glob.insert_str(0, "*/"); - } - - globset::Glob::new(glob.as_str()) - .map(FileType::Glob) - .map_err(|err| { - serde::de::Error::custom(format!("invalid `glob` pattern: {}", err)) - }) - } - Some((key, _value)) => Err(serde::de::Error::custom(format!( - "unknown key in `file-types` list: {}", - key - ))), - None => Err(serde::de::Error::custom( - "expected a `suffix` key in the `file-types` entry", - )), - } - } - } - - deserializer.deserialize_any(FileTypeVisitor) - } -} - -fn from_comment_tokens<'de, D>(deserializer: D) -> Result>, D::Error> -where - D: serde::Deserializer<'de>, -{ - #[derive(Deserialize)] - #[serde(untagged)] - enum CommentTokens { - Multiple(Vec), - Single(String), - } - Ok( - Option::::deserialize(deserializer)?.map(|tokens| match tokens { - CommentTokens::Single(val) => vec![val], - CommentTokens::Multiple(vals) => vals, - }), - ) -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct BlockCommentToken { - pub start: String, - pub end: String, -} - -impl Default for BlockCommentToken { - fn default() -> Self { - BlockCommentToken { - start: "/*".to_string(), - end: "*/".to_string(), - } - } -} - -fn from_block_comment_tokens<'de, D>( - deserializer: D, -) -> Result>, D::Error> -where - D: serde::Deserializer<'de>, -{ - #[derive(Deserialize)] - #[serde(untagged)] - enum BlockCommentTokens { - Multiple(Vec), - Single(BlockCommentToken), - } - Ok( - Option::::deserialize(deserializer)?.map(|tokens| match tokens { - BlockCommentTokens::Single(val) => vec![val], - BlockCommentTokens::Multiple(vals) => vals, - }), - ) -} - -#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] -#[serde(rename_all = "kebab-case")] -pub enum LanguageServerFeature { - Format, - GotoDeclaration, - GotoDefinition, - GotoTypeDefinition, - GotoReference, - GotoImplementation, - // Goto, use bitflags, combining previous Goto members? - SignatureHelp, - Hover, - DocumentHighlight, - Completion, - CodeAction, - WorkspaceCommand, - DocumentSymbols, - WorkspaceSymbols, - // Symbols, use bitflags, see above? 
- Diagnostics, - RenameSymbol, - InlayHints, - DocumentColors, -} - -impl Display for LanguageServerFeature { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use LanguageServerFeature::*; - let feature = match self { - Format => "format", - GotoDeclaration => "goto-declaration", - GotoDefinition => "goto-definition", - GotoTypeDefinition => "goto-type-definition", - GotoReference => "goto-reference", - GotoImplementation => "goto-implementation", - SignatureHelp => "signature-help", - Hover => "hover", - DocumentHighlight => "document-highlight", - Completion => "completion", - CodeAction => "code-action", - WorkspaceCommand => "workspace-command", - DocumentSymbols => "document-symbols", - WorkspaceSymbols => "workspace-symbols", - Diagnostics => "diagnostics", - RenameSymbol => "rename-symbol", - InlayHints => "inlay-hints", - DocumentColors => "document-colors", - }; - write!(f, "{feature}",) - } -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] -enum LanguageServerFeatureConfiguration { - #[serde(rename_all = "kebab-case")] - Features { - #[serde(default, skip_serializing_if = "HashSet::is_empty")] - only_features: HashSet, - #[serde(default, skip_serializing_if = "HashSet::is_empty")] - except_features: HashSet, - name: String, - }, - Simple(String), -} - -#[derive(Debug, Default)] -pub struct LanguageServerFeatures { - pub name: String, - pub only: HashSet, - pub excluded: HashSet, -} - -impl LanguageServerFeatures { - pub fn has_feature(&self, feature: LanguageServerFeature) -> bool { - (self.only.is_empty() || self.only.contains(&feature)) && !self.excluded.contains(&feature) - } -} - -fn deserialize_lang_features<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - let raw: Vec = Deserialize::deserialize(deserializer)?; - let res = raw - .into_iter() - .map(|config| match config { - LanguageServerFeatureConfiguration::Simple(name) => LanguageServerFeatures { - name, - ..Default::default() - }, - LanguageServerFeatureConfiguration::Features { - only_features, - except_features, - name, - } => LanguageServerFeatures { - name, - only: only_features, - excluded: except_features, - }, - }) - .collect(); - Ok(res) -} -fn serialize_lang_features( - map: &Vec, - serializer: S, -) -> Result -where - S: serde::Serializer, -{ - let mut serializer = serializer.serialize_seq(Some(map.len()))?; - for features in map { - let features = if features.only.is_empty() && features.excluded.is_empty() { - LanguageServerFeatureConfiguration::Simple(features.name.to_owned()) - } else { - LanguageServerFeatureConfiguration::Features { - only_features: features.only.clone(), - except_features: features.excluded.clone(), - name: features.name.to_owned(), - } - }; - serializer.serialize_element(&features)?; - } - serializer.end() -} - -fn deserialize_required_root_patterns<'de, D>(deserializer: D) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - let patterns = Vec::::deserialize(deserializer)?; - if patterns.is_empty() { - return Ok(None); - } - let mut builder = globset::GlobSetBuilder::new(); - for pattern in patterns { - let glob = globset::Glob::new(&pattern).map_err(serde::de::Error::custom)?; - builder.add(glob); - } - builder.build().map(Some).map_err(serde::de::Error::custom) -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct LanguageServerConfiguration { - pub command: String, - #[serde(default)] - 
#[serde(skip_serializing_if = "Vec::is_empty")] - pub args: Vec, - #[serde(default, skip_serializing_if = "HashMap::is_empty")] - pub environment: HashMap, - #[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")] - pub config: Option, - #[serde(default = "default_timeout")] - pub timeout: u64, - #[serde( - default, - skip_serializing, - deserialize_with = "deserialize_required_root_patterns" - )] - pub required_root_patterns: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct FormatterConfiguration { - pub command: String, - #[serde(default)] - #[serde(skip_serializing_if = "Vec::is_empty")] - pub args: Vec, -} - -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub struct AdvancedCompletion { - pub name: Option, - pub completion: Option, - pub default: Option, -} - -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case", untagged)] -pub enum DebugConfigCompletion { - Named(String), - Advanced(AdvancedCompletion), -} - -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(untagged)] -pub enum DebugArgumentValue { - String(String), - Array(Vec), - Boolean(bool), -} - -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub struct DebugTemplate { - pub name: String, - pub request: String, - #[serde(default)] - pub completion: Vec, - pub args: HashMap, -} - -#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub struct DebugAdapterConfig { - pub name: String, - pub transport: String, - #[serde(default)] - pub command: String, - #[serde(default)] - pub args: Vec, - pub port_arg: Option, - pub templates: Vec, - #[serde(default)] - pub quirks: DebuggerQuirks, -} - -// Different workarounds for adapters' differences -#[derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize)] -pub struct DebuggerQuirks { - #[serde(default)] - pub absolute_paths: bool, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct IndentationConfiguration { - #[serde(deserialize_with = "deserialize_tab_width")] - pub tab_width: usize, - pub unit: String, -} - -/// How the indentation for a newly inserted line should be determined. -/// If the selected heuristic is not available (e.g. because the current -/// language has no tree-sitter indent queries), a simpler one will be used. -#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum IndentationHeuristic { - /// Just copy the indentation of the line that the cursor is currently on. - Simple, - /// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line. - TreeSitter, - /// Use tree-sitter indent queries to compute the expected difference in indentation between the new line - /// and the line before. Add this to the actual indentation level of the line before. - #[default] - Hybrid, -} - -/// Configuration for auto pairs -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)] -pub enum AutoPairConfig { - /// Enables or disables auto pairing. False means disabled. True means to use the default pairs. - Enable(bool), - - /// The mappings of pairs. 
- Pairs(HashMap), -} - -impl Default for AutoPairConfig { - fn default() -> Self { - AutoPairConfig::Enable(true) - } -} - -impl From<&AutoPairConfig> for Option { - fn from(auto_pair_config: &AutoPairConfig) -> Self { - match auto_pair_config { - AutoPairConfig::Enable(false) => None, - AutoPairConfig::Enable(true) => Some(AutoPairs::default()), - AutoPairConfig::Pairs(pairs) => Some(AutoPairs::new(pairs.iter())), - } - } -} - -impl From for Option { - fn from(auto_pairs_config: AutoPairConfig) -> Self { - (&auto_pairs_config).into() - } -} - -impl FromStr for AutoPairConfig { - type Err = std::str::ParseBoolError; - - // only do bool parsing for runtime setting - fn from_str(s: &str) -> Result { - let enable: bool = s.parse()?; - Ok(AutoPairConfig::Enable(enable)) - } -} - #[derive(Debug)] pub struct TextObjectQuery { pub query: Query, @@ -743,7 +167,7 @@ pub fn read_query(language: &str, filename: &str) -> String { .to_string() } -impl LanguageConfiguration { +impl config::LanguageConfiguration { fn initialize_highlight(&self, scopes: &[String]) -> Option> { let highlights_query = read_query(&self.language_id, "highlights.scm"); // always highlight syntax errors @@ -831,35 +255,6 @@ impl LanguageConfiguration { .ok() } } -#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] -#[serde(default, rename_all = "kebab-case", deny_unknown_fields)] -pub struct SoftWrap { - /// Soft wrap lines that exceed viewport width. Default to off - // NOTE: Option on purpose because the struct is shared between language config and global config. - // By default the option is None so that the language config falls back to the global config unless explicitly set. - pub enable: Option, - /// Maximum space left free at the end of the line. - /// This space is used to wrap text at word boundaries. If that is not possible within this limit - /// the word is simply split at the end of the line. - /// - /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views. - /// - /// Default to 20 - pub max_wrap: Option, - /// Maximum number of indentation that can be carried over from the previous line when softwrapping. - /// If a line is indented further then this limit it is rendered at the start of the viewport instead. - /// - /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views. 
- /// - /// Default to 40 - pub max_indent_retain: Option, - /// Indicator placed at the beginning of softwrapped lines - /// - /// Defaults to ↪ - pub wrap_indicator: Option, - /// Softwrap at `text_width` instead of viewport width if it is shorter - pub wrap_at_text_width: Option, -} #[derive(Debug)] struct FileTypeGlob { diff --git a/helix-core/src/syntax/config.rs b/helix-core/src/syntax/config.rs new file mode 100644 index 000000000000..f73103c29ebf --- /dev/null +++ b/helix-core/src/syntax/config.rs @@ -0,0 +1,616 @@ +use crate::{auto_pairs::AutoPairs, diagnostic::Severity}; + +use globset::GlobSet; +use helix_stdx::rope; +use once_cell::sync::OnceCell; +use serde::{ser::SerializeSeq as _, Deserialize, Serialize}; + +use std::{ + collections::{HashMap, HashSet}, + fmt::{self, Display}, + path::PathBuf, + str::FromStr, + sync::Arc, +}; + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct Configuration { + pub language: Vec, + #[serde(default)] + pub language_server: HashMap, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields)] +pub struct LanguageConfiguration { + #[serde(rename = "name")] + pub language_id: String, // c-sharp, rust, tsx + #[serde(rename = "language-id")] + // see the table under https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocumentItem + pub language_server_language_id: Option, // csharp, rust, typescriptreact, for the language-server + pub scope: String, // source.rust + pub file_types: Vec, // filename extension or ends_with? + #[serde(default)] + pub shebangs: Vec, // interpreter(s) associated with language + #[serde(default)] + pub roots: Vec, // these indicate project roots <.git, Cargo.toml> + #[serde( + default, + skip_serializing, + deserialize_with = "from_comment_tokens", + alias = "comment-token" + )] + pub comment_tokens: Option>, + #[serde( + default, + skip_serializing, + deserialize_with = "from_block_comment_tokens" + )] + pub block_comment_tokens: Option>, + pub text_width: Option, + pub soft_wrap: Option, + + #[serde(default)] + pub auto_format: bool, + + #[serde(skip_serializing_if = "Option::is_none")] + pub formatter: Option, + + /// If set, overrides `editor.path-completion`. + pub path_completion: Option, + + #[serde(default)] + pub diagnostic_severity: Severity, + + pub grammar: Option, // tree-sitter grammar name, defaults to language_id + + // content_regex + #[serde(default, skip_serializing, deserialize_with = "deserialize_regex")] + pub injection_regex: Option, + // first_line_regex + // + #[serde(skip)] + pub(crate) highlight_config: OnceCell>>, + // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583 + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + serialize_with = "serialize_lang_features", + deserialize_with = "deserialize_lang_features" + )] + pub language_servers: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub indent: Option, + + #[serde(skip)] + pub(crate) indent_query: OnceCell>, + #[serde(skip)] + pub(crate) textobject_query: OnceCell>, + #[serde(skip_serializing_if = "Option::is_none")] + pub debugger: Option, + + /// Automatic insertion of pairs to parentheses, brackets, + /// etc. Defaults to true. Optionally, this can be a list of 2-tuples + /// to specify a list of characters to pair. This overrides the + /// global setting. 
+ #[serde(default, skip_serializing, deserialize_with = "deserialize_auto_pairs")] + pub auto_pairs: Option, + + pub rulers: Option>, // if set, override editor's rulers + + /// Hardcoded LSP root directories relative to the workspace root, like `examples` or `tools/fuzz`. + /// Falling back to the current working directory if none are configured. + pub workspace_lsp_roots: Option>, + #[serde(default)] + pub persistent_diagnostic_sources: Vec, +} + +#[derive(Debug, PartialEq, Eq, Hash)] +pub enum FileType { + /// The extension of the file, either the `Path::extension` or the full + /// filename if the file does not have an extension. + Extension(String), + /// A Unix-style path glob. This is compared to the file's absolute path, so + /// it can be used to detect files based on their directories. If the glob + /// is not an absolute path and does not already start with a glob pattern, + /// a glob pattern will be prepended to it. + Glob(globset::Glob), +} + +impl Serialize for FileType { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeMap; + + match self { + FileType::Extension(extension) => serializer.serialize_str(extension), + FileType::Glob(glob) => { + let mut map = serializer.serialize_map(Some(1))?; + map.serialize_entry("glob", glob.glob())?; + map.end() + } + } + } +} + +impl<'de> Deserialize<'de> for FileType { + fn deserialize(deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + struct FileTypeVisitor; + + impl<'de> serde::de::Visitor<'de> for FileTypeVisitor { + type Value = FileType; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("string or table") + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + Ok(FileType::Extension(value.to_string())) + } + + fn visit_map(self, mut map: M) -> Result + where + M: serde::de::MapAccess<'de>, + { + match map.next_entry::()? { + Some((key, mut glob)) if key == "glob" => { + // If the glob isn't an absolute path or already starts + // with a glob pattern, add a leading glob so we + // properly match relative paths. 
+ if !glob.starts_with('/') && !glob.starts_with("*/") { + glob.insert_str(0, "*/"); + } + + globset::Glob::new(glob.as_str()) + .map(FileType::Glob) + .map_err(|err| { + serde::de::Error::custom(format!("invalid `glob` pattern: {}", err)) + }) + } + Some((key, _value)) => Err(serde::de::Error::custom(format!( + "unknown key in `file-types` list: {}", + key + ))), + None => Err(serde::de::Error::custom( + "expected a `suffix` key in the `file-types` entry", + )), + } + } + } + + deserializer.deserialize_any(FileTypeVisitor) + } +} + +fn from_comment_tokens<'de, D>(deserializer: D) -> Result>, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum CommentTokens { + Multiple(Vec), + Single(String), + } + Ok( + Option::::deserialize(deserializer)?.map(|tokens| match tokens { + CommentTokens::Single(val) => vec![val], + CommentTokens::Multiple(vals) => vals, + }), + ) +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct BlockCommentToken { + pub start: String, + pub end: String, +} + +impl Default for BlockCommentToken { + fn default() -> Self { + BlockCommentToken { + start: "/*".to_string(), + end: "*/".to_string(), + } + } +} + +fn from_block_comment_tokens<'de, D>( + deserializer: D, +) -> Result>, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum BlockCommentTokens { + Multiple(Vec), + Single(BlockCommentToken), + } + Ok( + Option::::deserialize(deserializer)?.map(|tokens| match tokens { + BlockCommentTokens::Single(val) => vec![val], + BlockCommentTokens::Multiple(vals) => vals, + }), + ) +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "kebab-case")] +pub enum LanguageServerFeature { + Format, + GotoDeclaration, + GotoDefinition, + GotoTypeDefinition, + GotoReference, + GotoImplementation, + // Goto, use bitflags, combining previous Goto members? + SignatureHelp, + Hover, + DocumentHighlight, + Completion, + CodeAction, + WorkspaceCommand, + DocumentSymbols, + WorkspaceSymbols, + // Symbols, use bitflags, see above? 
+ Diagnostics, + RenameSymbol, + InlayHints, + DocumentColors, +} + +impl Display for LanguageServerFeature { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use LanguageServerFeature::*; + let feature = match self { + Format => "format", + GotoDeclaration => "goto-declaration", + GotoDefinition => "goto-definition", + GotoTypeDefinition => "goto-type-definition", + GotoReference => "goto-reference", + GotoImplementation => "goto-implementation", + SignatureHelp => "signature-help", + Hover => "hover", + DocumentHighlight => "document-highlight", + Completion => "completion", + CodeAction => "code-action", + WorkspaceCommand => "workspace-command", + DocumentSymbols => "document-symbols", + WorkspaceSymbols => "workspace-symbols", + Diagnostics => "diagnostics", + RenameSymbol => "rename-symbol", + InlayHints => "inlay-hints", + DocumentColors => "document-colors", + }; + write!(f, "{feature}",) + } +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] +enum LanguageServerFeatureConfiguration { + #[serde(rename_all = "kebab-case")] + Features { + #[serde(default, skip_serializing_if = "HashSet::is_empty")] + only_features: HashSet, + #[serde(default, skip_serializing_if = "HashSet::is_empty")] + except_features: HashSet, + name: String, + }, + Simple(String), +} + +#[derive(Debug, Default)] +pub struct LanguageServerFeatures { + pub name: String, + pub only: HashSet, + pub excluded: HashSet, +} + +impl LanguageServerFeatures { + pub fn has_feature(&self, feature: LanguageServerFeature) -> bool { + (self.only.is_empty() || self.only.contains(&feature)) && !self.excluded.contains(&feature) + } +} + +fn deserialize_lang_features<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let raw: Vec = Deserialize::deserialize(deserializer)?; + let res = raw + .into_iter() + .map(|config| match config { + LanguageServerFeatureConfiguration::Simple(name) => LanguageServerFeatures { + name, + ..Default::default() + }, + LanguageServerFeatureConfiguration::Features { + only_features, + except_features, + name, + } => LanguageServerFeatures { + name, + only: only_features, + excluded: except_features, + }, + }) + .collect(); + Ok(res) +} +fn serialize_lang_features( + map: &Vec, + serializer: S, +) -> Result +where + S: serde::Serializer, +{ + let mut serializer = serializer.serialize_seq(Some(map.len()))?; + for features in map { + let features = if features.only.is_empty() && features.excluded.is_empty() { + LanguageServerFeatureConfiguration::Simple(features.name.to_owned()) + } else { + LanguageServerFeatureConfiguration::Features { + only_features: features.only.clone(), + except_features: features.excluded.clone(), + name: features.name.to_owned(), + } + }; + serializer.serialize_element(&features)?; + } + serializer.end() +} + +fn deserialize_required_root_patterns<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let patterns = Vec::::deserialize(deserializer)?; + if patterns.is_empty() { + return Ok(None); + } + let mut builder = globset::GlobSetBuilder::new(); + for pattern in patterns { + let glob = globset::Glob::new(&pattern).map_err(serde::de::Error::custom)?; + builder.add(glob); + } + builder.build().map(Some).map_err(serde::de::Error::custom) +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct LanguageServerConfiguration { + pub command: String, + #[serde(default)] + 
#[serde(skip_serializing_if = "Vec::is_empty")] + pub args: Vec, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub environment: HashMap, + #[serde(default, skip_serializing, deserialize_with = "deserialize_lsp_config")] + pub config: Option, + #[serde(default = "default_timeout")] + pub timeout: u64, + #[serde( + default, + skip_serializing, + deserialize_with = "deserialize_required_root_patterns" + )] + pub required_root_patterns: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct FormatterConfiguration { + pub command: String, + #[serde(default)] + #[serde(skip_serializing_if = "Vec::is_empty")] + pub args: Vec, +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct AdvancedCompletion { + pub name: Option, + pub completion: Option, + pub default: Option, +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case", untagged)] +pub enum DebugConfigCompletion { + Named(String), + Advanced(AdvancedCompletion), +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(untagged)] +pub enum DebugArgumentValue { + String(String), + Array(Vec), + Boolean(bool), +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct DebugTemplate { + pub name: String, + pub request: String, + #[serde(default)] + pub completion: Vec, + pub args: HashMap, +} + +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct DebugAdapterConfig { + pub name: String, + pub transport: String, + #[serde(default)] + pub command: String, + #[serde(default)] + pub args: Vec, + pub port_arg: Option, + pub templates: Vec, + #[serde(default)] + pub quirks: DebuggerQuirks, +} + +// Different workarounds for adapters' differences +#[derive(Debug, Default, PartialEq, Eq, Clone, Serialize, Deserialize)] +pub struct DebuggerQuirks { + #[serde(default)] + pub absolute_paths: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct IndentationConfiguration { + #[serde(deserialize_with = "deserialize_tab_width")] + pub tab_width: usize, + pub unit: String, +} + +/// How the indentation for a newly inserted line should be determined. +/// If the selected heuristic is not available (e.g. because the current +/// language has no tree-sitter indent queries), a simpler one will be used. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum IndentationHeuristic { + /// Just copy the indentation of the line that the cursor is currently on. + Simple, + /// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line. + TreeSitter, + /// Use tree-sitter indent queries to compute the expected difference in indentation between the new line + /// and the line before. Add this to the actual indentation level of the line before. + #[default] + Hybrid, +} + +/// Configuration for auto pairs +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)] +pub enum AutoPairConfig { + /// Enables or disables auto pairing. False means disabled. True means to use the default pairs. + Enable(bool), + + /// The mappings of pairs. 
+    Pairs(HashMap<char, char>),
+}
+
+impl Default for AutoPairConfig {
+    fn default() -> Self {
+        AutoPairConfig::Enable(true)
+    }
+}
+
+impl From<&AutoPairConfig> for Option<AutoPairs> {
+    fn from(auto_pair_config: &AutoPairConfig) -> Self {
+        match auto_pair_config {
+            AutoPairConfig::Enable(false) => None,
+            AutoPairConfig::Enable(true) => Some(AutoPairs::default()),
+            AutoPairConfig::Pairs(pairs) => Some(AutoPairs::new(pairs.iter())),
+        }
+    }
+}
+
+impl From<AutoPairConfig> for Option<AutoPairs> {
+    fn from(auto_pairs_config: AutoPairConfig) -> Self {
+        (&auto_pairs_config).into()
+    }
+}
+
+impl FromStr for AutoPairConfig {
+    type Err = std::str::ParseBoolError;
+
+    // only do bool parsing for runtime setting
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let enable: bool = s.parse()?;
+        Ok(AutoPairConfig::Enable(enable))
+    }
+}
+
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(default, rename_all = "kebab-case", deny_unknown_fields)]
+pub struct SoftWrap {
+    /// Soft wrap lines that exceed viewport width. Default to off
+    // NOTE: Option on purpose because the struct is shared between language config and global config.
+    // By default the option is None so that the language config falls back to the global config unless explicitly set.
+    pub enable: Option<bool>,
+    /// Maximum space left free at the end of the line.
+    /// This space is used to wrap text at word boundaries. If that is not possible within this limit
+    /// the word is simply split at the end of the line.
+    ///
+    /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
+    ///
+    /// Default to 20
+    pub max_wrap: Option<u16>,
+    /// Maximum number of indentation that can be carried over from the previous line when softwrapping.
+    /// If a line is indented further then this limit it is rendered at the start of the viewport instead.
+    ///
+    /// This is automatically hard-limited to a quarter of the viewport to ensure correct display on small views.
+    ///
+    /// Default to 40
+    pub max_indent_retain: Option<u16>,
+    /// Indicator placed at the beginning of softwrapped lines
+    ///
+    /// Defaults to ↪
+    pub wrap_indicator: Option<String>,
+    /// Softwrap at `text_width` instead of viewport width if it is shorter
+    pub wrap_at_text_width: Option<bool>,
+}
+
+fn deserialize_regex<'de, D>(deserializer: D) -> Result<Option<rope::Regex>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    Option::<String>::deserialize(deserializer)?
+        .map(|buf| rope::Regex::new(&buf).map_err(serde::de::Error::custom))
+        .transpose()
+}
+
+fn deserialize_lsp_config<'de, D>(deserializer: D) -> Result<Option<serde_json::Value>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    Option::<toml::Value>::deserialize(deserializer)?
+        .map(|toml| toml.try_into().map_err(serde::de::Error::custom))
+        .transpose()
+}
+
+fn deserialize_tab_width<'de, D>(deserializer: D) -> Result<usize, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    usize::deserialize(deserializer).and_then(|n| {
+        if n > 0 && n <= 16 {
+            Ok(n)
+        } else {
+            Err(serde::de::Error::custom(
+                "tab width must be a value from 1 to 16 inclusive",
+            ))
+        }
+    })
+}
+
+pub fn deserialize_auto_pairs<'de, D>(deserializer: D) -> Result<Option<AutoPairs>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    Ok(Option::<AutoPairConfig>::deserialize(deserializer)?.and_then(AutoPairConfig::into))
+}
+
+fn default_timeout() -> u64 {
+    20
+}
diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs
index 7576b3a78668..9015e957c070 100644
--- a/helix-core/src/textobject.rs
+++ b/helix-core/src/textobject.rs
@@ -7,7 +7,7 @@ use crate::chars::{categorize_char, char_is_whitespace, CharCategory};
 use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary};
 use crate::line_ending::rope_is_line_ending;
 use crate::movement::Direction;
-use crate::syntax::LanguageConfiguration;
+use crate::syntax::config::LanguageConfiguration;
 use crate::Range;
 use crate::{surround, Syntax};
 
diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs
index 56b4d2ba966e..b41b2f64a33c 100644
--- a/helix-core/tests/indent.rs
+++ b/helix-core/tests/indent.rs
@@ -1,7 +1,7 @@
 use arc_swap::ArcSwap;
 use helix_core::{
     indent::{indent_level_for_line, treesitter_indent_for_pos, IndentStyle},
-    syntax::{Configuration, Loader},
+    syntax::{config::Configuration, Loader},
     Syntax,
 };
 use helix_stdx::rope::RopeSliceExt;
diff --git a/helix-dap/src/client.rs b/helix-dap/src/client.rs
index 6aa656e17006..1529b6f93cff 100644
--- a/helix-dap/src/client.rs
+++ b/helix-dap/src/client.rs
@@ -4,7 +4,7 @@ use crate::{
     types::*,
     Error, Result,
 };
-use helix_core::syntax::DebuggerQuirks;
+use helix_core::syntax::config::DebuggerQuirks;
 
 use serde_json::Value;
 
diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs
index f2b78a118ae9..83799ac75422 100644
--- a/helix-lsp/src/client.rs
+++ b/helix-lsp/src/client.rs
@@ -10,7 +10,7 @@ use crate::lsp::{
     DidChangeWorkspaceFoldersParams, OneOf, PositionEncodingKind, SignatureHelp, Url,
     WorkspaceFolder, WorkspaceFoldersChangeEvent,
 };
-use helix_core::{find_workspace, syntax::LanguageServerFeature, ChangeSet, Rope};
+use helix_core::{find_workspace, syntax::config::LanguageServerFeature, ChangeSet, Rope};
 use helix_loader::VERSION_AND_GIT_HASH;
 use helix_stdx::path;
 use parking_lot::Mutex;
diff --git a/helix-lsp/src/lib.rs b/helix-lsp/src/lib.rs
index ba41cbc5aa3c..0c89ee79b501 100644
--- a/helix-lsp/src/lib.rs
+++ b/helix-lsp/src/lib.rs
@@ -12,7 +12,7 @@ pub use jsonrpc::Call;
 pub use lsp::{Position, Url};
 
 use futures_util::stream::select_all::SelectAll;
-use helix_core::syntax::{
+use helix_core::syntax::config::{
     LanguageConfiguration, LanguageServerConfiguration, LanguageServerFeatures,
 };
 use helix_stdx::path;
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
index 2e15dcdcc77c..ffeb7e37ca33 100644
--- a/helix-term/src/commands.rs
+++ b/helix-term/src/commands.rs
@@ -34,7 +34,7 @@ use helix_core::{
     regex::{self, Regex},
     search::{self, CharMatcher},
     selection, surround,
-    syntax::{BlockCommentToken, LanguageServerFeature},
+    syntax::config::{BlockCommentToken, LanguageServerFeature},
     text_annotations::{Overlay, TextAnnotations},
     textobject,
     unicode::width::UnicodeWidthChar,
diff --git a/helix-term/src/commands/dap.rs b/helix-term/src/commands/dap.rs
index 83dd936cdff2..4f20af4afed4 100644
--- a/helix-term/src/commands/dap.rs
+++ b/helix-term/src/commands/dap.rs
@@ -5,7 +5,7 @@ use crate::{
     ui::{self, overlay::overlaid, Picker, Popup, Prompt, PromptEvent, Text},
 };
 use dap::{StackFrame, Thread, ThreadStates};
-use helix_core::syntax::{DebugArgumentValue, DebugConfigCompletion, DebugTemplate};
+use helix_core::syntax::config::{DebugArgumentValue, DebugConfigCompletion, DebugTemplate};
 use helix_dap::{self as dap, Client};
 use helix_lsp::block_on;
 use helix_view::editor::Breakpoint;
diff --git a/helix-term/src/commands/lsp.rs b/helix-term/src/commands/lsp.rs
index 8377f7c71773..9c55c830c30a 100644
--- a/helix-term/src/commands/lsp.rs
+++ b/helix-term/src/commands/lsp.rs
@@ -14,7 +14,7 @@ use tui::{text::Span, widgets::Row};
 
 use super::{align_view, push_jump, Align, Context, Editor};
 use helix_core::{
-    diagnostic::DiagnosticProvider, syntax::LanguageServerFeature,
+    diagnostic::DiagnosticProvider, syntax::config::LanguageServerFeature,
     text_annotations::InlineAnnotation, Selection, Uri,
 };
 use helix_stdx::path;
diff --git a/helix-term/src/handlers/completion.rs b/helix-term/src/handlers/completion.rs
index 20fac514e170..5017399bd078 100644
--- a/helix-term/src/handlers/completion.rs
+++ b/helix-term/src/handlers/completion.rs
@@ -2,7 +2,7 @@ use std::collections::HashMap;
 
 use helix_core::chars::char_is_word;
 use helix_core::completion::CompletionProvider;
-use helix_core::syntax::LanguageServerFeature;
+use helix_core::syntax::config::LanguageServerFeature;
 use helix_event::{register_hook, TaskHandle};
 use helix_lsp::lsp;
 use helix_stdx::rope::RopeSliceExt;
diff --git a/helix-term/src/handlers/completion/request.rs b/helix-term/src/handlers/completion/request.rs
index 26f252a4a9bb..51a3129a8498 100644
--- a/helix-term/src/handlers/completion/request.rs
+++ b/helix-term/src/handlers/completion/request.rs
@@ -5,7 +5,7 @@ use std::time::Duration;
 use arc_swap::ArcSwap;
 use futures_util::Future;
 use helix_core::completion::CompletionProvider;
-use helix_core::syntax::LanguageServerFeature;
+use helix_core::syntax::config::LanguageServerFeature;
 use helix_event::{cancelable_future, TaskController, TaskHandle};
 use helix_lsp::lsp;
 use helix_lsp::lsp::{CompletionContext, CompletionTriggerKind};
diff --git a/helix-term/src/handlers/document_colors.rs b/helix-term/src/handlers/document_colors.rs
index 956cecbfbb31..f46ef2ac1f5d 100644
--- a/helix-term/src/handlers/document_colors.rs
+++ b/helix-term/src/handlers/document_colors.rs
@@ -1,7 +1,7 @@
 use std::{collections::HashSet, time::Duration};
 
 use futures_util::{stream::FuturesOrdered, StreamExt};
-use helix_core::{syntax::LanguageServerFeature, text_annotations::InlineAnnotation};
+use helix_core::{syntax::config::LanguageServerFeature, text_annotations::InlineAnnotation};
 use helix_event::{cancelable_future, register_hook};
 use helix_lsp::lsp;
 use helix_view::{
diff --git a/helix-term/src/handlers/signature_help.rs b/helix-term/src/handlers/signature_help.rs
index 33c9e16ce6b8..8a0c9754c6f0 100644
--- a/helix-term/src/handlers/signature_help.rs
+++ b/helix-term/src/handlers/signature_help.rs
@@ -1,7 +1,7 @@
 use std::sync::Arc;
 use std::time::Duration;
 
-use helix_core::syntax::LanguageServerFeature;
+use helix_core::syntax::config::LanguageServerFeature;
 use helix_event::{cancelable_future, register_hook, send_blocking, TaskController, TaskHandle};
 use helix_lsp::lsp::{self, SignatureInformation};
 use helix_stdx::rope::RopeSliceExt;
diff --git a/helix-term/src/ui/mod.rs b/helix-term/src/ui/mod.rs
index a76adbe211d8..5b13263bbf11 100644 --- a/helix-term/src/ui/mod.rs +++ b/helix-term/src/ui/mod.rs @@ -372,7 +372,7 @@ pub mod completers { use super::Utf8PathBuf; use crate::ui::prompt::Completion; use helix_core::fuzzy::fuzzy_match; - use helix_core::syntax::LanguageServerFeature; + use helix_core::syntax::config::LanguageServerFeature; use helix_view::document::SCRATCH_BUFFER_NAME; use helix_view::theme; use helix_view::{editor::Config, Editor}; diff --git a/helix-term/tests/integration.rs b/helix-term/tests/integration.rs index 5e418cebdabd..469242e403d6 100644 --- a/helix-term/tests/integration.rs +++ b/helix-term/tests/integration.rs @@ -2,7 +2,7 @@ mod test { mod helpers; - use helix_core::{syntax::AutoPairConfig, Selection}; + use helix_core::{syntax::config::AutoPairConfig, Selection}; use helix_term::config::Config; use indoc::indoc; diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 41c9ee1ef6e4..42b64a51cf00 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -9,7 +9,7 @@ use helix_core::diagnostic::DiagnosticProvider; use helix_core::doc_formatter::TextFormat; use helix_core::encoding::Encoding; use helix_core::snippets::{ActiveSnippet, SnippetRenderCtx}; -use helix_core::syntax::{Highlight, LanguageServerFeature}; +use helix_core::syntax::{config::LanguageServerFeature, Highlight}; use helix_core::text_annotations::{InlineAnnotation, Overlay}; use helix_event::TaskController; use helix_lsp::util::lsp_pos_to_pos; @@ -38,7 +38,7 @@ use helix_core::{ history::{History, State, UndoKind}, indent::{auto_detect_indent_style, IndentStyle}, line_ending::auto_detect_line_ending, - syntax::{self, LanguageConfiguration}, + syntax::{self, config::LanguageConfiguration}, ChangeSet, Diagnostic, LineEnding, Range, Rope, RopeBuilder, Selection, Syntax, Transaction, }; @@ -1114,7 +1114,7 @@ impl Document { pub fn detect_language_config( &self, config_loader: &syntax::Loader, - ) -> Option> { + ) -> Option> { config_loader .language_config_for_file_name(self.path.as_ref()?) .or_else(|| config_loader.language_config_for_shebang(self.text().slice(..))) @@ -1256,8 +1256,8 @@ impl Document { /// if it exists. pub fn set_language( &mut self, - language_config: Option>, - loader: Option>>, + language_config: Option>, + loader: Option>>, ) { if let (Some(language_config), Some(loader)) = (language_config, loader) { if let Some(highlight_config) = @@ -1274,7 +1274,7 @@ impl Document { } /// Set the programming language for the file if you know the language but don't have the - /// [`syntax::LanguageConfiguration`] for it. + /// [`syntax::config::LanguageConfiguration`] for it. 
pub fn set_language_by_language_id( &mut self, language_id: &str, diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index dfade86baf01..f88d86bc6a2b 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -46,7 +46,10 @@ pub use helix_core::diagnostic::Severity; use helix_core::{ auto_pairs::AutoPairs, diagnostic::DiagnosticProvider, - syntax::{self, AutoPairConfig, IndentationHeuristic, LanguageServerFeature, SoftWrap}, + syntax::{ + self, + config::{AutoPairConfig, IndentationHeuristic, LanguageServerFeature, SoftWrap}, + }, Change, LineEnding, Position, Range, Selection, Uri, NATIVE_LINE_ENDING, }; use helix_dap as dap; diff --git a/helix-view/src/gutter.rs b/helix-view/src/gutter.rs index 665a78bcc6f0..bc87d836fc9d 100644 --- a/helix-view/src/gutter.rs +++ b/helix-view/src/gutter.rs @@ -1,6 +1,6 @@ use std::fmt::Write; -use helix_core::syntax::LanguageServerFeature; +use helix_core::syntax::config::LanguageServerFeature; use crate::{ editor::GutterType, diff --git a/xtask/src/helpers.rs b/xtask/src/helpers.rs index f96cdfb38cfa..d2c955bc4729 100644 --- a/xtask/src/helpers.rs +++ b/xtask/src/helpers.rs @@ -1,7 +1,7 @@ use std::path::{Path, PathBuf}; use crate::path; -use helix_core::syntax::Configuration as LangConfig; +use helix_core::syntax::config::Configuration as LangConfig; use helix_term::health::TsFeature; /// Get the list of languages that support a particular tree-sitter From 21668c77cbdeed51927855a9111185ec6d9a704c Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Thu, 20 Feb 2025 17:45:19 -0500 Subject: [PATCH 2/7] Add the `syn_loader` to `Document` This type also exists on `Editor`. This change brings it to the `Document` as well because the replacement for `Syntax` in the child commits will eliminate `Syntax`'s copy of `syn_loader`. `Syntax` will also be responsible for returning the highlighter and query iterators (which will borrow the loader), so the loader must be separated from that type. In the long run, when we make a larger refactor to have `Document::apply` be a function of the `Editor` instead of the `Document`, we will be able to drop this field on `Document` - it is currently only necessary for `Document::apply`. Once we make that refactor, we will be able to eliminate the surrounding `Arc` in `Arc>` and use the `ArcSwap` directly instead. --- helix-core/src/syntax.rs | 11 ++++- helix-term/src/application.rs | 3 +- helix-term/src/commands/typed.rs | 5 ++- helix-term/src/ui/picker.rs | 9 +++- helix-view/src/document.rs | 76 ++++++++++++++++++-------------- helix-view/src/editor.rs | 20 ++++++--- helix-view/src/gutter.rs | 7 ++- helix-view/src/view.rs | 7 ++- 8 files changed, 94 insertions(+), 44 deletions(-) diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 6a2c28d1e126..dfc323429cee 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -274,6 +274,15 @@ struct FileTypeGlobMatcher { file_types: Vec, } +impl Default for FileTypeGlobMatcher { + fn default() -> Self { + Self { + matcher: globset::GlobSet::empty(), + file_types: Default::default(), + } + } +} + impl FileTypeGlobMatcher { fn new(file_types: Vec) -> Result { let mut builder = globset::GlobSetBuilder::new(); @@ -299,7 +308,7 @@ impl FileTypeGlobMatcher { // Expose loader as Lazy<> global since it's always static? -#[derive(Debug)] +#[derive(Debug, Default)] pub struct Loader { // highlight_names ? 
language_configs: Vec>, diff --git a/helix-term/src/application.rs b/helix-term/src/application.rs index 3bc32439565d..df968daf44c6 100644 --- a/helix-term/src/application.rs +++ b/helix-term/src/application.rs @@ -389,8 +389,9 @@ impl Application { let lang_loader = helix_core::config::user_lang_loader()?; self.editor.syn_loader.store(Arc::new(lang_loader)); + let loader = self.editor.syn_loader.load(); for document in self.editor.documents.values_mut() { - document.detect_language(self.editor.syn_loader.clone()); + document.detect_language(&loader); let diagnostics = Editor::doc_diagnostics( &self.editor.language_servers, &self.editor.diagnostics, diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index 4e912127c3d6..248adbed48eb 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -2080,10 +2080,11 @@ fn language(cx: &mut compositor::Context, args: Args, event: PromptEvent) -> any let doc = doc_mut!(cx.editor); + let loader = cx.editor.syn_loader.load(); if &args[0] == DEFAULT_LANGUAGE_NAME { - doc.set_language(None, None) + doc.set_language(None, &loader) } else { - doc.set_language_by_language_id(&args[0], cx.editor.syn_loader.clone())?; + doc.set_language_by_language_id(&args[0], &loader)?; } doc.detect_indent_and_line_ending(); diff --git a/helix-term/src/ui/picker.rs b/helix-term/src/ui/picker.rs index a6ce91a67712..5a4b3afb535b 100644 --- a/helix-term/src/ui/picker.rs +++ b/helix-term/src/ui/picker.rs @@ -624,7 +624,14 @@ impl Picker { if content_type.is_binary() { return Ok(CachedPreview::Binary); } - Document::open(&path, None, None, editor.config.clone()).map_or( + Document::open( + &path, + None, + false, + editor.config.clone(), + editor.syn_loader.clone(), + ) + .map_or( Err(std::io::Error::new( std::io::ErrorKind::NotFound, "Cannot open document", diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 42b64a51cf00..061ddf72d399 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -207,6 +207,11 @@ pub struct Document { // NOTE: ideally this would live on the handler for color swatches. This is blocked on a // large refactor that would make `&mut Editor` available on the `DocumentDidChange` event. pub color_swatch_controller: TaskController, + + // NOTE: this field should eventually go away - we should use the Editor's syn_loader instead + // of storing a copy on every doc. Then we can remove the surrounding `Arc` and use the + // `ArcSwap` directly. + syn_loader: Arc>, } #[derive(Debug, Clone, Default)] @@ -677,6 +682,7 @@ impl Document { text: Rope, encoding_with_bom_info: Option<(&'static Encoding, bool)>, config: Arc>, + syn_loader: Arc>, ) -> Self { let (encoding, has_bom) = encoding_with_bom_info.unwrap_or((encoding::UTF_8, false)); let line_ending = config.load().default_line_ending.into(); @@ -719,13 +725,17 @@ impl Document { jump_labels: HashMap::new(), color_swatches: None, color_swatch_controller: TaskController::new(), + syn_loader, } } - pub fn default(config: Arc>) -> Self { + pub fn default( + config: Arc>, + syn_loader: Arc>, + ) -> Self { let line_ending: LineEnding = config.load().default_line_ending.into(); let text = Rope::from(line_ending.as_str()); - Self::from(text, None, config) + Self::from(text, None, config, syn_loader) } // TODO: async fn? 
@@ -734,8 +744,9 @@ impl Document { pub fn open( path: &Path, mut encoding: Option<&'static Encoding>, - config_loader: Option>>, + detect_language: bool, config: Arc>, + syn_loader: Arc>, ) -> Result { // If the path is not a regular file (e.g.: /dev/random) it should not be opened. if path.metadata().is_ok_and(|metadata| !metadata.is_file()) { @@ -761,12 +772,13 @@ impl Document { (Rope::from(line_ending.as_str()), encoding, false) }; - let mut doc = Self::from(rope, Some((encoding, has_bom)), config); + let loader = syn_loader.load(); + let mut doc = Self::from(rope, Some((encoding, has_bom)), config, syn_loader); // set the path and try detecting the language doc.set_path(Some(path)); - if let Some(loader) = config_loader { - doc.detect_language(loader); + if detect_language { + doc.detect_language(&loader); } doc.editor_config = editor_config; @@ -1102,12 +1114,8 @@ impl Document { } /// Detect the programming language based on the file type. - pub fn detect_language(&mut self, config_loader: Arc>) { - let loader = config_loader.load(); - self.set_language( - self.detect_language_config(&loader), - Some(Arc::clone(&config_loader)), - ); + pub fn detect_language(&mut self, loader: &syntax::Loader) { + self.set_language(self.detect_language_config(loader), loader); } /// Detect the programming language based on the file type. @@ -1257,20 +1265,20 @@ impl Document { pub fn set_language( &mut self, language_config: Option>, - loader: Option>>, + loader: &syntax::Loader, ) { - if let (Some(language_config), Some(loader)) = (language_config, loader) { - if let Some(highlight_config) = - language_config.highlight_config(&(*loader).load().scopes()) - { - self.syntax = Syntax::new(self.text.slice(..), highlight_config, loader); - } - - self.language = Some(language_config); - } else { - self.syntax = None; - self.language = None; - }; + self.language = language_config; + self.syntax = self + .language + .as_ref() + .and_then(|config| config.highlight_config(&loader.scopes())) + .and_then(|highlight_config| { + Syntax::new( + self.text.slice(..), + highlight_config, + self.syn_loader.clone(), + ) + }); } /// Set the programming language for the file if you know the language but don't have the @@ -1278,13 +1286,12 @@ impl Document { pub fn set_language_by_language_id( &mut self, language_id: &str, - config_loader: Arc>, + loader: &syntax::Loader, ) -> anyhow::Result<()> { - let language_config = (*config_loader) - .load() + let language_config = loader .language_config_for_language_id(language_id) .ok_or_else(|| anyhow!("invalid language id: {}", language_id))?; - self.set_language(Some(language_config), Some(config_loader)); + self.set_language(Some(language_config), loader); Ok(()) } @@ -2299,6 +2306,7 @@ mod test { text, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); let view = ViewId::default(); doc.set_selection(view, Selection::single(0, 0)); @@ -2337,6 +2345,7 @@ mod test { text, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); let view = ViewId::default(); doc.set_selection(view, Selection::single(5, 5)); @@ -2450,9 +2459,12 @@ mod test { #[test] fn test_line_ending() { assert_eq!( - Document::default(Arc::new(ArcSwap::new(Arc::new(Config::default())))) - .text() - .to_string(), + Document::default( + Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())) + ) + .text() + 
.to_string(), helix_core::NATIVE_LINE_ENDING.as_str() ); } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index f88d86bc6a2b..ad5adf862bd5 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -1474,9 +1474,9 @@ impl Editor { } pub fn refresh_doc_language(&mut self, doc_id: DocumentId) { - let loader = self.syn_loader.clone(); + let loader = self.syn_loader.load(); let doc = doc_mut!(self, &doc_id); - doc.detect_language(loader); + doc.detect_language(&loader); doc.detect_editor_config(); doc.detect_indent_and_line_ending(); self.refresh_language_servers(doc_id); @@ -1736,7 +1736,10 @@ impl Editor { } pub fn new_file(&mut self, action: Action) -> DocumentId { - self.new_file_from_document(action, Document::default(self.config.clone())) + self.new_file_from_document( + action, + Document::default(self.config.clone(), self.syn_loader.clone()), + ) } pub fn new_file_from_stdin(&mut self, action: Action) -> Result { @@ -1745,6 +1748,7 @@ impl Editor { helix_core::Rope::default(), Some((encoding, has_bom)), self.config.clone(), + self.syn_loader.clone(), ); let doc_id = self.new_file_from_document(action, doc); let doc = doc_mut!(self, &doc_id); @@ -1773,8 +1777,9 @@ impl Editor { let mut doc = Document::open( &path, None, - Some(self.syn_loader.clone()), + true, self.config.clone(), + self.syn_loader.clone(), )?; let diagnostics = @@ -1869,7 +1874,12 @@ impl Editor { .iter() .map(|(&doc_id, _)| doc_id) .next() - .unwrap_or_else(|| self.new_document(Document::default(self.config.clone()))); + .unwrap_or_else(|| { + self.new_document(Document::default( + self.config.clone(), + self.syn_loader.clone(), + )) + }); let view = View::new(doc_id, self.config().gutters.clone()); let view_id = self.tree.insert(view); let doc = doc_mut!(self, &doc_id); diff --git a/helix-view/src/gutter.rs b/helix-view/src/gutter.rs index bc87d836fc9d..c2cbc0da500e 100644 --- a/helix-view/src/gutter.rs +++ b/helix-view/src/gutter.rs @@ -334,7 +334,7 @@ mod tests { use crate::graphics::Rect; use crate::DocumentId; use arc_swap::ArcSwap; - use helix_core::Rope; + use helix_core::{syntax, Rope}; #[test] fn test_default_gutter_widths() { @@ -346,6 +346,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); assert_eq!(view.gutters.layout.len(), 5); @@ -371,6 +372,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); assert_eq!(view.gutters.layout.len(), 1); @@ -389,6 +391,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); assert_eq!(view.gutters.layout.len(), 2); @@ -411,6 +414,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); let rope = Rope::from_str("a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np"); @@ -418,6 +422,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); assert_eq!(view.gutters.layout.len(), 2); diff --git a/helix-view/src/view.rs b/helix-view/src/view.rs index d6f10753a135..6d237e203619 100644 --- a/helix-view/src/view.rs +++ b/helix-view/src/view.rs @@ -699,7 +699,7 @@ mod tests { use super::*; use arc_swap::ArcSwap; - use helix_core::Rope; + use helix_core::{syntax, Rope}; // 1 diagnostic + 1 spacer + 3 linenr 
(< 1000 lines) + 1 spacer + 1 diff const DEFAULT_GUTTER_OFFSET: u16 = 7; @@ -719,6 +719,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); doc.ensure_view_init(view.id); @@ -894,6 +895,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); doc.ensure_view_init(view.id); assert_eq!( @@ -924,6 +926,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); doc.ensure_view_init(view.id); assert_eq!( @@ -948,6 +951,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); doc.ensure_view_init(view.id); @@ -1032,6 +1036,7 @@ mod tests { rope, None, Arc::new(ArcSwap::new(Arc::new(Config::default()))), + Arc::new(ArcSwap::from_pointee(syntax::Loader::default())), ); doc.ensure_view_init(view.id); From 8ead488fd5884ed305986f710b209a951078ef65 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Thu, 20 Feb 2025 20:38:14 -0500 Subject: [PATCH 3/7] Replace tree-sitter with tree-house --- Cargo.lock | 37 +- Cargo.toml | 2 +- helix-core/Cargo.toml | 5 +- helix-core/src/indent.rs | 444 ++-- helix-core/src/lib.rs | 4 +- helix-core/src/match_brackets.rs | 14 +- helix-core/src/movement.rs | 36 +- helix-core/src/object.rs | 39 +- helix-core/src/position.rs | 5 - helix-core/src/selection.rs | 6 +- helix-core/src/snippets/active.rs | 4 +- helix-core/src/syntax.rs | 2672 ++++++----------------- helix-core/src/syntax/config.rs | 21 +- helix-core/src/syntax/tree_cursor.rs | 264 --- helix-core/src/text_annotations.rs | 11 +- helix-core/src/textobject.rs | 15 +- helix-core/tests/indent.rs | 16 +- helix-loader/Cargo.toml | 4 +- helix-loader/src/grammar.rs | 23 +- helix-term/src/commands.rs | 35 +- helix-term/src/commands/typed.rs | 43 +- helix-term/src/ui/document.rs | 207 +- helix-term/src/ui/editor.rs | 310 ++- helix-term/src/ui/lsp/signature_help.rs | 19 +- helix-term/src/ui/markdown.rs | 133 +- helix-term/src/ui/picker.rs | 21 +- helix-term/src/ui/picker/handlers.rs | 22 +- helix-term/src/ui/prompt.rs | 2 +- helix-view/src/document.rs | 57 +- helix-view/src/editor.rs | 6 +- helix-view/src/theme.rs | 73 +- helix-view/src/view.rs | 18 +- xtask/src/helpers.rs | 6 +- xtask/src/main.rs | 42 +- 34 files changed, 1493 insertions(+), 3123 deletions(-) delete mode 100644 helix-core/src/syntax/tree_cursor.rs diff --git a/Cargo.lock b/Cargo.lock index cabb98a70b0e..3f1f2fe45329 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -30,7 +30,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", - "getrandom 0.2.15", "once_cell", "version_check", "zerocopy", @@ -1319,14 +1318,13 @@ dependencies = [ name = "helix-core" version = "25.1.1" dependencies = [ - "ahash", "anyhow", "arc-swap", "bitflags", "chrono", "encoding_rs", + "foldhash", "globset", - "hashbrown 0.14.5", "helix-loader", "helix-parsec", "helix-stdx", @@ -1347,7 +1345,7 @@ dependencies = [ "smartstring", "textwrap", "toml", - "tree-sitter", + "tree-house", "unicode-general-category", "unicode-segmentation", "unicode-width 0.1.12", @@ -1391,14 +1389,13 @@ dependencies = [ "cc", "etcetera", "helix-stdx", - "libloading", "log", "once_cell", "serde", "tempfile", "threadpool", "toml", - "tree-sitter", 
+ "tree-house", ] [[package]] @@ -2665,13 +2662,31 @@ dependencies = [ ] [[package]] -name = "tree-sitter" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" +name = "tree-house" +version = "0.1.0-beta.2" +source = "git+https://github.com/helix-editor/tree-house#1fa65eca36fdbb2837e0655bfda53ed627fc25c0" dependencies = [ - "cc", + "arc-swap", + "hashbrown 0.15.2", + "kstring", + "once_cell", "regex", + "regex-cursor", + "ropey", + "slab", + "tree-house-bindings", +] + +[[package]] +name = "tree-house-bindings" +version = "0.1.0-beta.2" +source = "git+https://github.com/helix-editor/tree-house#1fa65eca36fdbb2837e0655bfda53ed627fc25c0" +dependencies = [ + "cc", + "libloading", + "regex-cursor", + "ropey", + "thiserror 2.0.12", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 667a83967726..81d445aa27ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,7 +37,7 @@ package.helix-tui.opt-level = 2 package.helix-term.opt-level = 2 [workspace.dependencies] -tree-sitter = { version = "0.22" } +tree-house = { git = "https://github.com/helix-editor/tree-house", default-features = false } nucleo = "0.5.0" slotmap = "1.0.7" thiserror = "2.0" diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index 10fb5a52cdee..2d84dcedebec 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -32,13 +32,12 @@ unicode-segmentation.workspace = true unicode-width = "=0.1.12" unicode-general-category = "1.0" slotmap.workspace = true -tree-sitter.workspace = true +tree-house.workspace = true once_cell = "1.21" arc-swap = "1" regex = "1" bitflags.workspace = true -ahash = "0.8.11" -hashbrown = { version = "0.14.5", features = ["raw"] } +foldhash.workspace = true url = "2.5.4" log = "0.4" diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 52369bb7bd87..a1e2c86405d8 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -1,17 +1,17 @@ use std::{borrow::Cow, collections::HashMap, iter}; use helix_stdx::rope::RopeSliceExt; -use tree_sitter::{Query, QueryCursor, QueryPredicateArg}; use crate::{ chars::{char_is_line_ending, char_is_whitespace}, graphemes::{grapheme_width, tab_width_at}, - syntax::{ - config::{IndentationHeuristic, LanguageConfiguration}, - RopeProvider, Syntax, + syntax::{self, config::IndentationHeuristic}, + tree_sitter::{ + self, + query::{InvalidPredicateError, UserPredicate}, + Capture, Grammar, InactiveQueryCursor, Node, Pattern, Query, QueryMatch, RopeInput, }, - tree_sitter::Node, - Position, Rope, RopeSlice, Tendril, + Position, Rope, RopeSlice, Syntax, Tendril, }; /// Enum representing indentation style. @@ -282,18 +282,164 @@ fn add_indent_level( /// Return true if only whitespace comes before the node on its line. /// If given, new_line_byte_pos is treated the same way as any existing newline. 
-fn is_first_in_line(node: Node, text: RopeSlice, new_line_byte_pos: Option) -> bool { - let mut line_start_byte_pos = text.line_to_byte(node.start_position().row); +fn is_first_in_line(node: &Node, text: RopeSlice, new_line_byte_pos: Option) -> bool { + let line = text.byte_to_line(node.start_byte() as usize); + let mut line_start_byte_pos = text.line_to_byte(line) as u32; if let Some(pos) = new_line_byte_pos { if line_start_byte_pos < pos && pos <= node.start_byte() { line_start_byte_pos = pos; } } - text.byte_slice(line_start_byte_pos..node.start_byte()) + text.byte_slice(line_start_byte_pos as usize..node.start_byte() as usize) .chars() .all(|c| c.is_whitespace()) } +#[derive(Debug, Default)] +pub struct IndentQueryPredicates { + not_kind_eq: Option<(Capture, Box)>, + same_line: Option<(Capture, Capture, bool)>, + one_line: Option<(Capture, bool)>, +} + +impl IndentQueryPredicates { + fn are_satisfied( + &self, + match_: &QueryMatch, + text: RopeSlice, + new_line_byte_pos: Option, + ) -> bool { + if let Some((capture, not_expected_kind)) = self.not_kind_eq.as_ref() { + if !match_ + .nodes_for_capture(*capture) + .next() + .is_some_and(|node| node.kind() != not_expected_kind.as_ref()) + { + return false; + } + } + + if let Some((capture1, capture2, negated)) = self.same_line { + let n1 = match_.nodes_for_capture(capture1).next(); + let n2 = match_.nodes_for_capture(capture2).next(); + let satisfied = n1.zip(n2).is_some_and(|(n1, n2)| { + let n1_line = get_node_start_line(text, n1, new_line_byte_pos); + let n2_line = get_node_start_line(text, n2, new_line_byte_pos); + let same_line = n1_line == n2_line; + same_line != negated + }); + + if !satisfied { + return false; + } + } + + if let Some((capture, negated)) = self.one_line { + let node = match_.nodes_for_capture(capture).next(); + let satisfied = node.is_some_and(|node| { + let start_line = get_node_start_line(text, node, new_line_byte_pos); + let end_line = get_node_end_line(text, node, new_line_byte_pos); + let one_line = end_line == start_line; + one_line != negated + }); + + if !satisfied { + return false; + } + } + + true + } +} + +#[derive(Debug)] +pub struct IndentQuery { + query: Query, + properties: HashMap, + predicates: HashMap, + indent_capture: Option, + indent_always_capture: Option, + outdent_capture: Option, + outdent_always_capture: Option, + align_capture: Option, + anchor_capture: Option, + extend_capture: Option, + extend_prevent_once_capture: Option, +} + +impl IndentQuery { + pub fn new(grammar: Grammar, source: &str) -> Result { + let mut properties = HashMap::new(); + let mut predicates: HashMap = HashMap::new(); + let query = Query::new(grammar, source, |pattern, predicate| match predicate { + UserPredicate::SetProperty { key: "scope", val } => { + let scope = match val { + Some("all") => IndentScope::All, + Some("tail") => IndentScope::Tail, + Some(other) => { + return Err(format!("unknown scope (#set! scope \"{other}\")").into()) + } + None => return Err("missing scope value (#set! scope ...)".into()), + }; + + properties.insert(pattern, scope); + + Ok(()) + } + UserPredicate::Other(predicate) => { + let name = predicate.name(); + match name { + "not-kind-eq?" => { + predicate.check_arg_count(2)?; + let capture = predicate.capture_arg(0)?; + let not_expected_kind = predicate.str_arg(1)?; + + predicates.entry(pattern).or_default().not_kind_eq = + Some((capture, not_expected_kind.to_string().into_boxed_str())); + Ok(()) + } + "same-line?" | "not-same-line?" 
=> { + predicate.check_arg_count(2)?; + let capture1 = predicate.capture_arg(0)?; + let capture2 = predicate.capture_arg(1)?; + let negated = name == "not-same-line?"; + + predicates.entry(pattern).or_default().same_line = + Some((capture1, capture2, negated)); + Ok(()) + } + "one-line?" | "not-one-line?" => { + predicate.check_arg_count(1)?; + let capture = predicate.capture_arg(0)?; + let negated = name == "not-one-line?"; + + predicates.entry(pattern).or_default().one_line = Some((capture, negated)); + Ok(()) + } + _ => Err(InvalidPredicateError::unknown(UserPredicate::Other( + predicate, + ))), + } + } + _ => Err(InvalidPredicateError::unknown(predicate)), + })?; + + Ok(Self { + properties, + predicates, + indent_capture: query.get_capture("indent"), + indent_always_capture: query.get_capture("indent.always"), + outdent_capture: query.get_capture("outdent"), + outdent_always_capture: query.get_capture("outdent.always"), + align_capture: query.get_capture("align"), + anchor_capture: query.get_capture("anchor"), + extend_capture: query.get_capture("extend"), + extend_prevent_once_capture: query.get_capture("extend.prevent-once"), + query, + }) + } +} + /// The total indent for some line of code. /// This is usually constructed in one of 2 ways: /// - Successively add indent captures to get the (added) indent from a single line @@ -456,16 +602,16 @@ struct IndentQueryResult<'a> { extend_captures: HashMap>, } -fn get_node_start_line(node: Node, new_line_byte_pos: Option) -> usize { - let mut node_line = node.start_position().row; +fn get_node_start_line(text: RopeSlice, node: &Node, new_line_byte_pos: Option) -> usize { + let mut node_line = text.byte_to_line(node.start_byte() as usize); // Adjust for the new line that will be inserted if new_line_byte_pos.is_some_and(|pos| node.start_byte() >= pos) { node_line += 1; } node_line } -fn get_node_end_line(node: Node, new_line_byte_pos: Option) -> usize { - let mut node_line = node.end_position().row; +fn get_node_end_line(text: RopeSlice, node: &Node, new_line_byte_pos: Option) -> usize { + let mut node_line = text.byte_to_line(node.end_byte() as usize); // Adjust for the new line that will be inserted (with a strict inequality since end_byte is exclusive) if new_line_byte_pos.is_some_and(|pos| node.end_byte() > pos) { node_line += 1; @@ -474,175 +620,98 @@ fn get_node_end_line(node: Node, new_line_byte_pos: Option) -> usize { } fn query_indents<'a>( - query: &Query, + query: &IndentQuery, syntax: &Syntax, - cursor: &mut QueryCursor, text: RopeSlice<'a>, - range: std::ops::Range, - new_line_byte_pos: Option, + range: std::ops::Range, + new_line_byte_pos: Option, ) -> IndentQueryResult<'a> { let mut indent_captures: HashMap> = HashMap::new(); let mut extend_captures: HashMap> = HashMap::new(); + + let mut cursor = InactiveQueryCursor::new(); cursor.set_byte_range(range); + let mut cursor = cursor.execute_query( + &query.query, + &syntax.tree().root_node(), + RopeInput::new(text), + ); // Iterate over all captures from the query - for m in cursor.matches(query, syntax.tree().root_node(), RopeProvider(text)) { + while let Some(m) = cursor.next_match() { // Skip matches where not all custom predicates are fulfilled - if !query.general_predicates(m.pattern_index).iter().all(|pred| { - match pred.operator.as_ref() { - "not-kind-eq?" 
=> match (pred.args.first(), pred.args.get(1)) { - ( - Some(QueryPredicateArg::Capture(capture_idx)), - Some(QueryPredicateArg::String(kind)), - ) => { - let node = m.nodes_for_capture_index(*capture_idx).next(); - match node { - Some(node) => node.kind()!=kind.as_ref(), - _ => true, - } - } - _ => { - panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string"); - } - }, - "same-line?" | "not-same-line?" => { - match (pred.args.first(), pred.args.get(1)) { - ( - Some(QueryPredicateArg::Capture(capt1)), - Some(QueryPredicateArg::Capture(capt2)) - ) => { - let n1 = m.nodes_for_capture_index(*capt1).next(); - let n2 = m.nodes_for_capture_index(*capt2).next(); - match (n1, n2) { - (Some(n1), Some(n2)) => { - let n1_line = get_node_start_line(n1, new_line_byte_pos); - let n2_line = get_node_start_line(n2, new_line_byte_pos); - let same_line = n1_line == n2_line; - same_line==(pred.operator.as_ref()=="same-line?") - } - _ => true, - } - } - _ => { - panic!("Invalid indent query: Arguments to \"{}\" must be 2 captures", pred.operator); - } - } - } - "one-line?" | "not-one-line?" => match pred.args.first() { - Some(QueryPredicateArg::Capture(capture_idx)) => { - let node = m.nodes_for_capture_index(*capture_idx).next(); - - match node { - Some(node) => { - let (start_line, end_line) = (get_node_start_line(node,new_line_byte_pos), get_node_end_line(node, new_line_byte_pos)); - let one_line = end_line == start_line; - one_line != (pred.operator.as_ref() == "not-one-line?") - }, - _ => true, - } - } - _ => { - panic!("Invalid indent query: Arguments to \"not-kind-eq?\" must be a capture and a string"); - } - }, - _ => { - panic!( - "Invalid indent query: Unknown predicate (\"{}\")", - pred.operator - ); - } - } - }) { + if query + .predicates + .get(&m.pattern()) + .is_some_and(|preds| !preds.are_satisfied(&m, text, new_line_byte_pos)) + { continue; } // A list of pairs (node_id, indent_capture) that are added by this match. // They cannot be added to indent_captures immediately since they may depend on other captures (such as an @anchor). let mut added_indent_captures: Vec<(usize, IndentCapture)> = Vec::new(); // The row/column position of the optional anchor in this query - let mut anchor: Option = None; - for capture in m.captures { - let capture_name = query.capture_names()[capture.index as usize]; - let capture_type = match capture_name { - "indent" => IndentCaptureType::Indent, - "indent.always" => IndentCaptureType::IndentAlways, - "outdent" => IndentCaptureType::Outdent, - "outdent.always" => IndentCaptureType::OutdentAlways, - // The alignment will be updated to the correct value at the end, when the anchor is known. - "align" => IndentCaptureType::Align(RopeSlice::from("")), - "anchor" => { - if anchor.is_some() { - log::error!("Invalid indent query: Encountered more than one @anchor in the same match.") - } else { - anchor = Some(capture.node); - } - continue; - } - "extend" => { - extend_captures - .entry(capture.node.id()) - .or_insert_with(|| Vec::with_capacity(1)) - .push(ExtendCapture::Extend); - continue; - } - "extend.prevent-once" => { - extend_captures - .entry(capture.node.id()) - .or_insert_with(|| Vec::with_capacity(1)) - .push(ExtendCapture::PreventOnce); - continue; - } - _ => { - // Ignore any unknown captures (these may be needed for predicates such as #match?) 
- continue; + let mut anchor: Option<&Node> = None; + for matched_node in m.matched_nodes() { + let node_id = matched_node.node.id(); + let capture = Some(matched_node.capture); + let capture_type = if capture == query.indent_capture { + IndentCaptureType::Indent + } else if capture == query.indent_always_capture { + IndentCaptureType::IndentAlways + } else if capture == query.outdent_capture { + IndentCaptureType::Outdent + } else if capture == query.outdent_always_capture { + IndentCaptureType::OutdentAlways + } else if capture == query.align_capture { + IndentCaptureType::Align(RopeSlice::from("")) + } else if capture == query.anchor_capture { + if anchor.is_some() { + log::error!("Invalid indent query: Encountered more than one @anchor in the same match.") + } else { + anchor = Some(&matched_node.node); } + continue; + } else if capture == query.extend_capture { + extend_captures + .entry(node_id) + .or_insert_with(|| Vec::with_capacity(1)) + .push(ExtendCapture::Extend); + continue; + } else if capture == query.extend_prevent_once_capture { + extend_captures + .entry(node_id) + .or_insert_with(|| Vec::with_capacity(1)) + .push(ExtendCapture::PreventOnce); + continue; + } else { + // Ignore any unknown captures (these may be needed for predicates such as #match?) + continue; }; - let scope = capture_type.default_scope(); - let mut indent_capture = IndentCapture { + + // Apply additional settings for this capture + let scope = query + .properties + .get(&m.pattern()) + .copied() + .unwrap_or_else(|| capture_type.default_scope()); + let indent_capture = IndentCapture { capture_type, scope, }; - // Apply additional settings for this capture - for property in query.property_settings(m.pattern_index) { - match property.key.as_ref() { - "scope" => { - indent_capture.scope = match property.value.as_deref() { - Some("all") => IndentScope::All, - Some("tail") => IndentScope::Tail, - Some(s) => { - panic!("Invalid indent query: Unknown value for \"scope\" property (\"{}\")", s); - } - None => { - panic!( - "Invalid indent query: Missing value for \"scope\" property" - ); - } - } - } - _ => { - panic!( - "Invalid indent query: Unknown property \"{}\"", - property.key - ); - } - } - } - added_indent_captures.push((capture.node.id(), indent_capture)) + added_indent_captures.push((node_id, indent_capture)) } for (node_id, mut capture) in added_indent_captures { // Set the anchor for all align queries. if let IndentCaptureType::Align(_) = capture.capture_type { - let anchor = match anchor { - None => { - log::error!( - "Invalid indent query: @align requires an accompanying @anchor." 
- ); - continue; - } - Some(anchor) => anchor, + let Some(anchor) = anchor else { + log::error!("Invalid indent query: @align requires an accompanying @anchor."); + continue; }; + let line = text.byte_to_line(anchor.start_byte() as usize); + let line_start = text.line_to_byte(line); capture.capture_type = IndentCaptureType::Align( - text.line(anchor.start_position().row) - .byte_slice(0..anchor.start_position().column), + text.byte_slice(line_start..anchor.start_byte() as usize), ); } indent_captures @@ -694,13 +763,15 @@ fn extend_nodes<'a>( // - the cursor is on the same line as the end of the node OR // - the line that the cursor is on is more indented than the // first line of the node - if deepest_preceding.end_position().row == line { + if text.byte_to_line(deepest_preceding.end_byte() as usize) == line { extend_node = true; } else { let cursor_indent = indent_level_for_line(text.line(line), tab_width, indent_width); let node_indent = indent_level_for_line( - text.line(deepest_preceding.start_position().row), + text.line( + text.byte_to_line(deepest_preceding.start_byte() as usize), + ), tab_width, indent_width, ); @@ -717,7 +788,7 @@ fn extend_nodes<'a>( if node_captured && stop_extend { stop_extend = false; } else if extend_node && !stop_extend { - *node = deepest_preceding; + *node = deepest_preceding.clone(); break; } // If the tree contains a syntax error, `deepest_preceding` may not @@ -734,17 +805,17 @@ fn extend_nodes<'a>( /// - The indent captures for all relevant nodes. #[allow(clippy::too_many_arguments)] fn init_indent_query<'a, 'b>( - query: &Query, + query: &IndentQuery, syntax: &'a Syntax, text: RopeSlice<'b>, tab_width: usize, indent_width: usize, line: usize, - byte_pos: usize, - new_line_byte_pos: Option, + byte_pos: u32, + new_line_byte_pos: Option, ) -> Option<(Node<'a>, HashMap>>)> { // The innermost tree-sitter node which is considered for the indent - // computation. It may change if some predeceding node is extended + // computation. It may change if some preceding node is extended let mut node = syntax .tree() .root_node() @@ -754,37 +825,25 @@ fn init_indent_query<'a, 'b>( // The query range should intersect with all nodes directly preceding // the position of the indent query in case one of them is extended. 
let mut deepest_preceding = None; // The deepest node preceding the indent query position - let mut tree_cursor = node.walk(); - for child in node.children(&mut tree_cursor) { + for child in node.children() { if child.byte_range().end <= byte_pos { - deepest_preceding = Some(child); + deepest_preceding = Some(child.clone()); } } deepest_preceding = deepest_preceding.map(|mut prec| { // Get the deepest directly preceding node while prec.child_count() > 0 { - prec = prec.child(prec.child_count() - 1).unwrap(); + prec = prec.child(prec.child_count() - 1).unwrap().clone(); } prec }); let query_range = deepest_preceding + .as_ref() .map(|prec| prec.byte_range().end - 1..byte_pos + 1) .unwrap_or(byte_pos..byte_pos + 1); - crate::syntax::PARSER.with(|ts_parser| { - let mut ts_parser = ts_parser.borrow_mut(); - let mut cursor = ts_parser.cursors.pop().unwrap_or_default(); - let query_result = query_indents( - query, - syntax, - &mut cursor, - text, - query_range, - new_line_byte_pos, - ); - ts_parser.cursors.push(cursor); - (query_result, deepest_preceding) - }) + let query_result = query_indents(query, syntax, text, query_range, new_line_byte_pos); + (query_result, deepest_preceding) }; let extend_captures = query_result.extend_captures; @@ -842,7 +901,7 @@ fn init_indent_query<'a, 'b>( /// ``` #[allow(clippy::too_many_arguments)] pub fn treesitter_indent_for_pos<'a>( - query: &Query, + query: &IndentQuery, syntax: &Syntax, tab_width: usize, indent_width: usize, @@ -851,7 +910,7 @@ pub fn treesitter_indent_for_pos<'a>( pos: usize, new_line: bool, ) -> Option> { - let byte_pos = text.char_to_byte(pos); + let byte_pos = text.char_to_byte(pos) as u32; let new_line_byte_pos = new_line.then_some(byte_pos); let (mut node, mut indent_captures) = init_indent_query( query, @@ -871,7 +930,7 @@ pub fn treesitter_indent_for_pos<'a>( let mut indent_for_line_below = Indentation::default(); loop { - let is_first = is_first_in_line(node, text, new_line_byte_pos); + let is_first = is_first_in_line(&node, text, new_line_byte_pos); // Apply all indent definitions for this node. // Since we only iterate over each node once, we can remove the @@ -894,8 +953,8 @@ pub fn treesitter_indent_for_pos<'a>( } if let Some(parent) = node.parent() { - let node_line = get_node_start_line(node, new_line_byte_pos); - let parent_line = get_node_start_line(parent, new_line_byte_pos); + let node_line = get_node_start_line(text, &node, new_line_byte_pos); + let parent_line = get_node_start_line(text, &parent, new_line_byte_pos); if node_line != parent_line { // Don't add indent for the line below the line of the query @@ -917,8 +976,9 @@ pub fn treesitter_indent_for_pos<'a>( } else { // Only add the indentation for the line below if that line // is not after the line that the indentation is calculated for. 
- if (node.start_position().row < line) - || (new_line && node.start_position().row == line && node.start_byte() < byte_pos) + let node_start_line = text.byte_to_line(node.start_byte() as usize); + if node_start_line < line + || (new_line && node_start_line == line && node.start_byte() < byte_pos) { result.add_line(indent_for_line_below); } @@ -933,7 +993,7 @@ pub fn treesitter_indent_for_pos<'a>( /// This is done either using treesitter, or if that's not available by copying the indentation from the current line #[allow(clippy::too_many_arguments)] pub fn indent_for_newline( - language_config: Option<&LanguageConfiguration>, + loader: &syntax::Loader, syntax: Option<&Syntax>, indent_heuristic: &IndentationHeuristic, indent_style: &IndentStyle, @@ -950,7 +1010,7 @@ pub fn indent_for_newline( Some(syntax), ) = ( indent_heuristic, - language_config.and_then(|config| config.indent_query()), + syntax.and_then(|syntax| loader.indent_query(syntax.root_language())), syntax, ) { if let Some(indent) = treesitter_indent_for_pos( @@ -1018,10 +1078,10 @@ pub fn indent_for_newline( indent_style.as_str().repeat(indent_level) } -pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> { +pub fn get_scopes<'a>(syntax: Option<&'a Syntax>, text: RopeSlice, pos: usize) -> Vec<&'a str> { let mut scopes = Vec::new(); if let Some(syntax) = syntax { - let pos = text.char_to_byte(pos); + let pos = text.char_to_byte(pos) as u32; let mut node = match syntax .tree() .root_node() diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 3fcddfcd189a..09865ca40456 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -53,7 +53,7 @@ pub use smartstring::SmartString; pub type Tendril = SmartString; #[doc(inline)] -pub use {regex, tree_sitter}; +pub use {regex, tree_house::tree_sitter}; pub use position::{ char_idx_at_visual_offset, coords_at_pos, pos_at_coords, softwrapped_dimensions, @@ -73,3 +73,5 @@ pub use line_ending::{LineEnding, NATIVE_LINE_ENDING}; pub use transaction::{Assoc, Change, ChangeSet, Deletion, Operation, Transaction}; pub use uri::Uri; + +pub use tree_house::Language; diff --git a/helix-core/src/match_brackets.rs b/helix-core/src/match_brackets.rs index 7520d3e4646a..7f2891f334b7 100644 --- a/helix-core/src/match_brackets.rs +++ b/helix-core/src/match_brackets.rs @@ -1,7 +1,7 @@ use std::iter; +use crate::tree_sitter::Node; use ropey::RopeSlice; -use tree_sitter::Node; use crate::movement::Direction::{self, Backward, Forward}; use crate::Syntax; @@ -75,7 +75,7 @@ fn find_pair( pos_: usize, traverse_parents: bool, ) -> Option { - let pos = doc.char_to_byte(pos_); + let pos = doc.char_to_byte(pos_) as u32; let root = syntax.tree_for_byte_range(pos, pos).root_node(); let mut node = root.descendant_for_byte_range(pos, pos)?; @@ -128,7 +128,7 @@ fn find_pair( if find_pair_end(doc, sibling.prev_sibling(), start_char, end_char, Backward) .is_some() { - return doc.try_byte_to_char(sibling.start_byte()).ok(); + return doc.try_byte_to_char(sibling.start_byte() as usize).ok(); } } } else if node.is_named() { @@ -144,9 +144,9 @@ fn find_pair( if node.child_count() != 0 { return None; } - let node_start = doc.byte_to_char(node.start_byte()); - find_matching_bracket_plaintext(doc.byte_slice(node.byte_range()), pos_ - node_start) - .map(|pos| pos + node_start) + let node_start = doc.byte_to_char(node.start_byte() as usize); + let node_text = doc.byte_slice(node.start_byte() as usize..node.end_byte() as usize); + find_matching_bracket_plaintext(node_text, pos_ - 
node_start).map(|pos| pos + node_start) } /// Returns the position of the matching bracket under cursor. @@ -304,7 +304,7 @@ fn as_char(doc: RopeSlice, node: &Node) -> Option<(usize, char)> { if node.byte_range().len() != 1 { return None; } - let pos = doc.try_byte_to_char(node.start_byte()).ok()?; + let pos = doc.try_byte_to_char(node.start_byte() as usize).ok()?; Some((pos, doc.char(pos))) } diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index 2a1fa94f29ff..09a99db2575f 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -1,7 +1,6 @@ -use std::{cmp::Reverse, iter}; +use std::{borrow::Cow, cmp::Reverse, iter}; use ropey::iter::Chars; -use tree_sitter::{Node, QueryCursor}; use crate::{ char_idx_at_visual_offset, @@ -13,9 +12,10 @@ use crate::{ }, line_ending::rope_is_line_ending, position::char_idx_at_visual_block_offset, - syntax::config::LanguageConfiguration, + syntax, text_annotations::TextAnnotations, textobject::TextObject, + tree_sitter::Node, visual_offset_from_block, Range, RopeSlice, Selection, Syntax, }; @@ -560,21 +560,23 @@ fn reached_target(target: WordMotionTarget, prev_ch: char, next_ch: char) -> boo /// Finds the range of the next or previous textobject in the syntax sub-tree of `node`. /// Returns the range in the forwards direction. +#[allow(clippy::too_many_arguments)] pub fn goto_treesitter_object( slice: RopeSlice, range: Range, object_name: &str, dir: Direction, - slice_tree: Node, - lang_config: &LanguageConfiguration, + slice_tree: &Node, + syntax: &Syntax, + loader: &syntax::Loader, count: usize, ) -> Range { + let textobject_query = loader.textobject_query(syntax.root_language()); let get_range = move |range: Range| -> Option { let byte_pos = slice.char_to_byte(range.cursor(slice)); let cap_name = |t: TextObject| format!("{}.{}", object_name, t); - let mut cursor = QueryCursor::new(); - let nodes = lang_config.textobject_query()?.capture_nodes_any( + let nodes = textobject_query?.capture_nodes_any( &[ &cap_name(TextObject::Movement), &cap_name(TextObject::Around), @@ -582,7 +584,6 @@ pub fn goto_treesitter_object( ], slice_tree, slice, - &mut cursor, )?; let node = match dir { @@ -617,14 +618,15 @@ pub fn goto_treesitter_object( last_range } -fn find_parent_start(mut node: Node) -> Option { +fn find_parent_start<'tree>(node: &Node<'tree>) -> Option> { let start = node.start_byte(); + let mut node = Cow::Borrowed(node); while node.start_byte() >= start || !node.is_named() { - node = node.parent()?; + node = Cow::Owned(node.parent()?); } - Some(node) + Some(node.into_owned()) } pub fn move_parent_node_end( @@ -635,8 +637,8 @@ pub fn move_parent_node_end( movement: Movement, ) -> Selection { selection.transform(|range| { - let start_from = text.char_to_byte(range.from()); - let start_to = text.char_to_byte(range.to()); + let start_from = text.char_to_byte(range.from()) as u32; + let start_to = text.char_to_byte(range.to()) as u32; let mut node = match syntax.named_descendant_for_byte_range(start_from, start_to) { Some(node) => node, @@ -654,18 +656,18 @@ pub fn move_parent_node_end( // moving forward, we always want to move one past the end of the // current node, so use the end byte of the current node, which is an exclusive // end of the range - Direction::Forward => text.byte_to_char(node.end_byte()), + Direction::Forward => text.byte_to_char(node.end_byte() as usize), // moving backward, we want the cursor to land on the start char of // the current node, or if it is already at the start of a node, to traverse up to 
// the parent Direction::Backward => { - let end_head = text.byte_to_char(node.start_byte()); + let end_head = text.byte_to_char(node.start_byte() as usize); // if we're already on the beginning, look up to the parent if end_head == range.cursor(text) { - node = find_parent_start(node).unwrap_or(node); - text.byte_to_char(node.start_byte()) + node = find_parent_start(&node).unwrap_or(node); + text.byte_to_char(node.start_byte() as usize) } else { end_head } diff --git a/helix-core/src/object.rs b/helix-core/src/object.rs index 17a393caf277..e0c02d0a905e 100644 --- a/helix-core/src/object.rs +++ b/helix-core/src/object.rs @@ -4,8 +4,8 @@ pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) let cursor = &mut syntax.walk(); selection.transform(|range| { - let from = text.char_to_byte(range.from()); - let to = text.char_to_byte(range.to()); + let from = text.char_to_byte(range.from()) as u32; + let to = text.char_to_byte(range.to()) as u32; let byte_range = from..to; cursor.reset_to_byte_range(from, to); @@ -17,8 +17,8 @@ pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) } let node = cursor.node(); - let from = text.byte_to_char(node.start_byte()); - let to = text.byte_to_char(node.end_byte()); + let from = text.byte_to_char(node.start_byte() as usize); + let to = text.byte_to_char(node.end_byte() as usize); Range::new(to, from).with_direction(range.direction()) }) @@ -53,10 +53,10 @@ pub fn select_next_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio } pub fn select_all_siblings(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { - selection.transform_iter(|range| { - let mut cursor = syntax.walk(); + let mut cursor = syntax.walk(); + selection.transform_iter(move |range| { let (from, to) = range.into_byte_range(text); - cursor.reset_to_byte_range(from, to); + cursor.reset_to_byte_range(from as u32, to as u32); if !cursor.goto_parent_with(|parent| parent.child_count() > 1) { return vec![range].into_iter(); @@ -67,21 +67,18 @@ pub fn select_all_siblings(syntax: &Syntax, text: RopeSlice, selection: Selectio } pub fn select_all_children(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { - selection.transform_iter(|range| { - let mut cursor = syntax.walk(); + let mut cursor = syntax.walk(); + selection.transform_iter(move |range| { let (from, to) = range.into_byte_range(text); - cursor.reset_to_byte_range(from, to); + cursor.reset_to_byte_range(from as u32, to as u32); select_children(&mut cursor, text, range).into_iter() }) } -fn select_children<'n>( - cursor: &'n mut TreeCursor<'n>, - text: RopeSlice, - range: Range, -) -> Vec { +fn select_children(cursor: &mut TreeCursor, text: RopeSlice, range: Range) -> Vec { let children = cursor - .named_children() + .children() + .filter(|child| child.is_named()) .map(|child| Range::from_node(child, text, range.direction())) .collect::>(); @@ -98,7 +95,7 @@ pub fn select_prev_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio text, selection, |cursor| { - while !cursor.goto_prev_sibling() { + while !cursor.goto_previous_sibling() { if !cursor.goto_parent() { break; } @@ -121,16 +118,16 @@ where let cursor = &mut syntax.walk(); selection.transform(|range| { - let from = text.char_to_byte(range.from()); - let to = text.char_to_byte(range.to()); + let from = text.char_to_byte(range.from()) as u32; + let to = text.char_to_byte(range.to()) as u32; cursor.reset_to_byte_range(from, to); motion(cursor); let node = cursor.node(); - let from = 
text.byte_to_char(node.start_byte()); - let to = text.byte_to_char(node.end_byte()); + let from = text.byte_to_char(node.start_byte() as usize); + let to = text.byte_to_char(node.end_byte() as usize); Range::new(from, to).with_direction(direction.unwrap_or_else(|| range.direction())) }) diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs index cea0b60714b4..3f888c57a853 100644 --- a/helix-core/src/position.rs +++ b/helix-core/src/position.rs @@ -89,11 +89,6 @@ impl From<(usize, usize)> for Position { } } -impl From for tree_sitter::Point { - fn from(pos: Position) -> Self { - Self::new(pos.row, pos.col) - } -} /// Convert a character index to (line, column) coordinates. /// /// column in `char` count which can be used for row:column display in diff --git a/helix-core/src/selection.rs b/helix-core/src/selection.rs index 1db2d619e614..5bde08e31ba7 100644 --- a/helix-core/src/selection.rs +++ b/helix-core/src/selection.rs @@ -9,13 +9,13 @@ use crate::{ }, line_ending::get_line_ending, movement::Direction, + tree_sitter::Node, Assoc, ChangeSet, RopeSlice, }; use helix_stdx::range::is_subset; use helix_stdx::rope::{self, RopeSliceExt}; use smallvec::{smallvec, SmallVec}; use std::{borrow::Cow, iter, slice}; -use tree_sitter::Node; /// A single selection range. /// @@ -76,8 +76,8 @@ impl Range { } pub fn from_node(node: Node, text: RopeSlice, direction: Direction) -> Self { - let from = text.byte_to_char(node.start_byte()); - let to = text.byte_to_char(node.end_byte()); + let from = text.byte_to_char(node.start_byte() as usize); + let to = text.byte_to_char(node.end_byte() as usize); Range::new(from, to).with_direction(direction) } diff --git a/helix-core/src/snippets/active.rs b/helix-core/src/snippets/active.rs index 98007ab68caf..1c10b76d20a8 100644 --- a/helix-core/src/snippets/active.rs +++ b/helix-core/src/snippets/active.rs @@ -1,6 +1,6 @@ use std::ops::{Index, IndexMut}; -use hashbrown::HashSet; +use foldhash::HashSet; use helix_stdx::range::{is_exact_subset, is_subset}; use helix_stdx::Range; use ropey::Rope; @@ -35,7 +35,7 @@ impl ActiveSnippet { let snippet = Self { ranges: snippet.ranges, tabstops: snippet.tabstops, - active_tabstops: HashSet::new(), + active_tabstops: HashSet::default(), current_tabstop: TabstopIdx(0), }; (snippet.tabstops.len() != 1).then_some(snippet) diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index dfc323429cee..e232ee69bb86 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,323 +1,206 @@ pub mod config; -mod tree_cursor; - -use crate::{ - chars::char_is_line_ending, - regex::Regex, - transaction::{ChangeSet, Operation}, - RopeSlice, Tendril, -}; - -use ahash::RandomState; -use arc_swap::{ArcSwap, Guard}; -use bitflags::bitflags; -use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration}; -use hashbrown::raw::RawTable; -use helix_stdx::rope::{self, RopeSliceExt}; -use slotmap::{DefaultKey as LayerId, HopSlotMap}; use std::{ borrow::Cow, - cell::RefCell, - collections::{HashMap, VecDeque}, - fmt::{self, Write}, - hash::{Hash, Hasher}, - mem::replace, + collections::HashMap, + fmt, iter, + ops::{self, RangeBounds}, path::Path, sync::Arc, + time::Duration, }; -use once_cell::sync::Lazy; +use anyhow::{Context, Result}; +use arc_swap::{ArcSwap, Guard}; +use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration}; +use helix_loader::grammar::get_language; +use helix_stdx::rope::RopeSliceExt as _; +use once_cell::sync::OnceCell; +use 
ropey::RopeSlice; +use tree_house::{ + highlighter, + query_iter::QueryIter, + tree_sitter::{Grammar, InactiveQueryCursor, InputEdit, Node, Query, RopeInput, Tree}, + Error, InjectionLanguageMarker, LanguageConfig as SyntaxConfig, Layer, +}; -use helix_loader::grammar::{get_language, load_runtime_file}; +use crate::{indent::IndentQuery, tree_sitter, ChangeSet, Language}; -pub use tree_cursor::TreeCursor; +pub use tree_house::{ + highlighter::{Highlight, HighlightEvent}, + Error as HighlighterError, LanguageLoader, TreeCursor, TREE_SITTER_MATCH_LIMIT, +}; #[derive(Debug)] -pub struct TextObjectQuery { - pub query: Query, +pub struct LanguageData { + config: Arc, + syntax: OnceCell>, + indent_query: OnceCell>, + textobject_query: OnceCell>, } -#[derive(Debug)] -pub enum CapturedNode<'a> { - Single(Node<'a>), - /// Guaranteed to be not empty - Grouped(Vec>), -} - -impl CapturedNode<'_> { - pub fn start_byte(&self) -> usize { - match self { - Self::Single(n) => n.start_byte(), - Self::Grouped(ns) => ns[0].start_byte(), - } - } - - pub fn end_byte(&self) -> usize { - match self { - Self::Single(n) => n.end_byte(), - Self::Grouped(ns) => ns.last().unwrap().end_byte(), +impl LanguageData { + fn new(config: LanguageConfiguration) -> Self { + Self { + config: Arc::new(config), + syntax: OnceCell::new(), + indent_query: OnceCell::new(), + textobject_query: OnceCell::new(), } } - pub fn byte_range(&self) -> std::ops::Range { - self.start_byte()..self.end_byte() - } -} - -/// The maximum number of in-progress matches a TS cursor can consider at once. -/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`. -/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here. -/// -/// -/// Old tree sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually). -/// However, this causes performance issues for medium to large files. -/// In helix, this problem caused treesitter motions to take multiple seconds to complete in medium-sized rust files (3k loc). -/// -/// -/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream -/// (see and ). -/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance. -/// -/// -/// Neovim chose 64 for this value somewhat arbitrarily (). -/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions. -/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high. -const TREE_SITTER_MATCH_LIMIT: u32 = 256; - -impl TextObjectQuery { - /// Run the query on the given node and return sub nodes which match given - /// capture ("function.inside", "class.around", etc). - /// - /// Captures may contain multiple nodes by using quantifiers (+, *, etc), - /// and support for this is partial and could use improvement. - /// - /// ```query - /// (comment)+ @capture - /// - /// ; OR - /// ( - /// (comment)* - /// . 
- /// (function) - /// ) @capture - /// ``` - pub fn capture_nodes<'a>( - &'a self, - capture_name: &str, - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - self.capture_nodes_any(&[capture_name], node, slice, cursor) + pub fn config(&self) -> &Arc { + &self.config } - /// Find the first capture that exists out of all given `capture_names` - /// and return sub nodes that match this capture. - pub fn capture_nodes_any<'a>( - &'a self, - capture_names: &[&str], - node: Node<'a>, - slice: RopeSlice<'a>, - cursor: &'a mut QueryCursor, - ) -> Option>> { - let capture_idx = capture_names - .iter() - .find_map(|cap| self.query.capture_index_for_name(cap))?; + /// Loads the grammar and compiles the highlights, injections and locals for the language. + /// This function should only be used by this module or the xtask crate. + pub fn compile_syntax_config( + config: &LanguageConfiguration, + loader: &Loader, + ) -> Result> { + let name = &config.language_id; + let parser_name = config.grammar.as_deref().unwrap_or(name); + let Some(grammar) = get_language(parser_name)? else { + log::info!("Skipping syntax config for '{name}' because the parser's shared library does not exist"); + return Ok(None); + }; + let highlight_query_text = read_query(name, "highlights.scm"); + let injection_query_text = read_query(name, "injections.scm"); + let local_query_text = read_query(name, "locals.scm"); + let config = SyntaxConfig::new( + grammar, + &highlight_query_text, + &injection_query_text, + &local_query_text, + ) + .with_context(|| format!("Failed to compile highlights for '{name}'"))?; - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + reconfigure_highlights(&config, &loader.scopes()); - let nodes = cursor - .captures(&self.query, node, RopeProvider(slice)) - .filter_map(move |(mat, _)| { - let nodes: Vec<_> = mat - .captures - .iter() - .filter_map(|cap| (cap.index == capture_idx).then_some(cap.node)) - .collect(); - - if nodes.len() > 1 { - Some(CapturedNode::Grouped(nodes)) - } else { - nodes.into_iter().map(CapturedNode::Single).next() - } - }); - - Some(nodes) + Ok(Some(config)) } -} - -pub fn read_query(language: &str, filename: &str) -> String { - static INHERITS_REGEX: Lazy = - Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap()); - - let query = load_runtime_file(language, filename).unwrap_or_default(); - - // replaces all "; inherits (,)*" with the queries of the given language(s) - INHERITS_REGEX - .replace_all(&query, |captures: ®ex::Captures| { - captures[1] - .split(',') - .fold(String::new(), |mut output, language| { - // `write!` to a String cannot fail. 
- write!(output, "\n{}\n", read_query(language, filename)).unwrap(); - output - }) - }) - .to_string() -} - -impl config::LanguageConfiguration { - fn initialize_highlight(&self, scopes: &[String]) -> Option> { - let highlights_query = read_query(&self.language_id, "highlights.scm"); - // always highlight syntax errors - // highlights_query += "\n(ERROR) @error"; - - let injections_query = read_query(&self.language_id, "injections.scm"); - let locals_query = read_query(&self.language_id, "locals.scm"); - if highlights_query.is_empty() { - None - } else { - let language = get_language(self.grammar.as_deref().unwrap_or(&self.language_id)) - .map_err(|err| { - log::error!( - "Failed to load tree-sitter parser for language {:?}: {:#}", - self.language_id, - err - ) - }) - .ok()?; - let config = HighlightConfiguration::new( - language, - &highlights_query, - &injections_query, - &locals_query, - ) - .map_err(|err| log::error!("Could not parse queries for language {:?}. Are your grammars out of sync? Try running 'hx --grammar fetch' and 'hx --grammar build'. This query could not be parsed: {:?}", self.language_id, err)) - .ok()?; - - config.configure(scopes); - Some(Arc::new(config)) - } + fn syntax_config(&self, loader: &Loader) -> Option<&SyntaxConfig> { + self.syntax + .get_or_init(|| { + Self::compile_syntax_config(&self.config, loader) + .map_err(|err| { + log::error!("{err:#}"); + }) + .ok() + .flatten() + }) + .as_ref() } - pub fn reconfigure(&self, scopes: &[String]) { - if let Some(Some(config)) = self.highlight_config.get() { - config.configure(scopes); + /// Compiles the indents.scm query for a language. + /// This function should only be used by this module or the xtask crate. + pub fn compile_indent_query( + grammar: Grammar, + config: &LanguageConfiguration, + ) -> Result> { + let name = &config.language_id; + let text = read_query(name, "indents.scm"); + if text.is_empty() { + return Ok(None); } + let indent_query = IndentQuery::new(grammar, &text) + .with_context(|| format!("Failed to compile indents.scm query for '{name}'"))?; + Ok(Some(indent_query)) } - pub fn highlight_config(&self, scopes: &[String]) -> Option> { - self.highlight_config - .get_or_init(|| self.initialize_highlight(scopes)) - .clone() - } - - pub fn is_highlight_initialized(&self) -> bool { - self.highlight_config.get().is_some() - } - - pub fn indent_query(&self) -> Option<&Query> { + fn indent_query(&self, loader: &Loader) -> Option<&IndentQuery> { self.indent_query - .get_or_init(|| self.load_query("indents.scm")) - .as_ref() - } - - pub fn textobject_query(&self) -> Option<&TextObjectQuery> { - self.textobject_query .get_or_init(|| { - self.load_query("textobjects.scm") - .map(|query| TextObjectQuery { query }) + let grammar = self.syntax_config(loader)?.grammar; + Self::compile_indent_query(grammar, &self.config) + .map_err(|err| { + log::error!("{err}"); + }) + .ok() + .flatten() }) .as_ref() } - pub fn scope(&self) -> &str { - &self.scope - } - - fn load_query(&self, kind: &str) -> Option { - let query_text = read_query(&self.language_id, kind); - if query_text.is_empty() { - return None; + /// Compiles the textobjects.scm query for a language. + /// This function should only be used by this module or the xtask crate. 
+ pub fn compile_textobject_query( + grammar: Grammar, + config: &LanguageConfiguration, + ) -> Result> { + let name = &config.language_id; + let text = read_query(name, "textobjects.scm"); + if text.is_empty() { + return Ok(None); } - let lang = &self.highlight_config.get()?.as_ref()?.language; - Query::new(lang, &query_text) - .map_err(|e| { - log::error!( - "Failed to parse {} queries for {}: {}", - kind, - self.language_id, - e - ) - }) - .ok() + let query = Query::new(grammar, &text, |_, _| Ok(())) + .with_context(|| format!("Failed to compile textobjects.scm queries for '{name}'"))?; + Ok(Some(TextObjectQuery::new(query))) } -} -#[derive(Debug)] -struct FileTypeGlob { - glob: globset::Glob, - language_id: usize, -} - -impl FileTypeGlob { - fn new(glob: globset::Glob, language_id: usize) -> Self { - Self { glob, language_id } + fn textobject_query(&self, loader: &Loader) -> Option<&TextObjectQuery> { + self.textobject_query + .get_or_init(|| { + let grammar = self.syntax_config(loader)?.grammar; + Self::compile_textobject_query(grammar, &self.config) + .map_err(|err| { + log::error!("{err}"); + }) + .ok() + .flatten() + }) + .as_ref() } -} -#[derive(Debug)] -struct FileTypeGlobMatcher { - matcher: globset::GlobSet, - file_types: Vec, -} - -impl Default for FileTypeGlobMatcher { - fn default() -> Self { - Self { - matcher: globset::GlobSet::empty(), - file_types: Default::default(), + fn reconfigure(&self, scopes: &[String]) { + if let Some(Some(config)) = self.syntax.get() { + reconfigure_highlights(config, scopes); } } } -impl FileTypeGlobMatcher { - fn new(file_types: Vec) -> Result { - let mut builder = globset::GlobSetBuilder::new(); - for file_type in &file_types { - builder.add(file_type.glob.clone()); +fn reconfigure_highlights(config: &SyntaxConfig, recognized_names: &[String]) { + config.configure(move |capture_name| { + let capture_parts: Vec<_> = capture_name.split('.').collect(); + + let mut best_index = None; + let mut best_match_len = 0; + for (i, recognized_name) in recognized_names.iter().enumerate() { + let mut len = 0; + let mut matches = true; + for (i, part) in recognized_name.split('.').enumerate() { + match capture_parts.get(i) { + Some(capture_part) if *capture_part == part => len += 1, + _ => { + matches = false; + break; + } + } + } + if matches && len > best_match_len { + best_index = Some(i); + best_match_len = len; + } } - - Ok(Self { - matcher: builder.build()?, - file_types, - }) - } - - fn language_id_for_path(&self, path: &Path) -> Option<&usize> { - self.matcher - .matches(path) - .iter() - .filter_map(|idx| self.file_types.get(*idx)) - .max_by_key(|file_type| file_type.glob.glob().len()) - .map(|file_type| &file_type.language_id) - } + best_index.map(|idx| Highlight::new(idx as u32)) + }); } -// Expose loader as Lazy<> global since it's always static? +pub fn read_query(lang: &str, query_filename: &str) -> String { + tree_house::read_query(lang, |language| { + helix_loader::grammar::load_runtime_file(language, query_filename).unwrap_or_default() + }) +} #[derive(Debug, Default)] pub struct Loader { - // highlight_names ? 
- language_configs: Vec>, - language_config_ids_by_extension: HashMap, // Vec - language_config_ids_glob_matcher: FileTypeGlobMatcher, - language_config_ids_by_shebang: HashMap, - + languages: Vec, + languages_by_extension: HashMap, + languages_by_shebang: HashMap, + languages_glob_matcher: FileTypeGlobMatcher, language_server_configs: HashMap, - scopes: ArcSwap>, } @@ -325,96 +208,72 @@ pub type LoaderError = globset::Error; impl Loader { pub fn new(config: Configuration) -> Result { - let mut language_configs = Vec::new(); - let mut language_config_ids_by_extension = HashMap::new(); - let mut language_config_ids_by_shebang = HashMap::new(); + let mut languages = Vec::with_capacity(config.language.len()); + let mut languages_by_extension = HashMap::new(); + let mut languages_by_shebang = HashMap::new(); let mut file_type_globs = Vec::new(); - for config in config.language { - // get the next id - let language_id = language_configs.len(); + for mut config in config.language { + let language = Language(languages.len() as u32); + config.language = Some(language); for file_type in &config.file_types { - // entry().or_insert(Vec::new).push(language_id); match file_type { FileType::Extension(extension) => { - language_config_ids_by_extension.insert(extension.clone(), language_id); + languages_by_extension.insert(extension.clone(), language); } FileType::Glob(glob) => { - file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language_id)); + file_type_globs.push(FileTypeGlob::new(glob.to_owned(), language)); } }; } for shebang in &config.shebangs { - language_config_ids_by_shebang.insert(shebang.clone(), language_id); + languages_by_shebang.insert(shebang.clone(), language); } - language_configs.push(Arc::new(config)); + languages.push(LanguageData::new(config)); } Ok(Self { - language_configs, - language_config_ids_by_extension, - language_config_ids_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, - language_config_ids_by_shebang, + languages, + languages_by_extension, + languages_by_shebang, + languages_glob_matcher: FileTypeGlobMatcher::new(file_type_globs)?, language_server_configs: config.language_server, scopes: ArcSwap::from_pointee(Vec::new()), }) } - pub fn language_config_for_file_name(&self, path: &Path) -> Option> { - // Find all the language configurations that match this file name - // or a suffix of the file name. 
- let configuration_id = self - .language_config_ids_glob_matcher - .language_id_for_path(path) - .or_else(|| { - path.extension() - .and_then(|extension| extension.to_str()) - .and_then(|extension| self.language_config_ids_by_extension.get(extension)) - }); - - configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) - - // TODO: content_regex handling conflict resolution + pub fn languages(&self) -> impl ExactSizeIterator { + self.languages + .iter() + .enumerate() + .map(|(idx, data)| (Language(idx as u32), data)) } - pub fn language_config_for_shebang( - &self, - source: RopeSlice, - ) -> Option> { - let line = Cow::from(source.line(0)); - static SHEBANG_REGEX: Lazy = - Lazy::new(|| Regex::new(&["^", SHEBANG].concat()).unwrap()); - let configuration_id = SHEBANG_REGEX - .captures(&line) - .and_then(|cap| self.language_config_ids_by_shebang.get(&cap[1])); + pub fn language_configs(&self) -> impl ExactSizeIterator { + self.languages.iter().map(|language| &*language.config) + } - configuration_id.and_then(|&id| self.language_configs.get(id).cloned()) + pub fn language(&self, lang: Language) -> &LanguageData { + &self.languages[lang.idx()] } - pub fn language_config_for_scope(&self, scope: &str) -> Option> { - self.language_configs - .iter() - .find(|config| config.scope == scope) - .cloned() + pub fn language_for_name(&self, name: impl PartialEq) -> Option { + self.languages.iter().enumerate().find_map(|(idx, config)| { + (name == config.config.language_id).then_some(Language(idx as u32)) + }) } - pub fn language_config_for_language_id( - &self, - id: impl PartialEq, - ) -> Option> { - self.language_configs - .iter() - .find(|config| id.eq(&config.language_id)) - .cloned() + pub fn language_for_scope(&self, scope: &str) -> Option { + self.languages.iter().enumerate().find_map(|(idx, config)| { + (scope == config.config.scope).then_some(Language(idx as u32)) + }) } - /// Unlike `language_config_for_language_id`, which only returns Some for an exact id, this - /// function will perform a regex match on the given string to find the closest language match. - pub fn language_config_for_name(&self, slice: RopeSlice) -> Option> { + pub fn language_for_match(&self, text: RopeSlice) -> Option { // PERF: If the name matches up with the id, then this saves the need to do expensive regex. 
- let shortcircuit = self.language_config_for_language_id(slice); + let shortcircuit = self.language_for_name(text); if shortcircuit.is_some() { return shortcircuit; } @@ -423,1691 +282,588 @@ impl Loader { let mut best_match_length = 0; let mut best_match_position = None; - for (i, configuration) in self.language_configs.iter().enumerate() { - if let Some(injection_regex) = &configuration.injection_regex { - if let Some(mat) = injection_regex.find(slice.regex_input()) { + for (idx, data) in self.languages.iter().enumerate() { + if let Some(injection_regex) = &data.config.injection_regex { + if let Some(mat) = injection_regex.find(text.regex_input()) { let length = mat.end() - mat.start(); if length > best_match_length { - best_match_position = Some(i); + best_match_position = Some(idx); best_match_length = length; } } } } - best_match_position.map(|i| self.language_configs[i].clone()) + best_match_position.map(|i| Language(i as u32)) } - pub fn language_configuration_for_injection_string( - &self, - capture: &InjectionLanguageMarker, - ) -> Option> { - match capture { - InjectionLanguageMarker::LanguageId(id) => self.language_config_for_language_id(*id), - InjectionLanguageMarker::Name(name) => self.language_config_for_name(*name), - InjectionLanguageMarker::Filename(file) => { - let path_str: Cow = (*file).into(); - self.language_config_for_file_name(Path::new(path_str.as_ref())) - } - InjectionLanguageMarker::Shebang(shebang) => { - let shebang_str: Cow = (*shebang).into(); - self.language_config_ids_by_shebang - .get(shebang_str.as_ref()) - .and_then(|&id| self.language_configs.get(id).cloned()) - } - } + pub fn language_for_filename(&self, path: &Path) -> Option { + // Find all the language configurations that match this file name + // or a suffix of the file name. + + // TODO: content_regex handling conflict resolution + self.languages_glob_matcher + .language_for_path(path) + .or_else(|| { + path.extension() + .and_then(|extension| extension.to_str()) + .and_then(|extension| self.languages_by_extension.get(extension).copied()) + }) } - pub fn language_configs(&self) -> impl Iterator> { - self.language_configs.iter() + pub fn language_for_shebang(&self, text: RopeSlice) -> Option { + let shebang: Cow = text.into(); + self.languages_by_shebang.get(shebang.as_ref()).copied() } - pub fn language_server_configs(&self) -> &HashMap { - &self.language_server_configs + pub fn indent_query(&self, lang: Language) -> Option<&IndentQuery> { + self.language(lang).indent_query(self) } - pub fn set_scopes(&self, scopes: Vec) { - self.scopes.store(Arc::new(scopes)); + pub fn textobject_query(&self, lang: Language) -> Option<&TextObjectQuery> { + self.language(lang).textobject_query(self) + } - // Reconfigure existing grammars - for config in self - .language_configs - .iter() - .filter(|cfg| cfg.is_highlight_initialized()) - { - config.reconfigure(&self.scopes()); - } + pub fn language_server_configs(&self) -> &HashMap { + &self.language_server_configs } pub fn scopes(&self) -> Guard>> { self.scopes.load() } -} - -pub struct TsParser { - parser: tree_sitter::Parser, - pub cursors: Vec, -} - -// could also just use a pool, or a single instance? -thread_local! 
{ - pub static PARSER: RefCell = RefCell::new(TsParser { - parser: Parser::new(), - cursors: Vec::new(), - }) -} - -#[derive(Debug)] -pub struct Syntax { - layers: HopSlotMap, - root: LayerId, - loader: Arc>, -} - -fn byte_range_to_str(range: std::ops::Range, source: RopeSlice) -> Cow { - Cow::from(source.byte_slice(range)) -} - -impl Syntax { - pub fn new( - source: RopeSlice, - config: Arc, - loader: Arc>, - ) -> Option { - let root_layer = LanguageLayer { - tree: None, - config, - depth: 0, - flags: LayerUpdateFlags::empty(), - ranges: vec![Range { - start_byte: 0, - end_byte: usize::MAX, - start_point: Point::new(0, 0), - end_point: Point::new(usize::MAX, usize::MAX), - }], - parent: None, - }; - - // track scope_descriptor: a Vec of scopes for item in tree - - let mut layers = HopSlotMap::default(); - let root = layers.insert(root_layer); - - let mut syntax = Self { - root, - layers, - loader, - }; - - let res = syntax.update(source, source, &ChangeSet::new(source)); - - if res.is_err() { - log::error!("TS parser failed, disabling TS for the current buffer: {res:?}"); - return None; - } - Some(syntax) - } - - pub fn update( - &mut self, - old_source: RopeSlice, - source: RopeSlice, - changeset: &ChangeSet, - ) -> Result<(), Error> { - let mut queue = VecDeque::new(); - queue.push_back(self.root); - - let loader = self.loader.load(); - let scopes = loader.scopes.load(); - let injection_callback = |language: &InjectionLanguageMarker| { - loader - .language_configuration_for_injection_string(language) - .and_then(|language_config| language_config.highlight_config(&scopes)) - }; - - // Convert the changeset into tree sitter edits. - let edits = generate_edits(old_source, changeset); - - // This table allows inverse indexing of `layers`. - // That is by hashing a `Layer` you can find - // the `LayerId` of an existing equivalent `Layer` in `layers`. - // - // It is used to determine if a new layer exists for an injection - // or if an existing layer needs to be updated. 
- let mut layers_table = RawTable::with_capacity(self.layers.len()); - let layers_hasher = RandomState::new(); - // Use the edits to update all layers markers - fn point_add(a: Point, b: Point) -> Point { - if b.row > 0 { - Point::new(a.row.saturating_add(b.row), b.column) - } else { - Point::new(0, a.column.saturating_add(b.column)) - } - } - fn point_sub(a: Point, b: Point) -> Point { - if a.row > b.row { - Point::new(a.row.saturating_sub(b.row), a.column) - } else { - Point::new(0, a.column.saturating_sub(b.column)) - } - } - for (layer_id, layer) in self.layers.iter_mut() { - // The root layer always covers the whole range (0..usize::MAX) - if layer.depth == 0 { - layer.flags = LayerUpdateFlags::MODIFIED; - continue; - } - - if !edits.is_empty() { - for range in &mut layer.ranges { - // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720 - for edit in edits.iter().rev() { - let is_pure_insertion = edit.old_end_byte == edit.start_byte; - - // if edit is after range, skip - if edit.start_byte > range.end_byte { - // TODO: || (is_noop && edit.start_byte == range.end_byte) - continue; - } - - // if edit is before range, shift entire range by len - if edit.old_end_byte < range.start_byte { - range.start_byte = - edit.new_end_byte + (range.start_byte - edit.old_end_byte); - range.start_point = point_add( - edit.new_end_position, - point_sub(range.start_point, edit.old_end_position), - ); - - range.end_byte = edit - .new_end_byte - .saturating_add(range.end_byte - edit.old_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - - layer.flags |= LayerUpdateFlags::MOVED; - } - // if the edit starts in the space before and extends into the range - else if edit.start_byte < range.start_byte { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - // If the edit is an insertion at the start of the tree, shift - else if edit.start_byte == range.start_byte && is_pure_insertion { - range.start_byte = edit.new_end_byte; - range.start_point = edit.new_end_position; - layer.flags |= LayerUpdateFlags::MOVED; - } else { - range.end_byte = range - .end_byte - .saturating_sub(edit.old_end_byte) - .saturating_add(edit.new_end_byte); - range.end_point = point_add( - edit.new_end_position, - point_sub(range.end_point, edit.old_end_position), - ); - layer.flags = LayerUpdateFlags::MODIFIED; - } - } - } - } + pub fn set_scopes(&self, scopes: Vec) { + self.scopes.store(Arc::new(scopes)); - let hash = layers_hasher.hash_one(layer); - // Safety: insert_no_grow is unsafe because it assumes that the table - // has enough capacity to hold additional elements. - // This is always the case as we reserved enough capacity above. 
- unsafe { layers_table.insert_no_grow(hash, layer_id) }; + // Reconfigure existing grammars + for data in &self.languages { + data.reconfigure(&self.scopes()); } - - PARSER.with(|ts_parser| { - let ts_parser = &mut ts_parser.borrow_mut(); - ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours - let mut cursor = ts_parser.cursors.pop().unwrap_or_default(); - // TODO: might need to set cursor range - cursor.set_byte_range(0..usize::MAX); - cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let source_slice = source.slice(..); - - while let Some(layer_id) = queue.pop_front() { - let layer = &mut self.layers[layer_id]; - - // Mark the layer as touched - layer.flags |= LayerUpdateFlags::TOUCHED; - - // If a tree already exists, notify it of changes. - if let Some(tree) = &mut layer.tree { - if layer - .flags - .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED) - { - for edit in edits.iter().rev() { - // Apply the edits in reverse. - // If we applied them in order then edit 1 would disrupt the positioning of edit 2. - tree.edit(edit); - } - } - - if layer.flags.contains(LayerUpdateFlags::MODIFIED) { - // Re-parse the tree. - layer.parse(&mut ts_parser.parser, source)?; - } - } else { - // always parse if this layer has never been parsed before - layer.parse(&mut ts_parser.parser, source)?; - } - - // Switch to an immutable borrow. - let layer = &self.layers[layer_id]; - - // Process injections. - let matches = cursor.matches( - &layer.config.injections_query, - layer.tree().root_node(), - RopeProvider(source_slice), - ); - let mut combined_injections = vec![ - (None, Vec::new(), IncludedChildren::default()); - layer.config.combined_injections_patterns.len() - ]; - let mut injections = Vec::new(); - let mut last_injection_end = 0; - for mat in matches { - let (injection_capture, content_node, included_children) = layer - .config - .injection_for_match(&layer.config.injections_query, &mat, source_slice); - - // in case this is a combined injection save it for more processing later - if let Some(combined_injection_idx) = layer - .config - .combined_injections_patterns - .iter() - .position(|&pattern| pattern == mat.pattern_index) - { - let entry = &mut combined_injections[combined_injection_idx]; - if injection_capture.is_some() { - entry.0 = injection_capture; - } - if let Some(content_node) = content_node { - if content_node.start_byte() >= last_injection_end { - entry.1.push(content_node); - last_injection_end = content_node.end_byte(); - } - } - entry.2 = included_children; - continue; - } - - // Explicitly remove this match so that none of its other captures will remain - // in the stream of captures. - mat.remove(); - - // If a language is found with the given name, then add a new language layer - // to the highlighted document. 
- if let (Some(injection_capture), Some(content_node)) = - (injection_capture, content_node) - { - if let Some(config) = (injection_callback)(&injection_capture) { - let ranges = - intersect_ranges(&layer.ranges, &[content_node], included_children); - - if !ranges.is_empty() { - if content_node.start_byte() < last_injection_end { - continue; - } - last_injection_end = content_node.end_byte(); - injections.push((config, ranges)); - } - } - } - } - - for (lang_name, content_nodes, included_children) in combined_injections { - if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { - if let Some(config) = (injection_callback)(&lang_name) { - let ranges = - intersect_ranges(&layer.ranges, &content_nodes, included_children); - if !ranges.is_empty() { - injections.push((config, ranges)); - } - } - } - } - - let depth = layer.depth + 1; - // TODO: can't inline this since matches borrows self.layers - for (config, ranges) in injections { - let parent = Some(layer_id); - let new_layer = LanguageLayer { - tree: None, - config, - depth, - ranges, - flags: LayerUpdateFlags::empty(), - parent: None, - }; - - // Find an identical existing layer - let layer = layers_table - .get(layers_hasher.hash_one(&new_layer), |&it| { - self.layers[it] == new_layer - }) - .copied(); - - // ...or insert a new one. - let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer)); - self.layers[layer_id].parent = parent; - - queue.push_back(layer_id); - } - - // TODO: pre-process local scopes at this time, rather than highlight? - // would solve problems with locals not working across boundaries - } - - // Return the cursor back in the pool. - ts_parser.cursors.push(cursor); - - // Reset all `LayerUpdateFlags` and remove all untouched layers - self.layers.retain(|_, layer| { - replace(&mut layer.flags, LayerUpdateFlags::empty()) - .contains(LayerUpdateFlags::TOUCHED) - }); - - Ok(()) - }) - } - - pub fn tree(&self) -> &Tree { - self.layers[self.root].tree() - } - - /// Iterate over the highlighted regions for a given slice of source code. - pub fn highlight_iter<'a>( - &'a self, - source: RopeSlice<'a>, - range: Option>, - cancellation_flag: Option<&'a AtomicUsize>, - ) -> impl Iterator> + 'a { - let mut layers = self - .layers - .iter() - .filter_map(|(_, layer)| { - // TODO: if range doesn't overlap layer range, skip it - - // Reuse a cursor from the pool if available. - let mut cursor = PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.pop().unwrap_or_default() - }); - - // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which - // prevents them from being moved. But both of these values are really just - // pointers, so it's actually ok to move them. 
- let cursor_ref = unsafe { - mem::transmute::<&mut tree_sitter::QueryCursor, &mut tree_sitter::QueryCursor>( - &mut cursor, - ) - }; - - // if reusing cursors & no range this resets to whole range - cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX)); - cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT); - - let mut captures = cursor_ref - .captures( - &layer.config.query, - layer.tree().root_node(), - RopeProvider(source), - ) - .peekable(); - - // If there's no captures, skip the layer - captures.peek()?; - - Some(HighlightIterLayer { - highlight_end_stack: Vec::new(), - scope_stack: vec![LocalScope { - inherits: false, - range: 0..usize::MAX, - local_defs: Vec::new(), - }], - cursor, - _tree: None, - captures: RefCell::new(captures), - config: layer.config.as_ref(), // TODO: just reuse `layer` - depth: layer.depth, // TODO: just reuse `layer` - }) - }) - .collect::>(); - - layers.sort_unstable_by_key(|layer| layer.sort_key()); - - let mut result = HighlightIter { - source, - byte_offset: range.map_or(0, |r| r.start), - cancellation_flag, - iter_count: 0, - layers, - next_event: None, - last_highlight_range: None, - }; - result.sort_layers(); - result } +} - pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &Tree { - let mut container_id = self.root; - - for (layer_id, layer) in self.layers.iter() { - if layer.depth > self.layers[container_id].depth - && layer.contains_byte_range(start, end) - { - container_id = layer_id; +impl LanguageLoader for Loader { + fn language_for_marker(&self, marker: InjectionLanguageMarker) -> Option { + match marker { + InjectionLanguageMarker::Name(name) => self.language_for_name(name), + InjectionLanguageMarker::Match(text) => self.language_for_match(text), + InjectionLanguageMarker::Filename(text) => { + let path: Cow = text.into(); + self.language_for_filename(Path::new(path.as_ref())) } + InjectionLanguageMarker::Shebang(text) => self.language_for_shebang(text), } - - self.layers[container_id].tree() - } - - pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { - self.tree_for_byte_range(start, end) - .root_node() - .named_descendant_for_byte_range(start, end) - } - - pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option> { - self.tree_for_byte_range(start, end) - .root_node() - .descendant_for_byte_range(start, end) - } - - pub fn walk(&self) -> TreeCursor<'_> { - // data structure to find the smallest range that contains a point - // when some of the ranges in the structure can overlap. - TreeCursor::new(&self.layers, self.root) } - // Commenting - // comment_strings_for_pos - // is_commented - - // Indentation - // suggested_indent_for_line_at_buffer_row - // suggested_indent_for_buffer_row - // indent_level_for_line - - // TODO: Folding -} - -bitflags! 
{ - /// Flags that track the status of a layer - /// in the `Sytaxn::update` function - #[derive(Debug)] - struct LayerUpdateFlags : u32{ - const MODIFIED = 0b001; - const MOVED = 0b010; - const TOUCHED = 0b100; + fn get_config(&self, lang: Language) -> Option<&SyntaxConfig> { + self.languages[lang.idx()].syntax_config(self) } } #[derive(Debug)] -pub struct LanguageLayer { - // mode - // grammar - pub config: Arc, - pub(crate) tree: Option, - pub ranges: Vec, - pub depth: u32, - flags: LayerUpdateFlags, - parent: Option, +struct FileTypeGlob { + glob: globset::Glob, + language: Language, } -/// This PartialEq implementation only checks if that -/// two layers are theoretically identical (meaning they highlight the same text range with the same language). -/// It does not check whether the layers have the same internal treesitter -/// state. -impl PartialEq for LanguageLayer { - fn eq(&self, other: &Self) -> bool { - self.depth == other.depth - && self.config.language == other.config.language - && self.ranges == other.ranges +impl FileTypeGlob { + pub fn new(glob: globset::Glob, language: Language) -> Self { + Self { glob, language } } } -/// Hash implementation belongs to PartialEq implementation above. -/// See its documentation for details. -impl Hash for LanguageLayer { - fn hash(&self, state: &mut H) { - self.depth.hash(state); - self.config.language.hash(state); - self.ranges.hash(state); - } +#[derive(Debug)] +struct FileTypeGlobMatcher { + matcher: globset::GlobSet, + file_types: Vec, } -impl LanguageLayer { - pub fn tree(&self) -> &Tree { - // TODO: no unwrap - self.tree.as_ref().unwrap() - } - - fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> { - parser - .set_included_ranges(&self.ranges) - .map_err(|_| Error::InvalidRanges)?; - - parser - .set_language(&self.config.language) - .map_err(|_| Error::InvalidLanguage)?; - - // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) }; - let tree = parser - .parse_with( - &mut |byte, _| { - if byte <= source.len_bytes() { - let (chunk, start_byte, _, _) = source.chunk_at_byte(byte); - &chunk.as_bytes()[byte - start_byte..] - } else { - // out of range - &[] - } - }, - self.tree.as_ref(), - ) - .ok_or(Error::Cancelled)?; - // unsafe { ts_parser.parser.set_cancellation_flag(None) }; - self.tree = Some(tree); - Ok(()) - } - - /// Whether the layer contains the given byte range. - /// - /// If the layer has multiple ranges (i.e. combined injections), the - /// given range is considered contained if it is within the start and - /// end bytes of the first and last ranges **and** if the given range - /// starts or ends within any of the layer's ranges. 
- fn contains_byte_range(&self, start: usize, end: usize) -> bool { - let layer_start = self - .ranges - .first() - .expect("ranges should not be empty") - .start_byte; - let layer_end = self - .ranges - .last() - .expect("ranges should not be empty") - .end_byte; - - layer_start <= start - && layer_end >= end - && self.ranges.iter().any(|range| { - let byte_range = range.start_byte..range.end_byte; - byte_range.contains(&start) || byte_range.contains(&end) - }) +impl Default for FileTypeGlobMatcher { + fn default() -> Self { + Self { + matcher: globset::GlobSet::empty(), + file_types: Default::default(), + } } } -pub(crate) fn generate_edits( - old_text: RopeSlice, - changeset: &ChangeSet, -) -> Vec { - use Operation::*; - let mut old_pos = 0; - - let mut edits = Vec::new(); - - if changeset.changes.is_empty() { - return edits; - } - - let mut iter = changeset.changes.iter().peekable(); - - // TODO; this is a lot easier with Change instead of Operation. - - fn point_at_pos(text: RopeSlice, pos: usize) -> (usize, Point) { - let byte = text.char_to_byte(pos); // <- attempted to index past end - let line = text.char_to_line(pos); - let line_start_byte = text.line_to_byte(line); - let col = byte - line_start_byte; - - (byte, Point::new(line, col)) - } - - fn traverse(point: Point, text: &Tendril) -> Point { - let Point { - mut row, - mut column, - } = point; - - // TODO: there should be a better way here. - let mut chars = text.chars().peekable(); - while let Some(ch) = chars.next() { - if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { - row += 1; - column = 0; - } else { - column += 1; - } +impl FileTypeGlobMatcher { + fn new(file_types: Vec) -> Result { + let mut builder = globset::GlobSetBuilder::new(); + for file_type in &file_types { + builder.add(file_type.glob.clone()); } - Point { row, column } - } - while let Some(change) = iter.next() { - let len = match change { - Delete(i) | Retain(i) => *i, - Insert(_) => 0, - }; - let mut old_end = old_pos + len; - - match change { - Retain(_) => {} - Delete(_) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); - let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - - // deletion - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte, // old_pos to byte - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: start_position, // old pos to coords - }); - } - Insert(s) => { - let (start_byte, start_position) = point_at_pos(old_text, old_pos); - - // a subsequent delete means a replace, consume it - if let Some(Delete(len)) = iter.peek() { - old_end = old_pos + len; - let (old_end_byte, old_end_position) = point_at_pos(old_text, old_end); - - iter.next(); - - // replacement - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte, // old_end to byte - new_end_byte: start_byte + s.len(), // old_pos to byte + s.len() - start_position, // old pos to coords - old_end_position, // old_end to coords - new_end_position: traverse(start_position, s), // old pos + chars, newlines matter too (iter over) - }); - } else { - // insert - edits.push(tree_sitter::InputEdit { - start_byte, // old_pos to byte - old_end_byte: start_byte, // same - new_end_byte: start_byte + s.len(), // old_pos + s.len() - start_position, // old pos to coords - old_end_position: start_position, // same - new_end_position: traverse(start_position, s), // old pos + 
chars, newlines matter too (iter over) - }); - } - } - } - old_pos = old_end; + Ok(Self { + matcher: builder.build()?, + file_types, + }) } - edits -} - -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{iter, mem, ops, str}; -use tree_sitter::{ - Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, - QueryMatch, Range, TextProvider, Tree, -}; - -const CANCELLATION_CHECK_INTERVAL: usize = 100; - -/// Indicates which highlight should be applied to a region of source code. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct Highlight(pub usize); - -/// Represents the reason why syntax highlighting failed. -#[derive(Debug, PartialEq, Eq)] -pub enum Error { - Cancelled, - InvalidLanguage, - InvalidRanges, - Unknown, -} - -/// Represents a single step in rendering a syntax-highlighted document. -#[derive(Copy, Clone, Debug)] -pub enum HighlightEvent { - Source { start: usize, end: usize }, - HighlightStart(Highlight), - HighlightEnd, -} - -/// Contains the data needed to highlight code written in a particular language. -/// -/// This struct is immutable and can be shared between threads. -#[derive(Debug)] -pub struct HighlightConfiguration { - pub language: Grammar, - pub query: Query, - injections_query: Query, - combined_injections_patterns: Vec, - highlights_pattern_index: usize, - highlight_indices: ArcSwap>>, - non_local_variable_patterns: Vec, - injection_content_capture_index: Option, - injection_language_capture_index: Option, - injection_filename_capture_index: Option, - injection_shebang_capture_index: Option, - local_scope_capture_index: Option, - local_def_capture_index: Option, - local_def_value_capture_index: Option, - local_ref_capture_index: Option, -} -#[derive(Debug)] -struct LocalDef<'a> { - name: Cow<'a, str>, - value_range: ops::Range, - highlight: Option, + fn language_for_path(&self, path: &Path) -> Option { + self.matcher + .matches(path) + .iter() + .filter_map(|idx| self.file_types.get(*idx)) + .max_by_key(|file_type| file_type.glob.glob().len()) + .map(|file_type| file_type.language) + } } #[derive(Debug)] -struct LocalScope<'a> { - inherits: bool, - range: ops::Range, - local_defs: Vec>, +pub struct Syntax { + inner: tree_house::Syntax, } -#[derive(Debug)] -struct HighlightIter<'a> { - source: RopeSlice<'a>, - byte_offset: usize, - cancellation_flag: Option<&'a AtomicUsize>, - layers: Vec>, - iter_count: usize, - next_event: Option, - last_highlight_range: Option<(usize, usize, u32)>, -} +const PARSE_TIMEOUT: Duration = Duration::from_millis(500); // half a second is pretty generous -// Adapter to convert rope chunks to bytes -pub struct ChunksBytes<'a> { - chunks: ropey::iter::Chunks<'a>, -} -impl<'a> Iterator for ChunksBytes<'a> { - type Item = &'a [u8]; - fn next(&mut self) -> Option { - self.chunks.next().map(str::as_bytes) +impl Syntax { + pub fn new(source: RopeSlice, language: Language, loader: &Loader) -> Result { + let inner = tree_house::Syntax::new(source, language, PARSE_TIMEOUT, loader)?; + Ok(Self { inner }) } -} - -pub struct RopeProvider<'a>(pub RopeSlice<'a>); -impl<'a> TextProvider<&'a [u8]> for RopeProvider<'a> { - type I = ChunksBytes<'a>; - fn text(&mut self, node: Node) -> Self::I { - let fragment = self.0.byte_slice(node.start_byte()..node.end_byte()); - ChunksBytes { - chunks: fragment.chunks(), + pub fn update( + &mut self, + old_source: RopeSlice, + source: RopeSlice, + changeset: &ChangeSet, + loader: &Loader, + ) -> Result<(), Error> { + let edits = generate_edits(old_source, changeset); + 
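        // `generate_edits` (defined further down in this module) converts the Helix
        // `ChangeSet` into `tree_sitter::InputEdit`s so that `tree_house::Syntax` can
        // re-parse incrementally; when the changeset yields no edits (for example, it
        // only retains text), the existing trees are left untouched.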
if edits.is_empty() { + Ok(()) + } else { + self.inner.update(source, PARSE_TIMEOUT, &edits, loader) } } -} - -struct HighlightIterLayer<'a> { - _tree: Option, - cursor: QueryCursor, - captures: RefCell, &'a [u8]>>>, - config: &'a HighlightConfiguration, - highlight_end_stack: Vec, - scope_stack: Vec>, - depth: u32, -} -impl fmt::Debug for HighlightIterLayer<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("HighlightIterLayer").finish() + pub fn layer(&self, layer: Layer) -> &tree_house::LayerData { + self.inner.layer(layer) } -} - -impl HighlightConfiguration { - /// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting - /// queries. - /// - /// # Parameters - /// - /// * `language` - The Tree-sitter `Grammar` that should be used for parsing. - /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This - /// should be non-empty, otherwise no syntax highlights will be added. - /// * `injections_query` - A string containing tree patterns for injecting other languages - /// into the document. This can be empty if no injections are desired. - /// * `locals_query` - A string containing tree patterns for tracking local variable - /// definitions and references. This can be empty if local variable tracking is not needed. - /// - /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. - pub fn new( - language: Grammar, - highlights_query: &str, - injection_query: &str, - locals_query: &str, - ) -> Result { - // Concatenate the query strings, keeping track of the start offset of each section. - let mut query_source = String::new(); - query_source.push_str(locals_query); - let highlights_query_offset = query_source.len(); - query_source.push_str(highlights_query); - - // Construct a single query by concatenating the three query strings, but record the - // range of pattern indices that belong to each individual string. - let query = Query::new(&language, &query_source)?; - let mut highlights_pattern_index = 0; - for i in 0..(query.pattern_count()) { - let pattern_offset = query.start_byte_for_pattern(i); - if pattern_offset < highlights_query_offset { - highlights_pattern_index += 1; - } - } - let injections_query = Query::new(&language, injection_query)?; - let combined_injections_patterns = (0..injections_query.pattern_count()) - .filter(|&i| { - injections_query - .property_settings(i) - .iter() - .any(|s| &*s.key == "injection.combined") - }) - .collect(); - - // Find all of the highlighting patterns that are disabled for nodes that - // have been identified as local variables. - let non_local_variable_patterns = (0..query.pattern_count()) - .map(|i| { - query - .property_predicates(i) - .iter() - .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") - }) - .collect(); - - // Store the numeric ids for all of the special captures. 
- let mut injection_content_capture_index = None; - let mut injection_language_capture_index = None; - let mut injection_filename_capture_index = None; - let mut injection_shebang_capture_index = None; - let mut local_def_capture_index = None; - let mut local_def_value_capture_index = None; - let mut local_ref_capture_index = None; - let mut local_scope_capture_index = None; - for (i, name) in query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "local.definition" => local_def_capture_index = i, - "local.definition-value" => local_def_value_capture_index = i, - "local.reference" => local_ref_capture_index = i, - "local.scope" => local_scope_capture_index = i, - _ => {} - } - } - - for (i, name) in injections_query.capture_names().iter().enumerate() { - let i = Some(i as u32); - match *name { - "injection.content" => injection_content_capture_index = i, - "injection.language" => injection_language_capture_index = i, - "injection.filename" => injection_filename_capture_index = i, - "injection.shebang" => injection_shebang_capture_index = i, - _ => {} - } - } + pub fn root_layer(&self) -> Layer { + self.inner.root() + } - let highlight_indices = ArcSwap::from_pointee(vec![None; query.capture_names().len()]); - Ok(Self { - language, - query, - injections_query, - combined_injections_patterns, - highlights_pattern_index, - highlight_indices, - non_local_variable_patterns, - injection_content_capture_index, - injection_language_capture_index, - injection_filename_capture_index, - injection_shebang_capture_index, - local_scope_capture_index, - local_def_capture_index, - local_def_value_capture_index, - local_ref_capture_index, - }) + pub fn layer_for_byte_range(&self, start: u32, end: u32) -> Layer { + self.inner.layer_for_byte_range(start, end) } - /// Get a slice containing all of the highlight names used in the configuration. - pub fn names(&self) -> &[&str] { - self.query.capture_names() + pub fn root_language(&self) -> Language { + self.layer(self.root_layer()).language } - /// Set the list of recognized highlight names. - /// - /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated - /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of - /// these queries can choose to recognize highlights with different levels of specificity. - /// For example, the string `function.builtin` will match against `function.builtin.constructor` - /// but will not match `function.method.builtin` and `function.method`. - /// - /// When highlighting, results are returned as `Highlight` values, which contain the index - /// of the matched highlight this list of highlight names. 
- pub fn configure(&self, recognized_names: &[String]) { - let mut capture_parts = Vec::new(); - let indices: Vec<_> = self - .query - .capture_names() - .iter() - .map(move |capture_name| { - capture_parts.clear(); - capture_parts.extend(capture_name.split('.')); - - let mut best_index = None; - let mut best_match_len = 0; - for (i, recognized_name) in recognized_names.iter().enumerate() { - let mut len = 0; - let mut matches = true; - for (i, part) in recognized_name.split('.').enumerate() { - match capture_parts.get(i) { - Some(capture_part) if *capture_part == part => len += 1, - _ => { - matches = false; - break; - } - } - } - if matches && len > best_match_len { - best_index = Some(i); - best_match_len = len; - } - } - best_index.map(Highlight) - }) - .collect(); + pub fn tree(&self) -> &Tree { + self.inner.tree() + } - self.highlight_indices.store(Arc::new(indices)); + pub fn tree_for_byte_range(&self, start: u32, end: u32) -> &Tree { + self.inner.tree_for_byte_range(start, end) } - fn injection_pair<'a>( - &self, - query_match: &QueryMatch<'a, 'a>, - source: RopeSlice<'a>, - ) -> (Option>, Option>) { - let mut injection_capture = None; - let mut content_node = None; - - for capture in query_match.captures { - let index = Some(capture.index); - if index == self.injection_language_capture_index { - injection_capture = Some(InjectionLanguageMarker::Name( - source.byte_slice(capture.node.byte_range()), - )); - } else if index == self.injection_filename_capture_index { - injection_capture = Some(InjectionLanguageMarker::Filename( - source.byte_slice(capture.node.byte_range()), - )); - } else if index == self.injection_shebang_capture_index { - let node_slice = source.byte_slice(capture.node.byte_range()); - - // some languages allow space and newlines before the actual string content - // so a shebang could be on either the first or second line - let lines = if let Ok(end) = node_slice.try_line_to_byte(2) { - node_slice.byte_slice(..end) - } else { - node_slice - }; + pub fn named_descendant_for_byte_range(&self, start: u32, end: u32) -> Option { + self.inner.named_descendant_for_byte_range(start, end) + } - static SHEBANG_REGEX: Lazy = - Lazy::new(|| rope::Regex::new(SHEBANG).unwrap()); + pub fn descendant_for_byte_range(&self, start: u32, end: u32) -> Option { + self.inner.descendant_for_byte_range(start, end) + } - injection_capture = SHEBANG_REGEX - .captures_iter(lines.regex_input()) - .map(|cap| { - let cap = lines.byte_slice(cap.get_group(1).unwrap().range()); - InjectionLanguageMarker::Shebang(cap) - }) - .next() - } else if index == self.injection_content_capture_index { - content_node = Some(capture.node); - } - } - (injection_capture, content_node) + pub fn walk(&self) -> TreeCursor { + self.inner.walk() } - fn injection_for_match<'a>( - &self, - query: &'a Query, - query_match: &QueryMatch<'a, 'a>, + pub fn highlighter<'a>( + &'a self, source: RopeSlice<'a>, - ) -> ( - Option>, - Option>, - IncludedChildren, - ) { - let (mut injection_capture, content_node) = self.injection_pair(query_match, source); - - let mut included_children = IncludedChildren::default(); - for prop in query.property_settings(query_match.pattern_index) { - match prop.key.as_ref() { - // In addition to specifying the language name via the text of a - // captured node, it can also be hard-coded via a `#set!` predicate - // that sets the injection.language key. 
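    // Byte positions on the new `Syntax` wrapper above are `u32` rather than `usize`,
    // and highlighting now goes through the `highlighter`/`query_iter` constructors,
    // with the `Loader` (or, for `query_iter`, a `FnMut(Language) -> Option<&Query>`
    // closure) supplying the per-language queries.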
- "injection.language" if injection_capture.is_none() => { - injection_capture = prop - .value - .as_deref() - .map(InjectionLanguageMarker::LanguageId); - } - - // By default, injections do not include the *children* of an - // `injection.content` node - only the ranges that belong to the - // node itself. This can be changed using a `#set!` predicate that - // sets the `injection.include-children` key. - "injection.include-children" => included_children = IncludedChildren::All, - - // Some queries might only exclude named children but include unnamed - // children in their `injection.content` node. This can be enabled using - // a `#set!` predicate that sets the `injection.include-unnamed-children` key. - "injection.include-unnamed-children" => { - included_children = IncludedChildren::Unnamed - } - _ => {} - } - } - - (injection_capture, content_node, included_children) + loader: &'a Loader, + range: impl RangeBounds, + ) -> Highlighter<'a> { + Highlighter::new(&self.inner, source, loader, range) } -} -impl HighlightIterLayer<'_> { - // First, sort scope boundaries by their byte offset in the document. At a - // given position, emit scope endings before scope beginnings. Finally, emit - // scope boundaries from deeper layers first. - fn sort_key(&self) -> Option<(usize, bool, isize)> { - let depth = -(self.depth as isize); - let next_start = self - .captures - .borrow_mut() - .peek() - .map(|(m, i)| m.captures[*i].node.start_byte()); - let next_end = self.highlight_end_stack.last().cloned(); - match (next_start, next_end) { - (Some(start), Some(end)) => { - if start < end { - Some((start, true, depth)) - } else { - Some((end, false, depth)) - } - } - (Some(i), None) => Some((i, true, depth)), - (None, Some(j)) => Some((j, false, depth)), - _ => None, - } + pub fn query_iter<'a, QueryLoader, LayerState, Range>( + &'a self, + source: RopeSlice<'a>, + loader: QueryLoader, + range: Range, + ) -> QueryIter<'a, 'a, QueryLoader, LayerState> + where + QueryLoader: FnMut(Language) -> Option<&'a Query> + 'a, + LayerState: Default, + Range: RangeBounds, + { + QueryIter::new(&self.inner, source, loader, range) } } -#[derive(Clone)] -enum IncludedChildren { - None, - All, - Unnamed, -} +pub type Highlighter<'a> = highlighter::Highlighter<'a, 'a, Loader>; -impl Default for IncludedChildren { - fn default() -> Self { - Self::None +fn generate_edits(old_text: RopeSlice, changeset: &ChangeSet) -> Vec { + use crate::Operation::*; + use tree_sitter::Point; + + let mut old_pos = 0; + + let mut edits = Vec::new(); + + if changeset.changes.is_empty() { + return edits; } -} -// Compute the ranges that should be included when parsing an injection. -// This takes into account three things: -// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. -// * `nodes` - Every injection takes place within a set of nodes. The injection ranges -// are the ranges of those nodes. -// * `includes_children` - For some injections, the content nodes' children should be -// excluded from the nested document, so that only the content nodes' *own* content -// is reparsed. For other injections, the content nodes' entire ranges should be -// reparsed, including the ranges of their children. 
-fn intersect_ranges( - parent_ranges: &[Range], - nodes: &[Node], - included_children: IncludedChildren, -) -> Vec { - let mut cursor = nodes[0].walk(); - let mut result = Vec::new(); - let mut parent_range_iter = parent_ranges.iter(); - let mut parent_range = parent_range_iter - .next() - .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { - let mut preceding_range = Range { - start_byte: 0, - start_point: Point::new(0, 0), - end_byte: node.start_byte(), - end_point: node.start_position(), - }; - let following_range = Range { - start_byte: node.end_byte(), - start_point: node.end_position(), - end_byte: usize::MAX, - end_point: Point::new(usize::MAX, usize::MAX), + let mut iter = changeset.changes.iter().peekable(); + + // TODO; this is a lot easier with Change instead of Operation. + while let Some(change) = iter.next() { + let len = match change { + Delete(i) | Retain(i) => *i, + Insert(_) => 0, }; + let mut old_end = old_pos + len; - for excluded_range in node - .children(&mut cursor) - .filter_map(|child| match included_children { - IncludedChildren::None => Some(child.range()), - IncludedChildren::All => None, - IncludedChildren::Unnamed => { - if child.is_named() { - Some(child.range()) - } else { - None - } - } - }) - .chain([following_range].iter().cloned()) - { - let mut range = Range { - start_byte: preceding_range.end_byte, - start_point: preceding_range.end_point, - end_byte: excluded_range.start_byte, - end_point: excluded_range.start_point, - }; - preceding_range = excluded_range; - - if range.end_byte < parent_range.start_byte { - continue; + match change { + Retain(_) => {} + Delete(_) => { + let start_byte = old_text.char_to_byte(old_pos) as u32; + let old_end_byte = old_text.char_to_byte(old_end) as u32; + + // deletion + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte, // old_pos to byte + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, + }); } + Insert(s) => { + let start_byte = old_text.char_to_byte(old_pos) as u32; - while parent_range.start_byte <= range.end_byte { - if parent_range.end_byte > range.start_byte { - if range.start_byte < parent_range.start_byte { - range.start_byte = parent_range.start_byte; - range.start_point = parent_range.start_point; - } + // a subsequent delete means a replace, consume it + if let Some(Delete(len)) = iter.peek() { + old_end = old_pos + len; + let old_end_byte = old_text.char_to_byte(old_end) as u32; - if parent_range.end_byte < range.end_byte { - if range.start_byte < parent_range.end_byte { - result.push(Range { - start_byte: range.start_byte, - start_point: range.start_point, - end_byte: parent_range.end_byte, - end_point: parent_range.end_point, - }); - } - range.start_byte = parent_range.end_byte; - range.start_point = parent_range.end_point; - } else { - if range.start_byte < range.end_byte { - result.push(range); - } - break; - } - } + iter.next(); - if let Some(next_range) = parent_range_iter.next() { - parent_range = next_range; + // replacement + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte, // old_end to byte + new_end_byte: start_byte + s.len() as u32, // old_pos to byte + s.len() + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, + }); } else { - return result; + // insert + edits.push(InputEdit { + start_byte, // old_pos to byte + old_end_byte: start_byte, // same + new_end_byte: start_byte + 
s.len() as u32, // old_pos + s.len() + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, + }); } } } + old_pos = old_end; } - result + edits } -impl HighlightIter<'_> { - fn emit_event( - &mut self, - offset: usize, - event: Option, - ) -> Option> { - let result; - if self.byte_offset < offset { - result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: offset, - })); - self.byte_offset = offset; - self.next_event = event; - } else { - result = event.map(Ok); +/// A set of "overlay" highlights and ranges they apply to. +/// +/// As overlays, the styles for the given `Highlight`s are merged on top of the syntax highlights. +#[derive(Debug)] +pub enum OverlayHighlights { + /// All highlights use a single `Highlight`. + /// + /// Note that, currently, all ranges are assumed to be non-overlapping. This could change in + /// the future though. + Homogeneous { + highlight: Highlight, + ranges: Vec>, + }, + /// A collection of different highlights for given ranges. + /// + /// Note that the ranges **must be non-overlapping**. + Heterogenous { + highlights: Vec<(Highlight, ops::Range)>, + }, +} + +impl OverlayHighlights { + pub fn single(highlight: Highlight, range: ops::Range) -> Self { + Self::Homogeneous { + highlight, + ranges: vec![range], } - self.sort_layers(); - result - } - - fn sort_layers(&mut self) { - while !self.layers.is_empty() { - if let Some(sort_key) = self.layers[0].sort_key() { - let mut i = 0; - while i + 1 < self.layers.len() { - if let Some(next_offset) = self.layers[i + 1].sort_key() { - if next_offset < sort_key { - i += 1; - continue; - } - } else { - let layer = self.layers.remove(i + 1); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); - } - break; - } - if i > 0 { - self.layers[0..(i + 1)].rotate_left(1); - } - break; - } else { - let layer = self.layers.remove(0); - PARSER.with(|ts_parser| { - let highlighter = &mut ts_parser.borrow_mut(); - highlighter.cursors.push(layer.cursor); - }); - } + } + + fn is_empty(&self) -> bool { + match self { + Self::Homogeneous { ranges, .. } => ranges.is_empty(), + Self::Heterogenous { highlights } => highlights.is_empty(), } } } -impl Iterator for HighlightIter<'_> { - type Item = Result; +#[derive(Debug)] +struct Overlay { + highlights: OverlayHighlights, + /// The position of the highlighter into the Vec of ranges of the overlays. + /// + /// Used by the `OverlayHighlighter`. + idx: usize, + /// The currently active highlight (and the ending character index) for this overlay. + /// + /// Used by the `OverlayHighlighter`. + active_highlight: Option<(Highlight, usize)>, +} - fn next(&mut self) -> Option { - 'main: loop { - // If we've already determined the next highlight boundary, just return it. - if let Some(e) = self.next_event.take() { - return Some(Ok(e)); - } +impl Overlay { + fn new(highlights: OverlayHighlights) -> Option { + (!highlights.is_empty()).then_some(Self { + highlights, + idx: 0, + active_highlight: None, + }) + } - // Periodically check for cancellation, returning `Cancelled` error if the - // cancellation flag was flipped. 
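// One detail of the replacement `generate_edits` above: it only tracks byte offsets,
// filling every `InputEdit` position with `Point::ZERO`, whereas the removed version
// computed real row/column `Point`s from the rope.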
- if let Some(cancellation_flag) = self.cancellation_flag { - self.iter_count += 1; - if self.iter_count >= CANCELLATION_CHECK_INTERVAL { - self.iter_count = 0; - if cancellation_flag.load(Ordering::Relaxed) != 0 { - return Some(Err(Error::Cancelled)); - } - } - } + fn current(&self) -> Option<(Highlight, ops::Range)> { + match &self.highlights { + OverlayHighlights::Homogeneous { highlight, ranges } => ranges + .get(self.idx) + .map(|range| (*highlight, range.clone())), + OverlayHighlights::Heterogenous { highlights } => highlights.get(self.idx).cloned(), + } + } - // If none of the layers have any more highlight boundaries, terminate. - if self.layers.is_empty() { - let len = self.source.len_bytes(); - return if self.byte_offset < len { - let result = Some(Ok(HighlightEvent::Source { - start: self.byte_offset, - end: len, - })); - self.byte_offset = len; - result - } else { - None - }; + fn start(&self) -> Option { + match &self.highlights { + OverlayHighlights::Homogeneous { ranges, .. } => { + ranges.get(self.idx).map(|range| range.start) } + OverlayHighlights::Heterogenous { highlights } => highlights + .get(self.idx) + .map(|(_highlight, range)| range.start), + } + } +} - // Get the next capture from whichever layer has the earliest highlight boundary. - let range; - let layer = &mut self.layers[0]; - let captures = layer.captures.get_mut(); - if let Some((next_match, capture_index)) = captures.peek() { - let next_capture = next_match.captures[*capture_index]; - range = next_capture.node.byte_range(); - - // If any previous highlight ends before this node starts, then before - // processing this capture, emit the source code up until the end of the - // previous highlight, and an end event for that highlight. - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - if end_byte <= range.start { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } - } - } - // If there are no more captures, then emit any remaining highlight end events. - // And if there are none of those, then just advance to the end of the document. - else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } else { - return self.emit_event(self.source.len_bytes(), None); - }; +/// A collection of highlights to apply when rendering which merge on top of syntax highlights. +#[derive(Debug)] +pub struct OverlayHighlighter { + overlays: Vec, + next_highlight_start: usize, + next_highlight_end: usize, +} - let (mut match_, capture_index) = captures.next().unwrap(); - let mut capture = match_.captures[capture_index]; +impl OverlayHighlighter { + pub fn new(overlays: impl IntoIterator) -> Self { + let overlays: Vec<_> = overlays.into_iter().filter_map(Overlay::new).collect(); + let next_highlight_start = overlays + .iter() + .filter_map(|overlay| overlay.start()) + .min() + .unwrap_or(usize::MAX); - // Remove from the local scope stack any local scopes that have already ended. - while range.start > layer.scope_stack.last().unwrap().range.end { - layer.scope_stack.pop(); - } + Self { + overlays, + next_highlight_start, + next_highlight_end: usize::MAX, + } + } - // If this capture is for tracking local variables, then process the - // local variable info. 
- let mut reference_highlight = None; - let mut definition_highlight = None; - while match_.pattern_index < layer.config.highlights_pattern_index { - // If the node represents a local scope, push a new local scope onto - // the scope stack. - if Some(capture.index) == layer.config.local_scope_capture_index { - definition_highlight = None; - let mut scope = LocalScope { - inherits: true, - range: range.clone(), - local_defs: Vec::new(), - }; - for prop in layer.config.query.property_settings(match_.pattern_index) { - if let "local.scope-inherits" = prop.key.as_ref() { - scope.inherits = - prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); - } - } - layer.scope_stack.push(scope); - } - // If the node represents a definition, add a new definition to the - // local scope at the top of the scope stack. - else if Some(capture.index) == layer.config.local_def_capture_index { - reference_highlight = None; - let scope = layer.scope_stack.last_mut().unwrap(); - - let mut value_range = 0..0; - for capture in match_.captures { - if Some(capture.index) == layer.config.local_def_value_capture_index { - value_range = capture.node.byte_range(); - } - } + /// The current position in the overlay highlights. + /// + /// This method is meant to be used when treating this type as a cursor over the overlay + /// highlights. + /// + /// `usize::MAX` is returned when there are no more overlay highlights. + pub fn next_event_offset(&self) -> usize { + self.next_highlight_start.min(self.next_highlight_end) + } - let name = byte_range_to_str(range.clone(), self.source); - scope.local_defs.push(LocalDef { - name, - value_range, - highlight: None, - }); - definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight); - } - // If the node represents a reference, then try to find the corresponding - // definition in the scope stack. - else if Some(capture.index) == layer.config.local_ref_capture_index - && definition_highlight.is_none() + pub fn advance(&mut self) -> (HighlightEvent, impl Iterator + '_) { + let mut refresh = false; + let prev_stack_size = self + .overlays + .iter() + .filter(|overlay| overlay.active_highlight.is_some()) + .count(); + let pos = self.next_event_offset(); + + if self.next_highlight_end == pos { + for overlay in self.overlays.iter_mut() { + if overlay + .active_highlight + .is_some_and(|(_highlight, end)| end == pos) { - definition_highlight = None; - let name = byte_range_to_str(range.clone(), self.source); - for scope in layer.scope_stack.iter().rev() { - if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { - if def.name == name && range.start >= def.value_range.end { - Some(def.highlight) - } else { - None - } - }) { - reference_highlight = highlight; - break; - } - if !scope.inherits { - break; - } - } - } - - // Continue processing any additional matches for the same node. - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - - // Otherwise, this capture must represent a highlight. - // If this exact range has already been highlighted by an earlier pattern, or by - // a different layer, then skip over this one. 
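    // Sketch of driving the overlay types above as a cursor (assuming `hl_a` and `hl_b`
    // are `Highlight` values obtained elsewhere, e.g. from the theme):
    //
    //     let a = OverlayHighlights::Homogeneous { highlight: hl_a, ranges: vec![4..10, 15..20] };
    //     let b = OverlayHighlights::single(hl_b, 12..13);
    //     let mut overlays = OverlayHighlighter::new([a, b]);
    //     let pos = overlays.next_event_offset();   // next start/end position, usize::MAX when done
    //     let (event, active) = overlays.advance(); // event plus highlights active from `pos`
    //
    // `advance` yields all active overlay highlights after a `Refresh` and only the newly
    // added ones after a `Push`.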
- if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { - if range.start == last_start && range.end == last_end && layer.depth < last_depth { - self.sort_layers(); - continue 'main; + overlay.active_highlight.take(); } } - // If the current node was found to be a local variable, then skip over any - // highlighting patterns that are disabled for local variables. - if definition_highlight.is_some() || reference_highlight.is_some() { - while layer.config.non_local_variable_patterns[match_.pattern_index] { - match_.remove(); - if let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - } + refresh = true; + } - // Use the last capture found for the current node, skipping over any - // highlight patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent - // captures are guaranteed to be for highlighting, not injections or - // local variables. - while let Some((next_match, next_capture_index)) = captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - match_.remove(); - capture = next_capture; - match_ = captures.next().unwrap().0; - } else { - break; + while self.next_highlight_start == pos { + let mut activated_idx = usize::MAX; + for (idx, overlay) in self.overlays.iter_mut().enumerate() { + let Some((highlight, range)) = overlay.current() else { + continue; + }; + if range.start != self.next_highlight_start { + continue; } - } - - let current_highlight = layer.config.highlight_indices.load()[capture.index as usize]; - // If this node represents a local definition, then store the current - // highlight value on the local scope entry representing this node. - if let Some(definition_highlight) = definition_highlight { - *definition_highlight = current_highlight; - } + // If this overlay has a highlight at this start index, set its active highlight + // and increment the cursor position within the overlay. + overlay.active_highlight = Some((highlight, range.end)); + overlay.idx += 1; - // Emit a scope start event and push the node's end position to the stack. - if let Some(highlight) = reference_highlight.or(current_highlight) { - self.last_highlight_range = Some((range.start, range.end, layer.depth)); - layer.highlight_end_stack.push(range.end); - return self - .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); + activated_idx = activated_idx.min(idx); } - self.sort_layers(); + // If `self.next_highlight_start == pos` that means that some overlay was ready to + // emit a highlight, so `activated_idx` must have been set to an existing index. + assert!( + (0..self.overlays.len()).contains(&activated_idx), + "expected an overlay to highlight (at pos {pos}, there are {} overlays)", + self.overlays.len() + ); + + // If any overlays are active after the (lowest) one which was just activated, the + // highlights need to be refreshed. + refresh |= self.overlays[activated_idx..] 
+ .iter() + .any(|overlay| overlay.active_highlight.is_some()); + + self.next_highlight_start = self + .overlays + .iter() + .filter_map(|overlay| overlay.start()) + .min() + .unwrap_or(usize::MAX); } - } -} - -#[derive(Debug, Clone)] -pub enum InjectionLanguageMarker<'a> { - /// The language is specified by `LanguageConfiguration`'s `language_id` field. - /// - /// This marker is used when a pattern sets the `injection.language` property, for example - /// `(#set! injection.language "rust")`. - LanguageId(&'a str), - /// The language is specified in the document and captured by `@injection.language`. - /// - /// This is used for markdown code fences for example. While the `LanguageId` variant can be - /// looked up by finding the language config that sets an `language_id`, this variant contains - /// text from the document being highlighted, so the text is checked against each language's - /// `injection_regex`. - Name(RopeSlice<'a>), - Filename(RopeSlice<'a>), - Shebang(RopeSlice<'a>), -} - -const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)"; -pub struct Merge { - iter: I, - spans: Box)>>, + self.next_highlight_end = self + .overlays + .iter() + .filter_map(|overlay| Some(overlay.active_highlight?.1)) + .min() + .unwrap_or(usize::MAX); - next_event: Option, - next_span: Option<(usize, std::ops::Range)>, + let (event, start) = if refresh { + (HighlightEvent::Refresh, 0) + } else { + (HighlightEvent::Push, prev_stack_size) + }; - queue: Vec, + ( + event, + self.overlays + .iter() + .flat_map(|overlay| overlay.active_highlight) + .map(|(highlight, _end)| highlight) + .skip(start), + ) + } } -/// Merge a list of spans into the highlight event stream. -pub fn merge>( - iter: I, - spans: Vec<(usize, std::ops::Range)>, -) -> Merge { - let spans = Box::new(spans.into_iter()); - let mut merge = Merge { - iter, - spans, - next_event: None, - next_span: None, - queue: Vec::new(), - }; - merge.next_event = merge.iter.next(); - merge.next_span = merge.spans.next(); - merge +#[derive(Debug)] +pub enum CapturedNode<'a> { + Single(Node<'a>), + /// Guaranteed to be not empty + Grouped(Vec>), } -impl> Iterator for Merge { - type Item = HighlightEvent; - fn next(&mut self) -> Option { - use HighlightEvent::*; - if let Some(event) = self.queue.pop() { - return Some(event); +impl CapturedNode<'_> { + pub fn start_byte(&self) -> usize { + match self { + Self::Single(n) => n.start_byte() as usize, + Self::Grouped(ns) => ns[0].start_byte() as usize, } + } - loop { - match (self.next_event, &self.next_span) { - // this happens when range is partially or fully offscreen - (Some(Source { start, .. 
}), Some((span, range))) if start > range.start => { - if start > range.end { - self.next_span = self.spans.next(); - } else { - self.next_span = Some((*span, start..range.end)); - }; - } - _ => break, - } + pub fn end_byte(&self) -> usize { + match self { + Self::Single(n) => n.end_byte() as usize, + Self::Grouped(ns) => ns.last().unwrap().end_byte() as usize, } + } - match (self.next_event, &self.next_span) { - (Some(HighlightStart(i)), _) => { - self.next_event = self.iter.next(); - Some(HighlightStart(i)) - } - (Some(HighlightEnd), _) => { - self.next_event = self.iter.next(); - Some(HighlightEnd) - } - (Some(Source { start, end }), Some((_, range))) if start < range.start => { - let intersect = range.start.min(end); - let event = Source { - start, - end: intersect, - }; - - if end == intersect { - // the event is complete - self.next_event = self.iter.next(); - } else { - // subslice the event - self.next_event = Some(Source { - start: intersect, - end, - }); - }; - - Some(event) - } - (Some(Source { start, end }), Some((span, range))) if start == range.start => { - let intersect = range.end.min(end); - let event = HighlightStart(Highlight(*span)); - - // enqueue in reverse order - self.queue.push(HighlightEnd); - self.queue.push(Source { - start, - end: intersect, - }); + pub fn byte_range(&self) -> ops::Range { + self.start_byte()..self.end_byte() + } +} - if end == intersect { - // the event is complete - self.next_event = self.iter.next(); - } else { - // subslice the event - self.next_event = Some(Source { - start: intersect, - end, - }); - }; +#[derive(Debug)] +pub struct TextObjectQuery { + query: Query, +} - if intersect == range.end { - self.next_span = self.spans.next(); - } else { - self.next_span = Some((*span, intersect..range.end)); - } +impl TextObjectQuery { + pub fn new(query: Query) -> Self { + Self { query } + } - Some(event) - } - (Some(event), None) => { - self.next_event = self.iter.next(); - Some(event) - } - // Can happen if cursor at EOF and/or diagnostic reaches past the end. - // We need to actually emit events for the cursor-at-EOF situation, - // even though the range is past the end of the text. This needs to be - // handled appropriately by the drawing code by not assuming that - // all `Source` events point to valid indices in the rope. - (None, Some((span, range))) => { - let event = HighlightStart(Highlight(*span)); - self.queue.push(HighlightEnd); - self.queue.push(Source { - start: range.start, - end: range.end, - }); - self.next_span = self.spans.next(); - Some(event) - } - (None, None) => None, - e => unreachable!("{:?}", e), - } + /// Run the query on the given node and return sub nodes which match given + /// capture ("function.inside", "class.around", etc). + /// + /// Captures may contain multiple nodes by using quantifiers (+, *, etc), + /// and support for this is partial and could use improvement. + /// + /// ```query + /// (comment)+ @capture + /// + /// ; OR + /// ( + /// (comment)* + /// . + /// (function) + /// ) @capture + /// ``` + pub fn capture_nodes<'a>( + &'a self, + capture_name: &str, + node: &Node<'a>, + slice: RopeSlice<'a>, + ) -> Option>> { + self.capture_nodes_any(&[capture_name], node, slice) } -} -fn node_is_visible(node: &Node) -> bool { - node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id())) -} + /// Find the first capture that exists out of all given `capture_names` + /// and return sub nodes that match this capture. 
+ pub fn capture_nodes_any<'a>( + &'a self, + capture_names: &[&str], + node: &Node<'a>, + slice: RopeSlice<'a>, + ) -> Option>> { + let capture = capture_names + .iter() + .find_map(|cap| self.query.get_capture(cap))?; -fn format_anonymous_node_kind(kind: &str) -> Cow { - if kind.contains('"') { - Cow::Owned(kind.replace('"', "\\\"")) - } else { - Cow::Borrowed(kind) + let mut cursor = InactiveQueryCursor::new(); + cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT); + let mut cursor = cursor.execute_query(&self.query, node, RopeInput::new(slice)); + let capture_node = iter::from_fn(move || { + let (mat, _) = cursor.next_matched_node()?; + Some(mat.nodes_for_capture(capture).cloned().collect()) + }) + .filter_map(move |nodes: Vec<_>| { + if nodes.len() > 1 { + Some(CapturedNode::Grouped(nodes)) + } else { + nodes.into_iter().map(CapturedNode::Single).next() + } + }); + Some(capture_node) } } @@ -2123,6 +879,18 @@ pub fn pretty_print_tree(fmt: &mut W, node: Node) -> fmt::Result } } +fn node_is_visible(node: &Node) -> bool { + node.is_missing() || (node.is_named() && node.grammar().node_kind_is_visible(node.kind_id())) +} + +fn format_anonymous_node_kind(kind: &str) -> Cow { + if kind.contains('"') { + Cow::Owned(kind.replace('"', "\\\"")) + } else { + Cow::Borrowed(kind) + } +} + fn pretty_print_tree_impl( fmt: &mut W, cursor: &mut tree_sitter::TreeCursor, @@ -2173,9 +941,13 @@ fn pretty_print_tree_impl( #[cfg(test)] mod test { + use once_cell::sync::Lazy; + use super::*; use crate::{Rope, Transaction}; + static LOADER: Lazy = Lazy::new(|| crate::config::user_lang_loader().unwrap()); + #[test] fn test_textobject_queries() { let query_str = r#" @@ -2190,29 +962,16 @@ mod test { "#, ); - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - let language = get_language("rust").unwrap(); - - let query = Query::new(&language, query_str).unwrap(); - let textobject = TextObjectQuery { query }; - let mut cursor = QueryCursor::new(); - - let config = HighlightConfiguration::new(language, "", "", "").unwrap(); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let language = LOADER.language_for_name("rust").unwrap(); + let grammar = LOADER.get_config(language).unwrap().grammar; + let query = Query::new(grammar, query_str, |_, _| Ok(())).unwrap(); + let textobject = TextObjectQuery::new(query); + let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); let root = syntax.tree().root_node(); - let mut test = |capture, range| { + let test = |capture, range| { let matches: Vec<_> = textobject - .capture_nodes(capture, root, source.slice(..), &mut cursor) + .capture_nodes(capture, &root, source.slice(..)) .unwrap() .collect(); @@ -2231,83 +990,9 @@ mod test { // test("multiple_nodes_grouped", 1..37); } - #[test] - fn test_parser() { - let highlight_names: Vec = [ - "attribute", - "constant", - "function.builtin", - "function", - "keyword", - "operator", - "property", - "punctuation", - "punctuation.bracket", - "punctuation.delimiter", - "string", - "string.special", - "tag", - "type", - "type.builtin", - "variable", - "variable.builtin", - "variable.parameter", - ] - .iter() - .cloned() - .map(String::from) - .collect(); - - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - - let language = get_language("rust").unwrap(); - let config = HighlightConfiguration::new( - language, - 
&std::fs::read_to_string("../runtime/grammars/sources/rust/queries/highlights.scm") - .unwrap(), - &std::fs::read_to_string("../runtime/grammars/sources/rust/queries/injections.scm") - .unwrap(), - "", // locals.scm - ) - .unwrap(); - config.configure(&highlight_names); - - let source = Rope::from_str( - " - struct Stuff {} - fn main() {} - ", - ); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); - let tree = syntax.tree(); - let root = tree.root_node(); - assert_eq!(root.kind(), "source_file"); - - assert_eq!( - root.to_sexp(), - concat!( - "(source_file ", - "(struct_item name: (type_identifier) body: (field_declaration_list)) ", - "(function_item name: (identifier) parameters: (parameters) body: (block)))" - ) - ); - - let struct_node = root.child(0).unwrap(); - assert_eq!(struct_node.kind(), "struct_item"); - } - #[test] fn test_input_edits() { - use tree_sitter::InputEdit; + use tree_sitter::{InputEdit, Point}; let doc = Rope::from("hello world!\ntest 123"); let transaction = Transaction::change( @@ -2324,17 +1009,17 @@ mod test { start_byte: 6, old_end_byte: 11, new_end_byte: 10, - start_position: Point { row: 0, column: 6 }, - old_end_position: Point { row: 0, column: 11 }, - new_end_position: Point { row: 0, column: 10 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO }, InputEdit { start_byte: 12, old_end_byte: 17, new_end_byte: 12, - start_position: Point { row: 0, column: 12 }, - old_end_position: Point { row: 1, column: 4 }, - new_end_position: Point { row: 0, column: 12 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO } ] ); @@ -2353,9 +1038,9 @@ mod test { start_byte: 8, old_end_byte: 8, new_end_byte: 14, - start_position: Point { row: 0, column: 8 }, - old_end_position: Point { row: 0, column: 8 }, - new_end_position: Point { row: 0, column: 14 } + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO }] ); } @@ -2369,26 +1054,13 @@ mod test { end: usize, ) { let source = Rope::from_str(source); - - let loader = Loader::new(Configuration { - language: vec![], - language_server: HashMap::new(), - }) - .unwrap(); - let language = get_language(language_name).unwrap(); - - let config = HighlightConfiguration::new(language, "", "", "").unwrap(); - let syntax = Syntax::new( - source.slice(..), - Arc::new(config), - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); + let language = LOADER.language_for_name(language_name).unwrap(); + let syntax = Syntax::new(source.slice(..), language, &LOADER).unwrap(); let root = syntax .tree() .root_node() - .descendant_for_byte_range(start, end) + .descendant_for_byte_range(start as u32, end as u32) .unwrap(); let mut output = String::new(); @@ -2456,14 +1128,4 @@ mod test { source.len(), ); } - - #[test] - fn test_load_runtime_file() { - // Test to make sure we can load some data from the runtime directory. 
-        let contents = load_runtime_file("rust", "indents.scm").unwrap();
-        assert!(!contents.is_empty());
-
-        let results = load_runtime_file("rust", "does-not-exist");
-        assert!(results.is_err());
-    }
 }
diff --git a/helix-core/src/syntax/config.rs b/helix-core/src/syntax/config.rs
index f73103c29ebf..432611bb0d38 100644
--- a/helix-core/src/syntax/config.rs
+++ b/helix-core/src/syntax/config.rs
@@ -1,8 +1,7 @@
-use crate::{auto_pairs::AutoPairs, diagnostic::Severity};
+use crate::{auto_pairs::AutoPairs, diagnostic::Severity, Language};
 use globset::GlobSet;
 use helix_stdx::rope;
-use once_cell::sync::OnceCell;
 use serde::{ser::SerializeSeq as _, Deserialize, Serialize};
 
 use std::{
@@ -10,7 +9,6 @@ use std::{
     fmt::{self, Display},
     path::PathBuf,
     str::FromStr,
-    sync::Arc,
 };
 
 #[derive(Debug, Serialize, Deserialize)]
@@ -24,6 +22,9 @@ pub struct Configuration {
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(rename_all = "kebab-case", deny_unknown_fields)]
 pub struct LanguageConfiguration {
+    #[serde(skip)]
+    pub(super) language: Option<Language>,
+
     #[serde(rename = "name")]
     pub language_id: String, // c-sharp, rust, tsx
     #[serde(rename = "language-id")]
@@ -70,9 +71,6 @@ pub struct LanguageConfiguration {
     pub injection_regex: Option<rope::Regex>,
     // first_line_regex
     //
-    #[serde(skip)]
-    pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>,
-
-    // tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
     #[serde(
         default,
         skip_serializing_if = "Vec::is_empty",
@@ -83,10 +81,6 @@ pub struct LanguageConfiguration {
 
     #[serde(skip_serializing_if = "Option::is_none")]
     pub indent: Option<IndentationConfiguration>,
-    #[serde(skip)]
-    pub(crate) indent_query: OnceCell<Option<Query>>,
-    #[serde(skip)]
-    pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
 
     #[serde(skip_serializing_if = "Option::is_none")]
     pub debugger: Option<DebugAdapterConfig>,
 
@@ -106,6 +100,13 @@ pub struct LanguageConfiguration {
     pub persistent_diagnostic_sources: Vec<String>,
 }
 
+impl LanguageConfiguration {
+    pub fn language(&self) -> Language {
+        // This value must be set by `super::Loader::new`.
+        self.language.unwrap()
+    }
+}
+
 #[derive(Debug, PartialEq, Eq, Hash)]
 pub enum FileType {
     /// The extension of the file, either the `Path::extension` or the full
diff --git a/helix-core/src/syntax/tree_cursor.rs b/helix-core/src/syntax/tree_cursor.rs
deleted file mode 100644
index d82ea74dbfff..000000000000
--- a/helix-core/src/syntax/tree_cursor.rs
+++ /dev/null
@@ -1,264 +0,0 @@
-use std::{cmp::Reverse, ops::Range};
-
-use super::{LanguageLayer, LayerId};
-
-use slotmap::HopSlotMap;
-use tree_sitter::Node;
-
-/// The byte range of an injection layer.
-///
-/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges.
-/// This allows us to sort the ranges ahead of time in order to efficiently find a range that
-/// contains a point with maximum depth.
-#[derive(Debug)]
-struct InjectionRange {
-    start: usize,
-    end: usize,
-    layer_id: LayerId,
-    depth: u32,
-}
-
-pub struct TreeCursor<'a> {
-    layers: &'a HopSlotMap<LayerId, LanguageLayer>,
-    root: LayerId,
-    current: LayerId,
-    injection_ranges: Vec<InjectionRange>,
-    // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
-    // that returns very surprising results in testing.
- cursor: Node<'a>, -} - -impl<'a> TreeCursor<'a> { - pub(super) fn new(layers: &'a HopSlotMap, root: LayerId) -> Self { - let mut injection_ranges = Vec::new(); - - for (layer_id, layer) in layers.iter() { - // Skip the root layer - if layer.parent.is_none() { - continue; - } - for byte_range in layer.ranges.iter() { - let range = InjectionRange { - start: byte_range.start_byte, - end: byte_range.end_byte, - layer_id, - depth: layer.depth, - }; - injection_ranges.push(range); - } - } - - injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth))); - - let cursor = layers[root].tree().root_node(); - - Self { - layers, - root, - current: root, - injection_ranges, - cursor, - } - } - - pub fn node(&self) -> Node<'a> { - self.cursor - } - - pub fn goto_parent(&mut self) -> bool { - if let Some(parent) = self.node().parent() { - self.cursor = parent; - return true; - } - - // If we are already on the root layer, we cannot ascend. - if self.current == self.root { - return false; - } - - // Ascend to the parent layer. - let range = self.node().byte_range(); - let parent_id = self.layers[self.current] - .parent - .expect("non-root layers have a parent"); - self.current = parent_id; - let root = self.layers[self.current].tree().root_node(); - self.cursor = root - .descendant_for_byte_range(range.start, range.end) - .unwrap_or(root); - - true - } - - pub fn goto_parent_with
<P>
(&mut self, predicate: P) -> bool - where - P: Fn(&Node) -> bool, - { - while self.goto_parent() { - if predicate(&self.node()) { - return true; - } - } - - false - } - - /// Finds the injection layer that has exactly the same range as the given `range`. - fn layer_id_of_byte_range(&self, search_range: Range) -> Option { - let start_idx = self - .injection_ranges - .partition_point(|range| range.end < search_range.end); - - self.injection_ranges[start_idx..] - .iter() - .take_while(|range| range.end == search_range.end) - .find_map(|range| (range.start == search_range.start).then_some(range.layer_id)) - } - - fn goto_first_child_impl(&mut self, named: bool) -> bool { - // Check if the current node's range is an exact injection layer range. - if let Some(layer_id) = self - .layer_id_of_byte_range(self.node().byte_range()) - .filter(|&layer_id| layer_id != self.current) - { - // Switch to the child layer. - self.current = layer_id; - self.cursor = self.layers[self.current].tree().root_node(); - return true; - } - - let child = if named { - self.cursor.named_child(0) - } else { - self.cursor.child(0) - }; - - if let Some(child) = child { - // Otherwise descend in the current tree. - self.cursor = child; - true - } else { - false - } - } - - pub fn goto_first_child(&mut self) -> bool { - self.goto_first_child_impl(false) - } - - pub fn goto_first_named_child(&mut self) -> bool { - self.goto_first_child_impl(true) - } - - fn goto_next_sibling_impl(&mut self, named: bool) -> bool { - let sibling = if named { - self.cursor.next_named_sibling() - } else { - self.cursor.next_sibling() - }; - - if let Some(sibling) = sibling { - self.cursor = sibling; - true - } else { - false - } - } - - pub fn goto_next_sibling(&mut self) -> bool { - self.goto_next_sibling_impl(false) - } - - pub fn goto_next_named_sibling(&mut self) -> bool { - self.goto_next_sibling_impl(true) - } - - fn goto_prev_sibling_impl(&mut self, named: bool) -> bool { - let sibling = if named { - self.cursor.prev_named_sibling() - } else { - self.cursor.prev_sibling() - }; - - if let Some(sibling) = sibling { - self.cursor = sibling; - true - } else { - false - } - } - - pub fn goto_prev_sibling(&mut self) -> bool { - self.goto_prev_sibling_impl(false) - } - - pub fn goto_prev_named_sibling(&mut self) -> bool { - self.goto_prev_sibling_impl(true) - } - - /// Finds the injection layer that contains the given start-end range. - fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId { - let start_idx = self - .injection_ranges - .partition_point(|range| range.end < end); - - self.injection_ranges[start_idx..] - .iter() - .take_while(|range| range.start < end || range.depth > 1) - .find_map(|range| (range.start <= start).then_some(range.layer_id)) - .unwrap_or(self.root) - } - - pub fn reset_to_byte_range(&mut self, start: usize, end: usize) { - self.current = self.layer_id_containing_byte_range(start, end); - let root = self.layers[self.current].tree().root_node(); - self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root); - } - - /// Returns an iterator over the children of the node the TreeCursor is on - /// at the time this is called. - pub fn children(&'a mut self) -> ChildIter<'a> { - let parent = self.node(); - - ChildIter { - cursor: self, - parent, - named: false, - } - } - - /// Returns an iterator over the named children of the node the TreeCursor is on - /// at the time this is called. 
- pub fn named_children(&'a mut self) -> ChildIter<'a> { - let parent = self.node(); - - ChildIter { - cursor: self, - parent, - named: true, - } - } -} - -pub struct ChildIter<'n> { - cursor: &'n mut TreeCursor<'n>, - parent: Node<'n>, - named: bool, -} - -impl<'n> Iterator for ChildIter<'n> { - type Item = Node<'n>; - - fn next(&mut self) -> Option { - // first iteration, just visit the first child - if self.cursor.node() == self.parent { - self.cursor - .goto_first_child_impl(self.named) - .then(|| self.cursor.node()) - } else { - self.cursor - .goto_next_sibling_impl(self.named) - .then(|| self.cursor.node()) - } - } -} diff --git a/helix-core/src/text_annotations.rs b/helix-core/src/text_annotations.rs index 9704c3d6b892..0f492b8be2e5 100644 --- a/helix-core/src/text_annotations.rs +++ b/helix-core/src/text_annotations.rs @@ -5,7 +5,7 @@ use std::ops::Range; use std::ptr::NonNull; use crate::doc_formatter::FormattedGrapheme; -use crate::syntax::Highlight; +use crate::syntax::{Highlight, OverlayHighlights}; use crate::{Position, Tendril}; /// An inline annotation is continuous text shown @@ -300,10 +300,7 @@ impl<'a> TextAnnotations<'a> { } } - pub fn collect_overlay_highlights( - &self, - char_range: Range, - ) -> Vec<(usize, Range)> { + pub fn collect_overlay_highlights(&self, char_range: Range) -> OverlayHighlights { let mut highlights = Vec::new(); self.reset_pos(char_range.start); for char_idx in char_range { @@ -311,11 +308,11 @@ impl<'a> TextAnnotations<'a> { // we don't know the number of chars the original grapheme takes // however it doesn't matter as highlight boundaries are automatically // aligned to grapheme boundaries in the rendering code - highlights.push((highlight.0, char_idx..char_idx + 1)) + highlights.push((highlight, char_idx..char_idx + 1)); } } - highlights + OverlayHighlights::Heterogenous { highlights } } /// Add new inline annotations. diff --git a/helix-core/src/textobject.rs b/helix-core/src/textobject.rs index 9015e957c070..008228f43280 100644 --- a/helix-core/src/textobject.rs +++ b/helix-core/src/textobject.rs @@ -1,13 +1,12 @@ use std::fmt::Display; use ropey::RopeSlice; -use tree_sitter::{Node, QueryCursor}; use crate::chars::{categorize_char, char_is_whitespace, CharCategory}; use crate::graphemes::{next_grapheme_boundary, prev_grapheme_boundary}; use crate::line_ending::rope_is_line_ending; use crate::movement::Direction; -use crate::syntax::config::LanguageConfiguration; +use crate::syntax; use crate::Range; use crate::{surround, Syntax}; @@ -260,18 +259,18 @@ pub fn textobject_treesitter( range: Range, textobject: TextObject, object_name: &str, - slice_tree: Node, - lang_config: &LanguageConfiguration, + syntax: &Syntax, + loader: &syntax::Loader, _count: usize, ) -> Range { + let root = syntax.tree().root_node(); + let textobject_query = loader.textobject_query(syntax.root_language()); let get_range = move || -> Option { let byte_pos = slice.char_to_byte(range.cursor(slice)); let capture_name = format!("{}.{}", object_name, textobject); // eg. function.inner - let mut cursor = QueryCursor::new(); - let node = lang_config - .textobject_query()? - .capture_nodes(&capture_name, slice_tree, slice, &mut cursor)? + let node = textobject_query? + .capture_nodes(&capture_name, &root, slice)? 
.filter(|node| node.byte_range().contains(&byte_pos)) .min_by_key(|node| node.byte_range().len())?; diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs index b41b2f64a33c..ab733f931238 100644 --- a/helix-core/tests/indent.rs +++ b/helix-core/tests/indent.rs @@ -1,4 +1,3 @@ -use arc_swap::ArcSwap; use helix_core::{ indent::{indent_level_for_line, treesitter_indent_for_pos, IndentStyle}, syntax::{config::Configuration, Loader}, @@ -6,7 +5,7 @@ use helix_core::{ }; use helix_stdx::rope::RopeSliceExt; use ropey::Rope; -use std::{ops::Range, path::PathBuf, process::Command, sync::Arc}; +use std::{ops::Range, path::PathBuf, process::Command}; #[test] fn test_treesitter_indent_rust() { @@ -196,17 +195,12 @@ fn test_treesitter_indent( runtime.push("../runtime"); std::env::set_var("HELIX_RUNTIME", runtime.to_str().unwrap()); - let language_config = loader.language_config_for_scope(lang_scope).unwrap(); + let language = loader.language_for_scope(lang_scope).unwrap(); + let language_config = loader.language(language).config(); let indent_style = IndentStyle::from_str(&language_config.indent.as_ref().unwrap().unit); - let highlight_config = language_config.highlight_config(&[]).unwrap(); let text = doc.slice(..); - let syntax = Syntax::new( - text, - highlight_config, - Arc::new(ArcSwap::from_pointee(loader)), - ) - .unwrap(); - let indent_query = language_config.indent_query().unwrap(); + let syntax = Syntax::new(text, language, &loader).unwrap(); + let indent_query = loader.indent_query(language).unwrap(); for i in 0..doc.len_lines() { let line = text.line(i); diff --git a/helix-loader/Cargo.toml b/helix-loader/Cargo.toml index 493d8b30ec0c..dcd87e3aca94 100644 --- a/helix-loader/Cargo.toml +++ b/helix-loader/Cargo.toml @@ -21,7 +21,6 @@ anyhow = "1" serde = { version = "1.0", features = ["derive"] } toml = "0.8" etcetera = "0.10" -tree-sitter.workspace = true once_cell = "1.21" log = "0.4" @@ -32,5 +31,4 @@ cc = { version = "1" } threadpool = { version = "1.0" } tempfile.workspace = true -[target.'cfg(not(target_arch = "wasm32"))'.dependencies] -libloading = "0.8" +tree-house.workspace = true diff --git a/helix-loader/src/grammar.rs b/helix-loader/src/grammar.rs index dcf440312409..f04edf3fa948 100644 --- a/helix-loader/src/grammar.rs +++ b/helix-loader/src/grammar.rs @@ -9,7 +9,7 @@ use std::{ sync::mpsc::channel, }; use tempfile::TempPath; -use tree_sitter::Language; +use tree_house::tree_sitter::Grammar; #[cfg(unix)] const DYLIB_EXTENSION: &str = "so"; @@ -61,28 +61,21 @@ const BUILD_TARGET: &str = env!("BUILD_TARGET"); const REMOTE_NAME: &str = "origin"; #[cfg(target_arch = "wasm32")] -pub fn get_language(name: &str) -> Result { +pub fn get_language(name: &str) -> Result> { unimplemented!() } #[cfg(not(target_arch = "wasm32"))] -pub fn get_language(name: &str) -> Result { - use libloading::{Library, Symbol}; +pub fn get_language(name: &str) -> Result> { let mut rel_library_path = PathBuf::new().join("grammars").join(name); rel_library_path.set_extension(DYLIB_EXTENSION); let library_path = crate::runtime_file(&rel_library_path); + if !library_path.exists() { + return Ok(None); + } - let library = unsafe { Library::new(&library_path) } - .with_context(|| format!("Error opening dynamic library {:?}", library_path))?; - let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_")); - let language = unsafe { - let language_fn: Symbol Language> = library - .get(language_fn_name.as_bytes()) - .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; - 
language_fn() - }; - std::mem::forget(library); - Ok(language) + let grammar = unsafe { Grammar::new(name, &library_path) }?; + Ok(Some(grammar)) } fn ensure_git_is_available() -> Result<()> { diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index ffeb7e37ca33..cff5b8aecf0a 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -3482,12 +3482,12 @@ fn insert_with_indent(cx: &mut Context, cursor_fallback: IndentFallbackPos) { enter_insert_mode(cx); let (view, doc) = current!(cx.editor); + let loader = cx.editor.syn_loader.load(); let text = doc.text().slice(..); let contents = doc.text(); let selection = doc.selection(view.id); - let language_config = doc.language_config(); let syntax = doc.syntax(); let tab_width = doc.tab_width(); @@ -3503,7 +3503,7 @@ fn insert_with_indent(cx: &mut Context, cursor_fallback: IndentFallbackPos) { let line_end_index = cursor_line_start; let indent = indent::indent_for_newline( - language_config, + &loader, syntax, &doc.config.load().indent_heuristic, &doc.indent_style, @@ -3613,6 +3613,7 @@ fn open(cx: &mut Context, open: Open, comment_continuation: CommentContinuation) enter_insert_mode(cx); let config = cx.editor.config(); let (view, doc) = current!(cx.editor); + let loader = cx.editor.syn_loader.load(); let text = doc.text().slice(..); let contents = doc.text(); @@ -3662,7 +3663,7 @@ fn open(cx: &mut Context, open: Open, comment_continuation: CommentContinuation) let indent = match line.first_non_whitespace_char() { Some(pos) if continue_comment_token.is_some() => line.slice(..pos).to_string(), _ => indent::indent_for_newline( - doc.language_config(), + &loader, doc.syntax(), &config.indent_heuristic, &doc.indent_style, @@ -4126,6 +4127,7 @@ pub mod insert { pub fn insert_newline(cx: &mut Context) { let config = cx.editor.config(); let (view, doc) = current_ref!(cx.editor); + let loader = cx.editor.syn_loader.load(); let text = doc.text().slice(..); let line_ending = doc.line_ending.as_str(); @@ -4171,7 +4173,7 @@ pub mod insert { let indent = match line.first_non_whitespace_char() { Some(pos) if continue_comment_token.is_some() => line.slice(..pos).to_string(), _ => indent::indent_for_newline( - doc.language_config(), + &loader, doc.syntax(), &config.indent_heuristic, &doc.indent_style, @@ -5728,19 +5730,14 @@ fn goto_ts_object_impl(cx: &mut Context, object: &'static str, direction: Direct let count = cx.count(); let motion = move |editor: &mut Editor| { let (view, doc) = current!(editor); - if let Some((lang_config, syntax)) = doc.language_config().zip(doc.syntax()) { + let loader = editor.syn_loader.load(); + if let Some(syntax) = doc.syntax() { let text = doc.text().slice(..); let root = syntax.tree().root_node(); let selection = doc.selection(view.id).clone().transform(|range| { let new_range = movement::goto_treesitter_object( - text, - range, - object, - direction, - root, - lang_config, - count, + text, range, object, direction, &root, syntax, &loader, count, ); if editor.mode == Mode::Select { @@ -5828,21 +5825,15 @@ fn select_textobject(cx: &mut Context, objtype: textobject::TextObject) { if let Some(ch) = event.char() { let textobject = move |editor: &mut Editor| { let (view, doc) = current!(editor); + let loader = editor.syn_loader.load(); let text = doc.text().slice(..); let textobject_treesitter = |obj_name: &str, range: Range| -> Range { - let (lang_config, syntax) = match doc.language_config().zip(doc.syntax()) { - Some(t) => t, - None => return range, + let Some(syntax) = doc.syntax() else { + 
return range; }; textobject::textobject_treesitter( - text, - range, - objtype, - obj_name, - syntax.tree().root_node(), - lang_config, - count, + text, range, objtype, obj_name, syntax, &loader, count, ) }; diff --git a/helix-term/src/commands/typed.rs b/helix-term/src/commands/typed.rs index 248adbed48eb..d1573a78877c 100644 --- a/helix-term/src/commands/typed.rs +++ b/helix-term/src/commands/typed.rs @@ -1670,16 +1670,14 @@ fn tree_sitter_highlight_name( _args: Args, event: PromptEvent, ) -> anyhow::Result<()> { - fn find_highlight_at_cursor( - cx: &mut compositor::Context<'_>, - ) -> Option { - use helix_core::syntax::HighlightEvent; + use helix_core::syntax::Highlight; - let (view, doc) = current!(cx.editor); + fn find_highlight_at_cursor(editor: &Editor) -> Option { + let (view, doc) = current_ref!(editor); let syntax = doc.syntax()?; let text = doc.text().slice(..); let cursor = doc.selection(view.id).primary().cursor(text); - let byte = text.char_to_byte(cursor); + let byte = text.char_to_byte(cursor) as u32; let node = syntax.descendant_for_byte_range(byte, byte)?; // Query the same range as the one used in syntax highlighting. let range = { @@ -1689,25 +1687,22 @@ fn tree_sitter_highlight_name( let last_line = text.len_lines().saturating_sub(1); let height = view.inner_area(doc).height; let last_visible_line = (row + height as usize).saturating_sub(1).min(last_line); - let start = text.line_to_byte(row.min(last_line)); - let end = text.line_to_byte(last_visible_line + 1); + let start = text.line_to_byte(row.min(last_line)) as u32; + let end = text.line_to_byte(last_visible_line + 1) as u32; start..end }; - let mut highlight = None; + let loader = editor.syn_loader.load(); + let mut highlighter = syntax.highlighter(text, &loader, range); - for event in syntax.highlight_iter(text, Some(range), None) { - match event.unwrap() { - HighlightEvent::Source { start, end } - if start == node.start_byte() && end == node.end_byte() => - { - return highlight; - } - HighlightEvent::HighlightStart(hl) => { - highlight = Some(hl); - } - _ => (), + while highlighter.next_event_offset() != u32::MAX { + let start = highlighter.next_event_offset(); + highlighter.advance(); + let end = highlighter.next_event_offset(); + + if start <= node.start_byte() && end >= node.end_byte() { + return highlighter.active_highlights().next_back(); } } @@ -1718,11 +1713,11 @@ fn tree_sitter_highlight_name( return Ok(()); } - let Some(highlight) = find_highlight_at_cursor(cx) else { + let Some(highlight) = find_highlight_at_cursor(cx.editor) else { return Ok(()); }; - let content = cx.editor.theme.scope(highlight.0).to_string(); + let content = cx.editor.theme.scope(highlight).to_string(); let callback = async move { let call: job::Callback = Callback::EditorCompositor(Box::new( @@ -2190,8 +2185,8 @@ fn tree_sitter_subtree( if let Some(syntax) = doc.syntax() { let primary_selection = doc.selection(view.id).primary(); let text = doc.text(); - let from = text.char_to_byte(primary_selection.from()); - let to = text.char_to_byte(primary_selection.to()); + let from = text.char_to_byte(primary_selection.from()) as u32; + let to = text.char_to_byte(primary_selection.to()) as u32; if let Some(selected_node) = syntax.descendant_for_byte_range(from, to) { let mut contents = String::from("```tsq\n"); helix_core::syntax::pretty_print_tree(&mut contents, selected_node)?; diff --git a/helix-term/src/ui/document.rs b/helix-term/src/ui/document.rs index 8423ae8e437a..d71c47a167d2 100644 --- a/helix-term/src/ui/document.rs +++ 
b/helix-term/src/ui/document.rs @@ -3,8 +3,7 @@ use std::cmp::min; use helix_core::doc_formatter::{DocumentFormatter, GraphemeSource, TextFormat}; use helix_core::graphemes::Grapheme; use helix_core::str_utils::char_to_byte_idx; -use helix_core::syntax::Highlight; -use helix_core::syntax::HighlightEvent; +use helix_core::syntax::{self, HighlightEvent, Highlighter, OverlayHighlights}; use helix_core::text_annotations::TextAnnotations; use helix_core::{visual_offset_from_block, Position, RopeSlice}; use helix_stdx::rope::RopeSliceExt; @@ -17,61 +16,6 @@ use tui::buffer::Buffer as Surface; use crate::ui::text_decorations::DecorationManager; -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -enum StyleIterKind { - /// base highlights (usually emitted by TS), byte indices (potentially not codepoint aligned) - BaseHighlights, - /// overlay highlights (emitted by custom code from selections), char indices - Overlay, -} - -/// A wrapper around a HighlightIterator -/// that merges the layered highlights to create the final text style -/// and yields the active text style and the char_idx where the active -/// style will have to be recomputed. -/// -/// TODO(ropey2): hopefully one day helix and ropey will operate entirely -/// on byte ranges and we can remove this -struct StyleIter<'a, H: Iterator> { - text_style: Style, - active_highlights: Vec, - highlight_iter: H, - kind: StyleIterKind, - text: RopeSlice<'a>, - theme: &'a Theme, -} - -impl> Iterator for StyleIter<'_, H> { - type Item = (Style, usize); - fn next(&mut self) -> Option<(Style, usize)> { - while let Some(event) = self.highlight_iter.next() { - match event { - HighlightEvent::HighlightStart(highlights) => { - self.active_highlights.push(highlights) - } - HighlightEvent::HighlightEnd => { - self.active_highlights.pop(); - } - HighlightEvent::Source { mut end, .. } => { - let style = self - .active_highlights - .iter() - .fold(self.text_style, |acc, span| { - acc.patch(self.theme.highlight(span.0)) - }); - if self.kind == StyleIterKind::BaseHighlights { - // Move the end byte index to the nearest character boundary (rounding up) - // and convert it to a character index. 
- end = self.text.byte_to_char(self.text.ceil_char_boundary(end)); - } - return Some((style, end)); - } - } - } - None - } -} - #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub struct LinePos { /// Indicates whether the given visual line @@ -90,8 +34,8 @@ pub fn render_document( doc: &Document, offset: ViewPosition, doc_annotations: &TextAnnotations, - syntax_highlight_iter: impl Iterator, - overlay_highlight_iter: impl Iterator, + syntax_highlighter: Option>, + overlay_highlights: Vec, theme: &Theme, decorations: DecorationManager, ) { @@ -108,8 +52,8 @@ pub fn render_document( offset.anchor, &doc.text_format(viewport.width, Some(theme)), doc_annotations, - syntax_highlight_iter, - overlay_highlight_iter, + syntax_highlighter, + overlay_highlights, theme, decorations, ) @@ -122,8 +66,8 @@ pub fn render_text( anchor: usize, text_fmt: &TextFormat, text_annotations: &TextAnnotations, - syntax_highlight_iter: impl Iterator, - overlay_highlight_iter: impl Iterator, + syntax_highlighter: Option>, + overlay_highlights: Vec, theme: &Theme, mut decorations: DecorationManager, ) { @@ -133,22 +77,8 @@ pub fn render_text( let mut formatter = DocumentFormatter::new_at_prev_checkpoint(text, text_fmt, text_annotations, anchor); - let mut syntax_styles = StyleIter { - text_style: renderer.text_style, - active_highlights: Vec::with_capacity(64), - highlight_iter: syntax_highlight_iter, - kind: StyleIterKind::BaseHighlights, - theme, - text, - }; - let mut overlay_styles = StyleIter { - text_style: Style::default(), - active_highlights: Vec::with_capacity(64), - highlight_iter: overlay_highlight_iter, - kind: StyleIterKind::Overlay, - theme, - text, - }; + let mut syntax_highlighter = SyntaxHighlighter::new(syntax_highlighter, text, theme); + let mut overlay_highlighter = OverlayHighlighter::new(overlay_highlights, theme); let mut last_line_pos = LinePos { first_visual_line: false, @@ -158,12 +88,6 @@ pub fn render_text( let mut last_line_end = 0; let mut is_in_indent_area = true; let mut last_line_indent_level = 0; - let mut syntax_style_span = syntax_styles - .next() - .unwrap_or_else(|| (Style::default(), usize::MAX)); - let mut overlay_style_span = overlay_styles - .next() - .unwrap_or_else(|| (Style::default(), usize::MAX)); let mut reached_view_top = false; loop { @@ -207,21 +131,17 @@ pub fn render_text( } // acquire the correct grapheme style - while grapheme.char_idx >= syntax_style_span.1 { - syntax_style_span = syntax_styles - .next() - .unwrap_or((Style::default(), usize::MAX)); + while grapheme.char_idx >= syntax_highlighter.pos { + syntax_highlighter.advance(); } - while grapheme.char_idx >= overlay_style_span.1 { - overlay_style_span = overlay_styles - .next() - .unwrap_or((Style::default(), usize::MAX)); + while grapheme.char_idx >= overlay_highlighter.pos { + overlay_highlighter.advance(); } let grapheme_style = if let GraphemeSource::VirtualText { highlight } = grapheme.source { let mut style = renderer.text_style; if let Some(highlight) = highlight { - style = style.patch(theme.highlight(highlight.0)); + style = style.patch(theme.highlight(highlight)); } GraphemeStyle { syntax_style: style, @@ -229,8 +149,8 @@ pub fn render_text( } } else { GraphemeStyle { - syntax_style: syntax_style_span.0, - overlay_style: overlay_style_span.0, + syntax_style: syntax_highlighter.style, + overlay_style: overlay_highlighter.style, } }; decorations.decorate_grapheme(renderer, &grapheme); @@ -549,3 +469,98 @@ impl<'a> TextRenderer<'a> { ) } } + +struct SyntaxHighlighter<'h, 'r, 't> { + inner: Option>, + 
text: RopeSlice<'r>, + /// The character index of the next highlight event, or `usize::MAX` if the highlighter is + /// finished. + pos: usize, + theme: &'t Theme, + style: Style, +} + +impl<'h, 'r, 't> SyntaxHighlighter<'h, 'r, 't> { + fn new(inner: Option>, text: RopeSlice<'r>, theme: &'t Theme) -> Self { + let mut highlighter = Self { + inner, + text, + pos: 0, + theme, + style: Style::default(), + }; + highlighter.update_pos(); + highlighter + } + + fn update_pos(&mut self) { + self.pos = self + .inner + .as_ref() + .and_then(|highlighter| { + let next_byte_idx = highlighter.next_event_offset(); + (next_byte_idx != u32::MAX).then(|| { + // Move the byte index to the nearest character boundary (rounding up) and + // convert it to a character index. + self.text + .byte_to_char(self.text.ceil_char_boundary(next_byte_idx as usize)) + }) + }) + .unwrap_or(usize::MAX); + } + + fn advance(&mut self) { + let Some(highlighter) = self.inner.as_mut() else { + return; + }; + + let (event, highlights) = highlighter.advance(); + let base = match event { + HighlightEvent::Refresh => Style::default(), + HighlightEvent::Push => self.style, + }; + + self.style = highlights.fold(base, |acc, highlight| { + acc.patch(self.theme.highlight(highlight)) + }); + self.update_pos(); + } +} + +struct OverlayHighlighter<'t> { + inner: syntax::OverlayHighlighter, + pos: usize, + theme: &'t Theme, + style: Style, +} + +impl<'t> OverlayHighlighter<'t> { + fn new(overlays: Vec, theme: &'t Theme) -> Self { + let inner = syntax::OverlayHighlighter::new(overlays); + let mut highlighter = Self { + inner, + pos: 0, + theme, + style: Style::default(), + }; + highlighter.update_pos(); + highlighter + } + + fn update_pos(&mut self) { + self.pos = self.inner.next_event_offset(); + } + + fn advance(&mut self) { + let (event, highlights) = self.inner.advance(); + let base = match event { + HighlightEvent::Refresh => Style::default(), + HighlightEvent::Push => self.style, + }; + + self.style = highlights.fold(base, |acc, highlight| { + acc.patch(self.theme.highlight(highlight)) + }); + self.update_pos(); + } +} diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index 6be5657477bc..9343d55d4083 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -17,7 +17,7 @@ use helix_core::{ diagnostic::NumberOrString, graphemes::{next_grapheme_boundary, prev_grapheme_boundary}, movement::Direction, - syntax::{self, HighlightEvent}, + syntax::{self, OverlayHighlights}, text_annotations::TextAnnotations, unicode::width::UnicodeWidthStr, visual_offset_from_block, Change, Position, Range, Selection, Transaction, @@ -31,7 +31,7 @@ use helix_view::{ keyboard::{KeyCode, KeyModifiers}, Document, Editor, Theme, View, }; -use std::{mem::take, num::NonZeroUsize, path::PathBuf, rc::Rc}; +use std::{mem::take, num::NonZeroUsize, ops, path::PathBuf, rc::Rc}; use tui::{buffer::Buffer as Surface, text::Span}; @@ -87,6 +87,7 @@ impl EditorView { let area = view.area; let theme = &editor.theme; let config = editor.config(); + let loader = editor.syn_loader.load(); let view_offset = doc.view_offset(view.id); @@ -115,51 +116,33 @@ impl EditorView { decorations.add_decoration(line_decoration); } - let syntax_highlights = - Self::doc_syntax_highlights(doc, view_offset.anchor, inner.height, theme); + let syntax_highlighter = + Self::doc_syntax_highlighter(doc, view_offset.anchor, inner.height, &loader); + let mut overlays = Vec::new(); - let mut overlay_highlights = - Self::empty_highlight_iter(doc, view_offset.anchor, 
inner.height); - let overlay_syntax_highlights = Self::overlay_syntax_highlights( + overlays.push(Self::overlay_syntax_highlights( doc, view_offset.anchor, inner.height, &text_annotations, - ); - if !overlay_syntax_highlights.is_empty() { - overlay_highlights = - Box::new(syntax::merge(overlay_highlights, overlay_syntax_highlights)); - } + )); - for diagnostic in Self::doc_diagnostics_highlights(doc, theme) { - // Most of the `diagnostic` Vecs are empty most of the time. Skipping - // a merge for any empty Vec saves a significant amount of work. - if diagnostic.is_empty() { - continue; - } - overlay_highlights = Box::new(syntax::merge(overlay_highlights, diagnostic)); - } + Self::doc_diagnostics_highlights_into(doc, theme, &mut overlays); if is_focused { if let Some(tabstops) = Self::tabstop_highlights(doc, theme) { - overlay_highlights = Box::new(syntax::merge(overlay_highlights, tabstops)); + overlays.push(tabstops); } - let highlights = syntax::merge( - overlay_highlights, - Self::doc_selection_highlights( - editor.mode(), - doc, - view, - theme, - &config.cursor_shape, - self.terminal_focused, - ), - ); - let focused_view_elements = Self::highlight_focused_view_elements(view, doc, theme); - if focused_view_elements.is_empty() { - overlay_highlights = Box::new(highlights) - } else { - overlay_highlights = Box::new(syntax::merge(highlights, focused_view_elements)) + overlays.push(Self::doc_selection_highlights( + editor.mode(), + doc, + view, + theme, + &config.cursor_shape, + self.terminal_focused, + )); + if let Some(overlay) = Self::highlight_focused_view_elements(view, doc, theme) { + overlays.push(overlay); } } @@ -207,8 +190,8 @@ impl EditorView { doc, view_offset, &text_annotations, - syntax_highlights, - overlay_highlights, + syntax_highlighter, + overlays, theme, decorations, ); @@ -287,57 +270,23 @@ impl EditorView { start..end } - pub fn empty_highlight_iter( - doc: &Document, - anchor: usize, - height: u16, - ) -> Box> { - let text = doc.text().slice(..); - let row = text.char_to_line(anchor.min(text.len_chars())); - - // Calculate viewport byte ranges: - // Saturating subs to make it inclusive zero indexing. - let range = Self::viewport_byte_range(text, row, height); - Box::new( - [HighlightEvent::Source { - start: text.byte_to_char(range.start), - end: text.byte_to_char(range.end), - }] - .into_iter(), - ) - } - - /// Get syntax highlights for a document in a view represented by the first line + /// Get the syntax highlighter for a document in a view represented by the first line /// and column (`offset`) and the last line. This is done instead of using a view /// directly to enable rendering syntax highlighted docs anywhere (eg. 
picker preview) - pub fn doc_syntax_highlights<'doc>( - doc: &'doc Document, + pub fn doc_syntax_highlighter<'editor>( + doc: &'editor Document, anchor: usize, height: u16, - _theme: &Theme, - ) -> Box + 'doc> { + loader: &'editor syntax::Loader, + ) -> Option> { + let syntax = doc.syntax()?; let text = doc.text().slice(..); let row = text.char_to_line(anchor.min(text.len_chars())); - let range = Self::viewport_byte_range(text, row, height); + let range = range.start as u32..range.end as u32; - match doc.syntax() { - Some(syntax) => { - let iter = syntax - // TODO: range doesn't actually restrict source, just highlight range - .highlight_iter(text.slice(..), Some(range), None) - .map(|event| event.unwrap()); - - Box::new(iter) - } - None => Box::new( - [HighlightEvent::Source { - start: range.start, - end: range.end, - }] - .into_iter(), - ), - } + let highlighter = syntax.highlighter(text, loader, range); + Some(highlighter) } pub fn overlay_syntax_highlights( @@ -345,7 +294,7 @@ impl EditorView { anchor: usize, height: u16, text_annotations: &TextAnnotations, - ) -> Vec<(usize, std::ops::Range)> { + ) -> OverlayHighlights { let text = doc.text().slice(..); let row = text.char_to_line(anchor.min(text.len_chars())); @@ -356,35 +305,29 @@ impl EditorView { } /// Get highlight spans for document diagnostics - pub fn doc_diagnostics_highlights( + pub fn doc_diagnostics_highlights_into( doc: &Document, theme: &Theme, - ) -> [Vec<(usize, std::ops::Range)>; 7] { + overlay_highlights: &mut Vec, + ) { use helix_core::diagnostic::{DiagnosticTag, Range, Severity}; let get_scope_of = |scope| { theme - .find_scope_index_exact(scope) - // get one of the themes below as fallback values - .or_else(|| theme.find_scope_index_exact("diagnostic")) - .or_else(|| theme.find_scope_index_exact("ui.cursor")) - .or_else(|| theme.find_scope_index_exact("ui.selection")) - .expect( - "at least one of the following scopes must be defined in the theme: `diagnostic`, `ui.cursor`, or `ui.selection`", - ) + .find_highlight_exact(scope) + // get one of the themes below as fallback values + .or_else(|| theme.find_highlight_exact("diagnostic")) + .or_else(|| theme.find_highlight_exact("ui.cursor")) + .or_else(|| theme.find_highlight_exact("ui.selection")) + .expect( + "at least one of the following scopes must be defined in the theme: `diagnostic`, `ui.cursor`, or `ui.selection`", + ) }; - // basically just queries the theme color defined in the config - let hint = get_scope_of("diagnostic.hint"); - let info = get_scope_of("diagnostic.info"); - let warning = get_scope_of("diagnostic.warning"); - let error = get_scope_of("diagnostic.error"); - let r#default = get_scope_of("diagnostic"); // this is a bit redundant but should be fine - // Diagnostic tags - let unnecessary = theme.find_scope_index_exact("diagnostic.unnecessary"); - let deprecated = theme.find_scope_index_exact("diagnostic.deprecated"); + let unnecessary = theme.find_highlight_exact("diagnostic.unnecessary"); + let deprecated = theme.find_highlight_exact("diagnostic.deprecated"); - let mut default_vec: Vec<(usize, std::ops::Range)> = Vec::new(); + let mut default_vec = Vec::new(); let mut info_vec = Vec::new(); let mut hint_vec = Vec::new(); let mut warning_vec = Vec::new(); @@ -392,31 +335,30 @@ impl EditorView { let mut unnecessary_vec = Vec::new(); let mut deprecated_vec = Vec::new(); - let push_diagnostic = - |vec: &mut Vec<(usize, std::ops::Range)>, scope, range: Range| { - // If any diagnostic overlaps ranges with the prior diagnostic, - // merge the two 
together. Otherwise push a new span. - match vec.last_mut() { - Some((_, existing_range)) if range.start <= existing_range.end => { - // This branch merges overlapping diagnostics, assuming that the current - // diagnostic starts on range.start or later. If this assertion fails, - // we will discard some part of `diagnostic`. This implies that - // `doc.diagnostics()` is not sorted by `diagnostic.range`. - debug_assert!(existing_range.start <= range.start); - existing_range.end = range.end.max(existing_range.end) - } - _ => vec.push((scope, range.start..range.end)), + let push_diagnostic = |vec: &mut Vec>, range: Range| { + // If any diagnostic overlaps ranges with the prior diagnostic, + // merge the two together. Otherwise push a new span. + match vec.last_mut() { + Some(existing_range) if range.start <= existing_range.end => { + // This branch merges overlapping diagnostics, assuming that the current + // diagnostic starts on range.start or later. If this assertion fails, + // we will discard some part of `diagnostic`. This implies that + // `doc.diagnostics()` is not sorted by `diagnostic.range`. + debug_assert!(existing_range.start <= range.start); + existing_range.end = range.end.max(existing_range.end) } - }; + _ => vec.push(range.start..range.end), + } + }; for diagnostic in doc.diagnostics() { // Separate diagnostics into different Vecs by severity. - let (vec, scope) = match diagnostic.severity { - Some(Severity::Info) => (&mut info_vec, info), - Some(Severity::Hint) => (&mut hint_vec, hint), - Some(Severity::Warning) => (&mut warning_vec, warning), - Some(Severity::Error) => (&mut error_vec, error), - _ => (&mut default_vec, r#default), + let vec = match diagnostic.severity { + Some(Severity::Info) => &mut info_vec, + Some(Severity::Hint) => &mut hint_vec, + Some(Severity::Warning) => &mut warning_vec, + Some(Severity::Error) => &mut error_vec, + _ => &mut default_vec, }; // If the diagnostic has tags and a non-warning/error severity, skip rendering @@ -429,34 +371,59 @@ impl EditorView { Some(Severity::Warning | Severity::Error) ) { - push_diagnostic(vec, scope, diagnostic.range); + push_diagnostic(vec, diagnostic.range); } for tag in &diagnostic.tags { match tag { DiagnosticTag::Unnecessary => { - if let Some(scope) = unnecessary { - push_diagnostic(&mut unnecessary_vec, scope, diagnostic.range) + if unnecessary.is_some() { + push_diagnostic(&mut unnecessary_vec, diagnostic.range) } } DiagnosticTag::Deprecated => { - if let Some(scope) = deprecated { - push_diagnostic(&mut deprecated_vec, scope, diagnostic.range) + if deprecated.is_some() { + push_diagnostic(&mut deprecated_vec, diagnostic.range) } } } } } - [ - default_vec, - unnecessary_vec, - deprecated_vec, - info_vec, - hint_vec, - warning_vec, - error_vec, - ] + overlay_highlights.push(OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic"), + ranges: default_vec, + }); + if let Some(highlight) = unnecessary { + overlay_highlights.push(OverlayHighlights::Homogeneous { + highlight, + ranges: unnecessary_vec, + }); + } + if let Some(highlight) = deprecated { + overlay_highlights.push(OverlayHighlights::Homogeneous { + highlight, + ranges: deprecated_vec, + }); + } + overlay_highlights.extend([ + OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic.info"), + ranges: info_vec, + }, + OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic.hint"), + ranges: hint_vec, + }, + OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic.warning"), + ranges: warning_vec, 
+ }, + OverlayHighlights::Homogeneous { + highlight: get_scope_of("diagnostic.error"), + ranges: error_vec, + }, + ]); } /// Get highlight spans for selections in a document view. @@ -467,7 +434,7 @@ impl EditorView { theme: &Theme, cursor_shape_config: &CursorShapeConfig, is_terminal_focused: bool, - ) -> Vec<(usize, std::ops::Range)> { + ) -> OverlayHighlights { let text = doc.text().slice(..); let selection = doc.selection(view.id); let primary_idx = selection.primary_index(); @@ -476,34 +443,34 @@ impl EditorView { let cursor_is_block = cursorkind == CursorKind::Block; let selection_scope = theme - .find_scope_index_exact("ui.selection") + .find_highlight_exact("ui.selection") .expect("could not find `ui.selection` scope in the theme!"); let primary_selection_scope = theme - .find_scope_index_exact("ui.selection.primary") + .find_highlight_exact("ui.selection.primary") .unwrap_or(selection_scope); let base_cursor_scope = theme - .find_scope_index_exact("ui.cursor") + .find_highlight_exact("ui.cursor") .unwrap_or(selection_scope); let base_primary_cursor_scope = theme - .find_scope_index("ui.cursor.primary") + .find_highlight("ui.cursor.primary") .unwrap_or(base_cursor_scope); let cursor_scope = match mode { - Mode::Insert => theme.find_scope_index_exact("ui.cursor.insert"), - Mode::Select => theme.find_scope_index_exact("ui.cursor.select"), - Mode::Normal => theme.find_scope_index_exact("ui.cursor.normal"), + Mode::Insert => theme.find_highlight_exact("ui.cursor.insert"), + Mode::Select => theme.find_highlight_exact("ui.cursor.select"), + Mode::Normal => theme.find_highlight_exact("ui.cursor.normal"), } .unwrap_or(base_cursor_scope); let primary_cursor_scope = match mode { - Mode::Insert => theme.find_scope_index_exact("ui.cursor.primary.insert"), - Mode::Select => theme.find_scope_index_exact("ui.cursor.primary.select"), - Mode::Normal => theme.find_scope_index_exact("ui.cursor.primary.normal"), + Mode::Insert => theme.find_highlight_exact("ui.cursor.primary.insert"), + Mode::Select => theme.find_highlight_exact("ui.cursor.primary.select"), + Mode::Normal => theme.find_highlight_exact("ui.cursor.primary.normal"), } .unwrap_or(base_primary_cursor_scope); - let mut spans: Vec<(usize, std::ops::Range)> = Vec::new(); + let mut spans = Vec::new(); for (i, range) in selection.iter().enumerate() { let selection_is_primary = i == primary_idx; let (cursor_scope, selection_scope) = if selection_is_primary { @@ -563,7 +530,7 @@ impl EditorView { } } - spans + OverlayHighlights::Heterogenous { highlights: spans } } /// Render brace match, etc (meant for the focused view only) @@ -571,41 +538,24 @@ impl EditorView { view: &View, doc: &Document, theme: &Theme, - ) -> Vec<(usize, std::ops::Range)> { + ) -> Option { // Highlight matching braces - if let Some(syntax) = doc.syntax() { - let text = doc.text().slice(..); - use helix_core::match_brackets; - let pos = doc.selection(view.id).primary().cursor(text); - - if let Some(pos) = - match_brackets::find_matching_bracket(syntax, doc.text().slice(..), pos) - { - // ensure col is on screen - if let Some(highlight) = theme.find_scope_index_exact("ui.cursor.match") { - return vec![(highlight, pos..pos + 1)]; - } - } - } - Vec::new() + let syntax = doc.syntax()?; + let highlight = theme.find_highlight_exact("ui.cursor.match")?; + let text = doc.text().slice(..); + let pos = doc.selection(view.id).primary().cursor(text); + let pos = helix_core::match_brackets::find_matching_bracket(syntax, text, pos)?; + Some(OverlayHighlights::single(highlight, pos..pos + 
1)) } - pub fn tabstop_highlights( - doc: &Document, - theme: &Theme, - ) -> Option)>> { + pub fn tabstop_highlights(doc: &Document, theme: &Theme) -> Option { let snippet = doc.active_snippet.as_ref()?; - let highlight = theme.find_scope_index_exact("tabstop")?; - let mut highlights = Vec::new(); + let highlight = theme.find_highlight_exact("tabstop")?; + let mut ranges = Vec::new(); for tabstop in snippet.tabstops() { - highlights.extend( - tabstop - .ranges - .iter() - .map(|range| (highlight, range.start..range.end)), - ); + ranges.extend(tabstop.ranges.iter().map(|range| range.start..range.end)); } - (!highlights.is_empty()).then_some(highlights) + Some(OverlayHighlights::Homogeneous { highlight, ranges }) } /// Render bufferline at the top diff --git a/helix-term/src/ui/lsp/signature_help.rs b/helix-term/src/ui/lsp/signature_help.rs index 2dee812417af..87a3eb95f21e 100644 --- a/helix-term/src/ui/lsp/signature_help.rs +++ b/helix-term/src/ui/lsp/signature_help.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use arc_swap::ArcSwap; -use helix_core::syntax; +use helix_core::syntax::{self, OverlayHighlights}; use helix_view::graphics::{Margin, Rect, Style}; use helix_view::input::Event; use tui::buffer::Buffer; @@ -102,13 +102,12 @@ impl Component for SignatureHelp { .unwrap_or_else(|| &self.signatures[0]); let active_param_span = signature.active_param_range.map(|(start, end)| { - vec![( - cx.editor - .theme - .find_scope_index_exact("ui.selection") - .unwrap(), - start..end, - )] + let highlight = cx + .editor + .theme + .find_highlight_exact("ui.selection") + .unwrap(); + OverlayHighlights::single(highlight, start..end) }); let signature = self @@ -120,7 +119,7 @@ impl Component for SignatureHelp { signature.signature.as_str(), &self.language, Some(&cx.editor.theme), - Arc::clone(&self.config_loader), + &self.config_loader.load(), active_param_span, ); @@ -178,7 +177,7 @@ impl Component for SignatureHelp { signature.signature.as_str(), &self.language, None, - Arc::clone(&self.config_loader), + &self.config_loader.load(), None, ); let (sig_width, sig_height) = diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs index fe581b5adfa6..ae58d75e8f2e 100644 --- a/helix-term/src/ui/markdown.rs +++ b/helix-term/src/ui/markdown.rs @@ -10,8 +10,8 @@ use std::sync::Arc; use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd}; use helix_core::{ - syntax::{self, HighlightEvent, InjectionLanguageMarker, Syntax}, - RopeSlice, + syntax::{self, HighlightEvent, OverlayHighlights}, + RopeSlice, Syntax, }; use helix_view::{ graphics::{Margin, Rect, Style}, @@ -32,8 +32,12 @@ pub fn highlighted_code_block<'a>( text: &str, language: &str, theme: Option<&Theme>, - config_loader: Arc>, - additional_highlight_spans: Option)>>, + loader: &syntax::Loader, + // Optional overlay highlights to mix in with the syntax highlights. + // + // Note that `OverlayHighlights` is typically used with char indexing but the only caller + // which passes this parameter currently passes **byte indices** instead. 
+    additional_highlight_spans: Option<OverlayHighlights>,
 ) -> Text<'a> {
     let mut spans = Vec::new();
     let mut lines = Vec::new();
@@ -48,67 +52,74 @@ pub fn highlighted_code_block<'a>(
     };

     let ropeslice = RopeSlice::from(text);
-    let syntax = config_loader
-        .load()
-        .language_configuration_for_injection_string(&InjectionLanguageMarker::Name(
-            language.into(),
-        ))
-        .and_then(|config| config.highlight_config(theme.scopes()))
-        .and_then(|config| Syntax::new(ropeslice, config, Arc::clone(&config_loader)));
-
-    let syntax = match syntax {
-        Some(s) => s,
-        None => return styled_multiline_text(text, code_style),
+    let Some(syntax) = loader
+        .language_for_match(RopeSlice::from(language))
+        .and_then(|lang| Syntax::new(ropeslice, lang, loader).ok())
+    else {
+        return styled_multiline_text(text, code_style);
     };

-    let highlight_iter = syntax
-        .highlight_iter(ropeslice, None, None)
-        .map(|e| e.unwrap());
-    let highlight_iter: Box<dyn Iterator<Item = HighlightEvent>> =
-        if let Some(spans) = additional_highlight_spans {
-            Box::new(helix_core::syntax::merge(highlight_iter, spans))
-        } else {
-            Box::new(highlight_iter)
-        };
-
-    let mut highlights = Vec::new();
-    for event in highlight_iter {
-        match event {
-            HighlightEvent::HighlightStart(span) => {
-                highlights.push(span);
+    let mut syntax_highlighter = syntax.highlighter(ropeslice, loader, ..);
+    let mut syntax_highlight_stack = Vec::new();
+    let mut overlay_highlight_stack = Vec::new();
+    let mut overlay_highlighter = syntax::OverlayHighlighter::new(additional_highlight_spans);
+    let mut pos = 0;
+
+    while pos < ropeslice.len_bytes() as u32 {
+        if pos == syntax_highlighter.next_event_offset() {
+            let (event, new_highlights) = syntax_highlighter.advance();
+            if event == HighlightEvent::Refresh {
+                syntax_highlight_stack.clear();
             }
-            HighlightEvent::HighlightEnd => {
-                highlights.pop();
+            syntax_highlight_stack.extend(new_highlights);
+        } else if pos == overlay_highlighter.next_event_offset() as u32 {
+            let (event, new_highlights) = overlay_highlighter.advance();
+            if event == HighlightEvent::Refresh {
+                overlay_highlight_stack.clear();
             }
-            HighlightEvent::Source { start, end } => {
-                let style = highlights
-                    .iter()
-                    .fold(text_style, |acc, span| acc.patch(theme.highlight(span.0)));
-
-                let mut slice = &text[start..end];
-                // TODO: do we need to handle all unicode line endings
-                // here, or is just '\n' okay?
-                while let Some(end) = slice.find('\n') {
-                    // emit span up to newline
-                    let text = &slice[..end];
-                    let text = text.replace('\t', " "); // replace tabs
-                    let span = Span::styled(text, style);
-                    spans.push(span);
-
-                    // truncate slice to after newline
-                    slice = &slice[end + 1..];
-
-                    // make a new line
-                    let spans = std::mem::take(&mut spans);
-                    lines.push(Spans::from(spans));
-                }
+            overlay_highlight_stack.extend(new_highlights)
+        }

-                // if there's anything left, emit it too
-                if !slice.is_empty() {
-                    let span = Span::styled(slice.replace('\t', " "), style);
-                    spans.push(span);
-                }
-            }
+        let start = pos;
+        pos = syntax_highlighter
+            .next_event_offset()
+            .min(overlay_highlighter.next_event_offset() as u32);
+        if pos == u32::MAX {
+            pos = ropeslice.len_bytes() as u32;
+        }
+        if pos == start {
+            continue;
+        }
+        assert!(pos > start);
+
+        let style = syntax_highlight_stack
+            .iter()
+            .chain(overlay_highlight_stack.iter())
+            .fold(text_style, |acc, highlight| {
+                acc.patch(theme.highlight(*highlight))
+            });
+
+        let mut slice = &text[start as usize..pos as usize];
+        // TODO: do we need to handle all unicode line endings
+        // here, or is just '\n' okay?
+ while let Some(end) = slice.find('\n') { + // emit span up to newline + let text = &slice[..end]; + let text = text.replace('\t', " "); // replace tabs + let span = Span::styled(text, style); + spans.push(span); + + // truncate slice to after newline + slice = &slice[end + 1..]; + + // make a new line + let spans = std::mem::take(&mut spans); + lines.push(Spans::from(spans)); + } + + if !slice.is_empty() { + let span = Span::styled(slice.replace('\t', " "), style); + spans.push(span); } } @@ -286,7 +297,7 @@ impl Markdown { &text, language, theme, - Arc::clone(&self.config_loader), + &self.config_loader.load(), None, ); lines.extend(tui_text.lines.into_iter()); diff --git a/helix-term/src/ui/picker.rs b/helix-term/src/ui/picker.rs index 5a4b3afb535b..7abdfce845be 100644 --- a/helix-term/src/ui/picker.rs +++ b/helix-term/src/ui/picker.rs @@ -940,21 +940,18 @@ impl Picker { } } - let syntax_highlights = EditorView::doc_syntax_highlights( + let loader = cx.editor.syn_loader.load(); + + let syntax_highlighter = + EditorView::doc_syntax_highlighter(doc, offset.anchor, area.height, &loader); + let mut overlay_highlights = Vec::new(); + + EditorView::doc_diagnostics_highlights_into( doc, - offset.anchor, - area.height, &cx.editor.theme, + &mut overlay_highlights, ); - let mut overlay_highlights = - EditorView::empty_highlight_iter(doc, offset.anchor, area.height); - for spans in EditorView::doc_diagnostics_highlights(doc, &cx.editor.theme) { - if spans.is_empty() { - continue; - } - overlay_highlights = Box::new(helix_core::syntax::merge(overlay_highlights, spans)); - } let mut decorations = DecorationManager::default(); if let Some((start, end)) = range { @@ -984,7 +981,7 @@ impl Picker { offset, // TODO: compute text annotations asynchronously here (like inlay hints) &TextAnnotations::default(), - syntax_highlights, + syntax_highlighter, overlay_highlights, &cx.editor.theme, decorations, diff --git a/helix-term/src/ui/picker/handlers.rs b/helix-term/src/ui/picker/handlers.rs index 040fffa8840f..9a3af9b3209c 100644 --- a/helix-term/src/ui/picker/handlers.rs +++ b/helix-term/src/ui/picker/handlers.rs @@ -70,23 +70,21 @@ impl AsyncHook return; } - let Some(language_config) = doc.detect_language_config(&editor.syn_loader.load()) - else { + let loader = editor.syn_loader.load(); + let Some(language_config) = doc.detect_language_config(&loader) else { return; }; - doc.language = Some(language_config.clone()); + let language = language_config.language(); + doc.language = Some(language_config); let text = doc.text().clone(); - let loader = editor.syn_loader.clone(); tokio::task::spawn_blocking(move || { - let Some(syntax) = language_config - .highlight_config(&loader.load().scopes()) - .and_then(|highlight_config| { - helix_core::Syntax::new(text.slice(..), highlight_config, loader) - }) - else { - log::info!("highlighting picker item failed"); - return; + let syntax = match helix_core::Syntax::new(text.slice(..), language, &loader) { + Ok(syntax) => syntax, + Err(err) => { + log::info!("highlighting picker preview failed: {err}"); + return; + } }; job::dispatch_blocking(move |editor, compositor| { diff --git a/helix-term/src/ui/prompt.rs b/helix-term/src/ui/prompt.rs index 03adeb05bbf5..56255860a799 100644 --- a/helix-term/src/ui/prompt.rs +++ b/helix-term/src/ui/prompt.rs @@ -529,7 +529,7 @@ impl Prompt { &self.line, language, Some(&cx.editor.theme), - loader.clone(), + &loader.load(), None, ) .into(); diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 
061ddf72d399..2d8d333da493 100644
--- a/helix-view/src/document.rs
+++ b/helix-view/src/document.rs
@@ -9,7 +9,7 @@ use helix_core::diagnostic::DiagnosticProvider;
 use helix_core::doc_formatter::TextFormat;
 use helix_core::encoding::Encoding;
 use helix_core::snippets::{ActiveSnippet, SnippetRenderCtx};
-use helix_core::syntax::{config::LanguageServerFeature, Highlight};
+use helix_core::syntax::config::LanguageServerFeature;
 use helix_core::text_annotations::{InlineAnnotation, Overlay};
 use helix_event::TaskController;
 use helix_lsp::util::lsp_pos_to_pos;
@@ -217,7 +217,7 @@ pub struct Document {
 #[derive(Debug, Clone, Default)]
 pub struct DocumentColorSwatches {
     pub color_swatches: Vec<InlineAnnotation>,
-    pub colors: Vec<Highlight>,
+    pub colors: Vec<syntax::Highlight>,
     pub color_swatches_padding: Vec<InlineAnnotation>,
 }
@@ -1121,11 +1121,13 @@ impl Document {
     /// Detect the programming language based on the file type.
     pub fn detect_language_config(
         &self,
-        config_loader: &syntax::Loader,
+        loader: &syntax::Loader,
     ) -> Option<Arc<helix_core::syntax::config::LanguageConfiguration>> {
-        config_loader
-            .language_config_for_file_name(self.path.as_ref()?)
-            .or_else(|| config_loader.language_config_for_shebang(self.text().slice(..)))
+        let language = loader
+            .language_for_filename(self.path.as_ref()?)
+            .or_else(|| loader.language_for_shebang(self.text().slice(..)))?;
+
+        Some(loader.language(language).config().clone())
     }

     /// Detect the indentation used in the file, or otherwise defaults to the language indentation
@@ -1268,17 +1270,18 @@ impl Document {
         loader: &syntax::Loader,
     ) {
         self.language = language_config;
-        self.syntax = self
-            .language
-            .as_ref()
-            .and_then(|config| config.highlight_config(&loader.scopes()))
-            .and_then(|highlight_config| {
-                Syntax::new(
-                    self.text.slice(..),
-                    highlight_config,
-                    self.syn_loader.clone(),
-                )
-            });
+        self.syntax = self.language.as_ref().and_then(|config| {
+            Syntax::new(self.text.slice(..), config.language(), loader)
+                .map_err(|err| {
+                    // `NoRootConfig` means that there was an issue loading the language/syntax
+                    // config for the root language of the document. An error must have already
+                    // been logged by `LanguageData::syntax_config`.
+ if err != syntax::HighlighterError::NoRootConfig { + log::warn!("Error building syntax for '{}': {err}", self.display_name()); + } + }) + .ok() + }); } /// Set the programming language for the file if you know the language but don't have the @@ -1288,10 +1291,11 @@ impl Document { language_id: &str, loader: &syntax::Loader, ) -> anyhow::Result<()> { - let language_config = loader - .language_config_for_language_id(language_id) + let language = loader + .language_for_name(language_id) .ok_or_else(|| anyhow!("invalid language id: {}", language_id))?; - self.set_language(Some(language_config), loader); + let config = loader.language(language).config().clone(); + self.set_language(Some(config), loader); Ok(()) } @@ -1410,14 +1414,14 @@ impl Document { // update tree-sitter syntax tree if let Some(syntax) = &mut self.syntax { - // TODO: no unwrap - let res = syntax.update( + let loader = self.syn_loader.load(); + if let Err(err) = syntax.update( old_doc.slice(..), self.text.slice(..), transaction.changes(), - ); - if res.is_err() { - log::error!("TS parser failed, disabling TS for the current buffer: {res:?}"); + &loader, + ) { + log::error!("TS parser failed, disabling TS for the current buffer: {err}"); self.syntax = None; } } @@ -2225,8 +2229,7 @@ impl Document { viewport_width, wrap_indicator: wrap_indicator.into_boxed_str(), wrap_indicator_highlight: theme - .and_then(|theme| theme.find_scope_index("ui.virtual.wrap")) - .map(Highlight), + .and_then(|theme| theme.find_highlight("ui.virtual.wrap")), soft_wrap_at_text_width, } } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index ad5adf862bd5..4255b9e199ec 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -1358,7 +1358,7 @@ impl Editor { fn set_theme_impl(&mut self, theme: Theme, preview: ThemeAction) { // `ui.selection` is the only scope required to be able to render a theme. - if theme.find_scope_index_exact("ui.selection").is_none() { + if theme.find_highlight_exact("ui.selection").is_none() { self.set_error("Invalid theme: `ui.selection` required"); return; } @@ -1512,12 +1512,12 @@ impl Editor { if let helix_lsp::Error::ExecutableNotFound(err) = err { // Silence by default since some language servers might just not be installed log::debug!( - "Language server not found for `{}` {} {}", language.scope(), lang, err, + "Language server not found for `{}` {} {}", language.scope, lang, err, ); } else { log::error!( "Failed to initialize the language servers for `{}` - `{}` {{ {} }}", - language.scope(), + language.scope, lang, err ); diff --git a/helix-view/src/theme.rs b/helix-view/src/theme.rs index af8f03bca050..61d490ff3978 100644 --- a/helix-view/src/theme.rs +++ b/helix-view/src/theme.rs @@ -294,43 +294,36 @@ fn build_theme_values( impl Theme { /// To allow `Highlight` to represent arbitrary RGB colors without turning it into an enum, - /// we interpret the last 3 bytes of a `Highlight` as RGB colors. - const RGB_START: usize = (usize::MAX << (8 + 8 + 8)) - 1; + /// we interpret the last 256^3 numbers as RGB. + const RGB_START: u32 = (u32::MAX << (8 + 8 + 8)) - 1 - (u32::MAX - Highlight::MAX); /// Interpret a Highlight with the RGB foreground - fn decode_rgb_highlight(rgb: usize) -> Option<(u8, u8, u8)> { - (rgb > Self::RGB_START).then(|| { - let [b, g, r, ..] = rgb.to_ne_bytes(); + fn decode_rgb_highlight(highlight: Highlight) -> Option<(u8, u8, u8)> { + (highlight.get() > Self::RGB_START).then(|| { + let [b, g, r, ..] 
= (highlight.get() + 1).to_ne_bytes();
             (r, g, b)
         })
     }

     /// Create a Highlight that represents an RGB color
     pub fn rgb_highlight(r: u8, g: u8, b: u8) -> Highlight {
-        Highlight(usize::from_ne_bytes([
-            b,
-            g,
-            r,
-            u8::MAX,
-            u8::MAX,
-            u8::MAX,
-            u8::MAX,
-            u8::MAX,
-        ]))
+        // -1 because highlight is "non-max": u32::MAX is reserved for the null pointer
+        // optimization.
+        Highlight::new(u32::from_ne_bytes([b, g, r, u8::MAX]) - 1)
     }

     #[inline]
-    pub fn highlight(&self, index: usize) -> Style {
-        if let Some((red, green, blue)) = Self::decode_rgb_highlight(index) {
+    pub fn highlight(&self, highlight: Highlight) -> Style {
+        if let Some((red, green, blue)) = Self::decode_rgb_highlight(highlight) {
             Style::new().fg(Color::Rgb(red, green, blue))
         } else {
-            self.highlights[index]
+            self.highlights[highlight.idx()]
         }
     }

     #[inline]
-    pub fn scope(&self, index: usize) -> &str {
-        &self.scopes[index]
+    pub fn scope(&self, highlight: Highlight) -> &str {
+        &self.scopes[highlight.idx()]
     }

     pub fn name(&self) -> &str {
@@ -361,13 +354,16 @@ impl Theme {
         &self.scopes
     }

-    pub fn find_scope_index_exact(&self, scope: &str) -> Option<usize> {
-        self.scopes().iter().position(|s| s == scope)
+    pub fn find_highlight_exact(&self, scope: &str) -> Option<Highlight> {
+        self.scopes()
+            .iter()
+            .position(|s| s == scope)
+            .map(|idx| Highlight::new(idx as u32))
     }

-    pub fn find_scope_index(&self, mut scope: &str) -> Option<usize> {
+    pub fn find_highlight(&self, mut scope: &str) -> Option<Highlight> {
         loop {
-            if let Some(highlight) = self.find_scope_index_exact(scope) {
+            if let Some(highlight) = self.find_highlight_exact(scope) {
                 return Some(highlight);
             }
             if let Some(new_end) = scope.rfind('.') {
@@ -626,23 +622,13 @@ mod tests {
     fn convert_to_and_from() {
         let (r, g, b) = (0xFF, 0xFE, 0xFA);
         let highlight = Theme::rgb_highlight(r, g, b);
-        assert_eq!(Theme::decode_rgb_highlight(highlight.0), Some((r, g, b)));
+        assert_eq!(Theme::decode_rgb_highlight(highlight), Some((r, g, b)));
     }

     /// make sure we can store all the colors at the end
-    /// ```
-    /// FF FF FF FF FF FF FF FF
-    /// xor
-    /// FF FF FF FF FF 00 00 00
-    /// =
-    /// 00 00 00 00 00 FF FF FF
-    /// ```
-    ///
-    /// where the ending `(FF, FF, FF)` represents `(r, g, b)`
     #[test]
     fn full_numeric_range() {
-        assert_eq!(usize::MAX ^ Theme::RGB_START, 256_usize.pow(3));
-        assert_eq!(Theme::RGB_START + 256_usize.pow(3), usize::MAX);
+        assert_eq!(Highlight::MAX - Theme::RGB_START, 256_u32.pow(3));
     }

     #[test]
@@ -650,30 +636,27 @@ mod tests {
         // color in the middle
         let (r, g, b) = (0x14, 0xAA, 0xF7);
         assert_eq!(
-            Theme::default().highlight(Theme::rgb_highlight(r, g, b).0),
+            Theme::default().highlight(Theme::rgb_highlight(r, g, b)),
             Style::new().fg(Color::Rgb(r, g, b))
         );
         // pure black
         let (r, g, b) = (0x00, 0x00, 0x00);
         assert_eq!(
-            Theme::default().highlight(Theme::rgb_highlight(r, g, b).0),
+            Theme::default().highlight(Theme::rgb_highlight(r, g, b)),
             Style::new().fg(Color::Rgb(r, g, b))
         );
         // pure white
         let (r, g, b) = (0xff, 0xff, 0xff);
         assert_eq!(
-            Theme::default().highlight(Theme::rgb_highlight(r, g, b).0),
+            Theme::default().highlight(Theme::rgb_highlight(r, g, b)),
             Style::new().fg(Color::Rgb(r, g, b))
         );
     }

     #[test]
-    #[should_panic(
-        expected = "index out of bounds: the len is 0 but the index is 18446744073692774399"
-    )]
+    #[should_panic(expected = "index out of bounds: the len is 0 but the index is 4278190078")]
     fn out_of_bounds() {
-        let (r, g, b) = (0x00, 0x00, 0x00);
-
-        Theme::default().highlight(Theme::rgb_highlight(r, g, b).0 - 1);
+        let highlight =
Highlight::new(Theme::rgb_highlight(0, 0, 0).get() - 1); + Theme::default().highlight(highlight); } } diff --git a/helix-view/src/view.rs b/helix-view/src/view.rs index 6d237e203619..aecf09a610ed 100644 --- a/helix-view/src/view.rs +++ b/helix-view/src/view.rs @@ -11,7 +11,6 @@ use crate::{ use helix_core::{ char_idx_at_visual_offset, doc_formatter::TextFormat, - syntax::Highlight, text_annotations::TextAnnotations, visual_offset_from_anchor, visual_offset_from_block, Position, RopeSlice, Selection, Transaction, @@ -446,9 +445,7 @@ impl View { let mut text_annotations = TextAnnotations::default(); if let Some(labels) = doc.jump_labels.get(&self.id) { - let style = theme - .and_then(|t| t.find_scope_index("ui.virtual.jump-label")) - .map(Highlight); + let style = theme.and_then(|t| t.find_highlight("ui.virtual.jump-label")); text_annotations.add_overlay(labels, style); } @@ -461,15 +458,10 @@ impl View { padding_after_inlay_hints, }) = doc.inlay_hints.get(&self.id) { - let type_style = theme - .and_then(|t| t.find_scope_index("ui.virtual.inlay-hint.type")) - .map(Highlight); - let parameter_style = theme - .and_then(|t| t.find_scope_index("ui.virtual.inlay-hint.parameter")) - .map(Highlight); - let other_style = theme - .and_then(|t| t.find_scope_index("ui.virtual.inlay-hint")) - .map(Highlight); + let type_style = theme.and_then(|t| t.find_highlight("ui.virtual.inlay-hint.type")); + let parameter_style = + theme.and_then(|t| t.find_highlight("ui.virtual.inlay-hint.parameter")); + let other_style = theme.and_then(|t| t.find_highlight("ui.virtual.inlay-hint")); // Overlapping annotations are ignored apart from the first so the order here is not random: // types -> parameters -> others should hopefully be the "correct" order for most use cases, diff --git a/xtask/src/helpers.rs b/xtask/src/helpers.rs index d2c955bc4729..9bdca5618c83 100644 --- a/xtask/src/helpers.rs +++ b/xtask/src/helpers.rs @@ -1,7 +1,7 @@ use std::path::{Path, PathBuf}; use crate::path; -use helix_core::syntax::config::Configuration as LangConfig; +use helix_core::syntax::{self, config::Configuration as LangConfig}; use helix_term::health::TsFeature; /// Get the list of languages that support a particular tree-sitter @@ -42,3 +42,7 @@ pub fn lang_config() -> LangConfig { let text = std::fs::read_to_string(path::lang_config()).unwrap(); toml::from_str(&text).unwrap() } + +pub fn syn_loader() -> syntax::Loader { + syntax::Loader::new(lang_config()).unwrap() +} diff --git a/xtask/src/main.rs b/xtask/src/main.rs index c7440fcc1888..7c5c728fe19b 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -18,36 +18,18 @@ pub mod tasks { } pub fn querycheck() -> Result<(), DynError> { - use crate::helpers::lang_config; - use helix_core::{syntax::read_query, tree_sitter::Query}; - use helix_loader::grammar::get_language; - - let query_files = [ - "highlights.scm", - "locals.scm", - "injections.scm", - "textobjects.scm", - "indents.scm", - ]; - - for language in lang_config().language { - let language_name = &language.language_id; - let grammar_name = language.grammar.as_ref().unwrap_or(language_name); - for query_file in query_files { - let language = get_language(grammar_name); - let query_text = read_query(language_name, query_file); - if let Ok(lang) = language { - if !query_text.is_empty() { - if let Err(reason) = Query::new(&lang, &query_text) { - return Err(format!( - "Failed to parse {} queries for {}: {}", - query_file, language_name, reason - ) - .into()); - } - } - } - } + use helix_core::syntax::LanguageData; + + let 
loader = crate::helpers::syn_loader(); + + for (_language, lang_data) in loader.languages() { + let config = lang_data.config(); + let Some(syntax_config) = LanguageData::compile_syntax_config(config, &loader)? else { + continue; + }; + let grammar = syntax_config.grammar; + LanguageData::compile_indent_query(grammar, config)?; + LanguageData::compile_textobject_query(grammar, config)?; } println!("Query check succeeded"); From ac88096c98fc7ea9bb00ea127e3b510980872548 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Mon, 24 Feb 2025 12:46:34 -0500 Subject: [PATCH 4/7] queries: Rewrite all locals --- runtime/queries/_javascript/locals.scm | 4 +- runtime/queries/_typescript/locals.scm | 7 +- runtime/queries/ada/locals.scm | 21 +----- runtime/queries/bibtex/locals.scm | 0 runtime/queries/cairo/locals.scm | 8 +-- runtime/queries/capnp/locals.scm | 62 +---------------- runtime/queries/circom/locals.scm | 4 +- runtime/queries/dart/locals.scm | 2 +- runtime/queries/ecma/locals.scm | 23 +------ runtime/queries/elm/locals.scm | 4 +- runtime/queries/erlang/locals.scm | 17 ++--- runtime/queries/fsharp/locals.scm | 25 ++----- runtime/queries/gleam/locals.scm | 7 +- runtime/queries/gn/locals.scm | 16 ----- runtime/queries/go/locals.scm | 20 ++---- runtime/queries/gren/locals.scm | 4 +- runtime/queries/groovy/locals.scm | 2 +- runtime/queries/hare/locals.scm | 11 +--- runtime/queries/haskell-persistent/locals.scm | 1 - runtime/queries/haskell/locals.scm | 5 +- runtime/queries/inko/locals.scm | 5 +- runtime/queries/jq/locals.scm | 4 +- runtime/queries/julia/locals.scm | 66 +++---------------- runtime/queries/just/locals.scm | 10 +-- runtime/queries/koka/locals.scm | 19 ++---- runtime/queries/kotlin/locals.scm | 2 +- runtime/queries/koto/locals.scm | 16 ++--- runtime/queries/llvm/locals.scm | 5 +- runtime/queries/matlab/locals.scm | 19 +----- runtime/queries/ocaml/locals.scm | 2 +- runtime/queries/ponylang/locals.scm | 8 +-- runtime/queries/purescript/locals.scm | 5 +- runtime/queries/python/locals.scm | 18 ++--- runtime/queries/r/locals.scm | 6 +- runtime/queries/rescript/locals.scm | 7 -- runtime/queries/ruby/locals.scm | 25 +++---- runtime/queries/rust/locals.scm | 12 +--- runtime/queries/scala/locals.scm | 21 +----- runtime/queries/smali/locals.scm | 24 +------ runtime/queries/solidity/locals.scm | 5 +- runtime/queries/sway/locals.scm | 4 +- runtime/queries/swift/locals.scm | 4 +- runtime/queries/tact/locals.scm | 7 +- runtime/queries/teal/locals.scm | 8 +-- runtime/queries/tera/locals.scm | 4 +- runtime/queries/thrift/locals.scm | 39 +---------- runtime/queries/tlaplus/locals.scm | 52 +++++++-------- runtime/queries/verilog/locals.scm | 34 ++-------- runtime/queries/wren/locals.scm | 14 +--- runtime/queries/yara/locals.scm | 4 +- 50 files changed, 158 insertions(+), 534 deletions(-) delete mode 100644 runtime/queries/bibtex/locals.scm delete mode 100644 runtime/queries/gn/locals.scm delete mode 100644 runtime/queries/haskell-persistent/locals.scm delete mode 100644 runtime/queries/rescript/locals.scm diff --git a/runtime/queries/_javascript/locals.scm b/runtime/queries/_javascript/locals.scm index 943566167933..7c7ffe13911d 100644 --- a/runtime/queries/_javascript/locals.scm +++ b/runtime/queries/_javascript/locals.scm @@ -6,9 +6,9 @@ ; (i) (formal_parameters - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) ; (i = 1) (formal_parameters (assignment_pattern - left: (identifier) @local.definition)) + left: (identifier) @local.definition.variable.parameter)) diff 
--git a/runtime/queries/_typescript/locals.scm b/runtime/queries/_typescript/locals.scm index 35e3c0db7792..1a4428882060 100644 --- a/runtime/queries/_typescript/locals.scm +++ b/runtime/queries/_typescript/locals.scm @@ -11,7 +11,7 @@ ;------------ (type_parameter - name: (type_identifier) @local.definition) + name: (type_identifier) @local.definition.type.parameter) ; Javascript and Typescript Treesitter grammars deviate when defining the ; tree structure for parameters, so we need to address them in each specific @@ -20,14 +20,15 @@ ; (i: t) ; (i: t = 1) (required_parameter - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) ; (i?: t) ; (i?: t = 1) // Invalid but still possible to highlight. (optional_parameter - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) ; References ;----------- (type_identifier) @local.reference +(identifier) @local.reference diff --git a/runtime/queries/ada/locals.scm b/runtime/queries/ada/locals.scm index 83d62a437721..933f1498a3a4 100644 --- a/runtime/queries/ada/locals.scm +++ b/runtime/queries/ada/locals.scm @@ -5,25 +5,6 @@ (subprogram_body) @local.scope (block_statement) @local.scope -(with_clause (_) @local.definition) -(procedure_specification name: (_) @local.definition) -(function_specification name: (_) @local.definition) -(package_declaration name: (_) @local.definition) -(package_body name: (_) @local.definition) -(generic_instantiation . name: (_) @local.definition) -(component_declaration . (identifier) @local.definition) -(exception_declaration . (identifier) @local.definition) -(formal_object_declaration . (identifier) @local.definition) -(object_declaration . (identifier) @local.definition) -(parameter_specification . (identifier) @local.definition) -(full_type_declaration . (identifier) @local.definition) -(private_type_declaration . (identifier) @local.definition) -(private_extension_declaration . (identifier) @local.definition) -(incomplete_type_declaration . (identifier) @local.definition) -(protected_type_declaration . (identifier) @local.definition) -(formal_complete_type_declaration . (identifier) @local.definition) -(formal_incomplete_type_declaration . (identifier) @local.definition) -(task_type_declaration . (identifier) @local.definition) -(subtype_declaration . (identifier) @local.definition) +(parameter_specification . 
(identifier) @local.definition.variable.parameter) (identifier) @local.reference diff --git a/runtime/queries/bibtex/locals.scm b/runtime/queries/bibtex/locals.scm deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/runtime/queries/cairo/locals.scm b/runtime/queries/cairo/locals.scm index 68c41a46ba8d..ea309753bcf8 100644 --- a/runtime/queries/cairo/locals.scm +++ b/runtime/queries/cairo/locals.scm @@ -14,14 +14,14 @@ ; Definitions (parameter - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) (type_parameters - (type_identifier) @local.definition) + (type_identifier) @local.definition.type.parameter) (constrained_type_parameter - left: (type_identifier) @local.definition) + left: (type_identifier) @local.definition.type.parameter) -(closure_parameters (identifier) @local.definition) +(closure_parameters (identifier) @local.definition.variable.parameter) ; References (identifier) @local.reference diff --git a/runtime/queries/capnp/locals.scm b/runtime/queries/capnp/locals.scm index e98ce2605e57..43d443483ff1 100644 --- a/runtime/queries/capnp/locals.scm +++ b/runtime/queries/capnp/locals.scm @@ -30,67 +30,7 @@ ; Definitions -(annotation_definition_identifier) @local.definition - -(const_identifier) @local.definition - -(enum (enum_identifier) @local.definition) - -[ - (enum_member) - (field_identifier) -] @local.definition - -(method_identifier) @local.definition - -(namespace) @local.definition - [ (param_identifier) (return_identifier) -] @local.definition - -(group (type_identifier) @local.definition) - -(struct (type_identifier) @local.definition) - -(union (type_identifier) @local.definition) - -(interface (type_identifier) @local.definition) - -; Generics Related (don't know how to combine these) - -(struct - (generics - (generic_parameters - (generic_identifier) @local.definition))) - -(interface - (generics - (generic_parameters - (generic_identifier) @local.definition))) - -(method - (implicit_generics - (implicit_generic_parameters - (generic_identifier) @local.definition))) - -(method - (generics - (generic_parameters - (generic_identifier) @local.definition))) - -(annotation - (generics - (generic_parameters - (generic_identifier) @local.definition))) - -(replace_using - (generics - (generic_parameters - (generic_identifier) @local.definition))) - -(return_type - (generics - (generic_parameters - (generic_identifier) @local.definition))) +] @local.definition.variable.parameter diff --git a/runtime/queries/circom/locals.scm b/runtime/queries/circom/locals.scm index e0ea12de0800..d346e8c04011 100644 --- a/runtime/queries/circom/locals.scm +++ b/runtime/queries/circom/locals.scm @@ -3,7 +3,7 @@ (main_component_definition) @local.scope (block_statement) @local.scope -(parameter name: (identifier) @local.definition) @local.definition +(parameter name: (identifier) @local.definition.variable.parameter) -(identifier) @local.reference \ No newline at end of file +(identifier) @local.reference diff --git a/runtime/queries/dart/locals.scm b/runtime/queries/dart/locals.scm index 629838e52e9b..15a81f78c73a 100644 --- a/runtime/queries/dart/locals.scm +++ b/runtime/queries/dart/locals.scm @@ -12,7 +12,7 @@ ;------------ (class_definition - body: (_) @local.definition) + body: (_) @local.definition.type) ; References ;------------ diff --git a/runtime/queries/ecma/locals.scm b/runtime/queries/ecma/locals.scm index df8eb0d397e7..345cf1770e01 100644 --- a/runtime/queries/ecma/locals.scm +++ b/runtime/queries/ecma/locals.scm @@ -12,30 +12,9 
@@ ; Definitions ;------------ -; ...i -(rest_pattern - (identifier) @local.definition) - -; { i } -(object_pattern - (shorthand_property_identifier_pattern) @local.definition) - -; { a: i } -(object_pattern - (pair_pattern - value: (identifier) @local.definition)) - -; [ i ] -(array_pattern - (identifier) @local.definition) - ; i => ... (arrow_function - parameter: (identifier) @local.definition) - -; const/let/var i = ... -(variable_declarator - name: (identifier) @local.definition) + parameter: (identifier) @local.definition.variable.parameter) ; References ;------------ diff --git a/runtime/queries/elm/locals.scm b/runtime/queries/elm/locals.scm index ab1031156c6a..63d31ba58b67 100644 --- a/runtime/queries/elm/locals.scm +++ b/runtime/queries/elm/locals.scm @@ -6,8 +6,8 @@ (infix_declaration) @local.scope (let_in_expr) @local.scope -(function_declaration_left (lower_pattern (lower_case_identifier)) @local.definition) -(function_declaration_left (lower_case_identifier) @local.definition) +(function_declaration_left (lower_pattern (lower_case_identifier)) @local.definition.function) +(function_declaration_left (lower_case_identifier) @local.definition.function) (value_expr(value_qid(upper_case_identifier)) @local.reference) (value_expr(value_qid(lower_case_identifier)) @local.reference) diff --git a/runtime/queries/erlang/locals.scm b/runtime/queries/erlang/locals.scm index 156e403817d4..ddbddb045de7 100644 --- a/runtime/queries/erlang/locals.scm +++ b/runtime/queries/erlang/locals.scm @@ -1,30 +1,23 @@ ; Specs and Callbacks (attribute (stab_clause - pattern: (arguments (variable)? @local.definition) + pattern: (arguments (variable)? @local.definition.variable.parameter) ; If a spec uses a variable as the return type (and later a `when` clause to type it): - body: (variable)? @local.definition)) @local.scope + body: (variable)? @local.definition.variable.parameter)) @local.scope ; parametric `-type`s ((attribute name: (atom) @_type (arguments (binary_operator - left: (call (arguments (variable) @local.definition)) + left: (call (arguments (variable) @local.definition.variable.parameter)) operator: "::") @local.scope)) (#match? @_type "(type|opaque)")) -; macros -((attribute - name: (atom) @_define - (arguments - (call (arguments (variable) @local.definition)))) @local.scope - (#eq? @_define "define")) - ; `fun`s -(anonymous_function (stab_clause pattern: (arguments (variable) @local.definition))) @local.scope +(anonymous_function (stab_clause pattern: (arguments (variable) @local.definition.variable.parameter))) @local.scope ; Ordinary functions -(function_clause pattern: (arguments (variable) @local.definition)) @local.scope +(function_clause pattern: (arguments (variable) @local.definition.variable.parameter)) @local.scope (variable) @local.reference diff --git a/runtime/queries/fsharp/locals.scm b/runtime/queries/fsharp/locals.scm index db2291f2642b..b9b0cf1e690f 100644 --- a/runtime/queries/fsharp/locals.scm +++ b/runtime/queries/fsharp/locals.scm @@ -6,27 +6,16 @@ (function_or_value_defn) ] @local.scope -(value_declaration_left - . - [ - (_ (identifier) @local.definition) - (_ (_ (identifier) @local.definition)) - (_ (_ (_ (identifier) @local.definition))) - (_ (_ (_ (_ (identifier) @local.definition)))) - (_ (_ (_ (_ (_ (identifier) @local.definition))))) - (_ (_ (_ (_ (_ (_ (identifier) @local.definition)))))) - ]) - (function_declaration_left . 
- ((_) @local.definition) + ((_) @local.definition.function) ((argument_patterns [ - (_ (identifier) @local.definition) - (_ (_ (identifier) @local.definition)) - (_ (_ (_ (identifier) @local.definition))) - (_ (_ (_ (_ (identifier) @local.definition)))) - (_ (_ (_ (_ (_ (identifier) @local.definition))))) - (_ (_ (_ (_ (_ (_ (identifier) @local.definition)))))) + (_ (identifier) @local.definition.variable.parameter) + (_ (_ (identifier) @local.definition.variable.parameter)) + (_ (_ (_ (identifier) @local.definition.variable.parameter))) + (_ (_ (_ (_ (identifier) @local.definition.variable.parameter)))) + (_ (_ (_ (_ (_ (identifier) @local.definition.variable.parameter))))) + (_ (_ (_ (_ (_ (_ (identifier) @local.definition.variable.parameter)))))) ]) )) diff --git a/runtime/queries/gleam/locals.scm b/runtime/queries/gleam/locals.scm index 4930057390b5..4ae2854ced67 100644 --- a/runtime/queries/gleam/locals.scm +++ b/runtime/queries/gleam/locals.scm @@ -4,12 +4,7 @@ (case_clause) @local.scope ; Definitions -(let pattern: (identifier) @local.definition) -(function_parameter name: (identifier) @local.definition) -(list_pattern (identifier) @local.definition) -(list_pattern assign: (identifier) @local.definition) -(tuple_pattern (identifier) @local.definition) -(record_pattern_argument pattern: (identifier) @local.definition) +(function_parameter name: (identifier) @local.definition.variable.parameter) ; References (identifier) @local.reference diff --git a/runtime/queries/gn/locals.scm b/runtime/queries/gn/locals.scm deleted file mode 100644 index e8bd995c33ba..000000000000 --- a/runtime/queries/gn/locals.scm +++ /dev/null @@ -1,16 +0,0 @@ -; Copyright (C) 2021 Will Cassella (github@willcassella.com) -; -; Licensed under the Apache License, Version 2.0 (the "License"); -; you may not use this file except in compliance with the License. -; You may obtain a copy of the License at -; -; http://www.apache.org/licenses/LICENSE-2.0 -; -; Unless required by applicable law or agreed to in writing, software -; distributed under the License is distributed on an "AS IS" BASIS, -; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; See the License for the specific language governing permissions and -; limitations under the License. 
- -(source_file) @local.scope -(assignment target: (identifier) @local.definition) diff --git a/runtime/queries/go/locals.scm b/runtime/queries/go/locals.scm index 14a6f3e43c42..305c25cd6c25 100644 --- a/runtime/queries/go/locals.scm +++ b/runtime/queries/go/locals.scm @@ -10,26 +10,14 @@ (type_parameter_list (parameter_declaration - name: (identifier) @local.definition)) + name: (identifier) @local.definition.variable.parameter)) -(parameter_declaration (identifier) @local.definition) -(variadic_parameter_declaration (identifier) @local.definition) - -(short_var_declaration - left: (expression_list - (identifier) @local.definition)) - -(var_spec - (identifier) @local.definition) - -(for_statement - (range_clause - left: (expression_list - (identifier) @local.definition))) +(parameter_declaration (identifier) @local.definition.variable.parameter) +(variadic_parameter_declaration (identifier) @local.definition.variable.parameter) (const_declaration (const_spec - name: (identifier) @local.definition)) + name: (identifier) @local.definition.constant)) ; References diff --git a/runtime/queries/gren/locals.scm b/runtime/queries/gren/locals.scm index ab1031156c6a..63d31ba58b67 100644 --- a/runtime/queries/gren/locals.scm +++ b/runtime/queries/gren/locals.scm @@ -6,8 +6,8 @@ (infix_declaration) @local.scope (let_in_expr) @local.scope -(function_declaration_left (lower_pattern (lower_case_identifier)) @local.definition) -(function_declaration_left (lower_case_identifier) @local.definition) +(function_declaration_left (lower_pattern (lower_case_identifier)) @local.definition.function) +(function_declaration_left (lower_case_identifier) @local.definition.function) (value_expr(value_qid(upper_case_identifier)) @local.reference) (value_expr(value_qid(lower_case_identifier)) @local.reference) diff --git a/runtime/queries/groovy/locals.scm b/runtime/queries/groovy/locals.scm index 380c4ad64f3d..92791b067f76 100644 --- a/runtime/queries/groovy/locals.scm +++ b/runtime/queries/groovy/locals.scm @@ -1,6 +1,6 @@ (function_definition) @local.scope (parameter - name: (identifier) @local.definition) + name: (identifier) @local.definition.variable.parameter) (identifier) @local.reference diff --git a/runtime/queries/hare/locals.scm b/runtime/queries/hare/locals.scm index b9e0a91b544a..1cf066f47527 100644 --- a/runtime/queries/hare/locals.scm +++ b/runtime/queries/hare/locals.scm @@ -3,17 +3,10 @@ (function_declaration) @local.scope (compound_expression) @local.scope -(global_binding - (identifier) @local.definition) -(constant_binding - (identifier) @local.definition) -(type_binding - (identifier) @local.definition) - (function_declaration - (identifier) @local.definition) + (identifier) @local.definition.function) (function_declaration - (parameter (name) @local.definition)) + (parameter (name) @local.definition.variable.parameter)) (identifier) @local.reference diff --git a/runtime/queries/haskell-persistent/locals.scm b/runtime/queries/haskell-persistent/locals.scm deleted file mode 100644 index 3ffb01a17a1a..000000000000 --- a/runtime/queries/haskell-persistent/locals.scm +++ /dev/null @@ -1 +0,0 @@ -(fields (variable)) @local.definition diff --git a/runtime/queries/haskell/locals.scm b/runtime/queries/haskell/locals.scm index ad67fe43f5a1..517d35e585c2 100644 --- a/runtime/queries/haskell/locals.scm +++ b/runtime/queries/haskell/locals.scm @@ -1,4 +1,3 @@ -(signature name: (variable)) @local.definition -(function name: (variable)) @local.definition -(pat_name (variable)) @local.definition +(signature name: 
(variable) @local.definition.function) +(function name: (variable) @local.definition.function) (exp_name (variable)) @local.reference diff --git a/runtime/queries/inko/locals.scm b/runtime/queries/inko/locals.scm index 3266bcae0316..c59acf3393bf 100644 --- a/runtime/queries/inko/locals.scm +++ b/runtime/queries/inko/locals.scm @@ -3,8 +3,7 @@ (block) ] @local.scope -(argument name: _ @local.definition) -(define_variable name: _ @local.definition) -(named_argument name: _ @local.definition) +(argument name: _ @local.definition.variable.parameter) +(named_argument name: _ @local.definition.variable.parameter) (identifier) @local.reference diff --git a/runtime/queries/jq/locals.scm b/runtime/queries/jq/locals.scm index 40946e7c351a..fefcce8f2775 100644 --- a/runtime/queries/jq/locals.scm +++ b/runtime/queries/jq/locals.scm @@ -1,10 +1,10 @@ ;; From nvim-treesitter, contributed by @ObserverOfTime et al. (funcdef - (identifier) @local.definition) + (identifier) @local.definition.function) (funcdefargs - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) (funcname) @local.reference diff --git a/runtime/queries/julia/locals.scm b/runtime/queries/julia/locals.scm index bbf72c0e2018..b587e731603f 100644 --- a/runtime/queries/julia/locals.scm +++ b/runtime/queries/julia/locals.scm @@ -2,81 +2,30 @@ ; Definitions ; ----------- -; Variables -(assignment - (identifier) @local.definition) - -(assignment - (tuple_expression - (identifier) @local.definition)) - ; Constants (const_statement (assignment - . (identifier) @local.definition)) - -; let/const bindings -(let_binding - (identifier) @local.definition) - -(let_binding - (tuple_expression - (identifier) @local.definition)) - -; For bindings -(for_binding - (identifier) @local.definition) - -(for_binding - (tuple_expression - (identifier) @local.definition)) - -; Types -(struct_definition - name: (identifier) @local.definition) - -(abstract_definition - name: (identifier) @local.definition) - -(abstract_definition - name: (identifier) @local.definition) - -(type_parameter_list - (identifier) @local.definition) - -; Module imports -(import_statement - (identifier) @local.definition) + . (identifier) @local.definition.constant)) ; Parameters (parameter_list - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) (optional_parameter . - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) (slurp_parameter - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) (typed_parameter - parameter: (identifier) @local.definition + parameter: (identifier) @local.definition.variable.parameter (_)) ; Single parameter arrow function (function_expression . 
- (identifier) @local.definition) - -; Function/macro definitions -(function_definition - name: (identifier) @local.definition) @local.scope - -(short_function_definition - name: (identifier) @local.definition) @local.scope - -(macro_definition - name: (identifier) @local.definition) @local.scope + (identifier) @local.definition.variable.parameter) ; ---------- ; References @@ -97,5 +46,8 @@ (let_statement) (quote_statement) (do_clause) + (function_definition) + (short_function_definition) + (macro_definition) ] @local.scope diff --git a/runtime/queries/just/locals.scm b/runtime/queries/just/locals.scm index d612f5da4d52..f7498b74968f 100644 --- a/runtime/queries/just/locals.scm +++ b/runtime/queries/just/locals.scm @@ -8,19 +8,19 @@ ; Definitions (alias - name: (identifier) @local.definition) + name: (identifier) @local.definition.variable) (assignment - name: (identifier) @local.definition) + name: (identifier) @local.definition.variable) (mod - name: (identifier) @local.definition) + name: (identifier) @local.definition.namespace) (recipe_parameter - name: (identifier) @local.definition) + name: (identifier) @local.definition.variable.parameter) (recipe - name: (identifier) @local.definition) + name: (identifier) @local.definition.function) ; References diff --git a/runtime/queries/koka/locals.scm b/runtime/queries/koka/locals.scm index f8a83e0cb4ba..e654f70c0008 100644 --- a/runtime/queries/koka/locals.scm +++ b/runtime/queries/koka/locals.scm @@ -2,29 +2,24 @@ (block) @local.scope -(pattern - (identifier - (varid) @local.definition)) - -(decl - (apattern - (pattern - (identifier - (varid) @local.definition)))) +(pparameter + (pattern + (identifier + (varid) @local.definition.variable.parameter))) (puredecl (funid (identifier - (varid) @local.definition))) + (varid) @local.definition.function))) (puredecl (binder (identifier - (varid) @local.definition))) + (varid) @local.definition.function))) (decl (binder (identifier - (varid) @local.definition))) + (varid) @local.definition.function))) (identifier (varid) @local.reference) diff --git a/runtime/queries/kotlin/locals.scm b/runtime/queries/kotlin/locals.scm index 752ed995e892..8c85cdcd9201 100644 --- a/runtime/queries/kotlin/locals.scm +++ b/runtime/queries/kotlin/locals.scm @@ -8,7 +8,7 @@ ; Definitions (type_parameter - (type_identifier) @local.definition) + (type_identifier) @local.definition.type.parameter) ; References diff --git a/runtime/queries/koto/locals.scm b/runtime/queries/koto/locals.scm index 1886b1886740..8b5bd9cb7427 100644 --- a/runtime/queries/koto/locals.scm +++ b/runtime/queries/koto/locals.scm @@ -5,26 +5,20 @@ body: (_) @local.scope) ; Definitions -(assign - lhs: (identifier) @local.definition) - -(variable - (identifier) @local.definition) - (arg - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) (arg - (variable (identifier)) @local.definition) + (variable (identifier)) @local.definition.parameter) (import_item - (identifier) @local.definition) + (identifier) @local.definition.namespace) (entry_block - (identifier) @local.definition) + (identifier) @local.definition.variable.other.member) (entry_inline - (identifier) @local.definition) + (identifier) @local.definition.variable.other.member) ; References (identifier) @local.reference diff --git a/runtime/queries/llvm/locals.scm b/runtime/queries/llvm/locals.scm index 1946c2871394..f2e46bddd73b 100644 --- a/runtime/queries/llvm/locals.scm +++ b/runtime/queries/llvm/locals.scm @@ -5,10 +5,7 @@ ; Definitions (argument - (value 
(var (local_var) @local.definition))) - -(instruction - (local_var) @local.definition) + (value (var (local_var) @local.definition.variable.parameter))) ; References (local_var) @local.reference diff --git a/runtime/queries/matlab/locals.scm b/runtime/queries/matlab/locals.scm index d0589a1dbb02..882d9818749f 100644 --- a/runtime/queries/matlab/locals.scm +++ b/runtime/queries/matlab/locals.scm @@ -1,19 +1,6 @@ -(function_definition name: (identifier) @local.definition ?) @local.scope -(function_arguments (identifier)* @local.definition) +(function_definition name: (identifier) @local.definition.function ?) @local.scope +(function_arguments (identifier)* @local.definition.variable.parameter) -(lambda (arguments (identifier) @local.definition)) @local.scope - -(assignment left: ((function_call - name: (identifier) @local.definition))) -(assignment left: ((field_expression . [(function_call - name: (identifier) @local.definition) - (identifier) @local.definition]))) -(assignment left: (_) @local.definition) -(assignment (multioutput_variable (_) @local.definition)) - -(iterator . (identifier) @local.definition) -(global_operator (identifier) @local.definition) -(persistent_operator (identifier) @local.definition) -(catch_clause (identifier) @local.definition) +(lambda (arguments (identifier) @local.definition.variable.parameter)) @local.scope (identifier) @local.reference diff --git a/runtime/queries/ocaml/locals.scm b/runtime/queries/ocaml/locals.scm index 8f3f3fdf6373..b2b89049372b 100644 --- a/runtime/queries/ocaml/locals.scm +++ b/runtime/queries/ocaml/locals.scm @@ -16,7 +16,7 @@ ; Definitions ;------------ -(value_pattern) @local.definition +(value_pattern) @local.definition.variable.parameter ; References ;----------- diff --git a/runtime/queries/ponylang/locals.scm b/runtime/queries/ponylang/locals.scm index e9efd69e7f95..bf6a5ed2487c 100644 --- a/runtime/queries/ponylang/locals.scm +++ b/runtime/queries/ponylang/locals.scm @@ -23,12 +23,8 @@ (try_block then_block: (block) @local.scope) (with else_block: (block) @local.scope) -(field name: (identifier) @local.definition) -(local name: (identifier) @local.definition) -(param name: (identifier) @local.definition) -(lambdaparam name: (identifier) @local.definition) -("for" element: (idseq (identifier) @local.definition)) -(withelem name: (idseq (identifier) @local.definition)) +(param name: (identifier) @local.definition.variable.parameter) +(lambdaparam name: (identifier) @local.definition.variable.parameter) ; only lower case identifiers are references ( diff --git a/runtime/queries/purescript/locals.scm b/runtime/queries/purescript/locals.scm index ad67fe43f5a1..517d35e585c2 100644 --- a/runtime/queries/purescript/locals.scm +++ b/runtime/queries/purescript/locals.scm @@ -1,4 +1,3 @@ -(signature name: (variable)) @local.definition -(function name: (variable)) @local.definition -(pat_name (variable)) @local.definition +(signature name: (variable) @local.definition.function) +(function name: (variable) @local.definition.function) (exp_name (variable)) @local.reference diff --git a/runtime/queries/python/locals.scm b/runtime/queries/python/locals.scm index f7d5937e9f04..000a3e49fd98 100644 --- a/runtime/queries/python/locals.scm +++ b/runtime/queries/python/locals.scm @@ -10,33 +10,33 @@ ; Parameters (parameters - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) (parameters (typed_parameter - (identifier) @local.definition)) + (identifier) @local.definition.variable.parameter)) (parameters 
(default_parameter - name: (identifier) @local.definition)) + name: (identifier) @local.definition.variable.parameter)) (parameters (typed_default_parameter - name: (identifier) @local.definition)) + name: (identifier) @local.definition.variable.parameter)) (parameters (list_splat_pattern ; *args - (identifier) @local.definition)) + (identifier) @local.definition.variable.parameter)) (parameters (dictionary_splat_pattern ; **kwargs - (identifier) @local.definition)) + (identifier) @local.definition.variable.parameter)) (lambda_parameters - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) ; Imports (import_statement name: (dotted_name - (identifier) @local.definition)) + (identifier) @local.definition.namespace)) (aliased_import - alias: (identifier) @local.definition) + alias: (identifier) @local.definition.namespace) ;; References diff --git a/runtime/queries/r/locals.scm b/runtime/queries/r/locals.scm index be6cc637942e..ddfd09e604a2 100644 --- a/runtime/queries/r/locals.scm +++ b/runtime/queries/r/locals.scm @@ -2,10 +2,6 @@ (function_definition) @local.scope -(formal_parameters (identifier) @local.definition) - -(left_assignment name: (identifier) @local.definition) -(equals_assignment name: (identifier) @local.definition) -(right_assignment name: (identifier) @local.definition) +(formal_parameters (identifier) @local.definition.variable.parameter) (identifier) @local.reference diff --git a/runtime/queries/rescript/locals.scm b/runtime/queries/rescript/locals.scm deleted file mode 100644 index 4de73890e578..000000000000 --- a/runtime/queries/rescript/locals.scm +++ /dev/null @@ -1,7 +0,0 @@ -(switch_expression) @local.scope - -; Definitions -;------------ -(type_declaration) @local.definition -(let_binding) @local.definition -(module_declaration) @local.definition diff --git a/runtime/queries/ruby/locals.scm b/runtime/queries/ruby/locals.scm index 393caafd92cb..2da5401b9495 100644 --- a/runtime/queries/ruby/locals.scm +++ b/runtime/queries/ruby/locals.scm @@ -7,21 +7,14 @@ (do_block) ] @local.scope -(block_parameter (identifier) @local.definition) -(block_parameters (identifier) @local.definition) -(destructured_parameter (identifier) @local.definition) -(hash_splat_parameter (identifier) @local.definition) -(lambda_parameters (identifier) @local.definition) -(method_parameters (identifier) @local.definition) -(splat_parameter (identifier) @local.definition) - -(keyword_parameter name: (identifier) @local.definition) -(optional_parameter name: (identifier) @local.definition) +(block_parameter (identifier) @local.definition.variable.parameter) +(block_parameters (identifier) @local.definition.variable.parameter) +(destructured_parameter (identifier) @local.definition.variable.parameter) +(hash_splat_parameter (identifier) @local.definition.variable.parameter) +(lambda_parameters (identifier) @local.definition.variable.parameter) +(method_parameters (identifier) @local.definition.variable.parameter) +(splat_parameter (identifier) @local.definition.variable.parameter) +(keyword_parameter name: (identifier) @local.definition.variable.parameter) +(optional_parameter name: (identifier) @local.definition.variable.parameter) (identifier) @local.reference - -(assignment left: (identifier) @local.definition) -(operator_assignment left: (identifier) @local.definition) -(left_assignment_list (identifier) @local.definition) -(rest_assignment (identifier) @local.definition) -(destructured_left_assignment (identifier) @local.definition) diff --git 
a/runtime/queries/rust/locals.scm b/runtime/queries/rust/locals.scm index cb75d15578cd..7958ef04d832 100644 --- a/runtime/queries/rust/locals.scm +++ b/runtime/queries/rust/locals.scm @@ -15,17 +15,9 @@ ; Definitions (parameter - (identifier) @local.definition) + pattern: (identifier) @local.definition.variable.parameter) -(type_parameters - (type_identifier) @local.definition) -(constrained_type_parameter - left: (type_identifier) @local.definition) -(optional_type_parameter - name: (type_identifier) @local.definition) - -(closure_parameters (identifier) @local.definition) +(closure_parameters (identifier) @local.definition.variable.parameter) ; References (identifier) @local.reference -(type_identifier) @local.reference diff --git a/runtime/queries/scala/locals.scm b/runtime/queries/scala/locals.scm index 8eaa75ec3f90..60368f6c0ad7 100644 --- a/runtime/queries/scala/locals.scm +++ b/runtime/queries/scala/locals.scm @@ -3,27 +3,12 @@ (function_declaration - name: (identifier) @local.definition) @local.scope + name: (identifier) @local.definition.function) @local.scope (function_definition - name: (identifier) @local.definition) + name: (identifier) @local.definition.function) (parameter - name: (identifier) @local.definition) - -(binding - name: (identifier) @local.definition) - -(val_definition - pattern: (identifier) @local.definition) - -(var_definition - pattern: (identifier) @local.definition) - -(val_declaration - name: (identifier) @local.definition) - -(var_declaration - name: (identifier) @local.definition) + name: (identifier) @local.definition.variable.parameter) (identifier) @local.reference diff --git a/runtime/queries/smali/locals.scm b/runtime/queries/smali/locals.scm index 3436949010aa..2b2aef2721e0 100644 --- a/runtime/queries/smali/locals.scm +++ b/runtime/queries/smali/locals.scm @@ -16,27 +16,7 @@ (jmp_label) ] @local.reference -(enum_reference - (field_identifier) @local.definition) - -((field_definition - (access_modifiers) @_mod - (field_identifier) @local.definition) - (#eq? 
@_mod "enum")) - -(field_definition - (field_identifier) @local.definition - (field_type) @local.definition) - -(annotation_key) @local.definition - (method_definition - (method_signature (method_identifier) @local.definition)) - -(param_identifier) @local.definition - -(annotation_directive - (class_identifier) @local.definition) + (method_signature (method_identifier) @local.definition.function.method)) -(class_directive - (class_identifier) @local.definition) +(param_identifier) @local.definition.variable.parameter diff --git a/runtime/queries/solidity/locals.scm b/runtime/queries/solidity/locals.scm index ac8b62a392e0..69ee8ab9969c 100644 --- a/runtime/queries/solidity/locals.scm +++ b/runtime/queries/solidity/locals.scm @@ -2,8 +2,7 @@ (constructor_definition) @local.scope (block_statement) @local.scope -(function_definition (parameter name: (identifier) @local.definition)) -(constructor_definition (parameter name: (identifier) @local.definition)) -(variable_declaration name: (identifier) @local.definition) +(function_definition (parameter name: (identifier) @local.definition.variable.parameter)) +(constructor_definition (parameter name: (identifier) @local.definition.variable.parameter)) (identifier) @local.reference diff --git a/runtime/queries/sway/locals.scm b/runtime/queries/sway/locals.scm index 262d609e910c..2ee16c4872d0 100644 --- a/runtime/queries/sway/locals.scm +++ b/runtime/queries/sway/locals.scm @@ -9,9 +9,9 @@ ; Definitions (parameter - (identifier) @local.definition) + (identifier) @local.definition.variable.parameter) -(closure_parameters (identifier) @local.definition) +(closure_parameters (identifier) @local.definition.variable.parameter) ; References (identifier) @local.reference diff --git a/runtime/queries/swift/locals.scm b/runtime/queries/swift/locals.scm index 31bc9abf177b..597748c14ae1 100644 --- a/runtime/queries/swift/locals.scm +++ b/runtime/queries/swift/locals.scm @@ -1,6 +1,6 @@ ; Upstream: https://github.com/alex-pinkus/tree-sitter-swift/blob/57c1c6d6ffa1c44b330182d41717e6fe37430704/queries/locals.scm -(import_declaration (identifier) @definition.import) -(function_declaration name: (simple_identifier) @definition.function) +(import_declaration (identifier) @local.definition.namespace) +(function_declaration name: (simple_identifier) @local.definition.function) ; Scopes [ diff --git a/runtime/queries/tact/locals.scm b/runtime/queries/tact/locals.scm index f7a4115a75b1..5a7e565e61d0 100644 --- a/runtime/queries/tact/locals.scm +++ b/runtime/queries/tact/locals.scm @@ -14,14 +14,11 @@ ; Definitions @local.definition ; ------------------------------ -(let_statement - name: (identifier) @local.definition) - (parameter - name: (identifier) @local.definition) + name: (identifier) @local.definition.variable.parameter) (constant - name: (identifier) @local.definition) + name: (identifier) @local.definition.constant) ; References @local.reference ; ----------------------------- diff --git a/runtime/queries/teal/locals.scm b/runtime/queries/teal/locals.scm index 879aa71c1a3a..e6463932d6b6 100644 --- a/runtime/queries/teal/locals.scm +++ b/runtime/queries/teal/locals.scm @@ -1,17 +1,17 @@ (var_declaration declarators: (var_declarators - (var (identifier)) @local.definition)) + (var (identifier)) @local.definition.variable)) (var_assignment variables: (assignment_variables - (var (identifier) @local.definition))) + (var (identifier) @local.definition.variable))) -(arg name: (identifier) @local.definition) +(arg name: (identifier) 
@local.definition.variable.parameter) (anon_function) @local.scope ((function_statement - (function_name) @local.definition) @local.scope) + (function_name) @local.definition.function) @local.scope) (program) @local.scope (if_statement) @local.scope diff --git a/runtime/queries/tera/locals.scm b/runtime/queries/tera/locals.scm index 04b09cb00fbb..9743064b187f 100644 --- a/runtime/queries/tera/locals.scm +++ b/runtime/queries/tera/locals.scm @@ -1,7 +1,7 @@ (identifier) @local.reference (assignment_expression - left: (identifier) @local.definition) + left: (identifier) @local.definition.variable) (macro_statement (parameter_list - (identifier) @local.definition)) + (identifier) @local.definition.variable.parameter)) (macro_statement) @local.scope diff --git a/runtime/queries/thrift/locals.scm b/runtime/queries/thrift/locals.scm index 538b49962b33..9c4a74400d14 100644 --- a/runtime/queries/thrift/locals.scm +++ b/runtime/queries/thrift/locals.scm @@ -11,41 +11,4 @@ ; Definitions -(annotation_identifier) @local.definition - -; (const_definition (identifier) @definition.constant) - -; (enum_definition "enum" -; . (identifier) @definition.enum -; "{" (identifier) @definition.constant "}") - -; (senum_definition "senum" -; . (identifier) @definition.enum) - -; (field (identifier) @definition.field) - -; (function_definition (identifier) @definition.function) - -; (namespace_declaration -; "namespace" (namespace_scope) -; . (_) @definition.namespace -; (namespace_uri)?) - -; (parameter (identifier) @definition.parameter) - -; (struct_definition -; "struct" . (identifier) @definition.type) - -; (union_definition -; "union" . (identifier) @definition.type) - -; (exception_definition -; "exception" . (identifier) @definition.type) - -; (service_definition -; "service" . (identifier) @definition.type) - -; (interaction_definition -; "interaction" . 
(identifier) @definition.type) - -; (typedef_identifier) @definition.type +(annotation_identifier) @local.definition.attribute diff --git a/runtime/queries/tlaplus/locals.scm b/runtime/queries/tlaplus/locals.scm index aee8d612bd46..89a9f984f307 100644 --- a/runtime/queries/tlaplus/locals.scm +++ b/runtime/queries/tlaplus/locals.scm @@ -15,22 +15,22 @@ ] @local.scope ; Definitions -(choose (identifier) @local.definition) -(choose (tuple_of_identifiers (identifier) @local.definition)) -(constant_declaration (identifier) @local.definition) -(constant_declaration (operator_declaration name: (_) @local.definition)) -(function_definition name: (identifier) @local.definition) -(lambda (identifier) @local.definition) -(module_definition name: (_) @local.definition) -(module_definition parameter: (identifier) @local.definition) -(module_definition parameter: (operator_declaration name: (_) @local.definition)) -(operator_definition name: (_) @local.definition) -(operator_definition parameter: (identifier) @local.definition) -(operator_definition parameter: (operator_declaration name: (_) @local.definition)) -(quantifier_bound (identifier) @local.definition) -(quantifier_bound (tuple_of_identifiers (identifier) @local.definition)) -(unbounded_quantification (identifier) @local.definition) -(variable_declaration (identifier) @local.definition) +(choose (identifier) @local.definition.variable.parameter) +(choose (tuple_of_identifiers (identifier) @local.definition.variable.parameter)) +(constant_declaration (identifier) @local.definition.constant) +(constant_declaration (operator_declaration name: (_) @local.definition.constant)) +(function_definition name: (identifier) @local.definition.function) +(lambda (identifier) @local.definition.function) +(module_definition name: (_) @local.definition.namespace) +(module_definition parameter: (identifier) @local.definition.variable.parameter) +(module_definition parameter: (operator_declaration name: (_) @local.definition.variable.parameter)) +(operator_definition name: (_) @local.definition.operator) +(operator_definition parameter: (identifier) @local.definition.variable.parameter) +(operator_definition parameter: (operator_declaration name: (_) @local.definition.variable.parameter)) +(quantifier_bound (identifier) @local.definition.variable.parameter) +(quantifier_bound (tuple_of_identifiers (identifier) @local.definition.variable.parameter)) +(unbounded_quantification (identifier) @local.definition.variable.parameter) +(variable_declaration (identifier) @local.definition.variable.builtin) ; Proof scopes and definitions [ @@ -39,12 +39,12 @@ (theorem) ] @local.scope -(assume_prove (new (identifier) @local.definition)) -(assume_prove (new (operator_declaration name: (_) @local.definition))) -(assumption name: (identifier) @local.definition) -(pick_proof_step (identifier) @local.definition) -(take_proof_step (identifier) @local.definition) -(theorem name: (identifier) @local.definition) +(assume_prove (new (identifier) @local.definition.variable.parameter)) +(assume_prove (new (operator_declaration name: (_) @local.definition.variable.parameter))) +(assumption name: (identifier) @local.definition.constant) +(pick_proof_step (identifier) @local.definition.variable.parameter) +(take_proof_step (identifier) @local.definition.variable.parameter) +(theorem name: (identifier) @local.definition.constant) ;PlusCal scopes and definitions [ @@ -54,10 +54,10 @@ (pcal_with) ] @local.scope -(pcal_macro_decl parameter: (identifier) @local.definition) -(pcal_proc_var_decl 
(identifier) @local.definition) -(pcal_var_decl (identifier) @local.definition) -(pcal_with (identifier) @local.definition) +(pcal_macro_decl parameter: (identifier) @local.definition.variable.parameter) +(pcal_proc_var_decl (identifier) @local.definition.variable.parameter) +(pcal_var_decl (identifier) @local.definition.variable.parameter) +(pcal_with (identifier) @local.definition.variable.parameter) ; References (identifier_ref) @local.reference diff --git a/runtime/queries/verilog/locals.scm b/runtime/queries/verilog/locals.scm index 507ddea44c7b..83b7bcfa3e5a 100644 --- a/runtime/queries/verilog/locals.scm +++ b/runtime/queries/verilog/locals.scm @@ -8,54 +8,32 @@ (module_declaration) ] @scope -(data_declaration - (list_of_variable_decl_assignments - (variable_decl_assignment - (simple_identifier) @definition.var))) - -(genvar_initialization - (genvar_identifier - (simple_identifier) @definition.var)) - -(for_initialization - (for_variable_declaration - (simple_identifier) @definition.var)) - -(net_declaration - (list_of_net_decl_assignments - (net_decl_assignment - (simple_identifier) @definition.var))) - -(ansi_port_declaration - (port_identifier - (simple_identifier) @definition.var)) - (parameter_declaration (list_of_param_assignments (param_assignment (parameter_identifier - (simple_identifier) @definition.parameter)))) + (simple_identifier) @local.definition.variable.parameter)))) (local_parameter_declaration (list_of_param_assignments (param_assignment (parameter_identifier - (simple_identifier) @definition.parameter)))) + (simple_identifier) @local.definition.variable.parameter)))) ;; TODO: fixme ;(function_declaration ;(function_identifier - ;(simple_identifier) @definition.function)) + ;(simple_identifier) @local.definition.function)) (function_declaration (function_body_declaration (function_identifier (function_identifier - (simple_identifier) @definition.function)))) + (simple_identifier) @local.definition.function)))) (tf_port_item1 (port_identifier - (simple_identifier) @definition.parameter)) + (simple_identifier) @local.definition.variable.parameter)) ; too broad, now includes types etc -(simple_identifier) @reference +(simple_identifier) @local.reference diff --git a/runtime/queries/wren/locals.scm b/runtime/queries/wren/locals.scm index 38f39d913ae4..ba3a1de59904 100644 --- a/runtime/queries/wren/locals.scm +++ b/runtime/queries/wren/locals.scm @@ -6,16 +6,4 @@ (field) @local.reference (static_field) @local.reference -(for_statement - loop_variable: (name) @local.definition) - -(variable_definition - name: (name) @local.definition) - -(assignment - left: (field) @local.definition) - -(assignment - left: (static_field) @local.definition) - -(parameter) @local.definition +(parameter) @local.definition.variable.parameter diff --git a/runtime/queries/yara/locals.scm b/runtime/queries/yara/locals.scm index 16f8768a0bff..7a4814dfacb9 100644 --- a/runtime/queries/yara/locals.scm +++ b/runtime/queries/yara/locals.scm @@ -1,5 +1,5 @@ -(rule_definition) @local.definition -(string_identifier) @local.definition +(rule_definition name: (identifier) @local.definition.string.special) +(string_identifier) @local.definition.string.special.symbol (for_expression (string_identifier) @local.reference) From 813f771c0a8910875ae1d1a486d72f6170f64a33 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Tue, 25 Feb 2025 11:57:27 -0500 Subject: [PATCH 5/7] queries: Inject markdown into Rust doc comments Co-authored-by: Nik Revenco <154856872+nik-rev@users.noreply.github.com> --- 
 book/src/generated/lang-support.md              |  1 +
 languages.toml                                  |  9 +++++++++
 runtime/queries/markdown-rustdoc/highlights.scm |  1 +
 runtime/queries/markdown-rustdoc/injections.scm | 14 ++++++++++++++
 runtime/queries/rust/injections.scm             |  6 +++++-
 5 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 runtime/queries/markdown-rustdoc/highlights.scm
 create mode 100644 runtime/queries/markdown-rustdoc/injections.scm

diff --git a/book/src/generated/lang-support.md b/book/src/generated/lang-support.md
index f6aee3fe3410..2190d5d53670 100644
--- a/book/src/generated/lang-support.md
+++ b/book/src/generated/lang-support.md
@@ -138,6 +138,7 @@
 | make | ✓ |  | ✓ |  |
 | markdoc | ✓ |  |  | `markdoc-ls` |
 | markdown | ✓ |  |  | `marksman`, `markdown-oxide` |
+| markdown-rustdoc | ✓ |  |  |  |
 | markdown.inline | ✓ |  |  |  |
 | matlab | ✓ | ✓ | ✓ |  |
 | mermaid | ✓ |  |  |  |
diff --git a/languages.toml b/languages.toml
index 9870e49c4332..a72cb6cd3888 100644
--- a/languages.toml
+++ b/languages.toml
@@ -1704,6 +1704,15 @@ block-comment-tokens = { start = "<!--", end = "-->" }
 name = "markdown"
 source = { git = "https://github.com/tree-sitter-grammars/tree-sitter-markdown", rev = "62516e8c78380e3b51d5b55727995d2c511436d8", subpath = "tree-sitter-markdown" }
 
+[[language]]
+name = "markdown-rustdoc"
+scope = "source.markdown-rustdoc"
+grammar = "markdown"
+injection-regex = "markdown-rustdoc"
+file-types = []
+indent = { tab-width = 2, unit = "  " }
+block-comment-tokens = { start = "<!--", end = "-->" }
+
 [[language]]
 name = "markdown.inline"
 scope = "source.markdown.inline"
diff --git a/runtime/queries/markdown-rustdoc/highlights.scm b/runtime/queries/markdown-rustdoc/highlights.scm
new file mode 100644
index 000000000000..a3a30e342921
--- /dev/null
+++ b/runtime/queries/markdown-rustdoc/highlights.scm
@@ -0,0 +1 @@
+; inherits: markdown
diff --git a/runtime/queries/markdown-rustdoc/injections.scm b/runtime/queries/markdown-rustdoc/injections.scm
new file mode 100644
index 000000000000..1f2e29825c57
--- /dev/null
+++ b/runtime/queries/markdown-rustdoc/injections.scm
@@ -0,0 +1,14 @@
+; inherits: markdown
+
+; In Rust, it is common to have documentation code blocks not specify the
+; language, and it is assumed to be Rust if it is not specified.
+
+(fenced_code_block
+  (code_fence_content) @injection.content
+  (#set! injection.language "rust")
+  (#set! injection.include-unnamed-children))
+
+(fenced_code_block
+  (info_string
+    (language) @injection.language)
+  (code_fence_content) @injection.content (#set! injection.include-unnamed-children))
diff --git a/runtime/queries/rust/injections.scm b/runtime/queries/rust/injections.scm
index 964eec0f8535..42ca12b5b8ab 100644
--- a/runtime/queries/rust/injections.scm
+++ b/runtime/queries/rust/injections.scm
@@ -1,6 +1,10 @@
-([(line_comment) (block_comment)] @injection.content
+([(line_comment !doc) (block_comment !doc)] @injection.content
   (#set! injection.language "comment"))
 
+((doc_comment) @injection.content
+  (#set! injection.language "markdown-rustdoc")
+  (#set! 
injection.combined)) + ((macro_invocation macro: [ From 56c3852729e4a756e0f1bb79fa1758f87e2ee81a Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Thu, 27 Feb 2025 10:27:57 -0500 Subject: [PATCH 6/7] Add rainbow highlights on top of tree-house bindings --- helix-core/src/syntax.rs | 168 ++++++++++++++++++++++- helix-core/src/syntax/config.rs | 2 + helix-term/src/ui/editor.rs | 33 +++++ helix-view/src/editor.rs | 3 + helix-view/src/theme.rs | 67 ++++++++- runtime/queries/bash/rainbows.scm | 20 +++ runtime/queries/c/rainbows.scm | 29 ++++ runtime/queries/clojure/rainbows.scm | 13 ++ runtime/queries/common-lisp/rainbows.scm | 1 + runtime/queries/cpp/rainbows.scm | 49 +++++++ runtime/queries/css/rainbows.scm | 15 ++ runtime/queries/ecma/rainbows.scm | 28 ++++ runtime/queries/elixir/rainbows.scm | 24 ++++ runtime/queries/erlang/rainbows.scm | 24 ++++ runtime/queries/gleam/rainbows.scm | 32 +++++ runtime/queries/go/rainbows.scm | 33 +++++ runtime/queries/html/rainbows.scm | 13 ++ runtime/queries/java/rainbows.scm | 35 +++++ runtime/queries/javascript/rainbows.scm | 1 + runtime/queries/json/rainbows.scm | 9 ++ runtime/queries/jsx/rainbows.scm | 9 ++ runtime/queries/nix/rainbows.scm | 17 +++ runtime/queries/python/rainbows.scm | 30 ++++ runtime/queries/racket/rainbows.scm | 1 + runtime/queries/regex/rainbows.scm | 17 +++ runtime/queries/ruby/rainbows.scm | 28 ++++ runtime/queries/rust/rainbows.scm | 60 ++++++++ runtime/queries/scheme/rainbows.scm | 12 ++ runtime/queries/scss/rainbows.scm | 3 + runtime/queries/starlark/rainbows.scm | 1 + runtime/queries/toml/rainbows.scm | 12 ++ runtime/queries/tsx/rainbows.scm | 2 + runtime/queries/typescript/rainbows.scm | 19 +++ runtime/queries/xml/rainbows.scm | 29 ++++ runtime/queries/yaml/rainbows.scm | 9 ++ xtask/src/main.rs | 1 + 36 files changed, 844 insertions(+), 5 deletions(-) create mode 100644 runtime/queries/bash/rainbows.scm create mode 100644 runtime/queries/c/rainbows.scm create mode 100644 runtime/queries/clojure/rainbows.scm create mode 100644 runtime/queries/common-lisp/rainbows.scm create mode 100644 runtime/queries/cpp/rainbows.scm create mode 100644 runtime/queries/css/rainbows.scm create mode 100644 runtime/queries/ecma/rainbows.scm create mode 100644 runtime/queries/elixir/rainbows.scm create mode 100644 runtime/queries/erlang/rainbows.scm create mode 100644 runtime/queries/gleam/rainbows.scm create mode 100644 runtime/queries/go/rainbows.scm create mode 100644 runtime/queries/html/rainbows.scm create mode 100644 runtime/queries/java/rainbows.scm create mode 100644 runtime/queries/javascript/rainbows.scm create mode 100644 runtime/queries/json/rainbows.scm create mode 100644 runtime/queries/jsx/rainbows.scm create mode 100644 runtime/queries/nix/rainbows.scm create mode 100644 runtime/queries/python/rainbows.scm create mode 100644 runtime/queries/racket/rainbows.scm create mode 100644 runtime/queries/regex/rainbows.scm create mode 100644 runtime/queries/ruby/rainbows.scm create mode 100644 runtime/queries/rust/rainbows.scm create mode 100644 runtime/queries/scheme/rainbows.scm create mode 100644 runtime/queries/scss/rainbows.scm create mode 100644 runtime/queries/starlark/rainbows.scm create mode 100644 runtime/queries/toml/rainbows.scm create mode 100644 runtime/queries/tsx/rainbows.scm create mode 100644 runtime/queries/typescript/rainbows.scm create mode 100644 runtime/queries/xml/rainbows.scm create mode 100644 runtime/queries/yaml/rainbows.scm diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 
e232ee69bb86..f3630a29522c 100644
--- a/helix-core/src/syntax.rs
+++ b/helix-core/src/syntax.rs
@@ -13,14 +13,18 @@ use std::{
 use anyhow::{Context, Result};
 use arc_swap::{ArcSwap, Guard};
 use config::{Configuration, FileType, LanguageConfiguration, LanguageServerConfiguration};
+use foldhash::HashSet;
 use helix_loader::grammar::get_language;
 use helix_stdx::rope::RopeSliceExt as _;
 use once_cell::sync::OnceCell;
 use ropey::RopeSlice;
 use tree_house::{
     highlighter,
-    query_iter::QueryIter,
-    tree_sitter::{Grammar, InactiveQueryCursor, InputEdit, Node, Query, RopeInput, Tree},
+    query_iter::{QueryIter, QueryIterEvent},
+    tree_sitter::{
+        query::{InvalidPredicateError, UserPredicate},
+        Capture, Grammar, InactiveQueryCursor, InputEdit, Node, Pattern, Query, RopeInput, Tree,
+    },
     Error, InjectionLanguageMarker, LanguageConfig as SyntaxConfig, Layer,
 };
 
@@ -37,6 +41,7 @@ pub struct LanguageData {
     syntax: OnceCell<Option<SyntaxConfig>>,
     indent_query: OnceCell<Option<IndentQuery>>,
     textobject_query: OnceCell<Option<TextObjectQuery>>,
+    rainbow_query: OnceCell<Option<RainbowQuery>>,
 }
 
 impl LanguageData {
@@ -46,6 +51,7 @@
             syntax: OnceCell::new(),
             indent_query: OnceCell::new(),
             textobject_query: OnceCell::new(),
+            rainbow_query: OnceCell::new(),
         }
     }
 
@@ -154,6 +160,36 @@
             .as_ref()
     }
 
+    /// Compiles the rainbows.scm query for a language.
+    /// This function should only be used by this module or the xtask crate.
+    pub fn compile_rainbow_query(
+        grammar: Grammar,
+        config: &LanguageConfiguration,
+    ) -> Result<Option<RainbowQuery>> {
+        let name = &config.language_id;
+        let text = read_query(name, "rainbows.scm");
+        if text.is_empty() {
+            return Ok(None);
+        }
+        let rainbow_query = RainbowQuery::new(grammar, &text)
+            .with_context(|| format!("Failed to compile rainbows.scm query for '{name}'"))?;
+        Ok(Some(rainbow_query))
+    }
+
+    fn rainbow_query(&self, loader: &Loader) -> Option<&RainbowQuery> {
+        self.rainbow_query
+            .get_or_init(|| {
+                let grammar = self.syntax_config(loader)?.grammar;
+                Self::compile_rainbow_query(grammar, &self.config)
+                    .map_err(|err| {
+                        log::error!("{err}");
+                    })
+                    .ok()
+                    .flatten()
+            })
+            .as_ref()
+    }
+
     fn reconfigure(&self, scopes: &[String]) {
         if let Some(Some(config)) = self.syntax.get() {
             reconfigure_highlights(config, scopes);
@@ -324,6 +360,10 @@ impl Loader {
         self.language(lang).textobject_query(self)
     }
 
+    fn rainbow_query(&self, lang: Language) -> Option<&RainbowQuery> {
+        self.language(lang).rainbow_query(self)
+    }
+
     pub fn language_server_configs(&self) -> &HashMap<String, LanguageServerConfiguration> {
         &self.language_server_configs
     }
@@ -496,6 +536,79 @@
     {
         QueryIter::new(&self.inner, source, loader, range)
     }
+
+    pub fn rainbow_highlights(
+        &self,
+        source: RopeSlice,
+        rainbow_length: usize,
+        loader: &Loader,
+        range: impl RangeBounds<u32>,
+    ) -> OverlayHighlights {
+        struct RainbowScope<'tree> {
+            end: u32,
+            node: Option<Node<'tree>>,
+            highlight: Highlight,
+        }
+
+        let mut scope_stack = Vec::<RainbowScope>::new();
+        let mut highlights = Vec::new();
+        let mut query_iter = self.query_iter::<_, (), _>(
+            source,
+            |lang| loader.rainbow_query(lang).map(|q| &q.query),
+            range,
+        );
+
+        while let Some(event) = query_iter.next() {
+            let QueryIterEvent::Match(mat) = event else {
+                continue;
+            };
+
+            let rainbow_query = loader
+                .rainbow_query(query_iter.current_language())
+                .expect("language must have a rainbow query to emit matches");
+
+            let byte_range = mat.node.byte_range();
+            // Pop any scopes that end before this capture begins.
+            while scope_stack
+                .last()
+                .is_some_and(|scope| byte_range.start >= scope.end)
+            {
+                scope_stack.pop();
+            }
+
+            let capture = Some(mat.capture);
+            if capture == rainbow_query.scope_capture {
+                scope_stack.push(RainbowScope {
+                    end: byte_range.end,
+                    node: if rainbow_query
+                        .include_children_patterns
+                        .contains(&mat.pattern)
+                    {
+                        None
+                    } else {
+                        Some(mat.node.clone())
+                    },
+                    highlight: Highlight::new((scope_stack.len() % rainbow_length) as u32),
+                });
+            } else if capture == rainbow_query.bracket_capture {
+                if let Some(scope) = scope_stack.last() {
+                    if !scope
+                        .node
+                        .as_ref()
+                        .is_some_and(|node| mat.node.parent().as_ref() != Some(node))
+                    {
+                        let start = source
+                            .byte_to_char(source.floor_char_boundary(byte_range.start as usize));
+                        let end =
+                            source.byte_to_char(source.ceil_char_boundary(byte_range.end as usize));
+                        highlights.push((scope.highlight, start..end));
+                    }
+                }
+            }
+        }
+
+        OverlayHighlights::Heterogenous { highlights }
+    }
 }
 
 pub type Highlighter<'a> = highlighter::Highlighter<'a, 'a, Loader>;
 
@@ -939,6 +1052,57 @@ fn pretty_print_tree_impl(
     Ok(())
 }
 
+/// Finds the child of `node` which contains the given byte range.
+
+pub fn child_for_byte_range<'a>(node: &Node<'a>, range: ops::Range<u32>) -> Option<Node<'a>> {
+    for child in node.children() {
+        let child_range = child.byte_range();
+
+        if range.start >= child_range.start && range.end <= child_range.end {
+            return Some(child);
+        }
+    }
+
+    None
+}
+
+#[derive(Debug)]
+pub struct RainbowQuery {
+    query: Query,
+    include_children_patterns: HashSet<Pattern>,
+    scope_capture: Option<Capture>,
+    bracket_capture: Option<Capture>,
+}
+
+impl RainbowQuery {
+    fn new(grammar: Grammar, source: &str) -> Result<Self> {
+        let mut include_children_patterns = HashSet::default();
+
+        let query = Query::new(grammar, source, |pattern, predicate| match predicate {
+            UserPredicate::SetProperty {
+                key: "rainbow.include-children",
+                val,
+            } => {
+                if val.is_some() {
+                    return Err(
+                        "property 'rainbow.include-children' does not take an argument".into(),
+                    );
+                }
+                include_children_patterns.insert(pattern);
+                Ok(())
+            }
+            _ => Err(InvalidPredicateError::unknown(predicate)),
+        })?;
+
+        Ok(Self {
+            include_children_patterns,
+            scope_capture: query.get_capture("rainbow.scope"),
+            bracket_capture: query.get_capture("rainbow.bracket"),
+            query,
+        })
+    }
+}
+
 #[cfg(test)]
 mod test {
     use once_cell::sync::Lazy;
diff --git a/helix-core/src/syntax/config.rs b/helix-core/src/syntax/config.rs
index 432611bb0d38..2152a70b0992 100644
--- a/helix-core/src/syntax/config.rs
+++ b/helix-core/src/syntax/config.rs
@@ -98,6 +98,8 @@ pub struct LanguageConfiguration {
     pub workspace_lsp_roots: Option<Vec<PathBuf>>,
     #[serde(default)]
     pub persistent_diagnostic_sources: Vec<String>,
+    /// Overrides the `editor.rainbow-brackets` config key for the language.
+    pub rainbow_brackets: Option<bool>,
 }
 
 impl LanguageConfiguration {
diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs
index 9343d55d4083..1f0ff4b3ee44 100644
--- a/helix-term/src/ui/editor.rs
+++ b/helix-term/src/ui/editor.rs
@@ -127,6 +127,18 @@ impl EditorView {
             &text_annotations,
         ));
 
+        if doc
+            .language_config()
+            .and_then(|config| config.rainbow_brackets)
+            .unwrap_or(config.rainbow_brackets)
+        {
+            if let Some(overlay) =
+                Self::doc_rainbow_highlights(doc, view_offset.anchor, inner.height, theme, &loader)
+            {
+                overlays.push(overlay);
+            }
+        }
+
         Self::doc_diagnostics_highlights_into(doc, theme, &mut overlays);
 
         if is_focused {
@@ -304,6 +316,27 @@ impl EditorView {
         text_annotations.collect_overlay_highlights(range)
     }
 
+    pub fn doc_rainbow_highlights(
+        doc: &Document,
+        anchor: usize,
+        height: u16,
+        theme: &Theme,
+        loader: &syntax::Loader,
+    ) -> Option<OverlayHighlights> {
+        let syntax = doc.syntax()?;
+        let text = doc.text().slice(..);
+        let row = text.char_to_line(anchor.min(text.len_chars()));
+        let visible_range = Self::viewport_byte_range(text, row, height);
+        let start = syntax::child_for_byte_range(
+            &syntax.tree().root_node(),
+            visible_range.start as u32..visible_range.end as u32,
+        )
+        .map_or(visible_range.start as u32, |node| node.start_byte());
+        let range = start..visible_range.end as u32;
+
+        Some(syntax.rainbow_highlights(text, theme.rainbow_length(), loader, range))
+    }
+
     /// Get highlight spans for document diagnostics
     pub fn doc_diagnostics_highlights_into(
         doc: &Document,
diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs
index 4255b9e199ec..e3d3db19813b 100644
--- a/helix-view/src/editor.rs
+++ b/helix-view/src/editor.rs
@@ -373,6 +373,8 @@ pub struct Config {
     /// Whether to read settings from [EditorConfig](https://editorconfig.org) files. Defaults to
     /// `true`.
     pub editor_config: bool,
+    /// Whether to render rainbow colors for matching brackets. Defaults to `false`.
+    pub rainbow_brackets: bool,
 }
 
 #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Eq, PartialOrd, Ord)]
@@ -1020,6 +1022,7 @@ impl Default for Config {
             end_of_line_diagnostics: DiagnosticFilter::Disable,
             clipboard_provider: ClipboardProvider::default(),
             editor_config: true,
+            rainbow_brackets: false,
         }
     }
 }
diff --git a/helix-view/src/theme.rs b/helix-view/src/theme.rs
index 61d490ff3978..6a18d321e19c 100644
--- a/helix-view/src/theme.rs
+++ b/helix-view/src/theme.rs
@@ -227,6 +227,7 @@ pub struct Theme {
     // tree-sitter highlight styles are stored in a Vec to optimize lookups
     scopes: Vec<String>,
     highlights: Vec