diff --git a/Cargo.lock b/Cargo.lock
index 24a8fda3..42301c6b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -322,6 +322,31 @@ dependencies = [
  "itertools 0.10.5",
 ]
 
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
+
 [[package]]
 name = "crunchy"
 version = "0.2.2"
@@ -393,6 +418,15 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "ecow"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e42fc0a93992b20c58b99e59d61eaf1635a25bfbe49e4275c34ba0aee98119ba"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "either"
 version = "1.13.0"
@@ -561,6 +595,7 @@ dependencies = [
  "harper-comments",
  "harper-core",
  "harper-literate-haskell",
+ "harper-typst",
  "serde_json",
 ]
 
@@ -651,6 +686,7 @@ dependencies = [
  "harper-core",
  "harper-html",
  "harper-literate-haskell",
+ "harper-typst",
  "itertools 0.14.0",
  "once_cell",
  "open",
@@ -671,6 +707,17 @@ dependencies = [
  "tree-sitter",
 ]
 
+[[package]]
+name = "harper-typst"
+version = "0.15.0"
+dependencies = [
+ "harper-core",
+ "itertools 0.14.0",
+ "ordered-float",
+ "paste",
+ "typst-syntax",
+]
+
 [[package]]
 name = "harper-wasm"
 version = "0.1.0"
@@ -861,6 +908,16 @@ dependencies = [
  "icu_properties",
 ]
 
+[[package]]
+name = "indexmap"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.15.2",
+]
+
 [[package]]
 name = "is-docker"
 version = "0.2.0"
@@ -1181,6 +1238,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
+[[package]]
+name = "portable-atomic"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
+
 [[package]]
 name = "ppv-lite86"
 version = "0.2.20"
@@ -1259,6 +1322,26 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "redox_syscall"
 version = "0.5.8"
@@ -1409,6 +1492,15 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "serde_spanned"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "sharded-slab"
 version = "0.1.7"
"0.1.7" @@ -1418,6 +1510,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.9" @@ -1480,6 +1578,12 @@ dependencies = [ "syn", ] +[[package]] +name = "thin-vec" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a38c90d48152c236a3ab59271da4f4ae63d678c5d7ad6b7714d7cb9760be5e4b" + [[package]] name = "thiserror" version = "1.0.69" @@ -1590,6 +1694,40 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "tower" version = "0.4.13" @@ -1908,6 +2046,37 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "typst-syntax" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b7be8b6ed6b2cb39ca495947d548a28d7db0ba244008e44c5a759120327693" +dependencies = [ + "ecow", + "once_cell", + "serde", + "toml", + "typst-utils", + "unicode-ident", + "unicode-math-class", + "unicode-script", + "unicode-segmentation", + "unscanny", +] + +[[package]] +name = "typst-utils" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f0305443ed97f0b658471487228f86bf835705e7525fbdcc671cebd864f7a40" +dependencies = [ + "once_cell", + "portable-atomic", + "rayon", + "siphasher", + "thin-vec", +] + [[package]] name = "unicase" version = "2.8.1" @@ -1926,6 +2095,24 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +[[package]] +name = "unicode-math-class" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d246cf599d5fae3c8d56e04b20eb519adb89a8af8d0b0fbcded369aa3647d65" + +[[package]] +name = "unicode-script" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb421b350c9aff471779e262955939f565ec18b86c15364e6bdf0d662ca7c1f" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.1.14" @@ -1938,6 +2125,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "unscanny" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47" + 
[[package]] name = "url" version = "2.5.4" @@ -2157,6 +2350,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +dependencies = [ + "memchr", +] + [[package]] name = "write16" version = "1.0.0" diff --git a/Cargo.toml b/Cargo.toml index 41a57d70..05f7ccd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = [ "harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell"] +members = [ "harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst" ] resolver = "2" [profile.release] diff --git a/harper-cli/Cargo.toml b/harper-cli/Cargo.toml index d8496840..b1655aba 100644 --- a/harper-cli/Cargo.toml +++ b/harper-cli/Cargo.toml @@ -13,4 +13,8 @@ clap = { version = "4.5.23", features = ["derive"] } harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.15.0" } harper-core = { path = "../harper-core", version = "0.15.0" } harper-comments = { path = "../harper-comments", version = "0.15.0" } +harper-typst = { path = "../harper-typst", version = "0.15.0" } serde_json = "1.0.133" + +[features] +default = [] diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index 3a260d0d..0044ecde 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -169,10 +169,11 @@ fn main() -> anyhow::Result<()> { fn load_file(file: &Path) -> anyhow::Result<(Document, String)> { let source = std::fs::read_to_string(file)?; - let mut parser: Box = + let parser: Box = match file.extension().map(|v| v.to_str().unwrap()) { Some("md") => Box::new(Markdown), Some("lhs") => Box::new(LiterateHaskellParser), + Some("typ") => Box::new(harper_typst::Typst), _ => Box::new( CommentParser::new_from_filename(file) .map(Box::new) @@ -180,5 +181,5 @@ fn load_file(file: &Path) -> anyhow::Result<(Document, String)> { ), }; - Ok((Document::new_curated(&source, &mut parser), source)) + Ok((Document::new_curated(&source, &parser), source)) } diff --git a/harper-comments/src/comment_parser.rs b/harper-comments/src/comment_parser.rs index 923b899b..7c15942c 100644 --- a/harper-comments/src/comment_parser.rs +++ b/harper-comments/src/comment_parser.rs @@ -99,7 +99,7 @@ impl CommentParser { } impl Parser for CommentParser { - fn parse(&mut self, source: &[char]) -> Vec { + fn parse(&self, source: &[char]) -> Vec { self.inner.parse(source) } } diff --git a/harper-comments/src/comment_parsers/go.rs b/harper-comments/src/comment_parsers/go.rs index 0c90b7d8..f7f824db 100644 --- a/harper-comments/src/comment_parsers/go.rs +++ b/harper-comments/src/comment_parsers/go.rs @@ -7,7 +7,7 @@ use super::without_initiators; pub struct Go; impl Parser for Go { - fn parse(&mut self, source: &[char]) -> Vec { + fn parse(&self, source: &[char]) -> Vec { let mut actual = without_initiators(source); let mut actual_source = actual.get_content(source); @@ -25,9 +25,7 @@ impl Parser for Go { actual_source = new_source } - let mut markdown_parser = Markdown; - - let mut new_tokens = markdown_parser.parse(actual_source); + let mut new_tokens = Markdown.parse(actual_source); new_tokens .iter_mut() diff --git 
index 93b7f903..ef071488 100644
--- a/harper-comments/src/comment_parsers/javadoc.rs
+++ b/harper-comments/src/comment_parsers/javadoc.rs
@@ -12,7 +12,7 @@ pub struct JavaDoc {
 }
 
 impl Parser for JavaDoc {
-    fn parse(&mut self, source: &[char]) -> Vec<Token> {
+    fn parse(&self, source: &[char]) -> Vec<Token> {
         let actual = without_initiators(source);
         let actual_source = actual.get_content(source);
 
diff --git a/harper-comments/src/comment_parsers/jsdoc.rs b/harper-comments/src/comment_parsers/jsdoc.rs
index 33e4858a..dcc56a0a 100644
--- a/harper-comments/src/comment_parsers/jsdoc.rs
+++ b/harper-comments/src/comment_parsers/jsdoc.rs
@@ -7,7 +7,7 @@ use super::without_initiators;
 pub struct JsDoc;
 
 impl Parser for JsDoc {
-    fn parse(&mut self, source: &[char]) -> Vec<Token> {
+    fn parse(&self, source: &[char]) -> Vec<Token> {
         let mut tokens = Vec::new();
 
         let mut chars_traversed = 0;
diff --git a/harper-comments/src/comment_parsers/unit.rs b/harper-comments/src/comment_parsers/unit.rs
index 324225ca..3185a172 100644
--- a/harper-comments/src/comment_parsers/unit.rs
+++ b/harper-comments/src/comment_parsers/unit.rs
@@ -12,7 +12,7 @@ use super::without_initiators;
 pub struct Unit;
 
 impl Parser for Unit {
-    fn parse(&mut self, source: &[char]) -> Vec<Token> {
+    fn parse(&self, source: &[char]) -> Vec<Token> {
         let mut tokens = Vec::new();
 
         let mut chars_traversed = 0;
@@ -57,10 +57,7 @@ fn parse_line(source: &[char]) -> Vec<Token> {
     }
 
     let source = actual.get_content(source);
-
-    let mut markdown_parser = Markdown;
-
-    let mut new_tokens = markdown_parser.parse(source);
+    let mut new_tokens = Markdown.parse(source);
 
     new_tokens
         .iter_mut()
diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs
index e1378485..1741ee63 100644
--- a/harper-core/src/document.rs
+++ b/harper-core/src/document.rs
@@ -20,14 +20,14 @@ pub struct Document {
 
 impl Default for Document {
     fn default() -> Self {
-        Self::new("", &mut PlainEnglish, &FstDictionary::curated())
+        Self::new("", &PlainEnglish, &FstDictionary::curated())
     }
 }
 
 impl Document {
     /// Lexes and parses text to produce a document using a provided language
     /// parser and dictionary.
-    pub fn new(text: &str, parser: &mut impl Parser, dictionary: &impl Dictionary) -> Self {
+    pub fn new(text: &str, parser: &impl Parser, dictionary: &impl Dictionary) -> Self {
         let source: Vec<_> = text.chars().collect();
 
         Self::new_from_vec(Lrc::new(source), parser, dictionary)
@@ -35,7 +35,7 @@ impl Document {
 
     /// Lexes and parses text to produce a document using a provided language
     /// parser and the included curated dictionary.
-    pub fn new_curated(text: &str, parser: &mut impl Parser) -> Self {
+    pub fn new_curated(text: &str, parser: &impl Parser) -> Self {
         let source: Vec<_> = text.chars().collect();
 
         Self::new_from_vec(Lrc::new(source), parser, &FstDictionary::curated())
@@ -45,7 +45,7 @@ impl Document {
     /// parser and dictionary.
     pub fn new_from_vec(
         source: Lrc<Vec<char>>,
-        parser: &mut impl Parser,
+        parser: &impl Parser,
         dictionary: &impl Dictionary,
     ) -> Self {
         let tokens = parser.parse(&source);
@@ -59,25 +59,25 @@ impl Document {
     /// Parse text to produce a document using the built-in [`PlainEnglish`]
     /// parser and curated dictionary.
pub fn new_plain_english_curated(text: &str) -> Self { - Self::new(text, &mut PlainEnglish, &FstDictionary::curated()) + Self::new(text, &PlainEnglish, &FstDictionary::curated()) } /// Parse text to produce a document using the built-in [`PlainEnglish`] /// parser and a provided dictionary. pub fn new_plain_english(text: &str, dictionary: &impl Dictionary) -> Self { - Self::new(text, &mut PlainEnglish, dictionary) + Self::new(text, &PlainEnglish, dictionary) } /// Parse text to produce a document using the built-in [`Markdown`] parser /// and curated dictionary. pub fn new_markdown_curated(text: &str) -> Self { - Self::new(text, &mut Markdown, &FstDictionary::curated()) + Self::new(text, &Markdown, &FstDictionary::curated()) } /// Parse text to produce a document using the built-in [`PlainEnglish`] /// parser and the curated dictionary. pub fn new_markdown(text: &str, dictionary: &impl Dictionary) -> Self { - Self::new(text, &mut Markdown, dictionary) + Self::new(text, &Markdown, dictionary) } /// Re-parse important language constructs. @@ -283,6 +283,12 @@ impl Document { } let child_tok = ©[cursor]; + + // Only condense adjacent spans + if start_tok.span.end != child_tok.span.start { + break; + } + if let TokenKind::Space(n) = child_tok.kind { *start_count += n; start_tok.span.end = child_tok.span.end; diff --git a/harper-core/src/mask/mod.rs b/harper-core/src/mask/mod.rs index 4b677c97..c633d6c6 100644 --- a/harper-core/src/mask/mod.rs +++ b/harper-core/src/mask/mod.rs @@ -8,7 +8,7 @@ use crate::Span; /// This is primarily used by [`crate::parsers::Mask`] to create parsers for /// things like comments of programming languages. pub trait Masker: Send + Sync { - fn create_mask(&mut self, source: &[char]) -> Mask; + fn create_mask(&self, source: &[char]) -> Mask; } /// Identifies portions of a [`char`] sequence that should __not__ be ignored by diff --git a/harper-core/src/parsers/collapse_identifiers.rs b/harper-core/src/parsers/collapse_identifiers.rs index e98f2c11..abdfae4b 100644 --- a/harper-core/src/parsers/collapse_identifiers.rs +++ b/harper-core/src/parsers/collapse_identifiers.rs @@ -32,7 +32,7 @@ thread_local! { } impl Parser for CollapseIdentifiers { - fn parse(&mut self, source: &[char]) -> Vec { + fn parse(&self, source: &[char]) -> Vec { let mut tokens = self.inner.parse(source); let mut to_remove = VecDeque::default(); diff --git a/harper-core/src/parsers/isolate_english.rs b/harper-core/src/parsers/isolate_english.rs index 151e5fdd..04d7e8e2 100644 --- a/harper-core/src/parsers/isolate_english.rs +++ b/harper-core/src/parsers/isolate_english.rs @@ -19,7 +19,7 @@ impl IsolateEnglish { } impl Parser for IsolateEnglish { - fn parse(&mut self, source: &[char]) -> Vec { + fn parse(&self, source: &[char]) -> Vec { let tokens = self.inner.parse(source); let mut english_tokens: Vec = Vec::with_capacity(tokens.len()); diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index cc5331fd..ed5f965c 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -120,8 +120,8 @@ impl Markdown { impl Parser for Markdown { /// This implementation is quite gross to look at, but it works. /// If any issues arise, it would likely help to refactor this out first. 
- fn parse(&mut self, source: &[char]) -> Vec { - let mut english_parser = PlainEnglish; + fn parse(&self, source: &[char]) -> Vec { + let english_parser = PlainEnglish; let source_str: String = source.iter().collect(); let md_parser = pulldown_cmark::Parser::new_ext( diff --git a/harper-core/src/parsers/mask.rs b/harper-core/src/parsers/mask.rs index 2bd21aae..92282794 100644 --- a/harper-core/src/parsers/mask.rs +++ b/harper-core/src/parsers/mask.rs @@ -27,7 +27,7 @@ where M: Masker, P: Parser, { - fn parse(&mut self, source: &[char]) -> Vec { + fn parse(&self, source: &[char]) -> Vec { let mask = self.masker.create_mask(source); let mut tokens: Vec = Vec::new(); diff --git a/harper-core/src/parsers/mod.rs b/harper-core/src/parsers/mod.rs index 55900667..91d81980 100644 --- a/harper-core/src/parsers/mod.rs +++ b/harper-core/src/parsers/mod.rs @@ -16,24 +16,24 @@ use crate::{Token, TokenStringExt}; #[cfg(not(feature = "concurrent"))] #[blanket(derive(Box))] pub trait Parser { - fn parse(&mut self, source: &[char]) -> Vec; + fn parse(&self, source: &[char]) -> Vec; } #[cfg(feature = "concurrent")] #[blanket(derive(Box))] pub trait Parser: Send + Sync { - fn parse(&mut self, source: &[char]) -> Vec; + fn parse(&self, source: &[char]) -> Vec; } pub trait StrParser { - fn parse_str(&mut self, source: impl AsRef) -> Vec; + fn parse_str(&self, source: impl AsRef) -> Vec; } impl StrParser for T where T: Parser, { - fn parse_str(&mut self, source: impl AsRef) -> Vec { + fn parse_str(&self, source: impl AsRef) -> Vec { let source: Vec<_> = source.as_ref().chars().collect(); self.parse(&source) } diff --git a/harper-core/src/parsers/plain_english.rs b/harper-core/src/parsers/plain_english.rs index 3f3ef119..13d8df8d 100644 --- a/harper-core/src/parsers/plain_english.rs +++ b/harper-core/src/parsers/plain_english.rs @@ -4,12 +4,11 @@ use crate::{Span, Token}; /// A parser that will attempt to lex as many tokens a possible, /// without discrimination and until the end of input. +#[derive(Clone, Copy)] pub struct PlainEnglish; -impl PlainEnglish {} - impl Parser for PlainEnglish { - fn parse(&mut self, source: &[char]) -> Vec { + fn parse(&self, source: &[char]) -> Vec { let mut cursor = 0; // Lex tokens diff --git a/harper-core/src/patterns/sequence_pattern.rs b/harper-core/src/patterns/sequence_pattern.rs index 4122d8d3..d2372477 100644 --- a/harper-core/src/patterns/sequence_pattern.rs +++ b/harper-core/src/patterns/sequence_pattern.rs @@ -56,6 +56,7 @@ impl SequencePattern { gen_then_from_is!(case_separator); gen_then_from_is!(adverb); gen_then_from_is!(adjective); + gen_then_from_is!(apostrophe); gen_then_from_is!(hyphen); /// Add a pattern that looks for more complex ideas, like nouns with adjectives attached. 
diff --git a/harper-core/src/word_metadata.rs b/harper-core/src/word_metadata.rs
index 0cf05393..caa64ee5 100644
--- a/harper-core/src/word_metadata.rs
+++ b/harper-core/src/word_metadata.rs
@@ -128,7 +128,7 @@ pub enum Tense {
     Future,
 }
 
-#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
 pub struct VerbData {
     pub is_linking: Option<bool>,
     pub tense: Option<Tense>,
@@ -144,7 +144,7 @@ impl VerbData {
     }
 }
 
-#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
 pub struct NounData {
     pub is_proper: Option<bool>,
     pub is_plural: Option<bool>,
@@ -164,7 +164,7 @@ impl NounData {
     }
 }
 
-#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
 pub struct AdjectiveData {}
 
 impl AdjectiveData {
@@ -174,7 +174,7 @@ impl AdjectiveData {
     }
 }
 
-#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
 pub struct AdverbData {}
 
 impl AdverbData {
@@ -184,7 +184,7 @@ impl AdverbData {
     }
 }
 
-#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
 pub struct ConjunctionData {}
 
 impl ConjunctionData {
diff --git a/harper-html/src/lib.rs b/harper-html/src/lib.rs
index b4aefcf9..f8c7bff8 100644
--- a/harper-html/src/lib.rs
+++ b/harper-html/src/lib.rs
@@ -25,7 +25,7 @@ impl Default for HtmlParser {
 }
 
 impl Parser for HtmlParser {
-    fn parse(&mut self, source: &[char]) -> Vec<Token> {
+    fn parse(&self, source: &[char]) -> Vec<Token> {
         self.inner.parse(source)
     }
 }
diff --git a/harper-literate-haskell/src/lib.rs b/harper-literate-haskell/src/lib.rs
index f56515a7..903f9c5b 100644
--- a/harper-literate-haskell/src/lib.rs
+++ b/harper-literate-haskell/src/lib.rs
@@ -25,7 +25,7 @@ impl LiterateHaskellParser {
 }
 
 impl Parser for LiterateHaskellParser {
-    fn parse(&mut self, source: &[char]) -> Vec<Token> {
+    fn parse(&self, source: &[char]) -> Vec<Token> {
         Mask::new(LiterateHaskellMasker::text_only(), Markdown).parse(source)
     }
 }
diff --git a/harper-literate-haskell/src/masker.rs b/harper-literate-haskell/src/masker.rs
index bad9556c..52ebe6ad 100644
--- a/harper-literate-haskell/src/masker.rs
+++ b/harper-literate-haskell/src/masker.rs
@@ -25,7 +25,7 @@ impl LiterateHaskellMasker {
 }
 
 impl Masker for LiterateHaskellMasker {
-    fn create_mask(&mut self, source: &[char]) -> harper_core::Mask {
+    fn create_mask(&self, source: &[char]) -> harper_core::Mask {
         let mut mask = Mask::new_blank();
 
         let mut location = 0;
diff --git a/harper-ls/Cargo.toml b/harper-ls/Cargo.toml
index 13065669..1449ca62 100644
--- a/harper-ls/Cargo.toml
+++ b/harper-ls/Cargo.toml
@@ -11,6 +11,7 @@ repository = "https://github.com/automattic/harper"
 harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.15.0" }
 harper-core = { path = "../harper-core", version = "0.15.0", features = ["concurrent"] }
 harper-comments = { path = "../harper-comments", version = "0.15.0" }
+harper-typst = { path = "../harper-typst", version = "0.15.0" }
 harper-html = { path = "../harper-html", version = "0.15.0" }
 tower-lsp = "0.20.0"
 tokio = { version = "1.43.0", features = ["fs", "rt", "rt-multi-thread", "macros", "io-std", "io-util", "net"] }
"rt", "rt-multi-thread", "macros", "io-std", "io-util", "net"] } @@ -26,3 +27,6 @@ resolve-path = "0.1.0" open = "5.3.0" futures = "0.3.31" serde = { version = "1.0.214", features = ["derive"] } + +[features] +default = [] diff --git a/harper-ls/src/backend.rs b/harper-ls/src/backend.rs index 35b4e97a..91acf5ae 100644 --- a/harper-ls/src/backend.rs +++ b/harper-ls/src/backend.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::path::{Component, PathBuf}; use std::sync::Arc; -use anyhow::anyhow; +use anyhow::{anyhow, Result}; use harper_comments::CommentParser; use harper_core::linting::{LintGroup, Linter}; use harper_core::parsers::{CollapseIdentifiers, IsolateEnglish, Markdown, Parser, PlainEnglish}; @@ -12,10 +12,10 @@ use harper_core::{ }; use harper_html::HtmlParser; use harper_literate_haskell::LiterateHaskellParser; -use itertools::Itertools; +use harper_typst::Typst; use serde_json::Value; use tokio::sync::{Mutex, RwLock}; -use tower_lsp::jsonrpc::Result; +use tower_lsp::jsonrpc::Result as JsonResult; use tower_lsp::lsp_types::notification::PublishDiagnostics; use tower_lsp::lsp_types::{ CodeActionOrCommand, CodeActionParams, CodeActionProviderCapability, CodeActionResponse, @@ -84,7 +84,7 @@ impl Backend { } } - async fn save_file_dictionary(&self, url: &Url, dict: impl Dictionary) -> anyhow::Result<()> { + async fn save_file_dictionary(&self, url: &Url, dict: impl Dictionary) -> Result<()> { Ok(save_dict( self.get_file_dict_path(url) .await @@ -103,13 +103,13 @@ impl Backend { } } - async fn save_user_dictionary(&self, dict: impl Dictionary) -> anyhow::Result<()> { + async fn save_user_dictionary(&self, dict: impl Dictionary) -> Result<()> { let config = self.config.read().await; Ok(save_dict(&config.user_dict_path, dict).await?) } - async fn generate_global_dictionary(&self) -> anyhow::Result { + async fn generate_global_dictionary(&self) -> Result { let mut dict = MergedDictionary::new(); dict.add_dictionary(FstDictionary::curated()); let user_dict = self.load_user_dictionary().await; @@ -117,7 +117,7 @@ impl Backend { Ok(dict) } - async fn generate_file_dictionary(&self, url: &Url) -> anyhow::Result { + async fn generate_file_dictionary(&self, url: &Url) -> Result { let (global_dictionary, file_dictionary) = tokio::join!( self.generate_global_dictionary(), self.load_file_dictionary(url) @@ -133,11 +133,7 @@ impl Backend { Ok(global_dictionary) } - async fn update_document_from_file( - &self, - url: &Url, - language_id: Option<&str>, - ) -> anyhow::Result<()> { + async fn update_document_from_file(&self, url: &Url, language_id: Option<&str>) -> Result<()> { let content = match tokio::fs::read_to_string( url.to_file_path() .map_err(|_| anyhow::format_err!("Could not extract file path."))?, @@ -159,7 +155,7 @@ impl Backend { url: &Url, text: &str, language_id: Option<&str>, - ) -> anyhow::Result<()> { + ) -> Result<()> { self.pull_config().await; let mut doc_lock = self.doc_state.lock().await; @@ -184,65 +180,79 @@ impl Backend { return Ok(()); }; - let parser: Option> = - if let Some(ts_parser) = CommentParser::new_from_language_id(language_id) { - let source = text.chars().collect_vec(); - - if let Some(new_dict) = ts_parser.create_ident_dict(&source) { - let new_dict = Arc::new(new_dict); - - if doc_state.ident_dict != new_dict { - doc_state.ident_dict = new_dict.clone(); + async fn use_ident_dict<'a>( + backend: &'a Backend, + new_dict: Arc, + parser: impl Parser + 'static, + url: &'a Url, + doc_state: &'a mut DocumentState, + config_lock: 
+        ) -> Result<Box<dyn Parser>> {
+            if doc_state.ident_dict != new_dict {
+                doc_state.ident_dict = new_dict.clone();
+
+                let mut merged = backend.generate_file_dictionary(url).await?;
+                merged.add_dictionary(new_dict);
+                let merged = Arc::new(merged);
+
+                doc_state.linter = LintGroup::new(config_lock.lint_config, merged.clone());
+                doc_state.dict = merged.clone();
+            }
 
-                        let mut merged = self.generate_file_dictionary(url).await?;
-                        merged.add_dictionary(new_dict);
-                        let merged = Arc::new(merged);
+            Ok(Box::new(CollapseIdentifiers::new(
+                Box::new(parser),
+                Box::new(doc_state.dict.clone()),
+            )))
+        }
 
-                        doc_state.linter = LintGroup::new(config_lock.lint_config, merged.clone());
-                        doc_state.dict = merged.clone();
-                    }
-                    Some(Box::new(CollapseIdentifiers::new(
-                        Box::new(ts_parser),
-                        Box::new(doc_state.dict.clone()),
-                    )))
+        let source: Vec<char> = text.chars().collect();
+        let ts_parser = CommentParser::new_from_language_id(language_id);
+        let parser: Option<Box<dyn Parser>> = match language_id.as_str() {
+            _ if ts_parser.is_some() => {
+                let ts_parser = ts_parser.unwrap();
+
+                if let Some(new_dict) = ts_parser.create_ident_dict(&Arc::new(source)) {
+                    Some(
+                        use_ident_dict(
+                            self,
+                            Arc::new(new_dict),
+                            ts_parser,
+                            url,
+                            doc_state,
+                            config_lock,
+                        )
+                        .await?,
+                    )
                 } else {
                     Some(Box::new(ts_parser))
                 }
-            } else if language_id == "lhaskell" {
-                let source = text.chars().collect_vec();
+            }
+            "lhaskell" => {
                 let parser = LiterateHaskellParser;
 
-                if let Some(new_dict) = parser.create_ident_dict(&source) {
-                    let new_dict = Arc::new(new_dict);
-
-                    if doc_state.ident_dict != new_dict {
-                        doc_state.ident_dict = new_dict.clone();
-
-                        let mut merged = self.generate_file_dictionary(url).await?;
-                        merged.add_dictionary(new_dict);
-                        let merged = Arc::new(merged);
-
-                        doc_state.linter = LintGroup::new(config_lock.lint_config, merged.clone());
-                        doc_state.dict = merged.clone();
-                    }
-                    Some(Box::new(CollapseIdentifiers::new(
-                        Box::new(parser),
-                        Box::new(doc_state.dict.clone()),
-                    )))
+                if let Some(new_dict) = parser.create_ident_dict(&Arc::new(source)) {
+                    Some(
+                        use_ident_dict(
+                            self,
+                            Arc::new(new_dict),
+                            parser,
+                            url,
+                            doc_state,
+                            config_lock,
+                        )
+                        .await?,
+                    )
                 } else {
                     Some(Box::new(parser))
                 }
-            } else if language_id == "markdown" {
-                Some(Box::new(Markdown))
-            } else if language_id == "git-commit" || language_id == "gitcommit" {
-                Some(Box::new(GitCommitParser))
-            } else if language_id == "html" {
-                Some(Box::new(HtmlParser::default()))
-            } else if language_id == "mail" || language_id == "plaintext" {
-                Some(Box::new(PlainEnglish))
-            } else {
-                None
-            };
+            }
+            "markdown" => Some(Box::new(Markdown)),
+            "git-commit" | "gitcommit" => Some(Box::new(GitCommitParser)),
+            "html" => Some(Box::new(HtmlParser::default())),
+            "mail" | "plaintext" => Some(Box::new(PlainEnglish)),
+            "typst" => Some(Box::new(Typst)),
+            _ => None,
+        };
 
         match parser {
             None => {
@@ -253,7 +263,7 @@ impl Backend {
             parser = Box::new(IsolateEnglish::new(parser, doc_state.dict.clone()));
         }
 
-        doc_state.document = Document::new(text, &mut parser, &doc_state.dict);
+        doc_state.document = Document::new(text, &parser, &doc_state.dict);
     }
 }
@@ -264,7 +274,7 @@ impl Backend {
         &self,
         url: &Url,
         range: Range,
-    ) -> Result<Vec<CodeActionOrCommand>> {
+    ) -> JsonResult<Vec<CodeActionOrCommand>> {
         let (config, mut doc_states) = tokio::join!(self.config.read(), self.doc_state.lock());
         let Some(doc_state) = doc_states.get_mut(url) else {
             return Ok(Vec::new());
         };
@@ -367,7 +377,7 @@ impl Backend {
 }
 
 #[tower_lsp::async_trait]
 impl LanguageServer for Backend {
-    async fn initialize(&self, _: InitializeParams) -> Result<InitializeResult> {
+    async fn initialize(&self, _: InitializeParams) -> JsonResult<InitializeResult> {
         Ok(InitializeResult {
             server_info: None,
             capabilities: ServerCapabilities {
@@ -492,7 +502,7 @@ impl LanguageServer for Backend {
         }
     }
 
-    async fn execute_command(&self, params: ExecuteCommandParams) -> Result<Option<Value>> {
+    async fn execute_command(&self, params: ExecuteCommandParams) -> JsonResult<Option<Value>> {
         let mut string_args = params
             .arguments
             .into_iter()
@@ -580,7 +590,10 @@ impl LanguageServer for Backend {
         }
     }
 
-    async fn code_action(&self, params: CodeActionParams) -> Result<Option<CodeActionResponse>> {
+    async fn code_action(
+        &self,
+        params: CodeActionParams,
+    ) -> JsonResult<Option<CodeActionResponse>> {
         let actions = self
             .generate_code_actions(&params.text_document.uri, params.range)
             .await?;
@@ -588,7 +601,7 @@ impl LanguageServer for Backend {
         Ok(Some(actions))
     }
 
-    async fn shutdown(&self) -> Result<()> {
+    async fn shutdown(&self) -> JsonResult<()> {
         let doc_states = self.doc_state.lock().await;
 
         // Clears the diagnostics for open buffers.
diff --git a/harper-ls/src/git_commit_parser.rs b/harper-ls/src/git_commit_parser.rs
index 2b2f7d06..0b86a8eb 100644
--- a/harper-ls/src/git_commit_parser.rs
+++ b/harper-ls/src/git_commit_parser.rs
@@ -6,15 +6,13 @@ pub struct GitCommitParser;
 impl Parser for GitCommitParser {
     /// Admittedly a somewhat naive implementation.
     /// We're going to get _something_ to work, before we polish it off.
-    fn parse(&mut self, source: &[char]) -> Vec<Token> {
+    fn parse(&self, source: &[char]) -> Vec<Token> {
         // Locate the first `#`
         let end = source
             .iter()
             .position(|c| *c == '#')
             .unwrap_or(source.len());
 
-        let mut md_parser = Markdown;
-
-        md_parser.parse(&source[0..end])
+        Markdown.parse(&source[0..end])
     }
 }
diff --git a/harper-tree-sitter/src/lib.rs b/harper-tree-sitter/src/lib.rs
index abb09aa8..1eb53e3d 100644
--- a/harper-tree-sitter/src/lib.rs
+++ b/harper-tree-sitter/src/lib.rs
@@ -91,7 +91,7 @@ impl TreeSitterMasker {
 }
 
 impl Masker for TreeSitterMasker {
-    fn create_mask(&mut self, source: &[char]) -> Mask {
+    fn create_mask(&self, source: &[char]) -> Mask {
         let text: String = source.iter().collect();
 
         let Some(root) = self.parse_root(&text) else {
diff --git a/harper-typst/Cargo.toml b/harper-typst/Cargo.toml
new file mode 100644
index 00000000..825f3ded
--- /dev/null
+++ b/harper-typst/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "harper-typst"
+version = "0.15.0"
+edition = "2021"
+description = "The language checker for developers."
+license = "Apache-2.0"
+repository = "https://github.com/automattic/harper"
+
+[dependencies]
+harper-core = { path = "../harper-core", version = "0.15.0" }
+typst-syntax = { version = "0.12.0" }
+ordered-float = { version = "4.6.0", features = ["serde"] }
+itertools = "0.14.0"
+paste = "1.0.14"
diff --git a/harper-typst/src/lib.rs b/harper-typst/src/lib.rs
new file mode 100644
index 00000000..3cb20a55
--- /dev/null
+++ b/harper-typst/src/lib.rs
@@ -0,0 +1,342 @@
+mod offset_cursor;
+mod typst_translator;
+
+use offset_cursor::OffsetCursor;
+use typst_translator::TypstTranslator;
+
+use harper_core::{parsers::Parser, Token};
+use itertools::Itertools;
+use typst_syntax::{
+    ast::{AstNode, Markup},
+    Source,
+};
+
+/// A parser that wraps the [`PlainEnglish`] parser allowing one to parse Typst files.
+pub struct Typst;
+
+impl Parser for Typst {
+    fn parse(&self, source: &[char]) -> Vec<Token> {
+        let source_str: String = source.iter().collect();
+
+        // Transform the source into an AST through the `typst_syntax` crate
+        let typst_document = Source::detached(source_str);
+        let typst_tree = Markup::from_untyped(typst_document.root())
+            .expect("Unable to create typst document from parsed tree!");
+
+        // Recurse through AST to create tokens
+        let parse_helper = TypstTranslator::new(&typst_document);
+        typst_tree
+            .exprs()
+            .filter_map(|ex| parse_helper.parse_expr(ex, OffsetCursor::new(&typst_document)))
+            .flatten()
+            .collect_vec()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use itertools::Itertools;
+    use ordered_float::OrderedFloat;
+
+    use super::Typst;
+    use harper_core::{Document, NounData, Punctuation, TokenKind, WordMetadata};
+
+    #[test]
+    fn contraction() {
+        let document = Document::new_curated("doesn't", &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        assert_eq!(token_kinds.len(), 1);
+        assert!(!token_kinds.into_iter().any(|t| {
+            matches!(
+                t,
+                TokenKind::Word(WordMetadata {
+                    noun: Some(NounData {
+                        is_possessive: Some(true),
+                        ..
+                    }),
+                    ..
+                })
+            )
+        }))
+    }
+
+    #[test]
+    fn possessive() {
+        let document = Document::new_curated("person's", &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        assert_eq!(token_kinds.len(), 1);
+        assert!(token_kinds.into_iter().all(|t| {
+            matches!(
+                t,
+                TokenKind::Word(WordMetadata {
+                    noun: Some(NounData {
+                        is_possessive: Some(true),
+                        ..
+                    }),
+                    ..
+                })
+            )
+        }))
+    }
+
+    #[test]
+    fn number() {
+        let source = "12 is larger than 11, but much less than 11!";
+
+        let document = Document::new_curated(source, &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        assert!(matches!(
+            token_kinds.as_slice(),
+            &[
+                TokenKind::Number(OrderedFloat(12.0), None),
+                TokenKind::Space(1),
+                TokenKind::Word(_),
+                TokenKind::Space(1),
+                TokenKind::Word(_),
+                TokenKind::Space(1),
+                TokenKind::Word(_),
+                TokenKind::Space(1),
+                TokenKind::Number(OrderedFloat(11.0), None),
+                TokenKind::Punctuation(Punctuation::Comma),
+                TokenKind::Space(1),
+                TokenKind::Word(_),
+                TokenKind::Space(1),
+                TokenKind::Word(_),
+                TokenKind::Space(1),
+                TokenKind::Word(_),
+                TokenKind::Space(1),
+                TokenKind::Word(_),
+                TokenKind::Space(1),
+                TokenKind::Number(OrderedFloat(11.0), None),
+                TokenKind::Punctuation(Punctuation::Bang),
+            ]
+        ))
+    }
+
+    #[test]
+    fn math_unlintable() {
+        let source = "$12 > 11$, $12 << 11!$";
+
+        let document = Document::new_curated(source, &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        assert!(matches!(
+            token_kinds.as_slice(),
+            &[
+                TokenKind::Unlintable,
+                TokenKind::Punctuation(Punctuation::Comma),
+                TokenKind::Space(1),
+                TokenKind::Unlintable,
+            ]
+        ))
+    }
+
+    #[test]
+    fn dict_parsing() {
+        let source = r#"#let dict = (
+            name: "Typst",
+            born: 2019,
+        )"#;
+
+        let document = Document::new_curated(source, &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        let charslice = source.chars().collect_vec();
+        let tokens = document.tokens().collect_vec();
+        assert_eq!(tokens[2].span.get_content_string(&charslice), "Typst");
+
+        assert!(matches!(
+            token_kinds.as_slice(),
+            &[
+                TokenKind::Unlintable, // Ident
+                TokenKind::Unlintable, // Key 1
+                TokenKind::Word(_),    // Value 1
+                TokenKind::Unlintable, // Key 2
+                TokenKind::Unlintable, // Value 2
+            ]
+        ))
+    }
+
+    #[test]
+    fn str_parsing() {
+        let source = r#"#let ident = "This is a string""#;
+
+        let document = Document::new_curated(source, &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        assert!(matches!(
+            &token_kinds.as_slice(),
+            &[
+                TokenKind::Unlintable,
+                TokenKind::Word(_), // This
+                TokenKind::Space(1),
+                TokenKind::Word(_), // Is
+                TokenKind::Space(1),
+                TokenKind::Word(_), // A
+                TokenKind::Space(1),
+                TokenKind::Word(_), // String
+            ]
+        ))
+    }
+
+    #[test]
+    fn non_adjacent_spaces_not_condensed() {
+        let source = r#"#authors_slice.join(", ", last: ", and ") bob"#;
+
+        let document = Document::new_curated(source, &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        assert!(matches!(
+            &token_kinds.as_slice(),
+            &[
+                TokenKind::Unlintable, // authors_slice.join
+                TokenKind::Punctuation(Punctuation::Comma),
+                TokenKind::Space(1),
+                TokenKind::Unlintable, // Ident
+                TokenKind::Punctuation(Punctuation::Comma),
+                TokenKind::Space(1),
+                TokenKind::Word(_), // and
+                TokenKind::Space(1),
+                TokenKind::Space(2),
+                TokenKind::Word(_),
+            ]
+        ))
+    }
+
+    #[test]
+    fn header_parsing() {
+        let source = r"= Header
+                       Paragraph";
+
+        let document = Document::new_curated(source, &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        let charslice = source.chars().collect_vec();
+        let tokens = document.tokens().collect_vec();
+        assert_eq!(tokens[0].span.get_content_string(&charslice), "Header");
+        assert_eq!(tokens[2].span.get_content_string(&charslice), "Paragraph");
+
+        assert!(matches!(
+            &token_kinds.as_slice(),
+            &[
+                TokenKind::Word(_),
+                TokenKind::Newline(1),
+                TokenKind::Word(_)
+            ]
+        ))
+    }
+
+    #[test]
+    fn parbreak() {
+        let source = r"Paragraph
+
+                       Paragraph";
+
+        let document = Document::new_curated(source, &Typst);
+        let token_kinds = document.tokens().map(|t| t.kind).collect_vec();
+        dbg!(&token_kinds);
+
+        assert!(matches!(
+            &token_kinds.as_slice(),
+            &[
+                TokenKind::Word(_),
+                TokenKind::ParagraphBreak,
+                TokenKind::Word(_),
+            ]
+        ))
+    }
+
+    #[test]
+    fn label_unlintable() {
+        let source = r"= Header