diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 000000000..22153da02 --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,21 @@ +name: Fuzz + +on: + schedule: + - cron: "*/10 * * * *" + +env: + CARGO_TERM_COLOR: always + # Run for 100 times the default + QUICKCHECK_TESTS: 10000 + +jobs: + precommit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: harden-title-case + - uses: extractions/setup-just@v2 + - name: Test + run: cargo test diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 21fee6261..7d4511313 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -8,6 +8,8 @@ on: env: CARGO_TERM_COLOR: always + # Run for 100 times the default + QUICKCHECK_TESTS: 10000 jobs: precommit: diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index b1508df3a..d306b0218 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -11,7 +11,9 @@ use harper_core::parsers::{Markdown, MarkdownOptions}; use harper_core::{remove_overlaps, Dictionary, Document, FstDictionary, TokenKind}; use harper_literate_haskell::LiterateHaskellParser; +/// A debugging tool for the Harper grammar checker. #[derive(Debug, Parser)] +#[command(version, about)] enum Args { /// Lint a provided document. Lint { diff --git a/harper-comments/src/comment_parsers/jsdoc.rs b/harper-comments/src/comment_parsers/jsdoc.rs index 73dc9eed4..663f571f5 100644 --- a/harper-comments/src/comment_parsers/jsdoc.rs +++ b/harper-comments/src/comment_parsers/jsdoc.rs @@ -121,7 +121,7 @@ pub(super) fn mark_inline_tags(tokens: &mut [Token]) { } } -/// Checks if the provided token slice begins with an inline tag, returning it's +/// Checks if the provided token slice begins with an inline tag, returning its /// end if so. 
fn parse_inline_tag(tokens: &[Token]) -> Option { if !matches!( diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index ef48d4314..8ad5f5f5c 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -49771,3 +49771,4 @@ Harper/SM a8c/SM a11n/1 a12s/9 +intergenerational diff --git a/harper-core/src/lexing/email_address.rs b/harper-core/src/lexing/email_address.rs index df2eb20ac..92038116d 100644 --- a/harper-core/src/lexing/email_address.rs +++ b/harper-core/src/lexing/email_address.rs @@ -16,6 +16,10 @@ pub fn lex_email_address(source: &[char]) -> Option { let domain_part_len = lex_hostname(&source[at_loc + 1..])?; + if domain_part_len == 0 { + return None; + } + Some(FoundToken { next_index: at_loc + 1 + domain_part_len, token: TokenKind::EmailAddress, @@ -155,6 +159,18 @@ mod tests { } } + #[test] + fn does_not_allow_empty_domain() { + for local in example_local_parts() { + // Generate invalid email address + let mut address = local.clone(); + address.push('@'); + address.push(' '); + + assert!(lex_email_address(&address).is_none()); + } + } + /// Tests that the email parser will not throw a panic under some random /// situations. #[test] diff --git a/harper-core/src/lexing/hostname.rs b/harper-core/src/lexing/hostname.rs index 5534fbe5f..961955da5 100644 --- a/harper-core/src/lexing/hostname.rs +++ b/harper-core/src/lexing/hostname.rs @@ -28,6 +28,13 @@ pub fn lex_hostname_token(source: &[char]) -> Option { pub fn lex_hostname(source: &[char]) -> Option { let mut passed_chars = 0; + // The beginning has different requirements from the rest of the hostname. 
+ let first = source.first()?; + + if !matches!(first, 'A'..='Z' | 'a'..='z' | '0'..='9' ) { + return None; + } + for label in source.split(|c| *c == '.') { for c in label { passed_chars += 1; @@ -78,4 +85,10 @@ pub mod tests { assert_eq!(lex_hostname(&domain), Some(domain.len())); } } + + #[test] + fn hyphen_cannot_open_hostname() { + let host: Vec<_> = "-something.com".chars().collect(); + assert!(lex_hostname(&host).is_none()) + } } diff --git a/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs b/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs index 786c5f0da..546c6f740 100644 --- a/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs +++ b/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs @@ -34,13 +34,14 @@ impl PatternLinter for AvoidContraction { vec!['y', 'o', 'u', 'r'], word, )], - message: "I appears you intended to use the possessive version of this word".to_owned(), + message: "It appears you intended to use the possessive version of this word" + .to_owned(), priority: 63, } } fn description(&self) -> &'static str { - "This rule looks for situations where a contraction was used where it shouldn't." + "This rule looks for situations where a contraction was used where it shouldn't have been." } } diff --git a/harper-core/src/linting/spelled_numbers.rs b/harper-core/src/linting/spelled_numbers.rs index 8824d78fb..936f2e413 100644 --- a/harper-core/src/linting/spelled_numbers.rs +++ b/harper-core/src/linting/spelled_numbers.rs @@ -35,7 +35,7 @@ impl Linter for SpelledNumbers { } } -/// Converts a number to it's spelled-out variant. +/// Converts a number to its spelled-out variant. /// /// For example: 100 -> one hundred. /// diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index ea21e9c4e..16e40cb18 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -37,7 +37,7 @@ impl Markdown { /// Remove hidden Wikilink target text. 
/// - /// As in, the stuff to the left of the pipe operator: + /// As in the stuff to the left of the pipe operator: /// /// ```markdown /// [[Target text|Display Text]] @@ -46,6 +46,10 @@ impl Markdown { let mut to_remove = VecDeque::new(); for pipe_idx in tokens.iter_pipe_indices() { + if pipe_idx < 2 { + continue; + } + // Locate preceding `[[` let mut cursor = pipe_idx - 2; let mut open_bracket = None; @@ -351,6 +355,35 @@ mod tests { )) } + #[test] + fn just_pipe() { + let source = r"|"; + + let tokens = Markdown::default().parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!( + token_kinds.as_slice(), + &[TokenKind::Punctuation(Punctuation::Pipe)] + )) + } + + #[test] + fn empty_wikilink_text() { + let source = r"[[|]]"; + + let tokens = Markdown::default().parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!(token_kinds.as_slice(), &[])) + } + #[test] fn improper_wikilink_text() { let source = r"this is shown|this is also shown]]"; diff --git a/harper-core/src/parsers/mask.rs b/harper-core/src/parsers/mask.rs index 922827947..d74393d66 100644 --- a/harper-core/src/parsers/mask.rs +++ b/harper-core/src/parsers/mask.rs @@ -35,7 +35,7 @@ where let mut last_allowed: Option = None; for (span, content) in mask.iter_allowed(source) { - // Check if there was a line break between the last chunk. + // Check for a line break separating the current chunk from the preceding one. 
if let Some(last_allowed) = last_allowed { let intervening = Span::new(last_allowed.end, span.start); diff --git a/harper-core/src/patterns/is_not_title_case.rs b/harper-core/src/patterns/is_not_title_case.rs index 0c400a4ac..ecad0b8e8 100644 --- a/harper-core/src/patterns/is_not_title_case.rs +++ b/harper-core/src/patterns/is_not_title_case.rs @@ -24,8 +24,7 @@ impl Pattern for IsNotTitleCase { } let matched_chars = tokens[0..inner_match].span().unwrap().get_content(source); - - if make_title_case(tokens, source, &self.dict) != matched_chars { + if make_title_case(&tokens[0..inner_match], source, &self.dict) != matched_chars { inner_match } else { 0 diff --git a/harper-core/src/title_case.rs b/harper-core/src/title_case.rs index 7485017c4..5cdec274d 100644 --- a/harper-core/src/title_case.rs +++ b/harper-core/src/title_case.rs @@ -1,5 +1,6 @@ use crate::Lrc; use crate::Token; +use crate::TokenKind; use hashbrown::HashSet; use lazy_static::lazy_static; @@ -30,36 +31,13 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) let start_index = toks.first().unwrap().span.start; - let mut words = toks.iter_word_likes().enumerate().peekable(); + let mut word_likes = toks.iter_word_likes().enumerate().peekable(); let mut output = toks.span().unwrap().get_content(source).to_vec(); - // Only specific conjunctions are not capitalized. - lazy_static! 
{ - static ref SPECIAL_CONJUNCTIONS: HashSet> = ["and", "but", "for", "or", "nor"] - .iter() - .map(|v| v.chars().collect()) - .collect(); - } - - while let Some((index, word)) = words.next() { - if !word.kind.is_word() { - continue; - } - - let chars = word.span.get_content(source); - let chars_lower = chars.to_lower(); - - let metadata = word - .kind - .as_word() - .unwrap() - .or(&dict.get_word_metadata(&chars_lower)); - - let should_capitalize = !metadata.preposition - && !metadata.article - && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_slice()) + while let Some((index, word)) = word_likes.next() { + let should_capitalize = should_capitalize_token(&word, source, dict) || index == 0 - || words.peek().is_none(); + || word_likes.peek().is_none(); if should_capitalize { output[word.span.start - start_index] = @@ -72,7 +50,7 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) } else { // The whole word should be lowercase. for i in word.span { - output[i - start_index] = output[i].to_ascii_lowercase(); + output[i - start_index] = output[i - start_index].to_ascii_lowercase(); } } } @@ -80,10 +58,39 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) output } +/// Determines whether a token should be capitalized. +/// Is not responsible for capitalization requirements that are dependent on token position. +fn should_capitalize_token(tok: &Token, source: &[char], dict: &impl Dictionary) -> bool { + match tok.kind { + TokenKind::Word(mut metadata) => { + // Only specific conjunctions are not capitalized. + lazy_static! 
{ + static ref SPECIAL_CONJUNCTIONS: HashSet> = + ["and", "but", "for", "or", "nor"] + .iter() + .map(|v| v.chars().collect()) + .collect(); + } + + let chars = tok.span.get_content(source); + let chars_lower = chars.to_lower(); + + metadata = metadata.or(&dict.get_word_metadata(&chars_lower)); + + let is_short_preposition = metadata.preposition && tok.span.len() <= 4; + + !is_short_preposition + && !metadata.article + && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_slice()) + } + _ => true, + } +} + #[cfg(test)] mod tests { - use quickcheck::{Arbitrary, TestResult}; + use quickcheck::TestResult; use quickcheck_macros::quickcheck; use super::make_title_case_str; @@ -120,49 +127,37 @@ mod tests { ) } - #[derive(Debug, Clone)] - struct Word(String); - - impl Arbitrary for Word { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let mut s = String::new(); - - for _ in 0..g.size() { - let c = loop { - let gen = char::arbitrary(g); - - if gen.is_ascii_alphanumeric() { - break gen; - } - }; - - s.push(c); - } - - Self(s) - } + /// Check that "about" remains uppercase + #[test] + fn about_uppercase_with_numbers() { + assert_eq!( + make_title_case_str("0 about 0", &PlainEnglish, &FstDictionary::curated()), + "0 About 0" + ) } - #[derive(Debug, Clone)] - struct Sentence(String); - - /// Builds a sentence out of words from the curated [`FullDictionary`]. 
- impl Arbitrary for Sentence { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let mut s = String::new(); - - let Word(first_word) = Word::arbitrary(g); - s.push_str(&first_word); - - for _ in 0..g.size() { - let Word(word) = Word::arbitrary(g); + #[test] + fn pipe_does_not_cause_crash() { + assert_eq!( + make_title_case_str("|", &Markdown::default(), &FstDictionary::curated()), + "|" + ) + } - s.push(' '); - s.push_str(&word); - } + #[test] + fn a_paragraph_does_not_cause_crash() { + assert_eq!( + make_title_case_str("A\n", &Markdown::default(), &FstDictionary::curated()), + "A" + ) + } - Self(s) - } + #[test] + fn tab_a_becomes_upcase() { + assert_eq!( + make_title_case_str("\ta", &PlainEnglish, &FstDictionary::curated()), + "\tA" + ) } #[quickcheck] @@ -184,8 +179,6 @@ mod tests { .chars() .collect(); - dbg!(&title_case); - TestResult::from_bool(title_case[prefix.chars().count() + 1] == 'a') } @@ -212,15 +205,15 @@ mod tests { } #[quickcheck] - fn first_word_is_upcase(sentence: Sentence) -> TestResult { + fn first_word_is_upcase(text: String) -> TestResult { let title_case: Vec<_> = - make_title_case_str(&sentence.0, &Markdown::default(), &FstDictionary::curated()) + make_title_case_str(&text, &PlainEnglish, &FstDictionary::curated()) .chars() .collect(); if let Some(first) = title_case.first() { - if first.is_alphabetic() { - TestResult::from_bool(first.is_uppercase()) + if first.is_ascii_alphabetic() { + TestResult::from_bool(first.is_ascii_uppercase()) } else { TestResult::discard() } diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 06ab11ce9..f33c4a2b4 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -156,7 +156,16 @@ impl TokenStringExt for [Token] { } fn span(&self) -> Option { - Some(Span::new(self.first()?.span.start, self.last()?.span.end)) + let min_max = self + .iter() + .flat_map(|v| [v.span.start, v.span.end].into_iter()) + .minmax(); + + match min_max { + itertools::MinMaxResult::NoElements => None, 
+ itertools::MinMaxResult::OneElement(min) => Some(Span::new(min, min)), + itertools::MinMaxResult::MinMax(min, max) => Some(Span::new(min, max)), + } } fn iter_linking_verb_indices(&self) -> impl Iterator + '_ { diff --git a/harper-core/src/token_kind.rs b/harper-core/src/token_kind.rs index 93df3a300..25aecb499 100644 --- a/harper-core/src/token_kind.rs +++ b/harper-core/src/token_kind.rs @@ -155,7 +155,7 @@ impl TokenKind { self.with_default_data() == other.with_default_data() } - /// Produces a copy of `self` with any inner data replaced with it's default + /// Produces a copy of `self` with any inner data replaced with its default /// value. Useful for making comparisons on just the variant of the /// enum. pub fn with_default_data(&self) -> Self { diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index 8c52d95ab..39f1e511f 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -48,3 +48,6 @@ create_test!(amazon_hostname.md, 0); create_test!(issue_159.md, 1); create_test!(issue_358.md, 0); create_test!(issue_195.md, 0); + +// Make sure it doesn't panic +create_test!(lukas_homework.md, 3); diff --git a/harper-core/tests/test_sources/lukas_homework.md b/harper-core/tests/test_sources/lukas_homework.md new file mode 100644 index 000000000..daf534f94 --- /dev/null +++ b/harper-core/tests/test_sources/lukas_homework.md @@ -0,0 +1,23 @@ +# Native American Assimilation and Activism Week Two Reflection + +> This is the first in a bi-weekly series that I will be publishing for my Native +American Assimilation and Activism class. Every two weeks we make posts +sharing what we learned in the class. Unfortunately, due to weather in England I +was unable to make it back to the United States in time for the first lecture. + +One of the key discussions in Monday's lecture/discussion was since time +immemorial and teaching around that. 
Time Immemorium is a period before +human memory, and involved other human species, travel stories, and creation +stories. Some of the key lessons I learned from that class were: + +* Humans branched from some common ancestor that had multiple other human species branch off + * We are not evolved from chimps but also have a shared ancestor +* Many genetic evolutional specializations have to do with environmental adaptation + * Some of these adaptations were shared when isolated groups had visitors (Weaving rivers theory) + * Also related: Intergenerational Trauma + +Oregon and Washington have been leading the country when it comes to +integrating Native Americans into their school curriculum. This includes +adding Since Time Immemorium curriculum. These advances have the +possibility to significantly improve the awareness and appreciation of Native +American Peoples who have and still live in these lands. diff --git a/harper-ls/src/config.rs b/harper-ls/src/config.rs index aa21cb110..79c9601cd 100644 --- a/harper-ls/src/config.rs +++ b/harper-ls/src/config.rs @@ -35,7 +35,7 @@ impl DiagnosticSeverity { #[derive(Debug, Clone, Default)] pub struct CodeActionConfig { /// Instructs `harper-ls` to place unstable code actions last. - /// In this case, "unstable" refers their existence and action. + /// In this case, "unstable" refers to their existence and action. /// /// For example, we always want to allow users to add "misspelled" elements /// to dictionary, regardless of the spelling suggestions. diff --git a/harper-ls/src/main.rs b/harper-ls/src/main.rs index 01d42e8fb..1f20b1e5c 100644 --- a/harper-ls/src/main.rs +++ b/harper-ls/src/main.rs @@ -25,6 +25,7 @@ static DEFAULT_ADDRESS: &str = "127.0.0.1:4000"; /// /// Will listen on 127.0.0.1:4000 by default. #[derive(Debug, Parser)] +#[command(version, about)] struct Args { /// Set to listen on standard input / output rather than TCP. 
#[arg(short, long, default_value_t = false)] diff --git a/justfile b/justfile index 96d71168c..20f056b4b 100644 --- a/justfile +++ b/justfile @@ -268,3 +268,13 @@ bump-versions: just format lazygit + +# Enter an infinite loop of testing until a bug is found. +fuzz: + #!/usr/bin/bash + + while true + do + # Stop on the first failure, exiting with cargo's own status (`$?` read after a `[[ ]]` test would be that test's status, not cargo's). + QUICKCHECK_TESTS=100000 cargo test || exit $? + done diff --git a/packages/harper.js/src/loadWasm.ts b/packages/harper.js/src/loadWasm.ts index 2459bb351..985cbced1 100644 --- a/packages/harper.js/src/loadWasm.ts +++ b/packages/harper.js/src/loadWasm.ts @@ -14,7 +14,7 @@ export function setWasmUri(uri: string) { curWasmUri = uri; } -/** Load the WebAssembly manually and dynamically, making sure to setup infrastructure. +/** Load the WebAssembly manually and dynamically, making sure to set up infrastructure. * You can use an optional data URL for the WebAssembly file if the module is being loaded from a Web Worker. * */ export default async function loadWasm() { diff --git a/packages/web/src/routes/+page.svelte b/packages/web/src/routes/+page.svelte index f3bfbad30..c793ac44b 100644 --- a/packages/web/src/routes/+page.svelte +++ b/packages/web/src/routes/+page.svelte @@ -103,7 +103,7 @@
Wicked Fast Since Harper runs on your devices, its able to serve up suggestions in under + >Since Harper runs on your devices, it's able to serve up suggestions in under 10 milliseconds.

diff --git a/packages/web/src/routes/docs/contributors/architecture/+page.md b/packages/web/src/routes/docs/contributors/architecture/+page.md index f8a00915b..edd7e0156 100644 --- a/packages/web/src/routes/docs/contributors/architecture/+page.md +++ b/packages/web/src/routes/docs/contributors/architecture/+page.md @@ -13,7 +13,7 @@ Hopefully, we can reduce that 10x down to something a little more reasonable. Harper tries to do one thing well: find grammatical and spelling errors in English text. If possible, provide suggestions to correct those errors. -An error and it's possible corrections together form what we call a lint. +An error and its possible corrections together form what we call a lint. In this vein, Harper serves the role of a [Linter]() for English. diff --git a/packages/web/src/routes/docs/contributors/committing/+page.md b/packages/web/src/routes/docs/contributors/committing/+page.md index 85417bf77..2df43be1d 100644 --- a/packages/web/src/routes/docs/contributors/committing/+page.md +++ b/packages/web/src/routes/docs/contributors/committing/+page.md @@ -7,7 +7,7 @@ Before creating a pull request, please make sure all your commits follow the lin Additionally, to minimize the labor required to review your commit, we run a relatively strict suite of formatting and linting programs. We highly recommend that you run both `just format` and `just precommit` before submitting a pull request. -If those scripts don't work in your environment, we run `just precommit` through GitHub actions inside of pull requests, so you may make modifications and push until the checks pass. +If those scripts don't work in your environment, we run `just precommit` through GitHub Actions inside of pull requests, so you may make modifications and push until the checks pass. If this sounds intimidating, don't worry. We are entirely willing to work with you to make sure your code can make it into Harper, just know it might take a little longer. 
diff --git a/packages/web/src/routes/docs/contributors/environment/+page.md b/packages/web/src/routes/docs/contributors/environment/+page.md index d87cd8c86..035bfc782 100644 --- a/packages/web/src/routes/docs/contributors/environment/+page.md +++ b/packages/web/src/routes/docs/contributors/environment/+page.md @@ -15,10 +15,11 @@ To use the tooling required to build and debug Harper, you'll need to the follow - `pandoc` We develop a set of tools, accessible via `just`, to build and debug Harper's algorithm (otherwise known as `harper-core`) and its various integrations. -To get see all the tools in your toolbox run: +The source code is in the `justfile` [at the root of the repository](https://github.com/Automattic/harper/blob/master/justfile). +To see all the tools in the toolbox, run: ```bash just --list ``` -Before getting started, we highly recommend that you run `just setup` to populate your build caches and download all dependencies. +Before making any modifications, we highly recommend that you run `just setup` to populate your build caches and download all dependencies. diff --git a/packages/web/src/routes/docs/integrations/language-server/+page.md b/packages/web/src/routes/docs/integrations/language-server/+page.md index 0f44885fc..843be18fa 100644 --- a/packages/web/src/routes/docs/integrations/language-server/+page.md +++ b/packages/web/src/routes/docs/integrations/language-server/+page.md @@ -30,6 +30,14 @@ You can install Harper on Windows through [Scoop](https://scoop.sh/). scoop install harper ``` +### Homebrew + +You may install Harper through [Homebrew](https://brew.sh). + +```bash +brew install harper +``` + ## Dictionaries `harper-ls` has three kinds of dictionaries: user, file-local, and static dictionaries. 
diff --git a/packages/web/src/routes/docs/integrations/neovim/+page.md b/packages/web/src/routes/docs/integrations/neovim/+page.md index a16109379..010aac9ea 100644 --- a/packages/web/src/routes/docs/integrations/neovim/+page.md +++ b/packages/web/src/routes/docs/integrations/neovim/+page.md @@ -41,6 +41,14 @@ You can install Harper on Windows through [Scoop](https://scoop.sh/). scoop install harper ``` +### Homebrew + +You may install Harper through [Homebrew](https://brew.sh). + +```bash +brew install harper +``` + ## Configuration Neovim is also one of the two primarily supported editors for `harper-ls`. @@ -49,7 +57,7 @@ As such, you can view this page as canonical documentation for the available con ### Markdown-Specific Config -The Markdown parser has it's own configuration option, used to modify its behavior in specific ways. +The Markdown parser has its own configuration option, used to modify its behavior in specific ways. For example, the title of a link is linted by default, but this behavior can be changed through the `ignore_link_title` key: ```lua