diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 00000000..22153da0 --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,21 @@ +name: Fuzz + +on: + schedule: + - cron: "*/10 * * * *" + +env: + CARGO_TERM_COLOR: always + # Run for 100 times the default + QUICKCHECK_TESTS: 10000 + +jobs: + precommit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: harden-title-case + - uses: extractions/setup-just@v2 + - name: Test + run: cargo test diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 21fee626..7d451131 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -8,6 +8,8 @@ on: env: CARGO_TERM_COLOR: always + # Run for 100 times the default + QUICKCHECK_TESTS: 10000 jobs: precommit: diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index b1508df3..d306b021 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -11,7 +11,9 @@ use harper_core::parsers::{Markdown, MarkdownOptions}; use harper_core::{remove_overlaps, Dictionary, Document, FstDictionary, TokenKind}; use harper_literate_haskell::LiterateHaskellParser; +/// A debugging tool for the Harper grammar checker. #[derive(Debug, Parser)] +#[command(version, about)] enum Args { /// Lint a provided document. Lint { diff --git a/harper-comments/src/comment_parsers/jsdoc.rs b/harper-comments/src/comment_parsers/jsdoc.rs index 73dc9eed..663f571f 100644 --- a/harper-comments/src/comment_parsers/jsdoc.rs +++ b/harper-comments/src/comment_parsers/jsdoc.rs @@ -121,7 +121,7 @@ pub(super) fn mark_inline_tags(tokens: &mut [Token]) { } } -/// Checks if the provided token slice begins with an inline tag, returning it's +/// Checks if the provided token slice begins with an inline tag, returning its /// end if so. 
fn parse_inline_tag(tokens: &[Token]) -> Option { if !matches!( diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index ef48d431..8ad5f5f5 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -49771,3 +49771,4 @@ Harper/SM a8c/SM a11n/1 a12s/9 +intergenerational diff --git a/harper-core/src/lexing/email_address.rs b/harper-core/src/lexing/email_address.rs index df2eb20a..92038116 100644 --- a/harper-core/src/lexing/email_address.rs +++ b/harper-core/src/lexing/email_address.rs @@ -16,6 +16,10 @@ pub fn lex_email_address(source: &[char]) -> Option { let domain_part_len = lex_hostname(&source[at_loc + 1..])?; + if domain_part_len == 0 { + return None; + } + Some(FoundToken { next_index: at_loc + 1 + domain_part_len, token: TokenKind::EmailAddress, @@ -155,6 +159,18 @@ mod tests { } } + #[test] + fn does_not_allow_empty_domain() { + for local in example_local_parts() { + // Generate invalid email address + let mut address = local.clone(); + address.push('@'); + address.push(' '); + + assert!(lex_email_address(&address).is_none()); + } + } + /// Tests that the email parser will not throw a panic under some random /// situations. #[test] diff --git a/harper-core/src/lexing/hostname.rs b/harper-core/src/lexing/hostname.rs index 5534fbe5..961955da 100644 --- a/harper-core/src/lexing/hostname.rs +++ b/harper-core/src/lexing/hostname.rs @@ -28,6 +28,13 @@ pub fn lex_hostname_token(source: &[char]) -> Option { pub fn lex_hostname(source: &[char]) -> Option { let mut passed_chars = 0; + // The beginning has different requirements from the rest of the hostname. 
+ let first = source.first()?; + + if !matches!(first, 'A'..='Z' | 'a'..='z' | '0'..='9' ) { + return None; + } + for label in source.split(|c| *c == '.') { for c in label { passed_chars += 1; @@ -78,4 +85,10 @@ pub mod tests { assert_eq!(lex_hostname(&domain), Some(domain.len())); } } + + #[test] + fn hyphen_cannot_open_hostname() { + let host: Vec<_> = "-something.com".chars().collect(); + assert!(lex_hostname(&host).is_none()) + } } diff --git a/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs b/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs index 786c5f0d..546c6f74 100644 --- a/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs +++ b/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs @@ -34,13 +34,14 @@ impl PatternLinter for AvoidContraction { vec!['y', 'o', 'u', 'r'], word, )], - message: "I appears you intended to use the possessive version of this word".to_owned(), + message: "It appears you intended to use the possessive version of this word" + .to_owned(), priority: 63, } } fn description(&self) -> &'static str { - "This rule looks for situations where a contraction was used where it shouldn't." + "This rule looks for situations where a contraction was used where it shouldn't have been." } } diff --git a/harper-core/src/linting/spelled_numbers.rs b/harper-core/src/linting/spelled_numbers.rs index 8824d78f..936f2e41 100644 --- a/harper-core/src/linting/spelled_numbers.rs +++ b/harper-core/src/linting/spelled_numbers.rs @@ -35,7 +35,7 @@ impl Linter for SpelledNumbers { } } -/// Converts a number to it's spelled-out variant. +/// Converts a number to its spelled-out variant. /// /// For example: 100 -> one hundred. /// diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index ea21e9c4..16e40cb1 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -37,7 +37,7 @@ impl Markdown { /// Remove hidden Wikilink target text. 
/// - /// As in, the stuff to the left of the pipe operator: + /// As in the stuff to the left of the pipe operator: /// /// ```markdown /// [[Target text|Display Text]] @@ -46,6 +46,10 @@ impl Markdown { let mut to_remove = VecDeque::new(); for pipe_idx in tokens.iter_pipe_indices() { + if pipe_idx < 2 { + continue; + } + // Locate preceding `[[` let mut cursor = pipe_idx - 2; let mut open_bracket = None; @@ -351,6 +355,35 @@ mod tests { )) } + #[test] + fn just_pipe() { + let source = r"|"; + + let tokens = Markdown::default().parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!( + token_kinds.as_slice(), + &[TokenKind::Punctuation(Punctuation::Pipe)] + )) + } + + #[test] + fn empty_wikilink_text() { + let source = r"[[|]]"; + + let tokens = Markdown::default().parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!(token_kinds.as_slice(), &[])) + } + #[test] fn improper_wikilink_text() { let source = r"this is shown|this is also shown]]"; diff --git a/harper-core/src/parsers/mask.rs b/harper-core/src/parsers/mask.rs index 92282794..d74393d6 100644 --- a/harper-core/src/parsers/mask.rs +++ b/harper-core/src/parsers/mask.rs @@ -35,7 +35,7 @@ where let mut last_allowed: Option = None; for (span, content) in mask.iter_allowed(source) { - // Check if there was a line break between the last chunk. + // Check for a line break separating the current chunk from the preceding one. 
if let Some(last_allowed) = last_allowed { let intervening = Span::new(last_allowed.end, span.start); diff --git a/harper-core/src/patterns/is_not_title_case.rs b/harper-core/src/patterns/is_not_title_case.rs index 0c400a4a..ecad0b8e 100644 --- a/harper-core/src/patterns/is_not_title_case.rs +++ b/harper-core/src/patterns/is_not_title_case.rs @@ -24,8 +24,7 @@ impl Pattern for IsNotTitleCase { } let matched_chars = tokens[0..inner_match].span().unwrap().get_content(source); - - if make_title_case(tokens, source, &self.dict) != matched_chars { + if make_title_case(&tokens[0..inner_match], source, &self.dict) != matched_chars { inner_match } else { 0 diff --git a/harper-core/src/title_case.rs b/harper-core/src/title_case.rs index 7485017c..5cdec274 100644 --- a/harper-core/src/title_case.rs +++ b/harper-core/src/title_case.rs @@ -1,5 +1,6 @@ use crate::Lrc; use crate::Token; +use crate::TokenKind; use hashbrown::HashSet; use lazy_static::lazy_static; @@ -30,36 +31,13 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) let start_index = toks.first().unwrap().span.start; - let mut words = toks.iter_word_likes().enumerate().peekable(); + let mut word_likes = toks.iter_word_likes().enumerate().peekable(); let mut output = toks.span().unwrap().get_content(source).to_vec(); - // Only specific conjunctions are not capitalized. - lazy_static! 
{ - static ref SPECIAL_CONJUNCTIONS: HashSet> = ["and", "but", "for", "or", "nor"] - .iter() - .map(|v| v.chars().collect()) - .collect(); - } - - while let Some((index, word)) = words.next() { - if !word.kind.is_word() { - continue; - } - - let chars = word.span.get_content(source); - let chars_lower = chars.to_lower(); - - let metadata = word - .kind - .as_word() - .unwrap() - .or(&dict.get_word_metadata(&chars_lower)); - - let should_capitalize = !metadata.preposition - && !metadata.article - && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_slice()) + while let Some((index, word)) = word_likes.next() { + let should_capitalize = should_capitalize_token(&word, source, dict) || index == 0 - || words.peek().is_none(); + || word_likes.peek().is_none(); if should_capitalize { output[word.span.start - start_index] = @@ -72,7 +50,7 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) } else { // The whole word should be lowercase. for i in word.span { - output[i - start_index] = output[i].to_ascii_lowercase(); + output[i - start_index] = output[i - start_index].to_ascii_lowercase(); } } } @@ -80,10 +58,39 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) output } +/// Determines whether a token should be capitalized. +/// Is not responsible for capitalization requirements that are dependent on token position. +fn should_capitalize_token(tok: &Token, source: &[char], dict: &impl Dictionary) -> bool { + match tok.kind { + TokenKind::Word(mut metadata) => { + // Only specific conjunctions are not capitalized. + lazy_static! 
{ + static ref SPECIAL_CONJUNCTIONS: HashSet> = + ["and", "but", "for", "or", "nor"] + .iter() + .map(|v| v.chars().collect()) + .collect(); + } + + let chars = tok.span.get_content(source); + let chars_lower = chars.to_lower(); + + metadata = metadata.or(&dict.get_word_metadata(&chars_lower)); + + let is_short_preposition = metadata.preposition && tok.span.len() <= 4; + + !is_short_preposition + && !metadata.article + && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_slice()) + } + _ => true, + } +} + #[cfg(test)] mod tests { - use quickcheck::{Arbitrary, TestResult}; + use quickcheck::TestResult; use quickcheck_macros::quickcheck; use super::make_title_case_str; @@ -120,49 +127,37 @@ mod tests { ) } - #[derive(Debug, Clone)] - struct Word(String); - - impl Arbitrary for Word { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let mut s = String::new(); - - for _ in 0..g.size() { - let c = loop { - let gen = char::arbitrary(g); - - if gen.is_ascii_alphanumeric() { - break gen; - } - }; - - s.push(c); - } - - Self(s) - } + /// Check that "about" remains uppercase + #[test] + fn about_uppercase_with_numbers() { + assert_eq!( + make_title_case_str("0 about 0", &PlainEnglish, &FstDictionary::curated()), + "0 About 0" + ) } - #[derive(Debug, Clone)] - struct Sentence(String); - - /// Builds a sentence out of words from the curated [`FullDictionary`]. 
- impl Arbitrary for Sentence { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let mut s = String::new(); - - let Word(first_word) = Word::arbitrary(g); - s.push_str(&first_word); - - for _ in 0..g.size() { - let Word(word) = Word::arbitrary(g); + #[test] + fn pipe_does_not_cause_crash() { + assert_eq!( + make_title_case_str("|", &Markdown::default(), &FstDictionary::curated()), + "|" + ) + } - s.push(' '); - s.push_str(&word); - } + #[test] + fn a_paragraph_does_not_cause_crash() { + assert_eq!( + make_title_case_str("A\n", &Markdown::default(), &FstDictionary::curated()), + "A" + ) + } - Self(s) - } + #[test] + fn tab_a_becomes_upcase() { + assert_eq!( + make_title_case_str("\ta", &PlainEnglish, &FstDictionary::curated()), + "\tA" + ) } #[quickcheck] @@ -184,8 +179,6 @@ mod tests { .chars() .collect(); - dbg!(&title_case); - TestResult::from_bool(title_case[prefix.chars().count() + 1] == 'a') } @@ -212,15 +205,15 @@ mod tests { } #[quickcheck] - fn first_word_is_upcase(sentence: Sentence) -> TestResult { + fn first_word_is_upcase(text: String) -> TestResult { let title_case: Vec<_> = - make_title_case_str(&sentence.0, &Markdown::default(), &FstDictionary::curated()) + make_title_case_str(&text, &PlainEnglish, &FstDictionary::curated()) .chars() .collect(); if let Some(first) = title_case.first() { - if first.is_alphabetic() { - TestResult::from_bool(first.is_uppercase()) + if first.is_ascii_alphabetic() { + TestResult::from_bool(first.is_ascii_uppercase()) } else { TestResult::discard() } diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 06ab11ce..f33c4a2b 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -156,7 +156,16 @@ impl TokenStringExt for [Token] { } fn span(&self) -> Option { - Some(Span::new(self.first()?.span.start, self.last()?.span.end)) + let min_max = self + .iter() + .flat_map(|v| [v.span.start, v.span.end].into_iter()) + .minmax(); + + match min_max { + itertools::MinMaxResult::NoElements => None, + 
itertools::MinMaxResult::OneElement(min) => Some(Span::new(min, min)), + itertools::MinMaxResult::MinMax(min, max) => Some(Span::new(min, max)), + } } fn iter_linking_verb_indices(&self) -> impl Iterator + '_ { diff --git a/harper-core/src/token_kind.rs b/harper-core/src/token_kind.rs index 93df3a30..25aecb49 100644 --- a/harper-core/src/token_kind.rs +++ b/harper-core/src/token_kind.rs @@ -155,7 +155,7 @@ impl TokenKind { self.with_default_data() == other.with_default_data() } - /// Produces a copy of `self` with any inner data replaced with it's default + /// Produces a copy of `self` with any inner data replaced with its default /// value. Useful for making comparisons on just the variant of the /// enum. pub fn with_default_data(&self) -> Self { diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index 8c52d95a..39f1e511 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -48,3 +48,6 @@ create_test!(amazon_hostname.md, 0); create_test!(issue_159.md, 1); create_test!(issue_358.md, 0); create_test!(issue_195.md, 0); + +// Make sure it doesn't panic +create_test!(lukas_homework.md, 3); diff --git a/harper-core/tests/test_sources/lukas_homework.md b/harper-core/tests/test_sources/lukas_homework.md new file mode 100644 index 00000000..daf534f9 --- /dev/null +++ b/harper-core/tests/test_sources/lukas_homework.md @@ -0,0 +1,23 @@ +# Native American Assimilation and Activism Week Two Reflection + +> This is the first in a bi-weekly series that I will be publishing for my Native +American Assimilation and Activism class. Every two weeks we make posts +sharing what we learned in the class. Unfortunately, due to weather in England I +was unable to make it back to the United States in time for the first lecture. + +One of the key discussions in Monday's lecture/discussion was since time +immemorial and teaching around that. 
Time Immemorium is a period before +human memory, and involved other human species, travel stories, and creation +stories. Some of the key lessons I learned from that class were: + +* Humans branched from some common ancestor that had multiple other human species branch off + * We are not evolved from chimps but also have a shared ancestor +* Many genetic evolutional specializations have to do with environmental adaptation + * Some of these adaptations were shared when isolated groups had visitors (Weaving rivers theory) + * Also related: Intergenerational Trauma + +Oregon and Washington have been leading the country when it comes to +integrating Native Americans into their school curriculum. This includes +adding Since Time Immemorium curriculum. These advances have the +possibility to significantly improve the awareness and appreciation of Native +American Peoples who have and still live in these lands. diff --git a/harper-ls/src/config.rs b/harper-ls/src/config.rs index aa21cb11..79c9601c 100644 --- a/harper-ls/src/config.rs +++ b/harper-ls/src/config.rs @@ -35,7 +35,7 @@ impl DiagnosticSeverity { #[derive(Debug, Clone, Default)] pub struct CodeActionConfig { /// Instructs `harper-ls` to place unstable code actions last. - /// In this case, "unstable" refers their existence and action. + /// In this case, "unstable" refers to their existence and action. /// /// For example, we always want to allow users to add "misspelled" elements /// to dictionary, regardless of the spelling suggestions. diff --git a/harper-ls/src/main.rs b/harper-ls/src/main.rs index 01d42e8f..1f20b1e5 100644 --- a/harper-ls/src/main.rs +++ b/harper-ls/src/main.rs @@ -25,6 +25,7 @@ static DEFAULT_ADDRESS: &str = "127.0.0.1:4000"; /// /// Will listen on 127.0.0.1:4000 by default. #[derive(Debug, Parser)] +#[command(version, about)] struct Args { /// Set to listen on standard input / output rather than TCP. 
#[arg(short, long, default_value_t = false)] diff --git a/justfile b/justfile index 96d71168..20f056b4 100644 --- a/justfile +++ b/justfile @@ -268,3 +268,16 @@ bump-versions: just format lazygit + +# Enter an infinite loop of testing until a bug is found. +fuzz: + #!/usr/bin/env bash + + while true + do + QUICKCHECK_TESTS=100000 cargo test + status=$? + if [[ $status != 0 ]] ; then + exit $status + fi + done diff --git a/packages/harper.js/src/loadWasm.ts b/packages/harper.js/src/loadWasm.ts index 2459bb35..985cbced 100644 --- a/packages/harper.js/src/loadWasm.ts +++ b/packages/harper.js/src/loadWasm.ts @@ -14,7 +14,7 @@ export function setWasmUri(uri: string) { curWasmUri = uri; } -/** Load the WebAssembly manually and dynamically, making sure to setup infrastructure. +/** Load the WebAssembly manually and dynamically, making sure to set up infrastructure. * You can use an optional data URL for the WebAssembly file if the module is being loaded from a Web Worker. * */ export default async function loadWasm() { diff --git a/packages/web/src/routes/+page.svelte b/packages/web/src/routes/+page.svelte index f3bfbad3..c793ac44 100644 --- a/packages/web/src/routes/+page.svelte +++ b/packages/web/src/routes/+page.svelte @@ -103,7 +103,7 @@
Wicked Fast Since Harper runs on your devices, its able to serve up suggestions in under + >Since Harper runs on your devices, it's able to serve up suggestions in under 10 milliseconds.

diff --git a/packages/web/src/routes/docs/contributors/architecture/+page.md b/packages/web/src/routes/docs/contributors/architecture/+page.md index f8a00915..edd7e015 100644 --- a/packages/web/src/routes/docs/contributors/architecture/+page.md +++ b/packages/web/src/routes/docs/contributors/architecture/+page.md @@ -13,7 +13,7 @@ Hopefully, we can reduce that 10x down to something a little more reasonable. Harper tries to do one thing well: find grammatical and spelling errors in English text. If possible, provide suggestions to correct those errors. -An error and it's possible corrections together form what we call a lint. +An error and its possible corrections together form what we call a lint. In this vein, Harper serves the role of a [Linter]() for English. diff --git a/packages/web/src/routes/docs/contributors/committing/+page.md b/packages/web/src/routes/docs/contributors/committing/+page.md index 85417bf7..2df43be1 100644 --- a/packages/web/src/routes/docs/contributors/committing/+page.md +++ b/packages/web/src/routes/docs/contributors/committing/+page.md @@ -7,7 +7,7 @@ Before creating a pull request, please make sure all your commits follow the lin Additionally, to minimize the labor required to review your commit, we run a relatively strict suite of formatting and linting programs. We highly recommend that you run both `just format` and `just precommit` before submitting a pull request. -If those scripts don't work in your environment, we run `just precommit` through GitHub actions inside of pull requests, so you may make modifications and push until the checks pass. +If those scripts don't work in your environment, we run `just precommit` through GitHub Actions inside of pull requests, so you may make modifications and push until the checks pass. If this sounds intimidating, don't worry. We are entirely willing to work with you to make sure your code can make it into Harper, just know it might take a little longer. 
diff --git a/packages/web/src/routes/docs/contributors/environment/+page.md b/packages/web/src/routes/docs/contributors/environment/+page.md index d87cd8c8..035bfc78 100644 --- a/packages/web/src/routes/docs/contributors/environment/+page.md +++ b/packages/web/src/routes/docs/contributors/environment/+page.md @@ -15,10 +15,11 @@ To use the tooling required to build and debug Harper, you'll need to the follow - `pandoc` We develop a set of tools, accessible via `just`, to build and debug Harper's algorithm (otherwise known as `harper-core`) and its various integrations. -To get see all the tools in your toolbox run: +The source code is in the `justfile` [at the root of the repository](https://github.com/Automattic/harper/blob/master/justfile). +To see all the tools in the toolbox, run: ```bash just --list ``` -Before getting started, we highly recommend that you run `just setup` to populate your build caches and download all dependencies. +Before making any modifications, we highly recommend that you run `just setup` to populate your build caches and download all dependencies. diff --git a/packages/web/src/routes/docs/integrations/language-server/+page.md b/packages/web/src/routes/docs/integrations/language-server/+page.md index 0f44885f..843be18f 100644 --- a/packages/web/src/routes/docs/integrations/language-server/+page.md +++ b/packages/web/src/routes/docs/integrations/language-server/+page.md @@ -30,6 +30,14 @@ You can install Harper on Windows through [Scoop](https://scoop.sh/). scoop install harper ``` +### Homebrew + +You may install Harper through [Homebrew](https://brew.sh). + +```bash +brew install harper +``` + ## Dictionaries `harper-ls` has three kinds of dictionaries: user, file-local, and static dictionaries. 
diff --git a/packages/web/src/routes/docs/integrations/neovim/+page.md b/packages/web/src/routes/docs/integrations/neovim/+page.md index a1610937..010aac9e 100644 --- a/packages/web/src/routes/docs/integrations/neovim/+page.md +++ b/packages/web/src/routes/docs/integrations/neovim/+page.md @@ -41,6 +41,14 @@ You can install Harper on Windows through [Scoop](https://scoop.sh/). scoop install harper ``` +### Homebrew + +You may install Harper through [Homebrew](https://brew.sh). + +```bash +brew install harper +``` + ## Configuration Neovim is also one of the two primarily supported editors for `harper-ls`. @@ -49,7 +57,7 @@ As such, you can view this page as canonical documentation for the available con ### Markdown-Specific Config -The Markdown parser has it's own configuration option, used to modify its behavior in specific ways. +The Markdown parser has its own configuration option, used to modify its behavior in specific ways. For example, the title of a link is linted by default, but this behavior can be changed through the `ignore_link_title` key: ```lua