From ed095256c6404bcf3524f95adba4a86550ff7f2b Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Thu, 16 Jan 2025 07:41:00 -0700 Subject: [PATCH 01/13] feat(#393): add version and about snippet to clap args --- harper-cli/src/main.rs | 2 ++ harper-ls/src/main.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index b1508df3..d306b021 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -11,7 +11,9 @@ use harper_core::parsers::{Markdown, MarkdownOptions}; use harper_core::{remove_overlaps, Dictionary, Document, FstDictionary, TokenKind}; use harper_literate_haskell::LiterateHaskellParser; +/// A debugging tool for the Harper grammar checker. #[derive(Debug, Parser)] +#[command(version, about)] enum Args { /// Lint a provided document. Lint { diff --git a/harper-ls/src/main.rs b/harper-ls/src/main.rs index 01d42e8f..1f20b1e5 100644 --- a/harper-ls/src/main.rs +++ b/harper-ls/src/main.rs @@ -25,6 +25,7 @@ static DEFAULT_ADDRESS: &str = "127.0.0.1:4000"; /// /// Will listen on 127.0.0.1:4000 by default. #[derive(Debug, Parser)] +#[command(version, about)] struct Args { /// Set to listen on standard input / output rather than TCP. #[arg(short, long, default_value_t = false)] From 93166f3dde8400c5ccd3635bc0073a24e968f168 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 08:25:51 -0700 Subject: [PATCH 02/13] docs: added Homebrew as an installation method --- .../src/routes/docs/integrations/language-server/+page.md | 8 ++++++++ packages/web/src/routes/docs/integrations/neovim/+page.md | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/packages/web/src/routes/docs/integrations/language-server/+page.md b/packages/web/src/routes/docs/integrations/language-server/+page.md index 0f44885f..843be18f 100644 --- a/packages/web/src/routes/docs/integrations/language-server/+page.md +++ b/packages/web/src/routes/docs/integrations/language-server/+page.md @@ -30,6 +30,14 @@ You can install Harper on Windows through [Scoop](https://scoop.sh/). scoop install harper ``` +### Homebrew + +You may install Harper through [Homebrew](https://brew.sh). + +```bash +brew install harper +``` + ## Dictionaries `harper-ls` has three kinds of dictionaries: user, file-local, and static dictionaries. diff --git a/packages/web/src/routes/docs/integrations/neovim/+page.md b/packages/web/src/routes/docs/integrations/neovim/+page.md index a1610937..61cbd8a2 100644 --- a/packages/web/src/routes/docs/integrations/neovim/+page.md +++ b/packages/web/src/routes/docs/integrations/neovim/+page.md @@ -41,6 +41,14 @@ You can install Harper on Windows through [Scoop](https://scoop.sh/). scoop install harper ``` +### Homebrew + +You may install Harper through [Homebrew](https://brew.sh). + +```bash +brew install harper +``` + ## Configuration Neovim is also one of the two primarily supported editors for `harper-ls`. From ab126d6ca17d668bf384836976c44aa1bea5738f Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 08:40:08 -0700 Subject: [PATCH 03/13] build(core): run non-deterministic tests for longer in CI --- .github/workflows/precommit.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 21fee626..7d451131 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -8,6 +8,8 @@ on: env: CARGO_TERM_COLOR: always + # Run for 100 times the default + QUICKCHECK_TESTS: 10000 jobs: precommit: From bd4294128088722ba4a7a38b958e90bf437eee02 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 08:57:35 -0700 Subject: [PATCH 04/13] fix(core): long prepositions should be uppercase --- harper-core/src/title_case.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/harper-core/src/title_case.rs b/harper-core/src/title_case.rs index 7485017c..3dd26269 100644 --- a/harper-core/src/title_case.rs +++ b/harper-core/src/title_case.rs @@ -30,7 +30,7 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) let start_index = toks.first().unwrap().span.start; - let mut words = toks.iter_word_likes().enumerate().peekable(); + let mut word_likes = toks.iter_word_likes().enumerate().peekable(); let mut output = toks.span().unwrap().get_content(source).to_vec(); // Only specific conjunctions are not capitalized. @@ -41,7 +41,7 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) .collect(); } - while let Some((index, word)) = words.next() { + while let Some((index, word)) = word_likes.next() { if !word.kind.is_word() { continue; } @@ -55,11 +55,13 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) .unwrap() .or(&dict.get_word_metadata(&chars_lower)); - let should_capitalize = !metadata.preposition + let is_short_preposition = metadata.preposition && word.span.len() <= 4; + + let should_capitalize = (!is_short_preposition && !metadata.article - && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_slice()) + && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_slice())) || index == 0 - || words.peek().is_none(); + || word_likes.peek().is_none(); if should_capitalize { output[word.span.start - start_index] = @@ -120,6 +122,15 @@ mod tests { ) } + /// Check that "about" remains uppercase + #[test] + fn about_uppercase_with_numbers() { + assert_eq!( + make_title_case_str("0 about 0", &PlainEnglish, &FstDictionary::curated()), + "0 About 0" + ) + } + #[derive(Debug, Clone)] struct Word(String); From fb4d6b50e9e5dadb67f9a055e99c10a4c0d24973 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 08:57:54 -0700 Subject: [PATCH 05/13] feat: added `just` command for fuzzing --- justfile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/justfile b/justfile index 96d71168..54f16bfc 100644 --- a/justfile +++ b/justfile @@ -268,3 +268,15 @@ bump-versions: just format lazygit + +# Enter an infinite loop of testing until a bug is found. +fuzz mod: + #!/usr/bin/bash + + while true + do + QUICKCHECK_TESTS=1000 cargo test {{mod}} + if [[ x$? != x0 ]] ; then + exit $? + fi + done From e8db87ef4bc6af4c70faf0d42909c27ff74e480a Mon Sep 17 00:00:00 2001 From: hippietrail Date: Fri, 17 Jan 2025 00:31:32 +0800 Subject: [PATCH 06/13] typos/spelling/grammar fixes in comments and docs --- harper-comments/src/comment_parsers/jsdoc.rs | 2 +- .../src/linting/pronoun_contraction/avoid_contraction.rs | 4 ++-- harper-core/src/linting/spelled_numbers.rs | 2 +- harper-core/src/parsers/mask.rs | 2 +- harper-core/src/token_kind.rs | 2 +- harper-ls/src/config.rs | 2 +- packages/harper.js/src/loadWasm.ts | 2 +- packages/web/src/routes/+page.svelte | 2 +- .../web/src/routes/docs/contributors/architecture/+page.md | 2 +- packages/web/src/routes/docs/integrations/neovim/+page.md | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/harper-comments/src/comment_parsers/jsdoc.rs b/harper-comments/src/comment_parsers/jsdoc.rs index 73dc9eed..663f571f 100644 --- a/harper-comments/src/comment_parsers/jsdoc.rs +++ b/harper-comments/src/comment_parsers/jsdoc.rs @@ -121,7 +121,7 @@ pub(super) fn mark_inline_tags(tokens: &mut [Token]) { } } -/// Checks if the provided token slice begins with an inline tag, returning it's +/// Checks if the provided token slice begins with an inline tag, returning its /// end if so. fn parse_inline_tag(tokens: &[Token]) -> Option { if !matches!( diff --git a/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs b/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs index 786c5f0d..d80c973c 100644 --- a/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs +++ b/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs @@ -34,13 +34,13 @@ impl PatternLinter for AvoidContraction { vec!['y', 'o', 'u', 'r'], word, )], - message: "I appears you intended to use the possessive version of this word".to_owned(), + message: "It appears you intended to use the possessive version of this word".to_owned(), priority: 63, } } fn description(&self) -> &'static str { - "This rule looks for situations where a contraction was used where it shouldn't." + "This rule looks for situations where a contraction was used where it shouldn't have been." } } diff --git a/harper-core/src/linting/spelled_numbers.rs b/harper-core/src/linting/spelled_numbers.rs index 8824d78f..936f2e41 100644 --- a/harper-core/src/linting/spelled_numbers.rs +++ b/harper-core/src/linting/spelled_numbers.rs @@ -35,7 +35,7 @@ impl Linter for SpelledNumbers { } } -/// Converts a number to it's spelled-out variant. +/// Converts a number to its spelled-out variant. /// /// For example: 100 -> one hundred. /// diff --git a/harper-core/src/parsers/mask.rs b/harper-core/src/parsers/mask.rs index 92282794..d74393d6 100644 --- a/harper-core/src/parsers/mask.rs +++ b/harper-core/src/parsers/mask.rs @@ -35,7 +35,7 @@ where let mut last_allowed: Option = None; for (span, content) in mask.iter_allowed(source) { - // Check if there was a line break between the last chunk. + // Check for a line break separating the current chunk from the preceding one. if let Some(last_allowed) = last_allowed { let intervening = Span::new(last_allowed.end, span.start); diff --git a/harper-core/src/token_kind.rs b/harper-core/src/token_kind.rs index 93df3a30..25aecb49 100644 --- a/harper-core/src/token_kind.rs +++ b/harper-core/src/token_kind.rs @@ -155,7 +155,7 @@ impl TokenKind { self.with_default_data() == other.with_default_data() } - /// Produces a copy of `self` with any inner data replaced with it's default + /// Produces a copy of `self` with any inner data replaced with its default /// value. Useful for making comparisons on just the variant of the /// enum. pub fn with_default_data(&self) -> Self { diff --git a/harper-ls/src/config.rs b/harper-ls/src/config.rs index aa21cb11..79c9601c 100644 --- a/harper-ls/src/config.rs +++ b/harper-ls/src/config.rs @@ -35,7 +35,7 @@ impl DiagnosticSeverity { #[derive(Debug, Clone, Default)] pub struct CodeActionConfig { /// Instructs `harper-ls` to place unstable code actions last. - /// In this case, "unstable" refers their existence and action. + /// In this case, "unstable" refers to their existence and action. /// /// For example, we always want to allow users to add "misspelled" elements /// to dictionary, regardless of the spelling suggestions. diff --git a/packages/harper.js/src/loadWasm.ts b/packages/harper.js/src/loadWasm.ts index 2459bb35..985cbced 100644 --- a/packages/harper.js/src/loadWasm.ts +++ b/packages/harper.js/src/loadWasm.ts @@ -14,7 +14,7 @@ export function setWasmUri(uri: string) { curWasmUri = uri; } -/** Load the WebAssembly manually and dynamically, making sure to setup infrastructure. +/** Load the WebAssembly manually and dynamically, making sure to set up infrastructure. * You can use an optional data URL for the WebAssembly file if the module is being loaded from a Web Worker. * */ export default async function loadWasm() { diff --git a/packages/web/src/routes/+page.svelte b/packages/web/src/routes/+page.svelte index f3bfbad3..c793ac44 100644 --- a/packages/web/src/routes/+page.svelte +++ b/packages/web/src/routes/+page.svelte @@ -103,7 +103,7 @@
Wicked Fast Since Harper runs on your devices, its able to serve up suggestions in under + >Since Harper runs on your devices, it's able to serve up suggestions in under 10 milliseconds.

diff --git a/packages/web/src/routes/docs/contributors/architecture/+page.md b/packages/web/src/routes/docs/contributors/architecture/+page.md index f8a00915..edd7e015 100644 --- a/packages/web/src/routes/docs/contributors/architecture/+page.md +++ b/packages/web/src/routes/docs/contributors/architecture/+page.md @@ -13,7 +13,7 @@ Hopefully, we can reduce that 10x down to something a little more reasonable. Harper tries to do one thing well: find grammatical and spelling errors in English text. If possible, provide suggestions to correct those errors. -An error and it's possible corrections together form what we call a lint. +An error and its possible corrections together form what we call a lint. In this vein, Harper serves the role of a [Linter]() for English. diff --git a/packages/web/src/routes/docs/integrations/neovim/+page.md b/packages/web/src/routes/docs/integrations/neovim/+page.md index a1610937..e970568c 100644 --- a/packages/web/src/routes/docs/integrations/neovim/+page.md +++ b/packages/web/src/routes/docs/integrations/neovim/+page.md @@ -49,7 +49,7 @@ As such, you can view this page as canonical documentation for the available con ### Markdown-Specific Config -The Markdown parser has it's own configuration option, used to modify its behavior in specific ways. +The Markdown parser has its own configuration option, used to modify its behavior in specific ways. For example, the title of a link is linted by default, but this behavior can be changed through the `ignore_link_title` key: ```lua From f90ea1d8ee6aab0a590f94ecf2c4bc3af51e45bc Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 09:45:22 -0700 Subject: [PATCH 07/13] test: setup GitHub Actions workflow to test frequently --- .github/workflows/fuzz.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/fuzz.yml diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 00000000..6fd6659a --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,19 @@ +name: Fuzz + +on: + schedule: + - cron: "*/30 * * * *" + +env: + CARGO_TERM_COLOR: always + # Run for 100 times the default + QUICKCHECK_TESTS: 10000 + +jobs: + precommit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: extractions/setup-just@v2 + - name: Test + run: cargo test From e0e90f049a0ba8990c9917597632a9beed3eb005 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 09:52:27 -0700 Subject: [PATCH 08/13] chore: ran `just format` --- .../src/linting/pronoun_contraction/avoid_contraction.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs b/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs index d80c973c..546c6f74 100644 --- a/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs +++ b/harper-core/src/linting/pronoun_contraction/avoid_contraction.rs @@ -34,7 +34,8 @@ impl PatternLinter for AvoidContraction { vec!['y', 'o', 'u', 'r'], word, )], - message: "It appears you intended to use the possessive version of this word".to_owned(), + message: "It appears you intended to use the possessive version of this word" + .to_owned(), priority: 63, } } From 4901fc38dea39194333ef1c0709b2667866f0817 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 09:57:05 -0700 Subject: [PATCH 09/13] docs: clarified some details about build tooling --- .../web/src/routes/docs/contributors/committing/+page.md | 2 +- .../web/src/routes/docs/contributors/environment/+page.md | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/web/src/routes/docs/contributors/committing/+page.md b/packages/web/src/routes/docs/contributors/committing/+page.md index 85417bf7..2df43be1 100644 --- a/packages/web/src/routes/docs/contributors/committing/+page.md +++ b/packages/web/src/routes/docs/contributors/committing/+page.md @@ -7,7 +7,7 @@ Before creating a pull request, please make sure all your commits follow the lin Additionally, to minimize the labor required to review your commit, we run a relatively strict suite of formatting and linting programs. We highly recommend that you run both `just format` and `just precommit` before submitting a pull request. -If those scripts don't work in your environment, we run `just precommit` through GitHub actions inside of pull requests, so you may make modifications and push until the checks pass. +If those scripts don't work in your environment, we run `just precommit` through GitHub Actions inside of pull requests, so you may make modifications and push until the checks pass. If this sounds intimidating, don't worry. We are entirely willing to work with you to make sure your code can make it into Harper, just know it might take a little longer. diff --git a/packages/web/src/routes/docs/contributors/environment/+page.md b/packages/web/src/routes/docs/contributors/environment/+page.md index d87cd8c8..035bfc78 100644 --- a/packages/web/src/routes/docs/contributors/environment/+page.md +++ b/packages/web/src/routes/docs/contributors/environment/+page.md @@ -15,10 +15,11 @@ To use the tooling required to build and debug Harper, you'll need to the follow - `pandoc` We develop a set of tools, accessible via `just`, to build and debug Harper's algorithm (otherwise known as `harper-core`) and its various integrations. -To get see all the tools in your toolbox run: +The source code is in the `justfile` [at the root of the repository](https://github.com/Automattic/harper/blob/master/justfile). +To see all the tools in the toolbox, run: ```bash just --list ``` -Before getting started, we highly recommend that you run `just setup` to populate your build caches and download all dependencies. +Before making any modifications, we highly recommend that you run `just setup` to populate your build caches and download all dependencies. From 51b8c13d82c7ac7c7505ed4bfbce2e6a7e39ab45 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 10:32:31 -0700 Subject: [PATCH 10/13] chore: temporarily change branch and schedule --- .github/workflows/fuzz.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 6fd6659a..22153da0 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -2,7 +2,7 @@ name: Fuzz on: schedule: - - cron: "*/30 * * * *" + - cron: "*/10 * * * *" env: CARGO_TERM_COLOR: always @@ -14,6 +14,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + ref: harden-title-case - uses: extractions/setup-just@v2 - name: Test run: cargo test From 0d2b7442c3a9d923558cccb9697df52103951d03 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 12:25:57 -0700 Subject: [PATCH 11/13] fix(core): email and hostname lexers were too permissive --- harper-core/src/lexing/email_address.rs | 16 ++++++++++++++++ harper-core/src/lexing/hostname.rs | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/harper-core/src/lexing/email_address.rs b/harper-core/src/lexing/email_address.rs index df2eb20a..92038116 100644 --- a/harper-core/src/lexing/email_address.rs +++ b/harper-core/src/lexing/email_address.rs @@ -16,6 +16,10 @@ pub fn lex_email_address(source: &[char]) -> Option { let domain_part_len = lex_hostname(&source[at_loc + 1..])?; + if domain_part_len == 0 { + return None; + } + Some(FoundToken { next_index: at_loc + 1 + domain_part_len, token: TokenKind::EmailAddress, @@ -155,6 +159,18 @@ mod tests { } } + #[test] + fn does_not_allow_empty_domain() { + for local in example_local_parts() { + // Generate invalid email address + let mut address = local.clone(); + address.push('@'); + address.push(' '); + + assert!(lex_email_address(&address).is_none()); + } + } + /// Tests that the email parser will not throw a panic under some random /// situations. #[test] diff --git a/harper-core/src/lexing/hostname.rs b/harper-core/src/lexing/hostname.rs index 5534fbe5..961955da 100644 --- a/harper-core/src/lexing/hostname.rs +++ b/harper-core/src/lexing/hostname.rs @@ -28,6 +28,13 @@ pub fn lex_hostname_token(source: &[char]) -> Option { pub fn lex_hostname(source: &[char]) -> Option { let mut passed_chars = 0; + // The beginning has different requirements from the rest of the hostname. + let first = source.first()?; + + if !matches!(first, 'A'..='Z' | 'a'..='z' | '0'..='9' ) { + return None; + } + for label in source.split(|c| *c == '.') { for c in label { passed_chars += 1; @@ -78,4 +85,10 @@ pub mod tests { assert_eq!(lex_hostname(&domain), Some(domain.len())); } } + + #[test] + fn hyphen_cannot_open_hostname() { + let host: Vec<_> = "-something.com".chars().collect(); + assert!(lex_hostname(&host).is_none()) + } } From 4167300e41b3b2d1efad41c8ae3d3dfb05307ed4 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 16 Jan 2025 12:26:34 -0700 Subject: [PATCH 12/13] fix(core): out of bounds errors --- harper-core/src/parsers/markdown.rs | 35 +++++++- harper-core/src/title_case.rs | 128 ++++++++++++---------------- harper-core/src/token.rs | 11 ++- justfile | 4 +- 4 files changed, 101 insertions(+), 77 deletions(-) diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index ea21e9c4..16e40cb1 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -37,7 +37,7 @@ impl Markdown { /// Remove hidden Wikilink target text. /// - /// As in, the stuff to the left of the pipe operator: + /// As in the stuff to the left of the pipe operator: /// /// ```markdown /// [[Target text|Display Text]] @@ -46,6 +46,10 @@ impl Markdown { let mut to_remove = VecDeque::new(); for pipe_idx in tokens.iter_pipe_indices() { + if pipe_idx < 2 { + continue; + } + // Locate preceding `[[` let mut cursor = pipe_idx - 2; let mut open_bracket = None; @@ -351,6 +355,35 @@ mod tests { )) } + #[test] + fn just_pipe() { + let source = r"|"; + + let tokens = Markdown::default().parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!( + token_kinds.as_slice(), + &[TokenKind::Punctuation(Punctuation::Pipe)] + )) + } + + #[test] + fn empty_wikilink_text() { + let source = r"[[|]]"; + + let tokens = Markdown::default().parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!(token_kinds.as_slice(), &[])) + } + #[test] fn improper_wikilink_text() { let source = r"this is shown|this is also shown]]"; diff --git a/harper-core/src/title_case.rs b/harper-core/src/title_case.rs index 3dd26269..c215343e 100644 --- a/harper-core/src/title_case.rs +++ b/harper-core/src/title_case.rs @@ -1,5 +1,6 @@ use crate::Lrc; use crate::Token; +use crate::TokenKind; use hashbrown::HashSet; use lazy_static::lazy_static; @@ -33,33 +34,8 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) let mut word_likes = toks.iter_word_likes().enumerate().peekable(); let mut output = toks.span().unwrap().get_content(source).to_vec(); - // Only specific conjunctions are not capitalized. - lazy_static! { - static ref SPECIAL_CONJUNCTIONS: HashSet> = ["and", "but", "for", "or", "nor"] - .iter() - .map(|v| v.chars().collect()) - .collect(); - } - while let Some((index, word)) = word_likes.next() { - if !word.kind.is_word() { - continue; - } - - let chars = word.span.get_content(source); - let chars_lower = chars.to_lower(); - - let metadata = word - .kind - .as_word() - .unwrap() - .or(&dict.get_word_metadata(&chars_lower)); - - let is_short_preposition = metadata.preposition && word.span.len() <= 4; - - let should_capitalize = (!is_short_preposition - && !metadata.article - && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_slice())) + let should_capitalize = should_capitalize_token(&word, source, dict) || index == 0 || word_likes.peek().is_none(); @@ -82,10 +58,39 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) output } +/// Determines whether a token should be capitalized. +/// Is not responsible for capitalization requirements that are dependent on token position. +fn should_capitalize_token(tok: &Token, source: &[char], dict: &impl Dictionary) -> bool { + match tok.kind { + TokenKind::Word(mut metadata) => { + // Only specific conjunctions are not capitalized. + lazy_static! { + static ref SPECIAL_CONJUNCTIONS: HashSet> = + ["and", "but", "for", "or", "nor"] + .iter() + .map(|v| v.chars().collect()) + .collect(); + } + + let chars = tok.span.get_content(source); + let chars_lower = chars.to_lower(); + + metadata = metadata.or(&dict.get_word_metadata(&chars_lower)); + + let is_short_preposition = metadata.preposition && tok.span.len() <= 4; + + !is_short_preposition + && !metadata.article + && !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_slice()) + } + _ => true, + } +} + #[cfg(test)] mod tests { - use quickcheck::{Arbitrary, TestResult}; + use quickcheck::TestResult; use quickcheck_macros::quickcheck; use super::make_title_case_str; @@ -131,49 +136,28 @@ mod tests { ) } - #[derive(Debug, Clone)] - struct Word(String); - - impl Arbitrary for Word { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let mut s = String::new(); - - for _ in 0..g.size() { - let c = loop { - let gen = char::arbitrary(g); - - if gen.is_ascii_alphanumeric() { - break gen; - } - }; - - s.push(c); - } - - Self(s) - } + #[test] + fn pipe_does_not_cause_crash() { + assert_eq!( + make_title_case_str("|", &Markdown::default(), &FstDictionary::curated()), + "|" + ) } - #[derive(Debug, Clone)] - struct Sentence(String); - - /// Builds a sentence out of words from the curated [`FullDictionary`]. - impl Arbitrary for Sentence { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let mut s = String::new(); - - let Word(first_word) = Word::arbitrary(g); - s.push_str(&first_word); - - for _ in 0..g.size() { - let Word(word) = Word::arbitrary(g); - - s.push(' '); - s.push_str(&word); - } + #[test] + fn a_paragraph_does_not_cause_crash() { + assert_eq!( + make_title_case_str("A\n", &Markdown::default(), &FstDictionary::curated()), + "A" + ) + } - Self(s) - } + #[test] + fn tab_a_becomes_upcase() { + assert_eq!( + make_title_case_str("\ta", &PlainEnglish, &FstDictionary::curated()), + "\tA" + ) } #[quickcheck] @@ -195,8 +179,6 @@ mod tests { .chars() .collect(); - dbg!(&title_case); - TestResult::from_bool(title_case[prefix.chars().count() + 1] == 'a') } @@ -223,15 +205,15 @@ mod tests { } #[quickcheck] - fn first_word_is_upcase(sentence: Sentence) -> TestResult { + fn first_word_is_upcase(text: String) -> TestResult { let title_case: Vec<_> = - make_title_case_str(&sentence.0, &Markdown::default(), &FstDictionary::curated()) + make_title_case_str(&text, &PlainEnglish, &FstDictionary::curated()) .chars() .collect(); if let Some(first) = title_case.first() { - if first.is_alphabetic() { - TestResult::from_bool(first.is_uppercase()) + if first.is_ascii_alphabetic() { + TestResult::from_bool(first.is_ascii_uppercase()) } else { TestResult::discard() } diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 06ab11ce..f33c4a2b 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -156,7 +156,16 @@ impl TokenStringExt for [Token] { } fn span(&self) -> Option { - Some(Span::new(self.first()?.span.start, self.last()?.span.end)) + let min_max = self + .iter() + .flat_map(|v| [v.span.start, v.span.end].into_iter()) + .minmax(); + + match min_max { + itertools::MinMaxResult::NoElements => None, + itertools::MinMaxResult::OneElement(min) => Some(Span::new(min, min)), + itertools::MinMaxResult::MinMax(min, max) => Some(Span::new(min, max)), + } } fn iter_linking_verb_indices(&self) -> impl Iterator + '_ { diff --git a/justfile b/justfile index 54f16bfc..20f056b4 100644 --- a/justfile +++ b/justfile @@ -270,12 +270,12 @@ bump-versions: lazygit # Enter an infinite loop of testing until a bug is found. -fuzz mod: +fuzz: #!/usr/bin/bash while true do - QUICKCHECK_TESTS=1000 cargo test {{mod}} + QUICKCHECK_TESTS=100000 cargo test if [[ x$? != x0 ]] ; then exit $? fi From 083ce6776de89741d326c5ebafe92644e1390bf5 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 17 Jan 2025 14:14:31 -0700 Subject: [PATCH 13/13] fix(core): indexing problems --- harper-core/dictionary.dict | 1 + harper-core/src/patterns/is_not_title_case.rs | 3 +-- harper-core/src/title_case.rs | 2 +- harper-core/tests/run_tests.rs | 3 +++ .../tests/test_sources/lukas_homework.md | 23 +++++++++++++++++++ 5 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 harper-core/tests/test_sources/lukas_homework.md diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index ef48d431..8ad5f5f5 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -49771,3 +49771,4 @@ Harper/SM a8c/SM a11n/1 a12s/9 +intergenerational diff --git a/harper-core/src/patterns/is_not_title_case.rs b/harper-core/src/patterns/is_not_title_case.rs index 0c400a4a..ecad0b8e 100644 --- a/harper-core/src/patterns/is_not_title_case.rs +++ b/harper-core/src/patterns/is_not_title_case.rs @@ -24,8 +24,7 @@ impl Pattern for IsNotTitleCase { } let matched_chars = tokens[0..inner_match].span().unwrap().get_content(source); - - if make_title_case(tokens, source, &self.dict) != matched_chars { + if make_title_case(&tokens[0..inner_match], source, &self.dict) != matched_chars { inner_match } else { 0 diff --git a/harper-core/src/title_case.rs b/harper-core/src/title_case.rs index c215343e..5cdec274 100644 --- a/harper-core/src/title_case.rs +++ b/harper-core/src/title_case.rs @@ -50,7 +50,7 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) } else { // The whole word should be lowercase. for i in word.span { - output[i - start_index] = output[i].to_ascii_lowercase(); + output[i - start_index] = output[i - start_index].to_ascii_lowercase(); } } } diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index 8c52d95a..39f1e511 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -48,3 +48,6 @@ create_test!(amazon_hostname.md, 0); create_test!(issue_159.md, 1); create_test!(issue_358.md, 0); create_test!(issue_195.md, 0); + +// Make sure it doesn't panic +create_test!(lukas_homework.md, 3); diff --git a/harper-core/tests/test_sources/lukas_homework.md b/harper-core/tests/test_sources/lukas_homework.md new file mode 100644 index 00000000..daf534f9 --- /dev/null +++ b/harper-core/tests/test_sources/lukas_homework.md @@ -0,0 +1,23 @@ +# Native American Assimilation and Activism Week Two Reflection + +> This is the first in a bi-weekly series that I will be publishing for my Native +American Assimilation and Activism class. Every two weeks we make posts +sharing what we learned in the class. Unfortunately, due to weather in England I +was unable to make it back to the United States in time for the first lecture. + +One of the key discussions in Monday's lecture/discussion was since time +immemorial and teaching around that. Time Immemorium is a period before +human memory, and involved other human species, travel stories, and creation +stories. Some of the key lessons I learned from that class were: + +* Humans branched from some common ancestor that had multiple other human species branch off + * We are not evolved from chimps but also have a shared ancestor +* Many genetic evolutional specializations have to do with environmental adaptation + * Some of these adaptations were shared when isolated groups had visitors (Weaving rivers theory) + * Also related: Intergenerational Trauma + +Oregon and Washington have been leading the country when it comes to +integrating Native Americans into their school curriculum. This includes +adding Since Time Immemorium curriculum. These advances have the +possibility to significantly improve the awareness and appreciation of Native +American Peoples who have and still live in these lands.