|
| 1 | +use super::Suggestion; |
| 2 | +use super::{Lint, LintKind, Linter}; |
| 3 | +use crate::document::Document; |
| 4 | +use crate::{Token, TokenStringExt}; |
| 5 | + |
/// A linter that flags words which are capitalized even though they are not
/// the first word of their sentence or heading and are not proper nouns.
///
/// The linter carries no state, so it is a zero-sized unit struct;
/// `Default` is derived rather than hand-written.
#[derive(Debug, Clone, Copy, Default)]
pub struct SentenceCasing;
| 15 | + |
| 16 | +impl SentenceCasing { |
| 17 | + /// Check a sequence of tokens for incorrect capitalization. |
| 18 | + /// `first_word_idx` is the index of the first word that should be capitalized. |
| 19 | + fn check_tokens(&self, tokens: &[Token], document: &Document, lints: &mut Vec<Lint>) { |
| 20 | + // Get the index of the first word in the sequence |
| 21 | + let first_word_idx = tokens.iter().position(|t| t.kind.is_word()); |
| 22 | + |
| 23 | + let Some(first_word_idx) = first_word_idx else { |
| 24 | + return; |
| 25 | + }; |
| 26 | + |
| 27 | + // Check all words after the first one |
| 28 | + for (idx, token) in tokens.iter().enumerate() { |
| 29 | + // Skip the first word (it should be capitalized) |
| 30 | + if idx <= first_word_idx { |
| 31 | + continue; |
| 32 | + } |
| 33 | + |
| 34 | + // Only check actual words |
| 35 | + if !token.kind.is_word() { |
| 36 | + continue; |
| 37 | + } |
| 38 | + |
| 39 | + // Check if the word is capitalized |
| 40 | + let word_chars = document.get_span_content(&token.span); |
| 41 | + let Some(first_char) = word_chars.first() else { |
| 42 | + continue; |
| 43 | + }; |
| 44 | + |
| 45 | + // Skip if not capitalized |
| 46 | + if !first_char.is_uppercase() { |
| 47 | + continue; |
| 48 | + } |
| 49 | + |
| 50 | + // Skip proper nouns - these should be capitalized |
| 51 | + if token.kind.is_proper_noun() { |
| 52 | + continue; |
| 53 | + } |
| 54 | + |
| 55 | + // Skip words that are all uppercase (likely acronyms/initialisms) |
| 56 | + if word_chars |
| 57 | + .iter() |
| 58 | + .all(|c| !c.is_alphabetic() || c.is_uppercase()) |
| 59 | + { |
| 60 | + continue; |
| 61 | + } |
| 62 | + |
| 63 | + // Skip words after a colon (might be starting a new clause) |
| 64 | + if let Some(prev_non_ws) = tokens[..idx].iter().rev().find(|t| !t.kind.is_whitespace()) |
| 65 | + && prev_non_ws.kind.is_punctuation() |
| 66 | + { |
| 67 | + let prev_chars = document.get_span_content(&prev_non_ws.span); |
| 68 | + if prev_chars == [':'] { |
| 69 | + continue; |
| 70 | + } |
| 71 | + } |
| 72 | + |
| 73 | + // Skip single-letter capitalizations (often used for proper context like "Plan A") |
| 74 | + if word_chars.len() == 1 { |
| 75 | + continue; |
| 76 | + } |
| 77 | + |
| 78 | + // Skip words after opening quotes (might be a quoted sentence start) |
| 79 | + if let Some(prev_non_ws) = tokens[..idx].iter().rev().find(|t| !t.kind.is_whitespace()) |
| 80 | + && prev_non_ws.kind.is_quote() |
| 81 | + { |
| 82 | + continue; |
| 83 | + } |
| 84 | + |
| 85 | + // Check if this word follows a sentence terminator within the same sequence |
| 86 | + // (This handles cases where parsing might not have split sentences correctly) |
| 87 | + let has_terminator_before = tokens[first_word_idx + 1..idx] |
| 88 | + .iter() |
| 89 | + .any(|t| t.kind.is_sentence_terminator()); |
| 90 | + |
| 91 | + if has_terminator_before { |
| 92 | + continue; |
| 93 | + } |
| 94 | + |
| 95 | + // Create the lowercase suggestion |
| 96 | + let mut replacement_chars = word_chars.to_vec(); |
| 97 | + replacement_chars[0] = replacement_chars[0].to_ascii_lowercase(); |
| 98 | + |
| 99 | + lints.push(Lint { |
| 100 | + span: token.span, |
| 101 | + lint_kind: LintKind::Capitalization, |
| 102 | + suggestions: vec![Suggestion::ReplaceWith(replacement_chars)], |
| 103 | + priority: 63, |
| 104 | + message: "This word is capitalized but does not appear to be a proper noun. Consider using lowercase.".to_string(), |
| 105 | + }); |
| 106 | + } |
| 107 | + } |
| 108 | +} |
| 109 | + |
| 110 | +impl Linter for SentenceCasing { |
| 111 | + fn lint(&mut self, document: &Document) -> Vec<Lint> { |
| 112 | + let mut lints = Vec::new(); |
| 113 | + |
| 114 | + // Check headings |
| 115 | + for heading in document.iter_headings() { |
| 116 | + self.check_tokens(heading, document, &mut lints); |
| 117 | + } |
| 118 | + |
| 119 | + // Check regular sentences (but skip those in headings) |
| 120 | + for paragraph in document.iter_paragraphs() { |
| 121 | + // Skip paragraphs that are headings (they're already checked above) |
| 122 | + if paragraph.iter().any(|t| t.kind.is_heading_start()) { |
| 123 | + continue; |
| 124 | + } |
| 125 | + |
| 126 | + for sentence in paragraph.iter_sentences() { |
| 127 | + self.check_tokens(sentence, document, &mut lints); |
| 128 | + } |
| 129 | + } |
| 130 | + |
| 131 | + lints |
| 132 | + } |
| 133 | + |
| 134 | + fn description(&self) -> &'static str { |
| 135 | + "Flags words that are capitalized mid-sentence or mid-heading but are not proper nouns." |
| 136 | + } |
| 137 | +} |
| 138 | + |
#[cfg(test)]
mod tests {
    use super::SentenceCasing;
    use super::super::tests::{assert_lint_count, assert_suggestion_result};

    // ---- Sentences ----

    /// A single stray capital in the middle of a sentence is reported.
    #[test]
    fn catches_mid_sentence_capital() {
        let text = "The quick Brown fox jumps over the lazy dog.";
        assert_lint_count(text, SentenceCasing, 1);
    }

    /// Place names are expected to pass untouched.
    #[test]
    fn allows_proper_nouns() {
        let text = "I visited Paris last summer.";
        assert_lint_count(text, SentenceCasing, 0);
    }

    /// The first word of each sentence may be capitalized.
    #[test]
    fn allows_sentence_start() {
        let text = "The fox is quick. The dog is lazy.";
        assert_lint_count(text, SentenceCasing, 0);
    }

    /// All-uppercase words are expected to pass untouched.
    #[test]
    fn allows_acronyms() {
        let text = "The NASA mission was successful.";
        assert_lint_count(text, SentenceCasing, 0);
    }

    /// A capitalized word directly after a colon is accepted.
    #[test]
    fn allows_after_colon() {
        let text = "Here is the answer: True or false.";
        assert_lint_count(text, SentenceCasing, 0);
    }

    /// A lone capital letter is accepted.
    #[test]
    fn allows_single_letter() {
        let text = "This is plan A for the mission.";
        assert_lint_count(text, SentenceCasing, 0);
    }

    /// Applying the suggestion lowercases the offending word.
    #[test]
    fn fixes_capitalization() {
        let input = "The quick Brown fox.";
        let expected = "The quick brown fox.";
        assert_suggestion_result(input, SentenceCasing, expected);
    }

    /// Personal names are expected to pass untouched.
    #[test]
    fn allows_names() {
        let text = "I talked to John yesterday.";
        assert_lint_count(text, SentenceCasing, 0);
    }

    /// Every stray capital in a sentence yields its own lint.
    #[test]
    fn multiple_errors() {
        let text = "The Quick Brown Fox jumps over the Lazy Dog.";
        assert_lint_count(text, SentenceCasing, 4);
    }

    /// A capitalized word directly after an opening quote is accepted.
    #[test]
    fn allows_quoted_start() {
        let text = "She said \"Hello there\" to him.";
        assert_lint_count(text, SentenceCasing, 0);
    }

    // ---- Headings ----

    /// Title-cased Markdown headings are reported word by word.
    #[test]
    fn catches_heading_mid_word_capital() {
        let text = "# The Quick Brown Fox";
        assert_lint_count(text, SentenceCasing, 3);
    }

    /// Proper nouns inside a heading are expected to pass untouched.
    #[test]
    fn allows_heading_proper_nouns() {
        let text = "# A trip to Paris";
        assert_lint_count(text, SentenceCasing, 0);
    }

    /// The first word of a heading may be capitalized.
    #[test]
    fn allows_heading_start_capital() {
        let text = "# Introduction to the topic";
        assert_lint_count(text, SentenceCasing, 0);
    }

    /// Applying the suggestion inside a heading lowercases the word.
    #[test]
    fn fixes_heading_capitalization() {
        let input = "# The Quick fox";
        let expected = "# The quick fox";
        assert_suggestion_result(input, SentenceCasing, expected);
    }

    /// Neither of the two capitalized names in this heading is expected to be reported.
    #[test]
    fn heading_with_acronym() {
        let text = "# Working with NASA and SpaceX";
        assert_lint_count(text, SentenceCasing, 0);
    }
}
0 commit comments