Skip to content

Commit f2d923b

Browse files
committed
feat(core): create rule to enforce sentence case
1 parent 38c7a45 commit f2d923b

File tree

3 files changed

+236
-0
lines changed

3 files changed

+236
-0
lines changed

harper-core/src/linting/lint_group.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ use super::safe_to_save::SafeToSave;
156156
use super::save_to_safe::SaveToSafe;
157157
use super::semicolon_apostrophe::SemicolonApostrophe;
158158
use super::sentence_capitalization::SentenceCapitalization;
159+
use super::sentence_casing::SentenceCasing;
159160
use super::shoot_oneself_in_the_foot::ShootOneselfInTheFoot;
160161
use super::simple_past_to_past_participle::SimplePastToPastParticiple;
161162
use super::since_duration::SinceDuration;
@@ -623,6 +624,7 @@ impl LintGroup {
623624
insert_expr_rule!(SafeToSave, true);
624625
insert_expr_rule!(SaveToSafe, true);
625626
insert_expr_rule!(SemicolonApostrophe, true);
627+
insert_struct_rule!(SentenceCasing, false);
626628
insert_expr_rule!(ShootOneselfInTheFoot, true);
627629
insert_expr_rule!(SimplePastToPastParticiple, true);
628630
insert_expr_rule!(SinceDuration, true);

harper-core/src/linting/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ mod safe_to_save;
167167
mod save_to_safe;
168168
mod semicolon_apostrophe;
169169
mod sentence_capitalization;
170+
mod sentence_casing;
170171
mod shoot_oneself_in_the_foot;
171172
mod simple_past_to_past_participle;
172173
mod since_duration;
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
use super::Suggestion;
2+
use super::{Lint, LintKind, Linter};
3+
use crate::document::Document;
4+
use crate::{Token, TokenStringExt};
5+
6+
/// A linter that checks for words that are capitalized but shouldn't be
/// (i.e., not at the start of a sentence/heading and not proper nouns).
///
/// The linter holds no state, so the standard traits are derived instead of
/// being written out by hand; `SentenceCasing::default()` keeps working.
#[derive(Debug, Clone, Copy, Default)]
pub struct SentenceCasing;
15+
16+
impl SentenceCasing {
17+
/// Check a sequence of tokens for incorrect capitalization.
18+
/// `first_word_idx` is the index of the first word that should be capitalized.
19+
fn check_tokens(&self, tokens: &[Token], document: &Document, lints: &mut Vec<Lint>) {
20+
// Get the index of the first word in the sequence
21+
let first_word_idx = tokens.iter().position(|t| t.kind.is_word());
22+
23+
let Some(first_word_idx) = first_word_idx else {
24+
return;
25+
};
26+
27+
// Check all words after the first one
28+
for (idx, token) in tokens.iter().enumerate() {
29+
// Skip the first word (it should be capitalized)
30+
if idx <= first_word_idx {
31+
continue;
32+
}
33+
34+
// Only check actual words
35+
if !token.kind.is_word() {
36+
continue;
37+
}
38+
39+
// Check if the word is capitalized
40+
let word_chars = document.get_span_content(&token.span);
41+
let Some(first_char) = word_chars.first() else {
42+
continue;
43+
};
44+
45+
// Skip if not capitalized
46+
if !first_char.is_uppercase() {
47+
continue;
48+
}
49+
50+
// Skip proper nouns - these should be capitalized
51+
if token.kind.is_proper_noun() {
52+
continue;
53+
}
54+
55+
// Skip words that are all uppercase (likely acronyms/initialisms)
56+
if word_chars
57+
.iter()
58+
.all(|c| !c.is_alphabetic() || c.is_uppercase())
59+
{
60+
continue;
61+
}
62+
63+
// Skip words after a colon (might be starting a new clause)
64+
if let Some(prev_non_ws) = tokens[..idx].iter().rev().find(|t| !t.kind.is_whitespace())
65+
&& prev_non_ws.kind.is_punctuation()
66+
{
67+
let prev_chars = document.get_span_content(&prev_non_ws.span);
68+
if prev_chars == [':'] {
69+
continue;
70+
}
71+
}
72+
73+
// Skip single-letter capitalizations (often used for proper context like "Plan A")
74+
if word_chars.len() == 1 {
75+
continue;
76+
}
77+
78+
// Skip words after opening quotes (might be a quoted sentence start)
79+
if let Some(prev_non_ws) = tokens[..idx].iter().rev().find(|t| !t.kind.is_whitespace())
80+
&& prev_non_ws.kind.is_quote()
81+
{
82+
continue;
83+
}
84+
85+
// Check if this word follows a sentence terminator within the same sequence
86+
// (This handles cases where parsing might not have split sentences correctly)
87+
let has_terminator_before = tokens[first_word_idx + 1..idx]
88+
.iter()
89+
.any(|t| t.kind.is_sentence_terminator());
90+
91+
if has_terminator_before {
92+
continue;
93+
}
94+
95+
// Create the lowercase suggestion
96+
let mut replacement_chars = word_chars.to_vec();
97+
replacement_chars[0] = replacement_chars[0].to_ascii_lowercase();
98+
99+
lints.push(Lint {
100+
span: token.span,
101+
lint_kind: LintKind::Capitalization,
102+
suggestions: vec![Suggestion::ReplaceWith(replacement_chars)],
103+
priority: 63,
104+
message: "This word is capitalized but does not appear to be a proper noun. Consider using lowercase.".to_string(),
105+
});
106+
}
107+
}
108+
}
109+
110+
impl Linter for SentenceCasing {
111+
fn lint(&mut self, document: &Document) -> Vec<Lint> {
112+
let mut lints = Vec::new();
113+
114+
// Check headings
115+
for heading in document.iter_headings() {
116+
self.check_tokens(heading, document, &mut lints);
117+
}
118+
119+
// Check regular sentences (but skip those in headings)
120+
for paragraph in document.iter_paragraphs() {
121+
// Skip paragraphs that are headings (they're already checked above)
122+
if paragraph.iter().any(|t| t.kind.is_heading_start()) {
123+
continue;
124+
}
125+
126+
for sentence in paragraph.iter_sentences() {
127+
self.check_tokens(sentence, document, &mut lints);
128+
}
129+
}
130+
131+
lints
132+
}
133+
134+
fn description(&self) -> &'static str {
135+
"Flags words that are capitalized mid-sentence or mid-heading but are not proper nouns."
136+
}
137+
}
138+
139+
#[cfg(test)]
mod tests {
    use super::SentenceCasing;
    use super::super::tests::{assert_lint_count, assert_suggestion_result};

    // --- Sentences: words that must be flagged ---

    /// A single stray capital in the middle of a sentence yields one lint.
    #[test]
    fn catches_mid_sentence_capital() {
        assert_lint_count(
            "The quick Brown fox jumps over the lazy dog.",
            SentenceCasing,
            1,
        );
    }

    /// Every stray capital is reported individually.
    #[test]
    fn multiple_errors() {
        assert_lint_count(
            "The Quick Brown Fox jumps over the Lazy Dog.",
            SentenceCasing,
            4,
        );
    }

    /// Applying the suggestion lowercases the first letter of the word.
    #[test]
    fn fixes_capitalization() {
        assert_suggestion_result(
            "The quick Brown fox.",
            SentenceCasing,
            "The quick brown fox.",
        );
    }

    // --- Sentences: capitals that must be left alone ---

    #[test]
    fn allows_proper_nouns() {
        assert_lint_count("I visited Paris last summer.", SentenceCasing, 0);
    }

    #[test]
    fn allows_names() {
        assert_lint_count("I talked to John yesterday.", SentenceCasing, 0);
    }

    #[test]
    fn allows_sentence_start() {
        assert_lint_count("The fox is quick. The dog is lazy.", SentenceCasing, 0);
    }

    #[test]
    fn allows_acronyms() {
        assert_lint_count("The NASA mission was successful.", SentenceCasing, 0);
    }

    #[test]
    fn allows_after_colon() {
        assert_lint_count("Here is the answer: True or false.", SentenceCasing, 0);
    }

    #[test]
    fn allows_single_letter() {
        assert_lint_count("This is plan A for the mission.", SentenceCasing, 0);
    }

    #[test]
    fn allows_quoted_start() {
        assert_lint_count("She said \"Hello there\" to him.", SentenceCasing, 0);
    }

    // --- Headings ---

    /// Markdown heading with incorrect capitalization after the first word.
    #[test]
    fn catches_heading_mid_word_capital() {
        assert_lint_count("# The Quick Brown Fox", SentenceCasing, 3);
    }

    #[test]
    fn fixes_heading_capitalization() {
        assert_suggestion_result("# The Quick fox", SentenceCasing, "# The quick fox");
    }

    #[test]
    fn allows_heading_proper_nouns() {
        assert_lint_count("# A trip to Paris", SentenceCasing, 0);
    }

    #[test]
    fn allows_heading_start_capital() {
        assert_lint_count("# Introduction to the topic", SentenceCasing, 0);
    }

    #[test]
    fn heading_with_acronym() {
        assert_lint_count("# Working with NASA and SpaceX", SentenceCasing, 0);
    }
}

0 commit comments

Comments
 (0)