Skip to content

Commit cef2bc6

Browse files
committed
fix: foreign language stripper; ignore Asian commas in Asian text
1 parent 6ac4569 commit cef2bc6

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

harper-core/src/linting/comma_fixes.rs

+12-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::{Lint, LintKind, Linter, Suggestion};
2-
use crate::{Span, TokenKind, TokenStringExt};
2+
use crate::{Span, Token, TokenKind, TokenStringExt};
33

44
const MSG_SPACE_BEFORE: &str = "Don't use a space before a comma.";
55
const MSG_AVOID_ASIAN: &str = "Avoid East Asian commas in English contexts.";
@@ -118,8 +118,13 @@ impl Linter for CommaFixes {
118118
add_space_after = true;
119119
}
120120

121+
// Handles Asian commas in all other contexts
122+
// TokenKind::Unlintable is used for non-English tokens
123+
// to prevent changing commas within CJK text
121124
(None | Some(_), None | Some(_), _, None | Some(_), None | Some(_))
122-
if comma_kind != ',' =>
125+
if comma_kind != ','
126+
&& !matches!(toks.1, Some(Token { kind: TokenKind::Unlintable, .. }))
127+
&& !matches!(toks.3, Some(Token { kind: TokenKind::Unlintable, .. })) =>
123128
{
124129
span = toks.2.span;
125130
suggestion = Suggestion::ReplaceWith(vec![',']);
@@ -242,4 +247,9 @@ mod tests {
242247
fn corrects_asian_comma_between_words_with_space_on_both_sides() {
243248
assert_suggestion_result("foo 、 bar", CommaFixes, "foo, bar")
244249
}
250+
251+
#[test]
252+
fn doesnt_correct_comma_between_non_english_tokens() {
253+
assert_lint_count("严禁采摘花、 果、叶,挖掘树根、草药!", CommaFixes, 0);
254+
}
245255
}

harper-core/src/parsers/isolate_english.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ impl<D: Dictionary> Parser for IsolateEnglish<D> {
2525
let mut english_tokens: Vec<Token> = Vec::with_capacity(tokens.len());
2626

2727
for chunk in tokens.iter_chunks() {
28-
if chunk.len() < 5 || is_likely_english(chunk, source, &self.dict) {
28+
if chunk.len() < 4 || is_likely_english(chunk, source, &self.dict) {
2929
english_tokens.extend_from_slice(chunk);
3030
}
3131
}

0 commit comments

Comments
 (0)