Skip to content

Commit 3943c14

Browse files
committed
Don't consider `-` as a word character
Treating `-` as a word character produced some unfortunate subword diffs when mixing words, numbers, and hyphens. Fixes #918
1 parent c2c562f commit 3943c14

File tree

4 files changed

+18
-26
lines changed

4 files changed

+18
-26
lines changed

sample_files/compare.expected

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ sample_files/comma_and_comment_1.js sample_files/comma_and_comment_2.js
4141
0a5ccbcd368607e62eaff0c4ae25049f -
4242

4343
sample_files/comments_1.rs sample_files/comments_2.rs
44-
cc3a5f9134765192e034c65fb9d02026 -
44+
a75827163654d6aed8f837bb586e733c -
4545

4646
sample_files/context_1.rs sample_files/context_2.rs
4747
1a1633bcf672a582867c815381ae1609 -
@@ -65,7 +65,7 @@ sample_files/elisp_contiguous_1.el sample_files/elisp_contiguous_2.el
6565
4a5a33873a4f84ee055d95e1448fba35 -
6666

6767
sample_files/elixir_1.ex sample_files/elixir_2.ex
68-
6bcd4f912e6adfd9cf2f83c602d72415 -
68+
85494310196ac5065b3b4ce1d4b350fd -
6969

7070
sample_files/elm_1.elm sample_files/elm_2.elm
7171
ccc1f4bb568cd72781dbcd623b612c43 -
@@ -86,7 +86,7 @@ sample_files/hare_1.ha sample_files/hare_2.ha
8686
ef6fd59edc55241311a97d21dd81e4c0 -
8787

8888
sample_files/haskell_1.hs sample_files/haskell_2.hs
89-
6791dd931d74391b3d9fb9e351a6de54 -
89+
68fd7f9865c2b1defe05ffd509e08b93 -
9090

9191
sample_files/hcl_1.hcl sample_files/hcl_2.hcl
9292
7c2aaa3a8b401bc007817f5dd608946d -
@@ -98,13 +98,13 @@ sample_files/helpful_1.el sample_files/helpful_2.el
9898
295640aa4cbc23640658a80ad2393ce4 -
9999

100100
sample_files/html_1.html sample_files/html_2.html
101-
64285a8ed6ddecab1e24bcf0ce649b62 -
101+
3cc8b445a56b74f05e1d7bb84874edab -
102102

103103
sample_files/html_simple_1.html sample_files/html_simple_2.html
104104
bb129dce38cd26eac81ca52d2016bade -
105105

106106
sample_files/huge_cpp_1.cpp sample_files/huge_cpp_2.cpp
107-
7f65e42e16ee318bbfc342b8bcc03d2e -
107+
09e8a30ad7be5686e4d03a3e6b2588aa -
108108

109109
sample_files/identical_1.scala sample_files/identical_2.scala
110110
15c5a789e644348cb7e0de051ff4b63e -
@@ -146,7 +146,7 @@ sample_files/lua_1.lua sample_files/lua_2.lua
146146
81ad9478e64494320e96284cb7632ced -
147147

148148
sample_files/makefile_1.mk sample_files/makefile_2.mk
149-
d0572210b5121ce68ac0ce45e43b922b -
149+
4759883325ade33566f2c8afa09e2d82 -
150150

151151
sample_files/many_newlines_1.txt sample_files/many_newlines_2.txt
152152
52ca05855e520876479e6f608c5e7831 -
@@ -164,7 +164,7 @@ sample_files/multiline_string_1.ml sample_files/multiline_string_2.ml
164164
ed80815053ba156505d156277d0f4195 -
165165

166166
sample_files/multiline_string_eof_1.yml sample_files/multiline_string_eof_2.yml
167-
ba8a8e7ed2f4b519feaa391fd05c95fe -
167+
cd9cfd627c28b8ecd7c990adc683281a -
168168

169169
sample_files/nest_1.rs sample_files/nest_2.rs
170170
d3a799fe2cd9d81aa251c96af5cd9711 -
@@ -260,7 +260,7 @@ sample_files/string_subwords_1.el sample_files/string_subwords_2.el
260260
b66e960672189960c2d35ef68b47a195 -
261261

262262
sample_files/strings_1.el sample_files/strings_2.el
263-
fe61803e3391fb14f5a3f05750bb94ff -
263+
26ea57243abb16043088b17bfee482a4 -
264264

265265
sample_files/swift_1.swift sample_files/swift_2.swift
266266
73830b14bd8aacac8d4590a3bed61c40 -
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"name": "foo-d123-pretty-long"
3+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"name": "d123-pretty-long"
3+
}

src/words.rs

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ pub(crate) fn split_words(s: &str) -> Vec<&str> {
1212
for (idx, c) in s.char_indices() {
1313
match word_start {
1414
Some(start) => {
15-
if c.is_alphanumeric() || c == '-' || c == '_' {
15+
if c.is_alphanumeric() || c == '_' {
1616
// Just carry on in this word.
1717
} else {
1818
// Push the previous word, then this non-word character.
@@ -22,7 +22,7 @@ pub(crate) fn split_words(s: &str) -> Vec<&str> {
2222
}
2323
}
2424
None => {
25-
if c.is_alphanumeric() || c == '-' || c == '_' {
25+
if c.is_alphanumeric() || c == '_' {
2626
word_start = Some(idx);
2727
} else {
2828
words.push(&s[idx..idx + c.len_utf8()]);
@@ -47,7 +47,7 @@ pub(crate) fn split_words_and_numbers(s: &str) -> Vec<&str> {
4747
for (idx, c) in s.char_indices() {
4848
match word_start {
4949
Some((start, start_c)) => {
50-
if c.is_alphanumeric() || c == '-' || c == '_' {
50+
if c.is_alphanumeric() || c == '_' {
5151
// Word character, add to the current word if it's
5252
// not a number.
5353
if c.is_ascii_digit() == start_c.is_ascii_digit() {
@@ -65,7 +65,7 @@ pub(crate) fn split_words_and_numbers(s: &str) -> Vec<&str> {
6565
}
6666
}
6767
None => {
68-
if c.is_alphanumeric() || c == '-' || c == '_' {
68+
if c.is_alphanumeric() || c == '_' {
6969
word_start = Some((idx, c));
7070
} else {
7171
words.push(&s[idx..idx + c.len_utf8()]);
@@ -93,13 +93,6 @@ mod tests {
9393
assert_eq!(res, vec!["example", ".", "com"])
9494
}
9595

96-
#[test]
97-
fn test_split_words_hyphens() {
98-
let s = "foo -bar-baz-";
99-
let res = split_words(s);
100-
assert_eq!(res, vec!["foo", " ", "-bar-baz-"])
101-
}
102-
10396
#[test]
10497
fn test_split_words_punctuation() {
10598
let s = "example..";
@@ -149,13 +142,6 @@ mod tests {
149142
assert_eq!(res, vec!["a", "123", "b"])
150143
}
151144

152-
#[test]
153-
fn test_split_words_and_numbers_hyphens() {
154-
let s = "a-b -c-";
155-
let res = split_words_and_numbers(s);
156-
assert_eq!(res, vec!["a-b", " ", "-c-"])
157-
}
158-
159145
#[test]
160146
fn test_split_words_and_numbers_spaces() {
161147
let s = "foo bar";

0 commit comments

Comments
 (0)