Skip to content

Commit 6990fd1

Browse files
committed
add: deburr
1 parent bbd9017 commit 6990fd1

7 files changed

Lines changed: 143 additions & 4 deletions

File tree

DOC.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2217,6 +2217,19 @@ d('doing lots of uninteresting work');
22172217
d.enabled = false;
22182218
```
22192219

2220+
## deburr
2221+
2222+
Convert Latin-1 Supplement and Latin Extended-A letters to basic Latin letters and remove combining diacritical marks.
2223+
2224+
|Name |Type |Desc |
2225+
|------|------|----------------|
2226+
|str |string|String to deburr|
2227+
|return|string|Deburred string |
2228+
2229+
```javascript
2230+
deburr('déjà vu'); // -> 'deja vu'
2231+
```
2232+
22202233
## decodeUriComponent
22212234

22222235
Better decodeURIComponent that does not throw if input is invalid.

DOC_CN.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2212,6 +2212,19 @@ d('doing lots of uninteresting work');
22122212
d.enabled = false;
22132213
```
22142214

2215+
## deburr
2216+
2217+
转换拉丁语-1补充字母和拉丁语扩展字母-A为基本拉丁字母,并且去除组合变音标记。
2218+
2219+
|参数名|类型|说明|
2220+
|-----|----|---|
2221+
|str|string|要处理的字符串|
2222+
|返回值|string|目标字符串|
2223+
2224+
```javascript
2225+
deburr('déjà vu'); // -> 'deja vu'
2226+
```
2227+
22152228
## decodeUriComponent
22162229

22172230
类似 decodeURIComponent 函数,只是输入不合法时不抛出错误并尽可能地对其进行解码。

index.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,6 +1399,19 @@
13991399
"test": [],
14001400
"demo": true
14011401
},
1402+
"deburr": {
1403+
"description": "Convert Latin-1 Supplement and Latin Extended-A letters to basic Latin letters and remove combining diacritical marks.",
1404+
"dependencies": [],
1405+
"env": [
1406+
"node",
1407+
"browser",
1408+
"miniprogram"
1409+
],
1410+
"test": [
1411+
"node",
1412+
"browser"
1413+
]
1414+
},
14021415
"decodeUriComponent": {
14031416
"description": "Better decodeURIComponent that does not throw if input is invalid.",
14041417
"dependencies": [

src/d/dateFormat.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,10 @@ exports = function(date, mask, utc, gmt) {
123123
Z: gmt
124124
? 'GMT'
125125
: utc
126-
? 'UTC'
127-
: (toStr(date).match(regTimezone) || [''])
128-
.pop()
129-
.replace(regTimezoneClip, ''),
126+
? 'UTC'
127+
: (toStr(date).match(regTimezone) || [''])
128+
.pop()
129+
.replace(regTimezoneClip, ''),
130130
o:
131131
(o > 0 ? '-' : '+') +
132132
padZero(Math.floor(Math.abs(o) / 60) * 100 + (Math.abs(o) % 60), 4),

src/d/deburr.i18n.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
## CN
2+
3+
转换拉丁语-1补充字母和拉丁语扩展字母-A为基本拉丁字母,并且去除组合变音标记。
4+
5+
|参数名|类型|说明|
6+
|-----|----|---|
7+
|str|string|要处理的字符串|
8+
|返回值|string|目标字符串|

src/d/deburr.js

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/* Convert Latin-1 Supplement and Latin Extended-A letters to basic Latin letters and remove combining diacritical marks.
2+
*
3+
* |Name |Type |Desc |
4+
* |------|------|----------------|
5+
* |str |string|String to deburr|
6+
* |return|string|Deburred string |
7+
*/
8+
9+
/* example
10+
* deburr('déjà vu'); // -> 'deja vu'
11+
*/
12+
13+
/* module
14+
* env: all
15+
* test: all
16+
*/
17+
18+
/* typescript
19+
* export declare function deburr(str: string): string;
20+
*/
21+
22+
// Two passes: first swap every letter matched by regLatin for its entry in
// deburredLetters, then delete whatever regComboMark still matches.
exports = function(str) {
    const basicLatin = str.replace(regLatin, function(letter) {
        return deburredLetters[letter];
    });

    return basicLatin.replace(regComboMark, '');
};
27+
28+
// https://github.com/lodash/lodash/blob/es/deburr.js
29+
// Combining marks to strip: U+0300-U+036F, U+FE20-U+FE2F and U+20D0-U+20FF.
// Global flag so that a single replace() call removes every occurrence.
const regComboMark = /[\u0300-\u036f\ufe20-\ufe2f\u20d0-\u20ff]/g;
30+
// Characters that have an entry in deburredLetters: \xc0-\xff without \xd7
// and \xf7 (the multiplication and division signs), plus U+0100-U+017F.
const regLatin = /[\xc0-\xd6\xd8-\xf6\xf8-\xff\u0100-\u017f]/g;
31+
32+
// https://github.com/lodash/lodash/blob/es/_deburrLetter.js
// Lookup table from an accented/extended Latin character to its basic Latin
// replacement. It is filled from compact rows of the form
// [replacement, 'every character that collapses to that replacement'].
const deburredLetters = {};

// prettier-ignore
[
    // Latin-1 Supplement block.
    ['A', '\xc0\xc1\xc2\xc3\xc4\xc5'],
    ['a', '\xe0\xe1\xe2\xe3\xe4\xe5'],
    ['C', '\xc7'], ['c', '\xe7'],
    ['D', '\xd0'], ['d', '\xf0'],
    ['E', '\xc8\xc9\xca\xcb'],
    ['e', '\xe8\xe9\xea\xeb'],
    ['I', '\xcc\xcd\xce\xcf'],
    ['i', '\xec\xed\xee\xef'],
    ['N', '\xd1'], ['n', '\xf1'],
    ['O', '\xd2\xd3\xd4\xd5\xd6\xd8'],
    ['o', '\xf2\xf3\xf4\xf5\xf6\xf8'],
    ['U', '\xd9\xda\xdb\xdc'],
    ['u', '\xf9\xfa\xfb\xfc'],
    ['Y', '\xdd'], ['y', '\xfd\xff'],
    ['Ae', '\xc6'], ['ae', '\xe6'],
    ['Th', '\xde'], ['th', '\xfe'],
    ['ss', '\xdf'],
    // Latin Extended-A block.
    ['A', '\u0100\u0102\u0104'],
    ['a', '\u0101\u0103\u0105'],
    ['C', '\u0106\u0108\u010a\u010c'],
    ['c', '\u0107\u0109\u010b\u010d'],
    ['D', '\u010e\u0110'], ['d', '\u010f\u0111'],
    ['E', '\u0112\u0114\u0116\u0118\u011a'],
    ['e', '\u0113\u0115\u0117\u0119\u011b'],
    ['G', '\u011c\u011e\u0120\u0122'],
    ['g', '\u011d\u011f\u0121\u0123'],
    ['H', '\u0124\u0126'], ['h', '\u0125\u0127'],
    ['I', '\u0128\u012a\u012c\u012e\u0130'],
    ['i', '\u0129\u012b\u012d\u012f\u0131'],
    ['J', '\u0134'], ['j', '\u0135'],
    ['K', '\u0136'], ['k', '\u0137\u0138'],
    ['L', '\u0139\u013b\u013d\u013f\u0141'],
    ['l', '\u013a\u013c\u013e\u0140\u0142'],
    ['N', '\u0143\u0145\u0147\u014a'],
    ['n', '\u0144\u0146\u0148\u014b'],
    ['O', '\u014c\u014e\u0150'],
    ['o', '\u014d\u014f\u0151'],
    ['R', '\u0154\u0156\u0158'],
    ['r', '\u0155\u0157\u0159'],
    ['S', '\u015a\u015c\u015e\u0160'],
    ['s', '\u015b\u015d\u015f\u0161'],
    ['T', '\u0162\u0164\u0166'],
    ['t', '\u0163\u0165\u0167'],
    ['U', '\u0168\u016a\u016c\u016e\u0170\u0172'],
    ['u', '\u0169\u016b\u016d\u016f\u0171\u0173'],
    ['W', '\u0174'], ['w', '\u0175'],
    ['Y', '\u0176'], ['y', '\u0177'], ['Y', '\u0178'],
    ['Z', '\u0179\u017b\u017d'],
    ['z', '\u017a\u017c\u017e'],
    ['IJ', '\u0132'], ['ij', '\u0133'],
    ['Oe', '\u0152'], ['oe', '\u0153'],
    ["'n", '\u0149'], ['s', '\u017f']
].forEach(row => {
    const basic = row[0];
    const variants = row[1];

    // Every character in the row maps to the same replacement.
    for (let i = 0; i < variants.length; i++) {
        deburredLetters[variants[i]] = basic;
    }
});

src/d/deburr.test.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Smoke test: accented Latin-1 letters collapse to their basic Latin form.
it('basic', () => {
    const accented = 'déjà vu';

    expect(deburr(accented)).to.equal('deja vu');
});

0 commit comments

Comments
 (0)