@@ -71,31 +71,33 @@ function getUnicodeNonBmpRegExp() {
7171 * Regex for matching unicode symbol and private-use blocks (all ranges below are BMP code points, not astral planes)
7272 * - http://kourge.net/projects/regexp-unicode-block
7373 */
74- return new RegExp (
75- '[' +
76- '\u1D00-\u1D7F' + // Phonetic Extensions
77- '\u1D80-\u1DBF' + // Phonetic Extensions Supplement
78- '\u1DC0-\u1DFF' + // Combining Diacritical Marks Supplement
79- // '\u2000-\u206F' + // General punctuation - handled in -> getPunctuationRegExp
80- '\u20A0-\u20CF' + // Currency symbols
81- '\u20D0-\u20FF' + // Combining Diacritical Marks for Symbols
82- '\u2100-\u214F' + // Letter like symbols
83- '\u2150-\u218F' + // Number forms (eg: Roman numbers)
84- '\u2190-\u21FF' + // Arrows
85- '\u2200-\u22FF' + // Mathematical operators
86- '\u2300-\u23FF' + // Misc Technical
87- '\u2400-\u243F' + // Control pictures
88- '\u2440-\u245F' + // OCR
89- '\u2460-\u24FF' + // Enclosed alpha numerics
90- '\u2500-\u257F' + // Box Drawing
91- '\u2580-\u259F' + // Block Elements
92- '\u25A0-\u25FF' + // Geometric Shapes
93- '\u2600-\u26FF' + // Misc Symbols
94- '\u2700-\u27BF' + // Dingbats
95- '\uE000-\uF8FF' + // Private Use
96- ']' ,
97- 'g'
98- ) ;
74+
75+ /**
76+ * Notes on the unicode blocks used in the regex below:
77+ * '\u1D00-\u1D7F' Phonetic Extensions
78+ * '\u1D80-\u1DBF' Phonetic Extensions Supplement
79+ * '\u1DC0-\u1DFF' Combining Diacritical Marks Supplement
80+ * '\u20A0-\u20CF' Currency symbols
81+ * '\u20D0-\u20FF' Combining Diacritical Marks for Symbols
82+ * '\u2100-\u214F' Letter like symbols
83+ * '\u2150-\u218F' Number forms (e.g. Roman numerals)
84+ * '\u2190-\u21FF' Arrows
85+ * '\u2200-\u22FF' Mathematical operators
86+ * '\u2300-\u23FF' Misc Technical
87+ * '\u2400-\u243F' Control pictures
88+ * '\u2440-\u245F' OCR
89+ * '\u2460-\u24FF' Enclosed alpha numerics
90+ * '\u2500-\u257F' Box Drawing
91+ * '\u2580-\u259F' Block Elements
92+ * '\u25A0-\u25FF' Geometric Shapes
93+ * '\u2600-\u26FF' Misc Symbols
94+ * '\u2700-\u27BF' Dingbats
95+ * '\uE000-\uF8FF' Private Use
96+ *
97+ * Note: block '\u2000-\u206F' (General punctuation) is excluded because it is handled in getPunctuationRegExp
98+ */
99+
100+ return / [ \u1D00 - \u1D7F \u1D80 - \u1DBF \u1DC0 - \u1DFF \u20A0 - \u20CF \u20D0 - \u20FF \u2100 - \u214F \u2150 - \u218F \u2190 - \u21FF \u2200 - \u22FF \u2300 - \u23FF \u2400 - \u243F \u2440 - \u245F \u2460 - \u24FF \u2500 - \u257F \u2580 - \u259F \u25A0 - \u25FF \u2600 - \u26FF \u2700 - \u27BF \uE000 - \uF8FF ] / g;
99101}
100102
101103/**
0 commit comments