11#include " ./wrapped_re2.h"
22#include " ./util.h"
33
4+ #include < map>
45#include < memory>
56#include < string>
67#include < unordered_set>
@@ -18,6 +19,47 @@ inline bool isHexadecimal(char ch)
1819 return (' 0' <= ch && ch <= ' 9' ) || (' A' <= ch && ch <= ' Z' ) || (' a' <= ch && ch <= ' z' );
1920}
2021
// Lookup table from the long (and alias) spellings of Unicode General_Category
// values, as written in JavaScript `\p{...}` / `\P{...}` escapes, to the
// one- or two-letter short category code.
//
// translateRegExp() looks the text between the braces up in this table and
// substitutes the short code when a match is found; names that are not listed
// here (including names that already are short codes) are emitted unchanged.
//
// Besides the long names, the extra value aliases that ECMAScript accepts for
// General_Category (Combining_Mark, digit, punct, cntrl) are listed so they
// translate the same way as their canonical long names.
//
// NOTE: the map is intentionally left non-const because the lookup code uses
// operator[] on it, which is not available on a const std::map.
// NOTE(review): the regex engine may not implement every code listed here
// (e.g. "LC", "Cn") - confirm against the engine's syntax reference.
static std::map<std::string, std::string> unicodeClasses = {
    // Letters
    {"Uppercase_Letter", "Lu"},
    {"Lowercase_Letter", "Ll"},
    {"Titlecase_Letter", "Lt"},
    {"Cased_Letter", "LC"},
    {"Modifier_Letter", "Lm"},
    {"Other_Letter", "Lo"},
    {"Letter", "L"},
    // Marks
    {"Nonspacing_Mark", "Mn"},
    {"Spacing_Mark", "Mc"},
    {"Enclosing_Mark", "Me"},
    {"Mark", "M"},
    {"Combining_Mark", "M"},
    // Numbers
    {"Decimal_Number", "Nd"},
    {"digit", "Nd"},
    {"Letter_Number", "Nl"},
    {"Other_Number", "No"},
    {"Number", "N"},
    // Punctuation
    {"Connector_Punctuation", "Pc"},
    {"Dash_Punctuation", "Pd"},
    {"Open_Punctuation", "Ps"},
    {"Close_Punctuation", "Pe"},
    {"Initial_Punctuation", "Pi"},
    {"Final_Punctuation", "Pf"},
    {"Other_Punctuation", "Po"},
    {"Punctuation", "P"},
    {"punct", "P"},
    // Symbols
    {"Math_Symbol", "Sm"},
    {"Currency_Symbol", "Sc"},
    {"Modifier_Symbol", "Sk"},
    {"Other_Symbol", "So"},
    {"Symbol", "S"},
    // Separators
    {"Space_Separator", "Zs"},
    {"Line_Separator", "Zl"},
    {"Paragraph_Separator", "Zp"},
    {"Separator", "Z"},
    // Other
    {"Control", "Cc"},
    {"cntrl", "Cc"},
    {"Format", "Cf"},
    {"Surrogate", "Cs"},
    {"Private_Use", "Co"},
    {"Unassigned", "Cn"},
    {"Other", "C"},
};
62+
2163static bool translateRegExp (const char *data, size_t size, bool multiline, std::vector<char > &buffer)
2264{
2365 std::string result;
@@ -98,6 +140,36 @@ static bool translateRegExp(const char *data, size_t size, bool multiline, std::
98140 result += " \\ u" ;
99141 i += 2 ;
100142 continue ;
143+ case ' p' :
144+ case ' P' :
145+ if (i + 2 < size) {
146+ if (data[i + 2 ] == ' {' ) {
147+ size_t j = i + 3 ;
148+ while (j < size && data[j] != ' }' ) ++j;
149+ if (j < size) {
150+ result += " \\ " ;
151+ result += data[i + 1 ];
152+ std::string name (data + i + 3 , j - i - 3 );
153+ if (unicodeClasses.find (name) != unicodeClasses.end ()) {
154+ name = unicodeClasses[name];
155+ }
156+ if (name.size () == 1 ) {
157+ result += name;
158+ } else {
159+ result += " {" ;
160+ result += name;
161+ result += " }" ;
162+ }
163+ i = j + 1 ;
164+ changed = true ;
165+ continue ;
166+ }
167+ }
168+ }
169+ result += " \\ " ;
170+ result += data[i + 1 ];
171+ i += 2 ;
172+ continue ;
101173 default :
102174 result += " \\ " ;
103175 size_t sym_size = getUtf8CharSize (ch);
0 commit comments