@@ -6735,6 +6735,7 @@ JUMPTO(SLJIT_JUMP, mainloop);
/* Bit mask with bits start..end (inclusive) set, one bit per category index.
   The opening parenthesis must follow the macro name with no whitespace,
   otherwise the preprocessor treats this as an object-like macro.
   Both arguments must be small enough that (end) + 1 is less than the
   width of int, since the shifts are performed on a signed int. */
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
/* Mask covering the category indices ucp_Ll..ucp_Lu (constants defined elsewhere). */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
/* Mask covering the category indices ucp_Nd..ucp_No (constants defined elsewhere). */
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
/* Mask with every bit from 0 up to and including ucp_Zs set.
   NOTE(review): presumably ucp_Zs is the highest category index - confirm. */
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
67386739#endif
67396740
67406741static void check_wordboundary (compiler_common * common , BOOL ucp )
@@ -7615,6 +7616,8 @@ BOOL utf = common->utf;
76157616
76167617#ifdef SUPPORT_UNICODE
76177618sljit_u32 unicode_status = 0 ;
7619+ sljit_u32 category_list = 0 ;
7620+ sljit_u32 items ;
76187621int typereg = TMP1 ;
76197622const sljit_u32 * other_cases ;
76207623#endif /* SUPPORT_UNICODE */
@@ -7633,6 +7636,7 @@ if (cc[-1] & XCL_MAP)
76337636while (* cc != XCL_END )
76347637 {
76357638 compares ++ ;
7639+
76367640 if (* cc == XCL_SINGLE )
76377641 {
76387642 cc ++ ;
@@ -7659,6 +7663,7 @@ while (*cc != XCL_END)
76597663 {
76607664 SLJIT_ASSERT (* cc == XCL_PROP || * cc == XCL_NOTPROP );
76617665 cc ++ ;
7666+
76627667 if (* cc == PT_CLIST && cc [-1 ] == XCL_PROP )
76637668 {
76647669 other_cases = PRIV (ucd_caseless_sets ) + cc [1 ];
@@ -7675,25 +7680,34 @@ while (*cc != XCL_END)
76757680 min = 0 ;
76767681 }
76777682
7683+ items = 0 ;
7684+
76787685 switch (* cc )
76797686 {
76807687 case PT_ANY :
76817688 /* PT_ANY either accepts everything or is ignored. */
76827689 if (cc [-1 ] == XCL_PROP )
7683- {
7684- compile_char1_matchingpath (common , OP_ALLANY , cc , backtracks , FALSE);
7685- if (list == backtracks )
7686- add_jump (compiler , backtracks , JUMP (SLJIT_JUMP ));
7687- return ;
7688- }
7690+ items = UCPCAT_ALL ;
76897691 break ;
76907692
76917693 case PT_LAMP :
7694+ items = UCPCAT3 (ucp_Lu , ucp_Ll , ucp_Lt );
7695+ break ;
7696+
76927697 case PT_GC :
7698+ items = UCPCAT_RANGE (PRIV (ucp_typerange )[(int )cc [1 ] * 2 ], PRIV (ucp_typerange )[(int )cc [1 ] * 2 + 1 ]);
7699+ break ;
7700+
76937701 case PT_PC :
7702+ items = UCPCAT (cc [1 ]);
7703+ break ;
7704+
76947705 case PT_WORD :
7706+ items = UCPCAT2 (ucp_Mn , ucp_Pc ) | UCPCAT_L | UCPCAT_N ;
7707+ break ;
7708+
76957709 case PT_ALNUM :
7696- unicode_status |= XCLASS_HAS_TYPE ;
7710+ items = UCPCAT_L | UCPCAT_N ;
76977711 break ;
76987712
76997713 case PT_SCX :
@@ -7736,11 +7750,32 @@ while (*cc != XCL_END)
77367750 SLJIT_UNREACHABLE ();
77377751 break ;
77387752 }
7753+
7754+ if (items > 0 )
7755+ {
7756+ if (cc [-1 ] == XCL_NOTPROP )
7757+ items ^= UCPCAT_ALL ;
7758+ category_list |= items ;
7759+ unicode_status |= XCLASS_HAS_TYPE ;
7760+ compares -- ;
7761+ }
7762+
77397763 cc += 2 ;
77407764 }
77417765#endif /* SUPPORT_UNICODE */
77427766 }
7743- SLJIT_ASSERT (compares > 0 );
7767+ SLJIT_ASSERT (compares > 0 || category_list > 0 );
7768+
7769+ #ifdef SUPPORT_UNICODE
7770+ if (category_list == UCPCAT_ALL )
7771+ {
7772+ /* All characters are accepted, same as dotall. */
7773+ compile_char1_matchingpath (common , OP_ALLANY , cc , backtracks , FALSE);
7774+ if (list == backtracks )
7775+ add_jump (compiler , backtracks , JUMP (SLJIT_JUMP ));
7776+ return ;
7777+ }
7778+ #endif /* SUPPORT_UNICODE */
77447779
77457780/* We are not necessarily in utf mode, even in 8 bit mode. */
77467781cc = ccbegin ;
@@ -7841,6 +7876,9 @@ if (unicode_status & XCLASS_NEEDS_UCD)
78417876
78427877 ccbegin = cc ;
78437878
7879+ if (category_list != 0 )
7880+ compares ++ ;
7881+
78447882 if (unicode_status & XCLASS_HAS_BIDICL )
78457883 {
78467884 OP1 (SLJIT_MOV_U16 , TMP1 , 0 , SLJIT_MEM1 (TMP2 ), (sljit_sw )PRIV (ucd_records ) + SLJIT_OFFSETOF (ucd_record , scriptx_bidiclass ));
@@ -8045,8 +8083,16 @@ if (unicode_status & XCLASS_NEEDS_UCD)
80458083 if (unicode_status & XCLASS_SAVE_CHAR )
80468084 typereg = RETURN_ADDR ;
80478085
8048- OP1 (SLJIT_MOV_U8 , typereg , 0 , SLJIT_MEM1 (TMP2 ), (sljit_sw )PRIV (ucd_records ) + SLJIT_OFFSETOF (ucd_record , chartype ));
8049- OP2 (SLJIT_SHL , typereg , 0 , SLJIT_IMM , 1 , typereg , 0 );
8086+ OP1 (SLJIT_MOV_U8 , TMP2 , 0 , SLJIT_MEM1 (TMP2 ), (sljit_sw )PRIV (ucd_records ) + SLJIT_OFFSETOF (ucd_record , chartype ));
8087+ OP2 (SLJIT_SHL , typereg , 0 , SLJIT_IMM , 1 , TMP2 , 0 );
8088+
8089+ if (category_list > 0 )
8090+ {
8091+ compares -- ;
8092+ invertcmp = (compares == 0 && list != backtracks );
8093+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , category_list );
8094+ add_jump (compiler , compares > 0 ? list : backtracks , JUMP (SLJIT_NOT_ZERO ^ invertcmp ));
8095+ }
80508096 }
80518097 }
80528098#endif /* SUPPORT_UNICODE */
@@ -8126,26 +8172,16 @@ while (*cc != XCL_END)
81268172 break ;
81278173
81288174 case PT_LAMP :
8129- OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT3 (ucp_Lu , ucp_Ll , ucp_Lt ));
8130- jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8131- break ;
8132-
81338175 case PT_GC :
8134- OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (PRIV (ucp_typerange )[(int )cc [1 ] * 2 ], PRIV (ucp_typerange )[(int )cc [1 ] * 2 + 1 ]));
8135- jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8136- break ;
8137-
81388176 case PT_PC :
8139- OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT (cc [1 ]));
8140- jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8141- break ;
8142-
81438177 case PT_SC :
81448178 case PT_SCX :
81458179 case PT_BOOL :
81468180 case PT_BIDICL :
8181+ case PT_WORD :
8182+ case PT_ALNUM :
81478183 compares ++ ;
8148- /* Do nothing . */
8184+ /* Already handled . */
81498185 break ;
81508186
81518187 case PT_SPACE :
@@ -8165,16 +8201,6 @@ while (*cc != XCL_END)
81658201 jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
81668202 break ;
81678203
8168- case PT_WORD :
8169- OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT2 (ucp_Mn , ucp_Pc ) | UCPCAT_L | UCPCAT_N );
8170- jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8171- break ;
8172-
8173- case PT_ALNUM :
8174- OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_L | UCPCAT_N );
8175- jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8176- break ;
8177-
81788204 case PT_CLIST :
81798205 other_cases = PRIV (ucd_caseless_sets ) + cc [1 ];
81808206
0 commit comments