@@ -5481,8 +5481,10 @@ return TRUE;
54815481#define XCLASS_HAS_8BIT_CHARS 0x2
54825482/* XClass has properties. */
54835483#define XCLASS_HAS_PROPS 0x4
5484+ /* XClass has character lists. */
5485+ #define XCLASS_HAS_CHAR_LISTS 0x8
54845486/* XClass matches to all >= 256 characters. */
5485- #define XCLASS_HIGH_ANY 0x8
5487+ #define XCLASS_HIGH_ANY 0x10
54865488
54875489#endif
54885490
@@ -5945,7 +5947,7 @@ for (;; pptr++)
59455947
59465948 if (cranges -> range_list_size > 0 )
59475949 {
5948- uint32_t * ranges = (uint32_t * )(cranges + 1 );
5950+ const uint32_t * ranges = (const uint32_t * )(cranges + 1 );
59495951
59505952 if (ranges [0 ] <= 255 )
59515953 xclass_props |= XCLASS_HAS_8BIT_CHARS ;
@@ -6403,76 +6405,86 @@ for (;; pptr++)
64036405 range += 2 ;
64046406 }
64056407
6406- if (( xclass_props & XCLASS_HIGH_ANY ) != 0 )
6408+ if (cranges -> char_lists_size > 0 )
64076409 {
6408- PCRE2_ASSERT (range + 2 == end && range [0 ] <= 256 &&
6409- range [1 ] >= GET_MAX_CHAR_VALUE (utf ));
6410- should_flip_negation = TRUE;
6411- range = end ;
6410+ /* The cranges structure is still used and freed later. */
6411+ PCRE2_ASSERT ((xclass_props & XCLASS_HIGH_ANY ) == 0 );
6412+ xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS ;
64126413 }
6413-
6414- while (range < end )
6414+ else
64156415 {
6416- uint32_t range_start = range [0 ];
6417- uint32_t range_end = range [1 ];
6416+ if ((xclass_props & XCLASS_HIGH_ANY ) != 0 )
6417+ {
6418+ PCRE2_ASSERT (range + 2 == end && range [0 ] <= 256 &&
6419+ range [1 ] >= GET_MAX_CHAR_VALUE (utf ));
6420+ should_flip_negation = TRUE;
6421+ range = end ;
6422+ }
64186423
6419- range += 2 ;
6420- xclass_props |= XCLASS_REQUIRED ;
6424+ while (range < end )
6425+ {
6426+ uint32_t range_start = range [0 ];
6427+ uint32_t range_end = range [1 ];
64216428
6422- if (range_start < 256 ) range_start = 256 ;
6429+ range += 2 ;
6430+ xclass_props |= XCLASS_REQUIRED ;
6431+
6432+ if (range_start < 256 ) range_start = 256 ;
6433+
6434+ if (lengthptr != NULL )
6435+ {
6436+ #ifdef SUPPORT_UNICODE
6437+ if (utf )
6438+ {
6439+ * lengthptr += 1 ;
6440+
6441+ if (range_start < range_end )
6442+ * lengthptr += PRIV (ord2utf )(range_start , class_uchardata );
6443+
6444+ * lengthptr += PRIV (ord2utf )(range_end , class_uchardata );
6445+ continue ;
6446+ }
6447+ #endif /* SUPPORT_UNICODE */
6448+
6449+ * lengthptr += range_start < range_end ? 3 : 2 ;
6450+ continue ;
6451+ }
64236452
6424- if (lengthptr != NULL )
6425- {
64266453#ifdef SUPPORT_UNICODE
64276454 if (utf )
64286455 {
6429- * lengthptr += 1 ;
6430-
64316456 if (range_start < range_end )
6432- * lengthptr += PRIV (ord2utf )(range_start , class_uchardata );
6457+ {
6458+ * class_uchardata ++ = XCL_RANGE ;
6459+ class_uchardata += PRIV (ord2utf )(range_start , class_uchardata );
6460+ }
6461+ else
6462+ * class_uchardata ++ = XCL_SINGLE ;
64336463
6434- * lengthptr += PRIV (ord2utf )(range_end , class_uchardata );
6464+ class_uchardata += PRIV (ord2utf )(range_end , class_uchardata );
64356465 continue ;
64366466 }
64376467#endif /* SUPPORT_UNICODE */
64386468
6439- * lengthptr += range_start < range_end ? 3 : 2 ;
6440- continue ;
6441- }
6442-
6443- #ifdef SUPPORT_UNICODE
6444- if (utf )
6445- {
6469+ /* Without UTF support, character values are constrained
6470+ by the bit length, and can only be >= 256 for 16-bit and
6471+ 32-bit libraries. */
6472+ #if PCRE2_CODE_UNIT_WIDTH != 8
64466473 if (range_start < range_end )
64476474 {
64486475 * class_uchardata ++ = XCL_RANGE ;
6449- class_uchardata += PRIV ( ord2utf )( range_start , class_uchardata ) ;
6476+ * class_uchardata ++ = range_start ;
64506477 }
64516478 else
64526479 * class_uchardata ++ = XCL_SINGLE ;
64536480
6454- class_uchardata += PRIV (ord2utf )(range_end , class_uchardata );
6455- continue ;
6456- }
6457- #endif /* SUPPORT_UNICODE */
6458-
6459- /* Without UTF support, character values are constrained by the bit length,
6460- and can only be > 256 for 16-bit and 32-bit libraries. */
6461- #if PCRE2_CODE_UNIT_WIDTH != 8
6462- if (range_start < range_end )
6463- {
6464- * class_uchardata ++ = XCL_RANGE ;
6465- * class_uchardata ++ = range_start ;
6481+ * class_uchardata ++ = range_end ;
6482+ #endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
64666483 }
6467- else
6468- * class_uchardata ++ = XCL_SINGLE ;
64696484
6470- * class_uchardata ++ = range_end ;
6471- #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6485+ if ( lengthptr == NULL )
6486+ cb -> cx -> memctl . free ( cranges , cb -> cx -> memctl . memory_data );
64726487 }
6473-
6474- if (lengthptr == NULL )
6475- cb -> cx -> memctl .free (cranges , cb -> cx -> memctl .memory_data );
64766488 }
64776489#endif
64786490
@@ -6502,7 +6514,8 @@ for (;; pptr++)
65026514#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */
65036515 if ((xclass_props & XCLASS_REQUIRED ) != 0 )
65046516 {
6505- * class_uchardata ++ = XCL_END ; /* Marks the end of extra data */
6517+ if ((xclass_props & XCLASS_HAS_CHAR_LISTS ) == 0 )
6518+ * class_uchardata ++ = XCL_END ; /* Marks the end of extra data */
65066519 * code ++ = OP_XCLASS ;
65076520 code += LINK_SIZE ;
65086521 * code = negate_class ? XCL_NOT :0 ;
@@ -6526,6 +6539,101 @@ for (;; pptr++)
65266539 }
65276540 else code = class_uchardata ;
65286541
6542+ if ((xclass_props & XCLASS_HAS_CHAR_LISTS ) != 0 )
6543+ {
6544+ /* Char lists size is an even number,
6545+ because all items are 16 or 32 bit values. */
6546+ size_t char_lists_size = cranges -> char_lists_size ;
6547+ PCRE2_ASSERT ((char_lists_size & 0x1 ) == 0 );
6548+
6549+ if (lengthptr != NULL )
6550+ {
6551+ /* At this point, we don't know the precise location
6552+ so the maximum alignment is added to the length. */
6553+ #if PCRE2_CODE_UNIT_WIDTH == 8
6554+ * lengthptr += 2 /* sizeof(type) in PCRE2_UCHARs */ +
6555+ 3 /* maximum alignment. */ ;
6556+ #elif PCRE2_CODE_UNIT_WIDTH == 16
6557+ * lengthptr += 1 /* sizeof(type) in PCRE2_UCHARs */ +
6558+ 1 /* maximum alignment. */ ;
6559+ char_lists_size >>= 1 ;
6560+ #else
6561+ * lengthptr += 1 /* sizeof(type) in PCRE2_UCHARs */ ;
6562+ /* Padding, when the size is not divisible by 4. */
6563+ if ((char_lists_size & 0x2 ) != 0 )
6564+ char_lists_size += 2 ;
6565+ char_lists_size >>= 2 ;
6566+ #endif
6567+
6568+ if (OFLOW_MAX - * lengthptr < char_lists_size )
6569+ {
6570+ * errorcodeptr = ERR20 ; /* Integer overflow */
6571+ return 0 ;
6572+ }
6573+
6574+ * lengthptr += char_lists_size ;
6575+
6576+ if (* lengthptr > MAX_PATTERN_SIZE )
6577+ {
6578+ * errorcodeptr = ERR20 ; /* Pattern is too large */
6579+ return 0 ;
6580+ }
6581+ }
6582+ else
6583+ {
6584+ uint8_t * char_buffer = (uint8_t * )code ;
6585+
6586+ PCRE2_ASSERT (cranges -> char_lists_types <= XCL_TYPE_MASK );
6587+ #if PCRE2_CODE_UNIT_WIDTH == 8
6588+ /* Encode as high / low bytes. */
6589+ code [0 ] = (uint8_t )(XCL_LIST |
6590+ (cranges -> char_lists_types >> 8 ));
6591+ code [1 ] = (uint8_t )cranges -> char_lists_types ;
6592+ char_buffer += 2 ;
6593+
6594+ /* Compute alignment. */
6595+ if (((uintptr_t )char_buffer & 0x1 ) != 0 )
6596+ {
6597+ code [0 ] |= 1u << (XCL_ALIGNMENT_SHIFT - 8 );
6598+ char_buffer += 1 ;
6599+ }
6600+
6601+ if (((uintptr_t )char_buffer & 0x2 ) != (char_lists_size & 0x2 ))
6602+ {
6603+ code [0 ] |= 2u << (XCL_ALIGNMENT_SHIFT - 8 );
6604+ char_buffer += 2 ;
6605+ }
6606+ #elif PCRE2_CODE_UNIT_WIDTH == 16
6607+ code [0 ] = (PCRE2_UCHAR )(XCL_LIST | cranges -> char_lists_types );
6608+ char_buffer += 2 ;
6609+
6610+ /* Compute alignment. */
6611+ if (((uintptr_t )char_buffer & 0x2 ) != (char_lists_size & 0x2 ))
6612+ {
6613+ code [0 ] |= 2u << XCL_ALIGNMENT_SHIFT ;
6614+ char_buffer += 2 ;
6615+ }
6616+ #else
6617+ code [0 ] = (PCRE2_UCHAR )(XCL_LIST | cranges -> char_lists_types );
6618+ char_buffer += 4 ;
6619+
6620+ /* Padding. */
6621+ if ((char_lists_size & 0x2 ) != 0 )
6622+ {
6623+ code [0 ] |= 2u << XCL_ALIGNMENT_SHIFT ;
6624+ char_buffer += 2 ;
6625+ }
6626+ #endif
6627+ memcpy (char_buffer ,
6628+ (uint8_t * )(cranges + 1 ) + cranges -> char_lists_start ,
6629+ char_lists_size );
6630+
6631+ code = (PCRE2_UCHAR * )(char_buffer + char_lists_size );
6632+
6633+ cb -> cx -> memctl .free (cranges , cb -> cx -> memctl .memory_data );
6634+ }
6635+ }
6636+
65296637 /* Now fill in the complete length of the item */
65306638
65316639 PUT (previous , 1 , (int )(code - previous ));
@@ -6549,7 +6657,7 @@ for (;; pptr++)
65496657 if ((SELECT_VALUE8 (!utf , 0 ) || negate_class != should_flip_negation ) &&
65506658 cb -> classbits .classwords [0 ] == ~(uint32_t )0 )
65516659 {
6552- uint32_t * classwords = cb -> classbits .classwords ;
6660+ const uint32_t * classwords = cb -> classbits .classwords ;
65536661 int i ;
65546662
65556663 for (i = 0 ; i < 8 ; i ++ )
@@ -11222,7 +11330,9 @@ version of the pattern, free it before returning. Also free the list of named
1122211330groups if a larger one had to be obtained, and likewise the group information
1122311331vector. */
1122411332
11333+ #ifdef SUPPORT_UNICODE
1122511334PCRE2_ASSERT (cb .cranges == NULL );
11335+ #endif
1122611336
1122711337EXIT :
1122811338#ifdef SUPPORT_VALGRIND
0 commit comments