@@ -215,7 +215,8 @@ claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). */
215215#endif
216216
217217#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */
218- #define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
218+ #define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type/convert_length fields */
219+ #define MOD_STR_UNSET UINT8_MAX /* Sentinel length for unset string options */
219220#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
220221#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
221222#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
@@ -518,7 +519,7 @@ enum { MOD_CTC, /* Applies to a compile context */
518519 MOD_OPT , /* Is an option bit */
519520 MOD_OPTMZ , /* Is an optimization directive */
520521 MOD_SIZ , /* Is a PCRE2_SIZE value */
521- MOD_STR }; /* Is a string */
522+ MOD_STR }; /* Is a string; Pascal-encoded with length in first byte */
522523
523524/* Control bits. Some apply to compiling, some to matching, but some can be set
524525either on a pattern or a data line, so they must all be distinct. There are now
@@ -617,15 +618,15 @@ different things in the two cases. */
617618
618619/* Structures for holding modifier information for patterns and subject strings
619620(data). Fields containing modifiers that can be set either for a pattern or a
620- subject must be at the start and in the same order in both cases so that the
621- same offset in the big table below works for both. */
621+ subject (MOD_PD[P]/MOD_PND) must be at the start and in the same order in both
622+ structures so that the same offset in the big table below works for both. */
622623
623624typedef struct patctl { /* Structure for pattern modifiers. */
624625 uint32_t options ; /* Must be in same position as datctl */
625626 uint32_t control ; /* Must be in same position as datctl */
626627 uint32_t control2 ; /* Must be in same position as datctl */
627628 uint32_t jitstack ; /* Must be in same position as datctl */
628- uint8_t replacement [REPLACE_MODSIZE ]; /* So must this */
629+ uint8_t replacement [1 + REPLACE_MODSIZE ]; /* So must this */
629630 uint32_t substitute_skip ; /* Must be in same position as datctl */
630631 uint32_t substitute_stop ; /* Must be in same position as datctl */
631632 uint32_t jit ;
@@ -636,7 +637,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */
636637 uint32_t convert_glob_escape ;
637638 uint32_t convert_glob_separator ;
638639 int32_t regerror_buffsize ;
639- uint8_t locale [LOCALESIZE ];
640+ uint8_t locale [1 + LOCALESIZE ];
640641} patctl ;
641642
642643#define MAXCPYGET 10
@@ -647,10 +648,10 @@ typedef struct datctl { /* Structure for data line modifiers. */
647648 uint32_t control ; /* Must be in same position as patctl */
648649 uint32_t control2 ; /* Must be in same position as patctl */
649650 uint32_t jitstack ; /* Must be in same position as patctl */
650- uint8_t replacement [REPLACE_MODSIZE ]; /* So must this */
651+ uint8_t replacement [1 + REPLACE_MODSIZE ]; /* So must this */
651652 uint32_t substitute_skip ; /* Must be in same position as patctl */
652653 uint32_t substitute_stop ; /* Must be in same position as patctl */
653- uint8_t substitute_subject [SUBSTITUTE_SUBJECT_MODSIZE ];
654+ uint8_t substitute_subject [1 + SUBSTITUTE_SUBJECT_MODSIZE ];
654655 uint32_t startend [2 ];
655656 uint32_t cerror [2 ];
656657 uint32_t cfail [2 ];
@@ -663,6 +664,31 @@ typedef struct datctl { /* Structure for data line modifiers. */
663664 uint8_t get_names [LENCPYGET ];
664665} datctl ;
665666
667+ /* Helper functions to reset the structures to their default state: all bytes zeroed, then the "unset" sentinels and non-zero defaults filled in. */
668+
669+ static void patctl_zero (patctl * p )
670+ {
671+ memset (p , 0 , sizeof (patctl ));
672+ p -> replacement [0 ] = MOD_STR_UNSET ;
673+ p -> convert_type = CONVERT_UNSET ;
674+ p -> convert_length = CONVERT_UNSET ;
675+ p -> regerror_buffsize = -1 ;
676+ p -> locale [0 ] = MOD_STR_UNSET ;
677+ }
678+
679+ static void datctl_zero (datctl * d )
680+ {
681+ memset (d , 0 , sizeof (datctl ));
682+ d -> replacement [0 ] = MOD_STR_UNSET ;
683+ d -> substitute_subject [0 ] = MOD_STR_UNSET ;
684+ d -> oveccount = DEFAULT_OVECCOUNT ;
685+ d -> copy_numbers [0 ] = -1 ;
686+ d -> get_numbers [0 ] = -1 ;
687+ d -> startend [0 ] = d -> startend [1 ] = CFORE_UNSET ;
688+ d -> cerror [0 ] = d -> cerror [1 ] = CFORE_UNSET ;
689+ d -> cfail [0 ] = d -> cfail [1 ] = CFORE_UNSET ;
690+ }
691+
666692/* Ids for which context to modify. */
667693
668694enum { CTX_PAT , /* Active pattern context */
@@ -676,9 +702,18 @@ enum { CTX_PAT, /* Active pattern context */
676702#define CO (name ) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
677703#define MO (name ) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
678704#define PO (name ) offsetof(patctl, name)
679- #define PD (name ) PO(name)
680705#define DO (name ) offsetof(datctl, name)
681706
707+ /* Validate that the offsets for the shared fields do indeed match. */
708+
709+ STATIC_ASSERT (PO (options ) == DO (options ), options_mismatch );
710+ STATIC_ASSERT (PO (control ) == DO (control ), control_mismatch );
711+ STATIC_ASSERT (PO (control2 ) == DO (control2 ), control2_mismatch );
712+ STATIC_ASSERT (PO (jitstack ) == DO (jitstack ), jitstack_mismatch );
713+ STATIC_ASSERT (PO (replacement ) == DO (replacement ), replacement_mismatch );
714+ STATIC_ASSERT (PO (substitute_skip ) == DO (substitute_skip ), substitute_skip_mismatch );
715+ STATIC_ASSERT (PO (substitute_stop ) == DO (substitute_stop ), substitute_stop_mismatch );
716+
682717/* Table of all long-form modifiers. Must be in collating sequence of modifier
683718name because it is searched by binary chop. */
684719
@@ -707,7 +742,7 @@ static modstruct modlist[] = {
707742 { "alt_extended_class" , MOD_PAT , MOD_OPT , PCRE2_ALT_EXTENDED_CLASS , PO (options ) },
708743 { "alt_verbnames" , MOD_PAT , MOD_OPT , PCRE2_ALT_VERBNAMES , PO (options ) },
709744 { "altglobal" , MOD_PND , MOD_CTL , CTL_ALTGLOBAL , PO (control ) },
710- { "anchored" , MOD_PD , MOD_OPT , PCRE2_ANCHORED , PD (options ) },
745+ { "anchored" , MOD_PD , MOD_OPT , PCRE2_ANCHORED , PO (options ) },
711746 { "ascii_all" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_ASCII_ALL , CO (extra_options ) },
712747 { "ascii_bsd" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_ASCII_BSD , CO (extra_options ) },
713748 { "ascii_bss" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_ASCII_BSS , CO (extra_options ) },
@@ -747,7 +782,7 @@ static modstruct modlist[] = {
747782 { "dotstar_anchor" , MOD_CTC , MOD_OPTMZ , PCRE2_DOTSTAR_ANCHOR , 0 },
748783 { "dotstar_anchor_off" , MOD_CTC , MOD_OPTMZ , PCRE2_DOTSTAR_ANCHOR_OFF , 0 },
749784 { "dupnames" , MOD_PATP , MOD_OPT , PCRE2_DUPNAMES , PO (options ) },
750- { "endanchored" , MOD_PD , MOD_OPT , PCRE2_ENDANCHORED , PD (options ) },
785+ { "endanchored" , MOD_PD , MOD_OPT , PCRE2_ENDANCHORED , PO (options ) },
751786 { "escaped_cr_is_lf" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_ESCAPED_CR_IS_LF , CO (extra_options ) },
752787 { "expand" , MOD_PAT , MOD_CTL , CTL_EXPAND , PO (control ) },
753788 { "extended" , MOD_PATP , MOD_OPT , PCRE2_EXTENDED , PO (options ) },
@@ -780,7 +815,7 @@ static modstruct modlist[] = {
780815 { "max_pattern_compiled_length" , MOD_CTC , MOD_SIZ , 0 , CO (max_pattern_compiled_length ) },
781816 { "max_pattern_length" , MOD_CTC , MOD_SIZ , 0 , CO (max_pattern_length ) },
782817 { "max_varlookbehind" , MOD_CTC , MOD_INT , 0 , CO (max_varlookbehind ) },
783- { "memory" , MOD_PD , MOD_CTL , CTL_MEMORY , PD (control ) },
818+ { "memory" , MOD_PD , MOD_CTL , CTL_MEMORY , PO (control ) },
784819 { "multiline" , MOD_PATP , MOD_OPT , PCRE2_MULTILINE , PO (options ) },
785820 { "never_backslash_c" , MOD_PAT , MOD_OPT , PCRE2_NEVER_BACKSLASH_C , PO (options ) },
786821 { "never_callout" , MOD_CTC , MOD_OPT , PCRE2_EXTRA_NEVER_CALLOUT , CO (extra_options ) },
@@ -793,7 +828,7 @@ static modstruct modlist[] = {
793828 { "no_dotstar_anchor" , MOD_PAT , MOD_OPT , PCRE2_NO_DOTSTAR_ANCHOR , PO (options ) },
794829 { "no_jit" , MOD_DATP , MOD_OPT , PCRE2_NO_JIT , DO (options ) },
795830 { "no_start_optimize" , MOD_PATP , MOD_OPT , PCRE2_NO_START_OPTIMIZE , PO (options ) },
796- { "no_utf_check" , MOD_PD , MOD_OPT , PCRE2_NO_UTF_CHECK , PD (options ) },
831+ { "no_utf_check" , MOD_PD , MOD_OPT , PCRE2_NO_UTF_CHECK , PO (options ) },
797832 { "notbol" , MOD_DAT , MOD_OPT , PCRE2_NOTBOL , DO (options ) },
798833 { "notempty" , MOD_DAT , MOD_OPT , PCRE2_NOTEMPTY , DO (options ) },
799834 { "notempty_atstart" , MOD_DAT , MOD_OPT , PCRE2_NOTEMPTY_ATSTART , DO (options ) },
@@ -3617,24 +3652,6 @@ char *arg_subject = NULL;
36173652char * arg_pattern = NULL ;
36183653char * arg_error = NULL ;
36193654
3620- /* The offsets to the options and control bits fields of the pattern and data
3621- control blocks must be the same so that common options and controls such as
3622- "anchored" or "memory" can work for either of them from a single table entry.
3623- We cannot test this till runtime because "offsetof" does not work in the
3624- preprocessor. */
3625-
3626- // TODO This comment above is not correct: we can test it at compile time,
3627- // although it is true that it's not possible using the preprocessor. Use our
3628- // new STATIC_ASSERT macro.
3629-
3630- if (PO (options ) != DO (options ) || PO (control ) != DO (control ) ||
3631- PO (control2 ) != DO (control2 ))
3632- {
3633- cfprintf (clr_test_error , stderr , "** Coding error: "
3634- "options and control offsets for pattern and data must be the same.\n" );
3635- return 1 ;
3636- }
3637-
36383655/* Get buffers from malloc() so that valgrind will check their misuse when
36393656debugging. They grow automatically when very long lines are read. The 16-
36403657and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
@@ -3655,17 +3672,8 @@ _setmode( _fileno( stdout ), _O_BINARY );
36553672
36563673locale_name [0 ] = 0 ;
36573674
3658- memset (& def_patctl , 0 , sizeof (patctl ));
3659- def_patctl .convert_type = CONVERT_UNSET ;
3660- def_patctl .regerror_buffsize = -1 ;
3661-
3662- memset (& def_datctl , 0 , sizeof (datctl ));
3663- def_datctl .oveccount = DEFAULT_OVECCOUNT ;
3664- def_datctl .copy_numbers [0 ] = -1 ;
3665- def_datctl .get_numbers [0 ] = -1 ;
3666- def_datctl .startend [0 ] = def_datctl .startend [1 ] = CFORE_UNSET ;
3667- def_datctl .cerror [0 ] = def_datctl .cerror [1 ] = CFORE_UNSET ;
3668- def_datctl .cfail [0 ] = def_datctl .cfail [1 ] = CFORE_UNSET ;
3675+ patctl_zero (& def_patctl );
3676+ datctl_zero (& def_datctl );
36693677
36703678/* Scan command line options. */
36713679
0 commit comments