Skip to content

Commit d59672a

Browse files
authored
Address three minor TODO comments in the test code (#816)
1 parent 0288afe commit d59672a

File tree

8 files changed

+263
-114
lines changed

8 files changed

+263
-114
lines changed

maint/GenerateUcd.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,6 @@ def write_bitsets(list, item_size):
695695
found = 1
696696

697697
# Add new characters to an existing set
698-
# TODO: make sure the data doesn't overflow a list[]
699698

700699
if found:
701700
found = 0

src/pcre2_convert.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,12 +1119,16 @@ pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
11191119
pcre2_convert_context *ccontext)
11201120
{
11211121
int rc;
1122+
PCRE2_UCHAR null_str[1] = { 0xcd };
11221123
PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
11231124
PCRE2_UCHAR *use_buffer = dummy_buffer;
11241125
PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
11251126
BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
11261127
uint32_t pattype = options & TYPE_OPTIONS;
11271128

1129+
if (pattern == NULL && plength == 0)
1130+
pattern = null_str;
1131+
11281132
if (pattern == NULL || bufflenptr == NULL)
11291133
{
11301134
if (bufflenptr != NULL) *bufflenptr = 0; /* Error offset */

src/pcre2test.c

Lines changed: 50 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). */
215215
#endif
216216

217217
#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */
218-
#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
218+
#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type/convert_length fields */
219+
#define MOD_STR_UNSET UINT8_MAX /* Sentinel length for unset string options */
219220
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
220221
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
221222
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
@@ -518,7 +519,7 @@ enum { MOD_CTC, /* Applies to a compile context */
518519
MOD_OPT, /* Is an option bit */
519520
MOD_OPTMZ, /* Is an optimization directive */
520521
MOD_SIZ, /* Is a PCRE2_SIZE value */
521-
MOD_STR }; /* Is a string */
522+
MOD_STR }; /* Is a string; Pascal-encoded with length in first byte */
522523

523524
/* Control bits. Some apply to compiling, some to matching, but some can be set
524525
either on a pattern or a data line, so they must all be distinct. There are now
@@ -617,15 +618,15 @@ different things in the two cases. */
617618

618619
/* Structures for holding modifier information for patterns and subject strings
619620
(data). Fields containing modifiers that can be set either for a pattern or a
620-
subject must be at the start and in the same order in both cases so that the
621-
same offset in the big table below works for both. */
621+
subject (MOD_PD[P]/MOD_PND) must be at the start and in the same order in both
622+
structures so that the same offset in the big table below works for both. */
622623

623624
typedef struct patctl { /* Structure for pattern modifiers. */
624625
uint32_t options; /* Must be in same position as datctl */
625626
uint32_t control; /* Must be in same position as datctl */
626627
uint32_t control2; /* Must be in same position as datctl */
627628
uint32_t jitstack; /* Must be in same position as datctl */
628-
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
629+
uint8_t replacement[1+REPLACE_MODSIZE]; /* So must this */
629630
uint32_t substitute_skip; /* Must be in same position as datctl */
630631
uint32_t substitute_stop; /* Must be in same position as datctl */
631632
uint32_t jit;
@@ -636,7 +637,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */
636637
uint32_t convert_glob_escape;
637638
uint32_t convert_glob_separator;
638639
int32_t regerror_buffsize;
639-
uint8_t locale[LOCALESIZE];
640+
uint8_t locale[1+LOCALESIZE];
640641
} patctl;
641642

642643
#define MAXCPYGET 10
@@ -647,10 +648,10 @@ typedef struct datctl { /* Structure for data line modifiers. */
647648
uint32_t control; /* Must be in same position as patctl */
648649
uint32_t control2; /* Must be in same position as patctl */
649650
uint32_t jitstack; /* Must be in same position as patctl */
650-
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
651+
uint8_t replacement[1+REPLACE_MODSIZE]; /* So must this */
651652
uint32_t substitute_skip; /* Must be in same position as patctl */
652653
uint32_t substitute_stop; /* Must be in same position as patctl */
653-
uint8_t substitute_subject[SUBSTITUTE_SUBJECT_MODSIZE];
654+
uint8_t substitute_subject[1+SUBSTITUTE_SUBJECT_MODSIZE];
654655
uint32_t startend[2];
655656
uint32_t cerror[2];
656657
uint32_t cfail[2];
@@ -663,6 +664,31 @@ typedef struct datctl { /* Structure for data line modifiers. */
663664
uint8_t get_names[LENCPYGET];
664665
} datctl;
665666

667+
/* Helper functions to zero out the structures. */
668+
669+
/* Put a pattern-modifier block into its "nothing set" state. All bytes are
cleared first, then the fields whose unset marker is not zero are filled in:
element 0 of the replacement and locale strings gets MOD_STR_UNSET, the
convert_type and convert_length fields get CONVERT_UNSET, and
regerror_buffsize gets -1. */

static void patctl_zero(patctl *p)
{
memset(p, 0, sizeof *p);

/* String-valued fields: the sentinel lives in the first byte. */

p->locale[0] = MOD_STR_UNSET;
p->replacement[0] = MOD_STR_UNSET;

/* Numeric fields whose unset value is non-zero. */

p->regerror_buffsize = -1;
p->convert_length = CONVERT_UNSET;
p->convert_type = CONVERT_UNSET;
}
678+
679+
/* Put a data-line modifier block into its "nothing set" state. All bytes are
cleared first, then the fields with non-zero defaults are filled in: element 0
of the replacement and substitute_subject strings gets MOD_STR_UNSET, both
elements of startend, cerror and cfail get CFORE_UNSET, the first entry of
copy_numbers and get_numbers gets -1, and oveccount gets DEFAULT_OVECCOUNT. */

static void datctl_zero(datctl *d)
{
int i;

memset(d, 0, sizeof *d);

/* String-valued fields: the sentinel lives in the first byte. */

d->substitute_subject[0] = MOD_STR_UNSET;
d->replacement[0] = MOD_STR_UNSET;

/* Two-element fields: mark both halves as unset. */

for (i = 0; i < 2; i++)
  {
  d->startend[i] = CFORE_UNSET;
  d->cerror[i] = CFORE_UNSET;
  d->cfail[i] = CFORE_UNSET;
  }

/* Remaining non-zero defaults. */

d->get_numbers[0] = -1;
d->copy_numbers[0] = -1;
d->oveccount = DEFAULT_OVECCOUNT;
}
691+
666692
/* Ids for which context to modify. */
667693

668694
enum { CTX_PAT, /* Active pattern context */
@@ -676,9 +702,18 @@ enum { CTX_PAT, /* Active pattern context */
676702
#define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name)
677703
#define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name)
678704
#define PO(name) offsetof(patctl, name)
679-
#define PD(name) PO(name)
680705
#define DO(name) offsetof(datctl, name)
681706

707+
/* Validate that the offsets for the shared fields do indeed match. */
708+
709+
STATIC_ASSERT(PO(options) == DO(options), options_mismatch);
710+
STATIC_ASSERT(PO(control) == DO(control), control_mismatch);
711+
STATIC_ASSERT(PO(control2) == DO(control2), control2_mismatch);
712+
STATIC_ASSERT(PO(jitstack) == DO(jitstack), jitstack_mismatch);
713+
STATIC_ASSERT(PO(replacement) == DO(replacement), replacement_mismatch);
714+
STATIC_ASSERT(PO(substitute_skip) == DO(substitute_skip), substitute_skip_mismatch);
715+
STATIC_ASSERT(PO(substitute_stop) == DO(substitute_stop), substitute_stop_mismatch);
716+
682717
/* Table of all long-form modifiers. Must be in collating sequence of modifier
683718
name because it is searched by binary chop. */
684719

@@ -707,7 +742,7 @@ static modstruct modlist[] = {
707742
{ "alt_extended_class", MOD_PAT, MOD_OPT, PCRE2_ALT_EXTENDED_CLASS, PO(options) },
708743
{ "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
709744
{ "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
710-
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
745+
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PO(options) },
711746
{ "ascii_all", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_ALL, CO(extra_options) },
712747
{ "ascii_bsd", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSD, CO(extra_options) },
713748
{ "ascii_bss", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ASCII_BSS, CO(extra_options) },
@@ -747,7 +782,7 @@ static modstruct modlist[] = {
747782
{ "dotstar_anchor", MOD_CTC, MOD_OPTMZ, PCRE2_DOTSTAR_ANCHOR, 0 },
748783
{ "dotstar_anchor_off", MOD_CTC, MOD_OPTMZ, PCRE2_DOTSTAR_ANCHOR_OFF, 0 },
749784
{ "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
750-
{ "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
785+
{ "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PO(options) },
751786
{ "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
752787
{ "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
753788
{ "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
@@ -780,7 +815,7 @@ static modstruct modlist[] = {
780815
{ "max_pattern_compiled_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_compiled_length) },
781816
{ "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
782817
{ "max_varlookbehind", MOD_CTC, MOD_INT, 0, CO(max_varlookbehind) },
783-
{ "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
818+
{ "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PO(control) },
784819
{ "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
785820
{ "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
786821
{ "never_callout", MOD_CTC, MOD_OPT, PCRE2_EXTRA_NEVER_CALLOUT, CO(extra_options) },
@@ -793,7 +828,7 @@ static modstruct modlist[] = {
793828
{ "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
794829
{ "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) },
795830
{ "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
796-
{ "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
831+
{ "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PO(options) },
797832
{ "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
798833
{ "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
799834
{ "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
@@ -3617,24 +3652,6 @@ char *arg_subject = NULL;
36173652
char *arg_pattern = NULL;
36183653
char *arg_error = NULL;
36193654

3620-
/* The offsets to the options and control bits fields of the pattern and data
3621-
control blocks must be the same so that common options and controls such as
3622-
"anchored" or "memory" can work for either of them from a single table entry.
3623-
We cannot test this till runtime because "offsetof" does not work in the
3624-
preprocessor. */
3625-
3626-
// TODO This comment above is not correct: we can test it at compile time,
3627-
// although it is true that it's not possible using the preprocessor. Use our
3628-
// new STATIC_ASSERT macro.
3629-
3630-
if (PO(options) != DO(options) || PO(control) != DO(control) ||
3631-
PO(control2) != DO(control2))
3632-
{
3633-
cfprintf(clr_test_error, stderr, "** Coding error: "
3634-
"options and control offsets for pattern and data must be the same.\n");
3635-
return 1;
3636-
}
3637-
36383655
/* Get buffers from malloc() so that valgrind will check their misuse when
36393656
debugging. They grow automatically when very long lines are read. The 16-
36403657
and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
@@ -3655,17 +3672,8 @@ _setmode( _fileno( stdout ), _O_BINARY );
36553672

36563673
locale_name[0] = 0;
36573674

3658-
memset(&def_patctl, 0, sizeof(patctl));
3659-
def_patctl.convert_type = CONVERT_UNSET;
3660-
def_patctl.regerror_buffsize = -1;
3661-
3662-
memset(&def_datctl, 0, sizeof(datctl));
3663-
def_datctl.oveccount = DEFAULT_OVECCOUNT;
3664-
def_datctl.copy_numbers[0] = -1;
3665-
def_datctl.get_numbers[0] = -1;
3666-
def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
3667-
def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
3668-
def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
3675+
patctl_zero(&def_patctl);
3676+
datctl_zero(&def_datctl);
36693677

36703678
/* Scan command line options. */
36713679

0 commit comments

Comments
 (0)