3939#include "v1_samp.h"
4040
4141#ifdef FEATURE_REGEXP
42- #include <pcre.h>
42+ #define PCRE2_CODE_UNIT_WIDTH 8
43+ #include <pcre2.h>
4344#include <errno.h>
4445#endif
4546
@@ -1266,7 +1267,7 @@ void* tokenized_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
12661267 * significantly slower than other field-types.
12671268 */
12681269struct regex_parser_data_s {
1269- pcre * re ;
1270+ pcre2_code * re ;
12701271 int consume_group ;
12711272 int return_group ;
12721273 int max_groups ;
@@ -1276,40 +1277,54 @@ PARSER(Regex)
12761277 assert (str != NULL);
12771278 assert (offs != NULL );
12781279 assert (parsed != NULL );
1279- unsigned int * ovector = NULL ;
1280+ PCRE2_SIZE * ovector ;
1281+ pcre2_match_data * match_data = NULL ;
12801282
12811283 struct regex_parser_data_s * pData = (struct regex_parser_data_s * ) node -> parser_data ;
12821284 if (pData != NULL ) {
1283- ovector = calloc (pData -> max_groups , sizeof (unsigned int ) * 3 );
1284- if (ovector == NULL ) FAIL (LN_NOMEM );
1285+ match_data = pcre2_match_data_create_from_pattern (pData -> re , NULL );
1286+ if (match_data == NULL ) FAIL (LN_NOMEM );
1287+
1288+ int result = pcre2_match (
1289+ pData -> re , /* the compiled pattern */
1290+ (PCRE2_SPTR )str , /* the subject string */
1291+ (PCRE2_SIZE )strLen , /* the length of the subject */
1292+ (PCRE2_SIZE )* offs , /* start at offset 0 in the subject */
1293+ 0 , /* default options */
1294+ match_data , /* block for storing the result */
1295+ NULL ); /* use default match context */
12851296
1286- int result = pcre_exec (pData -> re , NULL , str , strLen , * offs , 0 , (int * ) ovector , pData -> max_groups * 3 );
12871297 if (result == 0 ) result = pData -> max_groups ;
12881298 if (result > pData -> consume_group ) {
1289- /*please check 'man 3 pcreapi' for cryptic '2 * n' and '2 * n + 1' magic*/
1299+ ovector = pcre2_get_ovector_pointer (match_data );
1300+ printf ("Match succeeded at offset %d\n" , (int )ovector [0 ]);
1301+
1302+ /* please check 'man 3 pcre2api' for cryptic '2 * n' and '2 * n + 1' magic
1303+ * in a nutshell, within the ovector, the first in each pair of values is set to the
1304+ * offset of the first code unit of a substring, and the second is set to the
1305+ * offset of the first code unit after the end of a substring.
1306+ */
12901307 if (ovector [2 * pData -> consume_group ] == * offs ) {
12911308 * parsed = ovector [2 * pData -> consume_group + 1 ] - ovector [2 * pData -> consume_group ];
12921309 if (pData -> consume_group != pData -> return_group ) {
12931310 char * val = NULL ;
12941311 if ((val = strndup (str + ovector [2 * pData -> return_group ],
12951312 ovector [2 * pData -> return_group + 1 ] -
12961313 ovector [2 * pData -> return_group ])) == NULL ) {
1297- free (ovector );
12981314 FAIL (LN_NOMEM );
12991315 }
13001316 * value = json_object_new_string (val );
13011317 free (val );
13021318 if (* value == NULL ) {
1303- free (ovector );
13041319 FAIL (LN_NOMEM );
13051320 }
13061321 }
13071322 }
13081323 }
1309- free (ovector );
13101324 }
13111325 r = 0 ; /* success */
13121326done :
1327+ pcre2_match_data_free (match_data );
13131328 return r ;
13141329}
13151330
@@ -1346,8 +1361,8 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
13461361 char * name = NULL ;
13471362 struct regex_parser_data_s * pData = NULL ;
13481363 const char * unescaped_exp = NULL ;
1349- const char * error = NULL ;
1350- int erroffset = 0 ;
1364+ PCRE2_SIZE erroffset = 0 ;
1365+ int errcode = 0 ;
13511366
13521367
13531368 CHKN (name = es_str2cstr (node -> name , NULL ));
@@ -1365,7 +1380,7 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
13651380 if ((grp_parse_err = regex_parser_configure_consume_and_return_group (args , pData )) != NULL )
13661381 FAIL (LN_BADCONFIG );
13671382
1368- CHKN (pData -> re = pcre_compile ( exp , 0 , & error , & erroffset , NULL ));
1383+ CHKN (pData -> re = pcre2_compile (( PCRE2_SPTR ) exp , PCRE2_ZERO_TERMINATED , 0 , & errcode , & erroffset , NULL ));
13691384
13701385 pData -> max_groups = ((pData -> consume_group > pData -> return_group ) ? pData -> consume_group :
13711386 pData -> return_group ) + 1 ;
@@ -1387,9 +1402,12 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
13871402 ln_dbgprintf (ctx , "couldn't allocate memory for regex-string for field: '%s'" , name );
13881403 else if (grp_parse_err != NULL )
13891404 ln_dbgprintf (ctx , "%s for: '%s'" , grp_parse_err , name );
1390- else if (pData -> re == NULL )
1405+ else if (pData -> re == NULL ) {
1406+ PCRE2_UCHAR errbuffer [256 ];
1407+ pcre2_get_error_message (errcode , errbuffer , sizeof (errbuffer ));
13911408 ln_dbgprintf (ctx , "couldn't compile regex(encountered error '%s' at char '%d' in pattern) "
1392- "for regex-matched field: '%s'" , error , erroffset , name );
1409+ "for regex-matched field: '%s'" , errbuffer , (int )erroffset , name );
1410+ }
13931411 regex_parser_data_destructor ((void * * )& pData );
13941412 }
13951413 if (exp != NULL ) free (exp );
@@ -1401,7 +1419,7 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
14011419void regex_parser_data_destructor (void * * dataPtr ) {
14021420 if ((* dataPtr ) != NULL ) {
14031421 struct regex_parser_data_s * pData = (struct regex_parser_data_s * ) * dataPtr ;
1404- if (pData -> re != NULL ) pcre_free (pData -> re );
1422+ if (pData -> re != NULL ) pcre2_code_free (pData -> re );
14051423 free (pData );
14061424 * dataPtr = NULL ;
14071425 }
0 commit comments