diff --git a/doc/configuration.rst b/doc/configuration.rst index a8ad5ad2..2fbb8bb1 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -682,6 +682,16 @@ it becomes resolvable, and ``f`` will contain "12" and ``r`` will contain ":34 5 This also shows the risk associated, as the result obtained may not necessarily be what was intended. +option.dashIsEmpty +~~~~~~~~~~~~~~~~~~ + +If this parameter is True, a string consisting only of a single dash character ('-'), +optionally enclosed in double quotes, is treated as being empty. The default value is False. + +This parameter is meant for e.g. processing web log data, where a dash indicates a missing +value and the user does not want to populate an analysis backend with dashes where "empty +value" is meant. + word #### diff --git a/src/parser.c b/src/parser.c index 7343d844..e5ad7b61 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,6 +1,6 @@ /* * liblognorm - a fast samples-based log normalization library - * Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH. + * Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * @@ -3204,6 +3204,7 @@ struct data_String { unsigned esc_md : 2; } flags; enum { ST_MATCH_EXACT = 0, ST_MATCH_LAZY = 1} matching; + int dashIsEmpty; char qchar_begin; char qchar_end; char perm_chars[256]; // TODO: make this bit-wise, so we need only 32 bytes @@ -3373,6 +3374,14 @@ PARSER_Parse(String) if(value != NULL) { size_t strt; size_t len; + if(data->dashIsEmpty) { + if( (bHaveQuotes && *parsed == 3 && !strncmp(npb->str+(*offs), "\"-\"", 3)) + || (!bHaveQuotes && *parsed == 1 && npb->str[*offs] == '-') ) { + *value = json_object_new_string_len("", 0); + r = 0; + goto done; /* shortcut exit */ + } + } if(bHaveQuotes && data->flags.strip_quotes) { strt = *offs + 1; len = *parsed - 2; /* del begin AND end quote! 
*/ @@ -3498,6 +3507,8 @@ PARSER_Construct(String) r = LN_BADCONFIG; goto done; } + } else if(!strcasecmp(key, "option.dashIsEmpty")) { + data->dashIsEmpty = json_object_get_boolean(val); } else { ln_errprintf(ctx, 0, "invalid param for hexnumber: %s", json_object_to_json_string(val)); diff --git a/tests/Makefile.am b/tests/Makefile.am index 8e2fdb2b..4f483b93 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -67,6 +67,7 @@ TESTS_SHELLSCRIPTS = \ field_string_perm_chars.sh \ field_string_lazy_matching.sh \ field_string_doc_sample_lazy.sh \ + field_string_dashIsEmpty.sh \ field_number.sh \ field_number-fmt_number.sh \ field_number_maxval.sh \ diff --git a/tests/field_string_dashIsEmpty.sh b/tests/field_string_dashIsEmpty.sh new file mode 100755 index 00000000..0ded596a --- /dev/null +++ b/tests/field_string_dashIsEmpty.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# added 2021-06-07 by Rainer Gerhards +# This file is part of the liblognorm project, released under ASL 2.0 +. $srcdir/exec.sh +no_solaris10 +test_def $0 "quoted string with dash" + +add_rule 'version=2' +add_rule 'rule=:% + {"type":"string", "name":"str", "option.dashIsEmpty":True} + %' + +execute '"-"' +assert_output_json_eq '{ "str": ""}' + +reset_rules +add_rule 'version=2' +add_rule 'rule=:% + {"type":"quoted-string", "name":"str"} + %' + +execute '"-"' +assert_output_json_eq '{ "str": "\"-\""}' + + +cleanup_tmp_files +