Skip to content

Commit 3d0260d

Browse files
Jérémie Jourdinfrikilax
authored andcommitted
Add support for 'xml' keyword in liblognorm.
1 parent 1e18f60 commit 3d0260d

File tree

9 files changed

+218
-3
lines changed

9 files changed

+218
-3
lines changed

configure.ac

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,28 @@ else
8888
fi
8989
AC_SUBST(FEATURE_REGEXP)
9090

91+
# XML parsing
92+
AC_ARG_ENABLE(xml,
93+
[AS_HELP_STRING([--enable-xml],[Enable XML parsing @<:@default=no@:>@])],
94+
[case "${enableval}" in
95+
yes) enable_xml="yes" ;;
96+
no) enable_xml="no" ;;
97+
*) AC_MSG_ERROR(bad value ${enableval} for --enable-xml) ;;
98+
esac],
99+
[enable_xml="no"]
100+
)
101+
AM_CONDITIONAL(ENABLE_XML, test x$enable_xml = xyes)
102+
if test "$enable_xml" = "yes"; then
103+
PKG_CHECK_MODULES(LIBXML2, libxml2,,
104+
[PKG_CHECK_MODULES(LIBXML2, libxml-2.0,,)]
105+
)
106+
AC_DEFINE(FEATURE_XML, 1, [XML parsing support enabled.])
107+
FEATURE_XML=1
108+
else
109+
FEATURE_XML=0
110+
fi
111+
AC_SUBST(FEATURE_XML)
112+
91113
# debug mode settings
92114
AC_ARG_ENABLE(debug,
93115
[AS_HELP_STRING([--enable-debug],[Enable debug mode @<:@default=no@:>@])],
@@ -189,6 +211,7 @@ echo "*****************************************************"
189211
echo "liblognorm will be compiled with the following settings:"
190212
echo
191213
echo "Regex enabled: $enable_regexp"
214+
echo "XML enabled: $enable_xml"
192215
echo "Advanced Statistics enabled: $enable_advstats"
193216
echo "Testbench enabled: $enable_testbench"
194217
echo "Valgrind enabled: $enable_valgrind"

src/Makefile.am

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ liblognorm_la_SOURCES += \
4242
v1_ptree.c \
4343
v1_samp.c
4444

45-
liblognorm_la_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) $(LIBESTR_CFLAGS) $(PCRE_CFLAGS)
46-
liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) -lestr
45+
liblognorm_la_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) $(LIBESTR_CFLAGS) $(PCRE_CFLAGS) $(LIBXML2_CFLAGS)
46+
liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) $(LIBXML2_LIBS) -lestr
4747
# info on version-info:
4848
# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
4949
# Note: v2 now starts at version 5, as v1 previously also had 4

src/parser.c

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@
4747
#include <errno.h>
4848
#endif
4949

50+
#ifdef FEATURE_XML
51+
#include <libxml/xmlmemory.h>
52+
#include <libxml/parser.h>
53+
#endif
54+
5055

5156
/* how should output values be formatted? */
5257
enum FMT_MODE {
@@ -75,6 +80,41 @@ hParseInt(const unsigned char **buf, size_t *lenBuf)
7580
return i;
7681
}
7782

83+
84+
#ifdef FEATURE_XML
85+
/* Credits to https://github.com/katie-snow/xml2json-c
86+
This code is under GPL-3.0 License
87+
*/
88+
static inline void
89+
xml2jsonc_convert_elements(xmlNode *anode, json_object *jobj)
90+
{
91+
xmlNode *cur_node = NULL;
92+
json_object *cur_jobj = NULL;
93+
json_object *cur_jstr = NULL;
94+
95+
for (cur_node = anode; cur_node; cur_node = cur_node->next)
96+
{
97+
if (cur_node->type == XML_ELEMENT_NODE)
98+
{
99+
if (xmlChildElementCount(cur_node) == 0)
100+
{
101+
/* JSON string object */
102+
cur_jobj = json_object_new_object();
103+
cur_jstr = json_object_new_string((const char *)xmlNodeGetContent(cur_node));
104+
json_object_object_add(jobj, (const char *)cur_node->name, cur_jstr);
105+
}
106+
else
107+
{
108+
/* JSON object */
109+
cur_jobj = json_object_new_object();
110+
json_object_object_add(jobj, (const char *)cur_node->name, json_object_get(cur_jobj));
111+
}
112+
}
113+
xml2jsonc_convert_elements(cur_node->children, cur_jobj);
114+
}
115+
}
116+
#endif /* #ifdef FEATURE_XML */
117+
78118
/* parser _parse interface
79119
*
80120
* All parsers receive
@@ -2325,6 +2365,71 @@ PARSER_Parse(v2IPTables)
23252365
return r;
23262366
}
23272367

2368+
#ifdef FEATURE_XML
2369+
/**
2370+
* Parse XML. This parser tries to find XML data inside a message.
2371+
* If it finds valid XML, it will extract it.
2372+
*
2373+
* Note: The XML Parser expects a string that begins with '<' and
2374+
* ends with '>'. whitespace or any other character at the
2375+
* beginning or at the end of the string will cause a parse failure
2376+
*
2377+
* Note: Is there is extra content after the XML content
2378+
* the parser will fail. A hack consist of finding the
2379+
* last '>' in the string and ignore the rest.
2380+
*
2381+
* added 2021-02-01 by [email protected]
2382+
*/
2383+
PARSER_Parse(XML)
2384+
xmlDocPtr doc = NULL;
2385+
xmlNodePtr root_element = NULL;
2386+
2387+
/* Find the last occurence of '>' in the string */
2388+
char * pch;
2389+
pch=strrchr((const char *) npb->str + *offs, '>');
2390+
2391+
/* Truncate the string after the last occurence of '>' */
2392+
int newLen = pch - (npb->str + *offs) + 1;
2393+
char *cstr = strndup(npb->str + *offs, newLen);
2394+
CHKN(cstr);
2395+
2396+
doc=xmlParseDoc((xmlChar*) cstr);
2397+
free(cstr);
2398+
2399+
/* Invalid XML string */
2400+
if (doc == NULL) {
2401+
goto done;
2402+
}
2403+
2404+
/* Now convert XML document into JSON document */
2405+
root_element = xmlDocGetRootElement(doc);
2406+
json_object *json = NULL;
2407+
json = json_object_new_object();
2408+
xml2jsonc_convert_elements(root_element, json);
2409+
2410+
if(json == NULL)
2411+
goto done;
2412+
2413+
/* parsing OK */
2414+
*parsed = newLen ;
2415+
r = 0;
2416+
2417+
if(value == NULL) {
2418+
json_object_put(json);
2419+
} else {
2420+
*value = json;
2421+
}
2422+
2423+
done:
2424+
if(doc != NULL)
2425+
xmlFreeDoc(doc);
2426+
xmlCleanupParser();
2427+
return r;
2428+
}
2429+
#endif /* #ifdef FEATURE_XML */
2430+
2431+
2432+
23282433
/**
23292434
* Parse JSON. This parser tries to find JSON data inside a message.
23302435
* If it finds valid JSON, it will extract it. Extra data after the

src/parser.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ PARSERDEF_NO_DATA(MAC48);
7878
PARSERDEF_NO_DATA(CEF);
7979
PARSERDEF(CheckpointLEA);
8080
PARSERDEF(NameValue);
81+
#ifdef FEATURE_XML
82+
PARSERDEF_NO_DATA(XML);
83+
#endif
8184

8285
#undef PARSERDEF_NO_DATA
8386

src/pdag.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,10 @@ static struct ln_parser_info parser_lookup_table[] = {
9999
PARSER_ENTRY("string-to", StringTo, 32),
100100
PARSER_ENTRY("char-to", CharTo, 32),
101101
PARSER_ENTRY("char-sep", CharSeparated, 32),
102-
PARSER_ENTRY("string", String, 32)
102+
PARSER_ENTRY("string", String, 32),
103+
#ifdef FEATURE_XML
104+
PARSER_ENTRY_NO_DATA("xml", XML, 4),
105+
#endif
103106
};
104107
#define NPARSERS (sizeof(parser_lookup_table)/sizeof(struct ln_parser_info))
105108
#define DFLT_USR_PARSER_PRIO 30000 /**< default priority if user has not specified it */

src/v1_parser.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ hParseInt(const unsigned char **buf, size_t *lenBuf)
6363
return i;
6464
}
6565

66+
6667
/* parsers for the primitive types
6768
*
6869
* All parsers receive

tests/Makefile.am

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ REGEXP_TESTS = \
158158
field_tokenized_with_regex.sh \
159159
field_regex_while_regex_support_is_disabled.sh
160160

161+
XML_TESTS = \
162+
field_xml.sh \
163+
field_xml_jsoncnf.sh
164+
161165
EXTRA_DIST = exec.sh \
162166
$(TESTS_SHELLSCRIPTS) \
163167
$(REGEXP_TESTS) \
@@ -167,3 +171,7 @@ EXTRA_DIST = exec.sh \
167171
if ENABLE_REGEXP
168172
TESTS += $(REGEXP_TESTS)
169173
endif
174+
175+
if ENABLE_XML
176+
TESTS += $(XML_TESTS)
177+
endif

tests/field_xml.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/bin/bash
2+
# added 2021-11-14 by Theo Bertin
3+
# This file is part of the liblognorm project, released under ASL 2.0
4+
. $srcdir/exec.sh
5+
6+
test_def $0 "XML field"
7+
add_rule 'version=2'
8+
add_rule 'rule=:%field:xml%'
9+
10+
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note>'
11+
assert_output_json_eq '{ "field": { "note": "This is a simple note"} }'
12+
13+
execute '<?xml version="1.0" encoding="UTF-8"?><note><one>first note</one><two>second note</two></note>'
14+
assert_output_json_eq '{ "field": { "note": { "one": "first note", "two": "second note" } } }'
15+
16+
# execute '@cee: {"f1": "1", "f2": 2}'
17+
# assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }'
18+
19+
# execute '@cee: {"f1": "1", "f2": 2}'
20+
# assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }'
21+
22+
#
23+
# Things that MUST NOT work
24+
#
25+
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note> ' # note the trailing space
26+
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note<\/note> ", "unparsed-data": " " }'
27+
28+
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note'
29+
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note" }'
30+
31+
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note2>'
32+
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>" }'
33+
34+
35+
cleanup_tmp_files
36+

tests/field_xml_jsoncnf.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/bin/bash
2+
# added 2021-11-14 by Theo Bertin
3+
# This file is part of the liblognorm project, released under ASL 2.0
4+
. $srcdir/exec.sh
5+
6+
test_def $0 "XML field"
7+
add_rule 'version=2'
8+
add_rule 'rule=:%{"name":"field", "type":"xml"}%'
9+
10+
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note>'
11+
assert_output_json_eq '{ "field": { "note": "This is a simple note"} }'
12+
13+
execute '<?xml version="1.0" encoding="UTF-8"?><note><one>first note</one><two>second note</two></note>'
14+
assert_output_json_eq '{ "field": { "note": { "one": "first note", "two": "second note" } } }'
15+
16+
# execute '@cee: {"f1": "1", "f2": 2}'
17+
# assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }'
18+
19+
# execute '@cee: {"f1": "1", "f2": 2}'
20+
# assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }'
21+
22+
#
23+
# Things that MUST NOT work
24+
#
25+
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note> ' # note the trailing space
26+
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note<\/note> ", "unparsed-data": " " }'
27+
28+
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note'
29+
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note" }'
30+
31+
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note2>'
32+
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>" }'
33+
34+
35+
cleanup_tmp_files
36+

0 commit comments

Comments
 (0)