Add support for 'xml' keyword in liblognorm.

Jérémie Jourdin · frikilax · commit 048abc096725 · 2021-10-12T14:33:31.000+02:00
diff --git a/configure.ac b/configure.ac
@@ -65,6 +65,7 @@ AC_SUBST(LIBLOGNORM_LIBS)
 # modules we require
 PKG_CHECK_MODULES(LIBESTR, libestr >= 0.0.0)
 PKG_CHECK_MODULES(JSON_C, libfastjson,, )
+PKG_CHECK_MODULES(LIBXML2, libxml-2.0,, )
 # add libestr flags to pkgconfig file for static linking
 AC_SUBST(pkg_config_libs_private, $LIBESTR_LIBS)
 
diff --git a/src/Makefile.am b/src/Makefile.am
@@ -43,7 +43,7 @@ liblognorm_la_SOURCES += \
 	v1_samp.c
 
 liblognorm_la_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) $(LIBESTR_CFLAGS) $(PCRE_CFLAGS)
-liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) -lestr
+liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) -lestr -lxml2
 # info on version-info:
 # http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
 # Note: v2 now starts at version 5, as v1 previously also had 4
diff --git a/src/parser.c b/src/parser.c
@@ -47,6 +47,9 @@
 #include <errno.h>
 #endif
 
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
+
 
 /* how should output values be formatted? */
 enum FMT_MODE {
@@ -75,6 +78,38 @@ hParseInt(const unsigned char **buf, size_t *lenBuf)
 	return i;
 }
 
+/* Credits to https://github.com/katie-snow/xml2json-c
+   This code is under GPL-3.0 License
+*/
+/* Add each element of the sibling list starting at anode to jobj: an
+ * element without child elements becomes a string member (its text
+ * content), any other element a nested object, filled recursively.
+ * NOTE(review): same-named siblings reuse one key - confirm the outcome. */
+static inline void
+xml2jsonc_convert_elements(xmlNode *anode, json_object *jobj)
+{
+	xmlNode *cur_node;
+
+	if (jobj == NULL) /* the caller could not allocate the target object */
+		return;
+	for (cur_node = anode; cur_node; cur_node = cur_node->next) {
+		if (cur_node->type != XML_ELEMENT_NODE)
+			continue; /* text, comments etc. add no member */
+		if (xmlChildElementCount(cur_node) == 0) {
+			/* xmlNodeGetContent() hands us a copy that we must free */
+			xmlChar *content = xmlNodeGetContent(cur_node);
+			json_object_object_add(jobj, (const char *)cur_node->name,
+				json_object_new_string(content ? (const char *)content : ""));
+			xmlFree(content);
+		} else {
+			/* jobj takes over the only reference to the nested object */
+			json_object *child = json_object_new_object();
+			json_object_object_add(jobj, (const char *)cur_node->name, child);
+			xml2jsonc_convert_elements(cur_node->children, child);
+		}
+	}
+}
+
 /* parser _parse interface
  *
  * All parsers receive
@@ -2325,6 +2360,69 @@ PARSER_Parse(v2IPTables)
 	return r;
 }
 
+/**
+ * Parse XML. This parser tries to find XML data inside a message.
+ * If it finds valid XML, it will extract it as a JSON object.
+ *
+ * Note: The XML parser expects a string that begins with '<' and
+ * ends with '>'. Whitespace or any other character at the
+ * beginning or at the end of the string will cause a parse failure.
+ *
+ * Note: If there is extra content after the XML content, the XML
+ * parser will fail. As a workaround, we find the last '>' in the
+ * string and ignore the rest; a message without any '>' is rejected.
+ *
+ * added 2021-02-01 by jeremie.jourdin@advens.fr
+ */
+PARSER_Parse(XML)
+	xmlDocPtr doc = NULL;
+	json_object *json = NULL;
+	char *cstr = NULL;
+	size_t newLen = 0;
+	const char *const start = (const char *) npb->str + *offs;
+
+	/* Find the last occurrence of '>' in the string */
+	const char *const pch = strrchr(start, '>');
+	if(pch == NULL)
+		goto done; /* no '>' at all: cannot be XML */
+
+	/* Truncate the string after the last occurrence of '>' */
+	newLen = (size_t)(pch - start) + 1;
+	cstr = strndup(start, newLen);
+	CHKN(cstr);
+
+	/* The input is an arbitrary log message: keep libxml2 from printing
+	 * parse errors to stderr and from using the network. */
+	doc = xmlReadDoc((const xmlChar *) cstr, NULL, NULL,
+		XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET);
+	free(cstr);
+
+	/* Invalid XML string */
+	if(doc == NULL)
+		goto done;
+
+	/* Now convert XML document into JSON document */
+	json = json_object_new_object();
+	CHKN(json);
+	xml2jsonc_convert_elements(xmlDocGetRootElement(doc), json);
+
+	/* parsing OK */
+	*parsed = newLen;
+	r = 0;
+
+	if(value == NULL) {
+		json_object_put(json);
+	} else {
+		*value = json;
+	}
+
+done:
+	/* no xmlCleanupParser() here: it frees process-wide libxml2 state */
+	if(doc != NULL)
+		xmlFreeDoc(doc);
+	return r;
+}
+
 /**
  * Parse JSON. This parser tries to find JSON data inside a message.
  * If it finds valid JSON, it will extract it. Extra data after the
diff --git a/src/parser.h b/src/parser.h
@@ -78,6 +78,7 @@ PARSERDEF_NO_DATA(MAC48);
 PARSERDEF_NO_DATA(CEF);
 PARSERDEF(CheckpointLEA);
 PARSERDEF_NO_DATA(NameValue);
+PARSERDEF_NO_DATA(XML);
 
 #undef PARSERDEF_NO_DATA
 
diff --git a/src/pdag.c b/src/pdag.c
@@ -99,7 +99,8 @@ static struct ln_parser_info parser_lookup_table[] = {
 	PARSER_ENTRY("string-to", StringTo, 32),
 	PARSER_ENTRY("char-to", CharTo, 32),
 	PARSER_ENTRY("char-sep", CharSeparated, 32),
-	PARSER_ENTRY("string", String, 32)
+	PARSER_ENTRY("string", String, 32),
+        PARSER_ENTRY_NO_DATA("xml", XML, 4)
 };
 #define NPARSERS (sizeof(parser_lookup_table)/sizeof(struct ln_parser_info))
 #define DFLT_USR_PARSER_PRIO 30000 /**< default priority if user has not specified it */
diff --git a/src/pdag.h b/src/pdag.h
@@ -67,6 +67,7 @@ struct ln_type_pdag;
 #define PRS_STRING_TO			27
 #define PRS_CHAR_TO			28
 #define PRS_CHAR_SEP			29
+#define PRS_XML				30
 #endif
 
 #define PRS_CUSTOM_TYPE			254
diff --git a/src/v1_parser.c b/src/v1_parser.c
@@ -43,6 +43,8 @@
 #include <errno.h>
 #endif
 
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
 
 /* some helpers */
 static inline int
@@ -63,6 +65,38 @@ hParseInt(const unsigned char **buf, size_t *lenBuf)
 	return i;
 }
 
+/* Credits to https://github.com/katie-snow/xml2json-c
+   This code is under GPL-3.0 License
+*/
+/* Add each element of the sibling list starting at anode to jobj: an
+ * element without child elements becomes a string member (its text
+ * content), any other element a nested object, filled recursively.
+ * NOTE(review): same-named siblings reuse one key - confirm the outcome. */
+static inline void
+xml2jsonc_convert_elements(xmlNode *anode, json_object *jobj)
+{
+	xmlNode *cur_node;
+
+	if (jobj == NULL) /* the caller could not allocate the target object */
+		return;
+	for (cur_node = anode; cur_node; cur_node = cur_node->next) {
+		if (cur_node->type != XML_ELEMENT_NODE)
+			continue; /* text, comments etc. add no member */
+		if (xmlChildElementCount(cur_node) == 0) {
+			/* xmlNodeGetContent() hands us a copy that we must free */
+			xmlChar *content = xmlNodeGetContent(cur_node);
+			json_object_object_add(jobj, (const char *)cur_node->name,
+				json_object_new_string(content ? (const char *)content : ""));
+			xmlFree(content);
+		} else {
+			/* jobj takes over the only reference to the nested object */
+			json_object *child = json_object_new_object();
+			json_object_object_add(jobj, (const char *)cur_node->name, child);
+			xml2jsonc_convert_elements(cur_node->children, child);
+		}
+	}
+}
+
 /* parsers for the primitive types
  *
  * All parsers receive
@@ -2540,6 +2574,9 @@ PARSER(v2IPTables)
 	return r;
 }
 
+
+
+
 /**
  * Parse JSON. This parser tries to find JSON data inside a message.
  * If it finds valid JSON, it will extract it. Extra data after the
@@ -2593,6 +2630,8 @@ PARSER(JSON)
 }
 
 
+
+
 /* check if a char is valid inside a name of a NameValue list
  * The set of valid characters may be extended if there is good
  * need to do so. We have selected the current set carefully, but