diff --git a/src/libbson/doc/bson_validate_flags_t.rst b/src/libbson/doc/bson_validate_flags_t.rst index 85140f0310..549516d4e0 100644 --- a/src/libbson/doc/bson_validate_flags_t.rst +++ b/src/libbson/doc/bson_validate_flags_t.rst @@ -19,27 +19,39 @@ Synopsis BSON_VALIDATE_DOT_KEYS = (1 << 2), BSON_VALIDATE_UTF8_ALLOW_NULL = (1 << 3), BSON_VALIDATE_EMPTY_KEYS = (1 << 4), + BSON_VALIDATE_CORRUPT = (1 << 30), } bson_validate_flags_t; Description ----------- -``bson_validate_flags_t`` is a set of binary flags which may be combined to specify a level of BSON document validation. +``bson_validate_flags_t`` is a set of binary flags which may be combined to +specify a level of BSON document validation. -A value of ``0``, ``false``, or ``BSON_VALIDATE_NONE`` equivalently requests the minimum applicable level of validation. +A value of ``0``, ``false``, or ``BSON_VALIDATE_NONE`` equivalently requests the +minimum applicable level of validation. -In the context of validation APIs :symbol:`bson_validate()`, :symbol:`bson_validate_with_error()`, and :symbol:`bson_validate_with_error_and_offset()` the minimum validation still guarantees that a document can be successfully traversed by :symbol:`bson_iter_visit_all()`. +In the context of validation APIs :symbol:`bson_validate()`, +:symbol:`bson_validate_with_error()`, and +:symbol:`bson_validate_with_error_and_offset()` the minimum validation still +guarantees that a document can be successfully traversed by +:symbol:`bson_iter_visit_all()`. -Higher level APIs using this type may have different minimum validation levels. For example, ``libmongoc`` functions that take ``bson_validate_flags_t`` use ``0`` to mean the document contents are not visited and malformed headers will not be detected by the client. +Higher level APIs using this type may have different minimum validation levels. 
+For example, ``libmongoc`` functions that take ``bson_validate_flags_t`` use +``0`` to mean the document contents are not visited and malformed headers will +not be detected by the client. -Each defined flag aside from ``BSON_VALIDATE_NONE`` describes an optional validation feature that may be enabled, alone or in combination with other features: - -* ``BSON_VALIDATE_NONE`` Minimum level of validation; in ``libbson``, validates element headers. -* ``BSON_VALIDATE_UTF8`` All keys and string values are checked for invalid UTF-8. -* ``BSON_VALIDATE_UTF8_ALLOW_NULL`` String values are allowed to have embedded NULL bytes. +* ``BSON_VALIDATE_NONE`` Minimum level of validation; in ``libbson``, validates + element headers and UTF-8 strings. +* ``BSON_VALIDATE_UTF8`` Deprecated. (All text is unconditionally checked for UTF-8 validity.) +* ``BSON_VALIDATE_UTF8_ALLOW_NULL`` UTF-8 string values are allowed to have NULL characters. * ``BSON_VALIDATE_DOLLAR_KEYS`` Prohibit keys that start with ``$`` outside of a "DBRef" subdocument. * ``BSON_VALIDATE_DOT_KEYS`` Prohibit keys that contain ``.`` anywhere in the string. * ``BSON_VALIDATE_EMPTY_KEYS`` Prohibit zero-length keys. +* ``BSON_VALIDATE_CORRUPT`` - This is not a validation flag, but will appear as + an error code if validation fails for some other reason not listed above. + Checks for BSON corruption cannot be disabled. .. 
seealso:: diff --git a/src/libbson/src/bson/bson-iter.c b/src/libbson/src/bson/bson-iter.c index 7c51a4945c..22bf4715ec 100644 --- a/src/libbson/src/bson/bson-iter.c +++ b/src/libbson/src/bson/bson-iter.c @@ -2025,6 +2025,7 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ while (_bson_iter_next_internal (iter, 0, &key, &bson_type, &unsupported)) { if (*key && !bson_utf8_validate (key, strlen (key), false)) { iter->err_off = iter->off; + VISIT_CORRUPT (iter, data); break; } @@ -2048,7 +2049,8 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ if (!bson_utf8_validate (utf8, utf8_len, true)) { iter->err_off = iter->off; - return true; + VISIT_CORRUPT (iter, data); + break; } if (VISIT_UTF8 (iter, key, utf8_len, utf8, data)) { @@ -2064,6 +2066,7 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ if (!bson_init_static (&b, docbuf, doclen)) { iter->err_off = iter->off; + VISIT_CORRUPT (iter, data); break; } if (VISIT_DOCUMENT (iter, key, &b, data)) { @@ -2079,6 +2082,7 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ if (!bson_init_static (&b, docbuf, doclen)) { iter->err_off = iter->off; + VISIT_CORRUPT (iter, data); break; } if (VISIT_ARRAY (iter, key, &b, data)) { @@ -2136,8 +2140,10 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ const char *options = NULL; regex = bson_iter_regex (iter, &options); - if (!bson_utf8_validate (regex, strlen (regex), true)) { + if (!bson_utf8_validate (regex, strlen (regex), false) || + !bson_utf8_validate (options, strlen (options), false)) { iter->err_off = iter->off; + VISIT_CORRUPT (iter, data); return true; } @@ -2154,6 +2160,7 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ if (!bson_utf8_validate (collection, collection_len, true)) { iter->err_off = iter->off; + VISIT_CORRUPT (iter, data); return true; } @@ -2169,7 +2176,8 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ if (!bson_utf8_validate (code, code_len, true)) { iter->err_off = iter->off; - return true; + VISIT_CORRUPT (iter, data); 
+ break; } if (VISIT_CODE (iter, key, code_len, code, data)) { @@ -2184,7 +2192,8 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ if (!bson_utf8_validate (symbol, symbol_len, true)) { iter->err_off = iter->off; - return true; + VISIT_CORRUPT (iter, data); + break; } if (VISIT_SYMBOL (iter, key, symbol_len, symbol, data)) { @@ -2202,11 +2211,13 @@ bson_iter_visit_all (bson_iter_t *iter, /* INOUT */ if (!bson_utf8_validate (code, length, true)) { iter->err_off = iter->off; - return true; + VISIT_CORRUPT (iter, data); + break; } if (!bson_init_static (&b, docbuf, doclen)) { iter->err_off = iter->off; + VISIT_CORRUPT (iter, data); break; } if (VISIT_CODEWSCOPE (iter, key, length, code, &b, data)) { diff --git a/src/libbson/src/bson/bson-types.h b/src/libbson/src/bson/bson-types.h index 915b674e38..6f466bb4f9 100644 --- a/src/libbson/src/bson/bson-types.h +++ b/src/libbson/src/bson/bson-types.h @@ -185,25 +185,54 @@ typedef struct { /** - * bson_validate_flags_t: + * @brief Flags and error codes for BSON validation functions. * - * This enumeration is used for validation of BSON documents. It allows - * selective control on what you wish to validate. + * Pass these flag bits to control the behavior of the `bson_validate` family + * of functions. * - * %BSON_VALIDATE_NONE: No additional validation occurs. - * %BSON_VALIDATE_UTF8: Check that strings are valid UTF-8. - * %BSON_VALIDATE_DOLLAR_KEYS: Check that keys do not start with $. - * %BSON_VALIDATE_DOT_KEYS: Check that keys do not contain a period. - * %BSON_VALIDATE_UTF8_ALLOW_NULL: Allow NUL bytes in UTF-8 text. - * %BSON_VALIDATE_EMPTY_KEYS: Prohibit zero-length field names + * Additionally, if validation fails, then the error code set on a `bson_error_t` + * will have the value corresponding to the reason that validation failed. */ typedef enum { + /** + * @brief No special validation behavior specified. 
+ */ BSON_VALIDATE_NONE = 0, + /** + * @brief This flag has no effect + * + * @note Because invalid UTF-8 text is always invalid in BSON, the `bson_validate` + * function will always reject invalid UTF-8 data as corrupt BSON. + */ BSON_VALIDATE_UTF8 = (1 << 0), + /** + * @brief Check that element keys do not begin with an ASCII dollar `$` + */ BSON_VALIDATE_DOLLAR_KEYS = (1 << 1), + /** + * @brief Check that element keys do not contain an ASCII period `.` + */ BSON_VALIDATE_DOT_KEYS = (1 << 2), + /** + * @brief If set then it is *not* an error for a UTF-8 string to contain + * embedded null characters. + * + * By default, `bson_validate` APIs will reject BSON UTF-8 elements that + * contain embedded null characters. + */ BSON_VALIDATE_UTF8_ALLOW_NULL = (1 << 3), + /** + * @brief Check that no element key is a zero-length empty string. + */ BSON_VALIDATE_EMPTY_KEYS = (1 << 4), + /** + * @brief This is not a flag that controls behavior, but is instead used to indicate + * that a BSON document is corrupted in some way. This is the value that will + * appear as an error code. + * + * Passing this as a flag has no effect. + */ + BSON_VALIDATE_CORRUPT = (1 << 30), } bson_validate_flags_t; diff --git a/src/libbson/src/bson/bson.c b/src/libbson/src/bson/bson.c index 006fe2089f..d3e255f4ca 100644 --- a/src/libbson/src/bson/bson.c +++ b/src/libbson/src/bson/bson.c @@ -27,6 +27,7 @@ #include #include +#include #include @@ -2505,19 +2506,19 @@ bson_array_as_canonical_extended_json (const bson_t *bson, size_t *length) #define VALIDATION_ERR(_flag, _msg, ...) 
bson_set_error (&state->error, BSON_ERROR_INVALID, _flag, _msg, __VA_ARGS__) static bool -_bson_iter_validate_utf8 (const bson_iter_t *iter, const char *key, size_t v_utf8_len, const char *v_utf8, void *data) +_bson_iter_validate_utf8 (const bson_iter_t *iter, const char *key, size_t u8len, const char *u8data, void *data) { - bson_validate_state_t *state = data; - bool allow_null; + bson_validate_state_t *const state = data; - if ((state->flags & BSON_VALIDATE_UTF8)) { - allow_null = !!(state->flags & BSON_VALIDATE_UTF8_ALLOW_NULL); + const bool allow_null = !!(state->flags & BSON_VALIDATE_UTF8_ALLOW_NULL); - if (!bson_utf8_validate (v_utf8, v_utf8_len, allow_null)) { - state->err_offset = iter->off; - VALIDATION_ERR (BSON_VALIDATE_UTF8, "invalid utf8 string for key \"%s\"", key); - return true; - } + // Assert: The visitor API already checks that all text is valid UTF-8 + assert (bson_utf8_validate (u8data, u8len, true)); + + if (!allow_null && !bson_utf8_validate (u8data, u8len, false /* disallow null */)) { + state->err_offset = iter->off; + VALIDATION_ERR (BSON_VALIDATE_UTF8_ALLOW_NULL, "UTF-8 string for \"%s\" contains null characters", key); + return true; } if ((state->flags & BSON_VALIDATE_DOLLAR_KEYS)) { @@ -2538,7 +2539,7 @@ _bson_iter_validate_corrupt (const bson_iter_t *iter, void *data) bson_validate_state_t *state = data; state->err_offset = iter->err_off; - VALIDATION_ERR (BSON_VALIDATE_NONE, "%s", "corrupt BSON"); + VALIDATION_ERR (BSON_VALIDATE_CORRUPT, "%s", "corrupt BSON"); } @@ -2601,13 +2602,31 @@ _bson_iter_validate_codewscope ( BSON_UNUSED (v_code_len); BSON_UNUSED (v_code); + // Validate the code string + if (_bson_iter_validate_utf8 (iter, key, v_code_len, v_code, data)) { + return true; + } + if (!bson_validate (v_scope, state->flags, &offset)) { state->err_offset = iter->off + offset; - VALIDATION_ERR (BSON_VALIDATE_NONE, "%s", "corrupt code-with-scope"); - return false; + VALIDATION_ERR (BSON_VALIDATE_CORRUPT, "%s", "corrupt 
code-with-scope"); + return true; } - return true; + return false; +} + +static bool +_bson_iter_validate_dbpointer ( + const bson_iter_t *iter, const char *key, size_t coll_len, const char *coll, const bson_oid_t *oid, void *data) +{ + BSON_UNUSED (key); + BSON_UNUSED (oid); + // Validate the collection name string + if (_bson_iter_validate_utf8 (iter, key, coll_len, coll, data)) { + return true; + } + return false; } @@ -2622,17 +2641,17 @@ static const bson_visitor_t bson_validate_funcs = { NULL, /* visit_double */ _bson_iter_validate_utf8, _bson_iter_validate_document, - _bson_iter_validate_document, /* visit_array */ - NULL, /* visit_binary */ - NULL, /* visit_undefined */ - NULL, /* visit_oid */ - NULL, /* visit_bool */ - NULL, /* visit_date_time */ - NULL, /* visit_null */ - NULL, /* visit_regex */ - NULL, /* visit_dbpoint */ - NULL, /* visit_code */ - NULL, /* visit_symbol */ + _bson_iter_validate_document, /* visit_array */ + NULL, /* visit_binary */ + NULL, /* visit_undefined */ + NULL, /* visit_oid */ + NULL, /* visit_bool */ + NULL, /* visit_date_time */ + NULL, /* visit_null */ + NULL, /* visit_regex */ + _bson_iter_validate_dbpointer, /* visit_dbpointer */ + _bson_iter_validate_utf8, /* visit_code */ + _bson_iter_validate_utf8, /* visit_symbol */ _bson_iter_validate_codewscope, }; diff --git a/src/libbson/tests/test-bson.c b/src/libbson/tests/test-bson.c index 22e835170a..91780dfaec 100644 --- a/src/libbson/tests/test-bson.c +++ b/src/libbson/tests/test-bson.c @@ -19,12 +19,15 @@ #include #include #include +#include #include #include #include "TestSuite.h" #include "test-conveniences.h" + +#include #include /* CDRIVER-2460 ensure the unused old BSON_ASSERT_STATIC macro still compiles */ @@ -1164,8 +1167,8 @@ test_bson_validate (void) ASSERT_ERROR_CONTAINS (error, BSON_ERROR_INVALID, _flag, _msg); \ bson_destroy (b) - VALIDATE_TEST ("overflow2.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("trailingnull.bson", 
BSON_VALIDATE_NONE, 14, BSON_VALIDATE_NONE, "corrupt BSON"); + VALIDATE_TEST ("overflow2.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("trailingnull.bson", BSON_VALIDATE_NONE, 14, BSON_VALIDATE_CORRUPT, "corrupt BSON"); VALIDATE_TEST ("dollarquery.bson", BSON_VALIDATE_DOLLAR_KEYS | BSON_VALIDATE_DOT_KEYS, 4, @@ -1176,28 +1179,28 @@ test_bson_validate (void) 4, BSON_VALIDATE_DOT_KEYS, "keys cannot contain \".\": \"abc.def\""); - VALIDATE_TEST ("overflow3.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_NONE, "corrupt BSON"); + VALIDATE_TEST ("overflow3.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_CORRUPT, "corrupt BSON"); /* same outcome as above, despite different flags */ - VALIDATE_TEST ("overflow3.bson", BSON_VALIDATE_UTF8, 9, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("overflow4.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_NONE, "corrupt BSON"); + VALIDATE_TEST ("overflow3.bson", BSON_VALIDATE_UTF8, 9, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("overflow4.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_CORRUPT, "corrupt BSON"); VALIDATE_TEST ("empty_key.bson", BSON_VALIDATE_EMPTY_KEYS, 4, BSON_VALIDATE_EMPTY_KEYS, "empty key"); - VALIDATE_TEST ("test40.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test41.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test42.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test43.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test44.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test45.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test46.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test47.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test48.bson", BSON_VALIDATE_NONE, 6, 
BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test49.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test50.bson", BSON_VALIDATE_NONE, 10, BSON_VALIDATE_NONE, "corrupt code-with-scope"); - VALIDATE_TEST ("test51.bson", BSON_VALIDATE_NONE, 10, BSON_VALIDATE_NONE, "corrupt code-with-scope"); - VALIDATE_TEST ("test52.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test53.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test54.bson", BSON_VALIDATE_NONE, 12, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test59.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_NONE, "corrupt BSON"); - VALIDATE_TEST ("test60.bson", BSON_VALIDATE_NONE, 4, BSON_VALIDATE_NONE, "corrupt BSON"); + VALIDATE_TEST ("test40.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test41.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test42.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test43.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test44.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test45.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test46.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test47.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test48.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test49.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test50.bson", BSON_VALIDATE_NONE, 10, BSON_VALIDATE_CORRUPT, "corrupt code-with-scope"); + VALIDATE_TEST ("test51.bson", BSON_VALIDATE_NONE, 10, BSON_VALIDATE_CORRUPT, "corrupt code-with-scope"); + VALIDATE_TEST ("test52.bson", 
BSON_VALIDATE_NONE, 9, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test53.bson", BSON_VALIDATE_NONE, 6, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test54.bson", BSON_VALIDATE_NONE, 12, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test59.bson", BSON_VALIDATE_NONE, 9, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + VALIDATE_TEST ("test60.bson", BSON_VALIDATE_NONE, 4, BSON_VALIDATE_CORRUPT, "corrupt BSON"); /* DBRef validation */ b = BCON_NEW ("my_dbref", "{", "$ref", BCON_UTF8 ("collection"), "$id", BCON_INT32 (1), "}"); @@ -1230,6 +1233,113 @@ test_bson_validate (void) error, BSON_ERROR_INVALID, BSON_VALIDATE_DOLLAR_KEYS, "invalid key within DBRef subdocument: \"extra\""); bson_destroy (b); + { + // Invalid UTF-8 element key + error = (bson_error_t){0}; + bsonBuildDecl (tmp, kv ("foo \xff bar", int32 (42))); + mlib_check (!bson_validate_with_error_and_offset (&tmp, 0, &offset, &error)); + mlib_check (offset, eq, 4); + ASSERT_ERROR_CONTAINS (error, BSON_ERROR_INVALID, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + bson_destroy (&tmp); + } + + { + // Invalid UTF-8 text element + bsonBuildDecl (tmp, kv ("foo", cstr ("bar \xff baz"))); + // No validation flags are passed, yet invalid UTF-8 is still rejected as corrupt + mlib_check (!bson_validate_with_error_and_offset (&tmp, 0, &offset, &error)); + mlib_check (offset, eq, 4); + ASSERT_ERROR_CONTAINS (error, BSON_ERROR_INVALID, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + bson_destroy (&tmp); + } + + { + // Valid UTF-8 with an embedded nul + bsonBuildDecl (tmp, kv ("foo", utf8_w_len ("bar \x00 baz", 9))); + // By default, nul chars are rejected: + mlib_check (!bson_validate_with_error_and_offset (&tmp, 0, &offset, &error)); + mlib_check (offset, eq, 4); + ASSERT_ERROR_CONTAINS ( + error, BSON_ERROR_INVALID, BSON_VALIDATE_UTF8_ALLOW_NULL, "\"foo\" contains null characters"); + + // allow-null: + mlib_check (bson_validate_with_error_and_offset (&tmp, BSON_VALIDATE_UTF8_ALLOW_NULL, NULL, NULL)); + bson_destroy (&tmp); + } + + { + // Invalid 
UTF-8 in regex + bson_t tmp = BSON_INITIALIZER; + bson_append_regex (&tmp, "foo", -1, "abc \xff 123", "gi"); + mlib_check (!bson_validate_with_error_and_offset (&tmp, 0, NULL, &error)); + ASSERT_ERROR_CONTAINS (error, BSON_ERROR_INVALID, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + bson_destroy (&tmp); + } + + { + // Invalid regex options + // clang-format off + const uint8_t bytes[] = { + // header + 18, 0, 0, 0, + 11, // regex tag + 'f', 'o', 'o', 0, // key + 'a', 'b', 'c', 0, // regex + 'g', 'i', 0xff, 0, // Options with illegal byte + 0, // null + }; + // clang-format on + bson_t tmp; + mlib_check (bson_init_static (&tmp, bytes, sizeof bytes)); + mlib_check (!bson_validate_with_error (&tmp, 0, &error)); + ASSERT_ERROR_CONTAINS (error, BSON_ERROR_INVALID, BSON_VALIDATE_CORRUPT, "corrupt BSON"); + bson_destroy (&tmp); + } + + { + // Null byte in code + // clang-format off + const uint8_t bytes[] = { + // header + 21, 0, 0, 0, + 13, // JS code tag + 'j', 's', 0, // key + 8, 0, 0, 0, // strlen + 'f', 'o', 'o', 0, 'b', 'a', 'r', 0, // string with embedded nul + 0, // null + }; + // clang-format on + bson_t tmp; + mlib_check (bson_init_static (&tmp, bytes, sizeof bytes)); + mlib_check (!bson_validate_with_error (&tmp, 0, &error)); + ASSERT_ERROR_CONTAINS ( + error, BSON_ERROR_INVALID, BSON_VALIDATE_UTF8_ALLOW_NULL, "\"js\" contains null characters"); + mlib_check (bson_validate (&tmp, BSON_VALIDATE_UTF8_ALLOW_NULL, NULL)); + bson_destroy (&tmp); + } + + { + // Null byte in symbol + // clang-format off + const uint8_t bytes[] = { + // header + 22, 0, 0, 0, + 14, // Symbol tag + 's', 'y', 'm', 0, // key + 8, 0, 0, 0, // strlen + 'f', 'o', 'o', 0, 'b', 'a', 'r', 0, // string with embedded nul + 0, // null + }; + // clang-format on + bson_t tmp; + mlib_check (bson_init_static (&tmp, bytes, sizeof bytes)); + mlib_check (!bson_validate_with_error (&tmp, 0, &error)); + ASSERT_ERROR_CONTAINS ( + error, BSON_ERROR_INVALID, BSON_VALIDATE_UTF8_ALLOW_NULL, "\"sym\" contains null 
characters"); + mlib_check (bson_validate (&tmp, BSON_VALIDATE_UTF8_ALLOW_NULL, NULL)); + bson_destroy (&tmp); + } + #undef VALIDATE_TEST }