@@ -471,31 +471,43 @@ struct recording_allocator
471471 }
472472};
473473
474- // Regression guard for the array/map reserve amplification fix. An array32/map32 header carries a
475- // 32-bit element count read straight off the wire; before the fix the reader reserved that many
476- // slots unconditionally, so a 5-byte payload claiming 2^32-1 elements triggered a multi-GB
477- // allocation (e.g. ~34 GB for vector<double>) that throws bad_alloc inside the noexcept op and
478- // terminates the process. The reservation is now capped at the bytes remaining, so the reader must
479- // never request an allocation larger than the input and must return unexpected_end on the truncated
480- // body. The behavioral assertions alone are not enough: on an overcommitting OS the oversized
481- // reservation succeeds lazily and the loop still reports unexpected_end, so the recording allocator
482- // is what actually pins the bug.
474+ // Regression guard for the array/map reserve amplification fix. An array32/map32 header carries an
475+ // element count read straight off the wire; before the fix the reader reserved that many slots
476+ // unconditionally, so a tiny payload claiming far more elements than the input could possibly contain
477+ // drove an allocation sized by the wire count rather than the input. The reservation is now capped at
478+ // the bytes remaining, so the reader must never request an allocation larger than the input and must
479+ // return unexpected_end on the truncated body.
480+ //
481+ // The behavioral assertion (unexpected_end) cannot by itself tell the fix from the bug: both the
482+ // capped and the un-capped reader report unexpected_end on a truncated body. The recording allocator
483+ // is what pins the bug, by observing the size the reader actually asked for.
484+ //
485+ // The claimed count is a large-but-safely-allocatable value rather than the maximal 2^32-1. It is
486+ // still vastly larger than any of these few-byte payloads could justify (every element needs at least
487+ // one wire byte), so on un-patched code the recorded request blows past the input-size bound and the
488+ // assertion fails. But it is only a few MB, so the un-patched path fails by a clean assertion on every
489+ // platform instead of std::terminate-ing: a 2^32-1 reserve of vector<double> is a ~34 GB request that
490+ // throws bad_alloc out of the noexcept op on any config where it cannot be served lazily (strict
491+ // overcommit, cgroup-limited, 32-bit, or ASAN builds), aborting the whole test binary rather than
492+ // reporting a failure.
493+ inline constexpr size_t amplified_count = 1'000'000 ; // wire count, far beyond what any payload below backs
483494suite msgpack_reserve_amplification_tests = [] {
484- // array32 tag (0xdd) followed by length 0xFFFFFFFF, with no element data.
485- const std::string array32_bomb{char (0xdd ), char (0xff ), char (0xff ), char (0xff ), char (0xff )};
486- // map32 tag (0xdf) followed by length 0xFFFFFFFF, with no entry data.
487- const std::string map32_bomb{char (0xdf ), char (0xff ), char (0xff ), char (0xff ), char (0xff )};
495+ // array32 tag (0xdd) followed by a big-endian uint32 count of 1,000,000 (0x000F4240), no element data.
496+ const std::string array32_bomb{char (0xdd ), char (0x00 ), char (0x0f ), char (0x42 ), char (0x40 )};
497+ // map32 tag (0xdf) followed by a big-endian uint32 count of 1,000,000 (0x000F4240), no entry data.
498+ const std::string map32_bomb{char (0xdf ), char (0x00 ), char (0x0f ), char (0x42 ), char (0x40 )};
488499
489500 " msgpack array32 reserve is bounded by the input, not the wire count" _test = [&] {
490501 g_largest_alloc_count = 0 ;
491502 std::vector<double , recording_allocator<double >> v{};
492503 const auto ec = glz::read_msgpack (v, array32_bomb);
493504 expect (ec.ec == glz::error_code::unexpected_end)
494505 << " expected unexpected_end, got: " << glz::format_error (ec, array32_bomb);
495- // Without the cap this would be 0xFFFFFFFF; the cap keeps it within the buffer size.
506+ // No element data follows the header, so remaining == 0 and the cap holds the reservation to
507+ // the buffer size. Without the cap this would be amplified_count.
496508 expect (g_largest_alloc_count <= array32_bomb.size ())
497509 << " reserve requested " << g_largest_alloc_count << " elements for a " << array32_bomb.size ()
498- << " -byte payload" ;
510+ << " -byte payload (wire count was " << amplified_count << " ) " ;
499511 };
500512
501513 " msgpack array32 amplification guard is element-type agnostic" _test = [&] {
@@ -514,14 +526,38 @@ suite msgpack_reserve_amplification_tests = [] {
514526 const auto ec = glz::read_msgpack (m, map32_bomb);
515527 expect (ec.ec == glz::error_code::unexpected_end)
516528 << " expected unexpected_end, got: " << glz::format_error (ec, map32_bomb);
517- // The reservation must stay a small constant tied to the input, not the 2^32-1 wire count.
518- // Unlike the vector case, unordered_map implementations pre-allocate a baseline bucket array
519- // (e.g. the MSVC STL starts at 16), so an exact input-size bound does not hold here. A generous
520- // ceiling still cleanly separates a bounded reservation from the multi-billion-bucket bug.
529+ // The reservation must stay a small constant tied to the input, not the amplified_count wire
530+ // count. Unlike the vector case, unordered_map implementations pre-allocate a baseline bucket
531+ // array (e.g. the MSVC STL starts at 16), so an exact input-size bound does not hold here. A
532+ // generous ceiling still cleanly separates a bounded reservation from the wire-count-sized bug.
521533 constexpr size_t sane_bucket_bound = 1024 ;
522534 expect (g_largest_alloc_count < sane_bucket_bound)
523535 << " reserve requested " << g_largest_alloc_count << " buckets for a " << map32_bomb.size ()
524- << " -byte payload" ;
536+ << " -byte payload (wire count was " << amplified_count << " )" ;
537+ };
538+
539+ " msgpack array reserve clamps to remaining bytes, not the wire count" _test = [&] {
540+ // The zero-body bombs above only ever exercise reserve(0) on the patched path. This case drives
541+ // the cap's min(len, remaining) with a NON-ZERO remaining: an array32 claiming amplified_count
542+ // elements followed by 8 one-byte elements (positive fixints) and then truncated. The reader may
543+ // reserve up to the 8 trailing bytes, never the wire count, then walks the present elements and
544+ // reports unexpected_end on the missing remainder.
545+ std::string payload{char (0xdd ), char (0x00 ), char (0x0f ), char (0x42 ), char (0x40 )};
546+ for (char i = 0 ; i < 8 ; ++i) {
547+ payload.push_back (i); // positive fixint -> one wire byte each
548+ }
549+ g_largest_alloc_count = 0 ;
550+ std::vector<double , recording_allocator<double >> v{};
551+ const auto ec = glz::read_msgpack (v, payload);
552+ expect (ec.ec == glz::error_code::unexpected_end)
553+ << " expected unexpected_end, got: " << glz::format_error (ec, payload);
554+ // Bounded by a small multiple of the 8-byte body (reserve(8) plus at most one growth step),
555+ // never the amplified_count wire count. A generous ceiling separates the two unambiguously
556+ // without coupling to a specific growth policy.
557+ constexpr size_t sane_bound = 1024 ;
558+ expect (g_largest_alloc_count < sane_bound)
559+ << " reserve requested " << g_largest_alloc_count
560+ << " elements; expected a small bound tied to the 8-byte body, not " << amplified_count;
525561 };
526562
527563 " msgpack reserve cap leaves valid arrays intact" _test = [] {
0 commit comments