Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions include/glaze/cbor/read.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1395,9 +1395,9 @@ namespace glz
if constexpr (N > 0) {
static constexpr auto HashInfo = hash_info<T>;

const auto index = key_len < HashInfo.min_length || key_len > HashInfo.max_length
? N
: decode_hash_with_size<CBOR, T, HashInfo, HashInfo.type>::op(it, end, key_len);
// decode_hash_with_size pre-screens the key length against [min_length, max_length],
// so no call-site length filter is needed here (the buffer bound above still applies).
const auto index = decode_hash_with_size<CBOR, T, HashInfo, HashInfo.type>::op(it, end, key_len);

if (index < N) [[likely]] {
const sv key{reinterpret_cast<const char*>(it), static_cast<size_t>(key_len)};
Expand Down
71 changes: 43 additions & 28 deletions include/glaze/core/reflect.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2666,16 +2666,38 @@ namespace glz
};

template <uint32_t Format, class T, auto HashInfo, hash_type Type>
struct decode_hash_with_size;
struct decode_hash_with_size_impl;

// Single entry point for the in-place key-hash readers (BSON, MessagePack, CBOR, CSV, TOML, plus
// the compile-time-key callers). Every reader below dereferences key bytes only at offsets that
// are guaranteed below min_length (front_hash_bytes <= min_length, unique_index < min_length, and
// the mod4 family reads offset 0 where min_length >= 1) or reads at most n bytes. A key whose
// length is outside [min_length, max_length] therefore cannot match any reflected key and must
// not be hashed, so rejecting it here bounds every reader's key access in one place. This is why
// the individual readers carry no per-read bounds checks; the sole exception is unique_per_length,
// whose length-indexed table yields 255 for absent lengths and so keeps its own end check.
template <uint32_t Format, class T, auto HashInfo, hash_type Type>
struct decode_hash_with_size
{
   // "Not found" result: callers treat any index >= N as "no reflected key matched".
   static constexpr auto N = reflect<T>::size;

   // Maps the n-byte key starting at `it` to a reflected-key index, or N when the key length
   // alone already rules out every reflected key.
   //   it  - first byte of the key
   //   end - forwarded untouched to the selected reader
   //   n   - key length in bytes
   GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&& end, const size_t n) noexcept
   {
      // Only lengths inside [min_length, max_length] are handed to a reader; everything else
      // is answered here without looking at a single key byte.
      const bool length_in_range = (HashInfo.min_length <= n) && (n <= HashInfo.max_length);
      if (length_in_range) [[likely]] {
         using reader = decode_hash_with_size_impl<Format, T, HashInfo, Type>;
         return reader::op(it, end, n);
      }
      return N;
   }
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::single_element>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::single_element>
{
GLZ_ALWAYS_INLINE static constexpr size_t op(auto&&, auto&&, const size_t) noexcept { return 0; }
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::mod4>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::mod4>
{
GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&&, const size_t) noexcept
{
Expand All @@ -2684,7 +2706,7 @@ namespace glz
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::xor_mod4>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::xor_mod4>
{
static constexpr auto first_key_char = reflect<T>::keys[0][0];

Expand All @@ -2695,7 +2717,7 @@ namespace glz
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::minus_mod4>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::minus_mod4>
{
static constexpr auto first_key_char = reflect<T>::keys[0][0];

Expand All @@ -2706,68 +2728,57 @@ namespace glz
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::unique_index>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::unique_index>
{
static constexpr auto N = reflect<T>::size;
static constexpr auto bsize = bucket_size(hash_type::unique_index, N);
static constexpr auto uindex = HashInfo.unique_index;

GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&& end, const size_t n) noexcept
GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&&, const size_t n) noexcept
{
// unique_index < min_length <= n, so it[unique_index] is within the key (the wrapper has
// already rejected lengths outside [min_length, max_length]).
if constexpr (HashInfo.sized_hash) {
if (n == 0 || n > HashInfo.max_length) {
return N; // error
}

const auto h = bitmix(uint16_t(it[HashInfo.unique_index]) | (uint16_t(n) << 8), HashInfo.seed);
return HashInfo.table[h % bsize];
}
else {
if constexpr (N == 2) {
if ((it + uindex) >= end) [[unlikely]] {
return N; // error
}
// Avoids using a hash table
constexpr auto first_key_char = reflect<T>::keys[0][uindex];
return size_t(bool(it[uindex] ^ first_key_char));
}
else {
if ((it + uindex) >= end) [[unlikely]] {
return N; // error
}
return HashInfo.table[uint8_t(it[uindex])];
}
}
}
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::three_element_unique_index>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::three_element_unique_index>
{
static constexpr auto N = reflect<T>::size;
static constexpr auto uindex = HashInfo.unique_index;

GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&& end, const size_t) noexcept
GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&&, const size_t) noexcept
{
if constexpr (uindex > 0) {
if ((it + uindex) >= end) [[unlikely]] {
return N; // error
}
}
// uindex < min_length <= n (the wrapper bounded n), so it[uindex] is within the key.
// Avoids using a hash table
constexpr auto first_key_char = reflect<T>::keys[0][uindex];
return (uint8_t(it[uindex] ^ first_key_char) * HashInfo.seed) % 4;
}
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::front_hash>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::front_hash>
{
static constexpr auto N = reflect<T>::size;
static constexpr auto bsize = bucket_size(hash_type::front_hash, N);

GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&&, const size_t) noexcept
{
// front_hash_bytes <= min_length <= n, so reading the prefix stays within the key (the
// wrapper rejects keys shorter than min_length before dispatching here).
if constexpr (HashInfo.front_hash_bytes == 2) {
uint16_t h;
if consteval {
Expand Down Expand Up @@ -2823,13 +2834,17 @@ namespace glz
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::unique_per_length>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::unique_per_length>
{
static constexpr auto N = reflect<T>::size;
static constexpr auto bsize = bucket_size(hash_type::unique_per_length, N);

GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto&& end, const size_t n) noexcept
{
// Unlike the other readers, the read offset here is indexed by key length and absent
// lengths map to 255 (see unique_per_length_info), so a foreign key whose length falls in
// a gap of [min_length, max_length] would read it[255]. The wrapper's length pre-screen
// does not catch that, so this reader keeps its own end check.
const auto pos = per_length_info<T>.unique_index[uint8_t(n)];
if ((it + pos) >= end) [[unlikely]] {
return N; // error
Expand All @@ -2840,7 +2855,7 @@ namespace glz
};

template <uint32_t Format, class T, auto HashInfo>
struct decode_hash_with_size<Format, T, HashInfo, hash_type::full_flat>
struct decode_hash_with_size_impl<Format, T, HashInfo, hash_type::full_flat>
{
static constexpr auto N = reflect<T>::size;
static constexpr auto bsize = bucket_size(hash_type::full_flat, N);
Expand Down
10 changes: 4 additions & 6 deletions include/glaze/stencil/stencilcount.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,10 @@ namespace glz
static constexpr auto N = reflect<T>::size;
static constexpr auto HashInfo = hash_info<T>;

const auto index = [&] {
if (key.size() < HashInfo.min_length || key.size() > HashInfo.max_length) {
return N;
}
return decode_hash_with_size<STENCIL, T, HashInfo, HashInfo.type>::op(start, end, key.size());
}();
// decode_hash_with_size pre-screens the key length against [min_length, max_length],
// so no call-site length filter is needed here.
const auto index =
decode_hash_with_size<STENCIL, T, HashInfo, HashInfo.type>::op(start, end, key.size());

if (index < N) [[likely]] {
std::string temp{};
Expand Down
149 changes: 149 additions & 0 deletions tests/bson_test/bson_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
#include <array>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <limits>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -1631,4 +1633,151 @@ suite bson_skip_marker_suite = [] {
};
};

// decode_hash_with_size::op is always called as op(key.data(), key.data() + key.size(),
// key.size()) by the in-place decoders (see bson/read.hpp and msgpack/read.hpp). Its length
// pre-screen rejects any key whose length is outside [min_length, max_length] before dispatching to
// a reader, which is the single bound that keeps these readers from dereferencing key bytes past a
// short key:
// - front_hash reads the first front_hash_bytes of the key
// - the sized unique_index path reads it[unique_index]
// - mod4 / xor_mod4 / minus_mod4 dereference the first key byte (*it)
// - three_element_unique_index with unique_index == 0 reads it[0]
// The static_asserts pin the selected hash so each case keeps exercising a distinct reader if the
// hashing-selection logic ever changes.
namespace bson_hash_bounds_tests
{
// front_hash_bytes == 4: distinguished by the first four bytes.
struct front_hash_keys
{
int aaaa{};
int aaab{};
int aaba{};
int bbbb{};
int aabb{};
};
static_assert(glz::hash_info<front_hash_keys>.type == glz::hash_type::front_hash);
static_assert(glz::hash_info<front_hash_keys>.front_hash_bytes == 4);

struct sized_unique_keys
{
int xaa{};
int xab{};
int xabb{};
};
static_assert(glz::hash_info<sized_unique_keys>.type == glz::hash_type::unique_index);
static_assert(glz::hash_info<sized_unique_keys>.sized_hash);
static_assert(glz::hash_info<sized_unique_keys>.unique_index == 2);

struct mod4_keys
{
int x{};
int yy{};
int zzz{};
};
static_assert(glz::hash_info<mod4_keys>.type == glz::hash_type::mod4);

struct three_element_keys
{
int dzz{};
int gzz{};
int fzz{};
};
static_assert(glz::hash_info<three_element_keys>.type == glz::hash_type::three_element_unique_index);
static_assert(glz::hash_info<three_element_keys>.unique_index == 0);

// Reproduce the exact call the in-place decoders make for a foreign key of length n: op(it, it +
// n, n). [key, key + n) is an exact-size heap allocation, so a read past the key lands in ASAN's
// redzone when the bound is missing.
template <class T>
size_t hash_index_of_short_key(const char* key, size_t n)
{
constexpr auto& HashInfo = glz::hash_info<T>;
return glz::decode_hash_with_size<glz::BSON, T, HashInfo, HashInfo.type>::op(key, key + n, n);
}
}

// A foreign key shorter than the bytes its hash reads must be rejected (returning the not-found
// index reflect<T>::size) without reading past the key, and a valid document must still decode.
suite bson_hash_bounds_suite = [] {
   using namespace bson_hash_bounds_tests;

   // --- Foreign keys shorter than the bytes their hash would read --------------------------
   // Each key lives in an exact-size heap allocation; the expected result is the not-found
   // index reflect<T>::size.

   "front_hash short key stays in bounds"_test = [] {
      // front_hash reads 4 bytes; a one-byte key would run three bytes past it.
      auto buffer = std::make_unique<char[]>(1);
      buffer[0] = 'a';
      const auto index = hash_index_of_short_key<front_hash_keys>(buffer.get(), 1);
      expect(index == glz::reflect<front_hash_keys>::size); // no field matches a one-byte key
   };

   "sized unique_index short key stays in bounds"_test = [] {
      // unique_index is 2, so a one-byte key has no it[2]; the read would run two bytes past it.
      auto buffer = std::make_unique<char[]>(1);
      buffer[0] = 'x';
      const auto index = hash_index_of_short_key<sized_unique_keys>(buffer.get(), 1);
      expect(index == glz::reflect<sized_unique_keys>::size);
   };

   "mod4 empty key stays in bounds"_test = [] {
      // mod4 dereferences the first key byte; an empty key has none. The key pointer is the
      // one-past-the-end address of the allocation, so any dereference is out of bounds.
      auto buffer = std::make_unique<char[]>(1);
      const auto index = hash_index_of_short_key<mod4_keys>(buffer.get() + 1, 0);
      expect(index == glz::reflect<mod4_keys>::size);
   };

   "three_element_unique_index empty key stays in bounds"_test = [] {
      // unique_index is 0, so the lookup reads it[0]; an empty key has none.
      auto buffer = std::make_unique<char[]>(1);
      const auto index = hash_index_of_short_key<three_element_keys>(buffer.get() + 1, 0);
      expect(index == glz::reflect<three_element_keys>::size);
   };

   // --- Valid documents must still decode ---------------------------------------------------
   // Every field is checked (not just the first and last) so that a key dispatched to the
   // wrong slot is caught. If encoding fails the test stops after recording the failure,
   // because the empty result must not be dereferenced.

   "front_hash valid keys round-trip"_test = [] {
      const front_hash_keys original{1, 2, 3, 4, 5};
      const auto encoded = glz::write_bson(original);
      expect(encoded.has_value());
      if (!encoded.has_value()) {
         return;
      }

      front_hash_keys decoded{};
      const auto ec = glz::read_bson(decoded, *encoded);
      expect(!ec);
      expect(decoded.aaaa == 1);
      expect(decoded.aaab == 2);
      expect(decoded.aaba == 3);
      expect(decoded.bbbb == 4);
      expect(decoded.aabb == 5);
   };

   "sized unique_index valid keys round-trip"_test = [] {
      const sized_unique_keys original{4, 5, 6};
      const auto encoded = glz::write_bson(original);
      expect(encoded.has_value());
      if (!encoded.has_value()) {
         return;
      }

      sized_unique_keys decoded{};
      const auto ec = glz::read_bson(decoded, *encoded);
      expect(!ec);
      expect(decoded.xaa == 4);
      expect(decoded.xab == 5);
      expect(decoded.xabb == 6);
   };

   "mod4 valid keys round-trip"_test = [] {
      const mod4_keys original{1, 2, 3};
      const auto encoded = glz::write_bson(original);
      expect(encoded.has_value());
      if (!encoded.has_value()) {
         return;
      }

      mod4_keys decoded{};
      const auto ec = glz::read_bson(decoded, *encoded);
      expect(!ec);
      expect(decoded.x == 1);
      expect(decoded.yy == 2);
      expect(decoded.zzz == 3);
   };

   "three_element_unique_index valid keys round-trip"_test = [] {
      const three_element_keys original{7, 8, 9};
      const auto encoded = glz::write_bson(original);
      expect(encoded.has_value());
      if (!encoded.has_value()) {
         return;
      }

      three_element_keys decoded{};
      const auto ec = glz::read_bson(decoded, *encoded);
      expect(!ec);
      expect(decoded.dzz == 7);
      expect(decoded.gzz == 8);
      expect(decoded.fzz == 9);
   };
};

// Program entry point; always reports success.
// NOTE(review): presumably the suites defined in this file run on their own (e.g. during
// static initialization) rather than from main — confirm against the test framework.
int main()
{
   return 0;
}
Loading