Skip to content

Commit c7e69fa

Browse files
committed
Read eetf tail/key tags directly to avoid ei_get_type over-read
The list-tail and map-key guards added in this branch only prove one byte (the tag) is present, but get_type -> ei_get_type then reads a 2-4 byte length header off the raw pointer for header-bearing tags, over-reading past the end of the buffer when the tag is the final byte (verified with a guard page: SIGSEGV on a truncated improper tail or map key). Read the tag with a single-byte peek instead: a proper list tail only accepts ERL_NIL_EXT, and is_string/is_atom classify the raw map-key tag while term_to_json_value re-reads and bounds-checks the full key. Also fix decode_number casting the scratch value through the forwarding-reference template parameter T instead of the decayed value type V; for an lvalue argument T is deduced as V&, so static_cast<T> forms a reference cast that fails to compile where int64_t is long long while long is the same width (e.g. macOS, an LP64 platform). This matches the existing float branch. Add regression tests: empty buffer -> no_read_input; truncated map header and truncated key tag -> unexpected_end; list missing its NIL tail -> unexpected_end (with a valid-list counterpart); improper list tail -> array_element_not_found.
1 parent 0b0f2f3 commit c7e69fa

3 files changed

Lines changed: 86 additions & 7 deletions

File tree

include/glaze/eetf/eetf_to_json.hpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,10 @@ namespace glz
164164
if (invalid_end(ctx, it, end)) [[unlikely]] {
165165
return;
166166
}
167-
const auto tag = get_type(ctx, it);
168-
if (tag != ERL_NIL_EXT) {
167+
// A proper list terminates with ERL_NIL_EXT, a single tag byte. Read that tag directly
168+
// rather than through get_type/ei_get_type, which reads a 2-4 byte length header off the
169+
// raw pointer (past the end when the tail tag is the final byte) for header-bearing term types.
170+
if (uint8_t(*it) != ERL_NIL_EXT) {
169171
ctx.error = error_code::array_element_not_found;
170172
return;
171173
}
@@ -208,7 +210,12 @@ namespace glz
208210
if (invalid_end(ctx, it, end)) [[unlikely]] {
209211
return;
210212
}
211-
const auto key_type = get_type(ctx, it);
213+
// Read the key tag directly rather than through get_type/ei_get_type, which reads a
214+
// 2-4 byte length header off the raw pointer (past the end when the tag is the final byte).
215+
// is_string/is_atom accept the raw, un-normalized tag, and term_to_json_value re-reads
216+
// and bounds-checks the full key below. Widen to int so the tag clears the int_t
217+
// constraint on is_string/is_atom (uint8_t is a char type and would be rejected).
218+
const int key_type = uint8_t(*it);
212219
// support only string or atom keys in json
213220
if (!eetf::is_string(key_type) && !eetf::is_atom(key_type)) {
214221
ctx.error = error_code::syntax_error;

include/glaze/eetf/ei.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ namespace glz
117117
GLZ_ALWAYS_INLINE void decode_number(T&& value, Args&&... args) noexcept
118118
{
119119
using V = std::remove_cvref_t<T>;
120+
// Cast the scratch value through V (the decayed value type), never T. T is a forwarding
121+
// reference, so static_cast<T>(v) forms a reference cast that fails to compile when the
122+
// scratch type (long / long long) is a distinct type of the same width as V -- e.g. on
123+
// platforms where int64_t is long long while long is also 64-bit (macOS, which is LP64).
120124
if constexpr (std::floating_point<std::remove_cvref_t<T>>) {
121125
double v;
122126
detail::decode_impl([&](const char* buf, int* index) { return ei_decode_double(buf, index, &v); },
@@ -129,27 +133,27 @@ namespace glz
129133
long long v;
130134
detail::decode_impl([&](const char* buf, int* index) { return ei_decode_longlong(buf, index, &v); },
131135
std::forward<Args>(args)...);
132-
value = static_cast<T>(v);
136+
value = static_cast<V>(v);
133137
}
134138
else {
135139
unsigned long long v;
136140
detail::decode_impl([&](const char* buf, int* index) { return ei_decode_ulonglong(buf, index, &v); },
137141
std::forward<Args>(args)...);
138-
value = static_cast<T>(v);
142+
value = static_cast<V>(v);
139143
}
140144
}
141145
else {
142146
if constexpr (std::is_signed_v<V>) {
143147
long v;
144148
detail::decode_impl([&](const char* buf, int* index) { return ei_decode_long(buf, index, &v); },
145149
std::forward<Args>(args)...);
146-
value = static_cast<T>(v);
150+
value = static_cast<V>(v);
147151
}
148152
else {
149153
unsigned long v;
150154
detail::decode_impl([&](const char* buf, int* index) { return ei_decode_ulong(buf, index, &v); },
151155
std::forward<Args>(args)...);
152-
value = static_cast<T>(v);
156+
value = static_cast<V>(v);
153157
}
154158
}
155159
}

tests/eetf_test/eetf_test.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,74 @@ suite eetf_to_json_tests = [] {
455455
expect_unexpected_end(partial_digits);
456456
};
457457

458+
"eetf_to_json empty buffer"_test = [] {
459+
// No version byte at all: the entry guard rejects before decode_version reads the tag.
460+
const std::string buffer{};
461+
std::string json{};
462+
const auto ec = glz::eetf_to_json(buffer, json);
463+
expect(ec.ec == glz::error_code::no_read_input);
464+
};
465+
466+
"eetf_to_json truncated map"_test = [] {
467+
auto expect_unexpected_end = [](const auto& buffer) {
468+
std::string json{};
469+
const auto ec = glz::eetf_to_json(buffer, json);
470+
expect(ec.ec == glz::error_code::unexpected_end);
471+
};
472+
473+
// ERL_MAP_EXT declares arity 1 but the buffer ends before any key/value entry.
474+
const std::array<std::uint8_t, 6> missing_entries{
475+
uint8_t(glz::eetf_magic_version), uint8_t(ERL_MAP_EXT), 0, 0, 0, 1};
476+
// ERL_MAP_EXT arity 1 with a key tag (ERL_ATOM_EXT) as the final byte: the tag must be
477+
// classified without ei_get_type reading its 2-byte length header past the end.
478+
const std::array<std::uint8_t, 7> truncated_key{
479+
uint8_t(glz::eetf_magic_version), uint8_t(ERL_MAP_EXT), 0, 0, 0, 1, uint8_t(ERL_ATOM_EXT)};
480+
481+
expect_unexpected_end(missing_entries);
482+
expect_unexpected_end(truncated_key);
483+
};
484+
485+
"eetf_to_json list missing tail"_test = [] {
486+
// ERL_LIST_EXT, one element (small integer 5), but the ERL_NIL_EXT tail is missing.
487+
const std::array<std::uint8_t, 8> missing_tail{
488+
uint8_t(glz::eetf_magic_version), uint8_t(ERL_LIST_EXT), 0, 0, 0, 1, uint8_t(ERL_SMALL_INTEGER_EXT), 5};
489+
std::string json{};
490+
const auto ec = glz::eetf_to_json(missing_tail, json);
491+
expect(ec.ec == glz::error_code::unexpected_end);
492+
493+
// The same list with the NIL tail present parses cleanly: the tail guard must not reject it.
494+
const std::array<std::uint8_t, 9> with_tail{uint8_t(glz::eetf_magic_version),
495+
uint8_t(ERL_LIST_EXT),
496+
0,
497+
0,
498+
0,
499+
1,
500+
uint8_t(ERL_SMALL_INTEGER_EXT),
501+
5,
502+
uint8_t(ERL_NIL_EXT)};
503+
json.clear();
504+
expect(!glz::eetf_to_json(with_tail, json));
505+
expect(json == "[5]") << json;
506+
};
507+
508+
"eetf_to_json list improper tail"_test = [] {
509+
// ERL_LIST_EXT with one element and a non-NIL tail tag (ERL_MAP_EXT) as the final byte. The
510+
// tail must be classified from its single tag byte without ei_get_type reading the 4-byte
511+
// length header past the end of the buffer.
512+
const std::array<std::uint8_t, 9> buffer{uint8_t(glz::eetf_magic_version),
513+
uint8_t(ERL_LIST_EXT),
514+
0,
515+
0,
516+
0,
517+
1,
518+
uint8_t(ERL_SMALL_INTEGER_EXT),
519+
5,
520+
uint8_t(ERL_MAP_EXT)};
521+
std::string json{};
522+
const auto ec = glz::eetf_to_json(buffer, json);
523+
expect(ec.ec == glz::error_code::array_element_not_found);
524+
};
525+
458526
"eetf_to_json no header"_test = [] {
459527
constexpr int items = 3;
460528
std::vector<simple> v;

0 commit comments

Comments
 (0)