Skip to content

Commit d1161df

Browse files
committed
Bound non-null-terminated value scanners against end of input
Several JSON value scanners dereference *it without an end check, relying on the trailing '\0' sentinel that only exists for null-terminated buffers. On a non-null-terminated buffer (opts.null_terminated = false) there is no sentinel, so truncated or boundary input reads one or more bytes past the end (heap-buffer-overflow under ASAN). Each fix leaves the null-terminated fast path byte-for-byte unchanged and bounds only the non-null-terminated path: - skip_number (non-validating): gate the digit scan on it < end when not null-terminated (skip_number_opts gains a null_terminated field). - skip_number_with_validation: guard each standalone *it read with it != end (the find_if_not scans were already end-bounded). - number_of_array_elements: bound the element pre-scan loop on it == end. - skip_string (non-padded, validating): bound the scan loop and the post-backslash read (skip_string_opts gains a null_terminated field). - NDJSON read_new_lines: bound the inter-record newline scan on it != end. Adds a non_null_terminated_scanner_bounds suite exercising each scanner at its buffer boundary; these run under the ASAN CI job.
1 parent 2ceef98 commit d1161df

4 files changed

Lines changed: 231 additions & 32 deletions

File tree

include/glaze/json/ndjson.hpp

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,38 @@ namespace glz
5656
auto value_it = value.begin();
5757

5858
auto read_new_lines = [&] {
59-
while (*it == '\r') {
60-
++it;
61-
if (*it == '\n') {
59+
// A null-terminated buffer stops on the trailing '\0' sentinel. A non-null-terminated
60+
// buffer has no sentinel, so bound the scan on it != end while consuming the line
61+
// breaks between records.
62+
if constexpr (Opts.null_terminated) {
63+
while (*it == '\r') {
6264
++it;
65+
if (*it == '\n') {
66+
++it;
67+
}
68+
else {
69+
ctx.error = error_code::syntax_error; // Expected '\n' after '\r'
70+
return;
71+
}
6372
}
64-
else {
65-
ctx.error = error_code::syntax_error; // Expected '\n' after '\r'
66-
return;
73+
while (*it == '\n') {
74+
++it;
6775
}
6876
}
69-
while (*it == '\n') {
70-
++it;
77+
else {
78+
while (it != end && *it == '\r') {
79+
++it;
80+
if (it != end && *it == '\n') {
81+
++it;
82+
}
83+
else {
84+
ctx.error = error_code::syntax_error; // Expected '\n' after '\r'
85+
return;
86+
}
87+
}
88+
while (it != end && *it == '\n') {
89+
++it;
90+
}
7191
}
7292
};
7393

@@ -126,18 +146,38 @@ namespace glz
126146
}();
127147

128148
auto read_new_lines = [&] {
129-
while (*it == '\r') {
130-
++it;
131-
if (*it == '\n') {
149+
// A null-terminated buffer stops on the trailing '\0' sentinel. A non-null-terminated
150+
// buffer has no sentinel, so bound the scan on it != end while consuming the line
151+
// breaks between records.
152+
if constexpr (Opts.null_terminated) {
153+
while (*it == '\r') {
132154
++it;
155+
if (*it == '\n') {
156+
++it;
157+
}
158+
else {
159+
ctx.error = error_code::syntax_error; // Expected '\n' after '\r'
160+
return;
161+
}
133162
}
134-
else {
135-
ctx.error = error_code::syntax_error; // Expected '\n' after '\r'
136-
return;
163+
while (*it == '\n') {
164+
++it;
137165
}
138166
}
139-
while (*it == '\n') {
140-
++it;
167+
else {
168+
while (it != end && *it == '\r') {
169+
++it;
170+
if (it != end && *it == '\n') {
171+
++it;
172+
}
173+
else {
174+
ctx.error = error_code::syntax_error; // Expected '\n' after '\r'
175+
return;
176+
}
177+
}
178+
while (it != end && *it == '\n') {
179+
++it;
180+
}
141181
}
142182
};
143183

include/glaze/json/read.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2549,11 +2549,25 @@ namespace glz
25492549
if (bool(ctx.error)) [[unlikely]]
25502550
return {};
25512551

2552+
if constexpr (not Opts.null_terminated) {
2553+
if (it == end) [[unlikely]] {
2554+
ctx.error = error_code::unexpected_end;
2555+
return {};
2556+
}
2557+
}
25522558
if (*it == ']') [[unlikely]] {
25532559
return 0;
25542560
}
25552561
size_t count = 1;
25562562
while (true) {
2563+
// A null-terminated buffer falls out through the '\0' case below; a non-null-terminated
2564+
// buffer has no sentinel, so bound the scan here before dereferencing.
2565+
if constexpr (not Opts.null_terminated) {
2566+
if (it == end) [[unlikely]] {
2567+
ctx.error = error_code::unexpected_end;
2568+
return {};
2569+
}
2570+
}
25572571
switch (*it) {
25582572
case ',': {
25592573
++count;

include/glaze/util/parse.hpp

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -745,18 +745,26 @@ namespace glz
745745
bool padded;
746746
bool opening_handled;
747747
bool validate_skipped;
748+
bool null_terminated;
748749

749750
// Convert from any opts-like type (consteval because check_* functions are consteval)
750751
template <typename T>
751752
consteval skip_string_opts(const T& opts) noexcept
752753
: padded{check_is_padded(opts)},
753754
opening_handled{check_opening_handled(opts)},
754-
validate_skipped{check_validate_skipped(opts)}
755+
validate_skipped{check_validate_skipped(opts)},
756+
null_terminated{check_null_terminated(opts)}
755757
{}
756758

757-
// Direct construction - all values required
758-
consteval skip_string_opts(bool padded_, bool opening_handled_, bool validate_skipped_) noexcept
759-
: padded{padded_}, opening_handled{opening_handled_}, validate_skipped{validate_skipped_}
759+
// Direct construction - null_terminated defaults to true for the structural (non-validating)
760+
// skip_until_closed call sites, which route through the end-bounded skip_string_view path
761+
// and so are independent of this flag.
762+
consteval skip_string_opts(bool padded_, bool opening_handled_, bool validate_skipped_,
763+
bool null_terminated_ = true) noexcept
764+
: padded{padded_},
765+
opening_handled{opening_handled_},
766+
validate_skipped{validate_skipped_},
767+
null_terminated{null_terminated_}
760768
{}
761769
};
762770

@@ -863,6 +871,15 @@ namespace glz
863871

864872
if constexpr (Opts.validate_skipped) {
865873
while (true) {
874+
// A null-terminated buffer terminates on the trailing '\0' (caught by the control
875+
// character check below); a non-null-terminated buffer has no sentinel, so bound the
876+
// scan before each dereference.
877+
if constexpr (not Opts.null_terminated) {
878+
if (it == end) [[unlikely]] {
879+
ctx.error = error_code::unexpected_end;
880+
return;
881+
}
882+
}
866883
if ((*it & 0b11100000) == 0) [[unlikely]] {
867884
ctx.error = error_code::syntax_error;
868885
return;
@@ -875,6 +892,12 @@ namespace glz
875892
}
876893
case '\\': {
877894
++it;
895+
if constexpr (not Opts.null_terminated) {
896+
if (it == end) [[unlikely]] {
897+
ctx.error = error_code::unexpected_end;
898+
return;
899+
}
900+
}
878901
if (char_unescape_table[uint8_t(*it)]) {
879902
++it;
880903
continue;
@@ -1239,12 +1262,17 @@ namespace glz
12391262

12401263
GLZ_ALWAYS_INLINE void skip_number_with_validation(is_context auto&& ctx, auto&& it, auto end) noexcept
12411264
{
1242-
it += *it == '-';
1265+
// Every standalone *it read below is guarded by it != end so the scan stays inside the
1266+
// buffer for non-null-terminated input (the std::find_if_not calls are already bounded by
1267+
// end). A null-terminated buffer is unaffected: it reaches end only once the number is
1268+
// fully consumed, where these guards short-circuit exactly as the trailing '\0' sentinel
1269+
// would have.
1270+
it += (it != end) && (*it == '-');
12431271
const auto sig_start_it = it;
12441272
auto frac_start_it = end;
1245-
if (*it == '0') {
1273+
if (it != end && *it == '0') {
12461274
++it;
1247-
if (*it != '.') {
1275+
if (it == end || *it != '.') {
12481276
return;
12491277
}
12501278
++it;
@@ -1255,11 +1283,11 @@ namespace glz
12551283
ctx.error = error_code::syntax_error;
12561284
return;
12571285
}
1258-
if ((*it | ('E' ^ 'e')) == 'e') {
1286+
if (it != end && (*it | ('E' ^ 'e')) == 'e') {
12591287
++it;
12601288
goto exp_start;
12611289
}
1262-
if (*it != '.') return;
1290+
if (it == end || *it != '.') return;
12631291
++it;
12641292
frac_start:
12651293
frac_start_it = it;
@@ -1268,10 +1296,10 @@ namespace glz
12681296
ctx.error = error_code::syntax_error;
12691297
return;
12701298
}
1271-
if ((*it | ('E' ^ 'e')) != 'e') return;
1299+
if (it == end || (*it | ('E' ^ 'e')) != 'e') return;
12721300
++it;
12731301
exp_start:
1274-
it += *it == '+' || *it == '-';
1302+
it += (it != end) && (*it == '+' || *it == '-');
12751303
const auto exp_start_it = it;
12761304
it = std::find_if_not(it, end, is_digit);
12771305
if (it == exp_start_it) {
@@ -1284,22 +1312,34 @@ namespace glz
12841312
struct skip_number_opts
12851313
{
12861314
bool validate;
1315+
bool null_terminated;
12871316

1288-
// Convert from any opts-like type (consteval because check_validate_skipped is consteval)
1317+
// Convert from any opts-like type (consteval because check_* functions are consteval)
12891318
template <typename T>
1290-
consteval skip_number_opts(const T& opts) noexcept : validate{check_validate_skipped(opts)}
1319+
consteval skip_number_opts(const T& opts) noexcept
1320+
: validate{check_validate_skipped(opts)}, null_terminated{check_null_terminated(opts)}
12911321
{}
12921322

12931323
// Direct construction
1294-
explicit consteval skip_number_opts(bool validate_) noexcept : validate{validate_} {}
1324+
explicit consteval skip_number_opts(bool validate_, bool null_terminated_ = true) noexcept
1325+
: validate{validate_}, null_terminated{null_terminated_}
1326+
{}
12951327
};
12961328

12971329
template <skip_number_opts Opts>
12981330
GLZ_ALWAYS_INLINE void skip_number(is_context auto&& ctx, auto&& it, auto end) noexcept
12991331
{
13001332
if constexpr (not Opts.validate) {
1301-
while (numeric_table[uint8_t(*it)]) {
1302-
++it;
1333+
if constexpr (Opts.null_terminated) {
1334+
// Relies on the trailing '\0' sentinel (numeric_table['\0'] == false) to terminate.
1335+
while (numeric_table[uint8_t(*it)]) {
1336+
++it;
1337+
}
1338+
}
1339+
else {
1340+
while (it < end && numeric_table[uint8_t(*it)]) {
1341+
++it;
1342+
}
13031343
}
13041344
}
13051345
else {

tests/json_test/json_test.cpp

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2269,6 +2269,111 @@ suite early_end = [] {
22692269
trace.end("early_end");
22702270
};
22712271

2272+
// Bounds hardening for value scanners that do not route through skip_ws. On a non-null-terminated
2273+
// buffer there is no trailing '\0' sentinel, so each scanner below must respect end explicitly.
2274+
// Built under ASAN in CI, these cases catch reads past an exact-size buffer.
2275+
suite non_null_terminated_scanner_bounds = [] {
2276+
"nnt skip_number (raw_json) stays in bounds"_test = [] {
2277+
static constexpr glz::opts options{.null_terminated = false};
2278+
// Bare numbers that end exactly at the buffer end must parse without reading past it.
2279+
for (const std::string_view num : {"12345", "0", "3.14", "1e-12", "2.5E+9"}) {
2280+
std::vector<char> buf{num.begin(), num.end()};
2281+
const std::string_view view{buf.data(), buf.data() + buf.size()};
2282+
glz::raw_json value{};
2283+
const auto ec = glz::read<options>(value, view);
2284+
expect(not ec) << glz::format_error(ec, view);
2285+
expect(value.str == num);
2286+
}
2287+
};
2288+
2289+
"nnt number_of_array_elements stays in bounds"_test = [] {
2290+
static constexpr glz::opts options{.null_terminated = false};
2291+
// std::forward_list routes through the element pre-scan; truncated arrays must error rather
2292+
// than read past the end.
2293+
for (const std::string_view s : {"[1,2,3,4,5", "[1,2,", "[1", "["}) {
2294+
std::vector<char> buf{s.begin(), s.end()};
2295+
const std::string_view view{buf.data(), buf.data() + buf.size()};
2296+
std::forward_list<int> value{};
2297+
const auto ec = glz::read<options>(value, view);
2298+
expect(ec);
2299+
expect(ec.count <= view.size());
2300+
}
2301+
// A complete array still parses.
2302+
const std::string_view complete = "[1,2,3,4,5]";
2303+
std::vector<char> buf{complete.begin(), complete.end()};
2304+
const std::string_view view{buf.data(), buf.data() + buf.size()};
2305+
std::forward_list<int> value{};
2306+
const auto ec = glz::read<options>(value, view);
2307+
expect(not ec) << glz::format_error(ec, view);
2308+
expect(value == (std::forward_list<int>{1, 2, 3, 4, 5}));
2309+
};
2310+
2311+
"nnt validated number skip stays in bounds"_test = [] {
2312+
static constexpr glz::opts options{.null_terminated = false};
2313+
// The JSON-pointer path validates the located number; incomplete numbers must be rejected
2314+
// without scanning past the end, and complete numbers must still resolve.
2315+
const auto resolves = [](std::string_view s) {
2316+
std::vector<char> buf{s.begin(), s.end()};
2317+
return glz::get_view_json<"/x", options>(std::string_view{buf.data(), buf.data() + buf.size()}).has_value();
2318+
};
2319+
expect(not resolves(R"({"x":-)"));
2320+
expect(not resolves(R"({"x":1e)"));
2321+
expect(not resolves(R"({"x":1.)"));
2322+
expect(not resolves(R"({"x":1.5e)"));
2323+
expect(not resolves(R"({"x":1.5e+)"));
2324+
expect(resolves(R"({"x":0)"));
2325+
expect(resolves(R"({"x":12345)"));
2326+
expect(resolves(R"({"x":12345})"));
2327+
};
2328+
2329+
"nnt validated string skip stays in bounds"_test = [] {
2330+
static constexpr glz::opts options{.null_terminated = false};
2331+
// The validating skip path (here via a JSON pointer) must not scan past the end of an
2332+
// unterminated string value or a string truncated mid-escape.
2333+
const auto resolves = [](std::string_view s) {
2334+
std::vector<char> buf{s.begin(), s.end()};
2335+
return glz::get_view_json<"/x", options>(std::string_view{buf.data(), buf.data() + buf.size()}).has_value();
2336+
};
2337+
expect(not resolves(R"({"x":"abcdef)")); // no closing quote
2338+
expect(not resolves(R"({"x":"ab\)")); // trailing backslash at the end
2339+
expect(not resolves(R"({"x":"ab\u12)")); // truncated unicode escape
2340+
expect(resolves(R"({"x":"abc"})")); // complete string resolves
2341+
};
2342+
2343+
"nnt ndjson newline scan stays in bounds"_test = [] {
2344+
// Under null_terminated = false the record values use the end-bounded number parser and the
2345+
// inter-record newline scan is bounded too, so records with or without a trailing newline
2346+
// (and with '\r\n' line endings) stay inside the buffer.
2347+
static constexpr glz::opts options{.format = glz::NDJSON, .null_terminated = false};
2348+
const auto read_vec = [](std::string_view s) {
2349+
std::vector<char> buf{s.begin(), s.end()};
2350+
std::vector<int> value{};
2351+
const auto ec = glz::read<options>(value, std::string_view{buf.data(), buf.data() + buf.size()});
2352+
return std::pair{ec, value};
2353+
};
2354+
{
2355+
auto [ec, v] = read_vec("1");
2356+
expect(not ec);
2357+
expect(v == std::vector<int>{1});
2358+
}
2359+
{
2360+
auto [ec, v] = read_vec("1\n2\n");
2361+
expect(not ec);
2362+
expect(v == std::vector<int>{1, 2});
2363+
}
2364+
{
2365+
auto [ec, v] = read_vec("1\n2");
2366+
expect(not ec);
2367+
expect(v == std::vector<int>{1, 2});
2368+
}
2369+
{
2370+
auto [ec, v] = read_vec("1\r\n2\r\n");
2371+
expect(not ec);
2372+
expect(v == std::vector<int>{1, 2});
2373+
}
2374+
};
2375+
};
2376+
22722377
suite minified_custom_object = [] {
22732378
using namespace ut;
22742379

0 commit comments

Comments
 (0)