Skip to content

Commit efc6feb

Browse files
authored
Avoid exceptions when count does not match the number of elements (#1)
Fixes issue #735
2 parents 23b9db2 + 053a6e1 commit efc6feb

File tree

6 files changed

+150
-23
lines changed

6 files changed

+150
-23
lines changed

source/detail/constants.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ const column_t constants::max_column()
4949
return column_t(std::numeric_limits<column_t::index_t>::max());
5050
}
5151

52+
const size_t constants::max_elements_for_reserve()
53+
{
54+
return 10000;
55+
}
56+
5257
// constants
5358
const path constants::package_properties()
5459
{

source/detail/constants.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,14 @@ struct XLNT_API constants
5454
/// </summary>
5555
static const column_t max_column();
5656

57+
/// <summary>
58+
/// Returns the maximum amount of elements that functions like std::vector::reserve (or other containers) are allowed to allocate.
59+
/// Information like a "count" is often saved in XLSX files and can be used by std::vector::reserve (or other containers)
60+
/// to allocate the memory right away and thus improve performance. However, malicious or broken files
61+
/// might then cause XLNT to allocate extreme amounts of memory. This function sets a limit to protect against such issues.
62+
/// </summary>
63+
static const size_t max_elements_for_reserve();
64+
5765
/// <summary>
5866
/// Returns the URI of the directory containing package properties.
5967
/// </summary>

source/detail/limits.hpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) 2014-2021 Thomas Fussell
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to deal
5+
// in the Software without restriction, including without limitation the rights
6+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
// copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in
11+
// all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
// THE SOFTWARE
20+
//
21+
// @license: http://www.opensource.org/licenses/mit-license.php
22+
// @author: see AUTHORS file
23+
24+
#pragma once
25+
26+
#include "constants.hpp"
27+
28+
namespace xlnt {
29+
namespace detail {
30+
31+
/// <summary>
32+
/// Clips the maximum number of reserved elements to a certain upper limit.
33+
/// Information like a "count" is often saved in XLSX files and can be used by std::vector::reserve (or other containers)
34+
/// to allocate the memory right away and thus improve performance. However, malicious or broken files
35+
/// might then cause XLNT to allocate extreme amounts of memory. This function clips the number of elements
36+
/// to an upper limit to protect against such issues, but still allow the caller to pre-allocate memory.
37+
/// </summary>
38+
inline size_t clip_reserve_elements(size_t num_elements)
39+
{
40+
return std::min(num_elements, xlnt::constants::max_elements_for_reserve());
41+
}
42+
43+
} // namespace detail
44+
} // namespace xlnt

source/detail/serialization/xlsx_consumer.cpp

Lines changed: 85 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include <detail/serialization/vector_streambuf.hpp>
4747
#include <detail/serialization/xlsx_consumer.hpp>
4848
#include <detail/serialization/zstream.hpp>
49+
#include <detail/limits.hpp>
4950

5051
namespace {
5152
/// string_equal
@@ -267,7 +268,7 @@ xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser, std::uno
267268
case xml::parser::end_attribute:
268269
case xml::parser::eof:
269270
default: {
270-
throw xlnt::exception("unexcpected XML parsing event");
271+
throw xlnt::exception("unexpected XML parsing event");
271272
}
272273
}
273274
// Prevents unhandled exceptions from being triggered.
@@ -344,7 +345,7 @@ std::pair<xlnt::row_properties, int> parse_row(xml::parser *parser, xlnt::detail
344345
case xml::parser::end_attribute:
345346
case xml::parser::eof:
346347
default: {
347-
throw xlnt::exception("unexcpected XML parsing event");
348+
throw xlnt::exception("unexpected XML parsing event");
348349
}
349350
}
350351
}
@@ -382,7 +383,7 @@ Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser
382383
case xml::parser::end_attribute:
383384
case xml::parser::eof:
384385
default: {
385-
throw xlnt::exception("unexcpected XML parsing event");
386+
throw xlnt::exception("unexpected XML parsing event");
386387
}
387388
}
388389
}
@@ -2280,10 +2281,12 @@ void xlsx_consumer::read_shared_string_table()
22802281

22812282
expect_end_element(qn("spreadsheetml", "sst"));
22822283

2284+
#ifdef THROW_ON_INVALID_XML
22832285
if (has_unique_count && unique_count != target_.shared_strings().size())
22842286
{
22852287
throw invalid_file("sizes don't match");
22862288
}
2289+
#endif
22872290
}
22882291

22892292
void xlsx_consumer::read_shared_workbook_revision_headers()
@@ -2317,7 +2320,12 @@ void xlsx_consumer::read_stylesheet()
23172320
if (current_style_element == qn("spreadsheetml", "borders"))
23182321
{
23192322
auto &borders = stylesheet.borders;
2320-
auto count = parser().attribute<std::size_t>("count");
2323+
optional<std::size_t> count;
2324+
if (parser().attribute_present("count"))
2325+
{
2326+
count = parser().attribute<std::size_t>("count");
2327+
borders.reserve(xlnt::detail::clip_reserve_elements(count.get()));
2328+
}
23212329

23222330
while (in_element(qn("spreadsheetml", "borders")))
23232331
{
@@ -2370,15 +2378,22 @@ void xlsx_consumer::read_stylesheet()
23702378
expect_end_element(qn("spreadsheetml", "border"));
23712379
}
23722380

2373-
if (count != borders.size())
2381+
#ifdef THROW_ON_INVALID_XML
2382+
if (count.is_set() && count != borders.size())
23742383
{
23752384
throw xlnt::exception("border counts don't match");
23762385
}
2386+
#endif
23772387
}
23782388
else if (current_style_element == qn("spreadsheetml", "fills"))
23792389
{
23802390
auto &fills = stylesheet.fills;
2381-
auto count = parser().attribute<std::size_t>("count");
2391+
optional<std::size_t> count;
2392+
if (parser().attribute_present("count"))
2393+
{
2394+
count = parser().attribute<std::size_t>("count");
2395+
fills.reserve(xlnt::detail::clip_reserve_elements(count.get()));
2396+
}
23822397

23832398
while (in_element(qn("spreadsheetml", "fills")))
23842399
{
@@ -2455,15 +2470,22 @@ void xlsx_consumer::read_stylesheet()
24552470
expect_end_element(qn("spreadsheetml", "fill"));
24562471
}
24572472

2458-
if (count != fills.size())
2473+
#ifdef THROW_ON_INVALID_XML
2474+
if (count.is_set() && count != fills.size())
24592475
{
24602476
throw xlnt::exception("counts don't match");
24612477
}
2478+
#endif
24622479
}
24632480
else if (current_style_element == qn("spreadsheetml", "fonts"))
24642481
{
24652482
auto &fonts = stylesheet.fonts;
2466-
auto count = parser().attribute<std::size_t>("count", 0);
2483+
optional<std::size_t> count;
2484+
if (parser().attribute_present("count"))
2485+
{
2486+
count = parser().attribute<std::size_t>("count");
2487+
fonts.reserve(xlnt::detail::clip_reserve_elements(count.get()));
2488+
}
24672489

24682490
if (parser().attribute_present(qn("x14ac", "knownFonts")))
24692491
{
@@ -2598,15 +2620,22 @@ void xlsx_consumer::read_stylesheet()
25982620
expect_end_element(qn("spreadsheetml", "font"));
25992621
}
26002622

2601-
if (count != stylesheet.fonts.size())
2623+
#ifdef THROW_ON_INVALID_XML
2624+
if (count.is_set() && count != stylesheet.fonts.size())
26022625
{
2603-
// throw xlnt::exception("counts don't match");
2626+
throw xlnt::exception("counts don't match");
26042627
}
2628+
#endif
26052629
}
26062630
else if (current_style_element == qn("spreadsheetml", "numFmts"))
26072631
{
26082632
auto &number_formats = stylesheet.number_formats;
2609-
auto count = parser().attribute<std::size_t>("count");
2633+
optional<std::size_t> count;
2634+
if (parser().attribute_present("count"))
2635+
{
2636+
count = parser().attribute<std::size_t>("count");
2637+
number_formats.reserve(xlnt::detail::clip_reserve_elements(count.get()));
2638+
}
26102639

26112640
while (in_element(qn("spreadsheetml", "numFmts")))
26122641
{
@@ -2629,14 +2658,21 @@ void xlsx_consumer::read_stylesheet()
26292658
number_formats.push_back(nf);
26302659
}
26312660

2632-
if (count != number_formats.size())
2661+
#ifdef THROW_ON_INVALID_XML
2662+
if (count.is_set() && count != number_formats.size())
26332663
{
26342664
throw xlnt::exception("counts don't match");
26352665
}
2666+
#endif
26362667
}
26372668
else if (current_style_element == qn("spreadsheetml", "cellStyles"))
26382669
{
2639-
auto count = parser().attribute<std::size_t>("count");
2670+
optional<std::size_t> count;
2671+
if (parser().attribute_present("count"))
2672+
{
2673+
count = parser().attribute<std::size_t>("count");
2674+
styles.reserve(xlnt::detail::clip_reserve_elements(count.get()));
2675+
}
26402676

26412677
while (in_element(qn("spreadsheetml", "cellStyles")))
26422678
{
@@ -2665,16 +2701,30 @@ void xlsx_consumer::read_stylesheet()
26652701
expect_end_element(qn("spreadsheetml", "cellStyle"));
26662702
}
26672703

2668-
if (count != styles.size())
2704+
#ifdef THROW_ON_INVALID_XML
2705+
if (count.is_set() && count != styles.size())
26692706
{
26702707
throw xlnt::exception("counts don't match");
26712708
}
2709+
#endif
26722710
}
26732711
else if (current_style_element == qn("spreadsheetml", "cellStyleXfs")
26742712
|| current_style_element == qn("spreadsheetml", "cellXfs"))
26752713
{
26762714
auto in_style_records = current_style_element.name() == "cellStyleXfs";
2677-
auto count = parser().attribute<std::size_t>("count");
2715+
optional<std::size_t> count;
2716+
if (parser().attribute_present("count"))
2717+
{
2718+
count = parser().attribute<std::size_t>("count");
2719+
if (in_style_records)
2720+
{
2721+
style_records.reserve(xlnt::detail::clip_reserve_elements(count.get()));
2722+
}
2723+
else
2724+
{
2725+
format_records.reserve(xlnt::detail::clip_reserve_elements(count.get()));
2726+
}
2727+
}
26782728

26792729
while (in_element(current_style_element))
26802730
{
@@ -2803,15 +2853,16 @@ void xlsx_consumer::read_stylesheet()
28032853
expect_end_element(qn("spreadsheetml", "xf"));
28042854
}
28052855

2806-
if ((in_style_records && count != style_records.size())
2807-
|| (!in_style_records && count != format_records.size()))
2856+
#ifdef THROW_ON_INVALID_XML
2857+
if (count.is_set() && ((in_style_records && count != style_records.size())
2858+
|| (!in_style_records && count != format_records.size())))
28082859
{
28092860
throw xlnt::exception("counts don't match");
28102861
}
2862+
#endif
28112863
}
28122864
else if (current_style_element == qn("spreadsheetml", "dxfs"))
28132865
{
2814-
auto count = parser().attribute<std::size_t>("count");
28152866
std::size_t processed = 0;
28162867

28172868
while (in_element(current_style_element))
@@ -2822,17 +2873,22 @@ void xlsx_consumer::read_stylesheet()
28222873
++processed;
28232874
}
28242875

2825-
if (count != processed)
2876+
#ifdef THROW_ON_INVALID_XML
2877+
if (parser().attribute_present("count"))
28262878
{
2827-
throw xlnt::exception("counts don't match");
2879+
std::size_t count = parser().attribute<std::size_t>("count");
2880+
if (count != processed)
2881+
{
2882+
throw xlnt::exception("counts don't match");
2883+
}
28282884
}
2885+
#endif
28292886
}
28302887
else if (current_style_element == qn("spreadsheetml", "tableStyles"))
28312888
{
28322889
skip_attribute("defaultTableStyle");
28332890
skip_attribute("defaultPivotStyle");
28342891

2835-
auto count = parser().attribute<std::size_t>("count");
28362892
std::size_t processed = 0;
28372893

28382894
while (in_element(qn("spreadsheetml", "tableStyles")))
@@ -2843,10 +2899,16 @@ void xlsx_consumer::read_stylesheet()
28432899
++processed;
28442900
}
28452901

2846-
if (count != processed)
2902+
#ifdef THROW_ON_INVALID_XML
2903+
if (parser().attribute_present("count"))
28472904
{
2848-
throw xlnt::exception("counts don't match");
2905+
std::size_t count = parser().attribute<std::size_t>("count");
2906+
if (count != processed)
2907+
{
2908+
throw xlnt::exception("counts don't match");
2909+
}
28492910
}
2911+
#endif
28502912
}
28512913
else if (current_style_element == qn("spreadsheetml", "extLst"))
28522914
{
6.84 KB
Binary file not shown.

tests/workbook/serialization_test_suite.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class serialization_test_suite : public test_suite
7171
register_test(test_Issue445_inline_str_streaming_read);
7272
register_test(test_Issue492_stream_empty_row);
7373
register_test(test_Issue503_external_link_load);
74+
register_test(test_Issue735_wrong_count);
7475
register_test(test_formatting);
7576
register_test(test_active_sheet);
7677
register_test(test_locale_comma);
@@ -764,6 +765,13 @@ class serialization_test_suite : public test_suite
764765
auto cell = ws.cell("A1");
765766
xlnt_assert_equals(cell.value<std::string>(), std::string("WDG_IC_00000003.aut"));
766767
}
768+
769+
void test_Issue735_wrong_count()
770+
{
771+
xlnt::workbook wb;
772+
wb.load(path_helper::test_file("Issue735_wrong_count.xlsx"));
773+
xlnt_assert_throws_nothing(wb.active_sheet());
774+
}
767775

768776
void test_formatting()
769777
{

0 commit comments

Comments
 (0)