Skip to content

Commit 9a346dd

Browse files
committed
Fix parsing of numbers in floating point format to integers
When parsing a string like "123.456" to an integer, the ICU backend would first parse it greedily as a floating point value and then cast/truncate it to an integer. Set the flag to parse only integers when parsing to an integral type. Care must be taken not to set that flag when parsing e.g. a currency or date to an integer, where the truncation is intended.
1 parent ff91706 commit 9a346dd

File tree

7 files changed

+104
-2
lines changed

7 files changed

+104
-2
lines changed

doc/changelog.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
/*!
99
\page changelog Changelog
1010

11+
- 1.88.0
12+
- Fix parsing of numbers in floating point format to integers
1113
- 1.86.0
1214
- Make ICU implementation of `to_title` threadsafe
1315
- Add allocator support to `utf_to_utf`

src/boost/locale/icu/formatter.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ namespace boost { namespace locale { namespace impl_icu {
5454
public:
5555
typedef std::basic_string<CharType> string_type;
5656

57-
number_format(icu::NumberFormat& fmt, std::string codepage) : cvt_(codepage), icu_fmt_(fmt) {}
57+
number_format(icu::NumberFormat& fmt, const std::string& codepage, bool isNumberOnly = false) :
58+
cvt_(codepage), icu_fmt_(fmt), isNumberOnly_(isNumberOnly)
59+
{}
5860

5961
string_type format(double value, size_t& code_points) const override { return do_format(value, code_points); }
6062
string_type format(int64_t value, size_t& code_points) const override { return do_format(value, code_points); }
@@ -107,6 +109,9 @@ namespace boost { namespace locale { namespace impl_icu {
107109
icu::ParsePosition pp;
108110
icu::UnicodeString tmp = cvt_.icu(str.data(), str.data() + str.size());
109111

112+
// For the plain number parsing (no currency etc) parse "123.456" as 2 ints
113+
// not a float later converted to int
114+
icu_fmt_.setParseIntegerOnly(std::is_integral<ValueType>::value && isNumberOnly_);
110115
icu_fmt_.parse(tmp, val, pp);
111116

112117
ValueType tmp_v;
@@ -122,6 +127,7 @@ namespace boost { namespace locale { namespace impl_icu {
122127

123128
icu_std_converter<CharType> cvt_;
124129
icu::NumberFormat& icu_fmt_;
130+
const bool isNumberOnly_;
125131
};
126132

127133
template<typename CharType>
@@ -355,7 +361,7 @@ namespace boost { namespace locale { namespace impl_icu {
355361
icu::NumberFormat& nf =
356362
cache.number_format((how == std::ios_base::scientific) ? num_fmt_type::sci : num_fmt_type::number);
357363
set_fraction_digits(nf, how, ios.precision());
358-
return ptr_type(new number_format<CharType>(nf, encoding));
364+
return ptr_type(new number_format<CharType>(nf, encoding, true));
359365
}
360366
case currency: {
361367
icu::NumberFormat& nf = cache.number_format(

test/formatting_common.hpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
//
2+
// Copyright (c) 2024 Alexander Grund
3+
//
4+
// Distributed under the Boost Software License, Version 1.0.
5+
// https://www.boost.org/LICENSE_1_0.txt
6+
7+
#include <boost/locale/formatting.hpp>
8+
#include <boost/locale/generator.hpp>
9+
#include <cstdint>
10+
#include <limits>
11+
#include <sstream>
12+
13+
#include "../src/boost/locale/util/foreach_char.hpp"
14+
#include "boostLocale/test/tools.hpp"
15+
#include "boostLocale/test/unit_test.hpp"
16+
17+
/// Check that several numbers written back-to-back (no whitespace) are extracted one at a time
/// from a stream imbued with \a locale.
///
/// \tparam CharType character type of the input stream
/// \tparam IntType integral type the numbers are extracted into
/// \param locale locale imbued into the stream before any parsing happens
template<typename CharType, typename IntType>
18+
void test_parse_multi_number_by_char(const std::locale& locale)
19+
{
20+
std::basic_istringstream<CharType> stream;
21+
stream.imbue(locale);
// Integer extraction in number mode: must stop at the '.' instead of consuming "42.12" as one float
22+
stream.str(ascii_to<CharType>("42.12,345"));
23+
stream >> boost::locale::as::number;
24+
25+
IntType value;
26+
TEST_REQUIRE(stream >> value);
27+
TEST_EQ(value, IntType(42));
28+
TEST_EQ(static_cast<char>(stream.get()), '.');
// The remainder "12,345" is expected to yield 12345, i.e. the ',' is consumed as part of the number
29+
TEST_REQUIRE(stream >> value);
30+
TEST_EQ(value, IntType(12345));
// Nothing is left, so a further extraction must fail and leave the stream at eof
31+
TEST_REQUIRE(!(stream >> value));
32+
TEST(stream.eof());
33+
// Floating point extraction keeps the fraction and is expected to stop at the ','
34+
stream.str(ascii_to<CharType>("42.25,678"));
35+
stream.clear();
36+
float fValue;
37+
TEST_REQUIRE(stream >> fValue);
38+
TEST_EQ(fValue, 42.25);
39+
TEST_EQ(static_cast<char>(stream.get()), ',');
40+
TEST_REQUIRE(stream >> value);
41+
TEST_EQ(value, IntType(678));
42+
TEST_REQUIRE(!(stream >> value));
43+
TEST(stream.eof());
44+
45+
// Parsing a floating point currency to integer truncates the floating point value but fully parses it
46+
stream.str(ascii_to<CharType>("USD1,234.55,67.89"));
47+
stream.clear();
// The stream is still in number mode here, so extracting an integer from the currency string must fail
48+
TEST_REQUIRE(!(stream >> value));
49+
stream.clear();
// Switch to ISO currency parsing; the checks below only run if the backend manages to parse the value
50+
stream >> boost::locale::as::currency >> boost::locale::as::currency_iso;
51+
if(stream >> value) { // Parsing currencies not fully supported by WinAPI backend
52+
TEST_EQ(value, IntType(1234));
53+
TEST_EQ(static_cast<char>(stream.get()), ',');
// Back in number mode the integer extraction is expected to read only 67 and leave input unread (not eof)
54+
TEST_REQUIRE(stream >> boost::locale::as::number >> value);
55+
TEST_EQ(value, IntType(67));
56+
TEST(!stream.eof());
57+
}
58+
}
59+
60+
/// Test that parsing multiple numbers without any spaces works as expected
///
/// Generates the "en_US.UTF-8" locale and runs test_parse_multi_number_by_char with it for
/// `char` and `wchar_t` streams, each combined with the signed and unsigned 16, 32 and 64 bit
/// integer types. The character and integer type under test are printed to std::cout.
61+
void test_parse_multi_number()
62+
{
63+
const auto locale = boost::locale::generator{}("en_US.UTF-8");
64+
// Helper macros: BOOST_LOCALE_CALL_I runs one (char type, int type) combination,
// BOOST_LOCALE_CALL runs all integer types for a given char type
65+
#define BOOST_LOCALE_CALL_I(T, I) \
66+
std::cout << "\t" #I << std::endl; \
67+
test_parse_multi_number_by_char<T, I>(locale);
68+
69+
#define BOOST_LOCALE_CALL(T) \
70+
std::cout << "test_parse_multi_number " #T << std::endl; \
71+
BOOST_LOCALE_CALL_I(T, int16_t); \
72+
BOOST_LOCALE_CALL_I(T, uint16_t); \
73+
BOOST_LOCALE_CALL_I(T, int32_t); \
74+
BOOST_LOCALE_CALL_I(T, uint32_t); \
75+
BOOST_LOCALE_CALL_I(T, int64_t); \
76+
BOOST_LOCALE_CALL_I(T, uint64_t);
77+
// Run the checks for narrow and wide character streams
78+
BOOST_LOCALE_CALL(char);
79+
BOOST_LOCALE_CALL(wchar_t);
// The helper macros are only needed inside this function, so undefine them again
80+
#undef BOOST_LOCALE_CALL
81+
#undef BOOST_LOCALE_CALL_I
82+
}

test/test_formatting.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
#include "boostLocale/test/tools.hpp"
2323
#include "boostLocale/test/unit_test.hpp"
24+
#include "formatting_common.hpp"
2425

2526
const std::string test_locale_name = "en_US";
2627
std::string message_path = "./";
@@ -928,6 +929,8 @@ void test_main(int argc, char** argv)
928929
test_manip<char32_t>();
929930
test_format_class<char32_t>();
930931
#endif
932+
933+
test_parse_multi_number();
931934
}
932935

933936
// boostinspect:noascii

test/test_posix_formatting.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#endif
2020
#include "boostLocale/test/tools.hpp"
2121
#include "boostLocale/test/unit_test.hpp"
22+
#include "formatting_common.hpp"
2223

2324
#ifdef BOOST_LOCALE_NO_POSIX_BACKEND
2425
// Dummy just to make it compile
@@ -185,6 +186,7 @@ void test_main(int /*argc*/, char** /*argv*/)
185186
TEST(v == "12345,45" || v == "12 345,45" || v == "12.345,45");
186187
}
187188
}
189+
test_parse_multi_number();
188190
}
189191

190192
// boostinspect:noascii

test/test_std_formatting.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "boostLocale/test/tools.hpp"
1616
#include "boostLocale/test/unit_test.hpp"
17+
#include "formatting_common.hpp"
1718

1819
template<typename CharType, typename RefCharType>
1920
void test_by_char(const std::locale& l, const std::locale& lreal)
@@ -230,6 +231,10 @@ void test_main(int /*argc*/, char** /*argv*/)
230231
}
231232
}
232233
}
234+
// Std backend silently falls back to the C locale when the locale is not supported
235+
// which breaks the test assumptions
236+
if(has_std_locale("en_US.UTF-8"))
237+
test_parse_multi_number();
233238
}
234239

235240
// boostinspect:noascii

test/test_winapi_formatting.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "../src/boost/locale/win32/lcid.hpp"
2424
#include "boostLocale/test/tools.hpp"
2525
#include "boostLocale/test/unit_test.hpp"
26+
#include "formatting_common.hpp"
2627

2728
template<typename CharType>
2829
void test_by_char(const std::locale& l, std::string name, int lcid)
@@ -176,6 +177,7 @@ void test_main(int /*argc*/, char** /*argv*/)
176177
test_by_char<wchar_t>(l, name, name_lcid.second);
177178
}
178179
}
180+
test_parse_multi_number();
179181
std::cout << "- Testing strftime" << std::endl;
180182
test_date_time(gen("en_US.UTF-8"));
181183
}

0 commit comments

Comments
 (0)