Skip to content

Commit c08054f

Browse files
committed
[libcxx] Provide locale conversions to tests through lit substitution (llvm#105651)
There are 2 problems today that this PR resolves: libcxx tests assume the thousands separator for the fr_FR locale is 0x00A0 on Windows. This currently fails when run on newer versions of Windows (it seems to have been updated to the new correct value of 0x202F around Windows 11; the exact Windows version where it changed doesn't seem to be documented anywhere). Depending on the OS version, you need different values. There are several ifdefs to determine the environment/platform-specific locale conversion values, and they lead to ongoing maintenance as things change over time. This PR includes the following changes: - Provide the environment's locale conversion values through a substitution. The test can opt in by placing the substitution value in a define flag. - Remove the platform ifdefs (the swapping of values between Windows, Linux, Apple, AIX). This is accomplished through a lit feature action that fetches the environment's locale conversions (lconv) for members like 'thousands_sep' that we need to provide. This should ensure that we don't lose the effectiveness of the test itself. In addition, as a result of the above, this PR: - Fixes a handful of locale tests which unexpectedly fail on newer Windows versions. - Resolves 3 XFAIL FIX-MEs. Originally submitted in llvm#86649. Co-authored-by: Rodrigo Salazar <[email protected]> (cherry picked from commit f909b22)
1 parent 8422c56 commit c08054f

File tree

9 files changed

+138
-88
lines changed

9 files changed

+138
-88
lines changed

Diff for: libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
// REQUIRES: locale.fr_FR.UTF-8
1515

16+
// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}
17+
1618
// <locale>
1719

1820
// class money_get<charT, InputIterator>
@@ -59,7 +61,8 @@ class my_facetw
5961
};
6062

6163
static std::wstring convert_thousands_sep(std::wstring const& in) {
62-
return LocaleHelpers::convert_thousands_sep_fr_FR(in);
64+
const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);
65+
return LocaleHelpers::convert_thousands_sep(in, fr_sep);
6366
}
6467
#endif // TEST_HAS_NO_WIDE_CHARACTERS
6568

Diff for: libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
// REQUIRES: locale.ru_RU.UTF-8
1313

14+
// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}
15+
1416
// XFAIL: glibc-old-ru_RU-decimal-point
1517

1618
// <locale>
@@ -52,7 +54,8 @@ class my_facetw
5254
};
5355

5456
static std::wstring convert_thousands_sep(std::wstring const& in) {
55-
return LocaleHelpers::convert_thousands_sep_ru_RU(in);
57+
const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);
58+
return LocaleHelpers::convert_thousands_sep(in, ru_sep);
5659
}
5760
#endif // TEST_HAS_NO_WIDE_CHARACTERS
5861

Diff for: libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
// REQUIRES: locale.fr_FR.UTF-8
1515

16+
// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}
17+
1618
// <locale>
1719

1820
// class money_put<charT, OutputIterator>
@@ -59,7 +61,8 @@ class my_facetw
5961
};
6062

6163
static std::wstring convert_thousands_sep(std::wstring const& in) {
62-
return LocaleHelpers::convert_thousands_sep_fr_FR(in);
64+
const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);
65+
return LocaleHelpers::convert_thousands_sep(in, fr_sep);
6366
}
6467
#endif // TEST_HAS_NO_WIDE_CHARACTERS
6568

Diff for: libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
// REQUIRES: locale.ru_RU.UTF-8
1313

14+
// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}
15+
1416
// XFAIL: glibc-old-ru_RU-decimal-point
1517

1618
// <locale>
@@ -52,7 +54,8 @@ class my_facetw
5254
};
5355

5456
static std::wstring convert_thousands_sep(std::wstring const& in) {
55-
return LocaleHelpers::convert_thousands_sep_ru_RU(in);
57+
const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);
58+
return LocaleHelpers::convert_thousands_sep(in, ru_sep);
5659
}
5760
#endif // TEST_HAS_NO_WIDE_CHARACTERS
5861

Diff for: libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp

+9-25
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99
// NetBSD does not support LC_MONETARY at the moment
1010
// XFAIL: netbsd
1111

12-
// XFAIL: LIBCXX-FREEBSD-FIXME
13-
1412
// REQUIRES: locale.en_US.UTF-8
1513
// REQUIRES: locale.fr_FR.UTF-8
1614
// REQUIRES: locale.ru_RU.UTF-8
1715
// REQUIRES: locale.zh_CN.UTF-8
1816

17+
// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}
18+
// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}
19+
1920
// <locale>
2021

2122
// class moneypunct_byname<charT, International>
@@ -27,6 +28,7 @@
2728
#include <cassert>
2829

2930
#include "test_macros.h"
31+
#include "locale_helpers.h"
3032
#include "platform_support.h" // locale name macros
3133

3234
class Fnf
@@ -110,17 +112,10 @@ int main(int, char**)
110112
Fnt f(LOCALE_fr_FR_UTF_8, 1);
111113
assert(f.thousands_sep() == ' ');
112114
}
113-
// The below tests work around GLIBC's use of U202F as mon_thousands_sep.
115+
114116
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
115-
#if defined(_CS_GNU_LIBC_VERSION)
116-
const wchar_t fr_sep = glibc_version_less_than("2.27") ? L' ' : L'\u202F';
117-
#elif defined(_WIN32)
118-
const wchar_t fr_sep = L'\u00A0';
119-
#elif defined(_AIX)
120-
const wchar_t fr_sep = L'\u202F';
121-
#else
122-
const wchar_t fr_sep = L' ';
123-
#endif
117+
const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);
118+
124119
{
125120
Fwf f(LOCALE_fr_FR_UTF_8, 1);
126121
assert(f.thousands_sep() == fr_sep);
@@ -140,19 +135,8 @@ int main(int, char**)
140135
assert(f.thousands_sep() == sep);
141136
}
142137
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
143-
// The below tests work around GLIBC's use of U00A0 as mon_thousands_sep
144-
// and U002E as mon_decimal_point.
145-
// TODO: Fix thousands_sep for 'char'.
146-
// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
147-
# if defined(_CS_GNU_LIBC_VERSION)
148-
// FIXME libc++ specifically works around \u00A0 by translating it into
149-
// a regular space.
150-
const wchar_t wsep = glibc_version_less_than("2.27") ? L'\u00A0' : L'\u202F';
151-
# elif defined(_WIN32) || defined(_AIX)
152-
const wchar_t wsep = L'\u00A0';
153-
# else
154-
const wchar_t wsep = L' ';
155-
# endif
138+
const wchar_t wsep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);
139+
156140
{
157141
Fwf f(LOCALE_ru_RU_UTF_8, 1);
158142
assert(f.thousands_sep() == wsep);

Diff for: libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp

+8-12
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
// REQUIRES: locale.en_US.UTF-8
1515
// REQUIRES: locale.fr_FR.UTF-8
1616

17+
// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP}
18+
1719
// <locale>
1820

1921
// template <class charT> class numpunct_byname;
@@ -25,6 +27,7 @@
2527
#include <cassert>
2628

2729
#include "test_macros.h"
30+
#include "locale_helpers.h"
2831
#include "platform_support.h" // locale name macros
2932

3033
int main(int, char**)
@@ -74,18 +77,11 @@ int main(int, char**)
7477
}
7578
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
7679
{
77-
#if defined(_CS_GNU_LIBC_VERSION)
78-
const wchar_t wsep = glibc_version_less_than("2.27") ? L' ' : L'\u202f';
79-
# elif defined(_AIX)
80-
const wchar_t wsep = L'\u202F';
81-
# elif defined(_WIN32)
82-
const wchar_t wsep = L'\u00A0';
83-
# else
84-
const wchar_t wsep = L',';
85-
# endif
86-
typedef wchar_t C;
87-
const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
88-
assert(np.thousands_sep() == wsep);
80+
const wchar_t wsep = LocaleHelpers::thousands_sep_or_default(FR_THOU_SEP);
81+
82+
typedef wchar_t C;
83+
const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
84+
assert(np.thousands_sep() == wsep);
8985
}
9086
#endif // TEST_HAS_NO_WIDE_CHARACTERS
9187
}

Diff for: libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp

+9-15
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
// REQUIRES: locale.fr_FR.UTF-8
1717
// REQUIRES: locale.ja_JP.UTF-8
1818

19+
// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP}
20+
// ADDITIONAL_COMPILE_FLAGS: -DFR_DEC_POINT=%{LOCALE_CONV_FR_FR_UTF_8_DECIMAL_POINT}
21+
1922
// <chrono>
2023

2124
// template<class Rep, class Period = ratio<1>> class duration;
@@ -33,6 +36,7 @@
3336
#include <sstream>
3437

3538
#include "make_string.h"
39+
#include "locale_helpers.h"
3640
#include "platform_support.h" // locale name macros
3741
#include "test_macros.h"
3842

@@ -88,21 +92,11 @@ static void test_values() {
8892
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1 000,1235s"));
8993
#endif
9094
} else {
91-
#ifdef _WIN32
92-
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1\u00A0000\u00A0000s"));
93-
assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1\u00A0000\u00A0000s"));
94-
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1\u00A0000,1235s"));
95-
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1\u00A0000,1235s"));
96-
#elif defined(__APPLE__)
97-
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1000000s"));
98-
assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1000000s"));
99-
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1000,1235s"));
100-
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1000,1235s"));
101-
#else
102-
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1\u202f000\u202f000s"));
103-
assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1\u202f000\u202f000s"));
104-
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1\u202f000,1235s"));
105-
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1\u202f000,1235s"));
95+
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
96+
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == L"-1" FR_THOU_SEP "000" FR_THOU_SEP "000s");
97+
assert(stream_fr_FR_locale<CharT>(1'000'000s) == L"1" FR_THOU_SEP "000" FR_THOU_SEP "000s");
98+
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == L"-1" FR_THOU_SEP "000" FR_DEC_POINT "1235s");
99+
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == L"1" FR_THOU_SEP "000" FR_DEC_POINT "1235s");
106100
#endif
107101
}
108102

Diff for: libcxx/test/support/locale_helpers.h

+6-31
Original file line numberDiff line numberDiff line change
@@ -41,37 +41,6 @@ std::wstring convert_thousands_sep(std::wstring const& in, wchar_t sep) {
4141
return out;
4242
}
4343

44-
// GLIBC 2.27 and newer use U+202F NARROW NO-BREAK SPACE as a thousands separator.
45-
// This function converts the spaces in string inputs to U+202F if need
46-
// be. FreeBSD's locale data also uses U+202F, since 2018.
47-
// Windows uses U+00A0 NO-BREAK SPACE.
48-
std::wstring convert_thousands_sep_fr_FR(std::wstring const& in) {
49-
#if defined(_CS_GNU_LIBC_VERSION)
50-
if (glibc_version_less_than("2.27"))
51-
return in;
52-
else
53-
return convert_thousands_sep(in, L'\u202F');
54-
#elif defined(__FreeBSD__)
55-
return convert_thousands_sep(in, L'\u202F');
56-
#elif defined(_WIN32)
57-
return convert_thousands_sep(in, L'\u00A0');
58-
#else
59-
return in;
60-
#endif
61-
}
62-
63-
// GLIBC 2.27 uses U+202F NARROW NO-BREAK SPACE as a thousands separator.
64-
// FreeBSD, AIX and Windows use U+00A0 NO-BREAK SPACE.
65-
std::wstring convert_thousands_sep_ru_RU(std::wstring const& in) {
66-
#if defined(TEST_HAS_GLIBC)
67-
return convert_thousands_sep(in, L'\u202F');
68-
# elif defined(__FreeBSD__) || defined(_WIN32) || defined(_AIX)
69-
return convert_thousands_sep(in, L'\u00A0');
70-
# else
71-
return in;
72-
# endif
73-
}
74-
7544
std::wstring negate_en_US(std::wstring s) {
7645
#if defined(_WIN32)
7746
return L"(" + s + L")";
@@ -80,6 +49,12 @@ std::wstring negate_en_US(std::wstring s) {
8049
#endif
8150
}
8251

52+
wchar_t thousands_sep_or_default(std::wstring s) { return !s.empty() ? s[0] : L','; }
53+
54+
wchar_t mon_thousands_sep_or_default(std::wstring s) { return thousands_sep_or_default(s); }
55+
56+
wchar_t decimal_point_or_default(std::wstring s) { return !s.empty() ? s[0] : L'.'; }
57+
8358
#endif // TEST_HAS_NO_WIDE_CHARACTERS
8459

8560
std::string negate_en_US(std::string s) {

Diff for: libcxx/utils/libcxx/test/features.py

+90-1
Original file line numberDiff line numberDiff line change
@@ -425,17 +425,106 @@ def _mingwSupportsModules(cfg):
425425
"fr_CA.ISO8859-1": ["fr_CA.ISO8859-1", "French_Canada.1252"],
426426
"cs_CZ.ISO8859-2": ["cs_CZ.ISO8859-2", "Czech_Czech Republic.1250"],
427427
}
428+
provide_locale_conversions = {
429+
"fr_FR.UTF-8": ["decimal_point", "mon_thousands_sep", "thousands_sep"],
430+
"ru_RU.UTF-8": ["mon_thousands_sep"],
431+
}
428432
for locale, alts in locales.items():
429433
# Note: Using alts directly in the lambda body here will bind it to the value at the
430434
# end of the loop. Assigning it to a default argument works around this issue.
431435
DEFAULT_FEATURES.append(
432436
Feature(
433437
name="locale.{}".format(locale),
434438
when=lambda cfg, alts=alts: hasAnyLocale(cfg, alts),
435-
)
439+
actions=lambda cfg, locale=locale, alts=alts: _getLocaleFlagsAction(
440+
cfg, locale, alts, provide_locale_conversions[locale]
441+
)
442+
if locale in provide_locale_conversions
443+
and "_LIBCPP_HAS_NO_WIDE_CHARACTERS" not in compilerMacros(cfg)
444+
else [],
445+
),
436446
)
437447

438448

449+
# Provide environment locale conversions through substitutions to avoid platform specific
450+
# maintenance.
451+
def _getLocaleFlagsAction(cfg, locale, alts, members):
452+
alts_list = ",".join([f'"{l}"' for l in alts])
453+
get_member_list = ",".join([f"lc->{m}" for m in members])
454+
455+
localeconv_info = programOutput(
456+
cfg,
457+
r"""
458+
#if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS)
459+
#define _CRT_SECURE_NO_WARNINGS
460+
#endif
461+
#include <stdio.h>
462+
#include <locale.h>
463+
#include <stdlib.h>
464+
#include <wchar.h>
465+
466+
// Print each requested locale conversion member on separate lines.
467+
int main() {
468+
const char* locales[] = { %s };
469+
for (int loc_i = 0; loc_i < %d; ++loc_i) {
470+
if (!setlocale(LC_ALL, locales[loc_i])) {
471+
continue; // Choose first locale name that is recognized.
472+
}
473+
474+
lconv* lc = localeconv();
475+
const char* members[] = { %s };
476+
for (size_t m_i = 0; m_i < %d; ++m_i) {
477+
if (!members[m_i]) {
478+
printf("\n"); // member value is an empty string
479+
continue;
480+
}
481+
482+
size_t len = mbstowcs(nullptr, members[m_i], 0);
483+
if (len == static_cast<size_t>(-1)) {
484+
fprintf(stderr, "mbstowcs failed unexpectedly\n");
485+
return 1;
486+
}
487+
// Include room for null terminator. Use malloc as these features
488+
// are also used by lit configs that don't use -lc++ (libunwind tests).
489+
wchar_t* dst = (wchar_t*)malloc((len + 1) * sizeof(wchar_t));
490+
size_t ret = mbstowcs(dst, members[m_i], len + 1);
491+
if (ret == static_cast<size_t>(-1)) {
492+
fprintf(stderr, "mbstowcs failed unexpectedly\n");
493+
free(dst);
494+
return 1;
495+
}
496+
497+
for (size_t i = 0; i < len; ++i) {
498+
if (dst[i] > 0x7F) {
499+
printf("\\u%%04x", dst[i]);
500+
} else {
501+
// c++03 does not allow basic ascii-range characters in UCNs
502+
printf("%%c", (char)dst[i]);
503+
}
504+
}
505+
printf("\n");
506+
free(dst);
507+
}
508+
return 0;
509+
}
510+
511+
return 1;
512+
}
513+
"""
514+
% (alts_list, len(alts), get_member_list, len(members)),
515+
)
516+
valid_define_name = re.sub(r"[.-]", "_", locale).upper()
517+
return [
518+
# Provide locale conversion through a substitution.
519+
# Example: %{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} = L"\u202f"
520+
AddSubstitution(
521+
f"%{{LOCALE_CONV_{valid_define_name}_{member.upper()}}}",
522+
lambda cfg, value=value: f"'L\"{value}\"'",
523+
)
524+
for member, value in zip(members, localeconv_info.split("\n"))
525+
]
526+
527+
439528
# Add features representing the target platform name: darwin, linux, windows, etc...
440529
DEFAULT_FEATURES += [
441530
Feature(name="darwin", when=lambda cfg: "__APPLE__" in compilerMacros(cfg)),

0 commit comments

Comments
 (0)