Skip to content

release/20.x: [libcxx] Provide locale conversions to tests through lit substitution (#105651) #136449

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: release/20.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

// REQUIRES: locale.fr_FR.UTF-8

// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}

// <locale>

// class money_get<charT, InputIterator>
Expand Down Expand Up @@ -59,7 +61,8 @@ class my_facetw
};

// Rewrites the thousands separators of `in` to the monetary thousands
// separator that the host reports for the fr_FR.UTF-8 locale.
//
// FR_MON_THOU_SEP is a wide string literal injected on the command line via
// the %{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} lit substitution, so the
// expected separator comes from the test environment instead of being
// hard-coded per platform. The previous early return through the hard-coded
// per-platform helper made the lines below unreachable and has been dropped.
static std::wstring convert_thousands_sep(std::wstring const& in) {
  const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);
  return LocaleHelpers::convert_thousands_sep(in, fr_sep);
}
#endif // TEST_HAS_NO_WIDE_CHARACTERS

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

// REQUIRES: locale.ru_RU.UTF-8

// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}

// XFAIL: glibc-old-ru_RU-decimal-point

// <locale>
Expand Down Expand Up @@ -52,7 +54,8 @@ class my_facetw
};

// Rewrites the thousands separators of `in` to the monetary thousands
// separator that the host reports for the ru_RU.UTF-8 locale.
//
// RU_MON_THOU_SEP is a wide string literal injected on the command line via
// the %{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} lit substitution, so the
// expected separator comes from the test environment instead of being
// hard-coded per platform. The previous early return through the hard-coded
// per-platform helper made the lines below unreachable and has been dropped.
static std::wstring convert_thousands_sep(std::wstring const& in) {
  const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);
  return LocaleHelpers::convert_thousands_sep(in, ru_sep);
}
#endif // TEST_HAS_NO_WIDE_CHARACTERS

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

// REQUIRES: locale.fr_FR.UTF-8

// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}

// <locale>

// class money_put<charT, OutputIterator>
Expand Down Expand Up @@ -59,7 +61,8 @@ class my_facetw
};

// Rewrites the thousands separators of `in` to the monetary thousands
// separator that the host reports for the fr_FR.UTF-8 locale.
//
// FR_MON_THOU_SEP is a wide string literal injected on the command line via
// the %{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP} lit substitution, so the
// expected separator comes from the test environment instead of being
// hard-coded per platform. The previous early return through the hard-coded
// per-platform helper made the lines below unreachable and has been dropped.
static std::wstring convert_thousands_sep(std::wstring const& in) {
  const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);
  return LocaleHelpers::convert_thousands_sep(in, fr_sep);
}
#endif // TEST_HAS_NO_WIDE_CHARACTERS

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

// REQUIRES: locale.ru_RU.UTF-8

// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}

// XFAIL: glibc-old-ru_RU-decimal-point

// <locale>
Expand Down Expand Up @@ -52,7 +54,8 @@ class my_facetw
};

// Rewrites the thousands separators of `in` to the monetary thousands
// separator that the host reports for the ru_RU.UTF-8 locale.
//
// RU_MON_THOU_SEP is a wide string literal injected on the command line via
// the %{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP} lit substitution, so the
// expected separator comes from the test environment instead of being
// hard-coded per platform. The previous early return through the hard-coded
// per-platform helper made the lines below unreachable and has been dropped.
static std::wstring convert_thousands_sep(std::wstring const& in) {
  const wchar_t ru_sep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);
  return LocaleHelpers::convert_thousands_sep(in, ru_sep);
}
#endif // TEST_HAS_NO_WIDE_CHARACTERS

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
// NetBSD does not support LC_MONETARY at the moment
// XFAIL: netbsd

// XFAIL: LIBCXX-FREEBSD-FIXME

// REQUIRES: locale.en_US.UTF-8
// REQUIRES: locale.fr_FR.UTF-8
// REQUIRES: locale.ru_RU.UTF-8
// REQUIRES: locale.zh_CN.UTF-8

// ADDITIONAL_COMPILE_FLAGS: -DFR_MON_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_MON_THOUSANDS_SEP}
// ADDITIONAL_COMPILE_FLAGS: -DRU_MON_THOU_SEP=%{LOCALE_CONV_RU_RU_UTF_8_MON_THOUSANDS_SEP}

// <locale>

// class moneypunct_byname<charT, International>
Expand All @@ -27,6 +28,7 @@
#include <cassert>

#include "test_macros.h"
#include "locale_helpers.h"
#include "platform_support.h" // locale name macros

class Fnf
Expand Down Expand Up @@ -110,17 +112,10 @@ int main(int, char**)
Fnt f(LOCALE_fr_FR_UTF_8, 1);
assert(f.thousands_sep() == ' ');
}
// The below tests work around GLIBC's use of U202F as mon_thousands_sep.

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
#if defined(_CS_GNU_LIBC_VERSION)
const wchar_t fr_sep = glibc_version_less_than("2.27") ? L' ' : L'\u202F';
#elif defined(_WIN32)
const wchar_t fr_sep = L'\u00A0';
#elif defined(_AIX)
const wchar_t fr_sep = L'\u202F';
#else
const wchar_t fr_sep = L' ';
#endif
const wchar_t fr_sep = LocaleHelpers::mon_thousands_sep_or_default(FR_MON_THOU_SEP);

{
Fwf f(LOCALE_fr_FR_UTF_8, 1);
assert(f.thousands_sep() == fr_sep);
Expand All @@ -140,19 +135,8 @@ int main(int, char**)
assert(f.thousands_sep() == sep);
}
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
// The below tests work around GLIBC's use of U00A0 as mon_thousands_sep
// and U002E as mon_decimal_point.
// TODO: Fix thousands_sep for 'char'.
// related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=16006
# if defined(_CS_GNU_LIBC_VERSION)
// FIXME libc++ specifically works around \u00A0 by translating it into
// a regular space.
const wchar_t wsep = glibc_version_less_than("2.27") ? L'\u00A0' : L'\u202F';
# elif defined(_WIN32) || defined(_AIX)
const wchar_t wsep = L'\u00A0';
# else
const wchar_t wsep = L' ';
# endif
const wchar_t wsep = LocaleHelpers::mon_thousands_sep_or_default(RU_MON_THOU_SEP);

{
Fwf f(LOCALE_ru_RU_UTF_8, 1);
assert(f.thousands_sep() == wsep);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
// REQUIRES: locale.en_US.UTF-8
// REQUIRES: locale.fr_FR.UTF-8

// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP}

// <locale>

// template <class charT> class numpunct_byname;
Expand All @@ -25,6 +27,7 @@
#include <cassert>

#include "test_macros.h"
#include "locale_helpers.h"
#include "platform_support.h" // locale name macros

int main(int, char**)
Expand Down Expand Up @@ -74,18 +77,11 @@ int main(int, char**)
}
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
#if defined(_CS_GNU_LIBC_VERSION)
const wchar_t wsep = glibc_version_less_than("2.27") ? L' ' : L'\u202f';
# elif defined(_AIX)
const wchar_t wsep = L'\u202F';
# elif defined(_WIN32)
const wchar_t wsep = L'\u00A0';
# else
const wchar_t wsep = L',';
# endif
typedef wchar_t C;
const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
assert(np.thousands_sep() == wsep);
const wchar_t wsep = LocaleHelpers::thousands_sep_or_default(FR_THOU_SEP);

typedef wchar_t C;
const std::numpunct<C>& np = std::use_facet<std::numpunct<C> >(l);
assert(np.thousands_sep() == wsep);
}
#endif // TEST_HAS_NO_WIDE_CHARACTERS
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
// REQUIRES: locale.fr_FR.UTF-8
// REQUIRES: locale.ja_JP.UTF-8

// ADDITIONAL_COMPILE_FLAGS: -DFR_THOU_SEP=%{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP}
// ADDITIONAL_COMPILE_FLAGS: -DFR_DEC_POINT=%{LOCALE_CONV_FR_FR_UTF_8_DECIMAL_POINT}

// <chrono>

// template<class Rep, class Period = ratio<1>> class duration;
Expand All @@ -33,6 +36,7 @@
#include <sstream>

#include "make_string.h"
#include "locale_helpers.h"
#include "platform_support.h" // locale name macros
#include "test_macros.h"

Expand Down Expand Up @@ -88,21 +92,11 @@ static void test_values() {
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1 000,1235s"));
#endif
} else {
#ifdef _WIN32
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1\u00A0000\u00A0000s"));
assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1\u00A0000\u00A0000s"));
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1\u00A0000,1235s"));
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1\u00A0000,1235s"));
#elif defined(__APPLE__)
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1000000s"));
assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1000000s"));
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1000,1235s"));
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1000,1235s"));
#else
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == SV("-1\u202f000\u202f000s"));
assert(stream_fr_FR_locale<CharT>(1'000'000s) == SV("1\u202f000\u202f000s"));
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == SV("-1\u202f000,1235s"));
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == SV("1\u202f000,1235s"));
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
assert(stream_fr_FR_locale<CharT>(-1'000'000s) == L"-1" FR_THOU_SEP "000" FR_THOU_SEP "000s");
assert(stream_fr_FR_locale<CharT>(1'000'000s) == L"1" FR_THOU_SEP "000" FR_THOU_SEP "000s");
assert(stream_fr_FR_locale<CharT>(-1'000.123456s) == L"-1" FR_THOU_SEP "000" FR_DEC_POINT "1235s");
assert(stream_fr_FR_locale<CharT>(1'000.123456s) == L"1" FR_THOU_SEP "000" FR_DEC_POINT "1235s");
#endif
}

Expand Down
37 changes: 6 additions & 31 deletions libcxx/test/support/locale_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,37 +41,6 @@ std::wstring convert_thousands_sep(std::wstring const& in, wchar_t sep) {
return out;
}

// GLIBC 2.27 and newer use U+202F NARROW NO-BREAK SPACE as a thousands separator.
// This function converts the spaces in string inputs to U+202F if need
// be. FreeBSD's locale data also uses U+202F, since 2018.
// Windows uses U+00A0 NO-BREAK SPACE.
//
// Returns a copy of `in`, either unchanged or passed through
// convert_thousands_sep() with the separator chosen by the platform checks
// below.
std::wstring convert_thousands_sep_fr_FR(std::wstring const& in) {
#if defined(_CS_GNU_LIBC_VERSION)
// glibc: the separator depends on the runtime glibc version.
if (glibc_version_less_than("2.27"))
// Older than 2.27: the input is returned as-is.
return in;
else
return convert_thousands_sep(in, L'\u202F');
#elif defined(__FreeBSD__)
return convert_thousands_sep(in, L'\u202F');
#elif defined(_WIN32)
return convert_thousands_sep(in, L'\u00A0');
#else
// Every other platform: the input is returned as-is.
return in;
#endif
}

// GLIBC 2.27 uses U+202F NARROW NO-BREAK SPACE as a thousands separator.
// FreeBSD, AIX and Windows use U+00A0 NO-BREAK SPACE.
//
// Returns a copy of `in`, either unchanged or passed through
// convert_thousands_sep() with the separator chosen by the platform checks
// below.
std::wstring convert_thousands_sep_ru_RU(std::wstring const& in) {
#if defined(TEST_HAS_GLIBC)
// Applies to every glibc build: unlike convert_thousands_sep_fr_FR there is
// no glibc_version_less_than() check in this branch.
return convert_thousands_sep(in, L'\u202F');
# elif defined(__FreeBSD__) || defined(_WIN32) || defined(_AIX)
return convert_thousands_sep(in, L'\u00A0');
# else
// Every other platform: the input is returned as-is.
return in;
# endif
}

std::wstring negate_en_US(std::wstring s) {
#if defined(_WIN32)
return L"(" + s + L")";
Expand All @@ -80,6 +49,12 @@ std::wstring negate_en_US(std::wstring s) {
#endif
}

// Yields the first wide character of `s`; an empty `s` falls back to L','.
wchar_t thousands_sep_or_default(std::wstring s) {
  if (s.empty())
    return L',';
  return s[0];
}

// Monetary counterpart of thousands_sep_or_default(): forwards `s` to it and
// returns whatever it returns.
wchar_t mon_thousands_sep_or_default(std::wstring s) {
  const wchar_t sep = thousands_sep_or_default(s);
  return sep;
}

// Yields the first wide character of `s`; an empty `s` falls back to L'.'.
wchar_t decimal_point_or_default(std::wstring s) {
  if (s.empty())
    return L'.';
  return s[0];
}

#endif // TEST_HAS_NO_WIDE_CHARACTERS

std::string negate_en_US(std::string s) {
Expand Down
92 changes: 91 additions & 1 deletion libcxx/utils/libcxx/test/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,17 +425,107 @@ def _mingwSupportsModules(cfg):
"fr_CA.ISO8859-1": ["fr_CA.ISO8859-1", "French_Canada.1252"],
"cs_CZ.ISO8859-2": ["cs_CZ.ISO8859-2", "Czech_Czech Republic.1250"],
}
# Maps a locale name (a key of `locales`) to the `lconv` member names whose
# values should be exposed to tests. Locales that are not listed here get no
# extra actions on their `locale.<name>` feature; for listed locales the member
# names are forwarded to _getLocaleFlagsAction, which reads each one as
# `lc-><member>` from localeconv().
provide_locale_conversions = {
"fr_FR.UTF-8": ["decimal_point", "mon_thousands_sep", "thousands_sep"],
"ru_RU.UTF-8": ["mon_thousands_sep"],
}
for locale, alts in locales.items():
# Note: Using alts directly in the lambda body here will bind it to the value at the
# end of the loop. Assigning it to a default argument works around this issue.
DEFAULT_FEATURES.append(
Feature(
name="locale.{}".format(locale),
when=lambda cfg, alts=alts: hasAnyLocale(cfg, alts),
)
actions=lambda cfg, locale=locale, alts=alts: _getLocaleFlagsAction(
cfg, locale, alts, provide_locale_conversions[locale]
)
if locale in provide_locale_conversions
and ("_LIBCPP_HAS_WIDE_CHARACTERS" not in compilerMacros(cfg) or
compilerMacros(cfg)["_LIBCPP_HAS_WIDE_CHARACTERS"] == "1")
else [],
),
)


# Provide environment locale conversions through substitutions to avoid platform specific
# maintenance.
def _getLocaleFlagsAction(cfg, locale, alts, members):
    """Build substitutions exposing locale conversion values of the test host.

    A small probe program is built and run through ``programOutput``. It
    activates the first name in ``alts`` that ``setlocale`` accepts, reads the
    requested ``lconv`` members from ``localeconv()`` and prints one line per
    member. ASCII characters are printed verbatim and every other character as
    a universal-character-name, so the line can be pasted into a C++ wide
    string literal.

    Args:
        cfg: the configuration object handed to ``programOutput``.
        locale: canonical locale name (e.g. ``fr_FR.UTF-8``); only used to
            derive the substitution name.
        alts: candidate spellings of that locale, tried in order.
        members: names of ``lconv`` members to query (e.g. ``thousands_sep``).

    Returns:
        A list with one ``AddSubstitution`` per member, named
        ``%{LOCALE_CONV_<LOCALE>_<MEMBER>}`` and expanding to a shell-quoted
        wide string literal such as ``'L"\\u202f"'``.

    NOTE(review): the probe exits with status 1 when no name in ``alts`` is
    accepted or a conversion fails; how ``programOutput`` reports that is not
    visible from this function.
    """
    alts_list = ",".join([f'"{l}"' for l in alts])
    get_member_list = ",".join([f"lc->{m}" for m in members])

    localeconv_info = programOutput(
        cfg,
        r"""
        #if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS)
        #define _CRT_SECURE_NO_WARNINGS
        #endif
        #include <stdio.h>
        #include <locale.h>
        #include <stdlib.h>
        #include <wchar.h>

        // Print each requested locale conversion member on separate lines.
        int main() {
          const char* locales[] = { %s };
          for (int loc_i = 0; loc_i < %d; ++loc_i) {
            if (!setlocale(LC_ALL, locales[loc_i])) {
              continue; // Choose first locale name that is recognized.
            }

            lconv* lc = localeconv();
            const char* members[] = { %s };
            for (size_t m_i = 0; m_i < %d; ++m_i) {
              if (!members[m_i]) {
                printf("\n"); // member value is an empty string
                continue;
              }

              size_t len = mbstowcs(nullptr, members[m_i], 0);
              if (len == static_cast<size_t>(-1)) {
                fprintf(stderr, "mbstowcs failed unexpectedly\n");
                return 1;
              }
              // Include room for null terminator. Use malloc as these features
              // are also used by lit configs that don't use -lc++ (libunwind tests).
              wchar_t* dst = (wchar_t*)malloc((len + 1) * sizeof(wchar_t));
              if (!dst) {
                fprintf(stderr, "malloc failed unexpectedly\n");
                return 1;
              }
              size_t ret = mbstowcs(dst, members[m_i], len + 1);
              if (ret == static_cast<size_t>(-1)) {
                fprintf(stderr, "mbstowcs failed unexpectedly\n");
                free(dst);
                return 1;
              }

              for (size_t i = 0; i < len; ++i) {
                // Widen to a fixed unsigned type so the argument matches the
                // printf conversion however the platform defines wchar_t.
                unsigned long cp = (unsigned long)dst[i];
                if (cp > 0xFFFF) {
                  // The short UCN form takes exactly four hex digits, so
                  // larger code points need the eight digit form.
                  printf("\\U%%08lx", cp);
                } else if (cp > 0x7F) {
                  printf("\\u%%04lx", cp);
                } else {
                  // c++03 does not allow basic ascii-range characters in UCNs
                  printf("%%c", (char)dst[i]);
                }
              }
              printf("\n");
              free(dst);
            }
            return 0;
          }

          return 1;
        }
        """
        % (alts_list, len(alts), get_member_list, len(members)),
    )
    valid_define_name = re.sub(r"[.-]", "_", locale).upper()
    return [
        # Provide locale conversion through a substitution.
        # Example: %{LOCALE_CONV_FR_FR_UTF_8_THOUSANDS_SEP} = L"\u202f"
        AddSubstitution(
            f"%{{LOCALE_CONV_{valid_define_name}_{member.upper()}}}",
            # Bind `value` as a default argument so each lambda keeps its own
            # line of output instead of the last one seen by the comprehension.
            lambda cfg, value=value: f"'L\"{value}\"'",
        )
        for member, value in zip(members, localeconv_info.split("\n"))
    ]


# Add features representing the target platform name: darwin, linux, windows, etc...
DEFAULT_FEATURES += [
Feature(name="darwin", when=lambda cfg: "__APPLE__" in compilerMacros(cfg)),
Expand Down
Loading