Open
Description
When the `regex_constants::collate` flag is set, character ranges fail to match characters according to the locale's collation order.
Test case
#include <iostream>
#include <locale>
#include <regex>
using namespace std;
// Reproduction: with regex_constants::collate set and a German locale imbued,
// the range [a-z] is expected to match L'ß'. Prints the regex_match result on
// the first line and two sort-key sanity checks on the second line.
int main() {
    const locale de("de_DE");

    // Stand-alone traits object imbued with the same locale, used below to
    // inspect the sort keys independently of the regex engine.
    regex_traits<wchar_t> traits;
    traits.imbue(de);

    // The locale must be imbued BEFORE the pattern is compiled:
    // basic_regex::imbue leaves the regex matching no character sequence, so
    // imbuing an already-constructed pattern would make regex_match return
    // false regardless of how collation is handled.
    wregex re;
    re.imbue(de);
    re.assign(L"[a-z]", regex_constants::collate);

    const wchar_t eszett = L'ß';
    const wchar_t s = L's';
    const wchar_t t = L't';
    const wchar_t ss[] = {L's', L's'};  // deliberately not null-terminated; passed as an iterator range

    // Sort keys produced by the locale's collation for each character / sequence.
    const auto stransform = traits.transform(&s, &s + 1);
    const auto ttransform = traits.transform(&t, &t + 1);
    const auto eszetttransform = traits.transform(&eszett, &eszett + 1);
    const auto sstransform = traits.transform(begin(ss), end(ss));

    // Line 1: does the collating range match ß?
    cout << regex_match(L"ß", re) << '\n';

    // Line 2: (is ß's key between the keys of s and t?) (is ß's key equal to the key of "ss"?)
    cout << (stransform <= eszetttransform
             && eszetttransform <= ttransform)
         << ' '
         << (sstransform == eszetttransform)
         << '\n';
    return 0;
}
Expected behavior
The range should match `ß`, since `ß` collates like `ss` and should be sorted between `s` and `t` (and thus between `a` and `z` as well). Hence, the output of this program should be:
1
1 1
Actual behavior
The range does not match `ß`, so the actual output is:
0
1 1
Note that the second line of the output shows that the problem does not lie in the implementation of `regex_traits<wchar_t>::transform`, as it returns sort keys that are equal for `ß` and `ss` and that place `ß` between `s` and `t`.