1- #include < cstring>
2- #include < exception>
3-
4- #ifdef CAGE_DEBUG
5- #include " utf8/checked.h"
6- #else
7- #include " utf8/unchecked.h"
8- namespace utf8
9- {
10- using namespace unchecked ;
11- }
12- #endif
1+ #include < cstring> // std::strlen
2+ #include < string_view>
3+
4+ #include " uni_algo/conv.h"
135
146#include < cage-core/pointerRangeHolder.h>
157#include < cage-core/utf.h>
168
17- #define TRY_BEGIN \
18- try \
9+ namespace cage
10+ {
11+ namespace
1912 {
20- #define TRY_END \
21- } \
22- catch (const std::exception &e) \
23- { \
24- CAGE_LOG_THROW (e.what ()); \
25- CAGE_THROW_ERROR (InvalidUtfString, " string with invalid utf encoding" ); \
13+ std::string_view view (PointerRange<const char > buffer)
14+ {
15+ return std::string_view (buffer.begin (), buffer.end ());
16+ }
17+
18+ std::u32string_view view (PointerRange<const uint32> buffer)
19+ {
20+ return std::u32string_view ((const char32_t *)buffer.begin (), (const char32_t *)buffer.end ());
21+ }
2622 }
2723
28- namespace cage
29- {
3024 bool utfValid (PointerRange<const char > buffer)
3125 {
32- TRY_BEGIN
33- return utf8::is_valid (buffer.begin (), buffer.end ());
34- TRY_END
26+ return una::is_valid_utf8 (view (buffer));
3527 }
3628
3729 bool utfValid (const String &str)
@@ -46,9 +38,7 @@ namespace cage
4638
4739 uint32 utf32Length (PointerRange<const char > buffer)
4840 {
49- TRY_BEGIN
50- return numeric_cast<uint32>(utf8::distance (buffer.begin (), buffer.end ()));
51- TRY_END
41+ return una::utf8to32u (view (buffer)).length ();
5242 }
5343
5444 uint32 utf32Length (const String &str)
@@ -63,30 +53,13 @@ namespace cage
6353
6454 uint32 utf8Length (PointerRange<const uint32> buffer)
6555 {
66- TRY_BEGIN
67- const uint32 *b = buffer.begin ();
68- const uint32 *e = buffer.end ();
69- char tmp[7 ];
70- uint32 result = 0 ;
71- while (b != e)
72- {
73- char *d = utf8::utf32to8 (b, b + 1 , tmp);
74- result += numeric_cast<uint32>(d - tmp);
75- b++;
76- }
77- return result;
78- TRY_END
56+ return una::utf32to8u (view (buffer)).length ();
7957 }
8058
81- Holder<PointerRange<uint32>> utf8to32 (PointerRange<const char > inBuffer )
59+ Holder<PointerRange<uint32>> utf8to32 (PointerRange<const char > buffer )
8260 {
83- PointerRangeHolder<uint32> result;
84- result.resize (utf32Length (inBuffer));
85- TRY_BEGIN
86- uint32 *end = utf8::utf8to32 (inBuffer.begin (), inBuffer.end (), result.data ());
87- CAGE_ASSERT (end == result.data () + result.size ());
88- return result;
89- TRY_END
61+ const auto t = una::utf8to32u (view (buffer));
62+ return PointerRangeHolder<uint32>(t.begin (), t.end ());
9063 }
9164
9265 Holder<PointerRange<uint32>> utf8to32 (const String &str)
@@ -101,11 +74,10 @@ namespace cage
10174
10275 void utf8to32 (PointerRange<uint32> &outBuffer, PointerRange<const char > inBuffer)
10376 {
104- CAGE_ASSERT (outBuffer.size () >= utf32Length (inBuffer));
105- TRY_BEGIN
106- uint32 *end = utf8::utf8to32 (inBuffer.begin (), inBuffer.end (), outBuffer.begin ());
107- outBuffer = { outBuffer.begin (), end };
108- TRY_END
77+ const auto t = una::utf8to32u (view (inBuffer));
78+ CAGE_ASSERT (outBuffer.size () >= t.size ());
79+ detail::memcpy (outBuffer.data (), t.data (), t.size () * sizeof (uint32));
80+ outBuffer = PointerRange<uint32>(outBuffer.begin (), outBuffer.begin () + t.size ());
10981 }
11082
11183 void utf8to32 (PointerRange<uint32> &outBuffer, const String &str)
@@ -120,31 +92,21 @@ namespace cage
12092
12193 Holder<PointerRange<char >> utf32to8 (PointerRange<const uint32> buffer)
12294 {
123- PointerRangeHolder<char > result;
124- result.resize (utf8Length (buffer));
125- TRY_BEGIN
126- char *end = utf8::utf32to8 (buffer.begin (), buffer.end (), result.data ());
127- CAGE_ASSERT (end == result.data () + result.size ());
128- return result;
129- TRY_END
95+ const auto t = una::utf32to8u (view (buffer));
96+ return PointerRangeHolder<char >(t.begin (), t.end ());
13097 }
13198
13299 void utf32to8 (PointerRange<char > &outBuffer, PointerRange<const uint32> inBuffer)
133100 {
134- CAGE_ASSERT (outBuffer.size () >= utf8Length (inBuffer));
135- TRY_BEGIN
136- char *end = utf8::utf32to8 (inBuffer.begin (), inBuffer.end (), outBuffer.begin ());
137- outBuffer = { outBuffer.begin (), end };
138- TRY_END
101+ const auto t = una::utf32to8u (view (inBuffer));
102+ CAGE_ASSERT (outBuffer.size () >= t.size ());
103+ detail::memcpy (outBuffer.data (), t.data (), t.size () * sizeof (char ));
104+ outBuffer = PointerRange<char >(outBuffer.begin (), outBuffer.begin () + t.size ());
139105 }
140106
141- String utf32to8string (PointerRange<const uint32> inBuffer )
107+ String utf32to8string (PointerRange<const uint32> buffer )
142108 {
143- if (utf8Length (inBuffer) > String::MaxLength)
144- CAGE_THROW_ERROR (Exception, " utf string too long" );
145- char buff[String::MaxLength];
146- PointerRange<char > pr = { buff, buff + String::MaxLength - 1 };
147- utf32to8 (pr, inBuffer);
148- return String (pr);
109+ const auto t = una::utf32to8u (view (buffer));
110+ return String (PointerRange<const char >((const char *)t.data (), (const char *)t.data () + t.size ()));
149111 }
150112}
0 commit comments