|
| 1 | +#include <algorithm> |
| 2 | + |
| 3 | +#include "uni_algo/case.h" |
| 4 | +#include "uni_algo/conv.h" |
| 5 | +#include "uni_algo/norm.h" |
| 6 | + |
| 7 | +#include <cage-core/pointerRangeHolder.h> |
| 8 | +#include <cage-core/unicode.h> |
| 9 | + |
| 10 | +namespace cage |
| 11 | +{ |
| 12 | + namespace |
| 13 | + { |
| 14 | + std::string_view view(PointerRange<const char> buffer) |
| 15 | + { |
| 16 | + return std::string_view(buffer.begin(), buffer.end()); |
| 17 | + } |
| 18 | + } |
| 19 | + |
| 20 | + Holder<PointerRange<char>> unicodeTransform(PointerRange<const char> buffer, const UnicodeTransformConfig &cfg) |
| 21 | + { |
| 22 | + const std::string_view v = view(buffer); |
| 23 | + const una::locale l = una::locale(cfg.locale ? cfg.locale : ""); |
| 24 | + switch (cfg.transform) |
| 25 | + { |
| 26 | + case UnicodeTransformEnum::None: |
| 27 | + return PointerRangeHolder<char>(buffer); |
| 28 | + case UnicodeTransformEnum::Validate: |
| 29 | + { |
| 30 | + const auto t = una::utf32to8u(una::utf8to32u(v)); |
| 31 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 32 | + } |
| 33 | + case UnicodeTransformEnum::CanonicalComposition: |
| 34 | + { |
| 35 | + const auto t = una::norm::to_nfc_utf8(v); |
| 36 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 37 | + } |
| 38 | + case UnicodeTransformEnum::CanonicalDecomposition: |
| 39 | + { |
| 40 | + const auto t = una::norm::to_nfd_utf8(v); |
| 41 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 42 | + } |
| 43 | + case UnicodeTransformEnum::CompatibilityComposition: |
| 44 | + { |
| 45 | + const auto t = una::norm::to_nfkc_utf8(v); |
| 46 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 47 | + } |
| 48 | + case UnicodeTransformEnum::CompatibilityDecomposition: |
| 49 | + { |
| 50 | + const auto t = una::norm::to_nfkd_utf8(v); |
| 51 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 52 | + } |
| 53 | + case UnicodeTransformEnum::Lowercase: |
| 54 | + { |
| 55 | + const auto t = una::cases::to_lowercase_utf8(v, l); |
| 56 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 57 | + } |
| 58 | + case UnicodeTransformEnum::Uppercase: |
| 59 | + { |
| 60 | + const auto t = una::cases::to_uppercase_utf8(v, l); |
| 61 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 62 | + } |
| 63 | + case UnicodeTransformEnum::Titlecase: |
| 64 | + { |
| 65 | + const auto t = una::cases::to_titlecase_utf8(v, l); |
| 66 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 67 | + } |
| 68 | + case UnicodeTransformEnum::Casefold: |
| 69 | + { |
| 70 | + const auto t = una::cases::to_casefold_utf8(v); |
| 71 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 72 | + } |
| 73 | + case UnicodeTransformEnum::Unaccent: |
| 74 | + { |
| 75 | + const auto t = una::norm::to_unaccent_utf8(v); |
| 76 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 77 | + } |
| 78 | + case UnicodeTransformEnum::FuzzyMatching: |
| 79 | + { |
| 80 | + auto t = una::norm::to_unaccent_utf8(una::cases::to_casefold_utf8(v)); |
| 81 | + t.erase(std::unique(t.begin(), t.end()), t.end()); |
| 82 | + return PointerRangeHolder<char>(t.begin(), t.end()); |
| 83 | + } |
| 84 | + default: |
| 85 | + CAGE_THROW_CRITICAL(Exception, "invalid UnicodeTransformEnum value"); |
| 86 | + } |
| 87 | + } |
| 88 | + |
| 89 | + String unicodeTransformString(PointerRange<const char> buffer, const UnicodeTransformConfig &cfg) |
| 90 | + { |
| 91 | + const auto t = unicodeTransform(buffer, cfg); |
| 92 | + return String(PointerRange<const char>((const char *)t.data(), (const char *)t.data() + t.size())); |
| 93 | + } |
| 94 | +} |
0 commit comments