diff --git a/cmake/presets/common.json b/cmake/presets/common.json index e2beb5e6cc..64c7741fde 100644 --- a/cmake/presets/common.json +++ b/cmake/presets/common.json @@ -27,7 +27,16 @@ "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug", - "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}" + "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}", + "LIBUNICODE_TABLEGEN_FASTBUILD": "ON" + } + }, + { + "name": "gcc-like-debug", + "hidden": true, + "inherits": ["debug"], + "cacheVariables": { + "CMAKE_CXX_FLAGS_DEBUG": "-O0 -ggdb3 -fno-inline-small-functions -ginline-points -fno-omit-frame-pointer" } } ] diff --git a/cmake/presets/os-linux.json b/cmake/presets/os-linux.json index 31b1922fe5..90ed49f1b6 100644 --- a/cmake/presets/os-linux.json +++ b/cmake/presets/os-linux.json @@ -45,7 +45,7 @@ { "name": "linux-debug", "displayName": "Linux 64-bit", - "inherits": ["linux-common", "debug"] + "inherits": ["linux-common", "gcc-like-debug"] }, { "name": "linux-clang-release", @@ -55,7 +55,7 @@ { "name": "linux-clang-debug", "displayName": "Linux 64-bit (Clang)", - "inherits": ["linux-clang", "debug"] + "inherits": ["linux-clang", "gcc-like-debug"] }, { "name": "linux-gcc-release", @@ -65,7 +65,7 @@ { "name": "linux-gcc-debug", "displayName": "Linux 64-bit (GCC)", - "inherits": ["linux-gcc", "debug"] + "inherits": ["linux-gcc", "gcc-like-debug"] } ], "buildPresets": [ diff --git a/cmake/presets/os-macos.json b/cmake/presets/os-macos.json index 9553289651..fc00aa0604 100644 --- a/cmake/presets/os-macos.json +++ b/cmake/presets/os-macos.json @@ -20,7 +20,7 @@ { "name": "macos-debug", "displayName": "MacOS - Debug", - "inherits": ["macos-common", "debug"] + "inherits": ["macos-common", "gcc-like-debug"] }, { "name": "macos-release", diff --git a/cmake/presets/os-windows.json b/cmake/presets/os-windows.json index c895f0c644..fb16afd8bc 100644 --- a/cmake/presets/os-windows.json +++ b/cmake/presets/os-windows.json @@ -5,9 +5,9 @@ { "name": "windows-common", "inherits": "contour-common", + "generator": "Visual Studio 17 2022", "displayName": "Windows - common settings", "hidden": true, - "toolset": "host=x64", "binaryDir": "${sourceDir}/out/build/${presetName}", "condition": { "type": "equals", diff --git a/scripts/install-deps.ps1 b/scripts/install-deps.ps1 index 3f50ca08e9..f12809f189 100644 --- a/scripts/install-deps.ps1 +++ b/scripts/install-deps.ps1 @@ -14,9 +14,9 @@ class ThirdParty { $ThirdParties = @( [ThirdParty]@{ - Folder = "libunicode-23d7b30166a914b10526bb8fe7a469a9610c07dc"; - Archive = "libunicode-23d7b30166a914b10526bb8fe7a469a9610c07dc.zip"; - URI = "https://github.com/contour-terminal/libunicode/archive/23d7b30166a914b10526bb8fe7a469a9610c07dc.zip"; + Folder = "libunicode-dabfea48f7fd2a8bf6ae19e37581de5c127c607f"; + Archive = "libunicode-dabfea48f7fd2a8bf6ae19e37581de5c127c607f.zip"; + URI = "https://github.com/contour-terminal/libunicode/archive/dabfea48f7fd2a8bf6ae19e37581de5c127c607f.zip"; Macro = "libunicode" }; [ThirdParty]@{ diff --git a/scripts/install-deps.sh b/scripts/install-deps.sh index 579e9a9b8e..36047c1df5 100755 --- a/scripts/install-deps.sh +++ b/scripts/install-deps.sh @@ -121,7 +121,7 @@ fetch_and_unpack_boxed() fetch_and_unpack_libunicode() { if test x$LIBUNICODE_SRC_DIR = x; then - local libunicode_git_sha="23d7b30166a914b10526bb8fe7a469a9610c07dc" + local libunicode_git_sha="dabfea48f7fd2a8bf6ae19e37581de5c127c607f" fetch_and_unpack \ libunicode-$libunicode_git_sha \ libunicode-$libunicode_git_sha.tar.gz \ diff --git a/src/contour/ContourApp.cpp b/src/contour/ContourApp.cpp index a3154cfda7..f3b50dd756 100644 --- a/src/contour/ContourApp.cpp +++ b/src/contour/ContourApp.cpp @@ -140,18 +140,6 @@ ContourApp::ContourApp(): app("contour", "Contour Terminal Emulator", CONTOUR_VE signal(SIGABRT, segvHandler); #endif -#if defined(_WIN32) - // Enable VT output processing on Conhost. - HANDLE stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE); - DWORD savedModes {}; // NOTE: Is it required to restore that upon process exit? - if (GetConsoleMode(stdoutHandle, &savedModes) != FALSE) - { - DWORD modes = savedModes; - modes |= ENABLE_VIRTUAL_TERMINAL_PROCESSING; - SetConsoleMode(stdoutHandle, modes); - } -#endif - link("contour.capture", bind(&ContourApp::captureAction, this)); link("contour.list-debug-tags", bind(&ContourApp::listDebugTagsAction, this)); link("contour.set.profile", bind(&ContourApp::profileAction, this)); diff --git a/src/crispy/App.cpp b/src/crispy/App.cpp index 216efd8bf1..e53fe5ba09 100644 --- a/src/crispy/App.cpp +++ b/src/crispy/App.cpp @@ -21,6 +21,10 @@ #include #endif +#if defined(_WIN32) + #include +#endif + using std::bind; using std::cout; using std::exception; @@ -110,11 +114,7 @@ app::app(std::string appName, std::string appTitle, std::string appVersion, std: _appLicense { std::move(appLicense) }, _localStateDir { xdgStateHome() / _appName } { - if (char const* logFilterString = getenv("LOG")) - { - logstore::configure(logFilterString); - customizeLogStoreOutput(); - } + basicSetup(); _instance = this; @@ -128,6 +128,39 @@ app::~app() _instance = nullptr; } +void app::basicSetup() noexcept +{ + enableVTProcessing(); + enableUtf8Output(); + if (char const* logFilterString = getenv("LOG")) + { + logstore::configure(logFilterString); + customizeLogStoreOutput(); + } +} + +void app::enableVTProcessing() noexcept +{ +#if defined(_WIN32) + // Enable VT output processing on Conhost. + HANDLE stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD savedModes {}; // NOTE: Is it required to restore that upon process exit? + if (GetConsoleMode(stdoutHandle, &savedModes) != FALSE) + { + SetConsoleMode(stdoutHandle, + savedModes | ENABLE_VIRTUAL_TERMINAL_PROCESSING | ENABLE_PROCESSED_OUTPUT + | ENABLE_WRAP_AT_EOL_OUTPUT); + } +#endif +} + +void app::enableUtf8Output() noexcept +{ +#if defined(_WIN32) + SetConsoleOutputCP(CP_UTF8); +#endif +} + void app::link(std::string command, std::function handler) { _handlers[std::move(command)] = std::move(handler); diff --git a/src/crispy/App.h b/src/crispy/App.h index 93df6730b3..a328af475e 100644 --- a/src/crispy/App.h +++ b/src/crispy/App.h @@ -32,6 +32,9 @@ class app [[nodiscard]] std::string const& appVersion() const noexcept { return _appVersion; } [[nodiscard]] std::filesystem::path const& localStateDir() const noexcept { return _localStateDir; } + static void basicSetup() noexcept; + static void enableVTProcessing() noexcept; + static void enableUtf8Output() noexcept; static void customizeLogStoreOutput(); protected: diff --git a/src/text_shaper/CMakeLists.txt b/src/text_shaper/CMakeLists.txt index 67bf10fcea..dce6a29023 100644 --- a/src/text_shaper/CMakeLists.txt +++ b/src/text_shaper/CMakeLists.txt @@ -11,6 +11,8 @@ if("${CMAKE_SYSTEM}" MATCHES "Windows") list(APPEND text_shaper_SRC directwrite_analysis_wrapper.h) list(APPEND text_shaper_SRC directwrite_locator.cpp directwrite_locator.h) list(APPEND text_shaper_SRC directwrite_shaper.cpp directwrite_shaper.h) +else() + list(APPEND text_shaper_SRC fontconfig_locator.cpp fontconfig_locator.h) endif() if(APPLE) list(APPEND text_shaper_SRC coretext_locator.h coretext_locator.mm) diff --git a/src/vtbackend/CellUtil.h b/src/vtbackend/CellUtil.h index 8c1285338d..82ddbb5f16 100644 --- a/src/vtbackend/CellUtil.h +++ b/src/vtbackend/CellUtil.h @@ -103,12 +103,12 @@ template if (!AllowWidthChange) return 0; - auto const newWidth = [codepoint]() { + auto const newWidth = [codepoint]() -> int { switch (codepoint) { case 0xFE0E: return 1; case 0xFE0F: return 2; - default: return unicode::width(codepoint); + default: return static_cast(unicode::width(codepoint)); } }(); diff --git a/src/vtbackend/Line.cpp b/src/vtbackend/Line.cpp index cda94adfab..969e5797f8 100644 --- a/src/vtbackend/Line.cpp +++ b/src/vtbackend/Line.cpp @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 #include #include +#include #include +#include #include #include #include @@ -154,76 +156,108 @@ std::string Line::toUtf8Trimmed(bool stripLeadingSpaces, bool stripTrailin } template -InflatedLineBuffer inflate(TrivialLineBuffer const& input) +struct TrivialLineInflater { - static constexpr char32_t ReplacementCharacter { 0xFFFD }; + TrivialLineBuffer const& input; + InflatedLineBuffer columns; - auto columns = InflatedLineBuffer {}; - columns.reserve(unbox(input.displayWidth)); + explicit TrivialLineInflater(TrivialLineBuffer const& input): input { input } + { + columns.reserve(unbox(input.displayWidth)); + } - auto lastChar = char32_t { 0 }; - auto utf8DecoderState = unicode::utf8_decoder_state {}; - auto gapPending = 0; + InflatedLineBuffer inflate() && + { + vtParserLog()("Inflating TrivialLineBuffer: '{}'", input.text.data() ? crispy::escape(input.text.data()) : ""); + auto lineSegmenter = unicode::grapheme_line_segmenter { *this, input.text.view() }; + [[maybe_unused]] auto result = lineSegmenter.process(std::numeric_limits::max()); + assert(result.stop_condition == unicode::StopCondition::EndOfInput); + [[maybe_unused]] auto const flushed = lineSegmenter.flush(std::numeric_limits::max()); + assert(flushed.stop_condition == unicode::StopCondition::EndOfInput); + vtParserLog()("Inflated {}/{} columns", columns.size(), input.displayWidth); + + // Fill remaining columns + for (unsigned i = columns.size(); i < unbox(input.displayWidth); ++i) + { + columns.emplace_back(input.fillAttributes); + } + assert(columns.size() == unbox(input.displayWidth)); + + return std::move(columns); + } - for (char const ch: input.text.view()) + void on_invalid(std::string_view /*invalid*/) noexcept { - unicode::ConvertResult const r = unicode::from_utf8(utf8DecoderState, static_cast(ch)); - if (holds_alternative(r)) - continue; + fmt::print("inflate invalid\n"); + static constexpr char32_t ReplacementCharacter { 0xFFFD }; - auto const nextChar = - holds_alternative(r) ? get(r).value : ReplacementCharacter; + columns.emplace_back(); + columns.back().setHyperlink(input.hyperlink); + columns.back().write(input.textAttributes, ReplacementCharacter, 1); + } - if (unicode::grapheme_segmenter::breakable(lastChar, nextChar)) + void on_ascii(std::string_view text) noexcept + { + fmt::print("inflate ASCII: '{}'\n", text); + for (auto const ch: text) { - while (gapPending > 0) - { - columns.emplace_back(input.textAttributes.with(CellFlag::WideCharContinuation), - input.hyperlink); - --gapPending; - } - auto const charWidth = unicode::width(nextChar); - columns.emplace_back(Cell {}); + columns.emplace_back(); columns.back().setHyperlink(input.hyperlink); - columns.back().write(input.textAttributes, nextChar, static_cast(charWidth)); - gapPending = charWidth - 1; + columns.back().write(input.textAttributes, ch, 1); } - else + } + + void on_grapheme_cluster(std::string_view text, unsigned width) noexcept + { + fmt::print("inflate GC: '{}', width: {}\n", text, width); + columns.emplace_back(input.textAttributes, input.hyperlink); + Cell& cell = columns.back(); + cell.setHyperlink(input.hyperlink); + + auto utf8DecoderState = unicode::utf8_decoder_state {}; + for (auto const ch: text) { - Cell& prevCell = columns.back(); - auto const extendedWidth = prevCell.appendCharacter(nextChar); - if (extendedWidth > 0) + unicode::ConvertResult const r = unicode::from_utf8(utf8DecoderState, static_cast(ch)); + if (auto const* cp = std::get_if(&r)) { - auto const cellsAvailable = *input.displayWidth - static_cast(columns.size()) + 1; - auto const n = min(extendedWidth, cellsAvailable); - for (int i = 1; i < n; ++i) - { - columns.emplace_back(Cell { input.textAttributes }); - columns.back().setHyperlink(input.hyperlink); - } + std::cout << fmt::format(" - codepoint: U+{:X}\n", (unsigned) cp->value); + if (cell.codepointCount() == 0) + cell.setCharacter(cp->value); + else + (void) cell.appendCharacter(cp->value); } } - lastChar = nextChar; - } - while (gapPending > 0) - { - columns.emplace_back(Cell { input.textAttributes, input.hyperlink }); - --gapPending; - } + fmt::print(" -> result (UTF-8): \"{}\"\n", cell.toUtf8()); - assert(columns.size() == unbox(input.usedColumns)); - assert(unbox(input.displayWidth) > 0); - - while (columns.size() < unbox(input.displayWidth)) - columns.emplace_back(Cell { input.fillAttributes }); + // Fill remaining columns for wide characters + for (unsigned i = 1; i < width; ++i) + { + std::cout << fmt::format(" - continuation\n"); + columns.emplace_back(input.textAttributes.with(CellFlag::WideCharContinuation), input.hyperlink); + cell.setWidth(width); + } + } +}; - return columns; +template +InflatedLineBuffer inflate(TrivialLineBuffer const& input) +{ + return TrivialLineInflater(input).inflate(); } + } // end namespace vtbackend +// {{{ Explicit instantiation of Line for supported cell types. #include -template class vtbackend::Line; - #include -template class vtbackend::Line; + +namespace vtbackend +{ + +template class Line; +template class Line; +template InflatedLineBuffer inflate(TrivialLineBuffer const& input); + +} // namespace vtbackend +// }}} diff --git a/src/vtbackend/Screen.h b/src/vtbackend/Screen.h index b3e7b9dbe1..0118eff5b0 100644 --- a/src/vtbackend/Screen.h +++ b/src/vtbackend/Screen.h @@ -669,7 +669,14 @@ template inline bool Screen::isContiguousToCurrentLine(std::string_view continuationChars) const noexcept { auto const& line = currentLine(); +#if !defined(_WIN32) return line.isTrivialBuffer() && line.trivialBuffer().text.view().end() == continuationChars.begin(); +#else + char const* const end = line.trivialBuffer().text.data() + line.trivialBuffer().text.size(); + char const* const next = continuationChars.data(); + return line.isTrivialBuffer() && end == next; + // TODO: && line.trivialBuffer().text.view().end() == continuationChars.begin(); +#endif } } // namespace vtbackend diff --git a/src/vtbackend/Screen_test.cpp b/src/vtbackend/Screen_test.cpp index ea582de959..48922bbf95 100644 --- a/src/vtbackend/Screen_test.cpp +++ b/src/vtbackend/Screen_test.cpp @@ -552,9 +552,17 @@ TEST_CASE("AppendChar.emoji_1", "[screen]") auto mock = MockTerm { PageSize { LineCount(1), ColumnCount(3) } }; auto& screen = mock.terminal.primaryScreen(); - mock.writeToScreen(U"\U0001F600"); + mock.writeToScreen("\xf0\x9f\x98\x80"); // U+1F600 - auto const& c1 = screen.at(LineOffset(0), ColumnOffset(0)); + Line const& line = screen.grid().lineAt(LineOffset(0)); + CHECK(line.isTrivialBuffer()); + TrivialLineBuffer const& trivialBuffer = line.trivialBuffer(); + CHECK(trivialBuffer.usedColumns == ColumnCount(2)); + CHECK(trivialBuffer.text.view() == "\xf0\x9f\x98\x80"); + + Line::InflatedBuffer const& inflated = line.inflatedBuffer(); + CompactCell const& c1 = inflated.at(0); + // auto const& c1 = screen.at(LineOffset(0), ColumnOffset(0)); CHECK(c1.codepoints() == U"\U0001F600"); CHECK(c1.width() == 2); REQUIRE(screen.logicalCursorPosition() == CellLocation { LineOffset(0), ColumnOffset(2) }); diff --git a/src/vtbackend/SequenceBuilder.h b/src/vtbackend/SequenceBuilder.h index 10e2188b0e..2c40ae5e29 100644 --- a/src/vtbackend/SequenceBuilder.h +++ b/src/vtbackend/SequenceBuilder.h @@ -50,12 +50,22 @@ class SequenceBuilder } void print(char32_t codepoint) { + if (vtParserLog) + { + if (codepoint < 0x80 && std::isprint(static_cast(codepoint))) + vtParserLog()("Print: '{}'", static_cast(codepoint)); + else + vtParserLog()("Print: U+{:X}", (unsigned) codepoint); + } _incrementInstructionCounter(); _handler.writeText(codepoint); } size_t print(std::string_view chars, size_t cellCount) { + if (vtParserLog) + vtParserLog()("Print: ({}) '{}'", cellCount, crispy::escape(chars)); + assert(!chars.empty()); _incrementInstructionCounter(cellCount); @@ -63,7 +73,13 @@ class SequenceBuilder return _handler.maxBulkTextSequenceWidth(); } - void printEnd() { _handler.writeTextEnd(); } + void printEnd() + { + if (vtParserLog) + vtParserLog()("PrintEnd"); + + _handler.writeTextEnd(); + } void execute(char controlCode) { _handler.executeControlCode(controlCode); } diff --git a/src/vtbackend/ViCommands.cpp b/src/vtbackend/ViCommands.cpp index a325d3febd..f6c3bc2723 100644 --- a/src/vtbackend/ViCommands.cpp +++ b/src/vtbackend/ViCommands.cpp @@ -114,7 +114,7 @@ namespace return terminal.alternateScreen().grid().rightMostNonEmptyAt(lineOffset); } - constexpr std::optional> matchingPairOfChar(char32_t input) noexcept + constexpr std::optional> matchingPairOfChar(char32_t input) noexcept { auto constexpr Pairs = std::array { std::pair { U'(', U')' }, diff --git a/src/vtbackend/cell/CellConcept.h b/src/vtbackend/cell/CellConcept.h index fa3c324c32..b871caa990 100644 --- a/src/vtbackend/cell/CellConcept.h +++ b/src/vtbackend/cell/CellConcept.h @@ -52,6 +52,7 @@ concept CellConcept = requires(T t, T const& u) { t.setCharacter(char32_t {}); { t.appendCharacter(char32_t {}) } -> std::same_as; + // TODO(pr) rename appendCharacter function to extendGraphemeCluster(codepoint) { u.toUtf8() } -> std::convertible_to; diff --git a/src/vtbackend/cell/CompactCell.h b/src/vtbackend/cell/CompactCell.h index a992e4aeaa..d0f4d8a5a0 100644 --- a/src/vtbackend/cell/CompactCell.h +++ b/src/vtbackend/cell/CompactCell.h @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -316,7 +317,7 @@ inline void CompactCell::setCharacter(char32_t codepoint) noexcept _extra->imageFragment = {}; } if (codepoint) - setWidth(static_cast(std::max(unicode::width(codepoint), 1))); + setWidth(static_cast(std::max(unicode::width(codepoint), 1u))); else setWidth(1); } @@ -324,6 +325,7 @@ inline void CompactCell::setCharacter(char32_t codepoint) noexcept inline int CompactCell::appendCharacter(char32_t codepoint) noexcept { assert(codepoint != 0); + assert(_codepoint != 0); CellExtra& ext = extra(); if (ext.codepoints.size() < MaxCodepoints - 1) diff --git a/src/vtbackend/cell/SimpleCell.h b/src/vtbackend/cell/SimpleCell.h index 6dac42a42c..40c223ae59 100644 --- a/src/vtbackend/cell/SimpleCell.h +++ b/src/vtbackend/cell/SimpleCell.h @@ -161,7 +161,7 @@ inline void SimpleCell::setCharacter(char32_t codepoint) if (codepoint) { _codepoints.push_back(codepoint); - setWidth(static_cast(std::max(unicode::width(codepoint), 1))); + setWidth(static_cast(std::max(unicode::width(codepoint), 1u))); } else setWidth(1); @@ -169,6 +169,8 @@ inline void SimpleCell::setCharacter(char32_t codepoint) inline int SimpleCell::appendCharacter(char32_t codepoint) { + assert(codepoint != 0); + assert(!_codepoints.empty() && "Use setCharacter() for first character."); _codepoints.push_back(codepoint); auto const diff = CellUtil::computeWidthChange(*this, codepoint); diff --git a/src/vtbackend/logging.h b/src/vtbackend/logging.h index 8ef48550bb..e78e937940 100644 --- a/src/vtbackend/logging.h +++ b/src/vtbackend/logging.h @@ -8,10 +8,7 @@ namespace vtbackend auto const inline terminalLog = logstore::category("vt.session", "Logs general terminal events."); auto const inline inputLog = logstore::category("vt.input", "Logs terminal keyboard/mouse input events."); -auto const inline vtParserLog = logstore::category("vt.parser", - "Logs terminal parser errors.", - logstore::category::state::Enabled, - logstore::category::visibility::Hidden); +auto const inline vtParserLog = logstore::category("vt.parser", "Logs terminal parser errors."); #if defined(LIBTERMINAL_LOG_TRACE) auto const inline vtTraceSequenceLog = logstore::category("vt.trace.sequence", "Logs terminal screen trace."); diff --git a/src/vtparser/CMakeLists.txt b/src/vtparser/CMakeLists.txt index 734aea8da5..647863f539 100644 --- a/src/vtparser/CMakeLists.txt +++ b/src/vtparser/CMakeLists.txt @@ -13,6 +13,7 @@ target_link_libraries(vtparser PUBLIC fmt::fmt-header-only range-v3::range-v3 unicode::unicode + crispy::core ) target_include_directories(vtparser PUBLIC $ @@ -25,6 +26,6 @@ if(VTPARSER_TESTING) add_executable(vtparser_test Parser_test.cpp ) - target_link_libraries(vtparser_test vtparser Catch2::Catch2WithMain) + target_link_libraries(vtparser_test vtparser Catch2::Catch2) add_test(vtparser_test ./vtparser_test) endif() diff --git a/src/vtparser/Parser-impl.h b/src/vtparser/Parser-impl.h index 93e4ef3232..ccc0a6b8d7 100644 --- a/src/vtparser/Parser-impl.h +++ b/src/vtparser/Parser-impl.h @@ -2,6 +2,8 @@ #pragma once #include +#include + #include #include @@ -326,7 +328,19 @@ void Parser::parseFragment(gsl::span(*input)); auto const [processKind, processedByteCount] = parseBulkText(input, end); + // TODO(pr) what if parseBulkText() knows we've hit the end already? then we should break out of the + // loop right away + vtTraceParserLog()("VTParser: Processed {} bytes. Kind {}\n", + static_cast(processedByteCount), + processKind == ProcessKind::ContinueBulk ? "ContinueBulk" : "FallbackToFSM"); switch (processKind) { case ProcessKind::ContinueBulk: @@ -335,7 +349,14 @@ void Parser::parseFragment(gsl::span(*input++)); + input += processedByteCount; + if (input != end) + { + // TODO(pr) [libunicode] fix zero side Parser.simple_ut8 + auto const ch = static_cast(*input++); + if (ch != 0) + processOnceViaStateMachine(ch); + } break; } } @@ -356,7 +377,10 @@ void Parser::processOnceViaStateMachine(uint8_ handle(ActionClass::Enter, Table.entryEvents[static_cast(t)], ch); } else if (Action const a = Table.events[s][ch]; a != Action::Undefined) + { + vtTraceParserLog()("VTParser: Handling action {} for state/input pair.\n", a); handle(ActionClass::Event, a, ch); + } else _eventListener.error("Parser error: Unknown action for state/input pair."); } @@ -373,48 +397,127 @@ auto Parser::parseBulkText(char const* begin, if (!maxCharCount) return { ProcessKind::FallbackToFSM, 0 }; - _scanState.next = nullptr; - auto const chunk = std::string_view(input, static_cast(std::distance(input, end))); - auto const [cellCount, subStart, subEnd] = unicode::scan_text(_scanState, chunk, maxCharCount); + auto const chunk = std::string_view(input, end); - if (_scanState.next == input) - return { ProcessKind::FallbackToFSM, 0 }; + // TODO(pr) What if the last call to parseBulkText was only a partial read, and we have + // more text to read? Then we should not just call reset() but expand_buffer_by(). + _graphemeLineSegmenter.reset(chunk); + + unicode::grapheme_segmentation_result const result = _graphemeLineSegmenter.process(maxCharCount); + vtTraceParserLog()( + "result: [text: \"{}\", width: {}, stop: {}]", result.text, result.width, [](auto val) { + switch (val) + { + case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput"; + case unicode::StopCondition::EndOfWidth: return "EndOfWidth"; + case unicode::StopCondition::EndOfInput: return "EndOfInput"; + } + return "Unknown"; + }(result.stop_condition)); // We do not test on cellCount>0 because the scan could contain only a ZWJ (zero width // joiner), and that would be misleading. - assert(subStart <= subEnd); - auto const byteCount = static_cast(std::distance(subStart, subEnd)); - if (byteCount == 0) - return { ProcessKind::FallbackToFSM, 0 }; + auto const cellCount = result.width; + auto const* subStart = result.text.data(); + auto const* subEnd = subStart + result.text.size(); + assert(subStart <= subEnd); assert(cellCount <= maxCharCount); assert(subEnd <= chunk.data() + chunk.size()); - assert(_scanState.next <= chunk.data() + chunk.size()); + assert(_graphemeLineSegmenter.next() <= chunk.data() + chunk.size()); - auto const text = std::string_view { subStart, byteCount }; - if (_scanState.utf8.expectedLength == 0) + auto const byteCount = static_cast(std::distance(subStart, subEnd)); + assert(byteCount == result.text.size()); + // if (byteCount == 0) + // return { ProcessKind::FallbackToFSM, 0 }; + + if (!_graphemeLineSegmenter.is_utf8_byte_pending()) { - if (!text.empty()) + if (byteCount > 0) + { + auto const text = std::string_view { subStart, byteCount }; + if (vtTraceParserLog) + vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells and size {}. ", + text, + cellCount, + text.size()); _eventListener.print(text, cellCount); + } // This optimization is for the `cat`-people. // It further optimizes the throughput performance by bypassing // the FSM for the `(TEXT LF+)+`-case. // // As of bench-headless, the performance incrrease is about 50x. - if (input != end && *input == '\n') - _eventListener.execute(*input++); + // We need to ensure that there is input beyond the current chunk. + if (byteCount != static_cast(std::distance(input, end))) + { + if (*input == '\n') + { + auto x = makeParseBulkResult( + input, maxCharCount, unicode::StopCondition::EndOfInput, result.width, 1); + _eventListener.execute('\n'); + return x; + } + else if ((input + byteCount + 1) != end && input[byteCount] == '\r' + && input[byteCount + 1] == '\n') + { + // TODO: should have flushed first + auto x = makeParseBulkResult( + input, maxCharCount, unicode::StopCondition::EndOfInput, result.width, 2); + _eventListener.execute('\r'); + _eventListener.execute('\n'); + return x; + } + } } - auto const count = static_cast(std::distance(input, _scanState.next)); - return { ProcessKind::ContinueBulk, count }; + return makeParseBulkResult(input, maxCharCount, result.stop_condition, result.width, 0); +} + +template +auto Parser::makeParseBulkResult(char const* input, + unsigned maxCharCount, + unicode::StopCondition resultStopCondition, + unsigned resultWidth, + unsigned e) noexcept + -> std::tuple +{ + assert(input <= _graphemeLineSegmenter.next()); + auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next())); + + switch (resultStopCondition) + { + case unicode::StopCondition::UnexpectedInput: // + return { ProcessKind::FallbackToFSM, count + e }; + case unicode::StopCondition::EndOfWidth: // + return { ProcessKind::FallbackToFSM, count + e }; + case unicode::StopCondition::EndOfInput: + if (!_graphemeLineSegmenter.is_utf8_byte_pending()) + { + unicode::grapheme_segmentation_result const flushResult = + _graphemeLineSegmenter.flush(maxCharCount - resultWidth); + std::cout << "flushResult: " << flushResult << '\n'; + if (!flushResult.text.empty()) + { + auto const text = std::string_view { flushResult.text.data(), flushResult.text.size() }; + if (vtTraceParserLog) + vtTraceParserLog()( + "Printing flushed text \"{}\" with {} cells.", text, flushResult.width); + _eventListener.print(text, flushResult.width); + } + } + return { ProcessKind::ContinueBulk, count + e }; + } + crispy::unreachable(); + std::abort(); } template void Parser::printUtf8Byte(char ch) { - unicode::ConvertResult const r = unicode::from_utf8(_scanState.utf8, (uint8_t) ch); + unicode::ConvertResult const r = _graphemeLineSegmenter.process_single_byte(static_cast(ch)); if (std::holds_alternative(r)) return; @@ -422,7 +525,7 @@ void Parser::printUtf8Byte(char ch) auto const codepoint = std::holds_alternative(r) ? std::get(r).value : ReplacementCharacter; _eventListener.print(codepoint); - _scanState.lastCodepointHint = codepoint; + _graphemeLineSegmenter.reset_last_codepoint_hint(codepoint); } template @@ -433,9 +536,12 @@ void Parser::handle(ActionClass actionClass, (void) actionClass; auto const ch = static_cast(codepoint); + if (vtTraceParserLog) + vtTraceParserLog()("Parser.handle: {} {} {:X}", actionClass, action, (unsigned) ch); + switch (action) { - case Action::GroundStart: _scanState.lastCodepointHint = 0; break; + case Action::GroundStart: _graphemeLineSegmenter.reset_last_codepoint_hint(); break; case Action::Clear: _eventListener.clear(); break; case Action::CollectLeader: _eventListener.collectLeader(ch); break; case Action::Collect: _eventListener.collect(ch); break; diff --git a/src/vtparser/Parser.h b/src/vtparser/Parser.h index 72ddd9300c..b631d06671 100644 --- a/src/vtparser/Parser.h +++ b/src/vtparser/Parser.h @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once +#include + #include -#include +#include #include @@ -20,6 +22,16 @@ namespace vtparser { +#if defined(__GNUC__) || defined(__clang__) + #define VTPARSER_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) + #define VTPARSER_NOINLINE __declspec(noinline) +#else + #define VTPARSER_NOINLINE /*!*/ +#endif + +auto const inline vtTraceParserLog = logstore::category("vt.trace.parser", "Logs terminal parser trace."); + // NOLINTBEGIN(readability-identifier-naming) enum class State : uint8_t { @@ -692,7 +704,10 @@ class Parser [[nodiscard]] State state() const noexcept { return _state; } - [[nodiscard]] char32_t precedingGraphicCharacter() const noexcept { return _scanState.lastCodepointHint; } + [[nodiscard]] char32_t precedingGraphicCharacter() const noexcept + { + return _graphemeLineSegmenter.last_codepoint_hint(); + } void printUtf8Byte(char ch); @@ -705,7 +720,12 @@ class Parser FallbackToFSM }; - std::tuple parseBulkText(char const* begin, char const* end) noexcept; + auto parseBulkText(char const* begin, char const* end) noexcept -> std::tuple; + auto makeParseBulkResult(char const* begin, + unsigned maxCharCount, + unicode::StopCondition resultStopCondition, + unsigned resultWidth, + unsigned e) noexcept -> std::tuple; void processOnceViaStateMachine(uint8_t ch); void handle(ActionClass actionClass, Action action, uint8_t codepoint); @@ -714,7 +734,7 @@ class Parser // State _state = State::Ground; EventListener& _eventListener; - unicode::scan_state _scanState {}; + unicode::grapheme_line_segmenter _graphemeLineSegmenter; }; /// @returns parsed tuple with OSC code and offset to first data parameter byte. diff --git a/src/vtparser/Parser_test.cpp b/src/vtparser/Parser_test.cpp index bcb3555398..47c64c673c 100644 --- a/src/vtparser/Parser_test.cpp +++ b/src/vtparser/Parser_test.cpp @@ -2,8 +2,13 @@ #include #include +#include +#include + #include +#define CATCH_CONFIG_RUNNER +#include #include using namespace std; @@ -17,9 +22,22 @@ class MockParserEvents final: public vtparser::NullParserEvents size_t maxCharCount = 80; void error(string_view const& msg) override { INFO(fmt::format("Parser error received. {}", msg)); } - void print(char32_t ch) override { text += unicode::convert_to(ch); } + + void execute(char ch) override + { + UNSCOPED_INFO(fmt::format("execute: U+{:X}", (unsigned) ch)); + text += ch; + } + + void print(char32_t ch) override + { + UNSCOPED_INFO(fmt::format("print: U+{:X}", (unsigned) ch)); + text += unicode::convert_to(ch); + } + size_t print(std::string_view s, size_t cellCount) override { + UNSCOPED_INFO(fmt::format("print: {}", crispy::escape(s))); text += s; return maxCharCount -= cellCount; } @@ -33,6 +51,17 @@ class MockParserEvents final: public vtparser::NullParserEvents void dispatchPM() override { pm += "}"; } }; +TEST_CASE("Parser.utf8_sequence", "[Parser]") +{ + MockParserEvents textListener; + auto p = vtparser::Parser(textListener); + + p.parseFragment("Hall\xC3\xB6le\r\nHow are you?"); + // FIXME: a trailing zero is appended to the string, which is not expected. + + CHECK(textListener.text == "Hall\xC3\xB6le\r\nHow are you?"); +} + TEST_CASE("Parser.utf8_single", "[Parser]") { MockParserEvents textListener; @@ -65,3 +94,15 @@ TEST_CASE("Parser.APC") REQUIRE(listener.apc == "{Gi=1,a=q;}"); REQUIRE(listener.text == "ABCDEF"); } + +int main(int argc, char const* argv[]) +{ + crispy::app::basicSetup(); + + int const result = Catch::Session().run(argc, argv); + + // avoid closing extern console to close on VScode/windows + // system("pause"); + + return result; +}