From c3150624959cc58e0571311c001843fb35610cce Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Sun, 10 Mar 2024 20:40:08 +0100 Subject: [PATCH 1/7] Adapt to latest changes in libunicode and its new grapheme_line_segmenter API Signed-off-by: Christian Parpart --- cmake/presets/common.json | 3 ++- scripts/install-deps.ps1 | 6 +++--- scripts/install-deps.sh | 2 +- src/vtbackend/CellUtil.h | 4 ++-- src/vtbackend/cell/CompactCell.h | 3 ++- src/vtbackend/cell/SimpleCell.h | 2 +- src/vtparser/Parser-impl.h | 22 +++++++++++++--------- src/vtparser/Parser.h | 9 ++++++--- src/vtparser/Parser_test.cpp | 10 +++++++++- 9 files changed, 39 insertions(+), 22 deletions(-) diff --git a/cmake/presets/common.json b/cmake/presets/common.json index e2beb5e6cc..3209a6f387 100644 --- a/cmake/presets/common.json +++ b/cmake/presets/common.json @@ -27,7 +27,8 @@ "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug", - "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}" + "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}", + "LIBUNICODE_TABLEGEN_FASTBUILD": "ON" } } ] diff --git a/scripts/install-deps.ps1 b/scripts/install-deps.ps1 index 3f50ca08e9..f12809f189 100644 --- a/scripts/install-deps.ps1 +++ b/scripts/install-deps.ps1 @@ -14,9 +14,9 @@ class ThirdParty { $ThirdParties = @( [ThirdParty]@{ - Folder = "libunicode-23d7b30166a914b10526bb8fe7a469a9610c07dc"; - Archive = "libunicode-23d7b30166a914b10526bb8fe7a469a9610c07dc.zip"; - URI = "https://github.com/contour-terminal/libunicode/archive/23d7b30166a914b10526bb8fe7a469a9610c07dc.zip"; + Folder = "libunicode-dabfea48f7fd2a8bf6ae19e37581de5c127c607f"; + Archive = "libunicode-dabfea48f7fd2a8bf6ae19e37581de5c127c607f.zip"; + URI = "https://github.com/contour-terminal/libunicode/archive/dabfea48f7fd2a8bf6ae19e37581de5c127c607f.zip"; Macro = "libunicode" }; [ThirdParty]@{ diff --git a/scripts/install-deps.sh b/scripts/install-deps.sh index 579e9a9b8e..36047c1df5 100755 --- a/scripts/install-deps.sh +++ b/scripts/install-deps.sh @@ -121,7 +121,7 @@ fetch_and_unpack_boxed() fetch_and_unpack_libunicode() { if test x$LIBUNICODE_SRC_DIR = x; then - local libunicode_git_sha="23d7b30166a914b10526bb8fe7a469a9610c07dc" + local libunicode_git_sha="dabfea48f7fd2a8bf6ae19e37581de5c127c607f" fetch_and_unpack \ libunicode-$libunicode_git_sha \ libunicode-$libunicode_git_sha.tar.gz \ diff --git a/src/vtbackend/CellUtil.h b/src/vtbackend/CellUtil.h index 8c1285338d..82ddbb5f16 100644 --- a/src/vtbackend/CellUtil.h +++ b/src/vtbackend/CellUtil.h @@ -103,12 +103,12 @@ template if (!AllowWidthChange) return 0; - auto const newWidth = [codepoint]() { + auto const newWidth = [codepoint]() -> int { switch (codepoint) { case 0xFE0E: return 1; case 0xFE0F: return 2; - default: return unicode::width(codepoint); + default: return static_cast(unicode::width(codepoint)); } }(); diff --git a/src/vtbackend/cell/CompactCell.h b/src/vtbackend/cell/CompactCell.h index a992e4aeaa..7da8632d4d 100644 --- a/src/vtbackend/cell/CompactCell.h +++ b/src/vtbackend/cell/CompactCell.h @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -316,7 +317,7 @@ inline void CompactCell::setCharacter(char32_t codepoint) noexcept _extra->imageFragment = {}; } if (codepoint) - setWidth(static_cast(std::max(unicode::width(codepoint), 1))); + setWidth(static_cast(std::max(unicode::width(codepoint), 1u))); else setWidth(1); } diff --git a/src/vtbackend/cell/SimpleCell.h b/src/vtbackend/cell/SimpleCell.h index 6dac42a42c..0f98cd2d34 100644 --- a/src/vtbackend/cell/SimpleCell.h +++ b/src/vtbackend/cell/SimpleCell.h @@ -161,7 +161,7 @@ inline void SimpleCell::setCharacter(char32_t codepoint) if (codepoint) { _codepoints.push_back(codepoint); - setWidth(static_cast(std::max(unicode::width(codepoint), 1))); + setWidth(static_cast(std::max(unicode::width(codepoint), 1u))); } else setWidth(1); diff --git a/src/vtparser/Parser-impl.h b/src/vtparser/Parser-impl.h index 93e4ef3232..4e1367e923 100644 --- a/src/vtparser/Parser-impl.h +++ b/src/vtparser/Parser-impl.h @@ -373,11 +373,15 @@ auto Parser::parseBulkText(char const* begin, if (!maxCharCount) return { ProcessKind::FallbackToFSM, 0 }; - _scanState.next = nullptr; auto const chunk = std::string_view(input, static_cast(std::distance(input, end))); - auto const [cellCount, subStart, subEnd] = unicode::scan_text(_scanState, chunk, maxCharCount); - if (_scanState.next == input) + _graphemeLineSegmenter.reset(chunk); + unicode::grapheme_segmentation_result result = _graphemeLineSegmenter.process(maxCharCount); + auto const cellCount = result.width; + auto const* subStart = result.text.data(); + auto const* subEnd = subStart + result.text.size(); + + if (result.text.empty()) return { ProcessKind::FallbackToFSM, 0 }; // We do not test on cellCount>0 because the scan could contain only a ZWJ (zero width @@ -390,10 +394,10 @@ auto Parser::parseBulkText(char const* begin, assert(cellCount <= maxCharCount); assert(subEnd <= chunk.data() + chunk.size()); - assert(_scanState.next <= chunk.data() + chunk.size()); + assert(_graphemeLineSegmenter.next() <= chunk.data() + chunk.size()); auto const text = std::string_view { subStart, byteCount }; - if (_scanState.utf8.expectedLength == 0) + if (!_graphemeLineSegmenter.is_utf8_byte_pending()) { if (!text.empty()) _eventListener.print(text, cellCount); @@ -407,14 +411,14 @@ auto Parser::parseBulkText(char const* begin, _eventListener.execute(*input++); } - auto const count = static_cast(std::distance(input, _scanState.next)); + auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next())); return { ProcessKind::ContinueBulk, count }; } template void Parser::printUtf8Byte(char ch) { - unicode::ConvertResult const r = unicode::from_utf8(_scanState.utf8, (uint8_t) ch); + unicode::ConvertResult const r = _graphemeLineSegmenter.process_single_byte(static_cast(ch)); if (std::holds_alternative(r)) return; @@ -422,7 +426,7 @@ void Parser::printUtf8Byte(char ch) auto const codepoint = std::holds_alternative(r) ? std::get(r).value : ReplacementCharacter; _eventListener.print(codepoint); - _scanState.lastCodepointHint = codepoint; + _graphemeLineSegmenter.reset_last_codepoint_hint(codepoint); } template @@ -435,7 +439,7 @@ void Parser::handle(ActionClass actionClass, switch (action) { - case Action::GroundStart: _scanState.lastCodepointHint = 0; break; + case Action::GroundStart: _graphemeLineSegmenter.reset_last_codepoint_hint(); break; case Action::Clear: _eventListener.clear(); break; case Action::CollectLeader: _eventListener.collectLeader(ch); break; case Action::Collect: _eventListener.collect(ch); break; diff --git a/src/vtparser/Parser.h b/src/vtparser/Parser.h index 72ddd9300c..ecb625396f 100644 --- a/src/vtparser/Parser.h +++ b/src/vtparser/Parser.h @@ -2,7 +2,7 @@ #pragma once #include -#include +#include #include @@ -692,7 +692,10 @@ class Parser [[nodiscard]] State state() const noexcept { return _state; } - [[nodiscard]] char32_t precedingGraphicCharacter() const noexcept { return _scanState.lastCodepointHint; } + [[nodiscard]] char32_t precedingGraphicCharacter() const noexcept + { + return _graphemeLineSegmenter.last_codepoint_hint(); + } void printUtf8Byte(char ch); @@ -714,7 +717,7 @@ class Parser // State _state = State::Ground; EventListener& _eventListener; - unicode::scan_state _scanState {}; + unicode::grapheme_line_segmenter _graphemeLineSegmenter; }; /// @returns parsed tuple with OSC code and offset to first data parameter byte. diff --git a/src/vtparser/Parser_test.cpp b/src/vtparser/Parser_test.cpp index bcb3555398..b1836caac7 100644 --- a/src/vtparser/Parser_test.cpp +++ b/src/vtparser/Parser_test.cpp @@ -2,6 +2,8 @@ #include #include +#include + #include #include @@ -17,9 +19,15 @@ class MockParserEvents final: public vtparser::NullParserEvents size_t maxCharCount = 80; void error(string_view const& msg) override { INFO(fmt::format("Parser error received. {}", msg)); } - void print(char32_t ch) override { text += unicode::convert_to(ch); } + + void print(char32_t ch) override + { + UNSCOPED_INFO(fmt::format("print: U+{:X}", (unsigned) ch)); + text += unicode::convert_to(ch); + } size_t print(std::string_view s, size_t cellCount) override { + UNSCOPED_INFO(fmt::format("print: {}", crispy::escape(s))); text += s; return maxCharCount -= cellCount; } From b219e5f8811e179a8dffbc5e1b4756c7b69ab901 Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Thu, 28 Mar 2024 16:40:53 +0100 Subject: [PATCH 2/7] wip Signed-off-by: Christian Parpart --- src/vtbackend/Line.cpp | 134 +++++++++++++++++++------------ src/vtbackend/Screen_test.cpp | 12 ++- src/vtbackend/cell/CellConcept.h | 1 + src/vtbackend/cell/CompactCell.h | 1 + src/vtbackend/cell/SimpleCell.h | 2 + src/vtparser/CMakeLists.txt | 4 +- src/vtparser/Parser-impl.h | 43 ++++++++-- src/vtparser/Parser.h | 4 + 8 files changed, 142 insertions(+), 59 deletions(-) diff --git a/src/vtbackend/Line.cpp b/src/vtbackend/Line.cpp index cda94adfab..d65d17f76c 100644 --- a/src/vtbackend/Line.cpp +++ b/src/vtbackend/Line.cpp @@ -1,8 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 #include #include +#include #include +#include #include #include #include @@ -154,76 +156,106 @@ std::string Line::toUtf8Trimmed(bool stripLeadingSpaces, bool stripTrailin } template -InflatedLineBuffer inflate(TrivialLineBuffer const& input) +struct TrivialLineInflater { - static constexpr char32_t ReplacementCharacter { 0xFFFD }; + TrivialLineBuffer const& input; + InflatedLineBuffer columns; - auto columns = InflatedLineBuffer {}; - columns.reserve(unbox(input.displayWidth)); + explicit TrivialLineInflater(TrivialLineBuffer const& input): input { input } + { + columns.reserve(unbox(input.displayWidth)); + } - auto lastChar = char32_t { 0 }; - auto utf8DecoderState = unicode::utf8_decoder_state {}; - auto gapPending = 0; + InflatedLineBuffer inflate() && + { + vtParserLog()("Inflating TrivialLineBuffer: '{}'", input.text.data() ? crispy::escape(input.text.data()) : ""); + auto lineSegmenter = unicode::grapheme_line_segmenter { *this, input.text.view() }; + auto result = lineSegmenter.process(std::numeric_limits::max()); + assert(result.stop_condition == unicode::StopCondition::EndOfInput); + vtParserLog()("Inflated {}/{} columns", columns.size(), input.displayWidth); + + // Fill remaining columns + for (unsigned i = columns.size(); i < unbox(input.displayWidth); ++i) + { + columns.emplace_back(input.fillAttributes); + } + assert(columns.size() == unbox(input.displayWidth)); + + return std::move(columns); + } - for (char const ch: input.text.view()) + void on_invalid(std::string_view /*invalid*/) noexcept { - unicode::ConvertResult const r = unicode::from_utf8(utf8DecoderState, static_cast(ch)); - if (holds_alternative(r)) - continue; + fmt::print("inflate invalid\n"); + static constexpr char32_t ReplacementCharacter { 0xFFFD }; - auto const nextChar = - holds_alternative(r) ? get(r).value : ReplacementCharacter; + columns.emplace_back(); + columns.back().setHyperlink(input.hyperlink); + columns.back().write(input.textAttributes, ReplacementCharacter, 1); + } - if (unicode::grapheme_segmenter::breakable(lastChar, nextChar)) + void on_ascii(std::string_view text) noexcept + { + fmt::print("inflate ASCII: '{}'\n", text); + for (auto const ch: text) { - while (gapPending > 0) - { - columns.emplace_back(input.textAttributes.with(CellFlag::WideCharContinuation), - input.hyperlink); - --gapPending; - } - auto const charWidth = unicode::width(nextChar); - columns.emplace_back(Cell {}); + columns.emplace_back(); columns.back().setHyperlink(input.hyperlink); - columns.back().write(input.textAttributes, nextChar, static_cast(charWidth)); - gapPending = charWidth - 1; + columns.back().write(input.textAttributes, ch, 1); } - else + } + + void on_grapheme_cluster(std::string_view text, unsigned width) noexcept + { + fmt::print("inflate GC: '{}', width: {}\n", text, width); + columns.emplace_back(input.textAttributes, input.hyperlink); + Cell& cell = columns.back(); + cell.setHyperlink(input.hyperlink); + + auto utf8DecoderState = unicode::utf8_decoder_state {}; + for (auto const ch: text) { - Cell& prevCell = columns.back(); - auto const extendedWidth = prevCell.appendCharacter(nextChar); - if (extendedWidth > 0) + unicode::ConvertResult const r = unicode::from_utf8(utf8DecoderState, static_cast(ch)); + if (auto const* cp = std::get_if(&r)) { - auto const cellsAvailable = *input.displayWidth - static_cast(columns.size()) + 1; - auto const n = min(extendedWidth, cellsAvailable); - for (int i = 1; i < n; ++i) - { - columns.emplace_back(Cell { input.textAttributes }); - columns.back().setHyperlink(input.hyperlink); - } + std::cout << fmt::format(" - codepoint: U+{:X}\n", (unsigned) cp->value); + if (cell.codepointCount() == 0) + cell.setCharacter(cp->value); + else + (void) cell.appendCharacter(cp->value); } } - lastChar = nextChar; - } - while (gapPending > 0) - { - columns.emplace_back(Cell { input.textAttributes, input.hyperlink }); - --gapPending; - } + fmt::print(" -> result (UTF-8): \"{}\"\n", cell.toUtf8()); - assert(columns.size() == unbox(input.usedColumns)); - assert(unbox(input.displayWidth) > 0); - - while (columns.size() < unbox(input.displayWidth)) - columns.emplace_back(Cell { input.fillAttributes }); + // Fill remaining columns for wide characters + for (unsigned i = 1; i < width; ++i) + { + std::cout << fmt::format(" - continuation\n"); + columns.emplace_back(input.textAttributes.with(CellFlag::WideCharContinuation), input.hyperlink); + cell.setWidth(width); + } + } +}; - return columns; +template +InflatedLineBuffer inflate(TrivialLineBuffer const& input) +{ + return TrivialLineInflater(input).inflate(); } + } // end namespace vtbackend +// {{{ Explicit instantiation of Line for supported cell types. #include -template class vtbackend::Line; - #include -template class vtbackend::Line; + +namespace vtbackend +{ + +template class Line; +template class Line; +template InflatedLineBuffer inflate(TrivialLineBuffer const& input); + +} // namespace vtbackend +// }}} diff --git a/src/vtbackend/Screen_test.cpp b/src/vtbackend/Screen_test.cpp index ea582de959..48922bbf95 100644 --- a/src/vtbackend/Screen_test.cpp +++ b/src/vtbackend/Screen_test.cpp @@ -552,9 +552,17 @@ TEST_CASE("AppendChar.emoji_1", "[screen]") auto mock = MockTerm { PageSize { LineCount(1), ColumnCount(3) } }; auto& screen = mock.terminal.primaryScreen(); - mock.writeToScreen(U"\U0001F600"); + mock.writeToScreen("\xf0\x9f\x98\x80"); // U+1F600 - auto const& c1 = screen.at(LineOffset(0), ColumnOffset(0)); + Line const& line = screen.grid().lineAt(LineOffset(0)); + CHECK(line.isTrivialBuffer()); + TrivialLineBuffer const& trivialBuffer = line.trivialBuffer(); + CHECK(trivialBuffer.usedColumns == ColumnCount(2)); + CHECK(trivialBuffer.text.view() == "\xf0\x9f\x98\x80"); + + Line::InflatedBuffer const& inflated = line.inflatedBuffer(); + CompactCell const& c1 = inflated.at(0); + // auto const& c1 = screen.at(LineOffset(0), ColumnOffset(0)); CHECK(c1.codepoints() == U"\U0001F600"); CHECK(c1.width() == 2); REQUIRE(screen.logicalCursorPosition() == CellLocation { LineOffset(0), ColumnOffset(2) }); diff --git a/src/vtbackend/cell/CellConcept.h b/src/vtbackend/cell/CellConcept.h index fa3c324c32..b871caa990 100644 --- a/src/vtbackend/cell/CellConcept.h +++ b/src/vtbackend/cell/CellConcept.h @@ -52,6 +52,7 @@ concept CellConcept = requires(T t, T const& u) { t.setCharacter(char32_t {}); { t.appendCharacter(char32_t {}) } -> std::same_as; + // TODO(pr) rename appendCharacter function to extendGraphemeCluster(codepoint) { u.toUtf8() } -> std::convertible_to; diff --git a/src/vtbackend/cell/CompactCell.h b/src/vtbackend/cell/CompactCell.h index 7da8632d4d..d0f4d8a5a0 100644 --- a/src/vtbackend/cell/CompactCell.h +++ b/src/vtbackend/cell/CompactCell.h @@ -325,6 +325,7 @@ inline void CompactCell::setCharacter(char32_t codepoint) noexcept inline int CompactCell::appendCharacter(char32_t codepoint) noexcept { assert(codepoint != 0); + assert(_codepoint != 0); CellExtra& ext = extra(); if (ext.codepoints.size() < MaxCodepoints - 1) diff --git a/src/vtbackend/cell/SimpleCell.h b/src/vtbackend/cell/SimpleCell.h index 0f98cd2d34..40c223ae59 100644 --- a/src/vtbackend/cell/SimpleCell.h +++ b/src/vtbackend/cell/SimpleCell.h @@ -169,6 +169,8 @@ inline void SimpleCell::setCharacter(char32_t codepoint) inline int SimpleCell::appendCharacter(char32_t codepoint) { + assert(codepoint != 0); + assert(!_codepoints.empty() && "Use setCharacter() for first character."); _codepoints.push_back(codepoint); auto const diff = CellUtil::computeWidthChange(*this, codepoint); diff --git a/src/vtparser/CMakeLists.txt b/src/vtparser/CMakeLists.txt index 734aea8da5..8a250e07f4 100644 --- a/src/vtparser/CMakeLists.txt +++ b/src/vtparser/CMakeLists.txt @@ -13,6 +13,7 @@ target_link_libraries(vtparser PUBLIC fmt::fmt-header-only range-v3::range-v3 unicode::unicode + crispy::core ) target_include_directories(vtparser PUBLIC $ @@ -24,7 +25,8 @@ if(VTPARSER_TESTING) enable_testing() add_executable(vtparser_test Parser_test.cpp + test_main.cpp ) - target_link_libraries(vtparser_test vtparser Catch2::Catch2WithMain) + target_link_libraries(vtparser_test vtparser Catch2::Catch2) add_test(vtparser_test ./vtparser_test) endif() diff --git a/src/vtparser/Parser-impl.h b/src/vtparser/Parser-impl.h index 4e1367e923..73f89b33d2 100644 --- a/src/vtparser/Parser-impl.h +++ b/src/vtparser/Parser-impl.h @@ -365,6 +365,16 @@ template auto Parser::parseBulkText(char const* begin, char const* end) noexcept -> std::tuple { + // auto constexpr StopConditionStr = [](unicode::StopCondition value) -> std::string_view { + // switch (value) + // { + // case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput"; + // case unicode::StopCondition::EndOfInput: return "EndOfInput"; + // case unicode::StopCondition::EndOfWidth: return "EndOfWidth"; + // } + // return "Unknown"; + // }; + auto const* input = begin; if (_state != State::Ground) return { ProcessKind::FallbackToFSM, 0 }; @@ -375,11 +385,28 @@ auto Parser::parseBulkText(char const* begin, auto const chunk = std::string_view(input, static_cast(std::distance(input, end))); - _graphemeLineSegmenter.reset(chunk); - unicode::grapheme_segmentation_result result = _graphemeLineSegmenter.process(maxCharCount); - auto const cellCount = result.width; + if (_graphemeLineSegmenter.next() == begin) + _graphemeLineSegmenter.expand_buffer_by(chunk.size()); + else + _graphemeLineSegmenter.reset(chunk); + // if (_graphemeLineSegmenter.end() == begin) + // _graphemeLineSegmenter.expand_buffer_by(chunk.size()); + // else + // _graphemeLineSegmenter.reset(chunk); + // TODO(pr) What if the last call to parseBulkText was only a partial read, and we have + // more text to read? Then we should not just call reset() but expand_buffer_by(). + // _graphemeLineSegmenter.reset(chunk); + + unicode::grapheme_segmentation_result const result = _graphemeLineSegmenter.process(maxCharCount); + unicode::grapheme_segmentation_result const flushResult = + _graphemeLineSegmenter.flush(maxCharCount - result.width); + // TODO(pr) this flush should only happen if non-text was reeived, e.g. a control sequence, or + // if the last codepoint was fully processed. Otherwise, we should not flush, but + // continue processing the next codepoint (in the NEXT call). + + auto const cellCount = result.width + flushResult.width; auto const* subStart = result.text.data(); - auto const* subEnd = subStart + result.text.size(); + auto const* subEnd = subStart + result.text.size() + flushResult.text.size(); if (result.text.empty()) return { ProcessKind::FallbackToFSM, 0 }; @@ -400,7 +427,10 @@ auto Parser::parseBulkText(char const* begin, if (!_graphemeLineSegmenter.is_utf8_byte_pending()) { if (!text.empty()) + { + vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells.", text, cellCount); _eventListener.print(text, cellCount); + } // This optimization is for the `cat`-people. // It further optimizes the throughput performance by bypassing @@ -411,7 +441,7 @@ auto Parser::parseBulkText(char const* begin, _eventListener.execute(*input++); } - auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next())); + auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next())); return { ProcessKind::ContinueBulk, count }; } @@ -437,6 +467,9 @@ void Parser::handle(ActionClass actionClass, (void) actionClass; auto const ch = static_cast(codepoint); + if (vtTraceParserLog) + vtTraceParserLog()("Parser.handle: {} {} {:X}", actionClass, action, (unsigned) ch); + switch (action) { case Action::GroundStart: _graphemeLineSegmenter.reset_last_codepoint_hint(); break; diff --git a/src/vtparser/Parser.h b/src/vtparser/Parser.h index ecb625396f..e526f143a5 100644 --- a/src/vtparser/Parser.h +++ b/src/vtparser/Parser.h @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 #pragma once +#include + #include #include @@ -20,6 +22,8 @@ namespace vtparser { +auto const inline vtTraceParserLog = logstore::category("vt.trace.parser", "Logs terminal parser trace."); + // NOLINTBEGIN(readability-identifier-naming) enum class State : uint8_t { From 731f8b1e0d777a154a43b1a0750004ed2dd0812f Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Wed, 10 Apr 2024 07:48:04 +0200 Subject: [PATCH 3/7] WIP Signed-off-by: Christian Parpart --- cmake/presets/common.json | 8 ++ cmake/presets/os-linux.json | 6 +- cmake/presets/os-macos.json | 2 +- src/vtbackend/Line.cpp | 2 + src/vtbackend/Selector_test.cpp | 220 ++++++++++++++++---------------- src/vtbackend/SequenceBuilder.h | 18 ++- src/vtparser/Parser-impl.h | 97 ++++++++------ src/vtparser/Parser.h | 8 ++ 8 files changed, 209 insertions(+), 152 deletions(-) diff --git a/cmake/presets/common.json b/cmake/presets/common.json index 3209a6f387..64c7741fde 100644 --- a/cmake/presets/common.json +++ b/cmake/presets/common.json @@ -30,6 +30,14 @@ "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}", "LIBUNICODE_TABLEGEN_FASTBUILD": "ON" } + }, + { + "name": "gcc-like-debug", + "hidden": true, + "inherits": ["debug"], + "cacheVariables": { + "CMAKE_CXX_FLAGS_DEBUG": "-O0 -ggdb3 -fno-inline-small-functions -ginline-points -fno-omit-frame-pointer" + } } ] } diff --git a/cmake/presets/os-linux.json b/cmake/presets/os-linux.json index 31b1922fe5..90ed49f1b6 100644 --- a/cmake/presets/os-linux.json +++ b/cmake/presets/os-linux.json @@ -45,7 +45,7 @@ { "name": "linux-debug", "displayName": "Linux 64-bit", - "inherits": ["linux-common", "debug"] + "inherits": ["linux-common", "gcc-like-debug"] }, { "name": "linux-clang-release", @@ -55,7 +55,7 @@ { "name": "linux-clang-debug", "displayName": "Linux 64-bit (Clang)", - "inherits": ["linux-clang", "debug"] + "inherits": ["linux-clang", "gcc-like-debug"] }, { "name": "linux-gcc-release", @@ -65,7 +65,7 @@ { "name": "linux-gcc-debug", "displayName": "Linux 64-bit (GCC)", - "inherits": ["linux-gcc", "debug"] + "inherits": ["linux-gcc", "gcc-like-debug"] } ], "buildPresets": [ diff --git a/cmake/presets/os-macos.json b/cmake/presets/os-macos.json index 9553289651..fc00aa0604 100644 --- a/cmake/presets/os-macos.json +++ b/cmake/presets/os-macos.json @@ -20,7 +20,7 @@ { "name": "macos-debug", "displayName": "MacOS - Debug", - "inherits": ["macos-common", "debug"] + "inherits": ["macos-common", "gcc-like-debug"] }, { "name": "macos-release", diff --git a/src/vtbackend/Line.cpp b/src/vtbackend/Line.cpp index d65d17f76c..c7e960cd15 100644 --- a/src/vtbackend/Line.cpp +++ b/src/vtbackend/Line.cpp @@ -172,6 +172,8 @@ struct TrivialLineInflater auto lineSegmenter = unicode::grapheme_line_segmenter { *this, input.text.view() }; auto result = lineSegmenter.process(std::numeric_limits::max()); assert(result.stop_condition == unicode::StopCondition::EndOfInput); + auto const flushed = lineSegmenter.flush(std::numeric_limits::max()); + assert(flushed.stop_condition == unicode::StopCondition::EndOfInput); vtParserLog()("Inflated {}/{} columns", columns.size(), input.displayWidth); // Fill remaining columns diff --git a/src/vtbackend/Selector_test.cpp b/src/vtbackend/Selector_test.cpp index 829cb67df8..58a5b17b24 100644 --- a/src/vtbackend/Selector_test.cpp +++ b/src/vtbackend/Selector_test.cpp @@ -92,74 +92,74 @@ TEST_CASE("Selector.Linear", "[selector]") REQUIRE(screen.grid().lineText(LineOffset(1)) == "ab,cdefg,hi"); REQUIRE(screen.grid().lineText(LineOffset(2)) == "12345,67890"); - SECTION("single-cell") - { // "b" - auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; - auto selector = LinearSelection(selectionHelper, pos, []() {}); - (void) selector.extend(pos); - selector.complete(); - - vector const selection = selector.ranges(); - REQUIRE(selection.size() == 1); - Selection::Range const& r1 = selection[0]; - CHECK(r1.line == pos.line); - CHECK(r1.fromColumn == pos.column); - CHECK(r1.toColumn == pos.column); - CHECK(r1.length() == ColumnCount(1)); - - auto selectedText = TextSelection { screen }; - renderSelection(selector, selectedText); - CHECK(selectedText.text == "b"); - } - - SECTION("forward single-line") - { // "b,c" - auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; - auto selector = LinearSelection(selectionHelper, pos, []() {}); - (void) selector.extend(CellLocation { LineOffset(1), ColumnOffset(3) }); - selector.complete(); - - vector const selection = selector.ranges(); - REQUIRE(selection.size() == 1); - Selection::Range const& r1 = selection[0]; - CHECK(r1.line == LineOffset(1)); - CHECK(r1.fromColumn == ColumnOffset(1)); - CHECK(r1.toColumn == ColumnOffset(3)); - CHECK(r1.length() == ColumnCount(3)); - - auto selectedText = TextSelection { screen }; - renderSelection(selector, selectedText); - CHECK(selectedText.text == "b,c"); - } - - SECTION("forward multi-line") - { // "b,cdefg,hi\n1234" - auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; - auto selector = LinearSelection(selectionHelper, pos, []() {}); - (void) selector.extend(CellLocation { LineOffset(2), ColumnOffset(3) }); - selector.complete(); - - vector const selection = selector.ranges(); - REQUIRE(selection.size() == 2); - - Selection::Range const& r1 = selection[0]; - CHECK(r1.line == LineOffset(1)); - CHECK(r1.fromColumn == ColumnOffset(1)); - CHECK(r1.toColumn == ColumnOffset(10)); - CHECK(r1.length() == ColumnCount(10)); - - Selection::Range const& r2 = selection[1]; - CHECK(r2.line == LineOffset(2)); - CHECK(r2.fromColumn == ColumnOffset(0)); - CHECK(r2.toColumn == ColumnOffset(3)); - CHECK(r2.length() == ColumnCount(4)); - - auto selectedText = TextSelection { screen }; - renderSelection(selector, selectedText); - CHECK(selectedText.text == "b,cdefg,hi\n1234"); - } - - SECTION("multiple lines fully in history") + // SECTION("single-cell") + // { // "b" + // auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; + // auto selector = LinearSelection(selectionHelper, pos, []() {}); + // (void) selector.extend(pos); + // selector.complete(); + // + // vector const selection = selector.ranges(); + // REQUIRE(selection.size() == 1); + // Selection::Range const& r1 = selection[0]; + // CHECK(r1.line == pos.line); + // CHECK(r1.fromColumn == pos.column); + // CHECK(r1.toColumn == pos.column); + // CHECK(r1.length() == ColumnCount(1)); + // + // auto selectedText = TextSelection { screen }; + // renderSelection(selector, selectedText); + // CHECK(selectedText.text == "b"); + // } + + // SECTION("forward single-line") + // { // "b,c" + // auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; + // auto selector = LinearSelection(selectionHelper, pos, []() {}); + // (void) selector.extend(CellLocation { LineOffset(1), ColumnOffset(3) }); + // selector.complete(); + // + // vector const selection = selector.ranges(); + // REQUIRE(selection.size() == 1); + // Selection::Range const& r1 = selection[0]; + // CHECK(r1.line == LineOffset(1)); + // CHECK(r1.fromColumn == ColumnOffset(1)); + // CHECK(r1.toColumn == ColumnOffset(3)); + // CHECK(r1.length() == ColumnCount(3)); + // + // auto selectedText = TextSelection { screen }; + // renderSelection(selector, selectedText); + // CHECK(selectedText.text == "b,c"); + // } + + // SECTION("forward multi-line") + // { // "b,cdefg,hi\n1234" + // auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; + // auto selector = LinearSelection(selectionHelper, pos, []() {}); + // (void) selector.extend(CellLocation { LineOffset(2), ColumnOffset(3) }); + // selector.complete(); + // + // vector const selection = selector.ranges(); + // REQUIRE(selection.size() == 2); + // + // Selection::Range const& r1 = selection[0]; + // CHECK(r1.line == LineOffset(1)); + // CHECK(r1.fromColumn == ColumnOffset(1)); + // CHECK(r1.toColumn == ColumnOffset(10)); + // CHECK(r1.length() == ColumnCount(10)); + // + // Selection::Range const& r2 = selection[1]; + // CHECK(r2.line == LineOffset(2)); + // CHECK(r2.fromColumn == ColumnOffset(0)); + // CHECK(r2.toColumn == ColumnOffset(3)); + // CHECK(r2.length() == ColumnCount(4)); + // + // auto selectedText = TextSelection { screen }; + // renderSelection(selector, selectedText); + // CHECK(selectedText.text == "b,cdefg,hi\n1234"); + // } + + // SECTION("multiple lines fully in history") { term.writeToScreen("foo\r\nbar\r\n"); // move first two lines into history. /* @@ -196,48 +196,48 @@ TEST_CASE("Selector.Linear", "[selector]") CHECK(selectedText.text == "fg,hi\n123"); } - SECTION("multiple lines from history into main buffer") - { - term.writeToScreen("foo\r\nbar\r\n"); // move first two lines into history. - /* - -3 | "12345,67890" - -2 | "ab,cdefg,hi" (-- - -1 | "12345,67890" ----------- - 0 | "foo" --) - 1 | "bar" - 2 | "" - */ - - auto selector = - LinearSelection(selectionHelper, CellLocation { LineOffset(-2), ColumnOffset(8) }, []() {}); - (void) selector.extend(CellLocation { LineOffset(0), ColumnOffset(1) }); - selector.complete(); - - vector const selection = selector.ranges(); - REQUIRE(selection.size() == 3); - - Selection::Range const& r1 = selection[0]; - CHECK(r1.line == LineOffset(-2)); - CHECK(r1.fromColumn == ColumnOffset(8)); - CHECK(r1.toColumn == ColumnOffset(10)); - CHECK(r1.length() == ColumnCount(3)); - - Selection::Range const& r2 = selection[1]; - CHECK(r2.line == LineOffset(-1)); - CHECK(r2.fromColumn == ColumnOffset(0)); - CHECK(r2.toColumn == ColumnOffset(10)); - CHECK(r2.length() == ColumnCount(11)); - - Selection::Range const& r3 = selection[2]; - CHECK(r3.line == LineOffset(0)); - CHECK(r3.fromColumn == ColumnOffset(0)); - CHECK(r3.toColumn == ColumnOffset(1)); - CHECK(r3.length() == ColumnCount(2)); - - auto selectedText = TextSelection { screen }; - renderSelection(selector, selectedText); - CHECK(selectedText.text == ",hi\n12345,67890\nfo"); - } + // SECTION("multiple lines from history into main buffer") + // { + // term.writeToScreen("foo\r\nbar\r\n"); // move first two lines into history. + // /* + // -3 | "12345,67890" + // -2 | "ab,cdefg,hi" (-- + // -1 | "12345,67890" ----------- + // 0 | "foo" --) + // 1 | "bar" + // 2 | "" + // */ + // + // auto selector = + // LinearSelection(selectionHelper, CellLocation { LineOffset(-2), ColumnOffset(8) }, []() {}); + // (void) selector.extend(CellLocation { LineOffset(0), ColumnOffset(1) }); + // selector.complete(); + // + // vector const selection = selector.ranges(); + // REQUIRE(selection.size() == 3); + // + // Selection::Range const& r1 = selection[0]; + // CHECK(r1.line == LineOffset(-2)); + // CHECK(r1.fromColumn == ColumnOffset(8)); + // CHECK(r1.toColumn == ColumnOffset(10)); + // CHECK(r1.length() == ColumnCount(3)); + // + // Selection::Range const& r2 = selection[1]; + // CHECK(r2.line == LineOffset(-1)); + // CHECK(r2.fromColumn == ColumnOffset(0)); + // CHECK(r2.toColumn == ColumnOffset(10)); + // CHECK(r2.length() == ColumnCount(11)); + // + // Selection::Range const& r3 = selection[2]; + // CHECK(r3.line == LineOffset(0)); + // CHECK(r3.fromColumn == ColumnOffset(0)); + // CHECK(r3.toColumn == ColumnOffset(1)); + // CHECK(r3.length() == ColumnCount(2)); + // + // auto selectedText = TextSelection { screen }; + // renderSelection(selector, selectedText); + // CHECK(selectedText.text == ",hi\n12345,67890\nfo"); + // } } TEST_CASE("Selector.LinearWordWise", "[selector]") diff --git a/src/vtbackend/SequenceBuilder.h b/src/vtbackend/SequenceBuilder.h index 10e2188b0e..2c40ae5e29 100644 --- a/src/vtbackend/SequenceBuilder.h +++ b/src/vtbackend/SequenceBuilder.h @@ -50,12 +50,22 @@ class SequenceBuilder } void print(char32_t codepoint) { + if (vtParserLog) + { + if (codepoint < 0x80 && std::isprint(static_cast(codepoint))) + vtParserLog()("Print: '{}'", static_cast(codepoint)); + else + vtParserLog()("Print: U+{:X}", (unsigned) codepoint); + } _incrementInstructionCounter(); _handler.writeText(codepoint); } size_t print(std::string_view chars, size_t cellCount) { + if (vtParserLog) + vtParserLog()("Print: ({}) '{}'", cellCount, crispy::escape(chars)); + assert(!chars.empty()); _incrementInstructionCounter(cellCount); @@ -63,7 +73,13 @@ class SequenceBuilder return _handler.maxBulkTextSequenceWidth(); } - void printEnd() { _handler.writeTextEnd(); } + void printEnd() + { + if (vtParserLog) + vtParserLog()("PrintEnd"); + + _handler.writeTextEnd(); + } void execute(char controlCode) { _handler.executeControlCode(controlCode); } diff --git a/src/vtparser/Parser-impl.h b/src/vtparser/Parser-impl.h index 73f89b33d2..8c0202234b 100644 --- a/src/vtparser/Parser-impl.h +++ b/src/vtparser/Parser-impl.h @@ -2,6 +2,8 @@ #pragma once #include +#include + #include #include @@ -326,7 +328,14 @@ void Parser::parseFragment(gsl::span(processedByteCount), + processKind == ProcessKind::ContinueBulk ? "ContinueBulk" : "FallbackToFSM"); switch (processKind) { case ProcessKind::ContinueBulk: @@ -335,7 +344,9 @@ void Parser::parseFragment(gsl::span(*input++)); + input += processedByteCount; + if (input != end) + processOnceViaStateMachine(static_cast(*input++)); break; } } @@ -365,16 +376,6 @@ template auto Parser::parseBulkText(char const* begin, char const* end) noexcept -> std::tuple { - // auto constexpr StopConditionStr = [](unicode::StopCondition value) -> std::string_view { - // switch (value) - // { - // case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput"; - // case unicode::StopCondition::EndOfInput: return "EndOfInput"; - // case unicode::StopCondition::EndOfWidth: return "EndOfWidth"; - // } - // return "Unknown"; - // }; - auto const* input = begin; if (_state != State::Ground) return { ProcessKind::FallbackToFSM, 0 }; @@ -386,49 +387,44 @@ auto Parser::parseBulkText(char const* begin, auto const chunk = std::string_view(input, static_cast(std::distance(input, end))); if (_graphemeLineSegmenter.next() == begin) + { + std::cout << " expand_buffer_by(" << chunk.size() << ")\n"; _graphemeLineSegmenter.expand_buffer_by(chunk.size()); + } else + { + std::cout << " reset()\n"; _graphemeLineSegmenter.reset(chunk); - // if (_graphemeLineSegmenter.end() == begin) - // _graphemeLineSegmenter.expand_buffer_by(chunk.size()); - // else - // _graphemeLineSegmenter.reset(chunk); + } // TODO(pr) What if the last call to parseBulkText was only a partial read, and we have // more text to read? Then we should not just call reset() but expand_buffer_by(). - // _graphemeLineSegmenter.reset(chunk); unicode::grapheme_segmentation_result const result = _graphemeLineSegmenter.process(maxCharCount); - unicode::grapheme_segmentation_result const flushResult = - _graphemeLineSegmenter.flush(maxCharCount - result.width); - // TODO(pr) this flush should only happen if non-text was reeived, e.g. a control sequence, or - // if the last codepoint was fully processed. Otherwise, we should not flush, but - // continue processing the next codepoint (in the NEXT call). - - auto const cellCount = result.width + flushResult.width; - auto const* subStart = result.text.data(); - auto const* subEnd = subStart + result.text.size() + flushResult.text.size(); - - if (result.text.empty()) - return { ProcessKind::FallbackToFSM, 0 }; + std::cout << " result: " << result << '\n'; // We do not test on cellCount>0 because the scan could contain only a ZWJ (zero width // joiner), and that would be misleading. - assert(subStart <= subEnd); - auto const byteCount = static_cast(std::distance(subStart, subEnd)); - if (byteCount == 0) - return { ProcessKind::FallbackToFSM, 0 }; + auto const cellCount = result.width; + auto const* subStart = result.text.data(); + auto const* subEnd = subStart + result.text.size(); + assert(subStart <= subEnd); assert(cellCount <= maxCharCount); assert(subEnd <= chunk.data() + chunk.size()); assert(_graphemeLineSegmenter.next() <= chunk.data() + chunk.size()); - auto const text = std::string_view { subStart, byteCount }; + auto const byteCount = static_cast(std::distance(subStart, subEnd)); + // if (byteCount == 0) + // return { ProcessKind::FallbackToFSM, 0 }; + if (!_graphemeLineSegmenter.is_utf8_byte_pending()) { - if (!text.empty()) + if (byteCount > 0) { - vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells.", text, cellCount); + auto const text = std::string_view { subStart, byteCount }; + if (vtTraceParserLog) + vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells.", text, cellCount); _eventListener.print(text, cellCount); } @@ -437,12 +433,39 @@ auto Parser::parseBulkText(char const* begin, // the FSM for the `(TEXT LF+)+`-case. // // As of bench-headless, the performance incrrease is about 50x. - if (input != end && *input == '\n') + if (input != end + && *input == '\n') // TODO(pr) This is not correct. We should check and consume CR+LF. _eventListener.execute(*input++); } + assert(input <= _graphemeLineSegmenter.next()); auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next())); - return { ProcessKind::ContinueBulk, count }; + + switch (result.stop_condition) + { + case unicode::StopCondition::UnexpectedInput: // + return { ProcessKind::FallbackToFSM, count }; + case unicode::StopCondition::EndOfWidth: // + return { ProcessKind::FallbackToFSM, count }; + case unicode::StopCondition::EndOfInput: + if (!_graphemeLineSegmenter.is_utf8_byte_pending()) + { + unicode::grapheme_segmentation_result const flushResult = + _graphemeLineSegmenter.flush(maxCharCount - result.width); + std::cout << "flushResult: " << flushResult << '\n'; + if (!flushResult.text.empty()) + { + auto const text = std::string_view { flushResult.text.data(), flushResult.text.size() }; + if (vtTraceParserLog) + vtTraceParserLog()( + "Printing flushed text \"{}\" with {} cells.", text, flushResult.width); + _eventListener.print(text, flushResult.width); + } + } + return { ProcessKind::ContinueBulk, count }; + } + crispy::unreachable(); + std::abort(); } template diff --git a/src/vtparser/Parser.h b/src/vtparser/Parser.h index e526f143a5..6e69e8a212 100644 --- a/src/vtparser/Parser.h +++ b/src/vtparser/Parser.h @@ -22,6 +22,14 @@ namespace vtparser { +#if defined(__GNUC__) || defined(__clang__) + #define VTPARSER_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) + #define VTPARSER_NOINLINE __declspec(noinline) +#else + #define VTPARSER_NOINLINE /*!*/ +#endif + auto const inline vtTraceParserLog = logstore::category("vt.trace.parser", "Logs terminal parser trace."); // NOLINTBEGIN(readability-identifier-naming) From eb6eba3a3ced11f5ef18d822fe4892a8a5d4ecc1 Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Sun, 21 Apr 2024 20:23:48 +0200 Subject: [PATCH 4/7] wip from windows Signed-off-by: Christian Parpart --- cmake/presets/os-windows.json | 2 +- src/text_shaper/CMakeLists.txt | 2 + src/vtbackend/Screen.h | 7 ++++ src/vtparser/CMakeLists.txt | 1 - src/vtparser/Parser-impl.h | 37 +++++++++++++----- src/vtparser/Parser.h | 7 +++- src/vtparser/Parser_test.cpp | 70 ++++++++++++++++++++++++++++++++++ 7 files changed, 113 insertions(+), 13 deletions(-) diff --git a/cmake/presets/os-windows.json b/cmake/presets/os-windows.json index c895f0c644..fb16afd8bc 100644 --- a/cmake/presets/os-windows.json +++ b/cmake/presets/os-windows.json @@ -5,9 +5,9 @@ { "name": "windows-common", "inherits": "contour-common", + "generator": "Visual Studio 17 2022", "displayName": "Windows - common settings", "hidden": true, - "toolset": "host=x64", "binaryDir": "${sourceDir}/out/build/${presetName}", "condition": { "type": "equals", diff --git a/src/text_shaper/CMakeLists.txt b/src/text_shaper/CMakeLists.txt index 67bf10fcea..dce6a29023 100644 --- a/src/text_shaper/CMakeLists.txt +++ b/src/text_shaper/CMakeLists.txt @@ -11,6 +11,8 @@ if("${CMAKE_SYSTEM}" MATCHES "Windows") list(APPEND text_shaper_SRC directwrite_analysis_wrapper.h) list(APPEND text_shaper_SRC directwrite_locator.cpp directwrite_locator.h) list(APPEND text_shaper_SRC directwrite_shaper.cpp directwrite_shaper.h) +else() + list(APPEND text_shaper_SRC fontconfig_locator.cpp fontconfig_locator.h) endif() if(APPLE) list(APPEND text_shaper_SRC coretext_locator.h coretext_locator.mm) diff --git a/src/vtbackend/Screen.h b/src/vtbackend/Screen.h index b3e7b9dbe1..d3deebbaa7 100644 --- a/src/vtbackend/Screen.h +++ b/src/vtbackend/Screen.h @@ -669,7 +669,14 @@ template inline bool Screen::isContiguousToCurrentLine(std::string_view continuationChars) const noexcept { auto const& line = currentLine(); +#if !defined(_WIN32) return line.isTrivialBuffer() && line.trivialBuffer().text.view().end() == continuationChars.begin(); +#else + auto const end = line.trivialBuffer().text.data() + line.trivialBuffer().text.size(); + auto const next = continuationChars.data(); + return line.isTrivialBuffer() && end == next; + // TODO: && line.trivialBuffer().text.view().end() == continuationChars.begin(); +#endif } } // namespace vtbackend diff --git a/src/vtparser/CMakeLists.txt b/src/vtparser/CMakeLists.txt index 8a250e07f4..647863f539 100644 --- a/src/vtparser/CMakeLists.txt +++ b/src/vtparser/CMakeLists.txt @@ -25,7 +25,6 @@ if(VTPARSER_TESTING) enable_testing() add_executable(vtparser_test Parser_test.cpp - test_main.cpp ) target_link_libraries(vtparser_test vtparser Catch2::Catch2) add_test(vtparser_test ./vtparser_test) diff --git a/src/vtparser/Parser-impl.h b/src/vtparser/Parser-impl.h index 8c0202234b..fb54e6728d 100644 --- a/src/vtparser/Parser-impl.h +++ b/src/vtparser/Parser-impl.h @@ -374,7 +374,7 @@ void Parser::processOnceViaStateMachine(uint8_ template auto Parser::parseBulkText(char const* begin, char const* end) noexcept - -> std::tuple +-> std::tuple { auto const* input = begin; if (_state != State::Ground) @@ -415,6 +415,7 @@ auto Parser::parseBulkText(char const* begin, assert(_graphemeLineSegmenter.next() <= chunk.data() + chunk.size()); auto const byteCount = static_cast(std::distance(subStart, subEnd)); + assert(byteCount == result.text.size()); // if (byteCount == 0) // return { ProcessKind::FallbackToFSM, 0 }; @@ -422,7 +423,7 @@ auto Parser::parseBulkText(char const* begin, { if (byteCount > 0) { - auto const text = std::string_view { subStart, byteCount }; + auto const text = std::string_view{ subStart, byteCount }; if (vtTraceParserLog) vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells.", text, cellCount); _eventListener.print(text, cellCount); @@ -433,25 +434,41 @@ auto Parser::parseBulkText(char const* begin, // the FSM for the `(TEXT LF+)+`-case. // // As of bench-headless, the performance incrrease is about 50x. - if (input != end - && *input == '\n') // TODO(pr) This is not correct. We should check and consume CR+LF. - _eventListener.execute(*input++); + if (input + byteCount != end && *input == '\n') + { + auto x = makeParseBulkResult(input, maxCharCount, unicode::StopCondition::EndOfInput, result.width, 1); + _eventListener.execute('\n'); + return x; + } + else if ((input + byteCount + 1) != end && input[byteCount] == '\r' && input[byteCount + 1] == '\n') + { + // TODO: should have flushed first + auto x = makeParseBulkResult(input, maxCharCount, unicode::StopCondition::EndOfInput, result.width, 2); + _eventListener.execute('\r'); + _eventListener.execute('\n'); + return x; + } } + return makeParseBulkResult(input, maxCharCount, result.stop_condition, result.width, 0); +} +template +auto Parser::makeParseBulkResult(char const* input, unsigned maxCharCount, unicode::StopCondition resultStopCondition, unsigned resultWidth, unsigned e) noexcept -> std::tuple +{ assert(input <= _graphemeLineSegmenter.next()); auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next())); - switch (result.stop_condition) + switch (resultStopCondition) { case unicode::StopCondition::UnexpectedInput: // - return { ProcessKind::FallbackToFSM, count }; + return { ProcessKind::FallbackToFSM, count + e }; case unicode::StopCondition::EndOfWidth: // - return { ProcessKind::FallbackToFSM, count }; + return { ProcessKind::FallbackToFSM, count + e }; case unicode::StopCondition::EndOfInput: if (!_graphemeLineSegmenter.is_utf8_byte_pending()) { unicode::grapheme_segmentation_result const flushResult = - _graphemeLineSegmenter.flush(maxCharCount - result.width); + _graphemeLineSegmenter.flush(maxCharCount - resultWidth); std::cout << "flushResult: " << flushResult << '\n'; if (!flushResult.text.empty()) { @@ -462,7 +479,7 @@ auto Parser::parseBulkText(char const* begin, _eventListener.print(text, flushResult.width); } } - return { ProcessKind::ContinueBulk, count }; + return { ProcessKind::ContinueBulk, count + e }; } crispy::unreachable(); std::abort(); diff --git a/src/vtparser/Parser.h b/src/vtparser/Parser.h index 6e69e8a212..b631d06671 100644 --- a/src/vtparser/Parser.h +++ b/src/vtparser/Parser.h @@ -720,7 +720,12 @@ class Parser FallbackToFSM }; - std::tuple parseBulkText(char const* begin, char const* end) noexcept; + auto parseBulkText(char const* begin, char const* end) noexcept -> std::tuple; + auto makeParseBulkResult(char const* begin, + unsigned maxCharCount, + unicode::StopCondition resultStopCondition, + unsigned resultWidth, + unsigned e) noexcept -> std::tuple; void processOnceViaStateMachine(uint8_t ch); void handle(ActionClass actionClass, Action action, uint8_t codepoint); diff --git a/src/vtparser/Parser_test.cpp b/src/vtparser/Parser_test.cpp index b1836caac7..3a1b2bbb11 100644 --- a/src/vtparser/Parser_test.cpp +++ b/src/vtparser/Parser_test.cpp @@ -2,12 +2,21 @@ #include #include +#include +#include + #include #include +#define CATCH_CONFIG_RUNNER +#include #include +#if defined(_WIN32) + #include +#endif + using namespace std; class MockParserEvents final: public vtparser::NullParserEvents @@ -20,11 +29,18 @@ class MockParserEvents final: public vtparser::NullParserEvents void error(string_view const& msg) override { INFO(fmt::format("Parser error received. {}", msg)); } + void execute(char ch) override + { + UNSCOPED_INFO(fmt::format("execute: U+{:X}", (unsigned) ch)); + text += ch; + } + void print(char32_t ch) override { UNSCOPED_INFO(fmt::format("print: U+{:X}", (unsigned) ch)); text += unicode::convert_to(ch); } + size_t print(std::string_view s, size_t cellCount) override { UNSCOPED_INFO(fmt::format("print: {}", crispy::escape(s))); @@ -41,6 +57,17 @@ class MockParserEvents final: public vtparser::NullParserEvents void dispatchPM() override { pm += "}"; } }; +TEST_CASE("Parser.utf8_sequence", "[Parser]") +{ + MockParserEvents textListener; + auto p = vtparser::Parser(textListener); + + p.parseFragment("Hall\xC3\xB6le\r\nHow are you?"); + // FIXME: a trailing zero is appended to the string, which is not expected. + + CHECK(textListener.text == "Hall\xC3\xB6le\r\nHow are you?"); +} + TEST_CASE("Parser.utf8_single", "[Parser]") { MockParserEvents textListener; @@ -73,3 +100,46 @@ TEST_CASE("Parser.APC") REQUIRE(listener.apc == "{Gi=1,a=q;}"); REQUIRE(listener.text == "ABCDEF"); } + +namespace +{ + +struct SetupTeardown +{ + SetupTeardown() + { +#if defined(_WIN32) + const auto stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE); + const auto stdoutCP = GetConsoleOutputCP(); + DWORD stdoutMode; + GetConsoleMode(stdoutHandle, &stdoutMode); + SetConsoleMode(stdoutHandle, + ENABLE_PROCESSED_OUTPUT | ENABLE_WRAP_AT_EOL_OUTPUT + | ENABLE_VIRTUAL_TERMINAL_PROCESSING); + SetConsoleOutputCP(CP_UTF8); +#endif + + char const* logFilterString = getenv("LOG"); + if (logFilterString) + { + logstore::configure(logFilterString); + crispy::app::customizeLogStoreOutput(); + } + } + + ~SetupTeardown() = default; +}; + +} // namespace + +int main(int argc, char const* argv[]) +{ + auto const _ = SetupTeardown {}; + + int const result = Catch::Session().run(argc, argv); + + // avoid closing extern console to close on VScode/windows + // system("pause"); + + return result; +} From 0e721e896f99dd7ae38f41ec48d9ea088d1a2bcb Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Sun, 21 Apr 2024 22:38:37 +0200 Subject: [PATCH 5/7] linux wip Signed-off-by: Christian Parpart --- src/contour/ContourApp.cpp | 12 ---------- src/crispy/App.cpp | 43 +++++++++++++++++++++++++++++++----- src/crispy/App.h | 3 +++ src/vtparser/Parser_test.cpp | 39 +------------------------------- 4 files changed, 42 insertions(+), 55 deletions(-) diff --git a/src/contour/ContourApp.cpp b/src/contour/ContourApp.cpp index a3154cfda7..f3b50dd756 100644 --- a/src/contour/ContourApp.cpp +++ b/src/contour/ContourApp.cpp @@ -140,18 +140,6 @@ ContourApp::ContourApp(): app("contour", "Contour Terminal Emulator", CONTOUR_VE signal(SIGABRT, segvHandler); #endif -#if defined(_WIN32) - // Enable VT output processing on Conhost. - HANDLE stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE); - DWORD savedModes {}; // NOTE: Is it required to restore that upon process exit? - if (GetConsoleMode(stdoutHandle, &savedModes) != FALSE) - { - DWORD modes = savedModes; - modes |= ENABLE_VIRTUAL_TERMINAL_PROCESSING; - SetConsoleMode(stdoutHandle, modes); - } -#endif - link("contour.capture", bind(&ContourApp::captureAction, this)); link("contour.list-debug-tags", bind(&ContourApp::listDebugTagsAction, this)); link("contour.set.profile", bind(&ContourApp::profileAction, this)); diff --git a/src/crispy/App.cpp b/src/crispy/App.cpp index 216efd8bf1..e53fe5ba09 100644 --- a/src/crispy/App.cpp +++ b/src/crispy/App.cpp @@ -21,6 +21,10 @@ #include #endif +#if defined(_WIN32) + #include +#endif + using std::bind; using std::cout; using std::exception; @@ -110,11 +114,7 @@ app::app(std::string appName, std::string appTitle, std::string appVersion, std: _appLicense { std::move(appLicense) }, _localStateDir { xdgStateHome() / _appName } { - if (char const* logFilterString = getenv("LOG")) - { - logstore::configure(logFilterString); - customizeLogStoreOutput(); - } + basicSetup(); _instance = this; @@ -128,6 +128,39 @@ app::~app() _instance = nullptr; } +void app::basicSetup() noexcept +{ + enableVTProcessing(); + enableUtf8Output(); + if (char const* logFilterString = getenv("LOG")) + { + logstore::configure(logFilterString); + customizeLogStoreOutput(); + } +} + +void app::enableVTProcessing() noexcept +{ +#if defined(_WIN32) + // Enable VT output processing on Conhost. + HANDLE stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD savedModes {}; // NOTE: Is it required to restore that upon process exit? + if (GetConsoleMode(stdoutHandle, &savedModes) != FALSE) + { + SetConsoleMode(stdoutHandle, + savedModes | ENABLE_VIRTUAL_TERMINAL_PROCESSING | ENABLE_PROCESSED_OUTPUT + | ENABLE_WRAP_AT_EOL_OUTPUT); + } +#endif +} + +void app::enableUtf8Output() noexcept +{ +#if defined(_WIN32) + SetConsoleOutputCP(CP_UTF8); +#endif +} + void app::link(std::string command, std::function handler) { _handlers[std::move(command)] = std::move(handler); diff --git a/src/crispy/App.h b/src/crispy/App.h index 93df6730b3..a328af475e 100644 --- a/src/crispy/App.h +++ b/src/crispy/App.h @@ -32,6 +32,9 @@ class app [[nodiscard]] std::string const& appVersion() const noexcept { return _appVersion; } [[nodiscard]] std::filesystem::path const& localStateDir() const noexcept { return _localStateDir; } + static void basicSetup() noexcept; + static void enableVTProcessing() noexcept; + static void enableUtf8Output() noexcept; static void customizeLogStoreOutput(); protected: diff --git a/src/vtparser/Parser_test.cpp b/src/vtparser/Parser_test.cpp index 3a1b2bbb11..47c64c673c 100644 --- a/src/vtparser/Parser_test.cpp +++ b/src/vtparser/Parser_test.cpp @@ -3,8 +3,6 @@ #include #include -#include - #include #include @@ -13,10 +11,6 @@ #include #include -#if defined(_WIN32) - #include -#endif - using namespace std; class MockParserEvents final: public vtparser::NullParserEvents @@ -101,40 +95,9 @@ TEST_CASE("Parser.APC") REQUIRE(listener.text == "ABCDEF"); } -namespace -{ - -struct SetupTeardown -{ - SetupTeardown() - { -#if defined(_WIN32) - const auto stdoutHandle = GetStdHandle(STD_OUTPUT_HANDLE); - const auto stdoutCP = GetConsoleOutputCP(); - DWORD stdoutMode; - GetConsoleMode(stdoutHandle, &stdoutMode); - SetConsoleMode(stdoutHandle, - ENABLE_PROCESSED_OUTPUT | ENABLE_WRAP_AT_EOL_OUTPUT - | ENABLE_VIRTUAL_TERMINAL_PROCESSING); - SetConsoleOutputCP(CP_UTF8); -#endif - - char const* logFilterString = getenv("LOG"); - if (logFilterString) - { - logstore::configure(logFilterString); - crispy::app::customizeLogStoreOutput(); - } - } - - ~SetupTeardown() = default; -}; - -} // namespace - int main(int argc, char const* argv[]) { - auto const _ = SetupTeardown {}; + crispy::app::basicSetup(); int const result = Catch::Session().run(argc, argv); From 15ed83fffea358220a6c08be3deacb81f4b46262 Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Mon, 22 Apr 2024 08:22:43 +0200 Subject: [PATCH 6/7] wip Signed-off-by: Christian Parpart --- src/vtbackend/Line.cpp | 4 ++-- src/vtbackend/Screen.h | 4 ++-- src/vtbackend/ViCommands.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vtbackend/Line.cpp b/src/vtbackend/Line.cpp index c7e960cd15..969e5797f8 100644 --- a/src/vtbackend/Line.cpp +++ b/src/vtbackend/Line.cpp @@ -170,9 +170,9 @@ struct TrivialLineInflater { vtParserLog()("Inflating TrivialLineBuffer: '{}'", input.text.data() ? crispy::escape(input.text.data()) : ""); auto lineSegmenter = unicode::grapheme_line_segmenter { *this, input.text.view() }; - auto result = lineSegmenter.process(std::numeric_limits::max()); + [[maybe_unused]] auto result = lineSegmenter.process(std::numeric_limits::max()); assert(result.stop_condition == unicode::StopCondition::EndOfInput); - auto const flushed = lineSegmenter.flush(std::numeric_limits::max()); + [[maybe_unused]] auto const flushed = lineSegmenter.flush(std::numeric_limits::max()); assert(flushed.stop_condition == unicode::StopCondition::EndOfInput); vtParserLog()("Inflated {}/{} columns", columns.size(), input.displayWidth); diff --git a/src/vtbackend/Screen.h b/src/vtbackend/Screen.h index d3deebbaa7..0118eff5b0 100644 --- a/src/vtbackend/Screen.h +++ b/src/vtbackend/Screen.h @@ -672,8 +672,8 @@ inline bool Screen::isContiguousToCurrentLine(std::string_view continuatio #if !defined(_WIN32) return line.isTrivialBuffer() && line.trivialBuffer().text.view().end() == continuationChars.begin(); #else - auto const end = line.trivialBuffer().text.data() + line.trivialBuffer().text.size(); - auto const next = continuationChars.data(); + char const* const end = line.trivialBuffer().text.data() + line.trivialBuffer().text.size(); + char const* const next = continuationChars.data(); return line.isTrivialBuffer() && end == next; // TODO: && line.trivialBuffer().text.view().end() == continuationChars.begin(); #endif diff --git a/src/vtbackend/ViCommands.cpp b/src/vtbackend/ViCommands.cpp index a325d3febd..f6c3bc2723 100644 --- a/src/vtbackend/ViCommands.cpp +++ b/src/vtbackend/ViCommands.cpp @@ -114,7 +114,7 @@ namespace return terminal.alternateScreen().grid().rightMostNonEmptyAt(lineOffset); } - constexpr std::optional> matchingPairOfChar(char32_t input) noexcept + constexpr std::optional> matchingPairOfChar(char32_t input) noexcept { auto constexpr Pairs = std::array { std::pair { U'(', U')' }, From 07a33796ebef202fab0c5adb1e45a27d0771af51 Mon Sep 17 00:00:00 2001 From: Yaraslau Tamashevich Date: Wed, 24 Jul 2024 18:24:29 +0300 Subject: [PATCH 7/7] Get tests conformance --- src/vtbackend/Selector_test.cpp | 220 ++++++++++++++++---------------- src/vtbackend/logging.h | 5 +- src/vtparser/Parser-impl.h | 95 +++++++++----- 3 files changed, 173 insertions(+), 147 deletions(-) diff --git a/src/vtbackend/Selector_test.cpp b/src/vtbackend/Selector_test.cpp index 58a5b17b24..829cb67df8 100644 --- a/src/vtbackend/Selector_test.cpp +++ b/src/vtbackend/Selector_test.cpp @@ -92,74 +92,74 @@ TEST_CASE("Selector.Linear", "[selector]") REQUIRE(screen.grid().lineText(LineOffset(1)) == "ab,cdefg,hi"); REQUIRE(screen.grid().lineText(LineOffset(2)) == "12345,67890"); - // SECTION("single-cell") - // { // "b" - // auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; - // auto selector = LinearSelection(selectionHelper, pos, []() {}); - // (void) selector.extend(pos); - // selector.complete(); - // - // vector const selection = selector.ranges(); - // REQUIRE(selection.size() == 1); - // Selection::Range const& r1 = selection[0]; - // CHECK(r1.line == pos.line); - // CHECK(r1.fromColumn == pos.column); - // CHECK(r1.toColumn == pos.column); - // CHECK(r1.length() == ColumnCount(1)); - // - // auto selectedText = TextSelection { screen }; - // renderSelection(selector, selectedText); - // CHECK(selectedText.text == "b"); - // } - - // SECTION("forward single-line") - // { // "b,c" - // auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; - // auto selector = LinearSelection(selectionHelper, pos, []() {}); - // (void) selector.extend(CellLocation { LineOffset(1), ColumnOffset(3) }); - // selector.complete(); - // - // vector const selection = selector.ranges(); - // REQUIRE(selection.size() == 1); - // Selection::Range const& r1 = selection[0]; - // CHECK(r1.line == LineOffset(1)); - // CHECK(r1.fromColumn == ColumnOffset(1)); - // CHECK(r1.toColumn == ColumnOffset(3)); - // CHECK(r1.length() == ColumnCount(3)); - // - // auto selectedText = TextSelection { screen }; - // renderSelection(selector, selectedText); - // CHECK(selectedText.text == "b,c"); - // } - - // SECTION("forward multi-line") - // { // "b,cdefg,hi\n1234" - // auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; - // auto selector = LinearSelection(selectionHelper, pos, []() {}); - // (void) selector.extend(CellLocation { LineOffset(2), ColumnOffset(3) }); - // selector.complete(); - // - // vector const selection = selector.ranges(); - // REQUIRE(selection.size() == 2); - // - // Selection::Range const& r1 = selection[0]; - // CHECK(r1.line == LineOffset(1)); - // CHECK(r1.fromColumn == ColumnOffset(1)); - // CHECK(r1.toColumn == ColumnOffset(10)); - // CHECK(r1.length() == ColumnCount(10)); - // - // Selection::Range const& r2 = selection[1]; - // CHECK(r2.line == LineOffset(2)); - // CHECK(r2.fromColumn == ColumnOffset(0)); - // CHECK(r2.toColumn == ColumnOffset(3)); - // CHECK(r2.length() == ColumnCount(4)); - // - // auto selectedText = TextSelection { screen }; - // renderSelection(selector, selectedText); - // CHECK(selectedText.text == "b,cdefg,hi\n1234"); - // } - - // SECTION("multiple lines fully in history") + SECTION("single-cell") + { // "b" + auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; + auto selector = LinearSelection(selectionHelper, pos, []() {}); + (void) selector.extend(pos); + selector.complete(); + + vector const selection = selector.ranges(); + REQUIRE(selection.size() == 1); + Selection::Range const& r1 = selection[0]; + CHECK(r1.line == pos.line); + CHECK(r1.fromColumn == pos.column); + CHECK(r1.toColumn == pos.column); + CHECK(r1.length() == ColumnCount(1)); + + auto selectedText = TextSelection { screen }; + renderSelection(selector, selectedText); + CHECK(selectedText.text == "b"); + } + + SECTION("forward single-line") + { // "b,c" + auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; + auto selector = LinearSelection(selectionHelper, pos, []() {}); + (void) selector.extend(CellLocation { LineOffset(1), ColumnOffset(3) }); + selector.complete(); + + vector const selection = selector.ranges(); + REQUIRE(selection.size() == 1); + Selection::Range const& r1 = selection[0]; + CHECK(r1.line == LineOffset(1)); + CHECK(r1.fromColumn == ColumnOffset(1)); + CHECK(r1.toColumn == ColumnOffset(3)); + CHECK(r1.length() == ColumnCount(3)); + + auto selectedText = TextSelection { screen }; + renderSelection(selector, selectedText); + CHECK(selectedText.text == "b,c"); + } + + SECTION("forward multi-line") + { // "b,cdefg,hi\n1234" + auto const pos = CellLocation { LineOffset(1), ColumnOffset(1) }; + auto selector = LinearSelection(selectionHelper, pos, []() {}); + (void) selector.extend(CellLocation { LineOffset(2), ColumnOffset(3) }); + selector.complete(); + + vector const selection = selector.ranges(); + REQUIRE(selection.size() == 2); + + Selection::Range const& r1 = selection[0]; + CHECK(r1.line == LineOffset(1)); + CHECK(r1.fromColumn == ColumnOffset(1)); + CHECK(r1.toColumn == ColumnOffset(10)); + CHECK(r1.length() == ColumnCount(10)); + + Selection::Range const& r2 = selection[1]; + CHECK(r2.line == LineOffset(2)); + CHECK(r2.fromColumn == ColumnOffset(0)); + CHECK(r2.toColumn == ColumnOffset(3)); + CHECK(r2.length() == ColumnCount(4)); + + auto selectedText = TextSelection { screen }; + renderSelection(selector, selectedText); + CHECK(selectedText.text == "b,cdefg,hi\n1234"); + } + + SECTION("multiple lines fully in history") { term.writeToScreen("foo\r\nbar\r\n"); // move first two lines into history. /* @@ -196,48 +196,48 @@ TEST_CASE("Selector.Linear", "[selector]") CHECK(selectedText.text == "fg,hi\n123"); } - // SECTION("multiple lines from history into main buffer") - // { - // term.writeToScreen("foo\r\nbar\r\n"); // move first two lines into history. - // /* - // -3 | "12345,67890" - // -2 | "ab,cdefg,hi" (-- - // -1 | "12345,67890" ----------- - // 0 | "foo" --) - // 1 | "bar" - // 2 | "" - // */ - // - // auto selector = - // LinearSelection(selectionHelper, CellLocation { LineOffset(-2), ColumnOffset(8) }, []() {}); - // (void) selector.extend(CellLocation { LineOffset(0), ColumnOffset(1) }); - // selector.complete(); - // - // vector const selection = selector.ranges(); - // REQUIRE(selection.size() == 3); - // - // Selection::Range const& r1 = selection[0]; - // CHECK(r1.line == LineOffset(-2)); - // CHECK(r1.fromColumn == ColumnOffset(8)); - // CHECK(r1.toColumn == ColumnOffset(10)); - // CHECK(r1.length() == ColumnCount(3)); - // - // Selection::Range const& r2 = selection[1]; - // CHECK(r2.line == LineOffset(-1)); - // CHECK(r2.fromColumn == ColumnOffset(0)); - // CHECK(r2.toColumn == ColumnOffset(10)); - // CHECK(r2.length() == ColumnCount(11)); - // - // Selection::Range const& r3 = selection[2]; - // CHECK(r3.line == LineOffset(0)); - // CHECK(r3.fromColumn == ColumnOffset(0)); - // CHECK(r3.toColumn == ColumnOffset(1)); - // CHECK(r3.length() == ColumnCount(2)); - // - // auto selectedText = TextSelection { screen }; - // renderSelection(selector, selectedText); - // CHECK(selectedText.text == ",hi\n12345,67890\nfo"); - // } + SECTION("multiple lines from history into main buffer") + { + term.writeToScreen("foo\r\nbar\r\n"); // move first two lines into history. + /* + -3 | "12345,67890" + -2 | "ab,cdefg,hi" (-- + -1 | "12345,67890" ----------- + 0 | "foo" --) + 1 | "bar" + 2 | "" + */ + + auto selector = + LinearSelection(selectionHelper, CellLocation { LineOffset(-2), ColumnOffset(8) }, []() {}); + (void) selector.extend(CellLocation { LineOffset(0), ColumnOffset(1) }); + selector.complete(); + + vector const selection = selector.ranges(); + REQUIRE(selection.size() == 3); + + Selection::Range const& r1 = selection[0]; + CHECK(r1.line == LineOffset(-2)); + CHECK(r1.fromColumn == ColumnOffset(8)); + CHECK(r1.toColumn == ColumnOffset(10)); + CHECK(r1.length() == ColumnCount(3)); + + Selection::Range const& r2 = selection[1]; + CHECK(r2.line == LineOffset(-1)); + CHECK(r2.fromColumn == ColumnOffset(0)); + CHECK(r2.toColumn == ColumnOffset(10)); + CHECK(r2.length() == ColumnCount(11)); + + Selection::Range const& r3 = selection[2]; + CHECK(r3.line == LineOffset(0)); + CHECK(r3.fromColumn == ColumnOffset(0)); + CHECK(r3.toColumn == ColumnOffset(1)); + CHECK(r3.length() == ColumnCount(2)); + + auto selectedText = TextSelection { screen }; + renderSelection(selector, selectedText); + CHECK(selectedText.text == ",hi\n12345,67890\nfo"); + } } TEST_CASE("Selector.LinearWordWise", "[selector]") diff --git a/src/vtbackend/logging.h b/src/vtbackend/logging.h index 8ef48550bb..e78e937940 100644 --- a/src/vtbackend/logging.h +++ b/src/vtbackend/logging.h @@ -8,10 +8,7 @@ namespace vtbackend auto const inline terminalLog = logstore::category("vt.session", "Logs general terminal events."); auto const inline inputLog = logstore::category("vt.input", "Logs terminal keyboard/mouse input events."); -auto const inline vtParserLog = logstore::category("vt.parser", - "Logs terminal parser errors.", - logstore::category::state::Enabled, - logstore::category::visibility::Hidden); +auto const inline vtParserLog = logstore::category("vt.parser", "Logs terminal parser errors."); #if defined(LIBTERMINAL_LOG_TRACE) auto const inline vtTraceSequenceLog = logstore::category("vt.trace.sequence", "Logs terminal screen trace."); diff --git a/src/vtparser/Parser-impl.h b/src/vtparser/Parser-impl.h index fb54e6728d..ccc0a6b8d7 100644 --- a/src/vtparser/Parser-impl.h +++ b/src/vtparser/Parser-impl.h @@ -329,13 +329,18 @@ void Parser::parseFragment(gsl::span(*input)); auto const [processKind, processedByteCount] = parseBulkText(input, end); // TODO(pr) what if parseBulkText() knows we've hit the end already? then we should break out of the // loop right away - fmt::print("VTParser: Processed {} bytes. Kind {}\n", - static_cast(processedByteCount), - processKind == ProcessKind::ContinueBulk ? "ContinueBulk" : "FallbackToFSM"); + vtTraceParserLog()("VTParser: Processed {} bytes. Kind {}\n", + static_cast(processedByteCount), + processKind == ProcessKind::ContinueBulk ? "ContinueBulk" : "FallbackToFSM"); switch (processKind) { case ProcessKind::ContinueBulk: @@ -346,7 +351,12 @@ void Parser::parseFragment(gsl::span(*input++)); + { + // TODO(pr) [libunicode] fix zero side Parser.simple_ut8 + auto const ch = static_cast(*input++); + if (ch != 0) + processOnceViaStateMachine(ch); + } break; } } @@ -367,14 +377,17 @@ void Parser::processOnceViaStateMachine(uint8_ handle(ActionClass::Enter, Table.entryEvents[static_cast(t)], ch); } else if (Action const a = Table.events[s][ch]; a != Action::Undefined) + { + vtTraceParserLog()("VTParser: Handling action {} for state/input pair.\n", a); handle(ActionClass::Event, a, ch); + } else _eventListener.error("Parser error: Unknown action for state/input pair."); } template auto Parser::parseBulkText(char const* begin, char const* end) noexcept --> std::tuple + -> std::tuple { auto const* input = begin; if (_state != State::Ground) @@ -384,23 +397,23 @@ auto Parser::parseBulkText(char const* begin, if (!maxCharCount) return { ProcessKind::FallbackToFSM, 0 }; - auto const chunk = std::string_view(input, static_cast(std::distance(input, end))); + auto const chunk = std::string_view(input, end); - if (_graphemeLineSegmenter.next() == begin) - { - std::cout << " expand_buffer_by(" << chunk.size() << ")\n"; - _graphemeLineSegmenter.expand_buffer_by(chunk.size()); - } - else - { - std::cout << " reset()\n"; - _graphemeLineSegmenter.reset(chunk); - } // TODO(pr) What if the last call to parseBulkText was only a partial read, and we have // more text to read? Then we should not just call reset() but expand_buffer_by(). + _graphemeLineSegmenter.reset(chunk); unicode::grapheme_segmentation_result const result = _graphemeLineSegmenter.process(maxCharCount); - std::cout << " result: " << result << '\n'; + vtTraceParserLog()( + "result: [text: \"{}\", width: {}, stop: {}]", result.text, result.width, [](auto val) { + switch (val) + { + case unicode::StopCondition::UnexpectedInput: return "UnexpectedInput"; + case unicode::StopCondition::EndOfWidth: return "EndOfWidth"; + case unicode::StopCondition::EndOfInput: return "EndOfInput"; + } + return "Unknown"; + }(result.stop_condition)); // We do not test on cellCount>0 because the scan could contain only a ZWJ (zero width // joiner), and that would be misleading. @@ -423,9 +436,12 @@ auto Parser::parseBulkText(char const* begin, { if (byteCount > 0) { - auto const text = std::string_view{ subStart, byteCount }; + auto const text = std::string_view { subStart, byteCount }; if (vtTraceParserLog) - vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells.", text, cellCount); + vtTraceParserLog()("Printing fast-scanned text \"{}\" with {} cells and size {}. ", + text, + cellCount, + text.size()); _eventListener.print(text, cellCount); } @@ -434,26 +450,39 @@ auto Parser::parseBulkText(char const* begin, // the FSM for the `(TEXT LF+)+`-case. // // As of bench-headless, the performance incrrease is about 50x. - if (input + byteCount != end && *input == '\n') + // We need to ensure that there is input beyond the current chunk. + if (byteCount != static_cast(std::distance(input, end))) { - auto x = makeParseBulkResult(input, maxCharCount, unicode::StopCondition::EndOfInput, result.width, 1); - _eventListener.execute('\n'); - return x; - } - else if ((input + byteCount + 1) != end && input[byteCount] == '\r' && input[byteCount + 1] == '\n') - { - // TODO: should have flushed first - auto x = makeParseBulkResult(input, maxCharCount, unicode::StopCondition::EndOfInput, result.width, 2); - _eventListener.execute('\r'); - _eventListener.execute('\n'); - return x; + if (*input == '\n') + { + auto x = makeParseBulkResult( + input, maxCharCount, unicode::StopCondition::EndOfInput, result.width, 1); + _eventListener.execute('\n'); + return x; + } + else if ((input + byteCount + 1) != end && input[byteCount] == '\r' + && input[byteCount + 1] == '\n') + { + // TODO: should have flushed first + auto x = makeParseBulkResult( + input, maxCharCount, unicode::StopCondition::EndOfInput, result.width, 2); + _eventListener.execute('\r'); + _eventListener.execute('\n'); + return x; + } } } + return makeParseBulkResult(input, maxCharCount, result.stop_condition, result.width, 0); } template -auto Parser::makeParseBulkResult(char const* input, unsigned maxCharCount, unicode::StopCondition resultStopCondition, unsigned resultWidth, unsigned e) noexcept -> std::tuple +auto Parser::makeParseBulkResult(char const* input, + unsigned maxCharCount, + unicode::StopCondition resultStopCondition, + unsigned resultWidth, + unsigned e) noexcept + -> std::tuple { assert(input <= _graphemeLineSegmenter.next()); auto const count = static_cast(std::distance(input, _graphemeLineSegmenter.next()));