|
5 | 5 | #include "Row.hpp"
|
6 | 6 |
|
7 | 7 | #include <isa_availability.h>
|
8 |
| -#include <til/unicode.h> |
9 | 8 |
|
10 |
| -#include "textBuffer.hpp" |
11 |
| -#include "../../types/inc/GlyphWidth.hpp" |
| 9 | +#include "../../types/inc/CodepointWidthDetector.hpp" |
12 | 10 |
|
13 | 11 | // It would be nice to add checked array access in the future, but it's a little annoying to do so without impacting
|
14 | 12 | // performance (including Debug performance). Other languages are a little bit more ergonomic there than C++.
|
@@ -568,6 +566,7 @@ void ROW::ReplaceAttributes(const til::CoordType beginIndex, const til::CoordTyp
|
568 | 566 | void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars)
|
569 | 567 | try
|
570 | 568 | {
|
| 569 | + assert(width >= 1 && width <= 2); |
571 | 570 | WriteHelper h{ *this, columnBegin, _columnCount, chars };
|
572 | 571 | if (!h.IsValid())
|
573 | 572 | {
|
@@ -666,56 +665,89 @@ catch (...)
|
666 | 665 |
|
667 | 666 | [[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept
|
668 | 667 | {
|
669 |
| - const auto end = chars.end(); |
| 668 | + auto& cwd = CodepointWidthDetector::Singleton(); |
670 | 669 |
|
671 |
| - while (it != end) |
| 670 | + // Check if the new text joins with the existing contents of the row to form a single grapheme cluster. |
| 671 | + if (it == chars.begin()) |
672 | 672 | {
|
673 |
| - unsigned int width = 1; |
674 |
| - auto ptr = &*it; |
675 |
| - const auto wch = *ptr; |
676 |
| - size_t advance = 1; |
| 673 | + auto colPrev = colBeg; |
| 674 | + while (colPrev > 0 && row._uncheckedIsTrailer(--colPrev)) |
| 675 | + { |
| 676 | + } |
677 | 677 |
|
678 |
| - ++it; |
| 678 | + const auto chPrev = row._uncheckedCharOffset(colPrev); |
| 679 | + const std::wstring_view charsPrev{ row._chars.data() + chPrev, ch - chPrev }; |
679 | 680 |
|
680 |
| - // Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII. |
681 |
| - // It also allows us to skip the surrogate pair decoding at the same time. |
682 |
| - if (wch >= 0x80) |
| 681 | + GraphemeState state; |
| 682 | + cwd.GraphemeNext(state, charsPrev); |
| 683 | + cwd.GraphemeNext(state, chars); |
| 684 | + |
| 685 | + if (state.len > 0) |
683 | 686 | {
|
684 |
| - if (til::is_surrogate(wch)) |
| 687 | + colBegDirty = colPrev; |
| 688 | + colEnd = colPrev; |
| 689 | + |
| 690 | + const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + state.width); |
| 691 | + if (colEndNew > colLimit) |
685 | 692 | {
|
686 |
| - if (it != end && til::is_leading_surrogate(wch) && til::is_trailing_surrogate(*it)) |
687 |
| - { |
688 |
| - advance = 2; |
689 |
| - ++it; |
690 |
| - } |
691 |
| - else |
692 |
| - { |
693 |
| - ptr = &UNICODE_REPLACEMENT; |
694 |
| - } |
| 693 | + colEndDirty = colLimit; |
| 694 | + charsConsumed = ch - chBeg; |
| 695 | + return; |
695 | 696 | }
|
696 | 697 |
|
697 |
| - width = IsGlyphFullWidth({ ptr, advance }) + 1u; |
698 |
| - } |
| 698 | + // Fill our char-offset buffer with 1 entry containing the mapping from the |
| 699 | + // current column (colEnd) to the start of the joined glyph in the row's existing text (chPrev)... |
| 700 | + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(chPrev); |
| 701 | + // ...followed by 0-N entries containing an indication that the |
| 702 | + // columns are just a wide-glyph extension of the preceding one. |
| 703 | + while (colEnd < colEndNew) |
| 704 | + { |
| 705 | + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(chPrev | CharOffsetsTrailer); |
| 706 | + } |
699 | 707 |
|
700 |
| - const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + width); |
701 |
| - if (colEndNew > colLimit) |
702 |
| - { |
703 |
| - colEndDirty = colLimit; |
704 |
| - charsConsumed = ch - chBeg; |
705 |
| - return; |
| 708 | + ch += state.len; |
| 709 | + it += state.len; |
706 | 710 | }
|
| 711 | + } |
| 712 | + else |
| 713 | + { |
| 714 | + // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â". |
| 715 | + // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character |
| 716 | + // and let GraphemeNext() find the next proper grapheme boundary. |
| 717 | + --colEnd; |
| 718 | + --ch; |
| 719 | + --it; |
| 720 | + } |
| 721 | + |
| 722 | + if (const auto end = chars.end(); it != end) |
| 723 | + { |
| 724 | + GraphemeState state{ .beg = &*it }; |
707 | 725 |
|
708 |
| - // Fill our char-offset buffer with 1 entry containing the mapping from the |
709 |
| - // current column (colEnd) to the start of the glyph in the string (ch)... |
710 |
| - til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch); |
711 |
| - // ...followed by 0-N entries containing an indication that the |
712 |
| - // columns are just a wide-glyph extension of the preceding one. |
713 |
| - while (colEnd < colEndNew) |
| 726 | + do |
714 | 727 | {
|
715 |
| - til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer); |
716 |
| - } |
| 728 | + cwd.GraphemeNext(state, chars); |
| 729 | + |
| 730 | + const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + state.width); |
| 731 | + if (colEndNew > colLimit) |
| 732 | + { |
| 733 | + colEndDirty = colLimit; |
| 734 | + charsConsumed = ch - chBeg; |
| 735 | + return; |
| 736 | + } |
| 737 | + |
| 738 | + // Fill our char-offset buffer with 1 entry containing the mapping from the |
| 739 | + // current column (colEnd) to the start of the glyph in the string (ch)... |
| 740 | + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch); |
| 741 | + // ...followed by 0-N entries containing an indication that the |
| 742 | + // columns are just a wide-glyph extension of the preceding one. |
| 743 | + while (colEnd < colEndNew) |
| 744 | + { |
| 745 | + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer); |
| 746 | + } |
717 | 747 |
|
718 |
| - ch += advance; |
| 748 | + ch += state.len; |
| 749 | + it += state.len; |
| 750 | + } while (it != end); |
719 | 751 | }
|
720 | 752 |
|
721 | 753 | colEndDirty = colEnd;
|
@@ -1058,7 +1090,7 @@ std::wstring_view ROW::GetText() const noexcept
|
1058 | 1090 |
|
1059 | 1091 | std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept
|
1060 | 1092 | {
|
1061 |
| - const til::CoordType columns = _columnCount; |
| 1093 | + const auto columns = GetReadableColumnCount(); |
1062 | 1094 | const auto colBeg = clamp(columnBegin, 0, columns);
|
1063 | 1095 | const auto colEnd = clamp(columnEnd, colBeg, columns);
|
1064 | 1096 | const size_t chBeg = _uncheckedCharOffset(gsl::narrow_cast<size_t>(colBeg));
|
|
0 commit comments