Unity-Engineering-software-engineering
diff --git a/‎.github/actions/spelling/expect/expect.txt
Lines changed: 18 additions & 0 deletions b/‎.github/actions/spelling/expect/expect.txt
Lines changed: 18 additions & 0 deletions
diff --git a/‎doc/cascadia/profiles.schema.json
Lines changed: 5 additions & 0 deletions b/‎doc/cascadia/profiles.schema.json
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/buffer/out/Row.cpp
Lines changed: 23 additions & 39 deletions b/‎src/buffer/out/Row.cpp
Lines changed: 23 additions & 39 deletions
diff --git a/‎src/buffer/out/Row.hpp
Lines changed: 1 addition & 1 deletion b/‎src/buffer/out/Row.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/buffer/out/textBuffer.cpp
Lines changed: 26 additions & 43 deletions b/‎src/buffer/out/textBuffer.cpp
Lines changed: 26 additions & 43 deletions
diff --git a/‎src/cascadia/TerminalCore/ICoreSettings.idl
Lines changed: 1 addition & 0 deletions b/‎src/cascadia/TerminalCore/ICoreSettings.idl
Lines changed: 1 addition & 0 deletions
@@ -144,6 +144,7 @@ bytebuffer
 cac
 cacafire
 CALLCONV
+CANDRABINDU
 capslock
 CARETBLINKINGENABLED
 CARRIAGERETURN
@@ -155,6 +156,7 @@ cbiex
 CBN
 CBoolean
 cbt
+Ccc
 CCCBB
 cch
 CCHAR
@@ -180,6 +182,7 @@ chaof
 charinfo
 CHARSETINFO
 chh
+chonker
 chshdng
 CHT
 Cic
@@ -598,7 +601,9 @@ FEEF
 fesb
 FFAF
 FFDE
+FFFD
 FFFDb
+FFrom
 fgbg
 FGCOLOR
 FGHIJ
@@ -617,6 +622,7 @@ FINDDOWN
 FINDSTRINGEXACT
 FINDUP
 FIter
+FITZPATRICK
 FIXEDCONVERTED
 FIXEDFILEINFO
 Flg
@@ -888,11 +894,13 @@ jconcpp
 JLO
 JOBOBJECT
 JOBOBJECTINFOCLASS
+JONGSEONG
 JPN
 jsoncpp
 Jsons
 jsprovider
 jumplist
+JUNGSEONG
 KAttrs
 kawa
 Kazu
@@ -911,6 +919,7 @@ keyups
 KILLACTIVE
 KILLFOCUS
 kinda
+KIYEOK
 KLF
 KLMNO
 KLMNOPQRST
@@ -1020,6 +1029,7 @@ luma
 lval
 LVB
 LVERTICAL
+LVT
 LWA
 LWIN
 lwkmvj
@@ -1049,6 +1059,7 @@ mdmerge
 MDs
 MEASUREITEM
 megamix
+Meh
 memallocator
 meme
 MENUCHAR
@@ -1164,6 +1175,7 @@ NOMINMAX
 NOMOVE
 NONALERT
 nonbreaking
+noncharacter
 nonclient
 NONINFRINGEMENT
 NONPREROTATED
@@ -1212,6 +1224,7 @@ ntuser
 NTVDM
 ntverp
 nugetversions
+NUKTA
 nullness
 nullonfailure
 nullopts
@@ -1489,6 +1502,7 @@ renderengine
 rendersize
 reparented
 reparenting
+REPH
 replatformed
 Replymessage
 repositorypath
@@ -1517,6 +1531,7 @@ rgw
 RIGHTALIGN
 RIGHTBUTTON
 riid
+ris
 RIS
 roadmap
 robomac
@@ -1883,6 +1898,7 @@ UPDATEDISPLAY
 UPDOWN
 UPKEY
 upss
+UPSS
 uregex
 URegular
 usebackq
@@ -1925,6 +1941,7 @@ vga
 vgaoem
 viewkind
 viewports
+VIRAMA
 Virt
 VIRTTERM
 vkey
@@ -2165,6 +2182,7 @@ Zabcdefghijklmn
 Zabcdefghijklmnopqrstuvwxyz
 ZCmd
 ZCtrl
+ZWJs
 zxcvbnm
 ZYXWVU
 ZYXWVUTd
@@ -2344,6 +2344,11 @@
           "description": "Force the terminal to use the legacy input encoding. Certain keys in some applications may stop working when enabling this setting.",
           "type": "boolean"
         },
+        "experimental.graphemes": {
+          "default": true,
+          "description": "When set to true, the terminal will use grapheme cluster boundaries for cursor movement. Otherwise, the terminal will use codepoint boundaries.",
+          "type": "boolean"
+        },
         "experimental.useBackgroundImageForWindow": {
           "default": false,
           "description": "When set to true, the background image for the currently focused profile is expanded to encompass the entire window, beneath other panes.",
 
@@ -5,10 +5,8 @@
 #include "Row.hpp"
 
 #include <isa_availability.h>
-#include <til/unicode.h>
 
-#include "textBuffer.hpp"
-#include "../../types/inc/GlyphWidth.hpp"
+#include "../../types/inc/CodepointWidthDetector.hpp"
 
 // It would be nice to add checked array access in the future, but it's a little annoying to do so without impacting
 // performance (including Debug performance). Other languages are a little bit more ergonomic there than C++.
@@ -646,60 +644,45 @@ catch (...)
     //
     // We can infer the "end" from the amount of columns we're given (colLimit - colBeg),
     // because ASCII is always 1 column wide per character.
-    auto it = chars.begin();
-    const auto end = it + std::min<size_t>(chars.size(), colLimit - colBeg);
+    const auto len = std::min<size_t>(chars.size(), colLimit - colBeg);
     size_t ch = chBeg;
 
-    while (it != end)
+    for (size_t off = 0; off < len; ++off)
     {
-        if (*it >= 0x80) [[unlikely]]
+        if (chars[off] >= 0x80) [[unlikely]]
         {
-            _replaceTextUnicode(ch, it);
+            _replaceTextUnicode(ch, off);
             return;
         }
 
         til::at(row._charOffsets, colEnd) = gsl::narrow_cast<uint16_t>(ch);
         ++colEnd;
         ++ch;
-        ++it;
     }
 
     colEndDirty = colEnd;
     charsConsumed = ch - chBeg;
 }
 
-[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept
+[[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, size_t off)
 {
-    const auto end = chars.end();
+    auto& cwd = CodepointWidthDetector::Singleton();
+    const auto len = chars.size();
 
-    while (it != end)
+    // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â".
+    // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character
+    // and let GraphemeNext() find the next proper grapheme boundary.
+    if (off != 0)
     {
-        unsigned int width = 1;
-        auto ptr = &*it;
-        const auto wch = *ptr;
-        size_t advance = 1;
-
-        ++it;
-
-        // Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII.
-        // It also allows us to skip the surrogate pair decoding at the same time.
-        if (wch >= 0x80)
-        {
-            if (til::is_surrogate(wch))
-            {
-                if (it != end && til::is_leading_surrogate(wch) && til::is_trailing_surrogate(*it))
-                {
-                    advance = 2;
-                    ++it;
-                }
-                else
-                {
-                    ptr = &UNICODE_REPLACEMENT;
-                }
-            }
+        --colEnd;
+        --ch;
+        --off;
+    }
 
-            width = IsGlyphFullWidth({ ptr, advance }) + 1u;
-        }
+    while (off < len)
+    {
+        int width;
+        const auto end = cwd.GraphemeNext(chars, off, &width);
 
         const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + width);
         if (colEndNew > colLimit)
@@ -719,7 +702,8 @@ catch (...)
             til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer);
         }
 
-        ch += advance;
+        ch += end - off;
+        off = end;
     }
 
     colEndDirty = colEnd;
@@ -1062,7 +1046,7 @@ std::wstring_view ROW::GetText() const noexcept
 
 std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept
 {
-    const til::CoordType columns = _columnCount;
+    const auto columns = GetReadableColumnCount();
     const auto colBeg = clamp(columnBegin, 0, columns);
     const auto colEnd = clamp(columnEnd, colBeg, columns);
     const size_t chBeg = _uncheckedCharOffset(gsl::narrow_cast<size_t>(colBeg));
 
@@ -181,7 +181,7 @@ class ROW final
         bool IsValid() const noexcept;
         void ReplaceCharacters(til::CoordType width) noexcept;
         void ReplaceText() noexcept;
-        void _replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept;
+        void _replaceTextUnicode(size_t ch, size_t off);
         void CopyTextFrom(const std::span<const uint16_t>& charOffsets) noexcept;
         static void _copyOffsets(uint16_t* dst, const uint16_t* src, uint16_t size, uint16_t offset) noexcept;
         void Finish();
 
@@ -2,16 +2,13 @@
 // Licensed under the MIT license.
 
 #include "precomp.h"
-
 #include "textBuffer.hpp"
 
 #include <til/hash.h>
-#include <til/unicode.h>
 
 #include "UTextAdapter.h"
-#include "../../types/inc/GlyphWidth.hpp"
+#include "../../types/inc/CodepointWidthDetector.hpp"
 #include "../renderer/base/renderer.hpp"
-#include "../types/inc/convert.hpp"
 #include "../types/inc/utils.hpp"
 
 using namespace Microsoft::Console;
@@ -408,17 +405,17 @@ void TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute
 // Given the character offset `position` in the `chars` string, this function returns the starting position of the next grapheme.
 // For instance, given a `chars` of L"x\uD83D\uDE42y" and a `position` of 1 it'll return 3.
 // GraphemePrev would do the exact inverse of this operation.
-// In the future, these functions are expected to also deliver information about how many columns a grapheme occupies.
-// (I know that mere UTF-16 code point iteration doesn't handle graphemes, but that's what we're working towards.)
 size_t TextBuffer::GraphemeNext(const std::wstring_view& chars, size_t position) noexcept
 {
-    return til::utf16_iterate_next(chars, position);
+    auto& cwd = CodepointWidthDetector::Singleton();
+    return cwd.GraphemeNext(chars, position, nullptr);
 }
 
 // It's the counterpart to GraphemeNext. See GraphemeNext.
 size_t TextBuffer::GraphemePrev(const std::wstring_view& chars, size_t position) noexcept
 {
-    return til::utf16_iterate_prev(chars, position);
+    auto& cwd = CodepointWidthDetector::Singleton();
+    return cwd.GraphemePrev(chars, position, nullptr);
 }
 
 // Ever wondered how much space a piece of text needs before inserting it? This function will tell you!
@@ -445,7 +442,7 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord
     {
     }
 
-    const auto dist = gsl::narrow_cast<size_t>(it - beg);
+    auto dist = gsl::narrow_cast<size_t>(it - beg);
     auto col = gsl::narrow_cast<til::CoordType>(dist);
 
     if (it == asciiEnd) [[likely]]
@@ -455,33 +452,23 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord
     }
 
     // Unicode slow-path where we need to count text and columns separately.
-    for (;;)
-    {
-        auto ptr = &*it;
-        const auto wch = *ptr;
-        size_t len = 1;
-
-        col++;
+    auto& cwd = CodepointWidthDetector::Singleton();
+    const auto len = chars.size();
 
-        // Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII.
-        // It also allows us to skip the surrogate pair decoding at the same time.
-        if (wch >= 0x80)
-        {
-            if (til::is_surrogate(wch))
-            {
-                const auto it2 = it + 1;
-                if (til::is_leading_surrogate(wch) && it2 != end && til::is_trailing_surrogate(*it2))
-                {
-                    len = 2;
-                }
-                else
-                {
-                    ptr = &UNICODE_REPLACEMENT;
-                }
-            }
+    // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â".
+    // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character
+    // and let GraphemeNext() find the next proper grapheme boundary.
+    if (dist != 0)
+    {
+        dist--;
+        col--;
+    }
 
-            col += IsGlyphFullWidth({ ptr, len });
-        }
+    while (dist < len)
+    {
+        int width;
+        dist = cwd.GraphemeNext(chars, dist, &width);
+        col += width;
 
         // If we ran out of columns, we need to always return `columnLimit` and not `col`,
         // because if we tried inserting a wide glyph into just 1 remaining column it will
@@ -490,17 +477,13 @@ size_t TextBuffer::FitTextIntoColumns(const std::wstring_view& chars, til::Coord
         if (col > columnLimit)
         {
             columns = columnLimit;
-            return gsl::narrow_cast<size_t>(it - beg);
-        }
-
-        // But if we simply ran out of text we just need to return the actual number of columns.
-        it += len;
-        if (it == end)
-        {
-            columns = col;
-            return chars.size();
+            return dist;
         }
     }
+
+    // But if we simply ran out of text we just need to return the actual number of columns.
+    columns = col;
+    return chars.size();
 }
 
 // Pretend as if `position` is a regular cursor in the TextBuffer.
 
@@ -20,6 +20,7 @@ namespace Microsoft.Terminal.Core
         String WordDelimiters;
 
         Boolean ForceVTInput;
+        Boolean Graphemes;
         Boolean TrimBlockSelection;
         Boolean DetectURLs;
         Boolean VtPassthrough;