Unity-Engineering-software-engineering
diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt
Lines changed: 15 additions & 3 deletions
diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp
Lines changed: 73 additions & 41 deletions
@@ -146,6 +146,7 @@ bytebuffer
 cac
 cacafire
 CALLCONV
+CANDRABINDU
 capslock
 CARETBLINKINGENABLED
 CARRIAGERETURN
@@ -156,6 +157,7 @@ CBash
 cbiex
 CBN
 cbt
+Ccc
 CCCBB
 cch
 CCHAR
@@ -293,7 +295,6 @@ CREATESTRUCT
 CREATESTRUCTW
 createvpack
 crisman
-CRLFs
 crloew
 CRTLIBS
 csbi
@@ -593,6 +594,7 @@ fesb
 FFAF
 ffd
 FFDE
+FFFD
 FFFDb
 fgbg
 FGCOLOR
@@ -613,6 +615,7 @@ FINDREGEX
 FINDSTRINGEXACT
 FINDUP
 FIter
+FITZPATRICK
 FIXEDFILEINFO
 Flg
 flyouts
@@ -879,10 +882,12 @@ jconcpp
 JLO
 JOBOBJECT
 JOBOBJECTINFOCLASS
+JONGSEONG
 JPN
 jsoncpp
 jsprovider
 jumplist
+JUNGSEONG
 KAttrs
 kawa
 Kazu
@@ -901,6 +906,7 @@ keyups
 KILLACTIVE
 KILLFOCUS
 kinda
+KIYEOK
 KLF
 KLMNO
 KLMNOPQRST
@@ -1010,6 +1016,7 @@ luma
 lval
 LVB
 LVERTICAL
+LVT
 LWA
 LWIN
 lwkmvj
@@ -1205,6 +1212,7 @@ ntuser
 NTVDM
 ntverp
 nugetversions
+NUKTA
 nullness
 nullonfailure
 nullopts
@@ -1467,7 +1475,6 @@ READMODE
 rectread
 redef
 redefinable
-Redir
 redist
 REDSCROLL
 REFCLSID
@@ -1485,6 +1492,7 @@ renderengine
 rendersize
 reparented
 reparenting
+REPH
 replatformed
 Replymessage
 repositorypath
@@ -1514,6 +1522,7 @@ rgw
 RIGHTALIGN
 RIGHTBUTTON
 riid
+ris
 RIS
 roadmap
 robomac
@@ -1919,6 +1928,7 @@ vga
 vgaoem
 viewkind
 viewports
+VIRAMA
 Virt
 VIRTTERM
 vkey
@@ -1969,8 +1979,8 @@ wchars
 WCIA
 WCIW
 WCSHELPER
-wcsicmp
 wcsrev
+wcswidth
 wddm
 wddmcon
 WDDMCONSOLECONTEXT
@@ -2125,6 +2135,7 @@ XFORM
 XIn
 XManifest
 XMath
+XNamespace
 xorg
 XPan
 XResource
@@ -2156,6 +2167,7 @@ Zabcdefghijklmn
 Zabcdefghijklmnopqrstuvwxyz
 ZCmd
 ZCtrl
+ZWJs
 zxcvbnm
 ZYXWVU
 ZYXWVUTd
@@ -5,10 +5,8 @@
 #include "Row.hpp"
 
 #include <isa_availability.h>
-#include <til/unicode.h>
 
-#include "textBuffer.hpp"
-#include "../../types/inc/GlyphWidth.hpp"
+#include "../../types/inc/CodepointWidthDetector.hpp"
 
 // It would be nice to add checked array access in the future, but it's a little annoying to do so without impacting
 // performance (including Debug performance). Other languages are a little bit more ergonomic there than C++.
@@ -568,6 +566,7 @@ void ROW::ReplaceAttributes(const til::CoordType beginIndex, const til::CoordTyp
 void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars)
 try
 {
+    assert(width >= 1 && width <= 2);
     WriteHelper h{ *this, columnBegin, _columnCount, chars };
     if (!h.IsValid())
     {
@@ -666,56 +665,89 @@ catch (...)
 
 [[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept
 {
-    const auto end = chars.end();
+    auto& cwd = CodepointWidthDetector::Singleton();
 
-    while (it != end)
+    // Check if the new text joins with the existing contents of the row to form a single grapheme cluster.
+    if (it == chars.begin())
     {
-        unsigned int width = 1;
-        auto ptr = &*it;
-        const auto wch = *ptr;
-        size_t advance = 1;
+        auto colPrev = colBeg;
+        while (colPrev > 0 && row._uncheckedIsTrailer(--colPrev))
+        {
+        }
 
-        ++it;
+        const auto chPrev = row._uncheckedCharOffset(colPrev);
+        const std::wstring_view charsPrev{ row._chars.data() + chPrev, ch - chPrev };
 
-        // Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII.
-        // It also allows us to skip the surrogate pair decoding at the same time.
-        if (wch >= 0x80)
+        GraphemeState state;
+        cwd.GraphemeNext(state, charsPrev);
+        cwd.GraphemeNext(state, chars);
+
+        if (state.len > 0)
         {
-            if (til::is_surrogate(wch))
+            colBegDirty = colPrev;
+            colEnd = colPrev;
+
+            const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + state.width);
+            if (colEndNew > colLimit)
             {
-                if (it != end && til::is_leading_surrogate(wch) && til::is_trailing_surrogate(*it))
-                {
-                    advance = 2;
-                    ++it;
-                }
-                else
-                {
-                    ptr = &UNICODE_REPLACEMENT;
-                }
+                colEndDirty = colLimit;
+                charsConsumed = ch - chBeg;
+                return;
             }
 
-            width = IsGlyphFullWidth({ ptr, advance }) + 1u;
-        }
+            // Fill our char-offset buffer with 1 entry containing the mapping from the
+            // current column (colEnd) to the start of the glyph in the string (chPrev)...
+            til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(chPrev);
+            // ...followed by 0-N entries containing an indication that the
+            // columns are just a wide-glyph extension of the preceding one.
+            while (colEnd < colEndNew)
+            {
+                til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(chPrev | CharOffsetsTrailer);
+            }
 
-        const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + width);
-        if (colEndNew > colLimit)
-        {
-            colEndDirty = colLimit;
-            charsConsumed = ch - chBeg;
-            return;
+            ch += state.len;
+            it += state.len;
         }
+    }
+    else
+    {
+        // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â".
+        // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character
+        // and let GraphemeNext() find the next proper grapheme boundary.
+        --colEnd;
+        --ch;
+        --it;
+    }
+
+    if (const auto end = chars.end(); it != end)
+    {
+        GraphemeState state{ .beg = &*it };
 
-        // Fill our char-offset buffer with 1 entry containing the mapping from the
-        // current column (colEnd) to the start of the glyph in the string (ch)...
-        til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch);
-        // ...followed by 0-N entries containing an indication that the
-        // columns are just a wide-glyph extension of the preceding one.
-        while (colEnd < colEndNew)
+        do
         {
-            til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer);
-        }
+            cwd.GraphemeNext(state, chars);
+
+            const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + state.width);
+            if (colEndNew > colLimit)
+            {
+                colEndDirty = colLimit;
+                charsConsumed = ch - chBeg;
+                return;
+            }
+
+            // Fill our char-offset buffer with 1 entry containing the mapping from the
+            // current column (colEnd) to the start of the glyph in the string (ch)...
+            til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch);
+            // ...followed by 0-N entries containing an indication that the
+            // columns are just a wide-glyph extension of the preceding one.
+            while (colEnd < colEndNew)
+            {
+                til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer);
+            }
 
-        ch += advance;
+            ch += state.len;
+            it += state.len;
+        } while (it != end);
     }
 
     colEndDirty = colEnd;
@@ -1058,7 +1090,7 @@ std::wstring_view ROW::GetText() const noexcept
 
 std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept
 {
-    const til::CoordType columns = _columnCount;
+    const auto columns = GetReadableColumnCount();
     const auto colBeg = clamp(columnBegin, 0, columns);
     const auto colEnd = clamp(columnEnd, colBeg, columns);
     const size_t chBeg = _uncheckedCharOffset(gsl::narrow_cast<size_t>(colBeg));