llvm · Bertik23 · Jul 29, 2025 · Aug 4, 2025 · Aug 4, 2025 · Aug 4, 2025
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -93,7 +93,12 @@ namespace llvm {
   private:
     lltok::Kind LexToken();
 
+    // Return closes pointer after `Ptr` that is an end of a label.
+    // Returns nullptr if `Ptr` doesn't point into a label.
+    const char *getLabelTail(const char *Ptr);
     int getNextChar();
+    const char *skipNChars(unsigned N);
+    void advancePositionTo(const char *Ptr);
     void SkipLineComment();
     bool SkipCComment();
     lltok::Kind ReadString(lltok::Kind kind);

diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
@@ -155,15 +155,6 @@ static bool isLabelChar(char C) {
          C == '.' || C == '_';
 }
 
-/// isLabelTail - Return true if this pointer points to a valid end of a label.
-static const char *isLabelTail(const char *CurPtr) {
-  while (true) {
-    if (CurPtr[0] == ':') return CurPtr+1;
-    if (!isLabelChar(CurPtr[0])) return nullptr;
-    ++CurPtr;
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Lexer definition.
 //===----------------------------------------------------------------------===//
@@ -174,19 +165,35 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
   CurPtr = CurBuf.begin();
 }
 
+const char *LLLexer::getLabelTail(const char *Ptr) {
+  while (Ptr != CurBuf.end()) {
+    if (Ptr[0] == ':')
+      return Ptr + 1;
+    if (!isLabelChar(Ptr[0]))
+      return nullptr;
+    ++Ptr;
+  }
+  return nullptr;
+}
+
 int LLLexer::getNextChar() {
-  char CurChar = *CurPtr++;
-  switch (CurChar) {
-  default: return (unsigned char)CurChar;
-  case 0:
-    // A nul character in the stream is either the end of the current buffer or
-    // a random nul in the file.  Disambiguate that here.
-    if (CurPtr-1 != CurBuf.end())
-      return 0;  // Just whitespace.
-
-    // Otherwise, return end of file.
-    --CurPtr;  // Another call to lex will return EOF again.
+  if (CurPtr == CurBuf.end())
     return EOF;
+  return *CurPtr++;
+}
+
+const char *LLLexer::skipNChars(unsigned N) {
+  while (N--)
+    getNextChar();
+  return CurPtr;
+}
+
+void LLLexer::advancePositionTo(const char *Ptr) {
+  while (CurPtr != Ptr) {
+    if (CurPtr > Ptr) {
+      --CurPtr;
+    } else
+      getNextChar();
   }
 }
 
@@ -215,13 +222,13 @@ lltok::Kind LLLexer::LexToken() {
     case '%': return LexPercent();
     case '"': return LexQuote();
     case '.':
-      if (const char *Ptr = isLabelTail(CurPtr)) {
-        CurPtr = Ptr;
+      if (const char *Ptr = getLabelTail(CurPtr)) {
+        advancePositionTo(Ptr);
         StrVal.assign(TokStart, CurPtr-1);
         return lltok::LabelStr;
       }
       if (CurPtr[0] == '.' && CurPtr[1] == '.') {
-        CurPtr += 2;
+        skipNChars(2);
         return lltok::dotdotdot;
       }
       return lltok::Error;
@@ -298,15 +305,15 @@ lltok::Kind LLLexer::LexAt() {
 }
 
 lltok::Kind LLLexer::LexDollar() {
-  if (const char *Ptr = isLabelTail(TokStart)) {
-    CurPtr = Ptr;
+  if (const char *Ptr = getLabelTail(TokStart)) {
+    advancePositionTo(Ptr);
     StrVal.assign(TokStart, CurPtr - 1);
     return lltok::LabelStr;
   }
 
   // Handle DollarStringConstant: $\"[^\"]*\"
   if (CurPtr[0] == '"') {
-    ++CurPtr;
+    getNextChar();
 
     while (true) {
       int CurChar = getNextChar();
@@ -358,11 +365,11 @@ bool LLLexer::ReadVarName() {
   if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
       CurPtr[0] == '-' || CurPtr[0] == '$' ||
       CurPtr[0] == '.' || CurPtr[0] == '_') {
-    ++CurPtr;
+    getNextChar();
     while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
            CurPtr[0] == '-' || CurPtr[0] == '$' ||
            CurPtr[0] == '.' || CurPtr[0] == '_')
-      ++CurPtr;
+      getNextChar();
 
     StrVal.assign(NameStart, CurPtr);
     return true;
@@ -376,7 +383,8 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
   if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
     return lltok::Error;
 
-  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+  for (getNextChar(); isdigit(static_cast<unsigned char>(CurPtr[0]));
+       getNextChar())
     /*empty*/;
 
   uint64_t Val = atoull(TokStart + 1, CurPtr);
@@ -389,7 +397,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
 lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
   // Handle StringConstant: \"[^\"]*\"
   if (CurPtr[0] == '"') {
-    ++CurPtr;
+    getNextChar();
 
     while (true) {
       int CurChar = getNextChar();
@@ -435,7 +443,7 @@ lltok::Kind LLLexer::LexQuote() {
     return kind;
 
   if (CurPtr[0] == ':') {
-    ++CurPtr;
+    getNextChar();
     if (StringRef(StrVal).contains(0)) {
       LexError("NUL character is not allowed in names");
       kind = lltok::Error;
@@ -455,11 +463,11 @@ lltok::Kind LLLexer::LexExclaim() {
   if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
       CurPtr[0] == '-' || CurPtr[0] == '$' ||
       CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
-    ++CurPtr;
+    getNextChar();
     while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
            CurPtr[0] == '-' || CurPtr[0] == '$' ||
            CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
-      ++CurPtr;
+      getNextChar();
 
     StrVal.assign(TokStart+1, CurPtr);   // Skip !
     UnEscapeLexed(StrVal);
@@ -495,7 +503,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
   const char *KeywordEnd = nullptr;
 
-  for (; isLabelChar(*CurPtr); ++CurPtr) {
+  for (; isLabelChar(*CurPtr); getNextChar()) {
     // If we decide this is an integer, remember the end of the sequence.
     if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
       IntEnd = CurPtr;
@@ -507,15 +515,16 @@ lltok::Kind LLLexer::LexIdentifier() {
   // If we stopped due to a colon, unless we were directed to ignore it,
   // this really is a label.
   if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
-    StrVal.assign(StartChar-1, CurPtr++);
+    StrVal.assign(StartChar - 1, CurPtr);
+    getNextChar();
     return lltok::LabelStr;
   }
 
   // Otherwise, this wasn't a label.  If this was valid as an integer type,
   // return it.
   if (!IntEnd) IntEnd = CurPtr;
   if (IntEnd != StartChar) {
-    CurPtr = IntEnd;
+    advancePositionTo(IntEnd);
     uint64_t NumBits = atoull(StartChar, CurPtr);
     if (NumBits < IntegerType::MIN_INT_BITS ||
         NumBits > IntegerType::MAX_INT_BITS) {
@@ -528,7 +537,7 @@ lltok::Kind LLLexer::LexIdentifier() {
 
   // Otherwise, this was a letter sequence.  See which keyword this is.
   if (!KeywordEnd) KeywordEnd = CurPtr;
-  CurPtr = KeywordEnd;
+  advancePositionTo(KeywordEnd);
   --StartChar;
   StringRef Keyword(StartChar, CurPtr - StartChar);
 
@@ -1042,7 +1051,7 @@ lltok::Kind LLLexer::LexIdentifier() {
     StringRef HexStr(TokStart + 3, len);
     if (!all_of(HexStr, isxdigit)) {
       // Bad token, return it as an error.
-      CurPtr = TokStart+3;
+      advancePositionTo(TokStart + 3);
       return lltok::Error;
     }
     APInt Tmp(bits, HexStr, 16);
@@ -1055,12 +1064,12 @@ lltok::Kind LLLexer::LexIdentifier() {
 
   // If this is "cc1234", return this as just "cc".
   if (TokStart[0] == 'c' && TokStart[1] == 'c') {
-    CurPtr = TokStart+2;
+    advancePositionTo(TokStart + 2);
     return lltok::kw_cc;
   }
 
   // Finally, if this isn't known, return an error.
-  CurPtr = TokStart+1;
+  advancePositionTo(TokStart + 1);
   return lltok::Error;
 }
 
@@ -1073,24 +1082,25 @@ lltok::Kind LLLexer::LexIdentifier() {
 ///    HexHalfConstant   0xH[0-9A-Fa-f]+
 ///    HexBFloatConstant 0xR[0-9A-Fa-f]+
 lltok::Kind LLLexer::Lex0x() {
-  CurPtr = TokStart + 2;
+  advancePositionTo(TokStart + 2);
 
   char Kind;
   if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||
       CurPtr[0] == 'R') {
-    Kind = *CurPtr++;
+    Kind = *CurPtr;
+    getNextChar();
   } else {
     Kind = 'J';
   }
 
   if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
     // Bad token, return it as an error.
-    CurPtr = TokStart+1;
+    advancePositionTo(TokStart + 1);
     return lltok::Error;
   }
 
   while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
-    ++CurPtr;
+    getNextChar();
 
   if (Kind == 'J') {
     // HexFPConstant - Floating point constant represented in IEEE format as a
@@ -1145,9 +1155,9 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
   if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
       !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
     // Okay, this is not a number after the -, it's probably a label.
-    if (const char *End = isLabelTail(CurPtr)) {
+    if (const char *End = getLabelTail(CurPtr)) {
       StrVal.assign(TokStart, End-1);
-      CurPtr = End;
+      advancePositionTo(End);
       return lltok::LabelStr;
     }
 
@@ -1157,13 +1167,13 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
   // At this point, it is either a label, int or fp constant.
 
   // Skip digits, we have at least one.
-  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); getNextChar())
     /*empty*/;
 
   // Check if this is a fully-numeric label:
   if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
     uint64_t Val = atoull(TokStart, CurPtr);
-    ++CurPtr; // Skip the colon.
+    getNextChar(); // Skip the colon.
     if ((unsigned)Val != Val)
       LexError("invalid value number (too large)");
     UIntVal = unsigned(Val);
@@ -1172,9 +1182,9 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
 
   // Check to see if this really is a string label, e.g. "-1:".
   if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
-    if (const char *End = isLabelTail(CurPtr)) {
+    if (const char *End = getLabelTail(CurPtr)) {
       StrVal.assign(TokStart, End-1);
-      CurPtr = End;
+      advancePositionTo(End);
       return lltok::LabelStr;
     }
   }
@@ -1188,17 +1198,19 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
     return lltok::APSInt;
   }
 
-  ++CurPtr;
+  getNextChar();
 
   // Skip over [0-9]*([eE][-+]?[0-9]+)?
-  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+  while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+    getNextChar();
 
   if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
     if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
         ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
           isdigit(static_cast<unsigned char>(CurPtr[2])))) {
-      CurPtr += 2;
-      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+      skipNChars(2);
+      while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+        getNextChar();
     }
   }
 
@@ -1216,26 +1228,29 @@ lltok::Kind LLLexer::LexPositive() {
     return lltok::Error;
 
   // Skip digits.
-  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+  for (getNextChar(); isdigit(static_cast<unsigned char>(CurPtr[0]));
+       getNextChar())
     /*empty*/;
 
   // At this point, we need a '.'.
   if (CurPtr[0] != '.') {
-    CurPtr = TokStart+1;
+    advancePositionTo(TokStart + 1);
     return lltok::Error;
   }
 
-  ++CurPtr;
+  getNextChar();
 
   // Skip over [0-9]*([eE][-+]?[0-9]+)?
-  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+  while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+    getNextChar();
 
   if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
     if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
         ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
         isdigit(static_cast<unsigned char>(CurPtr[2])))) {
-      CurPtr += 2;
-      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
+      skipNChars(2);
+      while (isdigit(static_cast<unsigned char>(CurPtr[0])))
+        getNextChar();
     }
   }