diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index 501a7aefccd7f..d0d6f72c197da 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -93,7 +93,12 @@ namespace llvm { private: lltok::Kind LexToken(); + // Return closes pointer after `Ptr` that is an end of a label. + // Returns nullptr if `Ptr` doesn't point into a label. + const char *getLabelTail(const char *Ptr); int getNextChar(); + const char *skipNChars(unsigned N); + void advancePositionTo(const char *Ptr); void SkipLineComment(); bool SkipCComment(); lltok::Kind ReadString(lltok::Kind kind); diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 520c6a00a9c07..578ac851e38c5 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -155,15 +155,6 @@ static bool isLabelChar(char C) { C == '.' || C == '_'; } -/// isLabelTail - Return true if this pointer points to a valid end of a label. -static const char *isLabelTail(const char *CurPtr) { - while (true) { - if (CurPtr[0] == ':') return CurPtr+1; - if (!isLabelChar(CurPtr[0])) return nullptr; - ++CurPtr; - } -} - //===----------------------------------------------------------------------===// // Lexer definition. //===----------------------------------------------------------------------===// @@ -174,19 +165,35 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err, CurPtr = CurBuf.begin(); } +const char *LLLexer::getLabelTail(const char *Ptr) { + while (Ptr != CurBuf.end()) { + if (Ptr[0] == ':') + return Ptr + 1; + if (!isLabelChar(Ptr[0])) + return nullptr; + ++Ptr; + } + return nullptr; +} + int LLLexer::getNextChar() { - char CurChar = *CurPtr++; - switch (CurChar) { - default: return (unsigned char)CurChar; - case 0: - // A nul character in the stream is either the end of the current buffer or - // a random nul in the file. Disambiguate that here. - if (CurPtr-1 != CurBuf.end()) - return 0; // Just whitespace. - - // Otherwise, return end of file. - --CurPtr; // Another call to lex will return EOF again. + if (CurPtr == CurBuf.end()) return EOF; + return *CurPtr++; +} + +const char *LLLexer::skipNChars(unsigned N) { + while (N--) + getNextChar(); + return CurPtr; +} + +void LLLexer::advancePositionTo(const char *Ptr) { + while (CurPtr != Ptr) { + if (CurPtr > Ptr) { + --CurPtr; + } else + getNextChar(); } } @@ -215,13 +222,13 @@ lltok::Kind LLLexer::LexToken() { case '%': return LexPercent(); case '"': return LexQuote(); case '.': - if (const char *Ptr = isLabelTail(CurPtr)) { - CurPtr = Ptr; + if (const char *Ptr = getLabelTail(CurPtr)) { + advancePositionTo(Ptr); StrVal.assign(TokStart, CurPtr-1); return lltok::LabelStr; } if (CurPtr[0] == '.' && CurPtr[1] == '.') { - CurPtr += 2; + skipNChars(2); return lltok::dotdotdot; } return lltok::Error; @@ -298,15 +305,15 @@ lltok::Kind LLLexer::LexAt() { } lltok::Kind LLLexer::LexDollar() { - if (const char *Ptr = isLabelTail(TokStart)) { - CurPtr = Ptr; + if (const char *Ptr = getLabelTail(TokStart)) { + advancePositionTo(Ptr); StrVal.assign(TokStart, CurPtr - 1); return lltok::LabelStr; } // Handle DollarStringConstant: $\"[^\"]*\" if (CurPtr[0] == '"') { - ++CurPtr; + getNextChar(); while (true) { int CurChar = getNextChar(); @@ -358,11 +365,11 @@ bool LLLexer::ReadVarName() { if (isalpha(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') { - ++CurPtr; + getNextChar(); while (isalnum(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') - ++CurPtr; + getNextChar(); StrVal.assign(NameStart, CurPtr); return true; @@ -376,7 +383,8 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { if (!isdigit(static_cast(CurPtr[0]))) return lltok::Error; - for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (getNextChar(); isdigit(static_cast(CurPtr[0])); + getNextChar()) /*empty*/; uint64_t Val = atoull(TokStart + 1, CurPtr); @@ -389,7 +397,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { // Handle StringConstant: \"[^\"]*\" if (CurPtr[0] == '"') { - ++CurPtr; + getNextChar(); while (true) { int CurChar = getNextChar(); @@ -435,7 +443,7 @@ lltok::Kind LLLexer::LexQuote() { return kind; if (CurPtr[0] == ':') { - ++CurPtr; + getNextChar(); if (StringRef(StrVal).contains(0)) { LexError("NUL character is not allowed in names"); kind = lltok::Error; @@ -455,11 +463,11 @@ lltok::Kind LLLexer::LexExclaim() { if (isalpha(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') { - ++CurPtr; + getNextChar(); while (isalnum(static_cast(CurPtr[0])) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') - ++CurPtr; + getNextChar(); StrVal.assign(TokStart+1, CurPtr); // Skip ! UnEscapeLexed(StrVal); @@ -495,7 +503,7 @@ lltok::Kind LLLexer::LexIdentifier() { const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar; const char *KeywordEnd = nullptr; - for (; isLabelChar(*CurPtr); ++CurPtr) { + for (; isLabelChar(*CurPtr); getNextChar()) { // If we decide this is an integer, remember the end of the sequence. if (!IntEnd && !isdigit(static_cast(*CurPtr))) IntEnd = CurPtr; @@ -507,7 +515,8 @@ lltok::Kind LLLexer::LexIdentifier() { // If we stopped due to a colon, unless we were directed to ignore it, // this really is a label. if (!IgnoreColonInIdentifiers && *CurPtr == ':') { - StrVal.assign(StartChar-1, CurPtr++); + StrVal.assign(StartChar - 1, CurPtr); + getNextChar(); return lltok::LabelStr; } @@ -515,7 +524,7 @@ lltok::Kind LLLexer::LexIdentifier() { // return it. if (!IntEnd) IntEnd = CurPtr; if (IntEnd != StartChar) { - CurPtr = IntEnd; + advancePositionTo(IntEnd); uint64_t NumBits = atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { @@ -528,7 +537,7 @@ lltok::Kind LLLexer::LexIdentifier() { // Otherwise, this was a letter sequence. See which keyword this is. if (!KeywordEnd) KeywordEnd = CurPtr; - CurPtr = KeywordEnd; + advancePositionTo(KeywordEnd); --StartChar; StringRef Keyword(StartChar, CurPtr - StartChar); @@ -1042,7 +1051,7 @@ lltok::Kind LLLexer::LexIdentifier() { StringRef HexStr(TokStart + 3, len); if (!all_of(HexStr, isxdigit)) { // Bad token, return it as an error. - CurPtr = TokStart+3; + advancePositionTo(TokStart + 3); return lltok::Error; } APInt Tmp(bits, HexStr, 16); @@ -1055,12 +1064,12 @@ lltok::Kind LLLexer::LexIdentifier() { // If this is "cc1234", return this as just "cc". if (TokStart[0] == 'c' && TokStart[1] == 'c') { - CurPtr = TokStart+2; + advancePositionTo(TokStart + 2); return lltok::kw_cc; } // Finally, if this isn't known, return an error. - CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } @@ -1073,24 +1082,25 @@ lltok::Kind LLLexer::LexIdentifier() { /// HexHalfConstant 0xH[0-9A-Fa-f]+ /// HexBFloatConstant 0xR[0-9A-Fa-f]+ lltok::Kind LLLexer::Lex0x() { - CurPtr = TokStart + 2; + advancePositionTo(TokStart + 2); char Kind; if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' || CurPtr[0] == 'R') { - Kind = *CurPtr++; + Kind = *CurPtr; + getNextChar(); } else { Kind = 'J'; } if (!isxdigit(static_cast(CurPtr[0]))) { // Bad token, return it as an error. - CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } while (isxdigit(static_cast(CurPtr[0]))) - ++CurPtr; + getNextChar(); if (Kind == 'J') { // HexFPConstant - Floating point constant represented in IEEE format as a @@ -1145,9 +1155,9 @@ lltok::Kind LLLexer::LexDigitOrNegative() { if (!isdigit(static_cast(TokStart[0])) && !isdigit(static_cast(CurPtr[0]))) { // Okay, this is not a number after the -, it's probably a label. - if (const char *End = isLabelTail(CurPtr)) { + if (const char *End = getLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); - CurPtr = End; + advancePositionTo(End); return lltok::LabelStr; } @@ -1157,13 +1167,13 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // At this point, it is either a label, int or fp constant. // Skip digits, we have at least one. - for (; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (; isdigit(static_cast(CurPtr[0])); getNextChar()) /*empty*/; // Check if this is a fully-numeric label: if (isdigit(TokStart[0]) && CurPtr[0] == ':') { uint64_t Val = atoull(TokStart, CurPtr); - ++CurPtr; // Skip the colon. + getNextChar(); // Skip the colon. if ((unsigned)Val != Val) LexError("invalid value number (too large)"); UIntVal = unsigned(Val); @@ -1172,9 +1182,9 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // Check to see if this really is a string label, e.g. "-1:". if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { - if (const char *End = isLabelTail(CurPtr)) { + if (const char *End = getLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); - CurPtr = End; + advancePositionTo(End); return lltok::LabelStr; } } @@ -1188,17 +1198,19 @@ lltok::Kind LLLexer::LexDigitOrNegative() { return lltok::APSInt; } - ++CurPtr; + getNextChar(); // Skip over [0-9]*([eE][-+]?[0-9]+)? - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { - CurPtr += 2; - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + skipNChars(2); + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); } } @@ -1216,26 +1228,29 @@ lltok::Kind LLLexer::LexPositive() { return lltok::Error; // Skip digits. - for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) + for (getNextChar(); isdigit(static_cast(CurPtr[0])); + getNextChar()) /*empty*/; // At this point, we need a '.'. if (CurPtr[0] != '.') { - CurPtr = TokStart+1; + advancePositionTo(TokStart + 1); return lltok::Error; } - ++CurPtr; + getNextChar(); // Skip over [0-9]*([eE][-+]?[0-9]+)? - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { if (isdigit(static_cast(CurPtr[1])) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(static_cast(CurPtr[2])))) { - CurPtr += 2; - while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; + skipNChars(2); + while (isdigit(static_cast(CurPtr[0]))) + getNextChar(); } }