Skip to content

Commit 4a219bf

Browse files
committed
Fix a bug in the .mlir lexer where a \0 character in a file is treated as a colon (due to an accidental fall-through) instead of as whitespace.
Summary: While here, simplify the lexer a bit by eliminating the unneeded 'operator' classification of certain sigils; they can just be treated as 'punctuation'.
Reviewers: rriddle!
Subscribers: mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76647
1 parent f09f4b2 commit 4a219bf

File tree

5 files changed

+35
-40
lines changed

5 files changed

+35
-40
lines changed

mlir/lib/Parser/Lexer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ Token Lexer::lexToken() {
8383
// marker that llvm::MemoryBuffer guarantees will be there.
8484
if (curPtr - 1 == curBuffer.end())
8585
return formToken(Token::eof, tokStart);
86+
continue;
8687

87-
LLVM_FALLTHROUGH;
8888
case ':':
8989
return formToken(Token::colon, tokStart);
9090
case ',':

mlir/lib/Parser/Token.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,6 @@ StringRef Token::getTokenSpelling(Kind kind) {
145145
#define TOK_PUNCTUATION(NAME, SPELLING) \
146146
case NAME: \
147147
return SPELLING;
148-
#define TOK_OPERATOR(NAME, SPELLING) \
149-
case NAME: \
150-
return SPELLING;
151148
#define TOK_KEYWORD(SPELLING) \
152149
case kw_##SPELLING: \
153150
return #SPELLING;

mlir/lib/Parser/Token.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ class Token {
2323
#define TOK_IDENTIFIER(NAME) NAME,
2424
#define TOK_LITERAL(NAME) NAME,
2525
#define TOK_PUNCTUATION(NAME, SPELLING) NAME,
26-
#define TOK_OPERATOR(NAME, SPELLING) NAME,
2726
#define TOK_KEYWORD(SPELLING) kw_##SPELLING,
2827
#include "TokenKinds.def"
2928
};
@@ -50,7 +49,8 @@ class Token {
5049
bool isNot(Kind k) const { return kind != k; }
5150

5251
/// Return true if this token isn't one of the specified kinds.
53-
template <typename... T> bool isNot(Kind k1, Kind k2, T... others) const {
52+
template <typename... T>
53+
bool isNot(Kind k1, Kind k2, T... others) const {
5454
return !isAny(k1, k2, others...);
5555
}
5656

mlir/lib/Parser/TokenKinds.def

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14-
#if !defined(TOK_MARKER) && !defined(TOK_IDENTIFIER) && !defined(TOK_LITERAL)&&\
15-
!defined(TOK_PUNCTUATION) && !defined(TOK_OPERATOR) && !defined(TOK_KEYWORD)
16-
# error Must define one of the TOK_ macros.
14+
#if !defined(TOK_MARKER) && !defined(TOK_IDENTIFIER) && \
15+
!defined(TOK_LITERAL) && !defined(TOK_PUNCTUATION) && \
16+
!defined(TOK_KEYWORD)
17+
#error Must define one of the TOK_ macros.
1718
#endif
1819

1920
#ifndef TOK_MARKER
@@ -28,14 +29,10 @@
2829
#ifndef TOK_PUNCTUATION
2930
#define TOK_PUNCTUATION(NAME, SPELLING)
3031
#endif
31-
#ifndef TOK_OPERATOR
32-
#define TOK_OPERATOR(NAME, SPELLING)
33-
#endif
3432
#ifndef TOK_KEYWORD
3533
#define TOK_KEYWORD(SPELLING)
3634
#endif
3735

38-
3936
// Markers
4037
TOK_MARKER(eof)
4138
TOK_MARKER(error)
@@ -49,34 +46,30 @@ TOK_IDENTIFIER(caret_identifier) // ^foo
4946
TOK_IDENTIFIER(exclamation_identifier) // !foo
5047

5148
// Literals
52-
TOK_LITERAL(floatliteral) // 2.0
53-
TOK_LITERAL(integer) // 42
54-
TOK_LITERAL(string) // "foo"
55-
TOK_LITERAL(inttype) // i4, si8, ui16
49+
TOK_LITERAL(floatliteral) // 2.0
50+
TOK_LITERAL(integer) // 42
51+
TOK_LITERAL(string) // "foo"
52+
TOK_LITERAL(inttype) // i4, si8, ui16
5653

5754
// Punctuation.
58-
TOK_PUNCTUATION(arrow, "->")
59-
TOK_PUNCTUATION(at, "@")
60-
TOK_PUNCTUATION(colon, ":")
61-
TOK_PUNCTUATION(comma, ",")
62-
TOK_PUNCTUATION(question, "?")
63-
TOK_PUNCTUATION(l_paren, "(")
64-
TOK_PUNCTUATION(r_paren, ")")
65-
TOK_PUNCTUATION(l_brace, "{")
66-
TOK_PUNCTUATION(r_brace, "}")
67-
TOK_PUNCTUATION(l_square, "[")
68-
TOK_PUNCTUATION(r_square, "]")
69-
TOK_PUNCTUATION(less, "<")
70-
TOK_PUNCTUATION(greater, ">")
71-
TOK_PUNCTUATION(equal, "=")
72-
TOK_PUNCTUATION(ellipsis, "...")
73-
// TODO: More punctuation.
74-
75-
// Operators.
76-
TOK_OPERATOR(plus, "+")
77-
TOK_OPERATOR(minus, "-")
78-
TOK_OPERATOR(star, "*")
79-
// TODO: More operator tokens
55+
TOK_PUNCTUATION(arrow, "->")
56+
TOK_PUNCTUATION(at, "@")
57+
TOK_PUNCTUATION(colon, ":")
58+
TOK_PUNCTUATION(comma, ",")
59+
TOK_PUNCTUATION(ellipsis, "...")
60+
TOK_PUNCTUATION(equal, "=")
61+
TOK_PUNCTUATION(greater, ">")
62+
TOK_PUNCTUATION(l_brace, "{")
63+
TOK_PUNCTUATION(l_paren, "(")
64+
TOK_PUNCTUATION(l_square, "[")
65+
TOK_PUNCTUATION(less, "<")
66+
TOK_PUNCTUATION(minus, "-")
67+
TOK_PUNCTUATION(plus, "+")
68+
TOK_PUNCTUATION(question, "?")
69+
TOK_PUNCTUATION(r_brace, "}")
70+
TOK_PUNCTUATION(r_paren, ")")
71+
TOK_PUNCTUATION(r_square, "]")
72+
TOK_PUNCTUATION(star, "*")
8073

8174
// Keywords. These turn "foo" into Token::kw_foo enums.
8275

@@ -122,5 +115,4 @@ TOK_KEYWORD(vector)
122115
#undef TOK_IDENTIFIER
123116
#undef TOK_LITERAL
124117
#undef TOK_PUNCTUATION
125-
#undef TOK_OPERATOR
126118
#undef TOK_KEYWORD

mlir/test/IR/parser.mlir

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,3 +1225,9 @@ func @pretty_names() {
12251225
return
12261226
}
12271227

1228+
// CHECK-LABEL: func @zero_whitespace() {
1229+
// CHECK-NEXT: return
1230+
func @zero_whitespace() {
1231+
// This is a \0 byte.
1232+
return
1233+
}

0 commit comments

Comments
 (0)