CIRCT  20.0.0git
FIRLexer.h
Go to the documentation of this file.
1 //===- FIRLexer.h - .fir lexer and token definitions ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Defines the Lexer and Token interface for .fir files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef FIRTOMLIR_FIRLEXER_H
14 #define FIRTOMLIR_FIRLEXER_H
15 
16 #include "circt/Support/LLVM.h"
17 #include "mlir/IR/BuiltinAttributes.h"
18 #include "llvm/Support/SourceMgr.h"
19 
20 namespace mlir {
21 class MLIRContext;
22 class Location;
23 } // namespace mlir
24 
25 namespace circt {
26 namespace firrtl {
27 
28 /// This represents a specific token for .fir files: a token kind plus a reference to the token's spelling in the source buffer.
29 class FIRToken {
30 public:
31  enum Kind { // Enumerators come from FIRTokenKinds.def, expanded through the TOK_* macros below; keyword entries become kw_<spelling> and lp_<spelling>.
32 #define TOK_MARKER(NAME) NAME,
33 #define TOK_IDENTIFIER(NAME) NAME,
34 #define TOK_LITERAL(NAME) NAME,
35 #define TOK_PUNCTUATION(NAME, SPELLING) NAME,
36 #define TOK_KEYWORD(SPELLING) kw_##SPELLING,
37 #define TOK_LPKEYWORD(SPELLING) lp_##SPELLING,
38 #include "FIRTokenKinds.def"
39  };
40  // NOTE(review): listing line 41 is absent from this extract; the index below places the FIRToken(Kind kind, StringRef spelling) constructor there.
42 
43  /// Return the bytes that make up this token. The result refers to the source buffer; it is not a copy.
44  StringRef getSpelling() const { return spelling; }
45 
46  // Token classification: read the kind directly, or test it against one or more candidate kinds.
47  Kind getKind() const { return kind; }
48  bool is(Kind K) const { return kind == K; }
49  /// Return true if this token is either of the two specified kinds.
50  bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); }
51 
52  /// Return true if this token is one of the specified kinds (three or more). Checks k1, then recurses on the remaining kinds until the two-kind overload is reached.
53  template <typename... T>
54  bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
55  if (is(k1))
56  return true;
57  return isAny(k2, k3, others...);
58  }
59  /// Return true if this token is not of the specified kind.
60  bool isNot(Kind k) const { return kind != k; }
61 
62  /// Return true if this token isn't any of the specified kinds (two or more).
63  template <typename... T>
64  bool isNot(Kind k1, Kind k2, T... others) const {
65  return !isAny(k1, k2, others...);
66  }
67 
68  /// Return true if this is one of the keyword token kinds (e.g. kw_wire).
69  bool isKeyword() const;
70 
71  /// Given a token containing a string literal, return its value, including
72  /// removing the quote characters and unescaping the contents of the string.
73  /// The lexer has already verified that this token is valid.
74  std::string getStringValue() const;
75  static std::string getStringValue(StringRef spelling);
76 
77  /// Given a token containing a verbatim string, return its value, including
78  /// removing the quote characters and unescaping the quotes of the string. The
79  /// lexer has already verified that this token is valid.
80  std::string getVerbatimStringValue() const;
81  static std::string getVerbatimStringValue(StringRef spelling);
82 
83  // Location processing.
84  llvm::SMLoc getLoc() const;
85  llvm::SMLoc getEndLoc() const;
86  llvm::SMRange getLocRange() const;
87 
88 private:
89  /// Discriminator that indicates the sort of token this is. NOTE(review): this documents the `Kind kind` member, whose declaration (listing line 90) is absent from this extract; it does not describe `spelling` below.
91 
92  /// A reference to the entire token contents; this is always a pointer into
93  /// a memory buffer owned by the source manager.
94  StringRef spelling;
95 };
96 
97 class FIRLexerCursor;
98 
99 /// This implements a lexer for .fir files. It holds one current token (see getToken) and a position in the source buffer.
100 class FIRLexer {
101 public:
102  FIRLexer(const llvm::SourceMgr &sourceMgr, mlir::MLIRContext *context);
103 
104  const llvm::SourceMgr &getSourceMgr() const { return sourceMgr; }
105 
106  /// Move to the next valid token. NOTE(review): this documents lexToken(), whose declaration (listing line 107) is absent from this extract; it does not describe getToken() below.
108  /// Return the current token, i.e. the next token that hasn't been consumed yet.
109  const FIRToken &getToken() const { return curToken; }
110  /// Encode the specified source location into an mlir::Location object.
111  mlir::Location translateLocation(llvm::SMLoc loc);
112 
113  /// Return the indentation level of the specified token or std::nullopt if
114  /// this token is preceded by another token on the same line.
115  std::optional<unsigned> getIndentation(const FIRToken &tok) const;
116 
117  /// Get an opaque cursor capturing the lexer state that can be restored later.
118  FIRLexerCursor getCursor() const;
119 
120 private:
122 
123  // Helpers. formToken builds a token of the given kind whose spelling spans [tokStart, curPtr).
124  FIRToken formToken(FIRToken::Kind kind, const char *tokStart) {
125  return FIRToken(kind, StringRef(tokStart, curPtr - tokStart));
126  }
127  /// Emit an error message and return a FIRToken::error token.
128  FIRToken emitError(const char *loc, const Twine &message);
129 
130  // Lexer implementation methods.
131  FIRToken lexFileInfo(const char *tokStart); // Lex a file info specifier.
132  FIRToken lexInlineAnnotation(const char *tokStart); // Lex a non-standard inline Annotation file.
133  FIRToken lexIdentifierOrKeyword(const char *tokStart); // Lex an identifier or keyword that starts with a letter.
134  FIRToken lexNumber(const char *tokStart); // Lex a number literal.
135  void skipComment(); // Skip a comment line, starting with a ';' and going to end of line.
136  FIRToken lexString(const char *tokStart, bool isVerbatim); // Lex a string literal; isVerbatim presumably selects the single-quoted verbatim form -- confirm in FIRLexer.cpp.
137 
138  const llvm::SourceMgr &sourceMgr;
139  const mlir::StringAttr bufferNameIdentifier;
140 
141  StringRef curBuffer;
142  const char *curPtr; // Current lex position; formToken uses it as the end of the token being formed, and FIRLexerCursor saves/restores it.
143 
144  /// This is the next token that hasn't been consumed yet. NOTE(review): this documents the `FIRToken curToken` member, whose declaration (listing line 145) is absent from this extract; it does not describe the deleted copy constructor below.
146  // The lexer is non-copyable.
147  FIRLexer(const FIRLexer &) = delete;
148  void operator=(const FIRLexer &) = delete;
149  friend class FIRLexerCursor; // The cursor reads and writes curPtr and curToken directly.
150 };
151 
152 /// This is the state captured for a lexer cursor: the lexer's curPtr and its current token, which restore() writes back into a FIRLexer. NOTE(review): the `class FIRLexerCursor {` line (listing line 153) is absent from this extract.
154 public:
155  FIRLexerCursor(const FIRLexer &lexer)
156  : state(lexer.curPtr), curToken(lexer.getToken()) {}
157  /// Write the captured position and token back into the given lexer.
158  void restore(FIRLexer &lexer) {
159  lexer.curPtr = state;
160  lexer.curToken = curToken;
161  }
162 
163 private:
164  const char *state; // Saved copy of FIRLexer::curPtr.
166 };
167 
169  return FIRLexerCursor(*this); // Body of FIRLexer::getCursor() (signature at listing line 168 is absent from this extract): snapshot this lexer's curPtr and current token.
170 }
171 
172 } // namespace firrtl
173 } // namespace circt
174 
175 #endif // FIRTOMLIR_FIRLEXER_H
This is the state captured for a lexer cursor.
Definition: FIRLexer.h:153
FIRLexerCursor(const FIRLexer &lexer)
Definition: FIRLexer.h:155
void restore(FIRLexer &lexer)
Definition: FIRLexer.h:158
This implements a lexer for .fir files.
Definition: FIRLexer.h:100
FIRToken lexFileInfo(const char *tokStart)
Lex a file info specifier.
Definition: FIRLexer.cpp:319
void lexToken()
Move to the next valid token.
Definition: FIRLexer.h:107
FIRToken lexIdentifierOrKeyword(const char *tokStart)
Lex an identifier or keyword that starts with a letter.
Definition: FIRLexer.cpp:391
const llvm::SourceMgr & sourceMgr
Definition: FIRLexer.h:138
FIRToken curToken
This is the next token that hasn't been consumed yet.
Definition: FIRLexer.h:145
FIRToken formToken(FIRToken::Kind kind, const char *tokStart)
Definition: FIRLexer.h:124
const llvm::SourceMgr & getSourceMgr() const
Definition: FIRLexer.h:104
FIRToken lexNumber(const char *tokStart)
Lex a number literal.
Definition: FIRLexer.cpp:512
FIRToken lexString(const char *tokStart, bool isVerbatim)
StringLit ::= '"' UnquotedString? '"' VerbatimStringLit ::= '\'' UnquotedString? '\'' UnquotedString ...
Definition: FIRLexer.cpp:463
friend class FIRLexerCursor
Definition: FIRLexer.h:149
FIRLexer(const FIRLexer &)=delete
FIRLexerCursor getCursor() const
Get an opaque pointer into the lexer state that can be restored later.
Definition: FIRLexer.h:168
const char * curPtr
Definition: FIRLexer.h:142
std::optional< unsigned > getIndentation(const FIRToken &tok) const
Return the indentation level of the specified token or std::nullopt if this token is preceded by another token on the same line.
Definition: FIRLexer.cpp:185
void skipComment()
Skip a comment line, starting with a ';' and going to end of line.
Definition: FIRLexer.cpp:438
FIRToken emitError(const char *loc, const Twine &message)
Emit an error message and return a FIRToken::error token.
Definition: FIRLexer.cpp:179
FIRLexer(const llvm::SourceMgr &sourceMgr, mlir::MLIRContext *context)
mlir::Location translateLocation(llvm::SMLoc loc)
Encode the specified source location information into a Location object for attachment to the IR or e...
Definition: FIRLexer.cpp:170
void operator=(const FIRLexer &)=delete
FIRToken lexInlineAnnotation(const char *tokStart)
Lex a non-standard inline Annotation file.
Definition: FIRLexer.cpp:350
const FIRToken & getToken() const
Definition: FIRLexer.h:109
const mlir::StringAttr bufferNameIdentifier
Definition: FIRLexer.h:139
This represents a specific token for .fir files.
Definition: FIRLexer.h:29
bool isNot(Kind k) const
Definition: FIRLexer.h:60
std::string getVerbatimStringValue() const
Given a token containing a verbatim string, return its value, including removing the quote characters and unescaping the quotes of the string. The lexer has already verified that this token is valid.
Definition: FIRLexer.cpp:117
StringRef getSpelling() const
Definition: FIRLexer.h:44
bool isNot(Kind k1, Kind k2, T... others) const
Return true if this token isn't one of the specified kinds.
Definition: FIRLexer.h:64
bool is(Kind K) const
Definition: FIRLexer.h:48
FIRToken(Kind kind, StringRef spelling)
Definition: FIRLexer.h:41
StringRef spelling
A reference to the entire token contents; this is always a pointer into a memory buffer owned by the source manager.
Definition: FIRLexer.h:94
llvm::SMRange getLocRange() const
Definition: FIRLexer.cpp:41
bool isAny(Kind k1, Kind k2, Kind k3, T... others) const
Return true if this token is one of the specified kinds.
Definition: FIRLexer.h:54
bool isAny(Kind k1, Kind k2) const
Definition: FIRLexer.h:50
std::string getStringValue() const
Given a token containing a string literal, return its value, including removing the quote characters and unescaping the contents of the string. The lexer has already verified that this token is valid.
Definition: FIRLexer.cpp:58
Kind kind
Discriminator that indicates the sort of token this is.
Definition: FIRLexer.h:90
llvm::SMLoc getEndLoc() const
Definition: FIRLexer.cpp:37
Kind getKind() const
Definition: FIRLexer.h:47
llvm::SMLoc getLoc() const
Definition: FIRLexer.cpp:33
bool isKeyword() const
Return true if this is one of the keyword token kinds (e.g. kw_wire).
Definition: FIRLexer.cpp:44
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Definition: DebugAnalysis.h:21