CIRCT 20.0.0git
Loading...
Searching...
No Matches
PrettyPrinter.cpp
Go to the documentation of this file.
1//===- PrettyPrinter.cpp - Pretty printing --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements a pretty-printer.
10// "PrettyPrinting", Derek C. Oppen, 1980.
11// https://dx.doi.org/10.1145/357114.357115
12//
13// This was selected as it is linear in number of tokens O(n) and requires
14// memory O(linewidth).
15//
16// This has been adjusted from the paper:
17// * Deque for tokens instead of ringbuffer + left/right cursors.
18// This is simpler to reason about and allows us to easily grow the buffer
19// to accommodate longer widths when needed (and not reserve 3*linewidth).
20// Since scanStack references buffered tokens by index, we track an offset
21// that we increase when dropping off the front.
22// When the scan stack is cleared the buffer is reset, including this offset.
23// * Indentation tracked from left not relative to margin (linewidth).
24// * Indentation emitted lazily, avoid trailing whitespace.
25// * Group indentation styles: Visual and Block, set on 'begin' tokens.
26// "Visual" is the style in the paper, offset relative to current column.
27// "Block" is relative to current base indentation.
28// * Break: Add "Neverbreak": acts like a break re:sizing previous range,
29// but will never be broken. Useful for adding content to end of line
30// that may go over margin but should not influence layout.
31// * Begin: Add "Never" breaking style, for forcing no breaks including
32// within nested groups. Use sparingly. It is an error to insert
33// a newline (Break with spaces==kInfinity) within such a group.
34// * If leftTotal grows too large, "rebase" our datastructures by
35// walking the tokens with pending sizes (scanStack) and adjusting
36// them by `leftTotal - 1`. Also reset tokenOffset while visiting.
37// This is mostly needed due to use of tokens/groups that 'never' break
38// which can greatly increase times between `clear()`.
39// * Optionally, minimum amount of space is granted regardless of indentation.
40// To avoid forcing expressions against the line limit, never try to print
41// an expression in, say, 2 columns, as this is unlikely to produce good
42// output.
43// (TODO)
44//
45// There are many pretty-printing implementations based on this paper,
46// and research literature is rich with functional formulations based originally
47// on this algorithm.
48//
49// Implementations of note that have interesting modifications for their
50// languages and modernization of the paper's algorithm:
51// * prettyplease / rustc_ast_pretty
52// Pretty-printers for rust, the first being useful for rustfmt-like output.
53// These have largely the same code and were based on one another.
54// https://github.com/dtolnay/prettyplease
55// https://github.com/rust-lang/rust/tree/master/compiler/rustc_ast_pretty
56// This is closest to the paper's algorithm with modernizations,
57// and most of the initial tweaks have also been implemented here (thanks!).
58// * swift-format: https://github.com/apple/swift-format/
59//
60// If we want fancier output or need to handle more complicated constructs,
61// both are good references for lessons and ideas.
62//
63// FWIW, at time of writing these have compatible licensing (Apache 2.0).
64//
65//===----------------------------------------------------------------------===//
66
68#include "llvm/ADT/TypeSwitch.h"
69#include "llvm/Support/Casting.h"
70#include "llvm/Support/raw_ostream.h"
71
72namespace circt {
73namespace pretty {
74
75/// Destructor, anchor.
77
78/// Add token for printing. In Oppen, this is "scan".
80 // Add token to tokens, and add its index to scanStack.
81 auto addScanToken = [&](auto offset) {
82 auto right = tokenOffset + tokens.size();
83 scanStack.push_back(right);
84 tokens.push_back({t, offset});
85 };
86 llvm::TypeSwitch<Token *>(&t)
87 .Case([&](StringToken *s) {
88 // If nothing on stack, directly print
89 FormattedToken f{t, (int32_t)s->text().size()};
90 // Empty string token isn't /wrong/ but can have unintended effect.
91 assert(!s->text().empty() && "empty string token");
92 if (scanStack.empty())
93 return print(f);
94 tokens.push_back(f);
95 rightTotal += f.size;
96 assert(rightTotal > 0);
98 })
99 .Case([&](BreakToken *b) {
100 if (scanStack.empty())
101 clear();
102 else
103 checkStack();
104 addScanToken(-rightTotal);
105 rightTotal += b->spaces();
106 assert(rightTotal > 0);
107 })
108 .Case([&](BeginToken *b) {
109 if (scanStack.empty())
110 clear();
111 addScanToken(-rightTotal);
112 })
113 .Case([&](EndToken *end) {
114 if (scanStack.empty())
115 return print({t, 0});
116 addScanToken(-1);
117 })
118 .Case([&](CallbackToken *c) {
119 // Callbacktoken must be associated with a listener, it doesn't have any
120 // meaning without it.
122 if (scanStack.empty())
123 return print({t, 0});
124 tokens.push_back({t, 0});
125 });
127}
128
130 // Check for too-large totals, reset.
131 // This can happen if we have an open group and emit
132 // many tokens, especially newlines which have artificial size.
133 if (tokens.empty())
134 return;
135 assert(leftTotal >= 0);
136 assert(rightTotal >= 0);
137 if (uint32_t(leftTotal) > rebaseThreshold) {
138 // Plan: reset leftTotal to '1', adjust all accordingly.
139 auto adjust = leftTotal - 1;
140 for (auto &scanIndex : scanStack) {
141 assert(scanIndex >= tokenOffset);
142 auto &t = tokens[scanIndex - tokenOffset];
143 if (isa<BreakToken, BeginToken>(&t.token)) {
144 if (t.size < 0) {
145 assert(t.size + adjust < 0);
146 t.size += adjust;
147 }
148 }
149 // While walking, reset tokenOffset too.
150 scanIndex -= tokenOffset;
151 }
152 leftTotal -= adjust;
153 rightTotal -= adjust;
154 tokenOffset = 0;
155 }
156}
157
159 if (!scanStack.empty()) {
160 checkStack();
161 advanceLeft();
162 }
163 assert(scanStack.empty() && "unclosed groups at EOF");
164 if (scanStack.empty())
165 clear();
166}
167
169 assert(scanStack.empty() && "clearing tokens while still on scan stack");
170 assert(tokens.empty());
171 leftTotal = rightTotal = 1;
172 tokens.clear();
173 tokenOffset = 0;
174 if (listener && !donotClear)
175 listener->clear();
176}
177
178/// Break encountered, set sizes of begin/breaks in scanStack that we now know.
180 unsigned depth = 0;
181 while (!scanStack.empty()) {
182 auto x = scanStack.back();
183 assert(x >= tokenOffset && tokens.size() + tokenOffset > x);
184 auto &t = tokens[x - tokenOffset];
185 if (llvm::isa<BeginToken>(&t.token)) {
186 if (depth == 0)
187 break;
188 scanStack.pop_back();
189 t.size += rightTotal;
190 --depth;
191 } else if (llvm::isa<EndToken>(&t.token)) {
192 scanStack.pop_back();
193 t.size = 1;
194 ++depth;
195 } else {
196 scanStack.pop_back();
197 t.size += rightTotal;
198 if (depth == 0)
199 break;
200 }
201 }
202}
203
204/// Check if there are enough tokens to hit width, if so print.
205/// If scan size is wider than line, it's infinity.
207 // While buffer needs more than 1 line to print, print and consume.
208 assert(!tokens.empty());
209 assert(leftTotal >= 0);
210 assert(rightTotal >= 0);
211 while (rightTotal - leftTotal > space && !tokens.empty()) {
212
213 // Ran out of space, set size to infinity and take off scan stack.
214 // No need to keep track as we know enough to know this won't fit.
215 if (!scanStack.empty() && tokenOffset == scanStack.front()) {
216 tokens.front().size = kInfinity;
217 scanStack.pop_front();
218 }
219 advanceLeft();
220 }
221}
222
223/// Print out tokens we know sizes for, and drop from token buffer.
225 assert(!tokens.empty());
226
227 while (!tokens.empty() && tokens.front().size >= 0) {
228 const auto &f = tokens.front();
229 print(f);
230 leftTotal +=
231 llvm::TypeSwitch<const Token *, int32_t>(&f.token)
232 .Case([&](const BreakToken *b) { return b->spaces(); })
233 .Case([&](const StringToken *s) { return s->text().size(); })
234 .Default([](const auto *) { return 0; });
235 tokens.pop_front();
236 ++tokenOffset;
237 }
238}
239
240/// Compute indentation w/o overflow, clamp to [0,maxStartingIndent].
241/// Output looks better if we don't stop indenting entirely at target width,
242/// but don't do this indefinitely.
243static uint32_t computeNewIndent(ssize_t newIndent, int32_t offset,
244 uint32_t maxStartingIndent) {
245 return std::clamp<ssize_t>(newIndent + offset, 0, maxStartingIndent);
246}
247
248/// Print a token, maintaining printStack for context.
250 llvm::TypeSwitch<const Token *>(&f.token)
251 .Case([&](const StringToken *s) {
252 space -= f.size;
253 os.indent(pendingIndentation);
255 os << s->text();
256 })
257 .Case([&](const BreakToken *b) {
258 auto &frame = getPrintFrame();
259 assert((b->spaces() != kInfinity || alwaysFits == 0) &&
260 "newline inside never group");
261 bool fits =
262 (alwaysFits > 0) || b->neverbreak() ||
263 frame.breaks == PrintBreaks::Fits ||
264 (frame.breaks == PrintBreaks::Inconsistent && f.size <= space);
265 if (fits) {
266 space -= b->spaces();
268 } else {
269 os << "\n";
273 }
274 })
275 .Case([&](const BeginToken *b) {
276 if (b->breaks() == Breaks::Never) {
277 printStack.push_back({0, PrintBreaks::AlwaysFits});
278 ++alwaysFits;
279 } else if (f.size > space && alwaysFits == 0) {
280 auto breaks = b->breaks() == Breaks::Consistent
283 ssize_t newIndent = indent;
284 if (b->style() == IndentStyle::Visual)
285 newIndent = ssize_t{margin} - space;
287 printStack.push_back({indent, breaks});
288 } else {
289 printStack.push_back({0, PrintBreaks::Fits});
290 }
291 })
292 .Case([&](const EndToken *) {
293 assert(!printStack.empty() && "more ends than begins?");
294 // Try to tolerate this when assertions are disabled.
295 if (printStack.empty())
296 return;
298 --alwaysFits;
299 printStack.pop_back();
300 auto &frame = getPrintFrame();
301 if (frame.breaks != PrintBreaks::Fits &&
302 frame.breaks != PrintBreaks::AlwaysFits)
303 indent = frame.offset;
304 })
305 .Case([&](const CallbackToken *c) {
306 if (pendingIndentation) {
307 // This is necessary to get the correct location on the stream for the
308 // callback invocation.
309 os.indent(pendingIndentation);
311 }
312 listener->print();
313 });
314}
315} // end namespace pretty
316} // end namespace circt
assert(baseType &&"element must be base type")
SmallVector< PrintEntry > printStack
Stack of printing contexts (indentation + breaking behavior).
void rebaseIfNeeded()
Reset leftTotal and tokenOffset, rebase size data and scanStack indices.
void add(Token t)
Add token for printing. In Oppen, this is "scan".
bool donotClear
Flag to identify a state when the clear cannot be called.
std::deque< FormattedToken > tokens
Unprinted tokens, combination of 'token' and 'size' in Oppen.
Listener * listener
Hook for Token storage events.
uint32_t indent
Current indentation level.
uint32_t tokenOffset
index of first token, for resolving scanStack entries.
const uint32_t maxStartingIndent
Maximum starting indentation level (default=kInfinity/4).
int32_t space
Characters left on this line.
static constexpr uint32_t kInfinity
llvm::raw_ostream & os
Output stream.
static constexpr decltype(leftTotal) rebaseThreshold
Threshold for walking scan state and "rebasing" totals/offsets.
void checkStack()
Break encountered, set sizes of begin/breaks in scanStack we now know.
void print(const FormattedToken &f)
Print a token, maintaining printStack for context.
const uint32_t margin
Target line width.
uint32_t pendingIndentation
Whitespace to print before next, tracked to avoid trailing whitespace.
int32_t leftTotal
Sizes: printed, enqueued.
std::deque< uint32_t > scanStack
Stack of begin/break tokens, adjust by tokenOffset to index into tokens.
void clear()
Clear token buffer, scanStack must be empty.
void advanceLeft()
Print out tokens we know sizes for, and drop from token buffer.
uint32_t alwaysFits
Number of "AlwaysFits" on print stack.
void checkStream()
Check if there's enough tokens to hit width, if so print.
auto & getPrintFrame()
Get current printing frame.
static uint32_t computeNewIndent(ssize_t newIndent, int32_t offset, uint32_t maxStartingIndent)
Compute indentation w/o overflow, clamp to [0,maxStartingIndent].
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Format token with tracked size.
virtual void clear()
No tokens referencing external memory are present.
virtual ~Listener()
Destructor, anchor.
virtual void print()
Listener for print event.