CIRCT  19.0.0git
PrettyPrinter.cpp
Go to the documentation of this file.
1 //===- PrettyPrinter.cpp - Pretty printing --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements a pretty-printer.
10 // "PrettyPrinting", Derek C. Oppen, 1980.
11 // https://dx.doi.org/10.1145/357114.357115
12 //
13 // This was selected as it is linear in number of tokens O(n) and requires
14 // memory O(linewidth).
15 //
16 // This has been adjusted from the paper:
17 // * Deque for tokens instead of ringbuffer + left/right cursors.
18 // This is simpler to reason about and allows us to easily grow the buffer
19 // to accommodate longer widths when needed (and not reserve 3*linewidth).
20 // Since scanStack references buffered tokens by index, we track an offset
21 // that we increase when dropping off the front.
22 // When the scan stack is cleared the buffer is reset, including this offset.
23 // * Indentation tracked from left not relative to margin (linewidth).
24 // * Indentation emitted lazily, avoid trailing whitespace.
25 // * Group indentation styles: Visual and Block, set on 'begin' tokens.
26 // "Visual" is the style in the paper, offset relative to current column.
27 // "Block" is relative to current base indentation.
28 // * Break: Add "Neverbreak": acts like a break re:sizing previous range,
29 // but will never be broken. Useful for adding content to end of line
30 // that may go over margin but should not influence layout.
31 // * Begin: Add "Never" breaking style, for forcing no breaks including
32 // within nested groups. Use sparingly. It is an error to insert
33 // a newline (Break with spaces==kInfinity) within such a group.
34 // * If leftTotal grows too large, "rebase" our datastructures by
35 // walking the tokens with pending sizes (scanStack) and adjusting
36 // them by `leftTotal - 1`. Also reset tokenOffset while visiting.
37 // This is mostly needed due to use of tokens/groups that 'never' break
38 // which can greatly increase times between `clear()`.
39 // * Optionally, minimum amount of space is granted regardless of indentation.
40 // To avoid forcing expressions against the line limit, never try to print
41 // an expression in, say, 2 columns, as this is unlikely to produce good
42 // output.
43 // (TODO)
44 //
45 // There are many pretty-printing implementations based on this paper,
46 // and research literature is rich with functional formulations based originally
47 // on this algorithm.
48 //
49 // Implementations of note that have interesting modifications for their
50 // languages and modernization of the paper's algorithm:
51 // * prettyplease / rustc_ast_pretty
52 // Pretty-printers for rust, the first being useful for rustfmt-like output.
53 // These have largely the same code and were based on one another.
54 // https://github.com/dtolnay/prettyplease
55 // https://github.com/rust-lang/rust/tree/master/compiler/rustc_ast_pretty
56 // This is closest to the paper's algorithm with modernizations,
57 // and most of the initial tweaks have also been implemented here (thanks!).
58 // * swift-format: https://github.com/apple/swift-format/
59 //
60 // If we want fancier output or need to handle more complicated constructs,
61 // both are good references for lessons and ideas.
62 //
63 // FWIW, at time of writing these have compatible licensing (Apache 2.0).
64 //
65 //===----------------------------------------------------------------------===//
66 
68 #include "llvm/ADT/TypeSwitch.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/raw_ostream.h"
71 
72 namespace circt {
73 namespace pretty {
74 
75 /// Destructor, anchor.
77 
78 /// Add token for printing. In Oppen, this is "scan".
80  // Add token to tokens, and add its index to scanStack.
81  auto addScanToken = [&](auto offset) {
82  auto right = tokenOffset + tokens.size();
83  scanStack.push_back(right);
84  tokens.push_back({t, offset});
85  };
86  llvm::TypeSwitch<Token *>(&t)
87  .Case([&](StringToken *s) {
88  // If nothing on stack, directly print
89  FormattedToken f{t, (int32_t)s->text().size()};
90  // Empty string token isn't /wrong/ but can have unintended effect.
91  assert(!s->text().empty() && "empty string token");
92  if (scanStack.empty())
93  return print(f);
94  tokens.push_back(f);
95  rightTotal += f.size;
96  assert(rightTotal > 0);
97  checkStream();
98  })
99  .Case([&](BreakToken *b) {
100  if (scanStack.empty())
101  clear();
102  else
103  checkStack();
104  addScanToken(-rightTotal);
105  rightTotal += b->spaces();
106  assert(rightTotal > 0);
107  })
108  .Case([&](BeginToken *b) {
109  if (scanStack.empty())
110  clear();
111  addScanToken(-rightTotal);
112  })
113  .Case([&](EndToken *end) {
114  if (scanStack.empty())
115  return print({t, 0});
116  addScanToken(-1);
117  })
118  .Case([&](CallbackToken *c) {
119  // Callbacktoken must be associated with a listener, it doesn't have any
120  // meaning without it.
121  assert(listener);
122  if (scanStack.empty())
123  return print({t, 0});
124  tokens.push_back({t, 0});
125  });
126  rebaseIfNeeded();
127 }
128 
130  // Check for too-large totals, reset.
131  // This can happen if we have an open group and emit
132  // many tokens, especially newlines which have artificial size.
133  if (tokens.empty())
134  return;
135  assert(leftTotal >= 0);
136  assert(rightTotal >= 0);
137  if (uint32_t(leftTotal) > rebaseThreshold) {
138  // Plan: reset leftTotal to '1', adjust all accordingly.
139  auto adjust = leftTotal - 1;
140  for (auto &scanIndex : scanStack) {
141  assert(scanIndex >= tokenOffset);
142  auto &t = tokens[scanIndex - tokenOffset];
143  if (isa<BreakToken, BeginToken>(&t.token)) {
144  if (t.size < 0) {
145  assert(t.size + adjust < 0);
146  t.size += adjust;
147  }
148  }
149  // While walking, reset tokenOffset too.
150  scanIndex -= tokenOffset;
151  }
152  leftTotal -= adjust;
153  rightTotal -= adjust;
154  tokenOffset = 0;
155  }
156 }
157 
159  if (!scanStack.empty()) {
160  checkStack();
161  advanceLeft();
162  }
163  assert(scanStack.empty() && "unclosed groups at EOF");
164  if (scanStack.empty())
165  clear();
166 }
167 
169  assert(scanStack.empty() && "clearing tokens while still on scan stack");
170  assert(tokens.empty());
171  leftTotal = rightTotal = 1;
172  tokens.clear();
173  tokenOffset = 0;
174  if (listener && !donotClear)
175  listener->clear();
176 }
177 
178 /// Break encountered, set sizes of begin/breaks in scanStack that we now know.
180  unsigned depth = 0;
181  while (!scanStack.empty()) {
182  auto x = scanStack.back();
183  assert(x >= tokenOffset && tokens.size() + tokenOffset > x);
184  auto &t = tokens[x - tokenOffset];
185  if (llvm::isa<BeginToken>(&t.token)) {
186  if (depth == 0)
187  break;
188  scanStack.pop_back();
189  t.size += rightTotal;
190  --depth;
191  } else if (llvm::isa<EndToken>(&t.token)) {
192  scanStack.pop_back();
193  t.size = 1;
194  ++depth;
195  } else {
196  scanStack.pop_back();
197  t.size += rightTotal;
198  if (depth == 0)
199  break;
200  }
201  }
202 }
203 
204 /// Check if there are enough tokens to hit width, if so print.
205 /// If scan size is wider than line, it's infinity.
207  // While buffer needs more than 1 line to print, print and consume.
208  assert(!tokens.empty());
209  assert(leftTotal >= 0);
210  assert(rightTotal >= 0);
211  while (rightTotal - leftTotal > space && !tokens.empty()) {
212 
213  // Ran out of space, set size to infinity and take off scan stack.
214  // No need to keep track as we know enough to know this won't fit.
215  if (!scanStack.empty() && tokenOffset == scanStack.front()) {
216  tokens.front().size = kInfinity;
217  scanStack.pop_front();
218  }
219  advanceLeft();
220  }
221 }
222 
223 /// Print out tokens we know sizes for, and drop from token buffer.
225  assert(!tokens.empty());
226 
227  while (!tokens.empty() && tokens.front().size >= 0) {
228  const auto &f = tokens.front();
229  print(f);
230  leftTotal +=
231  llvm::TypeSwitch<const Token *, int32_t>(&f.token)
232  .Case([&](const BreakToken *b) { return b->spaces(); })
233  .Case([&](const StringToken *s) { return s->text().size(); })
234  .Default([](const auto *) { return 0; });
235  tokens.pop_front();
236  ++tokenOffset;
237  }
238 }
239 
240 /// Compute indentation w/o overflow, clamp to [0,maxStartingIndent].
241 /// Output looks better if we don't stop indenting entirely at target width,
242 /// but don't do this indefinitely.
243 static uint32_t computeNewIndent(ssize_t newIndent, int32_t offset,
244  uint32_t maxStartingIndent) {
245  return std::clamp<ssize_t>(newIndent + offset, 0, maxStartingIndent);
246 }
247 
248 /// Print a token, maintaining printStack for context.
250  llvm::TypeSwitch<const Token *>(&f.token)
251  .Case([&](const StringToken *s) {
252  space -= f.size;
253  os.indent(pendingIndentation);
254  pendingIndentation = 0;
255  os << s->text();
256  })
257  .Case([&](const BreakToken *b) {
258  auto &frame = getPrintFrame();
259  assert((b->spaces() != kInfinity || alwaysFits == 0) &&
260  "newline inside never group");
261  bool fits =
262  (alwaysFits > 0) || b->neverbreak() ||
263  frame.breaks == PrintBreaks::Fits ||
264  (frame.breaks == PrintBreaks::Inconsistent && f.size <= space);
265  if (fits) {
266  space -= b->spaces();
267  pendingIndentation += b->spaces();
268  } else {
269  os << "\n";
273  }
274  })
275  .Case([&](const BeginToken *b) {
276  if (b->breaks() == Breaks::Never) {
277  printStack.push_back({0, PrintBreaks::AlwaysFits});
278  ++alwaysFits;
279  } else if (f.size > space && alwaysFits == 0) {
280  auto breaks = b->breaks() == Breaks::Consistent
283  ssize_t newIndent = indent;
284  if (b->style() == IndentStyle::Visual)
285  newIndent = ssize_t{margin} - space;
286  indent = computeNewIndent(newIndent, b->offset(), maxStartingIndent);
287  printStack.push_back({indent, breaks});
288  } else {
289  printStack.push_back({0, PrintBreaks::Fits});
290  }
291  })
292  .Case([&](const EndToken *) {
293  assert(!printStack.empty() && "more ends than begins?");
294  // Try to tolerate this when assertions are disabled.
295  if (printStack.empty())
296  return;
297  if (getPrintFrame().breaks == PrintBreaks::AlwaysFits)
298  --alwaysFits;
299  printStack.pop_back();
300  auto &frame = getPrintFrame();
301  if (frame.breaks != PrintBreaks::Fits &&
302  frame.breaks != PrintBreaks::AlwaysFits)
303  indent = frame.offset;
304  })
305  .Case([&](const CallbackToken *c) {
306  if (pendingIndentation) {
307  // This is necessary to get the correct location on the stream for the
308  // callback invocation.
309  os.indent(pendingIndentation);
310  pendingIndentation = 0;
311  }
312  listener->print();
313  });
314 }
315 } // end namespace pretty
316 } // end namespace circt
assert(baseType &&"element must be base type")
SmallVector< PrintEntry > printStack
Stack of printing contexts (indentation + breaking behavior).
void rebaseIfNeeded()
Reset leftTotal and tokenOffset, rebase size data and scanStack indices.
void add(Token t)
Add token for printing. In Oppen, this is "scan".
bool donotClear
Flag to identify a state when the clear cannot be called.
std::deque< FormattedToken > tokens
Unprinted tokens, combination of 'token' and 'size' in Oppen.
Listener * listener
Hook for Token storage events.
uint32_t indent
Current indentation level.
uint32_t tokenOffset
index of first token, for resolving scanStack entries.
const uint32_t maxStartingIndent
Maximum starting indentation level (default=kInfinity/4).
int32_t space
Characters left on this line.
static constexpr uint32_t kInfinity
llvm::raw_ostream & os
Output stream.
static constexpr decltype(leftTotal) rebaseThreshold
Threshold for walking scan state and "rebasing" totals/offsets.
void checkStack()
Break encountered, set sizes of begin/breaks in scanStack we now know.
auto & getPrintFrame()
Get current printing frame.
void print(const FormattedToken &f)
Print a token, maintaining printStack for context.
const uint32_t margin
Target line width.
uint32_t pendingIndentation
Whitespace to print before next, tracked to avoid trailing whitespace.
int32_t leftTotal
Sizes: printed, enqueued.
std::deque< uint32_t > scanStack
Stack of begin/break tokens, adjust by tokenOffset to index into tokens.
void clear()
Clear token buffer, scanStack must be empty.
void advanceLeft()
Print out tokens we know sizes for, and drop from token buffer.
uint32_t alwaysFits
Number of "AlwaysFits" on print stack.
void checkStream()
Check if there's enough tokens to hit width, if so print.
static uint32_t computeNewIndent(ssize_t newIndent, int32_t offset, uint32_t maxStartingIndent)
Compute indentation w/o overflow, clamp to [0,maxStartingIndent].
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Definition: DebugAnalysis.h:21
uint32_t spaces() const
Format token with tracked size.
virtual void clear()
No tokens referencing external memory are present.
virtual ~Listener()
Destructor, anchor.
virtual void print()
Listener for print event.
StringRef text() const