CIRCT  20.0.0git
TemporalCodeMotionPass.cpp
Go to the documentation of this file.
1 //===- TemporalCodeMotionPass.cpp - Implement Temporal Code Motion Pass ---===//
2 //
3 // Implement Pass to move all signal drives in a unique exiting block per
4 // temporal region and coalesce drives to the same signal.
5 //
6 //===----------------------------------------------------------------------===//
7 
8 #include "TemporalRegions.h"
10 #include "circt/Dialect/HW/HWOps.h"
13 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
14 #include "mlir/IR/Dominance.h"
15 #include "mlir/IR/PatternMatch.h"
16 #include "mlir/IR/Region.h"
17 #include "mlir/Pass/Pass.h"
18 #include "mlir/Support/LLVM.h"
19 #include "mlir/Transforms/RegionUtils.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include <queue>
22 
23 namespace circt {
24 namespace llhd {
25 #define GEN_PASS_DEF_TEMPORALCODEMOTION
26 #include "circt/Dialect/LLHD/Transforms/Passes.h.inc"
27 } // namespace llhd
28 } // namespace circt
29 
30 using namespace circt;
31 using namespace mlir;
32 
33 /// Explore all paths from the 'driveBlock' to the 'dominator' block and
34 /// construct a boolean expression at the current insertion point of 'builder'
35 /// to represent all those paths.
36 static Value
37 getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock,
38  Block *dominator,
39  DenseMap<Block *, Value> &mem) {
40  Location loc = driveBlock->getTerminator()->getLoc();
41  if (mem.count(driveBlock))
42  return mem[driveBlock];
43 
44  SmallVector<Block *> worklist;
45  worklist.push_back(driveBlock);
46 
47  while (!worklist.empty()) {
48  Block *curr = worklist.back();
49 
50  if (curr == dominator || curr->getPredecessors().empty()) {
51  if (!mem.count(curr))
52  mem[curr] = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
53 
54  worklist.pop_back();
55  continue;
56  }
57 
58  bool addedSomething = false;
59  for (auto *predBlock : curr->getPredecessors()) {
60  if (!mem.count(predBlock)) {
61  worklist.push_back(predBlock);
62  addedSomething = true;
63  }
64  }
65 
66  if (addedSomething)
67  continue;
68 
69  Value runner = builder.create<hw::ConstantOp>(loc, APInt(1, 0));
70  for (auto *predBlock : curr->getPredecessors()) {
71  if (predBlock->getTerminator()->getNumSuccessors() != 1) {
72  auto condBr = cast<cf::CondBranchOp>(predBlock->getTerminator());
73  Value cond = condBr.getCondition();
74  if (condBr.getFalseDest() == curr) {
75  Value trueVal = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
76  cond = builder.create<comb::XorOp>(loc, cond, trueVal);
77  }
78  Value next = builder.create<comb::AndOp>(loc, mem[predBlock], cond);
79  runner = builder.create<comb::OrOp>(loc, runner, next);
80  } else {
81  runner = builder.create<comb::OrOp>(loc, runner, mem[predBlock]);
82  }
83  }
84  mem[curr] = runner;
85  worklist.pop_back();
86  }
87 
88  return mem[driveBlock];
89 }
90 
91 /// More a 'llhd.drv' operation before the 'moveBefore' operation by adjusting
92 /// the 'enable' operand.
93 static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator,
94  Operation *moveBefore,
95  DenseMap<Block *, Value> &mem) {
96  OpBuilder builder(drvOp);
97  builder.setInsertionPoint(moveBefore);
98  Block *drvParentBlock = drvOp->getBlock();
99 
100  // Find sequence of branch decisions and add them as a sequence of
101  // instructions to the TR exiting block
102  Value finalValue = getBranchDecisionsFromDominatorToTarget(
103  builder, drvParentBlock, dominator, mem);
104 
105  if (drvOp.getEnable())
106  finalValue = builder.create<comb::AndOp>(drvOp.getLoc(), drvOp.getEnable(),
107  finalValue);
108 
109  drvOp.getEnableMutable().assign(finalValue);
110  drvOp->moveBefore(moveBefore);
111 }
112 
113 namespace {
114 struct TemporalCodeMotionPass
115  : public llhd::impl::TemporalCodeMotionBase<TemporalCodeMotionPass> {
116  void runOnOperation() override;
117  LogicalResult runOnProcess(llhd::ProcessOp procOp);
118 };
119 } // namespace
120 
121 void TemporalCodeMotionPass::runOnOperation() {
122  for (auto proc : getOperation().getOps<llhd::ProcessOp>())
123  (void)runOnProcess(proc); // Ignore processes that could not be lowered
124 }
125 
126 LogicalResult TemporalCodeMotionPass::runOnProcess(llhd::ProcessOp procOp) {
127  llhd::TemporalRegionAnalysis trAnalysis =
129  unsigned numTRs = trAnalysis.getNumTemporalRegions();
130 
131  // Only support processes with max. 2 temporal regions and one wait terminator
132  // as this is enough to represent flip-flops, registers, etc.
133  // NOTE: there always has to be either a wait or halt terminator in a process
134  // If the wait block creates the backwards edge, we only have one TR,
135  // otherwise we have 2 TRs
136  // NOTE: as the wait instruction needs to be on every path around the loop,
137  // it has to be the only exiting block of its TR
138  // NOTE: the other TR can either have only one exiting block, then we do not
139  // need to add an auxillary block, otherwise we have to add one
140  // NOTE: All drive operations have to be moved to the single exiting block of
141  // its TR. To do so, add the condition under which its block is reached from
142  // the TR entry block as a gating condition to the 'llhd.drv' operation
143  // NOTE: the entry blocks that are not part of the infinite loop do not count
144  // as TR and have TR number -1
145  // TODO: need to check that entry blocks that are note part of the loop to not
146  // have any instructions that have side effects that should not be allowed
147  // outside of the loop (drv, prb, ...)
148  // TODO: add support for more TRs and wait terminators (e.g., to represent
149  // FSMs)
150  if (numTRs > 2)
151  return failure();
152 
153  bool seenWait = false;
154  WalkResult walkResult = procOp.walk([&](llhd::WaitOp op) -> WalkResult {
155  if (seenWait)
156  return failure();
157 
158  // Check that the block containing the wait is the only exiting block of
159  // that TR
160  int trId = trAnalysis.getBlockTR(op->getBlock());
161  if (!trAnalysis.hasSingleExitBlock(trId))
162  return failure();
163 
164  seenWait = true;
165  return WalkResult::advance();
166  });
167  if (walkResult.wasInterrupted())
168  return failure();
169 
170  //===--------------------------------------------------------------------===//
171  // Create unique exit block per TR
172  //===--------------------------------------------------------------------===//
173 
174  // TODO: consider the case where a wait brances to itself
175  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
176  unsigned numTRSuccs = trAnalysis.getNumTRSuccessors(currTR);
177  (void)numTRSuccs;
178  // NOTE: Above error checks make this impossible to trigger, but the above
179  // are changed this one might have to be promoted to a proper error message.
180  assert((numTRSuccs == 1 ||
181  (numTRSuccs == 2 && trAnalysis.isOwnTRSuccessor(currTR))) &&
182  "only TRs with a single TR as possible successor are "
183  "supported for now.");
184 
185  if (trAnalysis.hasSingleExitBlock(currTR))
186  continue;
187 
188  // Get entry block of successor TR
189  Block *succTREntry =
190  trAnalysis.getTREntryBlock(*trAnalysis.getTRSuccessors(currTR).begin());
191 
192  // Create the auxillary block as we currently don't have a single exiting
193  // block and give it the same arguments as the entry block of the
194  // successor TR
195  Block *auxBlock = new Block();
196  auxBlock->addArguments(
197  succTREntry->getArgumentTypes(),
198  SmallVector<Location>(succTREntry->getNumArguments(), procOp.getLoc()));
199 
200  // Insert the auxillary block after the last block of the current TR
201  procOp.getBody().getBlocks().insertAfter(
202  Region::iterator(trAnalysis.getExitingBlocksInTR(currTR).back()),
203  auxBlock);
204 
205  // Let all current exit blocks branch to the auxillary block instead.
206  for (Block *exit : trAnalysis.getExitingBlocksInTR(currTR))
207  for (auto [i, succ] : llvm::enumerate(exit->getSuccessors()))
208  if (trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
209  exit->getTerminator()->setSuccessor(auxBlock, i);
210 
211  // Let the auxiallary block branch to the entry block of the successor
212  // temporal region entry block
213  OpBuilder b(procOp);
214  b.setInsertionPointToEnd(auxBlock);
215  b.create<cf::BranchOp>(procOp.getLoc(), succTREntry,
216  auxBlock->getArguments());
217  }
218 
219  //===--------------------------------------------------------------------===//
220  // Move drive instructions
221  //===--------------------------------------------------------------------===//
222 
223  DenseMap<Operation *, Block *> drvPos;
224 
225  // Force a new analysis as we have changed the CFG
226  trAnalysis = llhd::TemporalRegionAnalysis(procOp);
227  numTRs = trAnalysis.getNumTemporalRegions();
228  OpBuilder builder(procOp);
229 
230  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
231  DenseMap<Block *, Value> mem;
232 
233  // We ensured this in the previous phase above.
234  assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);
235 
236  Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
237  Block *entryBlock = trAnalysis.getTREntryBlock(currTR);
238 
239  DominanceInfo dom(procOp);
240  Block *dominator = exitingBlock;
241 
242  // Collect all 'llhd.drv' operations in the process and compute their common
243  // dominator block.
244  procOp.walk([&](llhd::DrvOp op) {
245  if (trAnalysis.getBlockTR(op.getOperation()->getBlock()) ==
246  static_cast<int>(currTR)) {
247  Block *parentBlock = op.getOperation()->getBlock();
248  drvPos[op] = parentBlock;
249  dominator = dom.findNearestCommonDominator(dominator, parentBlock);
250  }
251  });
252 
253  // Set insertion point before first 'llhd.drv' op in the exiting block
254  Operation *moveBefore = exitingBlock->getTerminator();
255  exitingBlock->walk([&](llhd::DrvOp op) { moveBefore = op; });
256 
257  assert(dominator &&
258  "could not find nearest common dominator for TR exiting "
259  "block and the block containing drv");
260 
261  // If the dominator isn't already a TR entry block, set it to the nearest
262  // dominating TR entry block.
263  if (trAnalysis.getBlockTR(dominator) != static_cast<int>(currTR))
264  dominator = trAnalysis.getTREntryBlock(currTR);
265 
266  std::queue<Block *> workQueue;
267  SmallPtrSet<Block *, 32> workDone;
268 
269  if (entryBlock != exitingBlock)
270  workQueue.push(entryBlock);
271 
272  while (!workQueue.empty()) {
273  Block *block = workQueue.front();
274  workQueue.pop();
275  workDone.insert(block);
276 
277  builder.setInsertionPoint(moveBefore);
278  SmallVector<llhd::DrvOp> drives(block->getOps<llhd::DrvOp>());
279  for (auto drive : drives)
280  moveDriveOpBefore(drive, dominator, moveBefore, mem);
281 
282  for (Block *succ : block->getSuccessors()) {
283  if (succ == exitingBlock ||
284  trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
285  continue;
286 
287  if (llvm::all_of(succ->getPredecessors(), [&](Block *block) {
288  return workDone.contains(block);
289  }))
290  workQueue.push(succ);
291  }
292  }
293 
294  // Merge entry and exit block of each TR, remove all other blocks
295  if (entryBlock != exitingBlock) {
296  entryBlock->getTerminator()->erase();
297  entryBlock->getOperations().splice(entryBlock->end(),
298  exitingBlock->getOperations());
299  }
300  }
301 
302  IRRewriter rewriter(procOp);
303  (void)mlir::eraseUnreachableBlocks(rewriter, procOp->getRegions());
304 
305  //===--------------------------------------------------------------------===//
306  // Coalesce multiple drives to the same signal
307  //===--------------------------------------------------------------------===//
308 
309  trAnalysis = llhd::TemporalRegionAnalysis(procOp);
310  DominanceInfo dom(procOp);
311  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
312  // We ensured this in the previous phase above.
313  assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);
314 
315  Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
316  DenseMap<std::pair<Value, Value>, llhd::DrvOp> sigToDrv;
317 
318  SmallVector<llhd::DrvOp> drives(exitingBlock->getOps<llhd::DrvOp>());
319  for (auto op : drives) {
320  auto sigTimePair = std::make_pair(op.getSignal(), op.getTime());
321  if (!sigToDrv.count(sigTimePair)) {
322  sigToDrv[sigTimePair] = op;
323  continue;
324  }
325 
326  OpBuilder builder(op);
327  if (op.getEnable()) {
328  // Multiplex value to be driven
329  auto firstDrive = sigToDrv[sigTimePair];
330  Value muxValue = builder.create<comb::MuxOp>(
331  op.getLoc(), op.getEnable(), op.getValue(), firstDrive.getValue());
332  op.getValueMutable().assign(muxValue);
333 
334  // Take the disjunction of the enable conditions
335  if (firstDrive.getEnable()) {
336  Value orVal = builder.create<comb::OrOp>(op.getLoc(), op.getEnable(),
337  firstDrive.getEnable());
338  op.getEnableMutable().assign(orVal);
339  } else {
340  // No enable is equivalent to a constant 'true' enable
341  op.getEnableMutable().clear();
342  }
343  }
344 
345  sigToDrv[sigTimePair]->erase();
346  sigToDrv[sigTimePair] = op;
347  }
348  }
349 
350  return success();
351 }
352 
353 std::unique_ptr<OperationPass<hw::HWModuleOp>>
355  return std::make_unique<TemporalCodeMotionPass>();
356 }
assert(baseType &&"element must be base type")
static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator, Operation *moveBefore, DenseMap< Block *, Value > &mem)
More a 'llhd.drv' operation before the 'moveBefore' operation by adjusting the 'enable' operand.
static Value getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock, Block *dominator, DenseMap< Block *, Value > &mem)
Explore all paths from the 'driveBlock' to the 'dominator' block and construct a boolean expression a...
def create(data_type, value)
Definition: hw.py:433
std::unique_ptr< OperationPass< hw::HWModuleOp > > createTemporalCodeMotionPass()
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Definition: DebugAnalysis.h:21
SmallVector< Block *, 8 > getExitingBlocksInTR(int) const
SmallVector< int, 8 > getTRSuccessors(int)