CIRCT  20.0.0git
TemporalCodeMotionPass.cpp
Go to the documentation of this file.
1 //===- TemporalCodeMotionPass.cpp - Implement Temporal Code Motion Pass ---===//
2 //
3 // Implement Pass to move all signal drives in a unique exiting block per
4 // temporal region and coalesce drives to the same signal.
5 //
6 //===----------------------------------------------------------------------===//
7 
8 #include "TemporalRegions.h"
10 #include "circt/Dialect/HW/HWOps.h"
13 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
14 #include "mlir/IR/Dominance.h"
15 #include "mlir/IR/PatternMatch.h"
16 #include "mlir/IR/Region.h"
17 #include "mlir/Pass/Pass.h"
18 #include "mlir/Support/LLVM.h"
19 #include "mlir/Transforms/RegionUtils.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include <queue>
22 
23 namespace circt {
24 namespace llhd {
25 #define GEN_PASS_DEF_TEMPORALCODEMOTION
26 #include "circt/Dialect/LLHD/Transforms/Passes.h.inc"
27 } // namespace llhd
28 } // namespace circt
29 
30 using namespace circt;
31 using namespace mlir;
32 
33 /// Explore all paths from the 'driveBlock' to the 'dominator' block and
34 /// construct a boolean expression at the current insertion point of 'builder'
35 /// to represent all those paths.
36 static Value
37 getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock,
38  Block *dominator,
39  DenseMap<Block *, Value> &mem) {
40  Location loc = driveBlock->getTerminator()->getLoc();
41  if (mem.count(driveBlock))
42  return mem[driveBlock];
43 
44  SmallVector<Block *> worklist;
45  worklist.push_back(driveBlock);
46 
47  while (!worklist.empty()) {
48  Block *curr = worklist.back();
49 
50  if (curr == dominator || curr->getPredecessors().empty()) {
51  if (!mem.count(curr))
52  mem[curr] = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
53 
54  worklist.pop_back();
55  continue;
56  }
57 
58  bool addedSomething = false;
59  for (auto *predBlock : curr->getPredecessors()) {
60  if (!mem.count(predBlock)) {
61  worklist.push_back(predBlock);
62  addedSomething = true;
63  }
64  }
65 
66  if (addedSomething)
67  continue;
68 
69  Value runner = builder.create<hw::ConstantOp>(loc, APInt(1, 0));
70  for (auto *predBlock : curr->getPredecessors()) {
71  if (predBlock->getTerminator()->getNumSuccessors() != 1) {
72  auto condBr = cast<cf::CondBranchOp>(predBlock->getTerminator());
73  Value cond = condBr.getCondition();
74  if (condBr.getFalseDest() == curr) {
75  Value trueVal = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
76  cond = builder.create<comb::XorOp>(loc, cond, trueVal);
77  }
78  Value next = builder.create<comb::AndOp>(loc, mem[predBlock], cond);
79  runner = builder.create<comb::OrOp>(loc, runner, next);
80  } else {
81  runner = builder.create<comb::OrOp>(loc, runner, mem[predBlock]);
82  }
83  }
84  mem[curr] = runner;
85  worklist.pop_back();
86  }
87 
88  return mem[driveBlock];
89 }
90 
91 /// More a 'llhd.drv' operation before the 'moveBefore' operation by adjusting
92 /// the 'enable' operand.
93 static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator,
94  Operation *moveBefore,
95  DenseMap<Block *, Value> &mem) {
96  OpBuilder builder(drvOp);
97  builder.setInsertionPoint(moveBefore);
98  Block *drvParentBlock = drvOp->getBlock();
99 
100  // Find sequence of branch decisions and add them as a sequence of
101  // instructions to the TR exiting block
102  Value finalValue = getBranchDecisionsFromDominatorToTarget(
103  builder, drvParentBlock, dominator, mem);
104 
105  if (drvOp.getEnable())
106  finalValue = builder.create<comb::AndOp>(drvOp.getLoc(), drvOp.getEnable(),
107  finalValue);
108 
109  drvOp.getEnableMutable().assign(finalValue);
110  drvOp->moveBefore(moveBefore);
111 }
112 
113 namespace {
114 struct TemporalCodeMotionPass
115  : public llhd::impl::TemporalCodeMotionBase<TemporalCodeMotionPass> {
116  void runOnOperation() override;
117  LogicalResult runOnProcess(llhd::ProcessOp procOp);
118 };
119 } // namespace
120 
121 void TemporalCodeMotionPass::runOnOperation() {
122  for (auto proc : getOperation().getOps<llhd::ProcessOp>())
123  (void)runOnProcess(proc); // Ignore processes that could not be lowered
124 }
125 
126 LogicalResult TemporalCodeMotionPass::runOnProcess(llhd::ProcessOp procOp) {
127  llhd::TemporalRegionAnalysis trAnalysis =
129  unsigned numTRs = trAnalysis.getNumTemporalRegions();
130 
131  // Only support processes with max. 2 temporal regions and one wait terminator
132  // as this is enough to represent flip-flops, registers, etc.
133  // NOTE: there always has to be either a wait or halt terminator in a process
134  // If the wait block creates the backwards edge, we only have one TR,
135  // otherwise we have 2 TRs
136  // NOTE: as the wait instruction needs to be on every path around the loop,
137  // it has to be the only exiting block of its TR
138  // NOTE: the other TR can either have only one exiting block, then we do not
139  // need to add an auxillary block, otherwise we have to add one
140  // NOTE: All drive operations have to be moved to the single exiting block of
141  // its TR. To do so, add the condition under which its block is reached from
142  // the TR entry block as a gating condition to the 'llhd.drv' operation
143  // NOTE: the entry blocks that are not part of the infinite loop do not count
144  // as TR and have TR number -1
145  // TODO: need to check that entry blocks that are note part of the loop to not
146  // have any instructions that have side effects that should not be allowed
147  // outside of the loop (drv, prb, ...)
148  // TODO: add support for more TRs and wait terminators (e.g., to represent
149  // FSMs)
150  if (numTRs > 2)
151  return failure();
152 
153  bool seenWait = false;
154  WalkResult walkResult = procOp.walk([&](llhd::WaitOp op) -> WalkResult {
155  if (seenWait)
156  return failure();
157 
158  // Check that the block containing the wait is the only exiting block of
159  // that TR
160  int trId = trAnalysis.getBlockTR(op->getBlock());
161  if (!trAnalysis.hasSingleExitBlock(trId))
162  return failure();
163 
164  seenWait = true;
165  return WalkResult::advance();
166  });
167  if (walkResult.wasInterrupted())
168  return failure();
169 
170  //===--------------------------------------------------------------------===//
171  // Create unique exit block per TR
172  //===--------------------------------------------------------------------===//
173 
174  // TODO: consider the case where a wait brances to itself
175  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
176  unsigned numTRSuccs = trAnalysis.getNumTRSuccessors(currTR);
177  // NOTE: Above error checks make this impossible to trigger, but the above
178  // are changed this one might have to be promoted to a proper error message.
179  assert((numTRSuccs == 1 ||
180  (numTRSuccs == 2 && trAnalysis.isOwnTRSuccessor(currTR))) &&
181  "only TRs with a single TR as possible successor are "
182  "supported for now.");
183 
184  if (trAnalysis.hasSingleExitBlock(currTR))
185  continue;
186 
187  // Get entry block of successor TR
188  Block *succTREntry =
189  trAnalysis.getTREntryBlock(*trAnalysis.getTRSuccessors(currTR).begin());
190 
191  // Create the auxillary block as we currently don't have a single exiting
192  // block and give it the same arguments as the entry block of the
193  // successor TR
194  Block *auxBlock = new Block();
195  auxBlock->addArguments(
196  succTREntry->getArgumentTypes(),
197  SmallVector<Location>(succTREntry->getNumArguments(), procOp.getLoc()));
198 
199  // Insert the auxillary block after the last block of the current TR
200  procOp.getBody().getBlocks().insertAfter(
201  Region::iterator(trAnalysis.getExitingBlocksInTR(currTR).back()),
202  auxBlock);
203 
204  // Let all current exit blocks branch to the auxillary block instead.
205  for (Block *exit : trAnalysis.getExitingBlocksInTR(currTR))
206  for (auto [i, succ] : llvm::enumerate(exit->getSuccessors()))
207  if (trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
208  exit->getTerminator()->setSuccessor(auxBlock, i);
209 
210  // Let the auxiallary block branch to the entry block of the successor
211  // temporal region entry block
212  OpBuilder b(procOp);
213  b.setInsertionPointToEnd(auxBlock);
214  b.create<cf::BranchOp>(procOp.getLoc(), succTREntry,
215  auxBlock->getArguments());
216  }
217 
218  //===--------------------------------------------------------------------===//
219  // Move drive instructions
220  //===--------------------------------------------------------------------===//
221 
222  DenseMap<Operation *, Block *> drvPos;
223 
224  // Force a new analysis as we have changed the CFG
225  trAnalysis = llhd::TemporalRegionAnalysis(procOp);
226  numTRs = trAnalysis.getNumTemporalRegions();
227  OpBuilder builder(procOp);
228 
229  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
230  DenseMap<Block *, Value> mem;
231 
232  // We ensured this in the previous phase above.
233  assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);
234 
235  Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
236  Block *entryBlock = trAnalysis.getTREntryBlock(currTR);
237 
238  DominanceInfo dom(procOp);
239  Block *dominator = exitingBlock;
240 
241  // Collect all 'llhd.drv' operations in the process and compute their common
242  // dominator block.
243  procOp.walk([&](llhd::DrvOp op) {
244  if (trAnalysis.getBlockTR(op.getOperation()->getBlock()) ==
245  static_cast<int>(currTR)) {
246  Block *parentBlock = op.getOperation()->getBlock();
247  drvPos[op] = parentBlock;
248  dominator = dom.findNearestCommonDominator(dominator, parentBlock);
249  }
250  });
251 
252  // Set insertion point before first 'llhd.drv' op in the exiting block
253  Operation *moveBefore = exitingBlock->getTerminator();
254  exitingBlock->walk([&](llhd::DrvOp op) { moveBefore = op; });
255 
256  assert(dominator &&
257  "could not find nearest common dominator for TR exiting "
258  "block and the block containing drv");
259 
260  // If the dominator isn't already a TR entry block, set it to the nearest
261  // dominating TR entry block.
262  if (trAnalysis.getBlockTR(dominator) != static_cast<int>(currTR))
263  dominator = trAnalysis.getTREntryBlock(currTR);
264 
265  std::queue<Block *> workQueue;
266  SmallPtrSet<Block *, 32> workDone;
267 
268  if (entryBlock != exitingBlock)
269  workQueue.push(entryBlock);
270 
271  while (!workQueue.empty()) {
272  Block *block = workQueue.front();
273  workQueue.pop();
274  workDone.insert(block);
275 
276  builder.setInsertionPoint(moveBefore);
277  SmallVector<llhd::DrvOp> drives(block->getOps<llhd::DrvOp>());
278  for (auto drive : drives)
279  moveDriveOpBefore(drive, dominator, moveBefore, mem);
280 
281  for (Block *succ : block->getSuccessors()) {
282  if (succ == exitingBlock ||
283  trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
284  continue;
285 
286  if (llvm::all_of(succ->getPredecessors(), [&](Block *block) {
287  return workDone.contains(block);
288  }))
289  workQueue.push(succ);
290  }
291  }
292 
293  // Merge entry and exit block of each TR, remove all other blocks
294  if (entryBlock != exitingBlock) {
295  entryBlock->getTerminator()->erase();
296  entryBlock->getOperations().splice(entryBlock->end(),
297  exitingBlock->getOperations());
298  }
299  }
300 
301  IRRewriter rewriter(procOp);
302  (void)mlir::eraseUnreachableBlocks(rewriter, procOp->getRegions());
303 
304  //===--------------------------------------------------------------------===//
305  // Coalesce multiple drives to the same signal
306  //===--------------------------------------------------------------------===//
307 
308  trAnalysis = llhd::TemporalRegionAnalysis(procOp);
309  DominanceInfo dom(procOp);
310  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
311  // We ensured this in the previous phase above.
312  assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);
313 
314  Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
315  DenseMap<std::pair<Value, Value>, llhd::DrvOp> sigToDrv;
316 
317  SmallVector<llhd::DrvOp> drives(exitingBlock->getOps<llhd::DrvOp>());
318  for (auto op : drives) {
319  auto sigTimePair = std::make_pair(op.getSignal(), op.getTime());
320  if (!sigToDrv.count(sigTimePair)) {
321  sigToDrv[sigTimePair] = op;
322  continue;
323  }
324 
325  OpBuilder builder(op);
326  if (op.getEnable()) {
327  // Multiplex value to be driven
328  auto firstDrive = sigToDrv[sigTimePair];
329  Value muxValue = builder.create<comb::MuxOp>(
330  op.getLoc(), op.getEnable(), op.getValue(), firstDrive.getValue());
331  op.getValueMutable().assign(muxValue);
332 
333  // Take the disjunction of the enable conditions
334  if (firstDrive.getEnable()) {
335  Value orVal = builder.create<comb::OrOp>(op.getLoc(), op.getEnable(),
336  firstDrive.getEnable());
337  op.getEnableMutable().assign(orVal);
338  } else {
339  // No enable is equivalent to a constant 'true' enable
340  op.getEnableMutable().clear();
341  }
342  }
343 
344  sigToDrv[sigTimePair]->erase();
345  sigToDrv[sigTimePair] = op;
346  }
347  }
348 
349  return success();
350 }
351 
352 std::unique_ptr<OperationPass<hw::HWModuleOp>>
354  return std::make_unique<TemporalCodeMotionPass>();
355 }
assert(baseType &&"element must be base type")
static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator, Operation *moveBefore, DenseMap< Block *, Value > &mem)
More a 'llhd.drv' operation before the 'moveBefore' operation by adjusting the 'enable' operand.
static Value getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock, Block *dominator, DenseMap< Block *, Value > &mem)
Explore all paths from the 'driveBlock' to the 'dominator' block and construct a boolean expression a...
def create(data_type, value)
Definition: hw.py:393
std::unique_ptr< OperationPass< hw::HWModuleOp > > createTemporalCodeMotionPass()
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Definition: DebugAnalysis.h:21
SmallVector< Block *, 8 > getExitingBlocksInTR(int) const
SmallVector< int, 8 > getTRSuccessors(int)