CIRCT 21.0.0git
Loading...
Searching...
No Matches
TemporalCodeMotionPass.cpp
Go to the documentation of this file.
1//===- TemporalCodeMotionPass.cpp - Implement Temporal Code Motion Pass ---===//
2//
3// Implement Pass to move all signal drives in a unique exiting block per
4// temporal region and coalesce drives to the same signal.
5//
6//===----------------------------------------------------------------------===//
7
8#include "TemporalRegions.h"
13#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
14#include "mlir/IR/Dominance.h"
15#include "mlir/IR/PatternMatch.h"
16#include "mlir/IR/Region.h"
17#include "mlir/Pass/Pass.h"
18#include "mlir/Support/LLVM.h"
19#include "mlir/Transforms/RegionUtils.h"
20#include "llvm/ADT/STLExtras.h"
21#include <queue>
22
23namespace circt {
24namespace llhd {
25#define GEN_PASS_DEF_TEMPORALCODEMOTION
26#include "circt/Dialect/LLHD/Transforms/LLHDPasses.h.inc"
27} // namespace llhd
28} // namespace circt
29
30using namespace circt;
31using namespace mlir;
32
33/// Explore all paths from the 'driveBlock' to the 'dominator' block and
34/// construct a boolean expression at the current insertion point of 'builder'
35/// to represent all those paths.
36static Value
37getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock,
38 Block *dominator,
39 DenseMap<Block *, Value> &mem) {
40 Location loc = driveBlock->getTerminator()->getLoc();
41 if (mem.count(driveBlock))
42 return mem[driveBlock];
43
44 SmallVector<Block *> worklist;
45 worklist.push_back(driveBlock);
46
47 while (!worklist.empty()) {
48 Block *curr = worklist.back();
49
50 if (curr == dominator || curr->getPredecessors().empty()) {
51 if (!mem.count(curr))
52 mem[curr] = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
53
54 worklist.pop_back();
55 continue;
56 }
57
58 bool addedSomething = false;
59 for (auto *predBlock : curr->getPredecessors()) {
60 if (!mem.count(predBlock)) {
61 worklist.push_back(predBlock);
62 addedSomething = true;
63 }
64 }
65
66 if (addedSomething)
67 continue;
68
69 Value runner = builder.create<hw::ConstantOp>(loc, APInt(1, 0));
70 for (auto *predBlock : curr->getPredecessors()) {
71 if (predBlock->getTerminator()->getNumSuccessors() != 1) {
72 auto condBr = cast<cf::CondBranchOp>(predBlock->getTerminator());
73 Value cond = condBr.getCondition();
74 if (condBr.getFalseDest() == curr) {
75 Value trueVal = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
76 cond = builder.create<comb::XorOp>(loc, cond, trueVal);
77 }
78 Value next = builder.create<comb::AndOp>(loc, mem[predBlock], cond);
79 runner = builder.create<comb::OrOp>(loc, runner, next);
80 } else {
81 runner = builder.create<comb::OrOp>(loc, runner, mem[predBlock]);
82 }
83 }
84 mem[curr] = runner;
85 worklist.pop_back();
86 }
87
88 return mem[driveBlock];
89}
90
91/// More a 'llhd.drv' operation before the 'moveBefore' operation by adjusting
92/// the 'enable' operand.
93static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator,
94 Operation *moveBefore,
95 DenseMap<Block *, Value> &mem) {
96 OpBuilder builder(drvOp);
97 builder.setInsertionPoint(moveBefore);
98 Block *drvParentBlock = drvOp->getBlock();
99
100 // Find sequence of branch decisions and add them as a sequence of
101 // instructions to the TR exiting block
103 builder, drvParentBlock, dominator, mem);
104
105 if (drvOp.getEnable())
106 finalValue = builder.create<comb::AndOp>(drvOp.getLoc(), drvOp.getEnable(),
107 finalValue);
108
109 drvOp.getEnableMutable().assign(finalValue);
110 drvOp->moveBefore(moveBefore);
111}
112
113namespace {
114struct TemporalCodeMotionPass
115 : public llhd::impl::TemporalCodeMotionBase<TemporalCodeMotionPass> {
116 void runOnOperation() override;
117 LogicalResult runOnProcess(llhd::ProcessOp procOp);
118};
119} // namespace
120
121void TemporalCodeMotionPass::runOnOperation() {
122 for (auto proc : getOperation().getOps<llhd::ProcessOp>())
123 (void)runOnProcess(proc); // Ignore processes that could not be lowered
124}
125
126static LogicalResult checkForCFGLoop(llhd::ProcessOp procOp) {
127 SmallVector<Block *> toCheck(
128 llvm::map_range(procOp.getOps<llhd::WaitOp>(), [](llhd::WaitOp waitOp) {
129 return waitOp.getSuccessor();
130 }));
131 toCheck.push_back(&procOp.getBody().front());
132
133 SmallVector<Block *> worklist;
134 DenseSet<Block *> visited;
135 for (auto *block : toCheck) {
136 worklist.clear();
137 visited.clear();
138 worklist.push_back(block);
139
140 while (!worklist.empty()) {
141 Block *curr = worklist.pop_back_val();
142 if (isa<llhd::WaitOp>(curr->getTerminator()))
143 continue;
144
145 visited.insert(curr);
146
147 for (auto *succ : curr->getSuccessors()) {
148 if (visited.contains(succ))
149 return failure();
150
151 worklist.push_back(succ);
152 }
153 }
154 }
155
156 return success();
157}
158
159LogicalResult TemporalCodeMotionPass::runOnProcess(llhd::ProcessOp procOp) {
160 // Make sure there are no CFG loops that don't contain a block with a wait
161 // terminator in the cycle because that's currently not supported by the
162 // temporal region analysis and this pass.
163 if (failed(checkForCFGLoop(procOp)))
164 return failure();
165
168 unsigned numTRs = trAnalysis.getNumTemporalRegions();
169
170 // Only support processes with max. 2 temporal regions and one wait terminator
171 // as this is enough to represent flip-flops, registers, etc.
172 // NOTE: there always has to be either a wait or halt terminator in a process
173 // If the wait block creates the backwards edge, we only have one TR,
174 // otherwise we have 2 TRs
175 // NOTE: as the wait instruction needs to be on every path around the loop,
176 // it has to be the only exiting block of its TR
177 // NOTE: the other TR can either have only one exiting block, then we do not
178 // need to add an auxillary block, otherwise we have to add one
179 // NOTE: All drive operations have to be moved to the single exiting block of
180 // its TR. To do so, add the condition under which its block is reached from
181 // the TR entry block as a gating condition to the 'llhd.drv' operation
182 // NOTE: the entry blocks that are not part of the infinite loop do not count
183 // as TR and have TR number -1
184 // TODO: need to check that entry blocks that are note part of the loop to not
185 // have any instructions that have side effects that should not be allowed
186 // outside of the loop (drv, prb, ...)
187 // TODO: add support for more TRs and wait terminators (e.g., to represent
188 // FSMs)
189 if (numTRs > 2)
190 return failure();
191
192 bool seenWait = false;
193 WalkResult walkResult = procOp.walk([&](llhd::WaitOp op) -> WalkResult {
194 if (seenWait)
195 return failure();
196
197 // Check that the block containing the wait is the only exiting block of
198 // that TR
199 int trId = trAnalysis.getBlockTR(op->getBlock());
200 if (!trAnalysis.hasSingleExitBlock(trId))
201 return failure();
202
203 seenWait = true;
204 return WalkResult::advance();
205 });
206 if (walkResult.wasInterrupted())
207 return failure();
208
209 //===--------------------------------------------------------------------===//
210 // Create unique exit block per TR
211 //===--------------------------------------------------------------------===//
212
213 // TODO: consider the case where a wait brances to itself
214 for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
215 unsigned numTRSuccs = trAnalysis.getNumTRSuccessors(currTR);
216 (void)numTRSuccs;
217 // NOTE: Above error checks make this impossible to trigger, but the above
218 // are changed this one might have to be promoted to a proper error message.
219 assert((numTRSuccs == 1 ||
220 (numTRSuccs == 2 && trAnalysis.isOwnTRSuccessor(currTR))) &&
221 "only TRs with a single TR as possible successor are "
222 "supported for now.");
223
224 if (trAnalysis.hasSingleExitBlock(currTR))
225 continue;
226
227 // Get entry block of successor TR
228 Block *succTREntry =
229 trAnalysis.getTREntryBlock(*trAnalysis.getTRSuccessors(currTR).begin());
230
231 // Create the auxillary block as we currently don't have a single exiting
232 // block and give it the same arguments as the entry block of the
233 // successor TR
234 Block *auxBlock = new Block();
235 auxBlock->addArguments(
236 succTREntry->getArgumentTypes(),
237 SmallVector<Location>(succTREntry->getNumArguments(), procOp.getLoc()));
238
239 // Insert the auxillary block after the last block of the current TR
240 procOp.getBody().getBlocks().insertAfter(
241 Region::iterator(trAnalysis.getExitingBlocksInTR(currTR).back()),
242 auxBlock);
243
244 // Let all current exit blocks branch to the auxillary block instead.
245 for (Block *exit : trAnalysis.getExitingBlocksInTR(currTR))
246 for (auto [i, succ] : llvm::enumerate(exit->getSuccessors()))
247 if (trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
248 exit->getTerminator()->setSuccessor(auxBlock, i);
249
250 // Let the auxiallary block branch to the entry block of the successor
251 // temporal region entry block
252 OpBuilder b(procOp);
253 b.setInsertionPointToEnd(auxBlock);
254 b.create<cf::BranchOp>(procOp.getLoc(), succTREntry,
255 auxBlock->getArguments());
256 }
257
258 //===--------------------------------------------------------------------===//
259 // Move drive instructions
260 //===--------------------------------------------------------------------===//
261
262 DenseMap<Operation *, Block *> drvPos;
263
264 // Force a new analysis as we have changed the CFG
265 trAnalysis = llhd::TemporalRegionAnalysis(procOp);
266 numTRs = trAnalysis.getNumTemporalRegions();
267 OpBuilder builder(procOp);
268
269 for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
270 DenseMap<Block *, Value> mem;
271
272 // We ensured this in the previous phase above.
273 assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);
274
275 Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
276 Block *entryBlock = trAnalysis.getTREntryBlock(currTR);
277
278 DominanceInfo dom(procOp);
279 Block *dominator = exitingBlock;
280
281 // Collect all 'llhd.drv' operations in the process and compute their common
282 // dominator block.
283 procOp.walk([&](llhd::DrvOp op) {
284 if (trAnalysis.getBlockTR(op.getOperation()->getBlock()) ==
285 static_cast<int>(currTR)) {
286 Block *parentBlock = op.getOperation()->getBlock();
287 drvPos[op] = parentBlock;
288 dominator = dom.findNearestCommonDominator(dominator, parentBlock);
289 }
290 });
291
292 // Set insertion point before first 'llhd.drv' op in the exiting block
293 Operation *moveBefore = exitingBlock->getTerminator();
294 exitingBlock->walk([&](llhd::DrvOp op) { moveBefore = op; });
295
296 assert(dominator &&
297 "could not find nearest common dominator for TR exiting "
298 "block and the block containing drv");
299
300 // If the dominator isn't already a TR entry block, set it to the nearest
301 // dominating TR entry block.
302 if (trAnalysis.getBlockTR(dominator) != static_cast<int>(currTR))
303 dominator = trAnalysis.getTREntryBlock(currTR);
304
305 std::queue<Block *> workQueue;
306 SmallPtrSet<Block *, 32> workDone;
307
308 if (entryBlock != exitingBlock)
309 workQueue.push(entryBlock);
310
311 while (!workQueue.empty()) {
312 Block *block = workQueue.front();
313 workQueue.pop();
314 workDone.insert(block);
315
316 builder.setInsertionPoint(moveBefore);
317 SmallVector<llhd::DrvOp> drives(block->getOps<llhd::DrvOp>());
318 for (auto drive : drives)
319 moveDriveOpBefore(drive, dominator, moveBefore, mem);
320
321 for (Block *succ : block->getSuccessors()) {
322 if (succ == exitingBlock ||
323 trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
324 continue;
325
326 if (llvm::all_of(succ->getPredecessors(), [&](Block *block) {
327 return workDone.contains(block);
328 }))
329 workQueue.push(succ);
330 }
331 }
332
333 // Merge entry and exit block of each TR, remove all other blocks
334 if (entryBlock != exitingBlock) {
335 entryBlock->getTerminator()->erase();
336 entryBlock->getOperations().splice(entryBlock->end(),
337 exitingBlock->getOperations());
338 }
339 }
340
341 IRRewriter rewriter(procOp);
342 (void)mlir::eraseUnreachableBlocks(rewriter, procOp->getRegions());
343
344 //===--------------------------------------------------------------------===//
345 // Coalesce multiple drives to the same signal
346 //===--------------------------------------------------------------------===//
347
348 trAnalysis = llhd::TemporalRegionAnalysis(procOp);
349 DominanceInfo dom(procOp);
350 for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
351 // We ensured this in the previous phase above.
352 assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);
353
354 Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
355 DenseMap<std::pair<Value, Value>, llhd::DrvOp> sigToDrv;
356
357 SmallVector<llhd::DrvOp> drives(exitingBlock->getOps<llhd::DrvOp>());
358 for (auto op : drives) {
359 auto sigTimePair = std::make_pair(op.getSignal(), op.getTime());
360 if (!sigToDrv.count(sigTimePair)) {
361 sigToDrv[sigTimePair] = op;
362 continue;
363 }
364
365 OpBuilder builder(op);
366 if (op.getEnable()) {
367 // Multiplex value to be driven
368 auto firstDrive = sigToDrv[sigTimePair];
369 Value muxValue = builder.create<comb::MuxOp>(
370 op.getLoc(), op.getEnable(), op.getValue(), firstDrive.getValue());
371 op.getValueMutable().assign(muxValue);
372
373 // Take the disjunction of the enable conditions
374 if (firstDrive.getEnable()) {
375 Value orVal = builder.create<comb::OrOp>(op.getLoc(), op.getEnable(),
376 firstDrive.getEnable());
377 op.getEnableMutable().assign(orVal);
378 } else {
379 // No enable is equivalent to a constant 'true' enable
380 op.getEnableMutable().clear();
381 }
382 }
383
384 sigToDrv[sigTimePair]->erase();
385 sigToDrv[sigTimePair] = op;
386 }
387 }
388
389 return success();
390}
assert(baseType &&"element must be base type")
static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator, Operation *moveBefore, DenseMap< Block *, Value > &mem)
More a 'llhd.drv' operation before the 'moveBefore' operation by adjusting the 'enable' operand.
static LogicalResult checkForCFGLoop(llhd::ProcessOp procOp)
static Value getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock, Block *dominator, DenseMap< Block *, Value > &mem)
Explore all paths from the 'driveBlock' to the 'dominator' block and construct a boolean expression a...
create(data_type, value)
Definition hw.py:433
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
SmallVector< Block *, 8 > getExitingBlocksInTR(int) const
SmallVector< int, 8 > getTRSuccessors(int)