CIRCT 20.0.0git
Loading...
Searching...
No Matches
TemporalCodeMotionPass.cpp
Go to the documentation of this file.
1//===- TemporalCodeMotionPass.cpp - Implement Temporal Code Motion Pass ---===//
2//
3// Implement Pass to move all signal drives in a unique exiting block per
4// temporal region and coalesce drives to the same signal.
5//
6//===----------------------------------------------------------------------===//
7
8#include "TemporalRegions.h"
13#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
14#include "mlir/IR/Dominance.h"
15#include "mlir/IR/PatternMatch.h"
16#include "mlir/IR/Region.h"
17#include "mlir/Pass/Pass.h"
18#include "mlir/Support/LLVM.h"
19#include "mlir/Transforms/RegionUtils.h"
20#include "llvm/ADT/STLExtras.h"
21#include <queue>
22
23namespace circt {
24namespace llhd {
25#define GEN_PASS_DEF_TEMPORALCODEMOTION
26#include "circt/Dialect/LLHD/Transforms/LLHDPasses.h.inc"
27} // namespace llhd
28} // namespace circt
29
30using namespace circt;
31using namespace mlir;
32
33/// Explore all paths from the 'driveBlock' to the 'dominator' block and
34/// construct a boolean expression at the current insertion point of 'builder'
35/// to represent all those paths.
36static Value
37getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock,
38 Block *dominator,
39 DenseMap<Block *, Value> &mem) {
40 Location loc = driveBlock->getTerminator()->getLoc();
41 if (mem.count(driveBlock))
42 return mem[driveBlock];
43
44 SmallVector<Block *> worklist;
45 worklist.push_back(driveBlock);
46
47 while (!worklist.empty()) {
48 Block *curr = worklist.back();
49
50 if (curr == dominator || curr->getPredecessors().empty()) {
51 if (!mem.count(curr))
52 mem[curr] = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
53
54 worklist.pop_back();
55 continue;
56 }
57
58 bool addedSomething = false;
59 for (auto *predBlock : curr->getPredecessors()) {
60 if (!mem.count(predBlock)) {
61 worklist.push_back(predBlock);
62 addedSomething = true;
63 }
64 }
65
66 if (addedSomething)
67 continue;
68
69 Value runner = builder.create<hw::ConstantOp>(loc, APInt(1, 0));
70 for (auto *predBlock : curr->getPredecessors()) {
71 if (predBlock->getTerminator()->getNumSuccessors() != 1) {
72 auto condBr = cast<cf::CondBranchOp>(predBlock->getTerminator());
73 Value cond = condBr.getCondition();
74 if (condBr.getFalseDest() == curr) {
75 Value trueVal = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
76 cond = builder.create<comb::XorOp>(loc, cond, trueVal);
77 }
78 Value next = builder.create<comb::AndOp>(loc, mem[predBlock], cond);
79 runner = builder.create<comb::OrOp>(loc, runner, next);
80 } else {
81 runner = builder.create<comb::OrOp>(loc, runner, mem[predBlock]);
82 }
83 }
84 mem[curr] = runner;
85 worklist.pop_back();
86 }
87
88 return mem[driveBlock];
89}
90
91/// More a 'llhd.drv' operation before the 'moveBefore' operation by adjusting
92/// the 'enable' operand.
93static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator,
94 Operation *moveBefore,
95 DenseMap<Block *, Value> &mem) {
96 OpBuilder builder(drvOp);
97 builder.setInsertionPoint(moveBefore);
98 Block *drvParentBlock = drvOp->getBlock();
99
100 // Find sequence of branch decisions and add them as a sequence of
101 // instructions to the TR exiting block
103 builder, drvParentBlock, dominator, mem);
104
105 if (drvOp.getEnable())
106 finalValue = builder.create<comb::AndOp>(drvOp.getLoc(), drvOp.getEnable(),
107 finalValue);
108
109 drvOp.getEnableMutable().assign(finalValue);
110 drvOp->moveBefore(moveBefore);
111}
112
113namespace {
114struct TemporalCodeMotionPass
115 : public llhd::impl::TemporalCodeMotionBase<TemporalCodeMotionPass> {
116 void runOnOperation() override;
117 LogicalResult runOnProcess(llhd::ProcessOp procOp);
118};
119} // namespace
120
121void TemporalCodeMotionPass::runOnOperation() {
122 for (auto proc : getOperation().getOps<llhd::ProcessOp>())
123 (void)runOnProcess(proc); // Ignore processes that could not be lowered
124}
125
126LogicalResult TemporalCodeMotionPass::runOnProcess(llhd::ProcessOp procOp) {
129 unsigned numTRs = trAnalysis.getNumTemporalRegions();
130
131 // Only support processes with max. 2 temporal regions and one wait terminator
132 // as this is enough to represent flip-flops, registers, etc.
133 // NOTE: there always has to be either a wait or halt terminator in a process
134 // If the wait block creates the backwards edge, we only have one TR,
135 // otherwise we have 2 TRs
136 // NOTE: as the wait instruction needs to be on every path around the loop,
137 // it has to be the only exiting block of its TR
138 // NOTE: the other TR can either have only one exiting block, then we do not
139 // need to add an auxillary block, otherwise we have to add one
140 // NOTE: All drive operations have to be moved to the single exiting block of
141 // its TR. To do so, add the condition under which its block is reached from
142 // the TR entry block as a gating condition to the 'llhd.drv' operation
143 // NOTE: the entry blocks that are not part of the infinite loop do not count
144 // as TR and have TR number -1
145 // TODO: need to check that entry blocks that are note part of the loop to not
146 // have any instructions that have side effects that should not be allowed
147 // outside of the loop (drv, prb, ...)
148 // TODO: add support for more TRs and wait terminators (e.g., to represent
149 // FSMs)
150 if (numTRs > 2)
151 return failure();
152
153 bool seenWait = false;
154 WalkResult walkResult = procOp.walk([&](llhd::WaitOp op) -> WalkResult {
155 if (seenWait)
156 return failure();
157
158 // Check that the block containing the wait is the only exiting block of
159 // that TR
160 int trId = trAnalysis.getBlockTR(op->getBlock());
161 if (!trAnalysis.hasSingleExitBlock(trId))
162 return failure();
163
164 seenWait = true;
165 return WalkResult::advance();
166 });
167 if (walkResult.wasInterrupted())
168 return failure();
169
170 //===--------------------------------------------------------------------===//
171 // Create unique exit block per TR
172 //===--------------------------------------------------------------------===//
173
174 // TODO: consider the case where a wait brances to itself
175 for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
176 unsigned numTRSuccs = trAnalysis.getNumTRSuccessors(currTR);
177 (void)numTRSuccs;
178 // NOTE: Above error checks make this impossible to trigger, but the above
179 // are changed this one might have to be promoted to a proper error message.
180 assert((numTRSuccs == 1 ||
181 (numTRSuccs == 2 && trAnalysis.isOwnTRSuccessor(currTR))) &&
182 "only TRs with a single TR as possible successor are "
183 "supported for now.");
184
185 if (trAnalysis.hasSingleExitBlock(currTR))
186 continue;
187
188 // Get entry block of successor TR
189 Block *succTREntry =
190 trAnalysis.getTREntryBlock(*trAnalysis.getTRSuccessors(currTR).begin());
191
192 // Create the auxillary block as we currently don't have a single exiting
193 // block and give it the same arguments as the entry block of the
194 // successor TR
195 Block *auxBlock = new Block();
196 auxBlock->addArguments(
197 succTREntry->getArgumentTypes(),
198 SmallVector<Location>(succTREntry->getNumArguments(), procOp.getLoc()));
199
200 // Insert the auxillary block after the last block of the current TR
201 procOp.getBody().getBlocks().insertAfter(
202 Region::iterator(trAnalysis.getExitingBlocksInTR(currTR).back()),
203 auxBlock);
204
205 // Let all current exit blocks branch to the auxillary block instead.
206 for (Block *exit : trAnalysis.getExitingBlocksInTR(currTR))
207 for (auto [i, succ] : llvm::enumerate(exit->getSuccessors()))
208 if (trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
209 exit->getTerminator()->setSuccessor(auxBlock, i);
210
211 // Let the auxiallary block branch to the entry block of the successor
212 // temporal region entry block
213 OpBuilder b(procOp);
214 b.setInsertionPointToEnd(auxBlock);
215 b.create<cf::BranchOp>(procOp.getLoc(), succTREntry,
216 auxBlock->getArguments());
217 }
218
219 //===--------------------------------------------------------------------===//
220 // Move drive instructions
221 //===--------------------------------------------------------------------===//
222
223 DenseMap<Operation *, Block *> drvPos;
224
225 // Force a new analysis as we have changed the CFG
226 trAnalysis = llhd::TemporalRegionAnalysis(procOp);
227 numTRs = trAnalysis.getNumTemporalRegions();
228 OpBuilder builder(procOp);
229
230 for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
231 DenseMap<Block *, Value> mem;
232
233 // We ensured this in the previous phase above.
234 assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);
235
236 Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
237 Block *entryBlock = trAnalysis.getTREntryBlock(currTR);
238
239 DominanceInfo dom(procOp);
240 Block *dominator = exitingBlock;
241
242 // Collect all 'llhd.drv' operations in the process and compute their common
243 // dominator block.
244 procOp.walk([&](llhd::DrvOp op) {
245 if (trAnalysis.getBlockTR(op.getOperation()->getBlock()) ==
246 static_cast<int>(currTR)) {
247 Block *parentBlock = op.getOperation()->getBlock();
248 drvPos[op] = parentBlock;
249 dominator = dom.findNearestCommonDominator(dominator, parentBlock);
250 }
251 });
252
253 // Set insertion point before first 'llhd.drv' op in the exiting block
254 Operation *moveBefore = exitingBlock->getTerminator();
255 exitingBlock->walk([&](llhd::DrvOp op) { moveBefore = op; });
256
257 assert(dominator &&
258 "could not find nearest common dominator for TR exiting "
259 "block and the block containing drv");
260
261 // If the dominator isn't already a TR entry block, set it to the nearest
262 // dominating TR entry block.
263 if (trAnalysis.getBlockTR(dominator) != static_cast<int>(currTR))
264 dominator = trAnalysis.getTREntryBlock(currTR);
265
266 std::queue<Block *> workQueue;
267 SmallPtrSet<Block *, 32> workDone;
268
269 if (entryBlock != exitingBlock)
270 workQueue.push(entryBlock);
271
272 while (!workQueue.empty()) {
273 Block *block = workQueue.front();
274 workQueue.pop();
275 workDone.insert(block);
276
277 builder.setInsertionPoint(moveBefore);
278 SmallVector<llhd::DrvOp> drives(block->getOps<llhd::DrvOp>());
279 for (auto drive : drives)
280 moveDriveOpBefore(drive, dominator, moveBefore, mem);
281
282 for (Block *succ : block->getSuccessors()) {
283 if (succ == exitingBlock ||
284 trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
285 continue;
286
287 if (llvm::all_of(succ->getPredecessors(), [&](Block *block) {
288 return workDone.contains(block);
289 }))
290 workQueue.push(succ);
291 }
292 }
293
294 // Merge entry and exit block of each TR, remove all other blocks
295 if (entryBlock != exitingBlock) {
296 entryBlock->getTerminator()->erase();
297 entryBlock->getOperations().splice(entryBlock->end(),
298 exitingBlock->getOperations());
299 }
300 }
301
302 IRRewriter rewriter(procOp);
303 (void)mlir::eraseUnreachableBlocks(rewriter, procOp->getRegions());
304
305 //===--------------------------------------------------------------------===//
306 // Coalesce multiple drives to the same signal
307 //===--------------------------------------------------------------------===//
308
309 trAnalysis = llhd::TemporalRegionAnalysis(procOp);
310 DominanceInfo dom(procOp);
311 for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
312 // We ensured this in the previous phase above.
313 assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);
314
315 Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
316 DenseMap<std::pair<Value, Value>, llhd::DrvOp> sigToDrv;
317
318 SmallVector<llhd::DrvOp> drives(exitingBlock->getOps<llhd::DrvOp>());
319 for (auto op : drives) {
320 auto sigTimePair = std::make_pair(op.getSignal(), op.getTime());
321 if (!sigToDrv.count(sigTimePair)) {
322 sigToDrv[sigTimePair] = op;
323 continue;
324 }
325
326 OpBuilder builder(op);
327 if (op.getEnable()) {
328 // Multiplex value to be driven
329 auto firstDrive = sigToDrv[sigTimePair];
330 Value muxValue = builder.create<comb::MuxOp>(
331 op.getLoc(), op.getEnable(), op.getValue(), firstDrive.getValue());
332 op.getValueMutable().assign(muxValue);
333
334 // Take the disjunction of the enable conditions
335 if (firstDrive.getEnable()) {
336 Value orVal = builder.create<comb::OrOp>(op.getLoc(), op.getEnable(),
337 firstDrive.getEnable());
338 op.getEnableMutable().assign(orVal);
339 } else {
340 // No enable is equivalent to a constant 'true' enable
341 op.getEnableMutable().clear();
342 }
343 }
344
345 sigToDrv[sigTimePair]->erase();
346 sigToDrv[sigTimePair] = op;
347 }
348 }
349
350 return success();
351}
assert(baseType &&"element must be base type")
static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator, Operation *moveBefore, DenseMap< Block *, Value > &mem)
More a 'llhd.drv' operation before the 'moveBefore' operation by adjusting the 'enable' operand.
static Value getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock, Block *dominator, DenseMap< Block *, Value > &mem)
Explore all paths from the 'driveBlock' to the 'dominator' block and construct a boolean expression a...
create(data_type, value)
Definition hw.py:433
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
SmallVector< Block *, 8 > getExitingBlocksInTR(int) const
SmallVector< int, 8 > getTRSuccessors(int)