CIRCT  18.0.0git
CFToHandshake.cpp
Go to the documentation of this file.
1 //===- CFToHandshake.cpp - Convert standard MLIR into dataflow IR ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 //
8 // This is the main Standard to Handshake Conversion Pass Implementation.
9 //
10 //===----------------------------------------------------------------------===//
11 
13 #include "../PassDetail.h"
17 #include "mlir/Analysis/CFGLoopInfo.h"
18 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
19 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
20 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
21 #include "mlir/Dialect/Affine/IR/AffineOps.h"
22 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
23 #include "mlir/Dialect/Affine/Utils.h"
24 #include "mlir/Dialect/Arith/IR/Arith.h"
25 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
26 #include "mlir/Dialect/Func/IR/FuncOps.h"
27 #include "mlir/Dialect/MemRef/IR/MemRef.h"
28 #include "mlir/Dialect/SCF/IR/SCF.h"
29 #include "mlir/IR/Builders.h"
30 #include "mlir/IR/BuiltinOps.h"
31 #include "mlir/IR/Diagnostics.h"
32 #include "mlir/IR/Dominance.h"
33 #include "mlir/IR/OpImplementation.h"
34 #include "mlir/IR/PatternMatch.h"
35 #include "mlir/IR/Types.h"
36 #include "mlir/IR/Value.h"
37 #include "mlir/Pass/Pass.h"
38 #include "mlir/Support/LLVM.h"
39 #include "mlir/Transforms/DialectConversion.h"
40 #include "mlir/Transforms/Passes.h"
41 #include "llvm/ADT/SmallSet.h"
42 #include "llvm/ADT/TypeSwitch.h"
43 #include "llvm/Support/raw_ostream.h"
44 
45 #include <list>
46 #include <map>
47 
48 using namespace mlir;
49 using namespace mlir::func;
50 using namespace mlir::affine;
51 using namespace circt;
52 using namespace circt::handshake;
53 using namespace std;
54 
55 // ============================================================================
56 // Partial lowering infrastructure
57 // ============================================================================
58 
59 namespace {
/// Conversion target for partial lowerings rooted at a single operation type
/// TOp. All dialects relevant to the handshake lowering are marked legal up
/// front; TOp itself is only reported legal once its partial lowering has run
/// (tracked per-operation in `loweredOps`), which guarantees the pattern fires
/// exactly once per root op.
template <typename TOp>
class LowerOpTarget : public ConversionTarget {
public:
  explicit LowerOpTarget(MLIRContext &context) : ConversionTarget(context) {
    loweredOps.clear();
    addLegalDialect<HandshakeDialect>();
    addLegalDialect<mlir::func::FuncDialect>();
    addLegalDialect<mlir::arith::ArithDialect>();
    addIllegalDialect<mlir::scf::SCFDialect>();
    addIllegalDialect<AffineDialect>();

    /// The root operation to be replaced is marked dynamically legal
    /// based on the lowering status of the given operation, see
    /// PartialLowerOp.
    addDynamicallyLegalOp<TOp>([&](const auto &op) { return loweredOps[op]; });
  }
  // Per root operation: true once its partial lowering has been applied.
  // Queried by the dynamic legality callback above.
  DenseMap<Operation *, bool> loweredOps;
};
78 
/// Default function for partial lowering of handshake::FuncOp. Lowering is
/// achieved by a provided partial lowering function.
///
/// A partial lowering function may only replace a subset of the operations
/// within the funcOp currently being lowered. However, the dialect conversion
/// scheme requires the matched root operation to be replaced/updated, if the
/// match was successful. To facilitate this, rewriter.updateRootInPlace
/// wraps the partial update function.
/// Next, the function operation is expected to go from illegal to legalized,
/// after matchAndRewrite returned true. To work around this,
/// LowerFuncOpTarget::loweredFuncs is used to communicate between the target
/// and the conversion, to indicate that the partial lowering was completed.
template <typename TOp>
struct PartialLowerOp : public ConversionPattern {
  using PartialLoweringFunc =
      std::function<LogicalResult(TOp, ConversionPatternRewriter &)>;

public:
  PartialLowerOp(LowerOpTarget<TOp> &target, MLIRContext *context,
                 LogicalResult &loweringResRef, const PartialLoweringFunc &fun)
      : ConversionPattern(TOp::getOperationName(), 1, context), target(target),
        loweringRes(loweringResRef), fun(fun) {}
  using ConversionPattern::ConversionPattern;
  LogicalResult
  matchAndRewrite(Operation *op, ArrayRef<Value> /*operands*/,
                  ConversionPatternRewriter &rewriter) const override {
    assert(isa<TOp>(op));
    // Run the user-provided lowering inside an in-place update so the
    // conversion driver accepts the root op as having been rewritten.
    rewriter.updateRootInPlace(
        op, [&] { loweringRes = fun(dyn_cast<TOp>(op), rewriter); });
    // Tell the target that this root is now legal so the pattern is not
    // re-applied to it.
    target.loweredOps[op] = true;
    return loweringRes;
  };

private:
  LowerOpTarget<TOp> &target;
  // Out-parameter carrying the lowering function's result to the caller of
  // applyPartialConversion.
  LogicalResult &loweringRes;
  // NOTE: this is basically the rewrite function
  PartialLoweringFunc fun;
};
118 } // namespace
119 
120 // Convenience function for running lowerToHandshake with a partial
121 // handshake::FuncOp lowering function.
122 template <typename TOp>
123 static LogicalResult partiallyLowerOp(
124  const std::function<LogicalResult(TOp, ConversionPatternRewriter &)>
125  &loweringFunc,
126  MLIRContext *ctx, TOp op) {
127 
128  RewritePatternSet patterns(ctx);
129  auto target = LowerOpTarget<TOp>(*ctx);
130  LogicalResult partialLoweringSuccessfull = success();
131  patterns.add<PartialLowerOp<TOp>>(target, ctx, partialLoweringSuccessfull,
132  loweringFunc);
133  return success(
134  applyPartialConversion(op, target, std::move(patterns)).succeeded() &&
135  partialLoweringSuccessfull.succeeded());
136 }
137 
138 class LowerRegionTarget : public ConversionTarget {
139 public:
140  explicit LowerRegionTarget(MLIRContext &context, Region &region)
141  : ConversionTarget(context), region(region) {
142  // The root operation is marked dynamically legal to ensure
143  // the pattern on its region is only applied once.
144  markUnknownOpDynamicallyLegal([&](Operation *op) {
145  if (op != region.getParentOp())
146  return true;
147  return opLowered;
148  });
149  }
150  bool opLowered = false;
151  Region &region;
152 };
153 
154 /// Allows to partially lower a region by matching on the parent operation to
155 /// then call the provided partial lowering function with the region and the
156 /// rewriter.
157 ///
158 /// The interplay with the target is similar to PartialLowerOp
159 struct PartialLowerRegion : public ConversionPattern {
161  std::function<LogicalResult(Region &, ConversionPatternRewriter &)>;
162 
163 public:
164  PartialLowerRegion(LowerRegionTarget &target, MLIRContext *context,
165  LogicalResult &loweringResRef,
166  const PartialLoweringFunc &fun)
167  : ConversionPattern(target.region.getParentOp()->getName().getStringRef(),
168  1, context),
169  target(target), loweringRes(loweringResRef), fun(fun) {}
170  using ConversionPattern::ConversionPattern;
171  LogicalResult
172  matchAndRewrite(Operation *op, ArrayRef<Value> /*operands*/,
173  ConversionPatternRewriter &rewriter) const override {
174  rewriter.updateRootInPlace(
175  op, [&] { loweringRes = fun(target.region, rewriter); });
176 
177  target.opLowered = true;
178  return loweringRes;
179  };
180 
181 private:
183  LogicalResult &loweringRes;
185 };
186 
187 LogicalResult
189  MLIRContext *ctx, Region &r) {
190 
191  Operation *op = r.getParentOp();
192  RewritePatternSet patterns(ctx);
193  auto target = LowerRegionTarget(*ctx, r);
194  LogicalResult partialLoweringSuccessfull = success();
195  patterns.add<PartialLowerRegion>(target, ctx, partialLoweringSuccessfull,
196  loweringFunc);
197  return success(
198  applyPartialConversion(op, target, std::move(patterns)).succeeded() &&
199  partialLoweringSuccessfull.succeeded());
200 }
201 
/// Returns failure() from the enclosing function if `logicalResult` failed.
/// Wrapped in do/while(0) so the macro expands to a single statement and is
/// safe inside un-braced if/else (the bare `if` form swallows a following
/// `else`). Call sites must terminate the macro with a semicolon.
#define returnOnError(logicalResult)                                           \
  do {                                                                         \
    if (failed(logicalResult))                                                 \
      return failure();                                                        \
  } while (0)
205 
206 // ============================================================================
207 // Start of lowering passes
208 // ============================================================================
209 
210 Value HandshakeLowering::getBlockEntryControl(Block *block) const {
211  auto it = blockEntryControlMap.find(block);
212  assert(it != blockEntryControlMap.end() &&
213  "No block entry control value registerred for this block!");
214  return it->second;
215 }
216 
/// Registers `v` as the control-only entry token of `block`, overwriting any
/// previously registered value.
void HandshakeLowering::setBlockEntryControl(Block *block, Value v) {
  blockEntryControlMap[block] = v;
}
220 
222  Block *entryBlock = &r.front();
223  auto &entryBlockOps = entryBlock->getOperations();
224 
225  // Move all operations to entry block and erase other blocks.
226  for (Block &block : llvm::make_early_inc_range(llvm::drop_begin(r, 1))) {
227  entryBlockOps.splice(entryBlockOps.end(), block.getOperations());
228 
229  block.clear();
230  block.dropAllDefinedValueUses();
231  for (size_t i = 0; i < block.getNumArguments(); i++) {
232  block.eraseArgument(i);
233  }
234  block.erase();
235  }
236 
237  // Remove any control flow operations, and move the non-control flow
238  // terminator op to the end of the entry block.
239  for (Operation &terminatorLike : llvm::make_early_inc_range(*entryBlock)) {
240  if (!terminatorLike.hasTrait<OpTrait::IsTerminator>())
241  continue;
242 
243  if (isa<mlir::cf::CondBranchOp, mlir::cf::BranchOp>(terminatorLike)) {
244  terminatorLike.erase();
245  continue;
246  }
247 
248  // Else, assume that this is a return-like terminator op.
249  terminatorLike.moveBefore(entryBlock, entryBlock->end());
250  }
251 }
252 
253 void removeBasicBlocks(handshake::FuncOp funcOp) {
254  if (funcOp.isExternal())
255  return; // nothing to do, external funcOp.
256 
257  removeBasicBlocks(funcOp.getBody());
258 }
259 
260 static LogicalResult isValidMemrefType(Location loc, mlir::MemRefType type) {
261  if (type.getNumDynamicDims() != 0 || type.getShape().size() != 1)
262  return emitError(loc) << "memref's must be both statically sized and "
263  "unidimensional.";
264  return success();
265 }
266 
267 static unsigned getBlockPredecessorCount(Block *block) {
268  // Returns number of block predecessors
269  auto predecessors = block->getPredecessors();
270  return std::distance(predecessors.begin(), predecessors.end());
271 }
272 
273 // Insert appropriate type of Merge CMerge for control-only path,
274 // Merge for single-successor blocks, Mux otherwise
276 HandshakeLowering::insertMerge(Block *block, Value val,
277  BackedgeBuilder &edgeBuilder,
278  ConversionPatternRewriter &rewriter) {
279  unsigned numPredecessors = getBlockPredecessorCount(block);
280  auto insertLoc = block->front().getLoc();
281  SmallVector<Backedge> dataEdges;
282  SmallVector<Value> operands;
283 
284  // Every block (except the entry block) needs to feed it's entry control into
285  // a control merge
286  if (val == getBlockEntryControl(block)) {
287 
288  Operation *mergeOp;
289  if (block == &r.front()) {
290  // For consistency within the entry block, replace the latter's entry
291  // control with the output of a MergeOp that takes the control-only
292  // network's start point as input. This makes it so that only the
293  // MergeOp's output is used as a control within the entry block, instead
294  // of a combination of the MergeOp's output and the function/block control
295  // argument. Taking this step out should have no impact on functionality
296  // but would make the resulting IR less "regular"
297  operands.push_back(val);
298  mergeOp = rewriter.create<handshake::MergeOp>(insertLoc, operands);
299  } else {
300  for (unsigned i = 0; i < numPredecessors; i++) {
301  auto edge = edgeBuilder.get(rewriter.getNoneType());
302  dataEdges.push_back(edge);
303  operands.push_back(Value(edge));
304  }
305  mergeOp = rewriter.create<handshake::ControlMergeOp>(insertLoc, operands);
306  }
307  setBlockEntryControl(block, mergeOp->getResult(0));
308  return MergeOpInfo{mergeOp, val, dataEdges};
309  }
310 
311  // Every live-in value to a block is passed through a merge-like operation,
312  // even when it's not required for circuit correctness (useless merge-like
313  // operations are removed down the line during handshake canonicalization)
314 
315  // Insert "dummy" MergeOp's for blocks with less than two predecessors
316  if (numPredecessors <= 1) {
317  if (numPredecessors == 0) {
318  // All of the entry block's block arguments get passed through a dummy
319  // MergeOp. There is no need for a backedge here as the unique operand can
320  // be resolved immediately
321  operands.push_back(val);
322  } else {
323  // The value incoming from the single block predecessor will be resolved
324  // later during merge reconnection
325  auto edge = edgeBuilder.get(val.getType());
326  dataEdges.push_back(edge);
327  operands.push_back(Value(edge));
328  }
329  auto merge = rewriter.create<handshake::MergeOp>(insertLoc, operands);
330  return MergeOpInfo{merge, val, dataEdges};
331  }
332 
333  // Create a backedge for the index operand, and another one for each data
334  // operand. The index operand will eventually resolve to the current block's
335  // control merge index output, while data operands will resolve to their
336  // respective values from each block predecessor
337  Backedge indexEdge = edgeBuilder.get(rewriter.getIndexType());
338  for (unsigned i = 0; i < numPredecessors; i++) {
339  auto edge = edgeBuilder.get(val.getType());
340  dataEdges.push_back(edge);
341  operands.push_back(Value(edge));
342  }
343  auto mux =
344  rewriter.create<handshake::MuxOp>(insertLoc, Value(indexEdge), operands);
345  return MergeOpInfo{mux, val, dataEdges, indexEdge};
346 }
347 
349 HandshakeLowering::insertMergeOps(HandshakeLowering::ValueMap &mergePairs,
350  BackedgeBuilder &edgeBuilder,
351  ConversionPatternRewriter &rewriter) {
352  HandshakeLowering::BlockOps blockMerges;
353  for (Block &block : r) {
354  rewriter.setInsertionPointToStart(&block);
355 
356  // All of the block's live-ins are passed explictly through block arguments
357  // thanks to prior SSA maximization
358  for (auto &arg : block.getArguments()) {
359  // No merges on memref block arguments; these are handled separately
360  if (arg.getType().isa<mlir::MemRefType>())
361  continue;
362 
363  auto mergeInfo = insertMerge(&block, arg, edgeBuilder, rewriter);
364  blockMerges[&block].push_back(mergeInfo);
365  mergePairs[arg] = mergeInfo.op->getResult(0);
366  }
367  }
368  return blockMerges;
369 }
370 
371 // Get value from predBlock which will be set as operand of op (merge)
373  Block *predBlock) {
374  // The input value to the merge operations
375  Value srcVal = mergeInfo.val;
376  // The block the merge operation belongs to
377  Block *block = mergeInfo.op->getBlock();
378 
379  // The block terminator is either a cf-level branch or cf-level conditional
380  // branch. In either case, identify the value passed to the block using its
381  // index in the list of block arguments
382  unsigned index = srcVal.cast<BlockArgument>().getArgNumber();
383  Operation *termOp = predBlock->getTerminator();
384  if (mlir::cf::CondBranchOp br = dyn_cast<mlir::cf::CondBranchOp>(termOp)) {
385  // Block should be one of the two destinations of the conditional branch
386  if (block == br.getTrueDest())
387  return br.getTrueOperand(index);
388  assert(block == br.getFalseDest());
389  return br.getFalseOperand(index);
390  }
391  if (isa<mlir::cf::BranchOp>(termOp))
392  return termOp->getOperand(index);
393  return nullptr;
394 }
395 
396 static void removeBlockOperands(Region &f) {
397  // Remove all block arguments, they are no longer used
398  // eraseArguments also removes corresponding branch operands
399  for (Block &block : f) {
400  if (!block.isEntryBlock()) {
401  int x = block.getNumArguments() - 1;
402  for (int i = x; i >= 0; --i)
403  block.eraseArgument(i);
404  }
405  }
406 }
407 
408 /// Returns the first occurance of an operation of type TOp, else, returns
409 /// null op.
410 template <typename TOp>
411 static Operation *getFirstOp(Block *block) {
412  auto ops = block->getOps<TOp>();
413  if (ops.empty())
414  return nullptr;
415  return *ops.begin();
416 }
417 
/// Returns the block's control merge (the first ControlMergeOp in the block),
/// or nullptr if the block contains none.
static Operation *getControlMerge(Block *block) {
  return getFirstOp<ControlMergeOp>(block);
}
421 
422 static ConditionalBranchOp getControlCondBranch(Block *block) {
423  for (auto cbranch : block->getOps<handshake::ConditionalBranchOp>()) {
424  if (cbranch.isControl())
425  return cbranch;
426  }
427  return nullptr;
428 }
429 
430 static void reconnectMergeOps(Region &r,
431  HandshakeLowering::BlockOps blockMerges,
432  HandshakeLowering::ValueMap &mergePairs) {
433  // At this point all merge-like operations have backedges as operands.
434  // We here replace all backedge values with appropriate value from
435  // predecessor block. The predecessor can either be a merge, the original
436  // defining value, or a branch operand.
437 
438  for (Block &block : r) {
439  for (auto &mergeInfo : blockMerges[&block]) {
440  int operandIdx = 0;
441  // Set appropriate operand from each predecessor block
442  for (auto *predBlock : block.getPredecessors()) {
443  Value mgOperand = getMergeOperand(mergeInfo, predBlock);
444  assert(mgOperand != nullptr);
445  if (!mgOperand.getDefiningOp()) {
446  assert(mergePairs.count(mgOperand));
447  mgOperand = mergePairs[mgOperand];
448  }
449  mergeInfo.dataEdges[operandIdx].setValue(mgOperand);
450  operandIdx++;
451  }
452 
453  // Reconnect all operands originating from livein defining value through
454  // corresponding merge of that block
455  for (Operation &opp : block)
456  if (!isa<MergeLikeOpInterface>(opp))
457  opp.replaceUsesOfWith(mergeInfo.val, mergeInfo.op->getResult(0));
458  }
459  }
460 
461  // Connect select operand of muxes to control merge's index result in all
462  // blocks with more than one predecessor
463  for (Block &block : r) {
464  if (getBlockPredecessorCount(&block) > 1) {
465  Operation *cntrlMg = getControlMerge(&block);
466  assert(cntrlMg != nullptr);
467 
468  for (auto &mergeInfo : blockMerges[&block]) {
469  if (mergeInfo.op != cntrlMg) {
470  // If the block has multiple predecessors, merge-like operation that
471  // are not the block's control merge must have an index operand (at
472  // this point, an index backedge)
473  assert(mergeInfo.indexEdge.has_value());
474  (*mergeInfo.indexEdge).setValue(cntrlMg->getResult(1));
475  }
476  }
477  }
478  }
479 
481 }
482 
483 static bool isAllocOp(Operation *op) {
484  return isa<memref::AllocOp, memref::AllocaOp>(op);
485 }
486 
487 LogicalResult
488 HandshakeLowering::addMergeOps(ConversionPatternRewriter &rewriter) {
489 
490  // Stores mapping from each value that pass through a merge operation to the
491  // first result of that merge operation
492  ValueMap mergePairs;
493 
494  // Create backedge builder to manage operands of merge operations between
495  // insertion and reconnection
496  BackedgeBuilder edgeBuilder{rewriter, r.front().front().getLoc()};
497 
498  // Insert merge operations (with backedges instead of actual operands)
499  BlockOps mergeOps = insertMergeOps(mergePairs, edgeBuilder, rewriter);
500 
501  // Reconnect merge operations with values incoming from predecessor blocks
502  // and resolve all backedges that were created during merge insertion
503  reconnectMergeOps(r, mergeOps, mergePairs);
504  return success();
505 }
506 
507 static bool isLiveOut(Value val) {
508  // Identifies liveout values after adding Merges
509  for (auto &u : val.getUses())
510  // Result is liveout if used by some Merge block
511  if (isa<MergeLikeOpInterface>(u.getOwner()))
512  return true;
513  return false;
514 }
515 
516 // A value can have multiple branches in a single successor block
517 // (for instance, there can be an SSA phi and a merge that we insert)
518 // This function determines the number of branches to insert based on the
519 // value uses in successor blocks
520 static int getBranchCount(Value val, Block *block) {
521  int uses = 0;
522  for (int i = 0, e = block->getNumSuccessors(); i < e; ++i) {
523  int curr = 0;
524  Block *succ = block->getSuccessor(i);
525  for (auto &u : val.getUses()) {
526  if (u.getOwner()->getBlock() == succ)
527  curr++;
528  }
529  uses = (curr > uses) ? curr : uses;
530  }
531  return uses;
532 }
533 
534 namespace {
535 
/// This class inserts a reorder prevention mechanism for blocks with multiple
/// successors. Such a mechanism is required to guarantee correct execution in a
/// multi-threaded usage of the circuits.
///
/// The order of the results matches the order of the traversals of the
/// divergence point. A FIFO buffer stores the condition of the conditional
/// branch. The buffer feeds a mux that guarantees the correct out-order.
class FeedForwardNetworkRewriter {
public:
  FeedForwardNetworkRewriter(HandshakeLowering &hl,
                             ConversionPatternRewriter &rewriter)
      : hl(hl), rewriter(rewriter), postDomInfo(hl.getRegion().getParentOp()),
        domInfo(hl.getRegion().getParentOp()),
        loopInfo(domInfo.getDomTree(&hl.getRegion())) {}
  // Runs the rewrite over every split/merge block pair found in the region.
  LogicalResult apply();

private:
  // Shared lowering state (region, block entry controls).
  HandshakeLowering &hl;
  ConversionPatternRewriter &rewriter;
  // Analyses computed once over the region's parent operation.
  PostDominanceInfo postDomInfo;
  DominanceInfo domInfo;
  CFGLoopInfo loopInfo;

  // A <split block, merge block> pair: a divergence point and the block where
  // its two control-flow paths reconverge.
  using BlockPair = std::pair<Block *, Block *>;
  using BlockPairs = SmallVector<BlockPair>;
  LogicalResult findBlockPairs(BlockPairs &blockPairs);

  // Creates the FIFO buffer recording the branch condition at the split block;
  // also reports the control-only conditional branch through `ctrlBr`.
  BufferOp buildSplitNetwork(Block *splitBlock,
                             handshake::ConditionalBranchOp &ctrlBr);
  // Replaces the merge block's control merge with a mux selected by the
  // buffered condition.
  LogicalResult buildMergeNetwork(Block *mergeBlock, BufferOp buf,
                                  handshake::ConditionalBranchOp &ctrlBr);

  // Determines if the cmerge inputs match the cond_br output order.
  bool requiresOperandFlip(ControlMergeOp &ctrlMerge,
                           handshake::ConditionalBranchOp &ctrlBr);
  bool formsIrreducibleCF(Block *splitBlock, Block *mergeBlock);
};
573 } // namespace
574 
575 LogicalResult
576 HandshakeLowering::feedForwardRewriting(ConversionPatternRewriter &rewriter) {
577  // Nothing to do on a single block region.
578  if (this->getRegion().hasOneBlock())
579  return success();
580  return FeedForwardNetworkRewriter(*this, rewriter).apply();
581 }
582 
583 [[maybe_unused]] static bool loopsHaveSingleExit(CFGLoopInfo &loopInfo) {
584  for (CFGLoop *loop : loopInfo.getTopLevelLoops())
585  if (!loop->getExitBlock())
586  return false;
587  return true;
588 }
589 
/// Returns true if the control flow between `splitBlock` and `mergeBlock` is
/// irreducible: some non-loop predecessor of the merge block is dominated by
/// neither successor of the split block, i.e. control can reconverge outside
/// the split/merge pair.
bool FeedForwardNetworkRewriter::formsIrreducibleCF(Block *splitBlock,
                                                    Block *mergeBlock) {
  CFGLoop *loop = loopInfo.getLoopFor(mergeBlock);
  for (auto *mergePred : mergeBlock->getPredecessors()) {
    // Skip loop predecessors
    if (loop && loop->contains(mergePred))
      continue;

    // A DAG-CFG is irreducible, iff a merge block has a predecessor that can be
    // reached from both successors of a split node, e.g., neither is a
    // dominator.
    // => Their control flow can merge in other places, which makes this
    // irreducible.
    if (llvm::none_of(splitBlock->getSuccessors(), [&](Block *splitSucc) {
          // Trivial cases: the successor is the merge block itself, or the
          // predecessor is the split block itself.
          if (splitSucc == mergeBlock || mergePred == splitBlock)
            return true;
          return domInfo.dominates(splitSucc, mergePred);
        }))
      return true;
  }
  return false;
}
612 
613 static Operation *findBranchToBlock(Block *block) {
614  Block *pred = *block->getPredecessors().begin();
615  return pred->getTerminator();
616 }
617 
/// Collects, for every two-way split block in the region, the merge block
/// where its paths reconverge (the nearest common post-dominator of its two
/// successors). Fails on irreducible control flow or merge blocks with more
/// than two non-loop predecessors.
LogicalResult
FeedForwardNetworkRewriter::findBlockPairs(BlockPairs &blockPairs) {
  // assumes that merge block insertion happended beforehand
  // Thus, for each split block, there exists one merge block which is the post
  // dominator of the child nodes.
  Region &r = hl.getRegion();
  Operation *parentOp = r.getParentOp();

  // Assumes that each loop has only one exit block. Such an error should
  // already be reported by the loop rewriting.
  assert(loopsHaveSingleExit(loopInfo) &&
         "expected loop to only have one exit block.");

  for (Block &b : r) {
    if (b.getNumSuccessors() < 2)
      continue;

    // Loop headers cannot be merge blocks.
    if (loopInfo.getLoopFor(&b))
      continue;

    // cf.cond_br has exactly two successors.
    assert(b.getNumSuccessors() == 2);
    Block *succ0 = b.getSuccessor(0);
    Block *succ1 = b.getSuccessor(1);

    // Both edges target the same block: no divergence to protect.
    if (succ0 == succ1)
      continue;

    Block *mergeBlock = postDomInfo.findNearestCommonDominator(succ0, succ1);

    // Precondition checks
    if (formsIrreducibleCF(&b, mergeBlock)) {
      return parentOp->emitError("expected only reducible control flow.")
                 .attachNote(findBranchToBlock(mergeBlock)->getLoc())
             << "This branch is involved in the irreducible control flow";
    }

    // Count predecessors of the merge block that are not part of its loop.
    unsigned nonLoopPreds = 0;
    CFGLoop *loop = loopInfo.getLoopFor(mergeBlock);
    for (auto *pred : mergeBlock->getPredecessors()) {
      if (loop && loop->contains(pred))
        continue;
      nonLoopPreds++;
    }
    if (nonLoopPreds > 2)
      return parentOp
                 ->emitError("expected a merge block to have two predecessors. "
                             "Did you run the merge block insertion pass?")
                 .attachNote(findBranchToBlock(mergeBlock)->getLoc())
             << "This branch jumps to the illegal block";

    blockPairs.emplace_back(&b, mergeBlock);
  }

  return success();
}
674 
675 LogicalResult FeedForwardNetworkRewriter::apply() {
676  BlockPairs pairs;
677 
678  if (failed(findBlockPairs(pairs)))
679  return failure();
680 
681  for (auto [splitBlock, mergeBlock] : pairs) {
682  handshake::ConditionalBranchOp ctrlBr;
683  BufferOp buffer = buildSplitNetwork(splitBlock, ctrlBr);
684  if (failed(buildMergeNetwork(mergeBlock, buffer, ctrlBr)))
685  return failure();
686  }
687 
688  return success();
689 }
690 
691 BufferOp FeedForwardNetworkRewriter::buildSplitNetwork(
692  Block *splitBlock, handshake::ConditionalBranchOp &ctrlBr) {
693  SmallVector<handshake::ConditionalBranchOp> branches;
694  llvm::copy(splitBlock->getOps<handshake::ConditionalBranchOp>(),
695  std::back_inserter(branches));
696 
697  auto *findRes = llvm::find_if(branches, [](auto br) {
698  return br.getDataOperand().getType().template isa<NoneType>();
699  });
700 
701  assert(findRes && "expected one branch for the ctrl signal");
702  ctrlBr = *findRes;
703 
704  Value cond = ctrlBr.getConditionOperand();
705  assert(llvm::all_of(branches, [&](auto branch) {
706  return branch.getConditionOperand() == cond;
707  }));
708 
709  Location loc = cond.getLoc();
710  rewriter.setInsertionPointAfterValue(cond);
711 
712  // The buffer size defines the number of threads that can be concurently
713  // traversing the sub-CFG starting at the splitBlock.
714  size_t bufferSize = 2;
715  // TODO how to size these?
716  // Longest path in a CFG-DAG would be O(#blocks)
717 
718  return rewriter.create<handshake::BufferOp>(loc, cond, bufferSize,
719  BufferTypeEnum::fifo);
720 }
721 
/// Replaces the control merge of `mergeBlock` with a mux selected by the
/// FIFO-buffered split condition `buf`, so tokens leave the merge block in
/// the order the split block was traversed.
LogicalResult FeedForwardNetworkRewriter::buildMergeNetwork(
    Block *mergeBlock, BufferOp buf, handshake::ConditionalBranchOp &ctrlBr) {
  // Replace control merge with mux
  auto ctrlMerges = mergeBlock->getOps<handshake::ControlMergeOp>();
  // Merge blocks carry exactly one control merge at this point.
  assert(std::distance(ctrlMerges.begin(), ctrlMerges.end()) == 1);

  handshake::ControlMergeOp ctrlMerge = *ctrlMerges.begin();
  // This input might contain irreducible loops that we cannot handle.
  if (ctrlMerge.getNumOperands() != 2)
    return ctrlMerge.emitError("expected cmerges to have two operands");
  rewriter.setInsertionPointAfter(ctrlMerge);
  Location loc = ctrlMerge->getLoc();

  // The newly inserted mux has to select the results from the correct operand.
  // As there is no guarantee on the order of cmerge inputs, the correct order
  // has to be determined first.
  bool requiresFlip = requiresOperandFlip(ctrlMerge, ctrlBr);
  SmallVector<Value> muxOperands;
  if (requiresFlip)
    muxOperands = llvm::to_vector(llvm::reverse(ctrlMerge.getOperands()));
  else
    muxOperands = llvm::to_vector(ctrlMerge.getOperands());

  // The buffered condition selects which reconverging path passes through.
  Value newCtrl = rewriter.create<handshake::MuxOp>(loc, buf, muxOperands);

  Value cond = buf.getResult();
  if (requiresFlip) {
    // As the mux operand order is the flipped cmerge input order, the index
    // which replaces the output of the cmerge has to be flipped/negated as
    // well.
    cond = rewriter.create<arith::XOrIOp>(
        loc, cond.getType(), cond,
        rewriter.create<arith::ConstantOp>(
            loc, rewriter.getIntegerAttr(rewriter.getI1Type(), 1)));
  }

  // Require a cast to index to stick to the type of the mux input.
  Value condAsIndex =
      rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(), cond);

  hl.setBlockEntryControl(mergeBlock, newCtrl);

  // Replace with new ctrl value from mux and the index
  rewriter.replaceOp(ctrlMerge, {newCtrl, condAsIndex});
  return success();
}
768 
/// Determines whether the cmerge's operand order is opposite to the cond_br's
/// output order, i.e. whether the replacement mux must flip its inputs.
bool FeedForwardNetworkRewriter::requiresOperandFlip(
    ControlMergeOp &ctrlMerge, handshake::ConditionalBranchOp &ctrlBr) {
  assert(ctrlMerge.getNumOperands() == 2 &&
         "Loops should already have been handled");

  Value fstOperand = ctrlMerge.getOperand(0);

  assert(ctrlBr.getTrueResult().hasOneUse() &&
         "expected the result of a branch to only have one user");
  Operation *trueUser = *ctrlBr.getTrueResult().user_begin();
  if (trueUser == ctrlBr)
    // The cmerge directly consumes the cond_br output.
    return ctrlBr.getTrueResult() == fstOperand;

  // The cmerge is consumed in an intermediate block. Find out if this block is
  // a predecessor of the "true" successor of the cmerge.
  Block *trueBlock = trueUser->getBlock();
  return domInfo.dominates(trueBlock, fstOperand.getDefiningOp()->getBlock());
}
788 
789 namespace {
// This class creates the loop 'continue' and 'exit' network around backedges
// in the CFG.
// We don't have a standard dialect based LoopInfo utility in MLIR
// (which could probably deduce most of the information that we need for this
// transformation), so we roll our own loop-detection analysis. This is
// simplified by the need to only detect outermost loops. Inner loops are
// not included in the loop network (since we only care about restricting
// different function invocations from activating a loop, not prevent loop
// pipelining within a single function invocation).
class LoopNetworkRewriter {
public:
  LoopNetworkRewriter(HandshakeLowering &hl) : hl(hl) {}

  // Rewrites every outermost loop of `r`; fails on loops without a unified
  // latch block.
  LogicalResult processRegion(Region &r, ConversionPatternRewriter &rewriter);

private:
  // An exit pair is a pair of <in loop block, outside loop block> that
  // indicates where control leaves a loop.
  using ExitPair = std::pair<Block *, Block *>;
  LogicalResult processOuterLoop(Location loc, CFGLoop *loop);

  // Builds the loop continue network in between the loop header and its loop
  // latch. The loop continuation network will replace the existing control
  // merge in the loop header with a mux + loop priming register.
  // The 'loopPrimingInput' is a backedge that will later be assigned by
  // 'buildExitNetwork'. The value is to be used as the input to the loop
  // priming buffer.
  // Returns a reference to the loop priming register.
  BufferOp buildContinueNetwork(Block *loopHeader, Block *loopLatch,
                                Backedge &loopPrimingInput);

  // Builds the loop exit network. This detects the conditional operands used in
  // each of the exit blocks, matches their parity with the convention used to
  // prime the loop register, and assigns it to the loop priming register input.
  void buildExitNetwork(Block *loopHeader,
                        const llvm::SmallSet<ExitPair, 2> &exitPairs,
                        BufferOp loopPrimingRegister,
                        Backedge &loopPrimingInput);

private:
  // Set by processRegion before any loop is rewritten.
  ConversionPatternRewriter *rewriter = nullptr;
  HandshakeLowering &hl;
};
833 } // namespace
834 
835 LogicalResult
836 HandshakeLowering::loopNetworkRewriting(ConversionPatternRewriter &rewriter) {
837  return LoopNetworkRewriter(*this).processRegion(r, rewriter);
838 }
839 
840 LogicalResult
841 LoopNetworkRewriter::processRegion(Region &r,
842  ConversionPatternRewriter &rewriter) {
843  // Nothing to do on a single block region.
844  if (r.hasOneBlock())
845  return success();
846  this->rewriter = &rewriter;
847 
848  Operation *op = r.getParentOp();
849 
850  DominanceInfo domInfo(op);
851  CFGLoopInfo loopInfo(domInfo.getDomTree(&r));
852 
853  for (CFGLoop *loop : loopInfo.getTopLevelLoops()) {
854  if (!loop->getLoopLatch())
855  return emitError(op->getLoc()) << "Multiple loop latches detected "
856  "(backedges from within the loop "
857  "to the loop header). Loop task "
858  "pipelining is only supported for "
859  "loops with unified loop latches.";
860 
861  // This is the start of an outer loop - go process!
862  if (failed(processOuterLoop(op->getLoc(), loop)))
863  return failure();
864  }
865 
866  return success();
867 }
868 
869 // Returns the operand of the 'mux' operation which originated from 'block'.
870 static Value getOperandFromBlock(MuxOp mux, Block *block) {
871  auto inValueIt = llvm::find_if(mux.getDataOperands(), [&](Value operand) {
872  return block == operand.getParentBlock();
873  });
874  assert(
875  inValueIt != mux.getOperands().end() &&
876  "Expected mux to have an operand originating from the requested block.");
877  return *inValueIt;
878 }
879 
880 // Returns a list of operands from 'mux' which corresponds to the inputs of the
881 // 'cmerge' operation. The results are sorted such that the i'th cmerge operand
882 // and the i'th sorted operand originate from the same block.
883 static std::vector<Value> getSortedInputs(ControlMergeOp cmerge, MuxOp mux) {
884  std::vector<Value> sortedOperands;
885  for (auto in : cmerge.getOperands()) {
886  auto *srcBlock = in.getParentBlock();
887  sortedOperands.push_back(getOperandFromBlock(mux, srcBlock));
888  }
889 
890  // Sanity check: ensure that operands are unique
891  for (unsigned i = 0; i < sortedOperands.size(); ++i) {
892  for (unsigned j = 0; j < sortedOperands.size(); ++j) {
893  if (i == j)
894  continue;
895  assert(sortedOperands[i] != sortedOperands[j] &&
896  "Cannot have an identical operand from two different blocks!");
897  }
898  }
899 
900  return sortedOperands;
901 }
902 
// Rewrites the control network of 'loopHeader' so that control entering the
// loop is selected deterministically by a priming register (a 1-deep
// sequential buffer initialized to select external control) instead of the
// non-deterministic control merge. The priming register's input is left as
// the 'loopPrimingInput' backedge, to be resolved by buildExitNetwork.
// Returns the priming register.
BufferOp LoopNetworkRewriter::buildContinueNetwork(Block *loopHeader,
                                                   Block *loopLatch,
                                                   Backedge &loopPrimingInput) {
  // Gather the muxes to replace before modifying block internals; it's been
  // found that if this is not done, we have determinism issues wrt. generating
  // the same order of replaced muxes on repeated runs of an identical
  // conversion.
  llvm::SmallVector<MuxOp> muxesToReplace;
  llvm::copy(loopHeader->getOps<MuxOp>(), std::back_inserter(muxesToReplace));

  // Fetch the control merge of the block; it is assumed that, at this point of
  // lowering, no other form of control can be used for the loop header block
  // than a control merge.
  auto *cmerge = getControlMerge(loopHeader);
  assert(hl.getBlockEntryControl(loopHeader) == cmerge->getResult(0) &&
         "Expected control merge to be the control component of a loop header");
  auto loc = cmerge->getLoc();

  // sanity check: cmerge should have >1 input to actually be a loop
  assert(cmerge->getNumOperands() > 1 && "This cannot be a loop header");

  // Partition the control merge inputs into those originating from backedges,
  // and those originating elsewhere.
  SmallVector<Value> externalCtrls, loopCtrls;
  for (auto cval : cmerge->getOperands()) {
    if (cval.getParentBlock() == loopLatch)
      loopCtrls.push_back(cval);
    else
      externalCtrls.push_back(cval);
  }
  assert(loopCtrls.size() == 1 &&
         "Expected a single loop control value to match the single loop latch");
  Value loopCtrl = loopCtrls.front();

  // Merge all of the controls in each partition
  rewriter->setInsertionPointToStart(loopHeader);
  auto externalCtrlMerge = rewriter->create<ControlMergeOp>(loc, externalCtrls);

  // Create loop mux and the loop priming register. The loop mux will on select
  // "0" select external control, and internal control at "1". This convention
  // must be followed by the loop exit network.
  auto primingRegister =
      rewriter->create<BufferOp>(loc, loopPrimingInput, 1, BufferTypeEnum::seq);
  // Initialize the priming register to path 0 (external control), so the
  // first external control token is let into the loop.
  primingRegister->setAttr("initValues", rewriter->getI64ArrayAttr({0}));

  // The loop control mux will deterministically select between control entering
  // the loop from any external block or the single loop backedge.
  auto loopCtrlMux = rewriter->create<MuxOp>(
      loc, primingRegister.getResult(),
      llvm::SmallVector<Value>{externalCtrlMerge.getResult(), loopCtrl});

  // Replace the existing control merge 'result' output with the loop control
  // mux.
  cmerge->getResult(0).replaceAllUsesWith(loopCtrlMux.getResult());

  // Register the new block entry control value
  hl.setBlockEntryControl(loopHeader, loopCtrlMux.getResult());

  // Next, we need to consider how to replace the control merge 'index' output,
  // used to drive input selection to the block.

  // Inputs to the loop header will be sourced from muxes with inputs from both
  // the loop latch as well as external blocks. Iterate through these and sort
  // based on the input ordering to the external/internal control merge.
  // We do this by maintaining a mapping between the external and loop data
  // inputs for each data mux in the design. The key of these maps is the
  // original mux (that is to be replaced).
  DenseMap<MuxOp, std::vector<Value>> externalDataInputs;
  DenseMap<MuxOp, Value> loopDataInputs;
  for (auto muxOp : muxesToReplace) {
    // NOTE(review): muxesToReplace was gathered before loopCtrlMux was
    // created, so this guard looks defensive rather than reachable — verify.
    if (muxOp == loopCtrlMux)
      continue;

    externalDataInputs[muxOp] = getSortedInputs(externalCtrlMerge, muxOp);
    loopDataInputs[muxOp] = getOperandFromBlock(muxOp, loopLatch);
    assert(/*loop latch input*/ 1 + externalDataInputs[muxOp].size() ==
               muxOp.getDataOperands().size() &&
           "Expected all mux operands to be partitioned between loop and "
           "external data inputs");
  }

  // With this, we now replace each of the data input muxes in the loop header.
  // We instantiate a single mux driven by the external control merge.
  // This, as well as the corresponding data input coming from within the single
  // loop latch, will then be selected between by a 3rd mux, based on the
  // priming register.
  for (MuxOp mux : muxesToReplace) {
    auto externalDataMux = rewriter->create<MuxOp>(
        loc, externalCtrlMerge.getIndex(), externalDataInputs[mux]);

    rewriter->replaceOp(
        mux, rewriter
                 ->create<MuxOp>(loc, primingRegister,
                                 llvm::SmallVector<Value>{externalDataMux,
                                                          loopDataInputs[mux]})
                 .getResult());
  }

  // Now all values defined by the original cmerge should have been replaced,
  // and it may be erased.
  rewriter->eraseOp(cmerge);

  // Return the priming register to be referenced by the exit network builder.
  return primingRegister;
}
1009 
// Assigns the loop priming register's input ('loopPrimingInput') so that the
// register is re-primed (set back to selecting external control) whenever
// control exits the loop through any of 'exitPairs'.
void LoopNetworkRewriter::buildExitNetwork(
    Block *loopHeader, const llvm::SmallSet<ExitPair, 2> &exitPairs,
    BufferOp loopPrimingRegister, Backedge &loopPrimingInput) {
  auto loc = loopPrimingRegister.getLoc();

  // Iterate over the exit pairs to gather up the condition signals that need to
  // be connected to the exit network. In doing so, we parity-correct these
  // condition values based on the convention used in buildContinueNetwork - the
  // loop mux will on select "0" select external control, and internal control
  // at "1". This convention must be followed by the loop exit network.
  // External control must be selected when exiting the loop (to reprime the
  // register).
  SmallVector<Value> parityCorrectedConds;
  for (auto &[condBlock, exitBlock] : exitPairs) {
    auto condBr = getControlCondBranch(condBlock);
    assert(
        condBr &&
        "Expected a conditional control branch op in the loop condition block");
    // Determine which side of the branch leaves the loop by looking at which
    // block its users reside in. NOTE(review): assumes both branch results
    // have at least one user at this point of lowering — verify.
    Operation *trueUser = *condBr.getTrueResult().getUsers().begin();
    bool isTrueParity = trueUser->getBlock() == exitBlock;
    assert(isTrueParity ^
               ((*condBr.getFalseResult().getUsers().begin())->getBlock() ==
                exitBlock) &&
           "The user of either the true or the false result should be in the "
           "exit block");

    Value condValue = condBr.getConditionOperand();
    if (isTrueParity) {
      // This goes against the convention, and we have to invert the condition
      // value before connecting it to the exit network.
      rewriter->setInsertionPoint(condBr);
      condValue = rewriter->create<arith::XOrIOp>(
          loc, condValue.getType(), condValue,
          rewriter->create<arith::ConstantOp>(
              loc, rewriter->getIntegerAttr(rewriter->getI1Type(), 1)));
    }
    parityCorrectedConds.push_back(condValue);
  }

  // Merge all of the parity-corrected exit conditions and assign them
  // to the loop priming input.
  auto exitMerge = rewriter->create<MergeOp>(loc, parityCorrectedConds);
  loopPrimingInput.setValue(exitMerge);
}
1054 
1055 LogicalResult LoopNetworkRewriter::processOuterLoop(Location loc,
1056  CFGLoop *loop) {
1057  // We determine the exit pairs of the loop; this is the in-loop nodes
1058  // which branch off to the exit nodes.
1059  llvm::SmallSet<ExitPair, 2> exitPairs;
1060  SmallVector<Block *> exitBlocks;
1061  loop->getExitBlocks(exitBlocks);
1062  for (auto *exitNode : exitBlocks) {
1063  for (auto *pred : exitNode->getPredecessors()) {
1064  // is the predecessor inside the loop?
1065  if (!loop->contains(pred))
1066  continue;
1067 
1068  ExitPair condPair = {pred, exitNode};
1069  assert(!exitPairs.count(condPair) &&
1070  "identical condition pairs should never be possible");
1071  exitPairs.insert(condPair);
1072  }
1073  }
1074  assert(!exitPairs.empty() && "No exits from loop?");
1075 
1076  // The first precondition to our loop transformation is that only a single
1077  // exit pair exists in the loop.
1078  if (exitPairs.size() > 1)
1079  return emitError(loc)
1080  << "Multiple exits detected within a loop. Loop task pipelining is "
1081  "only supported for loops with unified loop exit blocks.";
1082 
1083  Block *header = loop->getHeader();
1084  BackedgeBuilder bebuilder(*rewriter, header->front().getLoc());
1085 
1086  // Build the loop continue network. Loop continuation is triggered solely by
1087  // backedges to the header.
1088  auto loopPrimingRegisterInput = bebuilder.get(rewriter->getI1Type());
1089  auto loopPrimingRegister = buildContinueNetwork(header, loop->getLoopLatch(),
1090  loopPrimingRegisterInput);
1091 
1092  // Build the loop exit network. Loop exiting is driven solely by exit pairs
1093  // from the loop.
1094  buildExitNetwork(header, exitPairs, loopPrimingRegister,
1095  loopPrimingRegisterInput);
1096 
1097  return success();
1098 }
1099 
1100 // Return the appropriate branch result based on successor block which uses it
1101 static Value getSuccResult(Operation *termOp, Operation *newOp,
1102  Block *succBlock) {
1103  // For conditional block, check if result goes to true or to false successor
1104  if (auto condBranchOp = dyn_cast<mlir::cf::CondBranchOp>(termOp)) {
1105  if (condBranchOp.getTrueDest() == succBlock)
1106  return dyn_cast<handshake::ConditionalBranchOp>(newOp).getTrueResult();
1107  else {
1108  assert(condBranchOp.getFalseDest() == succBlock);
1109  return dyn_cast<handshake::ConditionalBranchOp>(newOp).getFalseResult();
1110  }
1111  }
1112  // If the block is unconditional, newOp has only one result
1113  return newOp->getResult(0);
1114 }
1115 
// Materializes handshake branch ops for block live-outs: every value used
// outside its defining block is routed through a branch matching the block
// terminator, and one use per successor block is rewired to the branch result
// feeding that successor.
LogicalResult
HandshakeLowering::addBranchOps(ConversionPatternRewriter &rewriter) {

  BlockValues liveOuts;

  // Collect, per block, the results that are used outside their defining
  // block.
  for (Block &block : r) {
    for (Operation &op : block) {
      for (auto result : op.getResults())
        if (isLiveOut(result))
          liveOuts[&block].push_back(result);
    }
  }

  for (Block &block : r) {
    Operation *termOp = block.getTerminator();
    rewriter.setInsertionPoint(termOp);

    for (Value val : liveOuts[&block]) {
      // Count the number of branches which the liveout needs
      int numBranches = getBranchCount(val, &block);

      // Instantiate branches and connect to Merges
      for (int i = 0, e = numBranches; i < e; ++i) {
        Operation *newOp = nullptr;

        if (auto condBranchOp = dyn_cast<mlir::cf::CondBranchOp>(termOp))
          newOp = rewriter.create<handshake::ConditionalBranchOp>(
              termOp->getLoc(), condBranchOp.getCondition(), val);
        else if (isa<mlir::cf::BranchOp>(termOp))
          newOp = rewriter.create<handshake::BranchOp>(termOp->getLoc(), val);

        // Other terminator kinds need no branch op.
        if (newOp == nullptr)
          continue;

        // Rewire exactly one use of 'val' per successor block to the branch
        // result feeding that successor (hence the 'break' below).
        for (int j = 0, e = block.getNumSuccessors(); j < e; ++j) {
          Block *succ = block.getSuccessor(j);
          Value res = getSuccResult(termOp, newOp, succ);

          for (auto &u : val.getUses()) {
            if (u.getOwner()->getBlock() == succ) {
              u.getOwner()->replaceUsesOfWith(val, res);
              break;
            }
          }
        }
      }
    }
  }

  return success();
}
1167 
1168 LogicalResult HandshakeLowering::connectConstantsToControl(
1169  ConversionPatternRewriter &rewriter, bool sourceConstants) {
1170  // Create new constants which have a control-only input to trigger them.
1171  // These are conneted to the control network or optionally to a Source
1172  // operation (always triggering). Control-network connected constants may
1173  // help debugability, but result in a slightly larger circuit.
1174 
1175  if (sourceConstants) {
1176  for (auto constantOp : llvm::make_early_inc_range(
1177  r.template getOps<mlir::arith::ConstantOp>())) {
1178  rewriter.setInsertionPointAfter(constantOp);
1179  auto value = constantOp.getValue();
1180  rewriter.replaceOpWithNewOp<handshake::ConstantOp>(
1181  constantOp, value.getType(), value,
1182  rewriter.create<handshake::SourceOp>(constantOp.getLoc(),
1183  rewriter.getNoneType()));
1184  }
1185  } else {
1186  for (Block &block : r) {
1187  Value blockEntryCtrl = getBlockEntryControl(&block);
1188  for (auto constantOp : llvm::make_early_inc_range(
1189  block.template getOps<mlir::arith::ConstantOp>())) {
1190  rewriter.setInsertionPointAfter(constantOp);
1191  auto value = constantOp.getValue();
1192  rewriter.replaceOpWithNewOp<handshake::ConstantOp>(
1193  constantOp, value.getType(), value, blockEntryCtrl);
1194  }
1195  }
1196  }
1197  return success();
1198 }
1199 
1200 /// Holds information about an handshake "basic block terminator" control
1201 /// operation
1203  /// The operation
1204  Operation *op;
1205  /// The operation's control operand (must have type NoneType)
1207 
1208  BlockControlTerm(Operation *op, Value ctrlOperand)
1209  : op(op), ctrlOperand(ctrlOperand) {
1210  assert(op && ctrlOperand);
1211  assert(ctrlOperand.getType().isa<NoneType>() &&
1212  "Control operand must be a NoneType");
1213  }
1214 
1215  /// Checks for member-wise equality
1216  friend bool operator==(const BlockControlTerm &lhs,
1217  const BlockControlTerm &rhs) {
1218  return lhs.op == rhs.op && lhs.ctrlOperand == rhs.ctrlOperand;
1219  }
1220 };
1221 
1223  // Identify the control terminator operation and its control operand in the
1224  // given block. One such operation must exist in the block
1225  for (Operation &op : *block) {
1226  if (auto branchOp = dyn_cast<handshake::BranchOp>(op))
1227  if (branchOp.isControl())
1228  return {branchOp, branchOp.getDataOperand()};
1229  if (auto branchOp = dyn_cast<handshake::ConditionalBranchOp>(op))
1230  if (branchOp.isControl())
1231  return {branchOp, branchOp.getDataOperand()};
1232  if (auto endOp = dyn_cast<handshake::ReturnOp>(op))
1233  return {endOp, endOp.getOperands().back()};
1234  }
1235  llvm_unreachable("Block terminator must exist");
1236 }
1237 
1238 static LogicalResult getOpMemRef(Operation *op, Value &out) {
1239  out = Value();
1240  if (auto memOp = dyn_cast<memref::LoadOp>(op))
1241  out = memOp.getMemRef();
1242  else if (auto memOp = dyn_cast<memref::StoreOp>(op))
1243  out = memOp.getMemRef();
1244  else if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
1245  MemRefAccess access(op);
1246  out = access.memref;
1247  }
1248  if (out != Value())
1249  return success();
1250  return op->emitOpError("Unknown Op type");
1251 }
1252 
1253 static bool isMemoryOp(Operation *op) {
1254  return isa<memref::LoadOp, memref::StoreOp, AffineReadOpInterface,
1255  AffineWriteOpInterface>(op);
1256 }
1257 
// Replaces every memref load/store (standard and affine) in the region with a
// handshake Load/Store op, recording the new ops per-memref in 'memRefOps' in
// program order. Affine accesses have their affine maps expanded into explicit
// address computations first.
LogicalResult
HandshakeLowering::replaceMemoryOps(ConversionPatternRewriter &rewriter,
                                    MemRefToMemoryAccessOp &memRefOps) {

  std::vector<Operation *> opsToErase;

  // Enrich the memRefOps context with BlockArguments, in case they aren't used.
  for (auto arg : r.getArguments()) {
    auto memrefType = dyn_cast<mlir::MemRefType>(arg.getType());
    if (!memrefType)
      continue;
    // Ensure that this is a valid memref-typed value.
    if (failed(isValidMemrefType(arg.getLoc(), memrefType)))
      return failure();
    memRefOps.insert(std::make_pair(arg, std::vector<Operation *>()));
  }

  // Replace load and store ops with the corresponding handshake ops
  // Need to traverse ops in blocks to store them in memRefOps in program
  // order
  for (Operation &op : r.getOps()) {
    if (!isMemoryOp(&op))
      continue;

    rewriter.setInsertionPoint(&op);
    Value memref;
    if (getOpMemRef(&op, memref).failed())
      return failure();
    Operation *newOp = nullptr;

    llvm::TypeSwitch<Operation *>(&op)
        .Case<memref::LoadOp>([&](auto loadOp) {
          // Get operands which correspond to address indices
          // This will add all operands except alloc
          SmallVector<Value, 8> operands(loadOp.getIndices());

          newOp =
              rewriter.create<handshake::LoadOp>(op.getLoc(), memref, operands);
          op.getResult(0).replaceAllUsesWith(newOp->getResult(0));
        })
        .Case<memref::StoreOp>([&](auto storeOp) {
          // Get operands which correspond to address indices
          // This will add all operands except alloc and data
          SmallVector<Value, 8> operands(storeOp.getIndices());

          // Create new op where operands are store data and address indices
          newOp = rewriter.create<handshake::StoreOp>(
              op.getLoc(), storeOp.getValueToStore(), operands);
        })
        .Case<AffineReadOpInterface, AffineWriteOpInterface>([&](auto) {
          // Get essential memref access information.
          MemRefAccess access(&op);
          // The address of an affine load/store operation can be a result
          // of an affine map, which is a linear combination of constants
          // and parameters. Therefore, we should extract the affine map of
          // each address and expand it into proper expressions that
          // calculate the result.
          AffineMap map;
          if (auto loadOp = dyn_cast<AffineReadOpInterface>(op))
            map = loadOp.getAffineMap();
          else
            map = dyn_cast<AffineWriteOpInterface>(op).getAffineMap();

          // The returned object from expandAffineMap is an optional list of
          // the expansion results from the given affine map, which are the
          // actual address indices that can be used as operands for
          // handshake LoadOp/StoreOp. The following processing requires it
          // to be a valid result.
          auto operands =
              expandAffineMap(rewriter, op.getLoc(), map, access.indices);
          assert(operands && "Address operands of affine memref access "
                             "cannot be reduced.");

          if (isa<AffineReadOpInterface>(op)) {
            auto loadOp = rewriter.create<handshake::LoadOp>(
                op.getLoc(), access.memref, *operands);
            newOp = loadOp;
            op.getResult(0).replaceAllUsesWith(loadOp.getDataResult());
          } else {
            newOp = rewriter.create<handshake::StoreOp>(
                op.getLoc(), op.getOperand(0), *operands);
          }
        })
        .Default([&](auto) {
          // NOTE(review): 'newOp' stays null here and is still recorded in
          // 'memRefOps' below — presumably unreachable after the isMemoryOp
          // filter above; verify if new memory op kinds are added.
          op.emitOpError("Load/store operation cannot be handled.");
        });

    memRefOps[memref].push_back(newOp);
    opsToErase.push_back(&op);
  }

  // Erase old memory ops
  for (unsigned i = 0, e = opsToErase.size(); i != e; ++i) {
    auto *op = opsToErase[i];
    // Drop all operands first (always erasing index 0 as the list shrinks).
    for (int j = 0, e = op->getNumOperands(); j < e; ++j)
      op->eraseOperand(0);
    assert(op->getNumOperands() == 0);

    rewriter.eraseOp(op);
  }

  return success();
}
1361 
1362 static SmallVector<Value, 8> getResultsToMemory(Operation *op) {
1363  // Get load/store results which are given as inputs to MemoryOp
1364 
1365  if (handshake::LoadOp loadOp = dyn_cast<handshake::LoadOp>(op)) {
1366  // For load, get all address outputs/indices
1367  // (load also has one data output which goes to successor operation)
1368  SmallVector<Value, 8> results(loadOp.getAddressResults());
1369  return results;
1370 
1371  } else {
1372  // For store, all outputs (data and address indices) go to memory
1373  assert(dyn_cast<handshake::StoreOp>(op));
1374  handshake::StoreOp storeOp = dyn_cast<handshake::StoreOp>(op);
1375  SmallVector<Value, 8> results(storeOp.getResults());
1376  return results;
1377  }
1378 }
1379 
1380 static void addLazyForks(Region &f, ConversionPatternRewriter &rewriter) {
1381 
1382  for (Block &block : f) {
1383  Value ctrl = getBlockControlTerminator(&block).ctrlOperand;
1384  if (!ctrl.hasOneUse())
1385  insertFork(ctrl, true, rewriter);
1386  }
1387 }
1388 
1389 static void removeUnusedAllocOps(Region &r,
1390  ConversionPatternRewriter &rewriter) {
1391  std::vector<Operation *> opsToDelete;
1392 
1393  // Remove alloc operations whose result have no use
1394  for (auto &op : r.getOps())
1395  if (isAllocOp(&op) && op.getResult(0).use_empty())
1396  opsToDelete.push_back(&op);
1397 
1398  llvm::for_each(opsToDelete, [&](auto allocOp) { rewriter.eraseOp(allocOp); });
1399 }
1400 
1401 static void addJoinOps(ConversionPatternRewriter &rewriter,
1402  ArrayRef<BlockControlTerm> controlTerms) {
1403  for (auto term : controlTerms) {
1404  auto &[op, ctrl] = term;
1405  auto *srcOp = ctrl.getDefiningOp();
1406 
1407  // Insert only single join per block
1408  if (!isa<JoinOp>(srcOp)) {
1409  rewriter.setInsertionPointAfter(srcOp);
1410  Operation *newJoin = rewriter.create<JoinOp>(srcOp->getLoc(), ctrl);
1411  op->replaceUsesOfWith(ctrl, newJoin->getResult(0));
1412  }
1413  }
1414 }
1415 
1416 static std::vector<BlockControlTerm>
1417 getControlTerminators(ArrayRef<Operation *> memOps) {
1418  std::vector<BlockControlTerm> terminators;
1419 
1420  for (Operation *op : memOps) {
1421  // Get block from which the mem op originates
1422  Block *block = op->getBlock();
1423  // Identify the control terminator in the block
1424  auto term = getBlockControlTerminator(block);
1425  if (std::find(terminators.begin(), terminators.end(), term) ==
1426  terminators.end())
1427  terminators.push_back(term);
1428  }
1429  return terminators;
1430 }
1431 
1432 static void addValueToOperands(Operation *op, Value val) {
1433 
1434  SmallVector<Value, 8> results(op->getOperands());
1435  results.push_back(val);
1436  op->setOperands(results);
1437 }
1438 
1439 static void setLoadDataInputs(ArrayRef<Operation *> memOps, Operation *memOp) {
1440  // Set memory outputs as load input data
1441  int ld_count = 0;
1442  for (auto *op : memOps) {
1443  if (isa<handshake::LoadOp>(op))
1444  addValueToOperands(op, memOp->getResult(ld_count++));
1445  }
1446 }
1447 
1448 static LogicalResult setJoinControlInputs(ArrayRef<Operation *> memOps,
1449  Operation *memOp, int offset,
1450  ArrayRef<int> cntrlInd) {
1451  // Connect all memory ops to the join of that block (ensures that all mem
1452  // ops terminate before a new block starts)
1453  for (int i = 0, e = memOps.size(); i < e; ++i) {
1454  auto *op = memOps[i];
1455  Value ctrl = getBlockControlTerminator(op->getBlock()).ctrlOperand;
1456  auto *srcOp = ctrl.getDefiningOp();
1457  if (!isa<JoinOp>(srcOp)) {
1458  return srcOp->emitOpError("Op expected to be a JoinOp");
1459  }
1460  addValueToOperands(srcOp, memOp->getResult(offset + cntrlInd[i]));
1461  }
1462  return success();
1463 }
1464 
// Adds control-only inputs to each memory access op so that it starts only
// after (1) its block becomes active and (2) every earlier dependent access
// (any same-block dependency except read-after-read) has completed. Multiple
// control inputs are combined through a JoinOp.
void HandshakeLowering::setMemOpControlInputs(
    ConversionPatternRewriter &rewriter, ArrayRef<Operation *> memOps,
    Operation *memOp, int offset, ArrayRef<int> cntrlInd) {
  for (int i = 0, e = memOps.size(); i < e; ++i) {
    std::vector<Value> controlOperands;
    Operation *currOp = memOps[i];
    Block *currBlock = currOp->getBlock();

    // Set load/store control inputs from the block input control value
    Value blockEntryCtrl = getBlockEntryControl(currBlock);
    controlOperands.push_back(blockEntryCtrl);

    // Set load/store control inputs from predecessors in block (memOps is in
    // program order, so only indices j < i can precede currOp).
    for (int j = 0, f = i; j < f; ++j) {
      Operation *predOp = memOps[j];
      Block *predBlock = predOp->getBlock();
      if (currBlock == predBlock)
        // Any dependency but RARs (two loads never conflict)
        if (!(isa<handshake::LoadOp>(currOp) && isa<handshake::LoadOp>(predOp)))
          // cntrlInd maps memOps index to correct control output index
          controlOperands.push_back(memOp->getResult(offset + cntrlInd[j]));
    }

    // If there is only one control input, add directly to memory op
    if (controlOperands.size() == 1)
      addValueToOperands(currOp, controlOperands[0]);

    // If multiple, join them and connect join output to memory op
    else {
      rewriter.setInsertionPoint(currOp);
      Operation *joinOp =
          rewriter.create<JoinOp>(currOp->getLoc(), controlOperands);
      addValueToOperands(currOp, joinOp->getResult(0));
    }
  }
}
1501 
// Instantiates a memory interface (MemoryOp or ExternalMemoryOp) for each
// memref in 'memRefOps' and wires all of its accesses to it, enforcing the
// interface's port ordering. When 'lsq' is false, the per-access done signals
// are additionally joined into the control network to sequence accesses and
// blocks.
LogicalResult
HandshakeLowering::connectToMemory(ConversionPatternRewriter &rewriter,
                                   MemRefToMemoryAccessOp memRefOps, bool lsq) {
  // Add MemoryOps which represent the memory interface
  // Connect memory operations and control appropriately
  int mem_count = 0;
  for (auto memory : memRefOps) {
    // First operand corresponds to memref (alloca or function argument)
    Value memrefOperand = memory.first;

    // A memory is external if the memref that defines it is provided as a
    // function (block) argument.
    bool isExternalMemory = memrefOperand.isa<BlockArgument>();

    mlir::MemRefType memrefType =
        memrefOperand.getType().cast<mlir::MemRefType>();
    if (failed(isValidMemrefType(memrefOperand.getLoc(), memrefType)))
      return failure();

    std::vector<Value> operands;

    // Get control terminators whose control operand need to connect to memory
    std::vector<BlockControlTerm> controlTerms =
        getControlTerminators(memory.second);

    // In case of LSQ interface, set control values as inputs (used to
    // trigger allocation to LSQ)
    if (lsq)
      for (auto valOp : controlTerms)
        operands.push_back(valOp.ctrlOperand);

    // Add load indices and store data+indices to memory operands
    // Count number of loads so that we can generate appropriate number of
    // memory outputs (data to load ops)

    // memory.second is in program order
    // Enforce MemoryOp port ordering as follows:
    // Operands: all stores then all loads (stdata1, staddr1, stdata2,...,
    // ldaddr1, ldaddr2,....) Outputs: all load outputs, ordered the same as
    // load addresses (lddata1, lddata2, ...), followed by all none outputs,
    // ordered as operands (stnone1, stnone2,...ldnone1, ldnone2,...)
    std::vector<int> newInd(memory.second.size(), 0);
    int ind = 0;
    // First pass: store ports (data + address indices), in program order.
    for (int i = 0, e = memory.second.size(); i < e; ++i) {
      auto *op = memory.second[i];
      if (isa<handshake::StoreOp>(op)) {
        SmallVector<Value, 8> results = getResultsToMemory(op);
        operands.insert(operands.end(), results.begin(), results.end());
        newInd[i] = ind++;
      }
    }

    int ld_count = 0;

    // Second pass: load ports (address indices), in program order.
    for (int i = 0, e = memory.second.size(); i < e; ++i) {
      auto *op = memory.second[i];
      if (isa<handshake::LoadOp>(op)) {
        SmallVector<Value, 8> results = getResultsToMemory(op);
        operands.insert(operands.end(), results.begin(), results.end());

        ld_count++;
        newInd[i] = ind++;
      }
    }

    // control-only outputs for each access (indicate access completion)
    int cntrl_count = lsq ? 0 : memory.second.size();

    Block *entryBlock = &r.front();
    rewriter.setInsertionPointToStart(entryBlock);

    // Place memory op next to the alloc op
    Operation *newOp = nullptr;
    if (isExternalMemory)
      newOp = rewriter.create<ExternalMemoryOp>(
          entryBlock->front().getLoc(), memrefOperand, operands, ld_count,
          cntrl_count - ld_count, mem_count++);
    else
      newOp = rewriter.create<MemoryOp>(entryBlock->front().getLoc(), operands,
                                        ld_count, cntrl_count, lsq, mem_count++,
                                        memrefOperand);

    setLoadDataInputs(memory.second, newOp);

    if (!lsq) {
      // Create Joins which join done signals from memory with the
      // control-only network
      addJoinOps(rewriter, controlTerms);

      // Connect all load/store done signals to the join of their block
      // Ensure that the block terminates only after all its accesses have
      // completed
      // True is default. When no sync needed, set to false (for now,
      // user-determined)
      bool control = true;

      if (control)
        returnOnError(
            setJoinControlInputs(memory.second, newOp, ld_count, newInd));

      // Set control-only inputs to each memory op
      // Ensure that op starts only after prior blocks have completed
      // Ensure that op starts only after predecessor ops (with RAW, WAR, or
      // WAW) have completed
      setMemOpControlInputs(rewriter, memory.second, newOp, ld_count, newInd);
    }
  }

  if (lsq)
    addLazyForks(r, rewriter);

  removeUnusedAllocOps(r, rewriter);
  return success();
}
1616 
1617 LogicalResult
1618 HandshakeLowering::replaceCallOps(ConversionPatternRewriter &rewriter) {
1619  for (Block &block : r) {
1620  /// An instance is activated whenever control arrives at the basic block
1621  /// of the source callOp.
1622  Value blockEntryControl = getBlockEntryControl(&block);
1623  for (Operation &op : block) {
1624  if (auto callOp = dyn_cast<mlir::func::CallOp>(op)) {
1625  llvm::SmallVector<Value> operands;
1626  llvm::copy(callOp.getOperands(), std::back_inserter(operands));
1627  operands.push_back(blockEntryControl);
1628  rewriter.setInsertionPoint(callOp);
1629  auto instanceOp = rewriter.create<handshake::InstanceOp>(
1630  callOp.getLoc(), callOp.getCallee(), callOp.getResultTypes(),
1631  operands);
1632  // Replace all results of the source callOp.
1633  for (auto it : llvm::zip(callOp.getResults(), instanceOp.getResults()))
1634  std::get<0>(it).replaceAllUsesWith(std::get<1>(it));
1635  rewriter.eraseOp(callOp);
1636  }
1637  }
1638  }
1639  return success();
1640 }
1641 
1642 namespace {
1643 /// Strategy class for SSA maximization during cf-to-handshake conversion.
1644 /// Block arguments of type MemRefType and allocation operations are not
1645 /// considered for SSA maximization.
1646 class HandshakeLoweringSSAStrategy : public SSAMaximizationStrategy {
1647  /// Filters out block arguments of type MemRefType
1648  bool maximizeArgument(BlockArgument arg) override {
1649  return !arg.getType().isa<mlir::MemRefType>();
1650  }
1651 
1652  /// Filters out allocation operations
1653  bool maximizeOp(Operation *op) override { return !isAllocOp(op); }
1654 };
1655 } // namespace
1656 
1657 /// Converts every value in the region into maximal SSA form, unless the value
1658 /// is a block argument of type MemRefType or the result of an allocation
1659 /// operation.
1660 static LogicalResult maximizeSSANoMem(Region &r,
1661  ConversionPatternRewriter &rewriter) {
1662  HandshakeLoweringSSAStrategy strategy;
1663  return maximizeSSA(r, strategy, rewriter);
1664 }
1665 
/// Lowers a single func::FuncOp into a handshake::FuncOp: creates the new
/// function with an extra NoneType control argument/result, moves the body
/// over, maximizes SSA form (excluding memory values), and runs the full
/// handshake region lowering. Returns failure if any stage fails.
static LogicalResult lowerFuncOp(func::FuncOp funcOp, MLIRContext *ctx,
                                 bool sourceConstants,
                                 bool disableTaskPipelining) {
  // Only retain those attributes that are not constructed by build.
  // (The symbol name and function type are re-created when building the
  // handshake::FuncOp below, so copying them would conflict.)
  SmallVector<NamedAttribute, 4> attributes;
  for (const auto &attr : funcOp->getAttrs()) {
    if (attr.getName() == SymbolTable::getSymbolAttrName() ||
        attr.getName() == funcOp.getFunctionTypeAttrName())
      continue;
    attributes.push_back(attr);
  }

  // Get function arguments
  llvm::SmallVector<mlir::Type, 8> argTypes;
  for (auto &argType : funcOp.getArgumentTypes())
    argTypes.push_back(argType);

  // Get function results
  llvm::SmallVector<mlir::Type, 8> resTypes;
  for (auto resType : funcOp.getResultTypes())
    resTypes.push_back(resType);

  // Filled in by the partial-lowering callback below; used by the later
  // stages of this function.
  handshake::FuncOp newFuncOp;

  // Add control input/output to function arguments/results and create a
  // handshake::FuncOp of appropriate type
  returnOnError(partiallyLowerOp<func::FuncOp>(
      [&](func::FuncOp funcOp, PatternRewriter &rewriter) {
        // The trailing NoneType models the dataflow control token.
        auto noneType = rewriter.getNoneType();
        resTypes.push_back(noneType);
        argTypes.push_back(noneType);
        auto func_type = rewriter.getFunctionType(argTypes, resTypes);
        newFuncOp = rewriter.create<handshake::FuncOp>(
            funcOp.getLoc(), funcOp.getName(), func_type, attributes);
        rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
                                    newFuncOp.end());
        if (!newFuncOp.isExternal()) {
          // Materialize the control token as the last entry-block argument,
          // matching the extra NoneType appended to the signature.
          newFuncOp.getBodyBlock()->addArgument(rewriter.getNoneType(),
                                                funcOp.getLoc());
          newFuncOp.resolveArgAndResNames();
        }
        // The original function has been fully subsumed by newFuncOp.
        rewriter.eraseOp(funcOp);
        return success();
      },
      ctx, funcOp));

  // Apply SSA maximization
  returnOnError(
      partiallyLowerRegion(maximizeSSANoMem, ctx, newFuncOp.getBody()));

  // External functions have no body to lower; only lower the region for
  // functions with a definition.
  if (!newFuncOp.isExternal()) {
    Block *bodyBlock = newFuncOp.getBodyBlock();
    // The control token is the argument appended last in the callback above.
    Value entryCtrl = bodyBlock->getArguments().back();
    HandshakeLowering fol(newFuncOp.getBody());
    if (failed(lowerRegion<func::ReturnOp, handshake::ReturnOp>(
            fol, sourceConstants, disableTaskPipelining, entryCtrl)))
      return failure();
  }

  return success();
}
1727 
1728 namespace {
1729 
1730 struct HandshakeRemoveBlockPass
1731  : HandshakeRemoveBlockBase<HandshakeRemoveBlockPass> {
1732  void runOnOperation() override { removeBasicBlocks(getOperation()); }
1733 };
1734 
1735 struct CFToHandshakePass : public CFToHandshakeBase<CFToHandshakePass> {
1736  CFToHandshakePass(bool sourceConstants, bool disableTaskPipelining) {
1737  this->sourceConstants = sourceConstants;
1738  this->disableTaskPipelining = disableTaskPipelining;
1739  }
1740  void runOnOperation() override {
1741  ModuleOp m = getOperation();
1742 
1743  for (auto funcOp : llvm::make_early_inc_range(m.getOps<func::FuncOp>())) {
1744  if (failed(lowerFuncOp(funcOp, &getContext(), sourceConstants,
1745  disableTaskPipelining))) {
1746  signalPassFailure();
1747  return;
1748  }
1749  }
1750  }
1751 };
1752 
1753 } // namespace
1754 
1755 std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
1756 circt::createCFToHandshakePass(bool sourceConstants,
1757  bool disableTaskPipelining) {
1758  return std::make_unique<CFToHandshakePass>(sourceConstants,
1759  disableTaskPipelining);
1760 }
1761 
1762 std::unique_ptr<mlir::OperationPass<handshake::FuncOp>>
1764  return std::make_unique<HandshakeRemoveBlockPass>();
1765 }
lowerAnnotationsNoRefTypePorts FirtoolPreserveValuesMode value
Definition: Firtool.cpp:95
static ConditionalBranchOp getControlCondBranch(Block *block)
#define returnOnError(logicalResult)
static LogicalResult lowerFuncOp(func::FuncOp funcOp, MLIRContext *ctx, bool sourceConstants, bool disableTaskPipelining)
static bool isMemoryOp(Operation *op)
static LogicalResult setJoinControlInputs(ArrayRef< Operation * > memOps, Operation *memOp, int offset, ArrayRef< int > cntrlInd)
static void addJoinOps(ConversionPatternRewriter &rewriter, ArrayRef< BlockControlTerm > controlTerms)
static void addLazyForks(Region &f, ConversionPatternRewriter &rewriter)
static bool isLiveOut(Value val)
static Operation * getControlMerge(Block *block)
static SmallVector< Value, 8 > getResultsToMemory(Operation *op)
static unsigned getBlockPredecessorCount(Block *block)
static int getBranchCount(Value val, Block *block)
void removeBasicBlocks(handshake::FuncOp funcOp)
static Operation * getFirstOp(Block *block)
Returns the first occurrence of an operation of type TOp; otherwise, returns a null op.
static Value getSuccResult(Operation *termOp, Operation *newOp, Block *succBlock)
static Value getMergeOperand(HandshakeLowering::MergeOpInfo mergeInfo, Block *predBlock)
static LogicalResult isValidMemrefType(Location loc, mlir::MemRefType type)
static bool isAllocOp(Operation *op)
static Value getOperandFromBlock(MuxOp mux, Block *block)
static LogicalResult getOpMemRef(Operation *op, Value &out)
static LogicalResult partiallyLowerOp(const std::function< LogicalResult(TOp, ConversionPatternRewriter &)> &loweringFunc, MLIRContext *ctx, TOp op)
static void addValueToOperands(Operation *op, Value val)
static bool loopsHaveSingleExit(CFGLoopInfo &loopInfo)
static std::vector< Value > getSortedInputs(ControlMergeOp cmerge, MuxOp mux)
static void removeBlockOperands(Region &f)
static void removeUnusedAllocOps(Region &r, ConversionPatternRewriter &rewriter)
static LogicalResult maximizeSSANoMem(Region &r, ConversionPatternRewriter &rewriter)
Converts every value in the region into maximal SSA form, unless the value is a block argument of typ...
static Operation * findBranchToBlock(Block *block)
static std::vector< BlockControlTerm > getControlTerminators(ArrayRef< Operation * > memOps)
static BlockControlTerm getBlockControlTerminator(Block *block)
static void reconnectMergeOps(Region &r, HandshakeLowering::BlockOps blockMerges, HandshakeLowering::ValueMap &mergePairs)
static void setLoadDataInputs(ArrayRef< Operation * > memOps, Operation *memOp)
assert(baseType &&"element must be base type")
static void insertFork(Value result, OpBuilder &rewriter)
static void mergeBlock(Block &destination, Block::iterator insertPoint, Block &source)
Move all operations from a source block in to a destination block.
Definition: ExpandWhens.cpp:28
LowerRegionTarget(MLIRContext &context, Region &region)
Instantiate one of these and use it to build typed backedges.
Backedge get(mlir::Type resultType, mlir::LocationAttr optionalLoc={})
Create a typed backedge.
Backedge is a wrapper class around a Value.
void setValue(mlir::Value)
Strategy class to control the behavior of SSA maximization.
Definition: Passes.h:53
DenseMap< Block *, std::vector< MergeOpInfo > > BlockOps
Definition: CFToHandshake.h:61
DenseMap< Block *, std::vector< Value > > BlockValues
Definition: CFToHandshake.h:60
llvm::MapVector< Value, std::vector< Operation * > > MemRefToMemoryAccessOp
Definition: CFToHandshake.h:64
DenseMap< Value, Value > ValueMap
Definition: CFToHandshake.h:62
llvm::function_ref< LogicalResult(Region &, ConversionPatternRewriter &)> RegionLoweringFunc
Definition: CFToHandshake.h:39
LogicalResult partiallyLowerRegion(const RegionLoweringFunc &loweringFunc, MLIRContext *ctx, Region &r)
StringAttr getName(ArrayAttr names, size_t idx)
Return the name at the specified index of the ArrayAttr or null if it cannot be determined.
This file defines an intermediate representation for circuits acting as an abstraction for constraint...
Definition: DebugAnalysis.h:21
LogicalResult maximizeSSA(Value value, PatternRewriter &rewriter)
Converts a single value within a function into maximal SSA form.
Definition: MaximizeSSA.cpp:81
std::unique_ptr< mlir::OperationPass< mlir::ModuleOp > > createCFToHandshakePass(bool sourceConstants=false, bool disableTaskPipelining=false)
std::unique_ptr< mlir::OperationPass< handshake::FuncOp > > createHandshakeRemoveBlockPass()
Holds information about an handshake "basic block terminator" control operation.
friend bool operator==(const BlockControlTerm &lhs, const BlockControlTerm &rhs)
Checks for member-wise equality.
Value ctrlOperand
The operation's control operand (must have type NoneType)
BlockControlTerm(Operation *op, Value ctrlOperand)
Operation * op
The operation.
Allows to partially lower a region by matching on the parent operation to then call the provided part...
PartialLoweringFunc fun
LogicalResult matchAndRewrite(Operation *op, ArrayRef< Value >, ConversionPatternRewriter &rewriter) const override
std::function< LogicalResult(Region &, ConversionPatternRewriter &)> PartialLoweringFunc
PartialLowerRegion(LowerRegionTarget &target, MLIRContext *context, LogicalResult &loweringResRef, const PartialLoweringFunc &fun)
LogicalResult & loweringRes
LowerRegionTarget & target