CIRCT  19.0.0git
CFToHandshake.cpp
Go to the documentation of this file.
1 //===- CFToHandshake.cpp - Convert standard MLIR into dataflow IR ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 //
8 // This is the main Standard to Handshake Conversion Pass Implementation.
9 //
10 //===----------------------------------------------------------------------===//
11 
13 #include "../PassDetail.h"
18 #include "mlir/Analysis/CFGLoopInfo.h"
19 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
20 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
21 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
22 #include "mlir/Dialect/Affine/IR/AffineOps.h"
23 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
24 #include "mlir/Dialect/Affine/Utils.h"
25 #include "mlir/Dialect/Arith/IR/Arith.h"
26 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
27 #include "mlir/Dialect/Func/IR/FuncOps.h"
28 #include "mlir/Dialect/MemRef/IR/MemRef.h"
29 #include "mlir/Dialect/SCF/IR/SCF.h"
30 #include "mlir/IR/Builders.h"
31 #include "mlir/IR/BuiltinOps.h"
32 #include "mlir/IR/Diagnostics.h"
33 #include "mlir/IR/Dominance.h"
34 #include "mlir/IR/OpImplementation.h"
35 #include "mlir/IR/PatternMatch.h"
36 #include "mlir/IR/Types.h"
37 #include "mlir/IR/Value.h"
38 #include "mlir/Pass/Pass.h"
39 #include "mlir/Support/LLVM.h"
40 #include "mlir/Transforms/DialectConversion.h"
41 #include "mlir/Transforms/Passes.h"
42 #include "llvm/ADT/SmallSet.h"
43 #include "llvm/ADT/TypeSwitch.h"
44 #include "llvm/Support/raw_ostream.h"
45 
46 #include <list>
47 #include <map>
48 
49 using namespace mlir;
50 using namespace mlir::func;
51 using namespace mlir::affine;
52 using namespace circt;
53 using namespace circt::handshake;
54 using namespace std;
55 
56 // ============================================================================
57 // Partial lowering infrastructure
58 // ============================================================================
59 
60 namespace {
61 template <typename TOp>
62 class LowerOpTarget : public ConversionTarget {
63 public:
64  explicit LowerOpTarget(MLIRContext &context) : ConversionTarget(context) {
65  loweredOps.clear();
66  addLegalDialect<HandshakeDialect>();
67  addLegalDialect<mlir::func::FuncDialect>();
68  addLegalDialect<mlir::arith::ArithDialect>();
69  addIllegalDialect<mlir::scf::SCFDialect>();
70  addIllegalDialect<AffineDialect>();
71 
72  /// The root operation to be replaced is marked dynamically legal
73  /// based on the lowering status of the given operation, see
74  /// PartialLowerOp.
75  addDynamicallyLegalOp<TOp>([&](const auto &op) { return loweredOps[op]; });
76  }
77  DenseMap<Operation *, bool> loweredOps;
78 };
79 
80 /// Default function for partial lowering of handshake::FuncOp. Lowering is
81 /// achieved by a provided partial lowering function.
82 ///
83 /// A partial lowering function may only replace a subset of the operations
84 /// within the funcOp currently being lowered. However, the dialect conversion
85 /// scheme requires the matched root operation to be replaced/updated/erased. It
86 /// is the partial update function's responsibility to ensure this. The parital
87 /// update function may only mutate the IR through the provided
88 /// ConversionPatternRewriter, like any other ConversionPattern.
89 /// Next, the function operation is expected to go
90 /// from illegal to legalized, after matchAndRewrite returned true. To work
91 /// around this, LowerFuncOpTarget::loweredFuncs is used to communicate between
92 /// the target and the conversion, to indicate that the partial lowering was
93 /// completed.
94 template <typename TOp>
95 struct PartialLowerOp : public ConversionPattern {
96  using PartialLoweringFunc =
97  std::function<LogicalResult(TOp, ConversionPatternRewriter &)>;
98 
99 public:
100  PartialLowerOp(LowerOpTarget<TOp> &target, MLIRContext *context,
101  LogicalResult &loweringResRef, const PartialLoweringFunc &fun)
102  : ConversionPattern(TOp::getOperationName(), 1, context), target(target),
103  loweringRes(loweringResRef), fun(fun) {}
104  using ConversionPattern::ConversionPattern;
105  LogicalResult
106  matchAndRewrite(Operation *op, ArrayRef<Value> /*operands*/,
107  ConversionPatternRewriter &rewriter) const override {
108  assert(isa<TOp>(op));
109  loweringRes = fun(dyn_cast<TOp>(op), rewriter);
110  target.loweredOps[op] = true;
111  return loweringRes;
112  };
113 
114 private:
115  LowerOpTarget<TOp> &target;
116  LogicalResult &loweringRes;
117  // NOTE: this is basically the rewrite function
118  PartialLoweringFunc fun;
119 };
120 } // namespace
121 
122 // Convenience function for running lowerToHandshake with a partial
123 // handshake::FuncOp lowering function.
124 template <typename TOp>
125 static LogicalResult partiallyLowerOp(
126  const std::function<LogicalResult(TOp, ConversionPatternRewriter &)>
127  &loweringFunc,
128  MLIRContext *ctx, TOp op) {
129 
130  RewritePatternSet patterns(ctx);
131  auto target = LowerOpTarget<TOp>(*ctx);
132  LogicalResult partialLoweringSuccessfull = success();
133  patterns.add<PartialLowerOp<TOp>>(target, ctx, partialLoweringSuccessfull,
134  loweringFunc);
135  return success(
136  applyPartialConversion(op, target, std::move(patterns)).succeeded() &&
137  partialLoweringSuccessfull.succeeded());
138 }
139 
140 class LowerRegionTarget : public ConversionTarget {
141 public:
142  explicit LowerRegionTarget(MLIRContext &context, Region &region)
143  : ConversionTarget(context), region(region) {
144  // The root operation is marked dynamically legal to ensure
145  // the pattern on its region is only applied once.
146  markUnknownOpDynamicallyLegal([&](Operation *op) {
147  if (op != region.getParentOp())
148  return true;
149  return opLowered;
150  });
151  }
152  bool opLowered = false;
153  Region &region;
154 };
155 
156 /// Allows to partially lower a region by matching on the parent operation to
157 /// then call the provided partial lowering function with the region and the
158 /// rewriter.
159 ///
160 /// The interplay with the target is similar to PartialLowerOp
161 struct PartialLowerRegion : public ConversionPattern {
163  std::function<LogicalResult(Region &, ConversionPatternRewriter &)>;
164 
165 public:
166  PartialLowerRegion(LowerRegionTarget &target, MLIRContext *context,
167  LogicalResult &loweringResRef,
168  const PartialLoweringFunc &fun)
169  : ConversionPattern(target.region.getParentOp()->getName().getStringRef(),
170  1, context),
171  target(target), loweringRes(loweringResRef), fun(fun) {}
172  using ConversionPattern::ConversionPattern;
173  LogicalResult
174  matchAndRewrite(Operation *op, ArrayRef<Value> /*operands*/,
175  ConversionPatternRewriter &rewriter) const override {
176  rewriter.modifyOpInPlace(
177  op, [&] { loweringRes = fun(target.region, rewriter); });
178 
179  target.opLowered = true;
180  return loweringRes;
181  };
182 
183 private:
185  LogicalResult &loweringRes;
187 };
188 
189 LogicalResult
191  MLIRContext *ctx, Region &r) {
192 
193  Operation *op = r.getParentOp();
194  RewritePatternSet patterns(ctx);
195  auto target = LowerRegionTarget(*ctx, r);
196  LogicalResult partialLoweringSuccessfull = success();
197  patterns.add<PartialLowerRegion>(target, ctx, partialLoweringSuccessfull,
198  loweringFunc);
199  return success(
200  applyPartialConversion(op, target, std::move(patterns)).succeeded() &&
201  partialLoweringSuccessfull.succeeded());
202 }
203 
204 // ============================================================================
205 // Start of lowering passes
206 // ============================================================================
207 
208 Value HandshakeLowering::getBlockEntryControl(Block *block) const {
209  auto it = blockEntryControlMap.find(block);
210  assert(it != blockEntryControlMap.end() &&
211  "No block entry control value registerred for this block!");
212  return it->second;
213 }
214 
215 void HandshakeLowering::setBlockEntryControl(Block *block, Value v) {
216  blockEntryControlMap[block] = v;
217 }
218 
220  Block *entryBlock = &r.front();
221  auto &entryBlockOps = entryBlock->getOperations();
222 
223  // Move all operations to entry block and erase other blocks.
224  for (Block &block : llvm::make_early_inc_range(llvm::drop_begin(r, 1))) {
225  entryBlockOps.splice(entryBlockOps.end(), block.getOperations());
226 
227  block.clear();
228  block.dropAllDefinedValueUses();
229  for (size_t i = 0; i < block.getNumArguments(); i++) {
230  block.eraseArgument(i);
231  }
232  block.erase();
233  }
234 
235  // Remove any control flow operations, and move the non-control flow
236  // terminator op to the end of the entry block.
237  for (Operation &terminatorLike : llvm::make_early_inc_range(*entryBlock)) {
238  if (!terminatorLike.hasTrait<OpTrait::IsTerminator>())
239  continue;
240 
241  if (isa<mlir::cf::CondBranchOp, mlir::cf::BranchOp>(terminatorLike)) {
242  terminatorLike.erase();
243  continue;
244  }
245 
246  // Else, assume that this is a return-like terminator op.
247  terminatorLike.moveBefore(entryBlock, entryBlock->end());
248  }
249 }
250 
251 LogicalResult
252 HandshakeLowering::runSSAMaximization(ConversionPatternRewriter &rewriter,
253  Value entryCtrl) {
254  return maximizeSSA(entryCtrl, rewriter);
255 }
256 
257 void removeBasicBlocks(handshake::FuncOp funcOp) {
258  if (funcOp.isExternal())
259  return; // nothing to do, external funcOp.
260 
261  removeBasicBlocks(funcOp.getBody());
262 }
263 
264 static LogicalResult isValidMemrefType(Location loc, mlir::MemRefType type) {
265  if (type.getNumDynamicDims() != 0 || type.getShape().size() != 1)
266  return emitError(loc) << "memref's must be both statically sized and "
267  "unidimensional.";
268  return success();
269 }
270 
271 static unsigned getBlockPredecessorCount(Block *block) {
272  // Returns number of block predecessors
273  auto predecessors = block->getPredecessors();
274  return std::distance(predecessors.begin(), predecessors.end());
275 }
276 
277 // Insert appropriate type of Merge CMerge for control-only path,
278 // Merge for single-successor blocks, Mux otherwise
280 HandshakeLowering::insertMerge(Block *block, Value val,
281  BackedgeBuilder &edgeBuilder,
282  ConversionPatternRewriter &rewriter) {
283  unsigned numPredecessors = getBlockPredecessorCount(block);
284  auto insertLoc = block->front().getLoc();
285  SmallVector<Backedge> dataEdges;
286  SmallVector<Value> operands;
287 
288  // Every block (except the entry block) needs to feed it's entry control into
289  // a control merge
290  if (val == getBlockEntryControl(block)) {
291 
292  Operation *mergeOp;
293  if (block == &r.front()) {
294  // For consistency within the entry block, replace the latter's entry
295  // control with the output of a MergeOp that takes the control-only
296  // network's start point as input. This makes it so that only the
297  // MergeOp's output is used as a control within the entry block, instead
298  // of a combination of the MergeOp's output and the function/block control
299  // argument. Taking this step out should have no impact on functionality
300  // but would make the resulting IR less "regular"
301  operands.push_back(val);
302  mergeOp = rewriter.create<handshake::MergeOp>(insertLoc, operands);
303  } else {
304  for (unsigned i = 0; i < numPredecessors; i++) {
305  auto edge = edgeBuilder.get(rewriter.getNoneType());
306  dataEdges.push_back(edge);
307  operands.push_back(Value(edge));
308  }
309  mergeOp = rewriter.create<handshake::ControlMergeOp>(insertLoc, operands);
310  }
311  setBlockEntryControl(block, mergeOp->getResult(0));
312  return MergeOpInfo{mergeOp, val, dataEdges};
313  }
314 
315  // Every live-in value to a block is passed through a merge-like operation,
316  // even when it's not required for circuit correctness (useless merge-like
317  // operations are removed down the line during handshake canonicalization)
318 
319  // Insert "dummy" MergeOp's for blocks with less than two predecessors
320  if (numPredecessors <= 1) {
321  if (numPredecessors == 0) {
322  // All of the entry block's block arguments get passed through a dummy
323  // MergeOp. There is no need for a backedge here as the unique operand can
324  // be resolved immediately
325  operands.push_back(val);
326  } else {
327  // The value incoming from the single block predecessor will be resolved
328  // later during merge reconnection
329  auto edge = edgeBuilder.get(val.getType());
330  dataEdges.push_back(edge);
331  operands.push_back(Value(edge));
332  }
333  auto merge = rewriter.create<handshake::MergeOp>(insertLoc, operands);
334  return MergeOpInfo{merge, val, dataEdges};
335  }
336 
337  // Create a backedge for the index operand, and another one for each data
338  // operand. The index operand will eventually resolve to the current block's
339  // control merge index output, while data operands will resolve to their
340  // respective values from each block predecessor
341  Backedge indexEdge = edgeBuilder.get(rewriter.getIndexType());
342  for (unsigned i = 0; i < numPredecessors; i++) {
343  auto edge = edgeBuilder.get(val.getType());
344  dataEdges.push_back(edge);
345  operands.push_back(Value(edge));
346  }
347  auto mux =
348  rewriter.create<handshake::MuxOp>(insertLoc, Value(indexEdge), operands);
349  return MergeOpInfo{mux, val, dataEdges, indexEdge};
350 }
351 
353 HandshakeLowering::insertMergeOps(HandshakeLowering::ValueMap &mergePairs,
354  BackedgeBuilder &edgeBuilder,
355  ConversionPatternRewriter &rewriter) {
356  HandshakeLowering::BlockOps blockMerges;
357  for (Block &block : r) {
358  rewriter.setInsertionPointToStart(&block);
359 
360  // All of the block's live-ins are passed explictly through block arguments
361  // thanks to prior SSA maximization
362  for (auto &arg : block.getArguments()) {
363  // No merges on memref block arguments; these are handled separately
364  if (isa<mlir::MemRefType>(arg.getType()))
365  continue;
366 
367  auto mergeInfo = insertMerge(&block, arg, edgeBuilder, rewriter);
368  blockMerges[&block].push_back(mergeInfo);
369  mergePairs[arg] = mergeInfo.op->getResult(0);
370  }
371  }
372  return blockMerges;
373 }
374 
375 // Get value from predBlock which will be set as operand of op (merge)
377  Block *predBlock) {
378  // The input value to the merge operations
379  Value srcVal = mergeInfo.val;
380  // The block the merge operation belongs to
381  Block *block = mergeInfo.op->getBlock();
382 
383  // The block terminator is either a cf-level branch or cf-level conditional
384  // branch. In either case, identify the value passed to the block using its
385  // index in the list of block arguments
386  unsigned index = cast<BlockArgument>(srcVal).getArgNumber();
387  Operation *termOp = predBlock->getTerminator();
388  if (mlir::cf::CondBranchOp br = dyn_cast<mlir::cf::CondBranchOp>(termOp)) {
389  // Block should be one of the two destinations of the conditional branch
390  if (block == br.getTrueDest())
391  return br.getTrueOperand(index);
392  assert(block == br.getFalseDest());
393  return br.getFalseOperand(index);
394  }
395  if (isa<mlir::cf::BranchOp>(termOp))
396  return termOp->getOperand(index);
397  return nullptr;
398 }
399 
400 static void removeBlockOperands(Region &f) {
401  // Remove all block arguments, they are no longer used
402  // eraseArguments also removes corresponding branch operands
403  for (Block &block : f) {
404  if (!block.isEntryBlock()) {
405  int x = block.getNumArguments() - 1;
406  for (int i = x; i >= 0; --i)
407  block.eraseArgument(i);
408  }
409  }
410 }
411 
412 /// Returns the first occurance of an operation of type TOp, else, returns
413 /// null op.
414 template <typename TOp>
415 static Operation *getFirstOp(Block *block) {
416  auto ops = block->getOps<TOp>();
417  if (ops.empty())
418  return nullptr;
419  return *ops.begin();
420 }
421 
422 static Operation *getControlMerge(Block *block) {
423  return getFirstOp<ControlMergeOp>(block);
424 }
425 
426 static ConditionalBranchOp getControlCondBranch(Block *block) {
427  for (auto cbranch : block->getOps<handshake::ConditionalBranchOp>()) {
428  if (cbranch.isControl())
429  return cbranch;
430  }
431  return nullptr;
432 }
433 
434 static void reconnectMergeOps(Region &r,
435  HandshakeLowering::BlockOps blockMerges,
436  HandshakeLowering::ValueMap &mergePairs) {
437  // At this point all merge-like operations have backedges as operands.
438  // We here replace all backedge values with appropriate value from
439  // predecessor block. The predecessor can either be a merge, the original
440  // defining value, or a branch operand.
441 
442  for (Block &block : r) {
443  for (auto &mergeInfo : blockMerges[&block]) {
444  int operandIdx = 0;
445  // Set appropriate operand from each predecessor block
446  for (auto *predBlock : block.getPredecessors()) {
447  Value mgOperand = getMergeOperand(mergeInfo, predBlock);
448  assert(mgOperand != nullptr);
449  if (!mgOperand.getDefiningOp()) {
450  assert(mergePairs.count(mgOperand));
451  mgOperand = mergePairs[mgOperand];
452  }
453  mergeInfo.dataEdges[operandIdx].setValue(mgOperand);
454  operandIdx++;
455  }
456 
457  // Reconnect all operands originating from livein defining value through
458  // corresponding merge of that block
459  for (Operation &opp : block)
460  if (!isa<MergeLikeOpInterface>(opp))
461  opp.replaceUsesOfWith(mergeInfo.val, mergeInfo.op->getResult(0));
462  }
463  }
464 
465  // Connect select operand of muxes to control merge's index result in all
466  // blocks with more than one predecessor
467  for (Block &block : r) {
468  if (getBlockPredecessorCount(&block) > 1) {
469  Operation *cntrlMg = getControlMerge(&block);
470  assert(cntrlMg != nullptr);
471 
472  for (auto &mergeInfo : blockMerges[&block]) {
473  if (mergeInfo.op != cntrlMg) {
474  // If the block has multiple predecessors, merge-like operation that
475  // are not the block's control merge must have an index operand (at
476  // this point, an index backedge)
477  assert(mergeInfo.indexEdge.has_value());
478  (*mergeInfo.indexEdge).setValue(cntrlMg->getResult(1));
479  }
480  }
481  }
482  }
483 
485 }
486 
487 static bool isAllocOp(Operation *op) {
488  return isa<memref::AllocOp, memref::AllocaOp>(op);
489 }
490 
491 LogicalResult
492 HandshakeLowering::addMergeOps(ConversionPatternRewriter &rewriter) {
493 
494  // Stores mapping from each value that pass through a merge operation to the
495  // first result of that merge operation
496  ValueMap mergePairs;
497 
498  // Create backedge builder to manage operands of merge operations between
499  // insertion and reconnection
500  BackedgeBuilder edgeBuilder{rewriter, r.front().front().getLoc()};
501 
502  // Insert merge operations (with backedges instead of actual operands)
503  BlockOps mergeOps = insertMergeOps(mergePairs, edgeBuilder, rewriter);
504 
505  // Reconnect merge operations with values incoming from predecessor blocks
506  // and resolve all backedges that were created during merge insertion
507  reconnectMergeOps(r, mergeOps, mergePairs);
508  return success();
509 }
510 
511 static bool isLiveOut(Value val) {
512  // Identifies liveout values after adding Merges
513  for (auto &u : val.getUses())
514  // Result is liveout if used by some Merge block
515  if (isa<MergeLikeOpInterface>(u.getOwner()))
516  return true;
517  return false;
518 }
519 
520 // A value can have multiple branches in a single successor block
521 // (for instance, there can be an SSA phi and a merge that we insert)
522 // This function determines the number of branches to insert based on the
523 // value uses in successor blocks
524 static int getBranchCount(Value val, Block *block) {
525  int uses = 0;
526  for (int i = 0, e = block->getNumSuccessors(); i < e; ++i) {
527  int curr = 0;
528  Block *succ = block->getSuccessor(i);
529  for (auto &u : val.getUses()) {
530  if (u.getOwner()->getBlock() == succ)
531  curr++;
532  }
533  uses = (curr > uses) ? curr : uses;
534  }
535  return uses;
536 }
537 
538 namespace {
539 
540 /// This class inserts a reorder prevention mechanism for blocks with multiple
541 /// successors. Such a mechanism is required to guarantee correct execution in a
542 /// multi-threaded usage of the circuits.
543 ///
544 /// The order of the results matches the order of the traversals of the
545 /// divergence point. A FIFO buffer stores the condition of the conditional
546 /// branch. The buffer feeds a mux that guarantees the correct out-order.
547 class FeedForwardNetworkRewriter {
548 public:
549  FeedForwardNetworkRewriter(HandshakeLowering &hl,
550  ConversionPatternRewriter &rewriter)
551  : hl(hl), rewriter(rewriter), postDomInfo(hl.getRegion().getParentOp()),
552  domInfo(hl.getRegion().getParentOp()),
553  loopInfo(domInfo.getDomTree(&hl.getRegion())) {}
554  LogicalResult apply();
555 
556 private:
557  HandshakeLowering &hl;
558  ConversionPatternRewriter &rewriter;
559  PostDominanceInfo postDomInfo;
560  DominanceInfo domInfo;
561  CFGLoopInfo loopInfo;
562 
563  using BlockPair = std::pair<Block *, Block *>;
564  using BlockPairs = SmallVector<BlockPair>;
565  LogicalResult findBlockPairs(BlockPairs &blockPairs);
566 
567  BufferOp buildSplitNetwork(Block *splitBlock,
568  handshake::ConditionalBranchOp &ctrlBr);
569  LogicalResult buildMergeNetwork(Block *mergeBlock, BufferOp buf,
570  handshake::ConditionalBranchOp &ctrlBr);
571 
572  // Determines if the cmerge inpus match the cond_br output order.
573  bool requiresOperandFlip(ControlMergeOp &ctrlMerge,
574  handshake::ConditionalBranchOp &ctrlBr);
575  bool formsIrreducibleCF(Block *splitBlock, Block *mergeBlock);
576 };
577 } // namespace
578 
579 LogicalResult
580 HandshakeLowering::feedForwardRewriting(ConversionPatternRewriter &rewriter) {
581  // Nothing to do on a single block region.
582  if (this->getRegion().hasOneBlock())
583  return success();
584  return FeedForwardNetworkRewriter(*this, rewriter).apply();
585 }
586 
587 [[maybe_unused]] static bool loopsHaveSingleExit(CFGLoopInfo &loopInfo) {
588  for (CFGLoop *loop : loopInfo.getTopLevelLoops())
589  if (!loop->getExitBlock())
590  return false;
591  return true;
592 }
593 
594 bool FeedForwardNetworkRewriter::formsIrreducibleCF(Block *splitBlock,
595  Block *mergeBlock) {
596  CFGLoop *loop = loopInfo.getLoopFor(mergeBlock);
597  for (auto *mergePred : mergeBlock->getPredecessors()) {
598  // Skip loop predecessors
599  if (loop && loop->contains(mergePred))
600  continue;
601 
602  // A DAG-CFG is irreducible, iff a merge block has a predecessor that can be
603  // reached from both successors of a split node, e.g., neither is a
604  // dominator.
605  // => Their control flow can merge in other places, which makes this
606  // irreducible.
607  if (llvm::none_of(splitBlock->getSuccessors(), [&](Block *splitSucc) {
608  if (splitSucc == mergeBlock || mergePred == splitBlock)
609  return true;
610  return domInfo.dominates(splitSucc, mergePred);
611  }))
612  return true;
613  }
614  return false;
615 }
616 
617 static Operation *findBranchToBlock(Block *block) {
618  Block *pred = *block->getPredecessors().begin();
619  return pred->getTerminator();
620 }
621 
622 LogicalResult
623 FeedForwardNetworkRewriter::findBlockPairs(BlockPairs &blockPairs) {
624  // assumes that merge block insertion happended beforehand
625  // Thus, for each split block, there exists one merge block which is the post
626  // dominator of the child nodes.
627  Region &r = hl.getRegion();
628  Operation *parentOp = r.getParentOp();
629 
630  // Assumes that each loop has only one exit block. Such an error should
631  // already be reported by the loop rewriting.
632  assert(loopsHaveSingleExit(loopInfo) &&
633  "expected loop to only have one exit block.");
634 
635  for (Block &b : r) {
636  if (b.getNumSuccessors() < 2)
637  continue;
638 
639  // Loop headers cannot be merge blocks.
640  if (loopInfo.getLoopFor(&b))
641  continue;
642 
643  assert(b.getNumSuccessors() == 2);
644  Block *succ0 = b.getSuccessor(0);
645  Block *succ1 = b.getSuccessor(1);
646 
647  if (succ0 == succ1)
648  continue;
649 
650  Block *mergeBlock = postDomInfo.findNearestCommonDominator(succ0, succ1);
651 
652  // Precondition checks
653  if (formsIrreducibleCF(&b, mergeBlock)) {
654  return parentOp->emitError("expected only reducible control flow.")
655  .attachNote(findBranchToBlock(mergeBlock)->getLoc())
656  << "This branch is involved in the irreducible control flow";
657  }
658 
659  unsigned nonLoopPreds = 0;
660  CFGLoop *loop = loopInfo.getLoopFor(mergeBlock);
661  for (auto *pred : mergeBlock->getPredecessors()) {
662  if (loop && loop->contains(pred))
663  continue;
664  nonLoopPreds++;
665  }
666  if (nonLoopPreds > 2)
667  return parentOp
668  ->emitError("expected a merge block to have two predecessors. "
669  "Did you run the merge block insertion pass?")
670  .attachNote(findBranchToBlock(mergeBlock)->getLoc())
671  << "This branch jumps to the illegal block";
672 
673  blockPairs.emplace_back(&b, mergeBlock);
674  }
675 
676  return success();
677 }
678 
679 LogicalResult FeedForwardNetworkRewriter::apply() {
680  BlockPairs pairs;
681 
682  if (failed(findBlockPairs(pairs)))
683  return failure();
684 
685  for (auto [splitBlock, mergeBlock] : pairs) {
686  handshake::ConditionalBranchOp ctrlBr;
687  BufferOp buffer = buildSplitNetwork(splitBlock, ctrlBr);
688  if (failed(buildMergeNetwork(mergeBlock, buffer, ctrlBr)))
689  return failure();
690  }
691 
692  return success();
693 }
694 
695 BufferOp FeedForwardNetworkRewriter::buildSplitNetwork(
696  Block *splitBlock, handshake::ConditionalBranchOp &ctrlBr) {
697  SmallVector<handshake::ConditionalBranchOp> branches;
698  llvm::copy(splitBlock->getOps<handshake::ConditionalBranchOp>(),
699  std::back_inserter(branches));
700 
701  auto *findRes = llvm::find_if(branches, [](auto br) {
702  return llvm::isa<NoneType>(br.getDataOperand().getType());
703  });
704 
705  assert(findRes && "expected one branch for the ctrl signal");
706  ctrlBr = *findRes;
707 
708  Value cond = ctrlBr.getConditionOperand();
709  assert(llvm::all_of(branches, [&](auto branch) {
710  return branch.getConditionOperand() == cond;
711  }));
712 
713  Location loc = cond.getLoc();
714  rewriter.setInsertionPointAfterValue(cond);
715 
716  // The buffer size defines the number of threads that can be concurently
717  // traversing the sub-CFG starting at the splitBlock.
718  size_t bufferSize = 2;
719  // TODO how to size these?
720  // Longest path in a CFG-DAG would be O(#blocks)
721 
722  return rewriter.create<handshake::BufferOp>(loc, cond, bufferSize,
723  BufferTypeEnum::fifo);
724 }
725 
726 LogicalResult FeedForwardNetworkRewriter::buildMergeNetwork(
727  Block *mergeBlock, BufferOp buf, handshake::ConditionalBranchOp &ctrlBr) {
728  // Replace control merge with mux
729  auto ctrlMerges = mergeBlock->getOps<handshake::ControlMergeOp>();
730  assert(std::distance(ctrlMerges.begin(), ctrlMerges.end()) == 1);
731 
732  handshake::ControlMergeOp ctrlMerge = *ctrlMerges.begin();
733  // This input might contain irreducible loops that we cannot handle.
734  if (ctrlMerge.getNumOperands() != 2)
735  return ctrlMerge.emitError("expected cmerges to have two operands");
736  rewriter.setInsertionPointAfter(ctrlMerge);
737  Location loc = ctrlMerge->getLoc();
738 
739  // The newly inserted mux has to select the results from the correct operand.
740  // As there is no guarantee on the order of cmerge inputs, the correct order
741  // has to be determined first.
742  bool requiresFlip = requiresOperandFlip(ctrlMerge, ctrlBr);
743  SmallVector<Value> muxOperands;
744  if (requiresFlip)
745  muxOperands = llvm::to_vector(llvm::reverse(ctrlMerge.getOperands()));
746  else
747  muxOperands = llvm::to_vector(ctrlMerge.getOperands());
748 
749  Value newCtrl = rewriter.create<handshake::MuxOp>(loc, buf, muxOperands);
750 
751  Value cond = buf.getResult();
752  if (requiresFlip) {
753  // As the mux operand order is the flipped cmerge input order, the index
754  // which replaces the output of the cmerge has to be flipped/negated as
755  // well.
756  cond = rewriter.create<arith::XOrIOp>(
757  loc, cond.getType(), cond,
758  rewriter.create<arith::ConstantOp>(
759  loc, rewriter.getIntegerAttr(rewriter.getI1Type(), 1)));
760  }
761 
762  // Require a cast to index to stick to the type of the mux input.
763  Value condAsIndex =
764  rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(), cond);
765 
766  hl.setBlockEntryControl(mergeBlock, newCtrl);
767 
768  // Replace with new ctrl value from mux and the index
769  rewriter.replaceOp(ctrlMerge, {newCtrl, condAsIndex});
770  return success();
771 }
772 
773 bool FeedForwardNetworkRewriter::requiresOperandFlip(
774  ControlMergeOp &ctrlMerge, handshake::ConditionalBranchOp &ctrlBr) {
775  assert(ctrlMerge.getNumOperands() == 2 &&
776  "Loops should already have been handled");
777 
778  Value fstOperand = ctrlMerge.getOperand(0);
779 
780  assert(ctrlBr.getTrueResult().hasOneUse() &&
781  "expected the result of a branch to only have one user");
782  Operation *trueUser = *ctrlBr.getTrueResult().user_begin();
783  if (trueUser == ctrlBr)
784  // The cmerge directly consumes the cond_br output.
785  return ctrlBr.getTrueResult() == fstOperand;
786 
787  // The cmerge is consumed in an intermediate block. Find out if this block is
788  // a predecessor of the "true" successor of the cmerge.
789  Block *trueBlock = trueUser->getBlock();
790  return domInfo.dominates(trueBlock, fstOperand.getDefiningOp()->getBlock());
791 }
792 
793 namespace {
794 // This function creates the loop 'continue' and 'exit' network around backedges
795 // in the CFG.
796 // We don't have a standard dialect based LoopInfo utility in MLIR
797 // (which could probably deduce most of the information that we need for this
798 // transformation), so we roll our own loop-detection analysis. This is
799 // simplified by the need to only detect outermost loops. Inner loops are
800 // not included in the loop network (since we only care about restricting
801 // different function invocations from activating a loop, not prevent loop
802 // pipelining within a single function invocation).
803 class LoopNetworkRewriter {
804 public:
805  LoopNetworkRewriter(HandshakeLowering &hl) : hl(hl) {}
806 
807  LogicalResult processRegion(Region &r, ConversionPatternRewriter &rewriter);
808 
809 private:
810  // An exit pair is a pair of <in loop block, outside loop block> that
811  // indicates where control leaves a loop.
812  using ExitPair = std::pair<Block *, Block *>;
813  LogicalResult processOuterLoop(Location loc, CFGLoop *loop);
814 
815  // Builds the loop continue network in between the loop header and its loop
816  // latch. The loop continuation network will replace the existing control
817  // merge in the loop header with a mux + loop priming register.
818  // The 'loopPrimingInput' is a backedge that will later be assigned by
819  // 'buildExitNetwork'. The value is to be used as the input to the loop
820  // priming buffer.
821  // Returns a reference to the loop priming register.
822  BufferOp buildContinueNetwork(Block *loopHeader, Block *loopLatch,
823  Backedge &loopPrimingInput);
824 
825  // Builds the loop exit network. This detects the conditional operands used in
826  // each of the exit blocks, matches their parity with the convention used to
827  // prime the loop register, and assigns it to the loop priming register input.
828  void buildExitNetwork(Block *loopHeader,
829  const llvm::SmallSet<ExitPair, 2> &exitPairs,
830  BufferOp loopPrimingRegister,
831  Backedge &loopPrimingInput);
832 
833 private:
834  ConversionPatternRewriter *rewriter = nullptr;
835  HandshakeLowering &hl;
836 };
837 } // namespace
838 
839 LogicalResult
840 HandshakeLowering::loopNetworkRewriting(ConversionPatternRewriter &rewriter) {
841  return LoopNetworkRewriter(*this).processRegion(r, rewriter);
842 }
843 
844 LogicalResult
845 LoopNetworkRewriter::processRegion(Region &r,
846  ConversionPatternRewriter &rewriter) {
847  // Nothing to do on a single block region.
848  if (r.hasOneBlock())
849  return success();
850  this->rewriter = &rewriter;
851 
852  Operation *op = r.getParentOp();
853 
854  DominanceInfo domInfo(op);
855  CFGLoopInfo loopInfo(domInfo.getDomTree(&r));
856 
857  for (CFGLoop *loop : loopInfo.getTopLevelLoops()) {
858  if (!loop->getLoopLatch())
859  return emitError(op->getLoc()) << "Multiple loop latches detected "
860  "(backedges from within the loop "
861  "to the loop header). Loop task "
862  "pipelining is only supported for "
863  "loops with unified loop latches.";
864 
865  // This is the start of an outer loop - go process!
866  if (failed(processOuterLoop(op->getLoc(), loop)))
867  return failure();
868  }
869 
870  return success();
871 }
872 
873 // Returns the operand of the 'mux' operation which originated from 'block'.
874 static Value getOperandFromBlock(MuxOp mux, Block *block) {
875  auto inValueIt = llvm::find_if(mux.getDataOperands(), [&](Value operand) {
876  return block == operand.getParentBlock();
877  });
878  assert(
879  inValueIt != mux.getOperands().end() &&
880  "Expected mux to have an operand originating from the requested block.");
881  return *inValueIt;
882 }
883 
884 // Returns a list of operands from 'mux' which corresponds to the inputs of the
885 // 'cmerge' operation. The results are sorted such that the i'th cmerge operand
886 // and the i'th sorted operand originate from the same block.
887 static std::vector<Value> getSortedInputs(ControlMergeOp cmerge, MuxOp mux) {
888  std::vector<Value> sortedOperands;
889  for (auto in : cmerge.getOperands()) {
890  auto *srcBlock = in.getParentBlock();
891  sortedOperands.push_back(getOperandFromBlock(mux, srcBlock));
892  }
893 
894  // Sanity check: ensure that operands are unique
895  for (unsigned i = 0; i < sortedOperands.size(); ++i) {
896  for (unsigned j = 0; j < sortedOperands.size(); ++j) {
897  if (i == j)
898  continue;
899  assert(sortedOperands[i] != sortedOperands[j] &&
900  "Cannot have an identical operand from two different blocks!");
901  }
902  }
903 
904  return sortedOperands;
905 }
906 
907 BufferOp LoopNetworkRewriter::buildContinueNetwork(Block *loopHeader,
908  Block *loopLatch,
909  Backedge &loopPrimingInput) {
910  // Gather the muxes to replace before modifying block internals; it's been
911  // found that if this is not done, we have determinism issues wrt. generating
912  // the same order of replaced muxes on repeated runs of an identical
913  // conversion.
914  llvm::SmallVector<MuxOp> muxesToReplace;
915  llvm::copy(loopHeader->getOps<MuxOp>(), std::back_inserter(muxesToReplace));
916 
917  // Fetch the control merge of the block; it is assumed that, at this point of
918  // lowering, no other form of control can be used for the loop header block
919  // than a control merge.
920  auto *cmerge = getControlMerge(loopHeader);
921  assert(hl.getBlockEntryControl(loopHeader) == cmerge->getResult(0) &&
922  "Expected control merge to be the control component of a loop header");
923  auto loc = cmerge->getLoc();
924 
925  // sanity check: cmerge should have >1 input to actually be a loop
926  assert(cmerge->getNumOperands() > 1 && "This cannot be a loop header");
927 
928  // Partition the control merge inputs into those originating from backedges,
929  // and those originating elsewhere.
930  SmallVector<Value> externalCtrls, loopCtrls;
931  for (auto cval : cmerge->getOperands()) {
932  if (cval.getParentBlock() == loopLatch)
933  loopCtrls.push_back(cval);
934  else
935  externalCtrls.push_back(cval);
936  }
937  assert(loopCtrls.size() == 1 &&
938  "Expected a single loop control value to match the single loop latch");
939  Value loopCtrl = loopCtrls.front();
940 
941  // Merge all of the controls in each partition
942  rewriter->setInsertionPointToStart(loopHeader);
943  auto externalCtrlMerge = rewriter->create<ControlMergeOp>(loc, externalCtrls);
944 
945  // Create loop mux and the loop priming register. The loop mux will on select
946  // "0" select external control, and internal control at "1". This convention
947  // must be followed by the loop exit network.
948  auto primingRegister =
949  rewriter->create<BufferOp>(loc, loopPrimingInput, 1, BufferTypeEnum::seq);
950  // Initialize the priming register to path 0.
951  primingRegister->setAttr("initValues", rewriter->getI64ArrayAttr({0}));
952 
953  // The loop control mux will deterministically select between control entering
954  // the loop from any external block or the single loop backedge.
955  auto loopCtrlMux = rewriter->create<MuxOp>(
956  loc, primingRegister.getResult(),
957  llvm::SmallVector<Value>{externalCtrlMerge.getResult(), loopCtrl});
958 
959  // Replace the existing control merge 'result' output with the loop control
960  // mux.
961  cmerge->getResult(0).replaceAllUsesWith(loopCtrlMux.getResult());
962 
963  // Register the new block entry control value
964  hl.setBlockEntryControl(loopHeader, loopCtrlMux.getResult());
965 
966  // Next, we need to consider how to replace the control merge 'index' output,
967  // used to drive input selection to the block.
968 
969  // Inputs to the loop header will be sourced from muxes with inputs from both
970  // the loop latch as well as external blocks. Iterate through these and sort
971  // based on the input ordering to the external/internal control merge.
972  // We do this by maintaining a mapping between the external and loop data
973  // inputs for each data mux in the design. The key of these maps is the
974  // original mux (that is to be replaced).
975  DenseMap<MuxOp, std::vector<Value>> externalDataInputs;
976  DenseMap<MuxOp, Value> loopDataInputs;
977  for (auto muxOp : muxesToReplace) {
978  if (muxOp == loopCtrlMux)
979  continue;
980 
981  externalDataInputs[muxOp] = getSortedInputs(externalCtrlMerge, muxOp);
982  loopDataInputs[muxOp] = getOperandFromBlock(muxOp, loopLatch);
983  assert(/*loop latch input*/ 1 + externalDataInputs[muxOp].size() ==
984  muxOp.getDataOperands().size() &&
985  "Expected all mux operands to be partitioned between loop and "
986  "external data inputs");
987  }
988 
989  // With this, we now replace each of the data input muxes in the loop header.
990  // We instantiate a single mux driven by the external control merge.
991  // This, as well as the corresponding data input coming from within the single
992  // loop latch, will then be selected between by a 3rd mux, based on the
993  // priming register.
994  for (MuxOp mux : muxesToReplace) {
995  auto externalDataMux = rewriter->create<MuxOp>(
996  loc, externalCtrlMerge.getIndex(), externalDataInputs[mux]);
997 
998  rewriter->replaceOp(
999  mux, rewriter
1000  ->create<MuxOp>(loc, primingRegister,
1001  llvm::SmallVector<Value>{externalDataMux,
1002  loopDataInputs[mux]})
1003  .getResult());
1004  }
1005 
1006  // Now all values defined by the original cmerge should have been replaced,
1007  // and it may be erased.
1008  rewriter->eraseOp(cmerge);
1009 
1010  // Return the priming register to be referenced by the exit network builder.
1011  return primingRegister;
1012 }
1013 
1014 void LoopNetworkRewriter::buildExitNetwork(
1015  Block *loopHeader, const llvm::SmallSet<ExitPair, 2> &exitPairs,
1016  BufferOp loopPrimingRegister, Backedge &loopPrimingInput) {
1017  auto loc = loopPrimingRegister.getLoc();
1018 
1019  // Iterate over the exit pairs to gather up the condition signals that need to
1020  // be connected to the exit network. In doing so, we parity-correct these
1021  // condition values based on the convention used in buildContinueNetwork - The
1022  // loop mux will on select "0" select external control, and internal control
1023  // at "1". This convention which must be followed by the loop exit network.
1024  // External control must be selected when exiting the loop (to reprime the
1025  // register).
1026  SmallVector<Value> parityCorrectedConds;
1027  for (auto &[condBlock, exitBlock] : exitPairs) {
1028  auto condBr = getControlCondBranch(condBlock);
1029  assert(
1030  condBr &&
1031  "Expected a conditional control branch op in the loop condition block");
1032  Operation *trueUser = *condBr.getTrueResult().getUsers().begin();
1033  bool isTrueParity = trueUser->getBlock() == exitBlock;
1034  assert(isTrueParity ^
1035  ((*condBr.getFalseResult().getUsers().begin())->getBlock() ==
1036  exitBlock) &&
1037  "The user of either the true or the false result should be in the "
1038  "exit block");
1039 
1040  Value condValue = condBr.getConditionOperand();
1041  if (isTrueParity) {
1042  // This goes against the convention, and we have to invert the condition
1043  // value before connecting it to the exit network.
1044  rewriter->setInsertionPoint(condBr);
1045  condValue = rewriter->create<arith::XOrIOp>(
1046  loc, condValue.getType(), condValue,
1047  rewriter->create<arith::ConstantOp>(
1048  loc, rewriter->getIntegerAttr(rewriter->getI1Type(), 1)));
1049  }
1050  parityCorrectedConds.push_back(condValue);
1051  }
1052 
1053  // Merge all of the parity-corrected exit conditions and assign them
1054  // to the loop priming input.
1055  auto exitMerge = rewriter->create<MergeOp>(loc, parityCorrectedConds);
1056  loopPrimingInput.setValue(exitMerge);
1057 }
1058 
1059 LogicalResult LoopNetworkRewriter::processOuterLoop(Location loc,
1060  CFGLoop *loop) {
1061  // We determine the exit pairs of the loop; this is the in-loop nodes
1062  // which branch off to the exit nodes.
1063  llvm::SmallSet<ExitPair, 2> exitPairs;
1064  SmallVector<Block *> exitBlocks;
1065  loop->getExitBlocks(exitBlocks);
1066  for (auto *exitNode : exitBlocks) {
1067  for (auto *pred : exitNode->getPredecessors()) {
1068  // is the predecessor inside the loop?
1069  if (!loop->contains(pred))
1070  continue;
1071 
1072  ExitPair condPair = {pred, exitNode};
1073  assert(!exitPairs.count(condPair) &&
1074  "identical condition pairs should never be possible");
1075  exitPairs.insert(condPair);
1076  }
1077  }
1078  assert(!exitPairs.empty() && "No exits from loop?");
1079 
1080  // The first precondition to our loop transformation is that only a single
1081  // exit pair exists in the loop.
1082  if (exitPairs.size() > 1)
1083  return emitError(loc)
1084  << "Multiple exits detected within a loop. Loop task pipelining is "
1085  "only supported for loops with unified loop exit blocks.";
1086 
1087  Block *header = loop->getHeader();
1088  BackedgeBuilder bebuilder(*rewriter, header->front().getLoc());
1089 
1090  // Build the loop continue network. Loop continuation is triggered solely by
1091  // backedges to the header.
1092  auto loopPrimingRegisterInput = bebuilder.get(rewriter->getI1Type());
1093  auto loopPrimingRegister = buildContinueNetwork(header, loop->getLoopLatch(),
1094  loopPrimingRegisterInput);
1095 
1096  // Build the loop exit network. Loop exiting is driven solely by exit pairs
1097  // from the loop.
1098  buildExitNetwork(header, exitPairs, loopPrimingRegister,
1099  loopPrimingRegisterInput);
1100 
1101  return success();
1102 }
1103 
1104 // Return the appropriate branch result based on successor block which uses it
1105 static Value getSuccResult(Operation *termOp, Operation *newOp,
1106  Block *succBlock) {
1107  // For conditional block, check if result goes to true or to false successor
1108  if (auto condBranchOp = dyn_cast<mlir::cf::CondBranchOp>(termOp)) {
1109  if (condBranchOp.getTrueDest() == succBlock)
1110  return dyn_cast<handshake::ConditionalBranchOp>(newOp).getTrueResult();
1111  else {
1112  assert(condBranchOp.getFalseDest() == succBlock);
1113  return dyn_cast<handshake::ConditionalBranchOp>(newOp).getFalseResult();
1114  }
1115  }
1116  // If the block is unconditional, newOp has only one result
1117  return newOp->getResult(0);
1118 }
1119 
1120 LogicalResult
1121 HandshakeLowering::addBranchOps(ConversionPatternRewriter &rewriter) {
1122 
1123  BlockValues liveOuts;
1124 
1125  for (Block &block : r) {
1126  for (Operation &op : block) {
1127  for (auto result : op.getResults())
1128  if (isLiveOut(result))
1129  liveOuts[&block].push_back(result);
1130  }
1131  }
1132 
1133  for (Block &block : r) {
1134  Operation *termOp = block.getTerminator();
1135  rewriter.setInsertionPoint(termOp);
1136 
1137  for (Value val : liveOuts[&block]) {
1138  // Count the number of branches which the liveout needs
1139  int numBranches = getBranchCount(val, &block);
1140 
1141  // Instantiate branches and connect to Merges
1142  for (int i = 0, e = numBranches; i < e; ++i) {
1143  Operation *newOp = nullptr;
1144 
1145  if (auto condBranchOp = dyn_cast<mlir::cf::CondBranchOp>(termOp))
1146  newOp = rewriter.create<handshake::ConditionalBranchOp>(
1147  termOp->getLoc(), condBranchOp.getCondition(), val);
1148  else if (isa<mlir::cf::BranchOp>(termOp))
1149  newOp = rewriter.create<handshake::BranchOp>(termOp->getLoc(), val);
1150 
1151  if (newOp == nullptr)
1152  continue;
1153 
1154  for (int j = 0, e = block.getNumSuccessors(); j < e; ++j) {
1155  Block *succ = block.getSuccessor(j);
1156  Value res = getSuccResult(termOp, newOp, succ);
1157 
1158  for (auto &u : val.getUses()) {
1159  if (u.getOwner()->getBlock() == succ) {
1160  u.getOwner()->replaceUsesOfWith(val, res);
1161  break;
1162  }
1163  }
1164  }
1165  }
1166  }
1167  }
1168 
1169  return success();
1170 }
1171 
1172 LogicalResult HandshakeLowering::connectConstantsToControl(
1173  ConversionPatternRewriter &rewriter, bool sourceConstants) {
1174  // Create new constants which have a control-only input to trigger them.
1175  // These are conneted to the control network or optionally to a Source
1176  // operation (always triggering). Control-network connected constants may
1177  // help debugability, but result in a slightly larger circuit.
1178 
1179  if (sourceConstants) {
1180  for (auto constantOp : llvm::make_early_inc_range(
1181  r.template getOps<mlir::arith::ConstantOp>())) {
1182  rewriter.setInsertionPointAfter(constantOp);
1183  auto value = constantOp.getValue();
1184  rewriter.replaceOpWithNewOp<handshake::ConstantOp>(
1185  constantOp, value.getType(), value,
1186  rewriter.create<handshake::SourceOp>(constantOp.getLoc(),
1187  rewriter.getNoneType()));
1188  }
1189  } else {
1190  for (Block &block : r) {
1191  Value blockEntryCtrl = getBlockEntryControl(&block);
1192  for (auto constantOp : llvm::make_early_inc_range(
1193  block.template getOps<mlir::arith::ConstantOp>())) {
1194  rewriter.setInsertionPointAfter(constantOp);
1195  auto value = constantOp.getValue();
1196  rewriter.replaceOpWithNewOp<handshake::ConstantOp>(
1197  constantOp, value.getType(), value, blockEntryCtrl);
1198  }
1199  }
1200  }
1201  return success();
1202 }
1203 
1204 /// Holds information about an handshake "basic block terminator" control
1205 /// operation
1207  /// The operation
1208  Operation *op;
1209  /// The operation's control operand (must have type NoneType)
1211 
1212  BlockControlTerm(Operation *op, Value ctrlOperand)
1213  : op(op), ctrlOperand(ctrlOperand) {
1214  assert(op && ctrlOperand);
1215  assert(isa<NoneType>(ctrlOperand.getType()) &&
1216  "Control operand must be a NoneType");
1217  }
1218 
1219  /// Checks for member-wise equality
1220  friend bool operator==(const BlockControlTerm &lhs,
1221  const BlockControlTerm &rhs) {
1222  return lhs.op == rhs.op && lhs.ctrlOperand == rhs.ctrlOperand;
1223  }
1224 };
1225 
1227  // Identify the control terminator operation and its control operand in the
1228  // given block. One such operation must exist in the block
1229  for (Operation &op : *block) {
1230  if (auto branchOp = dyn_cast<handshake::BranchOp>(op))
1231  if (branchOp.isControl())
1232  return {branchOp, branchOp.getDataOperand()};
1233  if (auto branchOp = dyn_cast<handshake::ConditionalBranchOp>(op))
1234  if (branchOp.isControl())
1235  return {branchOp, branchOp.getDataOperand()};
1236  if (auto endOp = dyn_cast<handshake::ReturnOp>(op))
1237  return {endOp, endOp.getOperands().back()};
1238  }
1239  llvm_unreachable("Block terminator must exist");
1240 }
1241 
1242 static LogicalResult getOpMemRef(Operation *op, Value &out) {
1243  out = Value();
1244  if (auto memOp = dyn_cast<memref::LoadOp>(op))
1245  out = memOp.getMemRef();
1246  else if (auto memOp = dyn_cast<memref::StoreOp>(op))
1247  out = memOp.getMemRef();
1248  else if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) {
1249  MemRefAccess access(op);
1250  out = access.memref;
1251  }
1252  if (out != Value())
1253  return success();
1254  return op->emitOpError("Unknown Op type");
1255 }
1256 
1257 static bool isMemoryOp(Operation *op) {
1258  return isa<memref::LoadOp, memref::StoreOp, AffineReadOpInterface,
1259  AffineWriteOpInterface>(op);
1260 }
1261 
1262 LogicalResult
1263 HandshakeLowering::replaceMemoryOps(ConversionPatternRewriter &rewriter,
1264  MemRefToMemoryAccessOp &memRefOps) {
1265 
1266  std::vector<Operation *> opsToErase;
1267 
1268  // Enrich the memRefOps context with BlockArguments, in case they aren't used.
1269  for (auto arg : r.getArguments()) {
1270  auto memrefType = dyn_cast<mlir::MemRefType>(arg.getType());
1271  if (!memrefType)
1272  continue;
1273  // Ensure that this is a valid memref-typed value.
1274  if (failed(isValidMemrefType(arg.getLoc(), memrefType)))
1275  return failure();
1276  memRefOps.insert(std::make_pair(arg, std::vector<Operation *>()));
1277  }
1278 
1279  // Replace load and store ops with the corresponding handshake ops
1280  // Need to traverse ops in blocks to store them in memRefOps in program
1281  // order
1282  for (Operation &op : r.getOps()) {
1283  if (!isMemoryOp(&op))
1284  continue;
1285 
1286  rewriter.setInsertionPoint(&op);
1287  Value memref;
1288  if (getOpMemRef(&op, memref).failed())
1289  return failure();
1290  Operation *newOp = nullptr;
1291 
1292  llvm::TypeSwitch<Operation *>(&op)
1293  .Case<memref::LoadOp>([&](auto loadOp) {
1294  // Get operands which correspond to address indices
1295  // This will add all operands except alloc
1296  SmallVector<Value, 8> operands(loadOp.getIndices());
1297 
1298  newOp =
1299  rewriter.create<handshake::LoadOp>(op.getLoc(), memref, operands);
1300  op.getResult(0).replaceAllUsesWith(newOp->getResult(0));
1301  })
1302  .Case<memref::StoreOp>([&](auto storeOp) {
1303  // Get operands which correspond to address indices
1304  // This will add all operands except alloc and data
1305  SmallVector<Value, 8> operands(storeOp.getIndices());
1306 
1307  // Create new op where operands are store data and address indices
1308  newOp = rewriter.create<handshake::StoreOp>(
1309  op.getLoc(), storeOp.getValueToStore(), operands);
1310  })
1311  .Case<AffineReadOpInterface, AffineWriteOpInterface>([&](auto) {
1312  // Get essential memref access inforamtion.
1313  MemRefAccess access(&op);
1314  // The address of an affine load/store operation can be a result
1315  // of an affine map, which is a linear combination of constants
1316  // and parameters. Therefore, we should extract the affine map of
1317  // each address and expand it into proper expressions that
1318  // calculate the result.
1319  AffineMap map;
1320  if (auto loadOp = dyn_cast<AffineReadOpInterface>(op))
1321  map = loadOp.getAffineMap();
1322  else
1323  map = dyn_cast<AffineWriteOpInterface>(op).getAffineMap();
1324 
1325  // The returned object from expandAffineMap is an optional list of
1326  // the expansion results from the given affine map, which are the
1327  // actual address indices that can be used as operands for
1328  // handshake LoadOp/StoreOp. The following processing requires it
1329  // to be a valid result.
1330  auto operands =
1331  expandAffineMap(rewriter, op.getLoc(), map, access.indices);
1332  assert(operands && "Address operands of affine memref access "
1333  "cannot be reduced.");
1334 
1335  if (isa<AffineReadOpInterface>(op)) {
1336  auto loadOp = rewriter.create<handshake::LoadOp>(
1337  op.getLoc(), access.memref, *operands);
1338  newOp = loadOp;
1339  op.getResult(0).replaceAllUsesWith(loadOp.getDataResult());
1340  } else {
1341  newOp = rewriter.create<handshake::StoreOp>(
1342  op.getLoc(), op.getOperand(0), *operands);
1343  }
1344  })
1345  .Default([&](auto) {
1346  op.emitOpError("Load/store operation cannot be handled.");
1347  });
1348 
1349  memRefOps[memref].push_back(newOp);
1350  opsToErase.push_back(&op);
1351  }
1352 
1353  // Erase old memory ops
1354  for (unsigned i = 0, e = opsToErase.size(); i != e; ++i) {
1355  auto *op = opsToErase[i];
1356  for (int j = 0, e = op->getNumOperands(); j < e; ++j)
1357  op->eraseOperand(0);
1358  assert(op->getNumOperands() == 0);
1359 
1360  rewriter.eraseOp(op);
1361  }
1362 
1363  return success();
1364 }
1365 
1366 static SmallVector<Value, 8> getResultsToMemory(Operation *op) {
1367  // Get load/store results which are given as inputs to MemoryOp
1368 
1369  if (handshake::LoadOp loadOp = dyn_cast<handshake::LoadOp>(op)) {
1370  // For load, get all address outputs/indices
1371  // (load also has one data output which goes to successor operation)
1372  SmallVector<Value, 8> results(loadOp.getAddressResults());
1373  return results;
1374 
1375  } else {
1376  // For store, all outputs (data and address indices) go to memory
1377  assert(dyn_cast<handshake::StoreOp>(op));
1378  handshake::StoreOp storeOp = dyn_cast<handshake::StoreOp>(op);
1379  SmallVector<Value, 8> results(storeOp.getResults());
1380  return results;
1381  }
1382 }
1383 
1384 static void addLazyForks(Region &f, ConversionPatternRewriter &rewriter) {
1385 
1386  for (Block &block : f) {
1387  Value ctrl = getBlockControlTerminator(&block).ctrlOperand;
1388  if (!ctrl.hasOneUse())
1389  insertFork(ctrl, true, rewriter);
1390  }
1391 }
1392 
1393 static void removeUnusedAllocOps(Region &r,
1394  ConversionPatternRewriter &rewriter) {
1395  std::vector<Operation *> opsToDelete;
1396 
1397  // Remove alloc operations whose result have no use
1398  for (auto &op : r.getOps())
1399  if (isAllocOp(&op) && op.getResult(0).use_empty())
1400  opsToDelete.push_back(&op);
1401 
1402  llvm::for_each(opsToDelete, [&](auto allocOp) { rewriter.eraseOp(allocOp); });
1403 }
1404 
1405 static void addJoinOps(ConversionPatternRewriter &rewriter,
1406  ArrayRef<BlockControlTerm> controlTerms) {
1407  for (auto term : controlTerms) {
1408  auto &[op, ctrl] = term;
1409  auto *srcOp = ctrl.getDefiningOp();
1410 
1411  // Insert only single join per block
1412  if (!isa<JoinOp>(srcOp)) {
1413  rewriter.setInsertionPointAfter(srcOp);
1414  Operation *newJoin = rewriter.create<JoinOp>(srcOp->getLoc(), ctrl);
1415  op->replaceUsesOfWith(ctrl, newJoin->getResult(0));
1416  }
1417  }
1418 }
1419 
1420 static std::vector<BlockControlTerm>
1421 getControlTerminators(ArrayRef<Operation *> memOps) {
1422  std::vector<BlockControlTerm> terminators;
1423 
1424  for (Operation *op : memOps) {
1425  // Get block from which the mem op originates
1426  Block *block = op->getBlock();
1427  // Identify the control terminator in the block
1428  auto term = getBlockControlTerminator(block);
1429  if (std::find(terminators.begin(), terminators.end(), term) ==
1430  terminators.end())
1431  terminators.push_back(term);
1432  }
1433  return terminators;
1434 }
1435 
1436 static void addValueToOperands(Operation *op, Value val) {
1437 
1438  SmallVector<Value, 8> results(op->getOperands());
1439  results.push_back(val);
1440  op->setOperands(results);
1441 }
1442 
1443 static void setLoadDataInputs(ArrayRef<Operation *> memOps, Operation *memOp) {
1444  // Set memory outputs as load input data
1445  int ld_count = 0;
1446  for (auto *op : memOps) {
1447  if (isa<handshake::LoadOp>(op))
1448  addValueToOperands(op, memOp->getResult(ld_count++));
1449  }
1450 }
1451 
1452 static LogicalResult setJoinControlInputs(ArrayRef<Operation *> memOps,
1453  Operation *memOp, int offset,
1454  ArrayRef<int> cntrlInd) {
1455  // Connect all memory ops to the join of that block (ensures that all mem
1456  // ops terminate before a new block starts)
1457  for (int i = 0, e = memOps.size(); i < e; ++i) {
1458  auto *op = memOps[i];
1459  Value ctrl = getBlockControlTerminator(op->getBlock()).ctrlOperand;
1460  auto *srcOp = ctrl.getDefiningOp();
1461  if (!isa<JoinOp>(srcOp)) {
1462  return srcOp->emitOpError("Op expected to be a JoinOp");
1463  }
1464  addValueToOperands(srcOp, memOp->getResult(offset + cntrlInd[i]));
1465  }
1466  return success();
1467 }
1468 
1469 void HandshakeLowering::setMemOpControlInputs(
1470  ConversionPatternRewriter &rewriter, ArrayRef<Operation *> memOps,
1471  Operation *memOp, int offset, ArrayRef<int> cntrlInd) {
1472  for (int i = 0, e = memOps.size(); i < e; ++i) {
1473  std::vector<Value> controlOperands;
1474  Operation *currOp = memOps[i];
1475  Block *currBlock = currOp->getBlock();
1476 
1477  // Set load/store control inputs from the block input control value
1478  Value blockEntryCtrl = getBlockEntryControl(currBlock);
1479  controlOperands.push_back(blockEntryCtrl);
1480 
1481  // Set load/store control inputs from predecessors in block
1482  for (int j = 0, f = i; j < f; ++j) {
1483  Operation *predOp = memOps[j];
1484  Block *predBlock = predOp->getBlock();
1485  if (currBlock == predBlock)
1486  // Any dependency but RARs
1487  if (!(isa<handshake::LoadOp>(currOp) && isa<handshake::LoadOp>(predOp)))
1488  // cntrlInd maps memOps index to correct control output index
1489  controlOperands.push_back(memOp->getResult(offset + cntrlInd[j]));
1490  }
1491 
1492  // If there is only one control input, add directly to memory op
1493  if (controlOperands.size() == 1)
1494  addValueToOperands(currOp, controlOperands[0]);
1495 
1496  // If multiple, join them and connect join output to memory op
1497  else {
1498  rewriter.setInsertionPoint(currOp);
1499  Operation *joinOp =
1500  rewriter.create<JoinOp>(currOp->getLoc(), controlOperands);
1501  addValueToOperands(currOp, joinOp->getResult(0));
1502  }
1503  }
1504 }
1505 
1506 LogicalResult
1507 HandshakeLowering::connectToMemory(ConversionPatternRewriter &rewriter,
1508  MemRefToMemoryAccessOp memRefOps, bool lsq) {
1509  // Add MemoryOps which represent the memory interface
1510  // Connect memory operations and control appropriately
1511  int mem_count = 0;
1512  for (auto memory : memRefOps) {
1513  // First operand corresponds to memref (alloca or function argument)
1514  Value memrefOperand = memory.first;
1515 
1516  // A memory is external if the memref that defines it is provided as a
1517  // function (block) argument.
1518  bool isExternalMemory = isa<BlockArgument>(memrefOperand);
1519 
1520  mlir::MemRefType memrefType =
1521  cast<mlir::MemRefType>(memrefOperand.getType());
1522  if (failed(isValidMemrefType(memrefOperand.getLoc(), memrefType)))
1523  return failure();
1524 
1525  std::vector<Value> operands;
1526 
1527  // Get control terminators whose control operand need to connect to memory
1528  std::vector<BlockControlTerm> controlTerms =
1529  getControlTerminators(memory.second);
1530 
1531  // In case of LSQ interface, set control values as inputs (used to
1532  // trigger allocation to LSQ)
1533  if (lsq)
1534  for (auto valOp : controlTerms)
1535  operands.push_back(valOp.ctrlOperand);
1536 
1537  // Add load indices and store data+indices to memory operands
1538  // Count number of loads so that we can generate appropriate number of
1539  // memory outputs (data to load ops)
1540 
1541  // memory.second is in program order
1542  // Enforce MemoryOp port ordering as follows:
1543  // Operands: all stores then all loads (stdata1, staddr1, stdata2,...,
1544  // ldaddr1, ldaddr2,....) Outputs: all load outputs, ordered the same as
1545  // load addresses (lddata1, lddata2, ...), followed by all none outputs,
1546  // ordered as operands (stnone1, stnone2,...ldnone1, ldnone2,...)
1547  std::vector<int> newInd(memory.second.size(), 0);
1548  int ind = 0;
1549  for (int i = 0, e = memory.second.size(); i < e; ++i) {
1550  auto *op = memory.second[i];
1551  if (isa<handshake::StoreOp>(op)) {
1552  SmallVector<Value, 8> results = getResultsToMemory(op);
1553  operands.insert(operands.end(), results.begin(), results.end());
1554  newInd[i] = ind++;
1555  }
1556  }
1557 
1558  int ld_count = 0;
1559 
1560  for (int i = 0, e = memory.second.size(); i < e; ++i) {
1561  auto *op = memory.second[i];
1562  if (isa<handshake::LoadOp>(op)) {
1563  SmallVector<Value, 8> results = getResultsToMemory(op);
1564  operands.insert(operands.end(), results.begin(), results.end());
1565 
1566  ld_count++;
1567  newInd[i] = ind++;
1568  }
1569  }
1570 
1571  // control-only outputs for each access (indicate access completion)
1572  int cntrl_count = lsq ? 0 : memory.second.size();
1573 
1574  Block *entryBlock = &r.front();
1575  rewriter.setInsertionPointToStart(entryBlock);
1576 
1577  // Place memory op next to the alloc op
1578  Operation *newOp = nullptr;
1579  if (isExternalMemory)
1580  newOp = rewriter.create<ExternalMemoryOp>(
1581  entryBlock->front().getLoc(), memrefOperand, operands, ld_count,
1582  cntrl_count - ld_count, mem_count++);
1583  else
1584  newOp = rewriter.create<MemoryOp>(entryBlock->front().getLoc(), operands,
1585  ld_count, cntrl_count, lsq, mem_count++,
1586  memrefOperand);
1587 
1588  setLoadDataInputs(memory.second, newOp);
1589 
1590  if (!lsq) {
1591  // Create Joins which join done signals from memory with the
1592  // control-only network
1593  addJoinOps(rewriter, controlTerms);
1594 
1595  // Connect all load/store done signals to the join of their block
1596  // Ensure that the block terminates only after all its accesses have
1597  // completed
1598  // True is default. When no sync needed, set to false (for now,
1599  // user-determined)
1600  bool control = true;
1601 
1602  if (control &&
1603  setJoinControlInputs(memory.second, newOp, ld_count, newInd).failed())
1604  return failure();
1605 
1606  // Set control-only inputs to each memory op
1607  // Ensure that op starts only after prior blocks have completed
1608  // Ensure that op starts only after predecessor ops (with RAW, WAR, or
1609  // WAW) have completed
1610  setMemOpControlInputs(rewriter, memory.second, newOp, ld_count, newInd);
1611  }
1612  }
1613 
1614  if (lsq)
1615  addLazyForks(r, rewriter);
1616 
1617  removeUnusedAllocOps(r, rewriter);
1618  return success();
1619 }
1620 
1621 LogicalResult
1622 HandshakeLowering::replaceCallOps(ConversionPatternRewriter &rewriter) {
1623  for (Block &block : r) {
1624  /// An instance is activated whenever control arrives at the basic block
1625  /// of the source callOp.
1626  Value blockEntryControl = getBlockEntryControl(&block);
1627  for (Operation &op : block) {
1628  if (auto callOp = dyn_cast<mlir::func::CallOp>(op)) {
1629  llvm::SmallVector<Value> operands;
1630  llvm::copy(callOp.getOperands(), std::back_inserter(operands));
1631  operands.push_back(blockEntryControl);
1632  rewriter.setInsertionPoint(callOp);
1633  auto instanceOp = rewriter.create<handshake::InstanceOp>(
1634  callOp.getLoc(), callOp.getCallee(), callOp.getResultTypes(),
1635  operands);
1636  // Replace all results of the source callOp.
1637  for (auto it : llvm::zip(callOp.getResults(), instanceOp.getResults()))
1638  std::get<0>(it).replaceAllUsesWith(std::get<1>(it));
1639  rewriter.eraseOp(callOp);
1640  }
1641  }
1642  }
1643  return success();
1644 }
1645 
1646 namespace {
1647 /// Strategy class for SSA maximization during cf-to-handshake conversion.
1648 /// Block arguments of type MemRefType and allocation operations are not
1649 /// considered for SSA maximization.
1650 class HandshakeLoweringSSAStrategy : public SSAMaximizationStrategy {
1651  /// Filters out block arguments of type MemRefType
1652  bool maximizeArgument(BlockArgument arg) override {
1653  return !isa<mlir::MemRefType>(arg.getType());
1654  }
1655 
1656  /// Filters out allocation operations
1657  bool maximizeOp(Operation *op) override { return !isAllocOp(op); }
1658 };
1659 } // namespace
1660 
1661 /// Converts every value in the region into maximal SSA form, unless the value
1662 /// is a block argument of type MemRefType or the result of an allocation
1663 /// operation.
1664 static LogicalResult maximizeSSANoMem(Region &r,
1665  ConversionPatternRewriter &rewriter) {
1666  HandshakeLoweringSSAStrategy strategy;
1667  return maximizeSSA(r, strategy, rewriter);
1668 }
1669 
1670 static LogicalResult lowerFuncOp(func::FuncOp funcOp, MLIRContext *ctx,
1671  bool sourceConstants,
1672  bool disableTaskPipelining) {
1673  // Only retain those attributes that are not constructed by build.
1674  SmallVector<NamedAttribute, 4> attributes;
1675  for (const auto &attr : funcOp->getAttrs()) {
1676  if (attr.getName() == SymbolTable::getSymbolAttrName() ||
1677  attr.getName() == funcOp.getFunctionTypeAttrName())
1678  continue;
1679  attributes.push_back(attr);
1680  }
1681 
1682  // Get function arguments
1683  llvm::SmallVector<mlir::Type, 8> argTypes;
1684  for (auto &argType : funcOp.getArgumentTypes())
1685  argTypes.push_back(argType);
1686 
1687  // Get function results
1688  llvm::SmallVector<mlir::Type, 8> resTypes;
1689  for (auto resType : funcOp.getResultTypes())
1690  resTypes.push_back(resType);
1691 
1692  handshake::FuncOp newFuncOp;
1693 
1694  // Add control input/output to function arguments/results and create a
1695  // handshake::FuncOp of appropriate type
1696  if (partiallyLowerOp<func::FuncOp>(
1697  [&](func::FuncOp funcOp, PatternRewriter &rewriter) {
1698  auto noneType = rewriter.getNoneType();
1699  resTypes.push_back(noneType);
1700  argTypes.push_back(noneType);
1701  auto func_type = rewriter.getFunctionType(argTypes, resTypes);
1702  newFuncOp = rewriter.create<handshake::FuncOp>(
1703  funcOp.getLoc(), funcOp.getName(), func_type, attributes);
1704  rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(),
1705  newFuncOp.end());
1706  if (!newFuncOp.isExternal()) {
1707  newFuncOp.getBodyBlock()->addArgument(rewriter.getNoneType(),
1708  funcOp.getLoc());
1709  newFuncOp.resolveArgAndResNames();
1710  }
1711  rewriter.eraseOp(funcOp);
1712  return success();
1713  },
1714  ctx, funcOp)
1715  .failed())
1716  return failure();
1717 
1718  // Apply SSA maximization
1719  if (partiallyLowerRegion(maximizeSSANoMem, ctx, newFuncOp.getBody()).failed())
1720  return failure();
1721 
1722  if (!newFuncOp.isExternal()) {
1723  Block *bodyBlock = newFuncOp.getBodyBlock();
1724  Value entryCtrl = bodyBlock->getArguments().back();
1725  HandshakeLowering fol(newFuncOp.getBody());
1726  if (failed(lowerRegion<func::ReturnOp, handshake::ReturnOp>(
1727  fol, sourceConstants, disableTaskPipelining, entryCtrl)))
1728  return failure();
1729  }
1730 
1731  return success();
1732 }
1733 
1734 namespace {
1735 
1736 struct HandshakeRemoveBlockPass
1737  : HandshakeRemoveBlockBase<HandshakeRemoveBlockPass> {
1738  void runOnOperation() override { removeBasicBlocks(getOperation()); }
1739 };
1740 
1741 struct CFToHandshakePass : public CFToHandshakeBase<CFToHandshakePass> {
1742  CFToHandshakePass(bool sourceConstants, bool disableTaskPipelining) {
1743  this->sourceConstants = sourceConstants;
1744  this->disableTaskPipelining = disableTaskPipelining;
1745  }
1746  void runOnOperation() override {
1747  ModuleOp m = getOperation();
1748 
1749  for (auto funcOp : llvm::make_early_inc_range(m.getOps<func::FuncOp>())) {
1750  if (failed(lowerFuncOp(funcOp, &getContext(), sourceConstants,
1751  disableTaskPipelining))) {
1752  signalPassFailure();
1753  return;
1754  }
1755  }
1756  }
1757 };
1758 
1759 } // namespace
1760 
1761 std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
1762 circt::createCFToHandshakePass(bool sourceConstants,
1763  bool disableTaskPipelining) {
1764  return std::make_unique<CFToHandshakePass>(sourceConstants,
1765  disableTaskPipelining);
1766 }
1767 
1768 std::unique_ptr<mlir::OperationPass<handshake::FuncOp>>
1770  return std::make_unique<HandshakeRemoveBlockPass>();
1771 }
static ConditionalBranchOp getControlCondBranch(Block *block)
static LogicalResult lowerFuncOp(func::FuncOp funcOp, MLIRContext *ctx, bool sourceConstants, bool disableTaskPipelining)
static bool isMemoryOp(Operation *op)
static LogicalResult setJoinControlInputs(ArrayRef< Operation * > memOps, Operation *memOp, int offset, ArrayRef< int > cntrlInd)
static void addJoinOps(ConversionPatternRewriter &rewriter, ArrayRef< BlockControlTerm > controlTerms)
static void addLazyForks(Region &f, ConversionPatternRewriter &rewriter)
static bool isLiveOut(Value val)
static Operation * getControlMerge(Block *block)
static SmallVector< Value, 8 > getResultsToMemory(Operation *op)
static unsigned getBlockPredecessorCount(Block *block)
static int getBranchCount(Value val, Block *block)
void removeBasicBlocks(handshake::FuncOp funcOp)
static Operation * getFirstOp(Block *block)
Returns the first occurance of an operation of type TOp, else, returns null op.
static Value getSuccResult(Operation *termOp, Operation *newOp, Block *succBlock)
static Value getMergeOperand(HandshakeLowering::MergeOpInfo mergeInfo, Block *predBlock)
static LogicalResult isValidMemrefType(Location loc, mlir::MemRefType type)
static bool isAllocOp(Operation *op)
static Value getOperandFromBlock(MuxOp mux, Block *block)
static LogicalResult getOpMemRef(Operation *op, Value &out)
static LogicalResult partiallyLowerOp(const std::function< LogicalResult(TOp, ConversionPatternRewriter &)> &loweringFunc, MLIRContext *ctx, TOp op)
static void addValueToOperands(Operation *op, Value val)
static bool loopsHaveSingleExit(CFGLoopInfo &loopInfo)
static std::vector< Value > getSortedInputs(ControlMergeOp cmerge, MuxOp mux)
static void removeBlockOperands(Region &f)
static void removeUnusedAllocOps(Region &r, ConversionPatternRewriter &rewriter)
static LogicalResult maximizeSSANoMem(Region &r, ConversionPatternRewriter &rewriter)
Converts every value in the region into maximal SSA form, unless the value is a block argument of typ...
static Operation * findBranchToBlock(Block *block)
static std::vector< BlockControlTerm > getControlTerminators(ArrayRef< Operation * > memOps)
static BlockControlTerm getBlockControlTerminator(Block *block)
static void reconnectMergeOps(Region &r, HandshakeLowering::BlockOps blockMerges, HandshakeLowering::ValueMap &mergePairs)
static void setLoadDataInputs(ArrayRef< Operation * > memOps, Operation *memOp)
assert(baseType &&"element must be base type")
static void insertFork(Value result, OpBuilder &rewriter)
static void mergeBlock(Block &destination, Block::iterator insertPoint, Block &source)
Move all operations from a source block in to a destination block.
Definition: ExpandWhens.cpp:28
LowerRegionTarget(MLIRContext &context, Region &region)
Instantiate one of these and use it to build typed backedges.
Backedge get(mlir::Type resultType, mlir::LocationAttr optionalLoc={})
Create a typed backedge.
Backedge is a wrapper class around a Value.
void setValue(mlir::Value)
Strategy class to control the behavior of SSA maximization.
Definition: Passes.h:53
DenseMap< Block *, std::vector< MergeOpInfo > > BlockOps
Definition: CFToHandshake.h:60
DenseMap< Block *, std::vector< Value > > BlockValues
Definition: CFToHandshake.h:59
llvm::MapVector< Value, std::vector< Operation * > > MemRefToMemoryAccessOp
Definition: CFToHandshake.h:63
DenseMap< Value, Value > ValueMap
Definition: CFToHandshake.h:61
llvm::function_ref< LogicalResult(Region &, ConversionPatternRewriter &)> RegionLoweringFunc
Definition: CFToHandshake.h:38
LogicalResult partiallyLowerRegion(const RegionLoweringFunc &loweringFunc, MLIRContext *ctx, Region &r)
StringAttr getName(ArrayAttr names, size_t idx)
Return the name at the specified index of the ArrayAttr or null if it cannot be determined.
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Definition: DebugAnalysis.h:21
LogicalResult maximizeSSA(Value value, PatternRewriter &rewriter)
Converts a single value within a function into maximal SSA form.
Definition: MaximizeSSA.cpp:81
std::unique_ptr< mlir::OperationPass< mlir::ModuleOp > > createCFToHandshakePass(bool sourceConstants=false, bool disableTaskPipelining=false)
std::unique_ptr< mlir::OperationPass< handshake::FuncOp > > createHandshakeRemoveBlockPass()
Holds information about an handshake "basic block terminator" control operation.
friend bool operator==(const BlockControlTerm &lhs, const BlockControlTerm &rhs)
Checks for member-wise equality.
Value ctrlOperand
The operation's control operand (must have type NoneType)
BlockControlTerm(Operation *op, Value ctrlOperand)
Operation * op
The operation.
Allows to partially lower a region by matching on the parent operation to then call the provided part...
PartialLoweringFunc fun
LogicalResult matchAndRewrite(Operation *op, ArrayRef< Value >, ConversionPatternRewriter &rewriter) const override
std::function< LogicalResult(Region &, ConversionPatternRewriter &)> PartialLoweringFunc
PartialLowerRegion(LowerRegionTarget &target, MLIRContext *context, LogicalResult &loweringResRef, const PartialLoweringFunc &fun)
LogicalResult & loweringRes
LowerRegionTarget & target