CIRCT 19.0.0git — Doxygen source listing for AffineToLoopSchedule.cpp.
1 //===- AffineToLoopSchedule.cpp--------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
15 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
16 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
17 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
18 #include "mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.h"
19 #include "mlir/Dialect/Affine/IR/AffineOps.h"
20 #include "mlir/Dialect/Affine/LoopUtils.h"
21 #include "mlir/Dialect/Affine/Utils.h"
22 #include "mlir/Dialect/Arith/IR/Arith.h"
23 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
24 #include "mlir/Dialect/Func/IR/FuncOps.h"
25 #include "mlir/Dialect/MemRef/IR/MemRef.h"
26 #include "mlir/Dialect/SCF/IR/SCF.h"
27 #include "mlir/IR/BuiltinDialect.h"
28 #include "mlir/IR/Dominance.h"
29 #include "mlir/IR/IRMapping.h"
30 #include "mlir/IR/ImplicitLocOpBuilder.h"
31 #include "mlir/Pass/Pass.h"
32 #include "mlir/Transforms/DialectConversion.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/TypeSwitch.h"
35 #include "llvm/Support/Debug.h"
36 #include <cassert>
37 #include <limits>
38 
39 #define DEBUG_TYPE "affine-to-loopschedule"
40 
41 namespace circt {
42 #define GEN_PASS_DEF_AFFINETOLOOPSCHEDULE
43 #include "circt/Conversion/Passes.h.inc"
44 } // namespace circt
45 
46 using namespace mlir;
47 using namespace mlir::arith;
48 using namespace mlir::memref;
49 using namespace mlir::scf;
50 using namespace mlir::func;
51 using namespace mlir::affine;
52 using namespace circt;
53 using namespace circt::analysis;
54 using namespace circt::scheduling;
55 using namespace circt::loopschedule;
56 
57 namespace {
58 
/// Pass that converts perfectly nested `affine.for` loops into
/// `loopschedule.pipeline` ops, using modulo scheduling to assign each
/// operation to a pipeline stage.
struct AffineToLoopSchedule
    : public circt::impl::AffineToLoopScheduleBase<AffineToLoopSchedule> {
  void runOnOperation() override;

private:
  /// Copy the operations, operator types, dependences, and distances of a
  /// CyclicProblem into a fresh ModuloProblem on the same containing op.
  ModuloProblem getModuloProblem(CyclicProblem &prob);
  /// Materialize affine load/store/if ops as memref/scf/arith ops, keeping
  /// `dependenceAnalysis` in sync with the replacements.
  LogicalResult
  lowerAffineStructures(MemoryDependenceAnalysis &dependenceAnalysis);
  /// Assign an operator type (latency/limit) to every operation in the
  /// innermost loop of `loopNest`.
  LogicalResult populateOperatorTypes(SmallVectorImpl<AffineForOp> &loopNest,
                                      ModuloProblem &problem);
  /// Check and solve `problem` with the simplex scheduler.
  LogicalResult solveSchedulingProblem(SmallVectorImpl<AffineForOp> &loopNest,
                                       ModuloProblem &problem);
  /// Rewrite the scheduled loop nest into a LoopSchedulePipelineOp.
  LogicalResult
  createLoopSchedulePipeline(SmallVectorImpl<AffineForOp> &loopNest,
                             ModuloProblem &problem);

  // Cached scheduling analysis for the function; set in runOnOperation.
  CyclicSchedulingAnalysis *schedulingAnalysis;
};
77 
78 } // namespace
79 
80 ModuloProblem AffineToLoopSchedule::getModuloProblem(CyclicProblem &prob) {
81  auto modProb = ModuloProblem::get(prob.getContainingOp());
82  for (auto *op : prob.getOperations()) {
83  auto opr = prob.getLinkedOperatorType(op);
84  if (opr.has_value()) {
85  modProb.setLinkedOperatorType(op, opr.value());
86  auto latency = prob.getLatency(opr.value());
87  if (latency.has_value())
88  modProb.setLatency(opr.value(), latency.value());
89  }
90  modProb.insertOperation(op);
91  }
92 
93  for (auto *op : prob.getOperations()) {
94  for (auto dep : prob.getDependences(op)) {
95  if (dep.isAuxiliary()) {
96  auto depInserted = modProb.insertDependence(dep);
97  assert(succeeded(depInserted));
98  (void)depInserted;
99  }
100  auto distance = prob.getDistance(dep);
101  if (distance.has_value())
102  modProb.setDistance(dep, distance.value());
103  }
104  }
105 
106  return modProb;
107 }
108 
109 void AffineToLoopSchedule::runOnOperation() {
110  // Get dependence analysis for the whole function.
111  auto dependenceAnalysis = getAnalysis<MemoryDependenceAnalysis>();
112 
113  // After dependence analysis, materialize affine structures.
114  if (failed(lowerAffineStructures(dependenceAnalysis)))
115  return signalPassFailure();
116 
117  // Get scheduling analysis for the whole function.
118  schedulingAnalysis = &getAnalysis<CyclicSchedulingAnalysis>();
119 
120  // Collect perfectly nested loops and work on them.
121  auto outerLoops = getOperation().getOps<AffineForOp>();
122  for (auto root : llvm::make_early_inc_range(outerLoops)) {
123  SmallVector<AffineForOp> nestedLoops;
124  getPerfectlyNestedLoops(nestedLoops, root);
125 
126  // Restrict to single loops to simplify things for now.
127  if (nestedLoops.size() != 1)
128  continue;
129 
130  ModuloProblem moduloProblem =
131  getModuloProblem(schedulingAnalysis->getProblem(nestedLoops.back()));
132 
133  // Populate the target operator types.
134  if (failed(populateOperatorTypes(nestedLoops, moduloProblem)))
135  return signalPassFailure();
136 
137  // Solve the scheduling problem computed by the analysis.
138  if (failed(solveSchedulingProblem(nestedLoops, moduloProblem)))
139  return signalPassFailure();
140 
141  // Convert the IR.
142  if (failed(createLoopSchedulePipeline(nestedLoops, moduloProblem)))
143  return signalPassFailure();
144  }
145 }
146 
147 /// Apply the affine map from an 'affine.load' operation to its operands, and
148 /// feed the results to a newly created 'memref.load' operation (which replaces
149 /// the original 'affine.load').
150 /// Also replaces the affine load with the memref load in dependenceAnalysis.
151 /// TODO(mikeurbach): this is copied from AffineToStandard, see if we can reuse.
152 class AffineLoadLowering : public OpConversionPattern<AffineLoadOp> {
153 public:
154  AffineLoadLowering(MLIRContext *context,
155  MemoryDependenceAnalysis &dependenceAnalysis)
156  : OpConversionPattern(context), dependenceAnalysis(dependenceAnalysis) {}
157 
158  LogicalResult
159  matchAndRewrite(AffineLoadOp op, OpAdaptor adaptor,
160  ConversionPatternRewriter &rewriter) const override {
161  // Expand affine map from 'affineLoadOp'.
162  SmallVector<Value, 8> indices(op.getMapOperands());
163  auto resultOperands =
164  expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices);
165  if (!resultOperands)
166  return failure();
167 
168  // Build memref.load memref[expandedMap.results].
169  auto memrefLoad = rewriter.replaceOpWithNewOp<memref::LoadOp>(
170  op, op.getMemRef(), *resultOperands);
171 
172  dependenceAnalysis.replaceOp(op, memrefLoad);
173 
174  return success();
175  }
176 
177 private:
179 };
180 
181 /// Apply the affine map from an 'affine.store' operation to its operands, and
182 /// feed the results to a newly created 'memref.store' operation (which replaces
183 /// the original 'affine.store').
184 /// Also replaces the affine store with the memref store in dependenceAnalysis.
185 /// TODO(mikeurbach): this is copied from AffineToStandard, see if we can reuse.
186 class AffineStoreLowering : public OpConversionPattern<AffineStoreOp> {
187 public:
188  AffineStoreLowering(MLIRContext *context,
189  MemoryDependenceAnalysis &dependenceAnalysis)
190  : OpConversionPattern(context), dependenceAnalysis(dependenceAnalysis) {}
191 
192  LogicalResult
193  matchAndRewrite(AffineStoreOp op, OpAdaptor adaptor,
194  ConversionPatternRewriter &rewriter) const override {
195  // Expand affine map from 'affineStoreOp'.
196  SmallVector<Value, 8> indices(op.getMapOperands());
197  auto maybeExpandedMap =
198  expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices);
199  if (!maybeExpandedMap)
200  return failure();
201 
202  // Build memref.store valueToStore, memref[expandedMap.results].
203  auto memrefStore = rewriter.replaceOpWithNewOp<memref::StoreOp>(
204  op, op.getValueToStore(), op.getMemRef(), *maybeExpandedMap);
205 
206  dependenceAnalysis.replaceOp(op, memrefStore);
207 
208  return success();
209  }
210 
211 private:
213 };
214 
215 /// Helper to hoist computation out of scf::IfOp branches, turning it into a
216 /// mux-like operation, and exposing potentially concurrent execution of its
217 /// branches.
220 
221  LogicalResult
222  matchAndRewrite(IfOp op, OpAdaptor adaptor,
223  ConversionPatternRewriter &rewriter) const override {
224  rewriter.modifyOpInPlace(op, [&]() {
225  if (!op.thenBlock()->without_terminator().empty()) {
226  rewriter.splitBlock(op.thenBlock(), --op.thenBlock()->end());
227  rewriter.inlineBlockBefore(&op.getThenRegion().front(), op);
228  }
229  if (op.elseBlock() && !op.elseBlock()->without_terminator().empty()) {
230  rewriter.splitBlock(op.elseBlock(), --op.elseBlock()->end());
231  rewriter.inlineBlockBefore(&op.getElseRegion().front(), op);
232  }
233  });
234 
235  return success();
236  }
237 };
238 
239 /// Helper to determine if an scf::IfOp is in mux-like form.
240 static bool ifOpLegalityCallback(IfOp op) {
241  return op.thenBlock()->without_terminator().empty() &&
242  (!op.elseBlock() || op.elseBlock()->without_terminator().empty());
243 }
244 
245 /// Helper to mark AffineYieldOp legal, unless it is inside a partially
246 /// converted scf::IfOp.
247 static bool yieldOpLegalityCallback(AffineYieldOp op) {
248  return !op->getParentOfType<IfOp>();
249 }
250 
251 /// After analyzing memory dependences, and before creating the schedule, we
252 /// want to materialize affine operations with arithmetic, scf, and memref
253 /// operations, which make the condition computation of addresses, etc.
254 /// explicit. This is important so the schedule can consider potentially complex
255 /// computations in the condition of ifs, or the addresses of loads and stores.
256 /// The dependence analysis will be updated so the dependences from the affine
257 /// loads and stores are now on the memref loads and stores.
258 LogicalResult AffineToLoopSchedule::lowerAffineStructures(
259  MemoryDependenceAnalysis &dependenceAnalysis) {
260  auto *context = &getContext();
261  auto op = getOperation();
262 
263  ConversionTarget target(*context);
264  target.addLegalDialect<AffineDialect, ArithDialect, MemRefDialect,
265  SCFDialect>();
266  target.addIllegalOp<AffineIfOp, AffineLoadOp, AffineStoreOp>();
267  target.addDynamicallyLegalOp<IfOp>(ifOpLegalityCallback);
268  target.addDynamicallyLegalOp<AffineYieldOp>(yieldOpLegalityCallback);
269 
270  RewritePatternSet patterns(context);
271  populateAffineToStdConversionPatterns(patterns);
272  patterns.add<AffineLoadLowering>(context, dependenceAnalysis);
273  patterns.add<AffineStoreLowering>(context, dependenceAnalysis);
274  patterns.add<IfOpHoisting>(context);
275 
276  if (failed(applyPartialConversion(op, target, std::move(patterns))))
277  return failure();
278 
279  return success();
280 }
281 
282 /// Populate the schedling problem operator types for the dialect we are
283 /// targetting. Right now, we assume Calyx, which has a standard library with
284 /// well-defined operator latencies. Ultimately, we should move this to a
285 /// dialect interface in the Scheduling dialect.
286 LogicalResult AffineToLoopSchedule::populateOperatorTypes(
287  SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
288  // Scheduling analyis only considers the innermost loop nest for now.
289  auto forOp = loopNest.back();
290 
291  // Load the Calyx operator library into the problem. This is a very minimal
292  // set of arithmetic and memory operators for now. This should ultimately be
293  // pulled out into some sort of dialect interface.
294  Problem::OperatorType combOpr = problem.getOrInsertOperatorType("comb");
295  problem.setLatency(combOpr, 0);
296  Problem::OperatorType seqOpr = problem.getOrInsertOperatorType("seq");
297  problem.setLatency(seqOpr, 1);
298  Problem::OperatorType mcOpr = problem.getOrInsertOperatorType("multicycle");
299  problem.setLatency(mcOpr, 3);
300 
301  Operation *unsupported;
302  WalkResult result = forOp.getBody()->walk([&](Operation *op) {
303  return TypeSwitch<Operation *, WalkResult>(op)
304  .Case<AddIOp, IfOp, AffineYieldOp, arith::ConstantOp, CmpIOp,
305  IndexCastOp, memref::AllocaOp, YieldOp>([&](Operation *combOp) {
306  // Some known combinational ops.
307  problem.setLinkedOperatorType(combOp, combOpr);
308  return WalkResult::advance();
309  })
310  .Case<AddIOp, CmpIOp>([&](Operation *seqOp) {
311  // These ops need to be sequential for now because we do not
312  // have enough information to chain them together yet.
313  problem.setLinkedOperatorType(seqOp, seqOpr);
314  return WalkResult::advance();
315  })
316  .Case<AffineStoreOp, memref::StoreOp>([&](Operation *memOp) {
317  // Some known sequential ops. In certain cases, reads may be
318  // combinational in Calyx, but taking advantage of that is left as
319  // a future enhancement.
320  Value memRef = isa<AffineStoreOp>(*memOp)
321  ? cast<AffineStoreOp>(*memOp).getMemRef()
322  : cast<memref::StoreOp>(*memOp).getMemRef();
324  "mem_" + std::to_string(hash_value(memRef)));
325  problem.setLatency(memOpr, 1);
326  problem.setLimit(memOpr, 1);
327  problem.setLinkedOperatorType(memOp, memOpr);
328  return WalkResult::advance();
329  })
330  .Case<AffineLoadOp, memref::LoadOp>([&](Operation *memOp) {
331  // Some known sequential ops. In certain cases, reads may be
332  // combinational in Calyx, but taking advantage of that is left as
333  // a future enhancement.
334  Value memRef = isa<AffineLoadOp>(*memOp)
335  ? cast<AffineLoadOp>(*memOp).getMemRef()
336  : cast<memref::LoadOp>(*memOp).getMemRef();
338  "mem_" + std::to_string(hash_value(memRef)));
339  problem.setLatency(memOpr, 1);
340  problem.setLimit(memOpr, 1);
341  problem.setLinkedOperatorType(memOp, memOpr);
342  return WalkResult::advance();
343  })
344  .Case<MulIOp>([&](Operation *mcOp) {
345  // Some known multi-cycle ops.
346  problem.setLinkedOperatorType(mcOp, mcOpr);
347  return WalkResult::advance();
348  })
349  .Default([&](Operation *badOp) {
350  unsupported = op;
351  return WalkResult::interrupt();
352  });
353  });
354 
355  if (result.wasInterrupted())
356  return forOp.emitError("unsupported operation ") << *unsupported;
357 
358  return success();
359 }
360 
/// Solve the pre-computed scheduling problem.
///
/// Checks the problem for validity, runs the simplex scheduler anchored on the
/// loop terminator, and verifies the resulting solution. With -debug, prints
/// the problem inputs before solving and the computed start times after.
LogicalResult AffineToLoopSchedule::solveSchedulingProblem(
    SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
  // Scheduling analyis only considers the innermost loop nest for now.
  auto forOp = loopNest.back();

  // Optionally debug problem inputs.
  LLVM_DEBUG(forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
    llvm::dbgs() << "Scheduling inputs for " << *op;
    auto opr = problem.getLinkedOperatorType(op);
    llvm::dbgs() << "\n opr = " << opr;
    llvm::dbgs() << "\n latency = " << problem.getLatency(*opr);
    for (auto dep : problem.getDependences(op))
      if (dep.isAuxiliary())
        llvm::dbgs() << "\n dep = { distance = " << problem.getDistance(dep)
                     << ", source = " << *dep.getSource() << " }";
    llvm::dbgs() << "\n\n";
  }));

  // Verify and solve the problem.
  if (failed(problem.check()))
    return failure();

  // Anchor the objective on the loop terminator so the whole body is covered.
  auto *anchor = forOp.getBody()->getTerminator();
  if (failed(scheduleSimplex(problem, anchor)))
    return failure();

  // Verify the solution.
  if (failed(problem.verify()))
    return failure();

  // Optionally debug problem outputs.
  LLVM_DEBUG({
    llvm::dbgs() << "Scheduled initiation interval = "
                 << problem.getInitiationInterval() << "\n\n";
    forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
      llvm::dbgs() << "Scheduling outputs for " << *op;
      llvm::dbgs() << "\n start = " << problem.getStartTime(op);
      llvm::dbgs() << "\n\n";
    });
  });

  return success();
}
405 
/// Create the loopschedule pipeline op for a loop nest.
///
/// Rewrites the scheduled loop as a LoopSchedulePipelineOp: a condition block
/// comparing the induction variable to the upper bound, followed by one
/// pipeline stage per distinct start time. Values that cross stage boundaries
/// are threaded through stage results ("registers"), and the original loop
/// nest is erased at the end.
LogicalResult AffineToLoopSchedule::createLoopSchedulePipeline(
    SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
  // Scheduling analyis only considers the innermost loop nest for now.
  auto forOp = loopNest.back();

  auto outerLoop = loopNest.front();
  auto innerLoop = loopNest.back();
  ImplicitLocOpBuilder builder(outerLoop.getLoc(), outerLoop);

  // Create Values for the loop's lower and upper bounds.
  Value lowerBound = lowerAffineLowerBound(innerLoop, builder);
  Value upperBound = lowerAffineUpperBound(innerLoop, builder);
  int64_t stepValue = innerLoop.getStep().getSExtValue();
  auto step = builder.create<arith::ConstantOp>(
      IntegerAttr::get(builder.getIndexType(), stepValue));

  // Create the pipeline op, with the same result types as the inner loop. An
  // iter arg is created for the induction variable.
  TypeRange resultTypes = innerLoop.getResultTypes();

  auto ii = builder.getI64IntegerAttr(problem.getInitiationInterval().value());

  // The induction variable is iter arg 0; the loop's own inits follow.
  SmallVector<Value> iterArgs;
  iterArgs.push_back(lowerBound);
  iterArgs.append(innerLoop.getInits().begin(), innerLoop.getInits().end());

  // If possible, attach a constant trip count attribute. This could be
  // generalized to support non-constant trip counts by supporting an AffineMap.
  std::optional<IntegerAttr> tripCountAttr;
  if (auto tripCount = getConstantTripCount(forOp))
    tripCountAttr = builder.getI64IntegerAttr(*tripCount);

  auto pipeline = builder.create<LoopSchedulePipelineOp>(
      resultTypes, ii, tripCountAttr, iterArgs);

  // Create the condition, which currently just compares the induction variable
  // to the upper bound.
  Block &condBlock = pipeline.getCondBlock();
  builder.setInsertionPointToStart(&condBlock);
  auto cmpResult = builder.create<arith::CmpIOp>(
      builder.getI1Type(), arith::CmpIPredicate::ult, condBlock.getArgument(0),
      upperBound);
  condBlock.getTerminator()->insertOperands(0, {cmpResult});

  // Add the non-yield operations to their start time groups.
  DenseMap<unsigned, SmallVector<Operation *>> startGroups;
  for (auto *op : problem.getOperations()) {
    if (isa<AffineYieldOp, YieldOp>(op))
      continue;
    auto startTime = problem.getStartTime(op);
    startGroups[*startTime].push_back(op);
  }

  // Maintain mappings of values in the loop body and results of stages,
  // initially populated with the iter args.
  IRMapping valueMap;
  // Nested loops are not supported yet.
  assert(iterArgs.size() == forOp.getBody()->getNumArguments());
  for (size_t i = 0; i < iterArgs.size(); ++i)
    valueMap.map(forOp.getBody()->getArgument(i),
                 pipeline.getStagesBlock().getArgument(i));

  // Create the stages.
  Block &stagesBlock = pipeline.getStagesBlock();
  builder.setInsertionPointToStart(&stagesBlock);

  // Iterate in order of the start times.
  SmallVector<unsigned> startTimes;
  for (const auto &group : startGroups)
    startTimes.push_back(group.first);
  llvm::sort(startTimes);

  DominanceInfo dom(getOperation());

  // Keys for translating values in each stage: registerValues[t] lists the
  // results that must be carried in stage t's result list.
  SmallVector<SmallVector<Value>> registerValues;
  SmallVector<SmallVector<Type>> registerTypes;

  // The maps that ensure a stage uses the correct version of a value
  SmallVector<IRMapping> stageValueMaps;

  // For storing the range of stages an operation's results need to be valid for
  DenseMap<Operation *, std::pair<unsigned, unsigned>> pipeTimes;

  // First pass over the groups: compute, for each operation, how far its
  // results must be piped forward, and record them as register keys.
  for (auto startTime : startTimes) {
    auto group = startGroups[startTime];

    // Collect the return types for this stage. Operations whose results are not
    // used within this stage are returned.
    auto isLoopTerminator = [forOp](Operation *op) {
      return isa<AffineYieldOp>(op) && op->getParentOp() == forOp;
    };

    // Initialize set of registers up until this point in time
    for (unsigned i = registerValues.size(); i <= startTime; ++i)
      registerValues.emplace_back(SmallVector<Value>());

    // Check each operation to see if its results need plumbing
    for (auto *op : group) {
      if (op->getUsers().empty())
        continue;

      // The last stage that consumes any of this op's results.
      unsigned pipeEndTime = 0;
      for (auto *user : op->getUsers()) {
        unsigned userStartTime = *problem.getStartTime(user);
        if (*problem.getStartTime(user) > startTime)
          pipeEndTime = std::max(pipeEndTime, userStartTime);
        else if (isLoopTerminator(user))
          // Manually forward the value into the terminator's valueMap
          pipeEndTime = std::max(pipeEndTime, userStartTime + 1);
      }

      // Insert the range of pipeline stages the value needs to be valid for
      pipeTimes[op] = std::pair(startTime, pipeEndTime);

      // Add register stages for each time slice we need to pipe to
      for (unsigned i = registerValues.size(); i <= pipeEndTime; ++i)
        registerValues.push_back(SmallVector<Value>());

      // Keep a collection of this stages results as keys to our valueMaps
      for (auto result : op->getResults())
        registerValues[startTime].push_back(result);

      // Other stages that use the value will need these values as keys too;
      // the first re-registration happens after the op's latency elapses.
      unsigned firstUse = std::max(
          startTime + 1,
          startTime + *problem.getLatency(*problem.getLinkedOperatorType(op)));
      for (unsigned i = firstUse; i < pipeEndTime; ++i) {
        for (auto result : op->getResults())
          registerValues[i].push_back(result);
      }
    }
  }

  // Now make register Types and stageValueMaps
  for (unsigned i = 0; i < registerValues.size(); ++i) {
    SmallVector<mlir::Type> types;
    for (auto val : registerValues[i])
      types.push_back(val.getType());

    registerTypes.push_back(types);
    stageValueMaps.push_back(valueMap);
  }

  // One more map is needed for the pipeline stages terminator
  stageValueMaps.push_back(valueMap);

  // Create stages along with maps
  for (auto startTime : startTimes) {
    auto group = startGroups[startTime];
    // Clone in dominance order so operands are mapped before their users.
    llvm::sort(group,
               [&](Operation *a, Operation *b) { return dom.dominates(a, b); });
    auto stageTypes = registerTypes[startTime];
    // Add the induction variable increment in the first stage.
    if (startTime == 0)
      stageTypes.push_back(lowerBound.getType());

    // Create the stage itself.
    builder.setInsertionPoint(stagesBlock.getTerminator());
    auto startTimeAttr = builder.getIntegerAttr(
        builder.getIntegerType(64, /*isSigned=*/true), startTime);
    auto stage =
        builder.create<LoopSchedulePipelineStageOp>(stageTypes, startTimeAttr);
    auto &stageBlock = stage.getBodyBlock();
    auto *stageTerminator = stageBlock.getTerminator();
    builder.setInsertionPointToStart(&stageBlock);

    for (auto *op : group) {
      auto *newOp = builder.clone(*op, stageValueMaps[startTime]);

      // All further uses in this stage should used the cloned-version of values
      // So we update the mapping in this stage
      for (auto result : op->getResults())
        stageValueMaps[startTime].map(
            result, newOp->getResult(result.getResultNumber()));
    }

    // Register all values in the terminator, using their mapped value
    SmallVector<Value> stageOperands;
    unsigned resIndex = 0;
    for (auto res : registerValues[startTime]) {
      stageOperands.push_back(stageValueMaps[startTime].lookup(res));
      // Additionally, update the map of the stage that will consume the
      // registered value
      unsigned destTime = startTime + 1;
      unsigned latency = *problem.getLatency(
          *problem.getLinkedOperatorType(res.getDefiningOp()));
      // Multi-cycle case
      if (*problem.getStartTime(res.getDefiningOp()) == startTime &&
          latency > 1)
        destTime = startTime + latency;
      // Clamp to the terminator's map (the last entry in stageValueMaps).
      destTime = std::min((unsigned)(stageValueMaps.size() - 1), destTime);
      stageValueMaps[destTime].map(res, stage.getResult(resIndex++));
    }
    // Add these mapped values to pipeline.register
    stageTerminator->insertOperands(stageTerminator->getNumOperands(),
                                    stageOperands);

    // Add the induction variable increment to the first stage.
    if (startTime == 0) {
      auto incResult =
          builder.create<arith::AddIOp>(stagesBlock.getArgument(0), step);
      stageTerminator->insertOperands(stageTerminator->getNumOperands(),
                                      incResult->getResults());
    }
  }

  // Add the iter args and results to the terminator.
  auto stagesTerminator =
      cast<LoopScheduleTerminatorOp>(stagesBlock.getTerminator());

  // Collect iter args and results from the induction variable increment and any
  // mapped values that were originally yielded.
  SmallVector<Value> termIterArgs;
  SmallVector<Value> termResults;
  // The incremented induction variable is the last result of the first stage.
  termIterArgs.push_back(
      stagesBlock.front().getResult(stagesBlock.front().getNumResults() - 1));

  for (auto value : forOp.getBody()->getTerminator()->getOperands()) {
    unsigned lookupTime = std::min((unsigned)(stageValueMaps.size() - 1),
                                   pipeTimes[value.getDefiningOp()].second);

    termIterArgs.push_back(stageValueMaps[lookupTime].lookup(value));
    termResults.push_back(stageValueMaps[lookupTime].lookup(value));
  }

  stagesTerminator.getIterArgsMutable().append(termIterArgs);
  stagesTerminator.getResultsMutable().append(termResults);

  // Replace loop results with pipeline results.
  for (size_t i = 0; i < forOp.getNumResults(); ++i)
    forOp.getResult(i).replaceAllUsesWith(pipeline.getResult(i));

  // Remove the loop nest from the IR.
  loopNest.front().walk([](Operation *op) {
    op->dropAllUses();
    op->dropAllDefinedValueUses();
    op->dropAllReferences();
    op->erase();
  });

  return success();
}
650 
651 std::unique_ptr<mlir::Pass> circt::createAffineToLoopSchedule() {
652  return std::make_unique<AffineToLoopSchedule>();
653 }
static bool yieldOpLegalityCallback(AffineYieldOp op)
Helper to mark AffineYieldOp legal, unless it is inside a partially converted scf::IfOp.
static bool ifOpLegalityCallback(IfOp op)
Helper to determine if an scf::IfOp is in mux-like form.
assert(baseType &&"element must be base type")
Apply the affine map from an 'affine.load' operation to its operands, and feed the results to a newly...
MemoryDependenceAnalysis & dependenceAnalysis
LogicalResult matchAndRewrite(AffineLoadOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
AffineLoadLowering(MLIRContext *context, MemoryDependenceAnalysis &dependenceAnalysis)
Apply the affine map from an 'affine.store' operation to its operands, and feed the results to a newl...
AffineStoreLowering(MLIRContext *context, MemoryDependenceAnalysis &dependenceAnalysis)
MemoryDependenceAnalysis & dependenceAnalysis
LogicalResult matchAndRewrite(AffineStoreOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
This class models a cyclic scheduling problem.
Definition: Problems.h:292
std::optional< unsigned > getInitiationInterval()
The initiation interval (II) is the number of time steps between subsequent iterations,...
Definition: Problems.h:312
std::optional< unsigned > getDistance(Dependence dep)
The distance determines whether a dependence has to be satisfied in the same iteration (distance=0 or...
Definition: Problems.h:302
This class models the modulo scheduling problem as the composition of the cyclic problem and the reso...
Definition: Problems.h:447
virtual LogicalResult verify() override
Return success if the computed solution is valid.
Definition: Problems.cpp:397
void setLatency(OperatorType opr, unsigned val)
Definition: Problems.h:213
virtual LogicalResult check()
Return success if the constructed scheduling problem is valid.
Definition: Problems.cpp:96
std::optional< OperatorType > getLinkedOperatorType(Operation *op)
The linked operator type provides the runtime characteristics for op.
Definition: Problems.h:202
std::optional< unsigned > getStartTime(Operation *op)
Return the start time for op, as computed by the scheduler.
Definition: Problems.h:218
OperatorType getOrInsertOperatorType(StringRef name)
Retrieves the operator type identified by the client-specific name.
Definition: Problems.cpp:47
DependenceRange getDependences(Operation *op)
Return a range object to transparently iterate over op's incoming 1) implicit def-use dependences (ba...
Definition: Problems.cpp:53
const OperationSet & getOperations()
Return the set of operations.
Definition: Problems.h:178
void setLinkedOperatorType(Operation *op, OperatorType opr)
Definition: Problems.h:205
mlir::StringAttr OperatorType
Operator types are distinguished by name (chosen by the client).
Definition: Problems.h:104
std::optional< unsigned > getLatency(OperatorType opr)
The latency is the number of cycles opr needs to compute its result.
Definition: Problems.h:210
Operation * getContainingOp()
Return the operation containing this problem, e.g. to emit diagnostics.
Definition: Problems.h:171
void setLimit(OperatorType opr, unsigned val)
Definition: Problems.h:421
Direction get(bool isOutput)
Returns an output direction if isOutput is true, otherwise returns an input direction.
Definition: CalyxOps.cpp:54
LogicalResult scheduleSimplex(Problem &prob, Operation *lastOp)
Solve the basic problem using linear programming and a handwritten implementation of the simplex algo...
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Definition: DebugAnalysis.h:21
std::unique_ptr< mlir::Pass > createAffineToLoopSchedule()
llvm::hash_code hash_value(const T &e)
Helper to hoist computation out of scf::IfOp branches, turning it into a mux-like operation,...
LogicalResult matchAndRewrite(IfOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
CyclicSchedulingAnalysis constructs a CyclicProblem for each AffineForOp by performing a memory depen...
MemoryDependenceAnalysis traverses any AffineForOps in the FuncOp body and checks for affine memory a...