CIRCT  18.0.0git
AffineToLoopSchedule.cpp
Go to the documentation of this file.
1 //===- AffineToLoopSchedule.cpp--------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "../PassDetail.h"
16 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
17 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
18 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
19 #include "mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.h"
20 #include "mlir/Dialect/Affine/IR/AffineOps.h"
21 #include "mlir/Dialect/Affine/LoopUtils.h"
22 #include "mlir/Dialect/Affine/Utils.h"
23 #include "mlir/Dialect/Arith/IR/Arith.h"
24 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
25 #include "mlir/Dialect/Func/IR/FuncOps.h"
26 #include "mlir/Dialect/MemRef/IR/MemRef.h"
27 #include "mlir/Dialect/SCF/IR/SCF.h"
28 #include "mlir/IR/BuiltinDialect.h"
29 #include "mlir/IR/Dominance.h"
30 #include "mlir/IR/IRMapping.h"
31 #include "mlir/IR/ImplicitLocOpBuilder.h"
32 #include "mlir/Transforms/DialectConversion.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/TypeSwitch.h"
35 #include "llvm/Support/Debug.h"
36 #include <cassert>
37 #include <limits>
38 
39 #define DEBUG_TYPE "affine-to-loopschedule"
40 
41 using namespace mlir;
42 using namespace mlir::arith;
43 using namespace mlir::memref;
44 using namespace mlir::scf;
45 using namespace mlir::func;
46 using namespace mlir::affine;
47 using namespace circt;
48 using namespace circt::analysis;
49 using namespace circt::scheduling;
50 using namespace circt::loopschedule;
51 
52 namespace {
53 
struct AffineToLoopSchedule
    : public AffineToLoopScheduleBase<AffineToLoopSchedule> {
  void runOnOperation() override;

private:
  /// Copy the cyclic scheduling problem produced by the analysis into an
  /// equivalent ModuloProblem (operations, operator types, dependences).
  ModuloProblem getModuloProblem(CyclicProblem &prob);
  /// Lower affine loads/stores/ifs into arith/scf/memref form, keeping the
  /// dependence analysis in sync with the replacement ops.
  LogicalResult
  lowerAffineStructures(MemoryDependenceAnalysis &dependenceAnalysis);
  /// Assign operator types (latencies/limits) to every op in the loop nest.
  LogicalResult populateOperatorTypes(SmallVectorImpl<AffineForOp> &loopNest,
                                      ModuloProblem &problem);
  /// Check the problem and solve it with the simplex scheduler.
  LogicalResult solveSchedulingProblem(SmallVectorImpl<AffineForOp> &loopNest,
                                       ModuloProblem &problem);
  /// Rewrite the scheduled loop nest as a loopschedule.pipeline op.
  LogicalResult
  createLoopSchedulePipeline(SmallVectorImpl<AffineForOp> &loopNest,
                             ModuloProblem &problem);

  /// Scheduling analysis for the function currently being processed; set at
  /// the start of runOnOperation().
  CyclicSchedulingAnalysis *schedulingAnalysis;
};
72 
73 } // namespace
74 
75 ModuloProblem AffineToLoopSchedule::getModuloProblem(CyclicProblem &prob) {
76  auto modProb = ModuloProblem::get(prob.getContainingOp());
77  for (auto *op : prob.getOperations()) {
78  auto opr = prob.getLinkedOperatorType(op);
79  if (opr.has_value()) {
80  modProb.setLinkedOperatorType(op, opr.value());
81  auto latency = prob.getLatency(opr.value());
82  if (latency.has_value())
83  modProb.setLatency(opr.value(), latency.value());
84  }
85  modProb.insertOperation(op);
86  }
87 
88  for (auto *op : prob.getOperations()) {
89  for (auto dep : prob.getDependences(op)) {
90  if (dep.isAuxiliary()) {
91  auto depInserted = modProb.insertDependence(dep);
92  assert(succeeded(depInserted));
93  (void)depInserted;
94  }
95  auto distance = prob.getDistance(dep);
96  if (distance.has_value())
97  modProb.setDistance(dep, distance.value());
98  }
99  }
100 
101  return modProb;
102 }
103 
/// Pass driver: analyze memory dependences, lower affine structures, then for
/// each single (non-nested) affine loop, build and solve a modulo scheduling
/// problem and rewrite the loop into a loopschedule pipeline.
void AffineToLoopSchedule::runOnOperation() {
  // Get dependence analysis for the whole function.
  auto dependenceAnalysis = getAnalysis<MemoryDependenceAnalysis>();

  // After dependence analysis, materialize affine structures.
  if (failed(lowerAffineStructures(dependenceAnalysis)))
    return signalPassFailure();

  // Get scheduling analysis for the whole function.
  schedulingAnalysis = &getAnalysis<CyclicSchedulingAnalysis>();

  // Collect perfectly nested loops and work on them. Early-inc iteration is
  // required because createLoopSchedulePipeline erases the loop being visited.
  auto outerLoops = getOperation().getOps<AffineForOp>();
  for (auto root : llvm::make_early_inc_range(outerLoops)) {
    SmallVector<AffineForOp> nestedLoops;
    getPerfectlyNestedLoops(nestedLoops, root);

    // Restrict to single loops to simplify things for now.
    if (nestedLoops.size() != 1)
      continue;

    ModuloProblem moduloProblem =
        getModuloProblem(schedulingAnalysis->getProblem(nestedLoops.back()));

    // Populate the target operator types.
    if (failed(populateOperatorTypes(nestedLoops, moduloProblem)))
      return signalPassFailure();

    // Solve the scheduling problem computed by the analysis.
    if (failed(solveSchedulingProblem(nestedLoops, moduloProblem)))
      return signalPassFailure();

    // Convert the IR.
    if (failed(createLoopSchedulePipeline(nestedLoops, moduloProblem)))
      return signalPassFailure();
  }
}
141 
142 /// Apply the affine map from an 'affine.load' operation to its operands, and
143 /// feed the results to a newly created 'memref.load' operation (which replaces
144 /// the original 'affine.load').
145 /// Also replaces the affine load with the memref load in dependenceAnalysis.
146 /// TODO(mikeurbach): this is copied from AffineToStandard, see if we can reuse.
147 class AffineLoadLowering : public OpConversionPattern<AffineLoadOp> {
148 public:
149  AffineLoadLowering(MLIRContext *context,
150  MemoryDependenceAnalysis &dependenceAnalysis)
151  : OpConversionPattern(context), dependenceAnalysis(dependenceAnalysis) {}
152 
153  LogicalResult
154  matchAndRewrite(AffineLoadOp op, OpAdaptor adaptor,
155  ConversionPatternRewriter &rewriter) const override {
156  // Expand affine map from 'affineLoadOp'.
157  SmallVector<Value, 8> indices(op.getMapOperands());
158  auto resultOperands =
159  expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices);
160  if (!resultOperands)
161  return failure();
162 
163  // Build memref.load memref[expandedMap.results].
164  auto memrefLoad = rewriter.replaceOpWithNewOp<memref::LoadOp>(
165  op, op.getMemRef(), *resultOperands);
166 
167  dependenceAnalysis.replaceOp(op, memrefLoad);
168 
169  return success();
170  }
171 
172 private:
174 };
175 
176 /// Apply the affine map from an 'affine.store' operation to its operands, and
177 /// feed the results to a newly created 'memref.store' operation (which replaces
178 /// the original 'affine.store').
179 /// Also replaces the affine store with the memref store in dependenceAnalysis.
180 /// TODO(mikeurbach): this is copied from AffineToStandard, see if we can reuse.
181 class AffineStoreLowering : public OpConversionPattern<AffineStoreOp> {
182 public:
183  AffineStoreLowering(MLIRContext *context,
184  MemoryDependenceAnalysis &dependenceAnalysis)
185  : OpConversionPattern(context), dependenceAnalysis(dependenceAnalysis) {}
186 
187  LogicalResult
188  matchAndRewrite(AffineStoreOp op, OpAdaptor adaptor,
189  ConversionPatternRewriter &rewriter) const override {
190  // Expand affine map from 'affineStoreOp'.
191  SmallVector<Value, 8> indices(op.getMapOperands());
192  auto maybeExpandedMap =
193  expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices);
194  if (!maybeExpandedMap)
195  return failure();
196 
197  // Build memref.store valueToStore, memref[expandedMap.results].
198  auto memrefStore = rewriter.replaceOpWithNewOp<memref::StoreOp>(
199  op, op.getValueToStore(), op.getMemRef(), *maybeExpandedMap);
200 
201  dependenceAnalysis.replaceOp(op, memrefStore);
202 
203  return success();
204  }
205 
206 private:
208 };
209 
210 /// Helper to hoist computation out of scf::IfOp branches, turning it into a
211 /// mux-like operation, and exposing potentially concurrent execution of its
212 /// branches.
215 
216  LogicalResult
217  matchAndRewrite(IfOp op, OpAdaptor adaptor,
218  ConversionPatternRewriter &rewriter) const override {
219  rewriter.updateRootInPlace(op, [&]() {
220  if (!op.thenBlock()->without_terminator().empty()) {
221  rewriter.splitBlock(op.thenBlock(), --op.thenBlock()->end());
222  rewriter.inlineBlockBefore(&op.getThenRegion().front(), op);
223  }
224  if (op.elseBlock() && !op.elseBlock()->without_terminator().empty()) {
225  rewriter.splitBlock(op.elseBlock(), --op.elseBlock()->end());
226  rewriter.inlineBlockBefore(&op.getElseRegion().front(), op);
227  }
228  });
229 
230  return success();
231  }
232 };
233 
234 /// Helper to determine if an scf::IfOp is in mux-like form.
235 static bool ifOpLegalityCallback(IfOp op) {
236  return op.thenBlock()->without_terminator().empty() &&
237  (!op.elseBlock() || op.elseBlock()->without_terminator().empty());
238 }
239 
240 /// Helper to mark AffineYieldOp legal, unless it is inside a partially
241 /// converted scf::IfOp.
242 static bool yieldOpLegalityCallback(AffineYieldOp op) {
243  return !op->getParentOfType<IfOp>();
244 }
245 
246 /// After analyzing memory dependences, and before creating the schedule, we
247 /// want to materialize affine operations with arithmetic, scf, and memref
248 /// operations, which make the condition computation of addresses, etc.
249 /// explicit. This is important so the schedule can consider potentially complex
250 /// computations in the condition of ifs, or the addresses of loads and stores.
251 /// The dependence analysis will be updated so the dependences from the affine
252 /// loads and stores are now on the memref loads and stores.
253 LogicalResult AffineToLoopSchedule::lowerAffineStructures(
254  MemoryDependenceAnalysis &dependenceAnalysis) {
255  auto *context = &getContext();
256  auto op = getOperation();
257 
258  ConversionTarget target(*context);
259  target.addLegalDialect<AffineDialect, ArithDialect, MemRefDialect,
260  SCFDialect>();
261  target.addIllegalOp<AffineIfOp, AffineLoadOp, AffineStoreOp>();
262  target.addDynamicallyLegalOp<IfOp>(ifOpLegalityCallback);
263  target.addDynamicallyLegalOp<AffineYieldOp>(yieldOpLegalityCallback);
264 
265  RewritePatternSet patterns(context);
266  populateAffineToStdConversionPatterns(patterns);
267  patterns.add<AffineLoadLowering>(context, dependenceAnalysis);
268  patterns.add<AffineStoreLowering>(context, dependenceAnalysis);
269  patterns.add<IfOpHoisting>(context);
270 
271  if (failed(applyPartialConversion(op, target, std::move(patterns))))
272  return failure();
273 
274  return success();
275 }
276 
/// Populate the scheduling problem operator types for the dialect we are
/// targeting. Right now, we assume Calyx, which has a standard library with
/// well-defined operator latencies. Ultimately, we should move this to a
/// dialect interface in the Scheduling dialect.
281 LogicalResult AffineToLoopSchedule::populateOperatorTypes(
282  SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
283  // Scheduling analyis only considers the innermost loop nest for now.
284  auto forOp = loopNest.back();
285 
286  // Load the Calyx operator library into the problem. This is a very minimal
287  // set of arithmetic and memory operators for now. This should ultimately be
288  // pulled out into some sort of dialect interface.
289  Problem::OperatorType combOpr = problem.getOrInsertOperatorType("comb");
290  problem.setLatency(combOpr, 0);
291  Problem::OperatorType seqOpr = problem.getOrInsertOperatorType("seq");
292  problem.setLatency(seqOpr, 1);
293  Problem::OperatorType mcOpr = problem.getOrInsertOperatorType("multicycle");
294  problem.setLatency(mcOpr, 3);
295 
296  Operation *unsupported;
297  WalkResult result = forOp.getBody()->walk([&](Operation *op) {
298  return TypeSwitch<Operation *, WalkResult>(op)
299  .Case<AddIOp, IfOp, AffineYieldOp, arith::ConstantOp, CmpIOp,
300  IndexCastOp, memref::AllocaOp, YieldOp>([&](Operation *combOp) {
301  // Some known combinational ops.
302  problem.setLinkedOperatorType(combOp, combOpr);
303  return WalkResult::advance();
304  })
305  .Case<AddIOp, CmpIOp>([&](Operation *seqOp) {
306  // These ops need to be sequential for now because we do not
307  // have enough information to chain them together yet.
308  problem.setLinkedOperatorType(seqOp, seqOpr);
309  return WalkResult::advance();
310  })
311  .Case<AffineStoreOp, memref::StoreOp>([&](Operation *memOp) {
312  // Some known sequential ops. In certain cases, reads may be
313  // combinational in Calyx, but taking advantage of that is left as
314  // a future enhancement.
315  Value memRef = isa<AffineStoreOp>(*memOp)
316  ? cast<AffineStoreOp>(*memOp).getMemRef()
317  : cast<memref::StoreOp>(*memOp).getMemRef();
319  "mem_" + std::to_string(hash_value(memRef)));
320  problem.setLatency(memOpr, 1);
321  problem.setLimit(memOpr, 1);
322  problem.setLinkedOperatorType(memOp, memOpr);
323  return WalkResult::advance();
324  })
325  .Case<AffineLoadOp, memref::LoadOp>([&](Operation *memOp) {
326  // Some known sequential ops. In certain cases, reads may be
327  // combinational in Calyx, but taking advantage of that is left as
328  // a future enhancement.
329  Value memRef = isa<AffineLoadOp>(*memOp)
330  ? cast<AffineLoadOp>(*memOp).getMemRef()
331  : cast<memref::LoadOp>(*memOp).getMemRef();
333  "mem_" + std::to_string(hash_value(memRef)));
334  problem.setLatency(memOpr, 1);
335  problem.setLimit(memOpr, 1);
336  problem.setLinkedOperatorType(memOp, memOpr);
337  return WalkResult::advance();
338  })
339  .Case<MulIOp>([&](Operation *mcOp) {
340  // Some known multi-cycle ops.
341  problem.setLinkedOperatorType(mcOp, mcOpr);
342  return WalkResult::advance();
343  })
344  .Default([&](Operation *badOp) {
345  unsupported = op;
346  return WalkResult::interrupt();
347  });
348  });
349 
350  if (result.wasInterrupted())
351  return forOp.emitError("unsupported operation ") << *unsupported;
352 
353  return success();
354 }
355 
/// Solve the pre-computed scheduling problem.
LogicalResult AffineToLoopSchedule::solveSchedulingProblem(
    SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
  // Scheduling analysis only considers the innermost loop nest for now.
  auto forOp = loopNest.back();

  // Optionally debug problem inputs.
  LLVM_DEBUG(forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
    llvm::dbgs() << "Scheduling inputs for " << *op;
    auto opr = problem.getLinkedOperatorType(op);
    llvm::dbgs() << "\n  opr = " << opr;
    llvm::dbgs() << "\n  latency = " << problem.getLatency(*opr);
    for (auto dep : problem.getDependences(op))
      if (dep.isAuxiliary())
        llvm::dbgs() << "\n  dep = { distance = " << problem.getDistance(dep)
                     << ", source = " << *dep.getSource() << " }";
    llvm::dbgs() << "\n\n";
  }));

  // Verify the problem is well-formed before attempting to solve it.
  if (failed(problem.check()))
    return failure();

  // Solve with the simplex scheduler, anchored on the loop terminator (the
  // op whose start time the scheduler minimizes last).
  auto *anchor = forOp.getBody()->getTerminator();
  if (failed(scheduleSimplex(problem, anchor)))
    return failure();

  // Verify the solution.
  if (failed(problem.verify()))
    return failure();

  // Optionally debug problem outputs.
  LLVM_DEBUG({
    llvm::dbgs() << "Scheduled initiation interval = "
                 << problem.getInitiationInterval() << "\n\n";
    forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
      llvm::dbgs() << "Scheduling outputs for " << *op;
      llvm::dbgs() << "\n  start = " << problem.getStartTime(op);
      llvm::dbgs() << "\n\n";
    });
  });

  return success();
}
400 
/// Create the loopschedule pipeline op for a loop nest: build the pipeline's
/// condition block, group operations by scheduled start time, create one
/// pipeline stage per start time, and plumb values that cross stage
/// boundaries through stage results (pipeline registers). Finally the
/// original loop nest is erased.
LogicalResult AffineToLoopSchedule::createLoopSchedulePipeline(
    SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
  // Scheduling analysis only considers the innermost loop nest for now.
  auto forOp = loopNest.back();

  auto outerLoop = loopNest.front();
  auto innerLoop = loopNest.back();
  ImplicitLocOpBuilder builder(outerLoop.getLoc(), outerLoop);

  // Create Values for the loop's lower and upper bounds.
  Value lowerBound = lowerAffineLowerBound(innerLoop, builder);
  Value upperBound = lowerAffineUpperBound(innerLoop, builder);
  int64_t stepValue = innerLoop.getStep();
  auto step = builder.create<arith::ConstantOp>(
      IntegerAttr::get(builder.getIndexType(), stepValue));

  // Create the pipeline op, with the same result types as the inner loop. An
  // iter arg is created for the induction variable.
  TypeRange resultTypes = innerLoop.getResultTypes();

  auto ii = builder.getI64IntegerAttr(problem.getInitiationInterval().value());

  SmallVector<Value> iterArgs;
  iterArgs.push_back(lowerBound);
  iterArgs.append(innerLoop.getIterOperands().begin(),
                  innerLoop.getIterOperands().end());

  // If possible, attach a constant trip count attribute. This could be
  // generalized to support non-constant trip counts by supporting an AffineMap.
  std::optional<IntegerAttr> tripCountAttr;
  if (auto tripCount = getConstantTripCount(forOp))
    tripCountAttr = builder.getI64IntegerAttr(*tripCount);

  auto pipeline = builder.create<LoopSchedulePipelineOp>(
      resultTypes, ii, tripCountAttr, iterArgs);

  // Create the condition, which currently just compares the induction variable
  // to the upper bound.
  Block &condBlock = pipeline.getCondBlock();
  builder.setInsertionPointToStart(&condBlock);
  auto cmpResult = builder.create<arith::CmpIOp>(
      builder.getI1Type(), arith::CmpIPredicate::ult, condBlock.getArgument(0),
      upperBound);
  condBlock.getTerminator()->insertOperands(0, {cmpResult});

  // Add the non-yield operations to their start time groups.
  DenseMap<unsigned, SmallVector<Operation *>> startGroups;
  for (auto *op : problem.getOperations()) {
    if (isa<AffineYieldOp, YieldOp>(op))
      continue;
    auto startTime = problem.getStartTime(op);
    startGroups[*startTime].push_back(op);
  }

  // Maintain mappings of values in the loop body and results of stages,
  // initially populated with the iter args.
  IRMapping valueMap;
  // Nested loops are not supported yet.
  assert(iterArgs.size() == forOp.getBody()->getNumArguments());
  for (size_t i = 0; i < iterArgs.size(); ++i)
    valueMap.map(forOp.getBody()->getArgument(i),
                 pipeline.getStagesBlock().getArgument(i));

  // Create the stages.
  Block &stagesBlock = pipeline.getStagesBlock();
  builder.setInsertionPointToStart(&stagesBlock);

  // Iterate in order of the start times.
  SmallVector<unsigned> startTimes;
  for (const auto &group : startGroups)
    startTimes.push_back(group.first);
  llvm::sort(startTimes);

  DominanceInfo dom(getOperation());

  // Keys for translating values in each stage.
  SmallVector<SmallVector<Value>> registerValues;
  SmallVector<SmallVector<Type>> registerTypes;

  // The maps that ensure a stage uses the correct version of a value.
  SmallVector<IRMapping> stageValueMaps;

  // For storing the range of stages an operation's results need to be valid
  // for (first stage producing the value, last stage consuming it).
  DenseMap<Operation *, std::pair<unsigned, unsigned>> pipeTimes;

  // First pass: compute, per start time, which values must be registered
  // through to later stages.
  for (auto startTime : startTimes) {
    auto group = startGroups[startTime];

    // Collect the return types for this stage. Operations whose results are
    // not used within this stage are returned.
    auto isLoopTerminator = [forOp](Operation *op) {
      return isa<AffineYieldOp>(op) && op->getParentOp() == forOp;
    };

    // Initialize set of registers up until this point in time.
    for (unsigned i = registerValues.size(); i <= startTime; ++i)
      registerValues.emplace_back(SmallVector<Value>());

    // Check each operation to see if its results need plumbing.
    for (auto *op : group) {
      if (op->getUsers().empty())
        continue;

      // Find the latest stage that consumes any of this op's results.
      unsigned pipeEndTime = 0;
      for (auto *user : op->getUsers()) {
        unsigned userStartTime = *problem.getStartTime(user);
        if (*problem.getStartTime(user) > startTime)
          pipeEndTime = std::max(pipeEndTime, userStartTime);
        else if (isLoopTerminator(user))
          // Manually forward the value into the terminator's valueMap.
          pipeEndTime = std::max(pipeEndTime, userStartTime + 1);
      }

      // Insert the range of pipeline stages the value needs to be valid for.
      pipeTimes[op] = std::pair(startTime, pipeEndTime);

      // Add register stages for each time slice we need to pipe to.
      for (unsigned i = registerValues.size(); i <= pipeEndTime; ++i)
        registerValues.push_back(SmallVector<Value>());

      // Keep a collection of this stages results as keys to our valueMaps.
      for (auto result : op->getResults())
        registerValues[startTime].push_back(result);

      // Other stages that use the value will need these values as keys too.
      // Registering starts only after the op's latency has elapsed.
      unsigned firstUse = std::max(
          startTime + 1,
          startTime + *problem.getLatency(*problem.getLinkedOperatorType(op)));
      for (unsigned i = firstUse; i < pipeEndTime; ++i) {
        for (auto result : op->getResults())
          registerValues[i].push_back(result);
      }
    }
  }

  // Now make register Types and stageValueMaps.
  for (unsigned i = 0; i < registerValues.size(); ++i) {
    SmallVector<mlir::Type> types;
    for (auto val : registerValues[i])
      types.push_back(val.getType());

    registerTypes.push_back(types);
    stageValueMaps.push_back(valueMap);
  }

  // One more map is needed for the pipeline stages terminator.
  stageValueMaps.push_back(valueMap);

  // Second pass: create the stages themselves, cloning each group's ops and
  // registering cross-stage values through the stage terminator.
  for (auto startTime : startTimes) {
    auto group = startGroups[startTime];
    // Clone in dominance order so operands are mapped before their users.
    llvm::sort(group,
               [&](Operation *a, Operation *b) { return dom.dominates(a, b); });
    auto stageTypes = registerTypes[startTime];
    // Add the induction variable increment in the first stage.
    if (startTime == 0)
      stageTypes.push_back(lowerBound.getType());

    // Create the stage itself.
    builder.setInsertionPoint(stagesBlock.getTerminator());
    auto startTimeAttr = builder.getIntegerAttr(
        builder.getIntegerType(64, /*isSigned=*/true), startTime);
    auto stage =
        builder.create<LoopSchedulePipelineStageOp>(stageTypes, startTimeAttr);
    auto &stageBlock = stage.getBodyBlock();
    auto *stageTerminator = stageBlock.getTerminator();
    builder.setInsertionPointToStart(&stageBlock);

    for (auto *op : group) {
      auto *newOp = builder.clone(*op, stageValueMaps[startTime]);

      // All further uses in this stage should use the cloned version of
      // values, so we update the mapping in this stage.
      for (auto result : op->getResults())
        stageValueMaps[startTime].map(
            result, newOp->getResult(result.getResultNumber()));
    }

    // Register all values in the terminator, using their mapped value.
    SmallVector<Value> stageOperands;
    unsigned resIndex = 0;
    for (auto res : registerValues[startTime]) {
      stageOperands.push_back(stageValueMaps[startTime].lookup(res));
      // Additionally, update the map of the stage that will consume the
      // registered value.
      unsigned destTime = startTime + 1;
      unsigned latency = *problem.getLatency(
          *problem.getLinkedOperatorType(res.getDefiningOp()));
      // Multi-cycle case: the result only becomes available `latency` stages
      // after the defining op starts.
      if (*problem.getStartTime(res.getDefiningOp()) == startTime &&
          latency > 1)
        destTime = startTime + latency;
      destTime = std::min((unsigned)(stageValueMaps.size() - 1), destTime);
      stageValueMaps[destTime].map(res, stage.getResult(resIndex++));
    }
    // Add these mapped values to pipeline.register.
    stageTerminator->insertOperands(stageTerminator->getNumOperands(),
                                    stageOperands);

    // Add the induction variable increment to the first stage.
    if (startTime == 0) {
      auto incResult =
          builder.create<arith::AddIOp>(stagesBlock.getArgument(0), step);
      stageTerminator->insertOperands(stageTerminator->getNumOperands(),
                                      incResult->getResults());
    }
  }

  // Add the iter args and results to the terminator.
  auto stagesTerminator =
      cast<LoopScheduleTerminatorOp>(stagesBlock.getTerminator());

  // Collect iter args and results from the induction variable increment and
  // any mapped values that were originally yielded. The increment is the last
  // result of the first stage (see startTime == 0 above).
  SmallVector<Value> termIterArgs;
  SmallVector<Value> termResults;
  termIterArgs.push_back(
      stagesBlock.front().getResult(stagesBlock.front().getNumResults() - 1));

  for (auto value : forOp.getBody()->getTerminator()->getOperands()) {
    unsigned lookupTime = std::min((unsigned)(stageValueMaps.size() - 1),
                                   pipeTimes[value.getDefiningOp()].second);

    termIterArgs.push_back(stageValueMaps[lookupTime].lookup(value));
    termResults.push_back(stageValueMaps[lookupTime].lookup(value));
  }

  stagesTerminator.getIterArgsMutable().append(termIterArgs);
  stagesTerminator.getResultsMutable().append(termResults);

  // Replace loop results with pipeline results.
  for (size_t i = 0; i < forOp.getNumResults(); ++i)
    forOp.getResult(i).replaceAllUsesWith(pipeline.getResult(i));

  // Remove the loop nest from the IR, dropping uses first so erasure order
  // does not matter.
  loopNest.front().walk([](Operation *op) {
    op->dropAllUses();
    op->dropAllDefinedValueUses();
    op->dropAllReferences();
    op->erase();
  });

  return success();
}
646 
/// Public factory for the AffineToLoopSchedule pass.
std::unique_ptr<mlir::Pass> circt::createAffineToLoopSchedule() {
  return std::make_unique<AffineToLoopSchedule>();
}
static bool yieldOpLegalityCallback(AffineYieldOp op)
Helper to mark AffineYieldOp legal, unless it is inside a partially converted scf::IfOp.
static bool ifOpLegalityCallback(IfOp op)
Helper to determine if an scf::IfOp is in mux-like form.
assert(baseType &&"element must be base type")
Builder builder
Apply the affine map from an 'affine.load' operation to its operands, and feed the results to a newly...
MemoryDependenceAnalysis & dependenceAnalysis
LogicalResult matchAndRewrite(AffineLoadOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
AffineLoadLowering(MLIRContext *context, MemoryDependenceAnalysis &dependenceAnalysis)
Apply the affine map from an 'affine.store' operation to its operands, and feed the results to a newl...
AffineStoreLowering(MLIRContext *context, MemoryDependenceAnalysis &dependenceAnalysis)
MemoryDependenceAnalysis & dependenceAnalysis
LogicalResult matchAndRewrite(AffineStoreOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
This class models a cyclic scheduling problem.
Definition: Problems.h:292
std::optional< unsigned > getInitiationInterval()
The initiation interval (II) is the number of time steps between subsequent iterations,...
Definition: Problems.h:312
std::optional< unsigned > getDistance(Dependence dep)
The distance determines whether a dependence has to be satisfied in the same iteration (distance=0 or...
Definition: Problems.h:302
This class models the modulo scheduling problem as the composition of the cyclic problem and the reso...
Definition: Problems.h:446
virtual LogicalResult verify() override
Return success if the computed solution is valid.
Definition: Problems.cpp:397
void setLatency(OperatorType opr, unsigned val)
Definition: Problems.h:213
virtual LogicalResult check()
Return success if the constructed scheduling problem is valid.
Definition: Problems.cpp:96
std::optional< OperatorType > getLinkedOperatorType(Operation *op)
The linked operator type provides the runtime characteristics for op.
Definition: Problems.h:202
std::optional< unsigned > getStartTime(Operation *op)
Return the start time for op, as computed by the scheduler.
Definition: Problems.h:218
OperatorType getOrInsertOperatorType(StringRef name)
Retrieves the operator type identified by the client-specific name.
Definition: Problems.cpp:47
DependenceRange getDependences(Operation *op)
Return a range object to transparently iterate over op's incoming 1) implicit def-use dependences (ba...
Definition: Problems.cpp:53
const OperationSet & getOperations()
Return the set of operations.
Definition: Problems.h:178
void setLinkedOperatorType(Operation *op, OperatorType opr)
Definition: Problems.h:205
mlir::StringAttr OperatorType
Operator types are distinguished by name (chosen by the client).
Definition: Problems.h:104
std::optional< unsigned > getLatency(OperatorType opr)
The latency is the number of cycles opr needs to compute its result.
Definition: Problems.h:210
Operation * getContainingOp()
Return the operation containing this problem, e.g. to emit diagnostics.
Definition: Problems.h:171
void setLimit(OperatorType opr, unsigned val)
Definition: Problems.h:420
Direction get(bool isOutput)
Returns an output direction if isOutput is true, otherwise returns an input direction.
Definition: CalyxOps.cpp:53
LogicalResult scheduleSimplex(Problem &prob, Operation *lastOp)
Solve the basic problem using linear programming and a handwritten implementation of the simplex algo...
This file defines an intermediate representation for circuits acting as an abstraction for constraint...
std::unique_ptr< mlir::Pass > createAffineToLoopSchedule()
mlir::raw_indented_ostream & dbgs()
Definition: Utility.h:28
llvm::hash_code hash_value(const T &e)
Helper to hoist computation out of scf::IfOp branches, turning it into a mux-like operation,...
LogicalResult matchAndRewrite(IfOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
CyclicSchedulingAnalysis constructs a CyclicProblem for each AffineForOp by performing a memory depen...
MemoryDependenceAnalysis traverses any AffineForOps in the FuncOp body and checks for affine memory a...