AffineToLoopSchedule.cpp
//===- AffineToLoopSchedule.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "circt/Conversion/AffineToLoopSchedule.h"
#include "circt/Analysis/DependenceAnalysis.h"
#include "circt/Analysis/SchedulingAnalysis.h"
#include "circt/Dialect/LoopSchedule/LoopScheduleOps.h"
#include "circt/Scheduling/Algorithms.h"
#include "circt/Scheduling/Utilities.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include <cassert>
#include <limits>

#define DEBUG_TYPE "affine-to-loopschedule"

namespace circt {
#define GEN_PASS_DEF_AFFINETOLOOPSCHEDULE
#include "circt/Conversion/Passes.h.inc"
} // namespace circt

using namespace mlir;
using namespace mlir::arith;
using namespace mlir::memref;
using namespace mlir::scf;
using namespace mlir::func;
using namespace mlir::affine;
using namespace circt;
using namespace circt::analysis;
using namespace circt::scheduling;
using namespace circt::loopschedule;

namespace {

struct AffineToLoopSchedule
    : public circt::impl::AffineToLoopScheduleBase<AffineToLoopSchedule> {
  void runOnOperation() override;

private:
  ModuloProblem getModuloProblem(CyclicProblem &prob);
  LogicalResult
  lowerAffineStructures(MemoryDependenceAnalysis &dependenceAnalysis);
  LogicalResult populateOperatorTypes(SmallVectorImpl<AffineForOp> &loopNest,
                                      ModuloProblem &problem);
  LogicalResult solveSchedulingProblem(SmallVectorImpl<AffineForOp> &loopNest,
                                       ModuloProblem &problem);
  LogicalResult
  createLoopSchedulePipeline(SmallVectorImpl<AffineForOp> &loopNest,
                             ModuloProblem &problem);

  CyclicSchedulingAnalysis *schedulingAnalysis;
};

} // namespace

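/// Copy the pre-computed cyclic scheduling problem into a ModuloProblem. Every
/// operation is inserted along with its linked operator type (and that type's
/// latency) and its linked resource types; auxiliary dependences are
/// re-inserted together with their distances. Def-use dependences are implicit
/// in the IR, so only the auxiliary ones need explicit insertion.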
ModuloProblem AffineToLoopSchedule::getModuloProblem(CyclicProblem &prob) {
  ModuloProblem modProb(prob.getContainingOp());
  for (auto *op : prob.getOperations()) {
    auto opr = prob.getLinkedOperatorType(op);
    if (opr.has_value()) {
      modProb.setLinkedOperatorType(op, opr.value());
      auto latency = prob.getLatency(opr.value());
      if (latency.has_value())
        modProb.setLatency(opr.value(), latency.value());
    }
    auto rsrc = prob.getLinkedResourceTypes(op);
    if (rsrc.has_value())
      modProb.setLinkedResourceTypes(op, rsrc.value());
    modProb.insertOperation(op);
  }

  for (auto *op : prob.getOperations()) {
    for (auto dep : prob.getDependences(op)) {
      if (dep.isAuxiliary()) {
        auto depInserted = modProb.insertDependence(dep);
        assert(succeeded(depInserted));
        (void)depInserted;
      }
      auto distance = prob.getDistance(dep);
      if (distance.has_value())
        modProb.setDistance(dep, distance.value());
    }
  }

  return modProb;
}

void AffineToLoopSchedule::runOnOperation() {
  // Get dependence analysis for the whole function.
  auto dependenceAnalysis = getAnalysis<MemoryDependenceAnalysis>();

  // After dependence analysis, materialize affine structures.
  if (failed(lowerAffineStructures(dependenceAnalysis)))
    return signalPassFailure();

  // Get scheduling analysis for the whole function.
  schedulingAnalysis = &getAnalysis<CyclicSchedulingAnalysis>();

  // Collect perfectly nested loops and work on them.
  auto outerLoops = getOperation().getOps<AffineForOp>();
  for (auto root : llvm::make_early_inc_range(outerLoops)) {
    SmallVector<AffineForOp> nestedLoops;
    getPerfectlyNestedLoops(nestedLoops, root);

    // Restrict to single loops to simplify things for now.
    if (nestedLoops.size() != 1)
      continue;

    ModuloProblem moduloProblem =
        getModuloProblem(schedulingAnalysis->getProblem(nestedLoops.back()));

    // Populate the target operator types.
    if (failed(populateOperatorTypes(nestedLoops, moduloProblem)))
      return signalPassFailure();

    // Solve the scheduling problem computed by the analysis.
    if (failed(solveSchedulingProblem(nestedLoops, moduloProblem)))
      return signalPassFailure();

    // Convert the IR.
    if (failed(createLoopSchedulePipeline(nestedLoops, moduloProblem)))
      return signalPassFailure();
  }
}

/// Apply the affine map from an 'affine.load' operation to its operands, and
/// feed the results to a newly created 'memref.load' operation (which replaces
/// the original 'affine.load').
/// Also replaces the affine load with the memref load in dependenceAnalysis.
/// TODO(mikeurbach): this is copied from AffineToStandard, see if we can reuse.
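/// For example (illustrative IR; the map is materialized as arith ops):
///   %v = affine.load %mem[%i + 1] : memref<10xi32>
/// becomes:
///   %c1 = arith.constant 1 : index
///   %idx = arith.addi %i, %c1 : index
///   %v = memref.load %mem[%idx] : memref<10xi32>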
class AffineLoadLowering : public OpConversionPattern<AffineLoadOp> {
public:
  AffineLoadLowering(MLIRContext *context,
                     MemoryDependenceAnalysis &dependenceAnalysis)
      : OpConversionPattern(context), dependenceAnalysis(dependenceAnalysis) {}

  LogicalResult
  matchAndRewrite(AffineLoadOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    // Expand affine map from 'affineLoadOp'.
    SmallVector<Value, 8> indices(op.getMapOperands());
    auto resultOperands =
        expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices);
    if (!resultOperands)
      return failure();

    // Build memref.load memref[expandedMap.results].
    auto memrefLoad = rewriter.replaceOpWithNewOp<memref::LoadOp>(
        op, op.getMemRef(), *resultOperands);

    dependenceAnalysis.replaceOp(op, memrefLoad);

    return success();
  }

private:
  MemoryDependenceAnalysis &dependenceAnalysis;
};

/// Apply the affine map from an 'affine.store' operation to its operands, and
/// feed the results to a newly created 'memref.store' operation (which
/// replaces the original 'affine.store').
/// Also replaces the affine store with the memref store in dependenceAnalysis.
/// TODO(mikeurbach): this is copied from AffineToStandard, see if we can reuse.
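/// For example (illustrative IR), `affine.store %v, %mem[%i + 1]` becomes a
/// `memref.store` of %v to an explicitly computed index, mirroring the load
/// lowering above.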
class AffineStoreLowering : public OpConversionPattern<AffineStoreOp> {
public:
  AffineStoreLowering(MLIRContext *context,
                      MemoryDependenceAnalysis &dependenceAnalysis)
      : OpConversionPattern(context), dependenceAnalysis(dependenceAnalysis) {}

  LogicalResult
  matchAndRewrite(AffineStoreOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    // Expand affine map from 'affineStoreOp'.
    SmallVector<Value, 8> indices(op.getMapOperands());
    auto maybeExpandedMap =
        expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices);
    if (!maybeExpandedMap)
      return failure();

    // Build memref.store valueToStore, memref[expandedMap.results].
    auto memrefStore = rewriter.replaceOpWithNewOp<memref::StoreOp>(
        op, op.getValueToStore(), op.getMemRef(), *maybeExpandedMap);

    dependenceAnalysis.replaceOp(op, memrefStore);

    return success();
  }

private:
  MemoryDependenceAnalysis &dependenceAnalysis;
};

/// Helper to hoist computation out of scf::IfOp branches, turning it into a
/// mux-like operation, and exposing potentially concurrent execution of its
/// branches.
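/// For example (illustrative IR):
///   %r = scf.if %cond -> (i32) {
///     %t = arith.addi %a, %b : i32
///     scf.yield %t : i32
///   } else {
///     scf.yield %c : i32
///   }
/// becomes:
///   %t = arith.addi %a, %b : i32
///   %r = scf.if %cond -> (i32) {
///     scf.yield %t : i32
///   } else {
///     scf.yield %c : i32
///   }
/// so the scf.if only selects among already-computed values.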
struct IfOpHoisting : OpConversionPattern<IfOp> {
  using OpConversionPattern<IfOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(IfOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    rewriter.modifyOpInPlace(op, [&]() {
      if (!op.thenBlock()->without_terminator().empty()) {
        rewriter.splitBlock(op.thenBlock(), --op.thenBlock()->end());
        rewriter.inlineBlockBefore(&op.getThenRegion().front(), op);
      }
      if (op.elseBlock() && !op.elseBlock()->without_terminator().empty()) {
        rewriter.splitBlock(op.elseBlock(), --op.elseBlock()->end());
        rewriter.inlineBlockBefore(&op.getElseRegion().front(), op);
      }
    });

    return success();
  }
};

/// Helper to determine if an scf::IfOp is in mux-like form.
static bool ifOpLegalityCallback(IfOp op) {
  return op.thenBlock()->without_terminator().empty() &&
         (!op.elseBlock() || op.elseBlock()->without_terminator().empty());
}

/// Helper to mark AffineYieldOp legal, unless it is inside a partially
/// converted scf::IfOp.
static bool yieldOpLegalityCallback(AffineYieldOp op) {
  return !op->getParentOfType<IfOp>();
}

/// After analyzing memory dependences, and before creating the schedule, we
/// want to materialize affine operations with arithmetic, scf, and memref
/// operations, which make the computation of conditions, addresses, etc.
/// explicit. This is important so the schedule can consider potentially
/// complex computations in the conditions of ifs, or in the addresses of loads
/// and stores. The dependence analysis will be updated so the dependences from
/// the affine loads and stores are now on the memref loads and stores.
LogicalResult AffineToLoopSchedule::lowerAffineStructures(
    MemoryDependenceAnalysis &dependenceAnalysis) {
  auto *context = &getContext();
  auto op = getOperation();

  ConversionTarget target(*context);
  target.addLegalDialect<AffineDialect, ArithDialect, MemRefDialect,
                         SCFDialect>();
  target.addIllegalOp<AffineIfOp, AffineLoadOp, AffineStoreOp>();
  target.addDynamicallyLegalOp<IfOp>(ifOpLegalityCallback);
  target.addDynamicallyLegalOp<AffineYieldOp>(yieldOpLegalityCallback);

  RewritePatternSet patterns(context);
  populateAffineToStdConversionPatterns(patterns);
  patterns.add<AffineLoadLowering>(context, dependenceAnalysis);
  patterns.add<AffineStoreLowering>(context, dependenceAnalysis);
  patterns.add<IfOpHoisting>(context);

  if (failed(applyPartialConversion(op, target, std::move(patterns))))
    return failure();

  return success();
}

/// Populate the scheduling problem operator types for the dialect we are
/// targeting. Right now, we assume Calyx, which has a standard library with
/// well-defined operator latencies. Ultimately, we should move this to a
/// dialect interface in the Scheduling dialect.
LogicalResult AffineToLoopSchedule::populateOperatorTypes(
    SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
  // Scheduling analysis only considers the innermost loop nest for now.
  auto forOp = loopNest.back();

  // Load the Calyx operator library into the problem. This is a very minimal
  // set of arithmetic and memory operators for now. This should ultimately be
  // pulled out into some sort of dialect interface.
  Problem::OperatorType combOpr = problem.getOrInsertOperatorType("comb");
  problem.setLatency(combOpr, 0);
  Problem::OperatorType seqOpr = problem.getOrInsertOperatorType("seq");
  problem.setLatency(seqOpr, 1);
  Problem::OperatorType mcOpr = problem.getOrInsertOperatorType("multicycle");
  problem.setLatency(mcOpr, 3);

  Operation *unsupported;
  WalkResult result = forOp.getBody()->walk([&](Operation *op) {
    return TypeSwitch<Operation *, WalkResult>(op)
        .Case<AddIOp, IfOp, AffineYieldOp, arith::ConstantOp, CmpIOp,
              IndexCastOp, memref::AllocaOp, YieldOp>([&](Operation *combOp) {
          // Some known combinational ops.
          problem.setLinkedOperatorType(combOp, combOpr);
          return WalkResult::advance();
        })
        .Case<AddIOp, CmpIOp>([&](Operation *seqOp) {
          // These ops need to be sequential for now because we do not
          // have enough information to chain them together yet. Note that
          // this case is currently unreachable: AddIOp and CmpIOp are
          // already matched by the combinational case above.
          problem.setLinkedOperatorType(seqOp, seqOpr);
          return WalkResult::advance();
        })
        .Case<AffineStoreOp, memref::StoreOp>([&](Operation *memOp) {
          // Writes to a memory are sequential, with a dedicated operator and
          // resource type per memref, so accesses to the same memory cannot
          // be scheduled in the same cycle.
          Value memRef = isa<AffineStoreOp>(*memOp)
                             ? cast<AffineStoreOp>(*memOp).getMemRef()
                             : cast<memref::StoreOp>(*memOp).getMemRef();
          Problem::OperatorType memOpr = problem.getOrInsertOperatorType(
              "mem_" + std::to_string(hash_value(memRef)));
          problem.setLatency(memOpr, 1);
          problem.setLinkedOperatorType(memOp, memOpr);

          auto memRsrc = problem.getOrInsertResourceType(
              "mem_" + std::to_string(hash_value(memRef)) + "_rsrc");
          problem.setLimit(memRsrc, 1);
          problem.setLinkedResourceTypes(
              memOp, SmallVector<Problem::ResourceType>{memRsrc});

          return WalkResult::advance();
        })
        .Case<AffineLoadOp, memref::LoadOp>([&](Operation *memOp) {
          // Some known sequential ops. In certain cases, reads may be
          // combinational in Calyx, but taking advantage of that is left as
          // a future enhancement.
          Value memRef = isa<AffineLoadOp>(*memOp)
                             ? cast<AffineLoadOp>(*memOp).getMemRef()
                             : cast<memref::LoadOp>(*memOp).getMemRef();
          Problem::OperatorType memOpr = problem.getOrInsertOperatorType(
              "mem_" + std::to_string(hash_value(memRef)));
          problem.setLatency(memOpr, 1);
          problem.setLinkedOperatorType(memOp, memOpr);

          auto memRsrc = problem.getOrInsertResourceType(
              "mem_" + std::to_string(hash_value(memRef)) + "_rsrc");
          problem.setLimit(memRsrc, 1);
          problem.setLinkedResourceTypes(
              memOp, SmallVector<Problem::ResourceType>{memRsrc});

          return WalkResult::advance();
        })
        .Case<MulIOp>([&](Operation *mcOp) {
          // Some known multi-cycle ops.
          problem.setLinkedOperatorType(mcOp, mcOpr);
          return WalkResult::advance();
        })
        .Default([&](Operation *badOp) {
          unsupported = op;
          return WalkResult::interrupt();
        });
  });

  if (result.wasInterrupted())
    return forOp.emitError("unsupported operation ") << *unsupported;

  return success();
}

/// Solve the pre-computed scheduling problem.
LogicalResult AffineToLoopSchedule::solveSchedulingProblem(
    SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
  // Scheduling analysis only considers the innermost loop nest for now.
  auto forOp = loopNest.back();

  // Optionally debug problem inputs.
  LLVM_DEBUG(forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
    llvm::dbgs() << "Scheduling inputs for " << *op;
    auto opr = problem.getLinkedOperatorType(op);
    llvm::dbgs() << "\n  opr = " << opr->getAttr();
    llvm::dbgs() << "\n  latency = " << problem.getLatency(*opr);
    for (auto dep : problem.getDependences(op))
      if (dep.isAuxiliary())
        llvm::dbgs() << "\n  dep = { distance = " << problem.getDistance(dep)
                     << ", source = " << *dep.getSource() << " }";
    llvm::dbgs() << "\n\n";
  }));

  // Verify and solve the problem.
  if (failed(problem.check()))
    return failure();

  auto *anchor = forOp.getBody()->getTerminator();
  if (failed(scheduleSimplex(problem, anchor)))
    return failure();

  // Verify the solution.
  if (failed(problem.verify()))
    return failure();

  // Optionally debug problem outputs.
  LLVM_DEBUG({
    llvm::dbgs() << "Scheduled initiation interval = "
                 << problem.getInitiationInterval() << "\n\n";
    forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
      llvm::dbgs() << "Scheduling outputs for " << *op;
      llvm::dbgs() << "\n  start = " << problem.getStartTime(op);
      llvm::dbgs() << "\n\n";
    });
  });

  return success();
}

/// Create the loopschedule pipeline op for a loop nest.
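/// Schematically (illustrative IR; types, attributes, and the set of
/// registered values are simplified), a scheduled loop becomes:
///   loopschedule.pipeline II = 1 trip_count = 10 iter_args(%iv = %lb)
///       : (index) -> () {
///     %cont = arith.cmpi ult, %iv, %ub : index
///     loopschedule.register %cont : i1
///   } do {
///     %s0 = loopschedule.pipeline.stage start = 0 {
///       %next = arith.addi %iv, %step : index
///       loopschedule.register %next : index
///     } : index
///     loopschedule.terminator iter_args(%s0), results() : (index) -> ()
///   }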
LogicalResult AffineToLoopSchedule::createLoopSchedulePipeline(
    SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
  // Scheduling analysis only considers the innermost loop nest for now.
  auto forOp = loopNest.back();

  auto outerLoop = loopNest.front();
  auto innerLoop = loopNest.back();
  ImplicitLocOpBuilder builder(outerLoop.getLoc(), outerLoop);

  // Create Values for the loop's lower and upper bounds.
  Value lowerBound = lowerAffineLowerBound(innerLoop, builder);
  Value upperBound = lowerAffineUpperBound(innerLoop, builder);
  int64_t stepValue = innerLoop.getStep().getSExtValue();
  auto step = builder.create<arith::ConstantOp>(
      IntegerAttr::get(builder.getIndexType(), stepValue));

  // Create the pipeline op, with the same result types as the inner loop. An
  // iter arg is created for the induction variable.
  TypeRange resultTypes = innerLoop.getResultTypes();

  auto ii = builder.getI64IntegerAttr(problem.getInitiationInterval().value());

  SmallVector<Value> iterArgs;
  iterArgs.push_back(lowerBound);
  iterArgs.append(innerLoop.getInits().begin(), innerLoop.getInits().end());

  // If possible, attach a constant trip count attribute. This could be
  // generalized to support non-constant trip counts by supporting an
  // AffineMap.
  std::optional<IntegerAttr> tripCountAttr;
  if (auto tripCount = getConstantTripCount(forOp))
    tripCountAttr = builder.getI64IntegerAttr(*tripCount);

  auto pipeline = builder.create<LoopSchedulePipelineOp>(
      resultTypes, ii, tripCountAttr, iterArgs);

  // Create the condition, which currently just compares the induction
  // variable to the upper bound.
  Block &condBlock = pipeline.getCondBlock();
  builder.setInsertionPointToStart(&condBlock);
  auto cmpResult = builder.create<arith::CmpIOp>(
      builder.getI1Type(), arith::CmpIPredicate::ult, condBlock.getArgument(0),
      upperBound);
  condBlock.getTerminator()->insertOperands(0, {cmpResult});

  // Add the non-yield operations to their start time groups.
  DenseMap<unsigned, SmallVector<Operation *>> startGroups;
  for (auto *op : problem.getOperations()) {
    if (isa<AffineYieldOp, YieldOp>(op))
      continue;
    auto startTime = problem.getStartTime(op);
    startGroups[*startTime].push_back(op);
  }

  // Maintain mappings of values in the loop body and results of stages,
  // initially populated with the iter args.
  IRMapping valueMap;
  // Nested loops are not supported yet.
  assert(iterArgs.size() == forOp.getBody()->getNumArguments());
  for (size_t i = 0; i < iterArgs.size(); ++i)
    valueMap.map(forOp.getBody()->getArgument(i),
                 pipeline.getStagesBlock().getArgument(i));

  // Create the stages.
  Block &stagesBlock = pipeline.getStagesBlock();
  builder.setInsertionPointToStart(&stagesBlock);

  // Iterate in order of the start times.
  SmallVector<unsigned> startTimes;
  for (const auto &group : startGroups)
    startTimes.push_back(group.first);
  llvm::sort(startTimes);

  DominanceInfo dom(getOperation());

  // Keys for translating values in each stage.
  SmallVector<SmallVector<Value>> registerValues;
  SmallVector<SmallVector<Type>> registerTypes;

  // The maps that ensure a stage uses the correct version of a value.
  SmallVector<IRMapping> stageValueMaps;

  // For storing the range of stages an operation's results need to be valid
  // for.
  DenseMap<Operation *, std::pair<unsigned, unsigned>> pipeTimes;
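  // For example, a value defined by an op starting in stage 0 whose last user
  // starts in stage 3 gets pipeTimes (0, 3): it must be registered at the end
  // of stages 0 through 2 so each intermediate stage can forward it to the
  // next.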

  for (auto startTime : startTimes) {
    auto group = startGroups[startTime];

    // Collect the return types for this stage. Operations whose results are
    // not used within this stage are returned.
    auto isLoopTerminator = [forOp](Operation *op) {
      return isa<AffineYieldOp>(op) && op->getParentOp() == forOp;
    };

    // Initialize the set of registers up until this point in time.
    for (unsigned i = registerValues.size(); i <= startTime; ++i)
      registerValues.emplace_back(SmallVector<Value>());

    // Check each operation to see if its results need plumbing.
    for (auto *op : group) {
      if (op->getUsers().empty())
        continue;

      unsigned pipeEndTime = 0;
      for (auto *user : op->getUsers()) {
        unsigned userStartTime = *problem.getStartTime(user);
        if (*problem.getStartTime(user) > startTime)
          pipeEndTime = std::max(pipeEndTime, userStartTime);
        else if (isLoopTerminator(user))
          // Manually forward the value into the terminator's valueMap.
          pipeEndTime = std::max(pipeEndTime, userStartTime + 1);
      }

      // Insert the range of pipeline stages the value needs to be valid for.
      pipeTimes[op] = std::pair(startTime, pipeEndTime);

      // Add register stages for each time slice we need to pipe to.
      for (unsigned i = registerValues.size(); i <= pipeEndTime; ++i)
        registerValues.push_back(SmallVector<Value>());

      // Keep a collection of this stage's results as keys to our valueMaps.
      for (auto result : op->getResults())
        registerValues[startTime].push_back(result);

      // Other stages that use the value will need these values as keys too.
      unsigned firstUse = std::max(
          startTime + 1,
          startTime + *problem.getLatency(*problem.getLinkedOperatorType(op)));
      for (unsigned i = firstUse; i < pipeEndTime; ++i) {
        for (auto result : op->getResults())
          registerValues[i].push_back(result);
      }
    }
  }

  // Now make register types and stageValueMaps.
  for (unsigned i = 0; i < registerValues.size(); ++i) {
    SmallVector<mlir::Type> types;
    for (auto val : registerValues[i])
      types.push_back(val.getType());

    registerTypes.push_back(types);
    stageValueMaps.push_back(valueMap);
  }

  // One more map is needed for the pipeline stages terminator.
  stageValueMaps.push_back(valueMap);

  // Create the stages along with their maps.
  for (auto startTime : startTimes) {
    auto group = startGroups[startTime];
    llvm::sort(group,
               [&](Operation *a, Operation *b) { return dom.dominates(a, b); });
    auto stageTypes = registerTypes[startTime];
    // Add the induction variable increment in the first stage.
    if (startTime == 0)
      stageTypes.push_back(lowerBound.getType());

    // Create the stage itself.
    builder.setInsertionPoint(stagesBlock.getTerminator());
    auto startTimeAttr = builder.getIntegerAttr(
        builder.getIntegerType(64, /*isSigned=*/true), startTime);
    auto stage =
        builder.create<LoopSchedulePipelineStageOp>(stageTypes, startTimeAttr);
    auto &stageBlock = stage.getBodyBlock();
    auto *stageTerminator = stageBlock.getTerminator();
    builder.setInsertionPointToStart(&stageBlock);

    for (auto *op : group) {
      auto *newOp = builder.clone(*op, stageValueMaps[startTime]);

      // All further uses in this stage should use the cloned versions of
      // values, so we update the mapping in this stage.
      for (auto result : op->getResults())
        stageValueMaps[startTime].map(
            result, newOp->getResult(result.getResultNumber()));
    }

    // Register all values in the terminator, using their mapped values.
    SmallVector<Value> stageOperands;
    unsigned resIndex = 0;
    for (auto res : registerValues[startTime]) {
      stageOperands.push_back(stageValueMaps[startTime].lookup(res));
      // Additionally, update the map of the stage that will consume the
      // registered value.
      unsigned destTime = startTime + 1;
      unsigned latency = *problem.getLatency(
          *problem.getLinkedOperatorType(res.getDefiningOp()));
      // Multi-cycle case.
      if (*problem.getStartTime(res.getDefiningOp()) == startTime &&
          latency > 1)
        destTime = startTime + latency;
      destTime = std::min((unsigned)(stageValueMaps.size() - 1), destTime);
      stageValueMaps[destTime].map(res, stage.getResult(resIndex++));
    }
    // Add these mapped values to pipeline.register.
    stageTerminator->insertOperands(stageTerminator->getNumOperands(),
                                    stageOperands);

    // Add the induction variable increment to the first stage.
    if (startTime == 0) {
      auto incResult =
          builder.create<arith::AddIOp>(stagesBlock.getArgument(0), step);
      stageTerminator->insertOperands(stageTerminator->getNumOperands(),
                                      incResult->getResults());
    }
  }

  // Add the iter args and results to the terminator.
  auto stagesTerminator =
      cast<LoopScheduleTerminatorOp>(stagesBlock.getTerminator());

  // Collect the iter args and results from the induction variable increment
  // and any mapped values that were originally yielded.
  SmallVector<Value> termIterArgs;
  SmallVector<Value> termResults;
  termIterArgs.push_back(
      stagesBlock.front().getResult(stagesBlock.front().getNumResults() - 1));

  for (auto value : forOp.getBody()->getTerminator()->getOperands()) {
    unsigned lookupTime = std::min((unsigned)(stageValueMaps.size() - 1),
                                   pipeTimes[value.getDefiningOp()].second);

    termIterArgs.push_back(stageValueMaps[lookupTime].lookup(value));
    termResults.push_back(stageValueMaps[lookupTime].lookup(value));
  }

  stagesTerminator.getIterArgsMutable().append(termIterArgs);
  stagesTerminator.getResultsMutable().append(termResults);

  // Replace loop results with pipeline results.
  for (size_t i = 0; i < forOp.getNumResults(); ++i)
    forOp.getResult(i).replaceAllUsesWith(pipeline.getResult(i));

  // Remove the loop nest from the IR.
  loopNest.front().walk([](Operation *op) {
    op->dropAllUses();
    op->dropAllDefinedValueUses();
    op->dropAllReferences();
    op->erase();
  });

  return success();
}

std::unique_ptr<mlir::Pass> circt::createAffineToLoopSchedule() {
  return std::make_unique<AffineToLoopSchedule>();
}