10 #include "../PassDetail.h"
16 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
17 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
18 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
19 #include "mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.h"
20 #include "mlir/Dialect/Affine/IR/AffineOps.h"
21 #include "mlir/Dialect/Affine/LoopUtils.h"
22 #include "mlir/Dialect/Affine/Utils.h"
23 #include "mlir/Dialect/Arith/IR/Arith.h"
24 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
25 #include "mlir/Dialect/Func/IR/FuncOps.h"
26 #include "mlir/Dialect/MemRef/IR/MemRef.h"
27 #include "mlir/Dialect/SCF/IR/SCF.h"
28 #include "mlir/IR/BuiltinDialect.h"
29 #include "mlir/IR/Dominance.h"
30 #include "mlir/IR/IRMapping.h"
31 #include "mlir/IR/ImplicitLocOpBuilder.h"
32 #include "mlir/Transforms/DialectConversion.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/TypeSwitch.h"
35 #include "llvm/Support/Debug.h"
39 #define DEBUG_TYPE "affine-to-loopschedule"
47 using namespace circt;
// Pass that lowers affine loop nests into statically scheduled LoopSchedule
// pipelines.  NOTE(review): this view is a fragmentary extraction -- source
// lines are missing between the visible statements; do not treat it as
// complete, compilable source.
54 struct AffineToLoopSchedule
55 :
public AffineToLoopScheduleBase<AffineToLoopSchedule> {
// Main pass entry point (defined below).
56 void runOnOperation()
override;
// Assign an operator type (and latency) to every operation in the loop
// nest's scheduling problem; reports an error on unsupported operations.
62 LogicalResult populateOperatorTypes(SmallVectorImpl<AffineForOp> &loopNest,
// Check the problem, run the scheduler, and verify the solution.
64 LogicalResult solveSchedulingProblem(SmallVectorImpl<AffineForOp> &loopNest,
// Materialize the computed schedule as a LoopSchedulePipelineOp.
67 createLoopSchedulePipeline(SmallVectorImpl<AffineForOp> &loopNest,
// Fragment of a helper that copies an existing scheduling problem into a
// ModuloProblem: operator types, latencies, operations, and auxiliary
// dependences (with their iteration distances) are transferred one by one.
79 if (opr.has_value()) {
// Re-link the operation to the same operator type in the modulo problem.
80 modProb.setLinkedOperatorType(op, opr.value());
// Carry over the operator type's latency when one was set.
82 if (latency.has_value())
83 modProb.setLatency(opr.value(), latency.value());
// Register the operation itself with the modulo problem.
85 modProb.insertOperation(op);
// Only auxiliary dependences need explicit insertion; def-use dependences
// are implicit (see Problem::getDependences).
90 if (dep.isAuxiliary()) {
91 auto depInserted = modProb.insertDependence(dep);
// Insertion is expected to succeed; checked in assertion-enabled builds.
92 assert(succeeded(depInserted));
// Preserve the inter-iteration distance when the dependence has one.
96 if (distance.has_value())
97 modProb.setDistance(dep, distance.value())
// Pass driver: lower affine control structures, build a scheduling problem
// per loop, and convert each eligible affine loop nest into a pipeline.
104 void AffineToLoopSchedule::runOnOperation() {
// Capture memory dependence analysis before lowering, so rewritten ops can
// be tracked through dependenceAnalysis.replaceOp (see lowering patterns).
106 auto dependenceAnalysis = getAnalysis<MemoryDependenceAnalysis>();
// Lower affine.if/load/store to standard-dialect equivalents first.
109 if (failed(lowerAffineStructures(dependenceAnalysis)))
110 return signalPassFailure();
// Scheduling analysis constructs a CyclicProblem for each AffineForOp.
113 schedulingAnalysis = &getAnalysis<CyclicSchedulingAnalysis>();
// early_inc_range: loops are replaced/erased while iterating over them.
116 auto outerLoops = getOperation().getOps<AffineForOp>();
117 for (
auto root : llvm::make_early_inc_range(outerLoops)) {
118 SmallVector<AffineForOp> nestedLoops;
119 getPerfectlyNestedLoops(nestedLoops, root);
// Only single (non-nested) loops are handled here.
122 if (nestedLoops.size() != 1)
// Convert the per-loop cyclic problem into a modulo problem.
126 getModuloProblem(schedulingAnalysis->getProblem(nestedLoops.back()));
// Populate operator types, solve, then materialize the pipeline; any
// failure aborts the whole pass.
129 if (failed(populateOperatorTypes(nestedLoops, moduloProblem)))
130 return signalPassFailure();
133 if (failed(solveSchedulingProblem(nestedLoops, moduloProblem)))
134 return signalPassFailure();
137 if (failed(createLoopSchedulePipeline(nestedLoops, moduloProblem)))
138 return signalPassFailure();
// Fragment of AffineLoadLowering::matchAndRewrite: expands the load's
// affine map into explicit index arithmetic and replaces the affine.load
// with an equivalent memref.load.
155 ConversionPatternRewriter &rewriter)
const override {
// Expand the affine map applied to the load's map operands.
157 SmallVector<Value, 8> indices(op.getMapOperands());
158 auto resultOperands =
159 expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices);
// Build memref.load memref[expandedMap.results], replacing the original op.
164 auto memrefLoad = rewriter.replaceOpWithNewOp<memref::LoadOp>(
165 op, op.getMemRef(), *resultOperands);
// Keep the memory dependence analysis in sync with the rewritten IR.
167 dependenceAnalysis.replaceOp(op, memrefLoad);
// Fragment of AffineStoreLowering::matchAndRewrite: expands the store's
// affine map into explicit index arithmetic and replaces the affine.store
// with an equivalent memref.store.
189 ConversionPatternRewriter &rewriter)
const override {
191 SmallVector<Value, 8> indices(op.getMapOperands());
192 auto maybeExpandedMap =
193 expandAffineMap(rewriter, op.getLoc(), op.getAffineMap(), indices);
// Bail out of the rewrite if the map could not be expanded.
194 if (!maybeExpandedMap)
// Build memref.store valueToStore, memref[expandedMap.results].
198 auto memrefStore = rewriter.replaceOpWithNewOp<memref::StoreOp>(
199 op, op.getValueToStore(), op.getMemRef(), *maybeExpandedMap);
// Keep the memory dependence analysis in sync with the rewritten IR.
201 dependenceAnalysis.replaceOp(op, memrefStore);
// Fragment of IfOpHoisting::matchAndRewrite: hoists computation out of
// scf.if branches by splitting each branch block just before its terminator
// and inlining the hoisted prefix before the scf.if, leaving the op in a
// mux-like form (branches contain only their terminators).
218 ConversionPatternRewriter &rewriter)
const override {
219 rewriter.modifyOpInPlace(op, [&]() {
// Hoist the then-branch body (everything except the terminator).
220 if (!op.thenBlock()->without_terminator().empty()) {
// --end() points at the terminator; split immediately before it.
221 rewriter.splitBlock(op.thenBlock(), --op.thenBlock()->end());
222 rewriter.inlineBlockBefore(&op.getThenRegion().front(), op);
// Same hoisting for the else-branch, when present and non-trivial.
224 if (op.elseBlock() && !op.elseBlock()->without_terminator().empty()) {
225 rewriter.splitBlock(op.elseBlock(), --op.elseBlock()->end());
226 rewriter.inlineBlockBefore(&op.getElseRegion().front(), op);
// Helper fragment: an scf.if is mux-like once both branches contain only
// their terminators.
236 return op.thenBlock()->without_terminator().empty() &&
237 (!op.elseBlock() || op.elseBlock()->without_terminator().empty());
// Helper fragment: yields are legal unless nested inside a (partially
// converted) scf.if.
243 return !op->getParentOfType<IfOp>();
// Lower affine.if/load/store (via the standard affine-to-std patterns plus
// the custom patterns above) so scheduling can operate on memref/arith ops.
253 LogicalResult AffineToLoopSchedule::lowerAffineStructures(
255 auto *context = &getContext();
256 auto op = getOperation();
258 ConversionTarget target(*context);
// The listed dialects stay legal as a whole; only the specific affine ops
// below are forced through the conversion.
259 target.addLegalDialect<AffineDialect, ArithDialect, MemRefDialect,
261 target.addIllegalOp<AffineIfOp, AffineLoadOp, AffineStoreOp>();
265 RewritePatternSet
patterns(context);
266 populateAffineToStdConversionPatterns(
patterns);
// Partial conversion: ops outside the target's scope remain untouched.
271 if (failed(applyPartialConversion(op, target, std::move(
patterns))))
// Map every operation in the loop body to an operator type modeling its
// hardware timing; walking stops (and the pass errors out) on any
// unsupported operation.
281 LogicalResult AffineToLoopSchedule::populateOperatorTypes(
282 SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem) {
284 auto forOp = loopNest.back();
// Remember the offending op so the diagnostic below can print it.
296 Operation *unsupported;
297 WalkResult result = forOp.getBody()->walk([&](Operation *op) {
298 return TypeSwitch<Operation *, WalkResult>(op)
// Combinational operations.
299 .Case<AddIOp, IfOp, AffineYieldOp, arith::ConstantOp, CmpIOp,
300 IndexCastOp, memref::AllocaOp, YieldOp>([&](Operation *combOp) {
303 return WalkResult::advance();
// NOTE(review): this Case appears unreachable -- AddIOp and CmpIOp are
// already matched by the Case above, and TypeSwitch stops at the first
// match.  Confirm which operator type these ops are intended to get.
305 .Case<AddIOp, CmpIOp>([&](Operation *seqOp) {
309 return WalkResult::advance();
// Stores: both affine.store and memref.store target a memref; fetch it
// from whichever form the op takes.
311 .Case<AffineStoreOp, memref::StoreOp>([&](Operation *memOp) {
315 Value memRef = isa<AffineStoreOp>(*memOp)
316 ? cast<AffineStoreOp>(*memOp).getMemRef()
317 : cast<memref::StoreOp>(*memOp).getMemRef();
323 return WalkResult::advance();
// Loads: handled analogously to stores.
325 .Case<AffineLoadOp, memref::LoadOp>([&](Operation *memOp) {
329 Value memRef = isa<AffineLoadOp>(*memOp)
330 ? cast<AffineLoadOp>(*memOp).getMemRef()
331 : cast<memref::LoadOp>(*memOp).getMemRef();
337 return WalkResult::advance();
// Multiplies get their own operator type (latency not visible in this
// extraction -- presumably multi-cycle; confirm against the full source).
339 .Case<MulIOp>([&](Operation *mcOp) {
342 return WalkResult::advance();
// Anything else is unsupported: record it and interrupt the walk.
344 .Default([&](Operation *badOp) {
346 return WalkResult::interrupt();
350 if (result.wasInterrupted())
351 return forOp.emitError(
"unsupported operation ") << *unsupported;
// Check the constructed problem, run the scheduler, and verify the computed
// solution; debug builds dump the scheduling inputs and outputs.
357 LogicalResult AffineToLoopSchedule::solveSchedulingProblem(
358 SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem) {
360 auto forOp = loopNest.back();
// Debug dump of problem inputs: linked operator type, its latency, and all
// auxiliary dependences with their distances.
363 LLVM_DEBUG(forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
364 llvm::dbgs() <<
"Scheduling inputs for " << *op;
365 auto opr = problem.getLinkedOperatorType(op);
366 llvm::dbgs() <<
"\n opr = " << opr;
367 llvm::dbgs() <<
"\n latency = " << problem.getLatency(*opr);
368 for (auto dep : problem.getDependences(op))
369 if (dep.isAuxiliary())
370 llvm::dbgs() <<
"\n dep = { distance = " << problem.getDistance(dep)
371 <<
", source = " << *dep.getSource() <<
" }";
372 llvm::dbgs() <<
"\n\n";
// Reject malformed problems before attempting to schedule.
376 if (failed(problem.
check()))
// The loop terminator anchors the schedule as the last operation.
379 auto *anchor = forOp.getBody()->getTerminator();
// Verify the computed solution satisfies all constraints.
384 if (failed(problem.
verify()))
389 llvm::dbgs() <<
"Scheduled initiation interval = "
// Debug dump of the computed start times per operation.
391 forOp.getBody()->walk<WalkOrder::PreOrder>([&](Operation *op) {
392 llvm::dbgs() <<
"Scheduling outputs for " << *op;
393 llvm::dbgs() <<
"\n start = " << problem.
getStartTime(op);
394 llvm::dbgs() <<
"\n\n";
// Materialize the scheduled loop as a LoopSchedulePipelineOp: build the
// pipeline shell (bounds, iter args, condition), group operations into
// stages by start time, register values that cross stage boundaries, and
// wire up the pipeline terminator.
402 LogicalResult AffineToLoopSchedule::createLoopSchedulePipeline(
403 SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem) {
405 auto forOp = loopNest.back();
407 auto outerLoop = loopNest.front();
408 auto innerLoop = loopNest.back();
// Insert the new pipeline immediately before the outermost loop.
409 ImplicitLocOpBuilder
builder(outerLoop.getLoc(), outerLoop);
// Lower the affine loop bounds to plain SSA values.
412 Value lowerBound = lowerAffineLowerBound(innerLoop,
builder);
413 Value upperBound = lowerAffineUpperBound(innerLoop,
builder);
414 int64_t stepValue = innerLoop.getStep().getSExtValue();
415 auto step =
builder.create<arith::ConstantOp>(
420 TypeRange resultTypes = innerLoop.getResultTypes();
// Iteration args: the induction variable first, then the loop's inits.
424 SmallVector<Value> iterArgs;
425 iterArgs.push_back(lowerBound);
426 iterArgs.append(innerLoop.getInits().begin(), innerLoop.getInits().end());
// Attach a trip-count attribute when it is statically known.
430 std::optional<IntegerAttr> tripCountAttr;
431 if (
auto tripCount = getConstantTripCount(forOp))
432 tripCountAttr =
builder.getI64IntegerAttr(*tripCount);
434 auto pipeline =
builder.create<LoopSchedulePipelineOp>(
435 resultTypes, ii, tripCountAttr, iterArgs);
// Condition block: keep iterating while iv < upperBound (unsigned cmp).
439 Block &condBlock = pipeline.getCondBlock();
440 builder.setInsertionPointToStart(&condBlock);
441 auto cmpResult =
builder.create<arith::CmpIOp>(
442 builder.getI1Type(), arith::CmpIPredicate::ult, condBlock.getArgument(0),
444 condBlock.getTerminator()->insertOperands(0, {cmpResult});
// Group operations by scheduled start time; terminators are not staged.
447 DenseMap<unsigned, SmallVector<Operation *>> startGroups;
449 if (isa<AffineYieldOp, YieldOp>(op))
452 startGroups[*startTime].push_back(op);
// Map the loop body's block arguments onto the pipeline's stage block
// arguments (same count, asserted below).
459 assert(iterArgs.size() == forOp.getBody()->getNumArguments());
460 for (
size_t i = 0; i < iterArgs.size(); ++i)
461 valueMap.map(forOp.getBody()->getArgument(i),
462 pipeline.getStagesBlock().getArgument(i));
465 Block &stagesBlock = pipeline.getStagesBlock();
466 builder.setInsertionPointToStart(&stagesBlock);
// Emit stages in increasing start-time order.
469 SmallVector<unsigned> startTimes;
470 for (
const auto &group : startGroups)
471 startTimes.push_back(group.first);
472 llvm::sort(startTimes);
474 DominanceInfo dom(getOperation());
// registerValues[i]: values stage i must forward to later stages;
// registerTypes mirrors it with the corresponding types.
477 SmallVector<SmallVector<Value>> registerValues;
478 SmallVector<SmallVector<Type>> registerTypes;
// One IRMapping per stage, each seeded from valueMap below.
481 SmallVector<IRMapping> stageValueMaps;
// For each op: (start time, last stage in which its results are used).
484 DenseMap<Operation *, std::pair<unsigned, unsigned>> pipeTimes;
486 for (
auto startTime : startTimes) {
487 auto group = startGroups[startTime];
// The loop's own yield marks values that live into the next iteration.
491 auto isLoopTerminator = [forOp](Operation *op) {
492 return isa<AffineYieldOp>(op) && op->getParentOp() == forOp;
// Grow the register table up to this start time.
496 for (
unsigned i = registerValues.size(); i <= startTime; ++i)
497 registerValues.emplace_back(SmallVector<Value>());
500 for (
auto *op : group) {
// Unused results never need pipeline registers.
501 if (op->getUsers().empty())
// Find the latest stage that consumes this op's results.
504 unsigned pipeEndTime = 0;
505 for (
auto *user : op->getUsers()) {
508 pipeEndTime = std::max(pipeEndTime, userStartTime);
// A use by the loop terminator must survive one extra stage.
509 else if (isLoopTerminator(user))
511 pipeEndTime = std::max(pipeEndTime, userStartTime + 1);
515 pipeTimes[op] = std::pair(startTime, pipeEndTime);
// Grow the register table up to the last use.
518 for (
unsigned i = registerValues.size(); i <= pipeEndTime; ++i)
519 registerValues.push_back(SmallVector<Value>());
// Register the results in the producing stage...
522 for (
auto result : op->getResults())
523 registerValues[startTime].push_back(result);
// ...and in every intermediate stage up to the final consumer.
526 unsigned firstUse = std::max(
529 for (
unsigned i = firstUse; i < pipeEndTime; ++i) {
530 for (
auto result : op->getResults())
531 registerValues[i].push_back(result);
// Collect the register types and seed each stage's value map.
537 for (
unsigned i = 0; i < registerValues.size(); ++i) {
538 SmallVector<mlir::Type> types;
539 for (
auto val : registerValues[i])
540 types.push_back(val.getType());
542 registerTypes.push_back(types);
543 stageValueMaps.push_back(valueMap);
// One extra map for values escaping the final stage.
547 stageValueMaps.push_back(valueMap);
// Second pass: emit one pipeline stage per start time.
550 for (
auto startTime : startTimes) {
551 auto group = startGroups[startTime];
// Clone in dominance order so operands are mapped before their users.
553 [&](Operation *a, Operation *b) {
return dom.dominates(a, b); });
554 auto stageTypes = registerTypes[startTime];
// Stage 0 additionally yields the incremented induction variable.
557 stageTypes.push_back(lowerBound.getType());
560 builder.setInsertionPoint(stagesBlock.getTerminator());
561 auto startTimeAttr =
builder.getIntegerAttr(
562 builder.getIntegerType(64,
true), startTime);
564 builder.create<LoopSchedulePipelineStageOp>(stageTypes, startTimeAttr);
565 auto &stageBlock = stage.getBodyBlock();
566 auto *stageTerminator = stageBlock.getTerminator();
567 builder.setInsertionPointToStart(&stageBlock);
// Clone each group member into the stage, remapping its operands.
569 for (
auto *op : group) {
570 auto *newOp =
builder.clone(*op, stageValueMaps[startTime]);
// Record the cloned results in this stage's value map.
574 for (
auto result : op->getResults())
575 stageValueMaps[startTime].map(
576 result, newOp->getResult(result.getResultNumber()));
// Yield registered values out of the stage and map each one into the
// stage where it is next consumed.
580 SmallVector<Value> stageOperands;
581 unsigned resIndex = 0;
582 for (
auto res : registerValues[startTime]) {
583 stageOperands.push_back(stageValueMaps[startTime].lookup(res));
// By default the value lands in the immediately following stage.
586 unsigned destTime = startTime + 1;
// Multi-cycle producers deliver their result `latency` stages later.
590 if (*problem.
getStartTime(res.getDefiningOp()) == startTime &&
592 destTime = startTime + latency;
// Clamp to the last stage map (the extra one pushed above).
593 destTime = std::min((
unsigned)(stageValueMaps.size() - 1), destTime);
594 stageValueMaps[destTime].map(res, stage.getResult(resIndex++));
597 stageTerminator->insertOperands(stageTerminator->getNumOperands(),
// Stage 0 computes the next induction-variable value.
601 if (startTime == 0) {
603 builder.create<arith::AddIOp>(stagesBlock.getArgument(0), step);
604 stageTerminator->insertOperands(stageTerminator->getNumOperands(),
605 incResult->getResults());
// Wire the pipeline terminator: iter args and results.
610 auto stagesTerminator =
611 cast<LoopScheduleTerminatorOp>(stagesBlock.getTerminator());
// The first iter arg is the incremented induction variable, which is the
// last result of the first stage.
615 SmallVector<Value> termIterArgs;
616 SmallVector<Value> termResults;
617 termIterArgs.push_back(
618 stagesBlock.front().getResult(stagesBlock.front().getNumResults() - 1));
620 for (
auto value : forOp.getBody()->getTerminator()->getOperands()) {
// Look each yielded value up in the map of the stage where it was last
// alive (clamped to the final map).
621 unsigned lookupTime = std::min((
unsigned)(stageValueMaps.size() - 1),
622 pipeTimes[value.getDefiningOp()].second);
624 termIterArgs.push_back(stageValueMaps[lookupTime].lookup(value));
625 termResults.push_back(stageValueMaps[lookupTime].lookup(value));
628 stagesTerminator.getIterArgsMutable().append(termIterArgs);
629 stagesTerminator.getResultsMutable().append(termResults);
// Redirect uses of the original loop's results to the new pipeline.
632 for (
size_t i = 0; i < forOp.getNumResults(); ++i)
633 forOp.getResult(i).replaceAllUsesWith(pipeline.getResult(i));
// Drop uses/references so the now-dead original loop nest can be erased.
636 loopNest.front().walk([](Operation *op) {
638 op->dropAllDefinedValueUses();
639 op->dropAllReferences();
647 return std::make_unique<AffineToLoopSchedule>();
static bool yieldOpLegalityCallback(AffineYieldOp op)
Helper to mark AffineYieldOp legal, unless it is inside a partially converted scf::IfOp.
static bool ifOpLegalityCallback(IfOp op)
Helper to determine if an scf::IfOp is in mux-like form.
assert(baseType && "element must be base type")
Apply the affine map from an 'affine.load' operation to its operands, and feed the results to a newly created memref.load operation (which replaces the original 'affine.load').
MemoryDependenceAnalysis & dependenceAnalysis
LogicalResult matchAndRewrite(AffineLoadOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
AffineLoadLowering(MLIRContext *context, MemoryDependenceAnalysis &dependenceAnalysis)
Apply the affine map from an 'affine.store' operation to its operands, and feed the results to a newly created memref.store operation (which replaces the original 'affine.store').
AffineStoreLowering(MLIRContext *context, MemoryDependenceAnalysis &dependenceAnalysis)
MemoryDependenceAnalysis & dependenceAnalysis
LogicalResult matchAndRewrite(AffineStoreOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
This class models a cyclic scheduling problem.
std::optional< unsigned > getInitiationInterval()
The initiation interval (II) is the number of time steps between subsequent iterations,...
std::optional< unsigned > getDistance(Dependence dep)
The distance determines whether a dependence has to be satisfied in the same iteration (distance=0 or...
This class models the modulo scheduling problem as the composition of the cyclic problem and the reso...
virtual LogicalResult verify() override
Return success if the computed solution is valid.
void setLatency(OperatorType opr, unsigned val)
virtual LogicalResult check()
Return success if the constructed scheduling problem is valid.
std::optional< OperatorType > getLinkedOperatorType(Operation *op)
The linked operator type provides the runtime characteristics for op.
std::optional< unsigned > getStartTime(Operation *op)
Return the start time for op, as computed by the scheduler.
OperatorType getOrInsertOperatorType(StringRef name)
Retrieves the operator type identified by the client-specific name.
DependenceRange getDependences(Operation *op)
Return a range object to transparently iterate over op's incoming 1) implicit def-use dependences (ba...
const OperationSet & getOperations()
Return the set of operations.
void setLinkedOperatorType(Operation *op, OperatorType opr)
mlir::StringAttr OperatorType
Operator types are distinguished by name (chosen by the client).
std::optional< unsigned > getLatency(OperatorType opr)
The latency is the number of cycles opr needs to compute its result.
Operation * getContainingOp()
Return the operation containing this problem, e.g. to emit diagnostics.
void setLimit(OperatorType opr, unsigned val)
Direction get(bool isOutput)
Returns an output direction if isOutput is true, otherwise returns an input direction.
LogicalResult scheduleSimplex(Problem &prob, Operation *lastOp)
Solve the basic problem using linear programming and a handwritten implementation of the simplex algorithm.
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
std::unique_ptr< mlir::Pass > createAffineToLoopSchedule()
llvm::hash_code hash_value(const T &e)
Helper to hoist computation out of scf::IfOp branches, turning it into a mux-like operation,...
LogicalResult matchAndRewrite(IfOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
CyclicSchedulingAnalysis constructs a CyclicProblem for each AffineForOp by performing a memory depen...
MemoryDependenceAnalysis traverses any AffineForOps in the FuncOp body and checks for affine memory a...