doxygen/LowerVectorizations_8cpp_source.html

 //===- LowerVectorizations.cpp --------------------------------------------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #include "circt/Dialect/Arc/ArcOps.h"

 #include "circt/Dialect/Arc/ArcPasses.h"

 #include "circt/Dialect/Comb/CombOps.h"

 #include "mlir/Dialect/Arith/IR/Arith.h"

 #include "mlir/Dialect/Vector/IR/VectorOps.h"

 #include "mlir/IR/ImplicitLocOpBuilder.h"

 #include "mlir/Pass/Pass.h"


 #include "circt/Dialect/Arc/ArcPassesEnums.cpp.inc"


 namespace circt {

 namespace arc {

 #define GEN_PASS_DEF_LOWERVECTORIZATIONS

 #include "circt/Dialect/Arc/ArcPasses.h.inc"

 } // namespace arc

 } // namespace circt


 using namespace mlir;

 using namespace circt;

 using namespace arc;


 /// Vectorizes the `arc.vectorize` boundary by packing the lanes of every
 /// operand vector into one integer value. Returns the vectorized version of
 /// the op. May invalidate the passed operation.
 ///
 /// Lane layout: `comb.concat` places its first operand in the most significant
 /// bits, so for `n` lanes of width `w`, lane `i` occupies the bits starting at
 /// `w * (n - 1 - i)`. The results are extracted using that same layout so that
 /// result `i` of the original op is still computed from lane `i` of every
 /// operand vector.
 ///
 /// Example:
 /// ```mlir
 /// %0:2 = arc.vectorize (%in0, %in1), (%in2, %in3) :
 ///     (i1, i1, i1, i1) -> (i1, i1) {
 /// ^bb0(%arg0: i1, %arg1: i1):
 ///   %1 = comb.and %arg0, %arg1 : i1
 ///   arc.vectorize.return %1 : i1
 /// }
 /// ```
 /// becomes
 /// ```mlir
 /// %0 = comb.concat %in0, %in1 : i1, i1
 /// %1 = comb.concat %in2, %in3 : i1, i1
 /// %2 = arc.vectorize (%0), (%1) : (i2, i2) -> i2 {
 /// ^bb0(%arg0: i1, %arg1: i1):
 ///   %11 = comb.and %arg0, %arg1 : i1
 ///   arc.vectorize.return %11 : i1
 /// }
 /// %3 = comb.extract %2 from 1 : (i2) -> i1
 /// %4 = comb.extract %2 from 0 : (i2) -> i1
 /// ```
 static VectorizeOp lowerBoundaryScalar(VectorizeOp op) {
   // New operations are inserted right before `op`, reusing its location.
   ImplicitLocOpBuilder builder(op.getLoc(), op);
   SmallVector<ValueRange> vectors;

   // Pack each operand vector into one integer of `lanes * element width`
   // bits. The element width is taken from the first lane of the vector.
   for (ValueRange range : op.getInputs()) {
     unsigned bw = range.front().getType().getIntOrFloatBitWidth();
     vectors.push_back(builder
                           .create<comb::ConcatOp>(
                               builder.getIntegerType(bw * range.size()), range)
                           ->getResults());
   }

   // The new op has a single packed result; the body region is moved over
   // unchanged.
   unsigned width = op->getResult(0).getType().getIntOrFloatBitWidth();
   unsigned numLanes = op->getNumResults();
   VectorizeOp newOp = builder.create<VectorizeOp>(
       builder.getIntegerType(width * numLanes), vectors);
   newOp.getBody().takeBody(op.getBody());

   // Unpack the lanes again. Lane 0 was concatenated first and thus sits in
   // the most significant bits, so result `i` starts at bit
   // `width * (numLanes - 1 - i)`. Extracting at `width * i` would hand the
   // lanes back in reversed order.
   for (OpResult res : op.getResults()) {
     unsigned lowBit = width * (numLanes - 1 - res.getResultNumber());
     Value newRes =
         builder.create<comb::ExtractOp>(newOp.getResult(0), lowBit, width);
     res.replaceAllUsesWith(newRes);
   }

   op->erase();
   return newOp;
 }


 /// Lowers the boundary of an `arc.vectorize` op to the `vector` type and
 /// dialect (SIMD-style vectorization). Every operand vector becomes a single
 /// 1-D `vector` value and the op gets a single `vector` result from which the
 /// original results are extracted. Returns the newly created op; the op passed
 /// in is erased.
 ///
 /// Example:
 /// ```mlir
 /// %0:2 = arc.vectorize (%in0, %in1), (%in2, %in2) :
 ///     (i64, i64, i32, i32) -> (i64, i64) {
 /// ^bb0(%arg0: i64, %arg1: i32):
 ///   %c0_i32 = hw.constant 0 : i32
 ///   %1 = comb.concat %c0_i32, %arg1 : i32, i32
 ///   %2 = comb.and %arg0, %1 : i64
 ///   arc.vectorize.return %2 : i64
 /// }
 /// ```
 /// becomes
 /// ```mlir
 /// %cst = arith.constant dense<0> : vector<2xi64>
 /// %0 = vector.insert %in0, %cst [0] : i64 into vector<2xi64>
 /// %1 = vector.insert %in1, %0 [1] : i64 into vector<2xi64>
 /// %2 = vector.broadcast %in2 : i32 to vector<2xi32>
 /// %3 = arc.vectorize (%1), (%2) :
 ///     (vector<2xi64>, vector<2xi32>) -> vector<2xi64> {
 /// ^bb0(%arg0: i64, %arg1: i32):
 ///   %c0_i32 = hw.constant 0 : i32
 ///   %10 = comb.concat %c0_i32, %arg1 : i32, i32
 ///   %11 = comb.and %arg0, %10 : i64
 ///   arc.vectorize.return %11 : i64
 /// }
 /// %4 = vector.extract %3[0] : vector<2xi64>
 /// %5 = vector.extract %3[1] : vector<2xi64>
 /// ```
 static VectorizeOp lowerBoundaryVector(VectorizeOp op) {
   ImplicitLocOpBuilder builder(op.getLoc(), op);
   SmallVector<ValueRange> packedOperands;

   for (ValueRange lanes : op.getInputs()) {
     // One-dimensional vector with one element per lane; the element type is
     // taken from the first lane.
     VectorType vecTy = VectorType::get(SmallVector<int64_t>(1, lanes.size()),
                                        lanes.front().getType());

     // All lanes carry the same value: a broadcast is a significantly cheaper
     // instruction than inserting every element individually.
     if (llvm::all_equal(lanes)) {
       auto splat = builder.create<vector::BroadcastOp>(vecTy, lanes.front());
       packedOperands.push_back(splat->getResults());
       continue;
     }

     // General case: start from an all-zero constant vector and insert the
     // lanes one after the other.
     SmallVector<Attribute> zeros(
         lanes.size(), builder.getIntegerAttr(vecTy.getElementType(), 0));
     ValueRange gathered =
         builder.create<arith::ConstantOp>(DenseElementsAttr::get(vecTy, zeros))
             ->getResults();
     for (auto [laneIdx, lane] : llvm::enumerate(lanes))
       gathered =
           builder.create<vector::InsertOp>(lane, gathered.front(), laneIdx)
               ->getResults();

     packedOperands.push_back(gathered);
   }

   // Single vector result with one element per original result; the body
   // region is moved to the new op unchanged.
   VectorType resultTy = VectorType::get(
       SmallVector<int64_t>(1, op->getNumResults()), op.getResult(0).getType());
   VectorizeOp newOp = builder.create<VectorizeOp>(resultTy, packedOperands);
   newOp.getBody().takeBody(op.getBody());

   // Re-route every original result to the matching element of the vector.
   for (OpResult oldResult : op.getResults()) {
     auto lane = builder.create<vector::ExtractOp>(newOp.getResult(0),
                                                   oldResult.getResultNumber());
     oldResult.replaceAllUsesWith(lane);
   }

   op->erase();
   return newOp;
 }


 /// Vectorizes the boundary of the given `arc.vectorize` operation if it is not
 /// already vectorized. If the body of the `arc.vectorize` operation is already
 /// vectorized the same vectorization technique (SIMD or scalar) is chosen.
 /// Otherwise,
 ///  * packs the vectors into scalars if every packed operand vector and the
 ///    packed result fit in a 64-bit integer, or
 ///  * uses the `vector` type and dialect for SIMD vectorization.
 /// Returns the vectorized version of the op. May invalidate the passed
 /// operation.
 static VectorizeOp lowerBoundary(VectorizeOp op) {
   // Nothing to do if it is already vectorized.
   if (op.isBoundaryVectorized())
     return op;

   // If the body is already vectorized, we must use the same vectorization
   // technique. Otherwise, we would produce invalid IR. The technique is
   // read off the type of the first block argument.
   if (op.isBodyVectorized()) {
     if (isa<VectorType>(op.getBody().front().getArgumentTypes().front()))
       return lowerBoundaryVector(op);
     return lowerBoundaryScalar(op);
   }

   // Compute the widest integer that scalar packing would have to create:
   // the packed result and every packed operand vector, each of which is
   // `number of lanes * element width` bits wide. Note that
   // `op.getInputs().size()` is the number of operand vectors, not the number
   // of lanes, so it must not be used as the lane count here.
   unsigned maxPackedWidth =
       op->getResult(0).getType().getIntOrFloatBitWidth() * op->getNumResults();
   for (OperandRange range : op.getInputs()) {
     unsigned bw = range.front().getType().getIntOrFloatBitWidth();
     unsigned packedWidth = bw * range.size();
     maxPackedWidth = std::max(maxPackedWidth, packedWidth);
   }

   // If everything fits in an i64 value, use scalar vectorization, otherwise
   // use SIMD.
   if (maxPackedWidth <= 64)
     return lowerBoundaryScalar(op);
   return lowerBoundaryVector(op);
 }


 /// Entry point for vectorizing the body of an `arc.vectorize` operation.
 ///
 /// Body lowering itself is not implemented yet: an op whose body is already
 /// vectorized is handed back unchanged, every other op is rejected with the
 /// error "lowering body not yet supported". The intended strategy is to pick
 /// the same technique as an already vectorized boundary and otherwise to
 ///  * pack the vector in a scalar if it fits in a 64-bit integer or
 ///  * use the `vector` type and dialect for SIMD vectorization.
 /// Returns the vectorized version of the op or failure.
 static FailureOr<VectorizeOp> lowerBody(VectorizeOp op) {
   if (!op.isBodyVectorized())
     return op->emitError("lowering body not yet supported");

   // Already vectorized, nothing to rewrite.
   return op;
 }


 /// Replaces an `arc.vectorize` operation by the contents of its body. This is
 /// only possible once both the boundary and the body are vectorized; otherwise
 /// an error is emitted on the op and failure is returned.
 ///
 /// Example:
 /// ```mlir
 /// %0 = comb.concat %in0, %in1 : i1, i1
 /// %1 = comb.concat %in2, %in2 : i1, i1
 /// %2 = arc.vectorize (%0), (%1) : (i2, i2) -> i2 {
 /// ^bb0(%arg0: i2, %arg1: i2):
 ///   %12 = arith.andi %arg0, %arg1 : i2
 ///   arc.vectorize.return %12 : i2
 /// }
 /// %3 = comb.extract %2 from 0 : (i2) -> i1
 /// %4 = comb.extract %2 from 1 : (i2) -> i1
 /// ```
 /// becomes
 /// ```mlir
 /// %0 = comb.concat %in0, %in1 : i1, i1
 /// %1 = comb.concat %in2, %in2 : i1, i1
 /// %2 = arith.andi %0, %1 : i2
 /// %3 = comb.extract %2 from 0 : (i2) -> i1
 /// %4 = comb.extract %2 from 1 : (i2) -> i1
 /// ```
 static LogicalResult inlineBody(VectorizeOp op) {
   if (!op.isBodyVectorized() || !op.isBoundaryVectorized())
     return op->emitError(
         "can only inline body if boundary and body are already vectorized");

   Block &body = op.getBody().front();

   // Each block argument stands for one operand vector; forward its uses to
   // the first value of that operand group.
   for (auto [inputGroup, blockArg] :
        llvm::zip(op.getInputs(), body.getArguments()))
     blockArg.replaceAllUsesWith(inputGroup.front());

   // The value handed to the terminator takes the place of the op's result.
   // Afterwards the terminator is no longer needed.
   Operation *yield = body.getTerminator();
   op->getResult(0).replaceAllUsesWith(yield->getOperand(0));
   yield->erase();

   // Move the remaining body operations in front of the op and drop the now
   // empty `arc.vectorize`.
   auto &parentOps = op->getBlock()->getOperations();
   parentOps.splice(op->getIterator(), body.getOperations());
   op->erase();

   return success();
 }


 namespace {

 /// Pass that lowers `arc.vectorize` operations. Depending on the selected
 /// mode it vectorizes the boundary, the body, inlines the body, or performs
 /// all three steps in sequence.
 struct LowerVectorizationsPass
     : public arc::impl::LowerVectorizationsBase<LowerVectorizationsPass> {
   LowerVectorizationsPass() = default;

   /// Constructs the pass with the `mode` option preset to the given value.
   explicit LowerVectorizationsPass(LowerVectorizationsModeEnum mode)
       : LowerVectorizationsPass() {
     this->mode.setValue(mode);
   }

   /// Visits every `arc.vectorize` op below the root operation and applies the
   /// lowering selected by `mode`. The walk is interrupted and the pass is
   /// marked as failed as soon as one lowering step fails.
   void runOnOperation() override {
     WalkResult walkStatus =
         getOperation().walk([&](VectorizeOp op) -> WalkResult {
           switch (mode) {
           case LowerVectorizationsModeEnum::Full: {
             // Boundary first, then body, and finally inline the result.
             auto lowered = lowerBody(lowerBoundary(op));
             if (succeeded(lowered) && succeeded(inlineBody(*lowered)))
               return WalkResult::advance();
             return WalkResult::interrupt();
           }
           case LowerVectorizationsModeEnum::Boundary:
             // The returned op is not needed in this mode.
             lowerBoundary(op);
             return WalkResult::advance();
           case LowerVectorizationsModeEnum::Body:
             return static_cast<LogicalResult>(lowerBody(op));
           case LowerVectorizationsModeEnum::InlineBody:
             return inlineBody(op);
           }
           llvm_unreachable("all enum cases must be handled above");
         });

     if (walkStatus.wasInterrupted())
       signalPassFailure();
   }
 };

 } // namespace


 /// Creates a `LowerVectorizationsPass` instance configured with the given
 /// lowering mode.
 std::unique_ptr<Pass>
 arc::createLowerVectorizationsPass(LowerVectorizationsModeEnum mode) {
   std::unique_ptr<Pass> pass = std::make_unique<LowerVectorizationsPass>(mode);
   return pass;
 }

ArcOps.h

ArcPasses.h

CombOps.h

lowerBoundary
static VectorizeOp lowerBoundary(VectorizeOp op)
Vectorizes the boundary of the given arc.vectorize operation if it is not already vectorized.
Definition: LowerVectorizations.cpp:163

lowerBody
static FailureOr< VectorizeOp > lowerBody(VectorizeOp op)
Vectorizes the body of the given arc.vectorize operation if it is not already vectorized.
Definition: LowerVectorizations.cpp:200

lowerBoundaryVector
static VectorizeOp lowerBoundaryVector(VectorizeOp op)
Vectorizes the arc.vectorize boundary by using the vector type and dialect for SIMD-based vectorizati...
Definition: LowerVectorizations.cpp:111

inlineBody
static LogicalResult inlineBody(VectorizeOp op)
Inlines the arc.vectorize operations body once both the boundary and body are vectorized.
Definition: LowerVectorizations.cpp:230

lowerBoundaryScalar
static VectorizeOp lowerBoundaryScalar(VectorizeOp op)
Vectorizes the arc.vectorize boundary by packing the vector elements into an integer value.
Definition: LowerVectorizations.cpp:54

comb.ExtractOp
Definition: comb.py:184

circt::arc::createLowerVectorizationsPass
std::unique_ptr< mlir::Pass > createLowerVectorizationsPass(LowerVectorizationsModeEnum mode=LowerVectorizationsModeEnum::Full)
Definition: LowerVectorizations.cpp:284

circt::calyx::direction::get
Direction get(bool isOutput)
Returns an output direction if isOutput is true, otherwise returns an input direction.
Definition: CalyxOps.cpp:55

circt
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Definition: DebugAnalysis.h:21

mlir
Definition: DebugAnalysis.h:16