CIRCT 22.0.0git
Loading...
Searching...
No Matches
SOPBalancing.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements SOP (Sum-of-Products) balancing for delay optimization
10// based on "Delay Optimization Using SOP Balancing" by Mishchenko et al.
11// (ICCAD 2011).
12//
13// NOTE: Currently supports AIG but should be extended to other logic forms
14// (MIG, comb or/and) in the future.
15//
16//===----------------------------------------------------------------------===//
17
24#include "mlir/Support/LLVM.h"
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/Support/Debug.h"
29
30#define DEBUG_TYPE "synth-sop-balancing"
31
32using namespace circt;
33using namespace circt::synth;
34using namespace mlir;
35
36namespace circt {
37namespace synth {
38#define GEN_PASS_DEF_SOPBALANCING
39#include "circt/Dialect/Synth/Transforms/SynthPasses.h.inc"
40} // namespace synth
41} // namespace circt
42
43using namespace circt::synth;
44
45//===----------------------------------------------------------------------===//
46// SOP Cache
47//===----------------------------------------------------------------------===//
48
49namespace {
50
51/// Expected maximum number of inputs for ISOP extraction, used as a hint for
52/// SmallVector capacity to avoid reallocations in common cases.
53constexpr unsigned expectedISOPInputs = 8;
54
55/// Cache for SOP extraction results, keyed by truth table.
56class SOPCache {
57public:
58 const SOPForm &getOrCompute(const APInt &truthTable, unsigned numVars) {
59 auto it = cache.find(truthTable);
60 if (it != cache.end())
61 return it->second;
62 return cache
63 .try_emplace(truthTable, circt::extractISOP(truthTable, numVars))
64 .first->second;
65 }
66
67private:
68 DenseMap<APInt, SOPForm> cache;
69};
70
71//===----------------------------------------------------------------------===//
72// Tree Building Helpers
73//===----------------------------------------------------------------------===//
74
75/// Simulate balanced tree and return output arrival time.
76static DelayType simulateBalancedTree(ArrayRef<DelayType> arrivalTimes) {
77 if (arrivalTimes.empty())
78 return 0;
79 return buildBalancedTreeWithArrivalTimes<DelayType>(
80 arrivalTimes, [](auto a, auto b) { return std::max(a, b) + 1; });
81}
82
83/// Build balanced AND tree.
85buildBalancedAndTree(OpBuilder &builder, Location loc,
86 SmallVectorImpl<ValueWithArrivalTime> &nodes) {
87 assert(!nodes.empty());
88
89 if (nodes.size() == 1)
90 return nodes[0];
91
92 size_t num = nodes.size();
93 auto result = buildBalancedTreeWithArrivalTimes<ValueWithArrivalTime>(
94 nodes, [&](const auto &n1, const auto &n2) {
95 Value v = aig::AndInverterOp::create(builder, loc, n1.getValue(),
96 n2.getValue(), n1.isInverted(),
97 n2.isInverted());
99 v, std::max(n1.getArrivalTime(), n2.getArrivalTime()) + 1, false,
100 num++);
101 });
102 return result;
103}
104
105/// Build balanced SOP structure.
106Value buildBalancedSOP(OpBuilder &builder, Location loc, const SOPForm &sop,
107 ArrayRef<Value> inputs,
108 ArrayRef<DelayType> inputArrivalTimes) {
109 SmallVector<ValueWithArrivalTime, expectedISOPInputs> productTerms, literals;
110
111 size_t num = 0;
112 for (const auto &cube : sop.cubes) {
113 for (unsigned i = 0; i < sop.numVars; ++i) {
114 if (cube.hasLiteral(i))
115 literals.push_back(ValueWithArrivalTime(
116 inputs[i], inputArrivalTimes[i], cube.isLiteralInverted(i), num++));
117 }
118
119 if (literals.empty())
120 continue;
121
122 // Get product term, and flip the inversion to construct OR afterwards.
123 productTerms.push_back(
124 buildBalancedAndTree(builder, loc, literals).flipInversion());
125
126 literals.clear();
127 }
128
129 assert(!productTerms.empty() && "No product terms");
130
131 auto andOfInverted =
132 buildBalancedAndTree(builder, loc, productTerms).flipInversion();
133 // Let's invert the output.
134 if (andOfInverted.isInverted())
135 return aig::AndInverterOp::create(builder, loc, andOfInverted.getValue(),
136 true);
137 return andOfInverted.getValue();
138}
139
140/// Compute SOP delays for cost estimation.
141void computeSOPDelays(const SOPForm &sop, ArrayRef<DelayType> inputArrivalTimes,
142 SmallVectorImpl<DelayType> &delays) {
143 SmallVector<DelayType, expectedISOPInputs> productArrivalTimes, literalTimes;
144 for (const auto &cube : sop.cubes) {
145 for (unsigned i = 0; i < sop.numVars; ++i)
146 // No need to consider inverted literals separately for delay.
147 if (cube.hasLiteral(i))
148 literalTimes.push_back(inputArrivalTimes[i]);
149 if (!literalTimes.empty()) {
150 productArrivalTimes.push_back(simulateBalancedTree(literalTimes));
151 literalTimes.clear();
152 }
153 }
154
155 DelayType outputTime = simulateBalancedTree(productArrivalTimes);
156
157 delays.resize(sop.numVars, 0);
158 // Compute the delay contribution of each input to the output for cost
159 // estimation. The CutRewriter framework requires per-input delays, even
160 // though this is somewhat artificial for SOP balancing. This may be
161 // improved in future framework improvements.
162 //
163 // First, determine which variables are actually used in the SOP by
164 // collecting a bitmask from all cubes.
165 uint64_t mask = 0;
166 for (auto &cube : sop.cubes)
167 mask |= cube.mask;
168
169 // Compute delay for each used input variable.
170 for (unsigned i = 0; i < sop.numVars; ++i)
171 if (mask & (1u << i))
172 delays[i] = outputTime - inputArrivalTimes[i];
173}
174
175//===----------------------------------------------------------------------===//
176// SOP Balancing Pattern
177//===----------------------------------------------------------------------===//
178
179/// Pattern that performs SOP balancing on cuts.
180struct SOPBalancingPattern : public CutRewritePattern {
181 SOPBalancingPattern(MLIRContext *context) : CutRewritePattern(context) {}
182
183 std::optional<MatchResult> match(CutEnumerator &enumerator,
184 const Cut &cut) const override {
185 if (cut.isTrivialCut() || cut.getOutputSize() != 1)
186 return std::nullopt;
187
188 const auto &tt = cut.getTruthTable();
189 const SOPForm &sop = sopCache.getOrCompute(tt.table, tt.numInputs);
190 if (sop.cubes.empty())
191 return std::nullopt;
192
193 SmallVector<DelayType, expectedISOPInputs> arrivalTimes;
194 if (failed(cut.getInputArrivalTimes(enumerator, arrivalTimes)))
195 return std::nullopt;
196
197 // Compute area estimate
198 unsigned totalGates = 0;
199 for (const auto &cube : sop.cubes)
200 if (cube.size() > 1)
201 totalGates += cube.size() - 1;
202 if (sop.cubes.size() > 1)
203 totalGates += sop.cubes.size() - 1;
204
205 SmallVector<DelayType, expectedISOPInputs> delays;
206 computeSOPDelays(sop, arrivalTimes, delays);
207
208 MatchResult result;
209 result.area = static_cast<double>(totalGates);
210 result.setOwnedDelays(std::move(delays));
211 return result;
212 }
213
214 FailureOr<Operation *> rewrite(OpBuilder &builder, CutEnumerator &enumerator,
215 const Cut &cut) const override {
216 const auto &tt = cut.getTruthTable();
217 const SOPForm &sop = sopCache.getOrCompute(tt.table, tt.numInputs);
218 LLVM_DEBUG({
219 llvm::dbgs() << "Rewriting SOP:\n";
220 sop.dump(llvm::dbgs());
221 });
222
223 SmallVector<DelayType, expectedISOPInputs> arrivalTimes;
224 if (failed(cut.getInputArrivalTimes(enumerator, arrivalTimes)))
225 return failure();
226 // Construct the fused location.
227 SetVector<Location> inputLocs;
228 inputLocs.insert(cut.getRoot()->getLoc());
229 for (auto input : cut.inputs)
230 inputLocs.insert(input.getLoc());
231
232 auto loc = builder.getFusedLoc(inputLocs.getArrayRef());
233
234 Value result = buildBalancedSOP(builder, loc, sop, cut.inputs.getArrayRef(),
235 arrivalTimes);
236
237 auto *op = result.getDefiningOp();
238 if (!op)
239 op = aig::AndInverterOp::create(builder, loc, result, false);
240 return op;
241 }
242
243 unsigned getNumOutputs() const override { return 1; }
244 StringRef getPatternName() const override { return "sop-balancing"; }
245
246private:
247 // Cache for SOP extraction results. Hence the pattern is stateful and must
248 // not be used in parallelly.
249 mutable SOPCache sopCache;
250};
251
252} // namespace
253
254//===----------------------------------------------------------------------===//
255// SOP Balancing Pass
256//===----------------------------------------------------------------------===//
257
259 : public circt::synth::impl::SOPBalancingBase<SOPBalancingPass> {
260 using SOPBalancingBase::SOPBalancingBase;
261
262 void runOnOperation() override {
263 auto module = getOperation();
264
265 CutRewriterOptions options;
266 options.strategy = strategy;
267 options.maxCutInputSize = maxCutInputSize;
268 options.maxCutSizePerRoot = maxCutsPerRoot;
269 options.allowNoMatch = true;
270
271 SmallVector<std::unique_ptr<CutRewritePattern>, 1> patterns;
272 patterns.push_back(
273 std::make_unique<SOPBalancingPattern>(module->getContext()));
274
275 CutRewritePatternSet patternSet(std::move(patterns));
276 CutRewriter rewriter(options, patternSet);
277 if (failed(rewriter.run(module)))
278 return signalPassFailure();
279 }
280};
assert(baseType &&"element must be base type")
static std::unique_ptr< Context > context
static Location getLoc(DefSlot slot)
Definition Mem2Reg.cpp:216
Strategy strategy
Cut enumeration engine for combinational logic networks.
Manages a collection of rewriting patterns for combinational logic optimization.
Main cut-based rewriting algorithm for combinational logic optimization.
LogicalResult run(Operation *topOp)
Execute the complete cut-based rewriting algorithm.
Represents a cut in the combinational logic network.
unsigned getOutputSize() const
Get the number of outputs from root operation.
mlir::Operation * getRoot() const
Get the root operation of this cut.
LogicalResult getInputArrivalTimes(CutEnumerator &enumerator, SmallVectorImpl< DelayType > &results) const
Get arrival times for each input of this cut.
llvm::SmallSetVector< mlir::Value, 4 > inputs
External inputs to this cut (cut boundary).
const BinaryTruthTable & getTruthTable() const
Get the truth table for this cut.
bool isTrivialCut() const
Check if this cut represents a trivial cut.
Helper class for delay-aware tree building.
Definition SynthOps.h:58
int64_t DelayType
Definition CutRewriter.h:36
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
SOPForm extractISOP(const llvm::APInt &truthTable, unsigned numVars)
Extract ISOP (Irredundant Sum-of-Products) from a truth table.
Definition synth.py:1
void runOnOperation() override
Represents a sum-of-products expression.
Definition TruthTable.h:250
unsigned numVars
Definition TruthTable.h:252
void dump(llvm::raw_ostream &os=llvm::errs()) const
Debug dump method for SOP forms.
llvm::SmallVector< Cube > cubes
Definition TruthTable.h:251
Base class for cut rewriting patterns used in combinational logic optimization.
virtual StringRef getPatternName() const
Get the name of this pattern. Used for debugging.
virtual FailureOr< Operation * > rewrite(mlir::OpBuilder &builder, CutEnumerator &enumerator, const Cut &cut) const =0
Return a new operation that replaces the matched cut.
virtual std::optional< MatchResult > match(CutEnumerator &enumerator, const Cut &cut) const =0
Check if a cut matches this pattern and compute area/delay metrics.
virtual unsigned getNumOutputs() const =0
Get the number of outputs this pattern produces.
Configuration options for the cut-based rewriting algorithm.
unsigned maxCutInputSize
Maximum number of inputs allowed for any cut.
unsigned maxCutSizePerRoot
Maximum number of cuts to maintain per logic node.
bool allowNoMatch
Fail if there is a root operation that has no matching pattern.
OptimizationStrategy strategy
Optimization strategy (area vs. timing).
Result of matching a cut against a pattern.
Definition CutRewriter.h:74
void setOwnedDelays(SmallVector< DelayType, 6 > delays)
Set delays by transferring ownership (for dynamically computed delays).
Definition CutRewriter.h:91
double area
Area cost of implementing this cut with the pattern.
Definition CutRewriter.h:76