Loading [MathJax]/extensions/tex2jax.js
CIRCT 21.0.0git
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Mem2Reg.cpp
Go to the documentation of this file.
1//===- Mem2Reg.cpp - Promote signal/memory slots to values ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
14#include "mlir/Analysis/Liveness.h"
15#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
16#include "mlir/IR/Dominance.h"
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/GenericIteratedDominanceFrontier.h"
19
20#define DEBUG_TYPE "llhd-mem2reg"
21
22namespace circt {
23namespace llhd {
24#define GEN_PASS_DEF_MEM2REGPASS
25#include "circt/Dialect/LLHD/Transforms/LLHDPasses.h.inc"
26} // namespace llhd
27} // namespace circt
28
29using namespace mlir;
30using namespace circt;
31using namespace llhd;
32using llvm::PointerIntPair;
33using llvm::SmallDenseSet;
34using llvm::SpecificBumpPtrAllocator;
35
36/// Check whether a value is defined by `llhd.constant_time <0ns, 0d, 1e>`.
37static bool isEpsilonDelay(Value value) {
38 if (auto timeOp = value.getDefiningOp<ConstantTimeOp>()) {
39 auto t = timeOp.getValue();
40 return t.getTime() == 0 && t.getDelta() == 0 && t.getEpsilon() == 1;
41 }
42 return false;
43}
44
45/// Check whether a value is defined by `llhd.constant_time <0ns, 1d, 0e>`.
46static bool isDeltaDelay(Value value) {
47 if (auto timeOp = value.getDefiningOp<ConstantTimeOp>()) {
48 auto t = timeOp.getValue();
49 return t.getTime() == 0 && t.getDelta() == 1 && t.getEpsilon() == 0;
50 }
51 return false;
52}
53
54/// Check whether an operation is a `llhd.drive` with an epsilon delay. This
55/// corresponds to a blocking assignment in Verilog.
56static bool isBlockingDrive(Operation *op) {
57 if (auto driveOp = dyn_cast<DrvOp>(op))
58 return isEpsilonDelay(driveOp.getTime());
59 return false;
60}
61
62/// Check whether an operation is a `llhd.drive` with a delta delay. This
63/// corresponds to a non-blocking assignment in Verilog.
64static bool isDeltaDrive(Operation *op) {
65 if (auto driveOp = dyn_cast<DrvOp>(op))
66 return isDeltaDelay(driveOp.getTime());
67 return false;
68}
69
70//===----------------------------------------------------------------------===//
71// Reaching Definitions and Placeholders
72//===----------------------------------------------------------------------===//
73
74namespace {
75/// Information about whether a definition is driven back onto its signal. For
76/// example, probes provide a definition for their signal that does not have to
77/// be driven back onto the signal. Drives on the other hand provide a
78/// definition that eventually must be driven onto the signal.
79struct DriveCondition {
80 static DriveCondition never() { return ConditionAndMode(Value{}, Never); }
81 static DriveCondition always() { return ConditionAndMode(Value{}, Always); }
82 static DriveCondition conditional(Value condition = {}) {
83 return ConditionAndMode(condition, Conditional);
84 }
85
86 bool isNever() const { return conditionAndMode.getInt() == Never; }
87 bool isAlways() const { return conditionAndMode.getInt() == Always; }
88 bool isConditional() const {
89 return conditionAndMode.getInt() == Conditional;
90 }
91
92 Value getCondition() const { return conditionAndMode.getPointer(); }
93 void setCondition(Value condition) { conditionAndMode.setPointer(condition); }
94
95 bool operator==(const DriveCondition &other) const {
96 return conditionAndMode == other.conditionAndMode;
97 }
98 bool operator!=(const DriveCondition &other) const {
99 return conditionAndMode != other.conditionAndMode;
100 }
101
102private:
103 enum {
104 Never,
105 Always,
106 Conditional,
107 };
108 typedef PointerIntPair<Value, 2> ConditionAndMode;
109 ConditionAndMode conditionAndMode;
110
111 DriveCondition(ConditionAndMode conditionAndMode)
112 : conditionAndMode(conditionAndMode) {}
113 friend DenseMapInfo<DriveCondition>;
114};
115
116/// A definition for a memory slot that may not yet have a concrete SSA value.
117/// These are created for blocks which need to merge distinct definitions for
118/// the same slot coming from its predecssors, as a standin before block
119/// arguments are created. They are also created for drives, where a concrete
120/// value is already available in the form of the driven value.
121struct Def {
122 Block *block;
123 Type type;
124 Value value;
125 DriveCondition condition;
126 bool valueIsPlaceholder = false;
127 bool conditionIsPlaceholder = false;
128
129 Def(Value value, DriveCondition condition)
130 : block(value.getParentBlock()), type(value.getType()), value(value),
131 condition(condition) {}
132 Def(Block *block, Type type, DriveCondition condition)
133 : block(block), type(type), condition(condition) {}
134
135 Value getValueOrPlaceholder();
136 Value getConditionOrPlaceholder();
137};
138} // namespace
139
140/// Return the SSA value for this definition if it already has one, or create
141/// a placeholder value if no value exists yet.
142Value Def::getValueOrPlaceholder() {
143 if (!value) {
144 auto builder = OpBuilder::atBlockBegin(block);
145 value = builder
146 .create<UnrealizedConversionCastOp>(builder.getUnknownLoc(),
147 type, ValueRange{})
148 .getResult(0);
149 valueIsPlaceholder = true;
150 }
151 return value;
152}
153
154/// Return the drive condition for this definition. Creates a constant false or
155/// true SSA value if the drive mode is "never" or "always", respectively. If
156/// the mode is "conditional", return the its condition value if it already has
157/// one, or create a placeholder value if no value exists yet.
158Value Def::getConditionOrPlaceholder() {
159 if (!condition.getCondition()) {
160 auto builder = OpBuilder::atBlockBegin(block);
161 Value value;
162 if (condition.isNever()) {
163 value = builder.create<hw::ConstantOp>(builder.getUnknownLoc(),
164 builder.getI1Type(), 0);
165 } else if (condition.isAlways()) {
166 value = builder.create<hw::ConstantOp>(builder.getUnknownLoc(),
167 builder.getI1Type(), 1);
168 } else {
169 value = builder
170 .create<UnrealizedConversionCastOp>(builder.getUnknownLoc(),
171 builder.getI1Type(),
172 ValueRange{})
173 .getResult(0);
174 conditionIsPlaceholder = true;
175 }
176 condition.setCondition(value);
177 }
178 return condition.getCondition();
179}
180
181// Allow `DriveCondition` to be used as hash map key.
182template <>
183struct llvm::DenseMapInfo<DriveCondition> {
190 static unsigned getHashValue(DriveCondition d) {
192 d.conditionAndMode);
193 }
194 static bool isEqual(DriveCondition lhs, DriveCondition rhs) {
195 return lhs == rhs;
196 }
197};
198
199//===----------------------------------------------------------------------===//
200// Lattice to Propagate Needed and Reaching Definitions
201//===----------------------------------------------------------------------===//
202
203/// The slot a reaching definition specifies a value for, alongside a bit
204/// indicating whether the definition is from a delayed drive or a blocking
205/// drive.
206using DefSlot = PointerIntPair<Value, 1>;
207static DefSlot blockingSlot(Value slot) { return {slot, 0}; }
208static DefSlot delayedSlot(Value slot) { return {slot, 1}; }
209static Value getSlot(DefSlot slot) { return slot.getPointer(); }
210static bool isDelayed(DefSlot slot) { return slot.getInt(); }
211static Type getStoredType(Value slot) {
212 return cast<hw::InOutType>(slot.getType()).getElementType();
213}
214static Type getStoredType(DefSlot slot) {
215 return getStoredType(slot.getPointer());
216}
217static Location getLoc(DefSlot slot) { return slot.getPointer().getLoc(); }
218
219namespace {
220
221struct LatticeNode;
222struct BlockExit;
223struct ProbeNode;
224struct DriveNode;
225
226struct LatticeValue {
227 LatticeNode *nodeBefore = nullptr;
228 LatticeNode *nodeAfter = nullptr;
229 SmallDenseSet<Value, 1> neededDefs;
231};
232
233struct LatticeNode {
234 enum class Kind { BlockEntry, BlockExit, Probe, Drive };
235 const Kind kind;
236 LatticeNode(Kind kind) : kind(kind) {}
237};
238
239struct BlockEntry : public LatticeNode {
240 Block *block;
241 LatticeValue *valueAfter;
242 SmallVector<BlockExit *, 2> predecessors;
243 SmallVector<std::pair<Value, Def *>, 0> insertedProbes;
245
246 BlockEntry(Block *block, LatticeValue *valueAfter)
247 : LatticeNode(Kind::BlockEntry), block(block), valueAfter(valueAfter) {
248 assert(!valueAfter->nodeBefore);
249 valueAfter->nodeBefore = this;
250 }
251
252 static bool classof(const LatticeNode *n) {
253 return n->kind == Kind::BlockEntry;
254 }
255};
256
257struct BlockExit : public LatticeNode {
258 Block *block;
259 LatticeValue *valueBefore;
260 SmallVector<BlockEntry *, 2> successors;
261 Operation *terminator;
262 bool suspends;
263
264 BlockExit(Block *block, LatticeValue *valueBefore)
265 : LatticeNode(Kind::BlockExit), block(block), valueBefore(valueBefore),
266 terminator(block->getTerminator()),
267 suspends(isa<HaltOp, WaitOp>(terminator)) {
268 assert(!valueBefore->nodeAfter);
269 valueBefore->nodeAfter = this;
270 }
271
272 static bool classof(const LatticeNode *n) {
273 return n->kind == Kind::BlockExit;
274 }
275};
276
277struct OpNode : public LatticeNode {
278 Operation *op;
279 LatticeValue *valueBefore;
280 LatticeValue *valueAfter;
281
282 OpNode(Kind kind, Operation *op, LatticeValue *valueBefore,
283 LatticeValue *valueAfter)
284 : LatticeNode(kind), op(op), valueBefore(valueBefore),
285 valueAfter(valueAfter) {
286 assert(!valueBefore->nodeAfter);
287 assert(!valueAfter->nodeBefore);
288 valueBefore->nodeAfter = this;
289 valueAfter->nodeBefore = this;
290 }
291
292 static bool classof(const LatticeNode *n) {
293 return isa<ProbeNode, DriveNode>(n);
294 }
295};
296
297struct ProbeNode : public OpNode {
298 Value slot;
299
300 ProbeNode(PrbOp op, Value slot, LatticeValue *valueBefore,
301 LatticeValue *valueAfter)
302 : OpNode(Kind::Probe, op, valueBefore, valueAfter), slot(slot) {}
303
304 static bool classof(const LatticeNode *n) { return n->kind == Kind::Probe; }
305};
306
307struct DriveNode : public OpNode {
308 DefSlot slot;
309 Def *def;
310
311 DriveNode(DrvOp op, Value slot, Def *def, LatticeValue *valueBefore,
312 LatticeValue *valueAfter)
313 : OpNode(Kind::Drive, op, valueBefore, valueAfter),
314 slot(isDeltaDrive(op) ? delayedSlot(slot) : blockingSlot(slot)),
315 def(def) {
317 }
318
319 /// Returns true if the op does not drive an entire slot, but only a part of a
320 /// slot through a projection.
321 bool drivesProjection() const { return op->getOperand(0) != getSlot(slot); }
322
323 /// Return the drive op.
324 DrvOp getDriveOp() const { return cast<DrvOp>(op); }
325
326 static bool classof(const LatticeNode *n) { return n->kind == Kind::Drive; }
327};
328
329/// A lattice of block entry and exit nodes, nodes for relevant operations such
330/// as probes and drives, and values flowing between the nodes.
331struct Lattice {
332 /// Create a new value on the lattice.
333 LatticeValue *createValue() {
334 auto *value = new (valueAllocator.Allocate()) LatticeValue();
335 values.push_back(value);
336 return value;
337 }
338
339 /// Create a new node on the lattice.
340 template <class T, typename... Args>
341 T *createNode(Args... args) {
342 auto *node =
343 new (getAllocator<T>().Allocate()) T(std::forward<Args>(args)...);
344 nodes.push_back(node);
345 return node;
346 }
347
348 /// Create a new reaching definition.
349 template <typename... Args>
350 Def *createDef(Args... args) {
351 auto *def = new (defAllocator.Allocate()) Def(std::forward<Args>(args)...);
352 defs.push_back(def);
353 return def;
354 }
355
356 /// Create a new reaching definition for an existing value in the IR.
357 Def *createDef(Value value, DriveCondition mode) {
358 auto &slot = defsForValues[{value, mode}];
359 if (!slot) {
360 slot = new (defAllocator.Allocate()) Def(value, mode);
361 defs.push_back(slot);
362 }
363 return slot;
364 }
365
366#ifndef NDEBUG
367 void dump(llvm::raw_ostream &os = llvm::dbgs());
368#endif
369
370 /// All nodes in the lattice.
371 std::vector<LatticeNode *> nodes;
372 /// All values in the lattice.
373 std::vector<LatticeValue *> values;
374 /// All reaching defs in the lattice.
375 std::vector<Def *> defs;
376 /// The reaching defs for concrete values in the IR. This map is used to
377 /// create a single def for the same SSA value to allow for pointer equality
378 /// comparisons.
379 DenseMap<std::pair<Value, DriveCondition>, Def *> defsForValues;
380
381private:
382 SpecificBumpPtrAllocator<LatticeValue> valueAllocator;
383 SpecificBumpPtrAllocator<Def> defAllocator;
384 SpecificBumpPtrAllocator<BlockEntry> blockEntryAllocator;
385 SpecificBumpPtrAllocator<BlockExit> blockExitAllocator;
386 SpecificBumpPtrAllocator<ProbeNode> probeAllocator;
387 SpecificBumpPtrAllocator<DriveNode> driveAllocator;
388
389 // Helper function to get the correct allocator given a lattice node class.
390 template <class T>
391 SpecificBumpPtrAllocator<T> &getAllocator();
392};
393
394// Specializations for the `getAllocator` template that map node types to the
395// correct allocator.
396template <>
397SpecificBumpPtrAllocator<BlockEntry> &Lattice::getAllocator() {
398 return blockEntryAllocator;
399}
400template <>
401SpecificBumpPtrAllocator<BlockExit> &Lattice::getAllocator() {
402 return blockExitAllocator;
403}
404template <>
405SpecificBumpPtrAllocator<ProbeNode> &Lattice::getAllocator() {
406 return probeAllocator;
407}
408template <>
409SpecificBumpPtrAllocator<DriveNode> &Lattice::getAllocator() {
410 return driveAllocator;
411}
412
413} // namespace
414
415#ifndef NDEBUG
416/// Print the lattice in human-readable form. Useful for debugging.
417void Lattice::dump(llvm::raw_ostream &os) {
418 // Helper functions to quickly come up with unique names for things.
419 llvm::MapVector<Block *, unsigned> blockNames;
420 llvm::MapVector<Value, unsigned> memNames;
421 llvm::MapVector<Def *, unsigned> defNames;
422
423 auto blockName = [&](Block *block) {
424 unsigned id = blockNames.insert({block, blockNames.size()}).first->second;
425 return std::string("bb") + llvm::utostr(id);
426 };
427
428 auto memName = [&](DefSlot value) {
429 unsigned id =
430 memNames.insert({getSlot(value), memNames.size()}).first->second;
431 return std::string("mem") + llvm::utostr(id) +
432 (isDelayed(value) ? "#" : "");
433 };
434
435 auto defName = [&](Def *def) {
436 unsigned id = defNames.insert({def, defNames.size()}).first->second;
437 return std::string("def") + llvm::utostr(id);
438 };
439
440 // Ensure the blocks are named in the order they were created.
441 for (auto *node : nodes)
442 if (auto *entry = dyn_cast<BlockEntry>(node))
443 blockName(entry->block);
444
445 // Iterate over all block entry nodes.
446 os << "lattice {\n";
447 for (auto *node : nodes) {
448 auto *entry = dyn_cast<BlockEntry>(node);
449 if (!entry)
450 continue;
451
452 // Print the opening braces and predecessors for the block.
453 os << " " << blockName(entry->block) << ":";
454 if (entry->predecessors.empty()) {
455 os << " // no predecessors";
456 } else {
457 os << " // from";
458 for (auto *node : entry->predecessors)
459 os << " " << blockName(node->block);
460 }
461 os << "\n";
462
463 // Print all nodes following the block entry, up until the block exit.
464 auto *value = entry->valueAfter;
465 while (true) {
466 // Print the needed defs at this lattice point.
467 if (!value->neededDefs.empty()) {
468 os << " -> need";
469 for (auto mem : value->neededDefs)
470 os << " " << memName(blockingSlot(mem));
471 os << "\n";
472 }
473 if (!value->reachingDefs.empty()) {
474 os << " -> def";
475 for (auto [mem, def] : value->reachingDefs) {
476 os << " " << memName(mem) << "=" << defName(def);
477 if (def->condition.isNever())
478 os << "[N]";
479 else if (def->condition.isAlways())
480 os << "[A]";
481 else
482 os << "[C]";
483 }
484 os << "\n";
485 }
486 if (isa<BlockExit>(value->nodeAfter))
487 break;
488
489 // Print the node.
490 if (auto *node = dyn_cast<ProbeNode>(value->nodeAfter))
491 os << " probe " << memName(blockingSlot(node->slot)) << "\n";
492 else if (auto *node = dyn_cast<DriveNode>(value->nodeAfter))
493 os << " drive " << memName(node->slot) << "\n";
494 else
495 os << " unknown\n";
496
497 // Advance to the next node.
498 value = cast<OpNode>(value->nodeAfter)->valueAfter;
499 }
500
501 // Print the closing braces and successors for the block.
502 auto *exit = cast<BlockExit>(value->nodeAfter);
503 if (isa<WaitOp>(exit->terminator))
504 os << " wait";
505 else if (exit->successors.empty())
506 os << " halt";
507 else
508 os << " goto";
509 for (auto *node : exit->successors)
510 os << " " << blockName(node->block);
511 if (exit->suspends)
512 os << " // suspends";
513 os << "\n";
514 }
515
516 // Dump the memories.
517 for (auto [mem, id] : memNames)
518 os << " mem" << id << ": " << mem << "\n";
519
520 os << "}\n";
521}
522#endif
523
524//===----------------------------------------------------------------------===//
525// Projection Utilities
526//===----------------------------------------------------------------------===//
527
528namespace {
529/// A single projection operation, together with the concrete value of the
530/// signal being projected into. This helper is useful to unpack a stack of
531/// projections to get the targeted value, change it, and then pack the stack
532/// back up into an updated value.
533struct Projection {
534 /// The projection operation.
535 Operation *op;
536 /// The value being projected into. This is not the op's target signal, but
537 /// rather the value of the op's target signal.
538 Value into;
539};
540} // namespace
541
542/// A stack of projection operations.
543using ProjectionStack = SmallVector<Projection>;
544
545/// Collect the `llhd.sig.*` projection ops between `fromSignal` and `toSlot`.
546/// The `fromSignal` value must be derived from `toSlot` through only
547/// `llhd.sig.*` operations. The result is a stack; the op producing
548/// `fromSignal` appears first in the vector, while the final op projecting into
549/// `toSlot` appears last in the vector.
550static ProjectionStack getProjections(Value fromSignal, Value toSlot) {
551 ProjectionStack stack;
552 while (fromSignal != toSlot) {
553 auto *op = cast<OpResult>(fromSignal).getOwner();
554 stack.push_back({op, Value()});
555 fromSignal = op->getOperand(0);
556 }
557 return stack;
558}
559
560/// Resolve a stack of projections by taking a value and descending into its
561/// subelements until the final value targeted by the projection stack remains,
562/// which is then returned. Also updates the `into` fields of the projections in
563/// the stack to represent the concrete value of intermediate projections. This
564/// allows a later `packProjections` to reconstruct the root value with the
565/// field targeted by the projection updated to a different value.
566static Value unpackProjections(OpBuilder &builder, Value value,
567 ProjectionStack &projections) {
568 for (auto &projection : llvm::reverse(projections)) {
569 projection.into = value;
570 value = TypeSwitch<Operation *, Value>(projection.op)
571 .Case<SigArrayGetOp>([&](auto op) {
572 return builder.createOrFold<hw::ArrayGetOp>(
573 op.getLoc(), value, op.getIndex());
574 });
575 }
576 return value;
577}
578
579/// Undo a stack of projections by taking the value of the projected field and
580/// injecting it into the surrounding aggregate value that the projection
581/// targets. This requires the `into` fields to be set to the concrete value of
582/// the intermediate projections.
583///
584/// Example:
585/// ```
586/// auto projections = getProjections(...);
587/// auto fieldValue = unpackProjections(..., aggregateValue, projections);
588/// fieldValue = update(fieldValue);
589/// aggregateValue = packProjections(..., fieldValue, projections);
590/// ```
591static Value packProjections(OpBuilder &builder, Value value,
592 const ProjectionStack &projections) {
593 for (auto projection : projections) {
594 value = TypeSwitch<Operation *, Value>(projection.op)
595 .Case<SigArrayGetOp>([&](auto op) {
596 return builder.createOrFold<hw::ArrayInjectOp>(
597 op.getLoc(), projection.into, op.getIndex(), value);
598 });
599 }
600 return value;
601}
602
603//===----------------------------------------------------------------------===//
604// Drive/Probe to SSA Value Promotion
605//===----------------------------------------------------------------------===//
606
607namespace {
608/// The main promoter forwarding drives to probes within a region.
609struct Promoter {
610 Promoter(Region &region) : region(region) {}
611 LogicalResult promote();
612
613 void findPromotableSlots();
614 Value resolveSlot(Value projectionOrSlot);
615
616 void captureAcrossWait();
617 void captureAcrossWait(PrbOp probeOp, ArrayRef<WaitOp> waitOps,
618 Liveness &liveness, DominanceInfo &dominance);
619
620 void constructLattice();
621 void propagateBackward();
622 void propagateBackward(LatticeNode *node);
623 void propagateForward(bool optimisticMerges);
624 void propagateForward(LatticeNode *node, bool optimisticMerges);
625 void markDirty(LatticeNode *node);
626
627 void insertProbeBlocks();
628 void insertProbes();
629 void insertProbes(BlockEntry *node);
630
631 void insertDriveBlocks();
632 void insertDrives();
633 void insertDrives(BlockExit *node);
634 void insertDrives(DriveNode *node);
635
636 void resolveDefinitions();
637 void resolveDefinitions(ProbeNode *node);
638 void resolveDefinitions(DriveNode *node);
639 void resolveDefinitionValue(DriveNode *node);
640 void resolveDefinitionCondition(DriveNode *node);
641
642 void insertBlockArgs();
643 bool insertBlockArgs(BlockEntry *node);
644 void replaceValueWith(Value oldValue, Value newValue);
645
646 /// The region we are promoting in.
647 Region &region;
648
649 /// The slots we are promoting. Mostly `llhd.sig` ops in practice. This
650 /// establishes a deterministic order for slot allocations, such that
651 /// everything else in the pass can operate using unordered maps and sets.
652 SmallVector<Value> slots;
653 /// A mapping from projections to the root slot they are projecting into.
654 SmallDenseMap<Value, Value> projections;
655 /// A set of all promotable signal SSA values. This is the union of `slots`
656 /// and `projections` of those slots.
657 SmallDenseSet<Value> promotable;
658
659 /// The lattice used to propagate needed definitions backwards and reaching
660 /// definitions forwards.
661 Lattice lattice;
662 /// A worklist of lattice nodes used within calls to `propagate*`.
663 SmallPtrSet<LatticeNode *, 4> dirtyNodes;
664
665 /// Helper to clean up unused ops.
666 UnusedOpPruner pruner;
667};
668} // namespace
669
670LogicalResult Promoter::promote() {
671 if (region.empty())
672 return success();
673
674 findPromotableSlots();
675 if (slots.empty())
676 return success();
677
678 captureAcrossWait();
679
680 constructLattice();
681 LLVM_DEBUG({
682 llvm::dbgs() << "Initial lattice:\n";
683 lattice.dump();
684 });
685
686 // Propagate the needed definitions backward across the lattice.
687 propagateBackward();
688 LLVM_DEBUG({
689 llvm::dbgs() << "After backward propagation:\n";
690 lattice.dump();
691 });
692
693 // Insert probes wherever a def is needed for the first time.
694 insertProbeBlocks();
695 insertProbes();
696 LLVM_DEBUG({
697 llvm::dbgs() << "After probe insertion:\n";
698 lattice.dump();
699 });
700
701 // Propagate the reaching definitions forward across the lattice.
702 propagateForward(true);
703 propagateForward(false);
704 LLVM_DEBUG({
705 llvm::dbgs() << "After forward propagation:\n";
706 lattice.dump();
707 });
708
709 // Resolve definitions.
710 resolveDefinitions();
711
712 // Insert drives wherever a reaching def can no longer propagate.
713 insertDriveBlocks();
714 insertDrives();
715 LLVM_DEBUG({
716 llvm::dbgs() << "After def resolution and drive insertion:\n";
717 lattice.dump();
718 });
719
720 // Insert the necessary block arguments.
721 insertBlockArgs();
722
723 // Erase operations that have become unused.
724 pruner.eraseNow();
725
726 return success();
727}
728
729/// Identify any promotable slots probed or driven under the current region.
730void Promoter::findPromotableSlots() {
731 SmallPtrSet<Value, 8> seenSlots;
732 SmallPtrSet<Operation *, 8> checkedUsers;
733 SmallVector<Operation *, 8> userWorklist;
734
735 region.walk([&](Operation *op) {
736 for (auto operand : op->getOperands()) {
737 if (!seenSlots.insert(operand).second)
738 continue;
739
740 // We can only promote probes and drives on a locally-defined signal.
741 // Other signals, such as the ones brought into a module through a port,
742 // have an unknown aliasing relationship with the other ports.
743 if (!operand.getDefiningOp<llhd::SignalOp>())
744 continue;
745
746 // Ensure the slot is not used in any way we cannot reason about.
747 auto checkUser = [&](Operation *user) -> bool {
748 // We don't support nested probes and drives.
749 if (region.isProperAncestor(user->getParentRegion()))
750 return false;
751 // Ignore uses outside of the region.
752 if (user->getParentRegion() != &region)
753 return true;
754 // Projection operations are okay as long as they are in the same block
755 // as any of their users.
756 if (isa<SigArrayGetOp>(user)) {
757 for (auto *projectionUser : user->getUsers()) {
758 if (projectionUser->getBlock() != user->getBlock())
759 return false;
760 if (checkedUsers.insert(projectionUser).second)
761 userWorklist.push_back(projectionUser);
762 }
763 projections.insert({user->getResult(0), operand});
764 return true;
765 }
766 return isa<PrbOp>(user) || isBlockingDrive(user) || isDeltaDrive(user);
767 };
768 checkedUsers.clear();
769 if (!llvm::all_of(operand.getUsers(), [&](auto *user) {
770 auto allOk = true;
771 if (checkedUsers.insert(user).second)
772 userWorklist.push_back(user);
773 while (!userWorklist.empty() && allOk)
774 allOk &= checkUser(userWorklist.pop_back_val());
775 userWorklist.clear();
776 return allOk;
777 }))
778 continue;
779
780 slots.push_back(operand);
781 }
782 });
783
784 // Populate `promotable` with the slots and projections we are promoting.
785 promotable.insert(slots.begin(), slots.end());
786 for (auto [projection, slot] : llvm::make_early_inc_range(projections))
787 if (!promotable.contains(slot))
788 projections.erase(projection);
789 for (auto [projection, slot] : projections)
790 promotable.insert(projection);
791
792 LLVM_DEBUG(llvm::dbgs() << "Found " << slots.size() << " promotable slots, "
793 << promotable.size() << " promotable values\n");
794}
795
796/// Resolve SSA values in `projection` to the `slot` they are projecting into.
797/// Simply returns the given value if it is not in `projections`.
798Value Promoter::resolveSlot(Value projectionOrSlot) {
799 if (auto slot = projections.lookup(projectionOrSlot))
800 return slot;
801 return projectionOrSlot;
802}
803
804/// Explicitly capture any probes that are live across an `llhd.wait` as block
805/// arguments and destination operand of that wait. This ensures that replacing
806/// the probe with a reaching definition later on will capture the value of the
807/// reaching definition before the wait.
808void Promoter::captureAcrossWait() {
809 if (region.hasOneBlock())
810 return;
811
812 SmallVector<WaitOp> waitOps;
813 for (auto &block : region)
814 if (auto waitOp = dyn_cast<WaitOp>(block.getTerminator()))
815 waitOps.push_back(waitOp);
816
817 DominanceInfo dominance(region.getParentOp());
818 Liveness liveness(region.getParentOp());
819
820 SmallVector<WaitOp> crossingWaitOps;
821 for (auto &block : region) {
822 for (auto probeOp : block.getOps<PrbOp>()) {
823 for (auto waitOp : waitOps)
824 if (liveness.getLiveness(waitOp->getBlock())->isLiveOut(probeOp))
825 crossingWaitOps.push_back(waitOp);
826 if (!crossingWaitOps.empty()) {
827 captureAcrossWait(probeOp, crossingWaitOps, liveness, dominance);
828 crossingWaitOps.clear();
829 }
830 }
831 }
832}
833
834/// Add a probe as block argument to a list of wait ops and update uses of the
835/// probe to use the added block arguments as appropriate. This may insert
836/// additional block arguments in case the probe and added block arguments both
837/// reach the same block.
838void Promoter::captureAcrossWait(PrbOp probeOp, ArrayRef<WaitOp> waitOps,
839 Liveness &liveness, DominanceInfo &dominance) {
840 LLVM_DEBUG({
841 llvm::dbgs() << "Capture " << probeOp << "\n";
842 for (auto waitOp : waitOps)
843 llvm::dbgs() << "- Across " << waitOp << "\n";
844 });
845
846 // Calculate the merge points for this probe once it gets promoted to block
847 // arguments across the wait ops.
848 auto &domTree = dominance.getDomTree(&region);
849 llvm::IDFCalculatorBase<Block, false> idfCalculator(domTree);
850
851 // Calculate the set of blocks which will define this probe as a distinct
852 // value.
853 SmallPtrSet<Block *, 4> definingBlocks;
854 definingBlocks.insert(probeOp->getBlock());
855 for (auto waitOp : waitOps)
856 definingBlocks.insert(waitOp.getDest());
857 idfCalculator.setDefiningBlocks(definingBlocks);
858
859 // Calculate where the probe is live.
860 SmallPtrSet<Block *, 16> liveInBlocks;
861 for (auto &block : region)
862 if (liveness.getLiveness(&block)->isLiveIn(probeOp))
863 liveInBlocks.insert(&block);
864 idfCalculator.setLiveInBlocks(liveInBlocks);
865
866 // Calculate the merge points where we will have to insert block arguments for
867 // this probe.
868 SmallVector<Block *> mergePointsVec;
869 idfCalculator.calculate(mergePointsVec);
870 SmallPtrSet<Block *, 16> mergePoints(mergePointsVec.begin(),
871 mergePointsVec.end());
872 for (auto waitOp : waitOps)
873 mergePoints.insert(waitOp.getDest());
874 LLVM_DEBUG(llvm::dbgs() << "- " << mergePoints.size() << " merge points\n");
875
876 // Perform a depth-first search starting at the block containing the probe,
877 // which dominates all its uses. When we encounter a block that is a merge
878 // point, insert a block argument.
879 struct WorklistItem {
880 DominanceInfoNode *domNode;
881 Value reachingDef;
882 };
883 SmallVector<WorklistItem> worklist;
884 worklist.push_back({domTree.getNode(probeOp->getBlock()), probeOp});
885
886 while (!worklist.empty()) {
887 auto item = worklist.pop_back_val();
888 auto *block = item.domNode->getBlock();
889
890 // If this block is a merge point, insert a block argument for the probe.
891 if (mergePoints.contains(block))
892 item.reachingDef =
893 block->addArgument(probeOp.getType(), probeOp.getLoc());
894
895 // Replace any uses of the probe in this block with the current reaching
896 // definition.
897 for (auto &op : *block)
898 op.replaceUsesOfWith(probeOp, item.reachingDef);
899
900 // If the terminator of this block branches to a merge point, add the
901 // current reaching definition as a destination operand.
902 if (auto branchOp = dyn_cast<BranchOpInterface>(block->getTerminator())) {
903 for (auto &blockOperand : branchOp->getBlockOperands())
904 if (mergePoints.contains(blockOperand.get()))
905 branchOp.getSuccessorOperands(blockOperand.getOperandNumber())
906 .append(item.reachingDef);
907 } else if (auto waitOp = dyn_cast<WaitOp>(block->getTerminator())) {
908 if (mergePoints.contains(waitOp.getDest()))
909 waitOp.getDestOperandsMutable().append(item.reachingDef);
910 }
911
912 for (auto *child : item.domNode->children())
913 worklist.push_back({child, item.reachingDef});
914 }
915}
916
917//===----------------------------------------------------------------------===//
918// Lattice Construction and Propagation
919//===----------------------------------------------------------------------===//
920
921/// Populate the lattice with nodes and values corresponding to the blocks and
922/// relevant operations in the region we're promoting.
923void Promoter::constructLattice() {
924 // Create entry nodes for each block.
926 for (auto &block : region) {
927 auto *entry = lattice.createNode<BlockEntry>(&block, lattice.createValue());
928 blockEntries.insert({&block, entry});
929 }
930
931 // Create nodes for each operation that is relevant for the pass.
932 for (auto &block : region) {
933 auto *valueBefore = blockEntries.lookup(&block)->valueAfter;
934
935 // Handle operations.
936 for (auto &op : block.without_terminator()) {
937 // Handle probes.
938 if (auto probeOp = dyn_cast<PrbOp>(op)) {
939 if (!promotable.contains(probeOp.getSignal()))
940 continue;
941 auto *node = lattice.createNode<ProbeNode>(
942 probeOp, resolveSlot(probeOp.getSignal()), valueBefore,
943 lattice.createValue());
944 valueBefore = node->valueAfter;
945 continue;
946 }
947
948 // Handle drives.
949 if (auto driveOp = dyn_cast<DrvOp>(op)) {
950 if (!isBlockingDrive(&op) && !isDeltaDrive(&op))
951 continue;
952 if (!promotable.contains(driveOp.getSignal()))
953 continue;
954 auto condition = DriveCondition::always();
955 if (auto enable = driveOp.getEnable())
956 condition = DriveCondition::conditional(enable);
957 // Drives that target a slot directly provide the driven value as a
958 // definition for the slot. Drives to projections have their value
959 // calculated later on.
960 auto slot = resolveSlot(driveOp.getSignal());
961 auto *def = driveOp.getSignal() == slot
962 ? lattice.createDef(driveOp.getValue(), condition)
963 : lattice.createDef(driveOp->getBlock(),
964 getStoredType(slot), condition);
965 auto *node = lattice.createNode<DriveNode>(
966 driveOp, slot, def, valueBefore, lattice.createValue());
967 valueBefore = node->valueAfter;
968 continue;
969 }
970 }
971
972 // Create the exit node for the block.
973 auto *exit = lattice.createNode<BlockExit>(&block, valueBefore);
974 for (auto *otherBlock : exit->terminator->getSuccessors()) {
975 auto *otherEntry = blockEntries.lookup(otherBlock);
976 exit->successors.push_back(otherEntry);
977 otherEntry->predecessors.push_back(exit);
978 }
979 }
980}
981
982/// Propagate the lattice values backwards against control flow until a fixed
983/// point is reached.
984void Promoter::propagateBackward() {
985 for (auto *node : lattice.nodes)
986 propagateBackward(node);
987 while (!dirtyNodes.empty()) {
988 auto *node = *dirtyNodes.begin();
989 dirtyNodes.erase(node);
990 propagateBackward(node);
991 }
992}
993
994/// Propagate the lattice value after a node backward to the value before a
995/// node.
996void Promoter::propagateBackward(LatticeNode *node) {
997 auto update = [&](LatticeValue *value, auto &neededDefs) {
998 if (value->neededDefs != neededDefs) {
999 value->neededDefs = neededDefs;
1000 markDirty(value->nodeBefore);
1001 }
1002 };
1003
1004 // Probes need a definition for the probed slot to be available.
1005 if (auto *probe = dyn_cast<ProbeNode>(node)) {
1006 auto needed = probe->valueAfter->neededDefs;
1007 needed.insert(probe->slot);
1008 update(probe->valueBefore, needed);
1009 return;
1010 }
1011
1012 // Blocking unconditional drives to an entire slot kill the need for a
1013 // definition to be available, since they provide a definition themselves.
1014 // Conditional drives propagate the need for a definition, since they have to
1015 // forward an incoming reaching def in case they are disabled. Drives to
1016 // projections need a definition to be available such that they can partially
1017 // update it.
1018 if (auto *drive = dyn_cast<DriveNode>(node)) {
1019 auto needed = drive->valueAfter->neededDefs;
1020 if (drive->drivesProjection())
1021 needed.insert(getSlot(drive->slot));
1022 else if (!isDelayed(drive->slot) && !drive->getDriveOp().getEnable())
1023 needed.erase(getSlot(drive->slot));
1024 update(drive->valueBefore, needed);
1025 return;
1026 }
1027
1028 // Block entries simply trigger updates to all their predecessors.
1029 if (auto *entry = dyn_cast<BlockEntry>(node)) {
1030 for (auto *predecessor : entry->predecessors)
1031 markDirty(predecessor);
1032 return;
1033 }
1034
1035 // Block exits merge any needed definitions from their successors.
1036 if (auto *exit = dyn_cast<BlockExit>(node)) {
1037 if (exit->suspends)
1038 return;
1039 SmallDenseSet<Value, 1> needed;
1040 for (auto *successors : exit->successors)
1041 needed.insert(successors->valueAfter->neededDefs.begin(),
1042 successors->valueAfter->neededDefs.end());
1043 update(exit->valueBefore, needed);
1044 return;
1045 }
1046
1047 assert(false && "unhandled node in backward propagation");
1048}
1049
1050/// Propagate the lattice values forwards along with control flow until a fixed
1051/// point is reached. If `optimisticMerges` is true, block entry points
1052/// propagate definitions from their predecessors into the block without
1053/// creating a merge definition even if the definition is not available in all
1054/// predecessors. This is overly optimistic, but initially helps definitions
1055/// propagate through loop structures. If `optimisticMerges` is false, block
1056/// entry points create merge definitions for definitions that are not available
1057/// in all predecessors.
1058void Promoter::propagateForward(bool optimisticMerges) {
1059 for (auto *node : lattice.nodes)
1060 propagateForward(node, optimisticMerges);
1061 while (!dirtyNodes.empty()) {
1062 auto *node = *dirtyNodes.begin();
1063 dirtyNodes.erase(node);
1064 propagateForward(node, optimisticMerges);
1065 }
1066}
1067
1068/// Propagate the lattice value before a node forward to the value after a node.
1069void Promoter::propagateForward(LatticeNode *node, bool optimisticMerges) {
1070 auto update = [&](LatticeValue *value, auto &reachingDefs) {
1071 if (value->reachingDefs != reachingDefs) {
1072 value->reachingDefs = reachingDefs;
1073 markDirty(value->nodeAfter);
1074 }
1075 };
1076
1077 // Probes simply propagate any reaching defs.
1078 if (auto *probe = dyn_cast<ProbeNode>(node)) {
1079 update(probe->valueAfter, probe->valueBefore->reachingDefs);
1080 return;
1081 }
1082
1083 // Drives propagate the driven value as a reaching def. Blocking drives kill
1084 // earlier non-blocking drives. This reflects Verilog and VHDL behaviour,
1085 // where a drive sequence like
1086 //
1087 // a <= #10ns 42; // A
1088 // a <= 43; // B
1089 // a = 44; // C
1090 //
1091 // would see (B) override (A), because it happens earlier, and (C) override
1092 // (B), because it in turn happens earlier.
1093 if (auto *drive = dyn_cast<DriveNode>(node)) {
1094 auto reaching = drive->valueBefore->reachingDefs;
1095
1096 // If the drive is conditional or driving a projection of a slot, merge any
1097 // incoming reaching def into the reaching def created by the drive. This is
1098 // necessary since a conditional drive following an unconditional drive may
1099 // have to insert a multiplexer to forward to subsequent probes.
1100 if (drive->drivesProjection() || drive->getDriveOp().getEnable()) {
1101 auto *inDef = reaching.lookup(drive->slot);
1102 if (inDef) {
1103 if (drive->def->value != inDef->value)
1104 drive->def->value = {};
1105 if (inDef->condition.isAlways() || drive->def->condition.isAlways())
1106 drive->def->condition = DriveCondition::always();
1107 else if (drive->def->condition != inDef->condition)
1108 drive->def->condition = DriveCondition::conditional();
1109 }
1110 }
1111
1112 reaching[drive->slot] = drive->def;
1113 if (!isDelayed(drive->slot))
1114 reaching.erase(delayedSlot(getSlot(drive->slot)));
1115 update(drive->valueAfter, reaching);
1116 return;
1117 }
1118
1119 // Block entry points propagate any reaching definitions available in all
1120 // predecessors, plus any probes inserted locally.
1121 if (auto *entry = dyn_cast<BlockEntry>(node)) {
1122 // Propagate reaching definitions for each inserted probe.
1124 for (auto [slot, insertedProbe] : entry->insertedProbes) {
1125 reaching[blockingSlot(slot)] = insertedProbe;
1126 reaching[delayedSlot(slot)] = insertedProbe;
1127 }
1128
1129 // Propagate reaching definitions from predecessors, creating new
1130 // definitions in case of a merge.
1132 for (auto *predecessor : entry->predecessors)
1133 if (!predecessor->suspends)
1134 reachingDefs.insert(predecessor->valueBefore->reachingDefs.begin(),
1135 predecessor->valueBefore->reachingDefs.end());
1136
1137 for (auto pair : reachingDefs) {
1138 DefSlot slot = pair.first;
1139 Def *reachingDef = pair.second;
1140 DriveCondition reachingDefCondition = reachingDef->condition;
1141
1142 // Do not override inserted probes.
1143 if (reaching.contains(slot))
1144 continue;
1145
1146 // Check if all predecessors provide a definition for this slot. If any
1147 // multiple definitions for the same slot reach us, simply set the
1148 // `reachingDef` to null such that we can insert a new merge definition.
1149 // Separately track whether the drive mode of all definitions is
1150 // identical. This is often the case, for example when the definitions of
1151 // two unconditional drives converge, and we would like to preserve that
1152 // both drives were unconditional, even if the driven value differs.
1153 if (llvm::any_of(entry->predecessors, [&](auto *predecessor) {
1154 return predecessor->suspends;
1155 }))
1156 continue;
1157 for (auto *predecessor : entry->predecessors) {
1158 auto otherDef = predecessor->valueBefore->reachingDefs.lookup(slot);
1159 if (!otherDef && optimisticMerges)
1160 continue;
1161 // If the definitions are not identical, indicate that we will have
1162 // to create a new merge def.
1163 if (reachingDef != otherDef)
1164 reachingDef = nullptr;
1165 // If the definitions have different modes, indicate that we will
1166 // have to create a conditional drive later.
1167 auto condition =
1168 otherDef ? otherDef->condition : DriveCondition::never();
1169 if (reachingDefCondition != condition)
1170 reachingDefCondition = DriveCondition::conditional();
1171 }
1172
1173 // Create a merge definition if different definitions reach us from our
1174 // predecessors.
1175 if (!reachingDef)
1176 reachingDef = entry->mergedDefs.lookup(slot);
1177 if (!reachingDef) {
1178 reachingDef = lattice.createDef(entry->block, getStoredType(slot),
1179 reachingDefCondition);
1180 entry->mergedDefs.insert({slot, reachingDef});
1181 } else {
1182 reachingDef->condition = reachingDefCondition;
1183 }
1184 reaching.insert({slot, reachingDef});
1185 }
1186
1187 update(entry->valueAfter, reaching);
1188 return;
1189 }
1190
1191 // Block exits simply trigger updates to all their successors.
1192 if (auto *exit = dyn_cast<BlockExit>(node)) {
1193 for (auto *successor : exit->successors)
1194 markDirty(successor);
1195 return;
1196 }
1197
1198 assert(false && "unhandled node in forward propagation");
1199}
1200
1201/// Mark a lattice node to be updated during propagation.
1202void Promoter::markDirty(LatticeNode *node) {
1203 assert(node);
1204 dirtyNodes.insert(node);
1205}
1206
1207//===----------------------------------------------------------------------===//
1208// Drive/Probe Insertion
1209//===----------------------------------------------------------------------===//
1210
1211/// Insert additional probe blocks where needed. This can happen if a definition
1212/// is needed in a block which has a suspending and non-suspending predecessor.
1213/// In that case we would like to insert probes in the predecessor blocks, but
1214/// cannot do so because of the suspending predecessor.
1215void Promoter::insertProbeBlocks() {
1216 // Find all blocks that have any needed definition that can't propagate beyond
1217 // one of its predecessors. If that's the case, we need an additional probe
1218 // block after that predecessor.
1219 SmallDenseSet<std::pair<BlockExit *, BlockEntry *>, 1> worklist;
1220 for (auto *node : lattice.nodes) {
1221 if (auto *entry = dyn_cast<BlockEntry>(node)) {
1222 SmallVector<Value> partialSlots;
1223 for (auto slot : entry->valueAfter->neededDefs) {
1224 unsigned numIncoming = 0;
1225 for (auto *predecessor : entry->predecessors)
1226 if (predecessor->valueBefore->neededDefs.contains(slot))
1227 ++numIncoming;
1228 if (numIncoming != 0 && numIncoming != entry->predecessors.size())
1229 partialSlots.push_back(slot);
1230 }
1231 for (auto *predecessor : entry->predecessors)
1232 if (llvm::any_of(partialSlots, [&](auto slot) {
1233 return !predecessor->valueBefore->neededDefs.contains(slot);
1234 }))
1235 worklist.insert({predecessor, entry});
1236 }
1237 }
1238
1239 // Insert probe blocks after all blocks we have identified.
1240 for (auto [predecessor, successor] : worklist) {
1241 LLVM_DEBUG(llvm::dbgs() << "- Inserting probe block towards " << successor
1242 << " after " << *predecessor->terminator << "\n");
1243 OpBuilder builder(predecessor->terminator);
1244 auto *newBlock = builder.createBlock(successor->block);
1245 for (auto oldArg : successor->block->getArguments())
1246 newBlock->addArgument(oldArg.getType(), oldArg.getLoc());
1247 builder.create<cf::BranchOp>(predecessor->terminator->getLoc(),
1248 successor->block, newBlock->getArguments());
1249 for (auto &blockOp : predecessor->terminator->getBlockOperands())
1250 if (blockOp.get() == successor->block)
1251 blockOp.set(newBlock);
1252
1253 // Create new nodes in the lattice for the added block.
1254 auto *value = lattice.createValue();
1255 value->neededDefs = successor->valueAfter->neededDefs;
1256 auto *newEntry = lattice.createNode<BlockEntry>(newBlock, value);
1257 auto *newExit = lattice.createNode<BlockExit>(newBlock, value);
1258 newEntry->predecessors.push_back(predecessor);
1259 newExit->successors.push_back(successor);
1260 llvm::replace(successor->predecessors, predecessor, newExit);
1261 llvm::replace(predecessor->successors, successor, newEntry);
1262 }
1263}
1264
1265/// Insert probes wherever a definition is needed for the first time. This is
1266/// the case in the entry block, after any suspensions, and after operations
1267/// that have unknown effects on memory slots.
1268void Promoter::insertProbes() {
1269 for (auto *node : lattice.nodes) {
1270 if (auto *entry = dyn_cast<BlockEntry>(node))
1271 insertProbes(entry);
1272 }
1273}
1274
1275/// Insert probes at the beginning of a block for definitions that are needed in
1276/// this block but not in its predecessors.
1277void Promoter::insertProbes(BlockEntry *node) {
1278 auto builder = OpBuilder::atBlockBegin(node->block);
1279 for (auto neededDef : slots) {
1280 if (!node->valueAfter->neededDefs.contains(neededDef))
1281 continue;
1282 if (!node->predecessors.empty() &&
1283 llvm::all_of(node->predecessors, [&](auto *predecessor) {
1284 return predecessor->valueBefore->neededDefs.contains(neededDef);
1285 }))
1286 continue;
1287 LLVM_DEBUG(llvm::dbgs() << "- Inserting probe for " << neededDef
1288 << " in block " << node->block << "\n");
1289 auto value = builder.create<PrbOp>(neededDef.getLoc(), neededDef);
1290 auto *def = lattice.createDef(value, DriveCondition::never());
1291 node->insertedProbes.push_back({neededDef, def});
1292 }
1293}
1294
1295/// Insert additional drive blocks where needed. This can happen if a definition
1296/// continues into some of a block's successors, but not all of them.
1297void Promoter::insertDriveBlocks() {
1298 // Find all blocks that have any reaching definition that can't propagate
1299 // beyond one of its successors. If that's the case, we need an additional
1300 // drive block before that successor.
1301 SmallDenseSet<std::pair<BlockExit *, BlockEntry *>, 1> worklist;
1302 for (auto *node : lattice.nodes) {
1303 if (auto *exit = dyn_cast<BlockExit>(node)) {
1304 SmallVector<DefSlot> partialSlots;
1305 for (auto [slot, reachingDef] : exit->valueBefore->reachingDefs) {
1306 if (reachingDef->condition.isNever())
1307 continue;
1308 unsigned numContinues = 0;
1309 for (auto *successor : exit->successors)
1310 if (successor->valueAfter->reachingDefs.contains(slot))
1311 ++numContinues;
1312 if (numContinues != 0 && numContinues != exit->successors.size())
1313 partialSlots.push_back(slot);
1314 }
1315 for (auto *successor : exit->successors)
1316 if (llvm::any_of(partialSlots, [&](auto slot) {
1317 return !successor->valueAfter->reachingDefs.contains(slot);
1318 }))
1319 worklist.insert({exit, successor});
1320 }
1321 }
1322
1323 // Insert drive blocks before all blocks we have identified.
1324 for (auto [predecessor, successor] : worklist) {
1325 LLVM_DEBUG(llvm::dbgs() << "- Inserting drive block towards " << successor
1326 << " after " << *predecessor->terminator << "\n");
1327 OpBuilder builder(predecessor->terminator);
1328 auto *newBlock = builder.createBlock(successor->block);
1329 for (auto oldArg : successor->block->getArguments())
1330 newBlock->addArgument(oldArg.getType(), oldArg.getLoc());
1331 builder.create<cf::BranchOp>(predecessor->terminator->getLoc(),
1332 successor->block, newBlock->getArguments());
1333 for (auto &blockOp : predecessor->terminator->getBlockOperands())
1334 if (blockOp.get() == successor->block)
1335 blockOp.set(newBlock);
1336
1337 // Create new nodes in the lattice for the added block.
1338 auto *value = lattice.createValue();
1339 value->neededDefs = successor->valueAfter->neededDefs;
1340 value->reachingDefs = predecessor->valueBefore->reachingDefs;
1341 auto *newEntry = lattice.createNode<BlockEntry>(newBlock, value);
1342 auto *newExit = lattice.createNode<BlockExit>(newBlock, value);
1343 newEntry->predecessors.push_back(predecessor);
1344 newExit->successors.push_back(successor);
1345 llvm::replace(successor->predecessors, predecessor, newExit);
1346 llvm::replace(predecessor->successors, successor, newEntry);
1347 }
1348}
1349
1350/// Insert drives wherever a reaching definition can no longer propagate. This
1351/// is the before any suspensions and before operations that have unknown
1352/// effects on memory slots.
1353void Promoter::insertDrives() {
1354 for (auto *node : lattice.nodes) {
1355 if (auto *exit = dyn_cast<BlockExit>(node))
1356 insertDrives(exit);
1357 else if (auto *drive = dyn_cast<DriveNode>(node))
1358 insertDrives(drive);
1359 }
1360}
1361
1362/// Insert drives at block terminators for definitions that do not propagate
1363/// into successors.
1364void Promoter::insertDrives(BlockExit *node) {
1365 auto builder = OpBuilder::atBlockTerminator(node->block);
1366
1367 ConstantTimeOp epsilonTime;
1368 ConstantTimeOp deltaTime;
1369 auto getTime = [&](bool delta) {
1370 if (delta) {
1371 if (!deltaTime)
1372 deltaTime = builder.create<ConstantTimeOp>(node->terminator->getLoc(),
1373 0, "ns", 1, 0);
1374 return deltaTime;
1375 }
1376 if (!epsilonTime)
1377 epsilonTime = builder.create<ConstantTimeOp>(node->terminator->getLoc(),
1378 0, "ns", 0, 1);
1379 return epsilonTime;
1380 };
1381
1382 auto insertDriveForSlot = [&](DefSlot slot) {
1383 auto reachingDef = node->valueBefore->reachingDefs.lookup(slot);
1384 if (!reachingDef || reachingDef->condition.isNever())
1385 return;
1386 if (!node->suspends && !node->successors.empty() &&
1387 llvm::all_of(node->successors, [&](auto *successor) {
1388 return successor->valueAfter->reachingDefs.contains(slot);
1389 }))
1390 return;
1391 LLVM_DEBUG(llvm::dbgs() << "- Inserting drive for " << getSlot(slot) << " "
1392 << (isDelayed(slot) ? "(delayed)" : "(blocking)")
1393 << " before " << *node->terminator << "\n");
1394 auto time = getTime(isDelayed(slot));
1395 auto value = reachingDef->getValueOrPlaceholder();
1396 auto enable = reachingDef->condition.isConditional()
1397 ? reachingDef->getConditionOrPlaceholder()
1398 : Value{};
1399 builder.create<DrvOp>(getLoc(slot), getSlot(slot), value, time, enable);
1400 };
1401
1402 for (auto slot : slots)
1403 insertDriveForSlot(blockingSlot(slot));
1404 for (auto slot : slots)
1405 insertDriveForSlot(delayedSlot(slot));
1406}
1407
1408/// Remove drives to slots that we are promoting. These have been replaced with
1409/// new drives at block exits.
1410void Promoter::insertDrives(DriveNode *node) {
1411 if (!promotable.contains(getSlot(node->slot)))
1412 return;
1413 LLVM_DEBUG(llvm::dbgs() << "- Removing drive " << *node->op << "\n");
1414 pruner.eraseNow(node->op);
1415 node->op = nullptr;
1416}
1417
1418//===----------------------------------------------------------------------===//
1419// Drive-to-Probe Forwarding
1420//===----------------------------------------------------------------------===//
1421
1422/// Forward definitions throughout the IR.
1423void Promoter::resolveDefinitions() {
1424 for (auto *node : lattice.nodes) {
1425 if (auto *probe = dyn_cast<ProbeNode>(node))
1426 resolveDefinitions(probe);
1427 else if (auto *drive = dyn_cast<DriveNode>(node))
1428 resolveDefinitions(drive);
1429 }
1430}
1431
1432/// Replace probes with the corresponding reaching definition.
1433void Promoter::resolveDefinitions(ProbeNode *node) {
1434 if (!promotable.contains(node->slot))
1435 return;
1436 auto *def = node->valueBefore->reachingDefs.lookup(blockingSlot(node->slot));
1437 assert(def && "no definition reaches probe");
1438
1439 // Gather any projections between the probe and the underlying slot.
1440 auto projections = getProjections(node->op->getOperand(0), node->slot);
1441
1442 // Extract the projected value from the aggregate.
1443 auto builder = OpBuilder(node->op);
1444 auto value = def->getValueOrPlaceholder();
1445 value = unpackProjections(builder, value, projections);
1446
1447 // Replace the probe result with the projected value.
1448 LLVM_DEBUG(llvm::dbgs() << "- Replacing " << *node->op << "\n");
1449 replaceValueWith(node->op->getResult(0), value);
1450 pruner.eraseNow(node->op);
1451 node->op = nullptr;
1452}
1453
1454/// Mutate reaching definitions for conditional drives or drives that target
1455/// projections of a slot.
1456void Promoter::resolveDefinitions(DriveNode *node) {
1457 if (!promotable.contains(getSlot(node->slot)))
1458 return;
1459
1460 // Check whether the drive already has a value and condition set for its
1461 // reaching def. This is the case for drives to an entire slot. Conditional
1462 // drives and drives to a projection will have no value set initially, in
1463 // which case either the value is null or it is a placeholder.
1464 if (!node->def->value || node->def->valueIsPlaceholder)
1465 resolveDefinitionValue(node);
1466 if (node->def->condition.isConditional() &&
1467 (!node->def->condition.getCondition() ||
1468 node->def->conditionIsPlaceholder))
1469 resolveDefinitionCondition(node);
1470}
1471
1472void Promoter::resolveDefinitionValue(DriveNode *node) {
1473 // Get the slot definition reaching the drive. This is the value the drive has
1474 // to update.
1475 auto *inDef = node->valueBefore->reachingDefs.lookup(node->slot);
1476 assert(inDef && "no definition reaches drive");
1477 auto driveOp = node->getDriveOp();
1478 LLVM_DEBUG(llvm::dbgs() << "- Injecting value for " << driveOp << "\n");
1479
1480 // Gather any projections between the drive and the underlying slot.
1481 auto projections = getProjections(driveOp.getSignal(), getSlot(node->slot));
1482
1483 // Extract the current value from the aggregate.
1484 auto builder = OpBuilder(driveOp);
1485 auto value = inDef->getValueOrPlaceholder();
1486 value = unpackProjections(builder, value, projections);
1487
1488 // Update the value according to the drive.
1489 pruner.eraseLaterIfUnused(value);
1490 if (!driveOp.getEnable())
1491 value = driveOp.getValue();
1492 else
1493 value = builder.createOrFold<comb::MuxOp>(
1494 driveOp.getLoc(), driveOp.getEnable(), driveOp.getValue(), value);
1495
1496 // Inject the updated value into the aggregate.
1497 value = packProjections(builder, value, projections);
1498
1499 // Track the updated slot value in the drive's reaching def.
1500 if (node->def->valueIsPlaceholder) {
1501 auto *placeholder = node->def->value.getDefiningOp();
1502 assert(isa_and_nonnull<UnrealizedConversionCastOp>(placeholder) &&
1503 "placeholder replaced but valueIsPlaceholder still set");
1504 replaceValueWith(placeholder->getResult(0), value);
1505 pruner.eraseNow(placeholder);
1506 node->def->valueIsPlaceholder = false;
1507 }
1508 node->def->value = value;
1509}
1510
1511void Promoter::resolveDefinitionCondition(DriveNode *node) {
1512 // Get the slot definition reaching the drive. This contains the condition the
1513 // drive has to update.
1514 auto *inDef = node->valueBefore->reachingDefs.lookup(node->slot);
1515 assert(inDef && "no definition reaches drive");
1516 auto driveOp = node->getDriveOp();
1517 LLVM_DEBUG(llvm::dbgs() << "- Mutating condition for " << driveOp << "\n");
1518 OpBuilder builder(driveOp);
1519
1520 // Adjust the drive condition by combining the incoming condition with the
1521 // enable of this drive op.
1522 Value condition;
1523 if (inDef->condition.isNever())
1524 condition = driveOp.getEnable();
1525 else
1526 condition =
1527 builder.createOrFold<comb::OrOp>(driveOp.getLoc(), driveOp.getEnable(),
1528 inDef->getConditionOrPlaceholder());
1529
1530 // Track the updated drive condition in the drive's reaching def.
1531 if (node->def->conditionIsPlaceholder) {
1532 auto *placeholder = node->def->condition.getCondition().getDefiningOp();
1533 assert(isa_and_nonnull<UnrealizedConversionCastOp>(placeholder) &&
1534 "placeholder replaced but conditionIsPlaceholder still set");
1535 replaceValueWith(placeholder->getResult(0), condition);
1536 pruner.eraseNow(placeholder);
1537 node->def->conditionIsPlaceholder = false;
1538 }
1539 node->def->condition.setCondition(condition);
1540}
1541
1542//===----------------------------------------------------------------------===//
1543// Block Argument Insertion
1544//===----------------------------------------------------------------------===//
1545
1546/// Insert block arguments into the IR.
1547void Promoter::insertBlockArgs() {
1548 bool anyArgsInserted = true;
1549 while (anyArgsInserted) {
1550 anyArgsInserted = false;
1551 for (auto *node : lattice.nodes)
1552 if (auto *entry = dyn_cast<BlockEntry>(node))
1553 anyArgsInserted |= insertBlockArgs(entry);
1554 }
1555}
1556
1557/// Insert block arguments for any merging definitions for which a placeholder
1558/// value has been created. Also insert corresponding successor operands to any
1559/// ops branching here. Returns true if any arguments were inserted.
1560///
1561/// This function may create additional placeholders in predecessor blocks.
1562/// Creating block arguments in a later block may uncover additional arguments
1563/// to be inserted in a previous one. Therefore this function must be called
1564/// until no more block arguments are inserted.
1565bool Promoter::insertBlockArgs(BlockEntry *node) {
1566 // Determine which slots require a merging definition. Use the `slots` array
1567 // for this to have a deterministic order for the block arguments. We only
1568 // insert block arguments for the def's value or drive condition if
1569 // placeholders have been created for them, indicating that they are actually
1570 // used.
1571 enum class Which { Value, Condition };
1572 SmallVector<std::pair<DefSlot, Which>> neededSlots;
1573 auto addNeededSlot = [&](DefSlot slot) {
1574 if (auto *def = node->mergedDefs.lookup(slot)) {
1575 if (node->valueAfter->reachingDefs.contains(slot)) {
1576 if (def->valueIsPlaceholder)
1577 neededSlots.push_back({slot, Which::Value});
1578 if (def->conditionIsPlaceholder)
1579 neededSlots.push_back({slot, Which::Condition});
1580 }
1581 }
1582 };
1583 for (auto slot : slots) {
1584 addNeededSlot(blockingSlot(slot));
1585 addNeededSlot(delayedSlot(slot));
1586 }
1587 if (neededSlots.empty())
1588 return false;
1589 LLVM_DEBUG(llvm::dbgs() << "- Adding " << neededSlots.size()
1590 << " args to block " << node->block << "\n");
1591
1592 // Add the block arguments.
1593 for (auto [slot, which] : neededSlots) {
1594 auto *def = node->mergedDefs.lookup(slot);
1595 assert(def);
1596 switch (which) {
1597 case Which::Value: {
1598 // Create an argument for the definition's value and replace any
1599 // placeholder we might have created earlier.
1600 auto *placeholder = def->value.getDefiningOp();
1601 assert(isa_and_nonnull<UnrealizedConversionCastOp>(placeholder) &&
1602 "placeholder replaced but valueIsPlaceholder still set");
1603 auto arg = node->block->addArgument(getStoredType(slot), getLoc(slot));
1604 replaceValueWith(placeholder->getResult(0), arg);
1605 pruner.eraseNow(placeholder);
1606 def->value = arg;
1607 def->valueIsPlaceholder = false;
1608 break;
1609 }
1610 case Which::Condition: {
1611 // If the definition's drive mode is conditional, create an argument for
1612 // the drive condition and replace any placeholder we might have created
1613 // earlier.
1614 auto *placeholder = def->condition.getCondition().getDefiningOp();
1615 assert(isa_and_nonnull<UnrealizedConversionCastOp>(placeholder) &&
1616 "placeholder replaced but conditionIsPlaceholder still set");
1617 auto conditionArg = node->block->addArgument(
1618 IntegerType::get(region.getContext(), 1), getLoc(slot));
1619 replaceValueWith(placeholder->getResult(0), conditionArg);
1620 pruner.eraseNow(placeholder);
1621 def->condition.setCondition(conditionArg);
1622 def->conditionIsPlaceholder = false;
1623 break;
1624 }
1625 }
1626 }
1627
1628 // Add successor operands to the predecessor terminators.
1629 for (auto *predecessor : node->predecessors) {
1630 // Collect the interesting reaching definitions in the predecessor.
1631 SmallVector<Value> args;
1632 for (auto [slot, which] : neededSlots) {
1633 auto *def = predecessor->valueBefore->reachingDefs.lookup(slot);
1634 auto builder = OpBuilder::atBlockTerminator(predecessor->block);
1635 switch (which) {
1636 case Which::Value:
1637 if (def) {
1638 args.push_back(def->getValueOrPlaceholder());
1639 } else {
1640 auto type = getStoredType(slot);
1641 auto flatType = builder.getIntegerType(hw::getBitWidth(type));
1642 Value value =
1643 builder.create<hw::ConstantOp>(getLoc(slot), flatType, 0);
1644 if (type != flatType)
1645 value = builder.create<hw::BitcastOp>(getLoc(slot), type, value);
1646 args.push_back(value);
1647 }
1648 break;
1649 case Which::Condition:
1650 if (def) {
1651 args.push_back(def->getConditionOrPlaceholder());
1652 } else {
1653 args.push_back(builder.create<hw::ConstantOp>(
1654 getLoc(slot), builder.getI1Type(), 0));
1655 }
1656 break;
1657 }
1658 }
1659
1660 // Add the reaching definitions to the branch op.
1661 auto branchOp = cast<BranchOpInterface>(predecessor->terminator);
1662 for (auto &blockOperand : branchOp->getBlockOperands())
1663 if (blockOperand.get() == node->block)
1664 branchOp.getSuccessorOperands(blockOperand.getOperandNumber())
1665 .append(args);
1666 }
1667
1668 return true;
1669}
1670
1671/// Replace all uses of an old value with a new value in the IR, and update all
1672/// mentions of the old value in the lattice to the new value.
1673void Promoter::replaceValueWith(Value oldValue, Value newValue) {
1674 oldValue.replaceAllUsesWith(newValue);
1675 for (auto *def : lattice.defs) {
1676 if (def->value == oldValue)
1677 def->value = newValue;
1678 if (def->condition.isConditional() &&
1679 def->condition.getCondition() == oldValue)
1680 def->condition.setCondition(newValue);
1681 }
1682}
1683
1684//===----------------------------------------------------------------------===//
1685// Pass Infrastructure
1686//===----------------------------------------------------------------------===//
1687
1688namespace {
1689struct Mem2RegPass : public llhd::impl::Mem2RegPassBase<Mem2RegPass> {
1690 void runOnOperation() override;
1691};
1692} // namespace
1693
1694void Mem2RegPass::runOnOperation() {
1695 SmallVector<Region *> regions;
1696 getOperation()->walk<WalkOrder::PreOrder>([&](Operation *op) {
1697 if (isa<ProcessOp, FinalOp>(op)) {
1698 auto &region = op->getRegion(0);
1699 if (!region.empty())
1700 regions.push_back(&region);
1701 return WalkResult::skip();
1702 }
1703 return WalkResult::advance();
1704 });
1705 for (auto *region : regions)
1706 if (failed(Promoter(*region).promote()))
1707 return signalPassFailure();
1708}
static bool isAlways(Attribute attr, bool expected)
Definition ArcFolds.cpp:22
static bool isLiveOut(Value val)
assert(baseType &&"element must be base type")
static void dump(DIModule &module, raw_indented_ostream &os)
static bool isDelayed(DefSlot slot)
Definition Mem2Reg.cpp:210
static Value packProjections(OpBuilder &builder, Value value, const ProjectionStack &projections)
Undo a stack of projections by taking the value of the projected field and injecting it into the surr...
Definition Mem2Reg.cpp:591
static Location getLoc(DefSlot slot)
Definition Mem2Reg.cpp:217
static bool isDeltaDrive(Operation *op)
Check whether an operation is a llhd.drive with a delta delay.
Definition Mem2Reg.cpp:64
static ProjectionStack getProjections(Value fromSignal, Value toSlot)
Collect the llhd.sig.
Definition Mem2Reg.cpp:550
static bool isDeltaDelay(Value value)
Check whether a value is defined by llhd.constant_time <0ns, 1d, 0e>.
Definition Mem2Reg.cpp:46
static DefSlot blockingSlot(Value slot)
Definition Mem2Reg.cpp:207
static Value unpackProjections(OpBuilder &builder, Value value, ProjectionStack &projections)
Resolve a stack of projections by taking a value and descending into its subelements until the final ...
Definition Mem2Reg.cpp:566
static DefSlot delayedSlot(Value slot)
Definition Mem2Reg.cpp:208
static Value getSlot(DefSlot slot)
Definition Mem2Reg.cpp:209
SmallVector< Projection > ProjectionStack
A stack of projection operations.
Definition Mem2Reg.cpp:543
static bool isBlockingDrive(Operation *op)
Check whether an operation is a llhd.drive with an epsilon delay.
Definition Mem2Reg.cpp:56
static Type getStoredType(Value slot)
Definition Mem2Reg.cpp:211
static bool isEpsilonDelay(Value value)
Check whether a value is defined by llhd.constant_time <0ns, 0d, 1e>.
Definition Mem2Reg.cpp:37
PointerIntPair< Value, 1 > DefSlot
The slot a reaching definition specifies a value for, alongside a bit indicating whether the definiti...
Definition Mem2Reg.cpp:206
static StringAttr append(StringAttr base, const Twine &suffix)
Return a attribute with the specified suffix appended.
create(data_type, value)
Definition hw.py:433
Direction get(bool isOutput)
Returns an output direction if isOutput is true, otherwise returns an input direction.
Definition CalyxOps.cpp:55
static bool operator==(const ModulePort &a, const ModulePort &b)
Definition HWTypes.h:35
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
bool operator!=(uint64_t a, const FVInt &b)
Definition FVInt.h:651
Utility that tracks operations that have potentially become unused and allows them to be cleaned up a...
static DriveCondition getEmptyKey()
Definition Mem2Reg.cpp:184
static DriveCondition getTombstoneKey()
Definition Mem2Reg.cpp:187
static bool isEqual(DriveCondition lhs, DriveCondition rhs)
Definition Mem2Reg.cpp:194
static unsigned getHashValue(DriveCondition d)
Definition Mem2Reg.cpp:190