CIRCT  20.0.0git
LowerState.cpp
Go to the documentation of this file.
1 //===- LowerState.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
13 #include "circt/Dialect/HW/HWOps.h"
18 #include "mlir/Analysis/TopologicalSortUtils.h"
19 #include "mlir/Dialect/Func/IR/FuncOps.h"
20 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
21 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
22 #include "mlir/Dialect/SCF/IR/SCF.h"
23 #include "mlir/IR/IRMapping.h"
24 #include "mlir/IR/ImplicitLocOpBuilder.h"
25 #include "mlir/IR/SymbolTable.h"
26 #include "mlir/Interfaces/SideEffectInterfaces.h"
27 #include "mlir/Pass/Pass.h"
28 #include "llvm/ADT/TypeSwitch.h"
29 #include "llvm/Support/Debug.h"
30 
31 #define DEBUG_TYPE "arc-lower-state"
32 
33 namespace circt {
34 namespace arc {
35 #define GEN_PASS_DEF_LOWERSTATEPASS
36 #include "circt/Dialect/Arc/ArcPasses.h.inc"
37 } // namespace arc
38 } // namespace circt
39 
40 using namespace circt;
41 using namespace arc;
42 using namespace hw;
43 using namespace mlir;
44 using llvm::SmallDenseSet;
45 
46 namespace {
/// The phase an operation or value is lowered for. `Initial` and `Final` work
/// is emitted through dedicated builders, while `Old` and `New` share the
/// builder for the main model body. State lowering reads its inputs in the
/// `Old` phase and produces the `New` value.
enum class Phase { Initial, Old, New, Final };

/// Print the lower-case name of `phase` to `os` and return `os`.
///
/// `OS` may be any type that accepts `os << const char *`. The stream itself
/// is returned rather than the result of the insertion, so the insertion's
/// result type does not have to be `OS &`. A value outside the enumeration is
/// printed as `<invalid phase>` instead of running off the end of the function.
template <class OS>
OS &operator<<(OS &os, Phase phase) {
  const char *name = "<invalid phase>";
  switch (phase) {
  case Phase::Initial:
    name = "initial";
    break;
  case Phase::Old:
    name = "old";
    break;
  case Phase::New:
    name = "new";
    break;
  case Phase::Final:
    name = "final";
    break;
  }
  os << name;
  return os;
}
62 
63 struct ModuleLowering;
64 
65 /// All state associated with lowering a single operation. Instances of this
66 /// struct are kept on a worklist to perform a depth-first traversal of the
67 /// module being lowered.
68 ///
69 /// The actual lowering occurs in `lower()`. This function is called exactly
70 /// twice. A first time with `initial` being true, where other values and
71 /// operations that have to be lowered first may be marked with `addPending`. No
72 /// actual lowering or error reporting should occur when `initial` is true. The
73 /// worklist then ensures that all `pending` ops are lowered before `lower()` is
74 /// called a second time with `initial` being false. At this point the actual
75 /// lowering and error reporting should occur.
76 ///
77 /// The `initial` variable is used to allow for a single block of code to mark
78 /// values and ops as dependencies and actually do the lowering based on them.
79 struct OpLowering {
80  Operation *op;
81  Phase phase;
82  ModuleLowering &module;
83 
84  bool initial = true;
85  SmallVector<std::pair<Operation *, Phase>, 2> pending;
86 
87  OpLowering(Operation *op, Phase phase, ModuleLowering &module)
88  : op(op), phase(phase), module(module) {}
89 
90  // Operation Lowering.
91  LogicalResult lower();
92  LogicalResult lowerDefault();
93  LogicalResult lower(StateOp op);
94  LogicalResult lower(sim::DPICallOp op);
95  LogicalResult
96  lowerStateful(Value clock, Value enable, Value reset, ValueRange inputs,
97  ResultRange results,
98  llvm::function_ref<ValueRange(ValueRange)> createMapping);
99  LogicalResult lower(MemoryOp op);
100  LogicalResult lower(TapOp op);
101  LogicalResult lower(InstanceOp op);
102  LogicalResult lower(hw::OutputOp op);
103  LogicalResult lower(seq::InitialOp op);
104  LogicalResult lower(llhd::FinalOp op);
105 
106  scf::IfOp createIfClockOp(Value clock);
107 
108  // Value Lowering. These functions are called from the `lower()` functions
109  // above. They handle values used by the `op`. This can generate reads from
110  // state and memory storage on-the-fly, or mark other ops as dependencies to
111  // be lowered first.
112  Value lowerValue(Value value, Phase phase);
113  Value lowerValue(InstanceOp op, OpResult result, Phase phase);
114  Value lowerValue(StateOp op, OpResult result, Phase phase);
115  Value lowerValue(sim::DPICallOp op, OpResult result, Phase phase);
116  Value lowerValue(MemoryReadPortOp op, OpResult result, Phase phase);
117  Value lowerValue(seq::InitialOp op, OpResult result, Phase phase);
118  Value lowerValue(seq::FromImmutableOp op, OpResult result, Phase phase);
119 
120  void addPending(Value value, Phase phase);
121  void addPending(Operation *op, Phase phase);
122 };
123 
124 /// All state associated with lowering a single module.
125 struct ModuleLowering {
126  /// The module being lowered.
127  HWModuleOp moduleOp;
128  /// The builder for the main body of the model.
129  OpBuilder builder;
130  /// The builder for state allocation ops.
131  OpBuilder allocBuilder;
132  /// The builder for the initial phase.
133  OpBuilder initialBuilder;
134  /// The builder for the final phase.
135  OpBuilder finalBuilder;
136 
137  /// The storage value that can be used for `arc.alloc_state` and friends.
138  Value storageArg;
139 
140  /// A worklist of pending op lowerings.
141  SmallVector<OpLowering> opsWorklist;
142  /// The set of ops currently in the worklist. Used to detect cycles.
143  SmallDenseSet<std::pair<Operation *, Phase>> opsSeen;
144  /// The ops that have already been lowered.
145  DenseSet<std::pair<Operation *, Phase>> loweredOps;
146  /// The values that have already been lowered.
147  DenseMap<std::pair<Value, Phase>, Value> loweredValues;
148 
149  /// The allocated input ports.
150  SmallVector<Value> allocatedInputs;
151  /// The allocated states as a mapping from op results to `arc.alloc_state`
152  /// results.
153  DenseMap<Value, Value> allocatedStates;
154  /// The allocated storage for instance inputs and top module outputs.
155  DenseMap<OpOperand *, Value> allocatedOutputs;
156  /// The allocated storage for values computed during the initial phase.
157  DenseMap<Value, Value> allocatedInitials;
158  /// The allocated storage for taps.
159  DenseMap<Operation *, Value> allocatedTaps;
160 
161  /// A mapping from unlowered clocks to a value indicating a posedge. This is
162  /// used to not create an excessive number of posedge detectors.
163  DenseMap<Value, Value> loweredPosedges;
164  /// The previous enable and the value it was lowered to. This is used to reuse
165  /// previous if ops for the same enable value.
166  std::pair<Value, Value> prevEnable;
167  /// The previous reset and the value it was lowered to. This is used to reuse
168  /// previous if ops for the same reset value.
169  std::pair<Value, Value> prevReset;
170 
171  ModuleLowering(HWModuleOp moduleOp)
172  : moduleOp(moduleOp), builder(moduleOp), allocBuilder(moduleOp),
173  initialBuilder(moduleOp), finalBuilder(moduleOp) {}
174  LogicalResult run();
175  LogicalResult lowerOp(Operation *op);
176  Value getAllocatedState(OpResult result);
177  Value detectPosedge(Value clock);
178  OpBuilder &getBuilder(Phase phase);
179  Value requireLoweredValue(Value value, Phase phase, Location useLoc);
180 };
181 } // namespace
182 
183 //===----------------------------------------------------------------------===//
184 // Module Lowering
185 //===----------------------------------------------------------------------===//
186 
187 LogicalResult ModuleLowering::run() {
188  LLVM_DEBUG(llvm::dbgs() << "Lowering module `" << moduleOp.getModuleName()
189  << "`\n");
190 
191  // Create the replacement `ModelOp`.
192  auto modelOp =
193  builder.create<ModelOp>(moduleOp.getLoc(), moduleOp.getModuleNameAttr(),
194  TypeAttr::get(moduleOp.getModuleType()),
195  FlatSymbolRefAttr{}, FlatSymbolRefAttr{});
196  auto &modelBlock = modelOp.getBody().emplaceBlock();
197  storageArg = modelBlock.addArgument(
198  StorageType::get(builder.getContext(), {}), modelOp.getLoc());
199  builder.setInsertionPointToStart(&modelBlock);
200 
201  // Create the `arc.initial` op to contain the ops for the initialization
202  // phase.
203  auto initialOp = builder.create<InitialOp>(moduleOp.getLoc());
204  initialBuilder.setInsertionPointToStart(&initialOp.getBody().emplaceBlock());
205 
206  // Create the `arc.final` op to contain the ops for the finalization phase.
207  auto finalOp = builder.create<FinalOp>(moduleOp.getLoc());
208  finalBuilder.setInsertionPointToStart(&finalOp.getBody().emplaceBlock());
209 
210  // Position the alloc builder such that allocation ops get inserted above the
211  // initial op.
212  allocBuilder.setInsertionPoint(initialOp);
213 
214  // Allocate storage for the inputs.
215  for (auto arg : moduleOp.getBodyBlock()->getArguments()) {
216  auto name = moduleOp.getArgName(arg.getArgNumber());
217  auto state = allocBuilder.create<RootInputOp>(
218  arg.getLoc(), StateType::get(arg.getType()), name, storageArg);
219  allocatedInputs.push_back(state);
220  }
221 
222  // Lower the ops.
223  for (auto &op : moduleOp.getOps()) {
224  if (mlir::isMemoryEffectFree(&op) && !isa<hw::OutputOp>(op))
225  continue;
226  if (isa<MemoryReadPortOp, MemoryWritePortOp>(op))
227  continue; // handled as part of `MemoryOp`
228  if (failed(lowerOp(&op)))
229  return failure();
230  }
231 
232  // Clean up any dead ops. The lowering inserts a few defensive
233  // `arc.state_read` ops that may remain unused. This cleans them up.
234  for (auto &op : llvm::make_early_inc_range(llvm::reverse(modelBlock)))
235  if (mlir::isOpTriviallyDead(&op))
236  op.erase();
237 
238  return success();
239 }
240 
241 /// Lower an op and its entire fan-in cone.
242 LogicalResult ModuleLowering::lowerOp(Operation *op) {
243  LLVM_DEBUG(llvm::dbgs() << "- Handling " << *op << "\n");
244 
245  // Pick in which phases the given operation has to perform some work.
246  SmallVector<Phase, 2> phases = {Phase::New};
247  if (isa<seq::InitialOp>(op))
248  phases = {Phase::Initial};
249  if (isa<llhd::FinalOp>(op))
250  phases = {Phase::Final};
251  if (isa<StateOp>(op))
252  phases = {Phase::Initial, Phase::New};
253 
254  for (auto phase : phases) {
255  if (loweredOps.contains({op, phase}))
256  return success();
257  opsWorklist.push_back(OpLowering(op, phase, *this));
258  opsSeen.insert({op, phase});
259  }
260 
261  auto dumpWorklist = [&] {
262  for (auto &opLowering : llvm::reverse(opsWorklist))
263  opLowering.op->emitRemark()
264  << "computing " << opLowering.phase << " phase here";
265  };
266 
267  while (!opsWorklist.empty()) {
268  auto &opLowering = opsWorklist.back();
269 
270  // Collect an initial list of operands that need to be lowered.
271  if (opLowering.initial) {
272  if (failed(opLowering.lower())) {
273  dumpWorklist();
274  return failure();
275  }
276  std::reverse(opLowering.pending.begin(), opLowering.pending.end());
277  opLowering.initial = false;
278  }
279 
280  // Push operands onto the worklist.
281  if (!opLowering.pending.empty()) {
282  auto [defOp, phase] = opLowering.pending.pop_back_val();
283  if (loweredOps.contains({defOp, phase}))
284  continue;
285  if (!opsSeen.insert({defOp, phase}).second) {
286  defOp->emitOpError("is on a combinational loop");
287  dumpWorklist();
288  return failure();
289  }
290  opsWorklist.push_back(OpLowering(defOp, phase, *this));
291  continue;
292  }
293 
294  // At this point all operands are available and the op itself can be
295  // lowered.
296  LLVM_DEBUG(llvm::dbgs() << " - Lowering " << opLowering.phase << " "
297  << *opLowering.op << "\n");
298  if (failed(opLowering.lower())) {
299  dumpWorklist();
300  return failure();
301  }
302  loweredOps.insert({opLowering.op, opLowering.phase});
303  opsSeen.erase({opLowering.op, opLowering.phase});
304  opsWorklist.pop_back();
305  }
306 
307  return success();
308 }
309 
310 /// Return the `arc.alloc_state` associated with the given state op result.
311 /// Creates the allocation op if it does not yet exist.
312 Value ModuleLowering::getAllocatedState(OpResult result) {
313  if (auto alloc = allocatedStates.lookup(result))
314  return alloc;
315 
316  // Handle memories.
317  if (auto memOp = dyn_cast<MemoryOp>(result.getOwner())) {
318  auto alloc = allocBuilder.create<AllocMemoryOp>(
319  memOp.getLoc(), memOp.getType(), storageArg, memOp->getAttrs());
320  allocatedStates.insert({result, alloc});
321  return alloc;
322  }
323 
324  // Create the allocation op.
325  auto alloc = allocBuilder.create<AllocStateOp>(
326  result.getLoc(), StateType::get(result.getType()), storageArg);
327  allocatedStates.insert({result, alloc});
328 
329  // HACK: If the result comes from an instance op, add the instance and port
330  // name as an attribute to the allocation. This will make it show up in the C
331  // headers later. Get rid of this once we have proper debug dialect support.
332  if (auto instOp = dyn_cast<InstanceOp>(result.getOwner()))
333  alloc->setAttr(
334  "name", builder.getStringAttr(
335  instOp.getInstanceName() + "/" +
336  instOp.getOutputName(result.getResultNumber()).getValue()));
337 
338  // HACK: If the result comes from an op that has a "names" attribute, use that
339  // as a name for the allocation. This should no longer be necessary once we
340  // properly support the Debug dialect.
341  if (isa<StateOp, sim::DPICallOp>(result.getOwner()))
342  if (auto names = result.getOwner()->getAttrOfType<ArrayAttr>("names"))
343  if (result.getResultNumber() < names.size())
344  alloc->setAttr("name", names[result.getResultNumber()]);
345 
346  return alloc;
347 }
348 
349 /// Allocate the necessary storage, reads, writes, and comparisons to detect a
350 /// rising edge on a clock value.
351 Value ModuleLowering::detectPosedge(Value clock) {
352  auto loc = clock.getLoc();
353  if (isa<seq::ClockType>(clock.getType()))
354  clock = builder.create<seq::FromClockOp>(loc, clock);
355 
356  // Allocate storage to store the previous clock value.
357  auto oldStorage = allocBuilder.create<AllocStateOp>(
358  loc, StateType::get(builder.getI1Type()), storageArg);
359 
360  // Read the old clock value from storage and write the new clock value to
361  // storage.
362  auto oldClock = builder.create<StateReadOp>(loc, oldStorage);
363  builder.create<StateWriteOp>(loc, oldStorage, clock, Value{});
364 
365  // Detect a rising edge.
366  auto edge = builder.create<comb::XorOp>(loc, oldClock, clock);
367  return builder.create<comb::AndOp>(loc, edge, clock);
368 }
369 
370 /// Get the builder appropriate for the given phase.
371 OpBuilder &ModuleLowering::getBuilder(Phase phase) {
372  switch (phase) {
373  case Phase::Initial:
374  return initialBuilder;
375  case Phase::Old:
376  case Phase::New:
377  return builder;
378  case Phase::Final:
379  return finalBuilder;
380  }
381 }
382 
383 /// Get the lowered value, or emit a diagnostic and return null.
384 Value ModuleLowering::requireLoweredValue(Value value, Phase phase,
385  Location useLoc) {
386  if (auto lowered = loweredValues.lookup({value, phase}))
387  return lowered;
388  auto d = emitError(value.getLoc()) << "value has not been lowered";
389  d.attachNote(useLoc) << "value used here";
390  return {};
391 }
392 
393 //===----------------------------------------------------------------------===//
394 // Operation Lowering
395 //===----------------------------------------------------------------------===//
396 
397 /// Create a new `scf.if` operation with the given builder, or reuse a previous
398 /// `scf.if` if the builder's insertion point is located right after it.
399 static scf::IfOp createOrReuseIf(OpBuilder &builder, Value condition,
400  bool withElse) {
401  if (auto ip = builder.getInsertionPoint(); ip != builder.getBlock()->begin())
402  if (auto ifOp = dyn_cast<scf::IfOp>(*std::prev(ip)))
403  if (ifOp.getCondition() == condition)
404  return ifOp;
405  return builder.create<scf::IfOp>(condition.getLoc(), condition, withElse);
406 }
407 
408 /// This function is called from the lowering worklist in order to perform a
409 /// depth-first traversal of the surrounding module. These functions call
410 /// `lowerValue` to mark their operands as dependencies in the depth-first
411 /// traversal, and to map them to the lowered value in one go.
412 LogicalResult OpLowering::lower() {
413  return TypeSwitch<Operation *, LogicalResult>(op)
414  // Operations with special lowering.
415  .Case<StateOp, sim::DPICallOp, MemoryOp, TapOp, InstanceOp, hw::OutputOp,
416  seq::InitialOp, llhd::FinalOp>([&](auto op) { return lower(op); })
417 
418  // Operations that should be skipped entirely and never land on the
419  // worklist to be lowered.
420  .Case<MemoryWritePortOp, MemoryReadPortOp>([&](auto op) {
421  assert(false && "ports must be lowered by memory op");
422  return failure();
423  })
424 
425  // All other ops are simply cloned into the lowered model.
426  .Default([&](auto) { return lowerDefault(); });
427 }
428 
429 /// Called for all operations for which there is no special lowering. Simply
430 /// clones the operation.
431 LogicalResult OpLowering::lowerDefault() {
432  // Make sure that all operand values are lowered first.
433  IRMapping mapping;
434  auto anyFailed = false;
435  op->walk([&](Operation *nestedOp) {
436  for (auto operand : nestedOp->getOperands()) {
437  if (op->isAncestor(operand.getParentBlock()->getParentOp()))
438  continue;
439  auto lowered = lowerValue(operand, phase);
440  if (!lowered)
441  anyFailed = true;
442  mapping.map(operand, lowered);
443  }
444  });
445  if (initial)
446  return success();
447  if (anyFailed)
448  return failure();
449 
450  // Clone the operation.
451  auto *clonedOp = module.getBuilder(phase).clone(*op, mapping);
452 
453  // Keep track of the results.
454  for (auto [oldResult, newResult] :
455  llvm::zip(op->getResults(), clonedOp->getResults()))
456  module.loweredValues[{oldResult, phase}] = newResult;
457 
458  return success();
459 }
460 
461 /// Lower a state to a corresponding storage allocation and `write` of the
462 /// state's new value to it. This function uses the `Old` phase to get the
463 /// values at the state input before the current update, and then uses them to
464 /// compute the `New` value.
465 LogicalResult OpLowering::lower(StateOp op) {
466  // Handle initialization.
467  if (phase == Phase::Initial) {
468  // Ensure the initial values of the register have been lowered before.
469  if (initial) {
470  for (auto initial : op.getInitials())
471  lowerValue(initial, Phase::Initial);
472  return success();
473  }
474 
475  // Write the initial values to the allocated storage in the initial block.
476  if (op.getInitials().empty())
477  return success();
478  for (auto [initial, result] :
479  llvm::zip(op.getInitials(), op.getResults())) {
480  auto value = lowerValue(initial, Phase::Initial);
481  if (!value)
482  return failure();
483  auto state = module.getAllocatedState(result);
484  if (!state)
485  return failure();
486  module.initialBuilder.create<StateWriteOp>(value.getLoc(), state, value,
487  Value{});
488  }
489  return success();
490  }
491 
492  assert(phase == Phase::New);
493 
494  if (!initial) {
495  if (!op.getClock())
496  return op.emitOpError() << "must have a clock";
497  if (op.getLatency() > 1)
498  return op.emitOpError("latencies > 1 not supported yet");
499  }
500 
501  return lowerStateful(op.getClock(), op.getEnable(), op.getReset(),
502  op.getInputs(), op.getResults(), [&](ValueRange inputs) {
503  return module.builder
504  .create<CallOp>(op.getLoc(), op.getResultTypes(),
505  op.getArc(), inputs)
506  .getResults();
507  });
508 }
509 
510 /// Lower a DPI call to a corresponding storage allocation and write of the
511 /// state's new value to it. This function uses the `Old` phase to get the
512 /// values at the state input before the current update, and then uses them to
513 /// compute the `New` value.
514 LogicalResult OpLowering::lower(sim::DPICallOp op) {
515  // Handle unclocked DPI calls.
516  if (!op.getClock()) {
517  // Make sure that all operands have been lowered.
518  SmallVector<Value> inputs;
519  for (auto operand : op.getInputs())
520  inputs.push_back(lowerValue(operand, phase));
521  if (initial)
522  return success();
523  if (llvm::is_contained(inputs, Value{}))
524  return failure();
525  if (op.getEnable())
526  return op.emitOpError() << "without clock cannot have an enable";
527 
528  // Lower the op to a regular function call.
529  auto callOp = module.getBuilder(phase).create<func::CallOp>(
530  op.getLoc(), op.getCalleeAttr(), op.getResultTypes(), inputs);
531  for (auto [oldResult, newResult] :
532  llvm::zip(op.getResults(), callOp.getResults()))
533  module.loweredValues[{oldResult, phase}] = newResult;
534  return success();
535  }
536 
537  assert(phase == Phase::New);
538 
539  return lowerStateful(op.getClock(), op.getEnable(), /*reset=*/{},
540  op.getInputs(), op.getResults(), [&](ValueRange inputs) {
541  return module.builder
542  .create<func::CallOp>(op.getLoc(),
543  op.getCalleeAttr(),
544  op.getResultTypes(), inputs)
545  .getResults();
546  });
547 }
548 
549 /// Lower a state to a corresponding storage allocation and `write` of the
550 /// state's new value to it. This function uses the `Old` phase to get the
551 /// values at the state input before the current update, and then uses them to
552 /// compute the `New` value.
553 LogicalResult OpLowering::lowerStateful(
554  Value clock, Value enable, Value reset, ValueRange inputs,
555  ResultRange results,
556  llvm::function_ref<ValueRange(ValueRange)> createMapping) {
557  // Ensure all operands are lowered before we lower the op itself. State ops
558  // are special in that they require the "old" value of their inputs and
559  // enable, in order to compute the updated "new" value. The clock needs to be
560  // the "new" value though, such that other states can act as a clock source.
561  if (initial) {
562  lowerValue(clock, Phase::New);
563  if (enable)
564  lowerValue(enable, Phase::Old);
565  if (reset)
566  lowerValue(reset, Phase::Old);
567  for (auto input : inputs)
568  lowerValue(input, Phase::Old);
569  return success();
570  }
571 
572  // Check if we're inserting right after an `if` op for the same clock edge, in
573  // which case we can reuse that op. Otherwise, create the new `if` op.
574  auto ifClockOp = createIfClockOp(clock);
575  if (!ifClockOp)
576  return failure();
577  OpBuilder::InsertionGuard guard(module.builder);
578  module.builder.setInsertionPoint(ifClockOp.thenYield());
579 
580  // Make sure we have the state storage available such that we can read and
581  // write from and to them.
582  SmallVector<Value> states;
583  for (auto result : results) {
584  auto state = module.getAllocatedState(result);
585  if (!state)
586  return failure();
587  states.push_back(state);
588  }
589 
590  // Handle the reset.
591  if (reset) {
592  // Check if we can reuse a previous reset value.
593  auto &[unloweredReset, loweredReset] = module.prevReset;
594  if (unloweredReset != reset ||
595  loweredReset.getParentBlock() != module.builder.getBlock()) {
596  unloweredReset = reset;
597  loweredReset = lowerValue(reset, Phase::Old);
598  if (!loweredReset)
599  return failure();
600  }
601 
602  // Check if we're inserting right after an if op for the same reset, in
603  // which case we can reuse that op. Otherwise create the new if op.
604  auto ifResetOp = createOrReuseIf(module.builder, loweredReset, true);
605  module.builder.setInsertionPoint(ifResetOp.thenYield());
606 
607  // Generate the zero value writes.
608  for (auto state : states) {
609  auto type = cast<StateType>(state.getType()).getType();
610  Value value = module.builder.create<ConstantOp>(
611  loweredReset.getLoc(),
612  module.builder.getIntegerType(hw::getBitWidth(type)), 0);
613  if (value.getType() != type)
614  value = module.builder.create<BitcastOp>(loweredReset.getLoc(), type,
615  value);
616  module.builder.create<StateWriteOp>(loweredReset.getLoc(), state, value,
617  Value{});
618  }
619  module.builder.setInsertionPoint(ifResetOp.elseYield());
620  }
621 
622  // Handle the enable.
623  if (enable) {
624  // Check if we can reuse a previous enable value.
625  auto &[unloweredEnable, loweredEnable] = module.prevEnable;
626  if (unloweredEnable != enable ||
627  loweredEnable.getParentBlock() != module.builder.getBlock()) {
628  unloweredEnable = enable;
629  loweredEnable = lowerValue(enable, Phase::Old);
630  if (!loweredEnable)
631  return failure();
632  }
633 
634  // Check if we're inserting right after an if op for the same enable, in
635  // which case we can reuse that op. Otherwise create the new if op.
636  auto ifEnableOp = createOrReuseIf(module.builder, loweredEnable, false);
637  module.builder.setInsertionPoint(ifEnableOp.thenYield());
638  }
639 
640  // Get the transfer function inputs. This potentially inserts read ops.
641  SmallVector<Value> loweredInputs;
642  for (auto input : inputs) {
643  auto lowered = lowerValue(input, Phase::Old);
644  if (!lowered)
645  return failure();
646  loweredInputs.push_back(lowered);
647  }
648 
649  // Compute the transfer function and write its results to the state's storage.
650  auto loweredResults = createMapping(loweredInputs);
651  for (auto [state, value] : llvm::zip(states, loweredResults))
652  module.builder.create<StateWriteOp>(value.getLoc(), state, value, Value{});
653 
654  // Since we just wrote the new state value to storage, insert read ops just
655  // before the if op that keep the old value around for any later ops that
656  // still need it.
657  module.builder.setInsertionPoint(ifClockOp);
658  for (auto [state, result] : llvm::zip(states, results)) {
659  auto oldValue = module.builder.create<StateReadOp>(result.getLoc(), state);
660  module.loweredValues[{result, Phase::Old}] = oldValue;
661  }
662 
663  return success();
664 }
665 
666 /// Lower a memory and its read and write ports to corresponding
667 /// `arc.memory_write` operations. Reads are also executed at this point and
668 /// stored in `loweredValues` for later operations to pick up.
669 LogicalResult OpLowering::lower(MemoryOp op) {
670  assert(phase == Phase::New);
671 
672  // Collect all the reads and writes.
673  SmallVector<MemoryReadPortOp> reads;
674  SmallVector<MemoryWritePortOp> writes;
675 
676  for (auto *user : op->getUsers()) {
677  if (auto read = dyn_cast<MemoryReadPortOp>(user)) {
678  reads.push_back(read);
679  } else if (auto write = dyn_cast<MemoryWritePortOp>(user)) {
680  writes.push_back(write);
681  } else {
682  auto d = op.emitOpError()
683  << "users must all be memory read or write port ops";
684  d.attachNote(user->getLoc())
685  << "but found " << user->getName() << " user here";
686  return d;
687  }
688  }
689 
690  // Ensure all operands are lowered before we lower the memory itself.
691  if (initial) {
692  for (auto read : reads)
693  lowerValue(read, Phase::Old);
694  for (auto write : writes) {
695  if (write.getClock())
696  lowerValue(write.getClock(), Phase::New);
697  for (auto input : write.getInputs())
698  lowerValue(input, Phase::Old);
699  }
700  return success();
701  }
702 
703  // Get the allocated storage for the memory.
704  auto state = module.getAllocatedState(op->getResult(0));
705 
706  // Since we are going to write new values into storage, insert read ops that
707  // keep the old values around for any later ops that still need them.
708  for (auto read : reads) {
709  auto oldValue = lowerValue(read, Phase::Old);
710  if (!oldValue)
711  return failure();
712  module.loweredValues[{read, Phase::Old}] = oldValue;
713  }
714 
715  // Lower the writes.
716  for (auto write : writes) {
717  if (!write.getClock())
718  return write.emitOpError() << "must have a clock";
719  if (write.getLatency() > 1)
720  return write.emitOpError("latencies > 1 not supported yet");
721 
722  // Create the if op for the clock edge.
723  auto ifClockOp = createIfClockOp(write.getClock());
724  if (!ifClockOp)
725  return failure();
726  OpBuilder::InsertionGuard guard(module.builder);
727  module.builder.setInsertionPoint(ifClockOp.thenYield());
728 
729  // Call the arc that computes the address, data, and enable.
730  SmallVector<Value> inputs;
731  for (auto input : write.getInputs()) {
732  auto lowered = lowerValue(input, Phase::Old);
733  if (!lowered)
734  return failure();
735  inputs.push_back(lowered);
736  }
737  auto callOp = module.builder.create<CallOp>(
738  write.getLoc(), write.getArcResultTypes(), write.getArc(), inputs);
739 
740  // If the write has an enable, wrap the remaining logic in an if op.
741  if (write.getEnable()) {
742  auto ifEnableOp = createOrReuseIf(
743  module.builder, callOp.getResult(write.getEnableIdx()), false);
744  module.builder.setInsertionPoint(ifEnableOp.thenYield());
745  }
746 
747  // If the write is masked, read the current
748  // value in the memory and merge it with the updated value.
749  auto address = callOp.getResult(write.getAddressIdx());
750  auto data = callOp.getResult(write.getDataIdx());
751  if (write.getMask()) {
752  auto mask = callOp.getResult(write.getMaskIdx(write.getEnable()));
753  auto maskInv = module.builder.createOrFold<comb::XorOp>(
754  write.getLoc(), mask,
755  module.builder.create<ConstantOp>(write.getLoc(), mask.getType(), -1),
756  true);
757  auto oldData =
758  module.builder.create<MemoryReadOp>(write.getLoc(), state, address);
759  auto oldMasked = module.builder.create<comb::AndOp>(
760  write.getLoc(), maskInv, oldData, true);
761  auto newMasked =
762  module.builder.create<comb::AndOp>(write.getLoc(), mask, data, true);
763  data = module.builder.create<comb::OrOp>(write.getLoc(), oldMasked,
764  newMasked, true);
765  }
766 
767  // Actually write to the memory.
768  module.builder.create<MemoryWriteOp>(write.getLoc(), state, address,
769  Value{}, data);
770  }
771 
772  return success();
773 }
774 
775 /// Lower a tap by allocating state storage for it and writing the current value
776 /// observed by the tap to it.
777 LogicalResult OpLowering::lower(TapOp op) {
778  assert(phase == Phase::New);
779 
780  auto value = lowerValue(op.getValue(), phase);
781  if (initial)
782  return success();
783  if (!value)
784  return failure();
785 
786  auto &state = module.allocatedTaps[op];
787  if (!state) {
788  auto alloc = module.allocBuilder.create<AllocStateOp>(
789  op.getLoc(), StateType::get(value.getType()), module.storageArg, true);
790  alloc->setAttr("name", op.getNameAttr());
791  state = alloc;
792  }
793  module.builder.create<StateWriteOp>(op.getLoc(), state, value, Value{});
794  return success();
795 }
796 
797 /// Lower an instance by allocating state storage for each of its inputs and
798 /// writing the current value into that storage. This makes instance inputs
799 /// behave like outputs of the top-level module.
800 LogicalResult OpLowering::lower(InstanceOp op) {
801  assert(phase == Phase::New);
802 
803  // Get the current values flowing into the instance's inputs.
804  SmallVector<Value> values;
805  for (auto operand : op.getOperands())
806  values.push_back(lowerValue(operand, Phase::New));
807  if (initial)
808  return success();
809  if (llvm::is_contained(values, Value{}))
810  return failure();
811 
812  // Then allocate storage for each instance input and assign the corresponding
813  // value.
814  for (auto [value, name] : llvm::zip(values, op.getArgNames())) {
815  auto state = module.allocBuilder.create<AllocStateOp>(
816  value.getLoc(), StateType::get(value.getType()), module.storageArg);
817  state->setAttr("name", module.builder.getStringAttr(
818  op.getInstanceName() + "/" +
819  cast<StringAttr>(name).getValue()));
820  module.builder.create<StateWriteOp>(value.getLoc(), state, value, Value{});
821  }
822 
823  // HACK: Also ensure that storage has been allocated for all outputs.
824  // Otherwise only the actually used instance outputs would be allocated, which
825  // would make the optimization user-visible. Remove this once we use the debug
826  // dialect.
827  for (auto result : op.getResults())
828  module.getAllocatedState(result);
829 
830  return success();
831 }
832 
833 /// Lower the main module's outputs by allocating storage for each and then
834 /// writing the current value into that storage.
835 LogicalResult OpLowering::lower(hw::OutputOp op) {
836  assert(phase == Phase::New);
837 
838  // First get the current value of all outputs.
839  SmallVector<Value> values;
840  for (auto operand : op.getOperands())
841  values.push_back(lowerValue(operand, Phase::New));
842  if (initial)
843  return success();
844  if (llvm::is_contained(values, Value{}))
845  return failure();
846 
847  // Then allocate storage for each output and assign the corresponding value.
848  for (auto [value, name] :
849  llvm::zip(values, module.moduleOp.getOutputNames())) {
850  auto state = module.allocBuilder.create<RootOutputOp>(
851  value.getLoc(), StateType::get(value.getType()), cast<StringAttr>(name),
852  module.storageArg);
853  module.builder.create<StateWriteOp>(value.getLoc(), state, value, Value{});
854  }
855  return success();
856 }
857 
858 /// Lower `seq.initial` ops by inlining them into the `arc.initial` op.
859 LogicalResult OpLowering::lower(seq::InitialOp op) {
860  assert(phase == Phase::Initial);
861 
862  // First get the initial value of all operands.
863  SmallVector<Value> operands;
864  for (auto operand : op.getOperands())
865  operands.push_back(lowerValue(operand, Phase::Initial));
866  if (initial)
867  return success();
868  if (llvm::is_contained(operands, Value{}))
869  return failure();
870 
871  // Expose the `seq.initial` operands as values for the block arguments.
872  for (auto [arg, operand] : llvm::zip(op.getBody().getArguments(), operands))
873  module.loweredValues[{arg, Phase::Initial}] = operand;
874 
875  // Lower each op in the body.
876  for (auto &bodyOp : op.getOps()) {
877  if (isa<seq::YieldOp>(bodyOp))
878  continue;
879 
880  // Clone the operation.
881  auto *clonedOp = module.initialBuilder.clone(bodyOp);
882  auto result = clonedOp->walk([&](Operation *nestedClonedOp) {
883  for (auto &operand : nestedClonedOp->getOpOperands()) {
884  if (clonedOp->isAncestor(operand.get().getParentBlock()->getParentOp()))
885  continue;
886  auto value = module.requireLoweredValue(operand.get(), Phase::Initial,
887  nestedClonedOp->getLoc());
888  if (!value)
889  return WalkResult::interrupt();
890  operand.set(value);
891  }
892  return WalkResult::advance();
893  });
894  if (result.wasInterrupted())
895  return failure();
896 
897  // Keep track of the results.
898  for (auto [result, lowered] :
899  llvm::zip(bodyOp.getResults(), clonedOp->getResults()))
900  module.loweredValues[{result, Phase::Initial}] = lowered;
901  }
902 
903  // Expose the operands of `seq.yield` as results from the initial op.
904  auto *terminator = op.getBodyBlock()->getTerminator();
905  for (auto [result, operand] :
906  llvm::zip(op.getResults(), terminator->getOperands())) {
907  auto value = module.requireLoweredValue(operand, Phase::Initial,
908  terminator->getLoc());
909  if (!value)
910  return failure();
911  module.loweredValues[{result, Phase::Initial}] = value;
912  }
913 
914  return success();
915 }
916 
917 /// Lower `llhd.final` ops into `scf.execute_region` ops in the `arc.final` op.
918 LogicalResult OpLowering::lower(llhd::FinalOp op) {
919  assert(phase == Phase::Final);
920 
921  // Determine the uses of values defined outside the op.
922  SmallVector<Value> externalOperands;
923  op.walk([&](Operation *nestedOp) {
924  for (auto value : nestedOp->getOperands())
925  if (!op->isAncestor(value.getParentBlock()->getParentOp()))
926  externalOperands.push_back(value);
927  });
928 
929  // Make sure that all uses of external values are lowered first.
930  IRMapping mapping;
931  for (auto operand : externalOperands) {
932  auto lowered = lowerValue(operand, Phase::Final);
933  if (!initial && !lowered)
934  return failure();
935  mapping.map(operand, lowered);
936  }
937  if (initial)
938  return success();
939 
940  // Handle the simple case where the final op contains only one block, which we
941  // can inline directly.
942  if (op.getBody().hasOneBlock()) {
943  for (auto &bodyOp : op.getBody().front().without_terminator())
944  module.finalBuilder.clone(bodyOp, mapping);
945  return success();
946  }
947 
948  // Create a new `scf.execute_region` op and clone the entire `llhd.final` body
949  // region into it. Replace `llhd.halt` ops with `scf.yield`.
950  auto executeOp = module.finalBuilder.create<scf::ExecuteRegionOp>(
951  op.getLoc(), TypeRange{});
952  module.finalBuilder.cloneRegionBefore(op.getBody(), executeOp.getRegion(),
953  executeOp.getRegion().begin(), mapping);
954  executeOp.walk([&](llhd::HaltOp op) {
955  OpBuilder(op).create<scf::YieldOp>(op.getLoc());
956  op.erase();
957  });
958 
959  return success();
960 }
961 
962 /// Create the operations necessary to detect a posedge on the given clock,
963 /// potentially reusing a previous posedge detection, and create an `scf.if`
964 /// operation for that posedge. This also tries to reuse an `scf.if` operation
965 /// immediately before the builder's insertion point if possible.
966 scf::IfOp OpLowering::createIfClockOp(Value clock) {
967  auto &posedge = module.loweredPosedges[clock];
968  if (!posedge) {
969  auto loweredClock = lowerValue(clock, Phase::New);
970  if (!loweredClock)
971  return {};
972  posedge = module.detectPosedge(loweredClock);
973  }
974  return createOrReuseIf(module.builder, posedge, false);
975 }
976 
977 //===----------------------------------------------------------------------===//
978 // Value Lowering
979 //===----------------------------------------------------------------------===//
980 
981 /// Lower a value being used by the current operation. This will mark the
982 /// defining operation as to be lowered first (through `addPending`) in most
983 /// cases. Some operations and values have special handling though. For example,
984 /// states and memory reads are immediately materialized as a new read op.
985 Value OpLowering::lowerValue(Value value, Phase phase) {
986  // Handle module inputs. They read the same in all phases.
987  if (auto arg = dyn_cast<BlockArgument>(value)) {
988  if (initial)
989  return {};
990  auto state = module.allocatedInputs[arg.getArgNumber()];
991  return module.getBuilder(phase).create<StateReadOp>(arg.getLoc(), state);
992  }
993 
994  // Check if the value has already been lowered.
995  if (auto lowered = module.loweredValues.lookup({value, phase}))
996  return lowered;
997 
998  // At this point the value is the result of an op. (Block arguments are
999  // handled above.)
1000  auto result = cast<OpResult>(value);
1001  auto *op = result.getOwner();
1002 
1003  // Special handling for some ops.
1004  if (auto instOp = dyn_cast<InstanceOp>(op))
1005  return lowerValue(instOp, result, phase);
1006  if (auto stateOp = dyn_cast<StateOp>(op))
1007  return lowerValue(stateOp, result, phase);
1008  if (auto dpiOp = dyn_cast<sim::DPICallOp>(op); dpiOp && dpiOp.getClock())
1009  return lowerValue(dpiOp, result, phase);
1010  if (auto readOp = dyn_cast<MemoryReadPortOp>(op))
1011  return lowerValue(readOp, result, phase);
1012  if (auto initialOp = dyn_cast<seq::InitialOp>(op))
1013  return lowerValue(initialOp, result, phase);
1014  if (auto castOp = dyn_cast<seq::FromImmutableOp>(op))
1015  return lowerValue(castOp, result, phase);
1016 
1017  // Otherwise we mark the defining operation as to be lowered first. This will
1018  // cause the lookup in `loweredValues` above to return a value the next time
1019  // (i.e. when initial is false).
1020  if (initial) {
1021  addPending(op, phase);
1022  return {};
1023  }
1024  emitError(result.getLoc()) << "value has not been lowered";
1025  return {};
1026 }
1027 
1028 /// Handle instance outputs. They behave essentially like a top-level module
1029 /// input, and read the same in all phases.
1030 Value OpLowering::lowerValue(InstanceOp op, OpResult result, Phase phase) {
1031  if (initial)
1032  return {};
1033  auto state = module.getAllocatedState(result);
1034  return module.getBuilder(phase).create<StateReadOp>(result.getLoc(), state);
1035 }
1036 
1037 /// Handle uses of a state. This creates an `arc.state_read` op to read from the
1038 /// state's storage. If the new value after all updates is requested, marks the
1039 /// state as to be lowered first (which will perform the writes). If the old
1040 /// value is requested, asserts that no new values have been written.
1041 Value OpLowering::lowerValue(StateOp op, OpResult result, Phase phase) {
1042  if (initial) {
1043  // Ensure that the new or initial value has been written by the lowering of
1044  // the state op before we attempt to read it.
1045  if (phase == Phase::New || phase == Phase::Initial)
1046  addPending(op, phase);
1047  return {};
1048  }
1049 
1050  // If we want to read the old value, no writes must have been lowered yet.
1051  if (phase == Phase::Old)
1052  assert(!module.loweredOps.contains({op, Phase::New}) &&
1053  "need old value but new value already written");
1054 
1055  auto state = module.getAllocatedState(result);
1056  return module.getBuilder(phase).create<StateReadOp>(result.getLoc(), state);
1057 }
1058 
1059 /// Handle uses of a DPI call. This creates an `arc.state_read` op to read from
1060 /// the state's storage. If the new value after all updates is requested, marks
1061 /// the state as to be lowered first (which will perform the writes). If the old
1062 /// value is requested, asserts that no new values have been written.
1063 Value OpLowering::lowerValue(sim::DPICallOp op, OpResult result, Phase phase) {
1064  if (initial) {
1065  // Ensure that the new or initial value has been written by the lowering of
1066  // the state op before we attempt to read it.
1067  if (phase == Phase::New || phase == Phase::Initial)
1068  addPending(op, phase);
1069  return {};
1070  }
1071 
1072  // If we want to read the old value, no writes must have been lowered yet.
1073  if (phase == Phase::Old)
1074  assert(!module.loweredOps.contains({op, Phase::New}) &&
1075  "need old value but new value already written");
1076 
1077  auto state = module.getAllocatedState(result);
1078  return module.getBuilder(phase).create<StateReadOp>(result.getLoc(), state);
1079 }
1080 
1081 /// Handle uses of a memory read operation. This creates an `arc.memory_read` op
1082 /// to read from the memory's storage. Similar to the `StateOp` handling
1083 /// otherwise.
1084 Value OpLowering::lowerValue(MemoryReadPortOp op, OpResult result,
1085  Phase phase) {
1086  auto memOp = op.getMemory().getDefiningOp<MemoryOp>();
1087  if (!memOp) {
1088  if (!initial)
1089  op->emitOpError() << "memory must be defined locally";
1090  return {};
1091  }
1092 
1093  auto address = lowerValue(op.getAddress(), phase);
1094  if (initial) {
1095  // Ensure that all new values are written before we attempt to read them.
1096  if (phase == Phase::New)
1097  addPending(memOp.getOperation(), Phase::New);
1098  return {};
1099  }
1100  if (!address)
1101  return {};
1102 
1103  if (phase == Phase::Old) {
1104  // If we want to read the old value, no writes must have been lowered yet.
1105  assert(!module.loweredOps.contains({memOp, Phase::New}) &&
1106  "need old memory value but new value already written");
1107  } else {
1108  assert(phase == Phase::New);
1109  }
1110 
1111  auto state = module.getAllocatedState(memOp->getResult(0));
1112  return module.getBuilder(phase).create<MemoryReadOp>(result.getLoc(), state,
1113  address);
1114 }
1115 
1116 /// Handle uses of `seq.initial` values computed during the initial phase. This
1117 /// ensures that the interesting value is stored into storage during the initial
1118 /// phase, and then reads it back using an `arc.state_read` op.
1119 Value OpLowering::lowerValue(seq::InitialOp op, OpResult result, Phase phase) {
1120  // Ensure the op has been lowered first.
1121  if (initial) {
1122  addPending(op, Phase::Initial);
1123  return {};
1124  }
1125  auto value = module.loweredValues.lookup({result, Phase::Initial});
1126  if (!value) {
1127  emitError(result.getLoc()) << "value has not been lowered";
1128  return {};
1129  }
1130 
1131  // If we are using the value of `seq.initial` in the initial phase directly,
1132  // there is no need to write it so any temporary storage.
1133  if (phase == Phase::Initial)
1134  return value;
1135 
1136  // If necessary, allocate storage for the computed value and store it in the
1137  // initial phase.
1138  auto &state = module.allocatedInitials[result];
1139  if (!state) {
1140  state = module.allocBuilder.create<AllocStateOp>(
1141  value.getLoc(), StateType::get(value.getType()), module.storageArg);
1142  OpBuilder::InsertionGuard guard(module.initialBuilder);
1143  module.initialBuilder.setInsertionPointAfterValue(value);
1144  module.initialBuilder.create<StateWriteOp>(value.getLoc(), state, value,
1145  Value{});
1146  }
1147 
1148  // Read back the value computed during the initial phase.
1149  return module.getBuilder(phase).create<StateReadOp>(state.getLoc(), state);
1150 }
1151 
1152 /// The `seq.from_immutable` cast is just a passthrough.
1153 Value OpLowering::lowerValue(seq::FromImmutableOp op, OpResult result,
1154  Phase phase) {
1155  return lowerValue(op.getInput(), phase);
1156 }
1157 
1158 /// Mark a value as to be lowered before the current op.
1159 void OpLowering::addPending(Value value, Phase phase) {
1160  auto *defOp = value.getDefiningOp();
1161  assert(defOp && "block args should never be marked as a dependency");
1162  addPending(defOp, phase);
1163 }
1164 
1165 /// Mark an operation as to be lowered before the current op. This adds that
1166 /// operation to the `pending` list if the operation has not yet been lowered.
1167 void OpLowering::addPending(Operation *op, Phase phase) {
1168  auto pair = std::make_pair(op, phase);
1169  if (!module.loweredOps.contains(pair))
1170  if (!llvm::is_contained(pending, pair))
1171  pending.push_back(pair);
1172 }
1173 
1174 //===----------------------------------------------------------------------===//
1175 // Pass Infrastructure
1176 //===----------------------------------------------------------------------===//
1177 
1178 namespace {
1179 struct LowerStatePass : public arc::impl::LowerStatePassBase<LowerStatePass> {
1180  using LowerStatePassBase::LowerStatePassBase;
1181  void runOnOperation() override;
1182 };
1183 } // namespace
1184 
1185 void LowerStatePass::runOnOperation() {
1186  auto op = getOperation();
1187  for (auto moduleOp : llvm::make_early_inc_range(op.getOps<HWModuleOp>())) {
1188  if (failed(ModuleLowering(moduleOp).run()))
1189  return signalPassFailure();
1190  moduleOp.erase();
1191  }
1192  for (auto extModuleOp :
1193  llvm::make_early_inc_range(op.getOps<HWModuleExternOp>()))
1194  extModuleOp.erase();
1195 }
assert(baseType &&"element must be base type")
static scf::IfOp createOrReuseIf(OpBuilder &builder, Value condition, bool withElse)
Create a new scf.if operation with the given builder, or reuse a previous scf.if if the builder's ins...
Definition: LowerState.cpp:399
def create(data_type, value)
Definition: hw.py:441
def create(data_type, value)
Definition: hw.py:433
Direction get(bool isOutput)
Returns an output direction if isOutput is true, otherwise returns an input direction.
Definition: CalyxOps.cpp:55
int64_t getBitWidth(mlir::Type type)
Return the hardware bit width of a type.
Definition: HWTypes.cpp:110
The InstanceGraph op interface, see InstanceGraphInterface.td for more details.
Definition: DebugAnalysis.h:21
raw_ostream & operator<<(raw_ostream &os, const FVInt &value)
Definition: FVInt.h:653
int run(Type[Generator] generator=CppGenerator, cmdline_args=sys.argv)
Definition: codegen.py:121
Definition: hw.py:1