CIRCT 23.0.0git
Loading...
Searching...
No Matches
esitester.cpp
Go to the documentation of this file.
1//===- esitester.cpp - ESI accelerator test/example tool ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DO NOT EDIT!
10// This file is distributed as part of an ESI runtime package. The source for
11// this file should always be modified within CIRCT
12// (lib/dialect/ESI/runtime/cpp/tools/esitester.cpp).
13//
14//===----------------------------------------------------------------------===//
15//
16// This application isn't a utility so much as a test driver for an ESI system.
17// It is also useful as an example of how to use the ESI C++ API. esiquery.cpp
18// is also useful as an example.
19//
20//===----------------------------------------------------------------------===//
21
22#include "esi/Accelerator.h"
23#include "esi/CLI.h"
24#include "esi/Manifest.h"
25#include "esi/Services.h"
26#include "esi/TypedPorts.h"
27
28#include <algorithm>
29#include <atomic>
30#include <chrono>
31#include <cstdlib>
32#include <cstring>
33#include <future>
34#include <iostream>
35#include <map>
36#include <memory>
37#include <random>
38#include <span>
39#include <sstream>
40#include <stdexcept>
41#include <vector>
42
43using namespace esi;
44
45// Forward declarations of test functions.
47 uint32_t iterations);
49 const std::vector<uint32_t> &widths, bool write,
50 bool read);
52 uint32_t xferCount,
53 const std::vector<uint32_t> &widths, bool read,
54 bool write);
56 const std::vector<uint32_t> &widths, bool read, bool write);
58 const std::vector<uint32_t> &widths,
59 uint32_t xferCount, bool read, bool write);
61 uint32_t iterations, bool pipeline);
63 Accelerator *, uint32_t width,
64 uint32_t xferCount, bool read,
65 bool write);
67 uint32_t addAmt, uint32_t numItems);
69 uint32_t addAmt, uint32_t numItems);
71 uint32_t xTrans, uint32_t yTrans,
72 uint32_t numCoords);
74 uint32_t xTrans, uint32_t yTrans,
75 uint32_t numCoords, size_t batchSizeLimit);
77 uint32_t xTrans, uint32_t yTrans,
78 uint32_t numCoords);
80 uint32_t iterations);
82
// Default transfer widths offered by the width-parameterised subcommands, and
// a helper that renders them for the CLI help text.
constexpr std::array<uint32_t, 5> defaultWidths = {32, 64, 128, 256, 512};

/// Render `defaultWidths` as a comma-separated list with no spaces and no
/// trailing separator (e.g. "32,64,128,256,512").
static std::string defaultWidthsStr() {
  std::string joined;
  for (uint32_t width : defaultWidths) {
    // Separator goes before every element except the first.
    if (!joined.empty())
      joined += ",";
    joined += std::to_string(width);
  }
  return joined;
}
94
/// Format a bandwidth given in bytes per second using decimal (power-of-1000)
/// units.
///
/// The largest unit among GB/s, MB/s and KB/s whose threshold the value meets
/// is selected; anything below 1e3 is reported in plain B/s. The number is
/// always printed in fixed notation with two decimals, followed by a space and
/// the unit.
static std::string formatBandwidth(double bytesPerSec) {
  struct Scale {
    double threshold;
    const char *unit;
  };
  // Ordered from largest to smallest so the first match wins.
  static constexpr Scale scales[] = {
      {1e9, "GB/s"}, {1e6, "MB/s"}, {1e3, "KB/s"}};

  const char *unit = "B/s";
  double value = bytesPerSec;
  for (const Scale &scale : scales) {
    if (bytesPerSec >= scale.threshold) {
      unit = scale.unit;
      value = bytesPerSec / scale.threshold;
      break;
    }
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(2);
  oss << value << " " << unit;
  return oss.str();
}
115
/// Human-readable size from a byte count, using binary (power-of-1024) steps
/// with the labels B, KB, MB, GB and TB.
///
/// Plain byte counts are printed without decimals; every larger unit is printed
/// with two decimals. Values beyond the last unit stay in TB (so the number may
/// exceed 1024).
static std::string humanBytes(uint64_t bytes) {
  static constexpr const char *units[] = {"B", "KB", "MB", "GB", "TB"};
  // Derive the upper bound from the table so the two cannot drift apart.
  constexpr std::size_t lastUnit = sizeof(units) / sizeof(units[0]) - 1;

  double scaled = static_cast<double>(bytes);
  std::size_t unitIdx = 0;
  while (scaled >= 1024.0 && unitIdx < lastUnit) {
    scaled /= 1024.0;
    ++unitIdx;
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(unitIdx == 0 ? 0 : 2);
  oss << scaled << " " << units[unitIdx];
  return oss.str();
}
131
/// Human-readable duration from a count of microseconds.
///
///   - below 1000 us:  integer microseconds, e.g. "999 us"
///   - below 1000 ms:  milliseconds with 2 decimals (< 10 ms), 1 decimal
///                     (< 100 ms) or none, e.g. "1.50 ms", "12.3 ms", "123 ms"
///   - otherwise:      seconds with 3 decimals (< 10 s) or 2 decimals
///
/// The unit is chosen before rounding, so a value just under a boundary can
/// print as the boundary itself (e.g. 999600 us prints as "1000 ms").
static std::string humanTimeUS(uint64_t us) {
  if (us < 1000)
    return std::to_string(us) + " us";

  // Shared fixed-point rendering for the millisecond and second branches.
  const auto render = [](double value, int digits, const char *suffix) {
    std::ostringstream oss;
    oss.setf(std::ios::fixed);
    oss.precision(digits);
    oss << value << suffix;
    return oss.str();
  };

  const double ms = us / 1000.0;
  if (ms < 1000.0)
    return render(ms, ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0), " ms");

  const double sec = ms / 1000.0;
  return render(sec, sec < 10.0 ? 3 : 2, " s");
}
151
// MSVC does not implement std::aligned_alloc, even though it's part of the
// C++17 standard. Provide a compatibility layer.
//
/// Allocate at least `size` bytes aligned to `alignment`.
///
/// On MSVC this forwards to `_aligned_malloc`. Elsewhere it uses
/// `std::aligned_alloc`, which requires the requested size to be an integral
/// multiple of the alignment; the size is therefore rounded up to the next
/// multiple before the call so arbitrary sizes are accepted.
///
/// The returned pointer must be released with `alignedFreeCompat`.
///
/// @throws std::bad_alloc if the allocation fails or the rounded-up size would
///         overflow `std::size_t`.
static void *alignedAllocCompat(std::size_t alignment, std::size_t size) {
#if defined(_MSC_VER)
  void *ptr = _aligned_malloc(size, alignment);
#else
  std::size_t paddedSize = size;
  if (alignment != 0) {
    const std::size_t remainder = size % alignment;
    if (remainder != 0) {
      const std::size_t pad = alignment - remainder;
      // Guard the round-up against wrapping around.
      if (size > static_cast<std::size_t>(-1) - pad)
        throw std::bad_alloc();
      paddedSize = size + pad;
    }
  }
  void *ptr = std::aligned_alloc(alignment, paddedSize);
#endif
  if (!ptr)
    throw std::bad_alloc();
  return ptr;
}
167
/// Release memory obtained from `alignedAllocCompat`.
///
/// Uses `_aligned_free` on MSVC (the counterpart of `_aligned_malloc`) and
/// `std::free` everywhere else.
static void alignedFreeCompat(void *ptr) {
#if defined(_MSC_VER)
  _aligned_free(ptr);
#else
  std::free(ptr);
#endif
}
175
176int main(int argc, const char *argv[]) {
177 CliParser cli("esitester");
178 cli.description("Test an ESI system running the ESI tester image.");
179 cli.require_subcommand(1);
180
181 CLI::App *callback_test =
182 cli.add_subcommand("callback", "initiate callback test");
183 uint32_t cb_iters = 1;
184 callback_test->add_option("-i,--iters", cb_iters,
185 "Number of iterations to run");
186
187 CLI::App *hostmemtestSub =
188 cli.add_subcommand("hostmem", "Run the host memory test");
189 bool hmRead = false;
190 bool hmWrite = false;
191 std::vector<uint32_t> hostmemWidths(defaultWidths.begin(),
192 defaultWidths.end());
193 hostmemtestSub->add_flag("-w,--write", hmWrite,
194 "Enable host memory write test");
195 hostmemtestSub->add_flag("-r,--read", hmRead, "Enable host memory read test");
196 hostmemtestSub->add_option(
197 "--widths", hostmemWidths,
198 "Hostmem test widths (default: " + defaultWidthsStr() + ")");
199
200 CLI::App *dmatestSub = cli.add_subcommand("dma", "Run the DMA test");
201 bool dmaRead = false;
202 bool dmaWrite = false;
203 std::vector<uint32_t> dmaWidths(defaultWidths.begin(), defaultWidths.end());
204 dmatestSub->add_flag("-w,--write", dmaWrite, "Enable dma write test");
205 dmatestSub->add_flag("-r,--read", dmaRead, "Enable dma read test");
206 dmatestSub->add_option("--widths", dmaWidths,
207 "DMA test widths (default: " + defaultWidthsStr() +
208 ")");
209
210 CLI::App *bandwidthSub =
211 cli.add_subcommand("bandwidth", "Run the bandwidth test");
212 uint32_t xferCount = 1000;
213 bandwidthSub->add_option("-c,--count", xferCount,
214 "Number of transfers to perform");
215 bool bandwidthRead = false;
216 bool bandwidthWrite = false;
217 std::vector<uint32_t> bandwidthWidths(defaultWidths.begin(),
218 defaultWidths.end());
219 bandwidthSub->add_option("--widths", bandwidthWidths,
220 "Width of the transfers to perform (default: " +
221 defaultWidthsStr() + ")");
222 bandwidthSub->add_flag("-w,--write", bandwidthWrite,
223 "Enable bandwidth write");
224 bandwidthSub->add_flag("-r,--read", bandwidthRead, "Enable bandwidth read");
225
226 CLI::App *hostmembwSub =
227 cli.add_subcommand("hostmembw", "Run the host memory bandwidth test");
228 uint32_t hmBwCount = 1000;
229 bool hmBwRead = false;
230 bool hmBwWrite = false;
231 std::vector<uint32_t> hmBwWidths(defaultWidths.begin(), defaultWidths.end());
232 hostmembwSub->add_option("-c,--count", hmBwCount,
233 "Number of hostmem transfers");
234 hostmembwSub->add_option(
235 "--widths", hmBwWidths,
236 "Hostmem bandwidth widths (default: " + defaultWidthsStr() + ")");
237 hostmembwSub->add_flag("-w,--write", hmBwWrite,
238 "Measure hostmem write bandwidth");
239 hostmembwSub->add_flag("-r,--read", hmBwRead,
240 "Measure hostmem read bandwidth");
241
242 CLI::App *loopbackSub =
243 cli.add_subcommand("loopback", "Test LoopbackInOutAdd function service");
244 uint32_t loopbackIters = 10;
245 bool loopbackPipeline = false;
246 loopbackSub->add_option("-i,--iters", loopbackIters,
247 "Number of function invocations (default 10)");
248 loopbackSub->add_flag("-p,--pipeline", loopbackPipeline,
249 "Pipeline all calls then collect results");
250
251 CLI::App *aggBwSub = cli.add_subcommand(
252 "aggbandwidth",
253 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
254 uint32_t aggWidth = 512;
255 uint32_t aggCount = 1000;
256 bool aggRead = false;
257 bool aggWrite = false;
258 aggBwSub->add_option(
259 "--width", aggWidth,
260 "Bit width (default 512; other widths ignored if absent)");
261 aggBwSub->add_option("-c,--count", aggCount, "Flits per unit (default 1000)");
262 aggBwSub->add_flag("-r,--read", aggRead, "Include read units");
263 aggBwSub->add_flag("-w,--write", aggWrite, "Include write units");
264
265 CLI::App *streamingAddSub = cli.add_subcommand(
266 "streaming_add", "Test StreamingAdder function service with list input");
267 uint32_t streamingAddAmt = 5;
268 uint32_t streamingNumItems = 5;
269 bool streamingTranslate = false;
270 streamingAddSub->add_option("-a,--add", streamingAddAmt,
271 "Amount to add to each element (default 5)");
272 streamingAddSub->add_option("-n,--num-items", streamingNumItems,
273 "Number of random items in the list (default 5)");
274 streamingAddSub->add_flag("-t,--translate", streamingTranslate,
275 "Use message translation (list translation)");
276
277 CLI::App *coordTranslateSub = cli.add_subcommand(
278 "translate_coords",
279 "Test CoordTranslator function service with list of coordinates");
280 uint32_t coordXTrans = 10;
281 uint32_t coordYTrans = 20;
282 uint32_t coordNumItems = 5;
283 coordTranslateSub->add_option("-x,--x-translation", coordXTrans,
284 "X translation amount (default 10)");
285 coordTranslateSub->add_option("-y,--y-translation", coordYTrans,
286 "Y translation amount (default 20)");
287 coordTranslateSub->add_option("-n,--num-coords", coordNumItems,
288 "Number of random coordinates (default 5)");
289
290 CLI::App *serialCoordTranslateSub = cli.add_subcommand(
291 "serial_coords",
292 "Test SerialCoordTranslator function service with list of coordinates");
293 uint32_t serialBatchSize = 240;
294 serialCoordTranslateSub->add_option("-x,--x-translation", coordXTrans,
295 "X translation amount (default 10)");
296 serialCoordTranslateSub->add_option("-y,--y-translation", coordYTrans,
297 "Y translation amount (default 20)");
298 serialCoordTranslateSub->add_option(
299 "-n,--num-coords", coordNumItems,
300 "Number of random coordinates (default 5)");
301 serialCoordTranslateSub
302 ->add_option("-b,--batch-size", serialBatchSize,
303 "Coordinates per header (default 240, max 65535)")
304 ->check(CLI::Range(1u, 0xFFFFu));
305
306 CLI::App *autoSerialCoordTranslateSub = cli.add_subcommand(
307 "auto_serial_coords",
308 "Test AutoSerialCoordTranslator (uses ListWindowToParallel/Serial "
309 "converters under the hood)");
310 uint32_t autoCoordXTrans = 10;
311 uint32_t autoCoordYTrans = 20;
312 uint32_t autoCoordNumItems = 5;
313 autoSerialCoordTranslateSub->add_option("-x,--x-translation", autoCoordXTrans,
314 "X translation amount (default 10)");
315 autoSerialCoordTranslateSub->add_option("-y,--y-translation", autoCoordYTrans,
316 "Y translation amount (default 20)");
317 autoSerialCoordTranslateSub->add_option(
318 "-n,--num-coords", autoCoordNumItems,
319 "Number of random coordinates (default 5)");
320
321 CLI::App *channelTestSub = cli.add_subcommand(
322 "channel", "Test ChannelService to_host and from_host");
323 uint32_t channelIters = 10;
324 channelTestSub->add_option("-i,--iters", channelIters,
325 "Number of loopback iterations (default 10)");
326
327 CLI::App *resetSub = cli.add_subcommand(
328 "reset", "Test the design reset feature (telemetry clears after reset)");
329
330 if (int rc = cli.esiParse(argc, argv))
331 return rc;
332 if (!cli.get_help_ptr()->empty())
333 return 0;
334
335 Context &ctxt = cli.getContext();
336 AcceleratorConnection *acc = cli.connect();
337 try {
338 const auto &info = *acc->getService<services::SysInfo>();
339 ctxt.getLogger().info("esitester", "Connected to accelerator.");
340 Manifest manifest(ctxt, info.getJsonManifest());
341 Accelerator *accel = manifest.buildAccelerator(*acc);
342 ctxt.getLogger().info("esitester", "Built accelerator.");
343 acc->getServiceThread()->addPoll(*accel);
344
345 if (*callback_test) {
346 callbackTest(acc, accel, cb_iters);
347 } else if (*hostmemtestSub) {
348 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
349 } else if (*loopbackSub) {
350 loopbackAddTest(acc, accel, loopbackIters, loopbackPipeline);
351 } else if (*dmatestSub) {
352 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
353 } else if (*bandwidthSub) {
354 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
355 bandwidthWrite);
356 } else if (*hostmembwSub) {
357 hostmemBandwidthTest(acc, accel, hmBwCount, hmBwWidths, hmBwRead,
358 hmBwWrite);
359 } else if (*aggBwSub) {
360 aggregateHostmemBandwidthTest(acc, accel, aggWidth, aggCount, aggRead,
361 aggWrite);
362 } else if (*streamingAddSub) {
363 if (streamingTranslate)
364 streamingAddTranslatedTest(acc, accel, streamingAddAmt,
365 streamingNumItems);
366 else
367 streamingAddTest(acc, accel, streamingAddAmt, streamingNumItems);
368 } else if (*coordTranslateSub) {
369 coordTranslateTest(acc, accel, coordXTrans, coordYTrans, coordNumItems);
370 } else if (*serialCoordTranslateSub) {
371 serialCoordTranslateTest(acc, accel, coordXTrans, coordYTrans,
372 coordNumItems, serialBatchSize);
373 } else if (*autoSerialCoordTranslateSub) {
374 autoSerialCoordTranslateTest(acc, accel, autoCoordXTrans, autoCoordYTrans,
375 autoCoordNumItems);
376 } else if (*channelTestSub) {
377 channelTest(acc, accel, channelIters);
378 } else if (*resetSub) {
379 resetTest(acc, accel);
380 }
381
382 acc->disconnect();
383 } catch (std::exception &e) {
384 ctxt.getLogger().error("esitester", e.what());
385 acc->disconnect();
386 return -1;
387 }
388 std::cout << "Exiting successfully\n";
389 return 0;
390}
391
393 uint32_t iterations) {
// Callback test. Locates the "cb_test" child, its "cmd" MMIO region and its
// "cb" callback port, registers a host-side handler that counts invocations,
// then for each iteration writes the iteration index to MMIO offset 0x10 and
// polls (up to 1000 sleeps of 1 ms) for the counter to advance.
// Throws std::runtime_error if any lookup fails or a callback does not arrive.
394 auto cb_test = accel->getChildren().find(AppID("cb_test"));
395 if (cb_test == accel->getChildren().end())
396 throw std::runtime_error("No cb_test child found in accelerator");
397 auto &ports = cb_test->second->getPorts();
398 auto cmd_port = ports.find(AppID("cmd"));
399 if (cmd_port == ports.end())
400 throw std::runtime_error("No cmd port found in cb_test child");
401 auto *cmdMMIO = cmd_port->second.getAs<services::MMIO::MMIORegion>();
402 if (!cmdMMIO)
403 throw std::runtime_error("cb_test cmd port is not MMIO");
404
405 auto f = ports.find(AppID("cb"));
406 if (f == ports.end())
407 throw std::runtime_error("No cb port found in accelerator");
408
409 auto *callPort = f->second.getAs<services::CallService::Callback>();
410 if (!callPort)
411 throw std::runtime_error("cb port is not a CallService::Callback");
412
// Atomic because the handler below and the polling loop both touch it;
// presumably the handler runs on another thread -- TODO confirm.
// NOTE(review): the handler captures this local by reference and no
// disconnect of callPort is visible in this function. If the handler can fire
// after this function returns, the reference dangles -- confirm lifetime.
413 std::atomic<uint32_t> callbackCount = 0;
414 callPort->connect(
415 [conn, &callbackCount](const MessageData &data) mutable -> MessageData {
416 conn->getLogger().debug(
417 [&](std::string &subsystem, std::string &msg,
418 std::unique_ptr<std::map<std::string, std::any>> &details) {
419 subsystem = "ESITESTER";
420 msg = "Received callback";
421 details = std::make_unique<std::map<std::string, std::any>>();
422 details->emplace("data", data);
423 });
424 std::cout << "callback: " << *data.as<uint64_t>() << std::endl;
425 callbackCount.fetch_add(1);
426 return MessageData();
427 },
// NOTE(review): meaning of this second connect() argument is not visible
// here -- see the Callback::connect declaration.
428 true);
429
430 for (uint32_t i = 0; i < iterations; ++i) {
431 conn->getLogger().info("esitester", "Issuing callback command iteration " +
432 std::to_string(i) + "/" +
433 std::to_string(iterations));
434 cmdMMIO->write(0x10, i); // Command the callback
435 // Wait up to 1 second for the callback to be invoked.
436 for (uint32_t wait = 0; wait < 1000; ++wait) {
// After iteration i succeeds the counter must be at least i + 1.
437 if (callbackCount.load() > i)
438 break;
// NOTE(review): std::this_thread needs <thread>, which is not among the
// includes visible at the top of this file (presumably pulled in
// transitively).
439 std::this_thread::sleep_for(std::chrono::milliseconds(1));
440 }
441 if (callbackCount.load() <= i)
442 throw std::runtime_error("Callback test failed. No callback received");
443 }
444}
445
446/// Test the hostmem write functionality.
449 uint32_t width) {
// Hostmem write test for one width. Fills the first 9 64-bit words of
// `region` with an all-ones sentinel, hands the region's device pointer to
// the "writemem" child through its MMIO command port, waits for the
// issued/response telemetry counters to both read 1, and then requires that
// the first ceil(width / 64) words no longer hold the sentinel.
// Throws std::runtime_error on a missing port, a timeout, or a failed check.
450 std::cout << "Running hostmem WRITE test with width " << width << std::endl;
451 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
// Returns false if any word that the device should have overwritten still
// holds the sentinel; optionally dumps all 9 words.
452 auto check = [&](bool print) {
453 bool ret = true;
454 for (size_t i = 0; i < 9; ++i) {
455 if (print)
// NOTE(review): "%016lx" assumes unsigned long is 64 bits; dataPtr[i] is
// uint64_t. This file has _MSC_VER branches, where long is 32 bits --
// consider PRIx64. printf also needs <cstdio>, not among the visible
// includes.
456 printf("[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
457 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
458 ret = false;
459 }
460 return ret;
461 };
462
463 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
464 if (writeMemChildIter == acc->getChildren().end())
465 throw std::runtime_error(
466 "hostmem write test failed. No writemem child found");
467 auto &writeMemPorts = writeMemChildIter->second->getPorts();
468
469 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
470 if (cmdPortIter == writeMemPorts.end())
471 throw std::runtime_error(
472 "hostmem write test failed. No (cmd,width) MMIO port");
473 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
474 if (!cmdMMIO)
475 throw std::runtime_error(
476 "hostmem write test failed. (cmd,width) port not MMIO");
477
478 auto issuedPortIter = writeMemPorts.find(AppID("addrCmdIssued"));
479 if (issuedPortIter == writeMemPorts.end())
480 throw std::runtime_error(
481 "hostmem write test failed. addrCmdIssued missing");
482 auto *addrCmdIssuedPort =
483 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
484 if (!addrCmdIssuedPort)
485 throw std::runtime_error(
486 "hostmem write test failed. addrCmdIssued not telemetry");
487 addrCmdIssuedPort->connect();
488
489 auto responsesPortIter = writeMemPorts.find(AppID("addrCmdResponses"));
490 if (responsesPortIter == writeMemPorts.end())
491 throw std::runtime_error(
492 "hostmem write test failed. addrCmdResponses missing");
493 auto *addrCmdResponsesPort =
494 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
495 if (!addrCmdResponsesPort)
496 throw std::runtime_error(
497 "hostmem write test failed. addrCmdResponses not telemetry");
498 addrCmdResponsesPort->connect();
499
// Seed the sentinel (9 words, matching the range check() inspects).
500 for (size_t i = 0, e = 9; i < e; ++i)
501 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
502 region.flush();
// MMIO command sequence: device pointer to offset 0x10, then 1 to 0x18 and
// 1 to 0x20. NOTE(review): register meanings are not visible here
// (presumably a count and a start trigger) -- confirm against the hardware.
503 cmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
504 cmdMMIO->write(0x18, 1);
505 cmdMMIO->write(0x20, 1);
506 bool done = false;
// Poll up to 100 times with 100 us sleeps in between.
507 for (int i = 0; i < 100; ++i) {
508 auto issued = addrCmdIssuedPort->readInt();
509 auto responses = addrCmdResponsesPort->readInt();
510 if (issued == 1 && responses == 1) {
511 done = true;
512 break;
513 }
514 std::this_thread::sleep_for(std::chrono::microseconds(100));
515 }
516 if (!done) {
// Dump the buffer for diagnosis before reporting the timeout.
517 check(true);
518 throw std::runtime_error("hostmem write test (" + std::to_string(width) +
519 " bits) timeout waiting for completion");
520 }
521 if (!check(true))
522 throw std::runtime_error("hostmem write test failed (" +
523 std::to_string(width) + " bits)");
524}
525
528 uint32_t width) {
// Hostmem read test for one width. For 8 rounds, writes a shifted pattern
// into the first two 64-bit words of `region`, asks the "readmem" child to
// read it through its MMIO command port, waits for the issued/response
// telemetry counters to both read 1, and compares the "lastReadLSB" metric
// with the first word (masked to `width` bits when width < 64).
// Throws std::runtime_error on a missing port, a timeout, or a mismatch.
529 std::cout << "Running hostmem READ test with width " << width << std::endl;
530 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
531 if (readMemChildIter == acc->getChildren().end())
532 throw std::runtime_error(
533 "hostmem read test failed. No readmem child found");
534
535 auto &readMemPorts = readMemChildIter->second->getPorts();
536 auto addrCmdPortIter = readMemPorts.find(AppID("cmd", width));
537 if (addrCmdPortIter == readMemPorts.end())
538 throw std::runtime_error(
539 "hostmem read test failed. No AddressCommand MMIO port");
540 auto *addrCmdMMIO =
541 addrCmdPortIter->second.getAs<services::MMIO::MMIORegion>();
542 if (!addrCmdMMIO)
543 throw std::runtime_error(
544 "hostmem read test failed. AddressCommand port not MMIO");
545
546 auto lastReadPortIter = readMemPorts.find(AppID("lastReadLSB"));
547 if (lastReadPortIter == readMemPorts.end())
548 throw std::runtime_error("hostmem read test failed. lastReadLSB missing");
549 auto *lastReadPort =
550 lastReadPortIter->second.getAs<services::TelemetryService::Metric>();
551 if (!lastReadPort)
552 throw std::runtime_error(
553 "hostmem read test failed. lastReadLSB not telemetry");
554 lastReadPort->connect();
555
556 auto issuedPortIter = readMemPorts.find(AppID("addrCmdIssued"));
557 if (issuedPortIter == readMemPorts.end())
558 throw std::runtime_error("hostmem read test failed. addrCmdIssued missing");
559 auto *addrCmdIssuedPort =
560 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
561 if (!addrCmdIssuedPort)
562 throw std::runtime_error(
563 "hostmem read test failed. addrCmdIssued not telemetry");
564 addrCmdIssuedPort->connect();
565
566 auto responsesPortIter = readMemPorts.find(AppID("addrCmdResponses"));
567 if (responsesPortIter == readMemPorts.end())
568 throw std::runtime_error(
569 "hostmem read test failed. addrCmdResponses missing");
570 auto *addrCmdResponsesPort =
571 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
572 if (!addrCmdResponsesPort)
573 throw std::runtime_error(
574 "hostmem read test failed. addrCmdResponses not telemetry");
575 addrCmdResponsesPort->connect();
576
577 for (size_t i = 0; i < 8; ++i) {
578 auto *dataPtr = static_cast<uint64_t *>(region.getPtr());
// A different pattern each round (shifted left by the round index) so a
// stale value from the previous round cannot pass the comparison.
579 dataPtr[0] = 0x12345678ull << i;
580 dataPtr[1] = 0xDEADBEEFull << i;
581 region.flush();
// Same MMIO command sequence as the write test: pointer to 0x10, then 1 to
// 0x18 and 1 to 0x20.
582 addrCmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
583 addrCmdMMIO->write(0x18, 1);
584 addrCmdMMIO->write(0x20, 1);
585 bool done = false;
// Poll up to 100 times with 10 ms sleeps in between.
// NOTE(review): every round expects the counters to read exactly 1, so
// presumably they restart with each command -- confirm.
586 for (int waitLoop = 0; waitLoop < 100; ++waitLoop) {
587 auto issued = addrCmdIssuedPort->readInt();
588 auto responses = addrCmdResponsesPort->readInt();
589 if (issued == 1 && responses == 1) {
590 done = true;
591 break;
592 }
593 std::this_thread::sleep_for(std::chrono::milliseconds(10));
594 }
595 if (!done)
596 throw std::runtime_error("hostmem read (" + std::to_string(width) +
597 " bits) timeout waiting for completion");
598 uint64_t captured = lastReadPort->readInt();
599 uint64_t expected = dataPtr[0];
// Only the low `width` bits are compared for sub-64-bit widths; the guard
// also keeps the shift amount below 64.
600 if (width < 64)
601 expected &= ((1ull << width) - 1);
602 if (captured != expected)
603 throw std::runtime_error("hostmem read test (" + std::to_string(width) +
604 " bits) failed. Expected " +
605 esi::toHex(expected) + ", got " +
606 esi::toHex(captured));
607 }
608}
609
611 const std::vector<uint32_t> &widths, bool write,
612 bool read) {
// Driver for the hostmem tests. Starts the HostMem service, allocates a 1 MiB
// writeable scratch region, zeroes and flushes it, then runs the write and/or
// read test for every requested width. A failure for one width is logged and
// the remaining widths still run; std::runtime_error is thrown at the end if
// any width failed.
// NOTE(review): with both `write` and `read` false no test runs and
// "Hostmem test passed" is still printed.
613 // Enable the host memory service.
614 auto hostmem = conn->getService<services::HostMem>();
615 hostmem->start();
// NOTE(review): designated initializers are a C++20 feature.
616 auto scratchRegion = hostmem->allocate(/*size(bytes)=*/1024 * 1024,
617 /*memOpts=*/{.writeable = true});
618 uint64_t *dataPtr = static_cast<uint64_t *>(scratchRegion->getPtr());
619 conn->getLogger().info("esitester",
620 "Running host memory test with region size " +
621 std::to_string(scratchRegion->getSize()) +
622 " bytes at 0x" + toHex(dataPtr));
// Zero the whole region, one 64-bit word at a time.
623 for (size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
624 dataPtr[i] = 0;
625 scratchRegion->flush();
626
627 bool passed = true;
628 for (size_t width : widths) {
629 try {
630 if (write)
631 hostmemWriteTest(acc, *scratchRegion, width);
632 if (read)
633 hostmemReadTest(acc, *scratchRegion, width);
634 } catch (std::exception &e) {
// Record the failure but keep going so every width is reported.
635 conn->getLogger().error("esitester", "Hostmem test failed for width " +
636 std::to_string(width) + ": " +
637 e.what());
638 passed = false;
639 }
640 }
641 if (!passed)
642 throw std::runtime_error("Hostmem test failed");
643 std::cout << "Hostmem test passed" << std::endl;
644}
645
647 size_t width) {
// DMA read test for one width. Resolves the "cmd" MMIO port and the "out"
// bundle of the "tohostdma" instance for `width`, writes the transfer count
// (24) to MMIO offset 0, and reads that many messages from the "data"
// channel. Only for width == 64 are the values checked: each must not be
// smaller than the previous one.
// Throws std::runtime_error if the MMIO port is missing or data is out of
// order.
648 Logger &logger = conn->getLogger();
649 logger.info("esitester",
650 "== Running DMA read test with width " + std::to_string(width));
651 AppIDPath lastPath;
652 BundlePort *toHostMMIOPort =
653 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
654 if (!toHostMMIOPort)
655 throw std::runtime_error("dma read test failed. No tohostdma[" +
656 std::to_string(width) + "] found");
657 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
658 if (!toHostMMIO)
659 throw std::runtime_error("dma read test failed. MMIO port is not MMIO");
660 lastPath.clear();
661 BundlePort *outPortBundle =
662 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
// NOTE(review): outPortBundle is dereferenced without a null check, unlike
// toHostMMIOPort above and unlike the equivalent bundle in the DMA write
// test. A missing "out" port would be a null dereference.
663 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
664 outPort.connect();
665
666 size_t xferCount = 24;
667 uint64_t last = 0;
668 MessageData data;
// Writing the count to offset 0 comes right before the reads; presumably
// it starts the transfer -- confirm.
669 toHostMMIO->write(0, xferCount);
670 for (size_t i = 0; i < xferCount; ++i) {
671 outPort.read(data);
672 if (width == 64) {
673 uint64_t val = *data.as<uint64_t>();
674 if (val < last)
675 throw std::runtime_error("dma read test failed. Out of order data");
676 last = val;
677 }
678 logger.debug("esitester",
679 "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex());
680 }
// NOTE(review): not reached if the out-of-order check above throws.
681 outPort.disconnect();
682 std::cout << " DMA read test for " << width << " bits passed" << std::endl;
683}
684
686 size_t width) {
// DMA write test for one width. Resolves the "cmd" MMIO port and the "in"
// bundle of the "fromhostdma" instance for `width`, writes the transfer
// count (24) to MMIO offset 0, then sends 24 messages of width / 8 bytes
// whose first byte is the 1-based sequence number. After each message it
// polls MMIO offset 8 until it reads back that sequence number.
// Throws std::runtime_error on a missing port, a write that never succeeds,
// or a read-back that never matches.
687 Logger &logger = conn->getLogger();
688 logger.info("esitester",
689 "Running DMA write test with width " + std::to_string(width));
690 AppIDPath lastPath;
691 BundlePort *fromHostMMIOPort =
692 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
693 if (!fromHostMMIOPort)
// NOTE(review): this message says "dma read test" although this is the
// write test; the other messages in this function say "dma write".
694 throw std::runtime_error("dma read test for " + toString(width) +
695 " bits failed. No fromhostdma[" +
696 std::to_string(width) + "] found");
697 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
698 if (!fromHostMMIO)
699 throw std::runtime_error("dma write test for " + toString(width) +
700 " bits failed. MMIO port is not MMIO");
701 lastPath.clear();
702 BundlePort *outPortBundle =
703 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
704 if (!outPortBundle)
// NOTE(review): the port looked up is named "in", but the message says
// "out".
705 throw std::runtime_error("dma write test for " + toString(width) +
706 " bits failed. No out port found");
707 WriteChannelPort &writePort = outPortBundle->getRawWrite("data");
708 writePort.connect();
709
710 size_t xferCount = 24;
// NOTE(review): allocates `width` bytes while only width / 8 are
// initialised and sent. The buffer is released only on the success path
// (delete[] at the end), so every throw below leaks it; a std::vector would
// avoid both issues.
711 uint8_t *data = new uint8_t[width];
712 for (size_t i = 0; i < width / 8; ++i)
713 data[i] = 0;
// NOTE(review): result discarded; purpose of this read is not visible here.
714 fromHostMMIO->read(8);
715 fromHostMMIO->write(0, xferCount);
716 for (size_t i = 1; i < xferCount + 1; ++i) {
// Only the low 8 bits of i are stored; fine for the 24 transfers used here.
717 data[0] = i;
718 bool successWrite;
719 size_t attempts = 0;
// Retry the non-blocking write up to 100 times, 10 ms apart.
720 do {
721 successWrite = writePort.tryWrite(MessageData(data, width / 8));
722 if (!successWrite) {
723 std::this_thread::sleep_for(std::chrono::milliseconds(10));
724 }
725 } while (!successWrite && ++attempts < 100);
726 if (!successWrite)
727 throw std::runtime_error("dma write test for " + toString(width) +
728 " bits failed. Write failed");
729 uint64_t lastReadMMIO;
// Poll offset 8 up to 20 times, 10 ms apart, for the sequence number just
// sent; the failure is raised from inside the loop on the last attempt.
730 for (size_t a = 0; a < 20; ++a) {
731 lastReadMMIO = fromHostMMIO->read(8);
732 if (lastReadMMIO == i)
733 break;
734 std::this_thread::sleep_for(std::chrono::milliseconds(10));
735 if (a >= 19)
736 throw std::runtime_error("dma write for " + toString(width) +
737 " bits test failed. Read from MMIO failed");
738 }
739 }
740 writePort.disconnect();
741 delete[] data;
742 std::cout << " DMA write test for " << width << " bits passed" << std::endl;
743}
744
746 const std::vector<uint32_t> &widths, bool read,
747 bool write) {
// Driver for the DMA tests: runs the write test for every width (if `write`)
// and then the read test for every width (if `read`).
// Write failures are printed to stderr and collected, so all widths run, and
// std::runtime_error("DMA test failed") is thrown at the end.
// NOTE(review): read failures are not caught here; the first one propagates
// immediately and skips the remaining widths. Confirm this asymmetry is
// intended.
748 bool success = true;
749 if (write)
750 for (size_t width : widths)
751 try {
752 dmaWriteTest(conn, acc, width);
753 } catch (std::exception &e) {
754 success = false;
755 std::cerr << "DMA write test for " << width
756 << " bits failed: " << e.what() << std::endl;
757 }
758 if (read)
759 for (size_t width : widths)
760 dmaReadTest(conn, acc, width);
761 if (!success)
762 throw std::runtime_error("DMA test failed");
763 std::cout << "DMA test passed" << std::endl;
764}
765
766//
767// DMA bandwidth test
768//
769
771 size_t width, size_t xferCount) {
// Read bandwidth measurement for one width. Resolves the "cmd" MMIO port and
// the "out" bundle of the "tohostdma" instance for `width`, writes
// `xferCount` to MMIO offset 0, reads `xferCount` messages from the "data"
// channel, and logs the elapsed microseconds and the resulting bandwidth
// (xferCount * width / 8 bytes over the elapsed time).
// Throws std::runtime_error if the MMIO port cannot be resolved.
772
773 AppIDPath lastPath;
774 BundlePort *toHostMMIOPort =
775 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
776 if (!toHostMMIOPort)
777 throw std::runtime_error("bandwidth test failed. No tohostdma[" +
778 std::to_string(width) + "] found");
779 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
780 if (!toHostMMIO)
781 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
782 lastPath.clear();
783 BundlePort *outPortBundle =
784 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
// NOTE(review): outPortBundle is dereferenced without a null check. Also,
// no disconnect() of outPort is visible in this function, whereas the DMA
// read test disconnects its port.
785 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
786 outPort.connect();
787
788 Logger &logger = conn->getLogger();
789 logger.info("esitester", "Starting read bandwidth test with " +
790 std::to_string(xferCount) + " x " +
791 std::to_string(width) + " bit transfers");
792 MessageData data;
// The timed section covers the MMIO command write and all reads.
793 auto start = std::chrono::high_resolution_clock::now();
794 toHostMMIO->write(0, xferCount);
795 for (size_t i = 0; i < xferCount; ++i) {
796 outPort.read(data);
// The debug message is built inside a callback handed to the logger;
// `details` is left untouched.
797 logger.debug(
798 [i, &data](std::string &subsystem, std::string &msg,
799 std::unique_ptr<std::map<std::string, std::any>> &details) {
800 subsystem = "esitester";
801 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
802 });
803 }
804 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
805 std::chrono::high_resolution_clock::now() - start);
// bytes * 1e6 / microseconds = bytes per second.
// NOTE(review): an elapsed time of 0 us makes this a floating-point
// division by zero.
806 double bytesPerSec =
807 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
808 logger.info("esitester",
809 " Bandwidth test: " + std::to_string(xferCount) + " x " +
810 std::to_string(width) + " bit transfers in " +
811 std::to_string(duration.count()) + " microseconds");
812 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
813}
814
816 size_t width, size_t xferCount) {
// Write bandwidth measurement for one width. Resolves the "cmd" MMIO port
// and the "in" bundle of the "fromhostdma" instance for `width`, writes
// `xferCount` to MMIO offset 0, sends the same width / 8 byte message
// `xferCount` times on the "data" channel, and logs the elapsed microseconds
// and the resulting bandwidth.
// Throws std::runtime_error if the MMIO port cannot be resolved.
817
818 AppIDPath lastPath;
819 BundlePort *fromHostMMIOPort =
820 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
821 if (!fromHostMMIOPort)
822 throw std::runtime_error("bandwidth test failed. No fromhostdma[" +
823 std::to_string(width) + "] found");
824 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
825 if (!fromHostMMIO)
826 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
827 lastPath.clear();
828 BundlePort *inPortBundle =
829 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
// NOTE(review): inPortBundle is dereferenced without a null check, unlike
// the equivalent bundle in the DMA write test. No disconnect() of this port
// is visible in this function. The local is named outPort although it is
// the write channel of the "in" bundle.
830 WriteChannelPort &outPort = inPortBundle->getRawWrite("data");
831 outPort.connect();
832
833 Logger &logger = conn->getLogger();
834 logger.info("esitester", "Starting write bandwidth test with " +
835 std::to_string(xferCount) + " x " +
836 std::to_string(width) + " bit transfers");
// Payload: width / 8 bytes holding 0, 1, 2, ... (each index is narrowed to
// uint8_t).
837 std::vector<uint8_t> dataVec(width / 8);
838 for (size_t i = 0; i < width / 8; ++i)
839 dataVec[i] = i;
840 MessageData data(dataVec);
// The timed section covers the MMIO command write and all channel writes.
841 auto start = std::chrono::high_resolution_clock::now();
842 fromHostMMIO->write(0, xferCount);
843 for (size_t i = 0; i < xferCount; ++i) {
844 outPort.write(data);
845 logger.debug(
846 [i, &data](std::string &subsystem, std::string &msg,
847 std::unique_ptr<std::map<std::string, std::any>> &details) {
848 subsystem = "esitester";
849 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
850 });
851 }
852 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
853 std::chrono::high_resolution_clock::now() - start);
// bytes * 1e6 / microseconds = bytes per second.
// NOTE(review): an elapsed time of 0 us makes this a floating-point
// division by zero.
854 double bytesPerSec =
855 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
856 logger.info("esitester",
857 " Bandwidth test: " + std::to_string(xferCount) + " x " +
858 std::to_string(width) + " bit transfers in " +
859 std::to_string(duration.count()) + " microseconds");
860 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
861}
862
864 const std::vector<uint32_t> &widths,
865 uint32_t xferCount, bool read, bool write) {
// Driver for the DMA bandwidth measurements: runs the read measurement for
// every width (if `read`), then the write measurement for every width (if
// `write`). Exceptions from the per-width functions are not caught here, so
// the first failure ends the run.
866 if (read)
867 for (uint32_t w : widths)
868 bandwidthReadTest(conn, acc, w, xferCount);
869 if (write)
870 for (uint32_t w : widths)
871 bandwidthWriteTest(conn, acc, w, xferCount);
872}
873
874//
875// Hostmem bandwidth test
876//
877
878static void
881 uint32_t width, uint32_t xferCount) {
882 Logger &logger = conn->getLogger();
883 logger.info("esitester", "Starting hostmem WRITE bandwidth test: " +
884 std::to_string(xferCount) + " x " +
885 std::to_string(width) + " bits");
886
887 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
888 if (writeMemChildIter == acc->getChildren().end())
889 throw std::runtime_error("hostmem write bandwidth: writemem child missing");
890 auto &writeMemPorts = writeMemChildIter->second->getPorts();
891
892 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
893 if (cmdPortIter == writeMemPorts.end())
894 throw std::runtime_error("hostmem write bandwidth: cmd MMIO missing");
895 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
896 if (!cmdMMIO)
897 throw std::runtime_error("hostmem write bandwidth: cmd not MMIO");
898
899 auto issuedIter = writeMemPorts.find(AppID("addrCmdIssued"));
900 auto respIter = writeMemPorts.find(AppID("addrCmdResponses"));
901 auto cycleCount = writeMemPorts.find(AppID("addrCmdCycles"));
902 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
903 cycleCount == writeMemPorts.end())
904 throw std::runtime_error("hostmem write bandwidth: telemetry missing");
905 auto *issuedPort =
906 issuedIter->second.getAs<services::TelemetryService::Metric>();
907 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
908 auto *cyclePort =
909 cycleCount->second.getAs<services::TelemetryService::Metric>();
910 if (!issuedPort || !respPort || !cyclePort)
911 throw std::runtime_error(
912 "hostmem write bandwidth: telemetry type mismatch");
913
914 issuedPort->connect();
915 respPort->connect();
916 cyclePort->connect();
917
918 // Initialize pattern (optional).
919 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
920 size_t words = region.getSize() / 8;
921 for (size_t i = 0; i < words; ++i)
922 dataPtr[i] = i + 0xA5A50000;
923 region.flush();
924
925 auto start = std::chrono::high_resolution_clock::now();
926 // Fire off xferCount write commands (one flit each).
927 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
928 cmdMMIO->write(0x10, devPtr); // address
929 cmdMMIO->write(0x18, xferCount); // flits
930 cmdMMIO->write(0x20, 1); // start
931
932 // Wait for responses counter to reach target.
933 bool completed = false;
934 for (int wait = 0; wait < 100000; ++wait) {
935 uint64_t respNow = respPort->readInt();
936 if (respNow == xferCount) {
937 completed = true;
938 break;
939 }
940 std::this_thread::sleep_for(std::chrono::microseconds(50));
941 }
942 if (!completed)
943 throw std::runtime_error("hostmem write bandwidth timeout");
944 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
945 std::chrono::high_resolution_clock::now() - start);
946 double bytesPerSec =
947 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
948 uint64_t cycles = cyclePort->readInt();
949 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
950 std::cout << "[WRITE] Hostmem bandwidth (" << std::to_string(width)
951 << "): " << formatBandwidth(bytesPerSec) << " "
952 << std::to_string(xferCount) << " flits in "
953 << std::to_string(duration.count()) << " us, "
954 << std::to_string(cycles) << " cycles, " << bytesPerCycle
955 << " bytes/cycle" << std::endl;
956}
957
958static void
961 uint32_t width, uint32_t xferCount) {
962 Logger &logger = conn->getLogger();
963 logger.info("esitester", "Starting hostmem READ bandwidth test: " +
964 std::to_string(xferCount) + " x " +
965 std::to_string(width) + " bits");
966
967 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
968 if (readMemChildIter == acc->getChildren().end())
969 throw std::runtime_error("hostmem read bandwidth: readmem child missing");
970 auto &readMemPorts = readMemChildIter->second->getPorts();
971
972 auto cmdPortIter = readMemPorts.find(AppID("cmd", width));
973 if (cmdPortIter == readMemPorts.end())
974 throw std::runtime_error("hostmem read bandwidth: cmd MMIO missing");
975 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
976 if (!cmdMMIO)
977 throw std::runtime_error("hostmem read bandwidth: cmd not MMIO");
978
979 auto issuedIter = readMemPorts.find(AppID("addrCmdIssued"));
980 auto respIter = readMemPorts.find(AppID("addrCmdResponses"));
981 auto cyclePort = readMemPorts.find(AppID("addrCmdCycles"));
982 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
983 cyclePort == readMemPorts.end())
984 throw std::runtime_error("hostmem read bandwidth: telemetry missing");
985 auto *issuedPort =
986 issuedIter->second.getAs<services::TelemetryService::Metric>();
987 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
988 auto *cycleCntPort =
989 cyclePort->second.getAs<services::TelemetryService::Metric>();
990 if (!issuedPort || !respPort || !cycleCntPort)
991 throw std::runtime_error("hostmem read bandwidth: telemetry type mismatch");
992 issuedPort->connect();
993 respPort->connect();
994 cycleCntPort->connect();
995
996 // Prepare memory pattern (optional).
997 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
998 size_t words64 = region.getSize() / 8;
999 for (size_t i = 0; i < words64; ++i)
1000 dataPtr[i] = 0xCAFEBABE0000ull + i;
1001 region.flush();
1002 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
1003 auto start = std::chrono::high_resolution_clock::now();
1004
1005 cmdMMIO->write(0x10, devPtr);
1006 cmdMMIO->write(0x18, xferCount);
1007 cmdMMIO->write(0x20, 1);
1008
1009 bool timeout = true;
1010 for (int wait = 0; wait < 100000; ++wait) {
1011 uint64_t respNow = respPort->readInt();
1012 if (respNow == xferCount) {
1013 timeout = false;
1014 break;
1015 }
1016 std::this_thread::sleep_for(std::chrono::microseconds(50));
1017 }
1018 if (timeout)
1019 throw std::runtime_error("hostmem read bandwidth timeout");
1020 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
1021 std::chrono::high_resolution_clock::now() - start);
1022 double bytesPerSec =
1023 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
1024 uint64_t cycles = cycleCntPort->readInt();
1025 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
1026 std::cout << "[ READ] Hostmem bandwidth (" << width
1027 << "): " << formatBandwidth(bytesPerSec) << ", " << xferCount
1028 << " flits in " << duration.count() << " us, " << cycles
1029 << " cycles, " << bytesPerCycle << " bytes/cycle" << std::endl;
1030}
1031
1033 uint32_t xferCount,
1034 const std::vector<uint32_t> &widths, bool read,
1035 bool write) {
1036 auto hostmemSvc = conn->getService<services::HostMem>();
1037 hostmemSvc->start();
1038 auto region = hostmemSvc->allocate(/*size(bytes)=*/1024 * 1024 * 1024,
1039 /*memOpts=*/{.writeable = true});
1040 for (uint32_t w : widths) {
1041 if (write)
1042 hostmemWriteBandwidthTest(conn, acc, *region, w, xferCount);
1043 if (read)
1044 hostmemReadBandwidthTest(conn, acc, *region, w, xferCount);
1045 }
1046}
1047
1049 uint32_t iterations, bool pipeline) {
1050 Logger &logger = conn->getLogger();
1051 auto loopbackChild = accel->getChildren().find(AppID("loopback"));
1052 if (loopbackChild == accel->getChildren().end())
1053 throw std::runtime_error("Loopback test: no 'loopback' child");
1054 auto &ports = loopbackChild->second->getPorts();
1055 auto addIter = ports.find(AppID("add"));
1056 if (addIter == ports.end())
1057 throw std::runtime_error("Loopback test: no 'add' port");
1058
1059 // Use FuncService::Func instead of raw channels.
1060 auto *funcPort = addIter->second.getAs<services::FuncService::Function>();
1061 if (!funcPort)
1062 throw std::runtime_error(
1063 "Loopback test: 'add' port not a FuncService::Function");
1064 funcPort->connect();
1065 if (iterations == 0) {
1066 logger.info("esitester", "Loopback add test: 0 iterations (skipped)");
1067 return;
1068 }
1069 std::mt19937_64 rng(0xC0FFEE);
1070 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
1071
1072 if (!pipeline) {
1073 auto start = std::chrono::high_resolution_clock::now();
1074 for (uint32_t i = 0; i < iterations; ++i) {
1075 uint32_t argVal = dist(rng);
1076 uint32_t expected = (argVal + 11) & 0xFFFF;
1077 uint8_t argBytes[3] = {
1078 static_cast<uint8_t>(argVal & 0xFF),
1079 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1080 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1081 };
1082 MessageData argMsg(argBytes, 3);
1083 MessageData resMsg = funcPort->call(argMsg).get();
1084 uint16_t got = *resMsg.as<uint16_t>();
1085 std::cout << "[loopback] i=" << i << " arg=0x" << esi::toHex(argVal)
1086 << " got=0x" << esi::toHex(got) << " exp=0x"
1087 << esi::toHex(expected) << std::endl;
1088 if (got != expected)
1089 throw std::runtime_error("Loopback mismatch (non-pipelined)");
1090 }
1091 auto end = std::chrono::high_resolution_clock::now();
1092 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
1093 .count();
1094 double callsPerSec = (double)iterations * 1e6 / (double)us;
1095 logger.info("esitester", "Loopback add test passed (non-pipelined, " +
1096 std::to_string(iterations) + " calls, " +
1097 std::to_string(us) + " us, " +
1098 std::to_string(callsPerSec) + " calls/s)");
1099 } else {
1100 // Pipelined mode: launch all calls first, then collect.
1101 std::vector<std::future<MessageData>> futures;
1102 futures.reserve(iterations);
1103 std::vector<uint32_t> expectedVals;
1104 expectedVals.reserve(iterations);
1105
1106 auto issueStart = std::chrono::high_resolution_clock::now();
1107 for (uint32_t i = 0; i < iterations; ++i) {
1108 uint32_t argVal = dist(rng);
1109 uint32_t expected = (argVal + 11) & 0xFFFF;
1110 uint8_t argBytes[3] = {
1111 static_cast<uint8_t>(argVal & 0xFF),
1112 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1113 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1114 };
1115 futures.emplace_back(funcPort->call(MessageData(argBytes, 3)));
1116 expectedVals.emplace_back(expected);
1117 }
1118 auto issueEnd = std::chrono::high_resolution_clock::now();
1119
1120 for (uint32_t i = 0; i < iterations; ++i) {
1121 MessageData resMsg = futures[i].get();
1122 uint16_t got = *resMsg.as<uint16_t>();
1123 uint16_t exp = (uint16_t)expectedVals[i];
1124 std::cout << "[loopback-pipelined] i=" << i << " got=0x"
1125 << esi::toHex(got) << " exp=0x" << esi::toHex(exp) << std::endl;
1126 if (got != exp)
1127 throw std::runtime_error("Loopback mismatch (pipelined) idx=" +
1128 std::to_string(i));
1129 }
1130 auto collectEnd = std::chrono::high_resolution_clock::now();
1131
1132 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1133 issueEnd - issueStart)
1134 .count();
1135 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1136 collectEnd - issueStart)
1137 .count();
1138
1139 double issueRate = (double)iterations * 1e6 / (double)issueUs;
1140 double completionRate = (double)iterations * 1e6 / (double)totalUs;
1141
1142 logger.info("esitester", "Loopback add test passed (pipelined). Issued " +
1143 std::to_string(iterations) + " in " +
1144 std::to_string(issueUs) + " us (" +
1145 std::to_string(issueRate) +
1146 " calls/s), total " + std::to_string(totalUs) +
1147 " us (" + std::to_string(completionRate) +
1148 " calls/s effective)");
1149 }
1150}
1151
1152// Exercise the design reset feature using existing telemetry. Run a hostmem
1153// write operation on the 'writemem' module, which increments its
1154// 'addrCmdResponses' telemetry counter. Confirm the telemetry advanced,
1155// request a design reset, then confirm the telemetry has been cleared back to
1156// zero (the counters live in the user design which the reset clears).
1157static void resetTest(AcceleratorConnection *conn, Accelerator *accel) {
1158 Logger &logger = conn->getLogger();
1159 constexpr uint32_t width = 64;
1160
1161 // Run an existing test that increments telemetry. The hostmem write test
1162 // bumps the writemem module's 'addrCmdResponses' counter.
1163 hostmemTest(conn, accel, {width}, /*write=*/true, /*read=*/false);
1164
1165 // Grab the writemem module's response telemetry counter to observe the reset.
1166 auto writeMemChildIter = accel->getChildren().find(AppID("writemem", width));
1167 if (writeMemChildIter == accel->getChildren().end())
1168 throw std::runtime_error("Reset test: no 'writemem' child");
1169 auto &ports = writeMemChildIter->second->getPorts();
1170 auto respIter = ports.find(AppID("addrCmdResponses"));
1171 if (respIter == ports.end())
1172 throw std::runtime_error(
1173 "Reset test: no 'addrCmdResponses' telemetry port");
1174 auto *respMetric =
1175 respIter->second.getAs<services::TelemetryService::Metric>();
1176 if (!respMetric)
1177 throw std::runtime_error("Reset test: 'addrCmdResponses' not telemetry");
1178 respMetric->connect();
1179
1180 uint64_t before = respMetric->readInt();
1181 std::cout << "[reset] telemetry addrCmdResponses before reset = " << before
1182 << std::endl;
1183 if (before == 0)
1184 throw std::runtime_error(
1185 "Reset test: telemetry was not incremented by the hostmem write");
1186
1187 // Request a design reset.
1188 logger.info("esitester", "Requesting design reset");
1189 if (!conn->reset())
1190 throw std::runtime_error("Reset test: reset() reported failure");
1191 std::cout << "[reset] reset requested" << std::endl;
1192
1193 // The reset is asserted a fixed number of cycles after the request (to let
1194 // in-flight transactions drain), so poll the telemetry until it clears.
1195 uint64_t after = before;
1196 constexpr int maxPolls = 1000000;
1197 for (int polls = 0; polls < maxPolls; ++polls) {
1198 after = respMetric->readInt();
1199 if (after == 0)
1200 break;
1201 std::this_thread::sleep_for(std::chrono::microseconds(1));
1202 }
1203 std::cout << "[reset] telemetry addrCmdResponses after reset = " << after
1204 << std::endl;
1205 if (after != 0)
1206 throw std::runtime_error(
1207 "Reset test: telemetry was not cleared by the reset (got " +
1208 std::to_string(after) + ")");
1209
1210 std::cout << "Reset test passed" << std::endl;
1211}
1212
1214 Accelerator *acc, uint32_t width,
1215 uint32_t xferCount, bool read,
1216 bool write) {
1217 Logger &logger = conn->getLogger();
1218 if (!read && !write) {
1219 std::cout << "aggbandwidth: nothing to do (enable --read and/or --write)\n";
1220 return;
1221 }
1222 logger.info(
1223 "esitester",
1224 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1225 " count=" + std::to_string(xferCount) +
1226 " read=" + (read ? "Y" : "N") + " write=" + (write ? "Y" : "N"));
1227
1228 auto hostmemSvc = conn->getService<services::HostMem>();
1229 hostmemSvc->start();
1230
1231 struct Unit {
1232 std::string prefix;
1233 bool isRead = false;
1234 bool isWrite = false;
1235 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1236 services::TelemetryService::Metric *resp = nullptr;
1237 services::TelemetryService::Metric *cycles = nullptr;
1238 services::MMIO::MMIORegion *cmd = nullptr;
1239 bool launched = false;
1240 bool done = false;
1241 uint64_t bytes = 0;
1242 uint64_t duration_us = 0;
1243 uint64_t cycleCount = 0;
1244 std::chrono::high_resolution_clock::time_point start;
1245 };
1246 std::vector<Unit> units;
1247 const std::vector<std::string> readPrefixes = {"readmem", "readmem_0",
1248 "readmem_1", "readmem_2"};
1249 const std::vector<std::string> writePrefixes = {"writemem", "writemem_0",
1250 "writemem_1", "writemem_2"};
1251
1252 auto addUnits = [&](const std::vector<std::string> &pref, bool doRead,
1253 bool doWrite) {
1254 for (auto &p : pref) {
1255 AppID id(p, width);
1256 auto childIt = acc->getChildren().find(id);
1257 if (childIt == acc->getChildren().end())
1258 continue; // silently skip missing variants
1259 auto &ports = childIt->second->getPorts();
1260 auto cmdIt = ports.find(AppID("cmd", width));
1261 auto respIt = ports.find(AppID("addrCmdResponses"));
1262 auto cycIt = ports.find(AppID("addrCmdCycles"));
1263 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1264 continue;
1265 auto *cmd = cmdIt->second.getAs<services::MMIO::MMIORegion>();
1266 auto *resp = respIt->second.getAs<services::TelemetryService::Metric>();
1267 auto *cyc = cycIt->second.getAs<services::TelemetryService::Metric>();
1268 if (!cmd || !resp || !cyc)
1269 continue;
1270 resp->connect();
1271 cyc->connect();
1272 Unit u;
1273 u.prefix = p;
1274 u.isRead = doRead;
1275 u.isWrite = doWrite;
1276 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable = true});
1277 // Init pattern.
1278 uint64_t *ptr = static_cast<uint64_t *>(u.region->getPtr());
1279 size_t words = u.region->getSize() / 8;
1280 for (size_t i = 0; i < words; ++i)
1281 ptr[i] =
1282 (p[0] == 'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1283 u.region->flush();
1284 u.cmd = cmd;
1285 u.resp = resp;
1286 u.cycles = cyc;
1287 u.bytes = uint64_t(xferCount) * (width / 8);
1288 units.emplace_back(std::move(u));
1289 }
1290 };
1291 if (read)
1292 addUnits(readPrefixes, true, false);
1293 if (write)
1294 addUnits(writePrefixes, false, true);
1295 if (units.empty()) {
1296 std::cout << "aggbandwidth: no matching units present for width " << width
1297 << "\n";
1298 return;
1299 }
1300
1301 auto wallStart = std::chrono::high_resolution_clock::now();
1302 // Launch sequentially.
1303 for (auto &u : units) {
1304 uint64_t devPtr = reinterpret_cast<uint64_t>(u.region->getDevicePtr());
1305 u.cmd->write(0x10, devPtr);
1306 u.cmd->write(0x18, xferCount);
1307 u.cmd->write(0x20, 1);
1308 u.start = std::chrono::high_resolution_clock::now();
1309 u.launched = true;
1310 }
1311
1312 // Poll all until complete.
1313 const uint64_t timeoutLoops = 200000; // ~10s at 50us sleep
1314 uint64_t loops = 0;
1315 while (true) {
1316 bool allDone = true;
1317 for (auto &u : units) {
1318 if (u.done)
1319 continue;
1320 if (u.resp->readInt() == xferCount) {
1321 auto end = std::chrono::high_resolution_clock::now();
1322 u.duration_us =
1323 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1324 .count();
1325 u.cycleCount = u.cycles->readInt();
1326 u.done = true;
1327 } else {
1328 allDone = false;
1329 }
1330 }
1331 if (allDone)
1332 break;
1333 if (++loops >= timeoutLoops)
1334 throw std::runtime_error("aggbandwidth: timeout");
1335 std::this_thread::sleep_for(std::chrono::microseconds(50));
1336 }
1337 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1338 std::chrono::high_resolution_clock::now() - wallStart)
1339 .count();
1340
1341 uint64_t totalBytes = 0;
1342 uint64_t totalReadBytes = 0;
1343 uint64_t totalWriteBytes = 0;
1344 for (auto &u : units) {
1345 totalBytes += u.bytes;
1346 if (u.isRead)
1347 totalReadBytes += u.bytes;
1348 if (u.isWrite)
1349 totalWriteBytes += u.bytes;
1350 double unitBps = (double)u.bytes * 1e6 / (double)u.duration_us;
1351 std::cout << "[agg-unit] " << u.prefix << "[" << width << "] "
1352 << (u.isRead ? "READ" : (u.isWrite ? "WRITE" : "UNK"))
1353 << " bytes=" << humanBytes(u.bytes) << " (" << u.bytes << " B)"
1354 << " time=" << humanTimeUS(u.duration_us) << " (" << u.duration_us
1355 << " us) cycles=" << u.cycleCount
1356 << " throughput=" << formatBandwidth(unitBps) << std::endl;
1357 }
1358 // Compute aggregate bandwidths as total size / total wall time (not sum of
1359 // unit throughputs).
1360 double aggReadBps =
1361 totalReadBytes ? (double)totalReadBytes * 1e6 / (double)wallUs : 0.0;
1362 double aggWriteBps =
1363 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (double)wallUs : 0.0;
1364 double aggCombinedBps =
1365 totalBytes ? (double)totalBytes * 1e6 / (double)wallUs : 0.0;
1366
1367 std::cout << "[agg-total] units=" << units.size()
1368 << " read_bytes=" << humanBytes(totalReadBytes) << " ("
1369 << totalReadBytes << " B)"
1370 << " read_bw=" << formatBandwidth(aggReadBps)
1371 << " write_bytes=" << humanBytes(totalWriteBytes) << " ("
1372 << totalWriteBytes << " B)"
1373 << " write_bw=" << formatBandwidth(aggWriteBps)
1374 << " combined_bytes=" << humanBytes(totalBytes) << " ("
1375 << totalBytes << " B)"
1376 << " combined_bw=" << formatBandwidth(aggCombinedBps)
1377 << " wall_time=" << humanTimeUS(wallUs) << " (" << wallUs << " us)"
1378 << std::endl;
1379 logger.info("esitester", "Aggregate hostmem bandwidth test complete");
1380}
1381
/// Packed struct representing a parallel window argument for StreamingAdder.
/// Layout in SystemVerilog (so it must be reversed in C):
///   { add_amt: UInt(32), input: UInt(32), last: UInt(8) }
/// Packing to 1 byte keeps the host image at exactly 9 bytes: `last` at
/// offset 0, `input` at offset 1 and `addAmt` at offset 5.
#pragma pack(push, 1)
struct StreamingAddArg {
  uint8_t last;    ///< Non-zero on the final element of the list.
  uint32_t input;  ///< One list element.
  uint32_t addAmt; ///< Amount to add; repeated in every message.
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddArg) == 9,
              "StreamingAddArg must be 9 bytes packed");
1394
/// Packed struct representing a parallel window result for StreamingAdder.
/// Layout in SystemVerilog (so it must be reversed in C):
///   { data: UInt(32), last: UInt(8) }
/// Packing to 1 byte keeps the host image at exactly 5 bytes: `last` at
/// offset 0 and `data` at offset 1.
#pragma pack(push, 1)
struct StreamingAddResult {
  uint8_t last;  ///< Non-zero on the final element of the result list.
  uint32_t data; ///< One result element.
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddResult) == 5,
              "StreamingAddResult must be 5 bytes packed");
1406
1407/// Test the StreamingAdder module. This module takes a struct containing
1408/// an add_amt and a list of uint32s, adds add_amt to each element, and
1409/// returns the resulting list. The data is streamed using windowed types.
1411 uint32_t addAmt, uint32_t numItems) {
1412 Logger &logger = conn->getLogger();
1413 logger.info("esitester", "Starting streaming add test with add_amt=" +
1414 std::to_string(addAmt) +
1415 ", num_items=" + std::to_string(numItems));
1416
1417 // Generate random input data.
1418 std::mt19937 rng(0xDEADBEEF);
1419 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1420 std::vector<uint32_t> inputData;
1421 inputData.reserve(numItems);
1422 for (uint32_t i = 0; i < numItems; ++i)
1423 inputData.push_back(dist(rng));
1424
1425 // Find the streaming_adder child.
1426 auto streamingAdderChild =
1427 accel->getChildren().find(AppID("streaming_adder"));
1428 if (streamingAdderChild == accel->getChildren().end())
1429 throw std::runtime_error(
1430 "Streaming add test: no 'streaming_adder' child found");
1431
1432 auto &ports = streamingAdderChild->second->getPorts();
1433 auto addIter = ports.find(AppID("streaming_add"));
1434 if (addIter == ports.end())
1435 throw std::runtime_error(
1436 "Streaming add test: no 'streaming_add' port found");
1437
1438 // Get the raw read/write channel ports for the windowed function.
1439 // The argument channel expects parallel windowed data where each message
1440 // contains: struct { add_amt: UInt(32), input: UInt(32), last: bool }
1441 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1442 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1443
1444 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1445 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1446
1447 // Send each list element with add_amt repeated in every message.
1448 for (size_t i = 0; i < inputData.size(); ++i) {
1449 StreamingAddArg arg;
1450 arg.addAmt = addAmt;
1451 arg.input = inputData[i];
1452 arg.last = (i == inputData.size() - 1) ? 1 : 0;
1453 argPort.write(
1454 MessageData(reinterpret_cast<const uint8_t *>(&arg), sizeof(arg)));
1455 logger.debug("esitester", "Sent {add_amt=" + std::to_string(arg.addAmt) +
1456 ", input=" + std::to_string(arg.input) +
1457 ", last=" + (arg.last ? "true" : "false") +
1458 "}");
1459 }
1460
1461 // Read the result list (also windowed).
1462 std::vector<uint32_t> results;
1463 bool lastSeen = false;
1464 while (!lastSeen) {
1465 MessageData resMsg;
1466 resultPort.read(resMsg);
1467 if (resMsg.getSize() < sizeof(StreamingAddResult))
1468 throw std::runtime_error(
1469 "Streaming add test: unexpected result message size");
1470
1471 const auto *res =
1472 reinterpret_cast<const StreamingAddResult *>(resMsg.getBytes());
1473 lastSeen = res->last != 0;
1474 results.push_back(res->data);
1475 logger.debug("esitester", "Received result=" + std::to_string(res->data) +
1476 " (last=" + (lastSeen ? "true" : "false") +
1477 ")");
1478 }
1479
1480 // Verify results.
1481 if (results.size() != inputData.size())
1482 throw std::runtime_error(
1483 "Streaming add test: result size mismatch. Expected " +
1484 std::to_string(inputData.size()) + ", got " +
1485 std::to_string(results.size()));
1486
1487 bool passed = true;
1488 std::cout << "Streaming add test results:" << std::endl;
1489 for (size_t i = 0; i < inputData.size(); ++i) {
1490 uint32_t expected = inputData[i] + addAmt;
1491 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1492 << " = " << results[i] << " (expected " << expected << ")";
1493 if (results[i] != expected) {
1494 std::cout << " MISMATCH!";
1495 passed = false;
1496 }
1497 std::cout << std::endl;
1498 }
1499
1500 argPort.disconnect();
1501 resultPort.disconnect();
1502
1503 if (!passed)
1504 throw std::runtime_error("Streaming add test failed: result mismatch");
1505
1506 logger.info("esitester", "Streaming add test passed");
1507 std::cout << "Streaming add test passed" << std::endl;
1508}
1509
/// Test the StreamingAdder module using message translation.
/// This version uses the list translation support where the message format is:
///   Argument: { input_length (8 bytes), add_amt (4 bytes), input_data[] }
///   Result: { data_length (8 bytes), data[] }
/// The translation layer automatically converts between this format and the
/// parallel windowed frames used by the hardware.
1516
1517/// Translated argument struct for StreamingAdder.
1518/// Memory layout (standard C struct ordering, fields in declaration order):
1519/// ESI type: struct { add_amt: UInt(32), input: List<UInt(32)> }
1520/// becomes host struct:
1521/// { input_length (size_t, 8 bytes on 64-bit), add_amt (uint32_t),
1522/// input_data[] }
1523/// Note: The translation layer handles the conversion between this C struct
1524/// layout and the hardware's SystemVerilog frame format.
1525/// Note: size_t is used for list lengths, so this format is platform-dependent.
1526#pragma pack(push, 1)
1529 uint32_t addAmt;
1530 // Trailing array data follows immediately after the struct in memory.
1531 // Use inputData() accessor to access it.
1532
1533 /// Get pointer to trailing input data array.
1534 uint32_t *inputData() { return reinterpret_cast<uint32_t *>(this + 1); }
1535 const uint32_t *inputData() const {
1536 return reinterpret_cast<const uint32_t *>(this + 1);
1537 }
1538 /// Get span view of input data (requires inputLength to be set first).
1539 std::span<uint32_t> inputDataSpan() { return {inputData(), inputLength}; }
1540 std::span<const uint32_t> inputDataSpan() const {
1541 return {inputData(), inputLength};
1542 }
1543
1544 static size_t allocSize(size_t numItems) {
1545 return sizeof(StreamingAddTranslatedArg) + numItems * sizeof(uint32_t);
1546 }
1547};
1548#pragma pack(pop)
1549
1550/// Translated result struct for StreamingAdder.
1551/// Memory layout:
1552/// struct { data: List<UInt(32)> }
1553/// becomes:
1554/// { data_length (size_t, 8 bytes on 64-bit), data[] }
1555#pragma pack(push, 1)
1558 // Trailing array data follows immediately after the struct in memory.
1559
1560 /// Get pointer to trailing result data array.
1561 uint32_t *data() { return reinterpret_cast<uint32_t *>(this + 1); }
1562 const uint32_t *data() const {
1563 return reinterpret_cast<const uint32_t *>(this + 1);
1564 }
1565 /// Get span view of result data (requires dataLength to be set first).
1566 std::span<uint32_t> dataSpan() { return {data(), dataLength}; }
1567 std::span<const uint32_t> dataSpan() const { return {data(), dataLength}; }
1568
1569 static size_t allocSize(size_t numItems) {
1570 return sizeof(StreamingAddTranslatedResult) + numItems * sizeof(uint32_t);
1571 }
1572};
1573#pragma pack(pop)
1574
1576 Accelerator *accel, uint32_t addAmt,
1577 uint32_t numItems) {
1578 Logger &logger = conn->getLogger();
1579 logger.info("esitester",
1580 "Starting streaming add test (translated) with add_amt=" +
1581 std::to_string(addAmt) +
1582 ", num_items=" + std::to_string(numItems));
1583
1584 // Generate random input data.
1585 std::mt19937 rng(0xDEADBEEF);
1586 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1587 std::vector<uint32_t> inputData;
1588 inputData.reserve(numItems);
1589 for (uint32_t i = 0; i < numItems; ++i)
1590 inputData.push_back(dist(rng));
1591
1592 // Find the streaming_adder child.
1593 auto streamingAdderChild =
1594 accel->getChildren().find(AppID("streaming_adder"));
1595 if (streamingAdderChild == accel->getChildren().end())
1596 throw std::runtime_error(
1597 "Streaming add test: no 'streaming_adder' child found");
1598
1599 auto &ports = streamingAdderChild->second->getPorts();
1600 auto addIter = ports.find(AppID("streaming_add"));
1601 if (addIter == ports.end())
1602 throw std::runtime_error(
1603 "Streaming add test: no 'streaming_add' port found");
1604
1605 // Get the raw read/write channel ports with translation enabled (default).
1606 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1607 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1608
1609 // Connect with translation enabled (the default).
1610 argPort.connect();
1611 resultPort.connect();
1612
1613 // Allocate the argument struct with proper alignment for the struct members.
1614 // We use aligned_alloc to ensure the buffer meets alignment requirements.
1615 size_t argSize = StreamingAddTranslatedArg::allocSize(numItems);
1616 constexpr size_t alignment = alignof(StreamingAddTranslatedArg);
1617 // aligned_alloc requires size to be a multiple of alignment
1618 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1619 void *argRaw = alignedAllocCompat(alignment, allocSize);
1620 if (!argRaw)
1621 throw std::bad_alloc();
1622 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1623 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1624 auto *arg = static_cast<StreamingAddTranslatedArg *>(argRaw);
1625 arg->inputLength = numItems;
1626 arg->addAmt = addAmt;
1627 for (uint32_t i = 0; i < numItems; ++i)
1628 arg->inputData()[i] = inputData[i];
1629
1630 logger.debug("esitester",
1631 "Sending translated argument: " + std::to_string(argSize) +
1632 " bytes, list_length=" + std::to_string(arg->inputLength) +
1633 ", add_amt=" + std::to_string(arg->addAmt));
1634
1635 // Send the complete message - translation will split it into frames.
1636 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize));
1637 // argBuffer automatically freed when it goes out of scope
1638
1639 // Read the translated result.
1640 MessageData resMsg;
1641 resultPort.read(resMsg);
1642
1643 logger.debug("esitester", "Received translated result: " +
1644 std::to_string(resMsg.getSize()) + " bytes");
1645
1646 if (resMsg.getSize() < sizeof(StreamingAddTranslatedResult))
1647 throw std::runtime_error(
1648 "Streaming add test (translated): result too small");
1649
1650 const auto *result =
1651 reinterpret_cast<const StreamingAddTranslatedResult *>(resMsg.getBytes());
1652
1653 if (resMsg.getSize() <
1654 StreamingAddTranslatedResult::allocSize(result->dataLength))
1655 throw std::runtime_error(
1656 "Streaming add test (translated): result data truncated");
1657
1658 // Verify results.
1659 if (result->dataLength != inputData.size())
1660 throw std::runtime_error(
1661 "Streaming add test (translated): result size mismatch. Expected " +
1662 std::to_string(inputData.size()) + ", got " +
1663 std::to_string(result->dataLength));
1664
1665 bool passed = true;
1666 std::cout << "Streaming add test results:" << std::endl;
1667 for (size_t i = 0; i < inputData.size(); ++i) {
1668 uint32_t expected = inputData[i] + addAmt;
1669 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1670 << " = " << result->data()[i] << " (expected " << expected << ")";
1671 if (result->data()[i] != expected) {
1672 std::cout << " MISMATCH!";
1673 passed = false;
1674 }
1675 std::cout << std::endl;
1676 }
1677
1678 argPort.disconnect();
1679 resultPort.disconnect();
1680
1681 if (!passed)
1682 throw std::runtime_error(
1683 "Streaming add test (translated) failed: result mismatch");
1684
1685 logger.info("esitester", "Streaming add test passed (translated)");
1686 std::cout << "Streaming add test passed" << std::endl;
1687}
1688
1689/// Test the CoordTranslator module using message translation.
1690/// This version uses the list translation support where the message format is:
1691/// Argument: { coords_length, y_translation, x_translation, coords[] }
1692/// Result: { coords_length, coords[] }
1693/// Each coord is a struct { y, x } (y precedes x in memory).
1694
/// A single point exchanged with the CoordTranslator modules.
/// Because of SV field ordering, `y` is stored ahead of `x`; aggregate
/// initialisation therefore takes the values in {y, x} order.
#pragma pack(push, 1)
struct Coord {
  uint32_t y; // First in memory (SV ordering: last declared field first).
  uint32_t x; // Second in memory.
};
#pragma pack(pop)
static_assert(sizeof(Coord) == 2 * sizeof(uint32_t),
              "Coord must be 8 bytes packed");
1704
1705/// Translated argument struct for CoordTranslator.
1706/// Memory layout (standard C struct ordering):
1707/// ESI type: struct { x_translation: UInt(32), y_translation: UInt(32),
1708/// coords: List<struct{x, y}> }
1709/// becomes host struct:
1710/// { coords_length (size_t, 8 bytes on 64-bit), y_translation (uint32_t),
1711/// x_translation (uint32_t), coords[] }
1712/// Note: Fields are in reverse order due to SV struct ordering.
1713/// Note: size_t is used for list lengths, so this format is platform-dependent.
1714#pragma pack(push, 1)
1717 uint32_t yTranslation; // SV ordering: last declared field first in memory
1719 // Trailing array data follows immediately after the struct in memory.
1720
1721 /// Get pointer to trailing coords array.
1722 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1723 const Coord *coords() const {
1724 return reinterpret_cast<const Coord *>(this + 1);
1725 }
1726 /// Get span view of coords (requires coordsLength to be set first).
1727 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1728 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1729
1730 static size_t allocSize(size_t numCoords) {
1731 return sizeof(CoordTranslateArg) + numCoords * sizeof(Coord);
1732 }
1733};
1734#pragma pack(pop)
1735
1736/// Translated result struct for CoordTranslator.
1737/// Memory layout:
1738/// ESI type: List<struct{x, y}>
1739/// becomes host struct:
1740/// { coords_length (size_t, 8 bytes on 64-bit), coords[] }
1741#pragma pack(push, 1)
1744 // Trailing array data follows immediately after the struct in memory.
1745
1746 /// Get pointer to trailing coords array.
1747 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1748 const Coord *coords() const {
1749 return reinterpret_cast<const Coord *>(this + 1);
1750 }
1751 /// Get span view of coords (requires coordsLength to be set first).
1752 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1753 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1754
1755 static size_t allocSize(size_t numCoords) {
1756 return sizeof(CoordTranslateResult) + numCoords * sizeof(Coord);
1757 }
1758};
1759#pragma pack(pop)
1760
1762 uint32_t xTrans, uint32_t yTrans,
1763 uint32_t numCoords) {
1764 Logger &logger = conn->getLogger();
1765 logger.info("esitester", "Starting coord translate test with x_trans=" +
1766 std::to_string(xTrans) +
1767 ", y_trans=" + std::to_string(yTrans) +
1768 ", num_coords=" + std::to_string(numCoords));
1769
1770 // Generate random input coordinates.
1771 // Note: Coord struct has y before x due to SV ordering, but we generate
1772 // and display as (x, y) for human readability.
1773 std::mt19937 rng(0xDEADBEEF);
1774 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1775 std::vector<Coord> inputCoords;
1776 inputCoords.reserve(numCoords);
1777 for (uint32_t i = 0; i < numCoords; ++i) {
1778 Coord c;
1779 c.x = dist(rng);
1780 c.y = dist(rng);
1781 inputCoords.push_back(c);
1782 }
1783
1784 // Find the coord_translator child.
1785 auto coordTranslatorChild =
1786 accel->getChildren().find(AppID("coord_translator"));
1787 if (coordTranslatorChild == accel->getChildren().end())
1788 throw std::runtime_error(
1789 "Coord translate test: no 'coord_translator' child found");
1790
1791 auto &ports = coordTranslatorChild->second->getPorts();
1792 auto translateIter = ports.find(AppID("translate_coords"));
1793 if (translateIter == ports.end())
1794 throw std::runtime_error(
1795 "Coord translate test: no 'translate_coords' port found");
1796
1797 // Use FuncService::Function which handles connection and translation.
1798 auto *funcPort =
1799 translateIter->second.getAs<services::FuncService::Function>();
1800 if (!funcPort)
1801 throw std::runtime_error(
1802 "Coord translate test: 'translate_coords' port not a "
1803 "FuncService::Function");
1804 funcPort->connect();
1805
1806 // Allocate the argument struct with proper alignment for the struct members.
1807 size_t argSize = CoordTranslateArg::allocSize(numCoords);
1808 constexpr size_t alignment = alignof(CoordTranslateArg);
1809 // aligned_alloc requires size to be a multiple of alignment
1810 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1811 void *argRaw = alignedAllocCompat(alignment, allocSize);
1812 if (!argRaw)
1813 throw std::bad_alloc();
1814 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1815 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1816 auto *arg = static_cast<CoordTranslateArg *>(argRaw);
1817 arg->coordsLength = numCoords;
1818 arg->xTranslation = xTrans;
1819 arg->yTranslation = yTrans;
1820 for (uint32_t i = 0; i < numCoords; ++i)
1821 arg->coords()[i] = inputCoords[i];
1822
1823 logger.debug(
1824 "esitester",
1825 "Sending coord translate argument: " + std::to_string(argSize) +
1826 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1827 ", x_trans=" + std::to_string(arg->xTranslation) +
1828 ", y_trans=" + std::to_string(arg->yTranslation));
1829
1830 // Call the function - translation happens automatically.
1831 MessageData resMsg =
1832 funcPort
1833 ->call(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize))
1834 .get();
1835 // argBuffer automatically freed when it goes out of scope
1836
1837 logger.debug("esitester", "Received coord translate result: " +
1838 std::to_string(resMsg.getSize()) + " bytes");
1839
1840 if (resMsg.getSize() < sizeof(CoordTranslateResult))
1841 throw std::runtime_error("Coord translate test: result too small");
1842
1843 const auto *result =
1844 reinterpret_cast<const CoordTranslateResult *>(resMsg.getBytes());
1845
1846 if (resMsg.getSize() < CoordTranslateResult::allocSize(result->coordsLength))
1847 throw std::runtime_error("Coord translate test: result data truncated");
1848
1849 // Verify results.
1850 if (result->coordsLength != inputCoords.size())
1851 throw std::runtime_error(
1852 "Coord translate test: result size mismatch. Expected " +
1853 std::to_string(inputCoords.size()) + ", got " +
1854 std::to_string(result->coordsLength));
1855
1856 bool passed = true;
1857 std::cout << "Coord translate test results:" << std::endl;
1858 for (size_t i = 0; i < inputCoords.size(); ++i) {
1859 uint32_t expectedX = inputCoords[i].x + xTrans;
1860 uint32_t expectedY = inputCoords[i].y + yTrans;
1861 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1862 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1863 << ") = (" << result->coords()[i].x << ","
1864 << result->coords()[i].y << ")";
1865 if (result->coords()[i].x != expectedX ||
1866 result->coords()[i].y != expectedY) {
1867 std::cout << " MISMATCH! (expected (" << expectedX << "," << expectedY
1868 << "))";
1869 passed = false;
1870 }
1871 std::cout << std::endl;
1872 }
1873
1874 if (!passed)
1875 throw std::runtime_error("Coord translate test failed: result mismatch");
1876
1877 logger.info("esitester", "Coord translate test passed");
1878 std::cout << "Coord translate test passed" << std::endl;
1879}
1880
1881//
1882// SerialCoordTranslator test
1883//
1884
1885#pragma pack(push, 1)
1887 uint16_t coordsCount;
1890};
1891static_assert(sizeof(SerialCoordHeader) == 10, "Size mismatch");
1893 SerialCoordData(uint32_t x, uint32_t y) : _pad_head(0), y(y), x(x) {}
1894 uint16_t _pad_head;
1895 uint32_t y;
1896 uint32_t x;
1897};
1898static_assert(sizeof(SerialCoordData) == sizeof(SerialCoordHeader),
1899 "Size mismatch");
1900#pragma pack(pop)
1901
1902// Note: this application is intended to test hardware. As such, we need
1903// to be able to send batches. So this is not the typical way one would define a
1904// message struct. It's closer to a streaming style.
1906private:
1908 std::vector<SerialCoordData> coords;
1910
1911public:
1913 header.coordsCount = 0;
1914 header.xTranslation = 0;
1915 header.yTranslation = 0;
1916 // The footer is a count==0 header that terminates the list per the ESI
1917 // bulk-transfer serial encoding. Static fields are constant within a
1918 // list so the footer's translation values are irrelevant; zero them.
1919 footer.coordsCount = 0;
1920 footer.xTranslation = 0;
1921 footer.yTranslation = 0;
1922 }
1923 void yTranslation(uint32_t yTrans) { header.yTranslation = yTrans; }
1924 uint32_t yTranslation() const { return header.yTranslation; }
1925 void xTranslation(uint32_t xTrans) { header.xTranslation = xTrans; }
1926 uint32_t xTranslation() const { return header.xTranslation; }
1927 void appendCoord(uint32_t x, uint32_t y) {
1928 coords.emplace_back(x, y);
1929 header.coordsCount = (uint16_t)coords.size();
1930 }
1931 const std::vector<SerialCoordData> &getCoords() const { return coords; }
1932
1933 size_t numSegments() const override { return 3; }
1934 Segment segment(size_t idx) const override {
1935 if (idx == 0)
1936 return {reinterpret_cast<const uint8_t *>(&header), sizeof(header)};
1937 else if (idx == 1)
1938 return {reinterpret_cast<const uint8_t *>(coords.data()),
1939 coords.size() * sizeof(SerialCoordData)};
1940 else if (idx == 2)
1941 return {reinterpret_cast<const uint8_t *>(&footer), sizeof(footer)};
1942 else
1943 throw std::out_of_range("SerialCoordInput: invalid segment index");
1944 }
1945};
1946
1947// Like SerialCoordInput but without the trailing count==0 terminator. Used
1948// when streaming multiple bursts that together comprise a single logical
1949// list; the caller is responsible for sending a separate terminator burst
1950// (a SerialCoordBurst with count==0 and no data).
1952private:
1954 std::vector<SerialCoordData> coords;
1955
1956public:
1962 void yTranslation(uint32_t yTrans) { header.yTranslation = yTrans; }
1963 void xTranslation(uint32_t xTrans) { header.xTranslation = xTrans; }
1964 void appendCoord(uint32_t x, uint32_t y) {
1965 coords.emplace_back(x, y);
1966 header.coordsCount = (uint16_t)coords.size();
1967 }
1968
1969 size_t numSegments() const override { return 2; }
1970 Segment segment(size_t idx) const override {
1971 if (idx == 0)
1972 return {reinterpret_cast<const uint8_t *>(&header), sizeof(header)};
1973 else if (idx == 1)
1974 return {reinterpret_cast<const uint8_t *>(coords.data()),
1975 coords.size() * sizeof(SerialCoordData)};
1976 else
1977 throw std::out_of_range("SerialCoordBurst: invalid segment index");
1978 }
1979};
1980
1981#pragma pack(push, 1)
1983 uint8_t _pad[6];
1984 uint16_t coordsCount;
1985};
1987 uint32_t y;
1988 uint32_t x;
1989};
1994#pragma pack(pop)
1995static_assert(sizeof(SerialCoordOutputFrame) == 8, "Size mismatch");
1996
1997/// Deserialized result batch from the serial coord translator. The
1998/// TypeDeserializer accumulates header+data frame sequences until the
1999/// zero-count footer header, then emits the complete coordinate list.
2001 std::vector<Coord> coords;
2002
2004 : public QueuedDecodeTypeDeserializer<SerialCoordOutputBatch> {
2005 public:
2009
2011 : Base(std::move(output)) {}
2012
2013 private:
2014 DecodedOutputs decode(std::unique_ptr<SegmentedMessageData> &msg) override {
2015 DecodedOutputs decoded;
2016
2017 MessageData scratch;
2018 const MessageData &flat =
2019 detail::getMessageDataRef<SerialCoordOutputBatch>(*msg, scratch);
2020 const uint8_t *bytes = flat.getBytes();
2021 size_t size = flat.getSize();
2022 constexpr size_t frameSize = sizeof(SerialCoordOutputFrame);
2023
2024 size_t offset = 0;
2025 while (offset < size) {
2026 size_t needed = frameSize - partialFrameBytes.size();
2027 size_t chunkSize = std::min(needed, size - offset);
2028 partialFrameBytes.insert(partialFrameBytes.end(), bytes + offset,
2029 bytes + offset + chunkSize);
2030 offset += chunkSize;
2031
2032 if (partialFrameBytes.size() != frameSize)
2033 break;
2034
2036 std::memcpy(&frame, partialFrameBytes.data(), frameSize);
2037 partialFrameBytes.clear();
2038
2039 if (remainingCoords == 0) {
2040 // Header frame.
2041 uint16_t batchCount = frame.header.coordsCount;
2042 if (batchCount == 0) {
2043 // Footer: end of list. Emit accumulated coordinates.
2044 auto batch = std::make_unique<SerialCoordOutputBatch>();
2045 batch->coords = std::move(accumulated);
2046 accumulated.clear();
2047 decoded.push_back(std::move(batch));
2048 msg.reset();
2049 return decoded;
2050 }
2051 remainingCoords = batchCount;
2052 continue;
2053 }
2054 // Data frame.
2055 accumulated.push_back({frame.data.y, frame.data.x});
2057 }
2058
2059 msg.reset();
2060 return decoded;
2061 }
2062
2063 std::vector<Coord> accumulated;
2064 std::vector<uint8_t> partialFrameBytes;
2066 };
2067};
2068
2070 Accelerator *accel, uint32_t xTrans,
2071 uint32_t yTrans, uint32_t numCoords,
2072 size_t batchSizeLimit) {
2073 Logger &logger = conn->getLogger();
2074 logger.info("esitester", "Starting Serial coord translate test");
2075
2076 // Generate random coordinates.
2077 std::mt19937 rng(0xDEADBEEF);
2078 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
2079 std::vector<Coord> inputCoords;
2080 inputCoords.reserve(numCoords);
2081 for (uint32_t i = 0; i < numCoords; ++i)
2082 inputCoords.push_back({dist(rng), dist(rng)});
2083
2084 auto child = accel->getChildren().find(AppID("coord_translator_serial"));
2085 if (child == accel->getChildren().end())
2086 throw std::runtime_error("Serial coord translate test: no "
2087 "'coord_translator_serial' child found");
2088
2089 auto &ports = child->second->getPorts();
2090 auto portIter = ports.find(AppID("translate_coords_serial"));
2091 if (portIter == ports.end())
2092 throw std::runtime_error(
2093 "Serial coord translate test: no 'translate_coords_serial' port found");
2094
2095 TypedWritePort<SerialCoordBurst, /*SkipTypeCheck=*/true> argPort(
2096 portIter->second.getRawWrite("arg"));
2097 // Use the raw read port so we can verify the multi-burst output framing
2098 // explicitly rather than relying on the typed deserializer to accumulate
2099 // frames until the terminator.
2100 ReadChannelPort &resultRaw = portIter->second.getRawRead("result");
2101
2102 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
2103 // Use an unlimited read queue so the device output isn't stalled by a full
2104 // queue while we're still writing. With raw reads (translateMessage=false),
2105 // each output frame becomes its own queued message, so the default 32-msg
2106 // limit can be hit easily on a multi-burst run.
2107 resultRaw.connect(ChannelPort::ConnectOptions(/*bufferSize=*/0,
2108 /*translateMessage=*/false));
2109
2110 size_t sent = 0;
2111 while (sent < numCoords) {
2112 size_t batchSize = std::min(batchSizeLimit, numCoords - sent);
2113
2114 // Send Header. Only the first header needs the translation values, test
2115 // the subsequent ones with zero translation to verify that the hardware
2116 // correctly applies the first header's translation to the whole list.
2117 auto batch = std::make_unique<SerialCoordBurst>();
2118 batch->xTranslation(sent == 0 ? xTrans : 0);
2119 batch->yTranslation(sent == 0 ? yTrans : 0);
2120 // Send Data
2121 for (size_t i = 0; i < batchSize; ++i) {
2122 batch->appendCoord(inputCoords[sent + i].x, inputCoords[sent + i].y);
2123 }
2124 argPort.write(batch);
2125 sent += batchSize;
2126 }
2127 // Send final header with count=0 to signal end of input.
2128 auto footerBurst = std::make_unique<SerialCoordBurst>();
2129 argPort.write(footerBurst);
2130
2131 // Read raw output frames, walking the bulk-transfer wire format: zero or
2132 // more (HDR(N) + N data frames) sequences followed by a single HDR(0)
2133 // terminator. Each `read()` returns whatever the transport layer has
2134 // available, which is not guaranteed to align with frame boundaries
2135 // (e.g., DMA channel engines may coalesce or split across frames). So
2136 // we accumulate bytes into a buffer and only consume whole frames.
2137 constexpr size_t frameSize = sizeof(SerialCoordOutputFrame);
2138 std::vector<uint8_t> rxBuf;
2139 auto readFrame = [&](SerialCoordOutputFrame &out) {
2140 while (rxBuf.size() < frameSize) {
2141 MessageData data;
2142 resultRaw.read(data);
2143 rxBuf.insert(rxBuf.end(), data.getBytes(),
2144 data.getBytes() + data.getSize());
2145 }
2146 std::memcpy(&out, rxBuf.data(), frameSize);
2147 rxBuf.erase(rxBuf.begin(), rxBuf.begin() + frameSize);
2148 };
2149
2150 std::vector<Coord> results;
2151 results.reserve(numCoords);
2152 while (true) {
2154 readFrame(hdr);
2155 uint16_t batchCount = hdr.header.coordsCount;
2156 if (batchCount == 0)
2157 break;
2158 for (uint16_t i = 0; i < batchCount; ++i) {
2159 SerialCoordOutputFrame frame{};
2160 readFrame(frame);
2161 results.push_back({frame.data.y, frame.data.x});
2162 }
2163 }
2164
2165 // Verify
2166 bool passed = true;
2167 std::cout << "Serial coord translate test results:" << std::endl;
2168 if (results.size() != inputCoords.size()) {
2169 std::cout << "Result size mismatch. Expected " << inputCoords.size()
2170 << ", got " << results.size() << std::endl;
2171 passed = false;
2172 }
2173 for (size_t i = 0; i < std::min(inputCoords.size(), results.size()); ++i) {
2174 uint32_t expX = inputCoords[i].x + xTrans;
2175 uint32_t expY = inputCoords[i].y + yTrans;
2176 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
2177 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
2178 << ") = (" << results[i].x << "," << results[i].y
2179 << ") (expected (" << expX << "," << expY << "))";
2180 if (results[i].x != expX || results[i].y != expY) {
2181 std::cout << " MISMATCH!";
2182 passed = false;
2183 }
2184 std::cout << std::endl;
2185 }
2186
2187 argPort.disconnect();
2188 resultRaw.disconnect();
2189
2190 if (!passed)
2191 throw std::runtime_error("Serial coord translate test failed");
2192
2193 logger.info("esitester", "Serial coord translate test passed");
2194 std::cout << "Serial coord translate test passed" << std::endl;
2195}
2196
2197//
2198// AutoSerialCoordTranslator test
2199//
2200// The hardware module pipes the input through ListWindowToParallel ->
2201// per-coordinate translation -> ListWindowToSerial. The conversion modules
2202// emit one or more bulk transfers per call (each `header(count>0)` followed
2203// by `count` data frames) terminated by a `header(count==0)` footer per the
2204// ESI WindowField serial-encoding spec. This test:
2205// * Sends exactly one input batch: header(numCoords) + numCoords data
2206// frames + header(0) footer.
2207// * Reads back: a sequence of one-or-more `header(count>0) + count data`
2208// bursts terminated by `header(0)`. Use raw frame reads since the
2209// canonical `SerialCoordOutputBatch` deserializer hasn't been wired in
2210// for the converter pair.
2211//
2213 Accelerator *accel, uint32_t xTrans,
2214 uint32_t yTrans, uint32_t numCoords) {
2215 Logger &logger = conn->getLogger();
2216 logger.info("esitester", "Starting Auto serial coord translate test");
2217
2218 // Generate random coordinates.
2219 std::mt19937 rng(0xDEADBEEF);
2220 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
2221 std::vector<Coord> inputCoords;
2222 inputCoords.reserve(numCoords);
2223 for (uint32_t i = 0; i < numCoords; ++i)
2224 inputCoords.push_back({dist(rng), dist(rng)});
2225
2226 auto child = accel->getChildren().find(AppID("coord_translator_auto_serial"));
2227 if (child == accel->getChildren().end())
2228 throw std::runtime_error("Auto serial coord translate test: no "
2229 "'coord_translator_auto_serial' child found");
2230
2231 auto &ports = child->second->getPorts();
2232 auto portIter = ports.find(AppID("translate_coords_auto_serial"));
2233 if (portIter == ports.end())
2234 throw std::runtime_error("Auto serial coord translate test: no "
2235 "'translate_coords_auto_serial' port found");
2236
2237 // Reuse SerialCoordInput: the input wire format is identical (header with
2238 // x/y_translation+count, followed by data frames each carrying one coord).
2239 TypedWritePort<SerialCoordInput, /*SkipTypeCheck=*/true> argPort(
2240 portIter->second.getRawWrite("arg"));
2241 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
2242
2243 // Use the raw read port for results: read one header frame then numCoords
2244 // data frames as raw `SerialCoordOutputFrame`-shaped messages. Disable
2245 // window-message translation so we get one frame per `read()` instead of
2246 // assembled higher-level messages.
2247 ReadChannelPort &resultRaw = portIter->second.getRawRead("result");
2248 // Use an unlimited read queue so the device output isn't stalled by a full
2249 // queue while we're still writing. With raw reads (translateMessage=false),
2250 // each output frame becomes its own queued message, so the default 32-msg
2251 // limit can be hit easily on a multi-frame run.
2252 resultRaw.connect(ChannelPort::ConnectOptions(/*bufferSize=*/0,
2253 /*translateMessage=*/false));
2254
2255 // Send a single header+data burst.
2256 auto batch = std::make_unique<SerialCoordInput>();
2257 batch->xTranslation(xTrans);
2258 batch->yTranslation(yTrans);
2259 for (uint32_t i = 0; i < numCoords; ++i)
2260 batch->appendCoord(inputCoords[i].x, inputCoords[i].y);
2261 argPort.write(batch);
2262
2263 // Helper: read one raw frame, accumulating bytes across `read()` calls
2264 // since transports such as DMA channel engines do not guarantee that
2265 // each `read()` returns exactly one frame.
2266 constexpr size_t frameSize = sizeof(SerialCoordOutputFrame);
2267 std::vector<uint8_t> rxBuf;
2268 auto readFrame = [&](SerialCoordOutputFrame &out) {
2269 while (rxBuf.size() < frameSize) {
2270 MessageData data;
2271 resultRaw.read(data);
2272 rxBuf.insert(rxBuf.end(), data.getBytes(),
2273 data.getBytes() + data.getSize());
2274 }
2275 std::memcpy(&out, rxBuf.data(), frameSize);
2276 rxBuf.erase(rxBuf.begin(), rxBuf.begin() + frameSize);
2277 };
2278
2279 // Read a sequence of one-or-more `header(count>0) + count data` bursts
2280 // followed by a `header(count==0)` terminator footer. Total data items
2281 // received across all bursts must equal numCoords.
2282 std::vector<Coord> results;
2283 results.reserve(numCoords);
2284 while (true) {
2286 readFrame(hdr);
2287 uint16_t burstCount = hdr.header.coordsCount;
2288 if (burstCount == 0)
2289 break;
2290 if (results.size() + burstCount > numCoords)
2291 throw std::runtime_error(
2292 "Auto serial coord translate test: bursts overflow expected total " +
2293 std::to_string(numCoords));
2294 for (uint32_t i = 0; i < burstCount; ++i) {
2295 SerialCoordOutputFrame frame{};
2296 readFrame(frame);
2297 results.push_back({frame.data.y, frame.data.x});
2298 }
2299 }
2300 if (results.size() != numCoords)
2301 throw std::runtime_error("Auto serial coord translate test: got " +
2302 std::to_string(results.size()) +
2303 " coords across all bursts " + "(expected " +
2304 std::to_string(numCoords) + ")");
2305
2306 argPort.disconnect();
2307 resultRaw.disconnect();
2308
2309 bool passed = true;
2310 std::cout << "Auto serial coord translate test results:" << std::endl;
2311 for (size_t i = 0; i < inputCoords.size(); ++i) {
2312 uint32_t expX = inputCoords[i].x + xTrans;
2313 uint32_t expY = inputCoords[i].y + yTrans;
2314 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
2315 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
2316 << ") = (" << results[i].x << "," << results[i].y
2317 << ") (expected (" << expX << "," << expY << "))";
2318 if (results[i].x != expX || results[i].y != expY) {
2319 std::cout << " MISMATCH!";
2320 passed = false;
2321 }
2322 std::cout << std::endl;
2323 }
2324
2325 if (!passed)
2326 throw std::runtime_error("Auto serial coord translate test failed");
2327
2328 logger.info("esitester", "Auto serial coord translate test passed");
2329 std::cout << "Auto serial coord translate test passed" << std::endl;
2330}
2331
2333 uint32_t iterations) {
2334 Logger &logger = conn->getLogger();
2335
2336 auto channelChild = accel->getChildren().find(AppID("channel_test"));
2337 if (channelChild == accel->getChildren().end())
2338 throw std::runtime_error("Channel test: no 'channel_test' child");
2339 auto &ports = channelChild->second->getPorts();
2340
2341 // --- Get the MMIO port to trigger the producer ---
2342 auto cmdIter = ports.find(AppID("cmd"));
2343 if (cmdIter == ports.end())
2344 throw std::runtime_error("Channel test: no 'cmd' port");
2345 auto *cmdMMIO = cmdIter->second.getAs<services::MMIO::MMIORegion>();
2346 if (!cmdMMIO)
2347 throw std::runtime_error("Channel test: 'cmd' is not MMIO");
2348
2349 // --- Get the producer to_host port ---
2350 auto producerIter = ports.find(AppID("producer"));
2351 if (producerIter == ports.end())
2352 throw std::runtime_error("Channel test: no 'producer' port");
2353 auto *producerPort =
2354 producerIter->second.getAs<services::ChannelService::ToHost>();
2355 if (!producerPort)
2356 throw std::runtime_error(
2357 "Channel test: 'producer' is not a ChannelService::ToHost");
2358 producerPort->connect();
2359
2360 // --- Test to_host: MMIO-triggered incrementing values ---
2361 // Write the number of values to send at offset 0x0.
2362 cmdMMIO->write(0x0, iterations);
2363
2364 for (uint32_t i = 0; i < iterations; ++i) {
2365 MessageData recvData = producerPort->read().get();
2366 uint32_t got = *recvData.as<uint32_t>();
2367 std::cout << "[channel] producer i=" << i << " got=" << got << std::endl;
2368 if (got != i)
2369 throw std::runtime_error("Channel producer: expected " +
2370 std::to_string(i) + ", got " +
2371 std::to_string(got));
2372 }
2373 logger.info("esitester", "Channel test: producer passed (" +
2374 std::to_string(iterations) +
2375 " incrementing values)");
2376
2377 // --- Test from_host -> to_host loopback ---
2378 auto loopbackInIter = ports.find(AppID("loopback_in"));
2379 if (loopbackInIter == ports.end())
2380 throw std::runtime_error("Channel test: no 'loopback_in' port");
2381 auto *fromHostPort =
2382 loopbackInIter->second.getAs<services::ChannelService::FromHost>();
2383 if (!fromHostPort)
2384 throw std::runtime_error(
2385 "Channel test: 'loopback_in' is not a ChannelService::FromHost");
2386 fromHostPort->connect();
2387
2388 auto loopbackOutIter = ports.find(AppID("loopback_out"));
2389 if (loopbackOutIter == ports.end())
2390 throw std::runtime_error("Channel test: no 'loopback_out' port");
2391 auto *loopbackOutPort =
2392 loopbackOutIter->second.getAs<services::ChannelService::ToHost>();
2393 if (!loopbackOutPort)
2394 throw std::runtime_error(
2395 "Channel test: 'loopback_out' is not a ChannelService::ToHost");
2396 loopbackOutPort->connect();
2397
2398 std::mt19937_64 rng(0xDEADBEEF);
2399 std::uniform_int_distribution<uint32_t> dist(0, UINT32_MAX);
2400
2401 for (uint32_t i = 0; i < iterations; ++i) {
2402 uint32_t sendVal = dist(rng);
2403 fromHostPort->write(MessageData::from(sendVal));
2404 MessageData recvData = loopbackOutPort->read().get();
2405 uint32_t recvVal = *recvData.as<uint32_t>();
2406 std::cout << "[channel] loopback i=" << i << " sent=0x"
2407 << esi::toHex(sendVal) << " recv=0x" << esi::toHex(recvVal)
2408 << std::endl;
2409 if (recvVal != sendVal)
2410 throw std::runtime_error("Channel loopback mismatch at i=" +
2411 std::to_string(i));
2412 }
2413
2414 logger.info("esitester", "Channel test: loopback passed (" +
2415 std::to_string(iterations) + " iterations)");
2416 std::cout << "Channel test passed" << std::endl;
2417}
static void print(TypedAttr val, llvm::raw_ostream &os)
DecodedOutputs decode(std::unique_ptr< SegmentedMessageData > &msg) override
Decode one raw message into zero or more typed outputs.
Abstract class representing a connection to an accelerator.
Definition Accelerator.h:96
Top level accelerator class.
Definition Accelerator.h:77
Services provide connections to 'bundles' – collections of named, unidirectional communication channe...
Definition Ports.h:611
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
Definition Ports.h:639
ReadChannelPort & getRawRead(const std::string &name) const
Definition Ports.cpp:52
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Definition Ports.cpp:42
Common options and code for ESI runtime tools.
Definition CLI.h:29
Context & getContext()
Get the context.
Definition CLI.h:69
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
Definition CLI.h:66
int esiParse(int argc, const char **argv)
Run the parser.
Definition CLI.h:52
AcceleratorConnections, Accelerators, and Manifests must all share a context.
Definition Context.h:34
Logger & getLogger()
Definition Context.h:69
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
Definition Design.h:71
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
Definition Logging.h:64
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
Definition Logging.h:75
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Definition Logging.h:83
Class to parse a manifest.
Definition Manifest.h:39
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A concrete flat message backed by a single vector of bytes.
Definition Common.h:155
const uint8_t * getBytes() const
Definition Common.h:166
const T * as() const
Cast to a type.
Definition Common.h:190
size_t getSize() const
Get the size of the data in bytes.
Definition Common.h:180
static MessageData from(T &t)
Cast from a type to its raw bytes.
Definition Common.h:200
Helper base class for stateful deserializers which may emit zero, one, or many typed outputs for each...
Definition TypedPorts.h:246
detail::TypedReadOwnedCallback< SerialCoordOutputBatch > OutputCallback
Definition TypedPorts.h:248
std::vector< std::unique_ptr< SerialCoordOutputBatch > > DecodedOutputs
Definition TypedPorts.h:249
A ChannelPort which reads data from the accelerator.
Definition Ports.h:453
virtual void connect(ReadCallback callback, const ConnectOptions &options={})
Definition Ports.cpp:140
virtual void disconnect() override
Disconnect the channel.
Definition Ports.cpp:70
virtual void read(MessageData &outData)
Specify a buffer to read into.
Definition Ports.h:517
Abstract multi-segment message.
Definition Common.h:133
void connect(const ChannelPort::ConnectOptions &opts={std::nullopt, false})
Definition TypedPorts.h:626
void write(const T &data)
Definition TypedPorts.h:636
A ChannelPort which sends data to the accelerator.
Definition Ports.h:308
virtual void disconnect() override
Definition Ports.h:322
void write(const MessageData &data)
A very basic blocking write API.
Definition Ports.h:327
bool tryWrite(const MessageData &data)
A basic non-blocking write API.
Definition Ports.h:357
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
Definition Ports.h:312
A function call which gets attached to a service port.
Definition Services.h:405
A port which writes data to the accelerator (from_host).
Definition Services.h:315
A port which reads data from the accelerator (to_host).
Definition Services.h:291
A function call which gets attached to a service port.
Definition Services.h:353
virtual void start()
Enable host memory services where necessary.
Definition Services.h:261
A "slice" of some parent MMIO space.
Definition Services.h:181
Information about the Accelerator system.
Definition Services.h:113
A telemetry port which gets attached to a service port.
Definition Services.h:470
void connect()
Connect to a particular telemetry port. The offset should not be std::nullopt.
Definition Services.cpp:459
int main()
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
static void serialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords, size_t batchSizeLimit)
constexpr std::array< uint32_t, 5 > defaultWidths
Definition esitester.cpp:84
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static void channelTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static std::string formatBandwidth(double bytesPerSec)
Definition esitester.cpp:96
static void autoSerialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static void resetTest(AcceleratorConnection *, Accelerator *)
static std::string defaultWidthsStr()
Definition esitester.cpp:85
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Definition debug.py:1
Definition esi.py:1
std::string toString(const std::any &a)
Convert ("stringify") a std::any to a string. Some loggers use this to log std::any values.
Definition Logging.cpp:132
std::string toHex(void *val)
Definition Common.cpp:37
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
uint32_t x
uint32_t y
void yTranslation(uint32_t yTrans)
void appendCoord(uint32_t x, uint32_t y)
std::vector< SerialCoordData > coords
void xTranslation(uint32_t xTrans)
SerialCoordHeader header
Segment segment(size_t idx) const override
Get a segment by index.
size_t numSegments() const override
Number of segments in the message.
SerialCoordData(uint32_t x, uint32_t y)
size_t numSegments() const override
Number of segments in the message.
void appendCoord(uint32_t x, uint32_t y)
uint32_t yTranslation() const
SerialCoordHeader header
void yTranslation(uint32_t yTrans)
SerialCoordHeader footer
uint32_t xTranslation() const
Segment segment(size_t idx) const override
Get a segment by index.
const std::vector< SerialCoordData > & getCoords() const
void xTranslation(uint32_t xTrans)
std::vector< SerialCoordData > coords
Deserialized result batch from the serial coord translator.
std::vector< Coord > coords
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
A contiguous, non-owning view of bytes within a SegmentedMessageData.
Definition Common.h:118
size_t size
Definition Common.h:120
RAII memory region for host memory.
Definition Services.h:237
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
Definition Services.h:243
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
Definition Services.h:251
virtual std::size_t getSize() const =0
SerialCoordOutputData data
SerialCoordOutputHeader header