47 const std::vector<uint32_t> &widths,
bool write,
51 const std::vector<uint32_t> &widths,
bool read,
54 const std::vector<uint32_t> &widths,
bool read,
bool write);
56 const std::vector<uint32_t> &widths,
57 uint32_t xferCount,
bool read,
bool write);
62 uint32_t xferCount,
bool read,
65 uint32_t addAmt, uint32_t numItems);
67 uint32_t addAmt, uint32_t numItems);
69 uint32_t xTrans, uint32_t yTrans,
72 uint32_t xTrans, uint32_t yTrans,
73 uint32_t numCoords,
size_t batchSizeLimit);
/// Compile-time table of five widths: 32, 64, 128, 256 and 512.
///
/// NOTE(review): presumably these are transfer widths in bits and serve as the
/// default for the `--widths` command-line options; the use sites are not
/// visible in this excerpt, so confirm against the callers.
constexpr std::array<uint32_t, 5> defaultWidths = {32, 64, 128, 256, 512};
91 const char *unit =
"B/s";
92 double value = bytesPerSec;
93 if (bytesPerSec >= 1e9) {
95 value = bytesPerSec / 1e9;
96 }
else if (bytesPerSec >= 1e6) {
98 value = bytesPerSec / 1e6;
99 }
else if (bytesPerSec >= 1e3) {
101 value = bytesPerSec / 1e3;
103 std::ostringstream oss;
104 oss.setf(std::ios::fixed);
106 oss << value <<
" " << unit;
112 const char *units[] = {
"B",
"KB",
"MB",
"GB",
"TB"};
113 double v = (double)bytes;
115 while (v >= 1024.0 && u < 4) {
119 std::ostringstream oss;
120 oss.setf(std::ios::fixed);
121 oss.precision(u == 0 ? 0 : 2);
122 oss << v <<
" " << units[u];
129 return std::to_string(us) +
" us";
130 double ms = us / 1000.0;
132 std::ostringstream oss;
133 oss.setf(std::ios::fixed);
134 oss.precision(ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0));
138 double sec = ms / 1000.0;
139 std::ostringstream oss;
140 oss.setf(std::ios::fixed);
141 oss.precision(sec < 10.0 ? 3 : 2);
150 void *ptr = _aligned_malloc(size, alignment);
152 throw std::bad_alloc();
155 void *ptr = std::aligned_alloc(alignment, size);
157 throw std::bad_alloc();
170int main(
int argc,
const char *argv[]) {
172 cli.description(
"Test an ESI system running the ESI tester image.");
173 cli.require_subcommand(1);
175 CLI::App *callback_test =
176 cli.add_subcommand(
"callback",
"initiate callback test");
177 uint32_t cb_iters = 1;
178 callback_test->add_option(
"-i,--iters", cb_iters,
179 "Number of iterations to run");
181 CLI::App *hostmemtestSub =
182 cli.add_subcommand(
"hostmem",
"Run the host memory test");
184 bool hmWrite =
false;
187 hostmemtestSub->add_flag(
"-w,--write", hmWrite,
188 "Enable host memory write test");
189 hostmemtestSub->add_flag(
"-r,--read", hmRead,
"Enable host memory read test");
190 hostmemtestSub->add_option(
191 "--widths", hostmemWidths,
194 CLI::App *dmatestSub = cli.add_subcommand(
"dma",
"Run the DMA test");
195 bool dmaRead =
false;
196 bool dmaWrite =
false;
198 dmatestSub->add_flag(
"-w,--write", dmaWrite,
"Enable dma write test");
199 dmatestSub->add_flag(
"-r,--read", dmaRead,
"Enable dma read test");
200 dmatestSub->add_option(
"--widths", dmaWidths,
204 CLI::App *bandwidthSub =
205 cli.add_subcommand(
"bandwidth",
"Run the bandwidth test");
206 uint32_t xferCount = 1000;
207 bandwidthSub->add_option(
"-c,--count", xferCount,
208 "Number of transfers to perform");
209 bool bandwidthRead =
false;
210 bool bandwidthWrite =
false;
213 bandwidthSub->add_option(
"--widths", bandwidthWidths,
214 "Width of the transfers to perform (default: " +
216 bandwidthSub->add_flag(
"-w,--write", bandwidthWrite,
217 "Enable bandwidth write");
218 bandwidthSub->add_flag(
"-r,--read", bandwidthRead,
"Enable bandwidth read");
220 CLI::App *hostmembwSub =
221 cli.add_subcommand(
"hostmembw",
"Run the host memory bandwidth test");
222 uint32_t hmBwCount = 1000;
223 bool hmBwRead =
false;
224 bool hmBwWrite =
false;
226 hostmembwSub->add_option(
"-c,--count", hmBwCount,
227 "Number of hostmem transfers");
228 hostmembwSub->add_option(
229 "--widths", hmBwWidths,
231 hostmembwSub->add_flag(
"-w,--write", hmBwWrite,
232 "Measure hostmem write bandwidth");
233 hostmembwSub->add_flag(
"-r,--read", hmBwRead,
234 "Measure hostmem read bandwidth");
236 CLI::App *loopbackSub =
237 cli.add_subcommand(
"loopback",
"Test LoopbackInOutAdd function service");
238 uint32_t loopbackIters = 10;
239 bool loopbackPipeline =
false;
240 loopbackSub->add_option(
"-i,--iters", loopbackIters,
241 "Number of function invocations (default 10)");
242 loopbackSub->add_flag(
"-p,--pipeline", loopbackPipeline,
243 "Pipeline all calls then collect results");
245 CLI::App *aggBwSub = cli.add_subcommand(
247 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
248 uint32_t aggWidth = 512;
249 uint32_t aggCount = 1000;
250 bool aggRead =
false;
251 bool aggWrite =
false;
252 aggBwSub->add_option(
254 "Bit width (default 512; other widths ignored if absent)");
255 aggBwSub->add_option(
"-c,--count", aggCount,
"Flits per unit (default 1000)");
256 aggBwSub->add_flag(
"-r,--read", aggRead,
"Include read units");
257 aggBwSub->add_flag(
"-w,--write", aggWrite,
"Include write units");
259 CLI::App *streamingAddSub = cli.add_subcommand(
260 "streaming_add",
"Test StreamingAdder function service with list input");
261 uint32_t streamingAddAmt = 5;
262 uint32_t streamingNumItems = 5;
263 bool streamingTranslate =
false;
264 streamingAddSub->add_option(
"-a,--add", streamingAddAmt,
265 "Amount to add to each element (default 5)");
266 streamingAddSub->add_option(
"-n,--num-items", streamingNumItems,
267 "Number of random items in the list (default 5)");
268 streamingAddSub->add_flag(
"-t,--translate", streamingTranslate,
269 "Use message translation (list translation)");
271 CLI::App *coordTranslateSub = cli.add_subcommand(
273 "Test CoordTranslator function service with list of coordinates");
274 uint32_t coordXTrans = 10;
275 uint32_t coordYTrans = 20;
276 uint32_t coordNumItems = 5;
277 coordTranslateSub->add_option(
"-x,--x-translation", coordXTrans,
278 "X translation amount (default 10)");
279 coordTranslateSub->add_option(
"-y,--y-translation", coordYTrans,
280 "Y translation amount (default 20)");
281 coordTranslateSub->add_option(
"-n,--num-coords", coordNumItems,
282 "Number of random coordinates (default 5)");
284 CLI::App *serialCoordTranslateSub = cli.add_subcommand(
286 "Test SerialCoordTranslator function service with list of coordinates");
287 uint32_t serialBatchSize = 240;
288 serialCoordTranslateSub->add_option(
"-x,--x-translation", coordXTrans,
289 "X translation amount (default 10)");
290 serialCoordTranslateSub->add_option(
"-y,--y-translation", coordYTrans,
291 "Y translation amount (default 20)");
292 serialCoordTranslateSub->add_option(
293 "-n,--num-coords", coordNumItems,
294 "Number of random coordinates (default 5)");
295 serialCoordTranslateSub
296 ->add_option(
"-b,--batch-size", serialBatchSize,
297 "Coordinates per header (default 240, max 65535)")
298 ->check(CLI::Range(1u, 0xFFFFu));
300 CLI::App *channelTestSub = cli.add_subcommand(
301 "channel",
"Test ChannelService to_host and from_host");
302 uint32_t channelIters = 10;
303 channelTestSub->add_option(
"-i,--iters", channelIters,
304 "Number of loopback iterations (default 10)");
306 if (
int rc = cli.
esiParse(argc, argv))
308 if (!cli.get_help_ptr()->empty())
315 ctxt.
getLogger().
info(
"esitester",
"Connected to accelerator.");
316 Manifest manifest(ctxt, info.getJsonManifest());
319 acc->getServiceThread()->addPoll(*accel);
321 if (*callback_test) {
323 }
else if (*hostmemtestSub) {
324 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
325 }
else if (*loopbackSub) {
327 }
else if (*dmatestSub) {
328 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
329 }
else if (*bandwidthSub) {
330 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
332 }
else if (*hostmembwSub) {
335 }
else if (*aggBwSub) {
338 }
else if (*streamingAddSub) {
339 if (streamingTranslate)
344 }
else if (*coordTranslateSub) {
346 }
else if (*serialCoordTranslateSub) {
348 coordNumItems, serialBatchSize);
349 }
else if (*channelTestSub) {
354 }
catch (std::exception &e) {
359 std::cout <<
"Exiting successfully\n";
364 uint32_t iterations) {
367 throw std::runtime_error(
"No cb_test child found in accelerator");
368 auto &ports = cb_test->second->getPorts();
369 auto cmd_port = ports.find(
AppID(
"cmd"));
370 if (cmd_port == ports.end())
371 throw std::runtime_error(
"No cmd port found in cb_test child");
374 throw std::runtime_error(
"cb_test cmd port is not MMIO");
376 auto f = ports.find(
AppID(
"cb"));
377 if (f == ports.end())
378 throw std::runtime_error(
"No cb port found in accelerator");
382 throw std::runtime_error(
"cb port is not a CallService::Callback");
384 std::atomic<uint32_t> callbackCount = 0;
387 callbackCount.fetch_add(1);
388 conn->getLogger().
debug(
389 [&](std::string &subsystem, std::string &msg,
390 std::unique_ptr<std::map<std::string, std::any>> &details) {
391 subsystem =
"ESITESTER";
392 msg =
"Received callback";
393 details = std::make_unique<std::map<std::string, std::any>>();
394 details->emplace(
"data", data);
396 std::cout <<
"callback: " << *data.as<uint64_t>() << std::endl;
401 for (uint32_t i = 0; i < iterations; ++i) {
402 conn->getLogger().info(
"esitester",
"Issuing callback command iteration " +
403 std::to_string(i) +
"/" +
404 std::to_string(iterations));
405 cmdMMIO->write(0x10, i);
407 for (uint32_t wait = 0; wait < 1000; ++wait) {
408 if (callbackCount.load() > i)
410 std::this_thread::sleep_for(std::chrono::milliseconds(1));
412 if (callbackCount.load() <= i)
413 throw std::runtime_error(
"Callback test failed. No callback received");
421 std::cout <<
"Running hostmem WRITE test with width " << width << std::endl;
422 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
423 auto check = [&](
bool print) {
425 for (
size_t i = 0; i < 9; ++i) {
427 printf(
"[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
428 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
434 auto writeMemChildIter = acc->getChildren().find(
AppID(
"writemem", width));
435 if (writeMemChildIter == acc->getChildren().end())
436 throw std::runtime_error(
437 "hostmem write test failed. No writemem child found");
438 auto &writeMemPorts = writeMemChildIter->second->getPorts();
440 auto cmdPortIter = writeMemPorts.find(
AppID(
"cmd", width));
441 if (cmdPortIter == writeMemPorts.end())
442 throw std::runtime_error(
443 "hostmem write test failed. No (cmd,width) MMIO port");
446 throw std::runtime_error(
447 "hostmem write test failed. (cmd,width) port not MMIO");
449 auto issuedPortIter = writeMemPorts.find(
AppID(
"addrCmdIssued"));
450 if (issuedPortIter == writeMemPorts.end())
451 throw std::runtime_error(
452 "hostmem write test failed. addrCmdIssued missing");
453 auto *addrCmdIssuedPort =
455 if (!addrCmdIssuedPort)
456 throw std::runtime_error(
457 "hostmem write test failed. addrCmdIssued not telemetry");
458 addrCmdIssuedPort->connect();
460 auto responsesPortIter = writeMemPorts.find(
AppID(
"addrCmdResponses"));
461 if (responsesPortIter == writeMemPorts.end())
462 throw std::runtime_error(
463 "hostmem write test failed. addrCmdResponses missing");
464 auto *addrCmdResponsesPort =
466 if (!addrCmdResponsesPort)
467 throw std::runtime_error(
468 "hostmem write test failed. addrCmdResponses not telemetry");
469 addrCmdResponsesPort->connect();
471 for (
size_t i = 0, e = 9; i < e; ++i)
472 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
474 cmdMMIO->write(0x10,
reinterpret_cast<uint64_t
>(region.
getDevicePtr()));
475 cmdMMIO->write(0x18, 1);
476 cmdMMIO->write(0x20, 1);
478 for (
int i = 0; i < 100; ++i) {
479 auto issued = addrCmdIssuedPort->readInt();
480 auto responses = addrCmdResponsesPort->readInt();
481 if (issued == 1 && responses == 1) {
485 std::this_thread::sleep_for(std::chrono::microseconds(100));
489 throw std::runtime_error(
"hostmem write test (" + std::to_string(width) +
490 " bits) timeout waiting for completion");
493 throw std::runtime_error(
"hostmem write test failed (" +
494 std::to_string(width) +
" bits)");
500 std::cout <<
"Running hostmem READ test with width " << width << std::endl;
501 auto readMemChildIter = acc->getChildren().find(
AppID(
"readmem", width));
502 if (readMemChildIter == acc->getChildren().end())
503 throw std::runtime_error(
504 "hostmem read test failed. No readmem child found");
506 auto &readMemPorts = readMemChildIter->second->getPorts();
507 auto addrCmdPortIter = readMemPorts.find(
AppID(
"cmd", width));
508 if (addrCmdPortIter == readMemPorts.end())
509 throw std::runtime_error(
510 "hostmem read test failed. No AddressCommand MMIO port");
514 throw std::runtime_error(
515 "hostmem read test failed. AddressCommand port not MMIO");
517 auto lastReadPortIter = readMemPorts.find(
AppID(
"lastReadLSB"));
518 if (lastReadPortIter == readMemPorts.end())
519 throw std::runtime_error(
"hostmem read test failed. lastReadLSB missing");
523 throw std::runtime_error(
524 "hostmem read test failed. lastReadLSB not telemetry");
525 lastReadPort->connect();
527 auto issuedPortIter = readMemPorts.find(
AppID(
"addrCmdIssued"));
528 if (issuedPortIter == readMemPorts.end())
529 throw std::runtime_error(
"hostmem read test failed. addrCmdIssued missing");
530 auto *addrCmdIssuedPort =
532 if (!addrCmdIssuedPort)
533 throw std::runtime_error(
534 "hostmem read test failed. addrCmdIssued not telemetry");
535 addrCmdIssuedPort->connect();
537 auto responsesPortIter = readMemPorts.find(
AppID(
"addrCmdResponses"));
538 if (responsesPortIter == readMemPorts.end())
539 throw std::runtime_error(
540 "hostmem read test failed. addrCmdResponses missing");
541 auto *addrCmdResponsesPort =
543 if (!addrCmdResponsesPort)
544 throw std::runtime_error(
545 "hostmem read test failed. addrCmdResponses not telemetry");
546 addrCmdResponsesPort->connect();
548 for (
size_t i = 0; i < 8; ++i) {
549 auto *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
550 dataPtr[0] = 0x12345678ull << i;
551 dataPtr[1] = 0xDEADBEEFull << i;
553 addrCmdMMIO->write(0x10,
reinterpret_cast<uint64_t
>(region.
getDevicePtr()));
554 addrCmdMMIO->write(0x18, 1);
555 addrCmdMMIO->write(0x20, 1);
557 for (
int waitLoop = 0; waitLoop < 100; ++waitLoop) {
558 auto issued = addrCmdIssuedPort->readInt();
559 auto responses = addrCmdResponsesPort->readInt();
560 if (issued == 1 && responses == 1) {
564 std::this_thread::sleep_for(std::chrono::milliseconds(10));
567 throw std::runtime_error(
"hostmem read (" + std::to_string(width) +
568 " bits) timeout waiting for completion");
569 uint64_t captured = lastReadPort->readInt();
570 uint64_t expected = dataPtr[0];
572 expected &= ((1ull << width) - 1);
573 if (captured != expected)
574 throw std::runtime_error(
"hostmem read test (" + std::to_string(width) +
575 " bits) failed. Expected " +
582 const std::vector<uint32_t> &widths,
bool write,
587 auto scratchRegion = hostmem->allocate(1024 * 1024,
588 {.writeable =
true});
589 uint64_t *dataPtr =
static_cast<uint64_t *
>(scratchRegion->getPtr());
590 conn->getLogger().info(
"esitester",
591 "Running host memory test with region size " +
592 std::to_string(scratchRegion->getSize()) +
593 " bytes at 0x" +
toHex(dataPtr));
594 for (
size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
596 scratchRegion->flush();
599 for (
size_t width : widths) {
605 }
catch (std::exception &e) {
606 conn->getLogger().error(
"esitester",
"Hostmem test failed for width " +
607 std::to_string(width) +
": " +
613 throw std::runtime_error(
"Hostmem test failed");
614 std::cout <<
"Hostmem test passed" << std::endl;
619 Logger &logger = conn->getLogger();
620 logger.
info(
"esitester",
621 "== Running DMA read test with width " + std::to_string(width));
624 acc->resolvePort({
AppID(
"tohostdma", width),
AppID(
"cmd")}, lastPath);
626 throw std::runtime_error(
"dma read test failed. No tohostdma[" +
627 std::to_string(width) +
"] found");
630 throw std::runtime_error(
"dma read test failed. MMIO port is not MMIO");
633 acc->resolvePort({
AppID(
"tohostdma", width),
AppID(
"out")}, lastPath);
637 size_t xferCount = 24;
640 toHostMMIO->write(0, xferCount);
641 for (
size_t i = 0; i < xferCount; ++i) {
644 uint64_t val = *data.as<uint64_t>();
646 throw std::runtime_error(
"dma read test failed. Out of order data");
649 logger.
debug(
"esitester",
650 "Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex());
653 std::cout <<
" DMA read test for " << width <<
" bits passed" << std::endl;
658 Logger &logger = conn->getLogger();
659 logger.
info(
"esitester",
660 "Running DMA write test with width " + std::to_string(width));
663 acc->resolvePort({
AppID(
"fromhostdma", width),
AppID(
"cmd")}, lastPath);
664 if (!fromHostMMIOPort)
665 throw std::runtime_error(
"dma read test for " +
toString(width) +
666 " bits failed. No fromhostdma[" +
667 std::to_string(width) +
"] found");
670 throw std::runtime_error(
"dma write test for " +
toString(width) +
671 " bits failed. MMIO port is not MMIO");
674 acc->resolvePort({
AppID(
"fromhostdma", width),
AppID(
"in")}, lastPath);
676 throw std::runtime_error(
"dma write test for " +
toString(width) +
677 " bits failed. No out port found");
681 size_t xferCount = 24;
682 uint8_t *data =
new uint8_t[width];
683 for (
size_t i = 0; i < width / 8; ++i)
685 fromHostMMIO->read(8);
686 fromHostMMIO->write(0, xferCount);
687 for (
size_t i = 1; i < xferCount + 1; ++i) {
694 std::this_thread::sleep_for(std::chrono::milliseconds(10));
696 }
while (!successWrite && ++attempts < 100);
698 throw std::runtime_error(
"dma write test for " +
toString(width) +
699 " bits failed. Write failed");
700 uint64_t lastReadMMIO;
701 for (
size_t a = 0; a < 20; ++a) {
702 lastReadMMIO = fromHostMMIO->read(8);
703 if (lastReadMMIO == i)
705 std::this_thread::sleep_for(std::chrono::milliseconds(10));
707 throw std::runtime_error(
"dma write for " +
toString(width) +
708 " bits test failed. Read from MMIO failed");
713 std::cout <<
" DMA write test for " << width <<
" bits passed" << std::endl;
717 const std::vector<uint32_t> &widths,
bool read,
721 for (
size_t width : widths)
724 }
catch (std::exception &e) {
726 std::cerr <<
"DMA write test for " << width
727 <<
" bits failed: " << e.what() << std::endl;
730 for (
size_t width : widths)
733 throw std::runtime_error(
"DMA test failed");
734 std::cout <<
"DMA test passed" << std::endl;
742 size_t width,
size_t xferCount) {
746 acc->resolvePort({
AppID(
"tohostdma", width),
AppID(
"cmd")}, lastPath);
748 throw std::runtime_error(
"bandwidth test failed. No tohostdma[" +
749 std::to_string(width) +
"] found");
752 throw std::runtime_error(
"bandwidth test failed. MMIO port is not MMIO");
755 acc->resolvePort({
AppID(
"tohostdma", width),
AppID(
"out")}, lastPath);
759 Logger &logger = conn->getLogger();
760 logger.
info(
"esitester",
"Starting read bandwidth test with " +
761 std::to_string(xferCount) +
" x " +
762 std::to_string(width) +
" bit transfers");
764 auto start = std::chrono::high_resolution_clock::now();
765 toHostMMIO->write(0, xferCount);
766 for (
size_t i = 0; i < xferCount; ++i) {
769 [i, &data](std::string &subsystem, std::string &msg,
770 std::unique_ptr<std::map<std::string, std::any>> &details) {
771 subsystem =
"esitester";
772 msg =
"Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex();
775 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
776 std::chrono::high_resolution_clock::now() - start);
778 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
779 logger.
info(
"esitester",
780 " Bandwidth test: " + std::to_string(xferCount) +
" x " +
781 std::to_string(width) +
" bit transfers in " +
782 std::to_string(duration.count()) +
" microseconds");
787 size_t width,
size_t xferCount) {
791 acc->resolvePort({
AppID(
"fromhostdma", width),
AppID(
"cmd")}, lastPath);
792 if (!fromHostMMIOPort)
793 throw std::runtime_error(
"bandwidth test failed. No fromhostdma[" +
794 std::to_string(width) +
"] found");
797 throw std::runtime_error(
"bandwidth test failed. MMIO port is not MMIO");
800 acc->resolvePort({
AppID(
"fromhostdma", width),
AppID(
"in")}, lastPath);
804 Logger &logger = conn->getLogger();
805 logger.
info(
"esitester",
"Starting write bandwidth test with " +
806 std::to_string(xferCount) +
" x " +
807 std::to_string(width) +
" bit transfers");
808 std::vector<uint8_t> dataVec(width / 8);
809 for (
size_t i = 0; i < width / 8; ++i)
812 auto start = std::chrono::high_resolution_clock::now();
813 fromHostMMIO->write(0, xferCount);
814 for (
size_t i = 0; i < xferCount; ++i) {
817 [i, &data](std::string &subsystem, std::string &msg,
818 std::unique_ptr<std::map<std::string, std::any>> &details) {
819 subsystem =
"esitester";
820 msg =
"Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex();
823 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
824 std::chrono::high_resolution_clock::now() - start);
826 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
827 logger.
info(
"esitester",
828 " Bandwidth test: " + std::to_string(xferCount) +
" x " +
829 std::to_string(width) +
" bit transfers in " +
830 std::to_string(duration.count()) +
" microseconds");
835 const std::vector<uint32_t> &widths,
836 uint32_t xferCount,
bool read,
bool write) {
838 for (uint32_t w : widths)
841 for (uint32_t w : widths)
852 uint32_t width, uint32_t xferCount) {
853 Logger &logger = conn->getLogger();
854 logger.
info(
"esitester",
"Starting hostmem WRITE bandwidth test: " +
855 std::to_string(xferCount) +
" x " +
856 std::to_string(width) +
" bits");
858 auto writeMemChildIter = acc->getChildren().find(
AppID(
"writemem", width));
859 if (writeMemChildIter == acc->getChildren().end())
860 throw std::runtime_error(
"hostmem write bandwidth: writemem child missing");
861 auto &writeMemPorts = writeMemChildIter->second->getPorts();
863 auto cmdPortIter = writeMemPorts.find(
AppID(
"cmd", width));
864 if (cmdPortIter == writeMemPorts.end())
865 throw std::runtime_error(
"hostmem write bandwidth: cmd MMIO missing");
868 throw std::runtime_error(
"hostmem write bandwidth: cmd not MMIO");
870 auto issuedIter = writeMemPorts.find(
AppID(
"addrCmdIssued"));
871 auto respIter = writeMemPorts.find(
AppID(
"addrCmdResponses"));
872 auto cycleCount = writeMemPorts.find(
AppID(
"addrCmdCycles"));
873 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
874 cycleCount == writeMemPorts.end())
875 throw std::runtime_error(
"hostmem write bandwidth: telemetry missing");
881 if (!issuedPort || !respPort || !cyclePort)
882 throw std::runtime_error(
883 "hostmem write bandwidth: telemetry type mismatch");
885 issuedPort->connect();
890 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
891 size_t words = region.
getSize() / 8;
892 for (
size_t i = 0; i < words; ++i)
893 dataPtr[i] = i + 0xA5A50000;
896 auto start = std::chrono::high_resolution_clock::now();
898 uint64_t devPtr =
reinterpret_cast<uint64_t
>(region.
getDevicePtr());
899 cmdMMIO->write(0x10, devPtr);
900 cmdMMIO->write(0x18, xferCount);
901 cmdMMIO->write(0x20, 1);
904 bool completed =
false;
905 for (
int wait = 0; wait < 100000; ++wait) {
906 uint64_t respNow = respPort->
readInt();
907 if (respNow == xferCount) {
911 std::this_thread::sleep_for(std::chrono::microseconds(50));
914 throw std::runtime_error(
"hostmem write bandwidth timeout");
915 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
916 std::chrono::high_resolution_clock::now() - start);
918 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
919 uint64_t cycles = cyclePort->
readInt();
920 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
921 std::cout <<
"[WRITE] Hostmem bandwidth (" << std::to_string(width)
923 << std::to_string(xferCount) <<
" flits in "
924 << std::to_string(duration.count()) <<
" us, "
925 << std::to_string(cycles) <<
" cycles, " << bytesPerCycle
926 <<
" bytes/cycle" << std::endl;
932 uint32_t width, uint32_t xferCount) {
933 Logger &logger = conn->getLogger();
934 logger.
info(
"esitester",
"Starting hostmem READ bandwidth test: " +
935 std::to_string(xferCount) +
" x " +
936 std::to_string(width) +
" bits");
938 auto readMemChildIter = acc->getChildren().find(
AppID(
"readmem", width));
939 if (readMemChildIter == acc->getChildren().end())
940 throw std::runtime_error(
"hostmem read bandwidth: readmem child missing");
941 auto &readMemPorts = readMemChildIter->second->getPorts();
943 auto cmdPortIter = readMemPorts.find(
AppID(
"cmd", width));
944 if (cmdPortIter == readMemPorts.end())
945 throw std::runtime_error(
"hostmem read bandwidth: cmd MMIO missing");
948 throw std::runtime_error(
"hostmem read bandwidth: cmd not MMIO");
950 auto issuedIter = readMemPorts.find(
AppID(
"addrCmdIssued"));
951 auto respIter = readMemPorts.find(
AppID(
"addrCmdResponses"));
952 auto cyclePort = readMemPorts.find(
AppID(
"addrCmdCycles"));
953 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
954 cyclePort == readMemPorts.end())
955 throw std::runtime_error(
"hostmem read bandwidth: telemetry missing");
961 if (!issuedPort || !respPort || !cycleCntPort)
962 throw std::runtime_error(
"hostmem read bandwidth: telemetry type mismatch");
963 issuedPort->connect();
968 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
969 size_t words64 = region.
getSize() / 8;
970 for (
size_t i = 0; i < words64; ++i)
971 dataPtr[i] = 0xCAFEBABE0000ull + i;
973 uint64_t devPtr =
reinterpret_cast<uint64_t
>(region.
getDevicePtr());
974 auto start = std::chrono::high_resolution_clock::now();
976 cmdMMIO->write(0x10, devPtr);
977 cmdMMIO->write(0x18, xferCount);
978 cmdMMIO->write(0x20, 1);
981 for (
int wait = 0; wait < 100000; ++wait) {
982 uint64_t respNow = respPort->
readInt();
983 if (respNow == xferCount) {
987 std::this_thread::sleep_for(std::chrono::microseconds(50));
990 throw std::runtime_error(
"hostmem read bandwidth timeout");
991 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
992 std::chrono::high_resolution_clock::now() - start);
994 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
995 uint64_t cycles = cycleCntPort->
readInt();
996 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
997 std::cout <<
"[ READ] Hostmem bandwidth (" << width
999 <<
" flits in " << duration.count() <<
" us, " << cycles
1000 <<
" cycles, " << bytesPerCycle <<
" bytes/cycle" << std::endl;
1005 const std::vector<uint32_t> &widths,
bool read,
1008 hostmemSvc->
start();
1009 auto region = hostmemSvc->allocate(1024 * 1024 * 1024,
1010 {.writeable =
true});
1011 for (uint32_t w : widths) {
1020 uint32_t iterations,
bool pipeline) {
1021 Logger &logger = conn->getLogger();
1024 throw std::runtime_error(
"Loopback test: no 'loopback' child");
1025 auto &ports = loopbackChild->second->getPorts();
1026 auto addIter = ports.find(
AppID(
"add"));
1027 if (addIter == ports.end())
1028 throw std::runtime_error(
"Loopback test: no 'add' port");
1033 throw std::runtime_error(
1034 "Loopback test: 'add' port not a FuncService::Function");
1035 funcPort->connect();
1036 if (iterations == 0) {
1037 logger.
info(
"esitester",
"Loopback add test: 0 iterations (skipped)");
1040 std::mt19937_64 rng(0xC0FFEE);
1041 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
1044 auto start = std::chrono::high_resolution_clock::now();
1045 for (uint32_t i = 0; i < iterations; ++i) {
1046 uint32_t argVal = dist(rng);
1047 uint32_t expected = (argVal + 11) & 0xFFFF;
1048 uint8_t argBytes[3] = {
1049 static_cast<uint8_t
>(argVal & 0xFF),
1050 static_cast<uint8_t
>((argVal >> 8) & 0xFF),
1051 static_cast<uint8_t
>((argVal >> 16) & 0xFF),
1054 MessageData resMsg = funcPort->call(argMsg).get();
1055 uint16_t got = *resMsg.
as<uint16_t>();
1056 std::cout <<
"[loopback] i=" << i <<
" arg=0x" <<
esi::toHex(argVal)
1059 if (got != expected)
1060 throw std::runtime_error(
"Loopback mismatch (non-pipelined)");
1062 auto end = std::chrono::high_resolution_clock::now();
1063 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
1065 double callsPerSec = (double)iterations * 1e6 / (
double)us;
1066 logger.
info(
"esitester",
"Loopback add test passed (non-pipelined, " +
1067 std::to_string(iterations) +
" calls, " +
1068 std::to_string(us) +
" us, " +
1069 std::to_string(callsPerSec) +
" calls/s)");
1072 std::vector<std::future<MessageData>> futures;
1073 futures.reserve(iterations);
1074 std::vector<uint32_t> expectedVals;
1075 expectedVals.reserve(iterations);
1077 auto issueStart = std::chrono::high_resolution_clock::now();
1078 for (uint32_t i = 0; i < iterations; ++i) {
1079 uint32_t argVal = dist(rng);
1080 uint32_t expected = (argVal + 11) & 0xFFFF;
1081 uint8_t argBytes[3] = {
1082 static_cast<uint8_t
>(argVal & 0xFF),
1083 static_cast<uint8_t
>((argVal >> 8) & 0xFF),
1084 static_cast<uint8_t
>((argVal >> 16) & 0xFF),
1086 futures.emplace_back(funcPort->call(
MessageData(argBytes, 3)));
1087 expectedVals.emplace_back(expected);
1089 auto issueEnd = std::chrono::high_resolution_clock::now();
1091 for (uint32_t i = 0; i < iterations; ++i) {
1093 uint16_t got = *resMsg.
as<uint16_t>();
1094 uint16_t exp = (uint16_t)expectedVals[i];
1095 std::cout <<
"[loopback-pipelined] i=" << i <<
" got=0x"
1098 throw std::runtime_error(
"Loopback mismatch (pipelined) idx=" +
1101 auto collectEnd = std::chrono::high_resolution_clock::now();
1103 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1104 issueEnd - issueStart)
1106 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1107 collectEnd - issueStart)
1110 double issueRate = (double)iterations * 1e6 / (
double)issueUs;
1111 double completionRate = (double)iterations * 1e6 / (
double)totalUs;
1113 logger.
info(
"esitester",
"Loopback add test passed (pipelined). Issued " +
1114 std::to_string(iterations) +
" in " +
1115 std::to_string(issueUs) +
" us (" +
1116 std::to_string(issueRate) +
1117 " calls/s), total " + std::to_string(totalUs) +
1118 " us (" + std::to_string(completionRate) +
1119 " calls/s effective)");
1125 uint32_t xferCount,
bool read,
1127 Logger &logger = conn->getLogger();
1128 if (!read && !write) {
1129 std::cout <<
"aggbandwidth: nothing to do (enable --read and/or --write)\n";
1134 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1135 " count=" + std::to_string(xferCount) +
1136 " read=" + (read ?
"Y" :
"N") +
" write=" + (write ?
"Y" :
"N"));
1139 hostmemSvc->
start();
1143 bool isRead =
false;
1144 bool isWrite =
false;
1145 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1149 bool launched =
false;
1152 uint64_t duration_us = 0;
1153 uint64_t cycleCount = 0;
1154 std::chrono::high_resolution_clock::time_point start;
1156 std::vector<Unit> units;
1157 const std::vector<std::string> readPrefixes = {
"readmem",
"readmem_0",
1158 "readmem_1",
"readmem_2"};
1159 const std::vector<std::string> writePrefixes = {
"writemem",
"writemem_0",
1160 "writemem_1",
"writemem_2"};
1162 auto addUnits = [&](
const std::vector<std::string> &pref,
bool doRead,
1164 for (
auto &p : pref) {
1166 auto childIt = acc->getChildren().find(
id);
1167 if (childIt == acc->getChildren().end())
1169 auto &ports = childIt->second->getPorts();
1170 auto cmdIt = ports.find(
AppID(
"cmd", width));
1171 auto respIt = ports.find(
AppID(
"addrCmdResponses"));
1172 auto cycIt = ports.find(
AppID(
"addrCmdCycles"));
1173 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1178 if (!cmd || !resp || !cyc)
1185 u.isWrite = doWrite;
1186 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable =
true});
1188 uint64_t *ptr =
static_cast<uint64_t *
>(u.region->getPtr());
1189 size_t words = u.region->getSize() / 8;
1190 for (
size_t i = 0; i < words; ++i)
1192 (p[0] ==
'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1197 u.bytes = uint64_t(xferCount) * (width / 8);
1198 units.emplace_back(std::move(u));
1202 addUnits(readPrefixes,
true,
false);
1204 addUnits(writePrefixes,
false,
true);
1205 if (units.empty()) {
1206 std::cout <<
"aggbandwidth: no matching units present for width " << width
1211 auto wallStart = std::chrono::high_resolution_clock::now();
1213 for (
auto &u : units) {
1214 uint64_t devPtr =
reinterpret_cast<uint64_t
>(u.region->getDevicePtr());
1215 u.cmd->write(0x10, devPtr);
1216 u.cmd->write(0x18, xferCount);
1217 u.cmd->write(0x20, 1);
1218 u.start = std::chrono::high_resolution_clock::now();
1223 const uint64_t timeoutLoops = 200000;
1226 bool allDone =
true;
1227 for (
auto &u : units) {
1230 if (u.resp->readInt() == xferCount) {
1231 auto end = std::chrono::high_resolution_clock::now();
1233 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1235 u.cycleCount = u.cycles->readInt();
1243 if (++loops >= timeoutLoops)
1244 throw std::runtime_error(
"aggbandwidth: timeout");
1245 std::this_thread::sleep_for(std::chrono::microseconds(50));
1247 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1248 std::chrono::high_resolution_clock::now() - wallStart)
1251 uint64_t totalBytes = 0;
1252 uint64_t totalReadBytes = 0;
1253 uint64_t totalWriteBytes = 0;
1254 for (
auto &u : units) {
1255 totalBytes += u.bytes;
1257 totalReadBytes += u.bytes;
1259 totalWriteBytes += u.bytes;
1260 double unitBps = (double)u.bytes * 1e6 / (
double)u.duration_us;
1261 std::cout <<
"[agg-unit] " << u.prefix <<
"[" << width <<
"] "
1262 << (u.isRead ?
"READ" : (u.isWrite ?
"WRITE" :
"UNK"))
1263 <<
" bytes=" <<
humanBytes(u.bytes) <<
" (" << u.bytes <<
" B)"
1264 <<
" time=" <<
humanTimeUS(u.duration_us) <<
" (" << u.duration_us
1265 <<
" us) cycles=" << u.cycleCount
1271 totalReadBytes ? (double)totalReadBytes * 1e6 / (
double)wallUs : 0.0;
1272 double aggWriteBps =
1273 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (
double)wallUs : 0.0;
1274 double aggCombinedBps =
1275 totalBytes ? (double)totalBytes * 1e6 / (
double)wallUs : 0.0;
1277 std::cout <<
"[agg-total] units=" << units.size()
1278 <<
" read_bytes=" <<
humanBytes(totalReadBytes) <<
" ("
1279 << totalReadBytes <<
" B)"
1281 <<
" write_bytes=" <<
humanBytes(totalWriteBytes) <<
" ("
1282 << totalWriteBytes <<
" B)"
1284 <<
" combined_bytes=" <<
humanBytes(totalBytes) <<
" ("
1285 << totalBytes <<
" B)"
1287 <<
" wall_time=" <<
humanTimeUS(wallUs) <<
" (" << wallUs <<
" us)"
1289 logger.
info(
"esitester",
"Aggregate hostmem bandwidth test complete");
1295#pragma pack(push, 1)
1303 "StreamingAddArg must be 9 bytes packed");
1308#pragma pack(push, 1)
1315 "StreamingAddResult must be 5 bytes packed");
1321 uint32_t addAmt, uint32_t numItems) {
1322 Logger &logger = conn->getLogger();
1323 logger.
info(
"esitester",
"Starting streaming add test with add_amt=" +
1324 std::to_string(addAmt) +
1325 ", num_items=" + std::to_string(numItems));
1328 std::mt19937 rng(0xDEADBEEF);
1329 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1330 std::vector<uint32_t> inputData;
1331 inputData.reserve(numItems);
1332 for (uint32_t i = 0; i < numItems; ++i)
1333 inputData.push_back(dist(rng));
1336 auto streamingAdderChild =
1338 if (streamingAdderChild == accel->
getChildren().end())
1339 throw std::runtime_error(
1340 "Streaming add test: no 'streaming_adder' child found");
1342 auto &ports = streamingAdderChild->second->getPorts();
1343 auto addIter = ports.find(
AppID(
"streaming_add"));
1344 if (addIter == ports.end())
1345 throw std::runtime_error(
1346 "Streaming add test: no 'streaming_add' port found");
1358 for (
size_t i = 0; i < inputData.size(); ++i) {
1361 arg.
input = inputData[i];
1362 arg.
last = (i == inputData.size() - 1) ? 1 : 0;
1364 MessageData(
reinterpret_cast<const uint8_t *
>(&arg),
sizeof(arg)));
1365 logger.
debug(
"esitester",
"Sent {add_amt=" + std::to_string(arg.
addAmt) +
1366 ", input=" + std::to_string(arg.
input) +
1367 ", last=" + (arg.
last ?
"true" :
"false") +
1372 std::vector<uint32_t> results;
1373 bool lastSeen =
false;
1376 resultPort.
read(resMsg);
1378 throw std::runtime_error(
1379 "Streaming add test: unexpected result message size");
1383 lastSeen = res->
last != 0;
1384 results.push_back(res->data);
1385 logger.
debug(
"esitester",
"Received result=" + std::to_string(res->data) +
1386 " (last=" + (lastSeen ?
"true" :
"false") +
1391 if (results.size() != inputData.size())
1392 throw std::runtime_error(
1393 "Streaming add test: result size mismatch. Expected " +
1394 std::to_string(inputData.size()) +
", got " +
1395 std::to_string(results.size()));
1398 std::cout <<
"Streaming add test results:" << std::endl;
1399 for (
size_t i = 0; i < inputData.size(); ++i) {
1400 uint32_t expected = inputData[i] + addAmt;
1401 std::cout <<
" input[" << i <<
"]=" << inputData[i] <<
" + " << addAmt
1402 <<
" = " << results[i] <<
" (expected " << expected <<
")";
1403 if (results[i] != expected) {
1404 std::cout <<
" MISMATCH!";
1407 std::cout << std::endl;
1414 throw std::runtime_error(
"Streaming add test failed: result mismatch");
1416 logger.
info(
"esitester",
"Streaming add test passed");
1417 std::cout <<
"Streaming add test passed" << std::endl;
1436#pragma pack(push, 1)
1444 uint32_t *
inputData() {
return reinterpret_cast<uint32_t *
>(
this + 1); }
1446 return reinterpret_cast<const uint32_t *
>(
this + 1);
1465#pragma pack(push, 1)
1471 uint32_t *
data() {
return reinterpret_cast<uint32_t *
>(
this + 1); }
1473 return reinterpret_cast<const uint32_t *
>(
this + 1);
1487 uint32_t numItems) {
1488 Logger &logger = conn->getLogger();
1489 logger.
info(
"esitester",
1490 "Starting streaming add test (translated) with add_amt=" +
1491 std::to_string(addAmt) +
1492 ", num_items=" + std::to_string(numItems));
1495 std::mt19937 rng(0xDEADBEEF);
1496 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1497 std::vector<uint32_t> inputData;
1498 inputData.reserve(numItems);
1499 for (uint32_t i = 0; i < numItems; ++i)
1500 inputData.push_back(dist(rng));
1503 auto streamingAdderChild =
1505 if (streamingAdderChild == accel->
getChildren().end())
1506 throw std::runtime_error(
1507 "Streaming add test: no 'streaming_adder' child found");
1509 auto &ports = streamingAdderChild->second->getPorts();
1510 auto addIter = ports.find(
AppID(
"streaming_add"));
1511 if (addIter == ports.end())
1512 throw std::runtime_error(
1513 "Streaming add test: no 'streaming_add' port found");
1528 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1531 throw std::bad_alloc();
1533 std::unique_ptr<void,
decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1536 arg->addAmt = addAmt;
1537 for (uint32_t i = 0; i < numItems; ++i)
1538 arg->inputData()[i] = inputData[i];
1540 logger.
debug(
"esitester",
1541 "Sending translated argument: " + std::to_string(argSize) +
1542 " bytes, list_length=" + std::to_string(arg->inputLength) +
1543 ", add_amt=" + std::to_string(arg->addAmt));
1546 argPort.
write(
MessageData(
reinterpret_cast<const uint8_t *
>(arg), argSize));
1551 resultPort.
read(resMsg);
1553 logger.
debug(
"esitester",
"Received translated result: " +
1554 std::to_string(resMsg.
getSize()) +
" bytes");
1557 throw std::runtime_error(
1558 "Streaming add test (translated): result too small");
1560 const auto *result =
1565 throw std::runtime_error(
1566 "Streaming add test (translated): result data truncated");
1569 if (result->dataLength != inputData.size())
1570 throw std::runtime_error(
1571 "Streaming add test (translated): result size mismatch. Expected " +
1572 std::to_string(inputData.size()) +
", got " +
1573 std::to_string(result->dataLength));
1576 std::cout <<
"Streaming add test results:" << std::endl;
1577 for (
size_t i = 0; i < inputData.size(); ++i) {
1578 uint32_t expected = inputData[i] + addAmt;
1579 std::cout <<
" input[" << i <<
"]=" << inputData[i] <<
" + " << addAmt
1580 <<
" = " << result->data()[i] <<
" (expected " << expected <<
")";
1581 if (result->data()[i] != expected) {
1582 std::cout <<
" MISMATCH!";
1585 std::cout << std::endl;
1592 throw std::runtime_error(
1593 "Streaming add test (translated) failed: result mismatch");
1595 logger.
info(
"esitester",
"Streaming add test passed (translated)");
1596 std::cout <<
"Streaming add test passed" << std::endl;
1607#pragma pack(push, 1)
1613static_assert(
sizeof(
Coord) == 8,
"Coord must be 8 bytes packed");
1624#pragma pack(push, 1)
1634 return reinterpret_cast<const Coord *
>(
this + 1);
1651#pragma pack(push, 1)
1659 return reinterpret_cast<const Coord *
>(
this + 1);
1672 uint32_t xTrans, uint32_t yTrans,
1673 uint32_t numCoords) {
1674 Logger &logger = conn->getLogger();
1675 logger.
info(
"esitester",
"Starting coord translate test with x_trans=" +
1676 std::to_string(xTrans) +
1677 ", y_trans=" + std::to_string(yTrans) +
1678 ", num_coords=" + std::to_string(numCoords));
1683 std::mt19937 rng(0xDEADBEEF);
1684 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1685 std::vector<Coord> inputCoords;
1686 inputCoords.reserve(numCoords);
1687 for (uint32_t i = 0; i < numCoords; ++i) {
1691 inputCoords.push_back(c);
1695 auto coordTranslatorChild =
1697 if (coordTranslatorChild == accel->
getChildren().end())
1698 throw std::runtime_error(
1699 "Coord translate test: no 'coord_translator' child found");
1701 auto &ports = coordTranslatorChild->second->getPorts();
1702 auto translateIter = ports.find(
AppID(
"translate_coords"));
1703 if (translateIter == ports.end())
1704 throw std::runtime_error(
1705 "Coord translate test: no 'translate_coords' port found");
1711 throw std::runtime_error(
1712 "Coord translate test: 'translate_coords' port not a "
1713 "FuncService::Function");
1714 funcPort->connect();
1720 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1723 throw std::bad_alloc();
1725 std::unique_ptr<void,
decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1728 arg->xTranslation = xTrans;
1729 arg->yTranslation = yTrans;
1730 for (uint32_t i = 0; i < numCoords; ++i)
1731 arg->coords()[i] = inputCoords[i];
1735 "Sending coord translate argument: " + std::to_string(argSize) +
1736 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1737 ", x_trans=" + std::to_string(arg->xTranslation) +
1738 ", y_trans=" + std::to_string(arg->yTranslation));
1743 ->call(
MessageData(
reinterpret_cast<const uint8_t *
>(arg), argSize))
1747 logger.
debug(
"esitester",
"Received coord translate result: " +
1748 std::to_string(resMsg.
getSize()) +
" bytes");
1751 throw std::runtime_error(
"Coord translate test: result too small");
1753 const auto *result =
1757 throw std::runtime_error(
"Coord translate test: result data truncated");
1760 if (result->coordsLength != inputCoords.size())
1761 throw std::runtime_error(
1762 "Coord translate test: result size mismatch. Expected " +
1763 std::to_string(inputCoords.size()) +
", got " +
1764 std::to_string(result->coordsLength));
1767 std::cout <<
"Coord translate test results:" << std::endl;
1768 for (
size_t i = 0; i < inputCoords.size(); ++i) {
1769 uint32_t expectedX = inputCoords[i].x + xTrans;
1770 uint32_t expectedY = inputCoords[i].y + yTrans;
1771 std::cout <<
" coord[" << i <<
"]=(" << inputCoords[i].x <<
","
1772 << inputCoords[i].y <<
") + (" << xTrans <<
"," << yTrans
1773 <<
") = (" << result->
coords()[i].
x <<
","
1774 << result->coords()[i].y <<
")";
1775 if (result->coords()[i].x != expectedX ||
1776 result->coords()[i].y != expectedY) {
1777 std::cout <<
" MISMATCH! (expected (" << expectedX <<
"," << expectedY
1781 std::cout << std::endl;
1785 throw std::runtime_error(
"Coord translate test failed: result mismatch");
1787 logger.
info(
"esitester",
"Coord translate test passed");
1788 std::cout <<
"Coord translate test passed" << std::endl;
1795#pragma pack(push, 1)
1831 coords.emplace_back(x, y);
1839 return {
reinterpret_cast<const uint8_t *
>(&
header),
sizeof(
header)};
1841 return {
reinterpret_cast<const uint8_t *
>(
coords.data()),
1844 throw std::out_of_range(
"SerialCoordInput: invalid segment index");
1848#pragma pack(push, 1)
1866 uint32_t yTrans, uint32_t numCoords,
1867 size_t batchSizeLimit) {
1868 Logger &logger = conn->getLogger();
1869 logger.
info(
"esitester",
"Starting serial coord translate test");
1872 std::mt19937 rng(0xDEADBEEF);
1873 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1874 std::vector<Coord> inputCoords;
1875 inputCoords.reserve(numCoords);
1876 for (uint32_t i = 0; i < numCoords; ++i)
1877 inputCoords.push_back({dist(rng), dist(rng)});
1881 throw std::runtime_error(
"Serial coord translate test: no "
1882 "'coord_translator_serial' child found");
1884 auto &ports = child->second->getPorts();
1885 auto portIter = ports.find(
AppID(
"translate_coords_serial"));
1886 if (portIter == ports.end())
1887 throw std::runtime_error(
1888 "Serial coord translate test: no 'translate_coords_serial' port found");
1891 portIter->second.getRawWrite(
"arg"));
1902 while (sent < numCoords) {
1903 size_t batchSize = std::min(batchSizeLimit, numCoords - sent);
1908 auto batch = std::make_unique<SerialCoordInput>();
1909 batch->xTranslation(sent == 0 ? xTrans : 0);
1910 batch->yTranslation(sent == 0 ? yTrans : 0);
1912 for (
size_t i = 0; i < batchSize; ++i) {
1913 batch->appendCoord(inputCoords[sent + i].x, inputCoords[sent + i].y);
1915 argPort.
write(batch);
1919 auto footerData = std::make_unique<SerialCoordInput>();
1920 argPort.
write(footerData);
1925 std::vector<Coord> results;
1929 resultPort.
read(msg);
1931 throw std::runtime_error(
"Unexpected result message size");
1936 if (batchCount == 0)
1940 for (uint16_t i = 0; i < batchCount; ++i) {
1941 resultPort.
read(msg);
1943 throw std::runtime_error(
"Unexpected result message size");
1944 const auto *dFrame =
1946 results.push_back({dFrame->data.y, dFrame->data.x});
1952 std::cout <<
"Serial coord translate test results:" << std::endl;
1953 if (results.size() != inputCoords.size()) {
1954 std::cout <<
"Result size mismatch. Expected " << inputCoords.size()
1955 <<
", got " << results.size() << std::endl;
1958 for (
size_t i = 0; i < std::min(inputCoords.size(), results.size()); ++i) {
1959 uint32_t expX = inputCoords[i].x + xTrans;
1960 uint32_t expY = inputCoords[i].y + yTrans;
1961 std::cout <<
" coord[" << i <<
"]=(" << inputCoords[i].x <<
","
1962 << inputCoords[i].y <<
") + (" << xTrans <<
"," << yTrans
1963 <<
") = (" << results[i].x <<
"," << results[i].y
1964 <<
") (expected (" << expX <<
"," << expY <<
"))";
1965 if (results[i].x != expX || results[i].y != expY) {
1966 std::cout <<
" MISMATCH!";
1969 std::cout << std::endl;
1976 throw std::runtime_error(
"Serial coord translate test failed");
1978 logger.
info(
"esitester",
"Serial coord translate test passed");
1979 std::cout <<
"Serial coord translate test passed" << std::endl;
1983 uint32_t iterations) {
1984 Logger &logger = conn->getLogger();
1988 throw std::runtime_error(
"Channel test: no 'channel_test' child");
1989 auto &ports = channelChild->second->getPorts();
1992 auto cmdIter = ports.find(
AppID(
"cmd"));
1993 if (cmdIter == ports.end())
1994 throw std::runtime_error(
"Channel test: no 'cmd' port");
1997 throw std::runtime_error(
"Channel test: 'cmd' is not MMIO");
2000 auto producerIter = ports.find(
AppID(
"producer"));
2001 if (producerIter == ports.end())
2002 throw std::runtime_error(
"Channel test: no 'producer' port");
2003 auto *producerPort =
2006 throw std::runtime_error(
2007 "Channel test: 'producer' is not a ChannelService::ToHost");
2008 producerPort->connect();
2012 cmdMMIO->write(0x0, iterations);
2014 for (uint32_t i = 0; i < iterations; ++i) {
2015 MessageData recvData = producerPort->read().get();
2016 uint32_t got = *recvData.
as<uint32_t>();
2017 std::cout <<
"[channel] producer i=" << i <<
" got=" << got << std::endl;
2019 throw std::runtime_error(
"Channel producer: expected " +
2020 std::to_string(i) +
", got " +
2021 std::to_string(got));
2023 logger.
info(
"esitester",
"Channel test: producer passed (" +
2024 std::to_string(iterations) +
2025 " incrementing values)");
2028 auto loopbackInIter = ports.find(
AppID(
"loopback_in"));
2029 if (loopbackInIter == ports.end())
2030 throw std::runtime_error(
"Channel test: no 'loopback_in' port");
2031 auto *fromHostPort =
2034 throw std::runtime_error(
2035 "Channel test: 'loopback_in' is not a ChannelService::FromHost");
2036 fromHostPort->connect();
2038 auto loopbackOutIter = ports.find(
AppID(
"loopback_out"));
2039 if (loopbackOutIter == ports.end())
2040 throw std::runtime_error(
"Channel test: no 'loopback_out' port");
2041 auto *loopbackOutPort =
2043 if (!loopbackOutPort)
2044 throw std::runtime_error(
2045 "Channel test: 'loopback_out' is not a ChannelService::ToHost");
2046 loopbackOutPort->connect();
2048 std::mt19937_64 rng(0xDEADBEEF);
2049 std::uniform_int_distribution<uint32_t> dist(0, UINT32_MAX);
2051 for (uint32_t i = 0; i < iterations; ++i) {
2052 uint32_t sendVal = dist(rng);
2054 MessageData recvData = loopbackOutPort->read().get();
2055 uint32_t recvVal = *recvData.
as<uint32_t>();
2056 std::cout <<
"[channel] loopback i=" << i <<
" sent=0x"
2059 if (recvVal != sendVal)
2060 throw std::runtime_error(
"Channel loopback mismatch at i=" +
2064 logger.
info(
"esitester",
"Channel test: loopback passed (" +
2065 std::to_string(iterations) +
" iterations)");
2066 std::cout <<
"Channel test passed" << std::endl;
static void print(TypedAttr val, llvm::raw_ostream &os)
static void writePort(uint16_t port)
Write the port number to a file.
Abstract class representing a connection to an accelerator.
Top level accelerator class.
Services provide connections to 'bundles' – collections of named, unidirectional communication channels.
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
ReadChannelPort & getRawRead(const std::string &name) const
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Common options and code for ESI runtime tools.
Context & getContext()
Get the context.
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
int esiParse(int argc, const char **argv)
Run the parser.
AcceleratorConnections, Accelerators, and Manifests must all share a context.
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Class to parse a manifest.
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A logical chunk of data representing serialized data.
const uint8_t * getBytes() const
const T * as() const
Cast to a type.
size_t getSize() const
Get the size of the data in bytes.
static MessageData from(T &t)
Cast from a type to its raw bytes.
A ChannelPort which reads data from the accelerator.
virtual void connect(std::function< bool(MessageData)> callback, const ConnectOptions &options={})
void setMaxDataQueueMsgs(uint64_t maxMsgs)
Set maximum number of messages to store in the dataQueue.
virtual void disconnect() override
virtual void read(MessageData &outData)
Specify a buffer to read into.
Abstract multi-segment message.
void connect(const ChannelPort::ConnectOptions &opts={})
void write(const T &data)
A ChannelPort which sends data to the accelerator.
virtual void disconnect() override
void write(const MessageData &data)
A very basic blocking write API.
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
A function call which gets attached to a service port.
A port which writes data to the accelerator (from_host).
A port which reads data from the accelerator (to_host).
A function call which gets attached to a service port.
virtual void start()
In cases where necessary, enable host memory services.
A "slice" of some parent MMIO space.
Information about the Accelerator system.
A telemetry port which gets attached to a service port.
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
static void serialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords, size_t batchSizeLimit)
constexpr std::array< uint32_t, 5 > defaultWidths
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static void channelTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static std::string formatBandwidth(double bytesPerSec)
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static std::string defaultWidthsStr()
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
std::string toHex(void *val)
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
SerialCoordData(uint32_t x, uint32_t y)
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
A contiguous, non-owning view of bytes within a SegmentedMessageData.
RAII memory region for host memory.
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
virtual std::size_t getSize() const =0
SerialCoordOutputData data
SerialCoordOutputHeader header