46 const std::vector<uint32_t> &widths,
bool write,
50 const std::vector<uint32_t> &widths,
bool read,
53 const std::vector<uint32_t> &widths,
bool read,
bool write);
55 const std::vector<uint32_t> &widths,
56 uint32_t xferCount,
bool read,
bool write);
61 uint32_t xferCount,
bool read,
64 uint32_t addAmt, uint32_t numItems);
66 uint32_t addAmt, uint32_t numItems);
68 uint32_t xTrans, uint32_t yTrans,
// Compile-time table of five power-of-two values, 32 through 512.
// NOTE(review): presumably the default transfer widths (in bits) offered by the
// "--widths" options; the use sites are not visible in this excerpt — confirm.
72constexpr std::array<uint32_t, 5>
defaultWidths = {32, 64, 128, 256, 512};
// Fragment of a throughput formatter: picks a decimal scale (1e3 / 1e6 / 1e9)
// for `bytesPerSec` and streams the scaled value plus a unit suffix into an
// ostringstream.
// NOTE(review): the enclosing signature, the lines that reassign `unit` in each
// branch, and the precision setting are not visible in this excerpt — confirm
// against the full file.
85 const char *unit =
"B/s";
86 double value = bytesPerSec;
// Thresholds are tested from largest to smallest, so the first match wins.
87 if (bytesPerSec >= 1e9) {
89 value = bytesPerSec / 1e9;
90 }
else if (bytesPerSec >= 1e6) {
92 value = bytesPerSec / 1e6;
93 }
else if (bytesPerSec >= 1e3) {
95 value = bytesPerSec / 1e3;
// Fixed notation is requested on the stream before the value is written.
97 std::ostringstream oss;
98 oss.setf(std::ios::fixed);
100 oss << value <<
" " << unit;
// Fragment of a byte-count formatter that scales by 1024 and labels the result
// with one of five suffixes ("B" .. "TB").
// NOTE(review): presumably the body of the humanBytes helper called elsewhere
// in this file; the signature, the declaration of `u`, and the loop body are
// not visible in this excerpt.
106 const char *units[] = {
"B",
"KB",
"MB",
"GB",
"TB"};
107 double v = (double)bytes;
// `u < 4` stops the loop at the last entry of `units`.
109 while (v >= 1024.0 && u < 4) {
113 std::ostringstream oss;
114 oss.setf(std::ios::fixed);
// Plain byte counts (u == 0) are printed with no decimals, scaled ones with 2.
115 oss.precision(u == 0 ? 0 : 2);
116 oss << v <<
" " << units[u];
// Fragment of a duration formatter taking a microsecond count `us`. Three
// outputs are visible: a raw " us" string, a millisecond form, and a second
// form.
// NOTE(review): the conditions selecting between them and the statements that
// emit the ms / sec strings are not visible in this excerpt.
123 return std::to_string(us) +
" us";
124 double ms = us / 1000.0;
126 std::ostringstream oss;
127 oss.setf(std::ios::fixed);
// Fewer decimals as the value grows: 2 below 10 ms, 1 below 100 ms, else 0.
128 oss.precision(ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0));
132 double sec = ms / 1000.0;
133 std::ostringstream oss;
134 oss.setf(std::ios::fixed);
// 3 decimals below 10 s, otherwise 2.
135 oss.precision(sec < 10.0 ? 3 : 2);
// Two alternative aligned-allocation paths; each has a visible throw of
// std::bad_alloc.
// NOTE(review): the preprocessor guards choosing between them and the null
// checks guarding the throws are not visible in this excerpt.
// Argument order differs between the two APIs: (size, alignment) here ...
144 void *ptr = _aligned_malloc(size, alignment);
146 throw std::bad_alloc();
// ... versus (alignment, size) for the standard function.
// NOTE(review): some implementations of std::aligned_alloc require `size` to be
// a multiple of `alignment` — confirm callers satisfy this.
149 void *ptr = std::aligned_alloc(alignment, size);
151 throw std::bad_alloc();
164int main(
int argc,
const char *argv[]) {
166 cli.description(
"Test an ESI system running the ESI tester image.");
167 cli.require_subcommand(1);
169 CLI::App *callback_test =
170 cli.add_subcommand(
"callback",
"initiate callback test");
171 uint32_t cb_iters = 1;
172 callback_test->add_option(
"-i,--iters", cb_iters,
173 "Number of iterations to run");
175 CLI::App *hostmemtestSub =
176 cli.add_subcommand(
"hostmem",
"Run the host memory test");
178 bool hmWrite =
false;
181 hostmemtestSub->add_flag(
"-w,--write", hmWrite,
182 "Enable host memory write test");
183 hostmemtestSub->add_flag(
"-r,--read", hmRead,
"Enable host memory read test");
184 hostmemtestSub->add_option(
185 "--widths", hostmemWidths,
188 CLI::App *dmatestSub = cli.add_subcommand(
"dma",
"Run the DMA test");
189 bool dmaRead =
false;
190 bool dmaWrite =
false;
192 dmatestSub->add_flag(
"-w,--write", dmaWrite,
"Enable dma write test");
193 dmatestSub->add_flag(
"-r,--read", dmaRead,
"Enable dma read test");
194 dmatestSub->add_option(
"--widths", dmaWidths,
198 CLI::App *bandwidthSub =
199 cli.add_subcommand(
"bandwidth",
"Run the bandwidth test");
200 uint32_t xferCount = 1000;
201 bandwidthSub->add_option(
"-c,--count", xferCount,
202 "Number of transfers to perform");
203 bool bandwidthRead =
false;
204 bool bandwidthWrite =
false;
207 bandwidthSub->add_option(
"--widths", bandwidthWidths,
208 "Width of the transfers to perform (default: " +
210 bandwidthSub->add_flag(
"-w,--write", bandwidthWrite,
211 "Enable bandwidth write");
212 bandwidthSub->add_flag(
"-r,--read", bandwidthRead,
"Enable bandwidth read");
214 CLI::App *hostmembwSub =
215 cli.add_subcommand(
"hostmembw",
"Run the host memory bandwidth test");
216 uint32_t hmBwCount = 1000;
217 bool hmBwRead =
false;
218 bool hmBwWrite =
false;
220 hostmembwSub->add_option(
"-c,--count", hmBwCount,
221 "Number of hostmem transfers");
222 hostmembwSub->add_option(
223 "--widths", hmBwWidths,
225 hostmembwSub->add_flag(
"-w,--write", hmBwWrite,
226 "Measure hostmem write bandwidth");
227 hostmembwSub->add_flag(
"-r,--read", hmBwRead,
228 "Measure hostmem read bandwidth");
230 CLI::App *loopbackSub =
231 cli.add_subcommand(
"loopback",
"Test LoopbackInOutAdd function service");
232 uint32_t loopbackIters = 10;
233 bool loopbackPipeline =
false;
234 loopbackSub->add_option(
"-i,--iters", loopbackIters,
235 "Number of function invocations (default 10)");
236 loopbackSub->add_flag(
"-p,--pipeline", loopbackPipeline,
237 "Pipeline all calls then collect results");
239 CLI::App *aggBwSub = cli.add_subcommand(
241 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
242 uint32_t aggWidth = 512;
243 uint32_t aggCount = 1000;
244 bool aggRead =
false;
245 bool aggWrite =
false;
246 aggBwSub->add_option(
248 "Bit width (default 512; other widths ignored if absent)");
249 aggBwSub->add_option(
"-c,--count", aggCount,
"Flits per unit (default 1000)");
250 aggBwSub->add_flag(
"-r,--read", aggRead,
"Include read units");
251 aggBwSub->add_flag(
"-w,--write", aggWrite,
"Include write units");
253 CLI::App *streamingAddSub = cli.add_subcommand(
254 "streaming_add",
"Test StreamingAdder function service with list input");
255 uint32_t streamingAddAmt = 5;
256 uint32_t streamingNumItems = 5;
257 bool streamingTranslate =
false;
258 streamingAddSub->add_option(
"-a,--add", streamingAddAmt,
259 "Amount to add to each element (default 5)");
260 streamingAddSub->add_option(
"-n,--num-items", streamingNumItems,
261 "Number of random items in the list (default 5)");
262 streamingAddSub->add_flag(
"-t,--translate", streamingTranslate,
263 "Use message translation (list translation)");
265 CLI::App *coordTranslateSub = cli.add_subcommand(
267 "Test CoordTranslator function service with list of coordinates");
268 uint32_t coordXTrans = 10;
269 uint32_t coordYTrans = 20;
270 uint32_t coordNumItems = 5;
271 coordTranslateSub->add_option(
"-x,--x-translation", coordXTrans,
272 "X translation amount (default 10)");
273 coordTranslateSub->add_option(
"-y,--y-translation", coordYTrans,
274 "Y translation amount (default 20)");
275 coordTranslateSub->add_option(
"-n,--num-coords", coordNumItems,
276 "Number of random coordinates (default 5)");
278 if (
int rc = cli.
esiParse(argc, argv))
280 if (!cli.get_help_ptr()->empty())
287 ctxt.
getLogger().
info(
"esitester",
"Connected to accelerator.");
288 Manifest manifest(ctxt, info.getJsonManifest());
293 if (*callback_test) {
295 }
else if (*hostmemtestSub) {
296 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
297 }
else if (*loopbackSub) {
299 }
else if (*dmatestSub) {
300 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
301 }
else if (*bandwidthSub) {
302 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
304 }
else if (*hostmembwSub) {
307 }
else if (*aggBwSub) {
310 }
else if (*streamingAddSub) {
311 if (streamingTranslate)
316 }
else if (*coordTranslateSub) {
321 }
catch (std::exception &e) {
326 std::cout <<
"Exiting successfully\n";
331 uint32_t iterations) {
334 throw std::runtime_error(
"No cb_test child found in accelerator");
// Callback test body (fragment). Looks up the "cmd" and "cb" ports on the
// cb_test child, then issues one command per iteration and waits for a
// callback. Throws std::runtime_error when a port is missing or of the wrong
// kind, or when no callback is observed.
// NOTE(review): the casts that produce `cmdMMIO` and the callback port object,
// and the callback registration itself, are not visible in this excerpt.
335 auto &ports = cb_test->second->getPorts();
336 auto cmd_port = ports.find(
AppID(
"cmd"));
337 if (cmd_port == ports.end())
338 throw std::runtime_error(
"No cmd port found in cb_test child");
341 throw std::runtime_error(
"cb_test cmd port is not MMIO");
// Same lookup for the callback port.
343 auto f = ports.find(
AppID(
"cb"));
344 if (f == ports.end())
345 throw std::runtime_error(
"No cb port found in accelerator");
349 throw std::runtime_error(
"cb port is not a CallService::Callback");
// Counter polled by the wait loop further down. The increment and the logging
// below it presumably run inside the callback handler — confirm.
351 std::atomic<uint32_t> callbackCount = 0;
354 callbackCount.fetch_add(1);
356 [&](std::string &subsystem, std::string &msg,
357 std::unique_ptr<std::map<std::string, std::any>> &details) {
358 subsystem =
"ESITESTER";
359 msg =
"Received callback";
360 details = std::make_unique<std::map<std::string, std::any>>();
361 details->emplace(
"data", data);
363 std::cout <<
"callback: " << *data.as<uint64_t>() << std::endl;
// One command per iteration: the iteration index is written to offset 0x10.
368 for (uint32_t i = 0; i < iterations; ++i) {
369 conn->
getLogger().
info(
"esitester",
"Issuing callback command iteration " +
370 std::to_string(i) +
"/" +
371 std::to_string(iterations));
372 cmdMMIO->write(0x10, i);
// Poll up to 1000 times, sleeping 1 ms between polls, for the counter to
// exceed `i`.
374 for (uint32_t wait = 0; wait < 1000; ++wait) {
375 if (callbackCount.load() > i)
377 std::this_thread::sleep_for(std::chrono::milliseconds(1));
// Counter still has not passed `i` after polling: fail the test.
379 if (callbackCount.load() <= i)
380 throw std::runtime_error(
"Callback test failed. No callback received");
388 std::cout <<
"Running hostmem WRITE test with width " << width << std::endl;
389 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
390 auto check = [&](
bool print) {
392 for (
size_t i = 0; i < 9; ++i) {
394 printf(
"[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
395 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
403 throw std::runtime_error(
404 "hostmem write test failed. No writemem child found");
405 auto &writeMemPorts = writeMemChildIter->second->getPorts();
407 auto cmdPortIter = writeMemPorts.find(
AppID(
"cmd", width));
408 if (cmdPortIter == writeMemPorts.end())
409 throw std::runtime_error(
410 "hostmem write test failed. No (cmd,width) MMIO port");
413 throw std::runtime_error(
414 "hostmem write test failed. (cmd,width) port not MMIO");
416 auto issuedPortIter = writeMemPorts.find(
AppID(
"addrCmdIssued"));
417 if (issuedPortIter == writeMemPorts.end())
418 throw std::runtime_error(
419 "hostmem write test failed. addrCmdIssued missing");
420 auto *addrCmdIssuedPort =
422 if (!addrCmdIssuedPort)
423 throw std::runtime_error(
424 "hostmem write test failed. addrCmdIssued not telemetry");
425 addrCmdIssuedPort->connect();
427 auto responsesPortIter = writeMemPorts.find(
AppID(
"addrCmdResponses"));
428 if (responsesPortIter == writeMemPorts.end())
429 throw std::runtime_error(
430 "hostmem write test failed. addrCmdResponses missing");
431 auto *addrCmdResponsesPort =
433 if (!addrCmdResponsesPort)
434 throw std::runtime_error(
435 "hostmem write test failed. addrCmdResponses not telemetry");
436 addrCmdResponsesPort->connect();
438 for (
size_t i = 0, e = 9; i < e; ++i)
439 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
441 cmdMMIO->write(0x10,
reinterpret_cast<uint64_t
>(region.
getDevicePtr()));
442 cmdMMIO->write(0x18, 1);
443 cmdMMIO->write(0x20, 1);
445 for (
int i = 0; i < 100; ++i) {
446 auto issued = addrCmdIssuedPort->readInt();
447 auto responses = addrCmdResponsesPort->readInt();
448 if (issued == 1 && responses == 1) {
452 std::this_thread::sleep_for(std::chrono::microseconds(100));
456 throw std::runtime_error(
"hostmem write test (" + std::to_string(width) +
457 " bits) timeout waiting for completion");
460 throw std::runtime_error(
"hostmem write test failed (" +
461 std::to_string(width) +
" bits)");
467 std::cout <<
"Running hostmem READ test with width " << width << std::endl;
470 throw std::runtime_error(
471 "hostmem read test failed. No readmem child found");
473 auto &readMemPorts = readMemChildIter->second->getPorts();
474 auto addrCmdPortIter = readMemPorts.find(
AppID(
"cmd", width));
475 if (addrCmdPortIter == readMemPorts.end())
476 throw std::runtime_error(
477 "hostmem read test failed. No AddressCommand MMIO port");
481 throw std::runtime_error(
482 "hostmem read test failed. AddressCommand port not MMIO");
484 auto lastReadPortIter = readMemPorts.find(
AppID(
"lastReadLSB"));
485 if (lastReadPortIter == readMemPorts.end())
486 throw std::runtime_error(
"hostmem read test failed. lastReadLSB missing");
490 throw std::runtime_error(
491 "hostmem read test failed. lastReadLSB not telemetry");
492 lastReadPort->connect();
494 auto issuedPortIter = readMemPorts.find(
AppID(
"addrCmdIssued"));
495 if (issuedPortIter == readMemPorts.end())
496 throw std::runtime_error(
"hostmem read test failed. addrCmdIssued missing");
497 auto *addrCmdIssuedPort =
499 if (!addrCmdIssuedPort)
500 throw std::runtime_error(
501 "hostmem read test failed. addrCmdIssued not telemetry");
502 addrCmdIssuedPort->connect();
504 auto responsesPortIter = readMemPorts.find(
AppID(
"addrCmdResponses"));
505 if (responsesPortIter == readMemPorts.end())
506 throw std::runtime_error(
507 "hostmem read test failed. addrCmdResponses missing");
508 auto *addrCmdResponsesPort =
510 if (!addrCmdResponsesPort)
511 throw std::runtime_error(
512 "hostmem read test failed. addrCmdResponses not telemetry");
513 addrCmdResponsesPort->connect();
515 for (
size_t i = 0; i < 8; ++i) {
516 auto *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
517 dataPtr[0] = 0x12345678ull << i;
518 dataPtr[1] = 0xDEADBEEFull << i;
520 addrCmdMMIO->write(0x10,
reinterpret_cast<uint64_t
>(region.
getDevicePtr()));
521 addrCmdMMIO->write(0x18, 1);
522 addrCmdMMIO->write(0x20, 1);
524 for (
int waitLoop = 0; waitLoop < 100; ++waitLoop) {
525 auto issued = addrCmdIssuedPort->readInt();
526 auto responses = addrCmdResponsesPort->readInt();
527 if (issued == 1 && responses == 1) {
531 std::this_thread::sleep_for(std::chrono::milliseconds(10));
534 throw std::runtime_error(
"hostmem read (" + std::to_string(width) +
535 " bits) timeout waiting for completion");
536 uint64_t captured = lastReadPort->readInt();
537 uint64_t expected = dataPtr[0];
539 expected &= ((1ull << width) - 1);
540 if (captured != expected)
541 throw std::runtime_error(
"hostmem read test (" + std::to_string(width) +
542 " bits) failed. Expected " +
549 const std::vector<uint32_t> &widths,
bool write,
// Host memory test driver (fragment). Allocates a 1 MiB writeable scratch
// region, initialises and flushes it, then runs over each requested width.
// Per-width failures are logged, and a "Hostmem test failed" runtime_error is
// thrown further down.
// NOTE(review): the per-width test calls, the initialisation loop body, and the
// success-flag bookkeeping are not visible in this excerpt.
554 auto scratchRegion = hostmem->allocate(1024 * 1024,
555 {.writeable =
true});
556 uint64_t *dataPtr =
static_cast<uint64_t *
>(scratchRegion->getPtr());
558 "Running host memory test with region size " +
559 std::to_string(scratchRegion->getSize()) +
560 " bytes at 0x" +
toHex(dataPtr));
// Walk the region as 64-bit words (size / 8), then flush it.
561 for (
size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
563 scratchRegion->flush();
// Exceptions raised for one width are caught and logged by the handler below.
566 for (
size_t width : widths) {
572 }
catch (std::exception &e) {
573 conn->
getLogger().
error(
"esitester",
"Hostmem test failed for width " +
574 std::to_string(width) +
": " +
// NOTE(review): presumably thrown only when some width failed — the guarding
// condition is not visible here.
580 throw std::runtime_error(
"Hostmem test failed");
581 std::cout <<
"Hostmem test passed" << std::endl;
// DMA read test body (fragment) for a single `width`. Requests 24 transfers by
// writing the count to MMIO offset 0, then reads one message per transfer and
// interprets it as a uint64_t. Throws std::runtime_error when the tohostdma
// port is missing, is not MMIO, or data arrives out of order.
// NOTE(review): port resolution, the read that produces `data`, and the
// ordering check itself are not visible in this excerpt.
587 logger.
info(
"esitester",
588 "== Running DMA read test with width " + std::to_string(width));
593 throw std::runtime_error(
"dma read test failed. No tohostdma[" +
594 std::to_string(width) +
"] found");
597 throw std::runtime_error(
"dma read test failed. MMIO port is not MMIO");
// Fixed transfer count for this test.
604 size_t xferCount = 24;
607 toHostMMIO->write(0, xferCount);
608 for (
size_t i = 0; i < xferCount; ++i) {
611 uint64_t val = *data.as<uint64_t>();
613 throw std::runtime_error(
"dma read test failed. Out of order data");
616 logger.
debug(
"esitester",
617 "Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex());
620 std::cout <<
" DMA read test for " << width <<
" bits passed" << std::endl;
626 logger.
info(
"esitester",
627 "Running DMA write test with width " + std::to_string(width));
631 if (!fromHostMMIOPort)
632 throw std::runtime_error(
"dma read test for " +
toString(width) +
633 " bits failed. No fromhostdma[" +
634 std::to_string(width) +
"] found");
637 throw std::runtime_error(
"dma write test for " +
toString(width) +
638 " bits failed. MMIO port is not MMIO");
643 throw std::runtime_error(
"dma write test for " +
toString(width) +
644 " bits failed. No out port found");
648 size_t xferCount = 24;
649 uint8_t *data =
new uint8_t[width];
650 for (
size_t i = 0; i < width / 8; ++i)
652 fromHostMMIO->read(8);
653 fromHostMMIO->write(0, xferCount);
654 for (
size_t i = 1; i < xferCount + 1; ++i) {
661 std::this_thread::sleep_for(std::chrono::milliseconds(10));
663 }
while (!successWrite && ++attempts < 100);
665 throw std::runtime_error(
"dma write test for " +
toString(width) +
666 " bits failed. Write failed");
667 uint64_t lastReadMMIO;
668 for (
size_t a = 0; a < 20; ++a) {
669 lastReadMMIO = fromHostMMIO->read(8);
670 if (lastReadMMIO == i)
672 std::this_thread::sleep_for(std::chrono::milliseconds(10));
674 throw std::runtime_error(
"dma write for " +
toString(width) +
675 " bits test failed. Read from MMIO failed");
680 std::cout <<
" DMA write test for " << width <<
" bits passed" << std::endl;
684 const std::vector<uint32_t> &widths,
bool read,
// DMA test driver (fragment). Two passes over `widths` are visible. A failure
// in the first pass is caught and reported on stderr as a write-test failure.
// A "DMA test failed" runtime_error is thrown further down, otherwise
// "DMA test passed" is printed.
// NOTE(review): the `read` / `write` guards, the per-width test calls, and the
// success flag are not visible in this excerpt.
688 for (
size_t width : widths)
691 }
catch (std::exception &e) {
693 std::cerr <<
"DMA write test for " << width
694 <<
" bits failed: " << e.what() << std::endl;
// Second pass: no catch handler for it is visible in this excerpt.
697 for (
size_t width : widths)
700 throw std::runtime_error(
"DMA test failed");
701 std::cout <<
"DMA test passed" << std::endl;
709 size_t width,
size_t xferCount) {
715 throw std::runtime_error(
"bandwidth test failed. No tohostdma[" +
716 std::to_string(width) +
"] found");
719 throw std::runtime_error(
"bandwidth test failed. MMIO port is not MMIO");
727 logger.
info(
"esitester",
"Starting read bandwidth test with " +
728 std::to_string(xferCount) +
" x " +
729 std::to_string(width) +
" bit transfers");
731 auto start = std::chrono::high_resolution_clock::now();
732 toHostMMIO->write(0, xferCount);
733 for (
size_t i = 0; i < xferCount; ++i) {
736 [i, &data](std::string &subsystem, std::string &msg,
737 std::unique_ptr<std::map<std::string, std::any>> &details) {
738 subsystem =
"esitester";
739 msg =
"Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex();
742 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
743 std::chrono::high_resolution_clock::now() - start);
745 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
746 logger.
info(
"esitester",
747 " Bandwidth test: " + std::to_string(xferCount) +
" x " +
748 std::to_string(width) +
" bit transfers in " +
749 std::to_string(duration.count()) +
" microseconds");
754 size_t width,
size_t xferCount) {
759 if (!fromHostMMIOPort)
760 throw std::runtime_error(
"bandwidth test failed. No fromhostdma[" +
761 std::to_string(width) +
"] found");
764 throw std::runtime_error(
"bandwidth test failed. MMIO port is not MMIO");
772 logger.
info(
"esitester",
"Starting write bandwidth test with " +
773 std::to_string(xferCount) +
" x " +
774 std::to_string(width) +
" bit transfers");
775 std::vector<uint8_t> dataVec(width / 8);
776 for (
size_t i = 0; i < width / 8; ++i)
779 auto start = std::chrono::high_resolution_clock::now();
780 fromHostMMIO->write(0, xferCount);
781 for (
size_t i = 0; i < xferCount; ++i) {
784 [i, &data](std::string &subsystem, std::string &msg,
785 std::unique_ptr<std::map<std::string, std::any>> &details) {
786 subsystem =
"esitester";
787 msg =
"Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex();
790 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
791 std::chrono::high_resolution_clock::now() - start);
793 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
794 logger.
info(
"esitester",
795 " Bandwidth test: " + std::to_string(xferCount) +
" x " +
796 std::to_string(width) +
" bit transfers in " +
797 std::to_string(duration.count()) +
" microseconds");
802 const std::vector<uint32_t> &widths,
803 uint32_t xferCount,
bool read,
bool write) {
805 for (uint32_t w : widths)
808 for (uint32_t w : widths)
819 uint32_t width, uint32_t xferCount) {
821 logger.
info(
"esitester",
"Starting hostmem WRITE bandwidth test: " +
822 std::to_string(xferCount) +
" x " +
823 std::to_string(width) +
" bits");
827 throw std::runtime_error(
"hostmem write bandwidth: writemem child missing");
828 auto &writeMemPorts = writeMemChildIter->second->getPorts();
830 auto cmdPortIter = writeMemPorts.find(
AppID(
"cmd", width));
831 if (cmdPortIter == writeMemPorts.end())
832 throw std::runtime_error(
"hostmem write bandwidth: cmd MMIO missing");
835 throw std::runtime_error(
"hostmem write bandwidth: cmd not MMIO");
837 auto issuedIter = writeMemPorts.find(
AppID(
"addrCmdIssued"));
838 auto respIter = writeMemPorts.find(
AppID(
"addrCmdResponses"));
839 auto cycleCount = writeMemPorts.find(
AppID(
"addrCmdCycles"));
840 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
841 cycleCount == writeMemPorts.end())
842 throw std::runtime_error(
"hostmem write bandwidth: telemetry missing");
848 if (!issuedPort || !respPort || !cyclePort)
849 throw std::runtime_error(
850 "hostmem write bandwidth: telemetry type mismatch");
852 issuedPort->connect();
857 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
858 size_t words = region.
getSize() / 8;
859 for (
size_t i = 0; i < words; ++i)
860 dataPtr[i] = i + 0xA5A50000;
863 auto start = std::chrono::high_resolution_clock::now();
865 uint64_t devPtr =
reinterpret_cast<uint64_t
>(region.
getDevicePtr());
866 cmdMMIO->write(0x10, devPtr);
867 cmdMMIO->write(0x18, xferCount);
868 cmdMMIO->write(0x20, 1);
871 bool completed =
false;
872 for (
int wait = 0; wait < 100000; ++wait) {
873 uint64_t respNow = respPort->
readInt();
874 if (respNow == xferCount) {
878 std::this_thread::sleep_for(std::chrono::microseconds(50));
881 throw std::runtime_error(
"hostmem write bandwidth timeout");
882 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
883 std::chrono::high_resolution_clock::now() - start);
885 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
886 uint64_t cycles = cyclePort->
readInt();
887 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
888 std::cout <<
"[WRITE] Hostmem bandwidth (" << std::to_string(width)
890 << std::to_string(xferCount) <<
" flits in "
891 << std::to_string(duration.count()) <<
" us, "
892 << std::to_string(cycles) <<
" cycles, " << bytesPerCycle
893 <<
" bytes/cycle" << std::endl;
899 uint32_t width, uint32_t xferCount) {
901 logger.
info(
"esitester",
"Starting hostmem READ bandwidth test: " +
902 std::to_string(xferCount) +
" x " +
903 std::to_string(width) +
" bits");
907 throw std::runtime_error(
"hostmem read bandwidth: readmem child missing");
908 auto &readMemPorts = readMemChildIter->second->getPorts();
910 auto cmdPortIter = readMemPorts.find(
AppID(
"cmd", width));
911 if (cmdPortIter == readMemPorts.end())
912 throw std::runtime_error(
"hostmem read bandwidth: cmd MMIO missing");
915 throw std::runtime_error(
"hostmem read bandwidth: cmd not MMIO");
917 auto issuedIter = readMemPorts.find(
AppID(
"addrCmdIssued"));
918 auto respIter = readMemPorts.find(
AppID(
"addrCmdResponses"));
919 auto cyclePort = readMemPorts.find(
AppID(
"addrCmdCycles"));
920 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
921 cyclePort == readMemPorts.end())
922 throw std::runtime_error(
"hostmem read bandwidth: telemetry missing");
928 if (!issuedPort || !respPort || !cycleCntPort)
929 throw std::runtime_error(
"hostmem read bandwidth: telemetry type mismatch");
930 issuedPort->connect();
935 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
936 size_t words64 = region.
getSize() / 8;
937 for (
size_t i = 0; i < words64; ++i)
938 dataPtr[i] = 0xCAFEBABE0000ull + i;
940 uint64_t devPtr =
reinterpret_cast<uint64_t
>(region.
getDevicePtr());
941 auto start = std::chrono::high_resolution_clock::now();
943 cmdMMIO->write(0x10, devPtr);
944 cmdMMIO->write(0x18, xferCount);
945 cmdMMIO->write(0x20, 1);
948 for (
int wait = 0; wait < 100000; ++wait) {
949 uint64_t respNow = respPort->
readInt();
950 if (respNow == xferCount) {
954 std::this_thread::sleep_for(std::chrono::microseconds(50));
957 throw std::runtime_error(
"hostmem read bandwidth timeout");
958 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
959 std::chrono::high_resolution_clock::now() - start);
961 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
962 uint64_t cycles = cycleCntPort->
readInt();
963 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
964 std::cout <<
"[ READ] Hostmem bandwidth (" << width
966 <<
" flits in " << duration.count() <<
" us, " << cycles
967 <<
" cycles, " << bytesPerCycle <<
" bytes/cycle" << std::endl;
972 const std::vector<uint32_t> &widths,
bool read,
976 auto region = hostmemSvc->allocate(1024 * 1024 * 1024,
977 {.writeable =
true});
978 for (uint32_t w : widths) {
987 uint32_t iterations,
bool pipeline) {
991 throw std::runtime_error(
"Loopback test: no 'loopback' child");
992 auto &ports = loopbackChild->second->getPorts();
993 auto addIter = ports.find(
AppID(
"add"));
994 if (addIter == ports.end())
995 throw std::runtime_error(
"Loopback test: no 'add' port");
1000 throw std::runtime_error(
1001 "Loopback test: 'add' port not a FuncService::Function");
1002 funcPort->connect();
1003 if (iterations == 0) {
1004 logger.
info(
"esitester",
"Loopback add test: 0 iterations (skipped)");
1007 std::mt19937_64 rng(0xC0FFEE);
1008 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
1011 auto start = std::chrono::high_resolution_clock::now();
1012 for (uint32_t i = 0; i < iterations; ++i) {
1013 uint32_t argVal = dist(rng);
1014 uint32_t expected = (argVal + 11) & 0xFFFF;
1015 uint8_t argBytes[3] = {
1016 static_cast<uint8_t
>(argVal & 0xFF),
1017 static_cast<uint8_t
>((argVal >> 8) & 0xFF),
1018 static_cast<uint8_t
>((argVal >> 16) & 0xFF),
1021 MessageData resMsg = funcPort->call(argMsg).get();
1022 uint16_t got = *resMsg.
as<uint16_t>();
1023 std::cout <<
"[loopback] i=" << i <<
" arg=0x" <<
esi::toHex(argVal)
1026 if (got != expected)
1027 throw std::runtime_error(
"Loopback mismatch (non-pipelined)");
1029 auto end = std::chrono::high_resolution_clock::now();
1030 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
1032 double callsPerSec = (double)iterations * 1e6 / (
double)us;
1033 logger.
info(
"esitester",
"Loopback add test passed (non-pipelined, " +
1034 std::to_string(iterations) +
" calls, " +
1035 std::to_string(us) +
" us, " +
1036 std::to_string(callsPerSec) +
" calls/s)");
1039 std::vector<std::future<MessageData>> futures;
1040 futures.reserve(iterations);
1041 std::vector<uint32_t> expectedVals;
1042 expectedVals.reserve(iterations);
1044 auto issueStart = std::chrono::high_resolution_clock::now();
1045 for (uint32_t i = 0; i < iterations; ++i) {
1046 uint32_t argVal = dist(rng);
1047 uint32_t expected = (argVal + 11) & 0xFFFF;
1048 uint8_t argBytes[3] = {
1049 static_cast<uint8_t
>(argVal & 0xFF),
1050 static_cast<uint8_t
>((argVal >> 8) & 0xFF),
1051 static_cast<uint8_t
>((argVal >> 16) & 0xFF),
1053 futures.emplace_back(funcPort->call(
MessageData(argBytes, 3)));
1054 expectedVals.emplace_back(expected);
1056 auto issueEnd = std::chrono::high_resolution_clock::now();
1058 for (uint32_t i = 0; i < iterations; ++i) {
1060 uint16_t got = *resMsg.
as<uint16_t>();
1061 uint16_t exp = (uint16_t)expectedVals[i];
1062 std::cout <<
"[loopback-pipelined] i=" << i <<
" got=0x"
1065 throw std::runtime_error(
"Loopback mismatch (pipelined) idx=" +
1068 auto collectEnd = std::chrono::high_resolution_clock::now();
1070 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1071 issueEnd - issueStart)
1073 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1074 collectEnd - issueStart)
1077 double issueRate = (double)iterations * 1e6 / (
double)issueUs;
1078 double completionRate = (double)iterations * 1e6 / (
double)totalUs;
1080 logger.
info(
"esitester",
"Loopback add test passed (pipelined). Issued " +
1081 std::to_string(iterations) +
" in " +
1082 std::to_string(issueUs) +
" us (" +
1083 std::to_string(issueRate) +
1084 " calls/s), total " + std::to_string(totalUs) +
1085 " us (" + std::to_string(completionRate) +
1086 " calls/s effective)");
1092 uint32_t xferCount,
bool read,
1095 if (!read && !write) {
1096 std::cout <<
"aggbandwidth: nothing to do (enable --read and/or --write)\n";
1101 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1102 " count=" + std::to_string(xferCount) +
1103 " read=" + (read ?
"Y" :
"N") +
" write=" + (write ?
"Y" :
"N"));
1106 hostmemSvc->
start();
1110 bool isRead =
false;
1111 bool isWrite =
false;
1112 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1116 bool launched =
false;
1119 uint64_t duration_us = 0;
1120 uint64_t cycleCount = 0;
1121 std::chrono::high_resolution_clock::time_point start;
1123 std::vector<Unit> units;
1124 const std::vector<std::string> readPrefixes = {
"readmem",
"readmem_0",
1125 "readmem_1",
"readmem_2"};
1126 const std::vector<std::string> writePrefixes = {
"writemem",
"writemem_0",
1127 "writemem_1",
"writemem_2"};
1129 auto addUnits = [&](
const std::vector<std::string> &pref,
bool doRead,
1131 for (
auto &p : pref) {
1136 auto &ports = childIt->second->getPorts();
1137 auto cmdIt = ports.find(
AppID(
"cmd", width));
1138 auto respIt = ports.find(
AppID(
"addrCmdResponses"));
1139 auto cycIt = ports.find(
AppID(
"addrCmdCycles"));
1140 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1145 if (!cmd || !resp || !cyc)
1152 u.isWrite = doWrite;
1153 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable =
true});
1155 uint64_t *ptr =
static_cast<uint64_t *
>(u.region->getPtr());
1156 size_t words = u.region->getSize() / 8;
1157 for (
size_t i = 0; i < words; ++i)
1159 (p[0] ==
'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1164 u.bytes = uint64_t(xferCount) * (width / 8);
1165 units.emplace_back(std::move(u));
1169 addUnits(readPrefixes,
true,
false);
1171 addUnits(writePrefixes,
false,
true);
1172 if (units.empty()) {
1173 std::cout <<
"aggbandwidth: no matching units present for width " << width
1178 auto wallStart = std::chrono::high_resolution_clock::now();
1180 for (
auto &u : units) {
1181 uint64_t devPtr =
reinterpret_cast<uint64_t
>(u.region->getDevicePtr());
1182 u.cmd->write(0x10, devPtr);
1183 u.cmd->write(0x18, xferCount);
1184 u.cmd->write(0x20, 1);
1185 u.start = std::chrono::high_resolution_clock::now();
1190 const uint64_t timeoutLoops = 200000;
1193 bool allDone =
true;
1194 for (
auto &u : units) {
1197 if (u.resp->readInt() == xferCount) {
1198 auto end = std::chrono::high_resolution_clock::now();
1200 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1202 u.cycleCount = u.cycles->readInt();
1210 if (++loops >= timeoutLoops)
1211 throw std::runtime_error(
"aggbandwidth: timeout");
1212 std::this_thread::sleep_for(std::chrono::microseconds(50));
1214 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1215 std::chrono::high_resolution_clock::now() - wallStart)
1218 uint64_t totalBytes = 0;
1219 uint64_t totalReadBytes = 0;
1220 uint64_t totalWriteBytes = 0;
1221 for (
auto &u : units) {
1222 totalBytes += u.bytes;
1224 totalReadBytes += u.bytes;
1226 totalWriteBytes += u.bytes;
1227 double unitBps = (double)u.bytes * 1e6 / (
double)u.duration_us;
1228 std::cout <<
"[agg-unit] " << u.prefix <<
"[" << width <<
"] "
1229 << (u.isRead ?
"READ" : (u.isWrite ?
"WRITE" :
"UNK"))
1230 <<
" bytes=" <<
humanBytes(u.bytes) <<
" (" << u.bytes <<
" B)"
1231 <<
" time=" <<
humanTimeUS(u.duration_us) <<
" (" << u.duration_us
1232 <<
" us) cycles=" << u.cycleCount
1238 totalReadBytes ? (double)totalReadBytes * 1e6 / (
double)wallUs : 0.0;
1239 double aggWriteBps =
1240 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (
double)wallUs : 0.0;
1241 double aggCombinedBps =
1242 totalBytes ? (double)totalBytes * 1e6 / (
double)wallUs : 0.0;
1244 std::cout <<
"[agg-total] units=" << units.size()
1245 <<
" read_bytes=" <<
humanBytes(totalReadBytes) <<
" ("
1246 << totalReadBytes <<
" B)"
1248 <<
" write_bytes=" <<
humanBytes(totalWriteBytes) <<
" ("
1249 << totalWriteBytes <<
" B)"
1251 <<
" combined_bytes=" <<
humanBytes(totalBytes) <<
" ("
1252 << totalBytes <<
" B)"
1254 <<
" wall_time=" <<
humanTimeUS(wallUs) <<
" (" << wallUs <<
" us)"
1256 logger.
info(
"esitester",
"Aggregate hostmem bandwidth test complete");
// Struct fragment: the declaration itself is not visible in this listing.
// Packing is pushed to 1-byte alignment here, and the static_assert message
// below states that StreamingAddArg is expected to be 9 bytes when packed.
1262#pragma pack(push, 1)
1270 "StreamingAddArg must be 9 bytes packed");
// Struct fragment: the declaration itself is not visible in this listing.
// Packing is pushed to 1-byte alignment here, and the static_assert message
// below states that StreamingAddResult is expected to be 5 bytes when packed.
1275#pragma pack(push, 1)
1282 "StreamingAddResult must be 5 bytes packed");
// streamingAddTest (only the tail of the signature is visible in this listing).
// Generates `numItems` pseudo-random uint32 values, sends them to the
// 'streaming_add' port one argument at a time, collects the results and checks
// that each result equals input + addAmt.
// Throws std::runtime_error for each failure case whose message appears below.
1288 uint32_t addAmt, uint32_t numItems) {
1290 logger.
info(
"esitester",
"Starting streaming add test with add_amt=" +
1291 std::to_string(addAmt) +
1292 ", num_items=" + std::to_string(numItems));
// Fixed seed, so the generator is seeded identically on every run. Values are
// drawn uniformly from the closed range [0, 1000000].
1295 std::mt19937 rng(0xDEADBEEF);
1296 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1297 std::vector<uint32_t> inputData;
1298 inputData.reserve(numItems);
1299 for (uint32_t i = 0; i < numItems; ++i)
1300 inputData.push_back(dist(rng));
// Look up the adder instance among the accelerator's children. The lookup
// expression is not visible in this listing; per the error text it targets a
// child named 'streaming_adder'.
1303 auto streamingAdderChild =
1305 if (streamingAdderChild == accel->
getChildren().end())
1306 throw std::runtime_error(
1307 "Streaming add test: no 'streaming_adder' child found");
// Locate the 'streaming_add' port on that child.
1309 auto &ports = streamingAdderChild->second->getPorts();
1310 auto addIter = ports.find(
AppID(
"streaming_add"));
1311 if (addIter == ports.end())
1312 throw std::runtime_error(
1313 "Streaming add test: no 'streaming_add' port found");
// Send loop: one `arg` per input element, wrapped in a MessageData covering
// sizeof(arg) bytes starting at &arg. `last` is 1 only for the final element.
// NOTE(review): the channel setup and the write call are not visible in this
// listing; confirm against the full file.
1325 for (
size_t i = 0; i < inputData.size(); ++i) {
1328 arg.
input = inputData[i];
1329 arg.
last = (i == inputData.size() - 1) ? 1 : 0;
1331 MessageData(
reinterpret_cast<const uint8_t *
>(&arg),
sizeof(arg)));
1332 logger.
debug(
"esitester",
"Sent {add_amt=" + std::to_string(arg.
addAmt) +
1333 ", input=" + std::to_string(arg.
input) +
1334 ", last=" + (arg.
last ?
"true" :
"false") +
// Result collection. `lastSeen` is refreshed from each result's `last` field.
// NOTE(review): the loop header is not visible in this listing; presumably
// reading continues until lastSeen becomes true. Confirm.
1339 std::vector<uint32_t> results;
1340 bool lastSeen =
false;
1343 resultPort.
read(resMsg);
1345 throw std::runtime_error(
1346 "Streaming add test: unexpected result message size");
1350 lastSeen = res->
last != 0;
1351 results.push_back(res->data);
1352 logger.
debug(
"esitester",
"Received result=" + std::to_string(res->data) +
1353 " (last=" + (lastSeen ?
"true" :
"false") +
// The number of results must match the number of inputs before the
// element-wise comparison below indexes both containers.
1358 if (results.size() != inputData.size())
1359 throw std::runtime_error(
1360 "Streaming add test: result size mismatch. Expected " +
1361 std::to_string(inputData.size()) +
", got " +
1362 std::to_string(results.size()));
// Print every comparison. `expected` is computed in uint32_t, so the sum wraps
// modulo 2^32 on overflow.
1365 std::cout <<
"Streaming add test results:" << std::endl;
1366 for (
size_t i = 0; i < inputData.size(); ++i) {
1367 uint32_t expected = inputData[i] + addAmt;
1368 std::cout <<
" input[" << i <<
"]=" << inputData[i] <<
" + " << addAmt
1369 <<
" = " << results[i] <<
" (expected " << expected <<
")";
1370 if (results[i] != expected) {
1371 std::cout <<
" MISMATCH!";
1374 std::cout << std::endl;
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably it fires when any element mismatched above.
1381 throw std::runtime_error(
"Streaming add test failed: result mismatch");
1383 logger.
info(
"esitester",
"Streaming add test passed");
1384 std::cout <<
"Streaming add test passed" << std::endl;
// Packed struct fragment: the declaration and fields are not visible in this
// listing. Packing is pushed to 1-byte alignment here.
1403#pragma pack(push, 1)
// Mutable accessor: returns the address one whole object past `this`,
// reinterpreted as uint32_t*, i.e. the storage directly following the struct.
// NOTE(review): valid only if the caller allocated that trailing storage.
1411 uint32_t *
inputData() {
return reinterpret_cast<uint32_t *
>(
this + 1); }
// Const variant of the same pointer computation (its signature line is not
// visible in this listing).
1413 return reinterpret_cast<const uint32_t *
>(
this + 1);
// Packed struct fragment: the declaration and fields are not visible in this
// listing. Packing is pushed to 1-byte alignment here.
1432#pragma pack(push, 1)
// Mutable accessor: returns the address one whole object past `this`,
// reinterpreted as uint32_t*, i.e. the storage directly following the struct.
// NOTE(review): valid only if the buffer really extends past the struct.
1438 uint32_t *
data() {
return reinterpret_cast<uint32_t *
>(
this + 1); }
// Const variant of the same pointer computation (its signature line is not
// visible in this listing).
1440 return reinterpret_cast<const uint32_t *
>(
this + 1);
// streamingAddTranslatedTest (only the tail of the signature is visible in
// this listing). Same check as the streaming add test, but the whole input
// list is packed into one variable-length argument buffer, sent with a single
// write, and the results are taken from a single read.
// Throws std::runtime_error for each failure case whose message appears below,
// and std::bad_alloc from the allocation path.
1454 uint32_t numItems) {
1456 logger.
info(
"esitester",
1457 "Starting streaming add test (translated) with add_amt=" +
1458 std::to_string(addAmt) +
1459 ", num_items=" + std::to_string(numItems));
// Fixed seed, so the generator is seeded identically on every run. Values are
// drawn uniformly from the closed range [0, 1000000].
1462 std::mt19937 rng(0xDEADBEEF);
1463 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1464 std::vector<uint32_t> inputData;
1465 inputData.reserve(numItems);
1466 for (uint32_t i = 0; i < numItems; ++i)
1467 inputData.push_back(dist(rng));
// Look up the adder instance among the accelerator's children. The lookup
// expression is not visible in this listing; per the error text it targets a
// child named 'streaming_adder'.
1470 auto streamingAdderChild =
1472 if (streamingAdderChild == accel->
getChildren().end())
1473 throw std::runtime_error(
1474 "Streaming add test: no 'streaming_adder' child found");
// Locate the 'streaming_add' port on that child.
1476 auto &ports = streamingAdderChild->second->getPorts();
1477 auto addIter = ports.find(
AppID(
"streaming_add"));
1478 if (addIter == ports.end())
1479 throw std::runtime_error(
1480 "Streaming add test: no 'streaming_add' port found");
// Round argSize up to the next multiple of `alignment` for the allocation.
// NOTE(review): the definitions of argSize/alignment, the allocation call and
// the condition guarding the bad_alloc throw are not visible in this listing.
1495 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1498 throw std::bad_alloc();
// Ownership of the raw buffer moves to argBuffer; argDeleter is invoked on it
// when argBuffer is destroyed.
1500 std::unique_ptr<void,
decltype(argDeleter)> argBuffer(argRaw, argDeleter);
// Fill the header field, then copy the inputs into the array that trails the
// argument struct.
1503 arg->addAmt = addAmt;
1504 for (uint32_t i = 0; i < numItems; ++i)
1505 arg->inputData()[i] = inputData[i];
1507 logger.
debug(
"esitester",
1508 "Sending translated argument: " + std::to_string(argSize) +
1509 " bytes, list_length=" + std::to_string(arg->inputLength) +
1510 ", add_amt=" + std::to_string(arg->addAmt));
// Only argSize bytes are sent; the padding up to allocSize is not included.
1513 argPort.
write(
MessageData(
reinterpret_cast<const uint8_t *
>(arg), argSize));
// One read is expected to return the full result message.
1518 resultPort.
read(resMsg);
1520 logger.
debug(
"esitester",
"Received translated result: " +
1521 std::to_string(resMsg.
getSize()) +
" bytes");
// Size validation of the received message. The guarding conditions are not
// visible in this listing; the messages indicate a minimum-size check followed
// by a check that the trailing data is fully present.
1524 throw std::runtime_error(
1525 "Streaming add test (translated): result too small");
1527 const auto *result =
1532 throw std::runtime_error(
1533 "Streaming add test (translated): result data truncated");
// The reported element count must match the number of inputs before the
// element-wise comparison below indexes result->data().
1536 if (result->dataLength != inputData.size())
1537 throw std::runtime_error(
1538 "Streaming add test (translated): result size mismatch. Expected " +
1539 std::to_string(inputData.size()) +
", got " +
1540 std::to_string(result->dataLength));
// Print every comparison. `expected` is computed in uint32_t, so the sum wraps
// modulo 2^32 on overflow.
1543 std::cout <<
"Streaming add test results:" << std::endl;
1544 for (
size_t i = 0; i < inputData.size(); ++i) {
1545 uint32_t expected = inputData[i] + addAmt;
1546 std::cout <<
" input[" << i <<
"]=" << inputData[i] <<
" + " << addAmt
1547 <<
" = " << result->
data()[i] <<
" (expected " << expected <<
")";
1548 if (result->data()[i] != expected) {
1549 std::cout <<
" MISMATCH!";
1552 std::cout << std::endl;
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably it fires when any element mismatched above.
1559 throw std::runtime_error(
1560 "Streaming add test (translated) failed: result mismatch");
1562 logger.
info(
"esitester",
"Streaming add test passed (translated)");
1563 std::cout <<
"Streaming add test passed" << std::endl;
// Coord fragment: the struct declaration is not visible in this listing.
// Packing is pushed to 1-byte alignment, and the assert below fails the build
// unless sizeof(Coord) is exactly 8.
1574#pragma pack(push, 1)
1580static_assert(
sizeof(
Coord) == 8,
"Coord must be 8 bytes packed");
// Packed struct fragment: the declaration and fields are not visible in this
// listing. Packing is pushed to 1-byte alignment here.
1591#pragma pack(push, 1)
// Const accessor body (its signature line is not visible): returns the address
// one whole object past `this`, reinterpreted as const Coord*, i.e. the storage
// directly following the struct.
1601 return reinterpret_cast<const Coord *
>(
this + 1);
// Packed struct fragment: the declaration and fields are not visible in this
// listing. Packing is pushed to 1-byte alignment here.
1618#pragma pack(push, 1)
// Const accessor body (its signature line is not visible): returns the address
// one whole object past `this`, reinterpreted as const Coord*, i.e. the storage
// directly following the struct.
1626 return reinterpret_cast<const Coord *
>(
this + 1);
// coordTranslateTest (only the tail of the signature is visible in this
// listing). Builds `numCoords` input coordinates, packs them together with the
// x/y translation into one variable-length argument buffer, invokes the
// 'translate_coords' function port once, and checks that each returned
// coordinate equals the input shifted by (xTrans, yTrans).
// Throws std::runtime_error for each failure case whose message appears below,
// and std::bad_alloc from the allocation path.
1639 uint32_t xTrans, uint32_t yTrans,
1640 uint32_t numCoords) {
1642 logger.
info(
"esitester",
"Starting coord translate test with x_trans=" +
1643 std::to_string(xTrans) +
1644 ", y_trans=" + std::to_string(yTrans) +
1645 ", num_coords=" + std::to_string(numCoords));
// Fixed seed, so the generator is seeded identically on every run. The
// distribution covers the closed range [0, 1000000].
// NOTE(review): the lines that fill each Coord `c` are not visible in this
// listing; presumably x and y are both drawn from `dist`. Confirm.
1650 std::mt19937 rng(0xDEADBEEF);
1651 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1652 std::vector<Coord> inputCoords;
1653 inputCoords.reserve(numCoords);
1654 for (uint32_t i = 0; i < numCoords; ++i) {
1658 inputCoords.push_back(c);
// Look up the translator instance among the accelerator's children. The
// lookup expression is not visible in this listing; per the error text it
// targets a child named 'coord_translator'.
1662 auto coordTranslatorChild =
1664 if (coordTranslatorChild == accel->
getChildren().end())
1665 throw std::runtime_error(
1666 "Coord translate test: no 'coord_translator' child found");
// Locate the 'translate_coords' port on that child.
1668 auto &ports = coordTranslatorChild->second->getPorts();
1669 auto translateIter = ports.find(
AppID(
"translate_coords"));
1670 if (translateIter == ports.end())
1671 throw std::runtime_error(
1672 "Coord translate test: no 'translate_coords' port found");
// The port must be usable as a FuncService::Function. The cast and its null
// check are not visible in this listing.
1678 throw std::runtime_error(
1679 "Coord translate test: 'translate_coords' port not a "
1680 "FuncService::Function");
1681 funcPort->connect();
// Round argSize up to the next multiple of `alignment` for the allocation.
// NOTE(review): the definitions of argSize/alignment, the allocation call and
// the condition guarding the bad_alloc throw are not visible in this listing.
1687 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1690 throw std::bad_alloc();
// Ownership of the raw buffer moves to argBuffer; argDeleter is invoked on it
// when argBuffer is destroyed.
1692 std::unique_ptr<void,
decltype(argDeleter)> argBuffer(argRaw, argDeleter);
// Fill the header fields, then copy the inputs into the array that trails the
// argument struct.
1695 arg->xTranslation = xTrans;
1696 arg->yTranslation = yTrans;
1697 for (uint32_t i = 0; i < numCoords; ++i)
1698 arg->coords()[i] = inputCoords[i];
1702 "Sending coord translate argument: " + std::to_string(argSize) +
1703 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1704 ", x_trans=" + std::to_string(arg->xTranslation) +
1705 ", y_trans=" + std::to_string(arg->yTranslation));
// Only argSize bytes are passed to the call; the padding up to allocSize is
// not included. The receiver expression and the handling of the call's return
// value are not visible in this listing.
1710 ->call(
MessageData(
reinterpret_cast<const uint8_t *
>(arg), argSize))
1714 logger.
debug(
"esitester",
"Received coord translate result: " +
1715 std::to_string(resMsg.
getSize()) +
" bytes");
// Size validation of the received message. The guarding conditions are not
// visible in this listing; the messages indicate a minimum-size check followed
// by a check that the trailing coordinate data is fully present.
1718 throw std::runtime_error(
"Coord translate test: result too small");
1720 const auto *result =
1724 throw std::runtime_error(
"Coord translate test: result data truncated");
// The reported coordinate count must match the number of inputs before the
// element-wise comparison below indexes result->coords().
1727 if (result->coordsLength != inputCoords.size())
1728 throw std::runtime_error(
1729 "Coord translate test: result size mismatch. Expected " +
1730 std::to_string(inputCoords.size()) +
", got " +
1731 std::to_string(result->coordsLength));
// Print every comparison. expectedX/expectedY are held in uint32_t.
1734 std::cout <<
"Coord translate test results:" << std::endl;
1735 for (
size_t i = 0; i < inputCoords.size(); ++i) {
1736 uint32_t expectedX = inputCoords[i].x + xTrans;
1737 uint32_t expectedY = inputCoords[i].y + yTrans;
1738 std::cout <<
" coord[" << i <<
"]=(" << inputCoords[i].x <<
","
1739 << inputCoords[i].y <<
") + (" << xTrans <<
"," << yTrans
1740 <<
") = (" << result->
coords()[i].
x <<
","
1741 << result->coords()[i].y <<
")";
1742 if (result->coords()[i].x != expectedX ||
1743 result->coords()[i].y != expectedY) {
1744 std::cout <<
" MISMATCH! (expected (" << expectedX <<
"," << expectedY
1748 std::cout << std::endl;
// NOTE(review): the condition guarding this throw is not visible in this
// listing; presumably it fires when any coordinate mismatched above.
1752 throw std::runtime_error(
"Coord translate test failed: result mismatch");
1754 logger.
info(
"esitester",
"Coord translate test passed");
1755 std::cout <<
"Coord translate test passed" << std::endl;
static void print(TypedAttr val, llvm::raw_ostream &os)
static void writePort(uint16_t port)
Write the port number to a file.
Abstract class representing a connection to an accelerator.
ServiceClass * getService(AppIDPath id={}, std::string implName={}, ServiceImplDetails details={}, HWClientDetails clients={})
Get a typed reference to a particular service type.
virtual void disconnect()
Disconnect from the accelerator cleanly.
Logger & getLogger() const
AcceleratorServiceThread * getServiceThread()
Return a pointer to the accelerator 'service' thread (or threads).
void addPoll(HWModule &module)
Poll this module.
Top level accelerator class.
Services provide connections to 'bundles' – collections of named, unidirectional communication channe...
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
ReadChannelPort & getRawRead(const std::string &name) const
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Common options and code for ESI runtime tools.
Context & getContext()
Get the context.
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
int esiParse(int argc, const char **argv)
Run the parser.
AcceleratorConnections, Accelerators, and Manifests must all share a context.
BundlePort * resolvePort(const AppIDPath &path, AppIDPath &lastLookup) const
Attempt to resolve a path to a port.
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Class to parse a manifest.
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A logical chunk of data representing serialized data.
const uint8_t * getBytes() const
const T * as() const
Cast to a type.
size_t getSize() const
Get the size of the data in bytes.
A ChannelPort which reads data from the accelerator.
virtual void connect(std::function< bool(MessageData)> callback, const ConnectOptions &options={})
virtual void disconnect() override
virtual void read(MessageData &outData)
Specify a buffer to read into.
A ChannelPort which sends data to the accelerator.
virtual void disconnect() override
void write(const MessageData &data)
A very basic blocking write API.
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
A function call which gets attached to a service port.
A function call which gets attached to a service port.
virtual void start()
In cases where necessary, enable host memory services.
A "slice" of some parent MMIO space.
Information about the Accelerator system.
A telemetry port which gets attached to a service port.
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
constexpr std::array< uint32_t, 5 > defaultWidths
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string formatBandwidth(double bytesPerSec)
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static std::string defaultWidthsStr()
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
std::string toHex(void *val)
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
RAII memory region for host memory.
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
virtual std::size_t getSize() const =0