43 const std::vector<uint32_t> &widths,
bool write,
47 const std::vector<uint32_t> &widths,
bool read,
50 const std::vector<uint32_t> &widths,
bool read,
bool write);
52 const std::vector<uint32_t> &widths,
53 uint32_t xferCount,
bool read,
bool write);
55 uint32_t iterations,
bool pipeline);
58 uint32_t xferCount,
bool read,
62constexpr std::array<uint32_t, 5>
defaultWidths = {32, 64, 128, 256, 512};
75 const char *unit =
"B/s";
76 double value = bytesPerSec;
77 if (bytesPerSec >= 1e9) {
79 value = bytesPerSec / 1e9;
80 }
else if (bytesPerSec >= 1e6) {
82 value = bytesPerSec / 1e6;
83 }
else if (bytesPerSec >= 1e3) {
85 value = bytesPerSec / 1e3;
87 std::ostringstream oss;
88 oss.setf(std::ios::fixed);
90 oss << value <<
" " << unit;
96 const char *units[] = {
"B",
"KB",
"MB",
"GB",
"TB"};
97 double v = (double)bytes;
99 while (v >= 1024.0 && u < 4) {
103 std::ostringstream oss;
104 oss.setf(std::ios::fixed);
105 oss.precision(u == 0 ? 0 : 2);
106 oss << v <<
" " << units[u];
113 return std::to_string(us) +
" us";
114 double ms = us / 1000.0;
116 std::ostringstream oss;
117 oss.setf(std::ios::fixed);
118 oss.precision(ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0));
122 double sec = ms / 1000.0;
123 std::ostringstream oss;
124 oss.setf(std::ios::fixed);
125 oss.precision(sec < 10.0 ? 3 : 2);
130int main(
int argc,
const char *argv[]) {
132 cli.description(
"Test an ESI system running the ESI tester image.");
133 cli.require_subcommand(1);
135 CLI::App *callback_test =
136 cli.add_subcommand(
"callback",
"initiate callback test");
137 uint32_t cb_iters = 1;
138 callback_test->add_option(
"-i,--iters", cb_iters,
139 "Number of iterations to run");
141 CLI::App *hostmemtestSub =
142 cli.add_subcommand(
"hostmem",
"Run the host memory test");
144 bool hmWrite =
false;
147 hostmemtestSub->add_flag(
"-w,--write", hmWrite,
148 "Enable host memory write test");
149 hostmemtestSub->add_flag(
"-r,--read", hmRead,
"Enable host memory read test");
150 hostmemtestSub->add_option(
151 "--widths", hostmemWidths,
154 CLI::App *dmatestSub = cli.add_subcommand(
"dma",
"Run the DMA test");
155 bool dmaRead =
false;
156 bool dmaWrite =
false;
158 dmatestSub->add_flag(
"-w,--write", dmaWrite,
"Enable dma write test");
159 dmatestSub->add_flag(
"-r,--read", dmaRead,
"Enable dma read test");
160 dmatestSub->add_option(
"--widths", dmaWidths,
164 CLI::App *bandwidthSub =
165 cli.add_subcommand(
"bandwidth",
"Run the bandwidth test");
166 uint32_t xferCount = 1000;
167 bandwidthSub->add_option(
"-c,--count", xferCount,
168 "Number of transfers to perform");
169 bool bandwidthRead =
false;
170 bool bandwidthWrite =
false;
173 bandwidthSub->add_option(
"--widths", bandwidthWidths,
174 "Width of the transfers to perform (default: " +
176 bandwidthSub->add_flag(
"-w,--write", bandwidthWrite,
177 "Enable bandwidth write");
178 bandwidthSub->add_flag(
"-r,--read", bandwidthRead,
"Enable bandwidth read");
180 CLI::App *hostmembwSub =
181 cli.add_subcommand(
"hostmembw",
"Run the host memory bandwidth test");
182 uint32_t hmBwCount = 1000;
183 bool hmBwRead =
false;
184 bool hmBwWrite =
false;
186 hostmembwSub->add_option(
"-c,--count", hmBwCount,
187 "Number of hostmem transfers");
188 hostmembwSub->add_option(
189 "--widths", hmBwWidths,
191 hostmembwSub->add_flag(
"-w,--write", hmBwWrite,
192 "Measure hostmem write bandwidth");
193 hostmembwSub->add_flag(
"-r,--read", hmBwRead,
194 "Measure hostmem read bandwidth");
196 CLI::App *loopbackSub =
197 cli.add_subcommand(
"loopback",
"Test LoopbackInOutAdd function service");
198 uint32_t loopbackIters = 10;
199 bool loopbackPipeline =
false;
200 loopbackSub->add_option(
"-i,--iters", loopbackIters,
201 "Number of function invocations (default 10)");
202 loopbackSub->add_flag(
"-p,--pipeline", loopbackPipeline,
203 "Pipeline all calls then collect results");
205 CLI::App *aggBwSub = cli.add_subcommand(
207 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
208 uint32_t aggWidth = 512;
209 uint32_t aggCount = 1000;
210 bool aggRead =
false;
211 bool aggWrite =
false;
212 aggBwSub->add_option(
214 "Bit width (default 512; other widths ignored if absent)");
215 aggBwSub->add_option(
"-c,--count", aggCount,
"Flits per unit (default 1000)");
216 aggBwSub->add_flag(
"-r,--read", aggRead,
"Include read units");
217 aggBwSub->add_flag(
"-w,--write", aggWrite,
"Include write units");
219 if (
int rc = cli.
esiParse(argc, argv))
221 if (!cli.get_help_ptr()->empty())
228 ctxt.
getLogger().
info(
"esitester",
"Connected to accelerator.");
229 Manifest manifest(ctxt, info.getJsonManifest());
234 if (*callback_test) {
236 }
else if (*hostmemtestSub) {
237 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
238 }
else if (*loopbackSub) {
240 }
else if (*dmatestSub) {
241 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
242 }
else if (*bandwidthSub) {
243 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
245 }
else if (*hostmembwSub) {
248 }
else if (*aggBwSub) {
254 }
catch (std::exception &e) {
259 std::cout <<
"Exiting successfully\n";
264 uint32_t iterations) {
267 throw std::runtime_error(
"No cb_test child found in accelerator");
268 auto &ports = cb_test->second->getPorts();
269 auto cmd_port = ports.find(
AppID(
"cmd"));
270 if (cmd_port == ports.end())
271 throw std::runtime_error(
"No cmd port found in cb_test child");
274 throw std::runtime_error(
"cb_test cmd port is not MMIO");
276 auto f = ports.find(
AppID(
"cb"));
277 if (f == ports.end())
278 throw std::runtime_error(
"No cb port found in accelerator");
282 throw std::runtime_error(
"cb port is not a CallService::Callback");
284 std::atomic<uint32_t> callbackCount = 0;
287 callbackCount.fetch_add(1);
289 [&](std::string &subsystem, std::string &msg,
290 std::unique_ptr<std::map<std::string, std::any>> &details) {
291 subsystem =
"ESITESTER";
292 msg =
"Received callback";
293 details = std::make_unique<std::map<std::string, std::any>>();
294 details->emplace(
"data", data);
296 std::cout <<
"callback: " << *data.as<uint64_t>() << std::endl;
301 for (uint32_t i = 0; i < iterations; ++i) {
302 conn->
getLogger().
info(
"esitester",
"Issuing callback command iteration " +
303 std::to_string(i) +
"/" +
304 std::to_string(iterations));
305 cmdMMIO->write(0x10, i);
307 for (uint32_t wait = 0; wait < 1000; ++wait) {
308 if (callbackCount.load() > i)
310 std::this_thread::sleep_for(std::chrono::milliseconds(1));
312 if (callbackCount.load() <= i)
313 throw std::runtime_error(
"Callback test failed. No callback received");
321 std::cout <<
"Running hostmem WRITE test with width " << width << std::endl;
322 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
323 auto check = [&](
bool print) {
325 for (
size_t i = 0; i < 9; ++i) {
327 printf(
"[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
328 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
336 throw std::runtime_error(
337 "hostmem write test failed. No writemem child found");
338 auto &writeMemPorts = writeMemChildIter->second->getPorts();
340 auto cmdPortIter = writeMemPorts.find(
AppID(
"cmd", width));
341 if (cmdPortIter == writeMemPorts.end())
342 throw std::runtime_error(
343 "hostmem write test failed. No (cmd,width) MMIO port");
346 throw std::runtime_error(
347 "hostmem write test failed. (cmd,width) port not MMIO");
349 auto issuedPortIter = writeMemPorts.find(
AppID(
"addrCmdIssued"));
350 if (issuedPortIter == writeMemPorts.end())
351 throw std::runtime_error(
352 "hostmem write test failed. addrCmdIssued missing");
353 auto *addrCmdIssuedPort =
355 if (!addrCmdIssuedPort)
356 throw std::runtime_error(
357 "hostmem write test failed. addrCmdIssued not telemetry");
358 addrCmdIssuedPort->connect();
360 auto responsesPortIter = writeMemPorts.find(
AppID(
"addrCmdResponses"));
361 if (responsesPortIter == writeMemPorts.end())
362 throw std::runtime_error(
363 "hostmem write test failed. addrCmdResponses missing");
364 auto *addrCmdResponsesPort =
366 if (!addrCmdResponsesPort)
367 throw std::runtime_error(
368 "hostmem write test failed. addrCmdResponses not telemetry");
369 addrCmdResponsesPort->connect();
371 for (
size_t i = 0, e = 9; i < e; ++i)
372 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
374 cmdMMIO->write(0x10,
reinterpret_cast<uint64_t
>(region.
getDevicePtr()));
375 cmdMMIO->write(0x18, 1);
376 cmdMMIO->write(0x20, 1);
378 for (
int i = 0; i < 100; ++i) {
379 auto issued = addrCmdIssuedPort->readInt();
380 auto responses = addrCmdResponsesPort->readInt();
381 if (issued == 1 && responses == 1) {
385 std::this_thread::sleep_for(std::chrono::microseconds(100));
389 throw std::runtime_error(
"hostmem write test (" + std::to_string(width) +
390 " bits) timeout waiting for completion");
393 throw std::runtime_error(
"hostmem write test failed (" +
394 std::to_string(width) +
" bits)");
400 std::cout <<
"Running hostmem READ test with width " << width << std::endl;
403 throw std::runtime_error(
404 "hostmem read test failed. No readmem child found");
406 auto &readMemPorts = readMemChildIter->second->getPorts();
407 auto addrCmdPortIter = readMemPorts.find(
AppID(
"cmd", width));
408 if (addrCmdPortIter == readMemPorts.end())
409 throw std::runtime_error(
410 "hostmem read test failed. No AddressCommand MMIO port");
414 throw std::runtime_error(
415 "hostmem read test failed. AddressCommand port not MMIO");
417 auto lastReadPortIter = readMemPorts.find(
AppID(
"lastReadLSB"));
418 if (lastReadPortIter == readMemPorts.end())
419 throw std::runtime_error(
"hostmem read test failed. lastReadLSB missing");
423 throw std::runtime_error(
424 "hostmem read test failed. lastReadLSB not telemetry");
425 lastReadPort->connect();
427 auto issuedPortIter = readMemPorts.find(
AppID(
"addrCmdIssued"));
428 if (issuedPortIter == readMemPorts.end())
429 throw std::runtime_error(
"hostmem read test failed. addrCmdIssued missing");
430 auto *addrCmdIssuedPort =
432 if (!addrCmdIssuedPort)
433 throw std::runtime_error(
434 "hostmem read test failed. addrCmdIssued not telemetry");
435 addrCmdIssuedPort->connect();
437 auto responsesPortIter = readMemPorts.find(
AppID(
"addrCmdResponses"));
438 if (responsesPortIter == readMemPorts.end())
439 throw std::runtime_error(
440 "hostmem read test failed. addrCmdResponses missing");
441 auto *addrCmdResponsesPort =
443 if (!addrCmdResponsesPort)
444 throw std::runtime_error(
445 "hostmem read test failed. addrCmdResponses not telemetry");
446 addrCmdResponsesPort->connect();
448 for (
size_t i = 0; i < 8; ++i) {
449 auto *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
450 dataPtr[0] = 0x12345678ull << i;
451 dataPtr[1] = 0xDEADBEEFull << i;
453 addrCmdMMIO->write(0x10,
reinterpret_cast<uint64_t
>(region.
getDevicePtr()));
454 addrCmdMMIO->write(0x18, 1);
455 addrCmdMMIO->write(0x20, 1);
457 for (
int waitLoop = 0; waitLoop < 100; ++waitLoop) {
458 auto issued = addrCmdIssuedPort->readInt();
459 auto responses = addrCmdResponsesPort->readInt();
460 if (issued == 1 && responses == 1) {
464 std::this_thread::sleep_for(std::chrono::milliseconds(10));
467 throw std::runtime_error(
"hostmem read (" + std::to_string(width) +
468 " bits) timeout waiting for completion");
469 uint64_t captured = lastReadPort->readInt();
470 uint64_t expected = dataPtr[0];
472 expected &= ((1ull << width) - 1);
473 if (captured != expected)
474 throw std::runtime_error(
"hostmem read test (" + std::to_string(width) +
475 " bits) failed. Expected " +
482 const std::vector<uint32_t> &widths,
bool write,
487 auto scratchRegion = hostmem->allocate(1024 * 1024,
488 {.writeable =
true});
489 uint64_t *dataPtr =
static_cast<uint64_t *
>(scratchRegion->getPtr());
491 "Running host memory test with region size " +
492 std::to_string(scratchRegion->getSize()) +
493 " bytes at 0x" +
toHex(dataPtr));
494 for (
size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
496 scratchRegion->flush();
499 for (
size_t width : widths) {
505 }
catch (std::exception &e) {
506 conn->
getLogger().
error(
"esitester",
"Hostmem test failed for width " +
507 std::to_string(width) +
": " +
513 throw std::runtime_error(
"Hostmem test failed");
514 std::cout <<
"Hostmem test passed" << std::endl;
520 logger.
info(
"esitester",
521 "== Running DMA read test with width " + std::to_string(width));
526 throw std::runtime_error(
"dma read test failed. No tohostdma[" +
527 std::to_string(width) +
"] found");
530 throw std::runtime_error(
"dma read test failed. MMIO port is not MMIO");
537 size_t xferCount = 24;
540 toHostMMIO->write(0, xferCount);
541 for (
size_t i = 0; i < xferCount; ++i) {
544 uint64_t val = *data.as<uint64_t>();
546 throw std::runtime_error(
"dma read test failed. Out of order data");
549 logger.
debug(
"esitester",
550 "Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex());
553 std::cout <<
" DMA read test for " << width <<
" bits passed" << std::endl;
559 logger.
info(
"esitester",
560 "Running DMA write test with width " + std::to_string(width));
564 if (!fromHostMMIOPort)
565 throw std::runtime_error(
"dma read test for " +
toString(width) +
566 " bits failed. No fromhostdma[" +
567 std::to_string(width) +
"] found");
570 throw std::runtime_error(
"dma write test for " +
toString(width) +
571 " bits failed. MMIO port is not MMIO");
576 throw std::runtime_error(
"dma write test for " +
toString(width) +
577 " bits failed. No out port found");
581 size_t xferCount = 24;
582 uint8_t *data =
new uint8_t[width];
583 for (
size_t i = 0; i < width / 8; ++i)
585 fromHostMMIO->read(8);
586 fromHostMMIO->write(0, xferCount);
587 for (
size_t i = 1; i < xferCount + 1; ++i) {
594 std::this_thread::sleep_for(std::chrono::milliseconds(10));
596 }
while (!successWrite && ++attempts < 100);
598 throw std::runtime_error(
"dma write test for " +
toString(width) +
599 " bits failed. Write failed");
600 uint64_t lastReadMMIO;
601 for (
size_t a = 0; a < 20; ++a) {
602 lastReadMMIO = fromHostMMIO->read(8);
603 if (lastReadMMIO == i)
605 std::this_thread::sleep_for(std::chrono::milliseconds(10));
607 throw std::runtime_error(
"dma write for " +
toString(width) +
608 " bits test failed. Read from MMIO failed");
613 std::cout <<
" DMA write test for " << width <<
" bits passed" << std::endl;
617 const std::vector<uint32_t> &widths,
bool read,
621 for (
size_t width : widths)
624 }
catch (std::exception &e) {
626 std::cerr <<
"DMA write test for " << width
627 <<
" bits failed: " << e.what() << std::endl;
630 for (
size_t width : widths)
633 throw std::runtime_error(
"DMA test failed");
634 std::cout <<
"DMA test passed" << std::endl;
642 size_t width,
size_t xferCount) {
648 throw std::runtime_error(
"bandwidth test failed. No tohostdma[" +
649 std::to_string(width) +
"] found");
652 throw std::runtime_error(
"bandwidth test failed. MMIO port is not MMIO");
660 logger.
info(
"esitester",
"Starting read bandwidth test with " +
661 std::to_string(xferCount) +
" x " +
662 std::to_string(width) +
" bit transfers");
664 auto start = std::chrono::high_resolution_clock::now();
665 toHostMMIO->write(0, xferCount);
666 for (
size_t i = 0; i < xferCount; ++i) {
669 [i, &data](std::string &subsystem, std::string &msg,
670 std::unique_ptr<std::map<std::string, std::any>> &details) {
671 subsystem =
"esitester";
672 msg =
"Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex();
675 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
676 std::chrono::high_resolution_clock::now() - start);
678 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
679 logger.
info(
"esitester",
680 " Bandwidth test: " + std::to_string(xferCount) +
" x " +
681 std::to_string(width) +
" bit transfers in " +
682 std::to_string(duration.count()) +
" microseconds");
687 size_t width,
size_t xferCount) {
692 if (!fromHostMMIOPort)
693 throw std::runtime_error(
"bandwidth test failed. No fromhostdma[" +
694 std::to_string(width) +
"] found");
697 throw std::runtime_error(
"bandwidth test failed. MMIO port is not MMIO");
705 logger.
info(
"esitester",
"Starting write bandwidth test with " +
706 std::to_string(xferCount) +
" x " +
707 std::to_string(width) +
" bit transfers");
708 std::vector<uint8_t> dataVec(width / 8);
709 for (
size_t i = 0; i < width / 8; ++i)
712 auto start = std::chrono::high_resolution_clock::now();
713 fromHostMMIO->write(0, xferCount);
714 for (
size_t i = 0; i < xferCount; ++i) {
717 [i, &data](std::string &subsystem, std::string &msg,
718 std::unique_ptr<std::map<std::string, std::any>> &details) {
719 subsystem =
"esitester";
720 msg =
"Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex();
723 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
724 std::chrono::high_resolution_clock::now() - start);
726 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
727 logger.
info(
"esitester",
728 " Bandwidth test: " + std::to_string(xferCount) +
" x " +
729 std::to_string(width) +
" bit transfers in " +
730 std::to_string(duration.count()) +
" microseconds");
735 const std::vector<uint32_t> &widths,
736 uint32_t xferCount,
bool read,
bool write) {
738 for (uint32_t w : widths)
741 for (uint32_t w : widths)
752 uint32_t width, uint32_t xferCount) {
754 logger.
info(
"esitester",
"Starting hostmem WRITE bandwidth test: " +
755 std::to_string(xferCount) +
" x " +
756 std::to_string(width) +
" bits");
760 throw std::runtime_error(
"hostmem write bandwidth: writemem child missing");
761 auto &writeMemPorts = writeMemChildIter->second->getPorts();
763 auto cmdPortIter = writeMemPorts.find(
AppID(
"cmd", width));
764 if (cmdPortIter == writeMemPorts.end())
765 throw std::runtime_error(
"hostmem write bandwidth: cmd MMIO missing");
768 throw std::runtime_error(
"hostmem write bandwidth: cmd not MMIO");
770 auto issuedIter = writeMemPorts.find(
AppID(
"addrCmdIssued"));
771 auto respIter = writeMemPorts.find(
AppID(
"addrCmdResponses"));
772 auto cycleCount = writeMemPorts.find(
AppID(
"addrCmdCycles"));
773 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
774 cycleCount == writeMemPorts.end())
775 throw std::runtime_error(
"hostmem write bandwidth: telemetry missing");
781 if (!issuedPort || !respPort || !cyclePort)
782 throw std::runtime_error(
783 "hostmem write bandwidth: telemetry type mismatch");
785 issuedPort->connect();
790 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
791 size_t words = region.
getSize() / 8;
792 for (
size_t i = 0; i < words; ++i)
793 dataPtr[i] = i + 0xA5A50000;
796 auto start = std::chrono::high_resolution_clock::now();
798 uint64_t devPtr =
reinterpret_cast<uint64_t
>(region.
getDevicePtr());
799 cmdMMIO->write(0x10, devPtr);
800 cmdMMIO->write(0x18, xferCount);
801 cmdMMIO->write(0x20, 1);
804 bool completed =
false;
805 for (
int wait = 0; wait < 100000; ++wait) {
806 uint64_t respNow = respPort->
readInt();
807 if (respNow == xferCount) {
811 std::this_thread::sleep_for(std::chrono::microseconds(50));
814 throw std::runtime_error(
"hostmem write bandwidth timeout");
815 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
816 std::chrono::high_resolution_clock::now() - start);
818 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
819 uint64_t cycles = cyclePort->
readInt();
820 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
821 std::cout <<
"[WRITE] Hostmem bandwidth (" << std::to_string(width)
823 << std::to_string(xferCount) <<
" flits in "
824 << std::to_string(duration.count()) <<
" us, "
825 << std::to_string(cycles) <<
" cycles, " << bytesPerCycle
826 <<
" bytes/cycle" << std::endl;
832 uint32_t width, uint32_t xferCount) {
834 logger.
info(
"esitester",
"Starting hostmem READ bandwidth test: " +
835 std::to_string(xferCount) +
" x " +
836 std::to_string(width) +
" bits");
840 throw std::runtime_error(
"hostmem read bandwidth: readmem child missing");
841 auto &readMemPorts = readMemChildIter->second->getPorts();
843 auto cmdPortIter = readMemPorts.find(
AppID(
"cmd", width));
844 if (cmdPortIter == readMemPorts.end())
845 throw std::runtime_error(
"hostmem read bandwidth: cmd MMIO missing");
848 throw std::runtime_error(
"hostmem read bandwidth: cmd not MMIO");
850 auto issuedIter = readMemPorts.find(
AppID(
"addrCmdIssued"));
851 auto respIter = readMemPorts.find(
AppID(
"addrCmdResponses"));
852 auto cyclePort = readMemPorts.find(
AppID(
"addrCmdCycles"));
853 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
854 cyclePort == readMemPorts.end())
855 throw std::runtime_error(
"hostmem read bandwidth: telemetry missing");
861 if (!issuedPort || !respPort || !cycleCntPort)
862 throw std::runtime_error(
"hostmem read bandwidth: telemetry type mismatch");
863 issuedPort->connect();
868 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
869 size_t words64 = region.
getSize() / 8;
870 for (
size_t i = 0; i < words64; ++i)
871 dataPtr[i] = 0xCAFEBABE0000ull + i;
873 uint64_t devPtr =
reinterpret_cast<uint64_t
>(region.
getDevicePtr());
874 auto start = std::chrono::high_resolution_clock::now();
876 cmdMMIO->write(0x10, devPtr);
877 cmdMMIO->write(0x18, xferCount);
878 cmdMMIO->write(0x20, 1);
881 for (
int wait = 0; wait < 100000; ++wait) {
882 uint64_t respNow = respPort->
readInt();
883 if (respNow == xferCount) {
887 std::this_thread::sleep_for(std::chrono::microseconds(50));
890 throw std::runtime_error(
"hostmem read bandwidth timeout");
891 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
892 std::chrono::high_resolution_clock::now() - start);
894 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
895 uint64_t cycles = cycleCntPort->
readInt();
896 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
897 std::cout <<
"[ READ] Hostmem bandwidth (" << width
899 <<
" flits in " << duration.count() <<
" us, " << cycles
900 <<
" cycles, " << bytesPerCycle <<
" bytes/cycle" << std::endl;
905 const std::vector<uint32_t> &widths,
bool read,
909 auto region = hostmemSvc->allocate(1024 * 1024 * 1024,
910 {.writeable =
true});
911 for (uint32_t w : widths) {
920 uint32_t iterations,
bool pipeline) {
924 throw std::runtime_error(
"Loopback test: no 'loopback' child");
925 auto &ports = loopbackChild->second->getPorts();
926 auto addIter = ports.find(
AppID(
"add"));
927 if (addIter == ports.end())
928 throw std::runtime_error(
"Loopback test: no 'add' port");
933 throw std::runtime_error(
934 "Loopback test: 'add' port not a FuncService::Function");
936 if (iterations == 0) {
937 logger.
info(
"esitester",
"Loopback add test: 0 iterations (skipped)");
940 std::mt19937_64 rng(0xC0FFEE);
941 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
944 auto start = std::chrono::high_resolution_clock::now();
945 for (uint32_t i = 0; i < iterations; ++i) {
946 uint32_t argVal = dist(rng);
947 uint32_t expected = (argVal + 11) & 0xFFFF;
948 uint8_t argBytes[3] = {
949 static_cast<uint8_t
>(argVal & 0xFF),
950 static_cast<uint8_t
>((argVal >> 8) & 0xFF),
951 static_cast<uint8_t
>((argVal >> 16) & 0xFF),
955 uint16_t got = *resMsg.
as<uint16_t>();
956 std::cout <<
"[loopback] i=" << i <<
" arg=0x" <<
esi::toHex(argVal)
960 throw std::runtime_error(
"Loopback mismatch (non-pipelined)");
962 auto end = std::chrono::high_resolution_clock::now();
963 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
965 double callsPerSec = (double)iterations * 1e6 / (
double)us;
966 logger.
info(
"esitester",
"Loopback add test passed (non-pipelined, " +
967 std::to_string(iterations) +
" calls, " +
968 std::to_string(us) +
" us, " +
969 std::to_string(callsPerSec) +
" calls/s)");
972 std::vector<std::future<MessageData>> futures;
973 futures.reserve(iterations);
974 std::vector<uint32_t> expectedVals;
975 expectedVals.reserve(iterations);
977 auto issueStart = std::chrono::high_resolution_clock::now();
978 for (uint32_t i = 0; i < iterations; ++i) {
979 uint32_t argVal = dist(rng);
980 uint32_t expected = (argVal + 11) & 0xFFFF;
981 uint8_t argBytes[3] = {
982 static_cast<uint8_t
>(argVal & 0xFF),
983 static_cast<uint8_t
>((argVal >> 8) & 0xFF),
984 static_cast<uint8_t
>((argVal >> 16) & 0xFF),
986 futures.emplace_back(funcPort->call(
MessageData(argBytes, 3)));
987 expectedVals.emplace_back(expected);
989 auto issueEnd = std::chrono::high_resolution_clock::now();
991 for (uint32_t i = 0; i < iterations; ++i) {
993 uint16_t got = *resMsg.
as<uint16_t>();
994 uint16_t exp = (uint16_t)expectedVals[i];
995 std::cout <<
"[loopback-pipelined] i=" << i <<
" got=0x"
998 throw std::runtime_error(
"Loopback mismatch (pipelined) idx=" +
1001 auto collectEnd = std::chrono::high_resolution_clock::now();
1003 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1004 issueEnd - issueStart)
1006 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1007 collectEnd - issueStart)
1010 double issueRate = (double)iterations * 1e6 / (
double)issueUs;
1011 double completionRate = (double)iterations * 1e6 / (
double)totalUs;
1013 logger.
info(
"esitester",
"Loopback add test passed (pipelined). Issued " +
1014 std::to_string(iterations) +
" in " +
1015 std::to_string(issueUs) +
" us (" +
1016 std::to_string(issueRate) +
1017 " calls/s), total " + std::to_string(totalUs) +
1018 " us (" + std::to_string(completionRate) +
1019 " calls/s effective)");
1025 uint32_t xferCount,
bool read,
1028 if (!read && !write) {
1029 std::cout <<
"aggbandwidth: nothing to do (enable --read and/or --write)\n";
1034 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1035 " count=" + std::to_string(xferCount) +
1036 " read=" + (read ?
"Y" :
"N") +
" write=" + (write ?
"Y" :
"N"));
1039 hostmemSvc->
start();
1043 bool isRead =
false;
1044 bool isWrite =
false;
1045 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1049 bool launched =
false;
1052 uint64_t duration_us = 0;
1053 uint64_t cycleCount = 0;
1054 std::chrono::high_resolution_clock::time_point start;
1056 std::vector<Unit> units;
1057 const std::vector<std::string> readPrefixes = {
"readmem",
"readmem_0",
1058 "readmem_1",
"readmem_2"};
1059 const std::vector<std::string> writePrefixes = {
"writemem",
"writemem_0",
1060 "writemem_1",
"writemem_2"};
1062 auto addUnits = [&](
const std::vector<std::string> &pref,
bool doRead,
1064 for (
auto &p : pref) {
1069 auto &ports = childIt->second->getPorts();
1070 auto cmdIt = ports.find(
AppID(
"cmd", width));
1071 auto respIt = ports.find(
AppID(
"addrCmdResponses"));
1072 auto cycIt = ports.find(
AppID(
"addrCmdCycles"));
1073 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1078 if (!cmd || !resp || !cyc)
1085 u.isWrite = doWrite;
1086 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable =
true});
1088 uint64_t *ptr =
static_cast<uint64_t *
>(u.region->getPtr());
1089 size_t words = u.region->getSize() / 8;
1090 for (
size_t i = 0; i < words; ++i)
1092 (p[0] ==
'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1097 u.bytes = uint64_t(xferCount) * (width / 8);
1098 units.emplace_back(std::move(u));
1102 addUnits(readPrefixes,
true,
false);
1104 addUnits(writePrefixes,
false,
true);
1105 if (units.empty()) {
1106 std::cout <<
"aggbandwidth: no matching units present for width " << width
1111 auto wallStart = std::chrono::high_resolution_clock::now();
1113 for (
auto &u : units) {
1114 uint64_t devPtr =
reinterpret_cast<uint64_t
>(u.region->getDevicePtr());
1115 u.cmd->write(0x10, devPtr);
1116 u.cmd->write(0x18, xferCount);
1117 u.cmd->write(0x20, 1);
1118 u.start = std::chrono::high_resolution_clock::now();
1123 const uint64_t timeoutLoops = 200000;
1126 bool allDone =
true;
1127 for (
auto &u : units) {
1130 if (u.resp->readInt() == xferCount) {
1131 auto end = std::chrono::high_resolution_clock::now();
1133 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1135 u.cycleCount = u.cycles->readInt();
1143 if (++loops >= timeoutLoops)
1144 throw std::runtime_error(
"aggbandwidth: timeout");
1145 std::this_thread::sleep_for(std::chrono::microseconds(50));
1147 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1148 std::chrono::high_resolution_clock::now() - wallStart)
1151 uint64_t totalBytes = 0;
1152 uint64_t totalReadBytes = 0;
1153 uint64_t totalWriteBytes = 0;
1154 for (
auto &u : units) {
1155 totalBytes += u.bytes;
1157 totalReadBytes += u.bytes;
1159 totalWriteBytes += u.bytes;
1160 double unitBps = (double)u.bytes * 1e6 / (
double)u.duration_us;
1161 std::cout <<
"[agg-unit] " << u.prefix <<
"[" << width <<
"] "
1162 << (u.isRead ?
"READ" : (u.isWrite ?
"WRITE" :
"UNK"))
1163 <<
" bytes=" <<
humanBytes(u.bytes) <<
" (" << u.bytes <<
" B)"
1164 <<
" time=" <<
humanTimeUS(u.duration_us) <<
" (" << u.duration_us
1165 <<
" us) cycles=" << u.cycleCount
1171 totalReadBytes ? (double)totalReadBytes * 1e6 / (
double)wallUs : 0.0;
1172 double aggWriteBps =
1173 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (
double)wallUs : 0.0;
1174 double aggCombinedBps =
1175 totalBytes ? (double)totalBytes * 1e6 / (
double)wallUs : 0.0;
1177 std::cout <<
"[agg-total] units=" << units.size()
1178 <<
" read_bytes=" <<
humanBytes(totalReadBytes) <<
" ("
1179 << totalReadBytes <<
" B)"
1181 <<
" write_bytes=" <<
humanBytes(totalWriteBytes) <<
" ("
1182 << totalWriteBytes <<
" B)"
1184 <<
" combined_bytes=" <<
humanBytes(totalBytes) <<
" ("
1185 << totalBytes <<
" B)"
1187 <<
" wall_time=" <<
humanTimeUS(wallUs) <<
" (" << wallUs <<
" us)"
1189 logger.
info(
"esitester",
"Aggregate hostmem bandwidth test complete");
static void print(TypedAttr val, llvm::raw_ostream &os)
static void writePort(uint16_t port)
Write the port number to a file.
Abstract class representing a connection to an accelerator.
ServiceClass * getService(AppIDPath id={}, std::string implName={}, ServiceImplDetails details={}, HWClientDetails clients={})
Get a typed reference to a particular service type.
virtual void disconnect()
Disconnect from the accelerator cleanly.
Logger & getLogger() const
AcceleratorServiceThread * getServiceThread()
Return a pointer to the accelerator 'service' thread (or threads).
void addPoll(HWModule &module)
Poll this module.
Top level accelerator class.
Services provide connections to 'bundles' – collections of named, unidirectional communication channels...
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
ReadChannelPort & getRawRead(const std::string &name) const
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Common options and code for ESI runtime tools.
Context & getContext()
Get the context.
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
int esiParse(int argc, const char **argv)
Run the parser.
AcceleratorConnections, Accelerators, and Manifests must all share a context.
BundlePort * resolvePort(const AppIDPath &path, AppIDPath &lastLookup) const
Attempt to resolve a path to a port.
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Class to parse a manifest.
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A logical chunk of data representing serialized data.
const T * as() const
Cast to a type.
A ChannelPort which reads data from the accelerator.
virtual void connect(std::function< bool(MessageData)> callback, std::optional< unsigned > bufferSize=std::nullopt)
virtual void disconnect() override
virtual void read(MessageData &outData)
Specify a buffer to read into.
A ChannelPort which sends data to the accelerator.
virtual void write(const MessageData &)=0
A very basic blocking write API.
virtual void connect(std::optional< unsigned > bufferSize=std::nullopt) override
Set up a connection to the accelerator.
A function call which gets attached to a service port.
A function call which gets attached to a service port.
virtual void start()
In cases where necessary, enable host memory services.
A "slice" of some parent MMIO space.
Information about the Accelerator system.
A telemetry port which gets attached to a service port.
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
constexpr std::array< uint32_t, 5 > defaultWidths
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string formatBandwidth(double bytesPerSec)
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static std::string defaultWidthsStr()
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
std::string toHex(void *val)
RAII memory region for host memory.
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
virtual std::size_t getSize() const =0