49 const std::vector<uint32_t> &widths,
bool write,
53 const std::vector<uint32_t> &widths,
bool read,
56 const std::vector<uint32_t> &widths,
bool read,
bool write);
58 const std::vector<uint32_t> &widths,
59 uint32_t xferCount,
bool read,
bool write);
64 uint32_t xferCount,
bool read,
67 uint32_t addAmt, uint32_t numItems);
69 uint32_t addAmt, uint32_t numItems);
71 uint32_t xTrans, uint32_t yTrans,
74 uint32_t xTrans, uint32_t yTrans,
75 uint32_t numCoords,
size_t batchSizeLimit);
77 uint32_t xTrans, uint32_t yTrans,
// Fixed list of five transfer widths (presumably in bits, matching the
// "bits" wording used by the tests below).
// NOTE(review): the name and the "(default: " help text of the --widths
// options suggest this is the fallback when no widths are given, but no use
// site is visible in this excerpt — confirm.
84constexpr std::array<uint32_t, 5>
defaultWidths = {32, 64, 128, 256, 512};
// Formats a bytes-per-second rate for display: the value is scaled by a
// decimal factor (1e9, 1e6 or 1e3) and streamed followed by a space and the
// unit string.
// NOTE(review): the lines that update `unit` for each tier, set the stream
// precision and return the result are not visible in this excerpt.
97 const char *unit =
"B/s";
98 double value = bytesPerSec;
// Tiers are tested from largest to smallest so a rate matches only one.
99 if (bytesPerSec >= 1e9) {
101 value = bytesPerSec / 1e9;
102 }
else if (bytesPerSec >= 1e6) {
104 value = bytesPerSec / 1e6;
105 }
else if (bytesPerSec >= 1e3) {
107 value = bytesPerSec / 1e3;
109 std::ostringstream oss;
// Fixed notation (no scientific form) for the scaled value.
110 oss.setf(std::ios::fixed);
112 oss << value <<
" " << unit;
// Formats a byte count for display using the unit table B/KB/MB/GB/TB.
// NOTE(review): the declaration of `u`, the loop body (presumably dividing
// `v` by 1024 and advancing `u`) and the return are not visible here.
118 const char *units[] = {
"B",
"KB",
"MB",
"GB",
"TB"};
119 double v = (double)bytes;
// `u < 4` caps the index at the last table entry ("TB").
121 while (v >= 1024.0 && u < 4) {
125 std::ostringstream oss;
126 oss.setf(std::ios::fixed);
// Plain bytes print without decimals; scaled values print with two.
127 oss.precision(u == 0 ? 0 : 2);
128 oss << v <<
" " << units[u];
// Formats a duration given in microseconds as one of three forms: a raw
// "<n> us" string, a millisecond value, or a second value.
// NOTE(review): the conditions choosing between the forms and the code that
// streams/returns the ms and sec strings are not visible in this excerpt.
135 return std::to_string(us) +
" us";
136 double ms = us / 1000.0;
138 std::ostringstream oss;
139 oss.setf(std::ios::fixed);
// Fewer decimals as the value grows: 2 below 10 ms, 1 below 100 ms, else 0.
140 oss.precision(ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0));
144 double sec = ms / 1000.0;
145 std::ostringstream oss;
146 oss.setf(std::ios::fixed);
// 3 decimals below 10 s, otherwise 2.
147 oss.precision(sec < 10.0 ? 3 : 2);
// Aligned allocation with two alternative implementations; each path throws
// std::bad_alloc.
// NOTE(review): the preprocessor guards selecting a path, the checks that
// precede each throw, and the returns are not visible in this excerpt.
// _aligned_malloc takes (size, alignment).
156 void *ptr = _aligned_malloc(size, alignment);
158 throw std::bad_alloc();
// std::aligned_alloc takes (alignment, size) — the opposite argument order.
// NOTE(review): std::aligned_alloc specifies `size` as an integral multiple
// of `alignment`; confirm callers satisfy that.
161 void *ptr = std::aligned_alloc(alignment, size);
163 throw std::bad_alloc();
// Program entry point. Registers one CLI subcommand per test, parses the
// command line, connects to the accelerator, and runs whichever subcommand
// was selected.
// NOTE(review): many lines of this function (the `cli` object construction,
// several option-variable declarations, most call sites in the dispatch chain
// and the return statements) are not visible in this excerpt.
176int main(
int argc,
const char *argv[]) {
178 cli.description(
"Test an ESI system running the ESI tester image.");
// CLI11: a positive count requires exactly that many subcommands.
179 cli.require_subcommand(1);
// ---- "callback" subcommand ----
181 CLI::App *callback_test =
182 cli.add_subcommand(
"callback",
"initiate callback test");
183 uint32_t cb_iters = 1;
184 callback_test->add_option(
"-i,--iters", cb_iters,
185 "Number of iterations to run");
// ---- "hostmem" subcommand: -w / -r choose which directions are tested ----
187 CLI::App *hostmemtestSub =
188 cli.add_subcommand(
"hostmem",
"Run the host memory test");
190 bool hmWrite =
false;
193 hostmemtestSub->add_flag(
"-w,--write", hmWrite,
194 "Enable host memory write test");
195 hostmemtestSub->add_flag(
"-r,--read", hmRead,
"Enable host memory read test");
196 hostmemtestSub->add_option(
197 "--widths", hostmemWidths,
// ---- "dma" subcommand ----
200 CLI::App *dmatestSub = cli.add_subcommand(
"dma",
"Run the DMA test");
201 bool dmaRead =
false;
202 bool dmaWrite =
false;
204 dmatestSub->add_flag(
"-w,--write", dmaWrite,
"Enable dma write test");
205 dmatestSub->add_flag(
"-r,--read", dmaRead,
"Enable dma read test");
206 dmatestSub->add_option(
"--widths", dmaWidths,
// ---- "bandwidth" subcommand: transfer count defaults to 1000 ----
210 CLI::App *bandwidthSub =
211 cli.add_subcommand(
"bandwidth",
"Run the bandwidth test");
212 uint32_t xferCount = 1000;
213 bandwidthSub->add_option(
"-c,--count", xferCount,
214 "Number of transfers to perform");
215 bool bandwidthRead =
false;
216 bool bandwidthWrite =
false;
219 bandwidthSub->add_option(
"--widths", bandwidthWidths,
220 "Width of the transfers to perform (default: " +
222 bandwidthSub->add_flag(
"-w,--write", bandwidthWrite,
223 "Enable bandwidth write");
224 bandwidthSub->add_flag(
"-r,--read", bandwidthRead,
"Enable bandwidth read");
// ---- "hostmembw" subcommand: transfer count defaults to 1000 ----
226 CLI::App *hostmembwSub =
227 cli.add_subcommand(
"hostmembw",
"Run the host memory bandwidth test");
228 uint32_t hmBwCount = 1000;
229 bool hmBwRead =
false;
230 bool hmBwWrite =
false;
232 hostmembwSub->add_option(
"-c,--count", hmBwCount,
233 "Number of hostmem transfers");
234 hostmembwSub->add_option(
235 "--widths", hmBwWidths,
237 hostmembwSub->add_flag(
"-w,--write", hmBwWrite,
238 "Measure hostmem write bandwidth");
239 hostmembwSub->add_flag(
"-r,--read", hmBwRead,
240 "Measure hostmem read bandwidth");
// ---- "loopback" subcommand: 10 calls by default, optionally pipelined ----
242 CLI::App *loopbackSub =
243 cli.add_subcommand(
"loopback",
"Test LoopbackInOutAdd function service");
244 uint32_t loopbackIters = 10;
245 bool loopbackPipeline =
false;
246 loopbackSub->add_option(
"-i,--iters", loopbackIters,
247 "Number of function invocations (default 10)");
248 loopbackSub->add_flag(
"-p,--pipeline", loopbackPipeline,
249 "Pipeline all calls then collect results");
// ---- aggregate hostmem bandwidth subcommand (its name is on a line not
// visible here): width defaults to 512, count to 1000 ----
251 CLI::App *aggBwSub = cli.add_subcommand(
253 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
254 uint32_t aggWidth = 512;
255 uint32_t aggCount = 1000;
256 bool aggRead =
false;
257 bool aggWrite =
false;
258 aggBwSub->add_option(
260 "Bit width (default 512; other widths ignored if absent)");
261 aggBwSub->add_option(
"-c,--count", aggCount,
"Flits per unit (default 1000)");
262 aggBwSub->add_flag(
"-r,--read", aggRead,
"Include read units");
263 aggBwSub->add_flag(
"-w,--write", aggWrite,
"Include write units");
// ---- "streaming_add" subcommand ----
265 CLI::App *streamingAddSub = cli.add_subcommand(
266 "streaming_add",
"Test StreamingAdder function service with list input");
267 uint32_t streamingAddAmt = 5;
268 uint32_t streamingNumItems = 5;
269 bool streamingTranslate =
false;
270 streamingAddSub->add_option(
"-a,--add", streamingAddAmt,
271 "Amount to add to each element (default 5)");
272 streamingAddSub->add_option(
"-n,--num-items", streamingNumItems,
273 "Number of random items in the list (default 5)");
274 streamingAddSub->add_flag(
"-t,--translate", streamingTranslate,
275 "Use message translation (list translation)");
// ---- coordinate translator subcommand (its name is on a line not visible
// here) ----
277 CLI::App *coordTranslateSub = cli.add_subcommand(
279 "Test CoordTranslator function service with list of coordinates");
280 uint32_t coordXTrans = 10;
281 uint32_t coordYTrans = 20;
282 uint32_t coordNumItems = 5;
283 coordTranslateSub->add_option(
"-x,--x-translation", coordXTrans,
284 "X translation amount (default 10)");
285 coordTranslateSub->add_option(
"-y,--y-translation", coordYTrans,
286 "Y translation amount (default 20)");
287 coordTranslateSub->add_option(
"-n,--num-coords", coordNumItems,
288 "Number of random coordinates (default 5)");
// ---- serial coordinate translator subcommand ----
// NOTE(review): its -x/-y/-n options bind to the same coordXTrans,
// coordYTrans and coordNumItems variables as the subcommand above. With
// exactly one subcommand required this sharing looks intentional — confirm.
290 CLI::App *serialCoordTranslateSub = cli.add_subcommand(
292 "Test SerialCoordTranslator function service with list of coordinates");
293 uint32_t serialBatchSize = 240;
294 serialCoordTranslateSub->add_option(
"-x,--x-translation", coordXTrans,
295 "X translation amount (default 10)");
296 serialCoordTranslateSub->add_option(
"-y,--y-translation", coordYTrans,
297 "Y translation amount (default 20)");
298 serialCoordTranslateSub->add_option(
299 "-n,--num-coords", coordNumItems,
300 "Number of random coordinates (default 5)");
// The validator rejects batch sizes outside 1..65535 at parse time.
301 serialCoordTranslateSub
302 ->add_option(
"-b,--batch-size", serialBatchSize,
303 "Coordinates per header (default 240, max 65535)")
304 ->check(CLI::Range(1u, 0xFFFFu));
// ---- "auto_serial_coords" subcommand: has its own option variables ----
306 CLI::App *autoSerialCoordTranslateSub = cli.add_subcommand(
307 "auto_serial_coords",
308 "Test AutoSerialCoordTranslator (uses ListWindowToParallel/Serial "
309 "converters under the hood)");
310 uint32_t autoCoordXTrans = 10;
311 uint32_t autoCoordYTrans = 20;
312 uint32_t autoCoordNumItems = 5;
313 autoSerialCoordTranslateSub->add_option(
"-x,--x-translation", autoCoordXTrans,
314 "X translation amount (default 10)");
315 autoSerialCoordTranslateSub->add_option(
"-y,--y-translation", autoCoordYTrans,
316 "Y translation amount (default 20)");
317 autoSerialCoordTranslateSub->add_option(
318 "-n,--num-coords", autoCoordNumItems,
319 "Number of random coordinates (default 5)");
// ---- "channel" subcommand ----
321 CLI::App *channelTestSub = cli.add_subcommand(
322 "channel",
"Test ChannelService to_host and from_host");
323 uint32_t channelIters = 10;
324 channelTestSub->add_option(
"-i,--iters", channelIters,
325 "Number of loopback iterations (default 10)");
// ---- "reset" subcommand: takes no options ----
327 CLI::App *resetSub = cli.add_subcommand(
328 "reset",
"Test the design reset feature (telemetry clears after reset)");
// Parse the command line; a non-zero `rc` takes the branch below (its body is
// not visible here).
330 if (
int rc = cli.
esiParse(argc, argv))
332 if (!cli.get_help_ptr()->empty())
339 ctxt.
getLogger().
info(
"esitester",
"Connected to accelerator.");
340 Manifest manifest(ctxt, info.getJsonManifest());
343 acc->getServiceThread()->addPoll(*accel);
// Dispatch: dereferencing a CLI::App* and testing it as a bool reports
// whether that subcommand was given on the command line (CLI11).
345 if (*callback_test) {
347 }
else if (*hostmemtestSub) {
348 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
349 }
else if (*loopbackSub) {
351 }
else if (*dmatestSub) {
352 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
353 }
else if (*bandwidthSub) {
354 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
356 }
else if (*hostmembwSub) {
359 }
else if (*aggBwSub) {
362 }
else if (*streamingAddSub) {
363 if (streamingTranslate)
368 }
else if (*coordTranslateSub) {
370 }
else if (*serialCoordTranslateSub) {
372 coordNumItems, serialBatchSize);
373 }
else if (*autoSerialCoordTranslateSub) {
376 }
else if (*channelTestSub) {
378 }
else if (*resetSub) {
383 }
// Failures from the tests arrive here as exceptions (handler body not visible).
catch (std::exception &e) {
388 std::cout <<
"Exiting successfully\n";
393 uint32_t iterations) {
// Callback test: locates the "cmd" and "cb" ports of the cb_test child,
// installs a callback that counts invocations, then triggers one callback per
// iteration through the cmd port and waits for it to arrive.
// NOTE(review): the start of the signature, the child lookup, the port casts
// and the callback registration call are not visible in this excerpt.
396 throw std::runtime_error(
"No cb_test child found in accelerator");
397 auto &ports = cb_test->second->getPorts();
398 auto cmd_port = ports.find(
AppID(
"cmd"));
399 if (cmd_port == ports.end())
400 throw std::runtime_error(
"No cmd port found in cb_test child");
403 throw std::runtime_error(
"cb_test cmd port is not MMIO");
405 auto f = ports.find(
AppID(
"cb"));
406 if (f == ports.end())
407 throw std::runtime_error(
"No cb port found in accelerator");
411 throw std::runtime_error(
"cb port is not a CallService::Callback");
// Counter shared between the callback body and the polling loop below.
// NOTE(review): presumably atomic because the callback runs on another
// thread — confirm.
413 std::atomic<uint32_t> callbackCount = 0;
// Debug log entry built lazily by the lambda; it attaches the received
// message under the "data" key.
416 conn->getLogger().
debug(
417 [&](std::string &subsystem, std::string &msg,
418 std::unique_ptr<std::map<std::string, std::any>> &details) {
419 subsystem =
"ESITESTER";
420 msg =
"Received callback";
421 details = std::make_unique<std::map<std::string, std::any>>();
422 details->emplace(
"data", data);
424 std::cout <<
"callback: " << *data.as<uint64_t>() << std::endl;
425 callbackCount.fetch_add(1);
430 for (uint32_t i = 0; i < iterations; ++i) {
431 conn->getLogger().info(
"esitester",
"Issuing callback command iteration " +
432 std::to_string(i) +
"/" +
433 std::to_string(iterations));
// Writes the iteration index to offset 0x10 of the cmd MMIO region.
434 cmdMMIO->write(0x10, i);
// Wait for the counter to pass `i`: up to 1000 polls with a 1 ms sleep each.
436 for (uint32_t wait = 0; wait < 1000; ++wait) {
437 if (callbackCount.load() > i)
439 std::this_thread::sleep_for(std::chrono::milliseconds(1));
// Still not past `i` after the wait loop: the callback never arrived.
441 if (callbackCount.load() <= i)
442 throw std::runtime_error(
"Callback test failed. No callback received");
// Hostmem WRITE test for one width: pre-fills the host buffer with an all-ones
// pattern, issues a single command through the writemem[width] child's cmd
// port, waits for the telemetry counters to report completion, then checks
// the buffer.
// NOTE(review): the function signature, the MMIO/telemetry casts and the
// bodies of several branches are not visible in this excerpt.
450 std::cout <<
"Running hostmem WRITE test with width " << width << std::endl;
451 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
// Helper that walks the first 9 64-bit words of the buffer; `print` controls
// the dump (the guard around printf is not visible here).
452 auto check = [&](
bool print) {
454 for (
size_t i = 0; i < 9; ++i) {
// NOTE(review): `%016lx` expects unsigned long, but dataPtr[i] is uint64_t,
// which is unsigned long long on LLP64 targets; PRIx64 would be portable.
456 printf(
"[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
// (width + 63) / 64 is the number of 64-bit words the width covers; this
// matches a covered word that still holds the all-ones fill pattern.
457 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
463 auto writeMemChildIter = acc->getChildren().find(
AppID(
"writemem", width));
464 if (writeMemChildIter == acc->getChildren().end())
465 throw std::runtime_error(
466 "hostmem write test failed. No writemem child found");
467 auto &writeMemPorts = writeMemChildIter->second->getPorts();
469 auto cmdPortIter = writeMemPorts.find(
AppID(
"cmd", width));
470 if (cmdPortIter == writeMemPorts.end())
471 throw std::runtime_error(
472 "hostmem write test failed. No (cmd,width) MMIO port");
475 throw std::runtime_error(
476 "hostmem write test failed. (cmd,width) port not MMIO");
478 auto issuedPortIter = writeMemPorts.find(
AppID(
"addrCmdIssued"));
479 if (issuedPortIter == writeMemPorts.end())
480 throw std::runtime_error(
481 "hostmem write test failed. addrCmdIssued missing");
482 auto *addrCmdIssuedPort =
484 if (!addrCmdIssuedPort)
485 throw std::runtime_error(
486 "hostmem write test failed. addrCmdIssued not telemetry");
487 addrCmdIssuedPort->connect();
489 auto responsesPortIter = writeMemPorts.find(
AppID(
"addrCmdResponses"));
490 if (responsesPortIter == writeMemPorts.end())
491 throw std::runtime_error(
492 "hostmem write test failed. addrCmdResponses missing");
493 auto *addrCmdResponsesPort =
495 if (!addrCmdResponsesPort)
496 throw std::runtime_error(
497 "hostmem write test failed. addrCmdResponses not telemetry");
498 addrCmdResponsesPort->connect();
// Fill the 9 checked words with all ones before issuing the command.
500 for (
size_t i = 0, e = 9; i < e; ++i)
501 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
// Command sequence: device pointer to offset 0x10, then 1 to 0x18 and 0x20.
// NOTE(review): the bandwidth tests write the transfer count to 0x18, so 0x18
// is presumably "count" and 0x20 a start trigger — confirm.
503 cmdMMIO->write(0x10,
reinterpret_cast<uint64_t
>(region.
getDevicePtr()));
504 cmdMMIO->write(0x18, 1);
505 cmdMMIO->write(0x20, 1);
// Poll up to 100 times (100 us apart) until both counters read exactly 1.
507 for (
int i = 0; i < 100; ++i) {
508 auto issued = addrCmdIssuedPort->readInt();
509 auto responses = addrCmdResponsesPort->readInt();
510 if (issued == 1 && responses == 1) {
514 std::this_thread::sleep_for(std::chrono::microseconds(100));
518 throw std::runtime_error(
"hostmem write test (" + std::to_string(width) +
519 " bits) timeout waiting for completion");
522 throw std::runtime_error(
"hostmem write test failed (" +
523 std::to_string(width) +
" bits)");
// Hostmem READ test for one width: for each of 8 rounds, writes a shifted
// pattern into the host buffer, issues a single command through the
// readmem[width] child's cmd port, waits for completion, and compares the
// lastReadLSB telemetry value with the first buffer word.
// NOTE(review): the function signature, the MMIO/telemetry casts and several
// guard lines are not visible in this excerpt.
529 std::cout <<
"Running hostmem READ test with width " << width << std::endl;
530 auto readMemChildIter = acc->getChildren().find(
AppID(
"readmem", width));
531 if (readMemChildIter == acc->getChildren().end())
532 throw std::runtime_error(
533 "hostmem read test failed. No readmem child found");
535 auto &readMemPorts = readMemChildIter->second->getPorts();
536 auto addrCmdPortIter = readMemPorts.find(
AppID(
"cmd", width));
537 if (addrCmdPortIter == readMemPorts.end())
538 throw std::runtime_error(
539 "hostmem read test failed. No AddressCommand MMIO port");
543 throw std::runtime_error(
544 "hostmem read test failed. AddressCommand port not MMIO");
546 auto lastReadPortIter = readMemPorts.find(
AppID(
"lastReadLSB"));
547 if (lastReadPortIter == readMemPorts.end())
548 throw std::runtime_error(
"hostmem read test failed. lastReadLSB missing");
552 throw std::runtime_error(
553 "hostmem read test failed. lastReadLSB not telemetry");
554 lastReadPort->connect();
556 auto issuedPortIter = readMemPorts.find(
AppID(
"addrCmdIssued"));
557 if (issuedPortIter == readMemPorts.end())
558 throw std::runtime_error(
"hostmem read test failed. addrCmdIssued missing");
559 auto *addrCmdIssuedPort =
561 if (!addrCmdIssuedPort)
562 throw std::runtime_error(
563 "hostmem read test failed. addrCmdIssued not telemetry");
564 addrCmdIssuedPort->connect();
566 auto responsesPortIter = readMemPorts.find(
AppID(
"addrCmdResponses"));
567 if (responsesPortIter == readMemPorts.end())
568 throw std::runtime_error(
569 "hostmem read test failed. addrCmdResponses missing");
570 auto *addrCmdResponsesPort =
572 if (!addrCmdResponsesPort)
573 throw std::runtime_error(
574 "hostmem read test failed. addrCmdResponses not telemetry");
575 addrCmdResponsesPort->connect();
// 8 rounds; the round index is the left-shift applied to the two patterns.
577 for (
size_t i = 0; i < 8; ++i) {
578 auto *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
579 dataPtr[0] = 0x12345678ull << i;
580 dataPtr[1] = 0xDEADBEEFull << i;
// Same command sequence as the write test: pointer to 0x10, 1 to 0x18 and 0x20.
582 addrCmdMMIO->write(0x10,
reinterpret_cast<uint64_t
>(region.
getDevicePtr()));
583 addrCmdMMIO->write(0x18, 1);
584 addrCmdMMIO->write(0x20, 1);
// Poll up to 100 times (10 ms apart) until both counters read exactly 1.
586 for (
int waitLoop = 0; waitLoop < 100; ++waitLoop) {
587 auto issued = addrCmdIssuedPort->readInt();
588 auto responses = addrCmdResponsesPort->readInt();
589 if (issued == 1 && responses == 1) {
593 std::this_thread::sleep_for(std::chrono::milliseconds(10));
596 throw std::runtime_error(
"hostmem read (" + std::to_string(width) +
597 " bits) timeout waiting for completion");
598 uint64_t captured = lastReadPort->readInt();
599 uint64_t expected = dataPtr[0];
// Keep only the low `width` bits of the expected word.
// NOTE(review): `1ull << width` is undefined for width >= 64; presumably a
// width check on a line not visible here guards this — confirm.
601 expected &= ((1ull << width) - 1);
602 if (captured != expected)
603 throw std::runtime_error(
"hostmem read test (" + std::to_string(width) +
604 " bits) failed. Expected " +
611 const std::vector<uint32_t> &widths,
bool write,
// Host memory test driver: allocates a 1 MiB writeable scratch region,
// initialises and flushes it, then runs the per-width tests, logging any
// per-width failure.
// NOTE(review): the rest of the signature, the per-width calls and the
// success bookkeeping are not visible in this excerpt.
616 auto scratchRegion = hostmem->allocate(1024 * 1024,
617 {.writeable =
true});
618 uint64_t *dataPtr =
static_cast<uint64_t *
>(scratchRegion->getPtr());
619 conn->getLogger().info(
"esitester",
620 "Running host memory test with region size " +
621 std::to_string(scratchRegion->getSize()) +
622 " bytes at 0x" +
toHex(dataPtr));
// Visits the region as 64-bit words (size / 8); the loop body is not visible.
623 for (
size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
625 scratchRegion->flush();
628 for (
size_t width : widths) {
634 }
// A failing width is logged here rather than propagated directly.
catch (std::exception &e) {
635 conn->getLogger().error(
"esitester",
"Hostmem test failed for width " +
636 std::to_string(width) +
": " +
// NOTE(review): presumably thrown only when at least one width failed; the
// condition is not visible — confirm.
642 throw std::runtime_error(
"Hostmem test failed");
643 std::cout <<
"Hostmem test passed" << std::endl;
// DMA read test for one width: resolves the tohostdma[width] "cmd" and "out"
// ports, requests 24 transfers via the cmd port, then reads and validates
// each message.
// NOTE(review): the function signature, the port casts/connects and the
// ordering comparison itself are not visible in this excerpt.
648 Logger &logger = conn->getLogger();
649 logger.
info(
"esitester",
650 "== Running DMA read test with width " + std::to_string(width));
653 acc->resolvePort({
AppID(
"tohostdma", width),
AppID(
"cmd")}, lastPath);
655 throw std::runtime_error(
"dma read test failed. No tohostdma[" +
656 std::to_string(width) +
"] found");
659 throw std::runtime_error(
"dma read test failed. MMIO port is not MMIO");
662 acc->resolvePort({
AppID(
"tohostdma", width),
AppID(
"out")}, lastPath);
666 size_t xferCount = 24;
// Writes the transfer count to offset 0 of the cmd MMIO region.
669 toHostMMIO->write(0, xferCount);
670 for (
size_t i = 0; i < xferCount; ++i) {
// Interprets the start of the message payload as a uint64_t.
673 uint64_t val = *data.as<uint64_t>();
675 throw std::runtime_error(
"dma read test failed. Out of order data");
678 logger.
debug(
"esitester",
679 "Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex());
682 std::cout <<
" DMA read test for " << width <<
" bits passed" << std::endl;
// DMA write test for one width: resolves the fromhostdma[width] "cmd" and
// "in" ports, announces 24 transfers via the cmd port, then for each transfer
// retries the write and polls MMIO offset 8 until it reports the transfer
// index.
// NOTE(review): the function signature, the port casts/connects, the buffer
// fill and the write call are not visible in this excerpt.
687 Logger &logger = conn->getLogger();
688 logger.
info(
"esitester",
689 "Running DMA write test with width " + std::to_string(width));
692 acc->resolvePort({
AppID(
"fromhostdma", width),
AppID(
"cmd")}, lastPath);
// NOTE(review): this message says "dma read test" although this is the write
// test — looks like a copy/paste slip.
693 if (!fromHostMMIOPort)
694 throw std::runtime_error(
"dma read test for " +
toString(width) +
695 " bits failed. No fromhostdma[" +
696 std::to_string(width) +
"] found");
699 throw std::runtime_error(
"dma write test for " +
toString(width) +
700 " bits failed. MMIO port is not MMIO");
703 acc->resolvePort({
AppID(
"fromhostdma", width),
AppID(
"in")}, lastPath);
// NOTE(review): the port resolved above is "in", but the message says "out".
705 throw std::runtime_error(
"dma write test for " +
toString(width) +
706 " bits failed. No out port found");
710 size_t xferCount = 24;
// NOTE(review): allocates `width` bytes while the loop below only covers
// width / 8 of them, and no matching delete[] is visible in this excerpt —
// confirm the buffer is released on every path, including the throws below.
711 uint8_t *data =
new uint8_t[width];
712 for (
size_t i = 0; i < width / 8; ++i)
// Result of this read is discarded.
714 fromHostMMIO->read(8);
715 fromHostMMIO->write(0, xferCount);
// Transfers are numbered from 1 so `i` can be compared with the MMIO readback.
716 for (
size_t i = 1; i < xferCount + 1; ++i) {
723 std::this_thread::sleep_for(std::chrono::milliseconds(10));
725 }
// Retry loop: keeps attempting until the write succeeds or 100 attempts pass.
while (!successWrite && ++attempts < 100);
727 throw std::runtime_error(
"dma write test for " +
toString(width) +
728 " bits failed. Write failed");
// Assigned on the first pass of the loop below before any use.
729 uint64_t lastReadMMIO;
// Poll offset 8 up to 20 times (10 ms apart) until it equals `i`.
730 for (
size_t a = 0; a < 20; ++a) {
731 lastReadMMIO = fromHostMMIO->read(8);
732 if (lastReadMMIO == i)
734 std::this_thread::sleep_for(std::chrono::milliseconds(10));
736 throw std::runtime_error(
"dma write for " +
toString(width) +
737 " bits test failed. Read from MMIO failed");
742 std::cout <<
" DMA write test for " << width <<
" bits passed" << std::endl;
746 const std::vector<uint32_t> &widths,
bool read,
// DMA test driver: iterates over `widths` twice (once per direction), reports
// a per-width write failure on stderr, and finishes with an overall verdict.
// NOTE(review): the rest of the signature, the direction guards, the
// per-width calls and the success bookkeeping are not visible in this excerpt.
750 for (
size_t width : widths)
753 }
catch (std::exception &e) {
755 std::cerr <<
"DMA write test for " << width
756 <<
" bits failed: " << e.what() << std::endl;
759 for (
size_t width : widths)
// NOTE(review): presumably thrown only when some width failed — confirm.
762 throw std::runtime_error(
"DMA test failed");
763 std::cout <<
"DMA test passed" << std::endl;
771 size_t width,
size_t xferCount) {
// Read bandwidth test: resolves the tohostdma[width] "cmd" and "out" ports,
// requests `xferCount` transfers, reads them all, and logs the elapsed time.
// NOTE(review): the start of the signature, the port casts/connects, the read
// call and the line that names the computed rate are not visible here.
775 acc->resolvePort({
AppID(
"tohostdma", width),
AppID(
"cmd")}, lastPath);
777 throw std::runtime_error(
"bandwidth test failed. No tohostdma[" +
778 std::to_string(width) +
"] found");
781 throw std::runtime_error(
"bandwidth test failed. MMIO port is not MMIO");
784 acc->resolvePort({
AppID(
"tohostdma", width),
AppID(
"out")}, lastPath);
788 Logger &logger = conn->getLogger();
789 logger.
info(
"esitester",
"Starting read bandwidth test with " +
790 std::to_string(xferCount) +
" x " +
791 std::to_string(width) +
" bit transfers");
// The timed interval starts just before the transfer count is written.
793 auto start = std::chrono::high_resolution_clock::now();
794 toHostMMIO->write(0, xferCount);
795 for (
size_t i = 0; i < xferCount; ++i) {
// Log message is built inside a lambda passed to the logger.
798 [i, &data](std::string &subsystem, std::string &msg,
799 std::unique_ptr<std::map<std::string, std::any>> &details) {
800 subsystem =
"esitester";
801 msg =
"Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex();
804 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
805 std::chrono::high_resolution_clock::now() - start);
// bytes/second = transfers * (width / 8) bytes * 1e6 / elapsed microseconds.
// NOTE(review): a zero-microsecond duration makes the divisor 0.
807 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
808 logger.
info(
"esitester",
809 " Bandwidth test: " + std::to_string(xferCount) +
" x " +
810 std::to_string(width) +
" bit transfers in " +
811 std::to_string(duration.count()) +
" microseconds");
816 size_t width,
size_t xferCount) {
// Write bandwidth test: resolves the fromhostdma[width] "cmd" and "in" ports,
// builds a width/8-byte payload, announces `xferCount` transfers, sends them,
// and logs the elapsed time.
// NOTE(review): the start of the signature, the port casts/connects, the
// payload fill, the write call and the line that names the computed rate are
// not visible in this excerpt.
820 acc->resolvePort({
AppID(
"fromhostdma", width),
AppID(
"cmd")}, lastPath);
821 if (!fromHostMMIOPort)
822 throw std::runtime_error(
"bandwidth test failed. No fromhostdma[" +
823 std::to_string(width) +
"] found");
826 throw std::runtime_error(
"bandwidth test failed. MMIO port is not MMIO");
829 acc->resolvePort({
AppID(
"fromhostdma", width),
AppID(
"in")}, lastPath);
833 Logger &logger = conn->getLogger();
834 logger.
info(
"esitester",
"Starting write bandwidth test with " +
835 std::to_string(xferCount) +
" x " +
836 std::to_string(width) +
" bit transfers");
// One payload of width / 8 bytes.
837 std::vector<uint8_t> dataVec(width / 8);
838 for (
size_t i = 0; i < width / 8; ++i)
// The timed interval starts just before the transfer count is written.
841 auto start = std::chrono::high_resolution_clock::now();
842 fromHostMMIO->write(0, xferCount);
843 for (
size_t i = 0; i < xferCount; ++i) {
846 [i, &data](std::string &subsystem, std::string &msg,
847 std::unique_ptr<std::map<std::string, std::any>> &details) {
848 subsystem =
"esitester";
849 msg =
"Cycle count [" + std::to_string(i) +
"] = 0x" + data.toHex();
852 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
853 std::chrono::high_resolution_clock::now() - start);
// bytes/second = transfers * (width / 8) bytes * 1e6 / elapsed microseconds.
// NOTE(review): a zero-microsecond duration makes the divisor 0.
855 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
856 logger.
info(
"esitester",
857 " Bandwidth test: " + std::to_string(xferCount) +
" x " +
858 std::to_string(width) +
" bit transfers in " +
859 std::to_string(duration.count()) +
" microseconds");
864 const std::vector<uint32_t> &widths,
865 uint32_t xferCount,
bool read,
bool write) {
// Bandwidth test driver: two passes over `widths`.
// NOTE(review): presumably one pass per enabled direction (`read`/`write`);
// the guards and the per-width calls are not visible in this excerpt.
867 for (uint32_t w : widths)
870 for (uint32_t w : widths)
881 uint32_t width, uint32_t xferCount) {
// Hostmem WRITE bandwidth test for one width: issues `xferCount` commands
// through the writemem[width] child's cmd port, waits for the response
// counter to reach `xferCount`, and prints wall-clock and per-cycle figures.
// NOTE(review): the start of the signature, the MMIO/telemetry casts and some
// connect calls are not visible in this excerpt.
882 Logger &logger = conn->getLogger();
883 logger.
info(
"esitester",
"Starting hostmem WRITE bandwidth test: " +
884 std::to_string(xferCount) +
" x " +
885 std::to_string(width) +
" bits");
887 auto writeMemChildIter = acc->getChildren().find(
AppID(
"writemem", width));
888 if (writeMemChildIter == acc->getChildren().end())
889 throw std::runtime_error(
"hostmem write bandwidth: writemem child missing");
890 auto &writeMemPorts = writeMemChildIter->second->getPorts();
892 auto cmdPortIter = writeMemPorts.find(
AppID(
"cmd", width));
893 if (cmdPortIter == writeMemPorts.end())
894 throw std::runtime_error(
"hostmem write bandwidth: cmd MMIO missing");
897 throw std::runtime_error(
"hostmem write bandwidth: cmd not MMIO");
// Three telemetry ports are required. Note that `cycleCount` is a port-map
// iterator here, not a numeric value.
899 auto issuedIter = writeMemPorts.find(
AppID(
"addrCmdIssued"));
900 auto respIter = writeMemPorts.find(
AppID(
"addrCmdResponses"));
901 auto cycleCount = writeMemPorts.find(
AppID(
"addrCmdCycles"));
902 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
903 cycleCount == writeMemPorts.end())
904 throw std::runtime_error(
"hostmem write bandwidth: telemetry missing");
910 if (!issuedPort || !respPort || !cyclePort)
911 throw std::runtime_error(
912 "hostmem write bandwidth: telemetry type mismatch");
914 issuedPort->connect();
// Initialise every 64-bit word of the region with a recognisable pattern.
919 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
920 size_t words = region.
getSize() / 8;
921 for (
size_t i = 0; i < words; ++i)
922 dataPtr[i] = i + 0xA5A50000;
// The timed interval includes the three MMIO command writes.
925 auto start = std::chrono::high_resolution_clock::now();
927 uint64_t devPtr =
reinterpret_cast<uint64_t
>(region.
getDevicePtr());
928 cmdMMIO->write(0x10, devPtr);
929 cmdMMIO->write(0x18, xferCount);
930 cmdMMIO->write(0x20, 1);
933 bool completed =
false;
// Up to 100000 polls with a 50 us sleep each (about 5 s of sleeping at most).
934 for (
int wait = 0; wait < 100000; ++wait) {
935 uint64_t respNow = respPort->
readInt();
936 if (respNow == xferCount) {
940 std::this_thread::sleep_for(std::chrono::microseconds(50));
943 throw std::runtime_error(
"hostmem write bandwidth timeout");
944 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
945 std::chrono::high_resolution_clock::now() - start);
// bytes/second = transfers * (width / 8) bytes * 1e6 / elapsed microseconds.
947 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
948 uint64_t cycles = cyclePort->
readInt();
// NOTE(review): `cycles` (and duration.count() above) can be 0, making the
// divisor 0 — confirm whether that needs handling.
949 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
950 std::cout <<
"[WRITE] Hostmem bandwidth (" << std::to_string(width)
952 << std::to_string(xferCount) <<
" flits in "
953 << std::to_string(duration.count()) <<
" us, "
954 << std::to_string(cycles) <<
" cycles, " << bytesPerCycle
955 <<
" bytes/cycle" << std::endl;
961 uint32_t width, uint32_t xferCount) {
// Hostmem READ bandwidth test for one width: issues `xferCount` commands
// through the readmem[width] child's cmd port, waits for the response counter
// to reach `xferCount`, and prints wall-clock and per-cycle figures.
// NOTE(review): the start of the signature, the MMIO/telemetry casts and some
// connect calls are not visible in this excerpt.
962 Logger &logger = conn->getLogger();
963 logger.
info(
"esitester",
"Starting hostmem READ bandwidth test: " +
964 std::to_string(xferCount) +
" x " +
965 std::to_string(width) +
" bits");
967 auto readMemChildIter = acc->getChildren().find(
AppID(
"readmem", width));
968 if (readMemChildIter == acc->getChildren().end())
969 throw std::runtime_error(
"hostmem read bandwidth: readmem child missing");
970 auto &readMemPorts = readMemChildIter->second->getPorts();
972 auto cmdPortIter = readMemPorts.find(
AppID(
"cmd", width));
973 if (cmdPortIter == readMemPorts.end())
974 throw std::runtime_error(
"hostmem read bandwidth: cmd MMIO missing");
977 throw std::runtime_error(
"hostmem read bandwidth: cmd not MMIO");
// Three telemetry ports are required. Here `cyclePort` is the port-map
// iterator and `cycleCntPort` (below) the telemetry pointer — the reverse of
// the naming used in the WRITE bandwidth test.
979 auto issuedIter = readMemPorts.find(
AppID(
"addrCmdIssued"));
980 auto respIter = readMemPorts.find(
AppID(
"addrCmdResponses"));
981 auto cyclePort = readMemPorts.find(
AppID(
"addrCmdCycles"));
982 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
983 cyclePort == readMemPorts.end())
984 throw std::runtime_error(
"hostmem read bandwidth: telemetry missing");
990 if (!issuedPort || !respPort || !cycleCntPort)
991 throw std::runtime_error(
"hostmem read bandwidth: telemetry type mismatch");
992 issuedPort->connect();
// Initialise every 64-bit word of the region with a recognisable pattern.
997 uint64_t *dataPtr =
static_cast<uint64_t *
>(region.
getPtr());
998 size_t words64 = region.
getSize() / 8;
999 for (
size_t i = 0; i < words64; ++i)
1000 dataPtr[i] = 0xCAFEBABE0000ull + i;
1002 uint64_t devPtr =
reinterpret_cast<uint64_t
>(region.
getDevicePtr());
// The timed interval includes the three MMIO command writes.
1003 auto start = std::chrono::high_resolution_clock::now();
1005 cmdMMIO->write(0x10, devPtr);
1006 cmdMMIO->write(0x18, xferCount);
1007 cmdMMIO->write(0x20, 1);
// Starts as "timed out"; presumably cleared when the response count matches
// (that assignment is not visible here).
1009 bool timeout =
true;
// Up to 100000 polls with a 50 us sleep each (about 5 s of sleeping at most).
1010 for (
int wait = 0; wait < 100000; ++wait) {
1011 uint64_t respNow = respPort->
readInt();
1012 if (respNow == xferCount) {
1016 std::this_thread::sleep_for(std::chrono::microseconds(50));
1019 throw std::runtime_error(
"hostmem read bandwidth timeout");
1020 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
1021 std::chrono::high_resolution_clock::now() - start);
// bytes/second = transfers * (width / 8) bytes * 1e6 / elapsed microseconds.
1022 double bytesPerSec =
1023 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
1024 uint64_t cycles = cycleCntPort->
readInt();
// NOTE(review): `cycles` (and duration.count() above) can be 0, making the
// divisor 0 — confirm whether that needs handling.
1025 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
1026 std::cout <<
"[ READ] Hostmem bandwidth (" << width
1028 <<
" flits in " << duration.count() <<
" us, " << cycles
1029 <<
" cycles, " << bytesPerCycle <<
" bytes/cycle" << std::endl;
1034 const std::vector<uint32_t> &widths,
bool read,
// Hostmem bandwidth driver: starts the host memory service, allocates one
// 1 GiB writeable region, then iterates over `widths`.
// NOTE(review): the rest of the signature and the per-width calls are not
// visible in this excerpt.
1037 hostmemSvc->
start();
1038 auto region = hostmemSvc->allocate(1024 * 1024 * 1024,
1039 {.writeable =
true});
1040 for (uint32_t w : widths) {
1049 uint32_t iterations,
bool pipeline) {
// Loopback add test: calls the "add" function port of the loopback child with
// random 24-bit arguments and checks each 16-bit result against
// (arg + 11) & 0xFFFF. Two modes are visible: one waits for each result before
// the next call, the other issues all calls first and collects afterwards.
// NOTE(review): the start of the signature, the child lookup, the port cast,
// the `pipeline` branch and several continuation lines are not visible here.
1050 Logger &logger = conn->getLogger();
1053 throw std::runtime_error(
"Loopback test: no 'loopback' child");
1054 auto &ports = loopbackChild->second->getPorts();
1055 auto addIter = ports.find(
AppID(
"add"));
1056 if (addIter == ports.end())
1057 throw std::runtime_error(
"Loopback test: no 'add' port");
1062 throw std::runtime_error(
1063 "Loopback test: 'add' port not a FuncService::Function");
1064 funcPort->connect();
1065 if (iterations == 0) {
1066 logger.
info(
"esitester",
"Loopback add test: 0 iterations (skipped)");
// Fixed seed, so the argument sequence is the same on every run.
1069 std::mt19937_64 rng(0xC0FFEE);
// Arguments are drawn uniformly from the 24-bit range [0, 2^24 - 1].
1070 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
// ---- Mode 1: each call's result is fetched before the next call ----
1073 auto start = std::chrono::high_resolution_clock::now();
1074 for (uint32_t i = 0; i < iterations; ++i) {
1075 uint32_t argVal = dist(rng);
1076 uint32_t expected = (argVal + 11) & 0xFFFF;
// The argument is sent as 3 bytes, least-significant byte first.
1077 uint8_t argBytes[3] = {
1078 static_cast<uint8_t
>(argVal & 0xFF),
1079 static_cast<uint8_t
>((argVal >> 8) & 0xFF),
1080 static_cast<uint8_t
>((argVal >> 16) & 0xFF),
// .get() blocks on the returned future until the result is available.
1083 MessageData resMsg = funcPort->call(argMsg).get();
1084 uint16_t got = *resMsg.
as<uint16_t>();
1085 std::cout <<
"[loopback] i=" << i <<
" arg=0x" <<
esi::toHex(argVal)
1088 if (got != expected)
1089 throw std::runtime_error(
"Loopback mismatch (non-pipelined)");
1091 auto end = std::chrono::high_resolution_clock::now();
1092 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
// NOTE(review): `us` can be 0 for very short runs, making the divisor 0.
1094 double callsPerSec = (double)iterations * 1e6 / (
double)us;
1095 logger.
info(
"esitester",
"Loopback add test passed (non-pipelined, " +
1096 std::to_string(iterations) +
" calls, " +
1097 std::to_string(us) +
" us, " +
1098 std::to_string(callsPerSec) +
" calls/s)");
// ---- Mode 2: issue every call first, keeping futures and expected values
// side by side, then collect the results in issue order ----
1101 std::vector<std::future<MessageData>> futures;
1102 futures.reserve(iterations);
1103 std::vector<uint32_t> expectedVals;
1104 expectedVals.reserve(iterations);
1106 auto issueStart = std::chrono::high_resolution_clock::now();
1107 for (uint32_t i = 0; i < iterations; ++i) {
1108 uint32_t argVal = dist(rng);
1109 uint32_t expected = (argVal + 11) & 0xFFFF;
1110 uint8_t argBytes[3] = {
1111 static_cast<uint8_t
>(argVal & 0xFF),
1112 static_cast<uint8_t
>((argVal >> 8) & 0xFF),
1113 static_cast<uint8_t
>((argVal >> 16) & 0xFF),
1115 futures.emplace_back(funcPort->call(
MessageData(argBytes, 3)));
1116 expectedVals.emplace_back(expected);
1118 auto issueEnd = std::chrono::high_resolution_clock::now();
1120 for (uint32_t i = 0; i < iterations; ++i) {
1122 uint16_t got = *resMsg.
as<uint16_t>();
1123 uint16_t exp = (uint16_t)expectedVals[i];
1124 std::cout <<
"[loopback-pipelined] i=" << i <<
" got=0x"
1127 throw std::runtime_error(
"Loopback mismatch (pipelined) idx=" +
1130 auto collectEnd = std::chrono::high_resolution_clock::now();
// Two intervals are reported: issue only, and issue plus collection.
1132 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1133 issueEnd - issueStart)
1135 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1136 collectEnd - issueStart)
1139 double issueRate = (double)iterations * 1e6 / (
double)issueUs;
1140 double completionRate = (double)iterations * 1e6 / (
double)totalUs;
1142 logger.
info(
"esitester",
"Loopback add test passed (pipelined). Issued " +
1143 std::to_string(iterations) +
" in " +
1144 std::to_string(issueUs) +
" us (" +
1145 std::to_string(issueRate) +
1146 " calls/s), total " + std::to_string(totalUs) +
1147 " us (" + std::to_string(completionRate) +
1148 " calls/s effective)");
// Reset test: reads the addrCmdResponses telemetry of the writemem[64] child,
// requests a design reset, then polls the same telemetry and fails if it was
// not cleared.
// NOTE(review): the function signature, the step that generates hostmem
// traffic beforehand, the reset call itself and the comparison conditions are
// not visible in this excerpt.
1158 Logger &logger = conn->getLogger();
// The test always uses the 64-bit writemem instance.
1159 constexpr uint32_t width = 64;
1166 auto writeMemChildIter = accel->
getChildren().find(
AppID(
"writemem", width));
1167 if (writeMemChildIter == accel->
getChildren().end())
1168 throw std::runtime_error(
"Reset test: no 'writemem' child");
1169 auto &ports = writeMemChildIter->second->getPorts();
1170 auto respIter = ports.find(
AppID(
"addrCmdResponses"));
1171 if (respIter == ports.end())
1172 throw std::runtime_error(
1173 "Reset test: no 'addrCmdResponses' telemetry port");
1177 throw std::runtime_error(
"Reset test: 'addrCmdResponses' not telemetry");
1178 respMetric->connect();
// Baseline reading taken before the reset.
1180 uint64_t before = respMetric->readInt();
1181 std::cout <<
"[reset] telemetry addrCmdResponses before reset = " << before
1184 throw std::runtime_error(
1185 "Reset test: telemetry was not incremented by the hostmem write");
1188 logger.
info(
"esitester",
"Requesting design reset");
1190 throw std::runtime_error(
"Reset test: reset() reported failure");
1191 std::cout <<
"[reset] reset requested" << std::endl;
// Starts at the pre-reset value so a poll loop that never runs still reports
// the stale reading.
1195 uint64_t after = before;
// Up to one million polls, sleeping 1 us between reads.
1196 constexpr int maxPolls = 1000000;
1197 for (
int polls = 0; polls < maxPolls; ++polls) {
1198 after = respMetric->readInt();
1201 std::this_thread::sleep_for(std::chrono::microseconds(1));
1203 std::cout <<
"[reset] telemetry addrCmdResponses after reset = " << after
1206 throw std::runtime_error(
1207 "Reset test: telemetry was not cleared by the reset (got " +
1208 std::to_string(after) +
")");
1210 std::cout <<
"Reset test passed" << std::endl;
1215 uint32_t xferCount,
bool read,
1217 Logger &logger = conn->getLogger();
1218 if (!read && !write) {
1219 std::cout <<
"aggbandwidth: nothing to do (enable --read and/or --write)\n";
1224 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1225 " count=" + std::to_string(xferCount) +
1226 " read=" + (read ?
"Y" :
"N") +
" write=" + (write ?
"Y" :
"N"));
1229 hostmemSvc->
start();
1233 bool isRead =
false;
1234 bool isWrite =
false;
1235 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1239 bool launched =
false;
1242 uint64_t duration_us = 0;
1243 uint64_t cycleCount = 0;
1244 std::chrono::high_resolution_clock::time_point start;
1246 std::vector<Unit> units;
1247 const std::vector<std::string> readPrefixes = {
"readmem",
"readmem_0",
1248 "readmem_1",
"readmem_2"};
1249 const std::vector<std::string> writePrefixes = {
"writemem",
"writemem_0",
1250 "writemem_1",
"writemem_2"};
1252 auto addUnits = [&](
const std::vector<std::string> &pref,
bool doRead,
1254 for (
auto &p : pref) {
1256 auto childIt = acc->getChildren().find(
id);
1257 if (childIt == acc->getChildren().end())
1259 auto &ports = childIt->second->getPorts();
1260 auto cmdIt = ports.find(
AppID(
"cmd", width));
1261 auto respIt = ports.find(
AppID(
"addrCmdResponses"));
1262 auto cycIt = ports.find(
AppID(
"addrCmdCycles"));
1263 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1268 if (!cmd || !resp || !cyc)
1275 u.isWrite = doWrite;
1276 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable =
true});
1278 uint64_t *ptr =
static_cast<uint64_t *
>(u.region->getPtr());
1279 size_t words = u.region->getSize() / 8;
1280 for (
size_t i = 0; i < words; ++i)
1282 (p[0] ==
'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1287 u.bytes = uint64_t(xferCount) * (width / 8);
1288 units.emplace_back(std::move(u));
1292 addUnits(readPrefixes,
true,
false);
1294 addUnits(writePrefixes,
false,
true);
1295 if (units.empty()) {
1296 std::cout <<
"aggbandwidth: no matching units present for width " << width
1301 auto wallStart = std::chrono::high_resolution_clock::now();
1303 for (
auto &u : units) {
1304 uint64_t devPtr =
reinterpret_cast<uint64_t
>(u.region->getDevicePtr());
1305 u.cmd->write(0x10, devPtr);
1306 u.cmd->write(0x18, xferCount);
1307 u.cmd->write(0x20, 1);
1308 u.start = std::chrono::high_resolution_clock::now();
1313 const uint64_t timeoutLoops = 200000;
1316 bool allDone =
true;
1317 for (
auto &u : units) {
1320 if (u.resp->readInt() == xferCount) {
1321 auto end = std::chrono::high_resolution_clock::now();
1323 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1325 u.cycleCount = u.cycles->readInt();
1333 if (++loops >= timeoutLoops)
1334 throw std::runtime_error(
"aggbandwidth: timeout");
1335 std::this_thread::sleep_for(std::chrono::microseconds(50));
1337 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1338 std::chrono::high_resolution_clock::now() - wallStart)
1341 uint64_t totalBytes = 0;
1342 uint64_t totalReadBytes = 0;
1343 uint64_t totalWriteBytes = 0;
1344 for (
auto &u : units) {
1345 totalBytes += u.bytes;
1347 totalReadBytes += u.bytes;
1349 totalWriteBytes += u.bytes;
1350 double unitBps = (double)u.bytes * 1e6 / (
double)u.duration_us;
1351 std::cout <<
"[agg-unit] " << u.prefix <<
"[" << width <<
"] "
1352 << (u.isRead ?
"READ" : (u.isWrite ?
"WRITE" :
"UNK"))
1353 <<
" bytes=" <<
humanBytes(u.bytes) <<
" (" << u.bytes <<
" B)"
1354 <<
" time=" <<
humanTimeUS(u.duration_us) <<
" (" << u.duration_us
1355 <<
" us) cycles=" << u.cycleCount
1361 totalReadBytes ? (double)totalReadBytes * 1e6 / (
double)wallUs : 0.0;
1362 double aggWriteBps =
1363 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (
double)wallUs : 0.0;
1364 double aggCombinedBps =
1365 totalBytes ? (double)totalBytes * 1e6 / (
double)wallUs : 0.0;
1367 std::cout <<
"[agg-total] units=" << units.size()
1368 <<
" read_bytes=" <<
humanBytes(totalReadBytes) <<
" ("
1369 << totalReadBytes <<
" B)"
1371 <<
" write_bytes=" <<
humanBytes(totalWriteBytes) <<
" ("
1372 << totalWriteBytes <<
" B)"
1374 <<
" combined_bytes=" <<
humanBytes(totalBytes) <<
" ("
1375 << totalBytes <<
" B)"
1377 <<
" wall_time=" <<
humanTimeUS(wallUs) <<
" (" << wallUs <<
" us)"
1379 logger.
info(
"esitester",
"Aggregate hostmem bandwidth test complete");
1385#pragma pack(push, 1)
1393 "StreamingAddArg must be 9 bytes packed");
1398#pragma pack(push, 1)
1405 "StreamingAddResult must be 5 bytes packed");
1411 uint32_t addAmt, uint32_t numItems) {
1412 Logger &logger = conn->getLogger();
1413 logger.
info(
"esitester",
"Starting streaming add test with add_amt=" +
1414 std::to_string(addAmt) +
1415 ", num_items=" + std::to_string(numItems));
1418 std::mt19937 rng(0xDEADBEEF);
1419 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1420 std::vector<uint32_t> inputData;
1421 inputData.reserve(numItems);
1422 for (uint32_t i = 0; i < numItems; ++i)
1423 inputData.push_back(dist(rng));
1426 auto streamingAdderChild =
1428 if (streamingAdderChild == accel->
getChildren().end())
1429 throw std::runtime_error(
1430 "Streaming add test: no 'streaming_adder' child found");
1432 auto &ports = streamingAdderChild->second->getPorts();
1433 auto addIter = ports.find(
AppID(
"streaming_add"));
1434 if (addIter == ports.end())
1435 throw std::runtime_error(
1436 "Streaming add test: no 'streaming_add' port found");
1448 for (
size_t i = 0; i < inputData.size(); ++i) {
1451 arg.
input = inputData[i];
1452 arg.
last = (i == inputData.size() - 1) ? 1 : 0;
1454 MessageData(
reinterpret_cast<const uint8_t *
>(&arg),
sizeof(arg)));
1455 logger.
debug(
"esitester",
"Sent {add_amt=" + std::to_string(arg.
addAmt) +
1456 ", input=" + std::to_string(arg.
input) +
1457 ", last=" + (arg.
last ?
"true" :
"false") +
1462 std::vector<uint32_t> results;
1463 bool lastSeen =
false;
1466 resultPort.
read(resMsg);
1468 throw std::runtime_error(
1469 "Streaming add test: unexpected result message size");
1473 lastSeen = res->
last != 0;
1474 results.push_back(res->data);
1475 logger.
debug(
"esitester",
"Received result=" + std::to_string(res->data) +
1476 " (last=" + (lastSeen ?
"true" :
"false") +
1481 if (results.size() != inputData.size())
1482 throw std::runtime_error(
1483 "Streaming add test: result size mismatch. Expected " +
1484 std::to_string(inputData.size()) +
", got " +
1485 std::to_string(results.size()));
1488 std::cout <<
"Streaming add test results:" << std::endl;
1489 for (
size_t i = 0; i < inputData.size(); ++i) {
1490 uint32_t expected = inputData[i] + addAmt;
1491 std::cout <<
" input[" << i <<
"]=" << inputData[i] <<
" + " << addAmt
1492 <<
" = " << results[i] <<
" (expected " << expected <<
")";
1493 if (results[i] != expected) {
1494 std::cout <<
" MISMATCH!";
1497 std::cout << std::endl;
1504 throw std::runtime_error(
"Streaming add test failed: result mismatch");
1506 logger.
info(
"esitester",
"Streaming add test passed");
1507 std::cout <<
"Streaming add test passed" << std::endl;
1526#pragma pack(push, 1)
1534 uint32_t *
inputData() {
return reinterpret_cast<uint32_t *
>(
this + 1); }
1536 return reinterpret_cast<const uint32_t *
>(
this + 1);
1555#pragma pack(push, 1)
1561 uint32_t *
data() {
return reinterpret_cast<uint32_t *
>(
this + 1); }
1563 return reinterpret_cast<const uint32_t *
>(
this + 1);
1577 uint32_t numItems) {
1578 Logger &logger = conn->getLogger();
1579 logger.
info(
"esitester",
1580 "Starting streaming add test (translated) with add_amt=" +
1581 std::to_string(addAmt) +
1582 ", num_items=" + std::to_string(numItems));
1585 std::mt19937 rng(0xDEADBEEF);
1586 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1587 std::vector<uint32_t> inputData;
1588 inputData.reserve(numItems);
1589 for (uint32_t i = 0; i < numItems; ++i)
1590 inputData.push_back(dist(rng));
1593 auto streamingAdderChild =
1595 if (streamingAdderChild == accel->
getChildren().end())
1596 throw std::runtime_error(
1597 "Streaming add test: no 'streaming_adder' child found");
1599 auto &ports = streamingAdderChild->second->getPorts();
1600 auto addIter = ports.find(
AppID(
"streaming_add"));
1601 if (addIter == ports.end())
1602 throw std::runtime_error(
1603 "Streaming add test: no 'streaming_add' port found");
1618 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1621 throw std::bad_alloc();
1623 std::unique_ptr<void,
decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1626 arg->addAmt = addAmt;
1627 for (uint32_t i = 0; i < numItems; ++i)
1628 arg->inputData()[i] = inputData[i];
1630 logger.
debug(
"esitester",
1631 "Sending translated argument: " + std::to_string(argSize) +
1632 " bytes, list_length=" + std::to_string(arg->inputLength) +
1633 ", add_amt=" + std::to_string(arg->addAmt));
1636 argPort.
write(
MessageData(
reinterpret_cast<const uint8_t *
>(arg), argSize));
1641 resultPort.
read(resMsg);
1643 logger.
debug(
"esitester",
"Received translated result: " +
1644 std::to_string(resMsg.
getSize()) +
" bytes");
1647 throw std::runtime_error(
1648 "Streaming add test (translated): result too small");
1650 const auto *result =
1655 throw std::runtime_error(
1656 "Streaming add test (translated): result data truncated");
1659 if (result->dataLength != inputData.size())
1660 throw std::runtime_error(
1661 "Streaming add test (translated): result size mismatch. Expected " +
1662 std::to_string(inputData.size()) +
", got " +
1663 std::to_string(result->dataLength));
1666 std::cout <<
"Streaming add test results:" << std::endl;
1667 for (
size_t i = 0; i < inputData.size(); ++i) {
1668 uint32_t expected = inputData[i] + addAmt;
1669 std::cout <<
" input[" << i <<
"]=" << inputData[i] <<
" + " << addAmt
1670 <<
" = " << result->
data()[i] <<
" (expected " << expected <<
")";
1671 if (result->data()[i] != expected) {
1672 std::cout <<
" MISMATCH!";
1675 std::cout << std::endl;
1682 throw std::runtime_error(
1683 "Streaming add test (translated) failed: result mismatch");
1685 logger.
info(
"esitester",
"Streaming add test passed (translated)");
1686 std::cout <<
"Streaming add test passed" << std::endl;
1697#pragma pack(push, 1)
1703static_assert(
sizeof(
Coord) == 8,
"Coord must be 8 bytes packed");
1714#pragma pack(push, 1)
1724 return reinterpret_cast<const Coord *
>(
this + 1);
1741#pragma pack(push, 1)
1749 return reinterpret_cast<const Coord *
>(
this + 1);
1762 uint32_t xTrans, uint32_t yTrans,
1763 uint32_t numCoords) {
1764 Logger &logger = conn->getLogger();
1765 logger.
info(
"esitester",
"Starting coord translate test with x_trans=" +
1766 std::to_string(xTrans) +
1767 ", y_trans=" + std::to_string(yTrans) +
1768 ", num_coords=" + std::to_string(numCoords));
1773 std::mt19937 rng(0xDEADBEEF);
1774 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1775 std::vector<Coord> inputCoords;
1776 inputCoords.reserve(numCoords);
1777 for (uint32_t i = 0; i < numCoords; ++i) {
1781 inputCoords.push_back(c);
1785 auto coordTranslatorChild =
1787 if (coordTranslatorChild == accel->
getChildren().end())
1788 throw std::runtime_error(
1789 "Coord translate test: no 'coord_translator' child found");
1791 auto &ports = coordTranslatorChild->second->getPorts();
1792 auto translateIter = ports.find(
AppID(
"translate_coords"));
1793 if (translateIter == ports.end())
1794 throw std::runtime_error(
1795 "Coord translate test: no 'translate_coords' port found");
1801 throw std::runtime_error(
1802 "Coord translate test: 'translate_coords' port not a "
1803 "FuncService::Function");
1804 funcPort->connect();
1810 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1813 throw std::bad_alloc();
1815 std::unique_ptr<void,
decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1818 arg->xTranslation = xTrans;
1819 arg->yTranslation = yTrans;
1820 for (uint32_t i = 0; i < numCoords; ++i)
1821 arg->coords()[i] = inputCoords[i];
1825 "Sending coord translate argument: " + std::to_string(argSize) +
1826 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1827 ", x_trans=" + std::to_string(arg->xTranslation) +
1828 ", y_trans=" + std::to_string(arg->yTranslation));
1833 ->call(
MessageData(
reinterpret_cast<const uint8_t *
>(arg), argSize))
1837 logger.
debug(
"esitester",
"Received coord translate result: " +
1838 std::to_string(resMsg.
getSize()) +
" bytes");
1841 throw std::runtime_error(
"Coord translate test: result too small");
1843 const auto *result =
1847 throw std::runtime_error(
"Coord translate test: result data truncated");
1850 if (result->coordsLength != inputCoords.size())
1851 throw std::runtime_error(
1852 "Coord translate test: result size mismatch. Expected " +
1853 std::to_string(inputCoords.size()) +
", got " +
1854 std::to_string(result->coordsLength));
1857 std::cout <<
"Coord translate test results:" << std::endl;
1858 for (
size_t i = 0; i < inputCoords.size(); ++i) {
1859 uint32_t expectedX = inputCoords[i].x + xTrans;
1860 uint32_t expectedY = inputCoords[i].y + yTrans;
1861 std::cout <<
" coord[" << i <<
"]=(" << inputCoords[i].x <<
","
1862 << inputCoords[i].y <<
") + (" << xTrans <<
"," << yTrans
1863 <<
") = (" << result->
coords()[i].
x <<
","
1864 << result->coords()[i].y <<
")";
1865 if (result->coords()[i].x != expectedX ||
1866 result->coords()[i].y != expectedY) {
1867 std::cout <<
" MISMATCH! (expected (" << expectedX <<
"," << expectedY
1871 std::cout << std::endl;
1875 throw std::runtime_error(
"Coord translate test failed: result mismatch");
1877 logger.
info(
"esitester",
"Coord translate test passed");
1878 std::cout <<
"Coord translate test passed" << std::endl;
1885#pragma pack(push, 1)
1928 coords.emplace_back(x, y);
1936 return {
reinterpret_cast<const uint8_t *
>(&
header),
sizeof(
header)};
1938 return {
reinterpret_cast<const uint8_t *
>(
coords.data()),
1941 return {
reinterpret_cast<const uint8_t *
>(&
footer),
sizeof(
footer)};
1943 throw std::out_of_range(
"SerialCoordInput: invalid segment index");
1965 coords.emplace_back(x, y);
1972 return {
reinterpret_cast<const uint8_t *
>(&
header),
sizeof(
header)};
1974 return {
reinterpret_cast<const uint8_t *
>(
coords.data()),
1977 throw std::out_of_range(
"SerialCoordBurst: invalid segment index");
1981#pragma pack(push, 1)
2019 detail::getMessageDataRef<SerialCoordOutputBatch>(*msg, scratch);
2020 const uint8_t *bytes = flat.
getBytes();
2025 while (offset < size) {
2027 size_t chunkSize = std::min(needed, size - offset);
2029 bytes + offset + chunkSize);
2030 offset += chunkSize;
2042 if (batchCount == 0) {
2044 auto batch = std::make_unique<SerialCoordOutputBatch>();
2047 decoded.push_back(std::move(batch));
2071 uint32_t yTrans, uint32_t numCoords,
2072 size_t batchSizeLimit) {
2073 Logger &logger = conn->getLogger();
2074 logger.
info(
"esitester",
"Starting Serial coord translate test");
2077 std::mt19937 rng(0xDEADBEEF);
2078 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
2079 std::vector<Coord> inputCoords;
2080 inputCoords.reserve(numCoords);
2081 for (uint32_t i = 0; i < numCoords; ++i)
2082 inputCoords.push_back({dist(rng), dist(rng)});
2086 throw std::runtime_error(
"Serial coord translate test: no "
2087 "'coord_translator_serial' child found");
2089 auto &ports = child->second->getPorts();
2090 auto portIter = ports.find(
AppID(
"translate_coords_serial"));
2091 if (portIter == ports.end())
2092 throw std::runtime_error(
2093 "Serial coord translate test: no 'translate_coords_serial' port found");
2096 portIter->second.getRawWrite(
"arg"));
2111 while (sent < numCoords) {
2112 size_t batchSize = std::min(batchSizeLimit, numCoords - sent);
2117 auto batch = std::make_unique<SerialCoordBurst>();
2118 batch->xTranslation(sent == 0 ? xTrans : 0);
2119 batch->yTranslation(sent == 0 ? yTrans : 0);
2121 for (
size_t i = 0; i < batchSize; ++i) {
2122 batch->appendCoord(inputCoords[sent + i].x, inputCoords[sent + i].y);
2124 argPort.
write(batch);
2128 auto footerBurst = std::make_unique<SerialCoordBurst>();
2129 argPort.
write(footerBurst);
2138 std::vector<uint8_t> rxBuf;
2140 while (rxBuf.size() < frameSize) {
2142 resultRaw.
read(data);
2143 rxBuf.insert(rxBuf.end(), data.getBytes(),
2144 data.getBytes() + data.getSize());
2146 std::memcpy(&out, rxBuf.data(), frameSize);
2147 rxBuf.erase(rxBuf.begin(), rxBuf.begin() + frameSize);
2150 std::vector<Coord> results;
2151 results.reserve(numCoords);
2155 uint16_t batchCount = hdr.header.coordsCount;
2156 if (batchCount == 0)
2158 for (uint16_t i = 0; i < batchCount; ++i) {
2161 results.push_back({frame.data.y, frame.data.x});
2167 std::cout <<
"Serial coord translate test results:" << std::endl;
2168 if (results.size() != inputCoords.size()) {
2169 std::cout <<
"Result size mismatch. Expected " << inputCoords.size()
2170 <<
", got " << results.size() << std::endl;
2173 for (
size_t i = 0; i < std::min(inputCoords.size(), results.size()); ++i) {
2174 uint32_t expX = inputCoords[i].x + xTrans;
2175 uint32_t expY = inputCoords[i].y + yTrans;
2176 std::cout <<
" coord[" << i <<
"]=(" << inputCoords[i].x <<
","
2177 << inputCoords[i].y <<
") + (" << xTrans <<
"," << yTrans
2178 <<
") = (" << results[i].x <<
"," << results[i].y
2179 <<
") (expected (" << expX <<
"," << expY <<
"))";
2180 if (results[i].x != expX || results[i].y != expY) {
2181 std::cout <<
" MISMATCH!";
2184 std::cout << std::endl;
2191 throw std::runtime_error(
"Serial coord translate test failed");
2193 logger.
info(
"esitester",
"Serial coord translate test passed");
2194 std::cout <<
"Serial coord translate test passed" << std::endl;
2214 uint32_t yTrans, uint32_t numCoords) {
2215 Logger &logger = conn->getLogger();
2216 logger.
info(
"esitester",
"Starting Auto serial coord translate test");
2219 std::mt19937 rng(0xDEADBEEF);
2220 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
2221 std::vector<Coord> inputCoords;
2222 inputCoords.reserve(numCoords);
2223 for (uint32_t i = 0; i < numCoords; ++i)
2224 inputCoords.push_back({dist(rng), dist(rng)});
2226 auto child = accel->
getChildren().find(
AppID(
"coord_translator_auto_serial"));
2228 throw std::runtime_error(
"Auto serial coord translate test: no "
2229 "'coord_translator_auto_serial' child found");
2231 auto &ports = child->second->getPorts();
2232 auto portIter = ports.find(
AppID(
"translate_coords_auto_serial"));
2233 if (portIter == ports.end())
2234 throw std::runtime_error(
"Auto serial coord translate test: no "
2235 "'translate_coords_auto_serial' port found");
2240 portIter->second.getRawWrite(
"arg"));
2256 auto batch = std::make_unique<SerialCoordInput>();
2257 batch->xTranslation(xTrans);
2258 batch->yTranslation(yTrans);
2259 for (uint32_t i = 0; i < numCoords; ++i)
2260 batch->appendCoord(inputCoords[i].x, inputCoords[i].y);
2261 argPort.
write(batch);
2267 std::vector<uint8_t> rxBuf;
2269 while (rxBuf.size() < frameSize) {
2271 resultRaw.
read(data);
2272 rxBuf.insert(rxBuf.end(), data.getBytes(),
2273 data.getBytes() + data.getSize());
2275 std::memcpy(&out, rxBuf.data(), frameSize);
2276 rxBuf.erase(rxBuf.begin(), rxBuf.begin() + frameSize);
2282 std::vector<Coord> results;
2283 results.reserve(numCoords);
2287 uint16_t burstCount = hdr.header.coordsCount;
2288 if (burstCount == 0)
2290 if (results.size() + burstCount > numCoords)
2291 throw std::runtime_error(
2292 "Auto serial coord translate test: bursts overflow expected total " +
2293 std::to_string(numCoords));
2294 for (uint32_t i = 0; i < burstCount; ++i) {
2297 results.push_back({frame.data.y, frame.data.x});
2300 if (results.size() != numCoords)
2301 throw std::runtime_error(
"Auto serial coord translate test: got " +
2302 std::to_string(results.size()) +
2303 " coords across all bursts " +
"(expected " +
2304 std::to_string(numCoords) +
")");
2310 std::cout <<
"Auto serial coord translate test results:" << std::endl;
2311 for (
size_t i = 0; i < inputCoords.size(); ++i) {
2312 uint32_t expX = inputCoords[i].x + xTrans;
2313 uint32_t expY = inputCoords[i].y + yTrans;
2314 std::cout <<
" coord[" << i <<
"]=(" << inputCoords[i].x <<
","
2315 << inputCoords[i].y <<
") + (" << xTrans <<
"," << yTrans
2316 <<
") = (" << results[i].x <<
"," << results[i].y
2317 <<
") (expected (" << expX <<
"," << expY <<
"))";
2318 if (results[i].x != expX || results[i].y != expY) {
2319 std::cout <<
" MISMATCH!";
2322 std::cout << std::endl;
2326 throw std::runtime_error(
"Auto serial coord translate test failed");
2328 logger.
info(
"esitester",
"Auto serial coord translate test passed");
2329 std::cout <<
"Auto serial coord translate test passed" << std::endl;
2333 uint32_t iterations) {
2334 Logger &logger = conn->getLogger();
2338 throw std::runtime_error(
"Channel test: no 'channel_test' child");
2339 auto &ports = channelChild->second->getPorts();
2342 auto cmdIter = ports.find(
AppID(
"cmd"));
2343 if (cmdIter == ports.end())
2344 throw std::runtime_error(
"Channel test: no 'cmd' port");
2347 throw std::runtime_error(
"Channel test: 'cmd' is not MMIO");
2350 auto producerIter = ports.find(
AppID(
"producer"));
2351 if (producerIter == ports.end())
2352 throw std::runtime_error(
"Channel test: no 'producer' port");
2353 auto *producerPort =
2356 throw std::runtime_error(
2357 "Channel test: 'producer' is not a ChannelService::ToHost");
2358 producerPort->connect();
2362 cmdMMIO->write(0x0, iterations);
2364 for (uint32_t i = 0; i < iterations; ++i) {
2365 MessageData recvData = producerPort->read().get();
2366 uint32_t got = *recvData.
as<uint32_t>();
2367 std::cout <<
"[channel] producer i=" << i <<
" got=" << got << std::endl;
2369 throw std::runtime_error(
"Channel producer: expected " +
2370 std::to_string(i) +
", got " +
2371 std::to_string(got));
2373 logger.
info(
"esitester",
"Channel test: producer passed (" +
2374 std::to_string(iterations) +
2375 " incrementing values)");
2378 auto loopbackInIter = ports.find(
AppID(
"loopback_in"));
2379 if (loopbackInIter == ports.end())
2380 throw std::runtime_error(
"Channel test: no 'loopback_in' port");
2381 auto *fromHostPort =
2384 throw std::runtime_error(
2385 "Channel test: 'loopback_in' is not a ChannelService::FromHost");
2386 fromHostPort->connect();
2388 auto loopbackOutIter = ports.find(
AppID(
"loopback_out"));
2389 if (loopbackOutIter == ports.end())
2390 throw std::runtime_error(
"Channel test: no 'loopback_out' port");
2391 auto *loopbackOutPort =
2393 if (!loopbackOutPort)
2394 throw std::runtime_error(
2395 "Channel test: 'loopback_out' is not a ChannelService::ToHost");
2396 loopbackOutPort->connect();
2398 std::mt19937_64 rng(0xDEADBEEF);
2399 std::uniform_int_distribution<uint32_t> dist(0, UINT32_MAX);
2401 for (uint32_t i = 0; i < iterations; ++i) {
2402 uint32_t sendVal = dist(rng);
2404 MessageData recvData = loopbackOutPort->read().get();
2405 uint32_t recvVal = *recvData.
as<uint32_t>();
2406 std::cout <<
"[channel] loopback i=" << i <<
" sent=0x"
2409 if (recvVal != sendVal)
2410 throw std::runtime_error(
"Channel loopback mismatch at i=" +
2414 logger.
info(
"esitester",
"Channel test: loopback passed (" +
2415 std::to_string(iterations) +
" iterations)");
2416 std::cout <<
"Channel test passed" << std::endl;
static void print(TypedAttr val, llvm::raw_ostream &os)
TypeDeserializer(OutputCallback output)
Base::OutputCallback OutputCallback
std::vector< uint8_t > partialFrameBytes
Base::DecodedOutputs DecodedOutputs
DecodedOutputs decode(std::unique_ptr< SegmentedMessageData > &msg) override
Decode one raw message into zero or more typed outputs.
std::vector< Coord > accumulated
Abstract class representing a connection to an accelerator.
Top level accelerator class.
Services provide connections to 'bundles' – collections of named, unidirectional communication channels...
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
ReadChannelPort & getRawRead(const std::string &name) const
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Common options and code for ESI runtime tools.
Context & getContext()
Get the context.
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
int esiParse(int argc, const char **argv)
Run the parser.
AcceleratorConnections, Accelerators, and Manifests must all share a context.
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Class to parse a manifest.
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A concrete flat message backed by a single vector of bytes.
const uint8_t * getBytes() const
const T * as() const
Cast to a type.
size_t getSize() const
Get the size of the data in bytes.
static MessageData from(T &t)
Cast from a type to its raw bytes.
Helper base class for stateful deserializers which may emit zero, one, or many typed outputs for each...
detail::TypedReadOwnedCallback< SerialCoordOutputBatch > OutputCallback
std::vector< std::unique_ptr< SerialCoordOutputBatch > > DecodedOutputs
A ChannelPort which reads data from the accelerator.
virtual void connect(ReadCallback callback, const ConnectOptions &options={})
virtual void disconnect() override
Disconnect the channel.
virtual void read(MessageData &outData)
Specify a buffer to read into.
Abstract multi-segment message.
void connect(const ChannelPort::ConnectOptions &opts={std::nullopt, false})
void write(const T &data)
A ChannelPort which sends data to the accelerator.
virtual void disconnect() override
void write(const MessageData &data)
A very basic blocking write API.
bool tryWrite(const MessageData &data)
A basic non-blocking write API.
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
A function call which gets attached to a service port.
A port which writes data to the accelerator (from_host).
A port which reads data from the accelerator (to_host).
A function call which gets attached to a service port.
virtual void start()
In cases where necessary, enable host memory services.
A "slice" of some parent MMIO space.
Information about the Accelerator system.
A telemetry port which gets attached to a service port.
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
static void serialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords, size_t batchSizeLimit)
constexpr std::array< uint32_t, 5 > defaultWidths
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static void channelTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static std::string formatBandwidth(double bytesPerSec)
static void autoSerialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static void resetTest(AcceleratorConnection *, Accelerator *)
static std::string defaultWidthsStr()
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
std::string toHex(void *val)
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
void yTranslation(uint32_t yTrans)
void appendCoord(uint32_t x, uint32_t y)
std::vector< SerialCoordData > coords
void xTranslation(uint32_t xTrans)
Segment segment(size_t idx) const override
Get a segment by index.
size_t numSegments() const override
Number of segments in the message.
SerialCoordData(uint32_t x, uint32_t y)
Deserialized result batch from the serial coord translator.
std::vector< Coord > coords
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
A contiguous, non-owning view of bytes within a SegmentedMessageData.
RAII memory region for host memory.
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
virtual std::size_t getSize() const =0
SerialCoordOutputData data
SerialCoordOutputHeader header