CIRCT 22.0.0git
Loading...
Searching...
No Matches
esitester.cpp
Go to the documentation of this file.
1//===- esitester.cpp - ESI accelerator test/example tool ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DO NOT EDIT!
10// This file is distributed as part of an ESI runtime package. The source for
11// this file should always be modified within CIRCT
12// (lib/dialect/ESI/runtime/cpp/tools/esitester.cpp).
13//
14//===----------------------------------------------------------------------===//
15//
16// This application isn't a utility so much as a test driver for an ESI system.
17// It is also useful as an example of how to use the ESI C++ API. esiquery.cpp
18// is also useful as an example.
19//
20//===----------------------------------------------------------------------===//
21
22#include "esi/Accelerator.h"
23#include "esi/CLI.h"
24#include "esi/Manifest.h"
25#include "esi/Services.h"
26
27#include <atomic>
28#include <chrono>
29#include <cstdlib>
30#include <future>
31#include <iostream>
32#include <map>
33#include <memory>
34#include <random>
35#include <span>
36#include <sstream>
37#include <stdexcept>
38#include <vector>
39
40using namespace esi;
41
42// Forward declarations of test functions.
44 uint32_t iterations);
46 const std::vector<uint32_t> &widths, bool write,
47 bool read);
49 uint32_t xferCount,
50 const std::vector<uint32_t> &widths, bool read,
51 bool write);
53 const std::vector<uint32_t> &widths, bool read, bool write);
55 const std::vector<uint32_t> &widths,
56 uint32_t xferCount, bool read, bool write);
58 uint32_t iterations, bool pipeline);
60 Accelerator *, uint32_t width,
61 uint32_t xferCount, bool read,
62 bool write);
64 uint32_t addAmt, uint32_t numItems);
66 uint32_t addAmt, uint32_t numItems);
68 uint32_t xTrans, uint32_t yTrans,
69 uint32_t numCoords);
70
/// Bit widths used by the width-parameterised tests when `--widths` is not
/// given on the command line.
constexpr std::array<uint32_t, 5> defaultWidths = {32, 64, 128, 256, 512};

/// Render `defaultWidths` as a comma-separated list (no spaces, no trailing
/// comma) for embedding in CLI help text.
static std::string defaultWidthsStr() {
  std::string joined;
  for (uint32_t width : defaultWidths) {
    // Separator goes in front of every element except the first.
    if (!joined.empty())
      joined += ",";
    joined += std::to_string(width);
  }
  return joined;
}
82
/// Format a throughput figure with two decimals and a decimal (x1000) unit.
///
/// Values of at least 1e9 are shown in "GB/s", at least 1e6 in "MB/s", at
/// least 1e3 in "KB/s"; anything else is shown unscaled in "B/s".
static std::string formatBandwidth(double bytesPerSec) {
  struct Scale {
    double divisor;
    const char *unit;
  };
  // Ordered largest first so the first match is the right unit.
  static constexpr Scale kScales[] = {
      {1e9, "GB/s"}, {1e6, "MB/s"}, {1e3, "KB/s"}};

  double value = bytesPerSec;
  const char *unit = "B/s";
  for (const Scale &scale : kScales) {
    if (bytesPerSec >= scale.divisor) {
      value = bytesPerSec / scale.divisor;
      unit = scale.unit;
      break;
    }
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(2);
  oss << value << " " << unit;
  return oss.str();
}
103
/// Human-readable size from a byte count, using binary (x1024) steps.
///
/// Plain byte counts are printed without decimals ("512 B"); scaled values
/// are printed with two decimals ("1.50 KB"). The largest unit is "TB", so
/// bigger values stay in TB.
static std::string humanBytes(uint64_t bytes) {
  static constexpr const char *kUnits[] = {"B", "KB", "MB", "GB", "TB"};
  constexpr std::size_t kLastUnit = sizeof(kUnits) / sizeof(kUnits[0]) - 1;

  double scaled = static_cast<double>(bytes);
  std::size_t unitIdx = 0;
  for (; scaled >= 1024.0 && unitIdx < kLastUnit; ++unitIdx)
    scaled /= 1024.0;

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(unitIdx == 0 ? 0 : 2);
  oss << scaled << " " << kUnits[unitIdx];
  return oss.str();
}
119
/// Human-readable duration from a microsecond count.
///
/// Below 1000 us the integer count is printed ("750 us"). Below one second
/// the value is shown in ms with 2, 1 or 0 decimals as it passes 10 and
/// 100 ms. From one second up it is shown in seconds with 3 decimals below
/// 10 s and 2 decimals otherwise.
static std::string humanTimeUS(uint64_t us) {
  if (us < 1000)
    return std::to_string(us) + " us";

  std::ostringstream oss;
  oss.setf(std::ios::fixed);

  const double ms = us / 1000.0;
  if (ms < 1000.0) {
    oss.precision(ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0));
    oss << ms << " ms";
  } else {
    const double sec = ms / 1000.0;
    oss.precision(sec < 10.0 ? 3 : 2);
    oss << sec << " s";
  }
  return oss.str();
}
139
// MSVC does not implement std::aligned_alloc, even though it's part of the
// C++17 standard. Provide a compatibility layer.
//
// Returns at least `size` bytes aligned to `alignment`, or throws
// std::bad_alloc if the allocation fails. Release the result with
// alignedFreeCompat().
static void *alignedAllocCompat(std::size_t alignment, std::size_t size) {
#if defined(_MSC_VER)
  void *ptr = _aligned_malloc(size, alignment);
#else
  // std::aligned_alloc requires `size` to be an integral multiple of
  // `alignment`; some C libraries return NULL otherwise. Round the request up
  // so callers may ask for any size.
  if (alignment != 0 && size % alignment != 0) {
    const std::size_t pad = alignment - size % alignment;
    // Refuse rather than wrap around to a tiny allocation.
    if (size > static_cast<std::size_t>(-1) - pad)
      throw std::bad_alloc();
    size += pad;
  }
  void *ptr = std::aligned_alloc(alignment, size);
#endif
  if (!ptr)
    throw std::bad_alloc();
  return ptr;
}
155
/// Release memory obtained from alignedAllocCompat().
///
/// Uses _aligned_free under MSVC (the counterpart of _aligned_malloc) and
/// std::free everywhere else.
static void alignedFreeCompat(void *ptr) {
#if defined(_MSC_VER)
  _aligned_free(ptr);
#else
  std::free(ptr);
#endif
}
163
164int main(int argc, const char *argv[]) {
165 CliParser cli("esitester");
166 cli.description("Test an ESI system running the ESI tester image.");
167 cli.require_subcommand(1);
168
169 CLI::App *callback_test =
170 cli.add_subcommand("callback", "initiate callback test");
171 uint32_t cb_iters = 1;
172 callback_test->add_option("-i,--iters", cb_iters,
173 "Number of iterations to run");
174
175 CLI::App *hostmemtestSub =
176 cli.add_subcommand("hostmem", "Run the host memory test");
177 bool hmRead = false;
178 bool hmWrite = false;
179 std::vector<uint32_t> hostmemWidths(defaultWidths.begin(),
180 defaultWidths.end());
181 hostmemtestSub->add_flag("-w,--write", hmWrite,
182 "Enable host memory write test");
183 hostmemtestSub->add_flag("-r,--read", hmRead, "Enable host memory read test");
184 hostmemtestSub->add_option(
185 "--widths", hostmemWidths,
186 "Hostmem test widths (default: " + defaultWidthsStr() + ")");
187
188 CLI::App *dmatestSub = cli.add_subcommand("dma", "Run the DMA test");
189 bool dmaRead = false;
190 bool dmaWrite = false;
191 std::vector<uint32_t> dmaWidths(defaultWidths.begin(), defaultWidths.end());
192 dmatestSub->add_flag("-w,--write", dmaWrite, "Enable dma write test");
193 dmatestSub->add_flag("-r,--read", dmaRead, "Enable dma read test");
194 dmatestSub->add_option("--widths", dmaWidths,
195 "DMA test widths (default: " + defaultWidthsStr() +
196 ")");
197
198 CLI::App *bandwidthSub =
199 cli.add_subcommand("bandwidth", "Run the bandwidth test");
200 uint32_t xferCount = 1000;
201 bandwidthSub->add_option("-c,--count", xferCount,
202 "Number of transfers to perform");
203 bool bandwidthRead = false;
204 bool bandwidthWrite = false;
205 std::vector<uint32_t> bandwidthWidths(defaultWidths.begin(),
206 defaultWidths.end());
207 bandwidthSub->add_option("--widths", bandwidthWidths,
208 "Width of the transfers to perform (default: " +
209 defaultWidthsStr() + ")");
210 bandwidthSub->add_flag("-w,--write", bandwidthWrite,
211 "Enable bandwidth write");
212 bandwidthSub->add_flag("-r,--read", bandwidthRead, "Enable bandwidth read");
213
214 CLI::App *hostmembwSub =
215 cli.add_subcommand("hostmembw", "Run the host memory bandwidth test");
216 uint32_t hmBwCount = 1000;
217 bool hmBwRead = false;
218 bool hmBwWrite = false;
219 std::vector<uint32_t> hmBwWidths(defaultWidths.begin(), defaultWidths.end());
220 hostmembwSub->add_option("-c,--count", hmBwCount,
221 "Number of hostmem transfers");
222 hostmembwSub->add_option(
223 "--widths", hmBwWidths,
224 "Hostmem bandwidth widths (default: " + defaultWidthsStr() + ")");
225 hostmembwSub->add_flag("-w,--write", hmBwWrite,
226 "Measure hostmem write bandwidth");
227 hostmembwSub->add_flag("-r,--read", hmBwRead,
228 "Measure hostmem read bandwidth");
229
230 CLI::App *loopbackSub =
231 cli.add_subcommand("loopback", "Test LoopbackInOutAdd function service");
232 uint32_t loopbackIters = 10;
233 bool loopbackPipeline = false;
234 loopbackSub->add_option("-i,--iters", loopbackIters,
235 "Number of function invocations (default 10)");
236 loopbackSub->add_flag("-p,--pipeline", loopbackPipeline,
237 "Pipeline all calls then collect results");
238
239 CLI::App *aggBwSub = cli.add_subcommand(
240 "aggbandwidth",
241 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
242 uint32_t aggWidth = 512;
243 uint32_t aggCount = 1000;
244 bool aggRead = false;
245 bool aggWrite = false;
246 aggBwSub->add_option(
247 "--width", aggWidth,
248 "Bit width (default 512; other widths ignored if absent)");
249 aggBwSub->add_option("-c,--count", aggCount, "Flits per unit (default 1000)");
250 aggBwSub->add_flag("-r,--read", aggRead, "Include read units");
251 aggBwSub->add_flag("-w,--write", aggWrite, "Include write units");
252
253 CLI::App *streamingAddSub = cli.add_subcommand(
254 "streaming_add", "Test StreamingAdder function service with list input");
255 uint32_t streamingAddAmt = 5;
256 uint32_t streamingNumItems = 5;
257 bool streamingTranslate = false;
258 streamingAddSub->add_option("-a,--add", streamingAddAmt,
259 "Amount to add to each element (default 5)");
260 streamingAddSub->add_option("-n,--num-items", streamingNumItems,
261 "Number of random items in the list (default 5)");
262 streamingAddSub->add_flag("-t,--translate", streamingTranslate,
263 "Use message translation (list translation)");
264
265 CLI::App *coordTranslateSub = cli.add_subcommand(
266 "translate_coords",
267 "Test CoordTranslator function service with list of coordinates");
268 uint32_t coordXTrans = 10;
269 uint32_t coordYTrans = 20;
270 uint32_t coordNumItems = 5;
271 coordTranslateSub->add_option("-x,--x-translation", coordXTrans,
272 "X translation amount (default 10)");
273 coordTranslateSub->add_option("-y,--y-translation", coordYTrans,
274 "Y translation amount (default 20)");
275 coordTranslateSub->add_option("-n,--num-coords", coordNumItems,
276 "Number of random coordinates (default 5)");
277
278 if (int rc = cli.esiParse(argc, argv))
279 return rc;
280 if (!cli.get_help_ptr()->empty())
281 return 0;
282
283 Context &ctxt = cli.getContext();
284 AcceleratorConnection *acc = cli.connect();
285 try {
286 const auto &info = *acc->getService<services::SysInfo>();
287 ctxt.getLogger().info("esitester", "Connected to accelerator.");
288 Manifest manifest(ctxt, info.getJsonManifest());
289 Accelerator *accel = manifest.buildAccelerator(*acc);
290 ctxt.getLogger().info("esitester", "Built accelerator.");
291 acc->getServiceThread()->addPoll(*accel);
292
293 if (*callback_test) {
294 callbackTest(acc, accel, cb_iters);
295 } else if (*hostmemtestSub) {
296 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
297 } else if (*loopbackSub) {
298 loopbackAddTest(acc, accel, loopbackIters, loopbackPipeline);
299 } else if (*dmatestSub) {
300 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
301 } else if (*bandwidthSub) {
302 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
303 bandwidthWrite);
304 } else if (*hostmembwSub) {
305 hostmemBandwidthTest(acc, accel, hmBwCount, hmBwWidths, hmBwRead,
306 hmBwWrite);
307 } else if (*aggBwSub) {
308 aggregateHostmemBandwidthTest(acc, accel, aggWidth, aggCount, aggRead,
309 aggWrite);
310 } else if (*streamingAddSub) {
311 if (streamingTranslate)
312 streamingAddTranslatedTest(acc, accel, streamingAddAmt,
313 streamingNumItems);
314 else
315 streamingAddTest(acc, accel, streamingAddAmt, streamingNumItems);
316 } else if (*coordTranslateSub) {
317 coordTranslateTest(acc, accel, coordXTrans, coordYTrans, coordNumItems);
318 }
319
320 acc->disconnect();
321 } catch (std::exception &e) {
322 ctxt.getLogger().error("esitester", e.what());
323 acc->disconnect();
324 return -1;
325 }
326 std::cout << "Exiting successfully\n";
327 return 0;
328}
329
// NOTE(review): the line(s) that open this definition (return type, name and
// leading parameters) are missing from this listing; the body below uses
// `conn`, `accel` and `iterations`.
//
// Callback test. Looks up the "cb_test" child, its "cmd" port (as an MMIO
// region) and its "cb" port (as a CallService::Callback), installs a handler
// that counts invocations, then for each iteration writes the iteration index
// to MMIO offset 0x10 and waits for the counter to advance.
// Throws std::runtime_error if a child/port is missing or has the wrong type,
// or if no callback is observed within the polling window.
331 uint32_t iterations) {
332 auto cb_test = accel->getChildren().find(AppID("cb_test"));
333 if (cb_test == accel->getChildren().end())
334 throw std::runtime_error("No cb_test child found in accelerator");
335 auto &ports = cb_test->second->getPorts();
336 auto cmd_port = ports.find(AppID("cmd"));
337 if (cmd_port == ports.end())
338 throw std::runtime_error("No cmd port found in cb_test child");
339 auto *cmdMMIO = cmd_port->second.getAs<services::MMIO::MMIORegion>();
340 if (!cmdMMIO)
341 throw std::runtime_error("cb_test cmd port is not MMIO");
342
343 auto f = ports.find(AppID("cb"));
344 if (f == ports.end())
345 throw std::runtime_error("No cb port found in accelerator");
346
347 auto *callPort = f->second.getAs<services::CallService::Callback>();
348 if (!callPort)
349 throw std::runtime_error("cb port is not a CallService::Callback");
350
// The handler bumps the counter, logs the payload at debug level, prints it
// as a uint64_t and returns an empty MessageData.
// NOTE(review): the handler captures the local `callbackCount` by reference;
// confirm it cannot be invoked after this function returns or throws.
351 std::atomic<uint32_t> callbackCount = 0;
352 callPort->connect(
353 [conn, &callbackCount](const MessageData &data) mutable -> MessageData {
354 callbackCount.fetch_add(1);
355 conn->getLogger().debug(
356 [&](std::string &subsystem, std::string &msg,
357 std::unique_ptr<std::map<std::string, std::any>> &details) {
358 subsystem = "ESITESTER";
359 msg = "Received callback";
360 details = std::make_unique<std::map<std::string, std::any>>();
361 details->emplace("data", data);
362 });
363 std::cout << "callback: " << *data.as<uint64_t>() << std::endl;
364 return MessageData();
365 },
366 true);
367
368 for (uint32_t i = 0; i < iterations; ++i) {
369 conn->getLogger().info("esitester", "Issuing callback command iteration " +
370 std::to_string(i) + "/" +
371 std::to_string(iterations));
372 cmdMMIO->write(0x10, i); // Command the callback
373 // Wait up to 1 second for the callback to be invoked.
// (1000 polls, sleeping 1 ms between them.)
374 for (uint32_t wait = 0; wait < 1000; ++wait) {
375 if (callbackCount.load() > i)
376 break;
377 std::this_thread::sleep_for(std::chrono::milliseconds(1));
378 }
// Counter still has not passed `i` after the polling loop: report failure.
379 if (callbackCount.load() <= i)
380 throw std::runtime_error("Callback test failed. No callback received");
381 }
382}
383
384/// Test the hostmem write functionality.
// NOTE(review): the line(s) that open this definition are missing from this
// listing; the body below uses `acc`, `region` and `width`.
//
// Hostmem write test for a single width. Fills the first nine 64-bit words of
// `region` with all-ones, programs the "writemem"[width] child through its
// "cmd"[width] MMIO port (0x10 = device pointer, 0x18 = 1, 0x20 = 1) and polls
// the "addrCmdIssued" / "addrCmdResponses" metrics until both read 1.
// Throws std::runtime_error on a missing or mistyped port, on timeout, or
// when the post-write check fails.
387 uint32_t width) {
388 std::cout << "Running hostmem WRITE test with width " << width << std::endl;
389 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
// Returns false if any of the first ceil(width / 64) words still holds the
// all-ones fill value; when `print` is set, dumps all nine words.
390 auto check = [&](bool print) {
391 bool ret = true;
392 for (size_t i = 0; i < 9; ++i) {
393 if (print)
// NOTE(review): "%016lx" assumes uint64_t is `unsigned long`; PRIx64 from
// <cinttypes> would be portable.
394 printf("[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
395 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
396 ret = false;
397 }
398 return ret;
399 };
400
401 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
402 if (writeMemChildIter == acc->getChildren().end())
403 throw std::runtime_error(
404 "hostmem write test failed. No writemem child found");
405 auto &writeMemPorts = writeMemChildIter->second->getPorts();
406
407 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
408 if (cmdPortIter == writeMemPorts.end())
409 throw std::runtime_error(
410 "hostmem write test failed. No (cmd,width) MMIO port");
411 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
412 if (!cmdMMIO)
413 throw std::runtime_error(
414 "hostmem write test failed. (cmd,width) port not MMIO");
415
416 auto issuedPortIter = writeMemPorts.find(AppID("addrCmdIssued"));
417 if (issuedPortIter == writeMemPorts.end())
418 throw std::runtime_error(
419 "hostmem write test failed. addrCmdIssued missing");
420 auto *addrCmdIssuedPort =
421 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
422 if (!addrCmdIssuedPort)
423 throw std::runtime_error(
424 "hostmem write test failed. addrCmdIssued not telemetry");
425 addrCmdIssuedPort->connect();
426
427 auto responsesPortIter = writeMemPorts.find(AppID("addrCmdResponses"));
428 if (responsesPortIter == writeMemPorts.end())
429 throw std::runtime_error(
430 "hostmem write test failed. addrCmdResponses missing");
431 auto *addrCmdResponsesPort =
432 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
433 if (!addrCmdResponsesPort)
434 throw std::runtime_error(
435 "hostmem write test failed. addrCmdResponses not telemetry");
436 addrCmdResponsesPort->connect();
437
// Sentinel fill: the check above fails for any checked word that still holds
// this value afterwards.
438 for (size_t i = 0, e = 9; i < e; ++i)
439 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
440 region.flush();
441 cmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
442 cmdMMIO->write(0x18, 1);
443 cmdMMIO->write(0x20, 1);
// Poll up to 100 times, 100 us apart, for both counters to read 1.
444 bool done = false;
445 for (int i = 0; i < 100; ++i) {
446 auto issued = addrCmdIssuedPort->readInt();
447 auto responses = addrCmdResponsesPort->readInt();
448 if (issued == 1 && responses == 1) {
449 done = true;
450 break;
451 }
452 std::this_thread::sleep_for(std::chrono::microseconds(100));
453 }
454 if (!done) {
// Dump the buffer for diagnosis before reporting the timeout.
455 check(true);
456 throw std::runtime_error("hostmem write test (" + std::to_string(width) +
457 " bits) timeout waiting for completion");
458 }
459 if (!check(true))
460 throw std::runtime_error("hostmem write test failed (" +
461 std::to_string(width) + " bits)");
462}
463
// NOTE(review): the line(s) that open this definition are missing from this
// listing; the body below uses `acc`, `region` and `width`.
//
// Hostmem read test for a single width. Looks up the "readmem"[width] child,
// its "cmd"[width] MMIO port and the "lastReadLSB", "addrCmdIssued" and
// "addrCmdResponses" metrics, then runs eight rounds: write a shifted pattern
// into the first two words of `region`, issue the command over MMIO, wait for
// completion and compare the "lastReadLSB" metric with word 0.
// Throws std::runtime_error on a missing or mistyped port, on timeout, or on
// a data mismatch.
466 uint32_t width) {
467 std::cout << "Running hostmem READ test with width " << width << std::endl;
468 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
469 if (readMemChildIter == acc->getChildren().end())
470 throw std::runtime_error(
471 "hostmem read test failed. No readmem child found");
472
473 auto &readMemPorts = readMemChildIter->second->getPorts();
474 auto addrCmdPortIter = readMemPorts.find(AppID("cmd", width));
475 if (addrCmdPortIter == readMemPorts.end())
476 throw std::runtime_error(
477 "hostmem read test failed. No AddressCommand MMIO port");
478 auto *addrCmdMMIO =
479 addrCmdPortIter->second.getAs<services::MMIO::MMIORegion>();
480 if (!addrCmdMMIO)
481 throw std::runtime_error(
482 "hostmem read test failed. AddressCommand port not MMIO");
483
484 auto lastReadPortIter = readMemPorts.find(AppID("lastReadLSB"));
485 if (lastReadPortIter == readMemPorts.end())
486 throw std::runtime_error("hostmem read test failed. lastReadLSB missing");
487 auto *lastReadPort =
488 lastReadPortIter->second.getAs<services::TelemetryService::Metric>();
489 if (!lastReadPort)
490 throw std::runtime_error(
491 "hostmem read test failed. lastReadLSB not telemetry");
492 lastReadPort->connect();
493
494 auto issuedPortIter = readMemPorts.find(AppID("addrCmdIssued"));
495 if (issuedPortIter == readMemPorts.end())
496 throw std::runtime_error("hostmem read test failed. addrCmdIssued missing");
497 auto *addrCmdIssuedPort =
498 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
499 if (!addrCmdIssuedPort)
500 throw std::runtime_error(
501 "hostmem read test failed. addrCmdIssued not telemetry");
502 addrCmdIssuedPort->connect();
503
504 auto responsesPortIter = readMemPorts.find(AppID("addrCmdResponses"));
505 if (responsesPortIter == readMemPorts.end())
506 throw std::runtime_error(
507 "hostmem read test failed. addrCmdResponses missing");
508 auto *addrCmdResponsesPort =
509 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
510 if (!addrCmdResponsesPort)
511 throw std::runtime_error(
512 "hostmem read test failed. addrCmdResponses not telemetry");
513 addrCmdResponsesPort->connect();
514
// Eight rounds, each with the two seed patterns shifted left by the round
// number so every round presents different data.
515 for (size_t i = 0; i < 8; ++i) {
516 auto *dataPtr = static_cast<uint64_t *>(region.getPtr());
517 dataPtr[0] = 0x12345678ull << i;
518 dataPtr[1] = 0xDEADBEEFull << i;
519 region.flush();
520 addrCmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
521 addrCmdMMIO->write(0x18, 1);
522 addrCmdMMIO->write(0x20, 1);
// Poll up to 100 times, 10 ms apart.
// NOTE(review): every round waits for the counters to read exactly 1, so the
// command sequence above presumably resets them -- confirm.
523 bool done = false;
524 for (int waitLoop = 0; waitLoop < 100; ++waitLoop) {
525 auto issued = addrCmdIssuedPort->readInt();
526 auto responses = addrCmdResponsesPort->readInt();
527 if (issued == 1 && responses == 1) {
528 done = true;
529 break;
530 }
531 std::this_thread::sleep_for(std::chrono::milliseconds(10));
532 }
533 if (!done)
534 throw std::runtime_error("hostmem read (" + std::to_string(width) +
535 " bits) timeout waiting for completion");
536 uint64_t captured = lastReadPort->readInt();
537 uint64_t expected = dataPtr[0];
// For widths below 64 only the low `width` bits of word 0 are compared.
538 if (width < 64)
539 expected &= ((1ull << width) - 1);
540 if (captured != expected)
541 throw std::runtime_error("hostmem read test (" + std::to_string(width) +
542 " bits) failed. Expected " +
543 esi::toHex(expected) + ", got " +
544 esi::toHex(captured));
545 }
546}
547
// NOTE(review): the line that opens this definition is missing from this
// listing; the body below uses `conn`, `acc`, `widths`, `write` and `read`.
//
// Host memory test driver. Starts the HostMem service, allocates a 1 MiB
// writeable scratch region, zeroes and flushes it, then runs the write and/or
// read sub-test for every requested width. A failure for one width is logged
// and the remaining widths still run; if any width failed, a single
// std::runtime_error("Hostmem test failed") is thrown at the end.
549 const std::vector<uint32_t> &widths, bool write,
550 bool read) {
551 // Enable the host memory service.
552 auto hostmem = conn->getService<services::HostMem>();
553 hostmem->start();
// NOTE(review): `{.writeable = true}` is a designated initializer, which
// needs C++20 (or a compiler extension).
554 auto scratchRegion = hostmem->allocate(/*size(bytes)=*/1024 * 1024,
555 /*memOpts=*/{.writeable = true});
556 uint64_t *dataPtr = static_cast<uint64_t *>(scratchRegion->getPtr());
557 conn->getLogger().info("esitester",
558 "Running host memory test with region size " +
559 std::to_string(scratchRegion->getSize()) +
560 " bytes at 0x" + toHex(dataPtr));
// Zero the whole region, one 64-bit word at a time.
561 for (size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
562 dataPtr[i] = 0;
563 scratchRegion->flush();
564
565 bool passed = true;
// NOTE(review): `width` is size_t here but the elements are uint32_t and the
// sub-tests take a uint32_t width, so the value is narrowed at each call.
566 for (size_t width : widths) {
567 try {
568 if (write)
569 hostmemWriteTest(acc, *scratchRegion, width);
570 if (read)
571 hostmemReadTest(acc, *scratchRegion, width);
572 } catch (std::exception &e) {
573 conn->getLogger().error("esitester", "Hostmem test failed for width " +
574 std::to_string(width) + ": " +
575 e.what());
576 passed = false;
577 }
578 }
579 if (!passed)
580 throw std::runtime_error("Hostmem test failed");
581 std::cout << "Hostmem test passed" << std::endl;
582}
583
// NOTE(review): the line that opens this definition is missing from this
// listing; the body below uses `conn`, `acc` and `width`.
//
// DMA read test for a single width. Resolves the tohostdma[width] "cmd" MMIO
// port and "out" bundle, connects the bundle's raw "data" read channel,
// writes the transfer count (24) to MMIO offset 0 and reads that many
// messages. Only for width == 64 is the payload checked: each value must not
// be smaller than the previous one.
// Throws std::runtime_error on a missing/mistyped cmd port or out-of-order data.
585 size_t width) {
586 Logger &logger = conn->getLogger();
587 logger.info("esitester",
588 "== Running DMA read test with width " + std::to_string(width));
589 AppIDPath lastPath;
590 BundlePort *toHostMMIOPort =
591 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
592 if (!toHostMMIOPort)
593 throw std::runtime_error("dma read test failed. No tohostdma[" +
594 std::to_string(width) + "] found");
595 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
596 if (!toHostMMIO)
597 throw std::runtime_error("dma read test failed. MMIO port is not MMIO");
598 lastPath.clear();
// NOTE(review): unlike the cmd port above, `outPortBundle` is dereferenced
// without a null check.
599 BundlePort *outPortBundle =
600 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
601 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
602 outPort.connect();
603
604 size_t xferCount = 24;
605 uint64_t last = 0;
606 MessageData data;
607 toHostMMIO->write(0, xferCount);
608 for (size_t i = 0; i < xferCount; ++i) {
609 outPort.read(data);
610 if (width == 64) {
611 uint64_t val = *data.as<uint64_t>();
612 if (val < last)
613 throw std::runtime_error("dma read test failed. Out of order data");
614 last = val;
615 }
616 logger.debug("esitester",
617 "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex());
618 }
619 outPort.disconnect();
620 std::cout << " DMA read test for " << width << " bits passed" << std::endl;
621}
622
// NOTE(review): the line that opens this definition is missing from this
// listing; the body below uses `conn`, `acc` and `width`.
//
// DMA write test for a single width. Resolves the fromhostdma[width] "cmd"
// MMIO port and "in" bundle, writes the transfer count (24) to MMIO offset 0,
// then sends 24 messages of width / 8 bytes whose first byte is 1..24. After
// each message it polls MMIO offset 8 until it reads back the index just sent.
// Throws std::runtime_error on a missing/mistyped port, when a write keeps
// failing, or when the read-back never matches.
624 size_t width) {
625 Logger &logger = conn->getLogger();
626 logger.info("esitester",
627 "Running DMA write test with width " + std::to_string(width));
628 AppIDPath lastPath;
629 BundlePort *fromHostMMIOPort =
630 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
631 if (!fromHostMMIOPort)
// NOTE(review): this message says "dma read test" although this is the
// write test.
632 throw std::runtime_error("dma read test for " + toString(width) +
633 " bits failed. No fromhostdma[" +
634 std::to_string(width) + "] found");
635 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
636 if (!fromHostMMIO)
637 throw std::runtime_error("dma write test for " + toString(width) +
638 " bits failed. MMIO port is not MMIO");
639 lastPath.clear();
640 BundlePort *outPortBundle =
641 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
642 if (!outPortBundle)
643 throw std::runtime_error("dma write test for " + toString(width) +
644 " bits failed. No out port found");
645 WriteChannelPort &writePort = outPortBundle->getRawWrite("data");
// NOTE(review): listing line 646 is absent here (the numbering jumps from
// 645 to 647). No connect() call on `writePort` is visible although it is
// disconnected at the end -- check the real source.
647
648 size_t xferCount = 24;
// NOTE(review): the buffer is `width` bytes although only width / 8 bytes are
// initialised and sent, and this raw new[] is leaked whenever one of the
// throws below fires before the delete[].
649 uint8_t *data = new uint8_t[width];
650 for (size_t i = 0; i < width / 8; ++i)
651 data[i] = 0;
652 fromHostMMIO->read(8);
653 fromHostMMIO->write(0, xferCount);
654 for (size_t i = 1; i < xferCount + 1; ++i) {
655 data[0] = i;
656 bool successWrite;
657 size_t attempts = 0;
// Retry the non-blocking write up to 100 times, 10 ms apart.
658 do {
659 successWrite = writePort.tryWrite(MessageData(data, width / 8));
660 if (!successWrite) {
661 std::this_thread::sleep_for(std::chrono::milliseconds(10));
662 }
663 } while (!successWrite && ++attempts < 100);
664 if (!successWrite)
665 throw std::runtime_error("dma write test for " + toString(width) +
666 " bits failed. Write failed");
// Poll MMIO offset 8 up to 20 times, 10 ms apart, until it equals `i`.
667 uint64_t lastReadMMIO;
668 for (size_t a = 0; a < 20; ++a) {
669 lastReadMMIO = fromHostMMIO->read(8);
670 if (lastReadMMIO == i)
671 break;
672 std::this_thread::sleep_for(std::chrono::milliseconds(10));
673 if (a >= 19)
674 throw std::runtime_error("dma write for " + toString(width) +
675 " bits test failed. Read from MMIO failed");
676 }
677 }
678 writePort.disconnect();
679 delete[] data;
680 std::cout << " DMA write test for " << width << " bits passed" << std::endl;
681}
682
// NOTE(review): the line that opens this definition is missing from this
// listing; the body below uses `conn`, `acc`, `widths`, `read` and `write`.
//
// DMA test driver: runs the write test and/or the read test for every
// requested width, writes first. Throws std::runtime_error("DMA test failed")
// at the end if any write test failed.
684 const std::vector<uint32_t> &widths, bool read,
685 bool write) {
686 bool success = true;
687 if (write)
688 for (size_t width : widths)
689 try {
690 dmaWriteTest(conn, acc, width);
691 } catch (std::exception &e) {
692 success = false;
693 std::cerr << "DMA write test for " << width
694 << " bits failed: " << e.what() << std::endl;
695 }
// NOTE(review): write failures are collected and the loop continues, but a
// read failure is not caught here and aborts the remaining widths.
696 if (read)
697 for (size_t width : widths)
698 dmaReadTest(conn, acc, width);
699 if (!success)
700 throw std::runtime_error("DMA test failed");
701 std::cout << "DMA test passed" << std::endl;
702}
703
704//
705// DMA bandwidth test
706//
707
// NOTE(review): the line that opens this definition is missing from this
// listing; the body below uses `conn`, `acc`, `width` and `xferCount`.
//
// DMA read bandwidth measurement for one width. Resolves the tohostdma[width]
// "cmd" MMIO port and "out" bundle, writes `xferCount` to MMIO offset 0, reads
// that many messages from the raw "data" channel and logs the elapsed time
// and resulting bandwidth.
// Throws std::runtime_error if the cmd port is missing or is not MMIO.
709 size_t width, size_t xferCount) {
710
711 AppIDPath lastPath;
712 BundlePort *toHostMMIOPort =
713 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
714 if (!toHostMMIOPort)
715 throw std::runtime_error("bandwidth test failed. No tohostdma[" +
716 std::to_string(width) + "] found");
717 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
718 if (!toHostMMIO)
719 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
720 lastPath.clear();
// NOTE(review): `outPortBundle` is dereferenced without a null check.
721 BundlePort *outPortBundle =
722 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
723 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
724 outPort.connect();
725
726 Logger &logger = conn->getLogger();
727 logger.info("esitester", "Starting read bandwidth test with " +
728 std::to_string(xferCount) + " x " +
729 std::to_string(width) + " bit transfers");
730 MessageData data;
// The timed section covers the MMIO start write and all reads, including the
// per-message debug logging call.
731 auto start = std::chrono::high_resolution_clock::now();
732 toHostMMIO->write(0, xferCount);
733 for (size_t i = 0; i < xferCount; ++i) {
734 outPort.read(data);
735 logger.debug(
736 [i, &data](std::string &subsystem, std::string &msg,
737 std::unique_ptr<std::map<std::string, std::any>> &details) {
738 subsystem = "esitester";
739 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
740 });
741 }
742 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
743 std::chrono::high_resolution_clock::now() - start);
// bytes/s = transfers * bytes-per-transfer * 1e6 / elapsed microseconds.
// NOTE(review): an elapsed time of 0 us makes this a division by zero.
744 double bytesPerSec =
745 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
746 logger.info("esitester",
747 " Bandwidth test: " + std::to_string(xferCount) + " x " +
748 std::to_string(width) + " bit transfers in " +
749 std::to_string(duration.count()) + " microseconds");
750 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
751}
752
// NOTE(review): the line that opens this definition is missing from this
// listing; the body below uses `conn`, `acc`, `width` and `xferCount`.
//
// DMA write bandwidth measurement for one width. Resolves the
// fromhostdma[width] "cmd" MMIO port and "in" bundle, writes `xferCount` to
// MMIO offset 0, sends the same width / 8 byte message that many times over
// the raw "data" channel and logs the elapsed time and resulting bandwidth.
// Throws std::runtime_error if the cmd port is missing or is not MMIO.
754 size_t width, size_t xferCount) {
755
756 AppIDPath lastPath;
757 BundlePort *fromHostMMIOPort =
758 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
759 if (!fromHostMMIOPort)
760 throw std::runtime_error("bandwidth test failed. No fromhostdma[" +
761 std::to_string(width) + "] found");
762 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
763 if (!fromHostMMIO)
764 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
765 lastPath.clear();
// NOTE(review): `inPortBundle` is dereferenced without a null check; the
// write channel obtained from it is named `outPort`.
766 BundlePort *inPortBundle =
767 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
768 WriteChannelPort &outPort = inPortBundle->getRawWrite("data");
769 outPort.connect();
770
771 Logger &logger = conn->getLogger();
772 logger.info("esitester", "Starting write bandwidth test with " +
773 std::to_string(xferCount) + " x " +
774 std::to_string(width) + " bit transfers");
// Payload: width / 8 bytes holding 0, 1, 2, ... (each index truncated to
// uint8_t).
775 std::vector<uint8_t> dataVec(width / 8);
776 for (size_t i = 0; i < width / 8; ++i)
777 dataVec[i] = i;
778 MessageData data(dataVec);
779 auto start = std::chrono::high_resolution_clock::now();
780 fromHostMMIO->write(0, xferCount);
781 for (size_t i = 0; i < xferCount; ++i) {
782 outPort.write(data);
783 logger.debug(
784 [i, &data](std::string &subsystem, std::string &msg,
785 std::unique_ptr<std::map<std::string, std::any>> &details) {
786 subsystem = "esitester";
787 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
788 });
789 }
790 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
791 std::chrono::high_resolution_clock::now() - start);
// bytes/s = transfers * bytes-per-transfer * 1e6 / elapsed microseconds.
// NOTE(review): an elapsed time of 0 us makes this a division by zero.
792 double bytesPerSec =
793 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
794 logger.info("esitester",
795 " Bandwidth test: " + std::to_string(xferCount) + " x " +
796 std::to_string(width) + " bit transfers in " +
797 std::to_string(duration.count()) + " microseconds");
798 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
799}
800
// NOTE(review): the line that opens this definition is missing from this
// listing; the body below uses `conn`, `acc`, `widths`, `xferCount`, `read`
// and `write`.
//
// DMA bandwidth driver: for every requested width runs the read measurement
// (if `read`) and then the write measurement (if `write`). Nothing is caught
// here, so the first exception ends the run.
802 const std::vector<uint32_t> &widths,
803 uint32_t xferCount, bool read, bool write) {
804 if (read)
805 for (uint32_t w : widths)
806 bandwidthReadTest(conn, acc, w, xferCount);
807 if (write)
808 for (uint32_t w : widths)
809 bandwidthWriteTest(conn, acc, w, xferCount);
810}
811
812//
813// Hostmem bandwidth test
814//
815
// NOTE(review): the two lines following `static void` (function name and
// leading parameters) are missing from this listing; the body below uses
// `conn`, `acc`, `region`, `width` and `xferCount`.
//
// Hostmem write bandwidth measurement for one width. Looks up the
// "writemem"[width] child, its "cmd"[width] MMIO port and the
// "addrCmdIssued", "addrCmdResponses" and "addrCmdCycles" metrics, fills the
// region with a pattern, starts `xferCount` flits over MMIO and waits for the
// response counter to reach `xferCount`. Prints bandwidth from wall-clock
// time and bytes per cycle from the cycle counter.
// Throws std::runtime_error on a missing/mistyped port or on timeout.
816static void
819 uint32_t width, uint32_t xferCount) {
820 Logger &logger = conn->getLogger();
821 logger.info("esitester", "Starting hostmem WRITE bandwidth test: " +
822 std::to_string(xferCount) + " x " +
823 std::to_string(width) + " bits");
824
825 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
826 if (writeMemChildIter == acc->getChildren().end())
827 throw std::runtime_error("hostmem write bandwidth: writemem child missing");
828 auto &writeMemPorts = writeMemChildIter->second->getPorts();
829
830 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
831 if (cmdPortIter == writeMemPorts.end())
832 throw std::runtime_error("hostmem write bandwidth: cmd MMIO missing");
833 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
834 if (!cmdMMIO)
835 throw std::runtime_error("hostmem write bandwidth: cmd not MMIO");
836
837 auto issuedIter = writeMemPorts.find(AppID("addrCmdIssued"));
838 auto respIter = writeMemPorts.find(AppID("addrCmdResponses"));
839 auto cycleCount = writeMemPorts.find(AppID("addrCmdCycles"));
840 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
841 cycleCount == writeMemPorts.end())
842 throw std::runtime_error("hostmem write bandwidth: telemetry missing");
843 auto *issuedPort =
844 issuedIter->second.getAs<services::TelemetryService::Metric>();
845 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
846 auto *cyclePort =
847 cycleCount->second.getAs<services::TelemetryService::Metric>();
848 if (!issuedPort || !respPort || !cyclePort)
849 throw std::runtime_error(
850 "hostmem write bandwidth: telemetry type mismatch");
851
// `issuedPort` is connected but not read anywhere in this function.
852 issuedPort->connect();
853 respPort->connect();
854 cyclePort->connect();
855
856 // Initialize pattern (optional).
857 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
858 size_t words = region.getSize() / 8;
859 for (size_t i = 0; i < words; ++i)
860 dataPtr[i] = i + 0xA5A50000;
861 region.flush();
862
863 auto start = std::chrono::high_resolution_clock::now();
864 // Fire off xferCount write commands (one flit each).
865 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
866 cmdMMIO->write(0x10, devPtr); // address
867 cmdMMIO->write(0x18, xferCount); // flits
868 cmdMMIO->write(0x20, 1); // start
869
870 // Wait for responses counter to reach target.
// Up to 100000 polls with a 50 us sleep between them.
871 bool completed = false;
872 for (int wait = 0; wait < 100000; ++wait) {
873 uint64_t respNow = respPort->readInt();
874 if (respNow == xferCount) {
875 completed = true;
876 break;
877 }
878 std::this_thread::sleep_for(std::chrono::microseconds(50));
879 }
880 if (!completed)
881 throw std::runtime_error("hostmem write bandwidth timeout");
882 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
883 std::chrono::high_resolution_clock::now() - start);
// NOTE(review): neither divisor below is guarded; an elapsed time of 0 us or
// a cycle count of 0 makes these divisions by zero.
884 double bytesPerSec =
885 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
886 uint64_t cycles = cyclePort->readInt();
887 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
888 std::cout << "[WRITE] Hostmem bandwidth (" << std::to_string(width)
889 << "): " << formatBandwidth(bytesPerSec) << " "
890 << std::to_string(xferCount) << " flits in "
891 << std::to_string(duration.count()) << " us, "
892 << std::to_string(cycles) << " cycles, " << bytesPerCycle
893 << " bytes/cycle" << std::endl;
894}
895
896static void
899 uint32_t width, uint32_t xferCount) {
900 Logger &logger = conn->getLogger();
901 logger.info("esitester", "Starting hostmem READ bandwidth test: " +
902 std::to_string(xferCount) + " x " +
903 std::to_string(width) + " bits");
904
905 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
906 if (readMemChildIter == acc->getChildren().end())
907 throw std::runtime_error("hostmem read bandwidth: readmem child missing");
908 auto &readMemPorts = readMemChildIter->second->getPorts();
909
910 auto cmdPortIter = readMemPorts.find(AppID("cmd", width));
911 if (cmdPortIter == readMemPorts.end())
912 throw std::runtime_error("hostmem read bandwidth: cmd MMIO missing");
913 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
914 if (!cmdMMIO)
915 throw std::runtime_error("hostmem read bandwidth: cmd not MMIO");
916
917 auto issuedIter = readMemPorts.find(AppID("addrCmdIssued"));
918 auto respIter = readMemPorts.find(AppID("addrCmdResponses"));
919 auto cyclePort = readMemPorts.find(AppID("addrCmdCycles"));
920 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
921 cyclePort == readMemPorts.end())
922 throw std::runtime_error("hostmem read bandwidth: telemetry missing");
923 auto *issuedPort =
924 issuedIter->second.getAs<services::TelemetryService::Metric>();
925 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
926 auto *cycleCntPort =
927 cyclePort->second.getAs<services::TelemetryService::Metric>();
928 if (!issuedPort || !respPort || !cycleCntPort)
929 throw std::runtime_error("hostmem read bandwidth: telemetry type mismatch");
930 issuedPort->connect();
931 respPort->connect();
932 cycleCntPort->connect();
933
934 // Prepare memory pattern (optional).
935 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
936 size_t words64 = region.getSize() / 8;
937 for (size_t i = 0; i < words64; ++i)
938 dataPtr[i] = 0xCAFEBABE0000ull + i;
939 region.flush();
940 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
941 auto start = std::chrono::high_resolution_clock::now();
942
943 cmdMMIO->write(0x10, devPtr);
944 cmdMMIO->write(0x18, xferCount);
945 cmdMMIO->write(0x20, 1);
946
947 bool timeout = true;
948 for (int wait = 0; wait < 100000; ++wait) {
949 uint64_t respNow = respPort->readInt();
950 if (respNow == xferCount) {
951 timeout = false;
952 break;
953 }
954 std::this_thread::sleep_for(std::chrono::microseconds(50));
955 }
956 if (timeout)
957 throw std::runtime_error("hostmem read bandwidth timeout");
958 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
959 std::chrono::high_resolution_clock::now() - start);
960 double bytesPerSec =
961 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
962 uint64_t cycles = cycleCntPort->readInt();
963 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
964 std::cout << "[ READ] Hostmem bandwidth (" << width
965 << "): " << formatBandwidth(bytesPerSec) << ", " << xferCount
966 << " flits in " << duration.count() << " us, " << cycles
967 << " cycles, " << bytesPerCycle << " bytes/cycle" << std::endl;
968}
969
971 uint32_t xferCount,
972 const std::vector<uint32_t> &widths, bool read,
973 bool write) {
974 auto hostmemSvc = conn->getService<services::HostMem>();
975 hostmemSvc->start();
976 auto region = hostmemSvc->allocate(/*size(bytes)=*/1024 * 1024 * 1024,
977 /*memOpts=*/{.writeable = true});
978 for (uint32_t w : widths) {
979 if (write)
980 hostmemWriteBandwidthTest(conn, acc, *region, w, xferCount);
981 if (read)
982 hostmemReadBandwidthTest(conn, acc, *region, w, xferCount);
983 }
984}
985
987 uint32_t iterations, bool pipeline) {
988 Logger &logger = conn->getLogger();
989 auto loopbackChild = accel->getChildren().find(AppID("loopback"));
990 if (loopbackChild == accel->getChildren().end())
991 throw std::runtime_error("Loopback test: no 'loopback' child");
992 auto &ports = loopbackChild->second->getPorts();
993 auto addIter = ports.find(AppID("add"));
994 if (addIter == ports.end())
995 throw std::runtime_error("Loopback test: no 'add' port");
996
997 // Use FuncService::Func instead of raw channels.
998 auto *funcPort = addIter->second.getAs<services::FuncService::Function>();
999 if (!funcPort)
1000 throw std::runtime_error(
1001 "Loopback test: 'add' port not a FuncService::Function");
1002 funcPort->connect();
1003 if (iterations == 0) {
1004 logger.info("esitester", "Loopback add test: 0 iterations (skipped)");
1005 return;
1006 }
1007 std::mt19937_64 rng(0xC0FFEE);
1008 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
1009
1010 if (!pipeline) {
1011 auto start = std::chrono::high_resolution_clock::now();
1012 for (uint32_t i = 0; i < iterations; ++i) {
1013 uint32_t argVal = dist(rng);
1014 uint32_t expected = (argVal + 11) & 0xFFFF;
1015 uint8_t argBytes[3] = {
1016 static_cast<uint8_t>(argVal & 0xFF),
1017 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1018 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1019 };
1020 MessageData argMsg(argBytes, 3);
1021 MessageData resMsg = funcPort->call(argMsg).get();
1022 uint16_t got = *resMsg.as<uint16_t>();
1023 std::cout << "[loopback] i=" << i << " arg=0x" << esi::toHex(argVal)
1024 << " got=0x" << esi::toHex(got) << " exp=0x"
1025 << esi::toHex(expected) << std::endl;
1026 if (got != expected)
1027 throw std::runtime_error("Loopback mismatch (non-pipelined)");
1028 }
1029 auto end = std::chrono::high_resolution_clock::now();
1030 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
1031 .count();
1032 double callsPerSec = (double)iterations * 1e6 / (double)us;
1033 logger.info("esitester", "Loopback add test passed (non-pipelined, " +
1034 std::to_string(iterations) + " calls, " +
1035 std::to_string(us) + " us, " +
1036 std::to_string(callsPerSec) + " calls/s)");
1037 } else {
1038 // Pipelined mode: launch all calls first, then collect.
1039 std::vector<std::future<MessageData>> futures;
1040 futures.reserve(iterations);
1041 std::vector<uint32_t> expectedVals;
1042 expectedVals.reserve(iterations);
1043
1044 auto issueStart = std::chrono::high_resolution_clock::now();
1045 for (uint32_t i = 0; i < iterations; ++i) {
1046 uint32_t argVal = dist(rng);
1047 uint32_t expected = (argVal + 11) & 0xFFFF;
1048 uint8_t argBytes[3] = {
1049 static_cast<uint8_t>(argVal & 0xFF),
1050 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1051 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1052 };
1053 futures.emplace_back(funcPort->call(MessageData(argBytes, 3)));
1054 expectedVals.emplace_back(expected);
1055 }
1056 auto issueEnd = std::chrono::high_resolution_clock::now();
1057
1058 for (uint32_t i = 0; i < iterations; ++i) {
1059 MessageData resMsg = futures[i].get();
1060 uint16_t got = *resMsg.as<uint16_t>();
1061 uint16_t exp = (uint16_t)expectedVals[i];
1062 std::cout << "[loopback-pipelined] i=" << i << " got=0x"
1063 << esi::toHex(got) << " exp=0x" << esi::toHex(exp) << std::endl;
1064 if (got != exp)
1065 throw std::runtime_error("Loopback mismatch (pipelined) idx=" +
1066 std::to_string(i));
1067 }
1068 auto collectEnd = std::chrono::high_resolution_clock::now();
1069
1070 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1071 issueEnd - issueStart)
1072 .count();
1073 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1074 collectEnd - issueStart)
1075 .count();
1076
1077 double issueRate = (double)iterations * 1e6 / (double)issueUs;
1078 double completionRate = (double)iterations * 1e6 / (double)totalUs;
1079
1080 logger.info("esitester", "Loopback add test passed (pipelined). Issued " +
1081 std::to_string(iterations) + " in " +
1082 std::to_string(issueUs) + " us (" +
1083 std::to_string(issueRate) +
1084 " calls/s), total " + std::to_string(totalUs) +
1085 " us (" + std::to_string(completionRate) +
1086 " calls/s effective)");
1087 }
1088}
1089
1091 Accelerator *acc, uint32_t width,
1092 uint32_t xferCount, bool read,
1093 bool write) {
1094 Logger &logger = conn->getLogger();
1095 if (!read && !write) {
1096 std::cout << "aggbandwidth: nothing to do (enable --read and/or --write)\n";
1097 return;
1098 }
1099 logger.info(
1100 "esitester",
1101 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1102 " count=" + std::to_string(xferCount) +
1103 " read=" + (read ? "Y" : "N") + " write=" + (write ? "Y" : "N"));
1104
1105 auto hostmemSvc = conn->getService<services::HostMem>();
1106 hostmemSvc->start();
1107
1108 struct Unit {
1109 std::string prefix;
1110 bool isRead = false;
1111 bool isWrite = false;
1112 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1113 services::TelemetryService::Metric *resp = nullptr;
1114 services::TelemetryService::Metric *cycles = nullptr;
1115 services::MMIO::MMIORegion *cmd = nullptr;
1116 bool launched = false;
1117 bool done = false;
1118 uint64_t bytes = 0;
1119 uint64_t duration_us = 0;
1120 uint64_t cycleCount = 0;
1121 std::chrono::high_resolution_clock::time_point start;
1122 };
1123 std::vector<Unit> units;
1124 const std::vector<std::string> readPrefixes = {"readmem", "readmem_0",
1125 "readmem_1", "readmem_2"};
1126 const std::vector<std::string> writePrefixes = {"writemem", "writemem_0",
1127 "writemem_1", "writemem_2"};
1128
1129 auto addUnits = [&](const std::vector<std::string> &pref, bool doRead,
1130 bool doWrite) {
1131 for (auto &p : pref) {
1132 AppID id(p, width);
1133 auto childIt = acc->getChildren().find(id);
1134 if (childIt == acc->getChildren().end())
1135 continue; // silently skip missing variants
1136 auto &ports = childIt->second->getPorts();
1137 auto cmdIt = ports.find(AppID("cmd", width));
1138 auto respIt = ports.find(AppID("addrCmdResponses"));
1139 auto cycIt = ports.find(AppID("addrCmdCycles"));
1140 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1141 continue;
1142 auto *cmd = cmdIt->second.getAs<services::MMIO::MMIORegion>();
1143 auto *resp = respIt->second.getAs<services::TelemetryService::Metric>();
1144 auto *cyc = cycIt->second.getAs<services::TelemetryService::Metric>();
1145 if (!cmd || !resp || !cyc)
1146 continue;
1147 resp->connect();
1148 cyc->connect();
1149 Unit u;
1150 u.prefix = p;
1151 u.isRead = doRead;
1152 u.isWrite = doWrite;
1153 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable = true});
1154 // Init pattern.
1155 uint64_t *ptr = static_cast<uint64_t *>(u.region->getPtr());
1156 size_t words = u.region->getSize() / 8;
1157 for (size_t i = 0; i < words; ++i)
1158 ptr[i] =
1159 (p[0] == 'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1160 u.region->flush();
1161 u.cmd = cmd;
1162 u.resp = resp;
1163 u.cycles = cyc;
1164 u.bytes = uint64_t(xferCount) * (width / 8);
1165 units.emplace_back(std::move(u));
1166 }
1167 };
1168 if (read)
1169 addUnits(readPrefixes, true, false);
1170 if (write)
1171 addUnits(writePrefixes, false, true);
1172 if (units.empty()) {
1173 std::cout << "aggbandwidth: no matching units present for width " << width
1174 << "\n";
1175 return;
1176 }
1177
1178 auto wallStart = std::chrono::high_resolution_clock::now();
1179 // Launch sequentially.
1180 for (auto &u : units) {
1181 uint64_t devPtr = reinterpret_cast<uint64_t>(u.region->getDevicePtr());
1182 u.cmd->write(0x10, devPtr);
1183 u.cmd->write(0x18, xferCount);
1184 u.cmd->write(0x20, 1);
1185 u.start = std::chrono::high_resolution_clock::now();
1186 u.launched = true;
1187 }
1188
1189 // Poll all until complete.
1190 const uint64_t timeoutLoops = 200000; // ~10s at 50us sleep
1191 uint64_t loops = 0;
1192 while (true) {
1193 bool allDone = true;
1194 for (auto &u : units) {
1195 if (u.done)
1196 continue;
1197 if (u.resp->readInt() == xferCount) {
1198 auto end = std::chrono::high_resolution_clock::now();
1199 u.duration_us =
1200 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1201 .count();
1202 u.cycleCount = u.cycles->readInt();
1203 u.done = true;
1204 } else {
1205 allDone = false;
1206 }
1207 }
1208 if (allDone)
1209 break;
1210 if (++loops >= timeoutLoops)
1211 throw std::runtime_error("aggbandwidth: timeout");
1212 std::this_thread::sleep_for(std::chrono::microseconds(50));
1213 }
1214 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1215 std::chrono::high_resolution_clock::now() - wallStart)
1216 .count();
1217
1218 uint64_t totalBytes = 0;
1219 uint64_t totalReadBytes = 0;
1220 uint64_t totalWriteBytes = 0;
1221 for (auto &u : units) {
1222 totalBytes += u.bytes;
1223 if (u.isRead)
1224 totalReadBytes += u.bytes;
1225 if (u.isWrite)
1226 totalWriteBytes += u.bytes;
1227 double unitBps = (double)u.bytes * 1e6 / (double)u.duration_us;
1228 std::cout << "[agg-unit] " << u.prefix << "[" << width << "] "
1229 << (u.isRead ? "READ" : (u.isWrite ? "WRITE" : "UNK"))
1230 << " bytes=" << humanBytes(u.bytes) << " (" << u.bytes << " B)"
1231 << " time=" << humanTimeUS(u.duration_us) << " (" << u.duration_us
1232 << " us) cycles=" << u.cycleCount
1233 << " throughput=" << formatBandwidth(unitBps) << std::endl;
1234 }
1235 // Compute aggregate bandwidths as total size / total wall time (not sum of
1236 // unit throughputs).
1237 double aggReadBps =
1238 totalReadBytes ? (double)totalReadBytes * 1e6 / (double)wallUs : 0.0;
1239 double aggWriteBps =
1240 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (double)wallUs : 0.0;
1241 double aggCombinedBps =
1242 totalBytes ? (double)totalBytes * 1e6 / (double)wallUs : 0.0;
1243
1244 std::cout << "[agg-total] units=" << units.size()
1245 << " read_bytes=" << humanBytes(totalReadBytes) << " ("
1246 << totalReadBytes << " B)"
1247 << " read_bw=" << formatBandwidth(aggReadBps)
1248 << " write_bytes=" << humanBytes(totalWriteBytes) << " ("
1249 << totalWriteBytes << " B)"
1250 << " write_bw=" << formatBandwidth(aggWriteBps)
1251 << " combined_bytes=" << humanBytes(totalBytes) << " ("
1252 << totalBytes << " B)"
1253 << " combined_bw=" << formatBandwidth(aggCombinedBps)
1254 << " wall_time=" << humanTimeUS(wallUs) << " (" << wallUs << " us)"
1255 << std::endl;
1256 logger.info("esitester", "Aggregate hostmem bandwidth test complete");
1257}
1258
/// Packed struct representing a parallel window argument for StreamingAdder.
/// Layout in SystemVerilog (so it must be reversed in C):
///   { add_amt: UInt(32), input: UInt(32), last: UInt(8) }
/// Packing is forced to 1 byte so the struct is exactly 9 bytes with no
/// padding between the 8-bit flag and the 32-bit fields.
#pragma pack(push, 1)
struct StreamingAddArg {
  uint8_t last;    // non-zero on the final element of the list
  uint32_t input;  // one list element
  uint32_t addAmt; // amount to add; repeated in every message
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddArg) == 9,
              "StreamingAddArg must be 9 bytes packed");
1271
/// Packed struct representing a parallel window result for StreamingAdder.
/// Layout in SystemVerilog (so it must be reversed in C):
///   { data: UInt(32), last: UInt(8) }
/// Packing is forced to 1 byte so the struct is exactly 5 bytes.
#pragma pack(push, 1)
struct StreamingAddResult {
  uint8_t last;  // non-zero on the final element of the result list
  uint32_t data; // one result element
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddResult) == 5,
              "StreamingAddResult must be 5 bytes packed");
1283
1284/// Test the StreamingAdder module. This module takes a struct containing
1285/// an add_amt and a list of uint32s, adds add_amt to each element, and
1286/// returns the resulting list. The data is streamed using windowed types.
1288 uint32_t addAmt, uint32_t numItems) {
1289 Logger &logger = conn->getLogger();
1290 logger.info("esitester", "Starting streaming add test with add_amt=" +
1291 std::to_string(addAmt) +
1292 ", num_items=" + std::to_string(numItems));
1293
1294 // Generate random input data.
1295 std::mt19937 rng(0xDEADBEEF);
1296 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1297 std::vector<uint32_t> inputData;
1298 inputData.reserve(numItems);
1299 for (uint32_t i = 0; i < numItems; ++i)
1300 inputData.push_back(dist(rng));
1301
1302 // Find the streaming_adder child.
1303 auto streamingAdderChild =
1304 accel->getChildren().find(AppID("streaming_adder"));
1305 if (streamingAdderChild == accel->getChildren().end())
1306 throw std::runtime_error(
1307 "Streaming add test: no 'streaming_adder' child found");
1308
1309 auto &ports = streamingAdderChild->second->getPorts();
1310 auto addIter = ports.find(AppID("streaming_add"));
1311 if (addIter == ports.end())
1312 throw std::runtime_error(
1313 "Streaming add test: no 'streaming_add' port found");
1314
1315 // Get the raw read/write channel ports for the windowed function.
1316 // The argument channel expects parallel windowed data where each message
1317 // contains: struct { add_amt: UInt(32), input: UInt(32), last: bool }
1318 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1319 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1320
1321 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1322 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1323
1324 // Send each list element with add_amt repeated in every message.
1325 for (size_t i = 0; i < inputData.size(); ++i) {
1326 StreamingAddArg arg;
1327 arg.addAmt = addAmt;
1328 arg.input = inputData[i];
1329 arg.last = (i == inputData.size() - 1) ? 1 : 0;
1330 argPort.write(
1331 MessageData(reinterpret_cast<const uint8_t *>(&arg), sizeof(arg)));
1332 logger.debug("esitester", "Sent {add_amt=" + std::to_string(arg.addAmt) +
1333 ", input=" + std::to_string(arg.input) +
1334 ", last=" + (arg.last ? "true" : "false") +
1335 "}");
1336 }
1337
1338 // Read the result list (also windowed).
1339 std::vector<uint32_t> results;
1340 bool lastSeen = false;
1341 while (!lastSeen) {
1342 MessageData resMsg;
1343 resultPort.read(resMsg);
1344 if (resMsg.getSize() < sizeof(StreamingAddResult))
1345 throw std::runtime_error(
1346 "Streaming add test: unexpected result message size");
1347
1348 const auto *res =
1349 reinterpret_cast<const StreamingAddResult *>(resMsg.getBytes());
1350 lastSeen = res->last != 0;
1351 results.push_back(res->data);
1352 logger.debug("esitester", "Received result=" + std::to_string(res->data) +
1353 " (last=" + (lastSeen ? "true" : "false") +
1354 ")");
1355 }
1356
1357 // Verify results.
1358 if (results.size() != inputData.size())
1359 throw std::runtime_error(
1360 "Streaming add test: result size mismatch. Expected " +
1361 std::to_string(inputData.size()) + ", got " +
1362 std::to_string(results.size()));
1363
1364 bool passed = true;
1365 std::cout << "Streaming add test results:" << std::endl;
1366 for (size_t i = 0; i < inputData.size(); ++i) {
1367 uint32_t expected = inputData[i] + addAmt;
1368 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1369 << " = " << results[i] << " (expected " << expected << ")";
1370 if (results[i] != expected) {
1371 std::cout << " MISMATCH!";
1372 passed = false;
1373 }
1374 std::cout << std::endl;
1375 }
1376
1377 argPort.disconnect();
1378 resultPort.disconnect();
1379
1380 if (!passed)
1381 throw std::runtime_error("Streaming add test failed: result mismatch");
1382
1383 logger.info("esitester", "Streaming add test passed");
1384 std::cout << "Streaming add test passed" << std::endl;
1385}
1386
/// Test the StreamingAdder module using message translation.
/// This version uses the list translation support where the message format is:
///   Argument: { input_length (8 bytes), add_amt (4 bytes), input_data[] }
///   Result: { data_length (8 bytes), data[] }
/// The translation layer automatically converts between this format and the
/// parallel windowed frames used by the hardware.
1393
1394/// Translated argument struct for StreamingAdder.
1395/// Memory layout (standard C struct ordering, fields in declaration order):
1396/// ESI type: struct { add_amt: UInt(32), input: List<UInt(32)> }
1397/// becomes host struct:
1398/// { input_length (size_t, 8 bytes on 64-bit), add_amt (uint32_t),
1399/// input_data[] }
1400/// Note: The translation layer handles the conversion between this C struct
1401/// layout and the hardware's SystemVerilog frame format.
1402/// Note: size_t is used for list lengths, so this format is platform-dependent.
1403#pragma pack(push, 1)
1406 uint32_t addAmt;
1407 // Trailing array data follows immediately after the struct in memory.
1408 // Use inputData() accessor to access it.
1409
1410 /// Get pointer to trailing input data array.
1411 uint32_t *inputData() { return reinterpret_cast<uint32_t *>(this + 1); }
1412 const uint32_t *inputData() const {
1413 return reinterpret_cast<const uint32_t *>(this + 1);
1414 }
1415 /// Get span view of input data (requires inputLength to be set first).
1416 std::span<uint32_t> inputDataSpan() { return {inputData(), inputLength}; }
1417 std::span<const uint32_t> inputDataSpan() const {
1418 return {inputData(), inputLength};
1419 }
1420
1421 static size_t allocSize(size_t numItems) {
1422 return sizeof(StreamingAddTranslatedArg) + numItems * sizeof(uint32_t);
1423 }
1424};
1425#pragma pack(pop)
1426
1427/// Translated result struct for StreamingAdder.
1428/// Memory layout:
1429/// struct { data: List<UInt(32)> }
1430/// becomes:
1431/// { data_length (size_t, 8 bytes on 64-bit), data[] }
1432#pragma pack(push, 1)
1435 // Trailing array data follows immediately after the struct in memory.
1436
1437 /// Get pointer to trailing result data array.
1438 uint32_t *data() { return reinterpret_cast<uint32_t *>(this + 1); }
1439 const uint32_t *data() const {
1440 return reinterpret_cast<const uint32_t *>(this + 1);
1441 }
1442 /// Get span view of result data (requires dataLength to be set first).
1443 std::span<uint32_t> dataSpan() { return {data(), dataLength}; }
1444 std::span<const uint32_t> dataSpan() const { return {data(), dataLength}; }
1445
1446 static size_t allocSize(size_t numItems) {
1447 return sizeof(StreamingAddTranslatedResult) + numItems * sizeof(uint32_t);
1448 }
1449};
1450#pragma pack(pop)
1451
1453 Accelerator *accel, uint32_t addAmt,
1454 uint32_t numItems) {
1455 Logger &logger = conn->getLogger();
1456 logger.info("esitester",
1457 "Starting streaming add test (translated) with add_amt=" +
1458 std::to_string(addAmt) +
1459 ", num_items=" + std::to_string(numItems));
1460
1461 // Generate random input data.
1462 std::mt19937 rng(0xDEADBEEF);
1463 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1464 std::vector<uint32_t> inputData;
1465 inputData.reserve(numItems);
1466 for (uint32_t i = 0; i < numItems; ++i)
1467 inputData.push_back(dist(rng));
1468
1469 // Find the streaming_adder child.
1470 auto streamingAdderChild =
1471 accel->getChildren().find(AppID("streaming_adder"));
1472 if (streamingAdderChild == accel->getChildren().end())
1473 throw std::runtime_error(
1474 "Streaming add test: no 'streaming_adder' child found");
1475
1476 auto &ports = streamingAdderChild->second->getPorts();
1477 auto addIter = ports.find(AppID("streaming_add"));
1478 if (addIter == ports.end())
1479 throw std::runtime_error(
1480 "Streaming add test: no 'streaming_add' port found");
1481
1482 // Get the raw read/write channel ports with translation enabled (default).
1483 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1484 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1485
1486 // Connect with translation enabled (the default).
1487 argPort.connect();
1488 resultPort.connect();
1489
1490 // Allocate the argument struct with proper alignment for the struct members.
1491 // We use aligned_alloc to ensure the buffer meets alignment requirements.
1492 size_t argSize = StreamingAddTranslatedArg::allocSize(numItems);
1493 constexpr size_t alignment = alignof(StreamingAddTranslatedArg);
1494 // aligned_alloc requires size to be a multiple of alignment
1495 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1496 void *argRaw = alignedAllocCompat(alignment, allocSize);
1497 if (!argRaw)
1498 throw std::bad_alloc();
1499 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1500 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1501 auto *arg = static_cast<StreamingAddTranslatedArg *>(argRaw);
1502 arg->inputLength = numItems;
1503 arg->addAmt = addAmt;
1504 for (uint32_t i = 0; i < numItems; ++i)
1505 arg->inputData()[i] = inputData[i];
1506
1507 logger.debug("esitester",
1508 "Sending translated argument: " + std::to_string(argSize) +
1509 " bytes, list_length=" + std::to_string(arg->inputLength) +
1510 ", add_amt=" + std::to_string(arg->addAmt));
1511
1512 // Send the complete message - translation will split it into frames.
1513 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize));
1514 // argBuffer automatically freed when it goes out of scope
1515
1516 // Read the translated result.
1517 MessageData resMsg;
1518 resultPort.read(resMsg);
1519
1520 logger.debug("esitester", "Received translated result: " +
1521 std::to_string(resMsg.getSize()) + " bytes");
1522
1523 if (resMsg.getSize() < sizeof(StreamingAddTranslatedResult))
1524 throw std::runtime_error(
1525 "Streaming add test (translated): result too small");
1526
1527 const auto *result =
1528 reinterpret_cast<const StreamingAddTranslatedResult *>(resMsg.getBytes());
1529
1530 if (resMsg.getSize() <
1531 StreamingAddTranslatedResult::allocSize(result->dataLength))
1532 throw std::runtime_error(
1533 "Streaming add test (translated): result data truncated");
1534
1535 // Verify results.
1536 if (result->dataLength != inputData.size())
1537 throw std::runtime_error(
1538 "Streaming add test (translated): result size mismatch. Expected " +
1539 std::to_string(inputData.size()) + ", got " +
1540 std::to_string(result->dataLength));
1541
1542 bool passed = true;
1543 std::cout << "Streaming add test results:" << std::endl;
1544 for (size_t i = 0; i < inputData.size(); ++i) {
1545 uint32_t expected = inputData[i] + addAmt;
1546 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1547 << " = " << result->data()[i] << " (expected " << expected << ")";
1548 if (result->data()[i] != expected) {
1549 std::cout << " MISMATCH!";
1550 passed = false;
1551 }
1552 std::cout << std::endl;
1553 }
1554
1555 argPort.disconnect();
1556 resultPort.disconnect();
1557
1558 if (!passed)
1559 throw std::runtime_error(
1560 "Streaming add test (translated) failed: result mismatch");
1561
1562 logger.info("esitester", "Streaming add test passed (translated)");
1563 std::cout << "Streaming add test passed" << std::endl;
1564}
1565
/// Test the CoordTranslator module using message translation.
/// This version uses the list translation support where the message format is:
///   Argument: { coords_length, y_translation, x_translation, coords[] }
///   Result: { coords_length, coords[] }
/// Each coord is a struct of x and y, stored in memory as { y, x }.
1571
/// One 2-D point exchanged with CoordTranslator.
/// Packed to exactly 8 bytes. Because of SV field ordering the y component
/// occupies the first four bytes and the x component the last four.
#pragma pack(push, 1)
struct Coord {
  uint32_t y; // offset 0: SV ordering puts the last declared field first
  uint32_t x; // offset 4
};
#pragma pack(pop)
static_assert(sizeof(Coord) == 8, "Coord must be 8 bytes packed");
1581
1582/// Translated argument struct for CoordTranslator.
1583/// Memory layout (standard C struct ordering):
1584/// ESI type: struct { x_translation: UInt(32), y_translation: UInt(32),
1585/// coords: List<struct{x, y}> }
1586/// becomes host struct:
1587/// { coords_length (size_t, 8 bytes on 64-bit), y_translation (uint32_t),
1588/// x_translation (uint32_t), coords[] }
1589/// Note: Fields are in reverse order due to SV struct ordering.
1590/// Note: size_t is used for list lengths, so this format is platform-dependent.
1591#pragma pack(push, 1)
1594 uint32_t yTranslation; // SV ordering: last declared field first in memory
1596 // Trailing array data follows immediately after the struct in memory.
1597
1598 /// Get pointer to trailing coords array.
1599 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1600 const Coord *coords() const {
1601 return reinterpret_cast<const Coord *>(this + 1);
1602 }
1603 /// Get span view of coords (requires coordsLength to be set first).
1604 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1605 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1606
1607 static size_t allocSize(size_t numCoords) {
1608 return sizeof(CoordTranslateArg) + numCoords * sizeof(Coord);
1609 }
1610};
1611#pragma pack(pop)
1612
1613/// Translated result struct for CoordTranslator.
1614/// Memory layout:
1615/// ESI type: List<struct{x, y}>
1616/// becomes host struct:
1617/// { coords_length (size_t, 8 bytes on 64-bit), coords[] }
1618#pragma pack(push, 1)
1621 // Trailing array data follows immediately after the struct in memory.
1622
1623 /// Get pointer to trailing coords array.
1624 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1625 const Coord *coords() const {
1626 return reinterpret_cast<const Coord *>(this + 1);
1627 }
1628 /// Get span view of coords (requires coordsLength to be set first).
1629 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1630 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1631
1632 static size_t allocSize(size_t numCoords) {
1633 return sizeof(CoordTranslateResult) + numCoords * sizeof(Coord);
1634 }
1635};
1636#pragma pack(pop)
1637
1639 uint32_t xTrans, uint32_t yTrans,
1640 uint32_t numCoords) {
1641 Logger &logger = conn->getLogger();
1642 logger.info("esitester", "Starting coord translate test with x_trans=" +
1643 std::to_string(xTrans) +
1644 ", y_trans=" + std::to_string(yTrans) +
1645 ", num_coords=" + std::to_string(numCoords));
1646
1647 // Generate random input coordinates.
1648 // Note: Coord struct has y before x due to SV ordering, but we generate
1649 // and display as (x, y) for human readability.
1650 std::mt19937 rng(0xDEADBEEF);
1651 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1652 std::vector<Coord> inputCoords;
1653 inputCoords.reserve(numCoords);
1654 for (uint32_t i = 0; i < numCoords; ++i) {
1655 Coord c;
1656 c.x = dist(rng);
1657 c.y = dist(rng);
1658 inputCoords.push_back(c);
1659 }
1660
1661 // Find the coord_translator child.
1662 auto coordTranslatorChild =
1663 accel->getChildren().find(AppID("coord_translator"));
1664 if (coordTranslatorChild == accel->getChildren().end())
1665 throw std::runtime_error(
1666 "Coord translate test: no 'coord_translator' child found");
1667
1668 auto &ports = coordTranslatorChild->second->getPorts();
1669 auto translateIter = ports.find(AppID("translate_coords"));
1670 if (translateIter == ports.end())
1671 throw std::runtime_error(
1672 "Coord translate test: no 'translate_coords' port found");
1673
1674 // Use FuncService::Function which handles connection and translation.
1675 auto *funcPort =
1676 translateIter->second.getAs<services::FuncService::Function>();
1677 if (!funcPort)
1678 throw std::runtime_error(
1679 "Coord translate test: 'translate_coords' port not a "
1680 "FuncService::Function");
1681 funcPort->connect();
1682
1683 // Allocate the argument struct with proper alignment for the struct members.
1684 size_t argSize = CoordTranslateArg::allocSize(numCoords);
1685 constexpr size_t alignment = alignof(CoordTranslateArg);
1686 // aligned_alloc requires size to be a multiple of alignment
1687 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1688 void *argRaw = alignedAllocCompat(alignment, allocSize);
1689 if (!argRaw)
1690 throw std::bad_alloc();
1691 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1692 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1693 auto *arg = static_cast<CoordTranslateArg *>(argRaw);
1694 arg->coordsLength = numCoords;
1695 arg->xTranslation = xTrans;
1696 arg->yTranslation = yTrans;
1697 for (uint32_t i = 0; i < numCoords; ++i)
1698 arg->coords()[i] = inputCoords[i];
1699
1700 logger.debug(
1701 "esitester",
1702 "Sending coord translate argument: " + std::to_string(argSize) +
1703 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1704 ", x_trans=" + std::to_string(arg->xTranslation) +
1705 ", y_trans=" + std::to_string(arg->yTranslation));
1706
1707 // Call the function - translation happens automatically.
1708 MessageData resMsg =
1709 funcPort
1710 ->call(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize))
1711 .get();
1712 // argBuffer automatically freed when it goes out of scope
1713
1714 logger.debug("esitester", "Received coord translate result: " +
1715 std::to_string(resMsg.getSize()) + " bytes");
1716
1717 if (resMsg.getSize() < sizeof(CoordTranslateResult))
1718 throw std::runtime_error("Coord translate test: result too small");
1719
1720 const auto *result =
1721 reinterpret_cast<const CoordTranslateResult *>(resMsg.getBytes());
1722
1723 if (resMsg.getSize() < CoordTranslateResult::allocSize(result->coordsLength))
1724 throw std::runtime_error("Coord translate test: result data truncated");
1725
1726 // Verify results.
1727 if (result->coordsLength != inputCoords.size())
1728 throw std::runtime_error(
1729 "Coord translate test: result size mismatch. Expected " +
1730 std::to_string(inputCoords.size()) + ", got " +
1731 std::to_string(result->coordsLength));
1732
1733 bool passed = true;
1734 std::cout << "Coord translate test results:" << std::endl;
1735 for (size_t i = 0; i < inputCoords.size(); ++i) {
1736 uint32_t expectedX = inputCoords[i].x + xTrans;
1737 uint32_t expectedY = inputCoords[i].y + yTrans;
1738 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1739 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1740 << ") = (" << result->coords()[i].x << ","
1741 << result->coords()[i].y << ")";
1742 if (result->coords()[i].x != expectedX ||
1743 result->coords()[i].y != expectedY) {
1744 std::cout << " MISMATCH! (expected (" << expectedX << "," << expectedY
1745 << "))";
1746 passed = false;
1747 }
1748 std::cout << std::endl;
1749 }
1750
1751 if (!passed)
1752 throw std::runtime_error("Coord translate test failed: result mismatch");
1753
1754 logger.info("esitester", "Coord translate test passed");
1755 std::cout << "Coord translate test passed" << std::endl;
1756}
static void print(TypedAttr val, llvm::raw_ostream &os)
static void writePort(uint16_t port)
Write the port number to a file.
Definition RpcServer.cpp:38
Abstract class representing a connection to an accelerator.
Definition Accelerator.h:79
ServiceClass * getService(AppIDPath id={}, std::string implName={}, ServiceImplDetails details={}, HWClientDetails clients={})
Get a typed reference to a particular service type.
virtual void disconnect()
Disconnect from the accelerator cleanly.
Logger & getLogger() const
Definition Accelerator.h:84
AcceleratorServiceThread * getServiceThread()
Return a pointer to the accelerator 'service' thread (or threads).
void addPoll(HWModule &module)
Poll this module.
Top level accelerator class.
Definition Accelerator.h:60
Services provide connections to 'bundles' – collections of named, unidirectional communication channels.
Definition Ports.h:433
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
Definition Ports.h:461
ReadChannelPort & getRawRead(const std::string &name) const
Definition Ports.cpp:52
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Definition Ports.cpp:42
Common options and code for ESI runtime tools.
Definition CLI.h:29
Context & getContext()
Get the context.
Definition CLI.h:63
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
Definition CLI.h:60
int esiParse(int argc, const char **argv)
Run the parser.
Definition CLI.h:46
AcceleratorConnections, Accelerators, and Manifests must all share a context.
Definition Context.h:34
Logger & getLogger()
Definition Context.h:69
BundlePort * resolvePort(const AppIDPath &path, AppIDPath &lastLookup) const
Attempt to resolve a path to a port.
Definition Design.cpp:72
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
Definition Design.h:71
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
Definition Logging.h:64
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
Definition Logging.h:75
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Definition Logging.h:83
Class to parse a manifest.
Definition Manifest.h:39
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A logical chunk of data representing serialized data.
Definition Common.h:113
const uint8_t * getBytes() const
Definition Common.h:124
const T * as() const
Cast to a type.
Definition Common.h:148
size_t getSize() const
Get the size of the data in bytes.
Definition Common.h:138
A ChannelPort which reads data from the accelerator.
Definition Ports.h:318
virtual void connect(std::function< bool(MessageData)> callback, const ConnectOptions &options={})
Definition Ports.cpp:69
virtual void disconnect() override
Definition Ports.h:323
virtual void read(MessageData &outData)
Specify a buffer to read into.
Definition Ports.h:358
A ChannelPort which sends data to the accelerator.
Definition Ports.h:206
virtual void disconnect() override
Definition Ports.h:217
void write(const MessageData &data)
A very basic blocking write API.
Definition Ports.h:222
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
Definition Ports.h:210
A function call which gets attached to a service port.
Definition Services.h:329
A function call which gets attached to a service port.
Definition Services.h:277
virtual void start()
In cases where necessary, enable host memory services.
Definition Services.h:247
A "slice" of some parent MMIO space.
Definition Services.h:173
Information about the Accelerator system.
Definition Services.h:113
A telemetry port which gets attached to a service port.
Definition Services.h:393
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
Definition Services.cpp:378
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
constexpr std::array< uint32_t, 5 > defaultWidths
Definition esitester.cpp:72
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string formatBandwidth(double bytesPerSec)
Definition esitester.cpp:84
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static std::string defaultWidthsStr()
Definition esitester.cpp:73
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Definition debug.py:1
Definition esi.py:1
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
Definition Logging.cpp:132
std::string toHex(void *val)
Definition Common.cpp:37
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
uint32_t x
uint32_t y
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
RAII memory region for host memory.
Definition Services.h:223
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
Definition Services.h:229
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
Definition Services.h:237
virtual std::size_t getSize() const =0