CIRCT 23.0.0git
Loading...
Searching...
No Matches
esitester.cpp
Go to the documentation of this file.
1//===- esitester.cpp - ESI accelerator test/example tool ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DO NOT EDIT!
10// This file is distributed as part of an ESI runtime package. The source for
11// this file should always be modified within CIRCT
12// (lib/dialect/ESI/runtime/cpp/tools/esitester.cpp).
13//
14//===----------------------------------------------------------------------===//
15//
16// This application isn't a utility so much as a test driver for an ESI system.
17// It is also useful as an example of how to use the ESI C++ API. esiquery.cpp
18// is also useful as an example.
19//
20//===----------------------------------------------------------------------===//
21
22#include "esi/Accelerator.h"
23#include "esi/CLI.h"
24#include "esi/Manifest.h"
25#include "esi/Services.h"
26#include "esi/TypedPorts.h"
27
28#include <atomic>
29#include <chrono>
30#include <cstdlib>
31#include <future>
32#include <iostream>
33#include <map>
34#include <memory>
35#include <random>
36#include <span>
37#include <sstream>
38#include <stdexcept>
39#include <vector>
40
41using namespace esi;
42
43// Forward declarations of test functions.
45 uint32_t iterations);
47 const std::vector<uint32_t> &widths, bool write,
48 bool read);
50 uint32_t xferCount,
51 const std::vector<uint32_t> &widths, bool read,
52 bool write);
54 const std::vector<uint32_t> &widths, bool read, bool write);
56 const std::vector<uint32_t> &widths,
57 uint32_t xferCount, bool read, bool write);
59 uint32_t iterations, bool pipeline);
61 Accelerator *, uint32_t width,
62 uint32_t xferCount, bool read,
63 bool write);
65 uint32_t addAmt, uint32_t numItems);
67 uint32_t addAmt, uint32_t numItems);
69 uint32_t xTrans, uint32_t yTrans,
70 uint32_t numCoords);
72 uint32_t xTrans, uint32_t yTrans,
73 uint32_t numCoords, size_t batchSizeLimit);
75 uint32_t iterations);
76
// Bit widths used by the width-sweeping tests when --widths is not given.
constexpr std::array<uint32_t, 5> defaultWidths = {32, 64, 128, 256, 512};

/// Render `defaultWidths` as a comma-separated list without spaces (e.g.
/// "32,64,128,256,512") so it can be embedded in CLI help text.
static std::string defaultWidthsStr() {
  std::string joined;
  // Empty before the first element, "," before every later one.
  const char *separator = "";
  for (uint32_t width : defaultWidths) {
    joined += separator;
    joined += std::to_string(width);
    separator = ",";
  }
  return joined;
}
88
/// Format a transfer rate for display.
///
/// @param bytesPerSec Rate in bytes per second.
/// @return The rate scaled by the largest decimal threshold it reaches
///         (1e9 -> "GB/s", 1e6 -> "MB/s", 1e3 -> "KB/s", otherwise "B/s"),
///         printed in fixed notation with two decimals, e.g. "1.50 KB/s".
static std::string formatBandwidth(double bytesPerSec) {
  struct Scale {
    double threshold;
    const char *suffix;
  };
  // Ordered largest first so the first match is the best fit.
  static constexpr Scale scales[] = {
      {1e9, "GB/s"}, {1e6, "MB/s"}, {1e3, "KB/s"}};

  double scaled = bytesPerSec;
  const char *suffix = "B/s";
  for (const Scale &candidate : scales) {
    if (bytesPerSec >= candidate.threshold) {
      scaled = bytesPerSec / candidate.threshold;
      suffix = candidate.suffix;
      break;
    }
  }

  std::ostringstream out;
  out << std::fixed;
  out.precision(2);
  out << scaled << ' ' << suffix;
  return out.str();
}
109
/// Convert a byte count into a short human-readable string.
///
/// The value is divided by 1024 until it drops below 1024 or the largest
/// suffix ("TB") is reached. Plain bytes are printed without decimals; every
/// scaled value is printed with two, e.g. "1023 B", "1.50 KB".
static std::string humanBytes(uint64_t bytes) {
  static constexpr const char *suffixes[] = {"B", "KB", "MB", "GB", "TB"};
  constexpr int lastSuffix =
      static_cast<int>(sizeof(suffixes) / sizeof(suffixes[0])) - 1;

  double scaled = static_cast<double>(bytes);
  int suffixIdx = 0;
  for (; suffixIdx < lastSuffix && scaled >= 1024.0; ++suffixIdx)
    scaled /= 1024.0;

  std::ostringstream out;
  out << std::fixed;
  out.precision(suffixIdx == 0 ? 0 : 2);
  out << scaled << ' ' << suffixes[suffixIdx];
  return out.str();
}
125
/// Convert a duration in microseconds into a short human-readable string.
///
/// - below 1000 us: the integer count followed by " us";
/// - below 1000 ms: milliseconds with 2, 1 or 0 decimals for values under
///   10, under 100, and otherwise;
/// - anything longer: seconds with 3 decimals under 10 s, else 2.
static std::string humanTimeUS(uint64_t us) {
  if (us < 1000)
    return std::to_string(us) + " us";

  // Fixed-notation formatter shared by the millisecond and second branches.
  const auto render = [](double value, int decimals, const char *unit) {
    std::ostringstream out;
    out << std::fixed;
    out.precision(decimals);
    out << value << ' ' << unit;
    return out.str();
  };

  const double millis = us / 1000.0;
  if (millis < 1000.0) {
    int decimals = 0;
    if (millis < 10.0)
      decimals = 2;
    else if (millis < 100.0)
      decimals = 1;
    return render(millis, decimals, "ms");
  }

  const double seconds = millis / 1000.0;
  return render(seconds, seconds < 10.0 ? 3 : 2, "s");
}
145
// MSVC does not implement std::aligned_alloc, even though it is part of the
// C++17 standard. Provide a compatibility layer.

/// Allocate at least `size` bytes whose address is a multiple of `alignment`.
///
/// @param alignment Requested alignment in bytes; passed through to the
///                  platform allocator unchanged.
/// @param size      Minimum number of usable bytes.
/// @return A non-null pointer that must be released with alignedFreeCompat().
/// @throws std::bad_alloc if the allocator returns null or if rounding `size`
///         up would overflow std::size_t.
static void *alignedAllocCompat(std::size_t alignment, std::size_t size) {
#if defined(_MSC_VER)
  void *ptr = _aligned_malloc(size, alignment);
#else
  // std::aligned_alloc is specified to take a size that is an integral
  // multiple of the alignment; some C libraries return null otherwise. Round
  // the request up so callers may ask for any size.
  std::size_t allocSize = size;
  if (alignment != 0) {
    const std::size_t remainder = size % alignment;
    if (remainder != 0) {
      const std::size_t padding = alignment - remainder;
      if (size > static_cast<std::size_t>(-1) - padding)
        throw std::bad_alloc();
      allocSize = size + padding;
    }
  }
  void *ptr = std::aligned_alloc(alignment, allocSize);
#endif
  if (!ptr)
    throw std::bad_alloc();
  return ptr;
}

/// Release memory obtained from alignedAllocCompat(). MSVC's _aligned_malloc
/// must be paired with _aligned_free; elsewhere std::free is used.
static void alignedFreeCompat(void *ptr) {
#if defined(_MSC_VER)
  _aligned_free(ptr);
#else
  std::free(ptr);
#endif
}
169
170int main(int argc, const char *argv[]) {
171 CliParser cli("esitester");
172 cli.description("Test an ESI system running the ESI tester image.");
173 cli.require_subcommand(1);
174
175 CLI::App *callback_test =
176 cli.add_subcommand("callback", "initiate callback test");
177 uint32_t cb_iters = 1;
178 callback_test->add_option("-i,--iters", cb_iters,
179 "Number of iterations to run");
180
181 CLI::App *hostmemtestSub =
182 cli.add_subcommand("hostmem", "Run the host memory test");
183 bool hmRead = false;
184 bool hmWrite = false;
185 std::vector<uint32_t> hostmemWidths(defaultWidths.begin(),
186 defaultWidths.end());
187 hostmemtestSub->add_flag("-w,--write", hmWrite,
188 "Enable host memory write test");
189 hostmemtestSub->add_flag("-r,--read", hmRead, "Enable host memory read test");
190 hostmemtestSub->add_option(
191 "--widths", hostmemWidths,
192 "Hostmem test widths (default: " + defaultWidthsStr() + ")");
193
194 CLI::App *dmatestSub = cli.add_subcommand("dma", "Run the DMA test");
195 bool dmaRead = false;
196 bool dmaWrite = false;
197 std::vector<uint32_t> dmaWidths(defaultWidths.begin(), defaultWidths.end());
198 dmatestSub->add_flag("-w,--write", dmaWrite, "Enable dma write test");
199 dmatestSub->add_flag("-r,--read", dmaRead, "Enable dma read test");
200 dmatestSub->add_option("--widths", dmaWidths,
201 "DMA test widths (default: " + defaultWidthsStr() +
202 ")");
203
204 CLI::App *bandwidthSub =
205 cli.add_subcommand("bandwidth", "Run the bandwidth test");
206 uint32_t xferCount = 1000;
207 bandwidthSub->add_option("-c,--count", xferCount,
208 "Number of transfers to perform");
209 bool bandwidthRead = false;
210 bool bandwidthWrite = false;
211 std::vector<uint32_t> bandwidthWidths(defaultWidths.begin(),
212 defaultWidths.end());
213 bandwidthSub->add_option("--widths", bandwidthWidths,
214 "Width of the transfers to perform (default: " +
215 defaultWidthsStr() + ")");
216 bandwidthSub->add_flag("-w,--write", bandwidthWrite,
217 "Enable bandwidth write");
218 bandwidthSub->add_flag("-r,--read", bandwidthRead, "Enable bandwidth read");
219
220 CLI::App *hostmembwSub =
221 cli.add_subcommand("hostmembw", "Run the host memory bandwidth test");
222 uint32_t hmBwCount = 1000;
223 bool hmBwRead = false;
224 bool hmBwWrite = false;
225 std::vector<uint32_t> hmBwWidths(defaultWidths.begin(), defaultWidths.end());
226 hostmembwSub->add_option("-c,--count", hmBwCount,
227 "Number of hostmem transfers");
228 hostmembwSub->add_option(
229 "--widths", hmBwWidths,
230 "Hostmem bandwidth widths (default: " + defaultWidthsStr() + ")");
231 hostmembwSub->add_flag("-w,--write", hmBwWrite,
232 "Measure hostmem write bandwidth");
233 hostmembwSub->add_flag("-r,--read", hmBwRead,
234 "Measure hostmem read bandwidth");
235
236 CLI::App *loopbackSub =
237 cli.add_subcommand("loopback", "Test LoopbackInOutAdd function service");
238 uint32_t loopbackIters = 10;
239 bool loopbackPipeline = false;
240 loopbackSub->add_option("-i,--iters", loopbackIters,
241 "Number of function invocations (default 10)");
242 loopbackSub->add_flag("-p,--pipeline", loopbackPipeline,
243 "Pipeline all calls then collect results");
244
245 CLI::App *aggBwSub = cli.add_subcommand(
246 "aggbandwidth",
247 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
248 uint32_t aggWidth = 512;
249 uint32_t aggCount = 1000;
250 bool aggRead = false;
251 bool aggWrite = false;
252 aggBwSub->add_option(
253 "--width", aggWidth,
254 "Bit width (default 512; other widths ignored if absent)");
255 aggBwSub->add_option("-c,--count", aggCount, "Flits per unit (default 1000)");
256 aggBwSub->add_flag("-r,--read", aggRead, "Include read units");
257 aggBwSub->add_flag("-w,--write", aggWrite, "Include write units");
258
259 CLI::App *streamingAddSub = cli.add_subcommand(
260 "streaming_add", "Test StreamingAdder function service with list input");
261 uint32_t streamingAddAmt = 5;
262 uint32_t streamingNumItems = 5;
263 bool streamingTranslate = false;
264 streamingAddSub->add_option("-a,--add", streamingAddAmt,
265 "Amount to add to each element (default 5)");
266 streamingAddSub->add_option("-n,--num-items", streamingNumItems,
267 "Number of random items in the list (default 5)");
268 streamingAddSub->add_flag("-t,--translate", streamingTranslate,
269 "Use message translation (list translation)");
270
271 CLI::App *coordTranslateSub = cli.add_subcommand(
272 "translate_coords",
273 "Test CoordTranslator function service with list of coordinates");
274 uint32_t coordXTrans = 10;
275 uint32_t coordYTrans = 20;
276 uint32_t coordNumItems = 5;
277 coordTranslateSub->add_option("-x,--x-translation", coordXTrans,
278 "X translation amount (default 10)");
279 coordTranslateSub->add_option("-y,--y-translation", coordYTrans,
280 "Y translation amount (default 20)");
281 coordTranslateSub->add_option("-n,--num-coords", coordNumItems,
282 "Number of random coordinates (default 5)");
283
284 CLI::App *serialCoordTranslateSub = cli.add_subcommand(
285 "serial_coords",
286 "Test SerialCoordTranslator function service with list of coordinates");
287 uint32_t serialBatchSize = 240;
288 serialCoordTranslateSub->add_option("-x,--x-translation", coordXTrans,
289 "X translation amount (default 10)");
290 serialCoordTranslateSub->add_option("-y,--y-translation", coordYTrans,
291 "Y translation amount (default 20)");
292 serialCoordTranslateSub->add_option(
293 "-n,--num-coords", coordNumItems,
294 "Number of random coordinates (default 5)");
295 serialCoordTranslateSub
296 ->add_option("-b,--batch-size", serialBatchSize,
297 "Coordinates per header (default 240, max 65535)")
298 ->check(CLI::Range(1u, 0xFFFFu));
299
300 CLI::App *channelTestSub = cli.add_subcommand(
301 "channel", "Test ChannelService to_host and from_host");
302 uint32_t channelIters = 10;
303 channelTestSub->add_option("-i,--iters", channelIters,
304 "Number of loopback iterations (default 10)");
305
306 if (int rc = cli.esiParse(argc, argv))
307 return rc;
308 if (!cli.get_help_ptr()->empty())
309 return 0;
310
311 Context &ctxt = cli.getContext();
312 AcceleratorConnection *acc = cli.connect();
313 try {
314 const auto &info = *acc->getService<services::SysInfo>();
315 ctxt.getLogger().info("esitester", "Connected to accelerator.");
316 Manifest manifest(ctxt, info.getJsonManifest());
317 Accelerator *accel = manifest.buildAccelerator(*acc);
318 ctxt.getLogger().info("esitester", "Built accelerator.");
319 acc->getServiceThread()->addPoll(*accel);
320
321 if (*callback_test) {
322 callbackTest(acc, accel, cb_iters);
323 } else if (*hostmemtestSub) {
324 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
325 } else if (*loopbackSub) {
326 loopbackAddTest(acc, accel, loopbackIters, loopbackPipeline);
327 } else if (*dmatestSub) {
328 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
329 } else if (*bandwidthSub) {
330 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
331 bandwidthWrite);
332 } else if (*hostmembwSub) {
333 hostmemBandwidthTest(acc, accel, hmBwCount, hmBwWidths, hmBwRead,
334 hmBwWrite);
335 } else if (*aggBwSub) {
336 aggregateHostmemBandwidthTest(acc, accel, aggWidth, aggCount, aggRead,
337 aggWrite);
338 } else if (*streamingAddSub) {
339 if (streamingTranslate)
340 streamingAddTranslatedTest(acc, accel, streamingAddAmt,
341 streamingNumItems);
342 else
343 streamingAddTest(acc, accel, streamingAddAmt, streamingNumItems);
344 } else if (*coordTranslateSub) {
345 coordTranslateTest(acc, accel, coordXTrans, coordYTrans, coordNumItems);
346 } else if (*serialCoordTranslateSub) {
347 serialCoordTranslateTest(acc, accel, coordXTrans, coordYTrans,
348 coordNumItems, serialBatchSize);
349 } else if (*channelTestSub) {
350 channelTest(acc, accel, channelIters);
351 }
352
353 acc->disconnect();
354 } catch (std::exception &e) {
355 ctxt.getLogger().error("esitester", e.what());
356 acc->disconnect();
357 return -1;
358 }
359 std::cout << "Exiting successfully\n";
360 return 0;
361}
362
364 uint32_t iterations) {
365 auto cb_test = accel->getChildren().find(AppID("cb_test"));
366 if (cb_test == accel->getChildren().end())
367 throw std::runtime_error("No cb_test child found in accelerator");
368 auto &ports = cb_test->second->getPorts();
369 auto cmd_port = ports.find(AppID("cmd"));
370 if (cmd_port == ports.end())
371 throw std::runtime_error("No cmd port found in cb_test child");
372 auto *cmdMMIO = cmd_port->second.getAs<services::MMIO::MMIORegion>();
373 if (!cmdMMIO)
374 throw std::runtime_error("cb_test cmd port is not MMIO");
375
376 auto f = ports.find(AppID("cb"));
377 if (f == ports.end())
378 throw std::runtime_error("No cb port found in accelerator");
379
380 auto *callPort = f->second.getAs<services::CallService::Callback>();
381 if (!callPort)
382 throw std::runtime_error("cb port is not a CallService::Callback");
383
384 std::atomic<uint32_t> callbackCount = 0;
385 callPort->connect(
386 [conn, &callbackCount](const MessageData &data) mutable -> MessageData {
387 callbackCount.fetch_add(1);
388 conn->getLogger().debug(
389 [&](std::string &subsystem, std::string &msg,
390 std::unique_ptr<std::map<std::string, std::any>> &details) {
391 subsystem = "ESITESTER";
392 msg = "Received callback";
393 details = std::make_unique<std::map<std::string, std::any>>();
394 details->emplace("data", data);
395 });
396 std::cout << "callback: " << *data.as<uint64_t>() << std::endl;
397 return MessageData();
398 },
399 true);
400
401 for (uint32_t i = 0; i < iterations; ++i) {
402 conn->getLogger().info("esitester", "Issuing callback command iteration " +
403 std::to_string(i) + "/" +
404 std::to_string(iterations));
405 cmdMMIO->write(0x10, i); // Command the callback
406 // Wait up to 1 second for the callback to be invoked.
407 for (uint32_t wait = 0; wait < 1000; ++wait) {
408 if (callbackCount.load() > i)
409 break;
410 std::this_thread::sleep_for(std::chrono::milliseconds(1));
411 }
412 if (callbackCount.load() <= i)
413 throw std::runtime_error("Callback test failed. No callback received");
414 }
415}
416
417/// Test the hostmem write functionality.
420 uint32_t width) {
421 std::cout << "Running hostmem WRITE test with width " << width << std::endl;
422 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
423 auto check = [&](bool print) {
424 bool ret = true;
425 for (size_t i = 0; i < 9; ++i) {
426 if (print)
427 printf("[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
428 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
429 ret = false;
430 }
431 return ret;
432 };
433
434 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
435 if (writeMemChildIter == acc->getChildren().end())
436 throw std::runtime_error(
437 "hostmem write test failed. No writemem child found");
438 auto &writeMemPorts = writeMemChildIter->second->getPorts();
439
440 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
441 if (cmdPortIter == writeMemPorts.end())
442 throw std::runtime_error(
443 "hostmem write test failed. No (cmd,width) MMIO port");
444 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
445 if (!cmdMMIO)
446 throw std::runtime_error(
447 "hostmem write test failed. (cmd,width) port not MMIO");
448
449 auto issuedPortIter = writeMemPorts.find(AppID("addrCmdIssued"));
450 if (issuedPortIter == writeMemPorts.end())
451 throw std::runtime_error(
452 "hostmem write test failed. addrCmdIssued missing");
453 auto *addrCmdIssuedPort =
454 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
455 if (!addrCmdIssuedPort)
456 throw std::runtime_error(
457 "hostmem write test failed. addrCmdIssued not telemetry");
458 addrCmdIssuedPort->connect();
459
460 auto responsesPortIter = writeMemPorts.find(AppID("addrCmdResponses"));
461 if (responsesPortIter == writeMemPorts.end())
462 throw std::runtime_error(
463 "hostmem write test failed. addrCmdResponses missing");
464 auto *addrCmdResponsesPort =
465 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
466 if (!addrCmdResponsesPort)
467 throw std::runtime_error(
468 "hostmem write test failed. addrCmdResponses not telemetry");
469 addrCmdResponsesPort->connect();
470
471 for (size_t i = 0, e = 9; i < e; ++i)
472 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
473 region.flush();
474 cmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
475 cmdMMIO->write(0x18, 1);
476 cmdMMIO->write(0x20, 1);
477 bool done = false;
478 for (int i = 0; i < 100; ++i) {
479 auto issued = addrCmdIssuedPort->readInt();
480 auto responses = addrCmdResponsesPort->readInt();
481 if (issued == 1 && responses == 1) {
482 done = true;
483 break;
484 }
485 std::this_thread::sleep_for(std::chrono::microseconds(100));
486 }
487 if (!done) {
488 check(true);
489 throw std::runtime_error("hostmem write test (" + std::to_string(width) +
490 " bits) timeout waiting for completion");
491 }
492 if (!check(true))
493 throw std::runtime_error("hostmem write test failed (" +
494 std::to_string(width) + " bits)");
495}
496
499 uint32_t width) {
500 std::cout << "Running hostmem READ test with width " << width << std::endl;
501 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
502 if (readMemChildIter == acc->getChildren().end())
503 throw std::runtime_error(
504 "hostmem read test failed. No readmem child found");
505
506 auto &readMemPorts = readMemChildIter->second->getPorts();
507 auto addrCmdPortIter = readMemPorts.find(AppID("cmd", width));
508 if (addrCmdPortIter == readMemPorts.end())
509 throw std::runtime_error(
510 "hostmem read test failed. No AddressCommand MMIO port");
511 auto *addrCmdMMIO =
512 addrCmdPortIter->second.getAs<services::MMIO::MMIORegion>();
513 if (!addrCmdMMIO)
514 throw std::runtime_error(
515 "hostmem read test failed. AddressCommand port not MMIO");
516
517 auto lastReadPortIter = readMemPorts.find(AppID("lastReadLSB"));
518 if (lastReadPortIter == readMemPorts.end())
519 throw std::runtime_error("hostmem read test failed. lastReadLSB missing");
520 auto *lastReadPort =
521 lastReadPortIter->second.getAs<services::TelemetryService::Metric>();
522 if (!lastReadPort)
523 throw std::runtime_error(
524 "hostmem read test failed. lastReadLSB not telemetry");
525 lastReadPort->connect();
526
527 auto issuedPortIter = readMemPorts.find(AppID("addrCmdIssued"));
528 if (issuedPortIter == readMemPorts.end())
529 throw std::runtime_error("hostmem read test failed. addrCmdIssued missing");
530 auto *addrCmdIssuedPort =
531 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
532 if (!addrCmdIssuedPort)
533 throw std::runtime_error(
534 "hostmem read test failed. addrCmdIssued not telemetry");
535 addrCmdIssuedPort->connect();
536
537 auto responsesPortIter = readMemPorts.find(AppID("addrCmdResponses"));
538 if (responsesPortIter == readMemPorts.end())
539 throw std::runtime_error(
540 "hostmem read test failed. addrCmdResponses missing");
541 auto *addrCmdResponsesPort =
542 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
543 if (!addrCmdResponsesPort)
544 throw std::runtime_error(
545 "hostmem read test failed. addrCmdResponses not telemetry");
546 addrCmdResponsesPort->connect();
547
548 for (size_t i = 0; i < 8; ++i) {
549 auto *dataPtr = static_cast<uint64_t *>(region.getPtr());
550 dataPtr[0] = 0x12345678ull << i;
551 dataPtr[1] = 0xDEADBEEFull << i;
552 region.flush();
553 addrCmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
554 addrCmdMMIO->write(0x18, 1);
555 addrCmdMMIO->write(0x20, 1);
556 bool done = false;
557 for (int waitLoop = 0; waitLoop < 100; ++waitLoop) {
558 auto issued = addrCmdIssuedPort->readInt();
559 auto responses = addrCmdResponsesPort->readInt();
560 if (issued == 1 && responses == 1) {
561 done = true;
562 break;
563 }
564 std::this_thread::sleep_for(std::chrono::milliseconds(10));
565 }
566 if (!done)
567 throw std::runtime_error("hostmem read (" + std::to_string(width) +
568 " bits) timeout waiting for completion");
569 uint64_t captured = lastReadPort->readInt();
570 uint64_t expected = dataPtr[0];
571 if (width < 64)
572 expected &= ((1ull << width) - 1);
573 if (captured != expected)
574 throw std::runtime_error("hostmem read test (" + std::to_string(width) +
575 " bits) failed. Expected " +
576 esi::toHex(expected) + ", got " +
577 esi::toHex(captured));
578 }
579}
580
582 const std::vector<uint32_t> &widths, bool write,
583 bool read) {
584 // Enable the host memory service.
585 auto hostmem = conn->getService<services::HostMem>();
586 hostmem->start();
587 auto scratchRegion = hostmem->allocate(/*size(bytes)=*/1024 * 1024,
588 /*memOpts=*/{.writeable = true});
589 uint64_t *dataPtr = static_cast<uint64_t *>(scratchRegion->getPtr());
590 conn->getLogger().info("esitester",
591 "Running host memory test with region size " +
592 std::to_string(scratchRegion->getSize()) +
593 " bytes at 0x" + toHex(dataPtr));
594 for (size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
595 dataPtr[i] = 0;
596 scratchRegion->flush();
597
598 bool passed = true;
599 for (size_t width : widths) {
600 try {
601 if (write)
602 hostmemWriteTest(acc, *scratchRegion, width);
603 if (read)
604 hostmemReadTest(acc, *scratchRegion, width);
605 } catch (std::exception &e) {
606 conn->getLogger().error("esitester", "Hostmem test failed for width " +
607 std::to_string(width) + ": " +
608 e.what());
609 passed = false;
610 }
611 }
612 if (!passed)
613 throw std::runtime_error("Hostmem test failed");
614 std::cout << "Hostmem test passed" << std::endl;
615}
616
618 size_t width) {
619 Logger &logger = conn->getLogger();
620 logger.info("esitester",
621 "== Running DMA read test with width " + std::to_string(width));
622 AppIDPath lastPath;
623 BundlePort *toHostMMIOPort =
624 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
625 if (!toHostMMIOPort)
626 throw std::runtime_error("dma read test failed. No tohostdma[" +
627 std::to_string(width) + "] found");
628 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
629 if (!toHostMMIO)
630 throw std::runtime_error("dma read test failed. MMIO port is not MMIO");
631 lastPath.clear();
632 BundlePort *outPortBundle =
633 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
634 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
635 outPort.connect();
636
637 size_t xferCount = 24;
638 uint64_t last = 0;
639 MessageData data;
640 toHostMMIO->write(0, xferCount);
641 for (size_t i = 0; i < xferCount; ++i) {
642 outPort.read(data);
643 if (width == 64) {
644 uint64_t val = *data.as<uint64_t>();
645 if (val < last)
646 throw std::runtime_error("dma read test failed. Out of order data");
647 last = val;
648 }
649 logger.debug("esitester",
650 "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex());
651 }
652 outPort.disconnect();
653 std::cout << " DMA read test for " << width << " bits passed" << std::endl;
654}
655
657 size_t width) {
658 Logger &logger = conn->getLogger();
659 logger.info("esitester",
660 "Running DMA write test with width " + std::to_string(width));
661 AppIDPath lastPath;
662 BundlePort *fromHostMMIOPort =
663 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
664 if (!fromHostMMIOPort)
665 throw std::runtime_error("dma read test for " + toString(width) +
666 " bits failed. No fromhostdma[" +
667 std::to_string(width) + "] found");
668 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
669 if (!fromHostMMIO)
670 throw std::runtime_error("dma write test for " + toString(width) +
671 " bits failed. MMIO port is not MMIO");
672 lastPath.clear();
673 BundlePort *outPortBundle =
674 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
675 if (!outPortBundle)
676 throw std::runtime_error("dma write test for " + toString(width) +
677 " bits failed. No out port found");
678 WriteChannelPort &writePort = outPortBundle->getRawWrite("data");
680
681 size_t xferCount = 24;
682 uint8_t *data = new uint8_t[width];
683 for (size_t i = 0; i < width / 8; ++i)
684 data[i] = 0;
685 fromHostMMIO->read(8);
686 fromHostMMIO->write(0, xferCount);
687 for (size_t i = 1; i < xferCount + 1; ++i) {
688 data[0] = i;
689 bool successWrite;
690 size_t attempts = 0;
691 do {
692 successWrite = writePort.tryWrite(MessageData(data, width / 8));
693 if (!successWrite) {
694 std::this_thread::sleep_for(std::chrono::milliseconds(10));
695 }
696 } while (!successWrite && ++attempts < 100);
697 if (!successWrite)
698 throw std::runtime_error("dma write test for " + toString(width) +
699 " bits failed. Write failed");
700 uint64_t lastReadMMIO;
701 for (size_t a = 0; a < 20; ++a) {
702 lastReadMMIO = fromHostMMIO->read(8);
703 if (lastReadMMIO == i)
704 break;
705 std::this_thread::sleep_for(std::chrono::milliseconds(10));
706 if (a >= 19)
707 throw std::runtime_error("dma write for " + toString(width) +
708 " bits test failed. Read from MMIO failed");
709 }
710 }
711 writePort.disconnect();
712 delete[] data;
713 std::cout << " DMA write test for " << width << " bits passed" << std::endl;
714}
715
717 const std::vector<uint32_t> &widths, bool read,
718 bool write) {
719 bool success = true;
720 if (write)
721 for (size_t width : widths)
722 try {
723 dmaWriteTest(conn, acc, width);
724 } catch (std::exception &e) {
725 success = false;
726 std::cerr << "DMA write test for " << width
727 << " bits failed: " << e.what() << std::endl;
728 }
729 if (read)
730 for (size_t width : widths)
731 dmaReadTest(conn, acc, width);
732 if (!success)
733 throw std::runtime_error("DMA test failed");
734 std::cout << "DMA test passed" << std::endl;
735}
736
737//
738// DMA bandwidth test
739//
740
742 size_t width, size_t xferCount) {
743
744 AppIDPath lastPath;
745 BundlePort *toHostMMIOPort =
746 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
747 if (!toHostMMIOPort)
748 throw std::runtime_error("bandwidth test failed. No tohostdma[" +
749 std::to_string(width) + "] found");
750 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
751 if (!toHostMMIO)
752 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
753 lastPath.clear();
754 BundlePort *outPortBundle =
755 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
756 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
757 outPort.connect();
758
759 Logger &logger = conn->getLogger();
760 logger.info("esitester", "Starting read bandwidth test with " +
761 std::to_string(xferCount) + " x " +
762 std::to_string(width) + " bit transfers");
763 MessageData data;
764 auto start = std::chrono::high_resolution_clock::now();
765 toHostMMIO->write(0, xferCount);
766 for (size_t i = 0; i < xferCount; ++i) {
767 outPort.read(data);
768 logger.debug(
769 [i, &data](std::string &subsystem, std::string &msg,
770 std::unique_ptr<std::map<std::string, std::any>> &details) {
771 subsystem = "esitester";
772 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
773 });
774 }
775 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
776 std::chrono::high_resolution_clock::now() - start);
777 double bytesPerSec =
778 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
779 logger.info("esitester",
780 " Bandwidth test: " + std::to_string(xferCount) + " x " +
781 std::to_string(width) + " bit transfers in " +
782 std::to_string(duration.count()) + " microseconds");
783 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
784}
785
787 size_t width, size_t xferCount) {
788
789 AppIDPath lastPath;
790 BundlePort *fromHostMMIOPort =
791 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
792 if (!fromHostMMIOPort)
793 throw std::runtime_error("bandwidth test failed. No fromhostdma[" +
794 std::to_string(width) + "] found");
795 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
796 if (!fromHostMMIO)
797 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
798 lastPath.clear();
799 BundlePort *inPortBundle =
800 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
801 WriteChannelPort &outPort = inPortBundle->getRawWrite("data");
802 outPort.connect();
803
804 Logger &logger = conn->getLogger();
805 logger.info("esitester", "Starting write bandwidth test with " +
806 std::to_string(xferCount) + " x " +
807 std::to_string(width) + " bit transfers");
808 std::vector<uint8_t> dataVec(width / 8);
809 for (size_t i = 0; i < width / 8; ++i)
810 dataVec[i] = i;
811 MessageData data(dataVec);
812 auto start = std::chrono::high_resolution_clock::now();
813 fromHostMMIO->write(0, xferCount);
814 for (size_t i = 0; i < xferCount; ++i) {
815 outPort.write(data);
816 logger.debug(
817 [i, &data](std::string &subsystem, std::string &msg,
818 std::unique_ptr<std::map<std::string, std::any>> &details) {
819 subsystem = "esitester";
820 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
821 });
822 }
823 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
824 std::chrono::high_resolution_clock::now() - start);
825 double bytesPerSec =
826 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
827 logger.info("esitester",
828 " Bandwidth test: " + std::to_string(xferCount) + " x " +
829 std::to_string(width) + " bit transfers in " +
830 std::to_string(duration.count()) + " microseconds");
831 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
832}
833
835 const std::vector<uint32_t> &widths,
836 uint32_t xferCount, bool read, bool write) {
837 if (read)
838 for (uint32_t w : widths)
839 bandwidthReadTest(conn, acc, w, xferCount);
840 if (write)
841 for (uint32_t w : widths)
842 bandwidthWriteTest(conn, acc, w, xferCount);
843}
844
845//
846// Hostmem bandwidth test
847//
848
849static void
852 uint32_t width, uint32_t xferCount) {
853 Logger &logger = conn->getLogger();
854 logger.info("esitester", "Starting hostmem WRITE bandwidth test: " +
855 std::to_string(xferCount) + " x " +
856 std::to_string(width) + " bits");
857
858 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
859 if (writeMemChildIter == acc->getChildren().end())
860 throw std::runtime_error("hostmem write bandwidth: writemem child missing");
861 auto &writeMemPorts = writeMemChildIter->second->getPorts();
862
863 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
864 if (cmdPortIter == writeMemPorts.end())
865 throw std::runtime_error("hostmem write bandwidth: cmd MMIO missing");
866 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
867 if (!cmdMMIO)
868 throw std::runtime_error("hostmem write bandwidth: cmd not MMIO");
869
870 auto issuedIter = writeMemPorts.find(AppID("addrCmdIssued"));
871 auto respIter = writeMemPorts.find(AppID("addrCmdResponses"));
872 auto cycleCount = writeMemPorts.find(AppID("addrCmdCycles"));
873 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
874 cycleCount == writeMemPorts.end())
875 throw std::runtime_error("hostmem write bandwidth: telemetry missing");
876 auto *issuedPort =
877 issuedIter->second.getAs<services::TelemetryService::Metric>();
878 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
879 auto *cyclePort =
880 cycleCount->second.getAs<services::TelemetryService::Metric>();
881 if (!issuedPort || !respPort || !cyclePort)
882 throw std::runtime_error(
883 "hostmem write bandwidth: telemetry type mismatch");
884
885 issuedPort->connect();
886 respPort->connect();
887 cyclePort->connect();
888
889 // Initialize pattern (optional).
890 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
891 size_t words = region.getSize() / 8;
892 for (size_t i = 0; i < words; ++i)
893 dataPtr[i] = i + 0xA5A50000;
894 region.flush();
895
896 auto start = std::chrono::high_resolution_clock::now();
897 // Fire off xferCount write commands (one flit each).
898 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
899 cmdMMIO->write(0x10, devPtr); // address
900 cmdMMIO->write(0x18, xferCount); // flits
901 cmdMMIO->write(0x20, 1); // start
902
903 // Wait for responses counter to reach target.
904 bool completed = false;
905 for (int wait = 0; wait < 100000; ++wait) {
906 uint64_t respNow = respPort->readInt();
907 if (respNow == xferCount) {
908 completed = true;
909 break;
910 }
911 std::this_thread::sleep_for(std::chrono::microseconds(50));
912 }
913 if (!completed)
914 throw std::runtime_error("hostmem write bandwidth timeout");
915 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
916 std::chrono::high_resolution_clock::now() - start);
917 double bytesPerSec =
918 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
919 uint64_t cycles = cyclePort->readInt();
920 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
921 std::cout << "[WRITE] Hostmem bandwidth (" << std::to_string(width)
922 << "): " << formatBandwidth(bytesPerSec) << " "
923 << std::to_string(xferCount) << " flits in "
924 << std::to_string(duration.count()) << " us, "
925 << std::to_string(cycles) << " cycles, " << bytesPerCycle
926 << " bytes/cycle" << std::endl;
927}
928
929static void
932 uint32_t width, uint32_t xferCount) {
933 Logger &logger = conn->getLogger();
934 logger.info("esitester", "Starting hostmem READ bandwidth test: " +
935 std::to_string(xferCount) + " x " +
936 std::to_string(width) + " bits");
937
938 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
939 if (readMemChildIter == acc->getChildren().end())
940 throw std::runtime_error("hostmem read bandwidth: readmem child missing");
941 auto &readMemPorts = readMemChildIter->second->getPorts();
942
943 auto cmdPortIter = readMemPorts.find(AppID("cmd", width));
944 if (cmdPortIter == readMemPorts.end())
945 throw std::runtime_error("hostmem read bandwidth: cmd MMIO missing");
946 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
947 if (!cmdMMIO)
948 throw std::runtime_error("hostmem read bandwidth: cmd not MMIO");
949
950 auto issuedIter = readMemPorts.find(AppID("addrCmdIssued"));
951 auto respIter = readMemPorts.find(AppID("addrCmdResponses"));
952 auto cyclePort = readMemPorts.find(AppID("addrCmdCycles"));
953 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
954 cyclePort == readMemPorts.end())
955 throw std::runtime_error("hostmem read bandwidth: telemetry missing");
956 auto *issuedPort =
957 issuedIter->second.getAs<services::TelemetryService::Metric>();
958 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
959 auto *cycleCntPort =
960 cyclePort->second.getAs<services::TelemetryService::Metric>();
961 if (!issuedPort || !respPort || !cycleCntPort)
962 throw std::runtime_error("hostmem read bandwidth: telemetry type mismatch");
963 issuedPort->connect();
964 respPort->connect();
965 cycleCntPort->connect();
966
967 // Prepare memory pattern (optional).
968 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
969 size_t words64 = region.getSize() / 8;
970 for (size_t i = 0; i < words64; ++i)
971 dataPtr[i] = 0xCAFEBABE0000ull + i;
972 region.flush();
973 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
974 auto start = std::chrono::high_resolution_clock::now();
975
976 cmdMMIO->write(0x10, devPtr);
977 cmdMMIO->write(0x18, xferCount);
978 cmdMMIO->write(0x20, 1);
979
980 bool timeout = true;
981 for (int wait = 0; wait < 100000; ++wait) {
982 uint64_t respNow = respPort->readInt();
983 if (respNow == xferCount) {
984 timeout = false;
985 break;
986 }
987 std::this_thread::sleep_for(std::chrono::microseconds(50));
988 }
989 if (timeout)
990 throw std::runtime_error("hostmem read bandwidth timeout");
991 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
992 std::chrono::high_resolution_clock::now() - start);
993 double bytesPerSec =
994 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
995 uint64_t cycles = cycleCntPort->readInt();
996 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
997 std::cout << "[ READ] Hostmem bandwidth (" << width
998 << "): " << formatBandwidth(bytesPerSec) << ", " << xferCount
999 << " flits in " << duration.count() << " us, " << cycles
1000 << " cycles, " << bytesPerCycle << " bytes/cycle" << std::endl;
1001}
1002
1004 uint32_t xferCount,
1005 const std::vector<uint32_t> &widths, bool read,
1006 bool write) {
1007 auto hostmemSvc = conn->getService<services::HostMem>();
1008 hostmemSvc->start();
1009 auto region = hostmemSvc->allocate(/*size(bytes)=*/1024 * 1024 * 1024,
1010 /*memOpts=*/{.writeable = true});
1011 for (uint32_t w : widths) {
1012 if (write)
1013 hostmemWriteBandwidthTest(conn, acc, *region, w, xferCount);
1014 if (read)
1015 hostmemReadBandwidthTest(conn, acc, *region, w, xferCount);
1016 }
1017}
1018
1020 uint32_t iterations, bool pipeline) {
1021 Logger &logger = conn->getLogger();
1022 auto loopbackChild = accel->getChildren().find(AppID("loopback"));
1023 if (loopbackChild == accel->getChildren().end())
1024 throw std::runtime_error("Loopback test: no 'loopback' child");
1025 auto &ports = loopbackChild->second->getPorts();
1026 auto addIter = ports.find(AppID("add"));
1027 if (addIter == ports.end())
1028 throw std::runtime_error("Loopback test: no 'add' port");
1029
1030 // Use FuncService::Func instead of raw channels.
1031 auto *funcPort = addIter->second.getAs<services::FuncService::Function>();
1032 if (!funcPort)
1033 throw std::runtime_error(
1034 "Loopback test: 'add' port not a FuncService::Function");
1035 funcPort->connect();
1036 if (iterations == 0) {
1037 logger.info("esitester", "Loopback add test: 0 iterations (skipped)");
1038 return;
1039 }
1040 std::mt19937_64 rng(0xC0FFEE);
1041 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
1042
1043 if (!pipeline) {
1044 auto start = std::chrono::high_resolution_clock::now();
1045 for (uint32_t i = 0; i < iterations; ++i) {
1046 uint32_t argVal = dist(rng);
1047 uint32_t expected = (argVal + 11) & 0xFFFF;
1048 uint8_t argBytes[3] = {
1049 static_cast<uint8_t>(argVal & 0xFF),
1050 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1051 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1052 };
1053 MessageData argMsg(argBytes, 3);
1054 MessageData resMsg = funcPort->call(argMsg).get();
1055 uint16_t got = *resMsg.as<uint16_t>();
1056 std::cout << "[loopback] i=" << i << " arg=0x" << esi::toHex(argVal)
1057 << " got=0x" << esi::toHex(got) << " exp=0x"
1058 << esi::toHex(expected) << std::endl;
1059 if (got != expected)
1060 throw std::runtime_error("Loopback mismatch (non-pipelined)");
1061 }
1062 auto end = std::chrono::high_resolution_clock::now();
1063 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
1064 .count();
1065 double callsPerSec = (double)iterations * 1e6 / (double)us;
1066 logger.info("esitester", "Loopback add test passed (non-pipelined, " +
1067 std::to_string(iterations) + " calls, " +
1068 std::to_string(us) + " us, " +
1069 std::to_string(callsPerSec) + " calls/s)");
1070 } else {
1071 // Pipelined mode: launch all calls first, then collect.
1072 std::vector<std::future<MessageData>> futures;
1073 futures.reserve(iterations);
1074 std::vector<uint32_t> expectedVals;
1075 expectedVals.reserve(iterations);
1076
1077 auto issueStart = std::chrono::high_resolution_clock::now();
1078 for (uint32_t i = 0; i < iterations; ++i) {
1079 uint32_t argVal = dist(rng);
1080 uint32_t expected = (argVal + 11) & 0xFFFF;
1081 uint8_t argBytes[3] = {
1082 static_cast<uint8_t>(argVal & 0xFF),
1083 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1084 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1085 };
1086 futures.emplace_back(funcPort->call(MessageData(argBytes, 3)));
1087 expectedVals.emplace_back(expected);
1088 }
1089 auto issueEnd = std::chrono::high_resolution_clock::now();
1090
1091 for (uint32_t i = 0; i < iterations; ++i) {
1092 MessageData resMsg = futures[i].get();
1093 uint16_t got = *resMsg.as<uint16_t>();
1094 uint16_t exp = (uint16_t)expectedVals[i];
1095 std::cout << "[loopback-pipelined] i=" << i << " got=0x"
1096 << esi::toHex(got) << " exp=0x" << esi::toHex(exp) << std::endl;
1097 if (got != exp)
1098 throw std::runtime_error("Loopback mismatch (pipelined) idx=" +
1099 std::to_string(i));
1100 }
1101 auto collectEnd = std::chrono::high_resolution_clock::now();
1102
1103 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1104 issueEnd - issueStart)
1105 .count();
1106 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1107 collectEnd - issueStart)
1108 .count();
1109
1110 double issueRate = (double)iterations * 1e6 / (double)issueUs;
1111 double completionRate = (double)iterations * 1e6 / (double)totalUs;
1112
1113 logger.info("esitester", "Loopback add test passed (pipelined). Issued " +
1114 std::to_string(iterations) + " in " +
1115 std::to_string(issueUs) + " us (" +
1116 std::to_string(issueRate) +
1117 " calls/s), total " + std::to_string(totalUs) +
1118 " us (" + std::to_string(completionRate) +
1119 " calls/s effective)");
1120 }
1121}
1122
1124 Accelerator *acc, uint32_t width,
1125 uint32_t xferCount, bool read,
1126 bool write) {
1127 Logger &logger = conn->getLogger();
1128 if (!read && !write) {
1129 std::cout << "aggbandwidth: nothing to do (enable --read and/or --write)\n";
1130 return;
1131 }
1132 logger.info(
1133 "esitester",
1134 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1135 " count=" + std::to_string(xferCount) +
1136 " read=" + (read ? "Y" : "N") + " write=" + (write ? "Y" : "N"));
1137
1138 auto hostmemSvc = conn->getService<services::HostMem>();
1139 hostmemSvc->start();
1140
1141 struct Unit {
1142 std::string prefix;
1143 bool isRead = false;
1144 bool isWrite = false;
1145 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1146 services::TelemetryService::Metric *resp = nullptr;
1147 services::TelemetryService::Metric *cycles = nullptr;
1148 services::MMIO::MMIORegion *cmd = nullptr;
1149 bool launched = false;
1150 bool done = false;
1151 uint64_t bytes = 0;
1152 uint64_t duration_us = 0;
1153 uint64_t cycleCount = 0;
1154 std::chrono::high_resolution_clock::time_point start;
1155 };
1156 std::vector<Unit> units;
1157 const std::vector<std::string> readPrefixes = {"readmem", "readmem_0",
1158 "readmem_1", "readmem_2"};
1159 const std::vector<std::string> writePrefixes = {"writemem", "writemem_0",
1160 "writemem_1", "writemem_2"};
1161
1162 auto addUnits = [&](const std::vector<std::string> &pref, bool doRead,
1163 bool doWrite) {
1164 for (auto &p : pref) {
1165 AppID id(p, width);
1166 auto childIt = acc->getChildren().find(id);
1167 if (childIt == acc->getChildren().end())
1168 continue; // silently skip missing variants
1169 auto &ports = childIt->second->getPorts();
1170 auto cmdIt = ports.find(AppID("cmd", width));
1171 auto respIt = ports.find(AppID("addrCmdResponses"));
1172 auto cycIt = ports.find(AppID("addrCmdCycles"));
1173 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1174 continue;
1175 auto *cmd = cmdIt->second.getAs<services::MMIO::MMIORegion>();
1176 auto *resp = respIt->second.getAs<services::TelemetryService::Metric>();
1177 auto *cyc = cycIt->second.getAs<services::TelemetryService::Metric>();
1178 if (!cmd || !resp || !cyc)
1179 continue;
1180 resp->connect();
1181 cyc->connect();
1182 Unit u;
1183 u.prefix = p;
1184 u.isRead = doRead;
1185 u.isWrite = doWrite;
1186 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable = true});
1187 // Init pattern.
1188 uint64_t *ptr = static_cast<uint64_t *>(u.region->getPtr());
1189 size_t words = u.region->getSize() / 8;
1190 for (size_t i = 0; i < words; ++i)
1191 ptr[i] =
1192 (p[0] == 'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1193 u.region->flush();
1194 u.cmd = cmd;
1195 u.resp = resp;
1196 u.cycles = cyc;
1197 u.bytes = uint64_t(xferCount) * (width / 8);
1198 units.emplace_back(std::move(u));
1199 }
1200 };
1201 if (read)
1202 addUnits(readPrefixes, true, false);
1203 if (write)
1204 addUnits(writePrefixes, false, true);
1205 if (units.empty()) {
1206 std::cout << "aggbandwidth: no matching units present for width " << width
1207 << "\n";
1208 return;
1209 }
1210
1211 auto wallStart = std::chrono::high_resolution_clock::now();
1212 // Launch sequentially.
1213 for (auto &u : units) {
1214 uint64_t devPtr = reinterpret_cast<uint64_t>(u.region->getDevicePtr());
1215 u.cmd->write(0x10, devPtr);
1216 u.cmd->write(0x18, xferCount);
1217 u.cmd->write(0x20, 1);
1218 u.start = std::chrono::high_resolution_clock::now();
1219 u.launched = true;
1220 }
1221
1222 // Poll all until complete.
1223 const uint64_t timeoutLoops = 200000; // ~10s at 50us sleep
1224 uint64_t loops = 0;
1225 while (true) {
1226 bool allDone = true;
1227 for (auto &u : units) {
1228 if (u.done)
1229 continue;
1230 if (u.resp->readInt() == xferCount) {
1231 auto end = std::chrono::high_resolution_clock::now();
1232 u.duration_us =
1233 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1234 .count();
1235 u.cycleCount = u.cycles->readInt();
1236 u.done = true;
1237 } else {
1238 allDone = false;
1239 }
1240 }
1241 if (allDone)
1242 break;
1243 if (++loops >= timeoutLoops)
1244 throw std::runtime_error("aggbandwidth: timeout");
1245 std::this_thread::sleep_for(std::chrono::microseconds(50));
1246 }
1247 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1248 std::chrono::high_resolution_clock::now() - wallStart)
1249 .count();
1250
1251 uint64_t totalBytes = 0;
1252 uint64_t totalReadBytes = 0;
1253 uint64_t totalWriteBytes = 0;
1254 for (auto &u : units) {
1255 totalBytes += u.bytes;
1256 if (u.isRead)
1257 totalReadBytes += u.bytes;
1258 if (u.isWrite)
1259 totalWriteBytes += u.bytes;
1260 double unitBps = (double)u.bytes * 1e6 / (double)u.duration_us;
1261 std::cout << "[agg-unit] " << u.prefix << "[" << width << "] "
1262 << (u.isRead ? "READ" : (u.isWrite ? "WRITE" : "UNK"))
1263 << " bytes=" << humanBytes(u.bytes) << " (" << u.bytes << " B)"
1264 << " time=" << humanTimeUS(u.duration_us) << " (" << u.duration_us
1265 << " us) cycles=" << u.cycleCount
1266 << " throughput=" << formatBandwidth(unitBps) << std::endl;
1267 }
1268 // Compute aggregate bandwidths as total size / total wall time (not sum of
1269 // unit throughputs).
1270 double aggReadBps =
1271 totalReadBytes ? (double)totalReadBytes * 1e6 / (double)wallUs : 0.0;
1272 double aggWriteBps =
1273 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (double)wallUs : 0.0;
1274 double aggCombinedBps =
1275 totalBytes ? (double)totalBytes * 1e6 / (double)wallUs : 0.0;
1276
1277 std::cout << "[agg-total] units=" << units.size()
1278 << " read_bytes=" << humanBytes(totalReadBytes) << " ("
1279 << totalReadBytes << " B)"
1280 << " read_bw=" << formatBandwidth(aggReadBps)
1281 << " write_bytes=" << humanBytes(totalWriteBytes) << " ("
1282 << totalWriteBytes << " B)"
1283 << " write_bw=" << formatBandwidth(aggWriteBps)
1284 << " combined_bytes=" << humanBytes(totalBytes) << " ("
1285 << totalBytes << " B)"
1286 << " combined_bw=" << formatBandwidth(aggCombinedBps)
1287 << " wall_time=" << humanTimeUS(wallUs) << " (" << wallUs << " us)"
1288 << std::endl;
1289 logger.info("esitester", "Aggregate hostmem bandwidth test complete");
1290}
1291
/// One parallel-window argument frame for StreamingAdder.
/// The SystemVerilog-side struct is
///   { add_amt: UInt(32), input: UInt(32), last: UInt(8) }
/// and the host-side member order is the reverse of that declaration order.
/// Packing is forced to 1 so the frame is exactly 9 bytes with no padding.
#pragma pack(push, 1)
struct StreamingAddArg {
  uint8_t last;    // nonzero on the final element of the list
  uint32_t input;  // list element
  uint32_t addAmt; // amount to add, repeated in every frame
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddArg) ==
                  sizeof(uint8_t) + 2 * sizeof(uint32_t),
              "StreamingAddArg must be 9 bytes packed");
1304
/// One parallel-window result frame from StreamingAdder.
/// The SystemVerilog-side struct is
///   { data: UInt(32), last: UInt(8) }
/// and the host-side member order is the reverse of that declaration order.
/// Packing is forced to 1 so the frame is exactly 5 bytes with no padding.
#pragma pack(push, 1)
struct StreamingAddResult {
  uint8_t last;  // nonzero on the final element of the list
  uint32_t data; // result element
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddResult) == sizeof(uint8_t) + sizeof(uint32_t),
              "StreamingAddResult must be 5 bytes packed");
1316
1317/// Test the StreamingAdder module. This module takes a struct containing
1318/// an add_amt and a list of uint32s, adds add_amt to each element, and
1319/// returns the resulting list. The data is streamed using windowed types.
1321 uint32_t addAmt, uint32_t numItems) {
1322 Logger &logger = conn->getLogger();
1323 logger.info("esitester", "Starting streaming add test with add_amt=" +
1324 std::to_string(addAmt) +
1325 ", num_items=" + std::to_string(numItems));
1326
1327 // Generate random input data.
1328 std::mt19937 rng(0xDEADBEEF);
1329 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1330 std::vector<uint32_t> inputData;
1331 inputData.reserve(numItems);
1332 for (uint32_t i = 0; i < numItems; ++i)
1333 inputData.push_back(dist(rng));
1334
1335 // Find the streaming_adder child.
1336 auto streamingAdderChild =
1337 accel->getChildren().find(AppID("streaming_adder"));
1338 if (streamingAdderChild == accel->getChildren().end())
1339 throw std::runtime_error(
1340 "Streaming add test: no 'streaming_adder' child found");
1341
1342 auto &ports = streamingAdderChild->second->getPorts();
1343 auto addIter = ports.find(AppID("streaming_add"));
1344 if (addIter == ports.end())
1345 throw std::runtime_error(
1346 "Streaming add test: no 'streaming_add' port found");
1347
1348 // Get the raw read/write channel ports for the windowed function.
1349 // The argument channel expects parallel windowed data where each message
1350 // contains: struct { add_amt: UInt(32), input: UInt(32), last: bool }
1351 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1352 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1353
1354 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1355 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1356
1357 // Send each list element with add_amt repeated in every message.
1358 for (size_t i = 0; i < inputData.size(); ++i) {
1359 StreamingAddArg arg;
1360 arg.addAmt = addAmt;
1361 arg.input = inputData[i];
1362 arg.last = (i == inputData.size() - 1) ? 1 : 0;
1363 argPort.write(
1364 MessageData(reinterpret_cast<const uint8_t *>(&arg), sizeof(arg)));
1365 logger.debug("esitester", "Sent {add_amt=" + std::to_string(arg.addAmt) +
1366 ", input=" + std::to_string(arg.input) +
1367 ", last=" + (arg.last ? "true" : "false") +
1368 "}");
1369 }
1370
1371 // Read the result list (also windowed).
1372 std::vector<uint32_t> results;
1373 bool lastSeen = false;
1374 while (!lastSeen) {
1375 MessageData resMsg;
1376 resultPort.read(resMsg);
1377 if (resMsg.getSize() < sizeof(StreamingAddResult))
1378 throw std::runtime_error(
1379 "Streaming add test: unexpected result message size");
1380
1381 const auto *res =
1382 reinterpret_cast<const StreamingAddResult *>(resMsg.getBytes());
1383 lastSeen = res->last != 0;
1384 results.push_back(res->data);
1385 logger.debug("esitester", "Received result=" + std::to_string(res->data) +
1386 " (last=" + (lastSeen ? "true" : "false") +
1387 ")");
1388 }
1389
1390 // Verify results.
1391 if (results.size() != inputData.size())
1392 throw std::runtime_error(
1393 "Streaming add test: result size mismatch. Expected " +
1394 std::to_string(inputData.size()) + ", got " +
1395 std::to_string(results.size()));
1396
1397 bool passed = true;
1398 std::cout << "Streaming add test results:" << std::endl;
1399 for (size_t i = 0; i < inputData.size(); ++i) {
1400 uint32_t expected = inputData[i] + addAmt;
1401 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1402 << " = " << results[i] << " (expected " << expected << ")";
1403 if (results[i] != expected) {
1404 std::cout << " MISMATCH!";
1405 passed = false;
1406 }
1407 std::cout << std::endl;
1408 }
1409
1410 argPort.disconnect();
1411 resultPort.disconnect();
1412
1413 if (!passed)
1414 throw std::runtime_error("Streaming add test failed: result mismatch");
1415
1416 logger.info("esitester", "Streaming add test passed");
1417 std::cout << "Streaming add test passed" << std::endl;
1418}
1419
/// Test the StreamingAdder module using message translation.
/// This version uses the list translation support, where the host-side
/// message format is:
///   Argument: { input_length (8 bytes), add_amt (4 bytes), input_data[] }
///   Result: { data_length (8 bytes), data[] }
/// The translation layer automatically converts between this format and the
/// parallel windowed frames used by the hardware.
1426
1427/// Translated argument struct for StreamingAdder.
1428/// Memory layout (standard C struct ordering, fields in declaration order):
1429/// ESI type: struct { add_amt: UInt(32), input: List<UInt(32)> }
1430/// becomes host struct:
1431/// { input_length (size_t, 8 bytes on 64-bit), add_amt (uint32_t),
1432/// input_data[] }
1433/// Note: The translation layer handles the conversion between this C struct
1434/// layout and the hardware's SystemVerilog frame format.
1435/// Note: size_t is used for list lengths, so this format is platform-dependent.
1436#pragma pack(push, 1)
1439 uint32_t addAmt;
1440 // Trailing array data follows immediately after the struct in memory.
1441 // Use inputData() accessor to access it.
1442
1443 /// Get pointer to trailing input data array.
1444 uint32_t *inputData() { return reinterpret_cast<uint32_t *>(this + 1); }
1445 const uint32_t *inputData() const {
1446 return reinterpret_cast<const uint32_t *>(this + 1);
1447 }
1448 /// Get span view of input data (requires inputLength to be set first).
1449 std::span<uint32_t> inputDataSpan() { return {inputData(), inputLength}; }
1450 std::span<const uint32_t> inputDataSpan() const {
1451 return {inputData(), inputLength};
1452 }
1453
1454 static size_t allocSize(size_t numItems) {
1455 return sizeof(StreamingAddTranslatedArg) + numItems * sizeof(uint32_t);
1456 }
1457};
1458#pragma pack(pop)
1459
1460/// Translated result struct for StreamingAdder.
1461/// Memory layout:
1462/// struct { data: List<UInt(32)> }
1463/// becomes:
1464/// { data_length (size_t, 8 bytes on 64-bit), data[] }
1465#pragma pack(push, 1)
1468 // Trailing array data follows immediately after the struct in memory.
1469
1470 /// Get pointer to trailing result data array.
1471 uint32_t *data() { return reinterpret_cast<uint32_t *>(this + 1); }
1472 const uint32_t *data() const {
1473 return reinterpret_cast<const uint32_t *>(this + 1);
1474 }
1475 /// Get span view of result data (requires dataLength to be set first).
1476 std::span<uint32_t> dataSpan() { return {data(), dataLength}; }
1477 std::span<const uint32_t> dataSpan() const { return {data(), dataLength}; }
1478
1479 static size_t allocSize(size_t numItems) {
1480 return sizeof(StreamingAddTranslatedResult) + numItems * sizeof(uint32_t);
1481 }
1482};
1483#pragma pack(pop)
1484
1486 Accelerator *accel, uint32_t addAmt,
1487 uint32_t numItems) {
1488 Logger &logger = conn->getLogger();
1489 logger.info("esitester",
1490 "Starting streaming add test (translated) with add_amt=" +
1491 std::to_string(addAmt) +
1492 ", num_items=" + std::to_string(numItems));
1493
1494 // Generate random input data.
1495 std::mt19937 rng(0xDEADBEEF);
1496 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1497 std::vector<uint32_t> inputData;
1498 inputData.reserve(numItems);
1499 for (uint32_t i = 0; i < numItems; ++i)
1500 inputData.push_back(dist(rng));
1501
1502 // Find the streaming_adder child.
1503 auto streamingAdderChild =
1504 accel->getChildren().find(AppID("streaming_adder"));
1505 if (streamingAdderChild == accel->getChildren().end())
1506 throw std::runtime_error(
1507 "Streaming add test: no 'streaming_adder' child found");
1508
1509 auto &ports = streamingAdderChild->second->getPorts();
1510 auto addIter = ports.find(AppID("streaming_add"));
1511 if (addIter == ports.end())
1512 throw std::runtime_error(
1513 "Streaming add test: no 'streaming_add' port found");
1514
1515 // Get the raw read/write channel ports with translation enabled (default).
1516 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1517 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1518
1519 // Connect with translation enabled (the default).
1520 argPort.connect();
1521 resultPort.connect();
1522
1523 // Allocate the argument struct with proper alignment for the struct members.
1524 // We use aligned_alloc to ensure the buffer meets alignment requirements.
1525 size_t argSize = StreamingAddTranslatedArg::allocSize(numItems);
1526 constexpr size_t alignment = alignof(StreamingAddTranslatedArg);
1527 // aligned_alloc requires size to be a multiple of alignment
1528 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1529 void *argRaw = alignedAllocCompat(alignment, allocSize);
1530 if (!argRaw)
1531 throw std::bad_alloc();
1532 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1533 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1534 auto *arg = static_cast<StreamingAddTranslatedArg *>(argRaw);
1535 arg->inputLength = numItems;
1536 arg->addAmt = addAmt;
1537 for (uint32_t i = 0; i < numItems; ++i)
1538 arg->inputData()[i] = inputData[i];
1539
1540 logger.debug("esitester",
1541 "Sending translated argument: " + std::to_string(argSize) +
1542 " bytes, list_length=" + std::to_string(arg->inputLength) +
1543 ", add_amt=" + std::to_string(arg->addAmt));
1544
1545 // Send the complete message - translation will split it into frames.
1546 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize));
1547 // argBuffer automatically freed when it goes out of scope
1548
1549 // Read the translated result.
1550 MessageData resMsg;
1551 resultPort.read(resMsg);
1552
1553 logger.debug("esitester", "Received translated result: " +
1554 std::to_string(resMsg.getSize()) + " bytes");
1555
1556 if (resMsg.getSize() < sizeof(StreamingAddTranslatedResult))
1557 throw std::runtime_error(
1558 "Streaming add test (translated): result too small");
1559
1560 const auto *result =
1561 reinterpret_cast<const StreamingAddTranslatedResult *>(resMsg.getBytes());
1562
1563 if (resMsg.getSize() <
1564 StreamingAddTranslatedResult::allocSize(result->dataLength))
1565 throw std::runtime_error(
1566 "Streaming add test (translated): result data truncated");
1567
1568 // Verify results.
1569 if (result->dataLength != inputData.size())
1570 throw std::runtime_error(
1571 "Streaming add test (translated): result size mismatch. Expected " +
1572 std::to_string(inputData.size()) + ", got " +
1573 std::to_string(result->dataLength));
1574
1575 bool passed = true;
1576 std::cout << "Streaming add test results:" << std::endl;
1577 for (size_t i = 0; i < inputData.size(); ++i) {
1578 uint32_t expected = inputData[i] + addAmt;
1579 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1580 << " = " << result->data()[i] << " (expected " << expected << ")";
1581 if (result->data()[i] != expected) {
1582 std::cout << " MISMATCH!";
1583 passed = false;
1584 }
1585 std::cout << std::endl;
1586 }
1587
1588 argPort.disconnect();
1589 resultPort.disconnect();
1590
1591 if (!passed)
1592 throw std::runtime_error(
1593 "Streaming add test (translated) failed: result mismatch");
1594
1595 logger.info("esitester", "Streaming add test passed (translated)");
1596 std::cout << "Streaming add test passed" << std::endl;
1597}
1598
/// Test the CoordTranslator module using message translation.
/// This version uses the list translation support, where the host-side
/// message format is:
///   Argument: { coords_length, y_translation, x_translation, coords[] }
///   Result: { coords_length, coords[] }
/// Each coord is a struct { x, y }, stored with y before x in host memory.
1604
/// A single 2-D coordinate as exchanged with CoordTranslator.
/// SV ordering puts the last declared field first in memory, so `y` is stored
/// before `x`.
#pragma pack(push, 1)
struct Coord {
  uint32_t y; // first in memory
  uint32_t x; // second in memory
};
#pragma pack(pop)
static_assert(sizeof(Coord) == 2 * sizeof(uint32_t),
              "Coord must be 8 bytes packed");
1614
1615/// Translated argument struct for CoordTranslator.
1616/// Memory layout (standard C struct ordering):
1617/// ESI type: struct { x_translation: UInt(32), y_translation: UInt(32),
1618/// coords: List<struct{x, y}> }
1619/// becomes host struct:
1620/// { coords_length (size_t, 8 bytes on 64-bit), y_translation (uint32_t),
1621/// x_translation (uint32_t), coords[] }
1622/// Note: Fields are in reverse order due to SV struct ordering.
1623/// Note: size_t is used for list lengths, so this format is platform-dependent.
1624#pragma pack(push, 1)
1627 uint32_t yTranslation; // SV ordering: last declared field first in memory
1629 // Trailing array data follows immediately after the struct in memory.
1630
1631 /// Get pointer to trailing coords array.
1632 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1633 const Coord *coords() const {
1634 return reinterpret_cast<const Coord *>(this + 1);
1635 }
1636 /// Get span view of coords (requires coordsLength to be set first).
1637 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1638 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1639
1640 static size_t allocSize(size_t numCoords) {
1641 return sizeof(CoordTranslateArg) + numCoords * sizeof(Coord);
1642 }
1643};
1644#pragma pack(pop)
1645
1646/// Translated result struct for CoordTranslator.
1647/// Memory layout:
1648/// ESI type: List<struct{x, y}>
1649/// becomes host struct:
1650/// { coords_length (size_t, 8 bytes on 64-bit), coords[] }
1651#pragma pack(push, 1)
1654 // Trailing array data follows immediately after the struct in memory.
1655
1656 /// Get pointer to trailing coords array.
1657 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1658 const Coord *coords() const {
1659 return reinterpret_cast<const Coord *>(this + 1);
1660 }
1661 /// Get span view of coords (requires coordsLength to be set first).
1662 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1663 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1664
1665 static size_t allocSize(size_t numCoords) {
1666 return sizeof(CoordTranslateResult) + numCoords * sizeof(Coord);
1667 }
1668};
1669#pragma pack(pop)
1670
1672 uint32_t xTrans, uint32_t yTrans,
1673 uint32_t numCoords) {
1674 Logger &logger = conn->getLogger();
1675 logger.info("esitester", "Starting coord translate test with x_trans=" +
1676 std::to_string(xTrans) +
1677 ", y_trans=" + std::to_string(yTrans) +
1678 ", num_coords=" + std::to_string(numCoords));
1679
1680 // Generate random input coordinates.
1681 // Note: Coord struct has y before x due to SV ordering, but we generate
1682 // and display as (x, y) for human readability.
1683 std::mt19937 rng(0xDEADBEEF);
1684 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1685 std::vector<Coord> inputCoords;
1686 inputCoords.reserve(numCoords);
1687 for (uint32_t i = 0; i < numCoords; ++i) {
1688 Coord c;
1689 c.x = dist(rng);
1690 c.y = dist(rng);
1691 inputCoords.push_back(c);
1692 }
1693
1694 // Find the coord_translator child.
1695 auto coordTranslatorChild =
1696 accel->getChildren().find(AppID("coord_translator"));
1697 if (coordTranslatorChild == accel->getChildren().end())
1698 throw std::runtime_error(
1699 "Coord translate test: no 'coord_translator' child found");
1700
1701 auto &ports = coordTranslatorChild->second->getPorts();
1702 auto translateIter = ports.find(AppID("translate_coords"));
1703 if (translateIter == ports.end())
1704 throw std::runtime_error(
1705 "Coord translate test: no 'translate_coords' port found");
1706
1707 // Use FuncService::Function which handles connection and translation.
1708 auto *funcPort =
1709 translateIter->second.getAs<services::FuncService::Function>();
1710 if (!funcPort)
1711 throw std::runtime_error(
1712 "Coord translate test: 'translate_coords' port not a "
1713 "FuncService::Function");
1714 funcPort->connect();
1715
1716 // Allocate the argument struct with proper alignment for the struct members.
1717 size_t argSize = CoordTranslateArg::allocSize(numCoords);
1718 constexpr size_t alignment = alignof(CoordTranslateArg);
1719 // aligned_alloc requires size to be a multiple of alignment
1720 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1721 void *argRaw = alignedAllocCompat(alignment, allocSize);
1722 if (!argRaw)
1723 throw std::bad_alloc();
1724 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1725 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1726 auto *arg = static_cast<CoordTranslateArg *>(argRaw);
1727 arg->coordsLength = numCoords;
1728 arg->xTranslation = xTrans;
1729 arg->yTranslation = yTrans;
1730 for (uint32_t i = 0; i < numCoords; ++i)
1731 arg->coords()[i] = inputCoords[i];
1732
1733 logger.debug(
1734 "esitester",
1735 "Sending coord translate argument: " + std::to_string(argSize) +
1736 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1737 ", x_trans=" + std::to_string(arg->xTranslation) +
1738 ", y_trans=" + std::to_string(arg->yTranslation));
1739
1740 // Call the function - translation happens automatically.
1741 MessageData resMsg =
1742 funcPort
1743 ->call(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize))
1744 .get();
1745 // argBuffer automatically freed when it goes out of scope
1746
1747 logger.debug("esitester", "Received coord translate result: " +
1748 std::to_string(resMsg.getSize()) + " bytes");
1749
1750 if (resMsg.getSize() < sizeof(CoordTranslateResult))
1751 throw std::runtime_error("Coord translate test: result too small");
1752
1753 const auto *result =
1754 reinterpret_cast<const CoordTranslateResult *>(resMsg.getBytes());
1755
1756 if (resMsg.getSize() < CoordTranslateResult::allocSize(result->coordsLength))
1757 throw std::runtime_error("Coord translate test: result data truncated");
1758
1759 // Verify results.
1760 if (result->coordsLength != inputCoords.size())
1761 throw std::runtime_error(
1762 "Coord translate test: result size mismatch. Expected " +
1763 std::to_string(inputCoords.size()) + ", got " +
1764 std::to_string(result->coordsLength));
1765
1766 bool passed = true;
1767 std::cout << "Coord translate test results:" << std::endl;
1768 for (size_t i = 0; i < inputCoords.size(); ++i) {
1769 uint32_t expectedX = inputCoords[i].x + xTrans;
1770 uint32_t expectedY = inputCoords[i].y + yTrans;
1771 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1772 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1773 << ") = (" << result->coords()[i].x << ","
1774 << result->coords()[i].y << ")";
1775 if (result->coords()[i].x != expectedX ||
1776 result->coords()[i].y != expectedY) {
1777 std::cout << " MISMATCH! (expected (" << expectedX << "," << expectedY
1778 << "))";
1779 passed = false;
1780 }
1781 std::cout << std::endl;
1782 }
1783
1784 if (!passed)
1785 throw std::runtime_error("Coord translate test failed: result mismatch");
1786
1787 logger.info("esitester", "Coord translate test passed");
1788 std::cout << "Coord translate test passed" << std::endl;
1789}
1790
1791//
1792// SerialCoordTranslator test
1793//
1794
// Wire-format frames for the serial coord translator argument stream. Packing
// is forced to 1 so the structs contain no compiler-inserted padding.
// NOTE(review): the struct opening lines and two header members (listing lines
// 1796, 1798-1799, 1802) are not present in this view -- confirm against the
// original source.
1795#pragma pack(push, 1)
// Number of coordinate frames that follow this header. A count of 0 is what
// serialCoordTranslateTest sends (and waits for) as the end-of-list marker.
1797 uint16_t coordsCount;
1800};
// The 2-byte count plus the remaining header members must total 10 bytes.
1801static_assert(sizeof(SerialCoordHeader) == 10, "Size mismatch");
// Constructor takes (x, y), but the members below are stored y first, then x.
1803 SerialCoordData(uint32_t x, uint32_t y) : _pad_head(0), y(y), x(x) {}
// Always zero-initialized; presumably occupies the bytes where the header
// keeps coordsCount so both frame kinds share one size -- TODO confirm.
1804 uint16_t _pad_head;
1805 uint32_t y;
1806 uint32_t x;
1807};
// A data frame must be exactly as large as a header frame.
1808static_assert(sizeof(SerialCoordData) == sizeof(SerialCoordHeader),
1809 "Size mismatch");
1810#pragma pack(pop)
1811
1812// Note: this application is intended to test hardware. As such, we need
1813// to be able to send batches. So this is not the typical way one would define a
1814// message struct. It's closer to a streaming style.
// NOTE(review): the class opening line, the `header` member and listing lines
// 1821-1825 are not present in this view -- confirm against the original
// source.
1816private:
// Coordinate frames of this batch, stored contiguously so they can be exposed
// as a single segment.
1818 std::vector<SerialCoordData> coords;
1819
1820public:
// Setter/getter pairs share one name and are distinguished by arity.
1826 void yTranslation(uint32_t yTrans) { header.yTranslation = yTrans; }
1827 uint32_t yTranslation() const { return header.yTranslation; }
1828 void xTranslation(uint32_t xTrans) { header.xTranslation = xTrans; }
1829 uint32_t xTranslation() const { return header.xTranslation; }
/// Append one coordinate and refresh the header's count to match.
/// The count is narrowed to 16 bits, so it wraps silently once more than
/// 65535 coordinates have been appended to a single batch.
1830 void appendCoord(uint32_t x, uint32_t y) {
1831 coords.emplace_back(x, y);
1832 header.coordsCount = (uint16_t)coords.size();
1833 }
1834 const std::vector<SerialCoordData> &getCoords() const { return coords; }
1835
/// The message is always exposed as exactly two segments.
1836 size_t numSegments() const override { return 2; }
/// Segment 0 is the raw bytes of the header; segment 1 is the raw bytes of the
/// coords array (zero-length when no coords were appended). Both point into
/// this object's own members. Any other index throws std::out_of_range.
1837 Segment segment(size_t idx) const override {
1838 if (idx == 0)
1839 return {reinterpret_cast<const uint8_t *>(&header), sizeof(header)};
1840 else if (idx == 1)
1841 return {reinterpret_cast<const uint8_t *>(coords.data()),
1842 coords.size() * sizeof(SerialCoordData)};
1843 else
1844 throw std::out_of_range("SerialCoordInput: invalid segment index");
1845 }
1846};
1847
// Wire-format frames read back from the serial coord translator result port.
// NOTE(review): the struct opening lines and the SerialCoordOutputFrame
// definition itself (listing lines 1849, 1853, 1857-1860) are not present in
// this view -- confirm against the original source.
1848#pragma pack(push, 1)
// Header frame: 6 pad bytes followed by the 2-byte count, 8 bytes in total.
1850 uint8_t _pad[6];
// Number of data frames that follow; serialCoordTranslateTest treats 0 as the
// end-of-list marker.
1851 uint16_t coordsCount;
1852};
// Data frame: two 4-byte values, y stored before x, also 8 bytes in total.
1854 uint32_t y;
1855 uint32_t x;
1856};
1861#pragma pack(pop)
// Every result message is expected to be exactly one 8-byte frame.
1862static_assert(sizeof(SerialCoordOutputFrame) == 8, "Size mismatch");
1863
1865 Accelerator *accel, uint32_t xTrans,
1866 uint32_t yTrans, uint32_t numCoords,
1867 size_t batchSizeLimit) {
1868 Logger &logger = conn->getLogger();
1869 logger.info("esitester", "Starting serial coord translate test");
1870
1871 // Generate random coordinates.
1872 std::mt19937 rng(0xDEADBEEF);
1873 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1874 std::vector<Coord> inputCoords;
1875 inputCoords.reserve(numCoords);
1876 for (uint32_t i = 0; i < numCoords; ++i)
1877 inputCoords.push_back({dist(rng), dist(rng)});
1878
1879 auto child = accel->getChildren().find(AppID("coord_translator_serial"));
1880 if (child == accel->getChildren().end())
1881 throw std::runtime_error("Serial coord translate test: no "
1882 "'coord_translator_serial' child found");
1883
1884 auto &ports = child->second->getPorts();
1885 auto portIter = ports.find(AppID("translate_coords_serial"));
1886 if (portIter == ports.end())
1887 throw std::runtime_error(
1888 "Serial coord translate test: no 'translate_coords_serial' port found");
1889
1890 TypedWritePort<SerialCoordInput, /*SkipTypeCheck=*/true> argPort(
1891 portIter->second.getRawWrite("arg"));
1892 ReadChannelPort &resultPort = portIter->second.getRawRead("result");
1893
1894 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1895 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1896 // The serial window reply is emitted as many raw frames. This test writes
1897 // all request batches before draining the result stream, so the default
1898 // polling queue depth can fill up and backpressure the DMA engine.
1899 resultPort.setMaxDataQueueMsgs(0);
1900
1901 size_t sent = 0;
1902 while (sent < numCoords) {
1903 size_t batchSize = std::min(batchSizeLimit, numCoords - sent);
1904
1905 // Send Header. Only the first header needs the translation values, test the
1906 // subsequent ones with zero translation to verify that the hardware
1907 // correctly applies the first header's translation to the whole list.
1908 auto batch = std::make_unique<SerialCoordInput>();
1909 batch->xTranslation(sent == 0 ? xTrans : 0);
1910 batch->yTranslation(sent == 0 ? yTrans : 0);
1911 // Send Data
1912 for (size_t i = 0; i < batchSize; ++i) {
1913 batch->appendCoord(inputCoords[sent + i].x, inputCoords[sent + i].y);
1914 }
1915 argPort.write(batch);
1916 sent += batchSize;
1917 }
1918 // Send final header with count=0 to signal end of input
1919 auto footerData = std::make_unique<SerialCoordInput>();
1920 argPort.write(footerData);
1921
1922 // Read results. The hardware echoes headers (with count) followed by
1923 // translated data frames, then autonomously sends a footer header with
1924 // count=0 to signal end of list.
1925 std::vector<Coord> results;
1926 while (true) {
1927 // Read Header
1928 MessageData msg;
1929 resultPort.read(msg);
1930 if (msg.getSize() != sizeof(SerialCoordOutputFrame))
1931 throw std::runtime_error("Unexpected result message size");
1932
1933 const auto *frame =
1934 reinterpret_cast<const SerialCoordOutputFrame *>(msg.getBytes());
1935 uint16_t batchCount = frame->header.coordsCount;
1936 if (batchCount == 0)
1937 break;
1938
1939 // Read Data
1940 for (uint16_t i = 0; i < batchCount; ++i) {
1941 resultPort.read(msg);
1942 if (msg.getSize() != sizeof(SerialCoordOutputFrame))
1943 throw std::runtime_error("Unexpected result message size");
1944 const auto *dFrame =
1945 reinterpret_cast<const SerialCoordOutputFrame *>(msg.getBytes());
1946 results.push_back({dFrame->data.y, dFrame->data.x});
1947 }
1948 }
1949
1950 // Verify
1951 bool passed = true;
1952 std::cout << "Serial coord translate test results:" << std::endl;
1953 if (results.size() != inputCoords.size()) {
1954 std::cout << "Result size mismatch. Expected " << inputCoords.size()
1955 << ", got " << results.size() << std::endl;
1956 passed = false;
1957 }
1958 for (size_t i = 0; i < std::min(inputCoords.size(), results.size()); ++i) {
1959 uint32_t expX = inputCoords[i].x + xTrans;
1960 uint32_t expY = inputCoords[i].y + yTrans;
1961 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1962 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1963 << ") = (" << results[i].x << "," << results[i].y
1964 << ") (expected (" << expX << "," << expY << "))";
1965 if (results[i].x != expX || results[i].y != expY) {
1966 std::cout << " MISMATCH!";
1967 passed = false;
1968 }
1969 std::cout << std::endl;
1970 }
1971
1972 argPort.disconnect();
1973 resultPort.disconnect();
1974
1975 if (!passed)
1976 throw std::runtime_error("Serial coord translate test failed");
1977
1978 logger.info("esitester", "Serial coord translate test passed");
1979 std::cout << "Serial coord translate test passed" << std::endl;
1980}
1981
1983 uint32_t iterations) {
1984 Logger &logger = conn->getLogger();
1985
1986 auto channelChild = accel->getChildren().find(AppID("channel_test"));
1987 if (channelChild == accel->getChildren().end())
1988 throw std::runtime_error("Channel test: no 'channel_test' child");
1989 auto &ports = channelChild->second->getPorts();
1990
1991 // --- Get the MMIO port to trigger the producer ---
1992 auto cmdIter = ports.find(AppID("cmd"));
1993 if (cmdIter == ports.end())
1994 throw std::runtime_error("Channel test: no 'cmd' port");
1995 auto *cmdMMIO = cmdIter->second.getAs<services::MMIO::MMIORegion>();
1996 if (!cmdMMIO)
1997 throw std::runtime_error("Channel test: 'cmd' is not MMIO");
1998
1999 // --- Get the producer to_host port ---
2000 auto producerIter = ports.find(AppID("producer"));
2001 if (producerIter == ports.end())
2002 throw std::runtime_error("Channel test: no 'producer' port");
2003 auto *producerPort =
2004 producerIter->second.getAs<services::ChannelService::ToHost>();
2005 if (!producerPort)
2006 throw std::runtime_error(
2007 "Channel test: 'producer' is not a ChannelService::ToHost");
2008 producerPort->connect();
2009
2010 // --- Test to_host: MMIO-triggered incrementing values ---
2011 // Write the number of values to send at offset 0x0.
2012 cmdMMIO->write(0x0, iterations);
2013
2014 for (uint32_t i = 0; i < iterations; ++i) {
2015 MessageData recvData = producerPort->read().get();
2016 uint32_t got = *recvData.as<uint32_t>();
2017 std::cout << "[channel] producer i=" << i << " got=" << got << std::endl;
2018 if (got != i)
2019 throw std::runtime_error("Channel producer: expected " +
2020 std::to_string(i) + ", got " +
2021 std::to_string(got));
2022 }
2023 logger.info("esitester", "Channel test: producer passed (" +
2024 std::to_string(iterations) +
2025 " incrementing values)");
2026
2027 // --- Test from_host -> to_host loopback ---
2028 auto loopbackInIter = ports.find(AppID("loopback_in"));
2029 if (loopbackInIter == ports.end())
2030 throw std::runtime_error("Channel test: no 'loopback_in' port");
2031 auto *fromHostPort =
2032 loopbackInIter->second.getAs<services::ChannelService::FromHost>();
2033 if (!fromHostPort)
2034 throw std::runtime_error(
2035 "Channel test: 'loopback_in' is not a ChannelService::FromHost");
2036 fromHostPort->connect();
2037
2038 auto loopbackOutIter = ports.find(AppID("loopback_out"));
2039 if (loopbackOutIter == ports.end())
2040 throw std::runtime_error("Channel test: no 'loopback_out' port");
2041 auto *loopbackOutPort =
2042 loopbackOutIter->second.getAs<services::ChannelService::ToHost>();
2043 if (!loopbackOutPort)
2044 throw std::runtime_error(
2045 "Channel test: 'loopback_out' is not a ChannelService::ToHost");
2046 loopbackOutPort->connect();
2047
2048 std::mt19937_64 rng(0xDEADBEEF);
2049 std::uniform_int_distribution<uint32_t> dist(0, UINT32_MAX);
2050
2051 for (uint32_t i = 0; i < iterations; ++i) {
2052 uint32_t sendVal = dist(rng);
2053 fromHostPort->write(MessageData::from(sendVal));
2054 MessageData recvData = loopbackOutPort->read().get();
2055 uint32_t recvVal = *recvData.as<uint32_t>();
2056 std::cout << "[channel] loopback i=" << i << " sent=0x"
2057 << esi::toHex(sendVal) << " recv=0x" << esi::toHex(recvVal)
2058 << std::endl;
2059 if (recvVal != sendVal)
2060 throw std::runtime_error("Channel loopback mismatch at i=" +
2061 std::to_string(i));
2062 }
2063
2064 logger.info("esitester", "Channel test: loopback passed (" +
2065 std::to_string(iterations) + " iterations)");
2066 std::cout << "Channel test passed" << std::endl;
2067}
static void print(TypedAttr val, llvm::raw_ostream &os)
static void writePort(uint16_t port)
Write the port number to a file.
Definition RpcServer.cpp:39
Abstract class representing a connection to an accelerator.
Definition Accelerator.h:89
Top level accelerator class.
Definition Accelerator.h:70
Services provide connections to 'bundles' – collections of named, unidirectional communication channe...
Definition Ports.h:456
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
Definition Ports.h:484
ReadChannelPort & getRawRead(const std::string &name) const
Definition Ports.cpp:52
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Definition Ports.cpp:42
Common options and code for ESI runtime tools.
Definition CLI.h:29
Context & getContext()
Get the context.
Definition CLI.h:69
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
Definition CLI.h:66
int esiParse(int argc, const char **argv)
Run the parser.
Definition CLI.h:52
AcceleratorConnections, Accelerators, and Manifests must all share a context.
Definition Context.h:34
Logger & getLogger()
Definition Context.h:69
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
Definition Design.h:71
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
Definition Logging.h:64
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
Definition Logging.h:75
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Definition Logging.h:83
Class to parse a manifest.
Definition Manifest.h:39
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A logical chunk of data representing serialized data.
Definition Common.h:113
const uint8_t * getBytes() const
Definition Common.h:124
const T * as() const
Cast to a type.
Definition Common.h:148
size_t getSize() const
Get the size of the data in bytes.
Definition Common.h:138
static MessageData from(T &t)
Cast from a type to its raw bytes.
Definition Common.h:158
A ChannelPort which reads data from the accelerator.
Definition Ports.h:341
virtual void connect(std::function< bool(MessageData)> callback, const ConnectOptions &options={})
Definition Ports.cpp:69
void setMaxDataQueueMsgs(uint64_t maxMsgs)
Set maximum number of messages to store in the dataQueue.
Definition Ports.h:393
virtual void disconnect() override
Definition Ports.h:346
virtual void read(MessageData &outData)
Specify a buffer to read into.
Definition Ports.h:381
Abstract multi-segment message.
Definition Common.h:190
void connect(const ChannelPort::ConnectOptions &opts={})
Definition TypedPorts.h:233
void write(const T &data)
Definition TypedPorts.h:242
A ChannelPort which sends data to the accelerator.
Definition Ports.h:215
virtual void disconnect() override
Definition Ports.h:226
void write(const MessageData &data)
A very basic blocking write API.
Definition Ports.h:231
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
Definition Ports.h:219
A function call which gets attached to a service port.
Definition Services.h:405
A port which writes data to the accelerator (from_host).
Definition Services.h:315
A port which reads data from the accelerator (to_host).
Definition Services.h:291
A function call which gets attached to a service port.
Definition Services.h:353
virtual void start()
In cases where necessary, enable host memory services.
Definition Services.h:261
A "slice" of some parent MMIO space.
Definition Services.h:181
Information about the Accelerator system.
Definition Services.h:113
A telemetry port which gets attached to a service port.
Definition Services.h:470
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
Definition Services.cpp:459
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
static void serialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords, size_t batchSizeLimit)
constexpr std::array< uint32_t, 5 > defaultWidths
Definition esitester.cpp:78
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static void channelTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static std::string formatBandwidth(double bytesPerSec)
Definition esitester.cpp:90
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static std::string defaultWidthsStr()
Definition esitester.cpp:79
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Definition debug.py:1
Definition esi.py:1
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
Definition Logging.cpp:132
std::string toHex(void *val)
Definition Common.cpp:37
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
uint32_t x
uint32_t y
SerialCoordData(uint32_t x, uint32_t y)
size_t numSegments() const override
Number of segments in the message.
void appendCoord(uint32_t x, uint32_t y)
uint32_t yTranslation() const
SerialCoordHeader header
void yTranslation(uint32_t yTrans)
uint32_t xTranslation() const
Segment segment(size_t idx) const override
Get a segment by index.
const std::vector< SerialCoordData > & getCoords() const
void xTranslation(uint32_t xTrans)
std::vector< SerialCoordData > coords
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
A contiguous, non-owning view of bytes within a SegmentedMessageData.
Definition Common.h:175
size_t size
Definition Common.h:177
RAII memory region for host memory.
Definition Services.h:237
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
Definition Services.h:243
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
Definition Services.h:251
virtual std::size_t getSize() const =0
SerialCoordOutputData data
SerialCoordOutputHeader header