CIRCT 23.0.0git
Loading...
Searching...
No Matches
esitester.cpp
Go to the documentation of this file.
1//===- esitester.cpp - ESI accelerator test/example tool ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DO NOT EDIT!
10// This file is distributed as part of an ESI runtime package. The source for
11// this file should always be modified within CIRCT
12// (lib/dialect/ESI/runtime/cpp/tools/esitester.cpp).
13//
14//===----------------------------------------------------------------------===//
15//
16// This application isn't a utility so much as a test driver for an ESI system.
17// It is also useful as an example of how to use the ESI C++ API. esiquery.cpp
18// is also useful as an example.
19//
20//===----------------------------------------------------------------------===//
21
22#include "esi/Accelerator.h"
23#include "esi/CLI.h"
24#include "esi/Manifest.h"
25#include "esi/Services.h"
26
27#include <atomic>
28#include <chrono>
29#include <cstdlib>
30#include <future>
31#include <iostream>
32#include <map>
33#include <memory>
34#include <random>
35#include <span>
36#include <sstream>
37#include <stdexcept>
38#include <vector>
39
40using namespace esi;
41
42// Forward declarations of test functions.
44 uint32_t iterations);
46 const std::vector<uint32_t> &widths, bool write,
47 bool read);
49 uint32_t xferCount,
50 const std::vector<uint32_t> &widths, bool read,
51 bool write);
53 const std::vector<uint32_t> &widths, bool read, bool write);
55 const std::vector<uint32_t> &widths,
56 uint32_t xferCount, bool read, bool write);
58 uint32_t iterations, bool pipeline);
60 Accelerator *, uint32_t width,
61 uint32_t xferCount, bool read,
62 bool write);
64 uint32_t addAmt, uint32_t numItems);
66 uint32_t addAmt, uint32_t numItems);
68 uint32_t xTrans, uint32_t yTrans,
69 uint32_t numCoords);
71 uint32_t xTrans, uint32_t yTrans,
72 uint32_t numCoords, size_t batchSizeLimit);
74 uint32_t iterations);
75
// Default transfer widths (in bits) used by the width-parameterized tests, and
// a helper that renders them for the CLI help text.
constexpr std::array<uint32_t, 5> defaultWidths = {32, 64, 128, 256, 512};

/// Render `defaultWidths` as a comma-separated list with no spaces, e.g.
/// "32,64,128,256,512".
static std::string defaultWidthsStr() {
  std::string joined;
  for (uint32_t width : defaultWidths) {
    // Separator goes before every element except the first.
    if (!joined.empty())
      joined += ",";
    joined += std::to_string(width);
  }
  return joined;
}
87
/// Format a bandwidth with an appropriate unit.
///
/// Uses decimal (power-of-1000) steps: values >= 1e9 are shown in GB/s,
/// >= 1e6 in MB/s, >= 1e3 in KB/s, and everything else in B/s. The number is
/// always printed in fixed notation with two decimals, e.g. "1.50 KB/s".
///
/// @param bytesPerSec bandwidth in bytes per second.
/// @return the formatted "<value> <unit>" string.
static std::string formatBandwidth(double bytesPerSec) {
  struct Scale {
    double threshold;
    const char *unit;
  };
  // Ordered largest first so the first match is the best unit.
  static constexpr Scale scales[] = {
      {1e9, "GB/s"}, {1e6, "MB/s"}, {1e3, "KB/s"}};

  double value = bytesPerSec;
  const char *unit = "B/s";
  for (const Scale &scale : scales) {
    if (bytesPerSec >= scale.threshold) {
      value = bytesPerSec / scale.threshold;
      unit = scale.unit;
      break;
    }
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(2);
  oss << value << " " << unit;
  return oss.str();
}
108
/// Human-readable size from a byte count.
///
/// Divides by 1024 until the value drops below 1024 or the largest unit (TB)
/// is reached. Plain byte counts are printed without decimals ("1023 B");
/// every larger unit is printed with two decimals ("1.50 KB").
///
/// @param bytes size in bytes.
/// @return the formatted "<value> <unit>" string.
static std::string humanBytes(uint64_t bytes) {
  static constexpr const char *units[] = {"B", "KB", "MB", "GB", "TB"};
  constexpr int lastUnit = static_cast<int>(sizeof(units) / sizeof(units[0])) - 1;

  double scaled = static_cast<double>(bytes);
  int unitIdx = 0;
  while (scaled >= 1024.0 && unitIdx < lastUnit) {
    scaled /= 1024.0;
    ++unitIdx;
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(unitIdx == 0 ? 0 : 2);
  oss << scaled << " " << units[unitIdx];
  return oss.str();
}
124
/// Human-readable time from a microsecond count.
///
/// - below 1000 us: printed as an integer, e.g. "999 us";
/// - below 1000 ms: printed in ms with 2, 1 or 0 decimals for values below
///   10, below 100, and otherwise, e.g. "1.50 ms", "15.0 ms", "150 ms";
/// - otherwise: printed in seconds with 3 decimals below 10 s and 2 decimals
///   from there on, e.g. "1.000 s", "12.34 s".
///
/// @param us duration in microseconds.
/// @return the formatted "<value> <unit>" string.
static std::string humanTimeUS(uint64_t us) {
  if (us < 1000)
    return std::to_string(us) + " us";

  // Shared fixed-notation formatter for the ms and s branches.
  auto formatFixed = [](double value, int decimals, const char *suffix) {
    std::ostringstream oss;
    oss.setf(std::ios::fixed);
    oss.precision(decimals);
    oss << value << suffix;
    return oss.str();
  };

  const double ms = us / 1000.0;
  if (ms < 1000.0)
    return formatFixed(ms, ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0), " ms");

  const double sec = ms / 1000.0;
  return formatFixed(sec, sec < 10.0 ? 3 : 2, " s");
}
144
// MSVC does not implement std::aligned_alloc, even though it's part of the
// C++17 standard. Provide a compatibility layer.
//
/// Allocate `size` bytes aligned to `alignment`.
///
/// On MSVC this forwards to `_aligned_malloc`. Elsewhere it uses
/// `std::aligned_alloc`, which requires the size to be an integral multiple of
/// the alignment; the request is therefore rounded up to the next multiple so
/// callers may pass any size.
///
/// Memory returned here must be released with alignedFreeCompat().
///
/// @throws std::bad_alloc if the allocation fails or the rounded-up size
///         overflows.
static void *alignedAllocCompat(std::size_t alignment, std::size_t size) {
#if defined(_MSC_VER)
  void *ptr = _aligned_malloc(size, alignment);
#else
  std::size_t paddedSize = size;
  // A zero alignment is passed through unchanged and left for
  // std::aligned_alloc to reject.
  if (alignment != 0) {
    const std::size_t remainder = size % alignment;
    if (remainder != 0) {
      paddedSize = size + (alignment - remainder);
      if (paddedSize < size) // wrapped around
        throw std::bad_alloc();
    }
  }
  void *ptr = std::aligned_alloc(alignment, paddedSize);
#endif
  if (!ptr)
    throw std::bad_alloc();
  return ptr;
}
160
/// Release memory obtained from alignedAllocCompat().
///
/// Uses `_aligned_free` on MSVC (the counterpart of `_aligned_malloc`) and
/// `std::free` everywhere else.
static void alignedFreeCompat(void *ptr) {
#if defined(_MSC_VER)
  _aligned_free(ptr);
#else
  std::free(ptr);
#endif
}
168
169int main(int argc, const char *argv[]) {
170 CliParser cli("esitester");
171 cli.description("Test an ESI system running the ESI tester image.");
172 cli.require_subcommand(1);
173
174 CLI::App *callback_test =
175 cli.add_subcommand("callback", "initiate callback test");
176 uint32_t cb_iters = 1;
177 callback_test->add_option("-i,--iters", cb_iters,
178 "Number of iterations to run");
179
180 CLI::App *hostmemtestSub =
181 cli.add_subcommand("hostmem", "Run the host memory test");
182 bool hmRead = false;
183 bool hmWrite = false;
184 std::vector<uint32_t> hostmemWidths(defaultWidths.begin(),
185 defaultWidths.end());
186 hostmemtestSub->add_flag("-w,--write", hmWrite,
187 "Enable host memory write test");
188 hostmemtestSub->add_flag("-r,--read", hmRead, "Enable host memory read test");
189 hostmemtestSub->add_option(
190 "--widths", hostmemWidths,
191 "Hostmem test widths (default: " + defaultWidthsStr() + ")");
192
193 CLI::App *dmatestSub = cli.add_subcommand("dma", "Run the DMA test");
194 bool dmaRead = false;
195 bool dmaWrite = false;
196 std::vector<uint32_t> dmaWidths(defaultWidths.begin(), defaultWidths.end());
197 dmatestSub->add_flag("-w,--write", dmaWrite, "Enable dma write test");
198 dmatestSub->add_flag("-r,--read", dmaRead, "Enable dma read test");
199 dmatestSub->add_option("--widths", dmaWidths,
200 "DMA test widths (default: " + defaultWidthsStr() +
201 ")");
202
203 CLI::App *bandwidthSub =
204 cli.add_subcommand("bandwidth", "Run the bandwidth test");
205 uint32_t xferCount = 1000;
206 bandwidthSub->add_option("-c,--count", xferCount,
207 "Number of transfers to perform");
208 bool bandwidthRead = false;
209 bool bandwidthWrite = false;
210 std::vector<uint32_t> bandwidthWidths(defaultWidths.begin(),
211 defaultWidths.end());
212 bandwidthSub->add_option("--widths", bandwidthWidths,
213 "Width of the transfers to perform (default: " +
214 defaultWidthsStr() + ")");
215 bandwidthSub->add_flag("-w,--write", bandwidthWrite,
216 "Enable bandwidth write");
217 bandwidthSub->add_flag("-r,--read", bandwidthRead, "Enable bandwidth read");
218
219 CLI::App *hostmembwSub =
220 cli.add_subcommand("hostmembw", "Run the host memory bandwidth test");
221 uint32_t hmBwCount = 1000;
222 bool hmBwRead = false;
223 bool hmBwWrite = false;
224 std::vector<uint32_t> hmBwWidths(defaultWidths.begin(), defaultWidths.end());
225 hostmembwSub->add_option("-c,--count", hmBwCount,
226 "Number of hostmem transfers");
227 hostmembwSub->add_option(
228 "--widths", hmBwWidths,
229 "Hostmem bandwidth widths (default: " + defaultWidthsStr() + ")");
230 hostmembwSub->add_flag("-w,--write", hmBwWrite,
231 "Measure hostmem write bandwidth");
232 hostmembwSub->add_flag("-r,--read", hmBwRead,
233 "Measure hostmem read bandwidth");
234
235 CLI::App *loopbackSub =
236 cli.add_subcommand("loopback", "Test LoopbackInOutAdd function service");
237 uint32_t loopbackIters = 10;
238 bool loopbackPipeline = false;
239 loopbackSub->add_option("-i,--iters", loopbackIters,
240 "Number of function invocations (default 10)");
241 loopbackSub->add_flag("-p,--pipeline", loopbackPipeline,
242 "Pipeline all calls then collect results");
243
244 CLI::App *aggBwSub = cli.add_subcommand(
245 "aggbandwidth",
246 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
247 uint32_t aggWidth = 512;
248 uint32_t aggCount = 1000;
249 bool aggRead = false;
250 bool aggWrite = false;
251 aggBwSub->add_option(
252 "--width", aggWidth,
253 "Bit width (default 512; other widths ignored if absent)");
254 aggBwSub->add_option("-c,--count", aggCount, "Flits per unit (default 1000)");
255 aggBwSub->add_flag("-r,--read", aggRead, "Include read units");
256 aggBwSub->add_flag("-w,--write", aggWrite, "Include write units");
257
258 CLI::App *streamingAddSub = cli.add_subcommand(
259 "streaming_add", "Test StreamingAdder function service with list input");
260 uint32_t streamingAddAmt = 5;
261 uint32_t streamingNumItems = 5;
262 bool streamingTranslate = false;
263 streamingAddSub->add_option("-a,--add", streamingAddAmt,
264 "Amount to add to each element (default 5)");
265 streamingAddSub->add_option("-n,--num-items", streamingNumItems,
266 "Number of random items in the list (default 5)");
267 streamingAddSub->add_flag("-t,--translate", streamingTranslate,
268 "Use message translation (list translation)");
269
270 CLI::App *coordTranslateSub = cli.add_subcommand(
271 "translate_coords",
272 "Test CoordTranslator function service with list of coordinates");
273 uint32_t coordXTrans = 10;
274 uint32_t coordYTrans = 20;
275 uint32_t coordNumItems = 5;
276 coordTranslateSub->add_option("-x,--x-translation", coordXTrans,
277 "X translation amount (default 10)");
278 coordTranslateSub->add_option("-y,--y-translation", coordYTrans,
279 "Y translation amount (default 20)");
280 coordTranslateSub->add_option("-n,--num-coords", coordNumItems,
281 "Number of random coordinates (default 5)");
282
283 CLI::App *serialCoordTranslateSub = cli.add_subcommand(
284 "serial_coords",
285 "Test SerialCoordTranslator function service with list of coordinates");
286 uint32_t serialBatchSize = 240;
287 serialCoordTranslateSub->add_option("-x,--x-translation", coordXTrans,
288 "X translation amount (default 10)");
289 serialCoordTranslateSub->add_option("-y,--y-translation", coordYTrans,
290 "Y translation amount (default 20)");
291 serialCoordTranslateSub->add_option(
292 "-n,--num-coords", coordNumItems,
293 "Number of random coordinates (default 5)");
294 serialCoordTranslateSub
295 ->add_option("-b,--batch-size", serialBatchSize,
296 "Coordinates per header (default 240, max 65535)")
297 ->check(CLI::Range(1u, 0xFFFFu));
298
299 CLI::App *channelTestSub = cli.add_subcommand(
300 "channel", "Test ChannelService to_host and from_host");
301 uint32_t channelIters = 10;
302 channelTestSub->add_option("-i,--iters", channelIters,
303 "Number of loopback iterations (default 10)");
304
305 if (int rc = cli.esiParse(argc, argv))
306 return rc;
307 if (!cli.get_help_ptr()->empty())
308 return 0;
309
310 Context &ctxt = cli.getContext();
311 AcceleratorConnection *acc = cli.connect();
312 try {
313 const auto &info = *acc->getService<services::SysInfo>();
314 ctxt.getLogger().info("esitester", "Connected to accelerator.");
315 Manifest manifest(ctxt, info.getJsonManifest());
316 Accelerator *accel = manifest.buildAccelerator(*acc);
317 ctxt.getLogger().info("esitester", "Built accelerator.");
318 acc->getServiceThread()->addPoll(*accel);
319
320 if (*callback_test) {
321 callbackTest(acc, accel, cb_iters);
322 } else if (*hostmemtestSub) {
323 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
324 } else if (*loopbackSub) {
325 loopbackAddTest(acc, accel, loopbackIters, loopbackPipeline);
326 } else if (*dmatestSub) {
327 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
328 } else if (*bandwidthSub) {
329 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
330 bandwidthWrite);
331 } else if (*hostmembwSub) {
332 hostmemBandwidthTest(acc, accel, hmBwCount, hmBwWidths, hmBwRead,
333 hmBwWrite);
334 } else if (*aggBwSub) {
335 aggregateHostmemBandwidthTest(acc, accel, aggWidth, aggCount, aggRead,
336 aggWrite);
337 } else if (*streamingAddSub) {
338 if (streamingTranslate)
339 streamingAddTranslatedTest(acc, accel, streamingAddAmt,
340 streamingNumItems);
341 else
342 streamingAddTest(acc, accel, streamingAddAmt, streamingNumItems);
343 } else if (*coordTranslateSub) {
344 coordTranslateTest(acc, accel, coordXTrans, coordYTrans, coordNumItems);
345 } else if (*serialCoordTranslateSub) {
346 serialCoordTranslateTest(acc, accel, coordXTrans, coordYTrans,
347 coordNumItems, serialBatchSize);
348 } else if (*channelTestSub) {
349 channelTest(acc, accel, channelIters);
350 }
351
352 acc->disconnect();
353 } catch (std::exception &e) {
354 ctxt.getLogger().error("esitester", e.what());
355 acc->disconnect();
356 return -1;
357 }
358 std::cout << "Exiting successfully\n";
359 return 0;
360}
361
// Callback test body.
//
// Looks up the "cb_test" child of `accel`, its "cmd" port (as an MMIO region)
// and its "cb" port (as a CallService::Callback). A handler is connected to
// the callback port that bumps an atomic counter, logs the payload at debug
// level and prints it as a uint64_t. Then, for each of `iterations` rounds,
// the round index is written to MMIO offset 0x10 and the counter is polled
// (up to 1000 times with 1 ms sleeps) until it exceeds the round index.
//
// Throws std::runtime_error if any lookup or cast fails, or if no callback is
// observed within the polling window.
//
// NOTE(review): the first line of this function's signature is not present in
// this listing; only the trailing `iterations` parameter is visible.
363 uint32_t iterations) {
364 auto cb_test = accel->getChildren().find(AppID("cb_test"));
365 if (cb_test == accel->getChildren().end())
366 throw std::runtime_error("No cb_test child found in accelerator");
367 auto &ports = cb_test->second->getPorts();
368 auto cmd_port = ports.find(AppID("cmd"));
369 if (cmd_port == ports.end())
370 throw std::runtime_error("No cmd port found in cb_test child");
371 auto *cmdMMIO = cmd_port->second.getAs<services::MMIO::MMIORegion>();
372 if (!cmdMMIO)
373 throw std::runtime_error("cb_test cmd port is not MMIO");
374
375 auto f = ports.find(AppID("cb"));
376 if (f == ports.end())
377 throw std::runtime_error("No cb port found in accelerator");
378
379 auto *callPort = f->second.getAs<services::CallService::Callback>();
380 if (!callPort)
381 throw std::runtime_error("cb port is not a CallService::Callback");
382
// Counts handler invocations; compared against the round index below.
// NOTE(review): the handler captures this local by reference. If the callback
// can still fire after this function returns, the reference would dangle --
// confirm the port's lifetime guarantees.
383 std::atomic<uint32_t> callbackCount = 0;
384 callPort->connect(
385 [conn, &callbackCount](const MessageData &data) mutable -> MessageData {
386 callbackCount.fetch_add(1);
387 conn->getLogger().debug(
388 [&](std::string &subsystem, std::string &msg,
389 std::unique_ptr<std::map<std::string, std::any>> &details) {
390 subsystem = "ESITESTER";
391 msg = "Received callback";
392 details = std::make_unique<std::map<std::string, std::any>>();
393 details->emplace("data", data);
394 });
395 std::cout << "callback: " << *data.as<uint64_t>() << std::endl;
396 return MessageData();
397 },
398 true);
399
400 for (uint32_t i = 0; i < iterations; ++i) {
401 conn->getLogger().info("esitester", "Issuing callback command iteration " +
402 std::to_string(i) + "/" +
403 std::to_string(iterations));
404 cmdMMIO->write(0x10, i); // Command the callback
405 // Wait up to 1 second for the callback to be invoked.
406 for (uint32_t wait = 0; wait < 1000; ++wait) {
407 if (callbackCount.load() > i)
408 break;
409 std::this_thread::sleep_for(std::chrono::milliseconds(1));
410 }
// After round i completes the counter must be at least i + 1.
411 if (callbackCount.load() <= i)
412 throw std::runtime_error("Callback test failed. No callback received");
413 }
414}
415
416/// Test the hostmem write functionality.
///
/// Finds the `writemem[width]` child of `acc`, its `cmd[width]` MMIO port and
/// its "addrCmdIssued" / "addrCmdResponses" telemetry metrics. The first nine
/// 64-bit words of `region` are filled with an all-ones sentinel and flushed;
/// the region's device pointer is written to MMIO offset 0x10 and the value 1
/// to offsets 0x18 and 0x20. Both telemetry counters are then polled (up to
/// 100 times with 100 us sleeps) until each reads 1. The test passes when none
/// of the first ceil(width / 64) words still holds the sentinel.
///
/// Throws std::runtime_error on a missing or mistyped port, on timeout, or
/// when the sentinel check fails.
///
/// NOTE(review): the leading signature lines are not present in this listing;
/// only the trailing `width` parameter is visible.
419 uint32_t width) {
420 std::cout << "Running hostmem WRITE test with width " << width << std::endl;
421 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
// Returns false if any word that the device should have overwritten still
// contains the sentinel; optionally dumps the first nine words.
// NOTE(review): "%016lx" assumes uint64_t is `unsigned long`; PRIx64 from
// <cinttypes> would be the portable spelling.
422 auto check = [&](bool print) {
423 bool ret = true;
424 for (size_t i = 0; i < 9; ++i) {
425 if (print)
426 printf("[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
427 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
428 ret = false;
429 }
430 return ret;
431 };
432
433 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
434 if (writeMemChildIter == acc->getChildren().end())
435 throw std::runtime_error(
436 "hostmem write test failed. No writemem child found");
437 auto &writeMemPorts = writeMemChildIter->second->getPorts();
438
439 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
440 if (cmdPortIter == writeMemPorts.end())
441 throw std::runtime_error(
442 "hostmem write test failed. No (cmd,width) MMIO port");
443 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
444 if (!cmdMMIO)
445 throw std::runtime_error(
446 "hostmem write test failed. (cmd,width) port not MMIO");
447
448 auto issuedPortIter = writeMemPorts.find(AppID("addrCmdIssued"));
449 if (issuedPortIter == writeMemPorts.end())
450 throw std::runtime_error(
451 "hostmem write test failed. addrCmdIssued missing");
452 auto *addrCmdIssuedPort =
453 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
454 if (!addrCmdIssuedPort)
455 throw std::runtime_error(
456 "hostmem write test failed. addrCmdIssued not telemetry");
457 addrCmdIssuedPort->connect();
458
459 auto responsesPortIter = writeMemPorts.find(AppID("addrCmdResponses"));
460 if (responsesPortIter == writeMemPorts.end())
461 throw std::runtime_error(
462 "hostmem write test failed. addrCmdResponses missing");
463 auto *addrCmdResponsesPort =
464 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
465 if (!addrCmdResponsesPort)
466 throw std::runtime_error(
467 "hostmem write test failed. addrCmdResponses not telemetry");
468 addrCmdResponsesPort->connect();
469
// Seed the sentinel so an untouched word is detectable afterwards.
470 for (size_t i = 0, e = 9; i < e; ++i)
471 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
472 region.flush();
// Per the comments in the hostmem bandwidth test of this file, 0x10 is the
// address, 0x18 the flit count and 0x20 the start command.
473 cmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
474 cmdMMIO->write(0x18, 1);
475 cmdMMIO->write(0x20, 1);
476 bool done = false;
477 for (int i = 0; i < 100; ++i) {
478 auto issued = addrCmdIssuedPort->readInt();
479 auto responses = addrCmdResponsesPort->readInt();
480 if (issued == 1 && responses == 1) {
481 done = true;
482 break;
483 }
484 std::this_thread::sleep_for(std::chrono::microseconds(100));
485 }
486 if (!done) {
// Dump the buffer for diagnosis before reporting the timeout.
487 check(true);
488 throw std::runtime_error("hostmem write test (" + std::to_string(width) +
489 " bits) timeout waiting for completion");
490 }
491 if (!check(true))
492 throw std::runtime_error("hostmem write test failed (" +
493 std::to_string(width) + " bits)");
494}
495
// Hostmem read test body.
//
// Finds the `readmem[width]` child of `acc`, its `cmd[width]` MMIO port and
// the "lastReadLSB", "addrCmdIssued" and "addrCmdResponses" telemetry metrics.
// For eight rounds it stores two shifted patterns in the first two words of
// `region`, flushes, issues a command through MMIO offsets 0x10 / 0x18 / 0x20,
// polls the issued/response counters (up to 100 times with 10 ms sleeps), and
// compares "lastReadLSB" against the first word, masked to `width` bits when
// width < 64.
//
// Throws std::runtime_error on a missing or mistyped port, on timeout, or on a
// data mismatch.
//
// NOTE(review): the leading signature lines are not present in this listing;
// only the trailing `width` parameter is visible.
498 uint32_t width) {
499 std::cout << "Running hostmem READ test with width " << width << std::endl;
500 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
501 if (readMemChildIter == acc->getChildren().end())
502 throw std::runtime_error(
503 "hostmem read test failed. No readmem child found");
504
505 auto &readMemPorts = readMemChildIter->second->getPorts();
506 auto addrCmdPortIter = readMemPorts.find(AppID("cmd", width));
507 if (addrCmdPortIter == readMemPorts.end())
508 throw std::runtime_error(
509 "hostmem read test failed. No AddressCommand MMIO port");
510 auto *addrCmdMMIO =
511 addrCmdPortIter->second.getAs<services::MMIO::MMIORegion>();
512 if (!addrCmdMMIO)
513 throw std::runtime_error(
514 "hostmem read test failed. AddressCommand port not MMIO");
515
516 auto lastReadPortIter = readMemPorts.find(AppID("lastReadLSB"));
517 if (lastReadPortIter == readMemPorts.end())
518 throw std::runtime_error("hostmem read test failed. lastReadLSB missing");
519 auto *lastReadPort =
520 lastReadPortIter->second.getAs<services::TelemetryService::Metric>();
521 if (!lastReadPort)
522 throw std::runtime_error(
523 "hostmem read test failed. lastReadLSB not telemetry");
524 lastReadPort->connect();
525
526 auto issuedPortIter = readMemPorts.find(AppID("addrCmdIssued"));
527 if (issuedPortIter == readMemPorts.end())
528 throw std::runtime_error("hostmem read test failed. addrCmdIssued missing");
529 auto *addrCmdIssuedPort =
530 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
531 if (!addrCmdIssuedPort)
532 throw std::runtime_error(
533 "hostmem read test failed. addrCmdIssued not telemetry");
534 addrCmdIssuedPort->connect();
535
536 auto responsesPortIter = readMemPorts.find(AppID("addrCmdResponses"));
537 if (responsesPortIter == readMemPorts.end())
538 throw std::runtime_error(
539 "hostmem read test failed. addrCmdResponses missing");
540 auto *addrCmdResponsesPort =
541 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
542 if (!addrCmdResponsesPort)
543 throw std::runtime_error(
544 "hostmem read test failed. addrCmdResponses not telemetry");
545 addrCmdResponsesPort->connect();
546
547 for (size_t i = 0; i < 8; ++i) {
// A different pattern each round, so a stale value from the previous round
// cannot satisfy the comparison below.
548 auto *dataPtr = static_cast<uint64_t *>(region.getPtr());
549 dataPtr[0] = 0x12345678ull << i;
550 dataPtr[1] = 0xDEADBEEFull << i;
551 region.flush();
552 addrCmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
553 addrCmdMMIO->write(0x18, 1);
554 addrCmdMMIO->write(0x20, 1);
555 bool done = false;
// NOTE(review): every round waits for both counters to equal exactly 1, so
// the counters presumably reset on each start command -- confirm against the
// hardware.
556 for (int waitLoop = 0; waitLoop < 100; ++waitLoop) {
557 auto issued = addrCmdIssuedPort->readInt();
558 auto responses = addrCmdResponsesPort->readInt();
559 if (issued == 1 && responses == 1) {
560 done = true;
561 break;
562 }
563 std::this_thread::sleep_for(std::chrono::milliseconds(10));
564 }
565 if (!done)
566 throw std::runtime_error("hostmem read (" + std::to_string(width) +
567 " bits) timeout waiting for completion");
568 uint64_t captured = lastReadPort->readInt();
569 uint64_t expected = dataPtr[0];
// Mask only below 64 bits; shifting a 64-bit value by 64 would be undefined.
570 if (width < 64)
571 expected &= ((1ull << width) - 1);
572 if (captured != expected)
573 throw std::runtime_error("hostmem read test (" + std::to_string(width) +
574 " bits) failed. Expected " +
575 esi::toHex(expected) + ", got " +
576 esi::toHex(captured));
577 }
578}
579
// Host memory test driver.
//
// Starts the HostMem service on `conn`, allocates a writeable 1024 * 1024 byte
// scratch region, zeroes and flushes it, then for every entry in `widths` runs
// the hostmem write test (if `write`) and the hostmem read test (if `read`).
// A failure for one width is logged and the remaining widths still run; a
// std::runtime_error is thrown at the end if any width failed.
//
// NOTE(review): the first line of this function's signature is not present in
// this listing.
// NOTE(review): when both `write` and `read` are false nothing is exercised,
// yet "Hostmem test passed" is still printed -- confirm that is intended.
581 const std::vector<uint32_t> &widths, bool write,
582 bool read) {
583 // Enable the host memory service.
584 auto hostmem = conn->getService<services::HostMem>();
585 hostmem->start();
586 auto scratchRegion = hostmem->allocate(/*size(bytes)=*/1024 * 1024,
587 /*memOpts=*/{.writeable = true});
588 uint64_t *dataPtr = static_cast<uint64_t *>(scratchRegion->getPtr());
589 conn->getLogger().info("esitester",
590 "Running host memory test with region size " +
591 std::to_string(scratchRegion->getSize()) +
592 " bytes at 0x" + toHex(dataPtr));
// Clear the whole region, one 64-bit word at a time.
593 for (size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
594 dataPtr[i] = 0;
595 scratchRegion->flush();
596
597 bool passed = true;
598 for (size_t width : widths) {
599 try {
600 if (write)
601 hostmemWriteTest(acc, *scratchRegion, width);
602 if (read)
603 hostmemReadTest(acc, *scratchRegion, width);
604 } catch (std::exception &e) {
// Record the failure and keep going so every width gets reported.
605 conn->getLogger().error("esitester", "Hostmem test failed for width " +
606 std::to_string(width) + ": " +
607 e.what());
608 passed = false;
609 }
610 }
611 if (!passed)
612 throw std::runtime_error("Hostmem test failed");
613 std::cout << "Hostmem test passed" << std::endl;
614}
615
// DMA read test body for one width.
//
// Resolves `tohostdma[width]`'s "cmd" port (as MMIO) and the raw "data" read
// channel of its "out" bundle. Writes a transfer count of 24 to MMIO offset 0,
// then reads 24 messages from the channel, logging each at debug level. Only
// for width == 64 are the values checked to be non-decreasing.
//
// Throws std::runtime_error if the cmd port is missing or not MMIO, or if
// 64-bit data arrives out of order.
//
// NOTE(review): the first line of this function's signature is not present in
// this listing.
617 size_t width) {
618 Logger &logger = conn->getLogger();
619 logger.info("esitester",
620 "== Running DMA read test with width " + std::to_string(width));
621 AppIDPath lastPath;
622 BundlePort *toHostMMIOPort =
623 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
624 if (!toHostMMIOPort)
625 throw std::runtime_error("dma read test failed. No tohostdma[" +
626 std::to_string(width) + "] found");
627 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
628 if (!toHostMMIO)
629 throw std::runtime_error("dma read test failed. MMIO port is not MMIO");
630 lastPath.clear();
// NOTE(review): unlike the cmd port above (and the DMA write test),
// `outPortBundle` is dereferenced without a null check.
631 BundlePort *outPortBundle =
632 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
633 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
634 outPort.connect();
635
636 size_t xferCount = 24;
637 uint64_t last = 0;
638 MessageData data;
639 toHostMMIO->write(0, xferCount);
640 for (size_t i = 0; i < xferCount; ++i) {
641 outPort.read(data);
// Ordering is only verified for 64-bit transfers; other widths are just
// drained and logged.
642 if (width == 64) {
643 uint64_t val = *data.as<uint64_t>();
644 if (val < last)
645 throw std::runtime_error("dma read test failed. Out of order data");
646 last = val;
647 }
648 logger.debug("esitester",
649 "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex());
650 }
651 outPort.disconnect();
652 std::cout << " DMA read test for " << width << " bits passed" << std::endl;
653}
654
// DMA write test body for one width.
//
// Resolves `fromhostdma[width]`'s "cmd" port (as MMIO) and the raw "data"
// write channel of its "in" bundle. Writes a transfer count of 24 to MMIO
// offset 0, then for i = 1..24 sends a (width / 8)-byte message whose first
// byte is i (retrying tryWrite up to 100 times with 10 ms sleeps) and polls
// MMIO offset 8 (up to 20 times with 10 ms sleeps) until it reads back i.
//
// Throws std::runtime_error if a port is missing or mistyped, if a write never
// succeeds, or if the MMIO read-back never matches.
//
// NOTE(review): the first line of this function's signature is not present in
// this listing, and listing line 678 (between the `writePort` declaration and
// the blank line after it) is missing too -- presumably the channel connect
// call; confirm against the real source.
656 size_t width) {
657 Logger &logger = conn->getLogger();
658 logger.info("esitester",
659 "Running DMA write test with width " + std::to_string(width));
660 AppIDPath lastPath;
661 BundlePort *fromHostMMIOPort =
662 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
// NOTE(review): this message says "dma read test" inside the write test; it
// looks like a copy/paste slip.
663 if (!fromHostMMIOPort)
664 throw std::runtime_error("dma read test for " + toString(width) +
665 " bits failed. No fromhostdma[" +
666 std::to_string(width) + "] found");
667 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
668 if (!fromHostMMIO)
669 throw std::runtime_error("dma write test for " + toString(width) +
670 " bits failed. MMIO port is not MMIO");
671 lastPath.clear();
672 BundlePort *outPortBundle =
673 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
674 if (!outPortBundle)
675 throw std::runtime_error("dma write test for " + toString(width) +
676 " bits failed. No out port found");
677 WriteChannelPort &writePort = outPortBundle->getRawWrite("data");
679
680 size_t xferCount = 24;
// NOTE(review): allocates `width` bytes although only width / 8 are
// initialized and sent. The buffer is also leaked if any throw below fires
// before the delete[]; a std::vector<uint8_t> would avoid both.
681 uint8_t *data = new uint8_t[width];
682 for (size_t i = 0; i < width / 8; ++i)
683 data[i] = 0;
684 fromHostMMIO->read(8);
685 fromHostMMIO->write(0, xferCount);
686 for (size_t i = 1; i < xferCount + 1; ++i) {
// Tag each message with its 1-based sequence number in the first byte.
687 data[0] = i;
688 bool successWrite;
689 size_t attempts = 0;
690 do {
691 successWrite = writePort.tryWrite(MessageData(data, width / 8));
692 if (!successWrite) {
693 std::this_thread::sleep_for(std::chrono::milliseconds(10));
694 }
695 } while (!successWrite && ++attempts < 100);
696 if (!successWrite)
697 throw std::runtime_error("dma write test for " + toString(width) +
698 " bits failed. Write failed");
// Wait for MMIO offset 8 to report the sequence number just sent.
699 uint64_t lastReadMMIO;
700 for (size_t a = 0; a < 20; ++a) {
701 lastReadMMIO = fromHostMMIO->read(8);
702 if (lastReadMMIO == i)
703 break;
704 std::this_thread::sleep_for(std::chrono::milliseconds(10));
705 if (a >= 19)
706 throw std::runtime_error("dma write for " + toString(width) +
707 " bits test failed. Read from MMIO failed");
708 }
709 }
710 writePort.disconnect();
711 delete[] data;
712 std::cout << " DMA write test for " << width << " bits passed" << std::endl;
713}
714
// DMA test driver.
//
// If `write` is set, runs the DMA write test for every width; a failure is
// printed to stderr and remembered, and the remaining widths still run. If
// `read` is set, runs the DMA read test for every width. Throws
// std::runtime_error("DMA test failed") at the end if any write test failed.
//
// NOTE(review): the first line of this function's signature is not present in
// this listing.
// NOTE(review): the read loop is not wrapped in try/catch, so the first
// failing read test propagates immediately instead of being aggregated like
// the write failures -- confirm the asymmetry is intended.
716 const std::vector<uint32_t> &widths, bool read,
717 bool write) {
718 bool success = true;
719 if (write)
720 for (size_t width : widths)
721 try {
722 dmaWriteTest(conn, acc, width);
723 } catch (std::exception &e) {
724 success = false;
725 std::cerr << "DMA write test for " << width
726 << " bits failed: " << e.what() << std::endl;
727 }
728 if (read)
729 for (size_t width : widths)
730 dmaReadTest(conn, acc, width);
731 if (!success)
732 throw std::runtime_error("DMA test failed");
733 std::cout << "DMA test passed" << std::endl;
734}
735
736//
737// DMA bandwidth test
738//
739
// DMA read bandwidth measurement for one width.
//
// Resolves `tohostdma[width]`'s "cmd" MMIO port and the raw "data" read
// channel of its "out" bundle, writes `xferCount` to MMIO offset 0, reads
// `xferCount` messages, and logs the elapsed time in microseconds plus the
// resulting bandwidth (xferCount * width / 8 bytes over the elapsed time).
//
// Throws std::runtime_error if the cmd port is missing or not MMIO.
//
// NOTE(review): the first line of this function's signature is not present in
// this listing.
741 size_t width, size_t xferCount) {
742
743 AppIDPath lastPath;
744 BundlePort *toHostMMIOPort =
745 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
746 if (!toHostMMIOPort)
747 throw std::runtime_error("bandwidth test failed. No tohostdma[" +
748 std::to_string(width) + "] found");
749 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
750 if (!toHostMMIO)
751 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
752 lastPath.clear();
// NOTE(review): `outPortBundle` is dereferenced without a null check.
753 BundlePort *outPortBundle =
754 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
755 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
756 outPort.connect();
757
758 Logger &logger = conn->getLogger();
759 logger.info("esitester", "Starting read bandwidth test with " +
760 std::to_string(xferCount) + " x " +
761 std::to_string(width) + " bit transfers");
762 MessageData data;
// The timed section covers the MMIO kick-off and every channel read,
// including the per-message debug logging call.
763 auto start = std::chrono::high_resolution_clock::now();
764 toHostMMIO->write(0, xferCount);
765 for (size_t i = 0; i < xferCount; ++i) {
766 outPort.read(data);
767 logger.debug(
768 [i, &data](std::string &subsystem, std::string &msg,
769 std::unique_ptr<std::map<std::string, std::any>> &details) {
770 subsystem = "esitester";
771 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
772 });
773 }
774 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
775 std::chrono::high_resolution_clock::now() - start);
// NOTE(review): a run shorter than 1 us gives duration.count() == 0 and a
// floating-point division by zero (infinite bandwidth).
776 double bytesPerSec =
777 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
778 logger.info("esitester",
779 " Bandwidth test: " + std::to_string(xferCount) + " x " +
780 std::to_string(width) + " bit transfers in " +
781 std::to_string(duration.count()) + " microseconds");
782 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
783}
784
// DMA write bandwidth measurement for one width.
//
// Resolves `fromhostdma[width]`'s "cmd" MMIO port and the raw "data" write
// channel of its "in" bundle, writes `xferCount` to MMIO offset 0, sends the
// same (width / 8)-byte message `xferCount` times, and logs the elapsed time
// in microseconds plus the resulting bandwidth.
//
// Throws std::runtime_error if the cmd port is missing or not MMIO.
//
// NOTE(review): the first line of this function's signature is not present in
// this listing.
786 size_t width, size_t xferCount) {
787
788 AppIDPath lastPath;
789 BundlePort *fromHostMMIOPort =
790 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
791 if (!fromHostMMIOPort)
792 throw std::runtime_error("bandwidth test failed. No fromhostdma[" +
793 std::to_string(width) + "] found");
794 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
795 if (!fromHostMMIO)
796 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
797 lastPath.clear();
// NOTE(review): `inPortBundle` is dereferenced without a null check. The
// write channel is also named `outPort`, mirroring the read test.
798 BundlePort *inPortBundle =
799 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
800 WriteChannelPort &outPort = inPortBundle->getRawWrite("data");
801 outPort.connect();
802
803 Logger &logger = conn->getLogger();
804 logger.info("esitester", "Starting write bandwidth test with " +
805 std::to_string(xferCount) + " x " +
806 std::to_string(width) + " bit transfers");
// Payload: byte k holds k (truncated to 8 bits); reused for every transfer.
807 std::vector<uint8_t> dataVec(width / 8);
808 for (size_t i = 0; i < width / 8; ++i)
809 dataVec[i] = i;
810 MessageData data(dataVec);
811 auto start = std::chrono::high_resolution_clock::now();
812 fromHostMMIO->write(0, xferCount);
813 for (size_t i = 0; i < xferCount; ++i) {
814 outPort.write(data);
815 logger.debug(
816 [i, &data](std::string &subsystem, std::string &msg,
817 std::unique_ptr<std::map<std::string, std::any>> &details) {
818 subsystem = "esitester";
819 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
820 });
821 }
822 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
823 std::chrono::high_resolution_clock::now() - start);
// NOTE(review): a run shorter than 1 us gives duration.count() == 0 and a
// floating-point division by zero (infinite bandwidth).
824 double bytesPerSec =
825 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
826 logger.info("esitester",
827 " Bandwidth test: " + std::to_string(xferCount) + " x " +
828 std::to_string(width) + " bit transfers in " +
829 std::to_string(duration.count()) + " microseconds");
830 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
831}
832
// DMA bandwidth test driver: runs the read bandwidth test for every width if
// `read` is set, then the write bandwidth test for every width if `write` is
// set. Exceptions from the individual tests are not caught here.
//
// NOTE(review): the first line of this function's signature is not present in
// this listing.
834 const std::vector<uint32_t> &widths,
835 uint32_t xferCount, bool read, bool write) {
836 if (read)
837 for (uint32_t w : widths)
838 bandwidthReadTest(conn, acc, w, xferCount);
839 if (write)
840 for (uint32_t w : widths)
841 bandwidthWriteTest(conn, acc, w, xferCount);
842}
843
844//
845// Hostmem bandwidth test
846//
847
848static void
851 uint32_t width, uint32_t xferCount) {
852 Logger &logger = conn->getLogger();
853 logger.info("esitester", "Starting hostmem WRITE bandwidth test: " +
854 std::to_string(xferCount) + " x " +
855 std::to_string(width) + " bits");
856
857 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
858 if (writeMemChildIter == acc->getChildren().end())
859 throw std::runtime_error("hostmem write bandwidth: writemem child missing");
860 auto &writeMemPorts = writeMemChildIter->second->getPorts();
861
862 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
863 if (cmdPortIter == writeMemPorts.end())
864 throw std::runtime_error("hostmem write bandwidth: cmd MMIO missing");
865 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
866 if (!cmdMMIO)
867 throw std::runtime_error("hostmem write bandwidth: cmd not MMIO");
868
869 auto issuedIter = writeMemPorts.find(AppID("addrCmdIssued"));
870 auto respIter = writeMemPorts.find(AppID("addrCmdResponses"));
871 auto cycleCount = writeMemPorts.find(AppID("addrCmdCycles"));
872 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
873 cycleCount == writeMemPorts.end())
874 throw std::runtime_error("hostmem write bandwidth: telemetry missing");
875 auto *issuedPort =
876 issuedIter->second.getAs<services::TelemetryService::Metric>();
877 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
878 auto *cyclePort =
879 cycleCount->second.getAs<services::TelemetryService::Metric>();
880 if (!issuedPort || !respPort || !cyclePort)
881 throw std::runtime_error(
882 "hostmem write bandwidth: telemetry type mismatch");
883
884 issuedPort->connect();
885 respPort->connect();
886 cyclePort->connect();
887
888 // Initialize pattern (optional).
889 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
890 size_t words = region.getSize() / 8;
891 for (size_t i = 0; i < words; ++i)
892 dataPtr[i] = i + 0xA5A50000;
893 region.flush();
894
895 auto start = std::chrono::high_resolution_clock::now();
896 // Fire off xferCount write commands (one flit each).
897 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
898 cmdMMIO->write(0x10, devPtr); // address
899 cmdMMIO->write(0x18, xferCount); // flits
900 cmdMMIO->write(0x20, 1); // start
901
902 // Wait for responses counter to reach target.
903 bool completed = false;
904 for (int wait = 0; wait < 100000; ++wait) {
905 uint64_t respNow = respPort->readInt();
906 if (respNow == xferCount) {
907 completed = true;
908 break;
909 }
910 std::this_thread::sleep_for(std::chrono::microseconds(50));
911 }
912 if (!completed)
913 throw std::runtime_error("hostmem write bandwidth timeout");
914 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
915 std::chrono::high_resolution_clock::now() - start);
916 double bytesPerSec =
917 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
918 uint64_t cycles = cyclePort->readInt();
919 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
920 std::cout << "[WRITE] Hostmem bandwidth (" << std::to_string(width)
921 << "): " << formatBandwidth(bytesPerSec) << " "
922 << std::to_string(xferCount) << " flits in "
923 << std::to_string(duration.count()) << " us, "
924 << std::to_string(cycles) << " cycles, " << bytesPerCycle
925 << " bytes/cycle" << std::endl;
926}
927
928static void
931 uint32_t width, uint32_t xferCount) {
932 Logger &logger = conn->getLogger();
933 logger.info("esitester", "Starting hostmem READ bandwidth test: " +
934 std::to_string(xferCount) + " x " +
935 std::to_string(width) + " bits");
936
937 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
938 if (readMemChildIter == acc->getChildren().end())
939 throw std::runtime_error("hostmem read bandwidth: readmem child missing");
940 auto &readMemPorts = readMemChildIter->second->getPorts();
941
942 auto cmdPortIter = readMemPorts.find(AppID("cmd", width));
943 if (cmdPortIter == readMemPorts.end())
944 throw std::runtime_error("hostmem read bandwidth: cmd MMIO missing");
945 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
946 if (!cmdMMIO)
947 throw std::runtime_error("hostmem read bandwidth: cmd not MMIO");
948
949 auto issuedIter = readMemPorts.find(AppID("addrCmdIssued"));
950 auto respIter = readMemPorts.find(AppID("addrCmdResponses"));
951 auto cyclePort = readMemPorts.find(AppID("addrCmdCycles"));
952 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
953 cyclePort == readMemPorts.end())
954 throw std::runtime_error("hostmem read bandwidth: telemetry missing");
955 auto *issuedPort =
956 issuedIter->second.getAs<services::TelemetryService::Metric>();
957 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
958 auto *cycleCntPort =
959 cyclePort->second.getAs<services::TelemetryService::Metric>();
960 if (!issuedPort || !respPort || !cycleCntPort)
961 throw std::runtime_error("hostmem read bandwidth: telemetry type mismatch");
962 issuedPort->connect();
963 respPort->connect();
964 cycleCntPort->connect();
965
966 // Prepare memory pattern (optional).
967 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
968 size_t words64 = region.getSize() / 8;
969 for (size_t i = 0; i < words64; ++i)
970 dataPtr[i] = 0xCAFEBABE0000ull + i;
971 region.flush();
972 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
973 auto start = std::chrono::high_resolution_clock::now();
974
975 cmdMMIO->write(0x10, devPtr);
976 cmdMMIO->write(0x18, xferCount);
977 cmdMMIO->write(0x20, 1);
978
979 bool timeout = true;
980 for (int wait = 0; wait < 100000; ++wait) {
981 uint64_t respNow = respPort->readInt();
982 if (respNow == xferCount) {
983 timeout = false;
984 break;
985 }
986 std::this_thread::sleep_for(std::chrono::microseconds(50));
987 }
988 if (timeout)
989 throw std::runtime_error("hostmem read bandwidth timeout");
990 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
991 std::chrono::high_resolution_clock::now() - start);
992 double bytesPerSec =
993 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
994 uint64_t cycles = cycleCntPort->readInt();
995 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
996 std::cout << "[ READ] Hostmem bandwidth (" << width
997 << "): " << formatBandwidth(bytesPerSec) << ", " << xferCount
998 << " flits in " << duration.count() << " us, " << cycles
999 << " cycles, " << bytesPerCycle << " bytes/cycle" << std::endl;
1000}
1001
1003 uint32_t xferCount,
1004 const std::vector<uint32_t> &widths, bool read,
1005 bool write) {
1006 auto hostmemSvc = conn->getService<services::HostMem>();
1007 hostmemSvc->start();
1008 auto region = hostmemSvc->allocate(/*size(bytes)=*/1024 * 1024 * 1024,
1009 /*memOpts=*/{.writeable = true});
1010 for (uint32_t w : widths) {
1011 if (write)
1012 hostmemWriteBandwidthTest(conn, acc, *region, w, xferCount);
1013 if (read)
1014 hostmemReadBandwidthTest(conn, acc, *region, w, xferCount);
1015 }
1016}
1017
1019 uint32_t iterations, bool pipeline) {
1020 Logger &logger = conn->getLogger();
1021 auto loopbackChild = accel->getChildren().find(AppID("loopback"));
1022 if (loopbackChild == accel->getChildren().end())
1023 throw std::runtime_error("Loopback test: no 'loopback' child");
1024 auto &ports = loopbackChild->second->getPorts();
1025 auto addIter = ports.find(AppID("add"));
1026 if (addIter == ports.end())
1027 throw std::runtime_error("Loopback test: no 'add' port");
1028
1029 // Use FuncService::Func instead of raw channels.
1030 auto *funcPort = addIter->second.getAs<services::FuncService::Function>();
1031 if (!funcPort)
1032 throw std::runtime_error(
1033 "Loopback test: 'add' port not a FuncService::Function");
1034 funcPort->connect();
1035 if (iterations == 0) {
1036 logger.info("esitester", "Loopback add test: 0 iterations (skipped)");
1037 return;
1038 }
1039 std::mt19937_64 rng(0xC0FFEE);
1040 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
1041
1042 if (!pipeline) {
1043 auto start = std::chrono::high_resolution_clock::now();
1044 for (uint32_t i = 0; i < iterations; ++i) {
1045 uint32_t argVal = dist(rng);
1046 uint32_t expected = (argVal + 11) & 0xFFFF;
1047 uint8_t argBytes[3] = {
1048 static_cast<uint8_t>(argVal & 0xFF),
1049 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1050 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1051 };
1052 MessageData argMsg(argBytes, 3);
1053 MessageData resMsg = funcPort->call(argMsg).get();
1054 uint16_t got = *resMsg.as<uint16_t>();
1055 std::cout << "[loopback] i=" << i << " arg=0x" << esi::toHex(argVal)
1056 << " got=0x" << esi::toHex(got) << " exp=0x"
1057 << esi::toHex(expected) << std::endl;
1058 if (got != expected)
1059 throw std::runtime_error("Loopback mismatch (non-pipelined)");
1060 }
1061 auto end = std::chrono::high_resolution_clock::now();
1062 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
1063 .count();
1064 double callsPerSec = (double)iterations * 1e6 / (double)us;
1065 logger.info("esitester", "Loopback add test passed (non-pipelined, " +
1066 std::to_string(iterations) + " calls, " +
1067 std::to_string(us) + " us, " +
1068 std::to_string(callsPerSec) + " calls/s)");
1069 } else {
1070 // Pipelined mode: launch all calls first, then collect.
1071 std::vector<std::future<MessageData>> futures;
1072 futures.reserve(iterations);
1073 std::vector<uint32_t> expectedVals;
1074 expectedVals.reserve(iterations);
1075
1076 auto issueStart = std::chrono::high_resolution_clock::now();
1077 for (uint32_t i = 0; i < iterations; ++i) {
1078 uint32_t argVal = dist(rng);
1079 uint32_t expected = (argVal + 11) & 0xFFFF;
1080 uint8_t argBytes[3] = {
1081 static_cast<uint8_t>(argVal & 0xFF),
1082 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1083 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1084 };
1085 futures.emplace_back(funcPort->call(MessageData(argBytes, 3)));
1086 expectedVals.emplace_back(expected);
1087 }
1088 auto issueEnd = std::chrono::high_resolution_clock::now();
1089
1090 for (uint32_t i = 0; i < iterations; ++i) {
1091 MessageData resMsg = futures[i].get();
1092 uint16_t got = *resMsg.as<uint16_t>();
1093 uint16_t exp = (uint16_t)expectedVals[i];
1094 std::cout << "[loopback-pipelined] i=" << i << " got=0x"
1095 << esi::toHex(got) << " exp=0x" << esi::toHex(exp) << std::endl;
1096 if (got != exp)
1097 throw std::runtime_error("Loopback mismatch (pipelined) idx=" +
1098 std::to_string(i));
1099 }
1100 auto collectEnd = std::chrono::high_resolution_clock::now();
1101
1102 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1103 issueEnd - issueStart)
1104 .count();
1105 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1106 collectEnd - issueStart)
1107 .count();
1108
1109 double issueRate = (double)iterations * 1e6 / (double)issueUs;
1110 double completionRate = (double)iterations * 1e6 / (double)totalUs;
1111
1112 logger.info("esitester", "Loopback add test passed (pipelined). Issued " +
1113 std::to_string(iterations) + " in " +
1114 std::to_string(issueUs) + " us (" +
1115 std::to_string(issueRate) +
1116 " calls/s), total " + std::to_string(totalUs) +
1117 " us (" + std::to_string(completionRate) +
1118 " calls/s effective)");
1119 }
1120}
1121
1123 Accelerator *acc, uint32_t width,
1124 uint32_t xferCount, bool read,
1125 bool write) {
1126 Logger &logger = conn->getLogger();
1127 if (!read && !write) {
1128 std::cout << "aggbandwidth: nothing to do (enable --read and/or --write)\n";
1129 return;
1130 }
1131 logger.info(
1132 "esitester",
1133 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1134 " count=" + std::to_string(xferCount) +
1135 " read=" + (read ? "Y" : "N") + " write=" + (write ? "Y" : "N"));
1136
1137 auto hostmemSvc = conn->getService<services::HostMem>();
1138 hostmemSvc->start();
1139
1140 struct Unit {
1141 std::string prefix;
1142 bool isRead = false;
1143 bool isWrite = false;
1144 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1145 services::TelemetryService::Metric *resp = nullptr;
1146 services::TelemetryService::Metric *cycles = nullptr;
1147 services::MMIO::MMIORegion *cmd = nullptr;
1148 bool launched = false;
1149 bool done = false;
1150 uint64_t bytes = 0;
1151 uint64_t duration_us = 0;
1152 uint64_t cycleCount = 0;
1153 std::chrono::high_resolution_clock::time_point start;
1154 };
1155 std::vector<Unit> units;
1156 const std::vector<std::string> readPrefixes = {"readmem", "readmem_0",
1157 "readmem_1", "readmem_2"};
1158 const std::vector<std::string> writePrefixes = {"writemem", "writemem_0",
1159 "writemem_1", "writemem_2"};
1160
1161 auto addUnits = [&](const std::vector<std::string> &pref, bool doRead,
1162 bool doWrite) {
1163 for (auto &p : pref) {
1164 AppID id(p, width);
1165 auto childIt = acc->getChildren().find(id);
1166 if (childIt == acc->getChildren().end())
1167 continue; // silently skip missing variants
1168 auto &ports = childIt->second->getPorts();
1169 auto cmdIt = ports.find(AppID("cmd", width));
1170 auto respIt = ports.find(AppID("addrCmdResponses"));
1171 auto cycIt = ports.find(AppID("addrCmdCycles"));
1172 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1173 continue;
1174 auto *cmd = cmdIt->second.getAs<services::MMIO::MMIORegion>();
1175 auto *resp = respIt->second.getAs<services::TelemetryService::Metric>();
1176 auto *cyc = cycIt->second.getAs<services::TelemetryService::Metric>();
1177 if (!cmd || !resp || !cyc)
1178 continue;
1179 resp->connect();
1180 cyc->connect();
1181 Unit u;
1182 u.prefix = p;
1183 u.isRead = doRead;
1184 u.isWrite = doWrite;
1185 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable = true});
1186 // Init pattern.
1187 uint64_t *ptr = static_cast<uint64_t *>(u.region->getPtr());
1188 size_t words = u.region->getSize() / 8;
1189 for (size_t i = 0; i < words; ++i)
1190 ptr[i] =
1191 (p[0] == 'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1192 u.region->flush();
1193 u.cmd = cmd;
1194 u.resp = resp;
1195 u.cycles = cyc;
1196 u.bytes = uint64_t(xferCount) * (width / 8);
1197 units.emplace_back(std::move(u));
1198 }
1199 };
1200 if (read)
1201 addUnits(readPrefixes, true, false);
1202 if (write)
1203 addUnits(writePrefixes, false, true);
1204 if (units.empty()) {
1205 std::cout << "aggbandwidth: no matching units present for width " << width
1206 << "\n";
1207 return;
1208 }
1209
1210 auto wallStart = std::chrono::high_resolution_clock::now();
1211 // Launch sequentially.
1212 for (auto &u : units) {
1213 uint64_t devPtr = reinterpret_cast<uint64_t>(u.region->getDevicePtr());
1214 u.cmd->write(0x10, devPtr);
1215 u.cmd->write(0x18, xferCount);
1216 u.cmd->write(0x20, 1);
1217 u.start = std::chrono::high_resolution_clock::now();
1218 u.launched = true;
1219 }
1220
1221 // Poll all until complete.
1222 const uint64_t timeoutLoops = 200000; // ~10s at 50us sleep
1223 uint64_t loops = 0;
1224 while (true) {
1225 bool allDone = true;
1226 for (auto &u : units) {
1227 if (u.done)
1228 continue;
1229 if (u.resp->readInt() == xferCount) {
1230 auto end = std::chrono::high_resolution_clock::now();
1231 u.duration_us =
1232 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1233 .count();
1234 u.cycleCount = u.cycles->readInt();
1235 u.done = true;
1236 } else {
1237 allDone = false;
1238 }
1239 }
1240 if (allDone)
1241 break;
1242 if (++loops >= timeoutLoops)
1243 throw std::runtime_error("aggbandwidth: timeout");
1244 std::this_thread::sleep_for(std::chrono::microseconds(50));
1245 }
1246 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1247 std::chrono::high_resolution_clock::now() - wallStart)
1248 .count();
1249
1250 uint64_t totalBytes = 0;
1251 uint64_t totalReadBytes = 0;
1252 uint64_t totalWriteBytes = 0;
1253 for (auto &u : units) {
1254 totalBytes += u.bytes;
1255 if (u.isRead)
1256 totalReadBytes += u.bytes;
1257 if (u.isWrite)
1258 totalWriteBytes += u.bytes;
1259 double unitBps = (double)u.bytes * 1e6 / (double)u.duration_us;
1260 std::cout << "[agg-unit] " << u.prefix << "[" << width << "] "
1261 << (u.isRead ? "READ" : (u.isWrite ? "WRITE" : "UNK"))
1262 << " bytes=" << humanBytes(u.bytes) << " (" << u.bytes << " B)"
1263 << " time=" << humanTimeUS(u.duration_us) << " (" << u.duration_us
1264 << " us) cycles=" << u.cycleCount
1265 << " throughput=" << formatBandwidth(unitBps) << std::endl;
1266 }
1267 // Compute aggregate bandwidths as total size / total wall time (not sum of
1268 // unit throughputs).
1269 double aggReadBps =
1270 totalReadBytes ? (double)totalReadBytes * 1e6 / (double)wallUs : 0.0;
1271 double aggWriteBps =
1272 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (double)wallUs : 0.0;
1273 double aggCombinedBps =
1274 totalBytes ? (double)totalBytes * 1e6 / (double)wallUs : 0.0;
1275
1276 std::cout << "[agg-total] units=" << units.size()
1277 << " read_bytes=" << humanBytes(totalReadBytes) << " ("
1278 << totalReadBytes << " B)"
1279 << " read_bw=" << formatBandwidth(aggReadBps)
1280 << " write_bytes=" << humanBytes(totalWriteBytes) << " ("
1281 << totalWriteBytes << " B)"
1282 << " write_bw=" << formatBandwidth(aggWriteBps)
1283 << " combined_bytes=" << humanBytes(totalBytes) << " ("
1284 << totalBytes << " B)"
1285 << " combined_bw=" << formatBandwidth(aggCombinedBps)
1286 << " wall_time=" << humanTimeUS(wallUs) << " (" << wallUs << " us)"
1287 << std::endl;
1288 logger.info("esitester", "Aggregate hostmem bandwidth test complete");
1289}
1290
/// Packed struct representing a parallel window argument for StreamingAdder.
/// Layout in SystemVerilog (so it must be reversed in C):
///   { add_amt: UInt(32), input: UInt(32), last: UInt(8) }
/// Packing is forced to 1 byte so that no padding is inserted after `last`.
#pragma pack(push, 1)
struct StreamingAddArg {
  uint8_t last;    // Non-zero on the final element of the list.
  uint32_t input;  // One list element.
  uint32_t addAmt; // Amount to add; repeated in every message.
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddArg) == 9,
              "StreamingAddArg must be 9 bytes packed");
1303
/// Packed struct representing a parallel window result for StreamingAdder.
/// Layout in SystemVerilog (so it must be reversed in C):
///   { data: UInt(32), last: UInt(8) }
/// Packing is forced to 1 byte so that no padding is inserted after `last`.
#pragma pack(push, 1)
struct StreamingAddResult {
  uint8_t last;  // Non-zero on the final element of the result list.
  uint32_t data; // One result element.
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddResult) == 5,
              "StreamingAddResult must be 5 bytes packed");
1315
1316/// Test the StreamingAdder module. This module takes a struct containing
1317/// an add_amt and a list of uint32s, adds add_amt to each element, and
1318/// returns the resulting list. The data is streamed using windowed types.
1320 uint32_t addAmt, uint32_t numItems) {
1321 Logger &logger = conn->getLogger();
1322 logger.info("esitester", "Starting streaming add test with add_amt=" +
1323 std::to_string(addAmt) +
1324 ", num_items=" + std::to_string(numItems));
1325
1326 // Generate random input data.
1327 std::mt19937 rng(0xDEADBEEF);
1328 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1329 std::vector<uint32_t> inputData;
1330 inputData.reserve(numItems);
1331 for (uint32_t i = 0; i < numItems; ++i)
1332 inputData.push_back(dist(rng));
1333
1334 // Find the streaming_adder child.
1335 auto streamingAdderChild =
1336 accel->getChildren().find(AppID("streaming_adder"));
1337 if (streamingAdderChild == accel->getChildren().end())
1338 throw std::runtime_error(
1339 "Streaming add test: no 'streaming_adder' child found");
1340
1341 auto &ports = streamingAdderChild->second->getPorts();
1342 auto addIter = ports.find(AppID("streaming_add"));
1343 if (addIter == ports.end())
1344 throw std::runtime_error(
1345 "Streaming add test: no 'streaming_add' port found");
1346
1347 // Get the raw read/write channel ports for the windowed function.
1348 // The argument channel expects parallel windowed data where each message
1349 // contains: struct { add_amt: UInt(32), input: UInt(32), last: bool }
1350 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1351 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1352
1353 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1354 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1355
1356 // Send each list element with add_amt repeated in every message.
1357 for (size_t i = 0; i < inputData.size(); ++i) {
1358 StreamingAddArg arg;
1359 arg.addAmt = addAmt;
1360 arg.input = inputData[i];
1361 arg.last = (i == inputData.size() - 1) ? 1 : 0;
1362 argPort.write(
1363 MessageData(reinterpret_cast<const uint8_t *>(&arg), sizeof(arg)));
1364 logger.debug("esitester", "Sent {add_amt=" + std::to_string(arg.addAmt) +
1365 ", input=" + std::to_string(arg.input) +
1366 ", last=" + (arg.last ? "true" : "false") +
1367 "}");
1368 }
1369
1370 // Read the result list (also windowed).
1371 std::vector<uint32_t> results;
1372 bool lastSeen = false;
1373 while (!lastSeen) {
1374 MessageData resMsg;
1375 resultPort.read(resMsg);
1376 if (resMsg.getSize() < sizeof(StreamingAddResult))
1377 throw std::runtime_error(
1378 "Streaming add test: unexpected result message size");
1379
1380 const auto *res =
1381 reinterpret_cast<const StreamingAddResult *>(resMsg.getBytes());
1382 lastSeen = res->last != 0;
1383 results.push_back(res->data);
1384 logger.debug("esitester", "Received result=" + std::to_string(res->data) +
1385 " (last=" + (lastSeen ? "true" : "false") +
1386 ")");
1387 }
1388
1389 // Verify results.
1390 if (results.size() != inputData.size())
1391 throw std::runtime_error(
1392 "Streaming add test: result size mismatch. Expected " +
1393 std::to_string(inputData.size()) + ", got " +
1394 std::to_string(results.size()));
1395
1396 bool passed = true;
1397 std::cout << "Streaming add test results:" << std::endl;
1398 for (size_t i = 0; i < inputData.size(); ++i) {
1399 uint32_t expected = inputData[i] + addAmt;
1400 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1401 << " = " << results[i] << " (expected " << expected << ")";
1402 if (results[i] != expected) {
1403 std::cout << " MISMATCH!";
1404 passed = false;
1405 }
1406 std::cout << std::endl;
1407 }
1408
1409 argPort.disconnect();
1410 resultPort.disconnect();
1411
1412 if (!passed)
1413 throw std::runtime_error("Streaming add test failed: result mismatch");
1414
1415 logger.info("esitester", "Streaming add test passed");
1416 std::cout << "Streaming add test passed" << std::endl;
1417}
1418
/// Test the StreamingAdder module using message translation.
/// This version uses the list translation support where the host-side message
/// format is:
///   Argument: { input_length (size_t, 8 bytes on 64-bit), add_amt (4 bytes),
///               input_data[] }
///   Result:   { data_length (size_t, 8 bytes on 64-bit), data[] }
/// The translation layer automatically converts between this format and the
/// parallel windowed frames used by the hardware.
1425
1426/// Translated argument struct for StreamingAdder.
1427/// Memory layout (standard C struct ordering, fields in declaration order):
1428/// ESI type: struct { add_amt: UInt(32), input: List<UInt(32)> }
1429/// becomes host struct:
1430/// { input_length (size_t, 8 bytes on 64-bit), add_amt (uint32_t),
1431/// input_data[] }
1432/// Note: The translation layer handles the conversion between this C struct
1433/// layout and the hardware's SystemVerilog frame format.
1434/// Note: size_t is used for list lengths, so this format is platform-dependent.
1435#pragma pack(push, 1)
1438 uint32_t addAmt;
1439 // Trailing array data follows immediately after the struct in memory.
1440 // Use inputData() accessor to access it.
1441
1442 /// Get pointer to trailing input data array.
1443 uint32_t *inputData() { return reinterpret_cast<uint32_t *>(this + 1); }
1444 const uint32_t *inputData() const {
1445 return reinterpret_cast<const uint32_t *>(this + 1);
1446 }
1447 /// Get span view of input data (requires inputLength to be set first).
1448 std::span<uint32_t> inputDataSpan() { return {inputData(), inputLength}; }
1449 std::span<const uint32_t> inputDataSpan() const {
1450 return {inputData(), inputLength};
1451 }
1452
1453 static size_t allocSize(size_t numItems) {
1454 return sizeof(StreamingAddTranslatedArg) + numItems * sizeof(uint32_t);
1455 }
1456};
1457#pragma pack(pop)
1458
1459/// Translated result struct for StreamingAdder.
1460/// Memory layout:
1461/// struct { data: List<UInt(32)> }
1462/// becomes:
1463/// { data_length (size_t, 8 bytes on 64-bit), data[] }
1464#pragma pack(push, 1)
1467 // Trailing array data follows immediately after the struct in memory.
1468
1469 /// Get pointer to trailing result data array.
1470 uint32_t *data() { return reinterpret_cast<uint32_t *>(this + 1); }
1471 const uint32_t *data() const {
1472 return reinterpret_cast<const uint32_t *>(this + 1);
1473 }
1474 /// Get span view of result data (requires dataLength to be set first).
1475 std::span<uint32_t> dataSpan() { return {data(), dataLength}; }
1476 std::span<const uint32_t> dataSpan() const { return {data(), dataLength}; }
1477
1478 static size_t allocSize(size_t numItems) {
1479 return sizeof(StreamingAddTranslatedResult) + numItems * sizeof(uint32_t);
1480 }
1481};
1482#pragma pack(pop)
1483
1485 Accelerator *accel, uint32_t addAmt,
1486 uint32_t numItems) {
1487 Logger &logger = conn->getLogger();
1488 logger.info("esitester",
1489 "Starting streaming add test (translated) with add_amt=" +
1490 std::to_string(addAmt) +
1491 ", num_items=" + std::to_string(numItems));
1492
1493 // Generate random input data.
1494 std::mt19937 rng(0xDEADBEEF);
1495 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1496 std::vector<uint32_t> inputData;
1497 inputData.reserve(numItems);
1498 for (uint32_t i = 0; i < numItems; ++i)
1499 inputData.push_back(dist(rng));
1500
1501 // Find the streaming_adder child.
1502 auto streamingAdderChild =
1503 accel->getChildren().find(AppID("streaming_adder"));
1504 if (streamingAdderChild == accel->getChildren().end())
1505 throw std::runtime_error(
1506 "Streaming add test: no 'streaming_adder' child found");
1507
1508 auto &ports = streamingAdderChild->second->getPorts();
1509 auto addIter = ports.find(AppID("streaming_add"));
1510 if (addIter == ports.end())
1511 throw std::runtime_error(
1512 "Streaming add test: no 'streaming_add' port found");
1513
1514 // Get the raw read/write channel ports with translation enabled (default).
1515 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1516 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1517
1518 // Connect with translation enabled (the default).
1519 argPort.connect();
1520 resultPort.connect();
1521
1522 // Allocate the argument struct with proper alignment for the struct members.
1523 // We use aligned_alloc to ensure the buffer meets alignment requirements.
1524 size_t argSize = StreamingAddTranslatedArg::allocSize(numItems);
1525 constexpr size_t alignment = alignof(StreamingAddTranslatedArg);
1526 // aligned_alloc requires size to be a multiple of alignment
1527 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1528 void *argRaw = alignedAllocCompat(alignment, allocSize);
1529 if (!argRaw)
1530 throw std::bad_alloc();
1531 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1532 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1533 auto *arg = static_cast<StreamingAddTranslatedArg *>(argRaw);
1534 arg->inputLength = numItems;
1535 arg->addAmt = addAmt;
1536 for (uint32_t i = 0; i < numItems; ++i)
1537 arg->inputData()[i] = inputData[i];
1538
1539 logger.debug("esitester",
1540 "Sending translated argument: " + std::to_string(argSize) +
1541 " bytes, list_length=" + std::to_string(arg->inputLength) +
1542 ", add_amt=" + std::to_string(arg->addAmt));
1543
1544 // Send the complete message - translation will split it into frames.
1545 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize));
1546 // argBuffer automatically freed when it goes out of scope
1547
1548 // Read the translated result.
1549 MessageData resMsg;
1550 resultPort.read(resMsg);
1551
1552 logger.debug("esitester", "Received translated result: " +
1553 std::to_string(resMsg.getSize()) + " bytes");
1554
1555 if (resMsg.getSize() < sizeof(StreamingAddTranslatedResult))
1556 throw std::runtime_error(
1557 "Streaming add test (translated): result too small");
1558
1559 const auto *result =
1560 reinterpret_cast<const StreamingAddTranslatedResult *>(resMsg.getBytes());
1561
1562 if (resMsg.getSize() <
1563 StreamingAddTranslatedResult::allocSize(result->dataLength))
1564 throw std::runtime_error(
1565 "Streaming add test (translated): result data truncated");
1566
1567 // Verify results.
1568 if (result->dataLength != inputData.size())
1569 throw std::runtime_error(
1570 "Streaming add test (translated): result size mismatch. Expected " +
1571 std::to_string(inputData.size()) + ", got " +
1572 std::to_string(result->dataLength));
1573
1574 bool passed = true;
1575 std::cout << "Streaming add test results:" << std::endl;
1576 for (size_t i = 0; i < inputData.size(); ++i) {
1577 uint32_t expected = inputData[i] + addAmt;
1578 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1579 << " = " << result->data()[i] << " (expected " << expected << ")";
1580 if (result->data()[i] != expected) {
1581 std::cout << " MISMATCH!";
1582 passed = false;
1583 }
1584 std::cout << std::endl;
1585 }
1586
1587 argPort.disconnect();
1588 resultPort.disconnect();
1589
1590 if (!passed)
1591 throw std::runtime_error(
1592 "Streaming add test (translated) failed: result mismatch");
1593
1594 logger.info("esitester", "Streaming add test passed (translated)");
1595 std::cout << "Streaming add test passed" << std::endl;
1596}
1597
/// Test the CoordTranslator module using message translation.
/// This version uses the list translation support where the host-side message
/// format is:
///   Argument: { coords_length, y_translation, x_translation, coords[] }
///   Result:   { coords_length, coords[] }
/// Each coord is a struct { x, y }, stored with y before x in memory.
1603
/// Coordinate struct for CoordTranslator.
/// SV ordering means y comes before x in memory.
#pragma pack(push, 1)
struct Coord {
  uint32_t y; // SV ordering: last declared field first in memory
  uint32_t x;
};
#pragma pack(pop)
static_assert(sizeof(Coord) == 8, "Coord must be 8 bytes packed");
1613
1614/// Translated argument struct for CoordTranslator.
1615/// Memory layout (standard C struct ordering):
1616/// ESI type: struct { x_translation: UInt(32), y_translation: UInt(32),
1617/// coords: List<struct{x, y}> }
1618/// becomes host struct:
1619/// { coords_length (size_t, 8 bytes on 64-bit), y_translation (uint32_t),
1620/// x_translation (uint32_t), coords[] }
1621/// Note: Fields are in reverse order due to SV struct ordering.
1622/// Note: size_t is used for list lengths, so this format is platform-dependent.
1623#pragma pack(push, 1)
1626 uint32_t yTranslation; // SV ordering: last declared field first in memory
1628 // Trailing array data follows immediately after the struct in memory.
1629
1630 /// Get pointer to trailing coords array.
1631 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1632 const Coord *coords() const {
1633 return reinterpret_cast<const Coord *>(this + 1);
1634 }
1635 /// Get span view of coords (requires coordsLength to be set first).
1636 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1637 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1638
1639 static size_t allocSize(size_t numCoords) {
1640 return sizeof(CoordTranslateArg) + numCoords * sizeof(Coord);
1641 }
1642};
1643#pragma pack(pop)
1644
1645/// Translated result struct for CoordTranslator.
1646/// Memory layout:
1647/// ESI type: List<struct{x, y}>
1648/// becomes host struct:
1649/// { coords_length (size_t, 8 bytes on 64-bit), coords[] }
1650#pragma pack(push, 1)
1653 // Trailing array data follows immediately after the struct in memory.
1654
1655 /// Get pointer to trailing coords array.
1656 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1657 const Coord *coords() const {
1658 return reinterpret_cast<const Coord *>(this + 1);
1659 }
1660 /// Get span view of coords (requires coordsLength to be set first).
1661 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1662 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1663
1664 static size_t allocSize(size_t numCoords) {
1665 return sizeof(CoordTranslateResult) + numCoords * sizeof(Coord);
1666 }
1667};
1668#pragma pack(pop)
1669
1671 uint32_t xTrans, uint32_t yTrans,
1672 uint32_t numCoords) {
1673 Logger &logger = conn->getLogger();
1674 logger.info("esitester", "Starting coord translate test with x_trans=" +
1675 std::to_string(xTrans) +
1676 ", y_trans=" + std::to_string(yTrans) +
1677 ", num_coords=" + std::to_string(numCoords));
1678
1679 // Generate random input coordinates.
1680 // Note: Coord struct has y before x due to SV ordering, but we generate
1681 // and display as (x, y) for human readability.
1682 std::mt19937 rng(0xDEADBEEF);
1683 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1684 std::vector<Coord> inputCoords;
1685 inputCoords.reserve(numCoords);
1686 for (uint32_t i = 0; i < numCoords; ++i) {
1687 Coord c;
1688 c.x = dist(rng);
1689 c.y = dist(rng);
1690 inputCoords.push_back(c);
1691 }
1692
1693 // Find the coord_translator child.
1694 auto coordTranslatorChild =
1695 accel->getChildren().find(AppID("coord_translator"));
1696 if (coordTranslatorChild == accel->getChildren().end())
1697 throw std::runtime_error(
1698 "Coord translate test: no 'coord_translator' child found");
1699
1700 auto &ports = coordTranslatorChild->second->getPorts();
1701 auto translateIter = ports.find(AppID("translate_coords"));
1702 if (translateIter == ports.end())
1703 throw std::runtime_error(
1704 "Coord translate test: no 'translate_coords' port found");
1705
1706 // Use FuncService::Function which handles connection and translation.
1707 auto *funcPort =
1708 translateIter->second.getAs<services::FuncService::Function>();
1709 if (!funcPort)
1710 throw std::runtime_error(
1711 "Coord translate test: 'translate_coords' port not a "
1712 "FuncService::Function");
1713 funcPort->connect();
1714
1715 // Allocate the argument struct with proper alignment for the struct members.
1716 size_t argSize = CoordTranslateArg::allocSize(numCoords);
1717 constexpr size_t alignment = alignof(CoordTranslateArg);
1718 // aligned_alloc requires size to be a multiple of alignment
1719 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1720 void *argRaw = alignedAllocCompat(alignment, allocSize);
1721 if (!argRaw)
1722 throw std::bad_alloc();
1723 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1724 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1725 auto *arg = static_cast<CoordTranslateArg *>(argRaw);
1726 arg->coordsLength = numCoords;
1727 arg->xTranslation = xTrans;
1728 arg->yTranslation = yTrans;
1729 for (uint32_t i = 0; i < numCoords; ++i)
1730 arg->coords()[i] = inputCoords[i];
1731
1732 logger.debug(
1733 "esitester",
1734 "Sending coord translate argument: " + std::to_string(argSize) +
1735 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1736 ", x_trans=" + std::to_string(arg->xTranslation) +
1737 ", y_trans=" + std::to_string(arg->yTranslation));
1738
1739 // Call the function - translation happens automatically.
1740 MessageData resMsg =
1741 funcPort
1742 ->call(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize))
1743 .get();
1744 // argBuffer automatically freed when it goes out of scope
1745
1746 logger.debug("esitester", "Received coord translate result: " +
1747 std::to_string(resMsg.getSize()) + " bytes");
1748
1749 if (resMsg.getSize() < sizeof(CoordTranslateResult))
1750 throw std::runtime_error("Coord translate test: result too small");
1751
1752 const auto *result =
1753 reinterpret_cast<const CoordTranslateResult *>(resMsg.getBytes());
1754
1755 if (resMsg.getSize() < CoordTranslateResult::allocSize(result->coordsLength))
1756 throw std::runtime_error("Coord translate test: result data truncated");
1757
1758 // Verify results.
1759 if (result->coordsLength != inputCoords.size())
1760 throw std::runtime_error(
1761 "Coord translate test: result size mismatch. Expected " +
1762 std::to_string(inputCoords.size()) + ", got " +
1763 std::to_string(result->coordsLength));
1764
1765 bool passed = true;
1766 std::cout << "Coord translate test results:" << std::endl;
1767 for (size_t i = 0; i < inputCoords.size(); ++i) {
1768 uint32_t expectedX = inputCoords[i].x + xTrans;
1769 uint32_t expectedY = inputCoords[i].y + yTrans;
1770 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1771 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1772 << ") = (" << result->coords()[i].x << ","
1773 << result->coords()[i].y << ")";
1774 if (result->coords()[i].x != expectedX ||
1775 result->coords()[i].y != expectedY) {
1776 std::cout << " MISMATCH! (expected (" << expectedX << "," << expectedY
1777 << "))";
1778 passed = false;
1779 }
1780 std::cout << std::endl;
1781 }
1782
1783 if (!passed)
1784 throw std::runtime_error("Coord translate test failed: result mismatch");
1785
1786 logger.info("esitester", "Coord translate test passed");
1787 std::cout << "Coord translate test passed" << std::endl;
1788}
1789
1790//
1791// SerialCoordTranslator test
1792//
1793
1794#pragma pack(push, 1)
1796 uint16_t coordsCount;
1799};
1801 uint16_t _pad_head;
1802 uint32_t y;
1803 uint32_t x;
1804};
1809#pragma pack(pop)
1810static_assert(sizeof(SerialCoordInputFrame) == 10, "Size mismatch");
1811
1812#pragma pack(push, 1)
1814 uint8_t _pad[6];
1815 uint16_t coordsCount;
1816};
1818 uint32_t y;
1819 uint32_t x;
1820};
1825#pragma pack(pop)
1826static_assert(sizeof(SerialCoordOutputFrame) == 8, "Size mismatch");
1827
1829 Accelerator *accel, uint32_t xTrans,
1830 uint32_t yTrans, uint32_t numCoords,
1831 size_t batchSizeLimit) {
1832 Logger &logger = conn->getLogger();
1833 logger.info("esitester", "Starting serial coord translate test");
1834
1835 // Generate random coordinates.
1836 std::mt19937 rng(0xDEADBEEF);
1837 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1838 std::vector<Coord> inputCoords;
1839 inputCoords.reserve(numCoords);
1840 for (uint32_t i = 0; i < numCoords; ++i) {
1841 inputCoords.push_back({dist(rng), dist(rng)});
1842 }
1843
1844 auto child = accel->getChildren().find(AppID("coord_translator_serial"));
1845 if (child == accel->getChildren().end())
1846 throw std::runtime_error("Serial coord translate test: no "
1847 "'coord_translator_serial' child found");
1848
1849 auto &ports = child->second->getPorts();
1850 auto portIter = ports.find(AppID("translate_coords_serial"));
1851 if (portIter == ports.end())
1852 throw std::runtime_error(
1853 "Serial coord translate test: no 'translate_coords_serial' port found");
1854
1855 WriteChannelPort &argPort = portIter->second.getRawWrite("arg");
1856 ReadChannelPort &resultPort = portIter->second.getRawRead("result");
1857
1858 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1859 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1860
1861 size_t sent = 0;
1862 while (sent < numCoords) {
1863 size_t batchSize = std::min(batchSizeLimit, numCoords - sent);
1864
1865 // Send Header. Only the first header needs the translation values, test the
1866 // subsequent ones with zero translation to verify that the hardware
1867 // correctly applies the first header's translation to the whole list.
1868 SerialCoordInputFrame headerFrame;
1869 headerFrame.header.coordsCount = (uint16_t)batchSize;
1870 headerFrame.header.xTranslation = sent == 0 ? xTrans : 0;
1871 headerFrame.header.yTranslation = sent == 0 ? yTrans : 0;
1872 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(&headerFrame),
1873 sizeof(headerFrame)));
1874
1875 // Send Data
1876 for (size_t i = 0; i < batchSize; ++i) {
1877 SerialCoordInputFrame dataFrame;
1878 dataFrame.data._pad_head = 0;
1879 dataFrame.data.x = inputCoords[sent + i].x;
1880 dataFrame.data.y = inputCoords[sent + i].y;
1881 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(&dataFrame),
1882 sizeof(dataFrame)));
1883 }
1884 sent += batchSize;
1885 }
1886 // Send final header with count=0 to signal end of input
1887 SerialCoordHeader footerData{0, 0, 0};
1888 auto footer = MessageData::from(footerData);
1889 argPort.write(footer);
1890
1891 // Read results. The hardware echoes headers (with count) followed by
1892 // translated data frames, then autonomously sends a footer header with
1893 // count=0 to signal end of list.
1894 std::vector<Coord> results;
1895 while (true) {
1896 // Read Header
1897 MessageData msg;
1898 resultPort.read(msg);
1899 if (msg.getSize() != sizeof(SerialCoordOutputFrame))
1900 throw std::runtime_error("Unexpected result message size");
1901
1902 const auto *frame =
1903 reinterpret_cast<const SerialCoordOutputFrame *>(msg.getBytes());
1904 uint16_t batchCount = frame->header.coordsCount;
1905 if (batchCount == 0)
1906 break;
1907
1908 // Read Data
1909 for (uint16_t i = 0; i < batchCount; ++i) {
1910 resultPort.read(msg);
1911 if (msg.getSize() != sizeof(SerialCoordOutputFrame))
1912 throw std::runtime_error("Unexpected result message size");
1913 const auto *dFrame =
1914 reinterpret_cast<const SerialCoordOutputFrame *>(msg.getBytes());
1915 results.push_back({dFrame->data.y, dFrame->data.x});
1916 }
1917 }
1918
1919 // Verify
1920 bool passed = true;
1921 std::cout << "Serial coord translate test results:" << std::endl;
1922 if (results.size() != inputCoords.size()) {
1923 std::cout << "Result size mismatch. Expected " << inputCoords.size()
1924 << ", got " << results.size() << std::endl;
1925 passed = false;
1926 }
1927 for (size_t i = 0; i < std::min(inputCoords.size(), results.size()); ++i) {
1928 uint32_t expX = inputCoords[i].x + xTrans;
1929 uint32_t expY = inputCoords[i].y + yTrans;
1930 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1931 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1932 << ") = (" << results[i].x << "," << results[i].y
1933 << ") (expected (" << expX << "," << expY << "))";
1934 if (results[i].x != expX || results[i].y != expY) {
1935 std::cout << " MISMATCH!";
1936 passed = false;
1937 }
1938 std::cout << std::endl;
1939 }
1940
1941 argPort.disconnect();
1942 resultPort.disconnect();
1943
1944 if (!passed)
1945 throw std::runtime_error("Serial coord translate test failed");
1946
1947 logger.info("esitester", "Serial coord translate test passed");
1948 std::cout << "Serial coord translate test passed" << std::endl;
1949}
1950
1952 uint32_t iterations) {
1953 Logger &logger = conn->getLogger();
1954
1955 auto channelChild = accel->getChildren().find(AppID("channel_test"));
1956 if (channelChild == accel->getChildren().end())
1957 throw std::runtime_error("Channel test: no 'channel_test' child");
1958 auto &ports = channelChild->second->getPorts();
1959
1960 // --- Get the MMIO port to trigger the producer ---
1961 auto cmdIter = ports.find(AppID("cmd"));
1962 if (cmdIter == ports.end())
1963 throw std::runtime_error("Channel test: no 'cmd' port");
1964 auto *cmdMMIO = cmdIter->second.getAs<services::MMIO::MMIORegion>();
1965 if (!cmdMMIO)
1966 throw std::runtime_error("Channel test: 'cmd' is not MMIO");
1967
1968 // --- Get the producer to_host port ---
1969 auto producerIter = ports.find(AppID("producer"));
1970 if (producerIter == ports.end())
1971 throw std::runtime_error("Channel test: no 'producer' port");
1972 auto *producerPort =
1973 producerIter->second.getAs<services::ChannelService::ToHost>();
1974 if (!producerPort)
1975 throw std::runtime_error(
1976 "Channel test: 'producer' is not a ChannelService::ToHost");
1977 producerPort->connect();
1978
1979 // --- Test to_host: MMIO-triggered incrementing values ---
1980 // Write the number of values to send at offset 0x0.
1981 cmdMMIO->write(0x0, iterations);
1982
1983 for (uint32_t i = 0; i < iterations; ++i) {
1984 MessageData recvData = producerPort->read().get();
1985 uint32_t got = *recvData.as<uint32_t>();
1986 std::cout << "[channel] producer i=" << i << " got=" << got << std::endl;
1987 if (got != i)
1988 throw std::runtime_error("Channel producer: expected " +
1989 std::to_string(i) + ", got " +
1990 std::to_string(got));
1991 }
1992 logger.info("esitester", "Channel test: producer passed (" +
1993 std::to_string(iterations) +
1994 " incrementing values)");
1995
1996 // --- Test from_host -> to_host loopback ---
1997 auto loopbackInIter = ports.find(AppID("loopback_in"));
1998 if (loopbackInIter == ports.end())
1999 throw std::runtime_error("Channel test: no 'loopback_in' port");
2000 auto *fromHostPort =
2001 loopbackInIter->second.getAs<services::ChannelService::FromHost>();
2002 if (!fromHostPort)
2003 throw std::runtime_error(
2004 "Channel test: 'loopback_in' is not a ChannelService::FromHost");
2005 fromHostPort->connect();
2006
2007 auto loopbackOutIter = ports.find(AppID("loopback_out"));
2008 if (loopbackOutIter == ports.end())
2009 throw std::runtime_error("Channel test: no 'loopback_out' port");
2010 auto *loopbackOutPort =
2011 loopbackOutIter->second.getAs<services::ChannelService::ToHost>();
2012 if (!loopbackOutPort)
2013 throw std::runtime_error(
2014 "Channel test: 'loopback_out' is not a ChannelService::ToHost");
2015 loopbackOutPort->connect();
2016
2017 std::mt19937_64 rng(0xDEADBEEF);
2018 std::uniform_int_distribution<uint32_t> dist(0, UINT32_MAX);
2019
2020 for (uint32_t i = 0; i < iterations; ++i) {
2021 uint32_t sendVal = dist(rng);
2022 fromHostPort->write(MessageData::from(sendVal));
2023 MessageData recvData = loopbackOutPort->read().get();
2024 uint32_t recvVal = *recvData.as<uint32_t>();
2025 std::cout << "[channel] loopback i=" << i << " sent=0x"
2026 << esi::toHex(sendVal) << " recv=0x" << esi::toHex(recvVal)
2027 << std::endl;
2028 if (recvVal != sendVal)
2029 throw std::runtime_error("Channel loopback mismatch at i=" +
2030 std::to_string(i));
2031 }
2032
2033 logger.info("esitester", "Channel test: loopback passed (" +
2034 std::to_string(iterations) + " iterations)");
2035 std::cout << "Channel test passed" << std::endl;
2036}
static void print(TypedAttr val, llvm::raw_ostream &os)
static void writePort(uint16_t port)
Write the port number to a file.
Definition RpcServer.cpp:39
Abstract class representing a connection to an accelerator.
Definition Accelerator.h:89
Top level accelerator class.
Definition Accelerator.h:70
Services provide connections to 'bundles' – collections of named, unidirectional communication channe...
Definition Ports.h:433
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
Definition Ports.h:461
ReadChannelPort & getRawRead(const std::string &name) const
Definition Ports.cpp:52
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Definition Ports.cpp:42
Common options and code for ESI runtime tools.
Definition CLI.h:29
Context & getContext()
Get the context.
Definition CLI.h:63
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
Definition CLI.h:60
int esiParse(int argc, const char **argv)
Run the parser.
Definition CLI.h:46
AcceleratorConnections, Accelerators, and Manifests must all share a context.
Definition Context.h:34
Logger & getLogger()
Definition Context.h:69
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
Definition Design.h:71
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
Definition Logging.h:64
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
Definition Logging.h:75
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Definition Logging.h:83
Class to parse a manifest.
Definition Manifest.h:39
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A logical chunk of data representing serialized data.
Definition Common.h:113
const uint8_t * getBytes() const
Definition Common.h:124
const T * as() const
Cast to a type.
Definition Common.h:148
size_t getSize() const
Get the size of the data in bytes.
Definition Common.h:138
static MessageData from(T &t)
Cast from a type to its raw bytes.
Definition Common.h:158
A ChannelPort which reads data from the accelerator.
Definition Ports.h:318
virtual void connect(std::function< bool(MessageData)> callback, const ConnectOptions &options={})
Definition Ports.cpp:69
virtual void disconnect() override
Definition Ports.h:323
virtual void read(MessageData &outData)
Specify a buffer to read into.
Definition Ports.h:358
A ChannelPort which sends data to the accelerator.
Definition Ports.h:206
virtual void disconnect() override
Definition Ports.h:217
void write(const MessageData &data)
A very basic blocking write API.
Definition Ports.h:222
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
Definition Ports.h:210
A function call which gets attached to a service port.
Definition Services.h:405
A port which writes data to the accelerator (from_host).
Definition Services.h:315
A port which reads data from the accelerator (to_host).
Definition Services.h:291
A function call which gets attached to a service port.
Definition Services.h:353
virtual void start()
In cases where necessary, enable host memory services.
Definition Services.h:261
A "slice" of some parent MMIO space.
Definition Services.h:181
Information about the Accelerator system.
Definition Services.h:113
A telemetry port which gets attached to a service port.
Definition Services.h:469
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
Definition Services.cpp:457
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
static void serialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords, size_t batchSizeLimit)
constexpr std::array< uint32_t, 5 > defaultWidths
Definition esitester.cpp:77
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static void channelTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static std::string formatBandwidth(double bytesPerSec)
Definition esitester.cpp:89
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static std::string defaultWidthsStr()
Definition esitester.cpp:78
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Definition debug.py:1
Definition esi.py:1
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
Definition Logging.cpp:132
std::string toHex(void *val)
Definition Common.cpp:37
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
uint32_t x
uint32_t y
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
RAII memory region for host memory.
Definition Services.h:237
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
Definition Services.h:243
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
Definition Services.h:251
virtual std::size_t getSize() const =0
SerialCoordHeader header
SerialCoordData data
SerialCoordOutputData data
SerialCoordOutputHeader header