CIRCT 23.0.0git
Loading...
Searching...
No Matches
esitester.cpp
Go to the documentation of this file.
1//===- esitester.cpp - ESI accelerator test/example tool ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DO NOT EDIT!
10// This file is distributed as part of an ESI runtime package. The source for
11// this file should always be modified within CIRCT
12// (lib/dialect/ESI/runtime/cpp/tools/esitester.cpp).
13//
14//===----------------------------------------------------------------------===//
15//
16// This application isn't a utility so much as a test driver for an ESI system.
17// It is also useful as an example of how to use the ESI C++ API. esiquery.cpp
18// is also useful as an example.
19//
20//===----------------------------------------------------------------------===//
21
#include "esi/Accelerator.h"
#include "esi/CLI.h"
#include "esi/Manifest.h"
#include "esi/Services.h"
#include "esi/TypedPorts.h"

#include <algorithm>
#include <any>
#include <array>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <future>
#include <iostream>
#include <map>
#include <memory>
#include <new>
#include <random>
#include <span>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>
42
43using namespace esi;
44
45// Forward declarations of test functions.
47 uint32_t iterations);
49 const std::vector<uint32_t> &widths, bool write,
50 bool read);
52 uint32_t xferCount,
53 const std::vector<uint32_t> &widths, bool read,
54 bool write);
56 const std::vector<uint32_t> &widths, bool read, bool write);
58 const std::vector<uint32_t> &widths,
59 uint32_t xferCount, bool read, bool write);
61 uint32_t iterations, bool pipeline);
63 Accelerator *, uint32_t width,
64 uint32_t xferCount, bool read,
65 bool write);
67 uint32_t addAmt, uint32_t numItems);
69 uint32_t addAmt, uint32_t numItems);
71 uint32_t xTrans, uint32_t yTrans,
72 uint32_t numCoords);
74 uint32_t xTrans, uint32_t yTrans,
75 uint32_t numCoords, size_t batchSizeLimit);
77 uint32_t xTrans, uint32_t yTrans,
78 uint32_t numCoords);
80 uint32_t iterations);
81
// Bit widths the tests run by default, and a helper that renders them for the
// CLI help text.
constexpr std::array<uint32_t, 5> defaultWidths = {32, 64, 128, 256, 512};

/// Returns the default widths joined with commas, with no trailing separator.
static std::string defaultWidthsStr() {
  std::string joined;
  const char *separator = "";
  for (uint32_t width : defaultWidths) {
    joined += separator;
    joined += std::to_string(width);
    separator = ",";
  }
  return joined;
}
93
/// Formats a bandwidth given in bytes per second using the largest decimal
/// unit (B/s, KB/s, MB/s, GB/s) whose threshold the value reaches, printed
/// with two digits after the decimal point.
static std::string formatBandwidth(double bytesPerSec) {
  struct Scale {
    double divisor;
    const char *suffix;
  };
  // Ordered largest first so the first match wins.
  static constexpr Scale scales[] = {
      {1e9, "GB/s"}, {1e6, "MB/s"}, {1e3, "KB/s"}};

  double scaled = bytesPerSec;
  const char *suffix = "B/s";
  for (const Scale &scale : scales) {
    if (bytesPerSec >= scale.divisor) {
      scaled = bytesPerSec / scale.divisor;
      suffix = scale.suffix;
      break;
    }
  }

  std::ostringstream out;
  out << std::fixed;
  out.precision(2);
  out << scaled << " " << suffix;
  return out.str();
}
114
/// Renders a byte count with binary (1024-based) scaling, using units from B
/// up to TB. Plain bytes are printed without decimals; every larger unit is
/// printed with two. Values beyond the TB range stay in TB.
static std::string humanBytes(uint64_t bytes) {
  static constexpr const char *unitNames[] = {"B", "KB", "MB", "GB", "TB"};
  constexpr int lastUnit = 4;

  double scaled = static_cast<double>(bytes);
  int unitIdx = 0;
  for (; unitIdx < lastUnit && scaled >= 1024.0; ++unitIdx)
    scaled /= 1024.0;

  std::ostringstream out;
  out << std::fixed;
  out.precision(unitIdx == 0 ? 0 : 2);
  out << scaled << " " << unitNames[unitIdx];
  return out.str();
}
130
/// Human-readable time from microseconds.
///
/// Values below 1000 us are printed as whole microseconds. Larger values are
/// printed in milliseconds or seconds with roughly three significant digits.
/// The unit and precision are chosen with rounding taken into account, so a
/// value such as 999999 us is shown as "1.000 s" rather than "1000 ms", and
/// 9999 us as "10.0 ms" rather than "10.00 ms".
static std::string humanTimeUS(uint64_t us) {
  if (us < 1000)
    return std::to_string(us) + " us";

  std::ostringstream oss;
  oss.setf(std::ios::fixed);

  const double ms = us / 1000.0;
  // Each threshold sits half a display step below the next power of ten, so
  // the printed number can never round up into the next bucket.
  if (ms < 999.5) {
    oss.precision(ms < 9.995 ? 2 : (ms < 99.95 ? 1 : 0));
    oss << ms << " ms";
    return oss.str();
  }

  const double sec = ms / 1000.0;
  oss.precision(sec < 9.9995 ? 3 : 2);
  oss << sec << " s";
  return oss.str();
}
150
// MSVC does not implement std::aligned_alloc, even though it's part of the
// C++17 standard. Provide a compatibility layer.

/// Allocates at least `size` bytes aligned to `alignment`.
///
/// On MSVC this calls _aligned_malloc; elsewhere std::aligned_alloc. Throws
/// std::bad_alloc if the underlying allocator returns null. Memory returned
/// here is released with alignedFreeCompat.
static void *alignedAllocCompat(std::size_t alignment, std::size_t size) {
#if defined(_MSC_VER)
  void *ptr = _aligned_malloc(size, alignment);
#else
  // std::aligned_alloc requires the size to be an integral multiple of the
  // alignment; some C libraries return null otherwise. Round the request up so
  // callers may pass any size.
  if (alignment != 0 && size % alignment != 0)
    size += alignment - size % alignment;
  void *ptr = std::aligned_alloc(alignment, size);
#endif
  if (!ptr)
    throw std::bad_alloc();
  return ptr;
}
166
/// Releases a pointer using the deallocator that matches the platform's
/// aligned allocator: std::free everywhere except MSVC, where _aligned_free
/// is used.
static void alignedFreeCompat(void *ptr) {
#if !defined(_MSC_VER)
  std::free(ptr);
#else
  _aligned_free(ptr);
#endif
}
174
175int main(int argc, const char *argv[]) {
176 CliParser cli("esitester");
177 cli.description("Test an ESI system running the ESI tester image.");
178 cli.require_subcommand(1);
179
180 CLI::App *callback_test =
181 cli.add_subcommand("callback", "initiate callback test");
182 uint32_t cb_iters = 1;
183 callback_test->add_option("-i,--iters", cb_iters,
184 "Number of iterations to run");
185
186 CLI::App *hostmemtestSub =
187 cli.add_subcommand("hostmem", "Run the host memory test");
188 bool hmRead = false;
189 bool hmWrite = false;
190 std::vector<uint32_t> hostmemWidths(defaultWidths.begin(),
191 defaultWidths.end());
192 hostmemtestSub->add_flag("-w,--write", hmWrite,
193 "Enable host memory write test");
194 hostmemtestSub->add_flag("-r,--read", hmRead, "Enable host memory read test");
195 hostmemtestSub->add_option(
196 "--widths", hostmemWidths,
197 "Hostmem test widths (default: " + defaultWidthsStr() + ")");
198
199 CLI::App *dmatestSub = cli.add_subcommand("dma", "Run the DMA test");
200 bool dmaRead = false;
201 bool dmaWrite = false;
202 std::vector<uint32_t> dmaWidths(defaultWidths.begin(), defaultWidths.end());
203 dmatestSub->add_flag("-w,--write", dmaWrite, "Enable dma write test");
204 dmatestSub->add_flag("-r,--read", dmaRead, "Enable dma read test");
205 dmatestSub->add_option("--widths", dmaWidths,
206 "DMA test widths (default: " + defaultWidthsStr() +
207 ")");
208
209 CLI::App *bandwidthSub =
210 cli.add_subcommand("bandwidth", "Run the bandwidth test");
211 uint32_t xferCount = 1000;
212 bandwidthSub->add_option("-c,--count", xferCount,
213 "Number of transfers to perform");
214 bool bandwidthRead = false;
215 bool bandwidthWrite = false;
216 std::vector<uint32_t> bandwidthWidths(defaultWidths.begin(),
217 defaultWidths.end());
218 bandwidthSub->add_option("--widths", bandwidthWidths,
219 "Width of the transfers to perform (default: " +
220 defaultWidthsStr() + ")");
221 bandwidthSub->add_flag("-w,--write", bandwidthWrite,
222 "Enable bandwidth write");
223 bandwidthSub->add_flag("-r,--read", bandwidthRead, "Enable bandwidth read");
224
225 CLI::App *hostmembwSub =
226 cli.add_subcommand("hostmembw", "Run the host memory bandwidth test");
227 uint32_t hmBwCount = 1000;
228 bool hmBwRead = false;
229 bool hmBwWrite = false;
230 std::vector<uint32_t> hmBwWidths(defaultWidths.begin(), defaultWidths.end());
231 hostmembwSub->add_option("-c,--count", hmBwCount,
232 "Number of hostmem transfers");
233 hostmembwSub->add_option(
234 "--widths", hmBwWidths,
235 "Hostmem bandwidth widths (default: " + defaultWidthsStr() + ")");
236 hostmembwSub->add_flag("-w,--write", hmBwWrite,
237 "Measure hostmem write bandwidth");
238 hostmembwSub->add_flag("-r,--read", hmBwRead,
239 "Measure hostmem read bandwidth");
240
241 CLI::App *loopbackSub =
242 cli.add_subcommand("loopback", "Test LoopbackInOutAdd function service");
243 uint32_t loopbackIters = 10;
244 bool loopbackPipeline = false;
245 loopbackSub->add_option("-i,--iters", loopbackIters,
246 "Number of function invocations (default 10)");
247 loopbackSub->add_flag("-p,--pipeline", loopbackPipeline,
248 "Pipeline all calls then collect results");
249
250 CLI::App *aggBwSub = cli.add_subcommand(
251 "aggbandwidth",
252 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
253 uint32_t aggWidth = 512;
254 uint32_t aggCount = 1000;
255 bool aggRead = false;
256 bool aggWrite = false;
257 aggBwSub->add_option(
258 "--width", aggWidth,
259 "Bit width (default 512; other widths ignored if absent)");
260 aggBwSub->add_option("-c,--count", aggCount, "Flits per unit (default 1000)");
261 aggBwSub->add_flag("-r,--read", aggRead, "Include read units");
262 aggBwSub->add_flag("-w,--write", aggWrite, "Include write units");
263
264 CLI::App *streamingAddSub = cli.add_subcommand(
265 "streaming_add", "Test StreamingAdder function service with list input");
266 uint32_t streamingAddAmt = 5;
267 uint32_t streamingNumItems = 5;
268 bool streamingTranslate = false;
269 streamingAddSub->add_option("-a,--add", streamingAddAmt,
270 "Amount to add to each element (default 5)");
271 streamingAddSub->add_option("-n,--num-items", streamingNumItems,
272 "Number of random items in the list (default 5)");
273 streamingAddSub->add_flag("-t,--translate", streamingTranslate,
274 "Use message translation (list translation)");
275
276 CLI::App *coordTranslateSub = cli.add_subcommand(
277 "translate_coords",
278 "Test CoordTranslator function service with list of coordinates");
279 uint32_t coordXTrans = 10;
280 uint32_t coordYTrans = 20;
281 uint32_t coordNumItems = 5;
282 coordTranslateSub->add_option("-x,--x-translation", coordXTrans,
283 "X translation amount (default 10)");
284 coordTranslateSub->add_option("-y,--y-translation", coordYTrans,
285 "Y translation amount (default 20)");
286 coordTranslateSub->add_option("-n,--num-coords", coordNumItems,
287 "Number of random coordinates (default 5)");
288
289 CLI::App *serialCoordTranslateSub = cli.add_subcommand(
290 "serial_coords",
291 "Test SerialCoordTranslator function service with list of coordinates");
292 uint32_t serialBatchSize = 240;
293 serialCoordTranslateSub->add_option("-x,--x-translation", coordXTrans,
294 "X translation amount (default 10)");
295 serialCoordTranslateSub->add_option("-y,--y-translation", coordYTrans,
296 "Y translation amount (default 20)");
297 serialCoordTranslateSub->add_option(
298 "-n,--num-coords", coordNumItems,
299 "Number of random coordinates (default 5)");
300 serialCoordTranslateSub
301 ->add_option("-b,--batch-size", serialBatchSize,
302 "Coordinates per header (default 240, max 65535)")
303 ->check(CLI::Range(1u, 0xFFFFu));
304
305 CLI::App *autoSerialCoordTranslateSub = cli.add_subcommand(
306 "auto_serial_coords",
307 "Test AutoSerialCoordTranslator (uses ListWindowToParallel/Serial "
308 "converters under the hood)");
309 uint32_t autoCoordXTrans = 10;
310 uint32_t autoCoordYTrans = 20;
311 uint32_t autoCoordNumItems = 5;
312 autoSerialCoordTranslateSub->add_option("-x,--x-translation", autoCoordXTrans,
313 "X translation amount (default 10)");
314 autoSerialCoordTranslateSub->add_option("-y,--y-translation", autoCoordYTrans,
315 "Y translation amount (default 20)");
316 autoSerialCoordTranslateSub->add_option(
317 "-n,--num-coords", autoCoordNumItems,
318 "Number of random coordinates (default 5)");
319
320 CLI::App *channelTestSub = cli.add_subcommand(
321 "channel", "Test ChannelService to_host and from_host");
322 uint32_t channelIters = 10;
323 channelTestSub->add_option("-i,--iters", channelIters,
324 "Number of loopback iterations (default 10)");
325
326 if (int rc = cli.esiParse(argc, argv))
327 return rc;
328 if (!cli.get_help_ptr()->empty())
329 return 0;
330
331 Context &ctxt = cli.getContext();
332 AcceleratorConnection *acc = cli.connect();
333 try {
334 const auto &info = *acc->getService<services::SysInfo>();
335 ctxt.getLogger().info("esitester", "Connected to accelerator.");
336 Manifest manifest(ctxt, info.getJsonManifest());
337 Accelerator *accel = manifest.buildAccelerator(*acc);
338 ctxt.getLogger().info("esitester", "Built accelerator.");
339 acc->getServiceThread()->addPoll(*accel);
340
341 if (*callback_test) {
342 callbackTest(acc, accel, cb_iters);
343 } else if (*hostmemtestSub) {
344 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
345 } else if (*loopbackSub) {
346 loopbackAddTest(acc, accel, loopbackIters, loopbackPipeline);
347 } else if (*dmatestSub) {
348 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
349 } else if (*bandwidthSub) {
350 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
351 bandwidthWrite);
352 } else if (*hostmembwSub) {
353 hostmemBandwidthTest(acc, accel, hmBwCount, hmBwWidths, hmBwRead,
354 hmBwWrite);
355 } else if (*aggBwSub) {
356 aggregateHostmemBandwidthTest(acc, accel, aggWidth, aggCount, aggRead,
357 aggWrite);
358 } else if (*streamingAddSub) {
359 if (streamingTranslate)
360 streamingAddTranslatedTest(acc, accel, streamingAddAmt,
361 streamingNumItems);
362 else
363 streamingAddTest(acc, accel, streamingAddAmt, streamingNumItems);
364 } else if (*coordTranslateSub) {
365 coordTranslateTest(acc, accel, coordXTrans, coordYTrans, coordNumItems);
366 } else if (*serialCoordTranslateSub) {
367 serialCoordTranslateTest(acc, accel, coordXTrans, coordYTrans,
368 coordNumItems, serialBatchSize);
369 } else if (*autoSerialCoordTranslateSub) {
370 autoSerialCoordTranslateTest(acc, accel, autoCoordXTrans, autoCoordYTrans,
371 autoCoordNumItems);
372 } else if (*channelTestSub) {
373 channelTest(acc, accel, channelIters);
374 }
375
376 acc->disconnect();
377 } catch (std::exception &e) {
378 ctxt.getLogger().error("esitester", e.what());
379 acc->disconnect();
380 return -1;
381 }
382 std::cout << "Exiting successfully\n";
383 return 0;
384}
385
387 uint32_t iterations) {
388 auto cb_test = accel->getChildren().find(AppID("cb_test"));
389 if (cb_test == accel->getChildren().end())
390 throw std::runtime_error("No cb_test child found in accelerator");
391 auto &ports = cb_test->second->getPorts();
392 auto cmd_port = ports.find(AppID("cmd"));
393 if (cmd_port == ports.end())
394 throw std::runtime_error("No cmd port found in cb_test child");
395 auto *cmdMMIO = cmd_port->second.getAs<services::MMIO::MMIORegion>();
396 if (!cmdMMIO)
397 throw std::runtime_error("cb_test cmd port is not MMIO");
398
399 auto f = ports.find(AppID("cb"));
400 if (f == ports.end())
401 throw std::runtime_error("No cb port found in accelerator");
402
403 auto *callPort = f->second.getAs<services::CallService::Callback>();
404 if (!callPort)
405 throw std::runtime_error("cb port is not a CallService::Callback");
406
407 std::atomic<uint32_t> callbackCount = 0;
408 callPort->connect(
409 [conn, &callbackCount](const MessageData &data) mutable -> MessageData {
410 callbackCount.fetch_add(1);
411 conn->getLogger().debug(
412 [&](std::string &subsystem, std::string &msg,
413 std::unique_ptr<std::map<std::string, std::any>> &details) {
414 subsystem = "ESITESTER";
415 msg = "Received callback";
416 details = std::make_unique<std::map<std::string, std::any>>();
417 details->emplace("data", data);
418 });
419 std::cout << "callback: " << *data.as<uint64_t>() << std::endl;
420 return MessageData();
421 },
422 true);
423
424 for (uint32_t i = 0; i < iterations; ++i) {
425 conn->getLogger().info("esitester", "Issuing callback command iteration " +
426 std::to_string(i) + "/" +
427 std::to_string(iterations));
428 cmdMMIO->write(0x10, i); // Command the callback
429 // Wait up to 1 second for the callback to be invoked.
430 for (uint32_t wait = 0; wait < 1000; ++wait) {
431 if (callbackCount.load() > i)
432 break;
433 std::this_thread::sleep_for(std::chrono::milliseconds(1));
434 }
435 if (callbackCount.load() <= i)
436 throw std::runtime_error("Callback test failed. No callback received");
437 }
438}
439
440/// Test the hostmem write functionality.
443 uint32_t width) {
444 std::cout << "Running hostmem WRITE test with width " << width << std::endl;
445 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
446 auto check = [&](bool print) {
447 bool ret = true;
448 for (size_t i = 0; i < 9; ++i) {
449 if (print)
450 printf("[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
451 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
452 ret = false;
453 }
454 return ret;
455 };
456
457 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
458 if (writeMemChildIter == acc->getChildren().end())
459 throw std::runtime_error(
460 "hostmem write test failed. No writemem child found");
461 auto &writeMemPorts = writeMemChildIter->second->getPorts();
462
463 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
464 if (cmdPortIter == writeMemPorts.end())
465 throw std::runtime_error(
466 "hostmem write test failed. No (cmd,width) MMIO port");
467 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
468 if (!cmdMMIO)
469 throw std::runtime_error(
470 "hostmem write test failed. (cmd,width) port not MMIO");
471
472 auto issuedPortIter = writeMemPorts.find(AppID("addrCmdIssued"));
473 if (issuedPortIter == writeMemPorts.end())
474 throw std::runtime_error(
475 "hostmem write test failed. addrCmdIssued missing");
476 auto *addrCmdIssuedPort =
477 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
478 if (!addrCmdIssuedPort)
479 throw std::runtime_error(
480 "hostmem write test failed. addrCmdIssued not telemetry");
481 addrCmdIssuedPort->connect();
482
483 auto responsesPortIter = writeMemPorts.find(AppID("addrCmdResponses"));
484 if (responsesPortIter == writeMemPorts.end())
485 throw std::runtime_error(
486 "hostmem write test failed. addrCmdResponses missing");
487 auto *addrCmdResponsesPort =
488 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
489 if (!addrCmdResponsesPort)
490 throw std::runtime_error(
491 "hostmem write test failed. addrCmdResponses not telemetry");
492 addrCmdResponsesPort->connect();
493
494 for (size_t i = 0, e = 9; i < e; ++i)
495 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
496 region.flush();
497 cmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
498 cmdMMIO->write(0x18, 1);
499 cmdMMIO->write(0x20, 1);
500 bool done = false;
501 for (int i = 0; i < 100; ++i) {
502 auto issued = addrCmdIssuedPort->readInt();
503 auto responses = addrCmdResponsesPort->readInt();
504 if (issued == 1 && responses == 1) {
505 done = true;
506 break;
507 }
508 std::this_thread::sleep_for(std::chrono::microseconds(100));
509 }
510 if (!done) {
511 check(true);
512 throw std::runtime_error("hostmem write test (" + std::to_string(width) +
513 " bits) timeout waiting for completion");
514 }
515 if (!check(true))
516 throw std::runtime_error("hostmem write test failed (" +
517 std::to_string(width) + " bits)");
518}
519
522 uint32_t width) {
523 std::cout << "Running hostmem READ test with width " << width << std::endl;
524 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
525 if (readMemChildIter == acc->getChildren().end())
526 throw std::runtime_error(
527 "hostmem read test failed. No readmem child found");
528
529 auto &readMemPorts = readMemChildIter->second->getPorts();
530 auto addrCmdPortIter = readMemPorts.find(AppID("cmd", width));
531 if (addrCmdPortIter == readMemPorts.end())
532 throw std::runtime_error(
533 "hostmem read test failed. No AddressCommand MMIO port");
534 auto *addrCmdMMIO =
535 addrCmdPortIter->second.getAs<services::MMIO::MMIORegion>();
536 if (!addrCmdMMIO)
537 throw std::runtime_error(
538 "hostmem read test failed. AddressCommand port not MMIO");
539
540 auto lastReadPortIter = readMemPorts.find(AppID("lastReadLSB"));
541 if (lastReadPortIter == readMemPorts.end())
542 throw std::runtime_error("hostmem read test failed. lastReadLSB missing");
543 auto *lastReadPort =
544 lastReadPortIter->second.getAs<services::TelemetryService::Metric>();
545 if (!lastReadPort)
546 throw std::runtime_error(
547 "hostmem read test failed. lastReadLSB not telemetry");
548 lastReadPort->connect();
549
550 auto issuedPortIter = readMemPorts.find(AppID("addrCmdIssued"));
551 if (issuedPortIter == readMemPorts.end())
552 throw std::runtime_error("hostmem read test failed. addrCmdIssued missing");
553 auto *addrCmdIssuedPort =
554 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
555 if (!addrCmdIssuedPort)
556 throw std::runtime_error(
557 "hostmem read test failed. addrCmdIssued not telemetry");
558 addrCmdIssuedPort->connect();
559
560 auto responsesPortIter = readMemPorts.find(AppID("addrCmdResponses"));
561 if (responsesPortIter == readMemPorts.end())
562 throw std::runtime_error(
563 "hostmem read test failed. addrCmdResponses missing");
564 auto *addrCmdResponsesPort =
565 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
566 if (!addrCmdResponsesPort)
567 throw std::runtime_error(
568 "hostmem read test failed. addrCmdResponses not telemetry");
569 addrCmdResponsesPort->connect();
570
571 for (size_t i = 0; i < 8; ++i) {
572 auto *dataPtr = static_cast<uint64_t *>(region.getPtr());
573 dataPtr[0] = 0x12345678ull << i;
574 dataPtr[1] = 0xDEADBEEFull << i;
575 region.flush();
576 addrCmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
577 addrCmdMMIO->write(0x18, 1);
578 addrCmdMMIO->write(0x20, 1);
579 bool done = false;
580 for (int waitLoop = 0; waitLoop < 100; ++waitLoop) {
581 auto issued = addrCmdIssuedPort->readInt();
582 auto responses = addrCmdResponsesPort->readInt();
583 if (issued == 1 && responses == 1) {
584 done = true;
585 break;
586 }
587 std::this_thread::sleep_for(std::chrono::milliseconds(10));
588 }
589 if (!done)
590 throw std::runtime_error("hostmem read (" + std::to_string(width) +
591 " bits) timeout waiting for completion");
592 uint64_t captured = lastReadPort->readInt();
593 uint64_t expected = dataPtr[0];
594 if (width < 64)
595 expected &= ((1ull << width) - 1);
596 if (captured != expected)
597 throw std::runtime_error("hostmem read test (" + std::to_string(width) +
598 " bits) failed. Expected " +
599 esi::toHex(expected) + ", got " +
600 esi::toHex(captured));
601 }
602}
603
605 const std::vector<uint32_t> &widths, bool write,
606 bool read) {
607 // Enable the host memory service.
608 auto hostmem = conn->getService<services::HostMem>();
609 hostmem->start();
610 auto scratchRegion = hostmem->allocate(/*size(bytes)=*/1024 * 1024,
611 /*memOpts=*/{.writeable = true});
612 uint64_t *dataPtr = static_cast<uint64_t *>(scratchRegion->getPtr());
613 conn->getLogger().info("esitester",
614 "Running host memory test with region size " +
615 std::to_string(scratchRegion->getSize()) +
616 " bytes at 0x" + toHex(dataPtr));
617 for (size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
618 dataPtr[i] = 0;
619 scratchRegion->flush();
620
621 bool passed = true;
622 for (size_t width : widths) {
623 try {
624 if (write)
625 hostmemWriteTest(acc, *scratchRegion, width);
626 if (read)
627 hostmemReadTest(acc, *scratchRegion, width);
628 } catch (std::exception &e) {
629 conn->getLogger().error("esitester", "Hostmem test failed for width " +
630 std::to_string(width) + ": " +
631 e.what());
632 passed = false;
633 }
634 }
635 if (!passed)
636 throw std::runtime_error("Hostmem test failed");
637 std::cout << "Hostmem test passed" << std::endl;
638}
639
641 size_t width) {
642 Logger &logger = conn->getLogger();
643 logger.info("esitester",
644 "== Running DMA read test with width " + std::to_string(width));
645 AppIDPath lastPath;
646 BundlePort *toHostMMIOPort =
647 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
648 if (!toHostMMIOPort)
649 throw std::runtime_error("dma read test failed. No tohostdma[" +
650 std::to_string(width) + "] found");
651 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
652 if (!toHostMMIO)
653 throw std::runtime_error("dma read test failed. MMIO port is not MMIO");
654 lastPath.clear();
655 BundlePort *outPortBundle =
656 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
657 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
658 outPort.connect();
659
660 size_t xferCount = 24;
661 uint64_t last = 0;
662 MessageData data;
663 toHostMMIO->write(0, xferCount);
664 for (size_t i = 0; i < xferCount; ++i) {
665 outPort.read(data);
666 if (width == 64) {
667 uint64_t val = *data.as<uint64_t>();
668 if (val < last)
669 throw std::runtime_error("dma read test failed. Out of order data");
670 last = val;
671 }
672 logger.debug("esitester",
673 "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex());
674 }
675 outPort.disconnect();
676 std::cout << " DMA read test for " << width << " bits passed" << std::endl;
677}
678
680 size_t width) {
681 Logger &logger = conn->getLogger();
682 logger.info("esitester",
683 "Running DMA write test with width " + std::to_string(width));
684 AppIDPath lastPath;
685 BundlePort *fromHostMMIOPort =
686 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
687 if (!fromHostMMIOPort)
688 throw std::runtime_error("dma read test for " + toString(width) +
689 " bits failed. No fromhostdma[" +
690 std::to_string(width) + "] found");
691 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
692 if (!fromHostMMIO)
693 throw std::runtime_error("dma write test for " + toString(width) +
694 " bits failed. MMIO port is not MMIO");
695 lastPath.clear();
696 BundlePort *outPortBundle =
697 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
698 if (!outPortBundle)
699 throw std::runtime_error("dma write test for " + toString(width) +
700 " bits failed. No out port found");
701 WriteChannelPort &writePort = outPortBundle->getRawWrite("data");
702 writePort.connect();
703
704 size_t xferCount = 24;
705 uint8_t *data = new uint8_t[width];
706 for (size_t i = 0; i < width / 8; ++i)
707 data[i] = 0;
708 fromHostMMIO->read(8);
709 fromHostMMIO->write(0, xferCount);
710 for (size_t i = 1; i < xferCount + 1; ++i) {
711 data[0] = i;
712 bool successWrite;
713 size_t attempts = 0;
714 do {
715 successWrite = writePort.tryWrite(MessageData(data, width / 8));
716 if (!successWrite) {
717 std::this_thread::sleep_for(std::chrono::milliseconds(10));
718 }
719 } while (!successWrite && ++attempts < 100);
720 if (!successWrite)
721 throw std::runtime_error("dma write test for " + toString(width) +
722 " bits failed. Write failed");
723 uint64_t lastReadMMIO;
724 for (size_t a = 0; a < 20; ++a) {
725 lastReadMMIO = fromHostMMIO->read(8);
726 if (lastReadMMIO == i)
727 break;
728 std::this_thread::sleep_for(std::chrono::milliseconds(10));
729 if (a >= 19)
730 throw std::runtime_error("dma write for " + toString(width) +
731 " bits test failed. Read from MMIO failed");
732 }
733 }
734 writePort.disconnect();
735 delete[] data;
736 std::cout << " DMA write test for " << width << " bits passed" << std::endl;
737}
738
740 const std::vector<uint32_t> &widths, bool read,
741 bool write) {
742 bool success = true;
743 if (write)
744 for (size_t width : widths)
745 try {
746 dmaWriteTest(conn, acc, width);
747 } catch (std::exception &e) {
748 success = false;
749 std::cerr << "DMA write test for " << width
750 << " bits failed: " << e.what() << std::endl;
751 }
752 if (read)
753 for (size_t width : widths)
754 dmaReadTest(conn, acc, width);
755 if (!success)
756 throw std::runtime_error("DMA test failed");
757 std::cout << "DMA test passed" << std::endl;
758}
759
760//
761// DMA bandwidth test
762//
763
765 size_t width, size_t xferCount) {
766
767 AppIDPath lastPath;
768 BundlePort *toHostMMIOPort =
769 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
770 if (!toHostMMIOPort)
771 throw std::runtime_error("bandwidth test failed. No tohostdma[" +
772 std::to_string(width) + "] found");
773 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
774 if (!toHostMMIO)
775 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
776 lastPath.clear();
777 BundlePort *outPortBundle =
778 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
779 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
780 outPort.connect();
781
782 Logger &logger = conn->getLogger();
783 logger.info("esitester", "Starting read bandwidth test with " +
784 std::to_string(xferCount) + " x " +
785 std::to_string(width) + " bit transfers");
786 MessageData data;
787 auto start = std::chrono::high_resolution_clock::now();
788 toHostMMIO->write(0, xferCount);
789 for (size_t i = 0; i < xferCount; ++i) {
790 outPort.read(data);
791 logger.debug(
792 [i, &data](std::string &subsystem, std::string &msg,
793 std::unique_ptr<std::map<std::string, std::any>> &details) {
794 subsystem = "esitester";
795 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
796 });
797 }
798 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
799 std::chrono::high_resolution_clock::now() - start);
800 double bytesPerSec =
801 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
802 logger.info("esitester",
803 " Bandwidth test: " + std::to_string(xferCount) + " x " +
804 std::to_string(width) + " bit transfers in " +
805 std::to_string(duration.count()) + " microseconds");
806 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
807}
808
810 size_t width, size_t xferCount) {
811
812 AppIDPath lastPath;
813 BundlePort *fromHostMMIOPort =
814 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
815 if (!fromHostMMIOPort)
816 throw std::runtime_error("bandwidth test failed. No fromhostdma[" +
817 std::to_string(width) + "] found");
818 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
819 if (!fromHostMMIO)
820 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
821 lastPath.clear();
822 BundlePort *inPortBundle =
823 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
824 WriteChannelPort &outPort = inPortBundle->getRawWrite("data");
825 outPort.connect();
826
827 Logger &logger = conn->getLogger();
828 logger.info("esitester", "Starting write bandwidth test with " +
829 std::to_string(xferCount) + " x " +
830 std::to_string(width) + " bit transfers");
831 std::vector<uint8_t> dataVec(width / 8);
832 for (size_t i = 0; i < width / 8; ++i)
833 dataVec[i] = i;
834 MessageData data(dataVec);
835 auto start = std::chrono::high_resolution_clock::now();
836 fromHostMMIO->write(0, xferCount);
837 for (size_t i = 0; i < xferCount; ++i) {
838 outPort.write(data);
839 logger.debug(
840 [i, &data](std::string &subsystem, std::string &msg,
841 std::unique_ptr<std::map<std::string, std::any>> &details) {
842 subsystem = "esitester";
843 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
844 });
845 }
846 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
847 std::chrono::high_resolution_clock::now() - start);
848 double bytesPerSec =
849 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
850 logger.info("esitester",
851 " Bandwidth test: " + std::to_string(xferCount) + " x " +
852 std::to_string(width) + " bit transfers in " +
853 std::to_string(duration.count()) + " microseconds");
854 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
855}
856
858 const std::vector<uint32_t> &widths,
859 uint32_t xferCount, bool read, bool write) {
860 if (read)
861 for (uint32_t w : widths)
862 bandwidthReadTest(conn, acc, w, xferCount);
863 if (write)
864 for (uint32_t w : widths)
865 bandwidthWriteTest(conn, acc, w, xferCount);
866}
867
868//
869// Hostmem bandwidth test
870//
871
// Host-memory WRITE bandwidth measurement for one transfer width.
//
// Finds the AppID("writemem", width) child of `acc`, its AppID("cmd", width)
// MMIO region and its addrCmdIssued / addrCmdResponses / addrCmdCycles
// telemetry metrics. It then fills `region` with a pattern, programs the
// command registers with the region's device pointer and `xferCount`, and
// polls the response metric until it reads exactly `xferCount`. The result
// line is printed to std::cout.
//
// Throws std::runtime_error when the child, a port or a metric is missing or
// of the wrong type, or when the response metric has not reached `xferCount`
// after 100000 polls.
872static void
875 uint32_t width, uint32_t xferCount) {
876 Logger &logger = conn->getLogger();
877 logger.info("esitester", "Starting hostmem WRITE bandwidth test: " +
878 std::to_string(xferCount) + " x " +
879 std::to_string(width) + " bits");
880
     // Locate the per-width child and its port map.
881 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
882 if (writeMemChildIter == acc->getChildren().end())
883 throw std::runtime_error("hostmem write bandwidth: writemem child missing");
884 auto &writeMemPorts = writeMemChildIter->second->getPorts();
885
     // Command interface: must exist and must be an MMIO region.
886 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
887 if (cmdPortIter == writeMemPorts.end())
888 throw std::runtime_error("hostmem write bandwidth: cmd MMIO missing");
889 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
890 if (!cmdMMIO)
891 throw std::runtime_error("hostmem write bandwidth: cmd not MMIO");
892
     // Telemetry metrics: all three must exist and be TelemetryService metrics.
893 auto issuedIter = writeMemPorts.find(AppID("addrCmdIssued"));
894 auto respIter = writeMemPorts.find(AppID("addrCmdResponses"));
895 auto cycleCount = writeMemPorts.find(AppID("addrCmdCycles"));
896 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
897 cycleCount == writeMemPorts.end())
898 throw std::runtime_error("hostmem write bandwidth: telemetry missing");
899 auto *issuedPort =
900 issuedIter->second.getAs<services::TelemetryService::Metric>();
901 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
902 auto *cyclePort =
903 cycleCount->second.getAs<services::TelemetryService::Metric>();
904 if (!issuedPort || !respPort || !cyclePort)
905 throw std::runtime_error(
906 "hostmem write bandwidth: telemetry type mismatch");
907
     // NOTE(review): issuedPort is connected here but never read afterwards in
     // this function.
908 issuedPort->connect();
909 respPort->connect();
910 cyclePort->connect();
911
912 // Initialize pattern (optional).
913 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
914 size_t words = region.getSize() / 8;
915 for (size_t i = 0; i < words; ++i)
916 dataPtr[i] = i + 0xA5A50000;
917 region.flush();
918
     // The timer starts before the command registers are written, so the
     // measured time includes the three MMIO writes below.
919 auto start = std::chrono::high_resolution_clock::now();
920 // Fire off xferCount write commands (one flit each).
921 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
922 cmdMMIO->write(0x10, devPtr); // address
923 cmdMMIO->write(0x18, xferCount); // flits
924 cmdMMIO->write(0x20, 1); // start
925
926 // Wait for responses counter to reach target.
     // Up to 100000 polls with a 50 us sleep after each unsuccessful one.
     // Completion requires the metric to read exactly xferCount.
927 bool completed = false;
928 for (int wait = 0; wait < 100000; ++wait) {
929 uint64_t respNow = respPort->readInt();
930 if (respNow == xferCount) {
931 completed = true;
932 break;
933 }
934 std::this_thread::sleep_for(std::chrono::microseconds(50));
935 }
936 if (!completed)
937 throw std::runtime_error("hostmem write bandwidth timeout");
     // Bytes/second comes from the host wall clock; bytes/cycle from the cycle
     // metric. NOTE(review): neither divisor is checked for zero.
938 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
939 std::chrono::high_resolution_clock::now() - start);
940 double bytesPerSec =
941 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
942 uint64_t cycles = cyclePort->readInt();
943 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
944 std::cout << "[WRITE] Hostmem bandwidth (" << std::to_string(width)
945 << "): " << formatBandwidth(bytesPerSec) << " "
946 << std::to_string(xferCount) << " flits in "
947 << std::to_string(duration.count()) << " us, "
948 << std::to_string(cycles) << " cycles, " << bytesPerCycle
949 << " bytes/cycle" << std::endl;
950}
951
// Host-memory READ bandwidth measurement for one transfer width.
//
// Finds the AppID("readmem", width) child of `acc`, its AppID("cmd", width)
// MMIO region and its addrCmdIssued / addrCmdResponses / addrCmdCycles
// telemetry metrics. It then fills `region` with a pattern, programs the
// command registers with the region's device pointer and `xferCount`, and
// polls the response metric until it reads exactly `xferCount`. The result
// line is printed to std::cout.
//
// Throws std::runtime_error when the child, a port or a metric is missing or
// of the wrong type, or when the response metric has not reached `xferCount`
// after 100000 polls.
952static void
955 uint32_t width, uint32_t xferCount) {
956 Logger &logger = conn->getLogger();
957 logger.info("esitester", "Starting hostmem READ bandwidth test: " +
958 std::to_string(xferCount) + " x " +
959 std::to_string(width) + " bits");
960
     // Locate the per-width child and its port map.
961 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
962 if (readMemChildIter == acc->getChildren().end())
963 throw std::runtime_error("hostmem read bandwidth: readmem child missing");
964 auto &readMemPorts = readMemChildIter->second->getPorts();
965
     // Command interface: must exist and must be an MMIO region.
966 auto cmdPortIter = readMemPorts.find(AppID("cmd", width));
967 if (cmdPortIter == readMemPorts.end())
968 throw std::runtime_error("hostmem read bandwidth: cmd MMIO missing");
969 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
970 if (!cmdMMIO)
971 throw std::runtime_error("hostmem read bandwidth: cmd not MMIO");
972
     // Telemetry metrics. In this function `cyclePort` is the map iterator and
     // `cycleCntPort` is the metric pointer obtained from it.
973 auto issuedIter = readMemPorts.find(AppID("addrCmdIssued"));
974 auto respIter = readMemPorts.find(AppID("addrCmdResponses"));
975 auto cyclePort = readMemPorts.find(AppID("addrCmdCycles"));
976 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
977 cyclePort == readMemPorts.end())
978 throw std::runtime_error("hostmem read bandwidth: telemetry missing");
979 auto *issuedPort =
980 issuedIter->second.getAs<services::TelemetryService::Metric>();
981 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
982 auto *cycleCntPort =
983 cyclePort->second.getAs<services::TelemetryService::Metric>();
984 if (!issuedPort || !respPort || !cycleCntPort)
985 throw std::runtime_error("hostmem read bandwidth: telemetry type mismatch");
     // NOTE(review): issuedPort is connected here but never read afterwards in
     // this function.
986 issuedPort->connect();
987 respPort->connect();
988 cycleCntPort->connect();
989
990 // Prepare memory pattern (optional).
991 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
992 size_t words64 = region.getSize() / 8;
993 for (size_t i = 0; i < words64; ++i)
994 dataPtr[i] = 0xCAFEBABE0000ull + i;
995 region.flush();
996 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
     // The timer starts just before the command registers are written.
997 auto start = std::chrono::high_resolution_clock::now();
998
     // Register writes: 0x10 = device address, 0x18 = transfer count,
     // 0x20 = 1 to start.
999 cmdMMIO->write(0x10, devPtr);
1000 cmdMMIO->write(0x18, xferCount);
1001 cmdMMIO->write(0x20, 1);
1002
     // Up to 100000 polls with a 50 us sleep after each unsuccessful one.
     // Completion requires the metric to read exactly xferCount.
1003 bool timeout = true;
1004 for (int wait = 0; wait < 100000; ++wait) {
1005 uint64_t respNow = respPort->readInt();
1006 if (respNow == xferCount) {
1007 timeout = false;
1008 break;
1009 }
1010 std::this_thread::sleep_for(std::chrono::microseconds(50));
1011 }
1012 if (timeout)
1013 throw std::runtime_error("hostmem read bandwidth timeout");
     // Bytes/second comes from the host wall clock; bytes/cycle from the cycle
     // metric. NOTE(review): neither divisor is checked for zero.
1014 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
1015 std::chrono::high_resolution_clock::now() - start);
1016 double bytesPerSec =
1017 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
1018 uint64_t cycles = cycleCntPort->readInt();
1019 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
1020 std::cout << "[ READ] Hostmem bandwidth (" << width
1021 << "): " << formatBandwidth(bytesPerSec) << ", " << xferCount
1022 << " flits in " << duration.count() << " us, " << cycles
1023 << " cycles, " << bytesPerCycle << " bytes/cycle" << std::endl;
1024}
1025
// Host-memory bandwidth driver. Starts the HostMem service, allocates a single
// writeable region of 1024 * 1024 * 1024 bytes and reuses it for every entry
// of `widths`: per width, the write test runs first (when `write` is set),
// then the read test (when `read` is set).
// NOTE(review): the service pointer is dereferenced without a null check —
// confirm getService cannot return null here.
1027 uint32_t xferCount,
1028 const std::vector<uint32_t> &widths, bool read,
1029 bool write) {
1030 auto hostmemSvc = conn->getService<services::HostMem>();
1031 hostmemSvc->start();
1032 auto region = hostmemSvc->allocate(/*size(bytes)=*/1024 * 1024 * 1024,
1033 /*memOpts=*/{.writeable = true});
1034 for (uint32_t w : widths) {
1035 if (write)
1036 hostmemWriteBandwidthTest(conn, acc, *region, w, xferCount);
1037 if (read)
1038 hostmemReadBandwidthTest(conn, acc, *region, w, xferCount);
1039 }
1040}
1041
// Exercises the `add` function port of the `loopback` child.
//
// Each call sends a random 24-bit argument as 3 bytes (least-significant byte
// first) and expects a 16-bit result equal to (arg + 11) & 0xFFFF. The random
// generator is seeded with a fixed value, so the argument sequence is the same
// on every run.
//
// pipeline == false: every call is awaited before the next one is issued.
// pipeline == true:  all calls are issued first, then the futures are
//                    collected in issue order.
//
// Throws std::runtime_error when the child or port is missing, when the port
// is not a FuncService::Function, or on the first result mismatch. With
// `iterations` == 0 the function logs a "skipped" message and returns after
// connecting the port.
1043 uint32_t iterations, bool pipeline) {
1044 Logger &logger = conn->getLogger();
1045 auto loopbackChild = accel->getChildren().find(AppID("loopback"));
1046 if (loopbackChild == accel->getChildren().end())
1047 throw std::runtime_error("Loopback test: no 'loopback' child");
1048 auto &ports = loopbackChild->second->getPorts();
1049 auto addIter = ports.find(AppID("add"));
1050 if (addIter == ports.end())
1051 throw std::runtime_error("Loopback test: no 'add' port");
1052
1053 // Use FuncService::Func instead of raw channels.
1054 auto *funcPort = addIter->second.getAs<services::FuncService::Function>();
1055 if (!funcPort)
1056 throw std::runtime_error(
1057 "Loopback test: 'add' port not a FuncService::Function");
1058 funcPort->connect();
1059 if (iterations == 0) {
1060 logger.info("esitester", "Loopback add test: 0 iterations (skipped)");
1061 return;
1062 }
     // Fixed seed; arguments are drawn uniformly from [0, 2^24 - 1].
1063 std::mt19937_64 rng(0xC0FFEE);
1064 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
1065
1066 if (!pipeline) {
     // Note: the per-call std::cout line (flushed by std::endl) sits inside the
     // timed region, so the reported calls/s includes console output time.
1067 auto start = std::chrono::high_resolution_clock::now();
1068 for (uint32_t i = 0; i < iterations; ++i) {
1069 uint32_t argVal = dist(rng);
1070 uint32_t expected = (argVal + 11) & 0xFFFF;
     // Serialize the low 24 bits, least-significant byte first.
1071 uint8_t argBytes[3] = {
1072 static_cast<uint8_t>(argVal & 0xFF),
1073 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1074 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1075 };
1076 MessageData argMsg(argBytes, 3);
     // .get() waits for this call's result before the loop continues.
1077 MessageData resMsg = funcPort->call(argMsg).get();
1078 uint16_t got = *resMsg.as<uint16_t>();
1079 std::cout << "[loopback] i=" << i << " arg=0x" << esi::toHex(argVal)
1080 << " got=0x" << esi::toHex(got) << " exp=0x"
1081 << esi::toHex(expected) << std::endl;
1082 if (got != expected)
1083 throw std::runtime_error("Loopback mismatch (non-pipelined)");
1084 }
1085 auto end = std::chrono::high_resolution_clock::now();
1086 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
1087 .count();
1088 double callsPerSec = (double)iterations * 1e6 / (double)us;
1089 logger.info("esitester", "Loopback add test passed (non-pipelined, " +
1090 std::to_string(iterations) + " calls, " +
1091 std::to_string(us) + " us, " +
1092 std::to_string(callsPerSec) + " calls/s)");
1093 } else {
1094 // Pipelined mode: launch all calls first, then collect.
1095 std::vector<std::future<MessageData>> futures;
1096 futures.reserve(iterations);
1097 std::vector<uint32_t> expectedVals;
1098 expectedVals.reserve(iterations);
1099
     // Issue phase: only the call launches are timed between issueStart and
     // issueEnd.
1100 auto issueStart = std::chrono::high_resolution_clock::now();
1101 for (uint32_t i = 0; i < iterations; ++i) {
1102 uint32_t argVal = dist(rng);
1103 uint32_t expected = (argVal + 11) & 0xFFFF;
1104 uint8_t argBytes[3] = {
1105 static_cast<uint8_t>(argVal & 0xFF),
1106 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1107 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1108 };
1109 futures.emplace_back(funcPort->call(MessageData(argBytes, 3)));
1110 expectedVals.emplace_back(expected);
1111 }
1112 auto issueEnd = std::chrono::high_resolution_clock::now();
1113
     // Collect phase: results are checked in the order the calls were issued.
1114 for (uint32_t i = 0; i < iterations; ++i) {
1115 MessageData resMsg = futures[i].get();
1116 uint16_t got = *resMsg.as<uint16_t>();
1117 uint16_t exp = (uint16_t)expectedVals[i];
1118 std::cout << "[loopback-pipelined] i=" << i << " got=0x"
1119 << esi::toHex(got) << " exp=0x" << esi::toHex(exp) << std::endl;
1120 if (got != exp)
1121 throw std::runtime_error("Loopback mismatch (pipelined) idx=" +
1122 std::to_string(i));
1123 }
1124 auto collectEnd = std::chrono::high_resolution_clock::now();
1125
     // issueUs covers the issue loop only; totalUs covers issue + collection.
1126 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1127 issueEnd - issueStart)
1128 .count();
1129 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1130 collectEnd - issueStart)
1131 .count();
1132
1133 double issueRate = (double)iterations * 1e6 / (double)issueUs;
1134 double completionRate = (double)iterations * 1e6 / (double)totalUs;
1135
1136 logger.info("esitester", "Loopback add test passed (pipelined). Issued " +
1137 std::to_string(iterations) + " in " +
1138 std::to_string(issueUs) + " us (" +
1139 std::to_string(issueRate) +
1140 " calls/s), total " + std::to_string(totalUs) +
1141 " us (" + std::to_string(completionRate) +
1142 " calls/s effective)");
1143 }
1144}
1145
// Aggregate host-memory bandwidth test for a single `width`.
//
// Collects every usable unit among the children named readmem, readmem_0,
// readmem_1, readmem_2 (when `read`) and writemem, writemem_0, writemem_1,
// writemem_2 (when `write`), each looked up with index `width`. A child whose
// entry, ports or port types do not match is skipped without an error. Every
// unit gets its own host region of 1024 * 1024 * 1024 bytes. All units are
// started back to back and then polled together until each response metric
// reads exactly `xferCount`. Per-unit and aggregate figures are printed to
// std::cout.
//
// Returns early (with a message on std::cout) when neither `read` nor `write`
// is set or when no unit was found. Throws
// std::runtime_error("aggbandwidth: timeout") once 200000 polling rounds have
// elapsed without every unit finishing.
1147 Accelerator *acc, uint32_t width,
1148 uint32_t xferCount, bool read,
1149 bool write) {
1150 Logger &logger = conn->getLogger();
1151 if (!read && !write) {
1152 std::cout << "aggbandwidth: nothing to do (enable --read and/or --write)\n";
1153 return;
1154 }
1155 logger.info(
1156 "esitester",
1157 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1158 " count=" + std::to_string(xferCount) +
1159 " read=" + (read ? "Y" : "N") + " write=" + (write ? "Y" : "N"));
1160
1161 auto hostmemSvc = conn->getService<services::HostMem>();
1162 hostmemSvc->start();
1163
     // Per-unit bookkeeping: handles needed to launch and poll the unit plus
     // the figures recorded when it completes.
     // NOTE(review): `launched` is set during launch but never read.
1164 struct Unit {
1165 std::string prefix;
1166 bool isRead = false;
1167 bool isWrite = false;
1168 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1169 services::TelemetryService::Metric *resp = nullptr;
1170 services::TelemetryService::Metric *cycles = nullptr;
1171 services::MMIO::MMIORegion *cmd = nullptr;
1172 bool launched = false;
1173 bool done = false;
1174 uint64_t bytes = 0;
1175 uint64_t duration_us = 0;
1176 uint64_t cycleCount = 0;
1177 std::chrono::high_resolution_clock::time_point start;
1178 };
1179 std::vector<Unit> units;
1180 const std::vector<std::string> readPrefixes = {"readmem", "readmem_0",
1181 "readmem_1", "readmem_2"};
1182 const std::vector<std::string> writePrefixes = {"writemem", "writemem_0",
1183 "writemem_1", "writemem_2"};
1184
     // Appends one Unit per prefix in `pref` whose child, cmd MMIO region and
     // response/cycle metrics are all present and of the expected types.
1185 auto addUnits = [&](const std::vector<std::string> &pref, bool doRead,
1186 bool doWrite) {
1187 for (auto &p : pref) {
1188 AppID id(p, width);
1189 auto childIt = acc->getChildren().find(id);
1190 if (childIt == acc->getChildren().end())
1191 continue; // silently skip missing variants
1192 auto &ports = childIt->second->getPorts();
1193 auto cmdIt = ports.find(AppID("cmd", width));
1194 auto respIt = ports.find(AppID("addrCmdResponses"));
1195 auto cycIt = ports.find(AppID("addrCmdCycles"));
1196 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1197 continue;
1198 auto *cmd = cmdIt->second.getAs<services::MMIO::MMIORegion>();
1199 auto *resp = respIt->second.getAs<services::TelemetryService::Metric>();
1200 auto *cyc = cycIt->second.getAs<services::TelemetryService::Metric>();
1201 if (!cmd || !resp || !cyc)
1202 continue;
1203 resp->connect();
1204 cyc->connect();
1205 Unit u;
1206 u.prefix = p;
1207 u.isRead = doRead;
1208 u.isWrite = doWrite;
1209 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable = true});
1210 // Init pattern.
     // The pattern base is chosen by the first letter of the prefix
     // ('w' for the writemem names, otherwise the readmem pattern).
1211 uint64_t *ptr = static_cast<uint64_t *>(u.region->getPtr());
1212 size_t words = u.region->getSize() / 8;
1213 for (size_t i = 0; i < words; ++i)
1214 ptr[i] =
1215 (p[0] == 'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1216 u.region->flush();
1217 u.cmd = cmd;
1218 u.resp = resp;
1219 u.cycles = cyc;
     // Payload size of this unit: xferCount transfers of width/8 bytes each
     // (integer division).
1220 u.bytes = uint64_t(xferCount) * (width / 8);
1221 units.emplace_back(std::move(u));
1222 }
1223 };
1224 if (read)
1225 addUnits(readPrefixes, true, false);
1226 if (write)
1227 addUnits(writePrefixes, false, true);
1228 if (units.empty()) {
1229 std::cout << "aggbandwidth: no matching units present for width " << width
1230 << "\n";
1231 return;
1232 }
1233
1234 auto wallStart = std::chrono::high_resolution_clock::now();
1235 // Launch sequentially.
     // Each unit's own start time is taken after its three register writes, so
     // the per-unit duration excludes that unit's launch writes; wallStart
     // (above) covers the whole launch sequence.
1236 for (auto &u : units) {
1237 uint64_t devPtr = reinterpret_cast<uint64_t>(u.region->getDevicePtr());
1238 u.cmd->write(0x10, devPtr);
1239 u.cmd->write(0x18, xferCount);
1240 u.cmd->write(0x20, 1);
1241 u.start = std::chrono::high_resolution_clock::now();
1242 u.launched = true;
1243 }
1244
1245 // Poll all until complete.
     // Each round reads the response metric of every unfinished unit; a unit is
     // done once it reads exactly xferCount, at which point its duration and
     // cycle count are recorded.
1246 const uint64_t timeoutLoops = 200000; // ~10s at 50us sleep
1247 uint64_t loops = 0;
1248 while (true) {
1249 bool allDone = true;
1250 for (auto &u : units) {
1251 if (u.done)
1252 continue;
1253 if (u.resp->readInt() == xferCount) {
1254 auto end = std::chrono::high_resolution_clock::now();
1255 u.duration_us =
1256 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1257 .count();
1258 u.cycleCount = u.cycles->readInt();
1259 u.done = true;
1260 } else {
1261 allDone = false;
1262 }
1263 }
1264 if (allDone)
1265 break;
1266 if (++loops >= timeoutLoops)
1267 throw std::runtime_error("aggbandwidth: timeout");
1268 std::this_thread::sleep_for(std::chrono::microseconds(50));
1269 }
1270 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1271 std::chrono::high_resolution_clock::now() - wallStart)
1272 .count();
1273
     // Per-unit report plus running totals split by direction.
     // NOTE(review): duration_us is used as a divisor without a zero check.
1274 uint64_t totalBytes = 0;
1275 uint64_t totalReadBytes = 0;
1276 uint64_t totalWriteBytes = 0;
1277 for (auto &u : units) {
1278 totalBytes += u.bytes;
1279 if (u.isRead)
1280 totalReadBytes += u.bytes;
1281 if (u.isWrite)
1282 totalWriteBytes += u.bytes;
1283 double unitBps = (double)u.bytes * 1e6 / (double)u.duration_us;
1284 std::cout << "[agg-unit] " << u.prefix << "[" << width << "] "
1285 << (u.isRead ? "READ" : (u.isWrite ? "WRITE" : "UNK"))
1286 << " bytes=" << humanBytes(u.bytes) << " (" << u.bytes << " B)"
1287 << " time=" << humanTimeUS(u.duration_us) << " (" << u.duration_us
1288 << " us) cycles=" << u.cycleCount
1289 << " throughput=" << formatBandwidth(unitBps) << std::endl;
1290 }
1291 // Compute aggregate bandwidths as total size / total wall time (not sum of
1292 // unit throughputs).
1293 double aggReadBps =
1294 totalReadBytes ? (double)totalReadBytes * 1e6 / (double)wallUs : 0.0;
1295 double aggWriteBps =
1296 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (double)wallUs : 0.0;
1297 double aggCombinedBps =
1298 totalBytes ? (double)totalBytes * 1e6 / (double)wallUs : 0.0;
1299
1300 std::cout << "[agg-total] units=" << units.size()
1301 << " read_bytes=" << humanBytes(totalReadBytes) << " ("
1302 << totalReadBytes << " B)"
1303 << " read_bw=" << formatBandwidth(aggReadBps)
1304 << " write_bytes=" << humanBytes(totalWriteBytes) << " ("
1305 << totalWriteBytes << " B)"
1306 << " write_bw=" << formatBandwidth(aggWriteBps)
1307 << " combined_bytes=" << humanBytes(totalBytes) << " ("
1308 << totalBytes << " B)"
1309 << " combined_bw=" << formatBandwidth(aggCombinedBps)
1310 << " wall_time=" << humanTimeUS(wallUs) << " (" << wallUs << " us)"
1311 << std::endl;
1312 logger.info("esitester", "Aggregate hostmem bandwidth test complete");
1313}
1314
1315/// Packed struct representing a parallel window argument for StreamingAdder.
1316/// Layout in SystemVerilog (so it must be reversed in C):
1317/// { add_amt: UInt(32), input: UInt(32), last: UInt(8) }
/// One such struct is written per list element by the streaming add test; the
/// static_assert below pins the packed size to 9 bytes (1 + 4 + 4).
1318#pragma pack(push, 1)
     // Set to 1 on the final list element and 0 on all others.
1320 uint8_t last;
     // One element of the input list.
1321 uint32_t input;
     // Amount to add; the sender repeats it in every message.
1322 uint32_t addAmt;
1323};
1324#pragma pack(pop)
1325static_assert(sizeof(StreamingAddArg) == 9,
1326 "StreamingAddArg must be 9 bytes packed");
1327
1328/// Packed struct representing a parallel window result for StreamingAdder.
1329/// Layout in SystemVerilog (so it must be reversed in C):
1330/// { data: UInt(32), last: UInt(8) }
/// The streaming add test reads one such struct per result element; the
/// static_assert below pins the packed size to 5 bytes (1 + 4).
1331#pragma pack(push, 1)
     // Non-zero marks the final result element; the reader stops on it.
1333 uint8_t last;
     // One element of the result list.
1334 uint32_t data;
1335};
1336#pragma pack(pop)
1337static_assert(sizeof(StreamingAddResult) == 5,
1338 "StreamingAddResult must be 5 bytes packed");
1339
1340/// Test the StreamingAdder module. This module takes a struct containing
1341/// an add_amt and a list of uint32s, adds add_amt to each element, and
1342/// returns the resulting list. The data is streamed using windowed types.
///
/// `numItems` random inputs (fixed seed) are written one message per element
/// to the raw `arg` channel of the `streaming_add` port. Results are then read
/// from the raw `result` channel until a message with a non-zero `last` field
/// arrives, and each result is compared with input + `addAmt`.
///
/// With `numItems` == 0 the test is skipped after the port lookup: no argument
/// would be sent, so no result carrying `last` could ever be read back.
///
/// Throws std::runtime_error when the child or port is missing, when a result
/// message is too small, when the result count differs from the input count,
/// or when any value mismatches.
1344 uint32_t addAmt, uint32_t numItems) {
1345 Logger &logger = conn->getLogger();
1346 logger.info("esitester", "Starting streaming add test with add_amt=" +
1347 std::to_string(addAmt) +
1348 ", num_items=" + std::to_string(numItems));
1349
1350 // Generate random input data.
1351 std::mt19937 rng(0xDEADBEEF);
1352 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1353 std::vector<uint32_t> inputData;
1354 inputData.reserve(numItems);
1355 for (uint32_t i = 0; i < numItems; ++i)
1356 inputData.push_back(dist(rng));
1357
1358 // Find the streaming_adder child.
1359 auto streamingAdderChild =
1360 accel->getChildren().find(AppID("streaming_adder"));
1361 if (streamingAdderChild == accel->getChildren().end())
1362 throw std::runtime_error(
1363 "Streaming add test: no 'streaming_adder' child found");
1364
1365 auto &ports = streamingAdderChild->second->getPorts();
1366 auto addIter = ports.find(AppID("streaming_add"));
1367 if (addIter == ports.end())
1368 throw std::runtime_error(
1369 "Streaming add test: no 'streaming_add' port found");
1370
1371 // Get the raw read/write channel ports for the windowed function.
1372 // The argument channel expects parallel windowed data where each message
1373 // contains: struct { add_amt: UInt(32), input: UInt(32), last: bool }
1374 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1375 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1376
     // With no items the send loop below writes nothing, so no result message
     // with `last` set would ever arrive and the read loop would wait forever.
     // Skip before connecting, as the loopback test does for 0 iterations.
     if (numItems == 0) {
     logger.info("esitester", "Streaming add test: 0 items (skipped)");
     return;
     }

1377 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1378 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1379
1380 // Send each list element with add_amt repeated in every message.
1381 for (size_t i = 0; i < inputData.size(); ++i) {
1382 StreamingAddArg arg;
1383 arg.addAmt = addAmt;
1384 arg.input = inputData[i];
1385 arg.last = (i == inputData.size() - 1) ? 1 : 0;
1386 argPort.write(
1387 MessageData(reinterpret_cast<const uint8_t *>(&arg), sizeof(arg)));
1388 logger.debug("esitester", "Sent {add_amt=" + std::to_string(arg.addAmt) +
1389 ", input=" + std::to_string(arg.input) +
1390 ", last=" + (arg.last ? "true" : "false") +
1391 "}");
1392 }
1393
1394 // Read the result list (also windowed).
1395 std::vector<uint32_t> results;
1396 bool lastSeen = false;
1397 while (!lastSeen) {
1398 MessageData resMsg;
1399 resultPort.read(resMsg);
1400 if (resMsg.getSize() < sizeof(StreamingAddResult))
1401 throw std::runtime_error(
1402 "Streaming add test: unexpected result message size");
1403
1404 const auto *res =
1405 reinterpret_cast<const StreamingAddResult *>(resMsg.getBytes());
1406 lastSeen = res->last != 0;
1407 results.push_back(res->data);
1408 logger.debug("esitester", "Received result=" + std::to_string(res->data) +
1409 " (last=" + (lastSeen ? "true" : "false") +
1410 ")");
1411 }
1412
1413 // Verify results.
1414 if (results.size() != inputData.size())
1415 throw std::runtime_error(
1416 "Streaming add test: result size mismatch. Expected " +
1417 std::to_string(inputData.size()) + ", got " +
1418 std::to_string(results.size()));
1419
     // Every element is printed, including matches; a mismatch only fails the
     // test after the whole list has been shown.
1420 bool passed = true;
1421 std::cout << "Streaming add test results:" << std::endl;
1422 for (size_t i = 0; i < inputData.size(); ++i) {
1423 uint32_t expected = inputData[i] + addAmt;
1424 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1425 << " = " << results[i] << " (expected " << expected << ")";
1426 if (results[i] != expected) {
1427 std::cout << " MISMATCH!";
1428 passed = false;
1429 }
1430 std::cout << std::endl;
1431 }
1432
1433 argPort.disconnect();
1434 resultPort.disconnect();
1435
1436 if (!passed)
1437 throw std::runtime_error("Streaming add test failed: result mismatch");
1438
1439 logger.info("esitester", "Streaming add test passed");
1440 std::cout << "Streaming add test passed" << std::endl;
1441}
1442
1443/// Test the StreamingAdder module using message translation.
1444/// This version uses the list translation support where the message format is:
1445/// Argument: { input_length (8 bytes), add_amt (4 bytes), input_data[] }
1446/// Result: { data_length (8 bytes), data[] }
1447/// The translation layer automatically converts between this format and the
1448/// parallel windowed frames used by the hardware.
1449
1450/// Translated argument struct for StreamingAdder.
1451/// Memory layout (standard C struct ordering, fields in declaration order):
1452/// ESI type: struct { add_amt: UInt(32), input: List<UInt(32)> }
1453/// becomes host struct:
1454/// { input_length (size_t, 8 bytes on 64-bit), add_amt (uint32_t),
1455/// input_data[] }
1456/// Note: The translation layer handles the conversion between this C struct
1457/// layout and the hardware's SystemVerilog frame format.
1458/// Note: size_t is used for list lengths, so this format is platform-dependent.
/// The struct is only a header: the list elements are not members but live in
/// the bytes that follow it, so it must be placed in a buffer of at least
/// allocSize(numItems) bytes.
1459#pragma pack(push, 1)
     // Amount added to every list element.
1462 uint32_t addAmt;
1463 // Trailing array data follows immediately after the struct in memory.
1464 // Use inputData() accessor to access it.
1465
1466 /// Get pointer to trailing input data array.
     /// (`this + 1` is the first byte past the header.)
1467 uint32_t *inputData() { return reinterpret_cast<uint32_t *>(this + 1); }
1468 const uint32_t *inputData() const {
1469 return reinterpret_cast<const uint32_t *>(this + 1);
1470 }
1471 /// Get span view of input data (requires inputLength to be set first).
1472 std::span<uint32_t> inputDataSpan() { return {inputData(), inputLength}; }
1473 std::span<const uint32_t> inputDataSpan() const {
1474 return {inputData(), inputLength};
1475 }
1476
     /// Bytes needed for the header plus `numItems` trailing uint32_t values.
1477 static size_t allocSize(size_t numItems) {
1478 return sizeof(StreamingAddTranslatedArg) + numItems * sizeof(uint32_t);
1479 }
1480};
1481#pragma pack(pop)
1482
1483/// Translated result struct for StreamingAdder.
1484/// Memory layout:
1485/// struct { data: List<UInt(32)> }
1486/// becomes:
1487/// { data_length (size_t, 8 bytes on 64-bit), data[] }
/// The struct is only a header: the result elements live in the bytes that
/// follow it, so a message holding `n` elements is at least allocSize(n) bytes.
1488#pragma pack(push, 1)
1491 // Trailing array data follows immediately after the struct in memory.
1492
1493 /// Get pointer to trailing result data array.
     /// (`this + 1` is the first byte past the header.)
1494 uint32_t *data() { return reinterpret_cast<uint32_t *>(this + 1); }
1495 const uint32_t *data() const {
1496 return reinterpret_cast<const uint32_t *>(this + 1);
1497 }
1498 /// Get span view of result data (requires dataLength to be set first).
1499 std::span<uint32_t> dataSpan() { return {data(), dataLength}; }
1500 std::span<const uint32_t> dataSpan() const { return {data(), dataLength}; }
1501
     /// Bytes needed for the header plus `numItems` trailing uint32_t values.
1502 static size_t allocSize(size_t numItems) {
1503 return sizeof(StreamingAddTranslatedResult) + numItems * sizeof(uint32_t);
1504 }
1505};
1506#pragma pack(pop)
1507
// StreamingAdder test using translated (whole-list) messages.
//
// Builds one StreamingAddTranslatedArg holding `numItems` random inputs (fixed
// seed) and `addAmt`, writes it as a single message to the `arg` channel of
// the `streaming_add` port, reads a single message from the `result` channel
// and checks that it contains `numItems` values, each equal to
// input + `addAmt`.
//
// Throws std::runtime_error when the child or port is missing, when the result
// message is smaller than its header or than its declared length requires,
// when the element count differs, or when any value mismatches. Throws
// std::bad_alloc if the argument buffer cannot be allocated.
1509 Accelerator *accel, uint32_t addAmt,
1510 uint32_t numItems) {
1511 Logger &logger = conn->getLogger();
1512 logger.info("esitester",
1513 "Starting streaming add test (translated) with add_amt=" +
1514 std::to_string(addAmt) +
1515 ", num_items=" + std::to_string(numItems));
1516
1517 // Generate random input data.
1518 std::mt19937 rng(0xDEADBEEF);
1519 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1520 std::vector<uint32_t> inputData;
1521 inputData.reserve(numItems);
1522 for (uint32_t i = 0; i < numItems; ++i)
1523 inputData.push_back(dist(rng));
1524
1525 // Find the streaming_adder child.
1526 auto streamingAdderChild =
1527 accel->getChildren().find(AppID("streaming_adder"));
1528 if (streamingAdderChild == accel->getChildren().end())
1529 throw std::runtime_error(
1530 "Streaming add test: no 'streaming_adder' child found");
1531
1532 auto &ports = streamingAdderChild->second->getPorts();
1533 auto addIter = ports.find(AppID("streaming_add"));
1534 if (addIter == ports.end())
1535 throw std::runtime_error(
1536 "Streaming add test: no 'streaming_add' port found");
1537
1538 // Get the raw read/write channel ports with translation enabled (default).
1539 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1540 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1541
1542 // Connect with translation enabled (the default).
1543 argPort.connect();
1544 resultPort.connect();
1545
1546 // Allocate the argument struct with proper alignment for the struct members.
1547 // We use aligned_alloc to ensure the buffer meets alignment requirements.
     // NOTE(review): the argument struct is declared under #pragma pack(1), so
     // its alignof may well be 1 — confirm which alignment is actually needed.
1548 size_t argSize = StreamingAddTranslatedArg::allocSize(numItems);
1549 constexpr size_t alignment = alignof(StreamingAddTranslatedArg);
1550 // aligned_alloc requires size to be a multiple of alignment
1551 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1552 void *argRaw = alignedAllocCompat(alignment, allocSize);
1553 if (!argRaw)
1554 throw std::bad_alloc();
     // The unique_ptr owns the buffer, so it is released on every exit path,
     // including the throws below.
1555 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1556 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1557 auto *arg = static_cast<StreamingAddTranslatedArg *>(argRaw);
1558 arg->inputLength = numItems;
1559 arg->addAmt = addAmt;
1560 for (uint32_t i = 0; i < numItems; ++i)
1561 arg->inputData()[i] = inputData[i];
1562
1563 logger.debug("esitester",
1564 "Sending translated argument: " + std::to_string(argSize) +
1565 " bytes, list_length=" + std::to_string(arg->inputLength) +
1566 ", add_amt=" + std::to_string(arg->addAmt));
1567
1568 // Send the complete message - translation will split it into frames.
     // Only argSize bytes are sent; any rounding padding in the allocation is
     // not part of the message.
1569 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize));
1570 // argBuffer automatically freed when it goes out of scope
1571
1572 // Read the translated result.
1573 MessageData resMsg;
1574 resultPort.read(resMsg);
1575
1576 logger.debug("esitester", "Received translated result: " +
1577 std::to_string(resMsg.getSize()) + " bytes");
1578
     // First make sure the header is there before reading dataLength from it.
1579 if (resMsg.getSize() < sizeof(StreamingAddTranslatedResult))
1580 throw std::runtime_error(
1581 "Streaming add test (translated): result too small");
1582
1583 const auto *result =
1584 reinterpret_cast<const StreamingAddTranslatedResult *>(resMsg.getBytes());
1585
     // Then make sure the message is long enough for the element count the
     // header declares.
1586 if (resMsg.getSize() <
1587 StreamingAddTranslatedResult::allocSize(result->dataLength))
1588 throw std::runtime_error(
1589 "Streaming add test (translated): result data truncated");
1590
1591 // Verify results.
1592 if (result->dataLength != inputData.size())
1593 throw std::runtime_error(
1594 "Streaming add test (translated): result size mismatch. Expected " +
1595 std::to_string(inputData.size()) + ", got " +
1596 std::to_string(result->dataLength));
1597
     // Every element is printed, including matches; a mismatch only fails the
     // test after the whole list has been shown.
1598 bool passed = true;
1599 std::cout << "Streaming add test results:" << std::endl;
1600 for (size_t i = 0; i < inputData.size(); ++i) {
1601 uint32_t expected = inputData[i] + addAmt;
1602 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1603 << " = " << result->data()[i] << " (expected " << expected << ")";
1604 if (result->data()[i] != expected) {
1605 std::cout << " MISMATCH!";
1606 passed = false;
1607 }
1608 std::cout << std::endl;
1609 }
1610
1611 argPort.disconnect();
1612 resultPort.disconnect();
1613
1614 if (!passed)
1615 throw std::runtime_error(
1616 "Streaming add test (translated) failed: result mismatch");
1617
1618 logger.info("esitester", "Streaming add test passed (translated)");
1619 std::cout << "Streaming add test passed" << std::endl;
1620}
1621
1622/// Test the CoordTranslator module using message translation.
1623/// This version uses the list translation support where the message format is:
1624/// Argument: { coords_length, y_translation, x_translation, coords[] }
1625/// Result: { coords_length, coords[] }
1626/// Each coord is a struct { x, y }.
1627
1628/// Coordinate struct for CoordTranslator.
1629/// SV ordering means y comes before x in memory.
/// Packed to exactly 8 bytes (two uint32_t fields), as the static_assert below
/// checks.
1630#pragma pack(push, 1)
1631struct Coord {
1632 uint32_t y; // SV ordering: last declared field first in memory
     // x follows y in memory.
1633 uint32_t x;
1634};
1635#pragma pack(pop)
1636static_assert(sizeof(Coord) == 8, "Coord must be 8 bytes packed");
1637
1638/// Translated argument struct for CoordTranslator.
1639/// Memory layout (standard C struct ordering):
1640/// ESI type: struct { x_translation: UInt(32), y_translation: UInt(32),
1641/// coords: List<struct{x, y}> }
1642/// becomes host struct:
1643/// { coords_length (size_t, 8 bytes on 64-bit), y_translation (uint32_t),
1644/// x_translation (uint32_t), coords[] }
1645/// Note: Fields are in reverse order due to SV struct ordering.
1646/// Note: size_t is used for list lengths, so this format is platform-dependent.
/// The struct is only a header: the Coord elements live in the bytes that
/// follow it, so it must be placed in a buffer of at least
/// allocSize(numCoords) bytes.
1647#pragma pack(push, 1)
1650 uint32_t yTranslation; // SV ordering: last declared field first in memory
1652 // Trailing array data follows immediately after the struct in memory.
1653
1654 /// Get pointer to trailing coords array.
     /// (`this + 1` is the first byte past the header.)
1655 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1656 const Coord *coords() const {
1657 return reinterpret_cast<const Coord *>(this + 1);
1658 }
1659 /// Get span view of coords (requires coordsLength to be set first).
1660 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1661 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1662
     /// Bytes needed for the header plus `numCoords` trailing Coord values.
1663 static size_t allocSize(size_t numCoords) {
1664 return sizeof(CoordTranslateArg) + numCoords * sizeof(Coord);
1665 }
1666};
1667#pragma pack(pop)
1668
1669/// Translated result struct for CoordTranslator.
1670/// Memory layout:
1671/// ESI type: List<struct{x, y}>
1672/// becomes host struct:
1673/// { coords_length (size_t, 8 bytes on 64-bit), coords[] }
/// The struct is only a header: the Coord elements live in the bytes that
/// follow it, so a message holding `n` coords is at least allocSize(n) bytes.
1674#pragma pack(push, 1)
1677 // Trailing array data follows immediately after the struct in memory.
1678
1679 /// Get pointer to trailing coords array.
     /// (`this + 1` is the first byte past the header.)
1680 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1681 const Coord *coords() const {
1682 return reinterpret_cast<const Coord *>(this + 1);
1683 }
1684 /// Get span view of coords (requires coordsLength to be set first).
1685 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1686 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1687
     /// Bytes needed for the header plus `numCoords` trailing Coord values.
1688 static size_t allocSize(size_t numCoords) {
1689 return sizeof(CoordTranslateResult) + numCoords * sizeof(Coord);
1690 }
1691};
1692#pragma pack(pop)
1693
1695 uint32_t xTrans, uint32_t yTrans,
1696 uint32_t numCoords) {
1697 Logger &logger = conn->getLogger();
1698 logger.info("esitester", "Starting coord translate test with x_trans=" +
1699 std::to_string(xTrans) +
1700 ", y_trans=" + std::to_string(yTrans) +
1701 ", num_coords=" + std::to_string(numCoords));
1702
1703 // Generate random input coordinates.
1704 // Note: Coord struct has y before x due to SV ordering, but we generate
1705 // and display as (x, y) for human readability.
1706 std::mt19937 rng(0xDEADBEEF);
1707 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1708 std::vector<Coord> inputCoords;
1709 inputCoords.reserve(numCoords);
1710 for (uint32_t i = 0; i < numCoords; ++i) {
1711 Coord c;
1712 c.x = dist(rng);
1713 c.y = dist(rng);
1714 inputCoords.push_back(c);
1715 }
1716
1717 // Find the coord_translator child.
1718 auto coordTranslatorChild =
1719 accel->getChildren().find(AppID("coord_translator"));
1720 if (coordTranslatorChild == accel->getChildren().end())
1721 throw std::runtime_error(
1722 "Coord translate test: no 'coord_translator' child found");
1723
1724 auto &ports = coordTranslatorChild->second->getPorts();
1725 auto translateIter = ports.find(AppID("translate_coords"));
1726 if (translateIter == ports.end())
1727 throw std::runtime_error(
1728 "Coord translate test: no 'translate_coords' port found");
1729
1730 // Use FuncService::Function which handles connection and translation.
1731 auto *funcPort =
1732 translateIter->second.getAs<services::FuncService::Function>();
1733 if (!funcPort)
1734 throw std::runtime_error(
1735 "Coord translate test: 'translate_coords' port not a "
1736 "FuncService::Function");
1737 funcPort->connect();
1738
1739 // Allocate the argument struct with proper alignment for the struct members.
1740 size_t argSize = CoordTranslateArg::allocSize(numCoords);
1741 constexpr size_t alignment = alignof(CoordTranslateArg);
1742 // aligned_alloc requires size to be a multiple of alignment
1743 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1744 void *argRaw = alignedAllocCompat(alignment, allocSize);
1745 if (!argRaw)
1746 throw std::bad_alloc();
1747 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1748 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1749 auto *arg = static_cast<CoordTranslateArg *>(argRaw);
1750 arg->coordsLength = numCoords;
1751 arg->xTranslation = xTrans;
1752 arg->yTranslation = yTrans;
1753 for (uint32_t i = 0; i < numCoords; ++i)
1754 arg->coords()[i] = inputCoords[i];
1755
1756 logger.debug(
1757 "esitester",
1758 "Sending coord translate argument: " + std::to_string(argSize) +
1759 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1760 ", x_trans=" + std::to_string(arg->xTranslation) +
1761 ", y_trans=" + std::to_string(arg->yTranslation));
1762
1763 // Call the function - translation happens automatically.
1764 MessageData resMsg =
1765 funcPort
1766 ->call(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize))
1767 .get();
1768 // argBuffer automatically freed when it goes out of scope
1769
1770 logger.debug("esitester", "Received coord translate result: " +
1771 std::to_string(resMsg.getSize()) + " bytes");
1772
1773 if (resMsg.getSize() < sizeof(CoordTranslateResult))
1774 throw std::runtime_error("Coord translate test: result too small");
1775
1776 const auto *result =
1777 reinterpret_cast<const CoordTranslateResult *>(resMsg.getBytes());
1778
1779 if (resMsg.getSize() < CoordTranslateResult::allocSize(result->coordsLength))
1780 throw std::runtime_error("Coord translate test: result data truncated");
1781
1782 // Verify results.
1783 if (result->coordsLength != inputCoords.size())
1784 throw std::runtime_error(
1785 "Coord translate test: result size mismatch. Expected " +
1786 std::to_string(inputCoords.size()) + ", got " +
1787 std::to_string(result->coordsLength));
1788
1789 bool passed = true;
1790 std::cout << "Coord translate test results:" << std::endl;
1791 for (size_t i = 0; i < inputCoords.size(); ++i) {
1792 uint32_t expectedX = inputCoords[i].x + xTrans;
1793 uint32_t expectedY = inputCoords[i].y + yTrans;
1794 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1795 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1796 << ") = (" << result->coords()[i].x << ","
1797 << result->coords()[i].y << ")";
1798 if (result->coords()[i].x != expectedX ||
1799 result->coords()[i].y != expectedY) {
1800 std::cout << " MISMATCH! (expected (" << expectedX << "," << expectedY
1801 << "))";
1802 passed = false;
1803 }
1804 std::cout << std::endl;
1805 }
1806
1807 if (!passed)
1808 throw std::runtime_error("Coord translate test failed: result mismatch");
1809
1810 logger.info("esitester", "Coord translate test passed");
1811 std::cout << "Coord translate test passed" << std::endl;
1812}
1813
1814//
1815// SerialCoordTranslator test
1816//
1817
1818#pragma pack(push, 1)
1820 uint16_t coordsCount;
1823};
1824static_assert(sizeof(SerialCoordHeader) == 10, "Size mismatch");
1826 SerialCoordData(uint32_t x, uint32_t y) : _pad_head(0), y(y), x(x) {}
1827 uint16_t _pad_head;
1828 uint32_t y;
1829 uint32_t x;
1830};
1831static_assert(sizeof(SerialCoordData) == sizeof(SerialCoordHeader),
1832 "Size mismatch");
1833#pragma pack(pop)
1834
1835// Note: this application is intended to test hardware. As such, we need
1836// to be able to send batches. So this is not the typical way one would define a
1837// message struct. It's closer to a streaming style.
1839private:
1841 std::vector<SerialCoordData> coords;
1843
1844public:
1846 header.coordsCount = 0;
1847 header.xTranslation = 0;
1848 header.yTranslation = 0;
1849 // The footer is a count==0 header that terminates the list per the ESI
1850 // bulk-transfer serial encoding. Static fields are constant within a
1851 // list so the footer's translation values are irrelevant; zero them.
1852 footer.coordsCount = 0;
1853 footer.xTranslation = 0;
1854 footer.yTranslation = 0;
1855 }
1856 void yTranslation(uint32_t yTrans) { header.yTranslation = yTrans; }
1857 uint32_t yTranslation() const { return header.yTranslation; }
1858 void xTranslation(uint32_t xTrans) { header.xTranslation = xTrans; }
1859 uint32_t xTranslation() const { return header.xTranslation; }
1860 void appendCoord(uint32_t x, uint32_t y) {
1861 coords.emplace_back(x, y);
1862 header.coordsCount = (uint16_t)coords.size();
1863 }
1864 const std::vector<SerialCoordData> &getCoords() const { return coords; }
1865
1866 size_t numSegments() const override { return 3; }
1867 Segment segment(size_t idx) const override {
1868 if (idx == 0)
1869 return {reinterpret_cast<const uint8_t *>(&header), sizeof(header)};
1870 else if (idx == 1)
1871 return {reinterpret_cast<const uint8_t *>(coords.data()),
1872 coords.size() * sizeof(SerialCoordData)};
1873 else if (idx == 2)
1874 return {reinterpret_cast<const uint8_t *>(&footer), sizeof(footer)};
1875 else
1876 throw std::out_of_range("SerialCoordInput: invalid segment index");
1877 }
1878};
1879
1880// Like SerialCoordInput but without the trailing count==0 terminator. Used
1881// when streaming multiple bursts that together comprise a single logical
1882// list; the caller is responsible for sending a separate terminator burst
1883// (a SerialCoordBurst with count==0 and no data).
1885private:
1887 std::vector<SerialCoordData> coords;
1888
1889public:
1895 void yTranslation(uint32_t yTrans) { header.yTranslation = yTrans; }
1896 void xTranslation(uint32_t xTrans) { header.xTranslation = xTrans; }
1897 void appendCoord(uint32_t x, uint32_t y) {
1898 coords.emplace_back(x, y);
1899 header.coordsCount = (uint16_t)coords.size();
1900 }
1901
1902 size_t numSegments() const override { return 2; }
1903 Segment segment(size_t idx) const override {
1904 if (idx == 0)
1905 return {reinterpret_cast<const uint8_t *>(&header), sizeof(header)};
1906 else if (idx == 1)
1907 return {reinterpret_cast<const uint8_t *>(coords.data()),
1908 coords.size() * sizeof(SerialCoordData)};
1909 else
1910 throw std::out_of_range("SerialCoordBurst: invalid segment index");
1911 }
1912};
1913
1914#pragma pack(push, 1)
1916 uint8_t _pad[6];
1917 uint16_t coordsCount;
1918};
1920 uint32_t y;
1921 uint32_t x;
1922};
1927#pragma pack(pop)
1928static_assert(sizeof(SerialCoordOutputFrame) == 8, "Size mismatch");
1929
1930/// Deserialized result batch from the serial coord translator. The
1931/// TypeDeserializer accumulates header+data frame sequences until the
1932/// zero-count footer header, then emits the complete coordinate list.
1934 std::vector<Coord> coords;
1935
1937 : public QueuedDecodeTypeDeserializer<SerialCoordOutputBatch> {
1938 public:
1942
1944 : Base(std::move(output)) {}
1945
1946 private:
1947 DecodedOutputs decode(std::unique_ptr<SegmentedMessageData> &msg) override {
1948 DecodedOutputs decoded;
1949
1950 MessageData scratch;
1951 const MessageData &flat =
1952 detail::getMessageDataRef<SerialCoordOutputBatch>(*msg, scratch);
1953 const uint8_t *bytes = flat.getBytes();
1954 size_t size = flat.getSize();
1955 constexpr size_t frameSize = sizeof(SerialCoordOutputFrame);
1956
1957 size_t offset = 0;
1958 while (offset < size) {
1959 size_t needed = frameSize - partialFrameBytes.size();
1960 size_t chunkSize = std::min(needed, size - offset);
1961 partialFrameBytes.insert(partialFrameBytes.end(), bytes + offset,
1962 bytes + offset + chunkSize);
1963 offset += chunkSize;
1964
1965 if (partialFrameBytes.size() != frameSize)
1966 break;
1967
1969 std::memcpy(&frame, partialFrameBytes.data(), frameSize);
1970 partialFrameBytes.clear();
1971
1972 if (remainingCoords == 0) {
1973 // Header frame.
1974 uint16_t batchCount = frame.header.coordsCount;
1975 if (batchCount == 0) {
1976 // Footer: end of list. Emit accumulated coordinates.
1977 auto batch = std::make_unique<SerialCoordOutputBatch>();
1978 batch->coords = std::move(accumulated);
1979 accumulated.clear();
1980 decoded.push_back(std::move(batch));
1981 msg.reset();
1982 return decoded;
1983 }
1984 remainingCoords = batchCount;
1985 continue;
1986 }
1987 // Data frame.
1988 accumulated.push_back({frame.data.y, frame.data.x});
1990 }
1991
1992 msg.reset();
1993 return decoded;
1994 }
1995
1996 std::vector<Coord> accumulated;
1997 std::vector<uint8_t> partialFrameBytes;
1999 };
2000};
2001
2003 Accelerator *accel, uint32_t xTrans,
2004 uint32_t yTrans, uint32_t numCoords,
2005 size_t batchSizeLimit) {
2006 Logger &logger = conn->getLogger();
2007 logger.info("esitester", "Starting Serial coord translate test");
2008
2009 // Generate random coordinates.
2010 std::mt19937 rng(0xDEADBEEF);
2011 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
2012 std::vector<Coord> inputCoords;
2013 inputCoords.reserve(numCoords);
2014 for (uint32_t i = 0; i < numCoords; ++i)
2015 inputCoords.push_back({dist(rng), dist(rng)});
2016
2017 auto child = accel->getChildren().find(AppID("coord_translator_serial"));
2018 if (child == accel->getChildren().end())
2019 throw std::runtime_error("Serial coord translate test: no "
2020 "'coord_translator_serial' child found");
2021
2022 auto &ports = child->second->getPorts();
2023 auto portIter = ports.find(AppID("translate_coords_serial"));
2024 if (portIter == ports.end())
2025 throw std::runtime_error(
2026 "Serial coord translate test: no 'translate_coords_serial' port found");
2027
2028 TypedWritePort<SerialCoordBurst, /*SkipTypeCheck=*/true> argPort(
2029 portIter->second.getRawWrite("arg"));
2030 // Use the raw read port so we can verify the multi-burst output framing
2031 // explicitly rather than relying on the typed deserializer to accumulate
2032 // frames until the terminator.
2033 ReadChannelPort &resultRaw = portIter->second.getRawRead("result");
2034
2035 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
2036 // Use an unlimited read queue so the device output isn't stalled by a full
2037 // queue while we're still writing. With raw reads (translateMessage=false),
2038 // each output frame becomes its own queued message, so the default 32-msg
2039 // limit can be hit easily on a multi-burst run.
2040 resultRaw.connect(ChannelPort::ConnectOptions(/*bufferSize=*/0,
2041 /*translateMessage=*/false));
2042
2043 size_t sent = 0;
2044 while (sent < numCoords) {
2045 size_t batchSize = std::min(batchSizeLimit, numCoords - sent);
2046
2047 // Send Header. Only the first header needs the translation values, test
2048 // the subsequent ones with zero translation to verify that the hardware
2049 // correctly applies the first header's translation to the whole list.
2050 auto batch = std::make_unique<SerialCoordBurst>();
2051 batch->xTranslation(sent == 0 ? xTrans : 0);
2052 batch->yTranslation(sent == 0 ? yTrans : 0);
2053 // Send Data
2054 for (size_t i = 0; i < batchSize; ++i) {
2055 batch->appendCoord(inputCoords[sent + i].x, inputCoords[sent + i].y);
2056 }
2057 argPort.write(batch);
2058 sent += batchSize;
2059 }
2060 // Send final header with count=0 to signal end of input.
2061 auto footerBurst = std::make_unique<SerialCoordBurst>();
2062 argPort.write(footerBurst);
2063
2064 // Read raw output frames, walking the bulk-transfer wire format: zero or
2065 // more (HDR(N) + N data frames) sequences followed by a single HDR(0)
2066 // terminator. Each `read()` returns whatever the transport layer has
2067 // available, which is not guaranteed to align with frame boundaries
2068 // (e.g., DMA channel engines may coalesce or split across frames). So
2069 // we accumulate bytes into a buffer and only consume whole frames.
2070 constexpr size_t frameSize = sizeof(SerialCoordOutputFrame);
2071 std::vector<uint8_t> rxBuf;
2072 auto readFrame = [&](SerialCoordOutputFrame &out) {
2073 while (rxBuf.size() < frameSize) {
2074 MessageData data;
2075 resultRaw.read(data);
2076 rxBuf.insert(rxBuf.end(), data.getBytes(),
2077 data.getBytes() + data.getSize());
2078 }
2079 std::memcpy(&out, rxBuf.data(), frameSize);
2080 rxBuf.erase(rxBuf.begin(), rxBuf.begin() + frameSize);
2081 };
2082
2083 std::vector<Coord> results;
2084 results.reserve(numCoords);
2085 while (true) {
2087 readFrame(hdr);
2088 uint16_t batchCount = hdr.header.coordsCount;
2089 if (batchCount == 0)
2090 break;
2091 for (uint16_t i = 0; i < batchCount; ++i) {
2092 SerialCoordOutputFrame frame{};
2093 readFrame(frame);
2094 results.push_back({frame.data.y, frame.data.x});
2095 }
2096 }
2097
2098 // Verify
2099 bool passed = true;
2100 std::cout << "Serial coord translate test results:" << std::endl;
2101 if (results.size() != inputCoords.size()) {
2102 std::cout << "Result size mismatch. Expected " << inputCoords.size()
2103 << ", got " << results.size() << std::endl;
2104 passed = false;
2105 }
2106 for (size_t i = 0; i < std::min(inputCoords.size(), results.size()); ++i) {
2107 uint32_t expX = inputCoords[i].x + xTrans;
2108 uint32_t expY = inputCoords[i].y + yTrans;
2109 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
2110 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
2111 << ") = (" << results[i].x << "," << results[i].y
2112 << ") (expected (" << expX << "," << expY << "))";
2113 if (results[i].x != expX || results[i].y != expY) {
2114 std::cout << " MISMATCH!";
2115 passed = false;
2116 }
2117 std::cout << std::endl;
2118 }
2119
2120 argPort.disconnect();
2121 resultRaw.disconnect();
2122
2123 if (!passed)
2124 throw std::runtime_error("Serial coord translate test failed");
2125
2126 logger.info("esitester", "Serial coord translate test passed");
2127 std::cout << "Serial coord translate test passed" << std::endl;
2128}
2129
2130//
2131// AutoSerialCoordTranslator test
2132//
2133// The hardware module pipes the input through ListWindowToParallel ->
2134// per-coordinate translation -> ListWindowToSerial. The conversion modules
2135// emit one or more bulk transfers per call (each `header(count>0)` followed
2136// by `count` data frames) terminated by a `header(count==0)` footer per the
2137// ESI WindowField serial-encoding spec. This test:
2138// * Sends exactly one input batch: header(numCoords) + numCoords data
2139// frames + header(0) footer.
2140// * Reads back: a sequence of one-or-more `header(count>0) + count data`
2141// bursts terminated by `header(0)`. Use raw frame reads since the
2142// canonical `SerialCoordOutputBatch` deserializer hasn't been wired in
2143// for the converter pair.
2144//
2146 Accelerator *accel, uint32_t xTrans,
2147 uint32_t yTrans, uint32_t numCoords) {
2148 Logger &logger = conn->getLogger();
2149 logger.info("esitester", "Starting Auto serial coord translate test");
2150
2151 // Generate random coordinates.
2152 std::mt19937 rng(0xDEADBEEF);
2153 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
2154 std::vector<Coord> inputCoords;
2155 inputCoords.reserve(numCoords);
2156 for (uint32_t i = 0; i < numCoords; ++i)
2157 inputCoords.push_back({dist(rng), dist(rng)});
2158
2159 auto child = accel->getChildren().find(AppID("coord_translator_auto_serial"));
2160 if (child == accel->getChildren().end())
2161 throw std::runtime_error("Auto serial coord translate test: no "
2162 "'coord_translator_auto_serial' child found");
2163
2164 auto &ports = child->second->getPorts();
2165 auto portIter = ports.find(AppID("translate_coords_auto_serial"));
2166 if (portIter == ports.end())
2167 throw std::runtime_error("Auto serial coord translate test: no "
2168 "'translate_coords_auto_serial' port found");
2169
2170 // Reuse SerialCoordInput: the input wire format is identical (header with
2171 // x/y_translation+count, followed by data frames each carrying one coord).
2172 TypedWritePort<SerialCoordInput, /*SkipTypeCheck=*/true> argPort(
2173 portIter->second.getRawWrite("arg"));
2174 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
2175
2176 // Use the raw read port for results: read one header frame then numCoords
2177 // data frames as raw `SerialCoordOutputFrame`-shaped messages. Disable
2178 // window-message translation so we get one frame per `read()` instead of
2179 // assembled higher-level messages.
2180 ReadChannelPort &resultRaw = portIter->second.getRawRead("result");
2181 // Use an unlimited read queue so the device output isn't stalled by a full
2182 // queue while we're still writing. With raw reads (translateMessage=false),
2183 // each output frame becomes its own queued message, so the default 32-msg
2184 // limit can be hit easily on a multi-frame run.
2185 resultRaw.connect(ChannelPort::ConnectOptions(/*bufferSize=*/0,
2186 /*translateMessage=*/false));
2187
2188 // Send a single header+data burst.
2189 auto batch = std::make_unique<SerialCoordInput>();
2190 batch->xTranslation(xTrans);
2191 batch->yTranslation(yTrans);
2192 for (uint32_t i = 0; i < numCoords; ++i)
2193 batch->appendCoord(inputCoords[i].x, inputCoords[i].y);
2194 argPort.write(batch);
2195
2196 // Helper: read one raw frame, accumulating bytes across `read()` calls
2197 // since transports such as DMA channel engines do not guarantee that
2198 // each `read()` returns exactly one frame.
2199 constexpr size_t frameSize = sizeof(SerialCoordOutputFrame);
2200 std::vector<uint8_t> rxBuf;
2201 auto readFrame = [&](SerialCoordOutputFrame &out) {
2202 while (rxBuf.size() < frameSize) {
2203 MessageData data;
2204 resultRaw.read(data);
2205 rxBuf.insert(rxBuf.end(), data.getBytes(),
2206 data.getBytes() + data.getSize());
2207 }
2208 std::memcpy(&out, rxBuf.data(), frameSize);
2209 rxBuf.erase(rxBuf.begin(), rxBuf.begin() + frameSize);
2210 };
2211
2212 // Read a sequence of one-or-more `header(count>0) + count data` bursts
2213 // followed by a `header(count==0)` terminator footer. Total data items
2214 // received across all bursts must equal numCoords.
2215 std::vector<Coord> results;
2216 results.reserve(numCoords);
2217 while (true) {
2219 readFrame(hdr);
2220 uint16_t burstCount = hdr.header.coordsCount;
2221 if (burstCount == 0)
2222 break;
2223 if (results.size() + burstCount > numCoords)
2224 throw std::runtime_error(
2225 "Auto serial coord translate test: bursts overflow expected total " +
2226 std::to_string(numCoords));
2227 for (uint32_t i = 0; i < burstCount; ++i) {
2228 SerialCoordOutputFrame frame{};
2229 readFrame(frame);
2230 results.push_back({frame.data.y, frame.data.x});
2231 }
2232 }
2233 if (results.size() != numCoords)
2234 throw std::runtime_error("Auto serial coord translate test: got " +
2235 std::to_string(results.size()) +
2236 " coords across all bursts " + "(expected " +
2237 std::to_string(numCoords) + ")");
2238
2239 argPort.disconnect();
2240 resultRaw.disconnect();
2241
2242 bool passed = true;
2243 std::cout << "Auto serial coord translate test results:" << std::endl;
2244 for (size_t i = 0; i < inputCoords.size(); ++i) {
2245 uint32_t expX = inputCoords[i].x + xTrans;
2246 uint32_t expY = inputCoords[i].y + yTrans;
2247 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
2248 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
2249 << ") = (" << results[i].x << "," << results[i].y
2250 << ") (expected (" << expX << "," << expY << "))";
2251 if (results[i].x != expX || results[i].y != expY) {
2252 std::cout << " MISMATCH!";
2253 passed = false;
2254 }
2255 std::cout << std::endl;
2256 }
2257
2258 if (!passed)
2259 throw std::runtime_error("Auto serial coord translate test failed");
2260
2261 logger.info("esitester", "Auto serial coord translate test passed");
2262 std::cout << "Auto serial coord translate test passed" << std::endl;
2263}
2264
2266 uint32_t iterations) {
2267 Logger &logger = conn->getLogger();
2268
2269 auto channelChild = accel->getChildren().find(AppID("channel_test"));
2270 if (channelChild == accel->getChildren().end())
2271 throw std::runtime_error("Channel test: no 'channel_test' child");
2272 auto &ports = channelChild->second->getPorts();
2273
2274 // --- Get the MMIO port to trigger the producer ---
2275 auto cmdIter = ports.find(AppID("cmd"));
2276 if (cmdIter == ports.end())
2277 throw std::runtime_error("Channel test: no 'cmd' port");
2278 auto *cmdMMIO = cmdIter->second.getAs<services::MMIO::MMIORegion>();
2279 if (!cmdMMIO)
2280 throw std::runtime_error("Channel test: 'cmd' is not MMIO");
2281
2282 // --- Get the producer to_host port ---
2283 auto producerIter = ports.find(AppID("producer"));
2284 if (producerIter == ports.end())
2285 throw std::runtime_error("Channel test: no 'producer' port");
2286 auto *producerPort =
2287 producerIter->second.getAs<services::ChannelService::ToHost>();
2288 if (!producerPort)
2289 throw std::runtime_error(
2290 "Channel test: 'producer' is not a ChannelService::ToHost");
2291 producerPort->connect();
2292
2293 // --- Test to_host: MMIO-triggered incrementing values ---
2294 // Write the number of values to send at offset 0x0.
2295 cmdMMIO->write(0x0, iterations);
2296
2297 for (uint32_t i = 0; i < iterations; ++i) {
2298 MessageData recvData = producerPort->read().get();
2299 uint32_t got = *recvData.as<uint32_t>();
2300 std::cout << "[channel] producer i=" << i << " got=" << got << std::endl;
2301 if (got != i)
2302 throw std::runtime_error("Channel producer: expected " +
2303 std::to_string(i) + ", got " +
2304 std::to_string(got));
2305 }
2306 logger.info("esitester", "Channel test: producer passed (" +
2307 std::to_string(iterations) +
2308 " incrementing values)");
2309
2310 // --- Test from_host -> to_host loopback ---
2311 auto loopbackInIter = ports.find(AppID("loopback_in"));
2312 if (loopbackInIter == ports.end())
2313 throw std::runtime_error("Channel test: no 'loopback_in' port");
2314 auto *fromHostPort =
2315 loopbackInIter->second.getAs<services::ChannelService::FromHost>();
2316 if (!fromHostPort)
2317 throw std::runtime_error(
2318 "Channel test: 'loopback_in' is not a ChannelService::FromHost");
2319 fromHostPort->connect();
2320
2321 auto loopbackOutIter = ports.find(AppID("loopback_out"));
2322 if (loopbackOutIter == ports.end())
2323 throw std::runtime_error("Channel test: no 'loopback_out' port");
2324 auto *loopbackOutPort =
2325 loopbackOutIter->second.getAs<services::ChannelService::ToHost>();
2326 if (!loopbackOutPort)
2327 throw std::runtime_error(
2328 "Channel test: 'loopback_out' is not a ChannelService::ToHost");
2329 loopbackOutPort->connect();
2330
2331 std::mt19937_64 rng(0xDEADBEEF);
2332 std::uniform_int_distribution<uint32_t> dist(0, UINT32_MAX);
2333
2334 for (uint32_t i = 0; i < iterations; ++i) {
2335 uint32_t sendVal = dist(rng);
2336 fromHostPort->write(MessageData::from(sendVal));
2337 MessageData recvData = loopbackOutPort->read().get();
2338 uint32_t recvVal = *recvData.as<uint32_t>();
2339 std::cout << "[channel] loopback i=" << i << " sent=0x"
2340 << esi::toHex(sendVal) << " recv=0x" << esi::toHex(recvVal)
2341 << std::endl;
2342 if (recvVal != sendVal)
2343 throw std::runtime_error("Channel loopback mismatch at i=" +
2344 std::to_string(i));
2345 }
2346
2347 logger.info("esitester", "Channel test: loopback passed (" +
2348 std::to_string(iterations) + " iterations)");
2349 std::cout << "Channel test passed" << std::endl;
2350}
static void print(TypedAttr val, llvm::raw_ostream &os)
DecodedOutputs decode(std::unique_ptr< SegmentedMessageData > &msg) override
Decode one raw message into zero or more typed outputs.
Abstract class representing a connection to an accelerator.
Definition Accelerator.h:89
Top level accelerator class.
Definition Accelerator.h:70
Services provide connections to 'bundles' – collections of named, unidirectional communication channe...
Definition Ports.h:611
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
Definition Ports.h:639
ReadChannelPort & getRawRead(const std::string &name) const
Definition Ports.cpp:52
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Definition Ports.cpp:42
Common options and code for ESI runtime tools.
Definition CLI.h:29
Context & getContext()
Get the context.
Definition CLI.h:69
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
Definition CLI.h:66
int esiParse(int argc, const char **argv)
Run the parser.
Definition CLI.h:52
AcceleratorConnections, Accelerators, and Manifests must all share a context.
Definition Context.h:34
Logger & getLogger()
Definition Context.h:69
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
Definition Design.h:71
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
Definition Logging.h:64
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
Definition Logging.h:75
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Definition Logging.h:83
Class to parse a manifest.
Definition Manifest.h:39
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A concrete flat message backed by a single vector of bytes.
Definition Common.h:155
const uint8_t * getBytes() const
Definition Common.h:166
const T * as() const
Cast to a type.
Definition Common.h:190
size_t getSize() const
Get the size of the data in bytes.
Definition Common.h:180
static MessageData from(T &t)
Cast from a type to its raw bytes.
Definition Common.h:200
Helper base class for stateful deserializers which may emit zero, one, or many typed outputs for each...
Definition TypedPorts.h:246
detail::TypedReadOwnedCallback< SerialCoordOutputBatch > OutputCallback
Definition TypedPorts.h:248
std::vector< std::unique_ptr< SerialCoordOutputBatch > > DecodedOutputs
Definition TypedPorts.h:249
A ChannelPort which reads data from the accelerator.
Definition Ports.h:453
virtual void connect(ReadCallback callback, const ConnectOptions &options={})
Definition Ports.cpp:140
virtual void disconnect() override
Disconnect the channel.
Definition Ports.cpp:70
virtual void read(MessageData &outData)
Specify a buffer to read into.
Definition Ports.h:517
Abstract multi-segment message.
Definition Common.h:133
void connect(const ChannelPort::ConnectOptions &opts={std::nullopt, false})
Definition TypedPorts.h:626
void write(const T &data)
Definition TypedPorts.h:636
A ChannelPort which sends data to the accelerator.
Definition Ports.h:308
virtual void disconnect() override
Definition Ports.h:322
void write(const MessageData &data)
A very basic blocking write API.
Definition Ports.h:327
bool tryWrite(const MessageData &data)
A basic non-blocking write API.
Definition Ports.h:357
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
Definition Ports.h:312
A function call which gets attached to a service port.
Definition Services.h:405
A port which writes data to the accelerator (from_host).
Definition Services.h:315
A port which reads data from the accelerator (to_host).
Definition Services.h:291
A function call which gets attached to a service port.
Definition Services.h:353
virtual void start()
In cases where necessary, enable host memory services.
Definition Services.h:261
A "slice" of some parent MMIO space.
Definition Services.h:181
Information about the Accelerator system.
Definition Services.h:113
A telemetry port which gets attached to a service port.
Definition Services.h:470
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
Definition Services.cpp:459
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
static void serialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords, size_t batchSizeLimit)
constexpr std::array< uint32_t, 5 > defaultWidths
Definition esitester.cpp:83
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static void channelTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static std::string formatBandwidth(double bytesPerSec)
Definition esitester.cpp:95
static void autoSerialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static std::string defaultWidthsStr()
Definition esitester.cpp:84
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Definition debug.py:1
Definition esi.py:1
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
Definition Logging.cpp:132
std::string toHex(void *val)
Definition Common.cpp:37
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
uint32_t x
uint32_t y
void yTranslation(uint32_t yTrans)
void appendCoord(uint32_t x, uint32_t y)
std::vector< SerialCoordData > coords
void xTranslation(uint32_t xTrans)
SerialCoordHeader header
Segment segment(size_t idx) const override
Get a segment by index.
size_t numSegments() const override
Number of segments in the message.
SerialCoordData(uint32_t x, uint32_t y)
size_t numSegments() const override
Number of segments in the message.
void appendCoord(uint32_t x, uint32_t y)
uint32_t yTranslation() const
SerialCoordHeader header
void yTranslation(uint32_t yTrans)
SerialCoordHeader footer
uint32_t xTranslation() const
Segment segment(size_t idx) const override
Get a segment by index.
const std::vector< SerialCoordData > & getCoords() const
void xTranslation(uint32_t xTrans)
std::vector< SerialCoordData > coords
Deserialized result batch from the serial coord translator.
std::vector< Coord > coords
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
A contiguous, non-owning view of bytes within a SegmentedMessageData.
Definition Common.h:118
size_t size
Definition Common.h:120
RAII memory region for host memory.
Definition Services.h:237
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
Definition Services.h:243
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
Definition Services.h:251
virtual std::size_t getSize() const =0
SerialCoordOutputData data
SerialCoordOutputHeader header