// CIRCT 23.0.0git - esitester.cpp (source listing extracted from the generated documentation)
//===- esitester.cpp - ESI accelerator test/example tool ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// DO NOT EDIT!
// This file is distributed as part of an ESI runtime package. The source for
// this file should always be modified within CIRCT
// (lib/dialect/ESI/runtime/cpp/tools/esitester.cpp).
//
//===----------------------------------------------------------------------===//
//
// This application isn't a utility so much as a test driver for an ESI system.
// It is also useful as an example of how to use the ESI C++ API. esiquery.cpp
// is also useful as an example.
//
//===----------------------------------------------------------------------===//
21
#include "esi/Accelerator.h"
#include "esi/CLI.h"
#include "esi/Manifest.h"
#include "esi/Services.h"

#include <any>
#include <array>
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <future>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <new>
#include <random>
#include <span>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>
39
40using namespace esi;
41
42// Forward declarations of test functions.
44 uint32_t iterations);
46 const std::vector<uint32_t> &widths, bool write,
47 bool read);
49 uint32_t xferCount,
50 const std::vector<uint32_t> &widths, bool read,
51 bool write);
53 const std::vector<uint32_t> &widths, bool read, bool write);
55 const std::vector<uint32_t> &widths,
56 uint32_t xferCount, bool read, bool write);
58 uint32_t iterations, bool pipeline);
60 Accelerator *, uint32_t width,
61 uint32_t xferCount, bool read,
62 bool write);
64 uint32_t addAmt, uint32_t numItems);
66 uint32_t addAmt, uint32_t numItems);
68 uint32_t xTrans, uint32_t yTrans,
69 uint32_t numCoords);
71 uint32_t xTrans, uint32_t yTrans,
72 uint32_t numCoords, size_t batchSizeLimit);
73
// Default widths and default widths string for CLI help text.
constexpr std::array<uint32_t, 5> defaultWidths = {32, 64, 128, 256, 512};

/// Render `defaultWidths` as a comma-separated list with no spaces
/// ("32,64,128,256,512") for embedding in option help strings.
static std::string defaultWidthsStr() {
  std::string s;
  for (uint32_t width : defaultWidths) {
    // Separator goes before every element except the first.
    if (!s.empty())
      s += ",";
    s += std::to_string(width);
  }
  return s;
}
85
/// Format a bandwidth with an appropriate decimal unit.
///
/// The value is scaled by the largest of 1e9 (GB/s), 1e6 (MB/s) or 1e3 (KB/s)
/// that does not exceed it; anything below 1e3 is reported in B/s. The result
/// always has two digits after the decimal point, e.g. "1.50 MB/s".
static std::string formatBandwidth(double bytesPerSec) {
  struct Scale {
    double threshold;
    const char *unit;
  };
  // Ordered largest first so the first match is the best fit.
  static constexpr Scale scales[] = {
      {1e9, "GB/s"}, {1e6, "MB/s"}, {1e3, "KB/s"}};

  const char *unit = "B/s";
  double value = bytesPerSec;
  for (const Scale &scale : scales) {
    if (bytesPerSec >= scale.threshold) {
      unit = scale.unit;
      value = bytesPerSec / scale.threshold;
      break;
    }
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(2);
  oss << value << " " << unit;
  return oss.str();
}
106
/// Human-readable size from bytes.
///
/// Repeatedly divides by 1024 while the value is at least 1024 and a larger
/// unit is available (B, KB, MB, GB, TB). Plain byte counts are printed with
/// no decimals; scaled values with two, e.g. "1.50 KB".
static std::string humanBytes(uint64_t bytes) {
  static constexpr const char *units[] = {"B", "KB", "MB", "GB", "TB"};
  // Index of the largest unit; values beyond it stay expressed in that unit.
  constexpr std::size_t lastUnit = sizeof(units) / sizeof(units[0]) - 1;

  double v = static_cast<double>(bytes);
  std::size_t u = 0;
  while (v >= 1024.0 && u < lastUnit) {
    v /= 1024.0;
    ++u;
  }

  std::ostringstream oss;
  oss.setf(std::ios::fixed);
  oss.precision(u == 0 ? 0 : 2);
  oss << v << " " << units[u];
  return oss.str();
}
122
/// Human-readable time from microseconds.
///
/// - Below 1000 us: the integer count followed by " us".
/// - Below 1000 ms: milliseconds, with 2, 1 or 0 decimals depending on
///   whether the value is below 10, below 100, or larger.
/// - Otherwise: seconds, with 3 decimals below 10 s and 2 decimals above.
///
/// The number of decimals is chosen from the unrounded value.
static std::string humanTimeUS(uint64_t us) {
  if (us < 1000)
    return std::to_string(us) + " us";

  std::ostringstream oss;
  oss.setf(std::ios::fixed);

  const double ms = us / 1000.0;
  if (ms < 1000.0) {
    oss.precision(ms < 10.0 ? 2 : (ms < 100.0 ? 1 : 0));
    oss << ms << " ms";
    return oss.str();
  }

  const double sec = ms / 1000.0;
  oss.precision(sec < 10.0 ? 3 : 2);
  oss << sec << " s";
  return oss.str();
}
142
// MSVC does not implement std::aligned_alloc, even though it's part of the
// C++17 standard. Provide a compatibility layer.

/// Allocate at least `size` bytes aligned to `alignment`.
///
/// The returned pointer must be released with alignedFreeCompat().
///
/// @param alignment Requested alignment in bytes.
/// @param size      Number of bytes requested.
/// @throws std::bad_alloc if the allocator returns null or if padding `size`
///         up to a multiple of `alignment` would overflow.
static void *alignedAllocCompat(std::size_t alignment, std::size_t size) {
#if defined(_MSC_VER)
  void *ptr = _aligned_malloc(size, alignment);
#else
  // std::aligned_alloc specifies that size be an integral multiple of
  // alignment; some C libraries return null otherwise. Pad the request up so
  // callers may pass any size.
  if (alignment != 0) {
    const std::size_t remainder = size % alignment;
    if (remainder != 0) {
      const std::size_t padding = alignment - remainder;
      if (size > std::numeric_limits<std::size_t>::max() - padding)
        throw std::bad_alloc();
      size += padding;
    }
  }
  void *ptr = std::aligned_alloc(alignment, size);
#endif
  if (!ptr)
    throw std::bad_alloc();
  return ptr;
}
158
/// Release memory obtained from alignedAllocCompat().
///
/// Uses _aligned_free under MSVC (matching _aligned_malloc) and std::free
/// everywhere else (matching std::aligned_alloc).
static void alignedFreeCompat(void *ptr) {
#if defined(_MSC_VER)
  _aligned_free(ptr);
#else
  std::free(ptr);
#endif
}
166
167int main(int argc, const char *argv[]) {
168 CliParser cli("esitester");
169 cli.description("Test an ESI system running the ESI tester image.");
170 cli.require_subcommand(1);
171
172 CLI::App *callback_test =
173 cli.add_subcommand("callback", "initiate callback test");
174 uint32_t cb_iters = 1;
175 callback_test->add_option("-i,--iters", cb_iters,
176 "Number of iterations to run");
177
178 CLI::App *hostmemtestSub =
179 cli.add_subcommand("hostmem", "Run the host memory test");
180 bool hmRead = false;
181 bool hmWrite = false;
182 std::vector<uint32_t> hostmemWidths(defaultWidths.begin(),
183 defaultWidths.end());
184 hostmemtestSub->add_flag("-w,--write", hmWrite,
185 "Enable host memory write test");
186 hostmemtestSub->add_flag("-r,--read", hmRead, "Enable host memory read test");
187 hostmemtestSub->add_option(
188 "--widths", hostmemWidths,
189 "Hostmem test widths (default: " + defaultWidthsStr() + ")");
190
191 CLI::App *dmatestSub = cli.add_subcommand("dma", "Run the DMA test");
192 bool dmaRead = false;
193 bool dmaWrite = false;
194 std::vector<uint32_t> dmaWidths(defaultWidths.begin(), defaultWidths.end());
195 dmatestSub->add_flag("-w,--write", dmaWrite, "Enable dma write test");
196 dmatestSub->add_flag("-r,--read", dmaRead, "Enable dma read test");
197 dmatestSub->add_option("--widths", dmaWidths,
198 "DMA test widths (default: " + defaultWidthsStr() +
199 ")");
200
201 CLI::App *bandwidthSub =
202 cli.add_subcommand("bandwidth", "Run the bandwidth test");
203 uint32_t xferCount = 1000;
204 bandwidthSub->add_option("-c,--count", xferCount,
205 "Number of transfers to perform");
206 bool bandwidthRead = false;
207 bool bandwidthWrite = false;
208 std::vector<uint32_t> bandwidthWidths(defaultWidths.begin(),
209 defaultWidths.end());
210 bandwidthSub->add_option("--widths", bandwidthWidths,
211 "Width of the transfers to perform (default: " +
212 defaultWidthsStr() + ")");
213 bandwidthSub->add_flag("-w,--write", bandwidthWrite,
214 "Enable bandwidth write");
215 bandwidthSub->add_flag("-r,--read", bandwidthRead, "Enable bandwidth read");
216
217 CLI::App *hostmembwSub =
218 cli.add_subcommand("hostmembw", "Run the host memory bandwidth test");
219 uint32_t hmBwCount = 1000;
220 bool hmBwRead = false;
221 bool hmBwWrite = false;
222 std::vector<uint32_t> hmBwWidths(defaultWidths.begin(), defaultWidths.end());
223 hostmembwSub->add_option("-c,--count", hmBwCount,
224 "Number of hostmem transfers");
225 hostmembwSub->add_option(
226 "--widths", hmBwWidths,
227 "Hostmem bandwidth widths (default: " + defaultWidthsStr() + ")");
228 hostmembwSub->add_flag("-w,--write", hmBwWrite,
229 "Measure hostmem write bandwidth");
230 hostmembwSub->add_flag("-r,--read", hmBwRead,
231 "Measure hostmem read bandwidth");
232
233 CLI::App *loopbackSub =
234 cli.add_subcommand("loopback", "Test LoopbackInOutAdd function service");
235 uint32_t loopbackIters = 10;
236 bool loopbackPipeline = false;
237 loopbackSub->add_option("-i,--iters", loopbackIters,
238 "Number of function invocations (default 10)");
239 loopbackSub->add_flag("-p,--pipeline", loopbackPipeline,
240 "Pipeline all calls then collect results");
241
242 CLI::App *aggBwSub = cli.add_subcommand(
243 "aggbandwidth",
244 "Aggregate hostmem bandwidth across four units (readmem*, writemem*)");
245 uint32_t aggWidth = 512;
246 uint32_t aggCount = 1000;
247 bool aggRead = false;
248 bool aggWrite = false;
249 aggBwSub->add_option(
250 "--width", aggWidth,
251 "Bit width (default 512; other widths ignored if absent)");
252 aggBwSub->add_option("-c,--count", aggCount, "Flits per unit (default 1000)");
253 aggBwSub->add_flag("-r,--read", aggRead, "Include read units");
254 aggBwSub->add_flag("-w,--write", aggWrite, "Include write units");
255
256 CLI::App *streamingAddSub = cli.add_subcommand(
257 "streaming_add", "Test StreamingAdder function service with list input");
258 uint32_t streamingAddAmt = 5;
259 uint32_t streamingNumItems = 5;
260 bool streamingTranslate = false;
261 streamingAddSub->add_option("-a,--add", streamingAddAmt,
262 "Amount to add to each element (default 5)");
263 streamingAddSub->add_option("-n,--num-items", streamingNumItems,
264 "Number of random items in the list (default 5)");
265 streamingAddSub->add_flag("-t,--translate", streamingTranslate,
266 "Use message translation (list translation)");
267
268 CLI::App *coordTranslateSub = cli.add_subcommand(
269 "translate_coords",
270 "Test CoordTranslator function service with list of coordinates");
271 uint32_t coordXTrans = 10;
272 uint32_t coordYTrans = 20;
273 uint32_t coordNumItems = 5;
274 coordTranslateSub->add_option("-x,--x-translation", coordXTrans,
275 "X translation amount (default 10)");
276 coordTranslateSub->add_option("-y,--y-translation", coordYTrans,
277 "Y translation amount (default 20)");
278 coordTranslateSub->add_option("-n,--num-coords", coordNumItems,
279 "Number of random coordinates (default 5)");
280
281 CLI::App *serialCoordTranslateSub = cli.add_subcommand(
282 "serial_coords",
283 "Test SerialCoordTranslator function service with list of coordinates");
284 uint32_t serialBatchSize = 240;
285 serialCoordTranslateSub->add_option("-x,--x-translation", coordXTrans,
286 "X translation amount (default 10)");
287 serialCoordTranslateSub->add_option("-y,--y-translation", coordYTrans,
288 "Y translation amount (default 20)");
289 serialCoordTranslateSub->add_option(
290 "-n,--num-coords", coordNumItems,
291 "Number of random coordinates (default 5)");
292 serialCoordTranslateSub
293 ->add_option("-b,--batch-size", serialBatchSize,
294 "Coordinates per header (default 240, max 65535)")
295 ->check(CLI::Range(1u, 0xFFFFu));
296
297 if (int rc = cli.esiParse(argc, argv))
298 return rc;
299 if (!cli.get_help_ptr()->empty())
300 return 0;
301
302 Context &ctxt = cli.getContext();
303 AcceleratorConnection *acc = cli.connect();
304 try {
305 const auto &info = *acc->getService<services::SysInfo>();
306 ctxt.getLogger().info("esitester", "Connected to accelerator.");
307 Manifest manifest(ctxt, info.getJsonManifest());
308 Accelerator *accel = manifest.buildAccelerator(*acc);
309 ctxt.getLogger().info("esitester", "Built accelerator.");
310 acc->getServiceThread()->addPoll(*accel);
311
312 if (*callback_test) {
313 callbackTest(acc, accel, cb_iters);
314 } else if (*hostmemtestSub) {
315 hostmemTest(acc, accel, hostmemWidths, hmWrite, hmRead);
316 } else if (*loopbackSub) {
317 loopbackAddTest(acc, accel, loopbackIters, loopbackPipeline);
318 } else if (*dmatestSub) {
319 dmaTest(acc, accel, dmaWidths, dmaRead, dmaWrite);
320 } else if (*bandwidthSub) {
321 bandwidthTest(acc, accel, bandwidthWidths, xferCount, bandwidthRead,
322 bandwidthWrite);
323 } else if (*hostmembwSub) {
324 hostmemBandwidthTest(acc, accel, hmBwCount, hmBwWidths, hmBwRead,
325 hmBwWrite);
326 } else if (*aggBwSub) {
327 aggregateHostmemBandwidthTest(acc, accel, aggWidth, aggCount, aggRead,
328 aggWrite);
329 } else if (*streamingAddSub) {
330 if (streamingTranslate)
331 streamingAddTranslatedTest(acc, accel, streamingAddAmt,
332 streamingNumItems);
333 else
334 streamingAddTest(acc, accel, streamingAddAmt, streamingNumItems);
335 } else if (*coordTranslateSub) {
336 coordTranslateTest(acc, accel, coordXTrans, coordYTrans, coordNumItems);
337 } else if (*serialCoordTranslateSub) {
338 serialCoordTranslateTest(acc, accel, coordXTrans, coordYTrans,
339 coordNumItems, serialBatchSize);
340 }
341
342 acc->disconnect();
343 } catch (std::exception &e) {
344 ctxt.getLogger().error("esitester", e.what());
345 acc->disconnect();
346 return -1;
347 }
348 std::cout << "Exiting successfully\n";
349 return 0;
350}
351
353 uint32_t iterations) {
354 auto cb_test = accel->getChildren().find(AppID("cb_test"));
355 if (cb_test == accel->getChildren().end())
356 throw std::runtime_error("No cb_test child found in accelerator");
357 auto &ports = cb_test->second->getPorts();
358 auto cmd_port = ports.find(AppID("cmd"));
359 if (cmd_port == ports.end())
360 throw std::runtime_error("No cmd port found in cb_test child");
361 auto *cmdMMIO = cmd_port->second.getAs<services::MMIO::MMIORegion>();
362 if (!cmdMMIO)
363 throw std::runtime_error("cb_test cmd port is not MMIO");
364
365 auto f = ports.find(AppID("cb"));
366 if (f == ports.end())
367 throw std::runtime_error("No cb port found in accelerator");
368
369 auto *callPort = f->second.getAs<services::CallService::Callback>();
370 if (!callPort)
371 throw std::runtime_error("cb port is not a CallService::Callback");
372
373 std::atomic<uint32_t> callbackCount = 0;
374 callPort->connect(
375 [conn, &callbackCount](const MessageData &data) mutable -> MessageData {
376 callbackCount.fetch_add(1);
377 conn->getLogger().debug(
378 [&](std::string &subsystem, std::string &msg,
379 std::unique_ptr<std::map<std::string, std::any>> &details) {
380 subsystem = "ESITESTER";
381 msg = "Received callback";
382 details = std::make_unique<std::map<std::string, std::any>>();
383 details->emplace("data", data);
384 });
385 std::cout << "callback: " << *data.as<uint64_t>() << std::endl;
386 return MessageData();
387 },
388 true);
389
390 for (uint32_t i = 0; i < iterations; ++i) {
391 conn->getLogger().info("esitester", "Issuing callback command iteration " +
392 std::to_string(i) + "/" +
393 std::to_string(iterations));
394 cmdMMIO->write(0x10, i); // Command the callback
395 // Wait up to 1 second for the callback to be invoked.
396 for (uint32_t wait = 0; wait < 1000; ++wait) {
397 if (callbackCount.load() > i)
398 break;
399 std::this_thread::sleep_for(std::chrono::milliseconds(1));
400 }
401 if (callbackCount.load() <= i)
402 throw std::runtime_error("Callback test failed. No callback received");
403 }
404}
405
406/// Test the hostmem write functionality.
409 uint32_t width) {
410 std::cout << "Running hostmem WRITE test with width " << width << std::endl;
411 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
412 auto check = [&](bool print) {
413 bool ret = true;
414 for (size_t i = 0; i < 9; ++i) {
415 if (print)
416 printf("[write] dataPtr[%zu] = 0x%016lx\n", i, dataPtr[i]);
417 if (i < (width + 63) / 64 && dataPtr[i] == 0xFFFFFFFFFFFFFFFFull)
418 ret = false;
419 }
420 return ret;
421 };
422
423 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
424 if (writeMemChildIter == acc->getChildren().end())
425 throw std::runtime_error(
426 "hostmem write test failed. No writemem child found");
427 auto &writeMemPorts = writeMemChildIter->second->getPorts();
428
429 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
430 if (cmdPortIter == writeMemPorts.end())
431 throw std::runtime_error(
432 "hostmem write test failed. No (cmd,width) MMIO port");
433 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
434 if (!cmdMMIO)
435 throw std::runtime_error(
436 "hostmem write test failed. (cmd,width) port not MMIO");
437
438 auto issuedPortIter = writeMemPorts.find(AppID("addrCmdIssued"));
439 if (issuedPortIter == writeMemPorts.end())
440 throw std::runtime_error(
441 "hostmem write test failed. addrCmdIssued missing");
442 auto *addrCmdIssuedPort =
443 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
444 if (!addrCmdIssuedPort)
445 throw std::runtime_error(
446 "hostmem write test failed. addrCmdIssued not telemetry");
447 addrCmdIssuedPort->connect();
448
449 auto responsesPortIter = writeMemPorts.find(AppID("addrCmdResponses"));
450 if (responsesPortIter == writeMemPorts.end())
451 throw std::runtime_error(
452 "hostmem write test failed. addrCmdResponses missing");
453 auto *addrCmdResponsesPort =
454 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
455 if (!addrCmdResponsesPort)
456 throw std::runtime_error(
457 "hostmem write test failed. addrCmdResponses not telemetry");
458 addrCmdResponsesPort->connect();
459
460 for (size_t i = 0, e = 9; i < e; ++i)
461 dataPtr[i] = 0xFFFFFFFFFFFFFFFFull;
462 region.flush();
463 cmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
464 cmdMMIO->write(0x18, 1);
465 cmdMMIO->write(0x20, 1);
466 bool done = false;
467 for (int i = 0; i < 100; ++i) {
468 auto issued = addrCmdIssuedPort->readInt();
469 auto responses = addrCmdResponsesPort->readInt();
470 if (issued == 1 && responses == 1) {
471 done = true;
472 break;
473 }
474 std::this_thread::sleep_for(std::chrono::microseconds(100));
475 }
476 if (!done) {
477 check(true);
478 throw std::runtime_error("hostmem write test (" + std::to_string(width) +
479 " bits) timeout waiting for completion");
480 }
481 if (!check(true))
482 throw std::runtime_error("hostmem write test failed (" +
483 std::to_string(width) + " bits)");
484}
485
488 uint32_t width) {
489 std::cout << "Running hostmem READ test with width " << width << std::endl;
490 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
491 if (readMemChildIter == acc->getChildren().end())
492 throw std::runtime_error(
493 "hostmem read test failed. No readmem child found");
494
495 auto &readMemPorts = readMemChildIter->second->getPorts();
496 auto addrCmdPortIter = readMemPorts.find(AppID("cmd", width));
497 if (addrCmdPortIter == readMemPorts.end())
498 throw std::runtime_error(
499 "hostmem read test failed. No AddressCommand MMIO port");
500 auto *addrCmdMMIO =
501 addrCmdPortIter->second.getAs<services::MMIO::MMIORegion>();
502 if (!addrCmdMMIO)
503 throw std::runtime_error(
504 "hostmem read test failed. AddressCommand port not MMIO");
505
506 auto lastReadPortIter = readMemPorts.find(AppID("lastReadLSB"));
507 if (lastReadPortIter == readMemPorts.end())
508 throw std::runtime_error("hostmem read test failed. lastReadLSB missing");
509 auto *lastReadPort =
510 lastReadPortIter->second.getAs<services::TelemetryService::Metric>();
511 if (!lastReadPort)
512 throw std::runtime_error(
513 "hostmem read test failed. lastReadLSB not telemetry");
514 lastReadPort->connect();
515
516 auto issuedPortIter = readMemPorts.find(AppID("addrCmdIssued"));
517 if (issuedPortIter == readMemPorts.end())
518 throw std::runtime_error("hostmem read test failed. addrCmdIssued missing");
519 auto *addrCmdIssuedPort =
520 issuedPortIter->second.getAs<services::TelemetryService::Metric>();
521 if (!addrCmdIssuedPort)
522 throw std::runtime_error(
523 "hostmem read test failed. addrCmdIssued not telemetry");
524 addrCmdIssuedPort->connect();
525
526 auto responsesPortIter = readMemPorts.find(AppID("addrCmdResponses"));
527 if (responsesPortIter == readMemPorts.end())
528 throw std::runtime_error(
529 "hostmem read test failed. addrCmdResponses missing");
530 auto *addrCmdResponsesPort =
531 responsesPortIter->second.getAs<services::TelemetryService::Metric>();
532 if (!addrCmdResponsesPort)
533 throw std::runtime_error(
534 "hostmem read test failed. addrCmdResponses not telemetry");
535 addrCmdResponsesPort->connect();
536
537 for (size_t i = 0; i < 8; ++i) {
538 auto *dataPtr = static_cast<uint64_t *>(region.getPtr());
539 dataPtr[0] = 0x12345678ull << i;
540 dataPtr[1] = 0xDEADBEEFull << i;
541 region.flush();
542 addrCmdMMIO->write(0x10, reinterpret_cast<uint64_t>(region.getDevicePtr()));
543 addrCmdMMIO->write(0x18, 1);
544 addrCmdMMIO->write(0x20, 1);
545 bool done = false;
546 for (int waitLoop = 0; waitLoop < 100; ++waitLoop) {
547 auto issued = addrCmdIssuedPort->readInt();
548 auto responses = addrCmdResponsesPort->readInt();
549 if (issued == 1 && responses == 1) {
550 done = true;
551 break;
552 }
553 std::this_thread::sleep_for(std::chrono::milliseconds(10));
554 }
555 if (!done)
556 throw std::runtime_error("hostmem read (" + std::to_string(width) +
557 " bits) timeout waiting for completion");
558 uint64_t captured = lastReadPort->readInt();
559 uint64_t expected = dataPtr[0];
560 if (width < 64)
561 expected &= ((1ull << width) - 1);
562 if (captured != expected)
563 throw std::runtime_error("hostmem read test (" + std::to_string(width) +
564 " bits) failed. Expected " +
565 esi::toHex(expected) + ", got " +
566 esi::toHex(captured));
567 }
568}
569
571 const std::vector<uint32_t> &widths, bool write,
572 bool read) {
573 // Enable the host memory service.
574 auto hostmem = conn->getService<services::HostMem>();
575 hostmem->start();
576 auto scratchRegion = hostmem->allocate(/*size(bytes)=*/1024 * 1024,
577 /*memOpts=*/{.writeable = true});
578 uint64_t *dataPtr = static_cast<uint64_t *>(scratchRegion->getPtr());
579 conn->getLogger().info("esitester",
580 "Running host memory test with region size " +
581 std::to_string(scratchRegion->getSize()) +
582 " bytes at 0x" + toHex(dataPtr));
583 for (size_t i = 0; i < scratchRegion->getSize() / 8; ++i)
584 dataPtr[i] = 0;
585 scratchRegion->flush();
586
587 bool passed = true;
588 for (size_t width : widths) {
589 try {
590 if (write)
591 hostmemWriteTest(acc, *scratchRegion, width);
592 if (read)
593 hostmemReadTest(acc, *scratchRegion, width);
594 } catch (std::exception &e) {
595 conn->getLogger().error("esitester", "Hostmem test failed for width " +
596 std::to_string(width) + ": " +
597 e.what());
598 passed = false;
599 }
600 }
601 if (!passed)
602 throw std::runtime_error("Hostmem test failed");
603 std::cout << "Hostmem test passed" << std::endl;
604}
605
607 size_t width) {
608 Logger &logger = conn->getLogger();
609 logger.info("esitester",
610 "== Running DMA read test with width " + std::to_string(width));
611 AppIDPath lastPath;
612 BundlePort *toHostMMIOPort =
613 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
614 if (!toHostMMIOPort)
615 throw std::runtime_error("dma read test failed. No tohostdma[" +
616 std::to_string(width) + "] found");
617 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
618 if (!toHostMMIO)
619 throw std::runtime_error("dma read test failed. MMIO port is not MMIO");
620 lastPath.clear();
621 BundlePort *outPortBundle =
622 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
623 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
624 outPort.connect();
625
626 size_t xferCount = 24;
627 uint64_t last = 0;
628 MessageData data;
629 toHostMMIO->write(0, xferCount);
630 for (size_t i = 0; i < xferCount; ++i) {
631 outPort.read(data);
632 if (width == 64) {
633 uint64_t val = *data.as<uint64_t>();
634 if (val < last)
635 throw std::runtime_error("dma read test failed. Out of order data");
636 last = val;
637 }
638 logger.debug("esitester",
639 "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex());
640 }
641 outPort.disconnect();
642 std::cout << " DMA read test for " << width << " bits passed" << std::endl;
643}
644
646 size_t width) {
647 Logger &logger = conn->getLogger();
648 logger.info("esitester",
649 "Running DMA write test with width " + std::to_string(width));
650 AppIDPath lastPath;
651 BundlePort *fromHostMMIOPort =
652 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
653 if (!fromHostMMIOPort)
654 throw std::runtime_error("dma read test for " + toString(width) +
655 " bits failed. No fromhostdma[" +
656 std::to_string(width) + "] found");
657 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
658 if (!fromHostMMIO)
659 throw std::runtime_error("dma write test for " + toString(width) +
660 " bits failed. MMIO port is not MMIO");
661 lastPath.clear();
662 BundlePort *outPortBundle =
663 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
664 if (!outPortBundle)
665 throw std::runtime_error("dma write test for " + toString(width) +
666 " bits failed. No out port found");
667 WriteChannelPort &writePort = outPortBundle->getRawWrite("data");
669
670 size_t xferCount = 24;
671 uint8_t *data = new uint8_t[width];
672 for (size_t i = 0; i < width / 8; ++i)
673 data[i] = 0;
674 fromHostMMIO->read(8);
675 fromHostMMIO->write(0, xferCount);
676 for (size_t i = 1; i < xferCount + 1; ++i) {
677 data[0] = i;
678 bool successWrite;
679 size_t attempts = 0;
680 do {
681 successWrite = writePort.tryWrite(MessageData(data, width / 8));
682 if (!successWrite) {
683 std::this_thread::sleep_for(std::chrono::milliseconds(10));
684 }
685 } while (!successWrite && ++attempts < 100);
686 if (!successWrite)
687 throw std::runtime_error("dma write test for " + toString(width) +
688 " bits failed. Write failed");
689 uint64_t lastReadMMIO;
690 for (size_t a = 0; a < 20; ++a) {
691 lastReadMMIO = fromHostMMIO->read(8);
692 if (lastReadMMIO == i)
693 break;
694 std::this_thread::sleep_for(std::chrono::milliseconds(10));
695 if (a >= 19)
696 throw std::runtime_error("dma write for " + toString(width) +
697 " bits test failed. Read from MMIO failed");
698 }
699 }
700 writePort.disconnect();
701 delete[] data;
702 std::cout << " DMA write test for " << width << " bits passed" << std::endl;
703}
704
706 const std::vector<uint32_t> &widths, bool read,
707 bool write) {
708 bool success = true;
709 if (write)
710 for (size_t width : widths)
711 try {
712 dmaWriteTest(conn, acc, width);
713 } catch (std::exception &e) {
714 success = false;
715 std::cerr << "DMA write test for " << width
716 << " bits failed: " << e.what() << std::endl;
717 }
718 if (read)
719 for (size_t width : widths)
720 dmaReadTest(conn, acc, width);
721 if (!success)
722 throw std::runtime_error("DMA test failed");
723 std::cout << "DMA test passed" << std::endl;
724}
725
//
// DMA bandwidth test
//
729
731 size_t width, size_t xferCount) {
732
733 AppIDPath lastPath;
734 BundlePort *toHostMMIOPort =
735 acc->resolvePort({AppID("tohostdma", width), AppID("cmd")}, lastPath);
736 if (!toHostMMIOPort)
737 throw std::runtime_error("bandwidth test failed. No tohostdma[" +
738 std::to_string(width) + "] found");
739 auto *toHostMMIO = toHostMMIOPort->getAs<services::MMIO::MMIORegion>();
740 if (!toHostMMIO)
741 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
742 lastPath.clear();
743 BundlePort *outPortBundle =
744 acc->resolvePort({AppID("tohostdma", width), AppID("out")}, lastPath);
745 ReadChannelPort &outPort = outPortBundle->getRawRead("data");
746 outPort.connect();
747
748 Logger &logger = conn->getLogger();
749 logger.info("esitester", "Starting read bandwidth test with " +
750 std::to_string(xferCount) + " x " +
751 std::to_string(width) + " bit transfers");
752 MessageData data;
753 auto start = std::chrono::high_resolution_clock::now();
754 toHostMMIO->write(0, xferCount);
755 for (size_t i = 0; i < xferCount; ++i) {
756 outPort.read(data);
757 logger.debug(
758 [i, &data](std::string &subsystem, std::string &msg,
759 std::unique_ptr<std::map<std::string, std::any>> &details) {
760 subsystem = "esitester";
761 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
762 });
763 }
764 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
765 std::chrono::high_resolution_clock::now() - start);
766 double bytesPerSec =
767 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
768 logger.info("esitester",
769 " Bandwidth test: " + std::to_string(xferCount) + " x " +
770 std::to_string(width) + " bit transfers in " +
771 std::to_string(duration.count()) + " microseconds");
772 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
773}
774
776 size_t width, size_t xferCount) {
777
778 AppIDPath lastPath;
779 BundlePort *fromHostMMIOPort =
780 acc->resolvePort({AppID("fromhostdma", width), AppID("cmd")}, lastPath);
781 if (!fromHostMMIOPort)
782 throw std::runtime_error("bandwidth test failed. No fromhostdma[" +
783 std::to_string(width) + "] found");
784 auto *fromHostMMIO = fromHostMMIOPort->getAs<services::MMIO::MMIORegion>();
785 if (!fromHostMMIO)
786 throw std::runtime_error("bandwidth test failed. MMIO port is not MMIO");
787 lastPath.clear();
788 BundlePort *inPortBundle =
789 acc->resolvePort({AppID("fromhostdma", width), AppID("in")}, lastPath);
790 WriteChannelPort &outPort = inPortBundle->getRawWrite("data");
791 outPort.connect();
792
793 Logger &logger = conn->getLogger();
794 logger.info("esitester", "Starting write bandwidth test with " +
795 std::to_string(xferCount) + " x " +
796 std::to_string(width) + " bit transfers");
797 std::vector<uint8_t> dataVec(width / 8);
798 for (size_t i = 0; i < width / 8; ++i)
799 dataVec[i] = i;
800 MessageData data(dataVec);
801 auto start = std::chrono::high_resolution_clock::now();
802 fromHostMMIO->write(0, xferCount);
803 for (size_t i = 0; i < xferCount; ++i) {
804 outPort.write(data);
805 logger.debug(
806 [i, &data](std::string &subsystem, std::string &msg,
807 std::unique_ptr<std::map<std::string, std::any>> &details) {
808 subsystem = "esitester";
809 msg = "Cycle count [" + std::to_string(i) + "] = 0x" + data.toHex();
810 });
811 }
812 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
813 std::chrono::high_resolution_clock::now() - start);
814 double bytesPerSec =
815 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
816 logger.info("esitester",
817 " Bandwidth test: " + std::to_string(xferCount) + " x " +
818 std::to_string(width) + " bit transfers in " +
819 std::to_string(duration.count()) + " microseconds");
820 logger.info("esitester", " bandwidth: " + formatBandwidth(bytesPerSec));
821}
822
824 const std::vector<uint32_t> &widths,
825 uint32_t xferCount, bool read, bool write) {
826 if (read)
827 for (uint32_t w : widths)
828 bandwidthReadTest(conn, acc, w, xferCount);
829 if (write)
830 for (uint32_t w : widths)
831 bandwidthWriteTest(conn, acc, w, xferCount);
832}
833
//
// Hostmem bandwidth test
//
837
838static void
841 uint32_t width, uint32_t xferCount) {
842 Logger &logger = conn->getLogger();
843 logger.info("esitester", "Starting hostmem WRITE bandwidth test: " +
844 std::to_string(xferCount) + " x " +
845 std::to_string(width) + " bits");
846
847 auto writeMemChildIter = acc->getChildren().find(AppID("writemem", width));
848 if (writeMemChildIter == acc->getChildren().end())
849 throw std::runtime_error("hostmem write bandwidth: writemem child missing");
850 auto &writeMemPorts = writeMemChildIter->second->getPorts();
851
852 auto cmdPortIter = writeMemPorts.find(AppID("cmd", width));
853 if (cmdPortIter == writeMemPorts.end())
854 throw std::runtime_error("hostmem write bandwidth: cmd MMIO missing");
855 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
856 if (!cmdMMIO)
857 throw std::runtime_error("hostmem write bandwidth: cmd not MMIO");
858
859 auto issuedIter = writeMemPorts.find(AppID("addrCmdIssued"));
860 auto respIter = writeMemPorts.find(AppID("addrCmdResponses"));
861 auto cycleCount = writeMemPorts.find(AppID("addrCmdCycles"));
862 if (issuedIter == writeMemPorts.end() || respIter == writeMemPorts.end() ||
863 cycleCount == writeMemPorts.end())
864 throw std::runtime_error("hostmem write bandwidth: telemetry missing");
865 auto *issuedPort =
866 issuedIter->second.getAs<services::TelemetryService::Metric>();
867 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
868 auto *cyclePort =
869 cycleCount->second.getAs<services::TelemetryService::Metric>();
870 if (!issuedPort || !respPort || !cyclePort)
871 throw std::runtime_error(
872 "hostmem write bandwidth: telemetry type mismatch");
873
874 issuedPort->connect();
875 respPort->connect();
876 cyclePort->connect();
877
878 // Initialize pattern (optional).
879 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
880 size_t words = region.getSize() / 8;
881 for (size_t i = 0; i < words; ++i)
882 dataPtr[i] = i + 0xA5A50000;
883 region.flush();
884
885 auto start = std::chrono::high_resolution_clock::now();
886 // Fire off xferCount write commands (one flit each).
887 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
888 cmdMMIO->write(0x10, devPtr); // address
889 cmdMMIO->write(0x18, xferCount); // flits
890 cmdMMIO->write(0x20, 1); // start
891
892 // Wait for responses counter to reach target.
893 bool completed = false;
894 for (int wait = 0; wait < 100000; ++wait) {
895 uint64_t respNow = respPort->readInt();
896 if (respNow == xferCount) {
897 completed = true;
898 break;
899 }
900 std::this_thread::sleep_for(std::chrono::microseconds(50));
901 }
902 if (!completed)
903 throw std::runtime_error("hostmem write bandwidth timeout");
904 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
905 std::chrono::high_resolution_clock::now() - start);
906 double bytesPerSec =
907 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
908 uint64_t cycles = cyclePort->readInt();
909 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
910 std::cout << "[WRITE] Hostmem bandwidth (" << std::to_string(width)
911 << "): " << formatBandwidth(bytesPerSec) << " "
912 << std::to_string(xferCount) << " flits in "
913 << std::to_string(duration.count()) << " us, "
914 << std::to_string(cycles) << " cycles, " << bytesPerCycle
915 << " bytes/cycle" << std::endl;
916}
917
918static void
921 uint32_t width, uint32_t xferCount) {
922 Logger &logger = conn->getLogger();
923 logger.info("esitester", "Starting hostmem READ bandwidth test: " +
924 std::to_string(xferCount) + " x " +
925 std::to_string(width) + " bits");
926
927 auto readMemChildIter = acc->getChildren().find(AppID("readmem", width));
928 if (readMemChildIter == acc->getChildren().end())
929 throw std::runtime_error("hostmem read bandwidth: readmem child missing");
930 auto &readMemPorts = readMemChildIter->second->getPorts();
931
932 auto cmdPortIter = readMemPorts.find(AppID("cmd", width));
933 if (cmdPortIter == readMemPorts.end())
934 throw std::runtime_error("hostmem read bandwidth: cmd MMIO missing");
935 auto *cmdMMIO = cmdPortIter->second.getAs<services::MMIO::MMIORegion>();
936 if (!cmdMMIO)
937 throw std::runtime_error("hostmem read bandwidth: cmd not MMIO");
938
939 auto issuedIter = readMemPorts.find(AppID("addrCmdIssued"));
940 auto respIter = readMemPorts.find(AppID("addrCmdResponses"));
941 auto cyclePort = readMemPorts.find(AppID("addrCmdCycles"));
942 if (issuedIter == readMemPorts.end() || respIter == readMemPorts.end() ||
943 cyclePort == readMemPorts.end())
944 throw std::runtime_error("hostmem read bandwidth: telemetry missing");
945 auto *issuedPort =
946 issuedIter->second.getAs<services::TelemetryService::Metric>();
947 auto *respPort = respIter->second.getAs<services::TelemetryService::Metric>();
948 auto *cycleCntPort =
949 cyclePort->second.getAs<services::TelemetryService::Metric>();
950 if (!issuedPort || !respPort || !cycleCntPort)
951 throw std::runtime_error("hostmem read bandwidth: telemetry type mismatch");
952 issuedPort->connect();
953 respPort->connect();
954 cycleCntPort->connect();
955
956 // Prepare memory pattern (optional).
957 uint64_t *dataPtr = static_cast<uint64_t *>(region.getPtr());
958 size_t words64 = region.getSize() / 8;
959 for (size_t i = 0; i < words64; ++i)
960 dataPtr[i] = 0xCAFEBABE0000ull + i;
961 region.flush();
962 uint64_t devPtr = reinterpret_cast<uint64_t>(region.getDevicePtr());
963 auto start = std::chrono::high_resolution_clock::now();
964
965 cmdMMIO->write(0x10, devPtr);
966 cmdMMIO->write(0x18, xferCount);
967 cmdMMIO->write(0x20, 1);
968
969 bool timeout = true;
970 for (int wait = 0; wait < 100000; ++wait) {
971 uint64_t respNow = respPort->readInt();
972 if (respNow == xferCount) {
973 timeout = false;
974 break;
975 }
976 std::this_thread::sleep_for(std::chrono::microseconds(50));
977 }
978 if (timeout)
979 throw std::runtime_error("hostmem read bandwidth timeout");
980 auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
981 std::chrono::high_resolution_clock::now() - start);
982 double bytesPerSec =
983 (double)xferCount * (width / 8.0) * 1e6 / (double)duration.count();
984 uint64_t cycles = cycleCntPort->readInt();
985 double bytesPerCycle = (double)xferCount * (width / 8.0) / (double)cycles;
986 std::cout << "[ READ] Hostmem bandwidth (" << width
987 << "): " << formatBandwidth(bytesPerSec) << ", " << xferCount
988 << " flits in " << duration.count() << " us, " << cycles
989 << " cycles, " << bytesPerCycle << " bytes/cycle" << std::endl;
990}
991
993 uint32_t xferCount,
994 const std::vector<uint32_t> &widths, bool read,
995 bool write) {
996 auto hostmemSvc = conn->getService<services::HostMem>();
997 hostmemSvc->start();
998 auto region = hostmemSvc->allocate(/*size(bytes)=*/1024 * 1024 * 1024,
999 /*memOpts=*/{.writeable = true});
1000 for (uint32_t w : widths) {
1001 if (write)
1002 hostmemWriteBandwidthTest(conn, acc, *region, w, xferCount);
1003 if (read)
1004 hostmemReadBandwidthTest(conn, acc, *region, w, xferCount);
1005 }
1006}
1007
1009 uint32_t iterations, bool pipeline) {
1010 Logger &logger = conn->getLogger();
1011 auto loopbackChild = accel->getChildren().find(AppID("loopback"));
1012 if (loopbackChild == accel->getChildren().end())
1013 throw std::runtime_error("Loopback test: no 'loopback' child");
1014 auto &ports = loopbackChild->second->getPorts();
1015 auto addIter = ports.find(AppID("add"));
1016 if (addIter == ports.end())
1017 throw std::runtime_error("Loopback test: no 'add' port");
1018
1019 // Use FuncService::Func instead of raw channels.
1020 auto *funcPort = addIter->second.getAs<services::FuncService::Function>();
1021 if (!funcPort)
1022 throw std::runtime_error(
1023 "Loopback test: 'add' port not a FuncService::Function");
1024 funcPort->connect();
1025 if (iterations == 0) {
1026 logger.info("esitester", "Loopback add test: 0 iterations (skipped)");
1027 return;
1028 }
1029 std::mt19937_64 rng(0xC0FFEE);
1030 std::uniform_int_distribution<uint32_t> dist(0, (1u << 24) - 1);
1031
1032 if (!pipeline) {
1033 auto start = std::chrono::high_resolution_clock::now();
1034 for (uint32_t i = 0; i < iterations; ++i) {
1035 uint32_t argVal = dist(rng);
1036 uint32_t expected = (argVal + 11) & 0xFFFF;
1037 uint8_t argBytes[3] = {
1038 static_cast<uint8_t>(argVal & 0xFF),
1039 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1040 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1041 };
1042 MessageData argMsg(argBytes, 3);
1043 MessageData resMsg = funcPort->call(argMsg).get();
1044 uint16_t got = *resMsg.as<uint16_t>();
1045 std::cout << "[loopback] i=" << i << " arg=0x" << esi::toHex(argVal)
1046 << " got=0x" << esi::toHex(got) << " exp=0x"
1047 << esi::toHex(expected) << std::endl;
1048 if (got != expected)
1049 throw std::runtime_error("Loopback mismatch (non-pipelined)");
1050 }
1051 auto end = std::chrono::high_resolution_clock::now();
1052 auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start)
1053 .count();
1054 double callsPerSec = (double)iterations * 1e6 / (double)us;
1055 logger.info("esitester", "Loopback add test passed (non-pipelined, " +
1056 std::to_string(iterations) + " calls, " +
1057 std::to_string(us) + " us, " +
1058 std::to_string(callsPerSec) + " calls/s)");
1059 } else {
1060 // Pipelined mode: launch all calls first, then collect.
1061 std::vector<std::future<MessageData>> futures;
1062 futures.reserve(iterations);
1063 std::vector<uint32_t> expectedVals;
1064 expectedVals.reserve(iterations);
1065
1066 auto issueStart = std::chrono::high_resolution_clock::now();
1067 for (uint32_t i = 0; i < iterations; ++i) {
1068 uint32_t argVal = dist(rng);
1069 uint32_t expected = (argVal + 11) & 0xFFFF;
1070 uint8_t argBytes[3] = {
1071 static_cast<uint8_t>(argVal & 0xFF),
1072 static_cast<uint8_t>((argVal >> 8) & 0xFF),
1073 static_cast<uint8_t>((argVal >> 16) & 0xFF),
1074 };
1075 futures.emplace_back(funcPort->call(MessageData(argBytes, 3)));
1076 expectedVals.emplace_back(expected);
1077 }
1078 auto issueEnd = std::chrono::high_resolution_clock::now();
1079
1080 for (uint32_t i = 0; i < iterations; ++i) {
1081 MessageData resMsg = futures[i].get();
1082 uint16_t got = *resMsg.as<uint16_t>();
1083 uint16_t exp = (uint16_t)expectedVals[i];
1084 std::cout << "[loopback-pipelined] i=" << i << " got=0x"
1085 << esi::toHex(got) << " exp=0x" << esi::toHex(exp) << std::endl;
1086 if (got != exp)
1087 throw std::runtime_error("Loopback mismatch (pipelined) idx=" +
1088 std::to_string(i));
1089 }
1090 auto collectEnd = std::chrono::high_resolution_clock::now();
1091
1092 auto issueUs = std::chrono::duration_cast<std::chrono::microseconds>(
1093 issueEnd - issueStart)
1094 .count();
1095 auto totalUs = std::chrono::duration_cast<std::chrono::microseconds>(
1096 collectEnd - issueStart)
1097 .count();
1098
1099 double issueRate = (double)iterations * 1e6 / (double)issueUs;
1100 double completionRate = (double)iterations * 1e6 / (double)totalUs;
1101
1102 logger.info("esitester", "Loopback add test passed (pipelined). Issued " +
1103 std::to_string(iterations) + " in " +
1104 std::to_string(issueUs) + " us (" +
1105 std::to_string(issueRate) +
1106 " calls/s), total " + std::to_string(totalUs) +
1107 " us (" + std::to_string(completionRate) +
1108 " calls/s effective)");
1109 }
1110}
1111
1113 Accelerator *acc, uint32_t width,
1114 uint32_t xferCount, bool read,
1115 bool write) {
1116 Logger &logger = conn->getLogger();
1117 if (!read && !write) {
1118 std::cout << "aggbandwidth: nothing to do (enable --read and/or --write)\n";
1119 return;
1120 }
1121 logger.info(
1122 "esitester",
1123 "Aggregate hostmem bandwidth start width=" + std::to_string(width) +
1124 " count=" + std::to_string(xferCount) +
1125 " read=" + (read ? "Y" : "N") + " write=" + (write ? "Y" : "N"));
1126
1127 auto hostmemSvc = conn->getService<services::HostMem>();
1128 hostmemSvc->start();
1129
1130 struct Unit {
1131 std::string prefix;
1132 bool isRead = false;
1133 bool isWrite = false;
1134 std::unique_ptr<esi::services::HostMem::HostMemRegion> region;
1135 services::TelemetryService::Metric *resp = nullptr;
1136 services::TelemetryService::Metric *cycles = nullptr;
1137 services::MMIO::MMIORegion *cmd = nullptr;
1138 bool launched = false;
1139 bool done = false;
1140 uint64_t bytes = 0;
1141 uint64_t duration_us = 0;
1142 uint64_t cycleCount = 0;
1143 std::chrono::high_resolution_clock::time_point start;
1144 };
1145 std::vector<Unit> units;
1146 const std::vector<std::string> readPrefixes = {"readmem", "readmem_0",
1147 "readmem_1", "readmem_2"};
1148 const std::vector<std::string> writePrefixes = {"writemem", "writemem_0",
1149 "writemem_1", "writemem_2"};
1150
1151 auto addUnits = [&](const std::vector<std::string> &pref, bool doRead,
1152 bool doWrite) {
1153 for (auto &p : pref) {
1154 AppID id(p, width);
1155 auto childIt = acc->getChildren().find(id);
1156 if (childIt == acc->getChildren().end())
1157 continue; // silently skip missing variants
1158 auto &ports = childIt->second->getPorts();
1159 auto cmdIt = ports.find(AppID("cmd", width));
1160 auto respIt = ports.find(AppID("addrCmdResponses"));
1161 auto cycIt = ports.find(AppID("addrCmdCycles"));
1162 if (cmdIt == ports.end() || respIt == ports.end() || cycIt == ports.end())
1163 continue;
1164 auto *cmd = cmdIt->second.getAs<services::MMIO::MMIORegion>();
1165 auto *resp = respIt->second.getAs<services::TelemetryService::Metric>();
1166 auto *cyc = cycIt->second.getAs<services::TelemetryService::Metric>();
1167 if (!cmd || !resp || !cyc)
1168 continue;
1169 resp->connect();
1170 cyc->connect();
1171 Unit u;
1172 u.prefix = p;
1173 u.isRead = doRead;
1174 u.isWrite = doWrite;
1175 u.region = hostmemSvc->allocate(1024 * 1024 * 1024, {.writeable = true});
1176 // Init pattern.
1177 uint64_t *ptr = static_cast<uint64_t *>(u.region->getPtr());
1178 size_t words = u.region->getSize() / 8;
1179 for (size_t i = 0; i < words; ++i)
1180 ptr[i] =
1181 (p[0] == 'w' ? (0xA5A500000000ull + i) : (0xCAFEBABE0000ull + i));
1182 u.region->flush();
1183 u.cmd = cmd;
1184 u.resp = resp;
1185 u.cycles = cyc;
1186 u.bytes = uint64_t(xferCount) * (width / 8);
1187 units.emplace_back(std::move(u));
1188 }
1189 };
1190 if (read)
1191 addUnits(readPrefixes, true, false);
1192 if (write)
1193 addUnits(writePrefixes, false, true);
1194 if (units.empty()) {
1195 std::cout << "aggbandwidth: no matching units present for width " << width
1196 << "\n";
1197 return;
1198 }
1199
1200 auto wallStart = std::chrono::high_resolution_clock::now();
1201 // Launch sequentially.
1202 for (auto &u : units) {
1203 uint64_t devPtr = reinterpret_cast<uint64_t>(u.region->getDevicePtr());
1204 u.cmd->write(0x10, devPtr);
1205 u.cmd->write(0x18, xferCount);
1206 u.cmd->write(0x20, 1);
1207 u.start = std::chrono::high_resolution_clock::now();
1208 u.launched = true;
1209 }
1210
1211 // Poll all until complete.
1212 const uint64_t timeoutLoops = 200000; // ~10s at 50us sleep
1213 uint64_t loops = 0;
1214 while (true) {
1215 bool allDone = true;
1216 for (auto &u : units) {
1217 if (u.done)
1218 continue;
1219 if (u.resp->readInt() == xferCount) {
1220 auto end = std::chrono::high_resolution_clock::now();
1221 u.duration_us =
1222 std::chrono::duration_cast<std::chrono::microseconds>(end - u.start)
1223 .count();
1224 u.cycleCount = u.cycles->readInt();
1225 u.done = true;
1226 } else {
1227 allDone = false;
1228 }
1229 }
1230 if (allDone)
1231 break;
1232 if (++loops >= timeoutLoops)
1233 throw std::runtime_error("aggbandwidth: timeout");
1234 std::this_thread::sleep_for(std::chrono::microseconds(50));
1235 }
1236 auto wallUs = std::chrono::duration_cast<std::chrono::microseconds>(
1237 std::chrono::high_resolution_clock::now() - wallStart)
1238 .count();
1239
1240 uint64_t totalBytes = 0;
1241 uint64_t totalReadBytes = 0;
1242 uint64_t totalWriteBytes = 0;
1243 for (auto &u : units) {
1244 totalBytes += u.bytes;
1245 if (u.isRead)
1246 totalReadBytes += u.bytes;
1247 if (u.isWrite)
1248 totalWriteBytes += u.bytes;
1249 double unitBps = (double)u.bytes * 1e6 / (double)u.duration_us;
1250 std::cout << "[agg-unit] " << u.prefix << "[" << width << "] "
1251 << (u.isRead ? "READ" : (u.isWrite ? "WRITE" : "UNK"))
1252 << " bytes=" << humanBytes(u.bytes) << " (" << u.bytes << " B)"
1253 << " time=" << humanTimeUS(u.duration_us) << " (" << u.duration_us
1254 << " us) cycles=" << u.cycleCount
1255 << " throughput=" << formatBandwidth(unitBps) << std::endl;
1256 }
1257 // Compute aggregate bandwidths as total size / total wall time (not sum of
1258 // unit throughputs).
1259 double aggReadBps =
1260 totalReadBytes ? (double)totalReadBytes * 1e6 / (double)wallUs : 0.0;
1261 double aggWriteBps =
1262 totalWriteBytes ? (double)totalWriteBytes * 1e6 / (double)wallUs : 0.0;
1263 double aggCombinedBps =
1264 totalBytes ? (double)totalBytes * 1e6 / (double)wallUs : 0.0;
1265
1266 std::cout << "[agg-total] units=" << units.size()
1267 << " read_bytes=" << humanBytes(totalReadBytes) << " ("
1268 << totalReadBytes << " B)"
1269 << " read_bw=" << formatBandwidth(aggReadBps)
1270 << " write_bytes=" << humanBytes(totalWriteBytes) << " ("
1271 << totalWriteBytes << " B)"
1272 << " write_bw=" << formatBandwidth(aggWriteBps)
1273 << " combined_bytes=" << humanBytes(totalBytes) << " ("
1274 << totalBytes << " B)"
1275 << " combined_bw=" << formatBandwidth(aggCombinedBps)
1276 << " wall_time=" << humanTimeUS(wallUs) << " (" << wallUs << " us)"
1277 << std::endl;
1278 logger.info("esitester", "Aggregate hostmem bandwidth test complete");
1279}
1280
/// Packed struct representing a parallel window argument for StreamingAdder.
/// Layout in SystemVerilog (so it must be reversed in C):
///   { add_amt: UInt(32), input: UInt(32), last: UInt(8) }
/// The fields are therefore declared last-to-first; packing is forced to 1 so
/// that no padding is inserted after the single-byte `last` field.
#pragma pack(push, 1)
struct StreamingAddArg {
  uint8_t last;    // Non-zero on the final element of the list.
  uint32_t input;  // One list element.
  uint32_t addAmt; // Amount to add; repeated in every message.
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddArg) == 9,
              "StreamingAddArg must be 9 bytes packed");
1293
/// Packed struct representing a parallel window result for StreamingAdder.
/// Layout in SystemVerilog (so it must be reversed in C):
///   { data: UInt(32), last: UInt(8) }
/// Packing is forced to 1 so that no padding follows the `last` byte.
#pragma pack(push, 1)
struct StreamingAddResult {
  uint8_t last;  // Non-zero on the final element of the result list.
  uint32_t data; // One result element.
};
#pragma pack(pop)
static_assert(sizeof(StreamingAddResult) == 5,
              "StreamingAddResult must be 5 bytes packed");
1305
1306/// Test the StreamingAdder module. This module takes a struct containing
1307/// an add_amt and a list of uint32s, adds add_amt to each element, and
1308/// returns the resulting list. The data is streamed using windowed types.
1310 uint32_t addAmt, uint32_t numItems) {
1311 Logger &logger = conn->getLogger();
1312 logger.info("esitester", "Starting streaming add test with add_amt=" +
1313 std::to_string(addAmt) +
1314 ", num_items=" + std::to_string(numItems));
1315
1316 // Generate random input data.
1317 std::mt19937 rng(0xDEADBEEF);
1318 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1319 std::vector<uint32_t> inputData;
1320 inputData.reserve(numItems);
1321 for (uint32_t i = 0; i < numItems; ++i)
1322 inputData.push_back(dist(rng));
1323
1324 // Find the streaming_adder child.
1325 auto streamingAdderChild =
1326 accel->getChildren().find(AppID("streaming_adder"));
1327 if (streamingAdderChild == accel->getChildren().end())
1328 throw std::runtime_error(
1329 "Streaming add test: no 'streaming_adder' child found");
1330
1331 auto &ports = streamingAdderChild->second->getPorts();
1332 auto addIter = ports.find(AppID("streaming_add"));
1333 if (addIter == ports.end())
1334 throw std::runtime_error(
1335 "Streaming add test: no 'streaming_add' port found");
1336
1337 // Get the raw read/write channel ports for the windowed function.
1338 // The argument channel expects parallel windowed data where each message
1339 // contains: struct { add_amt: UInt(32), input: UInt(32), last: bool }
1340 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1341 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1342
1343 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1344 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1345
1346 // Send each list element with add_amt repeated in every message.
1347 for (size_t i = 0; i < inputData.size(); ++i) {
1348 StreamingAddArg arg;
1349 arg.addAmt = addAmt;
1350 arg.input = inputData[i];
1351 arg.last = (i == inputData.size() - 1) ? 1 : 0;
1352 argPort.write(
1353 MessageData(reinterpret_cast<const uint8_t *>(&arg), sizeof(arg)));
1354 logger.debug("esitester", "Sent {add_amt=" + std::to_string(arg.addAmt) +
1355 ", input=" + std::to_string(arg.input) +
1356 ", last=" + (arg.last ? "true" : "false") +
1357 "}");
1358 }
1359
1360 // Read the result list (also windowed).
1361 std::vector<uint32_t> results;
1362 bool lastSeen = false;
1363 while (!lastSeen) {
1364 MessageData resMsg;
1365 resultPort.read(resMsg);
1366 if (resMsg.getSize() < sizeof(StreamingAddResult))
1367 throw std::runtime_error(
1368 "Streaming add test: unexpected result message size");
1369
1370 const auto *res =
1371 reinterpret_cast<const StreamingAddResult *>(resMsg.getBytes());
1372 lastSeen = res->last != 0;
1373 results.push_back(res->data);
1374 logger.debug("esitester", "Received result=" + std::to_string(res->data) +
1375 " (last=" + (lastSeen ? "true" : "false") +
1376 ")");
1377 }
1378
1379 // Verify results.
1380 if (results.size() != inputData.size())
1381 throw std::runtime_error(
1382 "Streaming add test: result size mismatch. Expected " +
1383 std::to_string(inputData.size()) + ", got " +
1384 std::to_string(results.size()));
1385
1386 bool passed = true;
1387 std::cout << "Streaming add test results:" << std::endl;
1388 for (size_t i = 0; i < inputData.size(); ++i) {
1389 uint32_t expected = inputData[i] + addAmt;
1390 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1391 << " = " << results[i] << " (expected " << expected << ")";
1392 if (results[i] != expected) {
1393 std::cout << " MISMATCH!";
1394 passed = false;
1395 }
1396 std::cout << std::endl;
1397 }
1398
1399 argPort.disconnect();
1400 resultPort.disconnect();
1401
1402 if (!passed)
1403 throw std::runtime_error("Streaming add test failed: result mismatch");
1404
1405 logger.info("esitester", "Streaming add test passed");
1406 std::cout << "Streaming add test passed" << std::endl;
1407}
1408
/// Test the StreamingAdder module using message translation.
/// This version uses the list translation support where the host-side message
/// format is:
///   Argument: { input_length (8 bytes), add_amt (4 bytes), input_data[] }
///   Result: { data_length (8 bytes), data[] }
/// The translation layer automatically converts between this format and the
/// parallel windowed frames used by the hardware.
1415
1416/// Translated argument struct for StreamingAdder.
1417/// Memory layout (standard C struct ordering, fields in declaration order):
1418/// ESI type: struct { add_amt: UInt(32), input: List<UInt(32)> }
1419/// becomes host struct:
1420/// { input_length (size_t, 8 bytes on 64-bit), add_amt (uint32_t),
1421/// input_data[] }
1422/// Note: The translation layer handles the conversion between this C struct
1423/// layout and the hardware's SystemVerilog frame format.
1424/// Note: size_t is used for list lengths, so this format is platform-dependent.
1425#pragma pack(push, 1)
1428 uint32_t addAmt;
1429 // Trailing array data follows immediately after the struct in memory.
1430 // Use inputData() accessor to access it.
1431
1432 /// Get pointer to trailing input data array.
1433 uint32_t *inputData() { return reinterpret_cast<uint32_t *>(this + 1); }
1434 const uint32_t *inputData() const {
1435 return reinterpret_cast<const uint32_t *>(this + 1);
1436 }
1437 /// Get span view of input data (requires inputLength to be set first).
1438 std::span<uint32_t> inputDataSpan() { return {inputData(), inputLength}; }
1439 std::span<const uint32_t> inputDataSpan() const {
1440 return {inputData(), inputLength};
1441 }
1442
1443 static size_t allocSize(size_t numItems) {
1444 return sizeof(StreamingAddTranslatedArg) + numItems * sizeof(uint32_t);
1445 }
1446};
1447#pragma pack(pop)
1448
1449/// Translated result struct for StreamingAdder.
1450/// Memory layout:
1451/// struct { data: List<UInt(32)> }
1452/// becomes:
1453/// { data_length (size_t, 8 bytes on 64-bit), data[] }
1454#pragma pack(push, 1)
1457 // Trailing array data follows immediately after the struct in memory.
1458
1459 /// Get pointer to trailing result data array.
1460 uint32_t *data() { return reinterpret_cast<uint32_t *>(this + 1); }
1461 const uint32_t *data() const {
1462 return reinterpret_cast<const uint32_t *>(this + 1);
1463 }
1464 /// Get span view of result data (requires dataLength to be set first).
1465 std::span<uint32_t> dataSpan() { return {data(), dataLength}; }
1466 std::span<const uint32_t> dataSpan() const { return {data(), dataLength}; }
1467
1468 static size_t allocSize(size_t numItems) {
1469 return sizeof(StreamingAddTranslatedResult) + numItems * sizeof(uint32_t);
1470 }
1471};
1472#pragma pack(pop)
1473
1475 Accelerator *accel, uint32_t addAmt,
1476 uint32_t numItems) {
1477 Logger &logger = conn->getLogger();
1478 logger.info("esitester",
1479 "Starting streaming add test (translated) with add_amt=" +
1480 std::to_string(addAmt) +
1481 ", num_items=" + std::to_string(numItems));
1482
1483 // Generate random input data.
1484 std::mt19937 rng(0xDEADBEEF);
1485 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1486 std::vector<uint32_t> inputData;
1487 inputData.reserve(numItems);
1488 for (uint32_t i = 0; i < numItems; ++i)
1489 inputData.push_back(dist(rng));
1490
1491 // Find the streaming_adder child.
1492 auto streamingAdderChild =
1493 accel->getChildren().find(AppID("streaming_adder"));
1494 if (streamingAdderChild == accel->getChildren().end())
1495 throw std::runtime_error(
1496 "Streaming add test: no 'streaming_adder' child found");
1497
1498 auto &ports = streamingAdderChild->second->getPorts();
1499 auto addIter = ports.find(AppID("streaming_add"));
1500 if (addIter == ports.end())
1501 throw std::runtime_error(
1502 "Streaming add test: no 'streaming_add' port found");
1503
1504 // Get the raw read/write channel ports with translation enabled (default).
1505 WriteChannelPort &argPort = addIter->second.getRawWrite("arg");
1506 ReadChannelPort &resultPort = addIter->second.getRawRead("result");
1507
1508 // Connect with translation enabled (the default).
1509 argPort.connect();
1510 resultPort.connect();
1511
1512 // Allocate the argument struct with proper alignment for the struct members.
1513 // We use aligned_alloc to ensure the buffer meets alignment requirements.
1514 size_t argSize = StreamingAddTranslatedArg::allocSize(numItems);
1515 constexpr size_t alignment = alignof(StreamingAddTranslatedArg);
1516 // aligned_alloc requires size to be a multiple of alignment
1517 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1518 void *argRaw = alignedAllocCompat(alignment, allocSize);
1519 if (!argRaw)
1520 throw std::bad_alloc();
1521 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1522 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1523 auto *arg = static_cast<StreamingAddTranslatedArg *>(argRaw);
1524 arg->inputLength = numItems;
1525 arg->addAmt = addAmt;
1526 for (uint32_t i = 0; i < numItems; ++i)
1527 arg->inputData()[i] = inputData[i];
1528
1529 logger.debug("esitester",
1530 "Sending translated argument: " + std::to_string(argSize) +
1531 " bytes, list_length=" + std::to_string(arg->inputLength) +
1532 ", add_amt=" + std::to_string(arg->addAmt));
1533
1534 // Send the complete message - translation will split it into frames.
1535 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize));
1536 // argBuffer automatically freed when it goes out of scope
1537
1538 // Read the translated result.
1539 MessageData resMsg;
1540 resultPort.read(resMsg);
1541
1542 logger.debug("esitester", "Received translated result: " +
1543 std::to_string(resMsg.getSize()) + " bytes");
1544
1545 if (resMsg.getSize() < sizeof(StreamingAddTranslatedResult))
1546 throw std::runtime_error(
1547 "Streaming add test (translated): result too small");
1548
1549 const auto *result =
1550 reinterpret_cast<const StreamingAddTranslatedResult *>(resMsg.getBytes());
1551
1552 if (resMsg.getSize() <
1553 StreamingAddTranslatedResult::allocSize(result->dataLength))
1554 throw std::runtime_error(
1555 "Streaming add test (translated): result data truncated");
1556
1557 // Verify results.
1558 if (result->dataLength != inputData.size())
1559 throw std::runtime_error(
1560 "Streaming add test (translated): result size mismatch. Expected " +
1561 std::to_string(inputData.size()) + ", got " +
1562 std::to_string(result->dataLength));
1563
1564 bool passed = true;
1565 std::cout << "Streaming add test results:" << std::endl;
1566 for (size_t i = 0; i < inputData.size(); ++i) {
1567 uint32_t expected = inputData[i] + addAmt;
1568 std::cout << " input[" << i << "]=" << inputData[i] << " + " << addAmt
1569 << " = " << result->data()[i] << " (expected " << expected << ")";
1570 if (result->data()[i] != expected) {
1571 std::cout << " MISMATCH!";
1572 passed = false;
1573 }
1574 std::cout << std::endl;
1575 }
1576
1577 argPort.disconnect();
1578 resultPort.disconnect();
1579
1580 if (!passed)
1581 throw std::runtime_error(
1582 "Streaming add test (translated) failed: result mismatch");
1583
1584 logger.info("esitester", "Streaming add test passed (translated)");
1585 std::cout << "Streaming add test passed" << std::endl;
1586}
1587
/// Test the CoordTranslator module using message translation.
/// This version uses the list translation support where the host-side message
/// format is:
///   Argument: { coords_length, y_translation, x_translation, coords[] }
///   Result: { coords_length, coords[] }
/// Each coord is a struct { x, y }, stored with y before x in memory.
1593
/// Coordinate struct for CoordTranslator.
/// SV ordering means y comes before x in memory.
#pragma pack(push, 1)
struct Coord {
  uint32_t y; // SV ordering: last declared field first in memory
  uint32_t x;
};
#pragma pack(pop)
static_assert(sizeof(Coord) == 8, "Coord must be 8 bytes packed");
1603
1604/// Translated argument struct for CoordTranslator.
1605/// Memory layout (standard C struct ordering):
1606/// ESI type: struct { x_translation: UInt(32), y_translation: UInt(32),
1607/// coords: List<struct{x, y}> }
1608/// becomes host struct:
1609/// { coords_length (size_t, 8 bytes on 64-bit), y_translation (uint32_t),
1610/// x_translation (uint32_t), coords[] }
1611/// Note: Fields are in reverse order due to SV struct ordering.
1612/// Note: size_t is used for list lengths, so this format is platform-dependent.
1613#pragma pack(push, 1)
1616 uint32_t yTranslation; // SV ordering: last declared field first in memory
1618 // Trailing array data follows immediately after the struct in memory.
1619
1620 /// Get pointer to trailing coords array.
1621 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1622 const Coord *coords() const {
1623 return reinterpret_cast<const Coord *>(this + 1);
1624 }
1625 /// Get span view of coords (requires coordsLength to be set first).
1626 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1627 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1628
1629 static size_t allocSize(size_t numCoords) {
1630 return sizeof(CoordTranslateArg) + numCoords * sizeof(Coord);
1631 }
1632};
1633#pragma pack(pop)
1634
1635/// Translated result struct for CoordTranslator.
1636/// Memory layout:
1637/// ESI type: List<struct{x, y}>
1638/// becomes host struct:
1639/// { coords_length (size_t, 8 bytes on 64-bit), coords[] }
1640#pragma pack(push, 1)
1643 // Trailing array data follows immediately after the struct in memory.
1644
1645 /// Get pointer to trailing coords array.
1646 Coord *coords() { return reinterpret_cast<Coord *>(this + 1); }
1647 const Coord *coords() const {
1648 return reinterpret_cast<const Coord *>(this + 1);
1649 }
1650 /// Get span view of coords (requires coordsLength to be set first).
1651 std::span<Coord> coordsSpan() { return {coords(), coordsLength}; }
1652 std::span<const Coord> coordsSpan() const { return {coords(), coordsLength}; }
1653
1654 static size_t allocSize(size_t numCoords) {
1655 return sizeof(CoordTranslateResult) + numCoords * sizeof(Coord);
1656 }
1657};
1658#pragma pack(pop)
1659
1661 uint32_t xTrans, uint32_t yTrans,
1662 uint32_t numCoords) {
1663 Logger &logger = conn->getLogger();
1664 logger.info("esitester", "Starting coord translate test with x_trans=" +
1665 std::to_string(xTrans) +
1666 ", y_trans=" + std::to_string(yTrans) +
1667 ", num_coords=" + std::to_string(numCoords));
1668
1669 // Generate random input coordinates.
1670 // Note: Coord struct has y before x due to SV ordering, but we generate
1671 // and display as (x, y) for human readability.
1672 std::mt19937 rng(0xDEADBEEF);
1673 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1674 std::vector<Coord> inputCoords;
1675 inputCoords.reserve(numCoords);
1676 for (uint32_t i = 0; i < numCoords; ++i) {
1677 Coord c;
1678 c.x = dist(rng);
1679 c.y = dist(rng);
1680 inputCoords.push_back(c);
1681 }
1682
1683 // Find the coord_translator child.
1684 auto coordTranslatorChild =
1685 accel->getChildren().find(AppID("coord_translator"));
1686 if (coordTranslatorChild == accel->getChildren().end())
1687 throw std::runtime_error(
1688 "Coord translate test: no 'coord_translator' child found");
1689
1690 auto &ports = coordTranslatorChild->second->getPorts();
1691 auto translateIter = ports.find(AppID("translate_coords"));
1692 if (translateIter == ports.end())
1693 throw std::runtime_error(
1694 "Coord translate test: no 'translate_coords' port found");
1695
1696 // Use FuncService::Function which handles connection and translation.
1697 auto *funcPort =
1698 translateIter->second.getAs<services::FuncService::Function>();
1699 if (!funcPort)
1700 throw std::runtime_error(
1701 "Coord translate test: 'translate_coords' port not a "
1702 "FuncService::Function");
1703 funcPort->connect();
1704
1705 // Allocate the argument struct with proper alignment for the struct members.
1706 size_t argSize = CoordTranslateArg::allocSize(numCoords);
1707 constexpr size_t alignment = alignof(CoordTranslateArg);
1708 // aligned_alloc requires size to be a multiple of alignment
1709 size_t allocSize = ((argSize + alignment - 1) / alignment) * alignment;
1710 void *argRaw = alignedAllocCompat(alignment, allocSize);
1711 if (!argRaw)
1712 throw std::bad_alloc();
1713 auto argDeleter = [](void *p) { alignedFreeCompat(p); };
1714 std::unique_ptr<void, decltype(argDeleter)> argBuffer(argRaw, argDeleter);
1715 auto *arg = static_cast<CoordTranslateArg *>(argRaw);
1716 arg->coordsLength = numCoords;
1717 arg->xTranslation = xTrans;
1718 arg->yTranslation = yTrans;
1719 for (uint32_t i = 0; i < numCoords; ++i)
1720 arg->coords()[i] = inputCoords[i];
1721
1722 logger.debug(
1723 "esitester",
1724 "Sending coord translate argument: " + std::to_string(argSize) +
1725 " bytes, coords_length=" + std::to_string(arg->coordsLength) +
1726 ", x_trans=" + std::to_string(arg->xTranslation) +
1727 ", y_trans=" + std::to_string(arg->yTranslation));
1728
1729 // Call the function - translation happens automatically.
1730 MessageData resMsg =
1731 funcPort
1732 ->call(MessageData(reinterpret_cast<const uint8_t *>(arg), argSize))
1733 .get();
1734 // argBuffer automatically freed when it goes out of scope
1735
1736 logger.debug("esitester", "Received coord translate result: " +
1737 std::to_string(resMsg.getSize()) + " bytes");
1738
1739 if (resMsg.getSize() < sizeof(CoordTranslateResult))
1740 throw std::runtime_error("Coord translate test: result too small");
1741
1742 const auto *result =
1743 reinterpret_cast<const CoordTranslateResult *>(resMsg.getBytes());
1744
1745 if (resMsg.getSize() < CoordTranslateResult::allocSize(result->coordsLength))
1746 throw std::runtime_error("Coord translate test: result data truncated");
1747
1748 // Verify results.
1749 if (result->coordsLength != inputCoords.size())
1750 throw std::runtime_error(
1751 "Coord translate test: result size mismatch. Expected " +
1752 std::to_string(inputCoords.size()) + ", got " +
1753 std::to_string(result->coordsLength));
1754
1755 bool passed = true;
1756 std::cout << "Coord translate test results:" << std::endl;
1757 for (size_t i = 0; i < inputCoords.size(); ++i) {
1758 uint32_t expectedX = inputCoords[i].x + xTrans;
1759 uint32_t expectedY = inputCoords[i].y + yTrans;
1760 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1761 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1762 << ") = (" << result->coords()[i].x << ","
1763 << result->coords()[i].y << ")";
1764 if (result->coords()[i].x != expectedX ||
1765 result->coords()[i].y != expectedY) {
1766 std::cout << " MISMATCH! (expected (" << expectedX << "," << expectedY
1767 << "))";
1768 passed = false;
1769 }
1770 std::cout << std::endl;
1771 }
1772
1773 if (!passed)
1774 throw std::runtime_error("Coord translate test failed: result mismatch");
1775
1776 logger.info("esitester", "Coord translate test passed");
1777 std::cout << "Coord translate test passed" << std::endl;
1778}
1779
1780//
1781// SerialCoordTranslator test
1782//
1783
1784#pragma pack(push, 1)
1786 uint16_t coordsCount;
1789};
1791 uint16_t _pad_head;
1792 uint32_t y;
1793 uint32_t x;
1794};
1799#pragma pack(pop)
1800static_assert(sizeof(SerialCoordInputFrame) == 10, "Size mismatch");
1801
1802#pragma pack(push, 1)
1804 uint8_t _pad[6];
1805 uint16_t coordsCount;
1806};
1808 uint32_t y;
1809 uint32_t x;
1810};
1815#pragma pack(pop)
1816static_assert(sizeof(SerialCoordOutputFrame) == 8, "Size mismatch");
1817
1819 Accelerator *accel, uint32_t xTrans,
1820 uint32_t yTrans, uint32_t numCoords,
1821 size_t batchSizeLimit) {
1822 Logger &logger = conn->getLogger();
1823 logger.info("esitester", "Starting serial coord translate test");
1824
1825 // Generate random coordinates.
1826 std::mt19937 rng(0xDEADBEEF);
1827 std::uniform_int_distribution<uint32_t> dist(0, 1000000);
1828 std::vector<Coord> inputCoords;
1829 inputCoords.reserve(numCoords);
1830 for (uint32_t i = 0; i < numCoords; ++i) {
1831 inputCoords.push_back({dist(rng), dist(rng)});
1832 }
1833
1834 auto child = accel->getChildren().find(AppID("coord_translator_serial"));
1835 if (child == accel->getChildren().end())
1836 throw std::runtime_error("Serial coord translate test: no "
1837 "'coord_translator_serial' child found");
1838
1839 auto &ports = child->second->getPorts();
1840 auto portIter = ports.find(AppID("translate_coords_serial"));
1841 if (portIter == ports.end())
1842 throw std::runtime_error(
1843 "Serial coord translate test: no 'translate_coords_serial' port found");
1844
1845 WriteChannelPort &argPort = portIter->second.getRawWrite("arg");
1846 ReadChannelPort &resultPort = portIter->second.getRawRead("result");
1847
1848 argPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1849 resultPort.connect(ChannelPort::ConnectOptions(std::nullopt, false));
1850
1851 size_t sent = 0;
1852 while (sent < numCoords) {
1853 size_t batchSize = std::min(batchSizeLimit, numCoords - sent);
1854
1855 // Send Header. Only the first header needs the translation values, test the
1856 // subsequent ones with zero translation to verify that the hardware
1857 // correctly applies the first header's translation to the whole list.
1858 SerialCoordInputFrame headerFrame;
1859 headerFrame.header.coordsCount = (uint16_t)batchSize;
1860 headerFrame.header.xTranslation = sent == 0 ? xTrans : 0;
1861 headerFrame.header.yTranslation = sent == 0 ? yTrans : 0;
1862 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(&headerFrame),
1863 sizeof(headerFrame)));
1864
1865 // Send Data
1866 for (size_t i = 0; i < batchSize; ++i) {
1867 SerialCoordInputFrame dataFrame;
1868 dataFrame.data._pad_head = 0;
1869 dataFrame.data.x = inputCoords[sent + i].x;
1870 dataFrame.data.y = inputCoords[sent + i].y;
1871 argPort.write(MessageData(reinterpret_cast<const uint8_t *>(&dataFrame),
1872 sizeof(dataFrame)));
1873 }
1874 sent += batchSize;
1875 }
1876 // Send final header with count=0 to signal end of input
1877 SerialCoordHeader footerData{0, 0, 0};
1878 auto footer = MessageData::from(footerData);
1879 argPort.write(footer);
1880
1881 // Read results. The hardware echoes headers (with count) followed by
1882 // translated data frames, then autonomously sends a footer header with
1883 // count=0 to signal end of list.
1884 std::vector<Coord> results;
1885 while (true) {
1886 // Read Header
1887 MessageData msg;
1888 resultPort.read(msg);
1889 if (msg.getSize() != sizeof(SerialCoordOutputFrame))
1890 throw std::runtime_error("Unexpected result message size");
1891
1892 const auto *frame =
1893 reinterpret_cast<const SerialCoordOutputFrame *>(msg.getBytes());
1894 uint16_t batchCount = frame->header.coordsCount;
1895 if (batchCount == 0)
1896 break;
1897
1898 // Read Data
1899 for (uint16_t i = 0; i < batchCount; ++i) {
1900 resultPort.read(msg);
1901 if (msg.getSize() != sizeof(SerialCoordOutputFrame))
1902 throw std::runtime_error("Unexpected result message size");
1903 const auto *dFrame =
1904 reinterpret_cast<const SerialCoordOutputFrame *>(msg.getBytes());
1905 results.push_back({dFrame->data.y, dFrame->data.x});
1906 }
1907 }
1908
1909 // Verify
1910 bool passed = true;
1911 std::cout << "Serial coord translate test results:" << std::endl;
1912 if (results.size() != inputCoords.size()) {
1913 std::cout << "Result size mismatch. Expected " << inputCoords.size()
1914 << ", got " << results.size() << std::endl;
1915 passed = false;
1916 }
1917 for (size_t i = 0; i < std::min(inputCoords.size(), results.size()); ++i) {
1918 uint32_t expX = inputCoords[i].x + xTrans;
1919 uint32_t expY = inputCoords[i].y + yTrans;
1920 std::cout << " coord[" << i << "]=(" << inputCoords[i].x << ","
1921 << inputCoords[i].y << ") + (" << xTrans << "," << yTrans
1922 << ") = (" << results[i].x << "," << results[i].y
1923 << ") (expected (" << expX << "," << expY << "))";
1924 if (results[i].x != expX || results[i].y != expY) {
1925 std::cout << " MISMATCH!";
1926 passed = false;
1927 }
1928 std::cout << std::endl;
1929 }
1930
1931 argPort.disconnect();
1932 resultPort.disconnect();
1933
1934 if (!passed)
1935 throw std::runtime_error("Serial coord translate test failed");
1936
1937 logger.info("esitester", "Serial coord translate test passed");
1938 std::cout << "Serial coord translate test passed" << std::endl;
1939}
static void print(TypedAttr val, llvm::raw_ostream &os)
static void writePort(uint16_t port)
Write the port number to a file.
Definition RpcServer.cpp:39
Abstract class representing a connection to an accelerator.
Definition Accelerator.h:89
ServiceClass * getService(AppIDPath id={}, std::string implName={}, ServiceImplDetails details={}, HWClientDetails clients={})
Get a typed reference to a particular service type.
virtual void disconnect()
Disconnect from the accelerator cleanly.
Logger & getLogger() const
Definition Accelerator.h:94
AcceleratorServiceThread * getServiceThread()
Return a pointer to the accelerator 'service' thread (or threads).
void addPoll(HWModule &module)
Poll this module.
Top level accelerator class.
Definition Accelerator.h:70
Services provide connections to 'bundles' – collections of named, unidirectional communication channe...
Definition Ports.h:433
T * getAs() const
Cast this Bundle port to a subclass which is actually useful.
Definition Ports.h:461
ReadChannelPort & getRawRead(const std::string &name) const
Definition Ports.cpp:52
WriteChannelPort & getRawWrite(const std::string &name) const
Get access to the raw byte streams of a channel.
Definition Ports.cpp:42
Common options and code for ESI runtime tools.
Definition CLI.h:29
Context & getContext()
Get the context.
Definition CLI.h:63
AcceleratorConnection * connect()
Connect to the accelerator using the specified backend and connection.
Definition CLI.h:60
int esiParse(int argc, const char **argv)
Run the parser.
Definition CLI.h:46
AcceleratorConnections, Accelerators, and Manifests must all share a context.
Definition Context.h:34
Logger & getLogger()
Definition Context.h:69
BundlePort * resolvePort(const AppIDPath &path, AppIDPath &lastLookup) const
Attempt to resolve a path to a port.
Definition Design.cpp:72
const std::map< AppID, Instance * > & getChildren() const
Access the module's children by ID.
Definition Design.h:71
virtual void error(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an error.
Definition Logging.h:64
virtual void info(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report an informational message.
Definition Logging.h:75
void debug(const std::string &subsystem, const std::string &msg, const std::map< std::string, std::any > *details=nullptr)
Report a debug message.
Definition Logging.h:83
Class to parse a manifest.
Definition Manifest.h:39
Accelerator * buildAccelerator(AcceleratorConnection &acc) const
A logical chunk of data representing serialized data.
Definition Common.h:113
const uint8_t * getBytes() const
Definition Common.h:124
const T * as() const
Cast to a type.
Definition Common.h:148
size_t getSize() const
Get the size of the data in bytes.
Definition Common.h:138
static MessageData from(T &t)
Cast from a type to its raw bytes.
Definition Common.h:158
A ChannelPort which reads data from the accelerator.
Definition Ports.h:318
virtual void connect(std::function< bool(MessageData)> callback, const ConnectOptions &options={})
Definition Ports.cpp:69
virtual void disconnect() override
Definition Ports.h:323
virtual void read(MessageData &outData)
Specify a buffer to read into.
Definition Ports.h:358
A ChannelPort which sends data to the accelerator.
Definition Ports.h:206
virtual void disconnect() override
Definition Ports.h:217
void write(const MessageData &data)
A very basic blocking write API.
Definition Ports.h:222
virtual void connect(const ConnectOptions &options={}) override
Set up a connection to the accelerator.
Definition Ports.h:210
A function call which gets attached to a service port.
Definition Services.h:343
A function call which gets attached to a service port.
Definition Services.h:291
virtual void start()
In cases where necessary, enable host memory services.
Definition Services.h:261
A "slice" of some parent MMIO space.
Definition Services.h:181
Information about the Accelerator system.
Definition Services.h:113
A telemetry port which gets attached to a service port.
Definition Services.h:407
void connect()
Connect to a particular telemetry port. Offset should be non-nullopt.
Definition Services.cpp:389
static void * alignedAllocCompat(std::size_t alignment, std::size_t size)
static void hostmemWriteTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Test the hostmem write functionality.
static void aggregateHostmemBandwidthTest(AcceleratorConnection *, Accelerator *, uint32_t width, uint32_t xferCount, bool read, bool write)
static void dmaTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool read, bool write)
static void hostmemBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, uint32_t xferCount, const std::vector< uint32_t > &widths, bool read, bool write)
static void callbackTest(AcceleratorConnection *, Accelerator *, uint32_t iterations)
static void bandwidthTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, uint32_t xferCount, bool read, bool write)
static void serialCoordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords, size_t batchSizeLimit)
constexpr std::array< uint32_t, 5 > defaultWidths
Definition esitester.cpp:75
static void hostmemReadBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void bandwidthReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string formatBandwidth(double bytesPerSec)
Definition esitester.cpp:87
static void hostmemWriteBandwidthTest(AcceleratorConnection *conn, Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width, uint32_t xferCount)
static void alignedFreeCompat(void *ptr)
static void dmaWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void bandwidthWriteTest(AcceleratorConnection *conn, Accelerator *acc, size_t width, size_t xferCount)
static std::string humanBytes(uint64_t bytes)
static void streamingAddTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
Test the StreamingAdder module.
static void loopbackAddTest(AcceleratorConnection *, Accelerator *, uint32_t iterations, bool pipeline)
static void dmaReadTest(AcceleratorConnection *conn, Accelerator *acc, size_t width)
static void streamingAddTranslatedTest(AcceleratorConnection *, Accelerator *, uint32_t addAmt, uint32_t numItems)
static void hostmemTest(AcceleratorConnection *, Accelerator *, const std::vector< uint32_t > &widths, bool write, bool read)
static std::string humanTimeUS(uint64_t us)
int main(int argc, const char *argv[])
static void coordTranslateTest(AcceleratorConnection *, Accelerator *, uint32_t xTrans, uint32_t yTrans, uint32_t numCoords)
static std::string defaultWidthsStr()
Definition esitester.cpp:76
static void hostmemReadTest(Accelerator *acc, esi::services::HostMem::HostMemRegion &region, uint32_t width)
Definition debug.py:1
Definition esi.py:1
std::string toString(const std::any &a)
'Stringify' a std::any. This is used to log std::any values by some loggers.
Definition Logging.cpp:132
std::string toHex(void *val)
Definition Common.cpp:37
Translated argument struct for CoordTranslator.
std::span< const Coord > coordsSpan() const
const Coord * coords() const
static size_t allocSize(size_t numCoords)
Coord * coords()
Get pointer to trailing coords array.
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
Translated result struct for CoordTranslator.
static size_t allocSize(size_t numCoords)
std::span< Coord > coordsSpan()
Get span view of coords (requires coordsLength to be set first).
const Coord * coords() const
Coord * coords()
Get pointer to trailing coords array.
std::span< const Coord > coordsSpan() const
Test the CoordTranslator module using message translation.
uint32_t x
uint32_t y
Packed struct representing a parallel window argument for StreamingAdder.
Packed struct representing a parallel window result for StreamingAdder.
Test the StreamingAdder module using message translation.
uint32_t * inputData()
Get pointer to trailing input data array.
static size_t allocSize(size_t numItems)
std::span< uint32_t > inputDataSpan()
Get span view of input data (requires inputLength to be set first).
std::span< const uint32_t > inputDataSpan() const
const uint32_t * inputData() const
Translated result struct for StreamingAdder.
uint32_t * data()
Get pointer to trailing result data array.
std::span< uint32_t > dataSpan()
Get span view of result data (requires dataLength to be set first).
static size_t allocSize(size_t numItems)
std::span< const uint32_t > dataSpan() const
const uint32_t * data() const
RAII memory region for host memory.
Definition Services.h:237
virtual void * getDevicePtr() const
Sometimes the pointer the device sees is different from the pointer the host sees.
Definition Services.h:243
virtual void * getPtr() const =0
Get a pointer to the host memory.
virtual void flush()
Flush the memory region to ensure that the device sees the latest contents.
Definition Services.h:251
virtual std::size_t getSize() const =0
SerialCoordHeader header
SerialCoordData data
SerialCoordOutputData data
SerialCoordOutputHeader header